csvpath 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. csvpath/__init__.py +1 -0
  2. csvpath/csvpath.py +368 -0
  3. csvpath/matching/__init__.py +1 -0
  4. csvpath/matching/expression_encoder.py +108 -0
  5. csvpath/matching/expression_math.py +123 -0
  6. csvpath/matching/expression_utility.py +29 -0
  7. csvpath/matching/functions/above.py +36 -0
  8. csvpath/matching/functions/add.py +24 -0
  9. csvpath/matching/functions/below.py +36 -0
  10. csvpath/matching/functions/concat.py +25 -0
  11. csvpath/matching/functions/count.py +44 -0
  12. csvpath/matching/functions/count_lines.py +12 -0
  13. csvpath/matching/functions/count_scans.py +13 -0
  14. csvpath/matching/functions/divide.py +30 -0
  15. csvpath/matching/functions/end.py +18 -0
  16. csvpath/matching/functions/every.py +33 -0
  17. csvpath/matching/functions/first.py +46 -0
  18. csvpath/matching/functions/function.py +31 -0
  19. csvpath/matching/functions/function_factory.py +114 -0
  20. csvpath/matching/functions/inf.py +38 -0
  21. csvpath/matching/functions/is_instance.py +95 -0
  22. csvpath/matching/functions/length.py +33 -0
  23. csvpath/matching/functions/lower.py +21 -0
  24. csvpath/matching/functions/minf.py +167 -0
  25. csvpath/matching/functions/multiply.py +27 -0
  26. csvpath/matching/functions/no.py +10 -0
  27. csvpath/matching/functions/notf.py +26 -0
  28. csvpath/matching/functions/now.py +33 -0
  29. csvpath/matching/functions/orf.py +28 -0
  30. csvpath/matching/functions/percent.py +29 -0
  31. csvpath/matching/functions/random.py +33 -0
  32. csvpath/matching/functions/regex.py +38 -0
  33. csvpath/matching/functions/subtract.py +28 -0
  34. csvpath/matching/functions/tally.py +36 -0
  35. csvpath/matching/functions/upper.py +21 -0
  36. csvpath/matching/matcher.py +215 -0
  37. csvpath/matching/matching_lexer.py +66 -0
  38. csvpath/matching/parser.out +1287 -0
  39. csvpath/matching/parsetab.py +1427 -0
  40. csvpath/matching/productions/equality.py +158 -0
  41. csvpath/matching/productions/expression.py +16 -0
  42. csvpath/matching/productions/header.py +30 -0
  43. csvpath/matching/productions/matchable.py +41 -0
  44. csvpath/matching/productions/term.py +11 -0
  45. csvpath/matching/productions/variable.py +15 -0
  46. csvpath/parser_utility.py +39 -0
  47. csvpath/scanning/__init__.py +1 -0
  48. csvpath/scanning/parser.out +1 -0
  49. csvpath/scanning/parsetab.py +231 -0
  50. csvpath/scanning/scanner.py +165 -0
  51. csvpath/scanning/scanning_lexer.py +47 -0
  52. csvpath-0.0.2.dist-info/METADATA +184 -0
  53. csvpath-0.0.2.dist-info/RECORD +54 -0
  54. csvpath-0.0.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,215 @@
1
+ import ply.yacc as yacc
2
+ from csvpath.matching.matching_lexer import MatchingLexer
3
+ from csvpath.parser_utility import ParserUtility
4
+ from csvpath.matching.productions.expression import Expression
5
+ from csvpath.matching.productions.equality import Equality
6
+ from csvpath.matching.productions.term import Term
7
+ from csvpath.matching.productions.variable import Variable
8
+ from csvpath.matching.productions.header import Header
9
+ from csvpath.matching.functions.function_factory import FunctionFactory
10
+ from typing import Any
11
+
12
+
13
class InputException(Exception):
    """Signal that the matcher was given unusable input.

    Raised by ``Matcher`` when no match-part text is supplied and when the
    parser reports a syntax error.
    """
15
+
16
+
17
class Matcher:
    """Parse the match part of a csvpath and evaluate it against one line.

    The instance itself is handed to PLY as the grammar module
    (``yacc.yacc(module=self)``), so the ``p_*`` methods below are the
    grammar productions and their docstrings ARE the grammar rules. Those
    docstrings must therefore contain nothing but grammar; explanations live
    in ``#`` comments next to them.

    Parsing happens in the constructor and fills ``self.expressions`` with
    ``[Expression, cached_result]`` pairs, where ``cached_result`` starts as
    ``None`` and is set to ``True``/``False`` by ``matches()``.
    """

    # PLY reads the list of terminal names from this attribute.
    tokens = MatchingLexer.tokens

    def __init__(self, *, csvpath=None, data=None, line=None, headers=None):
        """Build the parser and immediately parse ``data``.

        Args:
            csvpath: optional owning object. When present it supplies
                ``block_print`` and receives ``print``, ``get_variable`` and
                ``set_variable`` calls.
            data: the match-part text to parse. Required.
            line: the values of the line being matched, indexable by header
                position.
            headers: the header names; their positions are used to index
                ``line``.

        Raises:
            InputException: if ``data`` is empty, or if the parser reports a
                syntax error while parsing it (see ``p_error``).
        """
        if not headers:
            # Deliberately only a warning: header-less matchers are still
            # constructed (the message says this is meant for unit tests).
            print("\nWARNING: no headers available. this is only Ok for unit testing.")
        if not data:
            raise InputException(f"need data input: data: {data}")
        self.path = data
        self.csvpath = csvpath
        self.line = line
        self.headers = headers
        self.expressions = []
        self.lexer = MatchingLexer()
        self.block_print = csvpath.block_print if csvpath else True
        # self.parser must be assigned before parse() runs because p_error
        # passes it to ParserUtility.
        self.parser = yacc.yacc(module=self, start="match_part")
        self.parser.parse(data, lexer=self.lexer.lexer)

    def __str__(self):
        """Return a multi-line summary of the line, csvpath, parser and lexer."""
        return f"""
            line: {self.line}
            csvpath: {self.csvpath}
            parser: {self.parser}
            lexer: {self.lexer}
        """

    def print(self, msg: str) -> None:
        """Forward ``msg`` to the owning csvpath's ``print``; no-op without one."""
        if self.csvpath:
            self.csvpath.print(msg)

    def header_index(self, name: str) -> int:
        """Return the position of header ``name``.

        Returns ``None`` when there are no headers or no header equals
        ``name``. Each comparison is reported through ``self.print``.
        """
        if not self.headers:
            return None
        for position, header in enumerate(self.headers):
            self.print(f" ...header {position} = {header} ?= {name}")
            if header == name:
                return position
        return None

    def header_value(self, name: str) -> Any:
        """Return the current line's value under header ``name``.

        Returns ``None`` when the header is unknown, when there is no current
        line, or when the line is too short to have a value at that header's
        position (a ragged row), instead of raising.
        """
        position = self.header_index(name)
        if position is None or self.line is None:
            return None
        if position >= len(self.line):
            # Row has fewer values than there are headers.
            return None
        return self.line[position]

    def matches(self, *, syntax_only=False) -> bool:
        """Return True when every parsed expression matches.

        Expressions are checked in parse order and evaluation stops at the
        first one that fails. Each expression's outcome is cached in its
        ``[expression, result]`` pair, so an expression that already has a
        cached result is not evaluated again.

        Args:
            syntax_only: when true, every expression is still evaluated but
                its result is ignored and recorded as a match.
        """
        for entry in self.expressions:
            if entry[1] is True:
                continue
            if entry[1] is False:
                return False
            # Evaluate first, then apply syntax_only: the evaluation itself
            # always runs, exactly one time per expression.
            outcome = entry[0].matches(skip=[])
            entry[1] = bool(outcome) or bool(syntax_only)
            if not entry[1]:
                return False
        return True

    def get_variable(self, name: str, *, tracking=None, set_if_none=None) -> Any:
        """Delegate to the owning csvpath's ``get_variable``."""
        return self.csvpath.get_variable(
            name, tracking=tracking, set_if_none=set_if_none
        )

    def set_variable(self, name: str, *, value: Any, tracking=None) -> None:
        """Delegate to the owning csvpath's ``set_variable``."""
        return self.csvpath.set_variable(name, value=value, tracking=tracking)

    def last_header_index(self) -> int:
        """Return the index of the last value in the current line.

        Returns ``None`` when there is no line or the line is empty.
        """
        if self.line:
            return len(self.line) - 1
        return None

    def last_header_name(self) -> str:
        """Return the header name at the position of the line's last value.

        Returns ``None`` when there are no headers, when there is no last
        index (missing or empty line), or when the line is longer than the
        header list so that no header exists at that position.
        """
        if not self.headers:
            return None
        position = self.last_header_index()
        if position is None or position >= len(self.headers):
            return None
        return self.headers[position]

    def _binary_node(self, p):
        """Build an Equality from a three-symbol production: left, operator, right."""
        node = Equality(self)
        node.set_left(p[1])
        node.set_operation(p[2])
        node.set_right(p[3])
        return node

    # ===================
    # productions
    # ===================
    #
    # NOTE: the docstrings of the p_* methods are grammar rules read by PLY.
    # Do not add prose to them.

    # Syntax-error hook: report through ParserUtility, then abort the parse.
    def p_error(self, p):
        ParserUtility().error(self.parser, p)
        raise InputException("halting for error")

    # Start symbol: one or more expressions inside square brackets.
    def p_match_part(self, p):
        """match_part : LEFT_BRACKET expression RIGHT_BRACKET
                      | LEFT_BRACKET expressions RIGHT_BRACKET
        """

    # A sequence of expressions; each one registers itself in p_expression.
    def p_expressions(self, p):
        """expressions : expression
                       | expressions expression
        """

    # Wrap the parsed child in an Expression and queue it, with an empty
    # cached result, for matches().
    def p_expression(self, p):
        """expression : function
                      | assignment_or_equality
                      | header"""
        expression = Expression(self)
        expression.add_child(p[1])
        self.expressions.append([expression, None])
        p[0] = expression

    # A named function with zero or one argument. len(p) == 5 is the
    # one-argument form (NAME, "(", child, ")").
    def p_function(self, p):
        """function : NAME OPEN_PAREN CLOSE_PAREN
                    | NAME OPEN_PAREN equality CLOSE_PAREN
                    | NAME OPEN_PAREN function CLOSE_PAREN
                    | NAME OPEN_PAREN var_or_header CLOSE_PAREN
                    | NAME OPEN_PAREN term CLOSE_PAREN
        """
        name = p[1]
        child = p[3] if len(p) == 5 else None
        function = FunctionFactory.get_function(self, name=name, child=child)
        ParserUtility.enumerate_p("IN p_function", p)
        p[0] = function

    def p_assignment_or_equality(self, p):
        """assignment_or_equality : equality
                                  | assignment
        """
        p[0] = p[1]

    # Any "left op right" comparison/operation becomes an Equality node.
    def p_equality(self, p):
        """
        equality : function op term
                 | function op function
                 | function op var_or_header
                 | var_or_header op function
                 | var_or_header op term
                 | var_or_header op var_or_header
                 | term op var_or_header
                 | term op term
                 | term op function
                 | equality op equality
                 | equality op term
                 | equality op function
        """
        p[0] = self._binary_node(p)

    def p_op(self, p):
        """op : EQUALS
              | OPERATION
        """
        p[0] = p[1]

    # Assignments reuse the Equality node; the operation is the ASSIGNMENT
    # token's value.
    def p_assignment(self, p):
        """
        assignment : var ASSIGNMENT var
                   | var ASSIGNMENT term
                   | var ASSIGNMENT function
                   | var ASSIGNMENT header
        """
        p[0] = self._binary_node(p)

    # Quoted forms have three symbols (len(p) == 4) and the value is the
    # middle one; bare NUMBER / REGEX carry the value directly.
    def p_term(self, p):
        """term : QUOTE NAME QUOTE
                | QUOTE DATE QUOTE
                | QUOTE NUMBER QUOTE
                | NUMBER
                | REGEX
        """
        value = p[2] if len(p) == 4 else p[1]
        p[0] = Term(self, value=value)

    def p_var_or_header(self, p):
        """var_or_header : header
                         | var
        """
        p[0] = p[1]

    def p_var(self, p):
        """var : VAR_SYM NAME"""
        p[0] = Variable(self, name=p[2])

    # A header is referenced by name or by number.
    def p_header(self, p):
        """header : HEADER_SYM NAME
                  | HEADER_SYM NUMBER
        """
        p[0] = Header(self, name=p[2])
@@ -0,0 +1,66 @@
1
+ import ply.lex as lex
2
+
3
+
4
class MatchingLexer:
    """PLY lexer for the match part of a csvpath.

    ``lex.lex(module=self)`` builds the lexer from the ``tokens`` list and
    the ``t_*`` attributes defined here. For the function rules (``t_DATE``
    and ``t_NUMBER``) the docstring IS the token's regular expression, so
    those docstrings must not contain anything else.
    """

    def __init__(self):
        """Create the underlying PLY lexer from this instance's rules."""
        self.lexer = lex.lex(module=self)

    # Names of every terminal this lexer can emit.
    tokens = [
        "DATE",
        "NAME",
        "NUMBER",
        "EQUALS",
        "OPERATION",
        "ASSIGNMENT",
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        "OPEN_PAREN",
        "CLOSE_PAREN",
        "HEADER_SYM",
        "VAR_SYM",
        "REGEX",
        "QUOTE",
    ]

    # Characters skipped between tokens.
    t_ignore = " \t\n\r"

    # --- delimiters -------------------------------------------------------
    t_LEFT_BRACKET = r"\["
    t_RIGHT_BRACKET = r"\]"
    t_OPEN_PAREN = r"\("
    t_CLOSE_PAREN = r"\)"
    t_QUOTE = r'"'

    # --- sigils -----------------------------------------------------------
    t_HEADER_SYM = r"\#"
    t_VAR_SYM = r"@"

    # --- operators --------------------------------------------------------
    t_EQUALS = r"=="
    t_ASSIGNMENT = r"="
    t_OPERATION = r"[><,\*\+\-]"

    # --- values -----------------------------------------------------------
    t_NAME = r"[\$A-Za-z0-9\.%_|\s \-]+"
    t_REGEX = r"/(?:[^/\\]|\\.)*/"

    # Must stay above t_NUMBER: PLY tries function rules in definition
    # order, and a date starts with digits that NUMBER would also accept.
    def t_DATE(self, t):
        r"\d+[/-]\d+[/-]\d+"
        return t

    # Converts the matched text to an int when possible, otherwise a float.
    def t_NUMBER(self, t):
        r"\d*\.?\d+"
        raw = t.value
        try:
            converted = int(raw)
        except ValueError:
            try:
                converted = float(raw)
            except ValueError:
                raise Exception(
                    f"matching_lexer.t_NUMBER: cannot convert {t}: {t.value}"
                )
        t.value = converted
        return t

    def t_error(self, t):
        """Report the offending character on stdout and skip past it."""
        bad_char = t.value[0]
        print(f"Illegal character '{bad_char}'")
        t.lexer.skip(1)

    def tokenize(self, data):
        """Feed ``data`` to the lexer and yield tokens until it returns a falsy value."""
        self.lexer.input(data)
        next_token = self.lexer.token
        tok = next_token()
        while tok:
            yield tok
            tok = next_token()