csvpath 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csvpath/__init__.py +1 -0
- csvpath/csvpath.py +368 -0
- csvpath/matching/__init__.py +1 -0
- csvpath/matching/expression_encoder.py +108 -0
- csvpath/matching/expression_math.py +123 -0
- csvpath/matching/expression_utility.py +29 -0
- csvpath/matching/functions/above.py +36 -0
- csvpath/matching/functions/add.py +24 -0
- csvpath/matching/functions/below.py +36 -0
- csvpath/matching/functions/concat.py +25 -0
- csvpath/matching/functions/count.py +44 -0
- csvpath/matching/functions/count_lines.py +12 -0
- csvpath/matching/functions/count_scans.py +13 -0
- csvpath/matching/functions/divide.py +30 -0
- csvpath/matching/functions/end.py +18 -0
- csvpath/matching/functions/every.py +33 -0
- csvpath/matching/functions/first.py +46 -0
- csvpath/matching/functions/function.py +31 -0
- csvpath/matching/functions/function_factory.py +114 -0
- csvpath/matching/functions/inf.py +38 -0
- csvpath/matching/functions/is_instance.py +95 -0
- csvpath/matching/functions/length.py +33 -0
- csvpath/matching/functions/lower.py +21 -0
- csvpath/matching/functions/minf.py +167 -0
- csvpath/matching/functions/multiply.py +27 -0
- csvpath/matching/functions/no.py +10 -0
- csvpath/matching/functions/notf.py +26 -0
- csvpath/matching/functions/now.py +33 -0
- csvpath/matching/functions/orf.py +28 -0
- csvpath/matching/functions/percent.py +29 -0
- csvpath/matching/functions/random.py +33 -0
- csvpath/matching/functions/regex.py +38 -0
- csvpath/matching/functions/subtract.py +28 -0
- csvpath/matching/functions/tally.py +36 -0
- csvpath/matching/functions/upper.py +21 -0
- csvpath/matching/matcher.py +215 -0
- csvpath/matching/matching_lexer.py +66 -0
- csvpath/matching/parser.out +1287 -0
- csvpath/matching/parsetab.py +1427 -0
- csvpath/matching/productions/equality.py +158 -0
- csvpath/matching/productions/expression.py +16 -0
- csvpath/matching/productions/header.py +30 -0
- csvpath/matching/productions/matchable.py +41 -0
- csvpath/matching/productions/term.py +11 -0
- csvpath/matching/productions/variable.py +15 -0
- csvpath/parser_utility.py +39 -0
- csvpath/scanning/__init__.py +1 -0
- csvpath/scanning/parser.out +1 -0
- csvpath/scanning/parsetab.py +231 -0
- csvpath/scanning/scanner.py +165 -0
- csvpath/scanning/scanning_lexer.py +47 -0
- csvpath-0.0.2.dist-info/METADATA +184 -0
- csvpath-0.0.2.dist-info/RECORD +54 -0
- csvpath-0.0.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import ply.yacc as yacc
|
|
2
|
+
from csvpath.matching.matching_lexer import MatchingLexer
|
|
3
|
+
from csvpath.parser_utility import ParserUtility
|
|
4
|
+
from csvpath.matching.productions.expression import Expression
|
|
5
|
+
from csvpath.matching.productions.equality import Equality
|
|
6
|
+
from csvpath.matching.productions.term import Term
|
|
7
|
+
from csvpath.matching.productions.variable import Variable
|
|
8
|
+
from csvpath.matching.productions.header import Header
|
|
9
|
+
from csvpath.matching.functions.function_factory import FunctionFactory
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InputException(Exception):
    """Raised when required input is missing — e.g. the match-part data
    string was not supplied, or parsing halted on a syntax error."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Matcher:
    """Parses the match-part of a csvpath (the bracketed ``[...]`` section)
    with PLY yacc and evaluates the resulting expressions against one CSV line.

    NOTE(review): PLY introspects this class — the ``tokens`` attribute, the
    ``p_*`` method names, and their docstrings ARE the grammar definition.
    Do not rename the methods or reword their docstrings.
    """

    # Token names shared with the lexer; PLY requires this class attribute.
    tokens = MatchingLexer.tokens

    def __init__(self, *, csvpath=None, data=None, line=None, headers=None):
        """Build the yacc parser and immediately parse ``data``.

        csvpath: owning csvpath object; optional (features degrade without it)
        data: the match-part text to parse — required, raises InputException if falsy
        line: the current CSV row (a list of values) to match against
        headers: the CSV header names (warns, but proceeds, when missing)
        """
        if not headers:
            # raise Exception("no headers!")
            print("\nWARNING: no headers available. this is only Ok for unit testing.")
        if not data:
            raise InputException(f"need data input: data: {data}")
        self.path = data
        self.csvpath = csvpath
        self.line = line
        self.headers = headers
        # List of [Expression, cached_result] pairs. cached_result starts as
        # None and is set True/False by matches(); filled in by p_expression
        # as the grammar reduces.
        self.expressions = []
        self.lexer = MatchingLexer()
        self.block_print = csvpath.block_print if csvpath else True
        # Parsing happens here, as a side effect of construction.
        self.parser = yacc.yacc(module=self, start="match_part")
        self.parser.parse(data, lexer=self.lexer.lexer)

    def __str__(self):
        """Debug representation of the matcher's current state."""
        return f"""
        line: {self.line}
        csvpath: {self.csvpath}
        parser: {self.parser}
        lexer: {self.lexer}
        """

    def print(self, msg: str) -> None:
        """Delegate printing to the owning csvpath; silently a no-op without one."""
        if self.csvpath:
            self.csvpath.print(msg)

    def header_index(self, name: str) -> int:
        """Return the positional index of header ``name``.

        Returns None (despite the ``int`` annotation) when there are no
        headers or the name is not found.
        """
        if not self.headers:
            return None
        for i, n in enumerate(self.headers):
            self.print(f" ...header {i} = {n} ?= {name}")
            if n == name:
                return i
        return None

    def header_value(self, name: str) -> Any:
        """Return the current line's value under header ``name``, or None
        when the header is unknown."""
        n = self.header_index(name)
        ret = None
        if n is None:
            pass
        else:
            ret = self.line[n]
        return ret

    def matches(self, *, syntax_only=False) -> bool:
        """Evaluate all parsed expressions against the current line.

        All expressions must match (AND semantics); evaluation stops at the
        first failure. Results are cached in-place in self.expressions[i][1],
        so a second call reuses earlier outcomes. With syntax_only=True a
        failed match is still counted as True (only parseability matters).
        """
        ret = True
        for i, et in enumerate(self.expressions):
            if et[1] is True:
                # cached result from an earlier call
                ret = True
            elif et[1] is False:
                ret = False
            elif not et[0].matches(skip=[]) and not syntax_only:
                et[1] = False
                ret = False
            else:
                et[1] = True
                ret = True
            if not ret:
                break
        return ret

    def get_variable(self, name: str, *, tracking=None, set_if_none=None) -> Any:
        """Read a variable from the owning csvpath.

        NOTE(review): raises AttributeError when self.csvpath is None —
        presumably only called in fully-wired (non-test) use; confirm.
        """
        return self.csvpath.get_variable(
            name, tracking=tracking, set_if_none=set_if_none
        )

    def set_variable(self, name: str, *, value: Any, tracking=None) -> None:
        """Write a variable on the owning csvpath (see get_variable note)."""
        return self.csvpath.set_variable(name, value=value, tracking=tracking)

    def last_header_index(self) -> int:
        """Index of the last value in the current line, or None when the
        line is missing/empty. Based on the line, not on self.headers."""
        if self.line and len(self.line) > 0:
            return len(self.line) - 1
        return None

    def last_header_name(self) -> str:
        """Name of the header at the last line position, or None.

        NOTE(review): indexes self.headers by last_header_index(), which is
        derived from self.line — raises if the line is longer than the
        headers list; confirm lines and headers always align.
        """
        if self.headers and len(self.headers) > 0:
            return self.headers[self.last_header_index()]
        return None

    # ===================
    # productions
    # ===================

    def p_error(self, p):
        """PLY error hook: report the bad token, then abort the parse."""
        ParserUtility().error(self.parser, p)
        raise InputException("halting for error")

    def p_match_part(self, p):
        # Start symbol. No action needed: expressions are collected as a
        # side effect in p_expression.
        """match_part : LEFT_BRACKET expression RIGHT_BRACKET
        | LEFT_BRACKET expressions RIGHT_BRACKET
        """

    def p_expressions(self, p):
        # One or more expressions; no action needed (see p_expression).
        """expressions : expression
        | expressions expression
        """

    def p_expression(self, p):
        """expression : function
        | assignment_or_equality
        | header"""
        # Wrap the child production in an Expression and register it with
        # a None (not-yet-evaluated) cache slot for matches().
        e = Expression(self)
        e.add_child(p[1])
        self.expressions.append([e, None])
        p[0] = e

    def p_function(self, p):
        """function : NAME OPEN_PAREN CLOSE_PAREN
        | NAME OPEN_PAREN equality CLOSE_PAREN
        | NAME OPEN_PAREN function CLOSE_PAREN
        | NAME OPEN_PAREN var_or_header CLOSE_PAREN
        | NAME OPEN_PAREN term CLOSE_PAREN
        """
        name = p[1]
        # len(p) == 5 means the 4-symbol form NAME ( child ); otherwise the
        # function was called with no argument.
        child = p[3] if p and len(p) == 5 else None
        f = FunctionFactory.get_function(self, name=name, child=child)
        ParserUtility.enumerate_p("IN p_function", p)
        p[0] = f

    def p_assignment_or_equality(self, p):
        """assignment_or_equality : equality
        | assignment
        """
        p[0] = p[1]

    def p_equality(self, p):
        """
        equality : function op term
        | function op function
        | function op var_or_header
        | var_or_header op function
        | var_or_header op term
        | var_or_header op var_or_header
        | term op var_or_header
        | term op term
        | term op function
        | equality op equality
        | equality op term
        | equality op function
        """
        # Every alternative is a 3-symbol left/op/right triple.
        e = Equality(self)
        e.set_left(p[1])
        e.set_operation(p[2])
        e.set_right(p[3])
        p[0] = e

    def p_op(self, p):
        """op : EQUALS
        | OPERATION
        """
        p[0] = p[1]

    def p_assignment(self, p):
        """
        assignment : var ASSIGNMENT var
        | var ASSIGNMENT term
        | var ASSIGNMENT function
        | var ASSIGNMENT header
        """
        # Assignments reuse Equality with "=" as the operation.
        e = Equality(self)
        e.set_left(p[1])
        e.set_operation(p[2])
        e.set_right(p[3])
        p[0] = e

    def p_term(self, p):
        """term : QUOTE NAME QUOTE
        | QUOTE DATE QUOTE
        | QUOTE NUMBER QUOTE
        | NUMBER
        | REGEX
        """
        # len(p) == 4 is the quoted form; the payload is the middle symbol.
        if len(p) == 4:
            p[0] = Term(self, value=p[2])
        else:
            p[0] = Term(self, value=p[1])

    def p_var_or_header(self, p):
        """var_or_header : header
        | var
        """
        p[0] = p[1]

    def p_var(self, p):
        """var : VAR_SYM NAME"""
        # "@name" — a csvpath variable reference.
        v = Variable(self, name=p[2])
        p[0] = v

    def p_header(self, p):
        """header : HEADER_SYM NAME
        | HEADER_SYM NUMBER
        """
        # "#name" or "#index" — a reference to a CSV column.
        h = Header(self, name=p[2])
        p[0] = h
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import ply.lex as lex
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MatchingLexer(object):
    """PLY lexer for the match-part of a csvpath.

    NOTE(review): PLY introspects this class — the ``tokens`` list, the
    ``t_*`` attribute names, and the ``t_*`` function docstrings (which hold
    the token regexes) are all semantic. Do not rename or reword them.
    """

    tokens = [
        "DATE",
        "NAME",
        "NUMBER",
        "EQUALS",
        "OPERATION",
        "ASSIGNMENT",
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        "OPEN_PAREN",
        "CLOSE_PAREN",
        "HEADER_SYM",
        "VAR_SYM",
        "REGEX",
        "QUOTE",
    ]

    # Whitespace is skipped entirely.
    t_ignore = " \t\n\r"
    t_QUOTE = r'"'
    t_OPEN_PAREN = r"\("
    t_CLOSE_PAREN = r"\)"
    t_HEADER_SYM = r"\#"
    # "==" must be declared before "=": as a string rule PLY orders these by
    # decreasing regex length, so EQUALS wins over ASSIGNMENT on "==".
    t_EQUALS = r"=="
    t_OPERATION = r"[><,\*\+\-]"
    t_ASSIGNMENT = r"="
    t_VAR_SYM = r"@"
    t_LEFT_BRACKET = r"\["
    t_RIGHT_BRACKET = r"\]"
    # NOTE(review): NAME deliberately allows spaces, dots, %, |, $ etc. —
    # presumably to cover header names with embedded punctuation; confirm.
    t_NAME = r"[\$A-Za-z0-9\.%_|\s \-]+"
    # A /.../ literal with backslash-escapes allowed inside.
    t_REGEX = r"/(?:[^/\\]|\\.)*/"

    def t_DATE(self, t):
        # Function rules run before string rules, so dates are recognized
        # ahead of the broad NAME pattern. The docstring IS the regex.
        r"\d+[/-]\d+[/-]\d+"
        return t

    def t_NUMBER(self, t):
        # Docstring is the regex. Converts the lexeme to int, falling back
        # to float for values with a decimal point.
        r"\d*\.?\d+"
        try:
            t.value = int(t.value)
        except ValueError:
            try:
                t.value = float(t.value)
            except ValueError:
                raise Exception(
                    f"matching_lexer.t_NUMBER: cannot convert {t}: {t.value}"
                )
        return t

    def t_error(self, t):
        """PLY error hook: report the bad character and skip past it."""
        print(f"Illegal character '{t.value[0]}'")
        t.lexer.skip(1)

    def __init__(self):
        # Build the PLY lexer from this module's t_* rules.
        self.lexer = lex.lex(module=self)

    def tokenize(self, data):
        """Yield all tokens of ``data`` (generator; mainly for debugging/tests)."""
        self.lexer.input(data)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            yield tok