csvpath 0.0.45__tar.gz → 0.0.451__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {csvpath-0.0.45 → csvpath-0.0.451}/PKG-INFO +10 -10
- {csvpath-0.0.45 → csvpath-0.0.451}/README.md +9 -9
- csvpath-0.0.451/csvpath/matching/functions/above.py +35 -0
- csvpath-0.0.451/csvpath/matching/functions/correlate.md +31 -0
- csvpath-0.0.451/csvpath/matching/functions/correlate.py +112 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/exists.py +1 -1
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/function_factory.py +3 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/stop.md +2 -6
- {csvpath-0.0.45 → csvpath-0.0.451}/pyproject.toml +2 -2
- csvpath-0.0.45/csvpath/matching/functions/above.py +0 -36
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/__init__.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/csvpath.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/csvpaths.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/exceptions.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/__init__.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/expression_encoder.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/expression_utility.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/__init__.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/add.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/any.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/any.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/average.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/below.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/column.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/concat.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count_lines.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count_scans.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/divide.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/end.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/end.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/equals.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/every.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/every.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/first.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/first.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/function.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/header.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/header.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/in.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/increment.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/increment.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/inf.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/jinja.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/jinjaf.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/last.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/last.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/length.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/lower.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/max.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/minf.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/mod.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/multiply.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/no.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/no.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/nonef.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/notf.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/now.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/now.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/orf.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/percent.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/print.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/printf.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/random.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/regex.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/stop.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/strip.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/substring.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/subtract.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/subtract.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/tally.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/tally.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/upper.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/variable.md +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/variable.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/when._ +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/yes.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/matcher.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/matching_lexer.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/parser.out +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/parsetab.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/__init__.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/equality.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/expression.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/header.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/matchable.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/qualified.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/term.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/variable.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/parser_utility.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/__init__.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/parser.out +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/parsetab.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/scanner.py +0 -0
- {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/scanning_lexer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csvpath
|
|
3
|
-
Version: 0.0.45
|
|
3
|
+
Version: 0.0.451
|
|
4
4
|
Summary: A declarative language for data extraction and validation of CSV files
|
|
5
5
|
Author: David Kershaw
|
|
6
6
|
Author-email: dk107dk@hotmail.com
|
|
@@ -88,7 +88,7 @@ Two classes do all the work: CsvPath and CsvPaths. Each has only a few external
|
|
|
88
88
|
- a single .csv file or
|
|
89
89
|
- a directory of .csv files
|
|
90
90
|
|
|
91
|
-
There are several ways to set up csvpath file references. Read <a href='docs/files.md'>more about filenames</a>.
|
|
91
|
+
There are several ways to set up csvpath file references. Read <a href='https://github.com/dk107dk/csvpath/blob/main/docs/files.md'>more about filenames</a>.
|
|
92
92
|
|
|
93
93
|
This is a very basic programmatic use of CsvPath. For lots more examples, see the unit tests.
|
|
94
94
|
|
|
@@ -178,7 +178,7 @@ A string, number, or regular expression value.
|
|
|
178
178
|
|--------|---------|---------------|
|
|
179
179
|
|A value | Always true | `"a value"` |
|
|
180
180
|
|
|
181
|
-
<a href='docs/terms.md'>Read about terms here</a>.
|
|
181
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
|
|
182
182
|
|
|
183
183
|
## Function
|
|
184
184
|
A composable unit of functionality called once for every row scanned.
|
|
@@ -187,7 +187,7 @@ A composable unit of functionality called once for every row scanned.
|
|
|
187
187
|
|--------|---------|---------------|
|
|
188
188
|
|Calculated | Calculated | `count()` |
|
|
189
189
|
|
|
190
|
-
<a href='docs/functions.md'>Read about functions here</a>.
|
|
190
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
|
|
191
191
|
|
|
192
192
|
## Variable
|
|
193
193
|
A stored value that is set or retrieved once per row scanned.
|
|
@@ -196,7 +196,7 @@ A stored value that is set or retrieved once per row scanned.
|
|
|
196
196
|
|--------|---------|---------------|
|
|
197
197
|
|A value | True when set, unless `onchange`. Used alone it is an existence test. | `@firstname` |
|
|
198
198
|
|
|
199
|
-
<a href='docs/variables.md'>Read about variables here</a>.
|
|
199
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/variables.md'>Read about variables here</a>.
|
|
200
200
|
|
|
201
201
|
## Header
|
|
202
202
|
|
|
@@ -206,7 +206,7 @@ A named column or a column identified by 0-based index.
|
|
|
206
206
|
|--------|---------|---------------|
|
|
207
207
|
|A value | Calculated. Used alone it is an existence test. | `#area_code` |
|
|
208
208
|
|
|
209
|
-
<a href='docs/
|
|
209
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/headers.md'>Read about headers here</a>.
|
|
210
210
|
|
|
211
211
|
## Equality
|
|
212
212
|
Two of the other types joined with an "=" or "==".
|
|
@@ -245,7 +245,7 @@ Says to set the `firstname` variable to the value of the first column when the f
|
|
|
245
245
|
|
|
246
246
|
Qualifiers are tokens added to variable, header, and function names. They are separated from the names and each other with `.` characters. Each qualifier causes the qualified match component to behave in a different way than it otherwise would.
|
|
247
247
|
|
|
248
|
-
<a href='docs/qualifiers.md'>Read about qualifiers here.</a>
|
|
248
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/qualifiers.md'>Read about qualifiers here.</a>
|
|
249
249
|
|
|
250
250
|
<a name="examples"></a>
|
|
251
251
|
## More Examples
|
|
@@ -261,9 +261,9 @@ In the path above, the rules applied are:
|
|
|
261
261
|
|
|
262
262
|
There are more examples scattered throughout the documentation. Good places to look include:
|
|
263
263
|
|
|
264
|
-
- The individual <a href='docs/functions.md'>function descriptions</a>
|
|
265
|
-
- The <a href='tests'>unit tests</a> _(not realistic, but a good source of ideas)_
|
|
266
|
-
- A few <a href='docs/examples.md'>more real-looking examples</a>
|
|
264
|
+
- The individual <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>function descriptions</a>
|
|
265
|
+
- The <a href='https://github.com/dk107dk/csvpath/tree/main/tests'>unit tests</a> _(not realistic, but a good source of ideas)_
|
|
266
|
+
- A few <a href='https://github.com/dk107dk/csvpath/blob/main/docs/examples.md'>more real-looking examples</a>
|
|
267
267
|
|
|
268
268
|
# Not Ready For Production
|
|
269
269
|
Anything could change and performance could be better. This project is a hobby.
|
|
@@ -58,7 +58,7 @@ Two classes do all the work: CsvPath and CsvPaths. Each has only a few external
|
|
|
58
58
|
- a single .csv file or
|
|
59
59
|
- a directory of .csv files
|
|
60
60
|
|
|
61
|
-
There are several ways to set up csvpath file references. Read <a href='docs/files.md'>more about filenames</a>.
|
|
61
|
+
There are several ways to set up csvpath file references. Read <a href='https://github.com/dk107dk/csvpath/blob/main/docs/files.md'>more about filenames</a>.
|
|
62
62
|
|
|
63
63
|
This is a very basic programmatic use of CsvPath. For lots more examples, see the unit tests.
|
|
64
64
|
|
|
@@ -148,7 +148,7 @@ A string, number, or regular expression value.
|
|
|
148
148
|
|--------|---------|---------------|
|
|
149
149
|
|A value | Always true | `"a value"` |
|
|
150
150
|
|
|
151
|
-
<a href='docs/terms.md'>Read about terms here</a>.
|
|
151
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
|
|
152
152
|
|
|
153
153
|
## Function
|
|
154
154
|
A composable unit of functionality called once for every row scanned.
|
|
@@ -157,7 +157,7 @@ A composable unit of functionality called once for every row scanned.
|
|
|
157
157
|
|--------|---------|---------------|
|
|
158
158
|
|Calculated | Calculated | `count()` |
|
|
159
159
|
|
|
160
|
-
<a href='docs/functions.md'>Read about functions here</a>.
|
|
160
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
|
|
161
161
|
|
|
162
162
|
## Variable
|
|
163
163
|
A stored value that is set or retrieved once per row scanned.
|
|
@@ -166,7 +166,7 @@ A stored value that is set or retrieved once per row scanned.
|
|
|
166
166
|
|--------|---------|---------------|
|
|
167
167
|
|A value | True when set, unless `onchange`. Used alone it is an existence test. | `@firstname` |
|
|
168
168
|
|
|
169
|
-
<a href='docs/variables.md'>Read about variables here</a>.
|
|
169
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/variables.md'>Read about variables here</a>.
|
|
170
170
|
|
|
171
171
|
## Header
|
|
172
172
|
|
|
@@ -176,7 +176,7 @@ A named column or a column identified by 0-based index.
|
|
|
176
176
|
|--------|---------|---------------|
|
|
177
177
|
|A value | Calculated. Used alone it is an existence test. | `#area_code` |
|
|
178
178
|
|
|
179
|
-
<a href='docs/
|
|
179
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/headers.md'>Read about headers here</a>.
|
|
180
180
|
|
|
181
181
|
## Equality
|
|
182
182
|
Two of the other types joined with an "=" or "==".
|
|
@@ -215,7 +215,7 @@ Says to set the `firstname` variable to the value of the first column when the f
|
|
|
215
215
|
|
|
216
216
|
Qualifiers are tokens added to variable, header, and function names. They are separated from the names and each other with `.` characters. Each qualifier causes the qualified match component to behave in a different way than it otherwise would.
|
|
217
217
|
|
|
218
|
-
<a href='docs/qualifiers.md'>Read about qualifiers here.</a>
|
|
218
|
+
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/qualifiers.md'>Read about qualifiers here.</a>
|
|
219
219
|
|
|
220
220
|
<a name="examples"></a>
|
|
221
221
|
## More Examples
|
|
@@ -231,9 +231,9 @@ In the path above, the rules applied are:
|
|
|
231
231
|
|
|
232
232
|
There are more examples scattered throughout the documentation. Good places to look include:
|
|
233
233
|
|
|
234
|
-
- The individual <a href='docs/functions.md'>function descriptions</a>
|
|
235
|
-
- The <a href='tests'>unit tests</a> _(not realistic, but a good source of ideas)_
|
|
236
|
-
- A few <a href='docs/examples.md'>more real-looking examples</a>
|
|
234
|
+
- The individual <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>function descriptions</a>
|
|
235
|
+
- The <a href='https://github.com/dk107dk/csvpath/tree/main/tests'>unit tests</a> _(not realistic, but a good source of ideas)_
|
|
236
|
+
- A few <a href='https://github.com/dk107dk/csvpath/blob/main/docs/examples.md'>more real-looking examples</a>
|
|
237
237
|
|
|
238
238
|
# Not Ready For Production
|
|
239
239
|
Anything could change and performance could be better. This project is a hobby.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from .function import Function, ChildrenException
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Above(Function):
    """Match function that tests whether one value is numerically above another.

    The function expects exactly one child: an equality whose operation is
    ``,`` and whose two children are the values to compare, e.g.
    ``above(count(), 5)``.
    """

    # NOTE: the mutable default for ``skip`` is kept so the signature stays
    # identical for callers; this method only reads it and passes it on.
    def to_value(self, *, skip=[]) -> Any:
        """Return True when the first argument is greater than the second.

        Both arguments are evaluated with ``to_value(skip=skip)`` and
        converted with ``float()`` before comparing. The result is stored in
        ``self.value`` and reused while ``self.value`` is not None.

        Args:
            skip: match components to leave out of evaluation. When this
                function is itself in ``skip`` it returns True immediately.

        Raises:
            ChildrenException: if there is not exactly one child, or the
                child's operation is not ``,``.
            Exception: if either argument cannot be converted to float. The
                underlying conversion error is chained as ``__cause__``.
        """
        if self in skip:
            return True
        if len(self.children) != 1:
            raise ChildrenException("Above function must have 1 child")
        if self.children[0].op != ",":
            raise ChildrenException(
                f"Above function must have an equality with the ',' operation, not {self.children[0].op}"
            )
        if self.value is None:
            thischild = self.children[0].children[0]
            abovethatchild = self.children[0].children[1]

            this_is = thischild.to_value(skip=skip)
            above_that = abovethatchild.to_value(skip=skip)
            try:
                this = float(this_is)
                that = float(above_that)
            except Exception as e:
                # Report the raw values that failed to convert. The previous
                # message formatted the -1 placeholders, which hid the
                # offending input.
                raise Exception(
                    f"Above.to_value: this: {this_is}, a {this_is.__class__}, and {above_that}, a {above_that.__class__}"
                ) from e
            self.value = this > that
        return self.value

    def matches(self, *, skip=[]) -> bool:
        """Return the comparison result; matching is the same as the value."""
        return self.to_value(skip=skip)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
|
|
2
|
+
# Correlate
|
|
3
|
+
|
|
4
|
+
Correlate calculates the correlation between two columns of numbers. It takes each column as a list of floats. If a row does not have a usable numeric value in one or both of the columns, the pair of values is captured to a `correlate_gap` variable with the row's line number as the tracking key, and the row is left out of the calculation. The result of correlate is updated for each row seen.
|
|
5
|
+
|
|
6
|
+
The correlation value is given in a tuple with related data:
|
|
7
|
+
- The line number
|
|
8
|
+
- Variance left
|
|
9
|
+
- Variance right
|
|
10
|
+
- The covariance
|
|
11
|
+
- The correlation
|
|
12
|
+
|
|
13
|
+
The running record of the column values needed for the calculations is stored as two lists of floats, in `correlate_left` and `correlate_right`.
|
|
14
|
+
|
|
15
|
+
Correlate takes `onmatch` and can have a name qualifier to set the key names of its data. For example, using:
|
|
16
|
+
|
|
17
|
+
[ correlate.cor(#0, #1) ]
|
|
18
|
+
|
|
19
|
+
You would have variables:
|
|
20
|
+
- cor_left
|
|
21
|
+
- cor_right
|
|
22
|
+
- cor_gap
|
|
23
|
+
- cor
|
|
24
|
+
|
|
25
|
+
## Example
|
|
26
|
+
|
|
27
|
+
$file.csv[1*][ correlate.cor(#years, #salary) ]
|
|
28
|
+
|
|
29
|
+
This path gives a correlation of experience to salary.
|
|
30
|
+
|
|
31
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from .function import Function, ChildrenException
|
|
3
|
+
from math import sqrt
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Correlate(Function):
    """Running correlation between two numeric match components.

    The function expects exactly one child: an equality whose operation is
    ``,`` with a left and a right component, e.g. ``correlate(#0, #1)``.
    Each evaluated row appends one value to each of two running lists and
    the correlation is recomputed over everything collected so far.

    Variables written through the matcher (``name`` is the first non-term
    qualifier, defaulting to ``correlate``):

    - ``{name}_left`` / ``{name}_right``: the collected floats.
    - ``{name}_gap``: ``(left, right)`` for rows where either side is not a
      finite number, tracked by line number.
    - ``{name}``: ``(line_number, var_left, var_right, cov_lr, cor)``.
    """

    def _float(self, v: Any) -> float:
        """Convert ``v`` to a finite float, or return None if that fails.

        Conversion is deliberately best-effort: any failure means "no usable
        number here" rather than an error. NaN and +/-inf are also rejected,
        because a single non-finite value in the running lists would turn
        every later mean, variance and correlation into NaN.
        """
        try:
            f = float(v)
        except Exception:
            return None
        if f != f or f in (float("inf"), float("-inf")):
            return None
        return f

    @staticmethod
    def _pearson(xs, ys):
        """Return ``(ss_x, ss_y, sp_xy, r)`` for two equal-length float lists.

        ``ss_x`` and ``ss_y`` are the sums of squared deviations from the
        mean and ``sp_xy`` is the sum of the products of deviations; none of
        them is divided by the number of values. Returns None when either
        series has zero spread, because the correlation is undefined then.
        """
        mean_x = sum(xs) / len(xs)
        mean_y = sum(ys) / len(ys)
        ss_x = sum((x - mean_x) ** 2 for x in xs)
        ss_y = sum((y - mean_y) ** 2 for y in ys)
        if ss_x == 0 or ss_y == 0:
            return None
        sp_xy = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
        return ss_x, ss_y, sp_xy, sp_xy / (sqrt(ss_x) * sqrt(ss_y))

    # NOTE: the mutable default for ``skip`` is kept so the signature stays
    # identical for callers; this method only reads it and passes it on.
    def to_value(self, *, skip=[]) -> Any:
        """Update the running data with the current row and return the correlation.

        Returns ``self.value``, which is the correlation once it has been
        computed. It is still None when this function is skipped, when
        ``onmatch`` is set and the line does not match, when the row is
        recorded as a gap, when only one pair has been collected, or when
        either column has zero spread so far.

        Args:
            skip: match components to leave out of evaluation; forwarded to
                both child components.

        Raises:
            ChildrenException: if there is not exactly one child, or the
                child's operation is not ``,``.
            Exception: if the two stored lists have different lengths.
        """
        if self in skip:
            return self.value
        if len(self.children) != 1:
            raise ChildrenException("Correlate function must have 1 child")
        if self.children[0].op != ",":
            raise ChildrenException(
                f"Correlate function must have an equality with the ',' operation, not {self.children[0].op}"
            )
        if self.value is not None:
            return self.value
        # With onmatch, only rows that match contribute data.
        if self.has_onmatch() and not self.line_matches():
            return self.value

        child = self.children[0]
        left = child.left
        right = child.right
        # Forward skip so the children honour it, as Above does.
        leftv = left.to_value(skip=skip)
        rightv = right.to_value(skip=skip)
        lv = self._float(leftv)
        rv = self._float(rightv)

        name = self.first_non_term_qualifier("correlate")
        if lv is None or rv is None:
            # variables[f"{name}_gap"][line number] = (left, right)
            self.matcher.set_variable(
                f"{name}_gap",
                value=(lv, rv),
                tracking=self.matcher.csvpath.line_number,
            )
            return self.value

        nl = f"{name}_left"
        nr = f"{name}_right"
        leftlist = self.matcher.get_variable(nl)
        rightlist = self.matcher.get_variable(nr)
        if leftlist is None:
            leftlist = []
        leftlist.append(lv)
        if rightlist is None:
            rightlist = []
        rightlist.append(rv)
        self.matcher.set_variable(nl, value=leftlist)
        self.matcher.set_variable(nr, value=rightlist)

        if len(leftlist) == 1:
            # A single pair is not enough to correlate.
            return self.value
        if len(leftlist) != len(rightlist):
            # Values are always appended in pairs, so this should not happen.
            raise Exception(
                "Number of values to calculate correlation from must be the same"
            )

        stats = self._pearson(leftlist, rightlist)
        if stats is None:
            # Undefined for a constant column; wait for more varied data.
            return self.value
        var_left, var_right, cov_lr, cor = stats

        self.matcher.set_variable(
            name,
            value=(
                self.matcher.csvpath.line_number,
                var_left,
                var_right,
                cov_lr,
                cor,
            ),
        )
        self.value = cor
        return self.value

    def matches(self, *, skip=[]) -> bool:
        """Evaluate for the side effects and always report a match."""
        self.to_value(skip=skip)
        return True
|
|
@@ -42,6 +42,7 @@ from .mod import Mod
|
|
|
42
42
|
from .equals import Equals
|
|
43
43
|
from .strip import Strip
|
|
44
44
|
from .jinjaf import Jinjaf
|
|
45
|
+
from .correlate import Correlate
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
class UnknownFunctionException(Exception):
|
|
@@ -164,6 +165,8 @@ class FunctionFactory:
|
|
|
164
165
|
f = Strip(matcher, name, child)
|
|
165
166
|
elif name == "jinja":
|
|
166
167
|
f = Jinjaf(matcher, name, child)
|
|
168
|
+
elif name == "correlate":
|
|
169
|
+
f = Correlate(matcher, name, child)
|
|
167
170
|
else:
|
|
168
171
|
raise UnknownFunctionException(f"{name}")
|
|
169
172
|
if child:
|
|
@@ -13,12 +13,8 @@ Stops the scan immediately on a condition or by being match-activated by an encl
|
|
|
13
13
|
|
|
14
14
|
This path stops the scan when the match count hits 5.
|
|
15
15
|
|
|
16
|
-
$file.csv[*][
|
|
17
|
-
|
|
18
|
-
count(),
|
|
19
|
-
5),
|
|
20
|
-
stop())
|
|
21
|
-
]
|
|
16
|
+
$file.csv[*][
|
|
17
|
+
above(count(), 5) -> stop() ]
|
|
22
18
|
|
|
23
19
|
This path stops scanning if its match count goes above 5.
|
|
24
20
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "csvpath"
|
|
3
|
-
version = "0.0.45"
|
|
3
|
+
version = "0.0.451"
|
|
4
4
|
description = "A declarative language for data extraction and validation of CSV files"
|
|
5
5
|
authors = ["David Kershaw <dk107dk@hotmail.com>"]
|
|
6
6
|
readme = "README.md"
|
|
@@ -15,7 +15,7 @@ classifiers = [
|
|
|
15
15
|
"Topic :: Software Development :: Quality Assurance",
|
|
16
16
|
"Topic :: Software Development :: Testing",
|
|
17
17
|
"Topic :: Text Processing",
|
|
18
|
-
"Topic :: Utilities"
|
|
18
|
+
"Topic :: Utilities"
|
|
19
19
|
]
|
|
20
20
|
|
|
21
21
|
[tool.poetry.urls]
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
from .function import Function, ChildrenException
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class Above(Function):
    """Match function that tests whether one value is numerically above another.

    The function expects exactly one child: an equality whose operation is
    ``,`` and whose two children are the values to compare.
    """

    # NOTE: the mutable default for ``skip`` is kept so the signature stays
    # identical for callers; this method only reads it and passes it on.
    def to_value(self, *, skip=[]) -> Any:
        """Return True when the first argument is greater than the second.

        Both arguments are evaluated with ``to_value(skip=skip)`` and
        converted with ``float()`` before comparing.

        Args:
            skip: match components to leave out of evaluation. When this
                function is itself in ``skip`` it returns True immediately.

        Raises:
            ChildrenException: if there is not exactly one child, or the
                child's operation is not ``,``.
            Exception: if either argument cannot be converted to float. The
                underlying conversion error is chained as ``__cause__``.
        """
        if self in skip:
            return True
        if len(self.children) != 1:
            self.matcher.print(
                f"Above.to_value: must have 1 equality child: {self.children}"
            )
            raise ChildrenException("Above function must have 1 child")
        if self.children[0].op != ",":
            raise ChildrenException(
                f"Above function must have an equality with the ',' operation, not {self.children[0].op}"
            )
        thischild = self.children[0].children[0]
        abovethatchild = self.children[0].children[1]

        this_is = thischild.to_value(skip=skip)
        above_that = abovethatchild.to_value(skip=skip)
        try:
            this = float(this_is)
            that = float(above_that)
        except Exception as e:
            # Report the raw values that failed to convert. The previous
            # message formatted the -1 placeholders, which hid the offending
            # input.
            raise Exception(
                f"Above.to_value: this: {this_is}, a {this_is.__class__}, and {above_that}, a {above_that.__class__}"
            ) from e
        return this > that

    def matches(self, *, skip=[]) -> bool:
        """Return the comparison result; matching is the same as the value."""
        return self.to_value(skip=skip)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|