csvpath 0.0.45__tar.gz → 0.0.451__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {csvpath-0.0.45 → csvpath-0.0.451}/PKG-INFO +10 -10
  2. {csvpath-0.0.45 → csvpath-0.0.451}/README.md +9 -9
  3. csvpath-0.0.451/csvpath/matching/functions/above.py +35 -0
  4. csvpath-0.0.451/csvpath/matching/functions/correlate.md +31 -0
  5. csvpath-0.0.451/csvpath/matching/functions/correlate.py +112 -0
  6. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/exists.py +1 -1
  7. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/function_factory.py +3 -0
  8. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/stop.md +2 -6
  9. {csvpath-0.0.45 → csvpath-0.0.451}/pyproject.toml +2 -2
  10. csvpath-0.0.45/csvpath/matching/functions/above.py +0 -36
  11. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/__init__.py +0 -0
  12. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/csvpath.py +0 -0
  13. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/csvpaths.py +0 -0
  14. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/exceptions.py +0 -0
  15. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/__init__.py +0 -0
  16. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/expression_encoder.py +0 -0
  17. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/expression_utility.py +0 -0
  18. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/__init__.py +0 -0
  19. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/add.py +0 -0
  20. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/any.md +0 -0
  21. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/any.py +0 -0
  22. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/average.md +0 -0
  23. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/below.py +0 -0
  24. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/column.py +0 -0
  25. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/concat.py +0 -0
  26. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count.md +0 -0
  27. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count.py +0 -0
  28. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count_lines.py +0 -0
  29. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/count_scans.py +0 -0
  30. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/divide.py +0 -0
  31. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/end.md +0 -0
  32. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/end.py +0 -0
  33. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/equals.py +0 -0
  34. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/every.md +0 -0
  35. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/every.py +0 -0
  36. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/first.md +0 -0
  37. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/first.py +0 -0
  38. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/function.py +0 -0
  39. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/header.md +0 -0
  40. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/header.py +0 -0
  41. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/in.md +0 -0
  42. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/increment.md +0 -0
  43. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/increment.py +0 -0
  44. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/inf.py +0 -0
  45. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/jinja.md +0 -0
  46. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/jinjaf.py +0 -0
  47. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/last.md +0 -0
  48. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/last.py +0 -0
  49. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/length.py +0 -0
  50. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/lower.py +0 -0
  51. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/max.md +0 -0
  52. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/minf.py +0 -0
  53. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/mod.py +0 -0
  54. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/multiply.py +0 -0
  55. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/no.md +0 -0
  56. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/no.py +0 -0
  57. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/nonef.py +0 -0
  58. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/notf.py +0 -0
  59. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/now.md +0 -0
  60. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/now.py +0 -0
  61. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/orf.py +0 -0
  62. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/percent.py +0 -0
  63. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/print.md +0 -0
  64. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/printf.py +0 -0
  65. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/random.py +0 -0
  66. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/regex.py +0 -0
  67. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/stop.py +0 -0
  68. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/strip.py +0 -0
  69. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/substring.py +0 -0
  70. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/subtract.md +0 -0
  71. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/subtract.py +0 -0
  72. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/tally.md +0 -0
  73. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/tally.py +0 -0
  74. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/upper.py +0 -0
  75. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/variable.md +0 -0
  76. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/variable.py +0 -0
  77. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/when._ +0 -0
  78. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/functions/yes.py +0 -0
  79. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/matcher.py +0 -0
  80. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/matching_lexer.py +0 -0
  81. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/parser.out +0 -0
  82. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/parsetab.py +0 -0
  83. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/__init__.py +0 -0
  84. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/equality.py +0 -0
  85. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/expression.py +0 -0
  86. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/header.py +0 -0
  87. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/matchable.py +0 -0
  88. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/qualified.py +0 -0
  89. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/term.py +0 -0
  90. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/matching/productions/variable.py +0 -0
  91. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/parser_utility.py +0 -0
  92. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/__init__.py +0 -0
  93. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/parser.out +0 -0
  94. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/parsetab.py +0 -0
  95. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/scanner.py +0 -0
  96. {csvpath-0.0.45 → csvpath-0.0.451}/csvpath/scanning/scanning_lexer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: csvpath
3
- Version: 0.0.45
3
+ Version: 0.0.451
4
4
  Summary: A declarative language for data extraction and validation of CSV files
5
5
  Author: David Kershaw
6
6
  Author-email: dk107dk@hotmail.com
@@ -88,7 +88,7 @@ Two classes do all the work: CsvPath and CsvPaths. Each has only a few external
88
88
  - a single .csv file or
89
89
  - a directory of .csv files
90
90
 
91
- There are several ways to set up csvpath file references. Read <a href='docs/files.md'>more about filenames</a>.
91
+ There are several ways to set up csvpath file references. Read <a href='https://github.com/dk107dk/csvpath/blob/main/docs/files.md'>more about filenames</a>.
92
92
 
93
93
  This is a very basic programmatic use of CsvPath. For lots more examples, see the unit tests.
94
94
 
@@ -178,7 +178,7 @@ A string, number, or regular expression value.
178
178
  |--------|---------|---------------|
179
179
  |A value | Always true | `"a value"` |
180
180
 
181
- <a href='docs/terms.md'>Read about terms here</a>.
181
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
182
182
 
183
183
  ## Function
184
184
  A composable unit of functionality called once for every row scanned.
@@ -187,7 +187,7 @@ A composable unit of functionality called once for every row scanned.
187
187
  |--------|---------|---------------|
188
188
  |Calculated | Calculated | `count()` |
189
189
 
190
- <a href='docs/functions.md'>Read about functions here</a>.
190
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
191
191
 
192
192
  ## Variable
193
193
  A stored value that is set or retrieved once per row scanned.
@@ -196,7 +196,7 @@ A stored value that is set or retrieved once per row scanned.
196
196
  |--------|---------|---------------|
197
197
  |A value | True when set, unless `onchange`. Used alone it is an existence test. | `@firstname` |
198
198
 
199
- <a href='docs/variables.md'>Read about variables here</a>.
199
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/variables.md'>Read about variables here</a>.
200
200
 
201
201
  ## Header
202
202
 
@@ -206,7 +206,7 @@ A named column or a column identified by 0-based index.
206
206
  |--------|---------|---------------|
207
207
  |A value | Calculated. Used alone it is an existence test. | `#area_code` |
208
208
 
209
- <a href='docs/variables.md'>Read about headers here</a>.
209
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/headers.md'>Read about headers here</a>.
210
210
 
211
211
  ## Equality
212
212
  Two of the other types joined with an "=" or "==".
@@ -245,7 +245,7 @@ Says to set the `firstname` variable to the value of the first column when the f
245
245
 
246
246
  Qualifiers are tokens added to variable, header, and function names. They are separated from the names and each other with `.` characters. Each qualifier causes the qualified match component to behave in a different way than it otherwise would.
247
247
 
248
- <a href='docs/qualifiers.md'>Read about qualifiers here.</a>
248
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/qualifiers.md'>Read about qualifiers here.</a>
249
249
 
250
250
  <a name="examples"></a>
251
251
  ## More Examples
@@ -261,9 +261,9 @@ In the path above, the rules applied are:
261
261
 
262
262
  There are more examples scattered throughout the documentation. Good places to look include:
263
263
 
264
- - The individual <a href='docs/functions.md'>function descriptions</a>
265
- - The <a href='tests'>unit tests</a> _(not realistic, but a good source of ideas)_
266
- - A few <a href='docs/examples.md'>more real-looking examples</a>
264
+ - The individual <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>function descriptions</a>
265
+ - The <a href='https://github.com/dk107dk/csvpath/tree/main/tests'>unit tests</a> _(not realistic, but a good source of ideas)_
266
+ - A few <a href='https://github.com/dk107dk/csvpath/blob/main/docs/examples.md'>more real-looking examples</a>
267
267
 
268
268
  # Not Ready For Production
269
269
  Anything could change and performance could be better. This project is a hobby.
@@ -58,7 +58,7 @@ Two classes do all the work: CsvPath and CsvPaths. Each has only a few external
58
58
  - a single .csv file or
59
59
  - a directory of .csv files
60
60
 
61
- There are several ways to set up csvpath file references. Read <a href='docs/files.md'>more about filenames</a>.
61
+ There are several ways to set up csvpath file references. Read <a href='https://github.com/dk107dk/csvpath/blob/main/docs/files.md'>more about filenames</a>.
62
62
 
63
63
  This is a very basic programmatic use of CsvPath. For lots more examples, see the unit tests.
64
64
 
@@ -148,7 +148,7 @@ A string, number, or regular expression value.
148
148
  |--------|---------|---------------|
149
149
  |A value | Always true | `"a value"` |
150
150
 
151
- <a href='docs/terms.md'>Read about terms here</a>.
151
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
152
152
 
153
153
  ## Function
154
154
  A composable unit of functionality called once for every row scanned.
@@ -157,7 +157,7 @@ A composable unit of functionality called once for every row scanned.
157
157
  |--------|---------|---------------|
158
158
  |Calculated | Calculated | `count()` |
159
159
 
160
- <a href='docs/functions.md'>Read about functions here</a>.
160
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
161
161
 
162
162
  ## Variable
163
163
  A stored value that is set or retrieved once per row scanned.
@@ -166,7 +166,7 @@ A stored value that is set or retrieved once per row scanned.
166
166
  |--------|---------|---------------|
167
167
  |A value | True when set, unless `onchange`. Used alone it is an existence test. | `@firstname` |
168
168
 
169
- <a href='docs/variables.md'>Read about variables here</a>.
169
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/variables.md'>Read about variables here</a>.
170
170
 
171
171
  ## Header
172
172
 
@@ -176,7 +176,7 @@ A named column or a column identified by 0-based index.
176
176
  |--------|---------|---------------|
177
177
  |A value | Calculated. Used alone it is an existence test. | `#area_code` |
178
178
 
179
- <a href='docs/variables.md'>Read about headers here</a>.
179
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/headers.md'>Read about headers here</a>.
180
180
 
181
181
  ## Equality
182
182
  Two of the other types joined with an "=" or "==".
@@ -215,7 +215,7 @@ Says to set the `firstname` variable to the value of the first column when the f
215
215
 
216
216
  Qualifiers are tokens added to variable, header, and function names. They are separated from the names and each other with `.` characters. Each qualifier causes the qualified match component to behave in a different way than it otherwise would.
217
217
 
218
- <a href='docs/qualifiers.md'>Read about qualifiers here.</a>
218
+ <a href='https://github.com/dk107dk/csvpath/blob/main/docs/qualifiers.md'>Read about qualifiers here.</a>
219
219
 
220
220
  <a name="examples"></a>
221
221
  ## More Examples
@@ -231,9 +231,9 @@ In the path above, the rules applied are:
231
231
 
232
232
  There are more examples scattered throughout the documentation. Good places to look include:
233
233
 
234
- - The individual <a href='docs/functions.md'>function descriptions</a>
235
- - The <a href='tests'>unit tests</a> _(not realistic, but a good source of ideas)_
236
- - A few <a href='docs/examples.md'>more real-looking examples</a>
234
+ - The individual <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>function descriptions</a>
235
+ - The <a href='https://github.com/dk107dk/csvpath/tree/main/tests'>unit tests</a> _(not realistic, but a good source of ideas)_
236
+ - A few <a href='https://github.com/dk107dk/csvpath/blob/main/docs/examples.md'>more real-looking examples</a>
237
237
 
238
238
  # Not Ready For Production
239
239
  Anything could change and performance could be better. This project is a hobby.
@@ -0,0 +1,35 @@
1
+ from typing import Any
2
+ from .function import Function, ChildrenException
3
+
4
+
5
class Above(Function):
    """Match component that tests whether one value is numerically above another.

    Expects exactly one child: an equality whose operation is ``,``. That
    equality's first child supplies the value under test and its second child
    supplies the threshold.
    """

    def to_value(self, *, skip=[]) -> Any:
        """Return True when the first argument, as a float, exceeds the second.

        The comparison is only computed while ``self.value`` is None; the
        result is stored in ``self.value`` and returned from there afterwards.

        Args:
            skip: when this instance is in ``skip`` the method returns True
                without evaluating its children. The list is not modified
                here; it is only passed on to the children.

        Raises:
            ChildrenException: if there is not exactly one child, or the
                child's operation is not ``,``.
            Exception: if either argument cannot be converted to ``float``.
        """
        if self in skip:
            return True
        if len(self.children) != 1:
            raise ChildrenException("Above function must have 1 child")
        pair = self.children[0]
        if pair.op != ",":
            raise ChildrenException(
                f"Above function must have an equality with the ',' operation, not {pair.op}"
            )
        if self.value is None:
            thischild = pair.children[0]
            abovethatchild = pair.children[1]

            this_is = thischild.to_value(skip=skip)
            above_that = abovethatchild.to_value(skip=skip)
            try:
                this = float(this_is)
                that = float(above_that)
            except Exception as e:
                # Report the raw values that failed to convert (not the
                # placeholders) and keep the original error as the cause.
                raise Exception(
                    f"Above.to_value: this: {this_is}, a {this_is.__class__}, and {above_that}, a {above_that.__class__}"
                ) from e
            self.value = this > that
        return self.value

    def matches(self, *, skip=[]) -> bool:
        """Return the result of :meth:`to_value` for the same ``skip`` list."""
        return self.to_value(skip=skip)
@@ -0,0 +1,31 @@
1
+
2
+ # Correlate
3
+
4
+ Correlate calculates the correlation between two columns of numbers. It takes each column as a list of floats. If a row is missing a numeric value in either column, the pair of values is captured to a `correlate_gap` variable with the row number as the tracking key. The result of correlate is updated for each row seen.
5
+
6
+ The correlation value is given in a tuple with related data:
7
+ - The line number
8
+ - Variance left
9
+ - Variance right
10
+ - The covariance
11
+ - The correlation
12
+
13
+ The running track of the column values needed for the calculations is stored as lists of floats in `correlate_left` and `correlate_right`.
14
+
15
+ Correlate takes `onmatch` and can have a name qualifier to set the key names of its data. For example, using:
16
+
17
+ [ correlate.cor(#0, #1) ]
18
+
19
+ You would have variables:
20
+ - cor_left
21
+ - cor_right
22
+ - cor_gap
23
+ - cor
24
+
25
+ ## Example
26
+
27
+ $file.csv[1*][ correlate.cor(#years, #salary) ]
28
+
29
+ This path gives a correlation of experience to salary.
30
+
31
+
@@ -0,0 +1,112 @@
1
+ from typing import Any
2
+ from .function import Function, ChildrenException
3
+ from math import sqrt
4
+
5
+
6
class Correlate(Function):
    """Running correlation between two values taken from each line.

    Expects exactly one child: an equality whose operation is ``,`` and whose
    ``left`` and ``right`` supply the two values. State is kept in matcher
    variables whose names derive from ``first_non_term_qualifier("correlate")``
    (called ``name`` below):

    - ``{name}_left`` / ``{name}_right``: lists of the floats collected so far
    - ``{name}_gap``: the ``(left, right)`` pair for a line where either value
      could not be converted to float, tracked by line number
    - ``{name}``: the tuple ``(line_number, var_left, var_right, cov_lr, cor)``
    """

    def _float(self, v: Any) -> float:
        """Return ``float(v)``, or None when ``v`` cannot be converted."""
        try:
            return float(v)
        except Exception:
            # Deliberate best effort: the caller records an unconvertible
            # value as a gap rather than failing the line.
            return None

    def to_value(self, *, skip=[]) -> Any:
        """Add this line's pair to the running data and return the correlation.

        Returns ``self.value`` unchanged (None unless already computed) when
        the function is ``onmatch``-qualified and the line does not match,
        when either value is not numeric, when only one pair has been
        collected, or when either column has no spread.

        Args:
            skip: when this instance is in ``skip`` the stored value is
                returned without evaluating anything. The list is passed on
                to the children.

        Raises:
            ChildrenException: if there is not exactly one child, or the
                child's operation is not ``,``.
            Exception: if the two collected lists differ in length.
        """
        if self in skip:
            return self.value
        if len(self.children) != 1:
            raise ChildrenException("Correlate function must have 1 child")
        child = self.children[0]
        if child.op != ",":
            raise ChildrenException(
                f"Correlate function must have an equality with the ',' operation, not {child.op}"
            )
        if self.value is not None:
            return self.value
        if self.has_onmatch() and not self.line_matches():
            return self.value

        left = child.left
        right = child.right
        # Propagate the caller's skip list to the children, as the other
        # functions in this package do.
        leftv = left.to_value(skip=skip)
        rightv = right.to_value(skip=skip)

        lv = self._float(leftv)
        rv = self._float(rightv)

        name = self.first_non_term_qualifier("correlate")
        if lv is None or rv is None:
            # variables[f"{name}_gap"][line number] = (left, right)
            self.matcher.set_variable(
                f"{name}_gap",
                value=(lv, rv),
                tracking=self.matcher.csvpath.line_number,
            )
            return self.value

        nl = f"{name}_left"
        nr = f"{name}_right"
        leftlist = self.matcher.get_variable(nl)
        rightlist = self.matcher.get_variable(nr)

        if leftlist is None:
            leftlist = []
        leftlist.append(lv)

        if rightlist is None:
            rightlist = []
        rightlist.append(rv)

        self.matcher.set_variable(nl, value=leftlist)
        self.matcher.set_variable(nr, value=rightlist)

        # NOTE(review): every early return below leaves self.value as None, so
        # a second call before self.value is reset would append the same pair
        # again -- confirm against the Function value lifecycle.
        if len(leftlist) == 1:
            # A single pair is not enough to calculate anything from.
            return self.value
        if len(leftlist) != len(rightlist):
            # Both lists are appended together above, so this is unexpected.
            raise Exception(
                "Number of values to calculate correlation from must be the same"
            )

        mean_left = sum(leftlist) / len(leftlist)
        mean_right = sum(rightlist) / len(rightlist)

        # These are sums of squared deviations; they are not divided by the
        # number of values. The factor cancels in the correlation below.
        var_left = sum((li - mean_left) ** 2 for li in leftlist)
        var_right = sum((ri - mean_right) ** 2 for ri in rightlist)

        if var_left == 0 or var_right == 0:
            # A column with no spread would make the divisor zero.
            # NOTE(review): the original asks how this case should be handled;
            # for now it is reported on stdout and no result is produced.
            print(f"skipping because 0: {var_left}, {var_right}")
            return None

        cov_lr = sum(
            (li - mean_left) * (ri - mean_right)
            for li, ri in zip(leftlist, rightlist)
        )
        cor = cov_lr / (sqrt(var_left) * sqrt(var_right))

        # Stored tuple: line number, variance left, variance right,
        # covariance, correlation.
        vs = (
            self.matcher.csvpath.line_number,
            var_left,
            var_right,
            cov_lr,
            cor,
        )
        self.matcher.set_variable(name, value=vs)

        self.value = cor
        return self.value

    def matches(self, *, skip=[]) -> bool:
        """Run :meth:`to_value` for its side effects and always return True."""
        self.to_value(skip=skip)
        return True
@@ -27,7 +27,7 @@ class Exists(Function):
27
27
  self.match = v
28
28
  except Exception:
29
29
  self.match = False
30
- elif v is not None and v.strip() != "":
30
+ elif v is not None and f"{v}".strip() != "":
31
31
  self.match = True
32
32
  else:
33
33
  self.match = False
@@ -42,6 +42,7 @@ from .mod import Mod
42
42
  from .equals import Equals
43
43
  from .strip import Strip
44
44
  from .jinjaf import Jinjaf
45
+ from .correlate import Correlate
45
46
 
46
47
 
47
48
  class UnknownFunctionException(Exception):
@@ -164,6 +165,8 @@ class FunctionFactory:
164
165
  f = Strip(matcher, name, child)
165
166
  elif name == "jinja":
166
167
  f = Jinjaf(matcher, name, child)
168
+ elif name == "correlate":
169
+ f = Correlate(matcher, name, child)
167
170
  else:
168
171
  raise UnknownFunctionException(f"{name}")
169
172
  if child:
@@ -13,12 +13,8 @@ Stops the scan immediately on a condition or by being match-activated by an encl
13
13
 
14
14
  This path stops the scan when the match count hits 5.
15
15
 
16
- $file.csv[*][ when(
17
- above(
18
- count(),
19
- 5),
20
- stop())
21
- ]
16
+ $file.csv[*][
17
+ above(count(), 5) -> stop() ]
22
18
 
23
19
  This path stops scanning if its match count goes above 5.
24
20
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "csvpath"
3
- version = "0.0.45"
3
+ version = "0.0.451"
4
4
  description = "A declarative language for data extraction and validation of CSV files"
5
5
  authors = ["David Kershaw <dk107dk@hotmail.com>"]
6
6
  readme = "README.md"
@@ -15,7 +15,7 @@ classifiers = [
15
15
  "Topic :: Software Development :: Quality Assurance",
16
16
  "Topic :: Software Development :: Testing",
17
17
  "Topic :: Text Processing",
18
- "Topic :: Utilities",
18
+ "Topic :: Utilities"
19
19
  ]
20
20
 
21
21
  [tool.poetry.urls]
@@ -1,36 +0,0 @@
1
- from typing import Any
2
- from .function import Function, ChildrenException
3
-
4
-
5
class Above(Function):
    """Match component that tests whether one value is numerically above another.

    Expects exactly one child: an equality whose operation is ``,``. That
    equality's first child supplies the value under test and its second child
    supplies the threshold.
    """

    def to_value(self, *, skip=[]) -> Any:
        """Return True when the first argument, as a float, exceeds the second.

        The comparison is recomputed on every call.

        Args:
            skip: when this instance is in ``skip`` the method returns True
                without evaluating its children. The list is not modified
                here; it is only passed on to the children.

        Raises:
            ChildrenException: if there is not exactly one child, or the
                child's operation is not ``,``.
            Exception: if either argument cannot be converted to ``float``.
        """
        if self in skip:
            return True
        if len(self.children) != 1:
            self.matcher.print(
                f"Above.to_value: must have 1 equality child: {self.children}"
            )
            raise ChildrenException("Above function must have 1 child")
        pair = self.children[0]
        if pair.op != ",":
            raise ChildrenException(
                f"Above function must have an equality with the ',' operation, not {pair.op}"
            )
        thischild = pair.children[0]
        abovethatchild = pair.children[1]

        this_is = thischild.to_value(skip=skip)
        above_that = abovethatchild.to_value(skip=skip)
        try:
            this = float(this_is)
            that = float(above_that)
        except Exception as e:
            # Report the raw values that failed to convert (not the
            # placeholders) and keep the original error as the cause.
            raise Exception(
                f"Above.to_value: this: {this_is}, a {this_is.__class__}, and {above_that}, a {above_that.__class__}"
            ) from e
        return this > that

    def matches(self, *, skip=[]) -> bool:
        """Return the result of :meth:`to_value` for the same ``skip`` list."""
        return self.to_value(skip=skip)
File without changes
File without changes
File without changes