csvpath 0.0.2__tar.gz → 0.0.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. csvpath-0.0.2/README.md → csvpath-0.0.21/PKG-INFO +41 -11
  2. csvpath-0.0.2/PKG-INFO → csvpath-0.0.21/README.md +24 -26
  3. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/csvpath.py +77 -88
  4. csvpath-0.0.21/csvpath/matching/functions/count.md +28 -0
  5. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/count.py +11 -1
  6. csvpath-0.0.21/csvpath/matching/functions/every.md +58 -0
  7. csvpath-0.0.21/csvpath/matching/functions/every.py +47 -0
  8. csvpath-0.0.21/csvpath/matching/functions/first.md +23 -0
  9. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/first.py +10 -4
  10. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/function.py +4 -0
  11. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/function_factory.py +21 -0
  12. csvpath-0.0.21/csvpath/matching/functions/in.md +25 -0
  13. csvpath-0.0.21/csvpath/matching/functions/no.md +13 -0
  14. csvpath-0.0.21/csvpath/matching/functions/now.md +13 -0
  15. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/now.py +0 -1
  16. csvpath-0.0.21/csvpath/matching/functions/print.md +37 -0
  17. csvpath-0.0.21/csvpath/matching/functions/printf.py +145 -0
  18. csvpath-0.0.21/csvpath/matching/functions/tally.md +29 -0
  19. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/tally.py +3 -3
  20. csvpath-0.0.21/csvpath/matching/functions/yes.py +10 -0
  21. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/matcher.py +45 -10
  22. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/matching_lexer.py +1 -1
  23. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/productions/equality.py +15 -12
  24. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/productions/expression.py +4 -0
  25. csvpath-0.0.21/csvpath/matching/productions/header.py +39 -0
  26. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/productions/matchable.py +8 -0
  27. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/productions/term.py +3 -0
  28. csvpath-0.0.21/csvpath/matching/productions/variable.py +33 -0
  29. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/scanning/scanner.py +32 -0
  30. {csvpath-0.0.2 → csvpath-0.0.21}/pyproject.toml +3 -1
  31. csvpath-0.0.2/csvpath/matching/functions/every.py +0 -33
  32. csvpath-0.0.2/csvpath/matching/productions/header.py +0 -30
  33. csvpath-0.0.2/csvpath/matching/productions/variable.py +0 -15
  34. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/__init__.py +0 -0
  35. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/__init__.py +0 -0
  36. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/expression_encoder.py +0 -0
  37. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/expression_math.py +0 -0
  38. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/expression_utility.py +0 -0
  39. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/above.py +0 -0
  40. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/add.py +0 -0
  41. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/below.py +0 -0
  42. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/concat.py +0 -0
  43. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/count_lines.py +0 -0
  44. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/count_scans.py +0 -0
  45. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/divide.py +0 -0
  46. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/end.py +0 -0
  47. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/inf.py +0 -0
  48. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/is_instance.py +0 -0
  49. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/length.py +0 -0
  50. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/lower.py +0 -0
  51. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/minf.py +0 -0
  52. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/multiply.py +0 -0
  53. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/no.py +0 -0
  54. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/notf.py +0 -0
  55. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/orf.py +0 -0
  56. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/percent.py +0 -0
  57. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/random.py +0 -0
  58. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/regex.py +0 -0
  59. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/subtract.py +0 -0
  60. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/functions/upper.py +0 -0
  61. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/parser.out +0 -0
  62. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/matching/parsetab.py +0 -0
  63. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/parser_utility.py +0 -0
  64. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/scanning/__init__.py +0 -0
  65. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/scanning/parser.out +0 -0
  66. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/scanning/parsetab.py +0 -0
  67. {csvpath-0.0.2 → csvpath-0.0.21}/csvpath/scanning/scanning_lexer.py +0 -0
@@ -1,3 +1,19 @@
1
+ Metadata-Version: 2.1
2
+ Name: csvpath
3
+ Version: 0.0.21
4
+ Summary:
5
+ Author: David Kershaw
6
+ Author-email: dk107dk@hotmail.com
7
+ Requires-Python: >=3.12,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
11
+ Requires-Dist: ply (>=3.11,<4.0)
12
+ Requires-Dist: polars (>=1.1.0,<2.0.0)
13
+ Requires-Dist: pytest (>=8.2.2,<9.0.0)
14
+ Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
15
+ Description-Content-Type: text/markdown
16
+
1
17
 
2
18
  # CsvPath
3
19
 
@@ -104,15 +120,26 @@ take a specific or unlimited number of types as arguments. </td>
104
120
  </tr>
105
121
  <table>
106
122
 
107
- [ #common_name #0=="field" @tail=end() not(in(@tail, 'short|medium')) ]
123
+ ## Example
124
+ [ #common_name #0=="field" @tail.onmatch=end() not(in(@tail, 'short|medium')) ]
108
125
 
109
126
  In the path above, the rules applied are:
110
127
  - `#common_name` indicates a header named "common_name". Headers are the values in the 0th line. This component of the match is an existence test.
111
128
  - `#2` means the 3rd column, counting from 0
112
129
  - Functions and column references are ANDed together
113
- - `@tail` creates a variable named "tail" and sets it to the value of the last column
130
+ - `@tail` creates a variable named "tail" and sets it to the value of the last column if all else matches
114
131
  - Functions can contain functions, equality tests, and/or literals
115
132
 
133
+ Variables are always set unless they are flagged with `.onmatch`. That means:
134
+
135
+ $file.csv[*][ @imcounting.onmatch = count_lines() no()]
136
+
137
+ will never set `imcounting`, but:
138
+
139
+ $file.csv[*][ @imcounting = count_lines() no()]
140
+
141
+ will always set it.
142
+
116
143
  Most of the work of matching is done in functions. The match functions are:
117
144
 
118
145
  | Function | What it does |Done|
@@ -122,15 +149,15 @@ Most of the work of matching is done in functions. The match functions are:
122
149
  | average(number, type) | returns the average up to current "line", "scan", "match" | X |
123
150
  | before(value) | finds things before a date, number, string | X |
124
151
  | concat(value, value) | concatenates the values | X |
125
- | count() | counts the number of matches | X |
126
- | count(value) | count matches of value | X |
152
+ | [count()](csvpath/matching/functions/count.md) | counts the number of matches | X |
153
+ | [count(value)](csvpath/matching/functions/count.md) | count matches of value | X |
127
154
  | count_lines() | count lines to this point in the file | X |
128
155
  | count_scans() | count lines we checked for match | X |
129
156
  | divide(value, value, ...) | divides numbers | X |
130
157
  | end() | returns the value of the last column | X |
131
- | every(value, number) | match every Nth time a value is seen | X |
132
- | first(value) | match the first occurrence and capture line | X |
133
- | in(value, list) | match in a pipe-delimited list | X |
158
+ | [every(value, number)](csvpath/matching/functions/every.md) | match every Nth time a value is seen | X |
159
+ | [first(value, value, ...)](csvpath/matching/functions/first.md) | match the first occurrence and capture line | X |
160
+ | [in(value, list)](csvpath/matching/functions/in.md) | match in a pipe-delimited list | X |
134
161
  | increment(value, n) | increments a variable by n each time seen | |
135
162
  | isinstance(value, typestr) | tests for "int","float","complex","bool","usd" | X |
136
163
  | length(value) | returns the length of the value | X |
@@ -139,21 +166,24 @@ Most of the work of matching is done in functions. The match functions are:
139
166
  | median(value, type) | median value up to current "line", "scan", "match" | X |
140
167
  | min(value, type) | smallest value seen up to current "line", "scan", "match" | X |
141
168
  | multiply(value, value, ...) | multiplies numbers | X |
142
- | no() | always false | X |
169
+ | [no()](csvpath/matching/functions/no.md) | always false | X |
143
170
  | not(value) | negates a value | X |
144
- | now(format) | a datetime, optionally formatted | X |
171
+ | [now(format)](csvpath/matching/functions/now.md) | a datetime, optionally formatted | X |
145
172
  | or(value, value,...) | match any one | X |
146
173
  | percent(type) | % of total lines for "scan", "match", "line" | X |
174
+ | [print(value, str)](csvpath/matching/functions/print.md) | when matches prints the interpolated string | X |
147
175
  | random(list) | pick from a list | |
148
176
  | random(starting, ending) | generates a random int from starting to ending | X |
149
177
  | regex(regex-string) | match on a regular expression | X |
150
178
  | subtract(value, value, ...) | subtracts numbers | X |
151
- | tally(value, value, ...) | counts times values are seen, including as a set | X |
179
+ | [tally(value, value, ...)](csvpath/matching/functions/tally.md) | counts times values are seen, including as a set | X |
152
180
  | then(y,m,d,hh,mm,ss,format) | a datetime, optionally formatted | |
153
181
  | upper(value) | makes value uppercase | X |
182
+ | yes() | always true | X |
154
183
 
155
184
  # Not Ready For Production
156
- Anything could change. This project is a hobby.
185
+ Anything could change and performance could be better. This project is a hobby.
186
+
157
187
 
158
188
 
159
189
 
@@ -1,17 +1,3 @@
1
- Metadata-Version: 2.1
2
- Name: csvpath
3
- Version: 0.0.2
4
- Summary:
5
- Author: David Kershaw
6
- Author-email: dk107dk@hotmail.com
7
- Requires-Python: >=3.12,<4.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.12
10
- Requires-Dist: ply (>=3.11,<4.0)
11
- Requires-Dist: pytest (>=8.2.2,<9.0.0)
12
- Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
13
- Description-Content-Type: text/markdown
14
-
15
1
 
16
2
  # CsvPath
17
3
 
@@ -118,15 +104,26 @@ take a specific or unlimited number of types as arguments. </td>
118
104
  </tr>
119
105
  <table>
120
106
 
121
- [ #common_name #0=="field" @tail=end() not(in(@tail, 'short|medium')) ]
107
+ ## Example
108
+ [ #common_name #0=="field" @tail.onmatch=end() not(in(@tail, 'short|medium')) ]
122
109
 
123
110
  In the path above, the rules applied are:
124
111
  - `#common_name` indicates a header named "common_name". Headers are the values in the 0th line. This component of the match is an existence test.
125
112
  - `#2` means the 3rd column, counting from 0
126
113
  - Functions and column references are ANDed together
127
- - `@tail` creates a variable named "tail" and sets it to the value of the last column
114
+ - `@tail` creates a variable named "tail" and sets it to the value of the last column if all else matches
128
115
  - Functions can contain functions, equality tests, and/or literals
129
116
 
117
+ Variables are always set unless they are flagged with `.onmatch`. That means:
118
+
119
+ $file.csv[*][ @imcounting.onmatch = count_lines() no()]
120
+
121
+ will never set `imcounting`, but:
122
+
123
+ $file.csv[*][ @imcounting = count_lines() no()]
124
+
125
+ will always set it.
126
+
130
127
  Most of the work of matching is done in functions. The match functions are:
131
128
 
132
129
  | Function | What it does |Done|
@@ -136,15 +133,15 @@ Most of the work of matching is done in functions. The match functions are:
136
133
  | average(number, type) | returns the average up to current "line", "scan", "match" | X |
137
134
  | before(value) | finds things before a date, number, string | X |
138
135
  | concat(value, value) | concatenates the values | X |
139
- | count() | counts the number of matches | X |
140
- | count(value) | count matches of value | X |
136
+ | [count()](csvpath/matching/functions/count.md) | counts the number of matches | X |
137
+ | [count(value)](csvpath/matching/functions/count.md) | count matches of value | X |
141
138
  | count_lines() | count lines to this point in the file | X |
142
139
  | count_scans() | count lines we checked for match | X |
143
140
  | divide(value, value, ...) | divides numbers | X |
144
141
  | end() | returns the value of the last column | X |
145
- | every(value, number) | match every Nth time a value is seen | X |
146
- | first(value) | match the first occurrence and capture line | X |
147
- | in(value, list) | match in a pipe-delimited list | X |
142
+ | [every(value, number)](csvpath/matching/functions/every.md) | match every Nth time a value is seen | X |
143
+ | [first(value, value, ...)](csvpath/matching/functions/first.md) | match the first occurrence and capture line | X |
144
+ | [in(value, list)](csvpath/matching/functions/in.md) | match in a pipe-delimited list | X |
148
145
  | increment(value, n) | increments a variable by n each time seen | |
149
146
  | isinstance(value, typestr) | tests for "int","float","complex","bool","usd" | X |
150
147
  | length(value) | returns the length of the value | X |
@@ -153,22 +150,23 @@ Most of the work of matching is done in functions. The match functions are:
153
150
  | median(value, type) | median value up to current "line", "scan", "match" | X |
154
151
  | min(value, type) | smallest value seen up to current "line", "scan", "match" | X |
155
152
  | multiply(value, value, ...) | multiplies numbers | X |
156
- | no() | always false | X |
153
+ | [no()](csvpath/matching/functions/no.md) | always false | X |
157
154
  | not(value) | negates a value | X |
158
- | now(format) | a datetime, optionally formatted | X |
155
+ | [now(format)](csvpath/matching/functions/now.md) | a datetime, optionally formatted | X |
159
156
  | or(value, value,...) | match any one | X |
160
157
  | percent(type) | % of total lines for "scan", "match", "line" | X |
158
+ | [print(value, str)](csvpath/matching/functions/print.md) | when matches prints the interpolated string | X |
161
159
  | random(list) | pick from a list | |
162
160
  | random(starting, ending) | generates a random int from starting to ending | X |
163
161
  | regex(regex-string) | match on a regular expression | X |
164
162
  | subtract(value, value, ...) | subtracts numbers | X |
165
- | tally(value, value, ...) | counts times values are seen, including as a set | X |
163
+ | [tally(value, value, ...)](csvpath/matching/functions/tally.md) | counts times values are seen, including as a set | X |
166
164
  | then(y,m,d,hh,mm,ss,format) | a datetime, optionally formatted | |
167
165
  | upper(value) | makes value uppercase | X |
166
+ | yes() | always true | X |
168
167
 
169
168
  # Not Ready For Production
170
- Anything could change. This project is a hobby.
171
-
169
+ Anything could change and performance could be better. This project is a hobby.
172
170
 
173
171
 
174
172
 
@@ -5,6 +5,7 @@ from csvpath.matching.matcher import Matcher
5
5
  from csvpath.matching.expression_encoder import ExpressionEncoder
6
6
  from csvpath.matching.expression_math import ExpressionMath
7
7
  from csvpath.scanning.scanner import Scanner
8
+ import time
8
9
 
9
10
 
10
11
  class NoFileException(Exception):
@@ -13,7 +14,13 @@ class NoFileException(Exception):
13
14
 
14
15
  class CsvPath:
15
16
  def __init__(
16
- self, *, filename=None, delimiter=",", quotechar='"', block_print=True
17
+ self,
18
+ *,
19
+ filename=None,
20
+ delimiter=",",
21
+ quotechar='"',
22
+ block_print=True,
23
+ skip_blank_lines=True,
17
24
  ):
18
25
  self.filename = filename
19
26
  self.scanner = None
@@ -30,60 +37,35 @@ class CsvPath:
30
37
  self.quotechar = quotechar
31
38
  self.block_print = block_print
32
39
  self.total_lines = -1
33
- self._verbose = False
34
40
  self._dump_json = False
35
41
  self._do_math = False # off by default, still experimental
36
42
  self._collect_matchers = False
37
43
  self.matchers = []
38
44
  self.jsons = []
45
+ self.matcher = None
46
+ self.skip_blank_lines = skip_blank_lines
39
47
 
40
48
  def dump_json(self):
41
49
  self._dump_json = not self._dump_json
42
50
 
43
51
  def parse(self, data):
52
+ start = time.time()
44
53
  self.scanner = Scanner()
45
54
  s, mat, mod = self._find_scan_match_modify(data)
46
55
  self.scan = s
47
56
  self.match = mat
48
57
  self.modify = mod
49
58
  self.scanner.parse(s)
50
- self._load_headers()
51
- self.get_total_lines()
59
+ end = time.time()
60
+ print(f"parsed: {end - start}")
61
+ self.get_total_lines_and_headers()
52
62
  return self.scanner
53
63
 
54
- def verbose(self, set_verbose: bool = True) -> None:
55
- self._verbose = set_verbose
56
-
57
- # prints what the user needs to see
58
- def verbosity(self, msg: Any) -> None:
59
- if self._verbose:
60
- print(f"{msg}")
61
-
62
64
  # prints what the developer needs to see
63
65
  def print(self, msg: str) -> None:
64
66
  if not self.block_print:
65
67
  print(msg)
66
68
 
67
- def _load_headers(self) -> None:
68
- with open(self.scanner.filename, "r") as file:
69
- reader = csv.reader(
70
- file, delimiter=self.delimiter, quotechar=self.quotechar
71
- )
72
- for row in reader:
73
- self.headers = row
74
- break
75
- hs = self.headers[:]
76
- self.headers = []
77
- for header in hs:
78
- header = header.strip()
79
- header = header.replace(";", "")
80
- header = header.replace(",", "")
81
- header = header.replace("|", "")
82
- header = header.replace("\t", "")
83
- header = header.replace("`", "")
84
- self.headers.append(header)
85
- self.verbosity(f"header: {header}")
86
-
87
69
  def _find_scan_match_modify(self, data):
88
70
  scan = ""
89
71
  matches = ""
@@ -104,9 +86,6 @@ class CsvPath:
104
86
  matches = matches if len(matches) > 0 else None
105
87
  modify = modify.strip()
106
88
  modify = modify if len(modify) > 0 else None
107
- self.verbosity(f"scan: {scan}")
108
- self.verbosity(f"matches: {matches}")
109
- self.verbosity(f"modify: {modify}")
110
89
  return scan, matches, modify
111
90
 
112
91
  def __str__(self):
@@ -158,38 +137,81 @@ class CsvPath:
158
137
  def next(self):
159
138
  if self.scanner.filename is None:
160
139
  raise NoFileException("there is no filename")
161
- self.verbosity(f"filename: {self.scanner.filename}")
162
- total_lines = -1
163
- if self._verbose:
164
- total_lines = self.get_total_lines()
165
- self.verbosity(f"total lines: {total_lines}")
166
-
167
140
  with open(self.scanner.filename, "r") as file:
168
141
  reader = csv.reader(
169
142
  file, delimiter=self.delimiter, quotechar=self.quotechar
170
143
  )
144
+ start = time.time()
171
145
  for line in reader:
172
- self.verbosity(f"line number: {self.line_number} of {total_lines}")
173
- if self.includes(self.line_number):
146
+ if self.skip_blank_lines and len(line) == 0:
147
+ continue
148
+ if self.scanner.includes(self.line_number):
174
149
  self.scan_count = self.scan_count + 1
175
- self.print(f"CsvPath.next: line:{line}")
176
- self.verbosity(f"scan count: {self.scan_count}")
177
- if self.matches(line):
150
+ # from datetime import timedelta
151
+ # startmatch = time.perf_counter()
152
+ b = self.matches(line)
153
+ # endmatch = time.time()
154
+ # duration = timedelta(seconds=time.perf_counter()-startmatch)
155
+ if b:
178
156
  self.match_count = self.match_count + 1
179
- self.verbosity(f"match count: {self.match_count}")
180
157
  yield line
158
+ # if self.scan_count < 100:
159
+ # print(f"match {self.scan_count}: {duration}")
181
160
  self.line_number = self.line_number + 1
161
+ end = time.time()
162
+ print(f"iterated: {end - start}")
182
163
 
183
164
  def get_total_lines(self) -> int:
184
165
  if self.total_lines == -1:
166
+ return self.get_total_lines_and_headers()
167
+ return self.total_lines
168
+
169
+ def get_total_lines_and_headers(self) -> int:
170
+ if self.total_lines == -1:
171
+ start = time.time()
185
172
  with open(self.scanner.filename, "r") as file:
186
173
  reader = csv.reader(
187
174
  file, delimiter=self.delimiter, quotechar=self.quotechar
188
175
  )
176
+ i = 0
189
177
  for line in reader:
178
+ if i == 0:
179
+ self.headers = line
180
+ i += 1
190
181
  self.total_lines += 1
182
+ hs = self.headers[:]
183
+ self.headers = []
184
+ for header in hs:
185
+ header = header.strip()
186
+ header = header.replace(";", "")
187
+ header = header.replace(",", "")
188
+ header = header.replace("|", "")
189
+ header = header.replace("\t", "")
190
+ header = header.replace("`", "")
191
+ self.headers.append(header)
192
+ end = time.time()
193
+ print(f"lines and headers: {end - start}")
191
194
  return self.total_lines
192
195
 
196
+ def _load_headers(self) -> None:
197
+ with open(self.scanner.filename, "r") as file:
198
+ reader = csv.reader(
199
+ file, delimiter=self.delimiter, quotechar=self.quotechar
200
+ )
201
+ for row in reader:
202
+ self.headers = row
203
+ break
204
+ hs = self.headers[:]
205
+ self.headers = []
206
+ for header in hs:
207
+ header = header.strip()
208
+ header = header.replace(";", "")
209
+ header = header.replace(",", "")
210
+ header = header.replace("|", "")
211
+ header = header.replace("\t", "")
212
+ header = header.replace("`", "")
213
+ self.headers.append(header)
214
+
193
215
  def current_line_number(self) -> int:
194
216
  return self.line_number
195
217
 
@@ -208,11 +230,14 @@ class CsvPath:
208
230
  def matches(self, line) -> bool:
209
231
  if not self.match:
210
232
  return True
211
- self.print(f"CsvPath.matches: the match path: {self.match}")
212
-
213
- matcher = Matcher(
214
- csvpath=self, data=self.match, line=line, headers=self.headers
215
- )
233
+ if self.matcher is None:
234
+ self.matcher = Matcher(
235
+ csvpath=self, data=self.match, line=line, headers=self.headers
236
+ )
237
+ else:
238
+ self.matcher.reset()
239
+ self.matcher.line = line
240
+ matcher = self.matcher
216
241
 
217
242
  if self._do_math:
218
243
  em = ExpressionMath()
@@ -272,42 +297,6 @@ class CsvPath:
272
297
  thevalue = self.variables[name]
273
298
  return thevalue
274
299
 
275
- def includes(self, line: int) -> bool:
276
- from_line = self.scanner.from_line
277
- to_line = self.scanner.to_line
278
- all_lines = self.scanner.all_lines
279
- these = self.scanner.these
280
- return self._includes(
281
- line, from_line=from_line, to_line=to_line, all_lines=all_lines, these=these
282
- )
283
-
284
- def _includes(
285
- self,
286
- line: int,
287
- *,
288
- from_line: int = None,
289
- to_line: int = None,
290
- all_lines: bool = None,
291
- these: List[int] = [],
292
- ) -> bool:
293
- if line is None:
294
- return False
295
- if from_line is None and all_lines:
296
- return True
297
- if from_line is not None and all_lines:
298
- return line >= from_line
299
- if from_line == line:
300
- return True
301
- if from_line is not None and to_line is not None and from_line > to_line:
302
- return line >= to_line and line <= from_line
303
- if from_line is not None and to_line is not None:
304
- return line >= from_line and line <= to_line
305
- if line in these:
306
- return True
307
- if to_line is not None:
308
- return line < to_line
309
- return False
310
-
311
300
  def line_numbers(self) -> Iterator[int | str]:
312
301
  these = self.scanner.these
313
302
  from_line = self.scanner.from_line
@@ -0,0 +1,28 @@
1
+
2
+ # Count
3
+
4
+ Returns the number of matches. When used alone count() gives the total matches seen up to the current line in the file.
5
+
6
+ Matches can be scoped down to a contained existence test or equality. Counting an equality means a function, term, variable, or header compared to another function, term, variable, or header.
7
+
8
+ When the counted match is scoped to the contained existence test or equality, the count is of values seen. When counting values seen, the count function stores the value-integer pairs in a dict within CsvPath's variables under a key identifying the count function. The ID of the count function is a hash by default, making it difficult for a human to understand which count the key represents. To name the count, use a qualifier on the count function. A qualifier is a name that follows the function name, separated by a dot, as in:
9
+
10
+ count.my_named_count(#0=True)
11
+
12
+ For example, you can do something like this:
13
+
14
+ $file.csv [*]
15
+ [
16
+ @t.onmatch=count.firstname_match(#firstname=="Ants")
17
+ #firstname=="Ants"
18
+ ]
19
+
20
+ This path stores the count of `firstname` matches in the path's variables, so that the variables look like:
21
+
22
+ {'firstname_match':{True:1}}
23
+
24
+
25
+ ## Examples
26
+
27
+
28
+
@@ -20,6 +20,9 @@ class Count(Function):
20
20
  # contribute to if there's a match
21
21
  return self.value # or not. we have to act as if.
22
22
 
23
+ def matches(self, *, skip=[]) -> bool:
24
+ return self.value
25
+
23
26
  def _get_match_count(self) -> int:
24
27
  if not self.matcher or not self.matcher.csvpath:
25
28
  print("WARNING: no csvpath. are we testing?")
@@ -31,13 +34,20 @@ class Count(Function):
31
34
  # need to apply this count function to the contained obj's value
32
35
  #
33
36
  b = self._function_or_equality.matches(skip=skip)
34
- self._id = self.get_id(self._function_or_equality)
37
+ if not b:
38
+ return False
39
+ self._id = (
40
+ self.qualifier
41
+ if self.qualifier is not None
42
+ else self.get_id(self._function_or_equality)
43
+ )
35
44
  #
36
45
  # to_value() is often going to be a bool based on matches().
37
46
  # but in a case like: count(now('yyyy-mm-dd')) it would not be
38
47
  #
39
48
  tracked_value = self._function_or_equality.to_value(skip=skip)
40
49
  cnt = self.matcher.get_variable(self._id, tracking=tracked_value, set_if_none=0)
50
+ # print(f"count: cnt: {cnt}, b: {b}, tracked value: {tracked_value}")
41
51
  if b:
42
52
  cnt += 1
43
53
  self.matcher.set_variable(self._id, tracking=tracked_value, value=cnt)
@@ -0,0 +1,58 @@
1
+
2
+ # Every
3
+
4
+ Matches every N times a value is seen. Every takes two arguments: a value in the form of a function, header, or variable
5
+ and an int that indicates how many of the value must be seen for the counter to be increased.
6
+
7
+ Every creates two variables. One tracks the number of times a value is seen. The other tracks the number of times every() matched or didn't match.
8
+
9
+ ## Examples
10
+
11
+ $file.csv[*]
12
+ [
13
+ @t.onmatch=count()
14
+ every.who(#lastname, 2)
15
+ ]
16
+
17
+
18
+ This path matches every other time a given value of `lastname` is seen. It results in variables like:
19
+
20
+ {'who_every': {'lastname': 1, 'Kermit': 1, 'Bat': 7}, 'who': {False: 6, True: 3}, 't': 3}
21
+
22
+ This result indicates that the lastname column had:
23
+ - 1 'lastname'
24
+ - 1 'Kermit'
25
+ - 7 'Bat'
26
+
27
+ Those counts resulted in 3 matches and 6 times no match. 'lastname' and 'Kermit' didn't match because they only appear 1 time each. We would have to see 'Kermit' 2 times in order to get a match on 'Kermit'.
28
+
29
+
30
+ $file.csv[*]
31
+ [
32
+ @t.onmatch=count()
33
+ every.fish(#lastname=="Bat", 2)
34
+ ]
35
+
36
+ For a certain .csv file, this path matches 4 times and returns variables like:
37
+
38
+ {'fish_every': {False: 2, True: 7}, 'fish': {False: 5, True: 4}, 't': 4}
39
+
40
+ This means that `#lastname` was "Bat" seven times. There were 2 times `#lastname` was not "Bat". This result could be problematic because it doesn't indicate which of the rows it collects are the `False` rows and which are the `True` ones. If we care only about the `True` matches, we could filter out the `False` rows by selecting for `#lastname == "Bat"` only.
41
+
42
+ $file.csv[*]
43
+ [
44
+ @t.onmatch=count()
45
+ every.fish(#lastname=="Bat", 2)
46
+ #lastname=="Bat"
47
+ ]
48
+
49
+ This results in `t==3` and the list of matched rows including only the 3 matched rows. The variables look like:
50
+
51
+ {'fish_every': {False: 2, True: 7}, 'fish': {False: 5, True: 4}, 't': 3}
52
+
53
+
54
+
55
+
56
+
57
+
58
+
@@ -0,0 +1,47 @@
1
+ from typing import Any
2
+ from csvpath.matching.functions.function import Function, ChildrenException
3
+ from csvpath.matching.productions.equality import Equality
4
+
5
+
6
+ class Every(Function):
7
+ def to_value(self, *, skip=[]) -> Any:
8
+ return self.matches(skip=skip)
9
+
10
+ def matches(self, *, skip=[]) -> bool:
11
+ if self.value is None:
12
+ if len(self.children) != 1:
13
+ raise ChildrenException("no children. there must be 1 equality child")
14
+ child = self.children[0]
15
+ if not isinstance(child, Equality):
16
+ raise ChildrenException("must be 1 equality child")
17
+
18
+ ###
19
+ # 1. we store a count of values under the ID of left. this is the value.to_value
20
+ # 2. we store the every-N-seen count under the qualifier or ID of every
21
+ # 3. we match based on count % n == 0
22
+ #
23
+ self._id = (
24
+ self.qualifier if self.qualifier is not None else self.get_id(self)
25
+ )
26
+ allcount = f"{self.get_id(self)}_{'every'}"
27
+ tracked_value = self.children[0].left.to_value(skip=skip)
28
+ print(f"Every.matches: tracked_value: {tracked_value}")
29
+ cnt = self.matcher.get_variable(
30
+ allcount, tracking=tracked_value, set_if_none=0
31
+ )
32
+ cnt += 1
33
+ self.matcher.set_variable(allcount, tracking=tracked_value, value=cnt)
34
+ every = self.children[0].right.to_value()
35
+ print(
36
+ f"Every.matches: {self._id}: every: {every}, cnt: {cnt} % {every} = {cnt % every}"
37
+ )
38
+ if cnt % every == 0:
39
+ self.value = True
40
+ else:
41
+ self.value = False
42
+ everycount = self.matcher.get_variable(
43
+ self._id, tracking=self.value, set_if_none=0
44
+ )
45
+ everycount += 1
46
+ self.matcher.set_variable(self._id, tracking=self.value, value=everycount)
47
+ return self.value
@@ -0,0 +1,23 @@
1
+
2
+ # First
3
+
4
+ Matches the first time a value is seen. A variable tracks the first line numbers for each value. First tracks None and other values that could be hard to interpret. Internally, the magic number First.NEVER = -9999999999 indicates an unset value.
5
+
6
+ ## Examples
7
+
8
+ $file.csv[*][first.folks(#firstname)]
9
+
10
+ This path matches when the value of the `firstname` has not been seen before. It results in a variable like:
11
+
12
+ {'folks': {'David': 1}}
13
+
14
+ Multiple values can be used as arguments to first().
15
+
16
+ $file.csv[*][first.dude(#firstname, #lastname)]
17
+
18
+ This path matches the first instance of the firstname and lastname column values together. The comparison simply concatenates the values. The result is a variable like:
19
+
20
+ {'dude': {'DavidKermit': 5}}
21
+
22
+
23
+