csvpath 0.0.22__tar.gz → 0.0.41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {csvpath-0.0.22 → csvpath-0.0.41}/PKG-INFO +175 -34
- {csvpath-0.0.22 → csvpath-0.0.41}/README.md +174 -33
- csvpath-0.0.41/csvpath/__init__.py +8 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/csvpath.py +87 -55
- csvpath-0.0.41/csvpath/csvpaths.py +88 -0
- csvpath-0.0.41/csvpath/matching/expression_utility.py +112 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/above.py +1 -1
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/add.py +5 -3
- csvpath-0.0.41/csvpath/matching/functions/any.md +33 -0
- csvpath-0.0.41/csvpath/matching/functions/any.py +104 -0
- csvpath-0.0.41/csvpath/matching/functions/average.md +21 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/below.py +1 -1
- csvpath-0.0.41/csvpath/matching/functions/column.py +23 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/concat.py +8 -8
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/count.md +2 -2
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/count.py +3 -3
- csvpath-0.0.41/csvpath/matching/functions/count_lines.py +10 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/count_scans.py +1 -1
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/divide.py +4 -4
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/end.py +1 -1
- csvpath-0.0.41/csvpath/matching/functions/equals.py +39 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/every.py +3 -2
- csvpath-0.0.41/csvpath/matching/functions/exists.py +27 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/first.py +2 -2
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/function.py +1 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/function_factory.py +63 -34
- csvpath-0.0.41/csvpath/matching/functions/header.md +13 -0
- csvpath-0.0.41/csvpath/matching/functions/header.py +10 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/increment.md +6 -6
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/increment.py +7 -11
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/inf.py +1 -6
- csvpath-0.0.41/csvpath/matching/functions/last.md +12 -0
- csvpath-0.0.41/csvpath/matching/functions/last.py +14 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/length.py +1 -6
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/lower.py +1 -1
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/minf.py +19 -15
- csvpath-0.0.41/csvpath/matching/functions/mod.py +27 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/multiply.py +3 -3
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/no.py +1 -1
- csvpath-0.0.41/csvpath/matching/functions/nonef.py +10 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/notf.py +1 -3
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/now.py +1 -7
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/orf.py +2 -2
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/percent.py +1 -1
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/print.md +3 -1
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/printf.py +29 -22
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/random.py +1 -1
- csvpath-0.0.41/csvpath/matching/functions/regex.py +35 -0
- csvpath-0.0.41/csvpath/matching/functions/stop.md +24 -0
- csvpath-0.0.41/csvpath/matching/functions/stop.py +23 -0
- csvpath-0.0.41/csvpath/matching/functions/substring.py +32 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/subtract.py +3 -4
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/tally.py +2 -2
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/upper.py +1 -1
- csvpath-0.0.41/csvpath/matching/functions/variable.md +13 -0
- csvpath-0.0.41/csvpath/matching/functions/variable.py +10 -0
- csvpath-0.0.41/csvpath/matching/functions/when._ +16 -0
- csvpath-0.0.41/csvpath/matching/functions/when.__ +29 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/yes.py +1 -1
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/matcher.py +42 -42
- csvpath-0.0.41/csvpath/matching/matching_lexer.py +122 -0
- csvpath-0.0.41/csvpath/matching/parser.out +2275 -0
- csvpath-0.0.41/csvpath/matching/parsetab.py +2490 -0
- csvpath-0.0.41/csvpath/matching/productions/__init__.py +8 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/productions/equality.py +27 -47
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/productions/expression.py +1 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/productions/header.py +14 -6
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/productions/matchable.py +59 -4
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/productions/term.py +4 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/productions/variable.py +22 -3
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/parser_utility.py +5 -3
- {csvpath-0.0.22 → csvpath-0.0.41}/pyproject.toml +1 -1
- csvpath-0.0.22/csvpath/matching/expression_math.py +0 -123
- csvpath-0.0.22/csvpath/matching/expression_utility.py +0 -29
- csvpath-0.0.22/csvpath/matching/functions/count_lines.py +0 -12
- csvpath-0.0.22/csvpath/matching/functions/is_instance.py +0 -95
- csvpath-0.0.22/csvpath/matching/functions/regex.py +0 -38
- csvpath-0.0.22/csvpath/matching/matching_lexer.py +0 -66
- csvpath-0.0.22/csvpath/matching/parser.out +0 -1287
- csvpath-0.0.22/csvpath/matching/parsetab.py +0 -1427
- {csvpath-0.0.22/csvpath → csvpath-0.0.41/csvpath/matching}/__init__.py +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/expression_encoder.py +0 -0
- {csvpath-0.0.22/csvpath/matching → csvpath-0.0.41/csvpath/matching/functions}/__init__.py +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/every.md +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/first.md +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/in.md +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/no.md +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/now.md +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/matching/functions/tally.md +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/scanning/__init__.py +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/scanning/parser.out +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/scanning/parsetab.py +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/scanning/scanner.py +0 -0
- {csvpath-0.0.22 → csvpath-0.0.41}/csvpath/scanning/scanning_lexer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csvpath
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.41
|
|
4
4
|
Summary:
|
|
5
5
|
Author: David Kershaw
|
|
6
6
|
Author-email: dk107dk@hotmail.com
|
|
@@ -19,26 +19,120 @@ Description-Content-Type: text/markdown
|
|
|
19
19
|
|
|
20
20
|
CsvPath defines a declarative syntax for inspecting and updating CSV files. Though much simpler, it is similar to:
|
|
21
21
|
- XPath: CsvPath is to a CSV file like XPath is to an XML file
|
|
22
|
-
- Schematron: Schematron is basically XPath rules applied using XSLT. CsvPath paths can be used as validation rules.
|
|
23
|
-
- CSS selectors: CsvPath picks out structured data in a similar way to how CSS selectors pick out HTML structures.
|
|
22
|
+
- Schematron: Schematron validation is basically XPath rules applied using XSLT. CsvPath paths can be used as validation rules.
|
|
23
|
+
- CSS selectors: CsvPath picks out structured data in a conceptually similar way to how CSS selectors pick out HTML structures.
|
|
24
24
|
|
|
25
25
|
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Custom functions can be added.
|
|
26
26
|
|
|
27
27
|
# Usage
|
|
28
|
-
CsvPath paths have two parts, scanning and matching. For usage, see the unit tests in [tests/test_scanner.py](tests/test_scanner.py), [tests/test_matcher.py](tests/test_matcher.py) and [tests/test_functions.py](tests/test_functions.py).
|
|
29
28
|
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
CsvPath paths have three parts:
|
|
30
|
+
- a "root" file name
|
|
31
|
+
- a scanning part
|
|
32
|
+
- a matching part
|
|
33
|
+
|
|
34
|
+
The root starts with `$`. The match and scan parts are enclosed by brackets.
|
|
35
|
+
|
|
36
|
+
A very simple csvpath might look like this:
|
|
37
|
+
|
|
38
|
+
$filename[*][yes()]
|
|
39
|
+
|
|
40
|
+
This path says open the file named `filename`, scan all the lines, and match every line scanned.
|
|
41
|
+
|
|
42
|
+
The filename following the `$` can be an actual relative or absolute file path. It could alternatively be a logical identifier that points indirectly to a physical file, as described below.
|
|
43
|
+
|
|
44
|
+
## Running CsvPath
|
|
45
|
+
|
|
46
|
+
There are two classes that do all the work: CsvPath and CsvPaths. Each has very few external methods.
|
|
47
|
+
- CsvPath
|
|
48
|
+
- parse() applies a csvpath to a file
|
|
49
|
+
- next() iterates over the matched rows
|
|
50
|
+
- fast_forward() processes all rows
|
|
51
|
+
- collect() processes all rows and collects the lines that matched as lists
|
|
52
|
+
- CsvPaths
|
|
53
|
+
- csvpath() gets a CsvPath that knows all the file names available
|
|
54
|
+
- set_named_files() sets the file names as a Dict[str,str] of named paths
|
|
55
|
+
- set_file_path() sets the file names from a JSON file of named paths or a single .csv file or a directory of .csv files
|
|
56
|
+
|
|
57
|
+
This is a very basic use of CsvPath. For more usage, see the unit tests.
|
|
58
|
+
|
|
59
|
+
path = CsvPath()
|
|
60
|
+
path.parse("""$test.csv
|
|
61
|
+
[5-25]
|
|
62
|
+
[
|
|
63
|
+
#0=="Frog"
|
|
64
|
+
@lastname.onmatch="Bats"
|
|
65
|
+
count()==2
|
|
66
|
+
]
|
|
67
|
+
""")
|
|
32
68
|
for i, line in enumerate( path.next() ):
|
|
33
69
|
print(f"{i}: {line}")
|
|
34
|
-
|
|
35
70
|
print(f"path vars: {path.variables}")
|
|
36
71
|
|
|
37
|
-
|
|
72
|
+
The csvpath says:
|
|
38
73
|
- Open test.csv
|
|
39
74
|
- Scan lines 5 through 25
|
|
40
75
|
- Match the second time we see a line where the first column equals "Frog" and set the variable called "lastname" to "Bats"
|
|
41
76
|
|
|
77
|
+
Another path that does the same thing a bit more simply might look like:
|
|
78
|
+
|
|
79
|
+
"""$test.csv
|
|
80
|
+
[5-25]
|
|
81
|
+
[
|
|
82
|
+
#0=="Frog"
|
|
83
|
+
@lastname.onmatch="Bats"
|
|
84
|
+
count()==2 -> print( "$.match_count: $.line")
|
|
85
|
+
]
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
In this case we're using the "when" operator, `->`, to determine when to print.
|
|
89
|
+
|
|
90
|
+
## The print function
|
|
91
|
+
|
|
92
|
+
The `print` function has several uses, including:
|
|
93
|
+
- Debugging csvpaths
|
|
94
|
+
- Validating CSV files
|
|
95
|
+
- Creating new CSV files based on an existing file
|
|
96
|
+
|
|
97
|
+
### Validating CSV
|
|
98
|
+
|
|
99
|
+
CsvPath paths can be used for rules based validation. Rules based validation checks a file against content and structure rules but does not validate the file's structure against a schema. This validation approach is similar to XML's Schematron validation, where XPath rules are applied to XML.
|
|
100
|
+
|
|
101
|
+
There is no "standard" way to do CsvPath validation. The simplest way is to create csvpaths that print a validation message when a rule fails. For example:
|
|
102
|
+
|
|
103
|
+
$test.csv[*][@failed = equals(#firstname, "Frog")
|
|
104
|
+
@failed.asbool -> print("Error: Check line $.line_count for a row with the name Frog")]
|
|
105
|
+
|
|
106
|
+
Several rules can exist in the same csvpath for convenience and/or performance. Alternatively, you can run separate csvpaths for each rule.
|
|
107
|
+
|
|
108
|
+
### Creating new CSV files
|
|
109
|
+
|
|
110
|
+
Csvpaths can use the `print` function to generate new file content on system out. Redirecting the output to a file is an easy way to create a new CSV file based on an existing file. For example:
|
|
111
|
+
|
|
112
|
+
$test.csv[*][ line_count()==0 -> print("lastname, firstname, say")
|
|
113
|
+
above(line_count(), 0) -> print("$.headers.lastname, $.headers.firstname, $.headers.say")]
|
|
114
|
+
|
|
115
|
+
This csvpath reorders the headers of the test file at `tests/test_resources/test.csv`. The output file will have a header row.
|
|
116
|
+
|
|
117
|
+
## Named files
|
|
118
|
+
|
|
119
|
+
You can use the `CsvPaths` class to set up a list of named file paths so that you can have more concise csvpaths. Named paths can take the form of:
|
|
120
|
+
- A JSON file with a dictionary of file paths under name keys
|
|
121
|
+
- A dict object passed into the CsvPaths object containing the same named path structure
|
|
122
|
+
- The path to a csv file that will be put into the named paths dict under its name minus extension
|
|
123
|
+
- A file system path pointing to a directory that will be used to populate the named paths dict with all contained files
|
|
124
|
+
|
|
125
|
+
You can then use a csvpath like `$logical_name[*][yes()]` to apply the csvpath to the file named `logical_name` in the CsvPaths object's named paths dict. This use is nearly transparent:
|
|
126
|
+
|
|
127
|
+
paths = CsvPaths(filename = "my_named_paths.json")
|
|
128
|
+
path = paths.csvpath()
|
|
129
|
+
path.parse( """$test[*][#firstname=="Fred"]""" )
|
|
130
|
+
path.collect()
|
|
131
|
+
|
|
132
|
+
If my_named_paths.json contains the following structure, the name `test` will be used to find `tests/test_resources/test.csv`. The parse method will apply the csvpath and the collect method will gather all the matched rows.
|
|
133
|
+
|
|
134
|
+
{ "test":"tests/test_resources/test.csv" }
|
|
135
|
+
|
|
42
136
|
# Scanning
|
|
43
137
|
The scanner enumerates lines. For each line returned, the line number, the scanned line count, and the match count are available. The set of line numbers scanned is also available.
|
|
44
138
|
|
|
@@ -51,8 +145,7 @@ The scan part of the path starts with a dollar sign to indicate the root, meanin
|
|
|
51
145
|
- `[1+3-8]` means line 1 and lines 3 through eight
|
|
52
146
|
|
|
53
147
|
# Matching
|
|
54
|
-
The match part is also bracketed. Matches have space separated
|
|
55
|
-
components or "values" that are ANDed together. A match component is one of several types:
|
|
148
|
+
The match part is also bracketed. Matches have space separated components or "values" that are ANDed together. The components' order is important. A match component is one of several types:
|
|
56
149
|
<table>
|
|
57
150
|
<tr>
|
|
58
151
|
<td>Type</td>
|
|
@@ -90,26 +183,36 @@ Qualifiers are described below. </td>
|
|
|
90
183
|
<tr>
|
|
91
184
|
<td>Variable </td>
|
|
92
185
|
<td>Value</td>
|
|
93
|
-
<td>True
|
|
94
|
-
<td>
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
only be set when the row matches all parts of the path.
|
|
186
|
+
<td>True when set unless `onchange` determines True/False.</td>
|
|
187
|
+
<td>
|
|
188
|
+
<p>
|
|
189
|
+
An @ followed by a name. A variable is set or tested depending on the usage. When used as the left hand side of an "=" its value is set. When it is used on either side of an "==" it is an equality test.
|
|
190
|
+
</p>
|
|
191
|
+
<p>
|
|
192
|
+
Variables can take an `onmatch` qualifier to indicate that the variable should only be set when the row matches all parts of the path.
|
|
193
|
+
</p>
|
|
194
|
+
<p>
|
|
195
|
+
A variable can also take an `onchange` qualifier to make its assignment only match when its value changes. In the usual case, a variable assignment always matches, making it not a factor in the row's matching or not matching. With `onchange` the assignment can determine if the row fails to match the csvpath.
|
|
196
|
+
</p>
|
|
197
|
+
<p>
|
|
198
|
+
Note that at present a variable assignment of an equality test is not possible using `==`. In the future the csvpath grammar may be improved to address this gap. In the interim, use the `equals(value,value)` function. That is, instead of
|
|
199
|
+
@test = @cat == @hat
|
|
200
|
+
use
|
|
201
|
+
@test = equals(@cat, @hat)
|
|
202
|
+
</p>
|
|
100
203
|
<td>
|
|
101
204
|
<li/> `@weather="cloudy"`
|
|
102
205
|
<li/> `count(@weather=="sunny")`
|
|
103
|
-
<li/> `@weather`
|
|
104
206
|
<li/> `#summer==@weather`
|
|
207
|
+
<li/> `@happy.onchange=#weather`
|
|
105
208
|
|
|
106
|
-
#1 is an assignment that sets the variable and returns True. #2 is an argument used as a test in a way that is specific to the function. #3 is
|
|
209
|
+
#1 is an assignment that sets the variable and returns True. #2 is an argument used as a test in a way that is specific to the function. #3 is a test. #4 sets the `happy` variable to the value of the `weather` header and fails the row matching until `happy`'s value changes.
|
|
107
210
|
</td>
|
|
108
211
|
</tr>
|
|
109
212
|
<tr>
|
|
110
213
|
<td>Header </td>
|
|
111
214
|
<td>Value </td>
|
|
112
|
-
<td>
|
|
215
|
+
<td>Calculated</td>
|
|
113
216
|
<td>A # followed by a name or integer. The name references a value in line 0, the header
|
|
114
217
|
row. A number references a column by the 0-based column order. </td>
|
|
115
218
|
<td>
|
|
@@ -129,9 +232,13 @@ only be set when the row matches all parts of the path.
|
|
|
129
232
|
</tr>
|
|
130
233
|
</table>
|
|
131
234
|
|
|
132
|
-
|
|
235
|
+
## Qualifiers
|
|
236
|
+
|
|
237
|
+
Variables and some functions can take qualifiers on their name. A qualifier takes the form of a dot plus a qualification name. At the moment there are only four qualifiers:
|
|
133
238
|
|
|
134
239
|
- `onmatch` to indicate that action on the variable or function only happens when the whole path matches a row
|
|
240
|
+
- `onchange` set on a variable to indicate that a row should only match when the variable is set to a new value
|
|
241
|
+
- `asbool` set on a variable or header to have its value interpreted as a bool rather than just a simple `is not None` test
|
|
135
242
|
- An arbitrary string to add a name for the function's internal use, typically to name a variable
|
|
136
243
|
|
|
137
244
|
Qualifiers look like:
|
|
@@ -144,36 +251,54 @@ Or:
|
|
|
144
251
|
|
|
145
252
|
When multiple qualifiers are used order is not important.
|
|
146
253
|
|
|
147
|
-
##
|
|
148
|
-
[ #common_name #0=="field" @tail.onmatch=end() not(in(@tail, 'short</td><td>medium')) ]
|
|
254
|
+
## Variables
|
|
149
255
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
256
|
+
A variable can be assigned early in the match part of a path and used later in that same path. The assignment and use will both be in the context of the same row in the file. For example:
|
|
257
|
+
|
|
258
|
+
[@a=#b #c==@a]
|
|
259
|
+
|
|
260
|
+
Can also be written as:
|
|
261
|
+
|
|
262
|
+
[#c==#b]
|
|
156
263
|
|
|
157
|
-
Variables are always set unless they are flagged with `.onmatch
|
|
264
|
+
Variables are always set unless they are flagged with the `.onmatch` qualifier. That means:
|
|
158
265
|
|
|
159
266
|
$file.csv[*][ @imcounting.onmatch = count_lines() no()]
|
|
160
267
|
|
|
161
|
-
will never set `imcounting`, but:
|
|
268
|
+
will never set `imcounting`, because of the `no()` function disallowing any matches, but:
|
|
162
269
|
|
|
163
270
|
$file.csv[*][ @imcounting = count_lines() no()]
|
|
164
271
|
|
|
165
272
|
will always set it.
|
|
166
273
|
|
|
274
|
+
As noted above, a variable can be flagged with the `onchange` qualifier. The effect is that a row will only match if the variable qualified by `onchange` changes in value.
|
|
275
|
+
|
|
276
|
+
## The when operator
|
|
277
|
+
|
|
278
|
+
`->`, the "when" operator, is used to act on a condition. `->` can take an equality or function on the left and trigger an equality, assignment, or function on the right. For example,
|
|
279
|
+
|
|
280
|
+
[ last() -> print("this is the last line") ]
|
|
281
|
+
|
|
282
|
+
Prints `this is the last line` just before the scan ends.
|
|
283
|
+
|
|
284
|
+
[ exists(#0) -> @firstname = #0 ]
|
|
285
|
+
|
|
286
|
+
Says to set the `firstname` variable to the value of the first column when the first column has a value.
|
|
287
|
+
|
|
288
|
+
## Match functions
|
|
289
|
+
|
|
167
290
|
Most of the work of matching is done in functions. The match functions are the following.
|
|
168
291
|
|
|
169
292
|
|
|
170
293
|
<table>
|
|
171
294
|
<tr><th> Group </th><th>Function </th><th> What it does </th></tr>
|
|
172
295
|
<tr><td> Boolean </td><td> </td><td> </td></tr>
|
|
296
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/any.md'>any(value, value)</a> </td><td> existence test across a range of places </td></tr>
|
|
173
297
|
<tr><td> </td><td> <a href='csvpath/matching/functions/no.md'>no()</a> </td><td> always false </td></tr>
|
|
174
298
|
<tr><td> </td><td> not(value) </td><td> negates a value </td></tr>
|
|
175
299
|
<tr><td> </td><td> or(value, value,...) </td><td> match any one </td></tr>
|
|
176
300
|
<tr><td> </td><td> yes() </td><td> always true </td></tr>
|
|
301
|
+
<tr><td> </td><td> exists(value) </td><td> tests if the value exists </td></tr>
|
|
177
302
|
<tr><td> </td><td> <a href='csvpath/matching/functions/in.md'>in(value, list)</a> </td><td> match in a pipe-delimited list </td></tr>
|
|
178
303
|
<tr><td> Math </td><td> </td><td> </td></tr>
|
|
179
304
|
<tr><td> </td><td> add(value, value, ...) </td><td> adds numbers </td></tr>
|
|
@@ -183,7 +308,7 @@ Most of the work of matching is done in functions. The match functions are the f
|
|
|
183
308
|
<tr><td> </td><td> after(value) </td><td> finds things after a date, number, string </td></tr>
|
|
184
309
|
<tr><td> </td><td> before(value) </td><td> finds things before a date, number, string </td></tr>
|
|
185
310
|
<tr><td> Stats </td><td> </td><td> </td></tr>
|
|
186
|
-
<tr><td> </td><td> average(number, type)
|
|
311
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/average.md'>average(number, type)</a> </td><td> returns the average up to current "line", "scan", "match" </td></tr>
|
|
187
312
|
<tr><td> </td><td> median(value, type) </td><td> median value up to current "line", "scan", "match" </td></tr>
|
|
188
313
|
<tr><td> </td><td> max(value, type) </td><td> largest value seen up to current "line", "scan", "match" </td></tr>
|
|
189
314
|
<tr><td> </td><td> min(value, type) </td><td> smallest value seen up to current "line", "scan", "match" </td></tr>
|
|
@@ -202,16 +327,32 @@ Most of the work of matching is done in functions. The match functions are the f
|
|
|
202
327
|
<tr><td> </td><td> length(value) </td><td> returns the length of the value </td></tr>
|
|
203
328
|
<tr><td> </td><td> lower(value) </td><td> makes value lowercase </td></tr>
|
|
204
329
|
<tr><td> </td><td> regex(regex-string, value) </td><td> match on a regular expression </td></tr>
|
|
330
|
+
<tr><td> </td><td> substring(value, int) </td><td> returns the first n chars from the value </td></tr>
|
|
205
331
|
<tr><td> </td><td> upper(value) </td><td> makes value uppercase </td></tr>
|
|
206
|
-
<tr><td>
|
|
332
|
+
<tr><td> Columns </td><td> </td><td> </td></tr>
|
|
207
333
|
<tr><td> </td><td> end() </td><td> returns the value of the last column </td></tr>
|
|
208
|
-
<tr><td> </td><td>
|
|
334
|
+
<tr><td> </td><td> column(value) </td><td> returns column name for an index or index for a name </td></tr>
|
|
335
|
+
<tr><td> Other </td><td> </td><td> </td></tr>
|
|
336
|
+
<tr><td> </td><td> header() </td><td> indicates to another function to look in headers </td></tr>
|
|
209
337
|
<tr><td> </td><td> <a href='csvpath/matching/functions/now.md'>now(format)</a></td><td> a datetime, optionally formatted </td></tr>
|
|
210
338
|
<tr><td> </td><td> <a href='csvpath/matching/functions/print.md'>print(value, str)</a></td><td> when matches prints the interpolated string </td></tr>
|
|
211
339
|
<tr><td> </td><td> random(starting, ending) </td><td> generates a random int from starting to ending </td>
|
|
340
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/stop.md'>stop(value)</a> </td><td> stops path scanning if a condition is met </td>
|
|
341
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/when.md'>when(value, value)</a> </td><td> activate a value when a condition matches </td>
|
|
342
|
+
<tr><td> </td><td> variable() </td><td> indicates to another function to look in variables </td></tr>
|
|
212
343
|
</tr>
|
|
213
344
|
</table>
|
|
214
345
|
|
|
346
|
+
## Another Example
|
|
347
|
+
[ exists(#common_name) #0=="field" @tail.onmatch=end() not(in(@tail, 'short|medium')) ]
|
|
348
|
+
|
|
349
|
+
In the path above, the rules applied are:
|
|
350
|
+
- The exists test of `#common_name` checks if the header named "common_name" has a value. Headers are the values in the 0th line.
|
|
351
|
+
- `#2` means the 3rd column, counting from 0
|
|
352
|
+
- Functions and column references are ANDed together
|
|
353
|
+
- `@tail` creates a variable named "tail" and sets it to the value of the last column if all else matches
|
|
354
|
+
- Functions can contain functions, equality tests, and/or literals
|
|
355
|
+
|
|
215
356
|
# Not Ready For Production
|
|
216
357
|
Anything could change and performance could be better. This project is a hobby.
|
|
217
358
|
|
|
@@ -3,26 +3,120 @@
|
|
|
3
3
|
|
|
4
4
|
CsvPath defines a declarative syntax for inspecting and updating CSV files. Though much simpler, it is similar to:
|
|
5
5
|
- XPath: CsvPath is to a CSV file like XPath is to an XML file
|
|
6
|
-
- Schematron: Schematron is basically XPath rules applied using XSLT. CsvPath paths can be used as validation rules.
|
|
7
|
-
- CSS selectors: CsvPath picks out structured data in a similar way to how CSS selectors pick out HTML structures.
|
|
6
|
+
- Schematron: Schematron validation is basically XPath rules applied using XSLT. CsvPath paths can be used as validation rules.
|
|
7
|
+
- CSS selectors: CsvPath picks out structured data in a conceptually similar way to how CSS selectors pick out HTML structures.
|
|
8
8
|
|
|
9
9
|
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Custom functions can be added.
|
|
10
10
|
|
|
11
11
|
# Usage
|
|
12
|
-
CsvPath paths have two parts, scanning and matching. For usage, see the unit tests in [tests/test_scanner.py](tests/test_scanner.py), [tests/test_matcher.py](tests/test_matcher.py) and [tests/test_functions.py](tests/test_functions.py).
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
CsvPath paths have three parts:
|
|
14
|
+
- a "root" file name
|
|
15
|
+
- a scanning part
|
|
16
|
+
- a matching part
|
|
17
|
+
|
|
18
|
+
The root starts with `$`. The match and scan parts are enclosed by brackets.
|
|
19
|
+
|
|
20
|
+
A very simple csvpath might look like this:
|
|
21
|
+
|
|
22
|
+
$filename[*][yes()]
|
|
23
|
+
|
|
24
|
+
This path says open the file named `filename`, scan all the lines, and match every line scanned.
|
|
25
|
+
|
|
26
|
+
The filename following the `$` can be an actual relative or absolute file path. It could alternatively be a logical identifier that points indirectly to a physical file, as described below.
|
|
27
|
+
|
|
28
|
+
## Running CsvPath
|
|
29
|
+
|
|
30
|
+
There are two classes that do all the work: CsvPath and CsvPaths. Each has very few external methods.
|
|
31
|
+
- CsvPath
|
|
32
|
+
- parse() applies a csvpath to a file
|
|
33
|
+
- next() iterates over the matched rows
|
|
34
|
+
- fast_forward() processes all rows
|
|
35
|
+
- collect() processes all rows and collects the lines that matched as lists
|
|
36
|
+
- CsvPaths
|
|
37
|
+
- csvpath() gets a CsvPath that knows all the file names available
|
|
38
|
+
- set_named_files() sets the file names as a Dict[str,str] of named paths
|
|
39
|
+
- set_file_path() sets the file names from a JSON file of named paths or a single .csv file or a directory of .csv files
|
|
40
|
+
|
|
41
|
+
This is a very basic use of CsvPath. For more usage, see the unit tests.
|
|
42
|
+
|
|
43
|
+
path = CsvPath()
|
|
44
|
+
path.parse("""$test.csv
|
|
45
|
+
[5-25]
|
|
46
|
+
[
|
|
47
|
+
#0=="Frog"
|
|
48
|
+
@lastname.onmatch="Bats"
|
|
49
|
+
count()==2
|
|
50
|
+
]
|
|
51
|
+
""")
|
|
16
52
|
for i, line in enumerate( path.next() ):
|
|
17
53
|
print(f"{i}: {line}")
|
|
18
|
-
|
|
19
54
|
print(f"path vars: {path.variables}")
|
|
20
55
|
|
|
21
|
-
|
|
56
|
+
The csvpath says:
|
|
22
57
|
- Open test.csv
|
|
23
58
|
- Scan lines 5 through 25
|
|
24
59
|
- Match the second time we see a line where the first column equals "Frog" and set the variable called "lastname" to "Bats"
|
|
25
60
|
|
|
61
|
+
Another path that does the same thing a bit more simply might look like:
|
|
62
|
+
|
|
63
|
+
"""$test.csv
|
|
64
|
+
[5-25]
|
|
65
|
+
[
|
|
66
|
+
#0=="Frog"
|
|
67
|
+
@lastname.onmatch="Bats"
|
|
68
|
+
count()==2 -> print( "$.match_count: $.line")
|
|
69
|
+
]
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
In this case we're using the "when" operator, `->`, to determine when to print.
|
|
73
|
+
|
|
74
|
+
## The print function
|
|
75
|
+
|
|
76
|
+
The `print` function has several uses, including:
|
|
77
|
+
- Debugging csvpaths
|
|
78
|
+
- Validating CSV files
|
|
79
|
+
- Creating new CSV files based on an existing file
|
|
80
|
+
|
|
81
|
+
### Validating CSV
|
|
82
|
+
|
|
83
|
+
CsvPath paths can be used for rules based validation. Rules based validation checks a file against content and structure rules but does not validate the file's structure against a schema. This validation approach is similar to XML's Schematron validation, where XPath rules are applied to XML.
|
|
84
|
+
|
|
85
|
+
There is no "standard" way to do CsvPath validation. The simplest way is to create csvpaths that print a validation message when a rule fails. For example:
|
|
86
|
+
|
|
87
|
+
$test.csv[*][@failed = equals(#firstname, "Frog")
|
|
88
|
+
@failed.asbool -> print("Error: Check line $.line_count for a row with the name Frog")]
|
|
89
|
+
|
|
90
|
+
Several rules can exist in the same csvpath for convenience and/or performance. Alternatively, you can run separate csvpaths for each rule.
|
|
91
|
+
|
|
92
|
+
### Creating new CSV files
|
|
93
|
+
|
|
94
|
+
Csvpaths can use the `print` function to generate new file content on system out. Redirecting the output to a file is an easy way to create a new CSV file based on an existing file. For example:
|
|
95
|
+
|
|
96
|
+
$test.csv[*][ line_count()==0 -> print("lastname, firstname, say")
|
|
97
|
+
above(line_count(), 0) -> print("$.headers.lastname, $.headers.firstname, $.headers.say")]
|
|
98
|
+
|
|
99
|
+
This csvpath reorders the headers of the test file at `tests/test_resources/test.csv`. The output file will have a header row.
|
|
100
|
+
|
|
101
|
+
## Named files
|
|
102
|
+
|
|
103
|
+
You can use the `CsvPaths` class to set up a list of named file paths so that you can have more concise csvpaths. Named paths can take the form of:
|
|
104
|
+
- A JSON file with a dictionary of file paths under name keys
|
|
105
|
+
- A dict object passed into the CsvPaths object containing the same named path structure
|
|
106
|
+
- The path to a csv file that will be put into the named paths dict under its name minus extension
|
|
107
|
+
- A file system path pointing to a directory that will be used to populate the named paths dict with all contained files
|
|
108
|
+
|
|
109
|
+
You can then use a csvpath like `$logical_name[*][yes()]` to apply the csvpath to the file named `logical_name` in the CsvPaths object's named paths dict. This use is nearly transparent:
|
|
110
|
+
|
|
111
|
+
paths = CsvPaths(filename = "my_named_paths.json")
|
|
112
|
+
path = paths.csvpath()
|
|
113
|
+
path.parse( """$test[*][#firstname=="Fred"]""" )
|
|
114
|
+
path.collect()
|
|
115
|
+
|
|
116
|
+
If my_named_paths.json contains the following structure, the name `test` will be used to find `tests/test_resources/test.csv`. The parse method will apply the csvpath and the collect method will gather all the matched rows.
|
|
117
|
+
|
|
118
|
+
{ "test":"tests/test_resources/test.csv" }
|
|
119
|
+
|
|
26
120
|
# Scanning
|
|
27
121
|
The scanner enumerates lines. For each line returned, the line number, the scanned line count, and the match count are available. The set of line numbers scanned is also available.
|
|
28
122
|
|
|
@@ -35,8 +129,7 @@ The scan part of the path starts with a dollar sign to indicate the root, meanin
|
|
|
35
129
|
- `[1+3-8]` means line 1 and lines 3 through 8
|
|
36
130
|
|
|
37
131
|
# Matching
|
|
38
|
-
The match part is also bracketed. Matches have space separated
|
|
39
|
-
components or "values" that are ANDed together. A match component is one of several types:
|
|
132
|
+
The match part is also bracketed. Matches have space separated components or "values" that are ANDed together. The components' order is important. A match component is one of several types:
|
|
40
133
|
<table>
|
|
41
134
|
<tr>
|
|
42
135
|
<td>Type</td>
|
|
@@ -74,26 +167,36 @@ Qualifiers are described below. </td>
|
|
|
74
167
|
<tr>
|
|
75
168
|
<td>Variable </td>
|
|
76
169
|
<td>Value</td>
|
|
77
|
-
<td>True
|
|
78
|
-
<td>
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
only be set when the row matches all parts of the path.
|
|
170
|
+
<td>True when set unless `onchange` determines True/False.</td>
|
|
171
|
+
<td>
|
|
172
|
+
<p>
|
|
173
|
+
An @ followed by a name. A variable is set or tested depending on the usage. When used as the left hand side of an "=" its value is set. When it is used on either side of an "==" it is an equality test.
|
|
174
|
+
</p>
|
|
175
|
+
<p>
|
|
176
|
+
Variables can take an `onmatch` qualifier to indicate that the variable should only be set when the row matches all parts of the path.
|
|
177
|
+
</p>
|
|
178
|
+
<p>
|
|
179
|
+
A variable can also take an `onchange` qualifier to make its assignment only match when its value changes. In the usual case, a variable assignment always matches, making it not a factor in the row's matching or not matching. With `onchange` the assignment can determine if the row fails to match the csvpath.
|
|
180
|
+
</p>
|
|
181
|
+
<p>
|
|
182
|
+
Note that at present a variable assignment of an equality test is not possible using `==`. In the future the csvpath grammar may be improved to address this gap. In the interim, use the `equals(value,value)` function. I.e., instead of
|
|
183
|
+
@test = @cat == @hat
|
|
184
|
+
use
|
|
185
|
+
@test = equals(@cat, @hat)
|
|
186
|
+
</p>
|
|
84
187
|
<td>
|
|
85
188
|
<li/> `@weather="cloudy"`
|
|
86
189
|
<li/> `count(@weather=="sunny")`
|
|
87
|
-
<li/> `@weather`
|
|
88
190
|
<li/> `#summer==@weather`
|
|
191
|
+
<li/> `@happy.onchange=#weather`
|
|
89
192
|
|
|
90
|
-
#1 is an assignment that sets the variable and returns True. #2 is an argument used as a test in a way that is specific to the function. #3 is
|
|
193
|
+
#1 is an assignment that sets the variable and returns True. #2 is an argument used as a test in a way that is specific to the function. #3 is a test. #4 sets the `happy` variable to the value of the `weather` header and fails the row matching until `happy`'s value changes.
|
|
91
194
|
</td>
|
|
92
195
|
</tr>
|
|
93
196
|
<tr>
|
|
94
197
|
<td>Header </td>
|
|
95
198
|
<td>Value </td>
|
|
96
|
-
<td>
|
|
199
|
+
<td>Calculated</td>
|
|
97
200
|
<td>A # followed by a name or integer. The name references a value in line 0, the header
|
|
98
201
|
row. A number references a column by the 0-based column order. </td>
|
|
99
202
|
<td>
|
|
@@ -113,9 +216,13 @@ only be set when the row matches all parts of the path.
|
|
|
113
216
|
</tr>
|
|
114
217
|
</table>
|
|
115
218
|
|
|
116
|
-
|
|
219
|
+
## Qualifiers
|
|
220
|
+
|
|
221
|
+
Variables and some functions can take qualifiers on their name. A qualifier takes the form of a dot plus a qualification name. At the moment there are only four qualifiers:
|
|
117
222
|
|
|
118
223
|
- `onmatch` to indicate that action on the variable or function only happens when the whole path matches a row
|
|
224
|
+
- `onchange` set on a variable to indicate that a row should only match when the variable is set to a new value
|
|
225
|
+
- `asbool` set on a variable or header to have its value interpreted as a bool rather than just a simple `is not None` test
|
|
119
226
|
- An arbitrary string to add a name for the function's internal use, typically to name a variable
|
|
120
227
|
|
|
121
228
|
Qualifiers look like:
|
|
@@ -128,36 +235,54 @@ Or:
|
|
|
128
235
|
|
|
129
236
|
When multiple qualifiers are used order is not important.
|
|
130
237
|
|
|
131
|
-
##
|
|
132
|
-
[ #common_name #0=="field" @tail.onmatch=end() not(in(@tail, 'short</td><td>medium')) ]
|
|
238
|
+
## Variables
|
|
133
239
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
240
|
+
A variable can be assigned early in the match part of a path and used later in that same path. The assignment and use will both be in the context of the same row in the file. For example:
|
|
241
|
+
|
|
242
|
+
[@a=#b #c==@a]
|
|
243
|
+
|
|
244
|
+
Can also be written as:
|
|
245
|
+
|
|
246
|
+
[#c==#b]
|
|
140
247
|
|
|
141
|
-
Variables are always set unless they are flagged with `.onmatch
|
|
248
|
+
Variables are always set unless they are flagged with the `.onmatch` qualifier. That means:
|
|
142
249
|
|
|
143
250
|
$file.csv[*][ @imcounting.onmatch = count_lines() no()]
|
|
144
251
|
|
|
145
|
-
will never set `imcounting`, but:
|
|
252
|
+
will never set `imcounting`, because of the `no()` function disallowing any matches, but:
|
|
146
253
|
|
|
147
254
|
$file.csv[*][ @imcounting = count_lines() no()]
|
|
148
255
|
|
|
149
256
|
will always set it.
|
|
150
257
|
|
|
258
|
+
As noted above, a variable can be flagged with the `onchange` qualifier. The effect is that a row will only match if the variable qualified by `onchange` changes in value.
|
|
259
|
+
|
|
260
|
+
## The when operator
|
|
261
|
+
|
|
262
|
+
`->`, the "when" operator, is used to act on a condition. `->` can take an equality or function on the left and trigger an equality, assignment, or function on the right. For example:
|
|
263
|
+
|
|
264
|
+
[ last() -> print("this is the last line") ]
|
|
265
|
+
|
|
266
|
+
Prints `this is the last line` just before the scan ends.
|
|
267
|
+
|
|
268
|
+
[ exists(#0) -> @firstname = #0 ]
|
|
269
|
+
|
|
270
|
+
Says to set the `firstname` variable to the value of the first column when the first column has a value.
|
|
271
|
+
|
|
272
|
+
## Match functions
|
|
273
|
+
|
|
151
274
|
Most of the work of matching is done in functions. The match functions are the following.
|
|
152
275
|
|
|
153
276
|
|
|
154
277
|
<table>
|
|
155
278
|
<tr><th> Group </th><th>Function </th><th> What it does </th></tr>
|
|
156
279
|
<tr><td> Boolean </td><td> </td><td> </td></tr>
|
|
280
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/any.md'>any(value, value)</a> </td><td> existence test across a range of places </td></tr>
|
|
157
281
|
<tr><td> </td><td> <a href='csvpath/matching/functions/no.md'>no()</a> </td><td> always false </td></tr>
|
|
158
282
|
<tr><td> </td><td> not(value) </td><td> negates a value </td></tr>
|
|
159
283
|
<tr><td> </td><td> or(value, value,...) </td><td> match any one </td></tr>
|
|
160
284
|
<tr><td> </td><td> yes() </td><td> always true </td></tr>
|
|
285
|
+
<tr><td> </td><td> exists(value) </td><td> tests if the value exists </td></tr>
|
|
161
286
|
<tr><td> </td><td> <a href='csvpath/matching/functions/in.md'>in(value, list)</a> </td><td> match in a pipe-delimited list </td></tr>
|
|
162
287
|
<tr><td> Math </td><td> </td><td> </td></tr>
|
|
163
288
|
<tr><td> </td><td> add(value, value, ...) </td><td> adds numbers </td></tr>
|
|
@@ -167,7 +292,7 @@ Most of the work of matching is done in functions. The match functions are the f
|
|
|
167
292
|
<tr><td> </td><td> after(value) </td><td> finds things after a date, number, string </td></tr>
|
|
168
293
|
<tr><td> </td><td> before(value) </td><td> finds things before a date, number, string </td></tr>
|
|
169
294
|
<tr><td> Stats </td><td> </td><td> </td></tr>
|
|
170
|
-
<tr><td> </td><td> average(number, type)
|
|
295
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/average.md'>average(number, type)</a> </td><td> returns the average up to current "line", "scan", "match" </td></tr>
|
|
171
296
|
<tr><td> </td><td> median(value, type) </td><td> median value up to current "line", "scan", "match" </td></tr>
|
|
172
297
|
<tr><td> </td><td> max(value, type) </td><td> largest value seen up to current "line", "scan", "match" </td></tr>
|
|
173
298
|
<tr><td> </td><td> min(value, type) </td><td> smallest value seen up to current "line", "scan", "match" </td></tr>
|
|
@@ -186,16 +311,32 @@ Most of the work of matching is done in functions. The match functions are the f
|
|
|
186
311
|
<tr><td> </td><td> length(value) </td><td> returns the length of the value </td></tr>
|
|
187
312
|
<tr><td> </td><td> lower(value) </td><td> makes value lowercase </td></tr>
|
|
188
313
|
<tr><td> </td><td> regex(regex-string, value) </td><td> match on a regular expression </td></tr>
|
|
314
|
+
<tr><td> </td><td> substring(value, int) </td><td> returns the first n chars from the value </td></tr>
|
|
189
315
|
<tr><td> </td><td> upper(value) </td><td> makes value uppercase </td></tr>
|
|
190
|
-
<tr><td>
|
|
316
|
+
<tr><td> Columns </td><td> </td><td> </td></tr>
|
|
191
317
|
<tr><td> </td><td> end() </td><td> returns the value of the last column </td></tr>
|
|
192
|
-
<tr><td> </td><td>
|
|
318
|
+
<tr><td> </td><td> column(value) </td><td> returns column name for an index or index for a name </td></tr>
|
|
319
|
+
<tr><td> Other </td><td> </td><td> </td></tr>
|
|
320
|
+
<tr><td> </td><td> header() </td><td> indicates to another function to look in headers </td></tr>
|
|
193
321
|
<tr><td> </td><td> <a href='csvpath/matching/functions/now.md'>now(format)</a></td><td> a datetime, optionally formatted </td></tr>
|
|
194
322
|
<tr><td> </td><td> <a href='csvpath/matching/functions/print.md'>print(value, str)</a></td><td> when matches prints the interpolated string </td></tr>
|
|
195
323
|
<tr><td> </td><td> random(starting, ending) </td><td> generates a random int from starting to ending </td>
|
|
324
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/stop.md'>stop(value)</a> </td><td> stops path scanning if a condition is met </td></tr>
|
|
325
|
+
<tr><td> </td><td> <a href='csvpath/matching/functions/when.md'>when(value, value)</a> </td><td> activate a value when a condition matches </td></tr>
|
|
326
|
+
<tr><td> </td><td> variable() </td><td> indicates to another function to look in variables </td></tr>
|
|
196
327
|
</tr>
|
|
197
328
|
</table>
|
|
198
329
|
|
|
330
|
+
## Another Example
|
|
331
|
+
[ exists(#common_name) #0=="field" @tail.onmatch=end() not(in(@tail, 'short|medium')) ]
|
|
332
|
+
|
|
333
|
+
In the path above, the rules applied are:
|
|
334
|
+
- The exists test of `#common_name` checks if the header named "common_name" has a value. Headers are the values in the 0th line.
|
|
335
|
+
- `#0` means the 1st column, counting from 0
|
|
336
|
+
- Functions and column references are ANDed together
|
|
337
|
+
- `@tail` creates a variable named "tail" and sets it to the value of the last column if all else matches
|
|
338
|
+
- Functions can contain functions, equality tests, and/or literals
|
|
339
|
+
|
|
199
340
|
# Not Ready For Production
|
|
200
341
|
Anything could change and performance could be better. This project is a hobby.
|
|
201
342
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from csvpath.matching.matcher import Matcher
|
|
2
|
+
from csvpath.matching.expression_encoder import ExpressionEncoder
|
|
3
|
+
from csvpath.scanning.scanner import Scanner
|
|
4
|
+
from csvpath.csvpath import CsvPath
|
|
5
|
+
from csvpath.csvpaths import CsvPaths
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
__all__ = ["CsvPath", "CsvPaths"]
|