csvpath 0.0.463__tar.gz → 0.0.465__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {csvpath-0.0.463 → csvpath-0.0.465}/PKG-INFO +7 -6
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/csvpath.py +2 -2
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/csvpaths.py +15 -4
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/managers/csvpaths_manager.py +18 -4
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/all.py +1 -1
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/any.py +1 -1
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/tally.py +7 -1
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/function_factory.py +25 -11
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/first_line.py +3 -3
- csvpath-0.0.465/csvpath/matching/functions/print/table.py +133 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/equality.py +3 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/metadata_parser.py +49 -6
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/printer.py +2 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/pyproject.toml +2 -1
- {csvpath-0.0.463 → csvpath-0.0.465}/LICENSE +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/README.md +5 -5
- {csvpath-0.0.463 → csvpath-0.0.465}/config/config.ini +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/__init__.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/managers/__init__.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/managers/csvpath_result.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/managers/files_manager.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/managers/results_manager.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/__init__.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/__init__.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/andf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/between.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/empty.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/exists.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/inf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/no.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/notf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/orf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/yes.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/count.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/count_headers.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/count_lines.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/count_scans.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/every.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/has_matches.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/increment.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/total_lines.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/dates}/datef.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/dates}/now.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/function.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/function_focus.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/headers}/collect.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/headers/end.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/headers/header_name.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/headers/headers.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/headers/mismatch.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/headers}/replace.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/headers/reset_headers.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/advance.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/after_blank.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/dups.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/first.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/last.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/stop.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/above.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/add.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/divide.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/equals.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/mod.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/multiply.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/round.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/subtract.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/math/sum.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/misc/importf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/misc/intf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/misc/nonef.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/misc/random.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/print/jinjaf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/print/print_line.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/print/print_queue.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/print/printf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/stats/correlate.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/stats/minf.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/stats/percent.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/stats/percent_unique.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/stats/stdev.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/concat.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/length.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/lower.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/metaphone.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/strings}/regex.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/starts_with.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/strip.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/substring.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/strings/upper.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/testing}/debug.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/validation.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/validity/fail.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/validity/failed.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/variables}/get.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/variables}/pushpop.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/variables}/track.py +0 -0
- {csvpath-0.0.463/csvpath/matching/functions/misc → csvpath-0.0.465/csvpath/matching/functions/variables}/variables.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/lark_parser.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/lark_transformer.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/matcher.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/__init__.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/expression.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/header.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/matchable.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/qualified.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/reference.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/term.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/variable.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/util/exceptions.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/util/expression_encoder.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/util/expression_utility.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/util/lark_print_parser.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/util/print_parser.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/scanning/__init__.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/scanning/exceptions.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/scanning/parser.out +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/scanning/parsetab.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/scanning/scanner.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/scanning/scanning_lexer.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/config.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/config_exception.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/error.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/exceptions.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/last_line_stats.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/line_monitor.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/log_utility.py +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/asbool.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/assignment.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/config.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/examples.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/files.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/above.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/advance.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/after_blank.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/all.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/andor.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/any.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/average.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/between.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/collect.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/correlate.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/count.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/count_headers.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/date.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/empty.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/end.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/every.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/fail.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/first.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/get.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/has_dups.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/header.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/header_name.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/header_names_mismatch.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/implementing_functions.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/import.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/in.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/increment.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/jinja.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/last.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/line_number.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/max.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/metaphone.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/mismatch.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/no.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/not.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/now.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/percent_unique.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/pop.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/print.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/print_line.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/print_queue.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/regex.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/replace.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/reset_headers.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/stdev.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/stop.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/string_functions.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/subtract.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/sum.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/tally.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/total_lines.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/track.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/variables.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions/variables_and_headers.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/functions.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/grammar.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/headers.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/images/logo-wordmark-white-trimmed.png +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/paths.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/qualifiers.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/references.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/terms.md +0 -0
- {csvpath-0.0.463 → csvpath-0.0.465}/docs/variables.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csvpath
|
|
3
|
-
Version: 0.0.463
|
|
3
|
+
Version: 0.0.465
|
|
4
4
|
Summary: A declarative language for data extraction and validation of CSV files
|
|
5
5
|
Author: David Kershaw
|
|
6
6
|
Author-email: dk107dk@hotmail.com
|
|
@@ -25,6 +25,7 @@ Requires-Dist: lark (>=1.2.2,<2.0.0)
|
|
|
25
25
|
Requires-Dist: pandas (>=2.2.2,<3.0.0)
|
|
26
26
|
Requires-Dist: ply (>=3.11,<4.0)
|
|
27
27
|
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
|
|
28
|
+
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
28
29
|
Project-URL: Csvpath.org, https://www.csvpath.org
|
|
29
30
|
Project-URL: Github, https://github.com/dk107dk/csvpath
|
|
30
31
|
Description-Content-Type: text/markdown
|
|
@@ -34,11 +35,6 @@ Description-Content-Type: text/markdown
|
|
|
34
35
|
|
|
35
36
|
CsvPath defines a declarative syntax for inspecting and validating CSV files.
|
|
36
37
|
|
|
37
|
-
Though much simpler, it is inspired by:
|
|
38
|
-
- XPath. CsvPath is to CSV files like XPath is to XML files.
|
|
39
|
-
- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
|
|
40
|
-
- The way CSS selectors pick out HTML structures
|
|
41
|
-
|
|
42
38
|
CsvPath' goal is to make it easy to:
|
|
43
39
|
- Analyze the content and structure of a CSV
|
|
44
40
|
- Validate that the file matches expectations
|
|
@@ -47,6 +43,11 @@ CsvPath' goal is to make it easy to:
|
|
|
47
43
|
|
|
48
44
|
And do it all in an automation-friendly way.
|
|
49
45
|
|
|
46
|
+
Though much simpler, it is inspired by:
|
|
47
|
+
- XPath. CsvPath is to CSV files like XPath is to XML files.
|
|
48
|
+
- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
|
|
49
|
+
- The way CSS selectors pick out HTML structures
|
|
50
|
+
|
|
50
51
|
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
|
|
51
52
|
|
|
52
53
|
Read more about CsvPath and see realistic CSV validation examples at <a href='https://www.csvpath.org'>csvpath.org</a>.
|
|
@@ -377,7 +377,7 @@ class CsvPath(CsvPathPublic, ErrorCollector): # pylint: disable=R0902, R0904
|
|
|
377
377
|
# CsvPaths will do this earlier but it stripped off
|
|
378
378
|
# the comments so we won't find them again
|
|
379
379
|
#
|
|
380
|
-
csvpath = MetadataParser().extract_metadata(instance=self, csvpath=csvpath)
|
|
380
|
+
csvpath = MetadataParser(self).extract_metadata(instance=self, csvpath=csvpath)
|
|
381
381
|
#
|
|
382
382
|
#
|
|
383
383
|
#
|
|
@@ -436,7 +436,7 @@ class CsvPath(CsvPathPublic, ErrorCollector): # pylint: disable=R0902, R0904
|
|
|
436
436
|
len(np),
|
|
437
437
|
)
|
|
438
438
|
path = np[0]
|
|
439
|
-
path = MetadataParser().extract_metadata(instance=self, csvpath=path)
|
|
439
|
+
path = MetadataParser(self).extract_metadata(instance=self, csvpath=path)
|
|
440
440
|
path = self._update_file_path(path)
|
|
441
441
|
dis = self.parse(path, disposably=disposably)
|
|
442
442
|
if disposably is True:
|
|
@@ -185,11 +185,14 @@ class CsvPaths(CsvPathsPublic):
|
|
|
185
185
|
)
|
|
186
186
|
|
|
187
187
|
def _load_csvpath(self, csvpath: CsvPath, path: str, file: str) -> None:
|
|
188
|
+
self.logger.debug("Beginning to load csvpath %s with file %s", path, file)
|
|
188
189
|
# we strip comments from above the path so we need to extract them first
|
|
189
|
-
path = MetadataParser().extract_metadata(instance=csvpath, csvpath=path)
|
|
190
|
-
|
|
190
|
+
path = MetadataParser(self).extract_metadata(instance=csvpath, csvpath=path)
|
|
191
|
+
self.logger.debug("Csvpath after metadata extract: %s", path)
|
|
191
192
|
f = path.find("[")
|
|
193
|
+
self.logger.debug("Csvpath matching part starts at char # %s", f)
|
|
192
194
|
apath = f"${file}{path[f:]}"
|
|
195
|
+
self.logger.info("Parsing csvpath %s", apath)
|
|
193
196
|
csvpath.parse(apath)
|
|
194
197
|
|
|
195
198
|
def fast_forward_paths(self, *, pathsname, filename):
|
|
@@ -204,17 +207,25 @@ class CsvPaths(CsvPathsPublic):
|
|
|
204
207
|
self.logger.info("Cleaning out any %s and %s results", filename, pathsname)
|
|
205
208
|
self.clean(paths=pathsname)
|
|
206
209
|
self.logger.info(
|
|
207
|
-
"Beginning fast_forward_paths %s with %s paths",
|
|
210
|
+
"Beginning fast_forward_paths %s with %s paths against file %s",
|
|
211
|
+
pathsname,
|
|
212
|
+
len(paths),
|
|
213
|
+
filename,
|
|
208
214
|
)
|
|
209
215
|
for i, path in enumerate(paths):
|
|
210
216
|
csvpath = self.csvpath()
|
|
217
|
+
self.logger.info("Beginning CsvPath instance: %s", csvpath)
|
|
211
218
|
result = CsvPathResult(
|
|
212
219
|
csvpath=csvpath, file_name=filename, paths_name=pathsname
|
|
213
220
|
)
|
|
214
221
|
try:
|
|
215
222
|
self.results_manager.add_named_result(result)
|
|
216
223
|
self._load_csvpath(csvpath, path=path, file=file)
|
|
217
|
-
self.logger.info(
|
|
224
|
+
self.logger.info(
|
|
225
|
+
"Parsed csvpath %s pointed at %s and starting to fast-forward",
|
|
226
|
+
i,
|
|
227
|
+
file,
|
|
228
|
+
)
|
|
218
229
|
csvpath.fast_forward()
|
|
219
230
|
self.logger.info(
|
|
220
231
|
"Completed fast forward of csvpath %s against %s", i, file
|
|
@@ -26,7 +26,7 @@ class CsvPathsManager(ABC):
|
|
|
26
26
|
contents of the file is straight cvspath, not json."""
|
|
27
27
|
|
|
28
28
|
@abstractmethod
|
|
29
|
-
def
|
|
29
|
+
def add_named_paths_from_json(self, file_path: str) -> None:
|
|
30
30
|
"""replaces the named paths dict with a dict found in a JSON file. lists
|
|
31
31
|
of paths are keyed by names."""
|
|
32
32
|
|
|
@@ -101,6 +101,7 @@ class PathsManager(CsvPathsManager): # pylint: disable=C0115, C0116
|
|
|
101
101
|
ErrorHandler(self.csvpaths).handle_error(ie)
|
|
102
102
|
|
|
103
103
|
def add_named_paths_from_file(self, *, name: str, file_path: str) -> None:
|
|
104
|
+
self.csvpaths.logger.debug("Reading csvpaths file at %s", file_path)
|
|
104
105
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
105
106
|
cp = f.read()
|
|
106
107
|
_ = [
|
|
@@ -108,12 +109,15 @@ class PathsManager(CsvPathsManager): # pylint: disable=C0115, C0116
|
|
|
108
109
|
for apath in cp.split(PathsManager.MARKER)
|
|
109
110
|
if apath.strip() != ""
|
|
110
111
|
]
|
|
112
|
+
self.csvpaths.logger.debug("Found %s csvpaths in file", len(_))
|
|
111
113
|
self.add_named_paths(name, _)
|
|
112
114
|
|
|
113
|
-
def
|
|
115
|
+
def add_named_paths_from_json(self, file_path: str) -> None:
|
|
114
116
|
try:
|
|
117
|
+
self.csvpaths.logger.debug("Opening JSON file at %s", file_path)
|
|
115
118
|
with open(file_path, encoding="utf-8") as f:
|
|
116
119
|
j = json.load(f)
|
|
120
|
+
self.csvpaths.logger.debug("Found JSON file with %s keys", len(j))
|
|
117
121
|
for k in j:
|
|
118
122
|
v = j[k]
|
|
119
123
|
for f in v:
|
|
@@ -130,14 +134,24 @@ class PathsManager(CsvPathsManager): # pylint: disable=C0115, C0116
|
|
|
130
134
|
set_named_paths_from_json."""
|
|
131
135
|
)
|
|
132
136
|
ErrorHandler(self.csvpaths).handle_error(ie)
|
|
133
|
-
|
|
137
|
+
self.csvpaths.logger.debug("Adding csvpaths to named-paths group %s", name)
|
|
134
138
|
if name in self.named_paths:
|
|
135
139
|
for p in paths:
|
|
136
140
|
if p in self.named_paths[name]:
|
|
141
|
+
self.csvpaths.logger.debug(
|
|
142
|
+
"csvpaths %s already exists in named-paths group %s", p, name
|
|
143
|
+
)
|
|
137
144
|
pass
|
|
138
145
|
else:
|
|
139
|
-
self.
|
|
146
|
+
self.csvpaths.logger.debug("Adding %s to %s", p, name)
|
|
147
|
+
if isinstance(self.named_paths[name], str):
|
|
148
|
+
ps = []
|
|
149
|
+
ps.append(self.named_paths[name])
|
|
150
|
+
self.named_paths[name] = ps
|
|
151
|
+
self.named_paths[name].append(p)
|
|
140
152
|
else:
|
|
153
|
+
for _ in paths:
|
|
154
|
+
self.csvpaths.logger.debug("Adding %s to %s", _, name)
|
|
141
155
|
self.named_paths[name] = paths
|
|
142
156
|
|
|
143
157
|
#
|
|
@@ -4,7 +4,7 @@ from typing import Any
|
|
|
4
4
|
from csvpath.matching.productions import Equality
|
|
5
5
|
from csvpath.matching.util.exceptions import ChildrenException
|
|
6
6
|
from ..function_focus import MatchDecider
|
|
7
|
-
from ..misc.variables import Variables
|
|
7
|
+
from ..variables.variables import Variables
|
|
8
8
|
from ..headers.headers import Headers
|
|
9
9
|
|
|
10
10
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
from csvpath.matching.productions import Equality, Term
|
|
3
3
|
from csvpath.matching.util.exceptions import ChildrenException
|
|
4
4
|
from csvpath.matching.util.expression_utility import ExpressionUtility
|
|
5
|
-
from ..misc.variables import Variables
|
|
5
|
+
from ..variables.variables import Variables
|
|
6
6
|
from ..function_focus import MatchDecider
|
|
7
7
|
from ..headers.headers import Headers
|
|
8
8
|
|
|
@@ -18,18 +18,24 @@ class Tally(ValueProducer):
|
|
|
18
18
|
else:
|
|
19
19
|
siblings = [child]
|
|
20
20
|
tally = ""
|
|
21
|
+
|
|
21
22
|
for _ in siblings:
|
|
22
23
|
tally += f"{_.to_value(skip=skip)}|"
|
|
23
24
|
value = f"{_.to_value(skip=skip)}"
|
|
24
25
|
self._store(_.name, value)
|
|
25
26
|
if len(siblings) > 1:
|
|
26
27
|
self._store(
|
|
27
|
-
|
|
28
|
+
"", # we don't need to pass a name. this data just
|
|
29
|
+
# goes under "tally" or the qualifier
|
|
28
30
|
tally[0 : len(tally) - 1],
|
|
29
31
|
)
|
|
30
32
|
self.value = True
|
|
31
33
|
|
|
32
34
|
def _store(self, name, value):
|
|
35
|
+
if name == "":
|
|
36
|
+
name = self.first_non_term_qualifier("tally")
|
|
37
|
+
else:
|
|
38
|
+
name = f"""{self.first_non_term_qualifier("tally")}_{name}"""
|
|
33
39
|
count = self.matcher.get_variable(name, tracking=value)
|
|
34
40
|
if count is None:
|
|
35
41
|
count = 0
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
# pylint: disable=C0114
|
|
2
2
|
from csvpath.matching.productions.expression import Matchable
|
|
3
3
|
from .function import Function
|
|
4
|
+
from .dates.now import Now
|
|
5
|
+
from .dates.datef import Date
|
|
4
6
|
from .strings.lower import Lower
|
|
5
7
|
from .strings.upper import Upper
|
|
6
8
|
from .strings.substring import Substring
|
|
7
9
|
from .strings.starts_with import StartsWith
|
|
8
10
|
from .strings.strip import Strip
|
|
9
11
|
from .strings.length import Length, MinMaxLength
|
|
12
|
+
from .strings.regex import Regex
|
|
10
13
|
from .strings.concat import Concat
|
|
11
14
|
from .strings.metaphone import Metaphone
|
|
12
15
|
from .counting.count import Count
|
|
@@ -21,6 +24,8 @@ from .counting.increment import Increment
|
|
|
21
24
|
from .headers.reset_headers import ResetHeaders
|
|
22
25
|
from .headers.header_name import HeaderName
|
|
23
26
|
from .headers.header_names_mismatch import HeaderNamesMismatch
|
|
27
|
+
from .headers.collect import Collect
|
|
28
|
+
from .headers.replace import Replace
|
|
24
29
|
from .headers.headers import Headers
|
|
25
30
|
from .headers.mismatch import Mismatch
|
|
26
31
|
from .headers.end import End
|
|
@@ -50,6 +55,7 @@ from .stats.percent_unique import PercentUnique
|
|
|
50
55
|
from .stats.stdev import Stdev
|
|
51
56
|
from .stats.correlate import Correlate
|
|
52
57
|
from .print.printf import Print
|
|
58
|
+
from .print.table import HeaderTable, RowTable, VarTable
|
|
53
59
|
from .print.print_line import PrintLine
|
|
54
60
|
from .print.jinjaf import Jinjaf
|
|
55
61
|
from .print.print_queue import PrintQueue
|
|
@@ -60,20 +66,15 @@ from .lines.dups import HasDups, DupLines, CountDups
|
|
|
60
66
|
from .lines.first_line import FirstLine
|
|
61
67
|
from .lines.advance import Advance
|
|
62
68
|
from .lines.after_blank import AfterBlank
|
|
69
|
+
from .variables.variables import Variables
|
|
70
|
+
from .variables.pushpop import Push, PushDistinct, Pop, Peek, PeekSize, Stack
|
|
71
|
+
from .variables.get import Get
|
|
72
|
+
from .variables.track import Track
|
|
63
73
|
from .misc.random import Random
|
|
64
|
-
from .misc.regex import Regex
|
|
65
|
-
from .misc.now import Now
|
|
66
|
-
from .misc.variables import Variables
|
|
67
74
|
from .misc.nonef import Nonef
|
|
68
|
-
from .misc.pushpop import Push, PushDistinct, Pop, Peek, PeekSize, Stack
|
|
69
|
-
from .misc.datef import Date
|
|
70
|
-
from .misc.collect import Collect
|
|
71
|
-
from .misc.replace import Replace
|
|
72
75
|
from .misc.intf import Int
|
|
73
|
-
from .misc.get import Get
|
|
74
|
-
from .misc.track import Track
|
|
75
76
|
from .misc.importf import Import
|
|
76
|
-
from .misc.debug import Debug, BriefStackTrace, VoteStack, DoWhenStack
|
|
77
|
+
from .testing.debug import Debug, BriefStackTrace, VoteStack, DoWhenStack
|
|
77
78
|
from .validity.failed import Failed
|
|
78
79
|
from .validity.fail import Fail
|
|
79
80
|
|
|
@@ -176,7 +177,14 @@ class FunctionFactory:
|
|
|
176
177
|
f = AboveBelow(matcher, name, child)
|
|
177
178
|
elif name == "first":
|
|
178
179
|
f = First(matcher, name, child)
|
|
179
|
-
elif name in [
|
|
180
|
+
elif name in [
|
|
181
|
+
"firstline",
|
|
182
|
+
"firstmatch",
|
|
183
|
+
"firstscan",
|
|
184
|
+
"first_line",
|
|
185
|
+
"first_scan",
|
|
186
|
+
"first_match",
|
|
187
|
+
]:
|
|
180
188
|
f = FirstLine(matcher, name, child)
|
|
181
189
|
elif name == "count_lines":
|
|
182
190
|
f = CountLines(matcher, name, child)
|
|
@@ -337,6 +345,12 @@ class FunctionFactory:
|
|
|
337
345
|
f = DoWhenStack(matcher, name, child)
|
|
338
346
|
elif name == "metaphone":
|
|
339
347
|
f = Metaphone(matcher, name, child)
|
|
348
|
+
elif name == "header_table":
|
|
349
|
+
f = HeaderTable(matcher, name, child)
|
|
350
|
+
elif name == "row_table":
|
|
351
|
+
f = RowTable(matcher, name, child)
|
|
352
|
+
elif name == "var_table":
|
|
353
|
+
f = VarTable(matcher, name, child)
|
|
340
354
|
else:
|
|
341
355
|
if (
|
|
342
356
|
f is None
|
|
@@ -22,16 +22,16 @@ class FirstLine(MatchDecider):
|
|
|
22
22
|
|
|
23
23
|
def _decide_match(self, skip=None) -> None:
|
|
24
24
|
t = self.name
|
|
25
|
-
if t
|
|
25
|
+
if t in ["firstmatch", "first_match"]:
|
|
26
26
|
if self.matcher.csvpath.match_count == 0 and self.line_matches(): # 1-based
|
|
27
27
|
self.match = True
|
|
28
28
|
else:
|
|
29
29
|
self.match = False
|
|
30
|
-
elif t
|
|
30
|
+
elif t in ["firstscan", "first_scan"]:
|
|
31
31
|
self.match = (
|
|
32
32
|
self.matcher.csvpath.scan_count == 1
|
|
33
33
|
) # 1-based, set before matcher is called.
|
|
34
|
-
elif t
|
|
34
|
+
elif t in ["firstline", "first_line"]:
|
|
35
35
|
self.match = (
|
|
36
36
|
self.matcher.csvpath.line_monitor.data_line_number == 0
|
|
37
37
|
) # 0-based, updated after matcher is called.
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# pylint: disable=C0114
|
|
2
|
+
import textwrap
|
|
3
|
+
from tabulate import tabulate
|
|
4
|
+
from ..function_focus import SideEffect
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class HeaderTable(SideEffect):
|
|
8
|
+
"""prints a header table"""
|
|
9
|
+
|
|
10
|
+
def check_valid(self) -> None:
|
|
11
|
+
self.validate_zero_args()
|
|
12
|
+
super().check_valid()
|
|
13
|
+
|
|
14
|
+
def _produce_value(self, skip=None) -> None:
|
|
15
|
+
self.value = self.matches(skip=skip)
|
|
16
|
+
|
|
17
|
+
def _decide_match(self, skip=None) -> None:
|
|
18
|
+
table = []
|
|
19
|
+
headers = ["#N", "#Name"]
|
|
20
|
+
for i, h in enumerate(self.matcher.csvpath.headers):
|
|
21
|
+
table.append([i, h])
|
|
22
|
+
self.matcher.csvpath.print(
|
|
23
|
+
tabulate(table, headers=headers, tablefmt="simple_grid")
|
|
24
|
+
)
|
|
25
|
+
self.match = self.default_match()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RowTable(SideEffect):
    """Prints values from the current line as a one-row grid table.

    With no arguments every header/value pair is printed. With one
    argument only the value at that index is printed. With two arguments
    the inclusive range between the two indexes is printed.
    """

    def check_valid(self) -> None:
        # zero, one or two arguments are accepted
        self.validate_zero_one_or_two_args()
        super().check_valid()

    def _produce_value(self, skip=None) -> None:
        # the value of this function is simply its match result
        self.value = self.matches(skip=skip)

    def _decide_match(self, skip=None) -> None:
        csvpath = self.matcher.csvpath
        v1 = self._value_one()
        v2 = self._value_two()
        # NOTE(review): v1/v2 are used directly as list indexes, so they are
        # presumably ints -- confirm against the argument validation.
        if v1 is None and v2 is None:
            # no arguments: cover every header (the slice below tolerates
            # the end index running one past the last header)
            i, j = 0, len(csvpath.headers)
        elif v2 is None:
            # one argument: a single column
            i = j = v1
        else:
            i, j = v1, v2
        if i == j:
            headers = [csvpath.headers[i]]
            row = [[self.matcher.line[i]]]
        else:
            # label each column with its header name and its absolute index
            headers = [
                f"#{h} (#{k + i})" for k, h in enumerate(csvpath.headers[i : j + 1])
            ]
            row = [self.matcher.line[i : j + 1]]
        # output goes through the csvpath printer only; the stray debug
        # print() to stdout that used to precede this has been removed.
        csvpath.print(tabulate(row, headers=headers, tablefmt="simple_grid"))
        self.match = self.default_match()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class VarTable(SideEffect):
    """Prints csvpath variables as a grid table.

    With no arguments all variables are printed side by side. With one
    argument that single variable is printed, one row per item when it is
    a list or a dict. With more arguments the named variables are printed
    side by side.
    """

    def check_valid(self) -> None:
        # any number of arguments is accepted
        self.validate_zero_or_more_args()
        super().check_valid()

    def _produce_value(self, skip=None) -> None:
        # the value of this function is simply its match result
        self.value = self.matches(skip=skip)

    def _decide_match(self, skip=None) -> None:
        v1 = self._value_one()
        v2 = self._value_two()
        if v1 is None:
            self.print_all_vars()
        elif v2 is None:
            self.print_one_var()
        else:
            self.print_some_vars(skip)
        self.match = self.default_match()

    @staticmethod
    def _wrap(value, width: int) -> str:
        """Returns value as a string, wrapped only if longer than width."""
        text = str(value)
        if len(text) > width:
            text = textwrap.fill(text, width=width)
        return text

    def print_all_vars(self):
        """Prints every variable: one column per variable, a single row."""
        variables = self.matcher.csvpath.variables
        headers = list(variables)
        rows = [[self._wrap(value, 20) for value in variables.values()]]
        self.matcher.csvpath.print(
            tabulate(rows, headers=headers, tablefmt="simple_grid")
        )

    def print_one_var(self):
        """Prints the variable named by the first argument.

        Raises KeyError if no variable with that name exists.
        """
        name = self._value_one()
        headers = [name]
        value = self.matcher.csvpath.variables[name]
        if isinstance(value, list):
            rows = [[item] for item in value]
        elif isinstance(value, dict):
            headers.append("Tracking")
            rows = [[key, tracked] for key, tracked in value.items()]
        else:
            # a scalar variable used to produce a table with no rows at
            # all; show its value as a single cell instead.
            rows = [[value]]
        self.matcher.csvpath.print(
            tabulate(rows, headers=headers, tablefmt="simple_grid")
        )

    def print_some_vars(self, skip):
        """Prints the variables named by the arguments, one column each.

        Raises KeyError if any named variable does not exist.
        """
        # NOTE(review): this relies on the instance being indexable;
        # presumably self[0] is the first child holding the
        # comma-separated arguments -- confirm against the base class.
        siblings = self[0].commas_to_list()
        headers = [s.to_value(skip=skip) for s in siblings]
        variables = self.matcher.csvpath.variables
        # a single row with one cell per header. appending one
        # single-cell row per variable stacked every value under the
        # first header and left the other headers without a column.
        rows = [[self._wrap(variables[h], 30) for h in headers]]
        self.matcher.csvpath.print(
            tabulate(rows, headers=headers, tablefmt="simple_grid")
        )
|
|
@@ -88,10 +88,13 @@ class Equality(Matchable):
|
|
|
88
88
|
|
|
89
89
|
def commas_to_list(self) -> List[Any]:
    """gets the children of op==',' equalities as a list of args.

    Returns a shallow copy, so callers may reorder or trim the result
    without modifying this object's own children.
    """
    # the superseded loop-based implementation that was parked here in a
    # bare string literal has been removed; the slice copy is the whole job.
    return self.children[:]
|
|
95
98
|
|
|
96
99
|
def set_operation(self, op): # pylint: disable=C0116
|
|
97
100
|
self.op = op
|
|
@@ -1,7 +1,46 @@
|
|
|
1
|
+
from typing import Tuple
|
|
2
|
+
from .config_exception import ConfigurationException
|
|
3
|
+
from .exceptions import InputException
|
|
4
|
+
|
|
5
|
+
|
|
1
6
|
class MetadataParser:
|
|
7
|
+
def __init__(self, csvpath) -> None:
    """Keeps the given object as the holder of the logger this parser uses.

    Raises ConfigurationException if the object has no `logger` attribute.
    """
    can_log = hasattr(csvpath, "logger")
    if can_log:
        self.log_holder = csvpath
        return
    raise ConfigurationException(
        "Log holder cannot be Nothing. You must pass a CsvPaths or CsvPath to MetadataParser."
    )
|
|
13
|
+
|
|
2
14
|
def extract_metadata(self, *, instance, csvpath: str) -> str:
    """extracts metadata from a comment. the comment is removed.

    at this time we're expecting 0 or 1 comments above the csvpath.
    we do not look below or for secondary comments. both would
    cause errors. we are also not looking within the csvpath. that
    is handled by the matcher's parser and we do not collect
    metadata from internal comments at this time. in principle we
    could run the comments the matching parser finds through this
    parser in order to extract metadata fields. not today's problem
    though.

    Any metadata found is stored on instance.metadata, along with the
    stripped comment text under the "original_comment" key. Returns the
    csvpath string without the comment.

    Raises InputException if the csvpath is empty or does not start
    with ~ or $.
    """
    self.log_holder.logger.debug(
        "Beginning to extract metadata from csvpath: %s", csvpath
    )
    csvpath = csvpath.strip()
    # an empty or whitespace-only string has no first character to check;
    # report it as bad input rather than failing with an IndexError.
    if not csvpath:
        raise InputException("Csvpath must start with ~ or $, not an empty string")
    if csvpath[0] not in ("$", "~"):
        raise InputException(f"Csvpath must start with ~ or $, not {csvpath[0]}")
    csvpath2, comment = self.extract_csvpath_and_comment(csvpath)
    comment = comment.strip()
    # if there are any characters in the comment we should parse. 3 is
    # the minimum metadata, because "x:y", but there could be a number or something.
    if comment:
        self.collect_metadata(instance, comment)
        # keep the original comment for future ref
        if not instance.metadata:
            instance.metadata = {}
        instance.metadata["original_comment"] = comment
    return csvpath2
|
|
4
42
|
|
|
43
|
+
def extract_csvpath_and_comment(self, csvpath) -> Tuple[str, str]:
|
|
5
44
|
csvpath2 = ""
|
|
6
45
|
comment = ""
|
|
7
46
|
state = 0 # 0 == outside, 1 == outer comment, 2 == inside
|
|
@@ -37,7 +76,9 @@ class MetadataParser:
|
|
|
37
76
|
comment += c
|
|
38
77
|
elif state == 2:
|
|
39
78
|
csvpath2 += c
|
|
79
|
+
return csvpath2, comment
|
|
40
80
|
|
|
81
|
+
def collect_metadata(self, instance, comment) -> None:
|
|
41
82
|
#
|
|
42
83
|
# pull the metadata out of the comment
|
|
43
84
|
#
|
|
@@ -69,13 +110,15 @@ class MetadataParser:
|
|
|
69
110
|
metafield += c
|
|
70
111
|
current_word = ""
|
|
71
112
|
else:
|
|
113
|
+
if metafield is not None:
|
|
114
|
+
metafield += c
|
|
72
115
|
current_word = ""
|
|
73
116
|
if metaname:
|
|
74
117
|
metadata_fields[metaname] = (
|
|
75
118
|
metafield.strip() if metafield is not None else None
|
|
76
119
|
)
|
|
77
|
-
|
|
78
|
-
if
|
|
79
|
-
instance.metadata =
|
|
80
|
-
|
|
81
|
-
|
|
120
|
+
# add found metadata to instance. keys will overwrite preexisting.
|
|
121
|
+
if not instance.metadata:
|
|
122
|
+
instance.metadata = {}
|
|
123
|
+
for k, v in metadata_fields.items():
|
|
124
|
+
instance.metadata[k] = v
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "csvpath"
|
|
3
|
-
version = "0.0.
|
|
3
|
+
version = "0.0.465"
|
|
4
4
|
description = "A declarative language for data extraction and validation of CSV files"
|
|
5
5
|
authors = ["David Kershaw <dk107dk@hotmail.com>"]
|
|
6
6
|
readme = "README.md"
|
|
@@ -32,6 +32,7 @@ jinja2 = "^3.1.4"
|
|
|
32
32
|
inflect = "^7.3.1"
|
|
33
33
|
lark = "^1.2.2"
|
|
34
34
|
jellyfish = "^1.1.0"
|
|
35
|
+
tabulate = "^0.9.0"
|
|
35
36
|
|
|
36
37
|
[tool.poetry.group.dev.dependencies]
|
|
37
38
|
flake8 = "^7.1.0"
|
|
File without changes
|
|
@@ -3,11 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
CsvPath defines a declarative syntax for inspecting and validating CSV files.
|
|
5
5
|
|
|
6
|
-
Though much simpler, it is inspired by:
|
|
7
|
-
- XPath. CsvPath is to CSV files like XPath is to XML files.
|
|
8
|
-
- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
|
|
9
|
-
- The way CSS selectors pick out HTML structures
|
|
10
|
-
|
|
11
6
|
CsvPath's goal is to make it easy to:
|
|
12
7
|
- Analyze the content and structure of a CSV
|
|
13
8
|
- Validate that the file matches expectations
|
|
@@ -16,6 +11,11 @@ CsvPath' goal is to make it easy to:
|
|
|
16
11
|
|
|
17
12
|
And do it all in an automation-friendly way.
|
|
18
13
|
|
|
14
|
+
Though much simpler, it is inspired by:
|
|
15
|
+
- XPath. CsvPath is to CSV files like XPath is to XML files.
|
|
16
|
+
- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
|
|
17
|
+
- The way CSS selectors pick out HTML structures
|
|
18
|
+
|
|
19
19
|
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
|
|
20
20
|
|
|
21
21
|
Read more about CsvPath and see realistic CSV validation examples at <a href='https://www.csvpath.org'>csvpath.org</a>.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|