csvpath 0.0.488__tar.gz → 0.0.490__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {csvpath-0.0.488 → csvpath-0.0.490}/PKG-INFO +66 -36
- {csvpath-0.0.488 → csvpath-0.0.490}/README.md +60 -33
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/csvpath.py +102 -20
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/csvpaths.py +66 -22
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/csvpaths_manager.py +161 -29
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/file_manager.py +73 -20
- csvpath-0.0.490/csvpath/managers/file_registrar.py +204 -0
- csvpath-0.0.490/csvpath/managers/paths_registrar.py +98 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/result.py +30 -1
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/result_serializer.py +31 -7
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/results_manager.py +139 -1
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function_factory.py +7 -2
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function_finder.py +4 -0
- csvpath-0.0.490/csvpath/matching/functions/headers/append.py +56 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/collect.py +1 -0
- csvpath-0.0.490/csvpath/matching/functions/misc/fingerprint.py +71 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/matcher.py +1 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/print_parser.py +0 -83
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/runtime_data_collector.py +21 -3
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/cache.py +1 -1
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/config.py +60 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/error.py +11 -11
- csvpath-0.0.490/csvpath/util/file_readers.py +180 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/line_counter.py +2 -2
- csvpath-0.0.490/csvpath/util/pandas_data_reader.py +48 -0
- csvpath-0.0.490/csvpath/util/reference_parser.py +158 -0
- csvpath-0.0.490/csvpath/util/s3_data_reader.py +24 -0
- csvpath-0.0.490/docs/functions/fingerprint.md +38 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions.md +1 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/pyproject.toml +9 -3
- csvpath-0.0.488/config/config copy.ini +0 -29
- csvpath-0.0.488/csvpath/matching/functions/headers/append.py +0 -31
- csvpath-0.0.488/csvpath/util/file_readers.py +0 -86
- {csvpath-0.0.488 → csvpath-0.0.490}/LICENSE +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/config/config.ini +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/args.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/all.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/andf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/any.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/between.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/empty.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/exists.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/inf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/no.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/notf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/orf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/yes.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count_headers.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count_lines.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count_scans.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/counter.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/every.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/has_matches.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/increment.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/tally.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/total_lines.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/dates/now.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function_focus.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/empty_stack.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/end.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/header_name.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/headers.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/mismatch.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/replace.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/reset_headers.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/advance.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/after_blank.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/dups.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/first.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/first_line.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/last.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/stop.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/above.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/add.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/divide.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/equals.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/intf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/mod.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/multiply.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/round.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/subtotal.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/subtract.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/sum.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/misc/importf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/misc/random.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/jinjaf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/print_line.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/print_queue.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/printf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/table.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/minf.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/percent.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/percent_unique.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/stdev.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/concat.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/length.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/lower.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/metaphone.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/regex.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/starts_with.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/strip.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/substring.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/upper.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/testing/debug.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/boolean.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/datef.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/decimal.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/nonef.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/string.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/type.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/validity/fail.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/validity/failed.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/validity/line.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/get.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/pushpop.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/put.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/track.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/variables.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/lark_parser.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/lark_transformer.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/equality.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/expression.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/header.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/matchable.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/qualified.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/reference.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/term.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/variable.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/exceptions.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/expression_encoder.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/expression_utility.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/lark_print_parser.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/__init__.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/exceptions.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/parser.out +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/parsetab.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/scanner.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/scanning_lexer.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/config_exception.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/exceptions.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/last_line_stats.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/line_monitor.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/log_utility.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/metadata_parser.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/printer.py +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/asbool.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/assignment.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/comments.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/config.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/examples.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/files.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/above.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/advance.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/after_blank.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/all.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/andor.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/any.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/average.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/between.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/collect.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/correlate.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/count.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/count_headers.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/counter.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/date.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/empty.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/empty_stack.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/end.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/every.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/fail.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/first.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/get.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/has_dups.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/has_matches.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/header.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/header_name.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/header_names_mismatch.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/implementing_functions.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/import.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/in.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/increment.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/intf.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/jinja.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/last.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/line.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/line_number.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/max.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/metaphone.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/mismatch.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/no.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/not.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/now.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/percent_unique.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/pop.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/print.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/print_line.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/print_queue.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/random.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/regex.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/replace.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/reset_headers.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/stdev.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/stop.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/string_functions.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/subtotal.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/subtract.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/sum.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/tally.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/total_lines.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/track.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/types.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/variables.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/variables_and_headers.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/grammar.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/headers.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/images/logo-wordmark-white-trimmed.png +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/paths.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/printing.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/qualifiers.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/references.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/terms.md +0 -0
- {csvpath-0.0.488 → csvpath-0.0.490}/docs/variables.md +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csvpath
|
|
3
|
-
Version: 0.0.
|
|
4
|
-
Summary: A declarative language for
|
|
3
|
+
Version: 0.0.490
|
|
4
|
+
Summary: A declarative language for validating CSV, Excel, and other tabular data files
|
|
5
5
|
Author: David Kershaw
|
|
6
6
|
Author-email: dk107dk@hotmail.com
|
|
7
7
|
Requires-Python: >=3.9,<4.0
|
|
@@ -21,37 +21,40 @@ Classifier: Topic :: Software Development :: Quality Assurance
|
|
|
21
21
|
Classifier: Topic :: Software Development :: Testing
|
|
22
22
|
Classifier: Topic :: Text Processing
|
|
23
23
|
Classifier: Topic :: Utilities
|
|
24
|
+
Provides-Extra: pandas
|
|
25
|
+
Provides-Extra: smartopen
|
|
24
26
|
Requires-Dist: inflect (>=7.3.1,<8.0.0)
|
|
25
27
|
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
26
28
|
Requires-Dist: lark (>=1.2.2,<2.0.0)
|
|
27
29
|
Requires-Dist: metaphone (>=0.6,<0.7)
|
|
30
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0) ; extra == "pandas"
|
|
28
31
|
Requires-Dist: ply (>=3.11,<4.0)
|
|
29
32
|
Requires-Dist: pylightxl (>=1.61,<2.0)
|
|
30
33
|
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
|
|
31
|
-
Requires-Dist: smart-open[s3] (>=7.0.5,<8.0.0)
|
|
34
|
+
Requires-Dist: smart-open[s3] (>=7.0.5,<8.0.0) ; extra == "smartopen"
|
|
32
35
|
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
33
36
|
Project-URL: Csvpath.org, https://www.csvpath.org
|
|
34
37
|
Project-URL: Github, https://github.com/csvpath/csvpath.git
|
|
35
38
|
Description-Content-Type: text/markdown
|
|
36
39
|
|
|
37
40
|
|
|
38
|
-
# <img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'
|
|
41
|
+
# <a href='https://www.csvpath.org/'><img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/></a> About CsvPath
|
|
39
42
|
|
|
40
|
-
CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files.
|
|
43
|
+
CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
|
|
41
44
|
|
|
42
|
-
CsvPath' goal is to make it easy to:
|
|
43
|
-
-
|
|
44
|
-
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
45
|
+
CsvPath's goal is to make it easy to set up a Collect, Store, Validate-pattern flat-file landing zone that:
|
|
46
|
+
- Analyzes the content and structure of flat files
|
|
47
|
+
- Validates that files match expectations
|
|
48
|
+
- Reports on content validity
|
|
49
|
+
- Creates new derived files using copy-on-write
|
|
47
50
|
|
|
48
|
-
And
|
|
51
|
+
And does it all in an automation-friendly way.
|
|
49
52
|
|
|
50
|
-
CsvPath is inspired by:
|
|
53
|
+
CsvPath's validation is inspired by:
|
|
51
54
|
- XPath for XML files
|
|
52
55
|
- The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
|
|
53
56
|
|
|
54
|
-
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
|
|
57
|
+
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
|
|
55
58
|
|
|
56
59
|
Read more about CsvPath and see realistic CSV and Excel validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
|
|
57
60
|
|
|
@@ -63,6 +66,7 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
|
|
|
63
66
|
# Contents
|
|
64
67
|
|
|
65
68
|
- [Motivation](#motivation)
|
|
69
|
+
- [Install](#install)
|
|
66
70
|
- [High-level Description](#description)
|
|
67
71
|
- [Running CsvPath](#running)
|
|
68
72
|
- [Validation](#validating)
|
|
@@ -92,10 +96,37 @@ CSV files are everywhere!
|
|
|
92
96
|
|
|
93
97
|
A surprisingly large number of companies depend on CSV processing for significant amounts of revenue. Research organizations are awash in CSV. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use day to day uses CSV for sharing. CSV is the lowest common denominator. Many CSVs are invalid or broken in some way. Oftentimes a lot of manual effort goes into finding problems and fixing them.
|
|
94
98
|
|
|
95
|
-
CsvPath is first and foremost a validation language. It
|
|
99
|
+
CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract data, create reports, and do other useful things.
|
|
96
100
|
|
|
97
101
|
CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
|
|
98
102
|
|
|
103
|
+
|
|
104
|
+
<a name="install"></a>
|
|
105
|
+
# Install
|
|
106
|
+
|
|
107
|
+
<a href='https://pypi.org/project/csvpath/'>CsvPath is available on PyPI</a>. Install with
|
|
108
|
+
```
|
|
109
|
+
pip install csvpath
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
CsvPath has two optional dependencies:
|
|
113
|
+
|
|
114
|
+
- <a target='_blank' href='https://pypi.org/project/pandas/'>Pandas</a>
|
|
115
|
+
- <a target='_blank' href='https://pypi.org/project/smart-open/'>Smart-open</a>
|
|
116
|
+
|
|
117
|
+
Pandas data frames can be used as a data source, much like Excel or CSV files. Install CsvPath with the Pandas option:
|
|
118
|
+
```
|
|
119
|
+
pip install csvpath[pandas]
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Smart-open is an option for loading data files directly from S3. Install the Smart Open extra with:
|
|
123
|
+
```
|
|
124
|
+
pip install csvpath[smartopen]
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Both of these optional dependencies can make it harder to use CsvPath in certain specific use cases. For example, using Pandas in an AWS Lambda layer may be less straightforward. If you need the capabilities, they are easy to install, but if you don't, CsvPath is lighter weight without them.
|
|
128
|
+
|
|
129
|
+
|
|
99
130
|
# Description
|
|
100
131
|
<a name="description"></a>
|
|
101
132
|
|
|
@@ -152,26 +183,25 @@ Two classes provide the functionality: CsvPath and CsvPaths. Each has only a few
|
|
|
152
183
|
### CsvPath
|
|
153
184
|
(<a href='https://github.com/csvpath/csvpath/blob/main/csvpath/csvpath.py'>code</a>)
|
|
154
185
|
The CsvPath class is the basic entry point for running csvpaths.
|
|
155
|
-
|method |function
|
|
156
|
-
|
|
157
|
-
|
|
|
158
|
-
|
|
|
159
|
-
|
|
|
160
|
-
|
|
|
161
|
-
| collect(n) | processes n rows and collects the lines that matched as lists |
|
|
186
|
+
|method |function |
|
|
187
|
+
|----------------------------|-----------------------------------------------------------------|
|
|
188
|
+
| next() | iterates over matched rows returning each matched row as a list |
|
|
189
|
+
| fast_forward() | iterates over the file collecting variables and side effects |
|
|
190
|
+
| advance() | skips forward n rows from within a `for row in path.next()` loop|
|
|
191
|
+
| collect() | processes n rows and collects the lines that matched as lists |
|
|
162
192
|
|
|
163
193
|
### CsvPaths
|
|
164
194
|
(<a href='https://github.com/dk107dk/csvpath/blob/main/csvpath/csvpaths.py'>code</a>)
|
|
165
195
|
The CsvPaths class helps you manage validations of multiple files and/or multiple csvpaths. It coordinates the work of multiple CsvPath instances.
|
|
166
|
-
|method
|
|
167
|
-
|
|
168
|
-
| csvpath()
|
|
169
|
-
| collect_paths()
|
|
170
|
-
| fast_forward_paths()
|
|
171
|
-
| next_paths()
|
|
172
|
-
| collect_by_line()
|
|
173
|
-
| fast_forward_by_line()| Same as CsvPath.fast_forward() but for all paths breadth first
|
|
174
|
-
| next_by_line()
|
|
196
|
+
|method |function |
|
|
197
|
+
|------------------------|-----------------------------------------------------------------|
|
|
198
|
+
| csvpath() | gets a CsvPath object that knows all the file names available |
|
|
199
|
+
| collect_paths() | Same as CsvPath.collect() but for all paths sequentially |
|
|
200
|
+
| fast_forward_paths() | Same as CsvPath.fast_forward() but for all paths sequentially |
|
|
201
|
+
| next_paths() | Same as CsvPath.next() but for all paths sequentially |
|
|
202
|
+
| collect_by_line() | Same as CsvPath.collect() but for all paths breadth first |
|
|
203
|
+
| fast_forward_by_line() | Same as CsvPath.fast_forward() but for all paths breadth first |
|
|
204
|
+
| next_by_line() | Same as CsvPath.next() but for all paths breadth first |
|
|
175
205
|
|
|
176
206
|
To be clear, the purpose of `CsvPaths` is to apply multiple csvpaths per CSV file. Its breadth-first versions of the `collect()`, `fast_forward()`, and `next()` methods attempt to match each csvpath to each row of a CSV file before continuing to the next row. As you can imagine, for very large files this approach is a must.
|
|
177
207
|
|
|
@@ -307,9 +337,9 @@ The match part is also bracketed. Matches have space separated components or "va
|
|
|
307
337
|
## Term
|
|
308
338
|
A string, number, or regular expression value.
|
|
309
339
|
|
|
310
|
-
|Returns | Matches
|
|
311
|
-
|
|
312
|
-
|A value | Always true | `"a value"`
|
|
340
|
+
|Returns | Matches | Examples |
|
|
341
|
+
|--------|-------------|-----------------|
|
|
342
|
+
|A value | Always true | `"a value"` |
|
|
313
343
|
|
|
314
344
|
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
|
|
315
345
|
|
|
@@ -317,9 +347,9 @@ A string, number, or regular expression value.
|
|
|
317
347
|
## Function
|
|
318
348
|
A composable unit of functionality called once for every row scanned.
|
|
319
349
|
|
|
320
|
-
|Returns
|
|
321
|
-
|
|
322
|
-
|Calculated | Calculated | `count()`
|
|
350
|
+
|Returns | Matches | Examples |
|
|
351
|
+
|-----------|------------|---------------|
|
|
352
|
+
|Calculated | Calculated | `count()` |
|
|
323
353
|
|
|
324
354
|
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
|
|
325
355
|
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
|
|
2
|
-
# <img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'
|
|
2
|
+
# <a href='https://www.csvpath.org/'><img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/></a> About CsvPath
|
|
3
3
|
|
|
4
|
-
CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files.
|
|
4
|
+
CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
|
|
5
5
|
|
|
6
|
-
CsvPath' goal is to make it easy to:
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
6
|
+
CsvPath's goal is to make it easy to set up a Collect, Store, Validate-pattern flat-file landing zone that:
|
|
7
|
+
- Analyzes the content and structure of flat files
|
|
8
|
+
- Validates that files match expectations
|
|
9
|
+
- Reports on content validity
|
|
10
|
+
- Creates new derived files using copy-on-write
|
|
11
11
|
|
|
12
|
-
And
|
|
12
|
+
And does it all in an automation-friendly way.
|
|
13
13
|
|
|
14
|
-
CsvPath is inspired by:
|
|
14
|
+
CsvPath's validation is inspired by:
|
|
15
15
|
- XPath for XML files
|
|
16
16
|
- The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
|
|
17
17
|
|
|
18
|
-
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
|
|
18
|
+
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
|
|
19
19
|
|
|
20
20
|
Read more about CsvPath and see realistic CSV and Excel validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
|
|
21
21
|
|
|
@@ -27,6 +27,7 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
|
|
|
27
27
|
# Contents
|
|
28
28
|
|
|
29
29
|
- [Motivation](#motivation)
|
|
30
|
+
- [Install](#install)
|
|
30
31
|
- [High-level Description](#description)
|
|
31
32
|
- [Running CsvPath](#running)
|
|
32
33
|
- [Validation](#validating)
|
|
@@ -56,10 +57,37 @@ CSV files are everywhere!
|
|
|
56
57
|
|
|
57
58
|
A surprisingly large number of companies depend on CSV processing for significant amounts of revenue. Research organizations are awash in CSV. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and almost any other type of tool we use day to day use CSV for sharing. CSV is the lowest common denominator. Many CSVs are invalid or broken in some way. Oftentimes, a lot of manual effort goes into finding problems and fixing them.
|
|
58
59
|
|
|
59
|
-
CsvPath is first and foremost a validation language. It
|
|
60
|
+
CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract data, create reports, and do other useful things.
|
|
60
61
|
|
|
61
62
|
CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
|
|
62
63
|
|
|
64
|
+
|
|
65
|
+
<a name="install"></a>
|
|
66
|
+
# Install
|
|
67
|
+
|
|
68
|
+
<a href='https://pypi.org/project/csvpath/'>CsvPath is available on PyPI</a>. Install with
|
|
69
|
+
```
|
|
70
|
+
pip install csvpath
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
CsvPath has two optional dependencies:
|
|
74
|
+
|
|
75
|
+
- <a target='_blank' href='https://pypi.org/project/pandas/'>Pandas</a>
|
|
76
|
+
- <a target='_blank' href='https://pypi.org/project/smart-open/'>Smart-open</a>
|
|
77
|
+
|
|
78
|
+
Pandas data frames can be used as a data source, much like Excel or CSV files. Install CsvPath with the Pandas option:
|
|
79
|
+
```
|
|
80
|
+
pip install csvpath[pandas]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Smart-open is an option for loading data files directly from S3. Install the Smart-open extra with:
|
|
84
|
+
```
|
|
85
|
+
pip install csvpath[smart-open]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Both of these optional dependencies can make it harder to use CsvPath in certain specific use cases. For example, using Pandas in an AWS Lambda layer may be less straightforward. If you need the capabilities, they are easy to install, but if you don't, CsvPath is lighter weight without them.
|
|
89
|
+
|
|
90
|
+
|
|
63
91
|
# Description
|
|
64
92
|
<a name="description"></a>
|
|
65
93
|
|
|
@@ -116,26 +144,25 @@ Two classes provide the functionality: CsvPath and CsvPaths. Each has only a few
|
|
|
116
144
|
### CsvPath
|
|
117
145
|
(<a href='https://github.com/csvpath/csvpath/blob/main/csvpath/csvpath.py'>code</a>)
|
|
118
146
|
The CsvPath class is the basic entry point for running csvpaths.
|
|
119
|
-
|method |function
|
|
120
|
-
|
|
121
|
-
|
|
|
122
|
-
|
|
|
123
|
-
|
|
|
124
|
-
|
|
|
125
|
-
| collect(n) | processes n rows and collects the lines that matched as lists |
|
|
147
|
+
|method |function |
|
|
148
|
+
|----------------------------|-----------------------------------------------------------------|
|
|
149
|
+
| next() | iterates over matched rows returning each matched row as a list |
|
|
150
|
+
| fast_forward() | iterates over the file collecting variables and side effects |
|
|
151
|
+
| advance() | skips forward n rows from within a `for row in path.next()` loop|
|
|
152
|
+
| collect() | processes n rows and collects the lines that matched as lists |
|
|
126
153
|
|
|
127
154
|
### CsvPaths
|
|
128
155
|
(<a href='https://github.com/dk107dk/csvpath/blob/main/csvpath/csvpaths.py'>code</a>)
|
|
129
156
|
The CsvPaths class helps you manage validations of multiple files and/or multiple csvpaths. It coordinates the work of multiple CsvPath instances.
|
|
130
|
-
|method
|
|
131
|
-
|
|
132
|
-
| csvpath()
|
|
133
|
-
| collect_paths()
|
|
134
|
-
| fast_forward_paths()
|
|
135
|
-
| next_paths()
|
|
136
|
-
| collect_by_line()
|
|
137
|
-
| fast_forward_by_line()| Same as CsvPath.fast_forward() but for all paths breadth first
|
|
138
|
-
| next_by_line()
|
|
157
|
+
|method |function |
|
|
158
|
+
|------------------------|-----------------------------------------------------------------|
|
|
159
|
+
| csvpath() | gets a CsvPath object that knows all the file names available |
|
|
160
|
+
| collect_paths() | Same as CsvPath.collect() but for all paths sequentially |
|
|
161
|
+
| fast_forward_paths() | Same as CsvPath.fast_forward() but for all paths sequentially |
|
|
162
|
+
| next_paths() | Same as CsvPath.next() but for all paths sequentially |
|
|
163
|
+
| collect_by_line() | Same as CsvPath.collect() but for all paths breadth first |
|
|
164
|
+
| fast_forward_by_line() | Same as CsvPath.fast_forward() but for all paths breadth first |
|
|
165
|
+
| next_by_line() | Same as CsvPath.next() but for all paths breadth first |
|
|
139
166
|
|
|
140
167
|
To be clear, the purpose of `CsvPaths` is to apply multiple csvpaths per CSV file. Its breadth-first versions of the `collect()`, `fast_forward()`, and `next()` methods attempt to match each csvpath to each row of a CSV file before continuing to the next row. As you can imagine, for very large files this approach is a must.
|
|
141
168
|
|
|
@@ -271,9 +298,9 @@ The match part is also bracketed. Matches have space separated components or "va
|
|
|
271
298
|
## Term
|
|
272
299
|
A string, number, or regular expression value.
|
|
273
300
|
|
|
274
|
-
|Returns | Matches
|
|
275
|
-
|
|
276
|
-
|A value | Always true | `"a value"`
|
|
301
|
+
|Returns | Matches | Examples |
|
|
302
|
+
|--------|-------------|-----------------|
|
|
303
|
+
|A value | Always true | `"a value"` |
|
|
277
304
|
|
|
278
305
|
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
|
|
279
306
|
|
|
@@ -281,9 +308,9 @@ A string, number, or regular expression value.
|
|
|
281
308
|
## Function
|
|
282
309
|
A composable unit of functionality called once for every row scanned.
|
|
283
310
|
|
|
284
|
-
|Returns
|
|
285
|
-
|
|
286
|
-
|Calculated | Calculated | `count()`
|
|
311
|
+
|Returns | Matches | Examples |
|
|
312
|
+
|-----------|------------|---------------|
|
|
313
|
+
|Calculated | Calculated | `count()` |
|
|
287
314
|
|
|
288
315
|
<a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
|
|
289
316
|
|
|
@@ -27,7 +27,7 @@ from .util.exceptions import (
|
|
|
27
27
|
)
|
|
28
28
|
from .matching.util.exceptions import MatchException
|
|
29
29
|
from csvpath.util.printer import Printer
|
|
30
|
-
from csvpath.util.file_readers import
|
|
30
|
+
from csvpath.util.file_readers import DataFileReader
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class CsvPathPublic(ABC):
|
|
@@ -35,7 +35,10 @@ class CsvPathPublic(ABC):
|
|
|
35
35
|
|
|
36
36
|
@abstractmethod
|
|
37
37
|
def parse(self, csvpath): # pragma: no cover
|
|
38
|
-
"""Reads a csvpath prepares to match against CSV file lines
|
|
38
|
+
"""Reads a csvpath prepares to match against CSV file lines. This
|
|
39
|
+
method is an alternative to simply passing the csvpath string to the
|
|
40
|
+
collect, next, or fast_forward methods. You don't do both.
|
|
41
|
+
"""
|
|
39
42
|
|
|
40
43
|
@abstractmethod
|
|
41
44
|
def parse_named_path(
|
|
@@ -55,24 +58,31 @@ class CsvPathPublic(ABC):
|
|
|
55
58
|
using the stop() function"""
|
|
56
59
|
|
|
57
60
|
@abstractmethod
|
|
58
|
-
def collect(
|
|
59
|
-
|
|
61
|
+
def collect(
|
|
62
|
+
self, csvpath: str = None, *, nexts: int = -1
|
|
63
|
+
) -> List[List[Any]]: # pragma: no cover
|
|
64
|
+
"""Returns the lines of a CSV file that match the csvpath. Pass
|
|
65
|
+
nexts to limit a run to collecting only N lines; the default
|
|
66
|
+
is -1 for collecting all. If you do not pass the csvpath
|
|
67
|
+
string here you must first use the parse method."""
|
|
60
68
|
|
|
61
69
|
@abstractmethod
|
|
62
70
|
def advance(self, ff: int = -1) -> None: # pragma: no cover
|
|
63
71
|
"""Advances the iteration by ff rows. -1 means to the end of the file."""
|
|
64
72
|
|
|
65
73
|
@abstractmethod
|
|
66
|
-
def fast_forward(self) -> None: # pragma: no cover
|
|
74
|
+
def fast_forward(self, csvpath: str = None) -> None: # pragma: no cover
|
|
67
75
|
"""Scans to the end of the CSV file. All scanned rows will be
|
|
68
76
|
considered for match and variables and side effects will happen,
|
|
69
77
|
but no rows will be returned or stored. -1 means to the end of
|
|
70
|
-
the file.
|
|
78
|
+
the file. If you do not pass the csvpath string here you must first
|
|
79
|
+
use the parse method."""
|
|
71
80
|
|
|
72
81
|
@abstractmethod
|
|
73
|
-
def next(self): # pragma: no cover
|
|
82
|
+
def next(self, csvpath: str = None): # pragma: no cover
|
|
74
83
|
"""A generator function that steps through the CSV file returning
|
|
75
|
-
matching rows
|
|
84
|
+
matching rows. If you do not pass the csvpath string here you must
|
|
85
|
+
first use the parse method."""
|
|
76
86
|
|
|
77
87
|
|
|
78
88
|
class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902, R0904
|
|
@@ -290,6 +300,52 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
290
300
|
self._created_at = datetime.now()
|
|
291
301
|
self._run_started_at = None
|
|
292
302
|
|
|
303
|
+
self._collecting = False
|
|
304
|
+
self._unmatched = None
|
|
305
|
+
self._unmatched_available = False
|
|
306
|
+
self._data_from_preceding = False
|
|
307
|
+
|
|
308
|
+
@property
|
|
309
|
+
def data_from_preceding(self) -> bool:
|
|
310
|
+
return self._data_from_preceding
|
|
311
|
+
|
|
312
|
+
@data_from_preceding.setter
|
|
313
|
+
def data_from_preceding(self, dfp: bool) -> None:
|
|
314
|
+
self._data_from_preceding = dfp
|
|
315
|
+
|
|
316
|
+
@property
|
|
317
|
+
def unmatched(self) -> list[list[Any]]:
|
|
318
|
+
return self._unmatched
|
|
319
|
+
|
|
320
|
+
@unmatched.setter
|
|
321
|
+
def unmatched(self, lines: list[list[Any]]) -> None:
|
|
322
|
+
self._unmatched = lines
|
|
323
|
+
|
|
324
|
+
@property
|
|
325
|
+
def collecting(self) -> bool:
|
|
326
|
+
return self._collecting
|
|
327
|
+
|
|
328
|
+
@collecting.setter
|
|
329
|
+
def collecting(self, c: bool) -> None:
|
|
330
|
+
self._collecting = c
|
|
331
|
+
|
|
332
|
+
def set_unmatched_availability(self) -> None:
|
|
333
|
+
um = self.metadata.get("unmatched-mode")
|
|
334
|
+
if um is not None and um.find("no-keep") > -1:
|
|
335
|
+
self.unmatched_available = False
|
|
336
|
+
elif um is not None and um.find("keep") > -1:
|
|
337
|
+
self.unmatched_available = True
|
|
338
|
+
else:
|
|
339
|
+
self.unmatched_available = False
|
|
340
|
+
|
|
341
|
+
@property
|
|
342
|
+
def unmatched_available(self) -> bool:
|
|
343
|
+
return self._unmatched_available
|
|
344
|
+
|
|
345
|
+
@unmatched_available.setter
|
|
346
|
+
def unmatched_available(self, ua: bool) -> None:
|
|
347
|
+
self._unmatched_available = ua
|
|
348
|
+
|
|
293
349
|
@property
|
|
294
350
|
def created_at(self) -> datetime:
|
|
295
351
|
return self._created_at
|
|
@@ -652,6 +708,9 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
652
708
|
# - return-mode: matches | no-matches
|
|
653
709
|
# - print-mode: default | no-default
|
|
654
710
|
# - validation-mode: (no-)print | log | (no-)raise | quiet | (no-)match
|
|
711
|
+
# - run-mode: no-run | run
|
|
712
|
+
# - unmatched-mode: no-keep | keep
|
|
713
|
+
# - source-mode: preceding | origin
|
|
655
714
|
#
|
|
656
715
|
self.update_logic_mode_if()
|
|
657
716
|
self.update_run_mode_if()
|
|
@@ -659,6 +718,18 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
659
718
|
self.update_print_mode_if()
|
|
660
719
|
self.update_explain_mode_if()
|
|
661
720
|
self.update_arg_validation_mode_if()
|
|
721
|
+
self.update_unmatched_mode_if()
|
|
722
|
+
self.update_data_from_preceding_if()
|
|
723
|
+
|
|
724
|
+
def update_data_from_preceding_if(self) -> None:
|
|
725
|
+
if self.metadata and "source-mode" in self.metadata:
|
|
726
|
+
dfp = self.metadata["source-mode"]
|
|
727
|
+
self.data_from_preceding = dfp == "preceding"
|
|
728
|
+
else:
|
|
729
|
+
self.data_from_preceding = False
|
|
730
|
+
|
|
731
|
+
def update_unmatched_mode_if(self) -> None:
|
|
732
|
+
self.set_unmatched_availability()
|
|
662
733
|
|
|
663
734
|
def update_arg_validation_mode_if(self) -> None:
|
|
664
735
|
if self.metadata and "validation-mode" in self.metadata:
|
|
@@ -938,13 +1009,16 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
938
1009
|
# collect(), fast_forward(), and next() are the central methods of CsvPath.
|
|
939
1010
|
#
|
|
940
1011
|
#
|
|
941
|
-
def collect(self, nexts: int = -1) -> List[List[Any]]:
|
|
1012
|
+
def collect(self, csvpath: str = None, *, nexts: int = -1) -> List[List[Any]]:
|
|
942
1013
|
"""Runs the csvpath forward and returns the matching lines seen as
|
|
943
1014
|
a list of lists"""
|
|
1015
|
+
if self.scanner is None and csvpath is not None:
|
|
1016
|
+
self.parse(csvpath)
|
|
944
1017
|
if nexts < -1:
|
|
945
1018
|
raise ProcessingException(
|
|
946
1019
|
"Input must be >= -1. -1 means collect to the end of the file."
|
|
947
1020
|
)
|
|
1021
|
+
self.collecting = True
|
|
948
1022
|
lines = []
|
|
949
1023
|
for _ in self.next():
|
|
950
1024
|
_ = _[:]
|
|
@@ -957,17 +1031,21 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
957
1031
|
break
|
|
958
1032
|
return lines
|
|
959
1033
|
|
|
960
|
-
def fast_forward(self) -> None:
|
|
1034
|
+
def fast_forward(self, csvpath=None) -> None:
|
|
961
1035
|
"""Runs the path for all rows of the file. Variables are collected
|
|
962
1036
|
and side effects like print happen. No lines are collected.
|
|
963
1037
|
"""
|
|
1038
|
+
if self.scanner is None and csvpath is not None:
|
|
1039
|
+
self.parse(csvpath)
|
|
964
1040
|
for _ in self.next():
|
|
965
1041
|
pass
|
|
966
1042
|
|
|
967
|
-
def next(self):
|
|
1043
|
+
def next(self, csvpath=None):
|
|
968
1044
|
"""Iterates over the lines in the CSV file returning those that match
|
|
969
1045
|
the csvpath. collect() and fast_forward() call next() behind the scenes.
|
|
970
1046
|
"""
|
|
1047
|
+
if self.scanner is None and csvpath is not None:
|
|
1048
|
+
self.parse(csvpath)
|
|
971
1049
|
start = time.time()
|
|
972
1050
|
if self.run_mode is True:
|
|
973
1051
|
for line in self._next_line():
|
|
@@ -983,6 +1061,12 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
983
1061
|
self.logger.error(msg)
|
|
984
1062
|
raise MatchException(msg)
|
|
985
1063
|
yield line
|
|
1064
|
+
elif self.collecting and self.unmatched_available:
|
|
1065
|
+
if self.unmatched is None:
|
|
1066
|
+
self.unmatched = []
|
|
1067
|
+
line = self.limit_collection(line)
|
|
1068
|
+
# we aren't None and 0 checking as above. needed?
|
|
1069
|
+
self.unmatched.append(line)
|
|
986
1070
|
if self.stopped:
|
|
987
1071
|
self.logger.info(
|
|
988
1072
|
"CsvPath has been stopped at line %s",
|
|
@@ -1018,14 +1102,7 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
1018
1102
|
#
|
|
1019
1103
|
if self.scanner.filename is None:
|
|
1020
1104
|
raise FileException("There is no filename")
|
|
1021
|
-
|
|
1022
|
-
with open(self.scanner.filename, "r", encoding="utf-8") as file:
|
|
1023
|
-
reader = csv.reader(
|
|
1024
|
-
file, delimiter=self.delimiter, quotechar=self.quotechar
|
|
1025
|
-
)
|
|
1026
|
-
for line in reader:
|
|
1027
|
-
"""
|
|
1028
|
-
reader = CsvDataFileReader(
|
|
1105
|
+
reader = DataFileReader(
|
|
1029
1106
|
self.scanner.filename, delimiter=self.delimiter, quotechar=self.quotechar
|
|
1030
1107
|
)
|
|
1031
1108
|
for line in reader.next():
|
|
@@ -1173,7 +1250,12 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
1173
1250
|
return line
|
|
1174
1251
|
ls = []
|
|
1175
1252
|
for k in self.limit_collection_to:
|
|
1176
|
-
|
|
1253
|
+
if k is None or k >= len(line):
|
|
1254
|
+
raise InputException(
|
|
1255
|
+
f"[{self.identity}] Line {self.line_monitor.physical_line_number}: unknown header name: {k}"
|
|
1256
|
+
)
|
|
1257
|
+
else:
|
|
1258
|
+
ls.append(line[k])
|
|
1177
1259
|
return ls
|
|
1178
1260
|
|
|
1179
1261
|
def advance(self, ff: int = -1) -> None:
|