csvpath 0.0.502__tar.gz → 0.0.504__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {csvpath-0.0.502 → csvpath-0.0.504}/PKG-INFO +8 -3
- {csvpath-0.0.502 → csvpath-0.0.504}/README.md +5 -2
- {csvpath-0.0.502 → csvpath-0.0.504}/config/config.ini +4 -2
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/csvpath.py +4 -3
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_cacher.py +6 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_manager.py +72 -47
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_registrar.py +14 -13
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/ckan.py +0 -1
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/metadata.py +3 -2
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/paths/paths_manager.py +29 -20
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/paths/paths_registrar.py +16 -13
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_errors_reader.py +5 -3
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_printouts_reader.py +5 -3
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/readers.py +0 -13
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result.py +0 -1
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_file_reader.py +8 -5
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_metadata.py +0 -1
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_registrar.py +27 -27
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_serializer.py +19 -60
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/results_manager.py +28 -21
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/results_registrar.py +19 -16
- csvpath-0.0.504/csvpath/managers/run/run_registrar.py +59 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/args.py +0 -3
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/scanning_lexer.py +1 -1
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/cache.py +2 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/class_loader.py +4 -2
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/config.py +4 -2
- csvpath-0.0.504/csvpath/util/file_info.py +29 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/file_readers.py +107 -30
- csvpath-0.0.504/csvpath/util/file_writers.py +97 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/line_spooler.py +31 -7
- csvpath-0.0.504/csvpath/util/nos.py +182 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/pandas_data_reader.py +10 -2
- csvpath-0.0.504/csvpath/util/s3/s3_data_reader.py +72 -0
- csvpath-0.0.504/csvpath/util/s3/s3_data_writer.py +44 -0
- csvpath-0.0.504/csvpath/util/s3/s3_fingerprinter.py +52 -0
- csvpath-0.0.504/csvpath/util/s3/s3_utils.py +66 -0
- csvpath-0.0.504/csvpath/util/s3/s3_xlsx_data_reader.py +37 -0
- csvpath-0.0.504/docs/images/ckan-logo-sm.png +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/pyproject.toml +5 -3
- csvpath-0.0.502/csvpath/managers/run/run_registrar.py +0 -44
- csvpath-0.0.502/csvpath/util/s3_data_reader.py +0 -24
- {csvpath-0.0.502 → csvpath-0.0.504}/LICENSE +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/cli/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/cli/cli.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/cli/drill_down.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/csvpaths.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_metadata.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/ckan_listener.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/datafile.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/dataset.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/event.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/event_result.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/file_listener_ol.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/job.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/ol_listener.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/paths_listener_ol.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/result_listener_ol.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/results_listener_ol.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/run.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/run_listener_ol.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/run_state.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/sender.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/slack/event.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/slack/sender.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/listener.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/paths/paths_metadata.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/registrar.py +2 -2
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_lines_reader.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_unmatched_reader.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/results_metadata.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/run/run_listener_stdout.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/run/run_metadata.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/all.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/andf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/any.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/between.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/empty.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/exists.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/inf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/no.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/notf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/orf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/yes.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_bytes.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_headers.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_lines.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_scans.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/counter.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/every.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/has_matches.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/increment.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/tally.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/total_lines.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/dates/now.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function_factory.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function_finder.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function_focus.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/append.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/collect.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/empty_stack.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/end.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/header_name.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/headers.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/mismatch.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/replace.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/reset_headers.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/advance.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/after_blank.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/dups.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/first.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/first_line.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/last.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/stop.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/above.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/add.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/divide.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/equals.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/intf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/mod.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/multiply.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/round.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/subtotal.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/subtract.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/sum.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/misc/fingerprint.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/misc/importf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/misc/random.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/jinjaf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/print_line.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/print_queue.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/printf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/table.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/minf.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/percent.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/percent_unique.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/stdev.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/concat.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/length.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/lower.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/metaphone.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/regex.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/starts_with.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/strip.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/substring.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/upper.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/testing/debug.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/boolean.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/datef.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/decimal.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/nonef.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/string.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/type.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/validity/fail.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/validity/failed.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/validity/line.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/get.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/pushpop.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/put.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/track.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/variables.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/lark_parser.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/lark_transformer.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/matcher.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/equality.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/expression.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/header.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/matchable.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/qualified.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/reference.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/term.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/variable.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/exceptions.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/expression_encoder.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/expression_utility.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/lark_print_parser.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/print_parser.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/runtime_data_collector.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/explain_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/files_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/logic_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/mode_controller.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/print_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/return_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/run_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/source_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/transfer_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/unmatched_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/validation_mode.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/__init__.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/exceptions.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/parser.out +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/parsetab.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/scanner.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/config_exception.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/error.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/exceptions.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/last_line_stats.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/line_counter.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/line_monitor.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/log_utility.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/metadata_parser.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/printer.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/reference_parser.py +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/asbool.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/assignment.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/comments.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/config.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/examples.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/files.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/above.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/advance.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/after_blank.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/all.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/andor.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/any.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/average.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/between.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/collect.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/correlate.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/count.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/count_bytes.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/count_headers.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/counter.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/date.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/empty.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/empty_stack.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/end.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/every.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/fail.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/fingerprint.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/first.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/get.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/has_dups.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/has_matches.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/header.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/header_name.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/header_names_mismatch.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/implementing_functions.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/import.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/in.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/increment.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/intf.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/jinja.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/last.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/line.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/line_number.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/max.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/metaphone.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/mismatch.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/no.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/not.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/now.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/percent_unique.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/pop.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/print.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/print_line.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/print_queue.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/random.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/regex.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/replace.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/reset_headers.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/stdev.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/stop.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/string_functions.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/subtotal.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/subtract.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/sum.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/tally.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/total_lines.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/track.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/types.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/variables.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/variables_and_headers.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/grammar.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/headers.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/csvpath-icon-sm.png +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/csvpath-logo-wordmark-tight-2.svg +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-3.svg +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-4.svg +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-white-trimmed.png +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/marquez-logo-sm.png +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/openlineage-logo-sm.png +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/paths.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/printing.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/qualifiers.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/references.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/terms.md +0 -0
- {csvpath-0.0.502 → csvpath-0.0.504}/docs/variables.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csvpath
|
|
3
|
-
Version: 0.0.502
|
|
3
|
+
Version: 0.0.504
|
|
4
4
|
Summary: A declarative language for validating CSV, Excel, and other tabular data files
|
|
5
5
|
Author: David Kershaw
|
|
6
6
|
Author-email: dk107dk@hotmail.com
|
|
@@ -24,6 +24,7 @@ Classifier: Topic :: Text Processing
|
|
|
24
24
|
Classifier: Topic :: Utilities
|
|
25
25
|
Provides-Extra: pandas
|
|
26
26
|
Provides-Extra: smartopen
|
|
27
|
+
Requires-Dist: boto3 (>=1.35.91,<2.0.0)
|
|
27
28
|
Requires-Dist: bullet (>=2.2.0,<3.0.0)
|
|
28
29
|
Requires-Dist: ckanapi (>=4.8,<5.0)
|
|
29
30
|
Requires-Dist: inflect (>=7.4.0,<8.0.0)
|
|
@@ -38,6 +39,7 @@ Requires-Dist: pytest (>=8.3.3,<9.0.0)
|
|
|
38
39
|
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
|
|
39
40
|
Requires-Dist: pytz (>=2024.2,<2025.0)
|
|
40
41
|
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
42
|
+
Requires-Dist: smart-open (>=7.1.0,<8.0.0) ; extra == "smartopen"
|
|
41
43
|
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
42
44
|
Project-URL: Csvpath.org, https://www.csvpath.org
|
|
43
45
|
Project-URL: Github, https://github.com/csvpath/csvpath.git
|
|
@@ -61,13 +63,16 @@ CsvPath's validation is inspired by:
|
|
|
61
63
|
- XPath for XML files
|
|
62
64
|
- The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
|
|
63
65
|
|
|
64
|
-
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
|
|
66
|
+
CsvPath is intended to fit tightly with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions and listeners are easy to create.
|
|
65
67
|
|
|
66
|
-
<a href=
|
|
68
|
+
CsvPath can stream lineage events to an OpenLineage server, such as the open source Marquez server. Read about <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-openlineage" target="_blank">CsvPath and OpenLineage here</a>.
|
|
69
|
+
<br/><a href='https://openlineage.io' >
|
|
67
70
|
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
|
|
68
71
|
<a href='https://peppy-sprite-186812.netlify.app/' >
|
|
69
72
|
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
|
|
70
73
|
|
|
74
|
+
Need to publish validated datasets to a CKAN data portal? <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-ckan" target="_blank">Read about how CsvPath is integrated with CKAN</a>.
|
|
75
|
+
<a href="https://ckan.org/" target="_blank"><img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/ckan-logo-sm.png" alt="CKAN Data Portal"/></a>
|
|
71
76
|
|
|
72
77
|
Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
|
|
73
78
|
|
|
@@ -16,13 +16,16 @@ CsvPath's validation is inspired by:
|
|
|
16
16
|
- XPath for XML files
|
|
17
17
|
- The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
|
|
18
18
|
|
|
19
|
-
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
|
|
19
|
+
CsvPath is intended to fit tightly with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions and listeners are easy to create.
|
|
20
20
|
|
|
21
|
-
<a href=
|
|
21
|
+
CsvPath can stream lineage events to an OpenLineage server, such as the open source Marquez server. Read about <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-openlineage" target="_blank">CsvPath and OpenLineage here</a>.
|
|
22
|
+
<br/><a href='https://openlineage.io' >
|
|
22
23
|
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
|
|
23
24
|
<a href='https://peppy-sprite-186812.netlify.app/' >
|
|
24
25
|
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
|
|
25
26
|
|
|
27
|
+
Need to publish validated datasets to a CKAN data portal? <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-ckan" target="_blank">Read about how CsvPath is integrated with CKAN</a>.
|
|
28
|
+
<a href="https://ckan.org/" target="_blank"><img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/ckan-logo-sm.png" alt="CKAN Data Portal"/></a>
|
|
26
29
|
|
|
27
30
|
Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
|
|
28
31
|
|
|
@@ -29,8 +29,6 @@ groups =
|
|
|
29
29
|
#slack, marquez, ckan
|
|
30
30
|
|
|
31
31
|
# add ckan to the list of groups above for alerts to slack webhooks
|
|
32
|
-
ckan.paths = from csvpath.managers.integrations.ckan.ckan_listener import CkanListener
|
|
33
|
-
ckan.result = from csvpath.managers.integrations.ckan.ckan_listener import CkanListener
|
|
34
32
|
ckan.results = from csvpath.managers.integrations.ckan.ckan_listener import CkanListener
|
|
35
33
|
|
|
36
34
|
#add marquez to the list of groups above for OpenLineage events to a local Marquez
|
|
@@ -45,6 +43,10 @@ slack.paths = from csvpath.managers.integrations.slack.sender import SlackSender
|
|
|
45
43
|
slack.result = from csvpath.managers.integrations.slack.sender import SlackSender
|
|
46
44
|
slack.results = from csvpath.managers.integrations.slack.sender import SlackSender
|
|
47
45
|
|
|
46
|
+
[ckan]
|
|
47
|
+
server = http://localhost:80
|
|
48
|
+
api_token = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiI3akJwc1ZuSkVrZm1aNnBtVTJfTW5CNlJXZ211YjdOOHVXZ1l1cUFDa0Q4IiwiaWF0IjoxNzM0NzE4NDQ3fQ.QXWXoJoSxVES4NwXYBteYUD7enX9D5T2htmETLGFzrs
|
|
49
|
+
|
|
48
50
|
[marquez]
|
|
49
51
|
base_url = http://localhost:5000
|
|
50
52
|
endpoint = api/v1/lineage
|
|
@@ -5,7 +5,7 @@ import time
|
|
|
5
5
|
import os
|
|
6
6
|
import hashlib
|
|
7
7
|
from datetime import datetime, timezone
|
|
8
|
-
from typing import List, Dict, Any
|
|
8
|
+
from typing import List, Dict, Any, Self
|
|
9
9
|
from collections.abc import Iterator
|
|
10
10
|
from abc import ABC, abstractmethod
|
|
11
11
|
from .util.config import Config
|
|
@@ -72,7 +72,7 @@ class CsvPathPublic(ABC):
|
|
|
72
72
|
"""Advances the iteration by ff rows. -1 means to the end of the file."""
|
|
73
73
|
|
|
74
74
|
@abstractmethod
|
|
75
|
-
def fast_forward(self, csvpath: str = None) ->
|
|
75
|
+
def fast_forward(self, csvpath: str = None) -> Self: # pragma: no cover
|
|
76
76
|
"""Scans to the end of the CSV file. All scanned rows will be
|
|
77
77
|
considered for match and variables and side effects will happen,
|
|
78
78
|
but no rows will be returned or stored. -1 means to the end of
|
|
@@ -965,7 +965,7 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
965
965
|
self.lines = None
|
|
966
966
|
return lines
|
|
967
967
|
|
|
968
|
-
def fast_forward(self, csvpath=None) -> None:
|
|
968
|
+
def fast_forward(self, csvpath=None) -> Self:
|
|
969
969
|
"""Runs the path for all rows of the file. Variables are collected
|
|
970
970
|
and side effects like print happen. No lines are collected.
|
|
971
971
|
"""
|
|
@@ -973,6 +973,7 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
|
|
|
973
973
|
self.parse(csvpath)
|
|
974
974
|
for _ in self.next():
|
|
975
975
|
pass
|
|
976
|
+
return self
|
|
976
977
|
|
|
977
978
|
def next(self, csvpath=None):
|
|
978
979
|
"""Iterates over the lines in the CSV file returning those that match
|
|
@@ -13,6 +13,8 @@ class FileCacher:
|
|
|
13
13
|
self.pathed_lines_and_headers = {}
|
|
14
14
|
|
|
15
15
|
def get_new_line_monitor(self, filename: str) -> LineMonitor:
|
|
16
|
+
if filename is None:
|
|
17
|
+
raise ValueError("Filename cannot be None")
|
|
16
18
|
if filename not in self.pathed_lines_and_headers:
|
|
17
19
|
self._find_lines_and_headers(filename)
|
|
18
20
|
lm = self.pathed_lines_and_headers[filename][0]
|
|
@@ -25,6 +27,8 @@ class FileCacher:
|
|
|
25
27
|
return self.pathed_lines_and_headers[filename][1][:]
|
|
26
28
|
|
|
27
29
|
def _find_lines_and_headers(self, filename: str) -> None:
|
|
30
|
+
if filename is None:
|
|
31
|
+
raise ValueError("Filename cannot be None")
|
|
28
32
|
lm, headers = self._cached_lines_and_headers(filename)
|
|
29
33
|
if lm is None or headers is None:
|
|
30
34
|
lc = LineCounter(self.csvpaths)
|
|
@@ -33,6 +37,8 @@ class FileCacher:
|
|
|
33
37
|
self.pathed_lines_and_headers[filename] = (lm, headers)
|
|
34
38
|
|
|
35
39
|
def _cached_lines_and_headers(self, filename: str) -> Tuple[LineMonitor, List[str]]:
|
|
40
|
+
if filename is None:
|
|
41
|
+
raise ValueError("Filename cannot be None")
|
|
36
42
|
lm = LineMonitor()
|
|
37
43
|
json = self.cache.cached_text(filename, "json")
|
|
38
44
|
if json is not None and not json.strip() == "":
|
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
3
|
import csv
|
|
4
|
-
import hashlib
|
|
5
|
-
import shutil
|
|
6
4
|
from json import JSONDecodeError
|
|
7
|
-
from typing import Dict, List, Tuple
|
|
8
5
|
from csvpath.util.error import ErrorHandler
|
|
9
6
|
from csvpath.util.file_readers import DataFileReader
|
|
7
|
+
from csvpath.util.file_writers import DataFileWriter
|
|
10
8
|
from csvpath.util.reference_parser import ReferenceParser
|
|
11
9
|
from csvpath.util.exceptions import InputException, FileException
|
|
10
|
+
from csvpath.util.nos import Nos
|
|
12
11
|
from .file_registrar import FileRegistrar
|
|
13
12
|
from .file_cacher import FileCacher
|
|
14
13
|
from .file_metadata import FileMetadata
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
class FileManager:
|
|
18
|
-
def __init__(self, *, named_files:
|
|
17
|
+
def __init__(self, *, named_files: dict[str, str] = None, csvpaths=None):
|
|
19
18
|
if named_files is None:
|
|
20
19
|
named_files = {}
|
|
21
20
|
self._csvpaths = csvpaths
|
|
@@ -43,24 +42,25 @@ class FileManager:
|
|
|
43
42
|
def assure_named_file_home(self, name: str) -> str:
|
|
44
43
|
home = self.named_file_home(name)
|
|
45
44
|
if not os.path.exists(home):
|
|
46
|
-
|
|
45
|
+
Nos(home).makedirs()
|
|
47
46
|
return home
|
|
48
47
|
|
|
49
48
|
#
|
|
50
49
|
# file homes are paths to files like:
|
|
51
50
|
# inputs/named_files/March-2024/March-2024.csv/March-2024.csv
|
|
52
|
-
# which become paths to
|
|
51
|
+
# which become paths to fingerprint-named file versions like:
|
|
53
52
|
# inputs/named_files/March-2024/March-2024.csv/12467d811d1589ede586e3a42c41046641bedc1c73941f4c21e2fd2966f188b4.csv
|
|
54
53
|
# once the files have been fingerprinted
|
|
55
54
|
#
|
|
56
55
|
def assure_file_home(self, name: str, path: str) -> str:
|
|
57
56
|
if path.find("#") > -1:
|
|
58
57
|
path = path[0 : path.find("#")]
|
|
59
|
-
|
|
58
|
+
sep = Nos(path).sep
|
|
59
|
+
fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
|
|
60
60
|
home = self.named_file_home(name)
|
|
61
61
|
home = os.path.join(home, fname)
|
|
62
|
-
if not
|
|
63
|
-
|
|
62
|
+
if not Nos(home).exists():
|
|
63
|
+
Nos(home).makedirs()
|
|
64
64
|
return home
|
|
65
65
|
|
|
66
66
|
@property
|
|
@@ -70,28 +70,35 @@ class FileManager:
|
|
|
70
70
|
@property
|
|
71
71
|
def named_file_names(self) -> list:
|
|
72
72
|
b = self.named_files_dir
|
|
73
|
-
ns = [n for n in
|
|
73
|
+
ns = [n for n in Nos(b).listdir() if not Nos(os.path.join(b, n)).isfile()]
|
|
74
74
|
return ns
|
|
75
75
|
|
|
76
76
|
def name_exists(self, name: str) -> bool:
|
|
77
77
|
p = self.named_file_home(name)
|
|
78
|
-
|
|
78
|
+
b = Nos(p).dir_exists()
|
|
79
|
+
return b
|
|
79
80
|
|
|
80
81
|
def remove_named_file(self, name: str) -> None:
|
|
81
82
|
p = os.path.join(self.named_files_dir, name)
|
|
82
|
-
|
|
83
|
+
Nos(p).remove()
|
|
83
84
|
|
|
84
85
|
def remove_all_named_files(self) -> None:
|
|
85
86
|
names = self.named_file_names
|
|
86
87
|
for name in names:
|
|
87
88
|
self.remove_named_file(name)
|
|
88
89
|
|
|
89
|
-
def set_named_files(self, nf:
|
|
90
|
+
def set_named_files(self, nf: dict[str, str]) -> None:
|
|
90
91
|
for k, v in nf.items():
|
|
91
92
|
self.add_named_file(name=k, path=v)
|
|
92
93
|
|
|
93
94
|
def set_named_files_from_json(self, filename: str) -> None:
|
|
95
|
+
"""named-files from json files are always local"""
|
|
94
96
|
try:
|
|
97
|
+
#
|
|
98
|
+
# TODO: named-files json files are always local. they should
|
|
99
|
+
# be able to be on s3 so that we are completely independent of
|
|
100
|
+
# the local disk w/re file manager
|
|
101
|
+
#
|
|
95
102
|
with open(filename, "r", encoding="utf-8") as f:
|
|
96
103
|
j = json.load(f)
|
|
97
104
|
self.set_named_files(j)
|
|
@@ -99,7 +106,7 @@ class FileManager:
|
|
|
99
106
|
ErrorHandler(csvpaths=self._csvpaths).handle_error(ex)
|
|
100
107
|
|
|
101
108
|
def add_named_files_from_dir(self, dirname: str):
|
|
102
|
-
dlist =
|
|
109
|
+
dlist = Nos(dirname).listdir()
|
|
103
110
|
base = dirname
|
|
104
111
|
for p in dlist:
|
|
105
112
|
_ = p.lower()
|
|
@@ -115,7 +122,6 @@ class FileManager:
|
|
|
115
122
|
|
|
116
123
|
#
|
|
117
124
|
# -------------------------------------
|
|
118
|
-
# move functions to this class and file_data_filesystem_storekeeper
|
|
119
125
|
#
|
|
120
126
|
def add_named_file(self, *, name: str, path: str) -> None:
|
|
121
127
|
#
|
|
@@ -156,30 +162,41 @@ class FileManager:
|
|
|
156
162
|
mdata.fingerprint = h
|
|
157
163
|
mdata.file_path = rpath
|
|
158
164
|
mdata.file_home = file_home
|
|
159
|
-
mdata.file_name = file_home[file_home.rfind(
|
|
165
|
+
mdata.file_name = file_home[file_home.rfind(Nos(file_home).sep) + 1 :]
|
|
160
166
|
mdata.name_home = name_home
|
|
161
167
|
mdata.mark = mark
|
|
162
168
|
self.registrar.register_complete(mdata)
|
|
163
169
|
|
|
164
170
|
def _copy_in(self, path, home) -> None:
|
|
165
|
-
|
|
171
|
+
sep = Nos(path).sep
|
|
172
|
+
fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
|
|
166
173
|
# creates
|
|
167
174
|
# a/file.csv -> named_files/name/file.csv/file.csv
|
|
168
175
|
# the dir name matching the resulting file name is correct
|
|
169
176
|
# once the file is landed and fingerprinted, the file
|
|
170
177
|
# name is changed.
|
|
171
178
|
temp = os.path.join(home, fname)
|
|
172
|
-
|
|
173
|
-
|
|
179
|
+
#
|
|
180
|
+
# this is another place that is too s3 vs. local. we'll have
|
|
181
|
+
# other source/sinks to support.
|
|
182
|
+
#
|
|
183
|
+
if path.startswith("s3:") and not home.startswith("s3"):
|
|
184
|
+
self._copy_down(path, temp, mode="wb")
|
|
185
|
+
elif path.startswith("s3:") and home.startswith("s3"):
|
|
186
|
+
Nos(path).copy(temp)
|
|
187
|
+
elif not path.startswith("s3:") and not home.startswith("s3"):
|
|
188
|
+
self._copy_down(path, temp, mode="wb")
|
|
189
|
+
elif not path.startswith("s3:") and home.startswith("s3"):
|
|
190
|
+
self._copy_down(path, temp, mode="wb")
|
|
174
191
|
else:
|
|
175
|
-
|
|
192
|
+
... # not possible. just being explicit for the moment.
|
|
176
193
|
return temp
|
|
177
194
|
|
|
178
|
-
def _copy_down(self, path, temp) -> None:
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
195
|
+
def _copy_down(self, path, temp, mode="wb") -> None:
|
|
196
|
+
with DataFileReader(path) as reader:
|
|
197
|
+
with DataFileWriter(path=temp, mode=mode) as writer:
|
|
198
|
+
for line in reader.next_raw():
|
|
199
|
+
writer.append(line)
|
|
183
200
|
|
|
184
201
|
#
|
|
185
202
|
# can take a reference. the ref would only be expected to point
|
|
@@ -201,18 +218,21 @@ class FileManager:
|
|
|
201
218
|
else:
|
|
202
219
|
if not self.name_exists(name):
|
|
203
220
|
return None
|
|
204
|
-
|
|
221
|
+
n = self.named_file_home(name)
|
|
222
|
+
ret = self.registrar.registered_file(n)
|
|
205
223
|
return ret
|
|
206
224
|
|
|
207
225
|
def get_fingerprint_for_name(self, name) -> str:
|
|
208
226
|
if name.startswith("$"):
|
|
209
227
|
# atm, we don't give fingerprints for references doing rewind/replay
|
|
210
228
|
return ""
|
|
229
|
+
#
|
|
230
|
+
# note: this is not creating fingerprints, just getting existing ones.
|
|
231
|
+
#
|
|
211
232
|
return self.registrar.get_fingerprint(self.named_file_home(name))
|
|
212
233
|
|
|
213
234
|
#
|
|
214
235
|
# -------------------------------------
|
|
215
|
-
# move to file_data_filesystem_storekeeper?
|
|
216
236
|
#
|
|
217
237
|
def get_named_file_reader(self, name: str) -> DataFileReader:
|
|
218
238
|
path = self.get_named_file(name)
|
|
@@ -228,7 +248,8 @@ class FileManager:
|
|
|
228
248
|
)
|
|
229
249
|
|
|
230
250
|
def _fingerprint(self, path) -> str:
|
|
231
|
-
|
|
251
|
+
sep = Nos(path).sep
|
|
252
|
+
fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
|
|
232
253
|
t = None
|
|
233
254
|
i = fname.find(".")
|
|
234
255
|
if i > -1:
|
|
@@ -240,25 +261,29 @@ class FileManager:
|
|
|
240
261
|
# creating the initial file name, where the file starts
|
|
241
262
|
#
|
|
242
263
|
fpath = os.path.join(path, fname)
|
|
243
|
-
|
|
244
|
-
h = hashlib.file_digest(f, hashlib.sha256)
|
|
245
|
-
h = h.hexdigest()
|
|
246
|
-
#
|
|
247
|
-
# creating the new path using the hash as filename
|
|
248
|
-
#
|
|
249
|
-
hpath = os.path.join(path, h)
|
|
250
|
-
if t is not None:
|
|
251
|
-
hpath = f"{hpath}.{t}"
|
|
252
|
-
#
|
|
253
|
-
# if we're re-adding the file we don't need to make
|
|
254
|
-
# another copy of it. re-adds are fine.
|
|
255
|
-
#
|
|
256
|
-
b = os.path.exists(hpath)
|
|
257
|
-
if b:
|
|
258
|
-
os.remove(fpath)
|
|
259
|
-
return hpath, h
|
|
264
|
+
h = None
|
|
260
265
|
#
|
|
261
|
-
#
|
|
266
|
+
# this version should work local and minimize traffic when in S3
|
|
262
267
|
#
|
|
263
|
-
|
|
268
|
+
with DataFileReader(fpath) as f:
|
|
269
|
+
h = f.fingerprint()
|
|
270
|
+
#
|
|
271
|
+
# creating the new path using the fingerprint as filename
|
|
272
|
+
#
|
|
273
|
+
hpath = os.path.join(path, h)
|
|
274
|
+
if t is not None:
|
|
275
|
+
hpath = f"{hpath}.{t}"
|
|
276
|
+
#
|
|
277
|
+
# if we're re-adding the file we don't need to make
|
|
278
|
+
# another copy of it. re-adds are fine.
|
|
279
|
+
#
|
|
280
|
+
# need an s3 way to do this
|
|
281
|
+
b = Nos(hpath).exists()
|
|
282
|
+
if b:
|
|
283
|
+
Nos(fpath).remove()
|
|
284
|
+
return hpath, h
|
|
285
|
+
#
|
|
286
|
+
# if a first add, rename the file to the fingerprint + ext
|
|
287
|
+
#
|
|
288
|
+
Nos(fpath).rename(hpath)
|
|
264
289
|
return hpath, h
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
-
import hashlib
|
|
4
|
-
import shutil
|
|
5
3
|
from datetime import datetime
|
|
6
4
|
from csvpath.util.exceptions import InputException, FileException
|
|
7
5
|
from csvpath.util.file_readers import DataFileReader
|
|
6
|
+
from csvpath.util.file_writers import DataFileWriter
|
|
7
|
+
from csvpath.util.nos import Nos
|
|
8
8
|
from csvpath.managers.registrar import Registrar
|
|
9
9
|
from csvpath.managers.listener import Listener
|
|
10
10
|
from csvpath.managers.metadata import Metadata
|
|
@@ -30,17 +30,17 @@ class FileRegistrar(Registrar, Listener):
|
|
|
30
30
|
return man[len(man) - 1]["fingerprint"]
|
|
31
31
|
|
|
32
32
|
def manifest_path(self, home) -> str:
|
|
33
|
-
if not
|
|
33
|
+
if not Nos(home).dir_exists():
|
|
34
34
|
raise InputException(f"Named file home does not exist: {home}")
|
|
35
35
|
mf = os.path.join(home, "manifest.json")
|
|
36
|
-
if not
|
|
37
|
-
with
|
|
38
|
-
|
|
36
|
+
if not Nos(mf).exists():
|
|
37
|
+
with DataFileWriter(path=mf, mode="w") as writer:
|
|
38
|
+
writer.append("[]")
|
|
39
39
|
return mf
|
|
40
40
|
|
|
41
41
|
def get_manifest(self, mpath) -> list:
|
|
42
|
-
with
|
|
43
|
-
return json.load(
|
|
42
|
+
with DataFileReader(mpath) as reader:
|
|
43
|
+
return json.load(reader.source)
|
|
44
44
|
|
|
45
45
|
def metadata_update(self, mdata: Metadata) -> None:
|
|
46
46
|
path = mdata.origin_path
|
|
@@ -60,8 +60,8 @@ class FileRegistrar(Registrar, Listener):
|
|
|
60
60
|
mani["mark"] = mark
|
|
61
61
|
jdata = self.get_manifest(manifest_path)
|
|
62
62
|
jdata.append(mani)
|
|
63
|
-
with
|
|
64
|
-
json.dump(jdata,
|
|
63
|
+
with DataFileWriter(path=manifest_path, mode="w") as writer:
|
|
64
|
+
json.dump(jdata, writer.sink, indent=2)
|
|
65
65
|
|
|
66
66
|
def register_complete(self, mdata: Metadata) -> None:
|
|
67
67
|
path = mdata.origin_path
|
|
@@ -75,7 +75,8 @@ class FileRegistrar(Registrar, Listener):
|
|
|
75
75
|
raise InputException(
|
|
76
76
|
f"File mgr and registrar marks should match: {mdata.mark}, {mark}"
|
|
77
77
|
)
|
|
78
|
-
if not path.startswith("s3:") and not
|
|
78
|
+
if not path.startswith("s3:") and not Nos(path).exists():
|
|
79
|
+
# if not path.startswith("s3:") and not os.path.exists(path):
|
|
79
80
|
#
|
|
80
81
|
# try for a data reader in case we're smart-opening
|
|
81
82
|
#
|
|
@@ -134,8 +135,8 @@ class FileRegistrar(Registrar, Listener):
|
|
|
134
135
|
|
|
135
136
|
def registered_file(self, home: str) -> str:
|
|
136
137
|
mpath = self.manifest_path(home)
|
|
137
|
-
with
|
|
138
|
-
mdata = json.load(
|
|
138
|
+
with DataFileReader(mpath) as reader:
|
|
139
|
+
mdata = json.load(reader.source)
|
|
139
140
|
if mdata is None or len(mdata) == 0:
|
|
140
141
|
raise InputException(f"Manifest for {home} at {mpath} is empty")
|
|
141
142
|
m = mdata[len(mdata) - 1]
|
|
@@ -14,6 +14,7 @@ class Metadata(ABC):
|
|
|
14
14
|
self._uuid = uuid4()
|
|
15
15
|
self.manifest_path: str = None
|
|
16
16
|
self.archive_name: str = None
|
|
17
|
+
self.archive_path: str = None
|
|
17
18
|
self._base_path = None
|
|
18
19
|
self._named_files_root: str = None
|
|
19
20
|
self._named_paths_root: str = None
|
|
@@ -54,6 +55,8 @@ class Metadata(ABC):
|
|
|
54
55
|
self.manifest_path = m.get("manifest_path")
|
|
55
56
|
if m.get("archive_name") is not None:
|
|
56
57
|
self.archive_name = m.get("archive_name")
|
|
58
|
+
if m.get("archive_path") is not None:
|
|
59
|
+
self.archive_path = m.get("archive_path")
|
|
57
60
|
|
|
58
61
|
@property
|
|
59
62
|
def uuid(self) -> UUID:
|
|
@@ -68,7 +71,6 @@ class Metadata(ABC):
|
|
|
68
71
|
@property
|
|
69
72
|
def uuid_string(self) -> str:
|
|
70
73
|
return str(self._uuid)
|
|
71
|
-
# return self._uuid.hex
|
|
72
74
|
|
|
73
75
|
@uuid_string.setter
|
|
74
76
|
def uuid_string(self, u: str) -> None:
|
|
@@ -128,7 +130,6 @@ class Metadata(ABC):
|
|
|
128
130
|
|
|
129
131
|
@time_completed_string.setter
|
|
130
132
|
def time_completed_string(self, s: str) -> None:
|
|
131
|
-
# self._time_completed = datetime.date.fromisoformat(s)
|
|
132
133
|
self._time_completed = parser.parse(s)
|
|
133
134
|
|
|
134
135
|
@property
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# pylint: disable=C0114
|
|
2
2
|
import os
|
|
3
3
|
import json
|
|
4
|
-
import shutil
|
|
5
4
|
from typing import NewType
|
|
6
5
|
from json import JSONDecodeError
|
|
7
6
|
from csvpath import CsvPath
|
|
@@ -9,6 +8,9 @@ from csvpath.util.exceptions import InputException
|
|
|
9
8
|
from csvpath.util.error import ErrorHandler
|
|
10
9
|
from csvpath.util.metadata_parser import MetadataParser
|
|
11
10
|
from csvpath.util.reference_parser import ReferenceParser
|
|
11
|
+
from csvpath.util.file_readers import DataFileReader
|
|
12
|
+
from csvpath.util.file_writers import DataFileWriter
|
|
13
|
+
from csvpath.util.nos import Nos
|
|
12
14
|
from .paths_registrar import PathsRegistrar
|
|
13
15
|
from .paths_metadata import PathsMetadata
|
|
14
16
|
|
|
@@ -37,8 +39,8 @@ class PathsManager:
|
|
|
37
39
|
|
|
38
40
|
def named_paths_home(self, name: NamedPathsName) -> str:
|
|
39
41
|
home = os.path.join(self.named_paths_dir, name)
|
|
40
|
-
if not
|
|
41
|
-
|
|
42
|
+
if not Nos(home).exists():
|
|
43
|
+
Nos(home).makedirs()
|
|
42
44
|
return home
|
|
43
45
|
|
|
44
46
|
@property
|
|
@@ -61,8 +63,8 @@ class PathsManager:
|
|
|
61
63
|
if directory is None:
|
|
62
64
|
ie = InputException("Named paths collection name needed")
|
|
63
65
|
ErrorHandler(csvpaths=self.csvpaths).handle_error(ie)
|
|
64
|
-
if
|
|
65
|
-
dlist =
|
|
66
|
+
if not Nos(directory).isfile():
|
|
67
|
+
dlist = Nos(directory).listdir()
|
|
66
68
|
base = directory
|
|
67
69
|
for p in dlist:
|
|
68
70
|
if p[0] == ".":
|
|
@@ -142,8 +144,9 @@ class PathsManager:
|
|
|
142
144
|
mdata = PathsMetadata(self.csvpaths.config)
|
|
143
145
|
mdata.archive_name = self.csvpaths.config.archive_name
|
|
144
146
|
mdata.named_paths_name = name
|
|
145
|
-
|
|
146
|
-
mdata.
|
|
147
|
+
sep = Nos(mdata.named_paths_root).sep
|
|
148
|
+
mdata.named_paths_home = f"{mdata.named_paths_root}{sep}{name}"
|
|
149
|
+
mdata.group_file_path = f"{mdata.named_paths_home}{sep}group.csvpaths"
|
|
147
150
|
mdata.named_paths = paths
|
|
148
151
|
mdata.named_paths_identities = ids
|
|
149
152
|
mdata.named_paths_count = len(ids)
|
|
@@ -193,15 +196,15 @@ class PathsManager:
|
|
|
193
196
|
def store_json_paths_file(self, name: str, jsonpath: str) -> None:
|
|
194
197
|
home = self.named_paths_home(name)
|
|
195
198
|
j = ""
|
|
196
|
-
with
|
|
199
|
+
with DataFileReader(jsonpath) as file:
|
|
197
200
|
j = file.read()
|
|
198
|
-
with
|
|
199
|
-
|
|
201
|
+
with DataFileWriter(path=os.path.join(home, "definition.json")) as writer:
|
|
202
|
+
writer.write(j)
|
|
200
203
|
|
|
201
204
|
@property
|
|
202
205
|
def named_paths_names(self) -> list[str]:
|
|
203
206
|
path = self.named_paths_dir
|
|
204
|
-
names = [n for n in
|
|
207
|
+
names = [n for n in Nos(path).listdir() if not n.startswith(".")]
|
|
205
208
|
return names
|
|
206
209
|
|
|
207
210
|
def remove_named_paths(self, name: NamedPathsName, strict: bool = False) -> None:
|
|
@@ -210,7 +213,7 @@ class PathsManager:
|
|
|
210
213
|
if not self.has_named_paths(name):
|
|
211
214
|
return
|
|
212
215
|
home = self.named_paths_home(name)
|
|
213
|
-
|
|
216
|
+
Nos(home).remove()
|
|
214
217
|
|
|
215
218
|
def remove_all_named_paths(self) -> None:
|
|
216
219
|
names = self.named_paths_names
|
|
@@ -219,7 +222,7 @@ class PathsManager:
|
|
|
219
222
|
|
|
220
223
|
def has_named_paths(self, name: NamedPathsName) -> bool:
|
|
221
224
|
path = os.path.join(self.named_paths_dir, name)
|
|
222
|
-
return
|
|
225
|
+
return Nos(path).dir_exists()
|
|
223
226
|
|
|
224
227
|
def number_of_named_paths(self, name: NamedPathsName) -> int:
|
|
225
228
|
return len(self._get_named_paths(name))
|
|
@@ -237,9 +240,9 @@ class PathsManager:
|
|
|
237
240
|
s = ""
|
|
238
241
|
path = self.named_paths_home(name)
|
|
239
242
|
grp = os.path.join(path, "group.csvpaths")
|
|
240
|
-
if
|
|
241
|
-
with
|
|
242
|
-
s =
|
|
243
|
+
if Nos(grp).exists():
|
|
244
|
+
with DataFileReader(grp) as reader:
|
|
245
|
+
s = reader.read()
|
|
243
246
|
cs = s.split("---- CSVPATH ----")
|
|
244
247
|
cs = [s for s in cs if s.strip() != ""]
|
|
245
248
|
#
|
|
@@ -258,8 +261,11 @@ class PathsManager:
|
|
|
258
261
|
|
|
259
262
|
def _copy_in(self, name, csvpathstr) -> None:
|
|
260
263
|
temp = self._group_file_path(name)
|
|
261
|
-
|
|
262
|
-
|
|
264
|
+
#
|
|
265
|
+
# TODO: use a DataFileWriter that supports S3 and local to write.
|
|
266
|
+
#
|
|
267
|
+
with DataFileWriter(path=temp, mode="w") as writer:
|
|
268
|
+
writer.append(csvpathstr)
|
|
263
269
|
return temp
|
|
264
270
|
|
|
265
271
|
def _group_file_path(self, name: NamedPathsName) -> str:
|
|
@@ -267,8 +273,11 @@ class PathsManager:
|
|
|
267
273
|
return temp
|
|
268
274
|
|
|
269
275
|
def _get_csvpaths_from_file(self, file_path: str) -> list[str]:
|
|
270
|
-
|
|
271
|
-
|
|
276
|
+
#
|
|
277
|
+
# TODO: use DataFileReader to support S3 and local
|
|
278
|
+
#
|
|
279
|
+
with DataFileReader(file_path) as reader:
|
|
280
|
+
cp = reader.read()
|
|
272
281
|
_ = [
|
|
273
282
|
apath.strip()
|
|
274
283
|
for apath in cp.split(PathsManager.MARKER)
|