csvpath 0.0.498__tar.gz → 0.0.499__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {csvpath-0.0.498 → csvpath-0.0.499}/PKG-INFO +23 -15
- {csvpath-0.0.498 → csvpath-0.0.499}/README.md +22 -14
- {csvpath-0.0.498 → csvpath-0.0.499}/config/config.ini +9 -1
- csvpath-0.0.499/csvpath/managers/files/file_listener_ol.py +6 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/listener.py +16 -1
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/event.py +1 -44
- csvpath-0.0.499/csvpath/managers/ol/ol_listener.py +7 -0
- csvpath-0.0.499/csvpath/managers/ol/sender.py +38 -0
- csvpath-0.0.499/csvpath/managers/paths/paths_listener_ol.py +11 -0
- csvpath-0.0.499/csvpath/managers/results/result_listener_ol.py +6 -0
- csvpath-0.0.499/csvpath/managers/results/results_listener_ol.py +6 -0
- csvpath-0.0.499/csvpath/managers/run/run_listener_ol.py +6 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/config.py +6 -2
- csvpath-0.0.499/docs/config.md +109 -0
- csvpath-0.0.499/docs/images/csvpath-icon-sm.png +0 -0
- csvpath-0.0.499/docs/images/marquez-logo-sm.png +0 -0
- csvpath-0.0.499/docs/images/openlineage-logo-sm.png +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/pyproject.toml +1 -1
- csvpath-0.0.498/csvpath/managers/files/file_listener_ol.py +0 -27
- csvpath-0.0.498/csvpath/managers/paths/paths_listener_ol.py +0 -27
- csvpath-0.0.498/csvpath/managers/results/result_listener_ol.py +0 -27
- csvpath-0.0.498/csvpath/managers/results/results_listener_ol.py +0 -28
- csvpath-0.0.498/csvpath/managers/run/run_listener_ol.py +0 -30
- csvpath-0.0.498/docs/config.md +0 -70
- {csvpath-0.0.498 → csvpath-0.0.499}/LICENSE +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/cli/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/cli/cli.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/cli/drill_down.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/csvpath.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/csvpaths.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_cacher.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_manager.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_metadata.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_registrar.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/metadata.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/event_result.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/job.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/run.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/run_state.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/paths/paths_manager.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/paths/paths_metadata.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/paths/paths_registrar.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/registrar.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result_metadata.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result_registrar.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result_serializer.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/results_manager.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/results_metadata.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/results_registrar.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/run/run_listener_stdout.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/run/run_metadata.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/run/run_registrar.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/args.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/all.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/andf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/any.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/between.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/empty.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/exists.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/inf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/no.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/notf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/orf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/yes.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_bytes.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_headers.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_lines.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_scans.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/counter.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/every.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/has_matches.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/increment.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/tally.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/total_lines.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/dates/now.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function_factory.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function_finder.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function_focus.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/append.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/collect.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/empty_stack.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/end.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/header_name.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/headers.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/mismatch.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/replace.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/reset_headers.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/advance.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/after_blank.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/dups.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/first.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/first_line.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/last.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/stop.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/above.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/add.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/divide.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/equals.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/intf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/mod.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/multiply.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/round.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/subtotal.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/subtract.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/sum.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/misc/fingerprint.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/misc/importf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/misc/random.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/jinjaf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/print_line.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/print_queue.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/printf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/table.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/minf.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/percent.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/percent_unique.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/stdev.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/concat.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/length.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/lower.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/metaphone.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/regex.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/starts_with.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/strip.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/substring.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/upper.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/testing/debug.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/boolean.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/datef.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/decimal.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/nonef.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/string.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/type.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/validity/fail.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/validity/failed.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/validity/line.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/get.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/pushpop.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/put.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/track.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/variables.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/lark_parser.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/lark_transformer.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/matcher.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/equality.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/expression.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/header.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/matchable.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/qualified.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/reference.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/term.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/variable.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/exceptions.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/expression_encoder.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/expression_utility.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/lark_print_parser.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/print_parser.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/runtime_data_collector.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/explain_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/files_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/logic_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/mode_controller.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/print_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/return_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/run_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/source_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/transfer_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/unmatched_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/validation_mode.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/__init__.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/exceptions.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/parser.out +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/parsetab.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/scanner.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/scanning_lexer.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/cache.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/class_loader.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/config_exception.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/error.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/exceptions.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/file_readers.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/last_line_stats.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/line_counter.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/line_monitor.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/line_spooler.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/log_utility.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/metadata_parser.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/pandas_data_reader.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/printer.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/reference_parser.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/s3_data_reader.py +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/asbool.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/assignment.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/comments.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/examples.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/files.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/above.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/advance.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/after_blank.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/all.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/andor.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/any.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/average.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/between.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/collect.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/correlate.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/count.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/count_bytes.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/count_headers.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/counter.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/date.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/empty.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/empty_stack.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/end.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/every.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/fail.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/fingerprint.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/first.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/get.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/has_dups.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/has_matches.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/header.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/header_name.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/header_names_mismatch.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/implementing_functions.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/import.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/in.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/increment.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/intf.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/jinja.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/last.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/line.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/line_number.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/max.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/metaphone.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/mismatch.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/no.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/not.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/now.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/percent_unique.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/pop.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/print.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/print_line.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/print_queue.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/random.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/regex.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/replace.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/reset_headers.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/stdev.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/stop.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/string_functions.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/subtotal.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/subtract.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/sum.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/tally.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/total_lines.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/track.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/types.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/variables.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/variables_and_headers.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/grammar.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/headers.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/images/logo-wordmark-white-trimmed.png +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/paths.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/printing.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/qualifiers.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/references.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/terms.md +0 -0
- {csvpath-0.0.498 → csvpath-0.0.499}/docs/variables.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csvpath
|
|
3
|
-
Version: 0.0.498
|
|
3
|
+
Version: 0.0.499
|
|
4
4
|
Summary: A declarative language for validating CSV, Excel, and other tabular data files
|
|
5
5
|
Author: David Kershaw
|
|
6
6
|
Author-email: dk107dk@hotmail.com
|
|
@@ -43,15 +43,16 @@ Project-URL: Github, https://github.com/csvpath/csvpath.git
|
|
|
43
43
|
Description-Content-Type: text/markdown
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
# <a href='https://www.csvpath.org/'><img src='https://
|
|
46
|
+
# <a href='https://www.csvpath.org/'><img src='https://github.com/csvpath/csvpath/blob/main/docs/images/csvpath-icon-sm.png'/></a> About CsvPath
|
|
47
47
|
|
|
48
|
-
CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
|
|
48
|
+
The CsvPath language defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
|
|
49
49
|
|
|
50
|
-
CsvPath's goal is to make it easy to setup a Collect, Store, Validate
|
|
51
|
-
-
|
|
52
|
-
- Validates that
|
|
53
|
-
- Reports on content validity
|
|
54
|
-
-
|
|
50
|
+
The CsvPath library's goal is to make it easy to set up a <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> flat-file landing zone that:
|
|
51
|
+
- Registers files
|
|
52
|
+
- Validates that the data matches expectations
|
|
53
|
+
- Reports on content validity and other metadata
|
|
54
|
+
- Shapes files for consistency using copy-on-write
|
|
55
|
+
- And stages the results for loading into a data lake
|
|
55
56
|
|
|
56
57
|
And does it all in an automation-friendly way.
|
|
57
58
|
|
|
@@ -59,9 +60,15 @@ CsvPath's validation is inspired by:
|
|
|
59
60
|
- XPath for XML files
|
|
60
61
|
- The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
|
|
61
62
|
|
|
62
|
-
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
|
|
63
|
+
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create. CsvPath can stream events to an OpenLineage server, such as the open source Marquez server.
|
|
63
64
|
|
|
64
|
-
|
|
65
|
+
<a href='https://openlineage.io' >
|
|
66
|
+
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
|
|
67
|
+
<a href='https://peppy-sprite-186812.netlify.app/' >
|
|
68
|
+
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
|
|
65
72
|
|
|
66
73
|
If you need help, use the <a href='https://www.csvpath.org/getting-started/get-help'>contact form</a> or the <a href='https://github.com/csvpath/csvpath/issues'>issue tracker</a> or talk to one of our [sponsors](#sponsors).
|
|
67
74
|
|
|
@@ -99,11 +106,13 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
|
|
|
99
106
|
|
|
100
107
|
CSV files are everywhere!
|
|
101
108
|
|
|
102
|
-
|
|
109
|
+
The majority of companies depend on file processing for significant revenue operations. Research organizations and archives are awash in CSVs. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use spit out CSV or Excel files for sharing. Delimited and tabular files are the lowest common denominator. Many are invalid or broken in some way. Oftentimes, a lot of manual effort goes into finding problems and fixing them.
|
|
110
|
+
|
|
111
|
+
CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract and shape data and create reports.
|
|
103
112
|
|
|
104
|
-
CsvPath
|
|
113
|
+
The CsvPath library implements the CsvPath language, but goes far beyond it to provide a full <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> framework for landing flat files, registering them, validating them, shaping them to a consistent and comparable form, and staging them for a data lake. In that way, CsvPath fills the gap commonly found between an organization's MFT (managed file transfer) and a typical data lake architecture.
|
|
105
114
|
|
|
106
|
-
CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
|
|
115
|
+
CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library and framework, not a system, so it relies on being easy to integrate with other DataOps tools.
|
|
107
116
|
|
|
108
117
|
|
|
109
118
|
<a name="install"></a>
|
|
@@ -217,8 +226,7 @@ The simplest way to get started is using the CLI. <a href='https://www.csvpath.o
|
|
|
217
226
|
When you're ready to think about automation, you'll want to start with a simple driver. This is a very basic programmatic use of CsvPath.
|
|
218
227
|
|
|
219
228
|
```python
|
|
220
|
-
path = CsvPath()
|
|
221
|
-
path.parse("""
|
|
229
|
+
path = CsvPath().parse("""
|
|
222
230
|
$test.csv[5-25][
|
|
223
231
|
#firstname == "Frog"
|
|
224
232
|
@lastname.onmatch = "Bat"
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
|
|
2
|
-
# <a href='https://www.csvpath.org/'><img src='https://
|
|
2
|
+
# <a href='https://www.csvpath.org/'><img src='https://github.com/csvpath/csvpath/blob/main/docs/images/csvpath-icon-sm.png'/></a> About CsvPath
|
|
3
3
|
|
|
4
|
-
CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
|
|
4
|
+
The CsvPath language defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
|
|
5
5
|
|
|
6
|
-
CsvPath's goal is to make it easy to setup a Collect, Store, Validate
|
|
7
|
-
-
|
|
8
|
-
- Validates that
|
|
9
|
-
- Reports on content validity
|
|
10
|
-
-
|
|
6
|
+
The CsvPath library's goal is to make it easy to set up a <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> flat-file landing zone that:
|
|
7
|
+
- Registers files
|
|
8
|
+
- Validates that the data matches expectations
|
|
9
|
+
- Reports on content validity and other metadata
|
|
10
|
+
- Shapes files for consistency using copy-on-write
|
|
11
|
+
- And stages the results for loading into a data lake
|
|
11
12
|
|
|
12
13
|
And does it all in an automation-friendly way.
|
|
13
14
|
|
|
@@ -15,9 +16,15 @@ CsvPath's validation is inspired by:
|
|
|
15
16
|
- XPath for XML files
|
|
16
17
|
- The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
|
|
17
18
|
|
|
18
|
-
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
|
|
19
|
+
CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create. CsvPath can stream events to an OpenLineage server, such as the open source Marquez server.
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
<a href='https://openlineage.io' >
|
|
22
|
+
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
|
|
23
|
+
<a href='https://peppy-sprite-186812.netlify.app/' >
|
|
24
|
+
<img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
|
|
21
28
|
|
|
22
29
|
If you need help, use the <a href='https://www.csvpath.org/getting-started/get-help'>contact form</a> or the <a href='https://github.com/csvpath/csvpath/issues'>issue tracker</a> or talk to one of our [sponsors](#sponsors).
|
|
23
30
|
|
|
@@ -55,11 +62,13 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
|
|
|
55
62
|
|
|
56
63
|
CSV files are everywhere!
|
|
57
64
|
|
|
58
|
-
|
|
65
|
+
The majority of companies depend on file processing for significant revenue operations. Research organizations and archives are awash in CSVs. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use spit out CSV or Excel files for sharing. Delimited and tabular files are the lowest common denominator. Many are invalid or broken in some way. Oftentimes, a lot of manual effort goes into finding problems and fixing them.
|
|
66
|
+
|
|
67
|
+
CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract and shape data and create reports.
|
|
59
68
|
|
|
60
|
-
CsvPath
|
|
69
|
+
The CsvPath library implements the CsvPath language, but goes far beyond it to provide a full <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> framework for landing flat files, registering them, validating them, shaping them to a consistent and comparable form, and staging them for a data lake. In that way, CsvPath fills the gap commonly found between an organization's MFT (managed file transfer) and a typical data lake architecture.
|
|
61
70
|
|
|
62
|
-
CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
|
|
71
|
+
CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library and framework, not a system, so it relies on being easy to integrate with other DataOps tools.
|
|
63
72
|
|
|
64
73
|
|
|
65
74
|
<a name="install"></a>
|
|
@@ -173,8 +182,7 @@ The simplest way to get started is using the CLI. <a href='https://www.csvpath.o
|
|
|
173
182
|
When you're ready to think about automation, you'll want to start with a simple driver. This is a very basic programmatic use of CsvPath.
|
|
174
183
|
|
|
175
184
|
```python
|
|
176
|
-
path = CsvPath()
|
|
177
|
-
path.parse("""
|
|
185
|
+
path = CsvPath().parse("""
|
|
178
186
|
$test.csv[5-25][
|
|
179
187
|
#firstname == "Frog"
|
|
180
188
|
@lastname.onmatch = "Bat"
|
|
@@ -25,13 +25,21 @@ path = cache
|
|
|
25
25
|
imports = config/functions.imports
|
|
26
26
|
|
|
27
27
|
[listeners]
|
|
28
|
+
#uncomment for OpenLineage events to a local Marquez
|
|
29
|
+
#file = from csvpath.managers.files.file_listener_ol import OpenLineageFileListener
|
|
30
|
+
#paths = from csvpath.managers.paths.paths_listener_ol import OpenLineagePathsListener
|
|
31
|
+
#result = from csvpath.managers.results.result_listener_ol import OpenLineageResultListener
|
|
32
|
+
#results = from csvpath.managers.results.results_listener_ol import OpenLineageResultsListener
|
|
28
33
|
|
|
29
34
|
[marquez]
|
|
30
35
|
base_url = http://localhost:5000
|
|
36
|
+
endpoint = api/v1/lineage
|
|
37
|
+
api_key = "none"
|
|
38
|
+
timeout = 5
|
|
39
|
+
verify = False
|
|
31
40
|
|
|
32
41
|
[results]
|
|
33
42
|
archive = archive
|
|
34
|
-
transfers = transfers
|
|
35
43
|
|
|
36
44
|
[inputs]
|
|
37
45
|
files = inputs/named_files
|
|
@@ -1,10 +1,25 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from .metadata import Metadata
|
|
3
|
+
from ..util.config import Config
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
class Listener(ABC):
|
|
6
7
|
def __init__(self, config=None) -> None:
|
|
7
|
-
|
|
8
|
+
super().__init__()
|
|
9
|
+
self._config = config
|
|
10
|
+
|
|
11
|
+
@property
|
|
12
|
+
def config(self):
|
|
13
|
+
if not self._config:
|
|
14
|
+
#
|
|
15
|
+
# this really should never happen. but perhaps in testing?
|
|
16
|
+
#
|
|
17
|
+
self._config = Config()
|
|
18
|
+
return self._config
|
|
19
|
+
|
|
20
|
+
@config.setter
|
|
21
|
+
def config(self, c):
|
|
22
|
+
self._config = c
|
|
8
23
|
|
|
9
24
|
@abstractmethod
|
|
10
25
|
def metadata_update(self, mdata: Metadata) -> None:
|
|
@@ -42,53 +42,10 @@ class EventBuilder:
|
|
|
42
42
|
# do we want to support this one, if it comes?
|
|
43
43
|
return None
|
|
44
44
|
|
|
45
|
-
"""
|
|
46
|
-
def get_identities_facet(self, mdata):
|
|
47
|
-
mp = f"{mdata.base_path}{os.sep}{mdata.named_paths_root}{os.sep}{mdata.named_paths_name}/manifest.json"
|
|
48
|
-
j = []
|
|
49
|
-
with open(mp, "r", encoding="utf-8") as file:
|
|
50
|
-
j = json.load(file)
|
|
51
|
-
d = j[len(j)-1]
|
|
52
|
-
ps = d["named_paths"]
|
|
53
|
-
fields=[]
|
|
54
|
-
for p in d:
|
|
55
|
-
f = schema_dataset.SchemaDatasetFacetFields(
|
|
56
|
-
name=p, type="CsvPath", description=""
|
|
57
|
-
)
|
|
58
|
-
fields.append(f)
|
|
59
|
-
csvpaths = self.dataset(
|
|
60
|
-
f"{mdata.archive_name}/{mdata.named_paths_name}",
|
|
61
|
-
schema_dataset.SchemaDatasetFacet(fields=fields),
|
|
62
|
-
mdata.archive_name
|
|
63
|
-
)
|
|
64
|
-
return csvpaths
|
|
65
|
-
|
|
66
|
-
def dummy_facets(self):
|
|
67
|
-
print(">>> creating dataset data #{i}" )
|
|
68
|
-
user_history = self.dataset(
|
|
69
|
-
"archive",
|
|
70
|
-
schema_dataset.SchemaDatasetFacet(
|
|
71
|
-
fields=[
|
|
72
|
-
schema_dataset.SchemaDatasetFacetFields(
|
|
73
|
-
name="id", type="BIGINT", description="the user id"
|
|
74
|
-
),
|
|
75
|
-
schema_dataset.SchemaDatasetFacetFields(
|
|
76
|
-
name="email_domain", type="VARCHAR", description="the user id"
|
|
77
|
-
),
|
|
78
|
-
schema_dataset.SchemaDatasetFacetFields(
|
|
79
|
-
name="status", type="BIGINT", description="the user id"
|
|
80
|
-
),
|
|
81
|
-
]
|
|
82
|
-
),
|
|
83
|
-
"archive"
|
|
84
|
-
)
|
|
85
|
-
return user_history
|
|
86
|
-
"""
|
|
87
|
-
|
|
88
45
|
def _build_results_event(self, mdata: Metadata, job, run, facets, inputs):
|
|
89
46
|
file = InputDataset(
|
|
90
47
|
namespace=mdata.archive_name, name=f"{mdata.named_file_name}"
|
|
91
|
-
)
|
|
48
|
+
)
|
|
92
49
|
path = InputDataset(
|
|
93
50
|
namespace=mdata.archive_name, name=f"{mdata.named_paths_name}"
|
|
94
51
|
)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from openlineage.client import OpenLineageClient
|
|
3
|
+
from openlineage.client.transport.http import (
|
|
4
|
+
ApiKeyTokenProvider,
|
|
5
|
+
HttpConfig,
|
|
6
|
+
HttpCompression,
|
|
7
|
+
HttpTransport,
|
|
8
|
+
)
|
|
9
|
+
from ..metadata import Metadata
|
|
10
|
+
from ..ol.event import EventBuilder
|
|
11
|
+
from ..listener import Listener
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Sender(Listener):
    """Listener that emits OpenLineage events over HTTP.

    The OpenLineage client is built on first use from the ``[marquez]``
    config section, unless a ready-made client is passed to the constructor.
    """

    # Strings accepted as "true" for boolean settings. This is the same set
    # that configparser.getboolean() treats as true.
    _TRUE_STRINGS = ("1", "yes", "true", "on")

    def __init__(self, *, config=None, client=None):
        """Store the optional config and an optional pre-built client.

        Args:
            config: passed through to ``Listener.__init__``.
            client: an object with an ``emit(event)`` method; when None a
                client is created lazily by the ``client`` property.
        """
        super().__init__(config)
        self._client = client

    @staticmethod
    def _as_bool(value) -> bool:
        """Interpret a config value as a boolean.

        Values read from an ini file are strings, so ``bool("False")`` would
        be True. Strings are therefore compared (case-insensitively, with
        surrounding whitespace and quotes removed) against the accepted
        "true" spellings; any other string is False. Non-string values fall
        back to ordinary truthiness.
        """
        if isinstance(value, str):
            cleaned = value.strip().strip("\"'").lower()
            return cleaned in Sender._TRUE_STRINGS
        return bool(value)

    @property
    def client(self):
        """Return the OpenLineage client, creating it on first access.

        The transport settings are read from the ``[marquez]`` section:
        ``base_url``, ``endpoint``, ``timeout``, ``verify`` and ``api_key``.
        Each falls back to the default given below when the key is missing.
        """
        if self._client is None:
            cfg = self.config
            http_config = HttpConfig(
                url=cfg._get("marquez", "base_url", "https://backend:5000"),
                endpoint=cfg._get("marquez", "endpoint", "api/v1/lineage"),
                timeout=int(cfg._get("marquez", "timeout", 5)),
                # parse the setting rather than bool()-ing it: the ini value
                # "False" is a non-empty string and would otherwise be truthy
                verify=self._as_bool(cfg._get("marquez", "verify", False)),
                auth=ApiKeyTokenProvider(
                    {"apiKey": cfg._get("marquez", "api_key", "none")}
                ),
                compression=HttpCompression.GZIP,
            )
            self._client = OpenLineageClient(transport=HttpTransport(http_config))
        return self._client

    def metadata_update(self, mdata: Metadata) -> None:
        """Build the events for ``mdata`` and emit each one via the client."""
        for event in EventBuilder().build(mdata):
            self.client.emit(event)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from openlineage.client import OpenLineageClient
|
|
2
|
+
|
|
3
|
+
from ..metadata import Metadata
|
|
4
|
+
from ..ol.event import EventBuilder
|
|
5
|
+
from ..ol.sender import Sender
|
|
6
|
+
from ..ol.ol_listener import OpenLineageListener
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class OpenLineagePathsListener(OpenLineageListener):
    """Paths listener built on ``OpenLineageListener``.

    Adds no behavior of its own; the constructor hands its arguments
    straight to the parent class.
    """

    def __init__(self, config=None, client=None):
        # both arguments are forwarded by keyword, unchanged
        forwarded = {"config": config, "client": client}
        super().__init__(**forwarded)
|
|
@@ -107,7 +107,7 @@ class Config:
|
|
|
107
107
|
def config_path(self) -> str:
|
|
108
108
|
return self._configpath
|
|
109
109
|
|
|
110
|
-
def _get(self, section: str, name: str):
|
|
110
|
+
def _get(self, section: str, name: str, default=None):
|
|
111
111
|
if self._config is None:
|
|
112
112
|
raise ConfigurationException("No config object available")
|
|
113
113
|
try:
|
|
@@ -121,7 +121,7 @@ class Config:
|
|
|
121
121
|
except KeyError:
|
|
122
122
|
if self.csvpath_log_level == LogLevels.DEBUG:
|
|
123
123
|
print(f"Check config at {self.config_path} for [{section}][{name}]")
|
|
124
|
-
return
|
|
124
|
+
return default
|
|
125
125
|
|
|
126
126
|
def add_to_config(self, section, key, value) -> None:
|
|
127
127
|
if not self._config.has_section(section):
|
|
@@ -176,6 +176,10 @@ path =
|
|
|
176
176
|
#results = from csvpath.managers.results.results_listener_ol import OpenLineageResultsListener
|
|
177
177
|
#[marquez]
|
|
178
178
|
#base_url = http://localhost:5000
|
|
179
|
+
#endpoint = api/v1/lineage
|
|
180
|
+
#api_key = "none"
|
|
181
|
+
#timeout = 5
|
|
182
|
+
#verify = False
|
|
179
183
|
#
|
|
180
184
|
[results]
|
|
181
185
|
archive = archive
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
|
|
2
|
+
# Config
|
|
3
|
+
|
|
4
|
+
CsvPaths has a few config options. By default, the config options are in `./config/config.ini`. You can change the location of your .ini file in two ways:
|
|
5
|
+
- Set a `CSVPATH_CONFIG_FILE` env var pointing to your file
|
|
6
|
+
- Create an instance of CsvPathConfig, set its CONFIG property, and call the `reload()` method
|
|
7
|
+
|
|
8
|
+
The config options, at this time, are about:
|
|
9
|
+
- File system locations
|
|
10
|
+
- File extensions
|
|
11
|
+
- Error handling
|
|
12
|
+
- Logging
|
|
13
|
+
- Event listeners
|
|
14
|
+
- Custom functions
|
|
15
|
+
|
|
16
|
+
## File System Locations
|
|
17
|
+
|
|
18
|
+
CsvPath stores files in three places:
|
|
19
|
+
- The data staging location
|
|
20
|
+
- The csvpath files location
|
|
21
|
+
- An archive or namespace of results
|
|
22
|
+
|
|
23
|
+
The first two are in the `[inputs]` section as `files` and `csvpaths`. The default location for data files and csvpath files is under the `./inputs` directory. Each has its own folder. You can move these two locations anywhere you like.
|
|
24
|
+
|
|
25
|
+
The archive is set in the `[results]` section as `archive`. By default it is a directory named `archive`. You can name the archive anything you like. Keep in mind that as well as simply storing files, the archive is also a namespacing tool. If you have many data partners or separate data operations you may want to have separate archives. If you do use separate archives and you are running OpenLineage events you will see your events namespaced by archive name. See below for configuring OpenLineage event listeners.
|
|
26
|
+
|
|
27
|
+
In addition, there are cache, config, and log file locations. They have sensible defaults but can be moved, if needed.
|
|
28
|
+
|
|
29
|
+
## File Extensions
|
|
30
|
+
|
|
31
|
+
There are two types of files you can set extensions for:
|
|
32
|
+
- CSV files
|
|
33
|
+
- CsvPath files
|
|
34
|
+
|
|
35
|
+
The defaults for these are:
|
|
36
|
+
|
|
37
|
+
```ini
|
|
38
|
+
[csvpath_files]
|
|
39
|
+
extensions = txt, csvpath
|
|
40
|
+
|
|
41
|
+
[csv_files]
|
|
42
|
+
extensions = txt, csv, tsv, dat, tab, psv, ssv
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Error Handling
|
|
46
|
+
|
|
47
|
+
The error settings are for when CsvPath or CsvPaths instances encounter problems. The options are:
|
|
48
|
+
- `stop` - Halt processing; the CsvPath stopped property is set to True
|
|
49
|
+
- `fail` - Mark the currently running CsvPath as having failed
|
|
50
|
+
- `raise` - Raise the exception in as noisy a way as possible
|
|
51
|
+
- `quiet` - Do nothing that affects the system out; this protects command line redirection of `print()` output. Logging is also minimized such that errors that would release a lot of metadata are slimmed down.
|
|
52
|
+
- `collect` - Collect the errors in the error results for the CsvPath. This option is available with and without a CsvPaths instance.
|
|
53
|
+
- `print` - Prints the errors using the Printer interface to whatever printers are available. By default this goes to standard out.
|
|
54
|
+
|
|
55
|
+
Multiple of these settings can be configured together. `quiet` and `raise` do not coexist well; likewise `quiet` and `print`. `raise` will win over `quiet` because seeing problems lets you fix them. `print` is most useful in getting simple inline error messages when `raise` is off.
|
|
56
|
+
|
|
57
|
+
## Logging
|
|
58
|
+
|
|
59
|
+
Logging levels are set at the major-component level. The components are:
|
|
60
|
+
- `csvpath`
|
|
61
|
+
- `csvpaths`
|
|
62
|
+
- `matcher`
|
|
63
|
+
- `scanner`
|
|
64
|
+
|
|
65
|
+
Four levels are available:
|
|
66
|
+
- `error`
|
|
67
|
+
- `warning`
|
|
68
|
+
- `debug`
|
|
69
|
+
- `info`
|
|
70
|
+
|
|
71
|
+
The levels are intended for the same functionality as their Python equivalents.
|
|
72
|
+
|
|
73
|
+
CsvPath logs are directed to a file. The log file settings are:
|
|
74
|
+
- `log_file` - a path to the log
|
|
75
|
+
- `log_files_to_keep` - a number of logs, 1 to 100, kept in rotation before being deleted
|
|
76
|
+
- `log_file_size` - an indication of roughly when a log file will be rotated
|
|
77
|
+
|
|
78
|
+
As an example:
|
|
79
|
+
```ini
|
|
80
|
+
log_file = logs/csvpath.log
|
|
81
|
+
log_files_to_keep = 100
|
|
82
|
+
log_file_size = 52428800
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Listeners
|
|
86
|
+
|
|
87
|
+
CsvPath generates events that it converts to manifest files full of asset and runtime metadata. You can add OpenLineage listeners that will send results to an OpenLineage server like Marquez. In principle any OpenLineage API could receive CsvPath events, but only Marquez is tested and supported.
|
|
88
|
+
|
|
89
|
+
Be aware that OpenLineage events are currently sent inline and synchronously, not out of band or asynchronously. That means there is a small performance hit. Typically this would not be noticeable, but in certain instances it could be a factor. For example, CsvPath's hundreds of unit tests run slower when OpenLineage events are fired. This small performance hit may be remediated in the future if it becomes an issue.
|
|
90
|
+
|
|
91
|
+
The settings are:
|
|
92
|
+
```ini
|
|
93
|
+
[listeners]
|
|
94
|
+
#uncomment for OpenLineage events to a Marquez server
|
|
95
|
+
#file = from csvpath.managers.files.file_listener_ol import OpenLineageFileListener
|
|
96
|
+
#paths = from csvpath.managers.paths.paths_listener_ol import OpenLineagePathsListener
|
|
97
|
+
#result = from csvpath.managers.results.result_listener_ol import OpenLineageResultListener
|
|
98
|
+
#results = from csvpath.managers.results.results_listener_ol import OpenLineageResultsListener
|
|
99
|
+
|
|
100
|
+
[marquez]
|
|
101
|
+
base_url = http://localhost:5000
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Custom Functions
|
|
105
|
+
|
|
106
|
+
<a href='https://github.com/csvpath/csvpath/blob/main/docs/functions/implementing_functions.md'>See this page for how to create and run custom functions</a>
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from openlineage.client import OpenLineageClient
|
|
2
|
-
|
|
3
|
-
from ..metadata import Metadata
|
|
4
|
-
from ..listener import Listener
|
|
5
|
-
from ..ol.event import EventBuilder
|
|
6
|
-
from ..ol.job import JobBuilder
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class OpenLineageFileListener(Listener):
|
|
10
|
-
def __init__(self, config=None):
|
|
11
|
-
super().__init__(config)
|
|
12
|
-
self.ol_client = None
|
|
13
|
-
|
|
14
|
-
def metadata_update(self, mdata: Metadata) -> None:
|
|
15
|
-
if self.ol_client is None:
|
|
16
|
-
client_url = self.config._get("marquez", "base_url")
|
|
17
|
-
if client_url is None:
|
|
18
|
-
print(
|
|
19
|
-
"WARNING: OpenLineage listeners are live but there is no Marquez API URL"
|
|
20
|
-
)
|
|
21
|
-
return
|
|
22
|
-
# client_url = "http://localhost:5000"
|
|
23
|
-
self.ol_client = OpenLineageClient(url=client_url)
|
|
24
|
-
|
|
25
|
-
es = EventBuilder().build(mdata)
|
|
26
|
-
for e in es:
|
|
27
|
-
self.ol_client.emit(e)
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from openlineage.client.client import OpenLineageClient
|
|
2
|
-
|
|
3
|
-
from ..metadata import Metadata
|
|
4
|
-
from ..listener import Listener
|
|
5
|
-
from ..ol.event import EventBuilder
|
|
6
|
-
from ..ol.job import JobBuilder
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class OpenLineagePathsListener(Listener):
|
|
10
|
-
def __init__(self, config=None):
|
|
11
|
-
super().__init__(config)
|
|
12
|
-
self.ol_client = None
|
|
13
|
-
|
|
14
|
-
def metadata_update(self, mdata: Metadata) -> None:
|
|
15
|
-
if self.ol_client is None:
|
|
16
|
-
client_url = self.config._get("marquez", "base_url")
|
|
17
|
-
if client_url is None:
|
|
18
|
-
print(
|
|
19
|
-
"WARNING: OpenLineage listeners are live but there is no Marquez API URL"
|
|
20
|
-
)
|
|
21
|
-
return
|
|
22
|
-
# client_url = "http://localhost:5000"
|
|
23
|
-
self.ol_client = OpenLineageClient(url=client_url)
|
|
24
|
-
|
|
25
|
-
es = EventBuilder().build(mdata)
|
|
26
|
-
for e in es:
|
|
27
|
-
self.ol_client.emit(e)
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from openlineage.client.client import OpenLineageClient
|
|
2
|
-
|
|
3
|
-
from ..metadata import Metadata
|
|
4
|
-
from ..listener import Listener
|
|
5
|
-
from ..ol.event import EventBuilder
|
|
6
|
-
from ..ol.job import JobBuilder
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class OpenLineageResultListener(Listener):
|
|
10
|
-
def __init__(self, config=None):
|
|
11
|
-
super().__init__(config)
|
|
12
|
-
self.ol_client = None
|
|
13
|
-
|
|
14
|
-
def metadata_update(self, mdata: Metadata) -> None:
|
|
15
|
-
if self.ol_client is None:
|
|
16
|
-
client_url = self.config._get("marquez", "base_url")
|
|
17
|
-
if client_url is None:
|
|
18
|
-
print(
|
|
19
|
-
"WARNING: OpenLineage listeners are live but there is no Marquez API URL"
|
|
20
|
-
)
|
|
21
|
-
return
|
|
22
|
-
# client_url = "http://localhost:5000"
|
|
23
|
-
self.ol_client = OpenLineageClient(url=client_url)
|
|
24
|
-
|
|
25
|
-
es = EventBuilder().build(mdata)
|
|
26
|
-
for e in es:
|
|
27
|
-
self.ol_client.emit(e)
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
from openlineage.client.client import OpenLineageClient
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from ..metadata import Metadata
|
|
5
|
-
from ..listener import Listener
|
|
6
|
-
from ..ol.event import EventBuilder
|
|
7
|
-
from ..ol.job import JobBuilder
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class OpenLineageResultsListener(Listener):
|
|
11
|
-
def __init__(self, config=None):
|
|
12
|
-
super().__init__(config)
|
|
13
|
-
self.ol_client = None
|
|
14
|
-
|
|
15
|
-
def metadata_update(self, mdata: Metadata) -> None:
|
|
16
|
-
if self.ol_client is None:
|
|
17
|
-
client_url = self.config._get("marquez", "base_url")
|
|
18
|
-
if client_url is None:
|
|
19
|
-
print(
|
|
20
|
-
"WARNING: OpenLineage listeners are live but there is no Marquez API URL"
|
|
21
|
-
)
|
|
22
|
-
return
|
|
23
|
-
# client_url = "http://localhost:5000"
|
|
24
|
-
self.ol_client = OpenLineageClient(url=client_url)
|
|
25
|
-
|
|
26
|
-
es = EventBuilder().build(mdata)
|
|
27
|
-
for e in es:
|
|
28
|
-
self.ol_client.emit(e)
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from openlineage.client.client import OpenLineageClient
|
|
2
|
-
from marquez_client import MarquezClient
|
|
3
|
-
|
|
4
|
-
from ..metadata import Metadata
|
|
5
|
-
from ..listener import Listener
|
|
6
|
-
from ..ol.event import EventBuilder
|
|
7
|
-
from ..ol.job import JobBuilder
|
|
8
|
-
from ..ol.run import RunBuilder
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class OpenLineageRunListener(Listener):
|
|
12
|
-
def __init__(self, config=None):
|
|
13
|
-
super().__init__(config)
|
|
14
|
-
self.client = None
|
|
15
|
-
self.ol_client = None
|
|
16
|
-
|
|
17
|
-
def metadata_update(self, mdata: Metadata) -> None:
|
|
18
|
-
if self.client is None:
|
|
19
|
-
client_url = self.config._get("marquez", "base_url")
|
|
20
|
-
if client_url is None:
|
|
21
|
-
print(
|
|
22
|
-
"WARNING: OpenLineage listeners are live but there is no Marquez API URL"
|
|
23
|
-
)
|
|
24
|
-
return
|
|
25
|
-
# client_url = "http://localhost:5000"
|
|
26
|
-
self.client = MarquezClient(url=client_url)
|
|
27
|
-
self.ol_client = OpenLineageClient(url=client_url)
|
|
28
|
-
es = EventBuilder().build(mdata)
|
|
29
|
-
for e in es:
|
|
30
|
-
self.ol_client.emit(e)
|