csvpath 0.0.498__tar.gz → 0.0.499__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. {csvpath-0.0.498 → csvpath-0.0.499}/PKG-INFO +23 -15
  2. {csvpath-0.0.498 → csvpath-0.0.499}/README.md +22 -14
  3. {csvpath-0.0.498 → csvpath-0.0.499}/config/config.ini +9 -1
  4. csvpath-0.0.499/csvpath/managers/files/file_listener_ol.py +6 -0
  5. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/listener.py +16 -1
  6. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/event.py +1 -44
  7. csvpath-0.0.499/csvpath/managers/ol/ol_listener.py +7 -0
  8. csvpath-0.0.499/csvpath/managers/ol/sender.py +38 -0
  9. csvpath-0.0.499/csvpath/managers/paths/paths_listener_ol.py +11 -0
  10. csvpath-0.0.499/csvpath/managers/results/result_listener_ol.py +6 -0
  11. csvpath-0.0.499/csvpath/managers/results/results_listener_ol.py +6 -0
  12. csvpath-0.0.499/csvpath/managers/run/run_listener_ol.py +6 -0
  13. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/config.py +6 -2
  14. csvpath-0.0.499/docs/config.md +109 -0
  15. csvpath-0.0.499/docs/images/csvpath-icon-sm.png +0 -0
  16. csvpath-0.0.499/docs/images/marquez-logo-sm.png +0 -0
  17. csvpath-0.0.499/docs/images/openlineage-logo-sm.png +0 -0
  18. {csvpath-0.0.498 → csvpath-0.0.499}/pyproject.toml +1 -1
  19. csvpath-0.0.498/csvpath/managers/files/file_listener_ol.py +0 -27
  20. csvpath-0.0.498/csvpath/managers/paths/paths_listener_ol.py +0 -27
  21. csvpath-0.0.498/csvpath/managers/results/result_listener_ol.py +0 -27
  22. csvpath-0.0.498/csvpath/managers/results/results_listener_ol.py +0 -28
  23. csvpath-0.0.498/csvpath/managers/run/run_listener_ol.py +0 -30
  24. csvpath-0.0.498/docs/config.md +0 -70
  25. {csvpath-0.0.498 → csvpath-0.0.499}/LICENSE +0 -0
  26. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/__init__.py +0 -0
  27. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/cli/__init__.py +0 -0
  28. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/cli/cli.py +0 -0
  29. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/cli/drill_down.py +0 -0
  30. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/csvpath.py +0 -0
  31. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/csvpaths.py +0 -0
  32. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/__init__.py +0 -0
  33. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_cacher.py +0 -0
  34. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_manager.py +0 -0
  35. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_metadata.py +0 -0
  36. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/files/file_registrar.py +0 -0
  37. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/metadata.py +0 -0
  38. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/event_result.py +0 -0
  39. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/job.py +0 -0
  40. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/run.py +0 -0
  41. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/ol/run_state.py +0 -0
  42. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/paths/paths_manager.py +0 -0
  43. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/paths/paths_metadata.py +0 -0
  44. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/paths/paths_registrar.py +0 -0
  45. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/registrar.py +0 -0
  46. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result.py +0 -0
  47. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result_metadata.py +0 -0
  48. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result_registrar.py +0 -0
  49. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/result_serializer.py +0 -0
  50. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/results_manager.py +0 -0
  51. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/results_metadata.py +0 -0
  52. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/results/results_registrar.py +0 -0
  53. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/run/run_listener_stdout.py +0 -0
  54. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/run/run_metadata.py +0 -0
  55. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/managers/run/run_registrar.py +0 -0
  56. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/__init__.py +0 -0
  57. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/__init__.py +0 -0
  58. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/args.py +0 -0
  59. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/all.py +0 -0
  60. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/andf.py +0 -0
  61. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/any.py +0 -0
  62. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/between.py +0 -0
  63. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/empty.py +0 -0
  64. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/exists.py +0 -0
  65. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/inf.py +0 -0
  66. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/no.py +0 -0
  67. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/notf.py +0 -0
  68. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/orf.py +0 -0
  69. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/boolean/yes.py +0 -0
  70. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count.py +0 -0
  71. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_bytes.py +0 -0
  72. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_headers.py +0 -0
  73. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_lines.py +0 -0
  74. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/count_scans.py +0 -0
  75. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/counter.py +0 -0
  76. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/every.py +0 -0
  77. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/has_matches.py +0 -0
  78. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/increment.py +0 -0
  79. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/tally.py +0 -0
  80. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/counting/total_lines.py +0 -0
  81. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/dates/now.py +0 -0
  82. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function.py +0 -0
  83. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function_factory.py +0 -0
  84. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function_finder.py +0 -0
  85. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/function_focus.py +0 -0
  86. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/append.py +0 -0
  87. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/collect.py +0 -0
  88. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/empty_stack.py +0 -0
  89. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/end.py +0 -0
  90. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/header_name.py +0 -0
  91. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
  92. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/headers.py +0 -0
  93. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/mismatch.py +0 -0
  94. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/replace.py +0 -0
  95. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/headers/reset_headers.py +0 -0
  96. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/advance.py +0 -0
  97. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/after_blank.py +0 -0
  98. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/dups.py +0 -0
  99. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/first.py +0 -0
  100. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/first_line.py +0 -0
  101. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/last.py +0 -0
  102. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/lines/stop.py +0 -0
  103. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/above.py +0 -0
  104. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/add.py +0 -0
  105. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/divide.py +0 -0
  106. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/equals.py +0 -0
  107. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/intf.py +0 -0
  108. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/mod.py +0 -0
  109. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/multiply.py +0 -0
  110. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/round.py +0 -0
  111. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/subtotal.py +0 -0
  112. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/subtract.py +0 -0
  113. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/math/sum.py +0 -0
  114. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/misc/fingerprint.py +0 -0
  115. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/misc/importf.py +0 -0
  116. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/misc/random.py +0 -0
  117. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/jinjaf.py +0 -0
  118. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/print_line.py +0 -0
  119. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/print_queue.py +0 -0
  120. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/printf.py +0 -0
  121. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/print/table.py +0 -0
  122. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/minf.py +0 -0
  123. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/percent.py +0 -0
  124. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/percent_unique.py +0 -0
  125. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/stats/stdev.py +0 -0
  126. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/concat.py +0 -0
  127. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/length.py +0 -0
  128. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/lower.py +0 -0
  129. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/metaphone.py +0 -0
  130. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/regex.py +0 -0
  131. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/starts_with.py +0 -0
  132. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/strip.py +0 -0
  133. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/substring.py +0 -0
  134. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/strings/upper.py +0 -0
  135. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/testing/debug.py +0 -0
  136. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/__init__.py +0 -0
  137. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/boolean.py +0 -0
  138. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/datef.py +0 -0
  139. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/decimal.py +0 -0
  140. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/nonef.py +0 -0
  141. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/string.py +0 -0
  142. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/types/type.py +0 -0
  143. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/validity/fail.py +0 -0
  144. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/validity/failed.py +0 -0
  145. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/validity/line.py +0 -0
  146. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/get.py +0 -0
  147. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/pushpop.py +0 -0
  148. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/put.py +0 -0
  149. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/track.py +0 -0
  150. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/functions/variables/variables.py +0 -0
  151. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/lark_parser.py +0 -0
  152. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/lark_transformer.py +0 -0
  153. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/matcher.py +0 -0
  154. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/__init__.py +0 -0
  155. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/equality.py +0 -0
  156. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/expression.py +0 -0
  157. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/header.py +0 -0
  158. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/matchable.py +0 -0
  159. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/qualified.py +0 -0
  160. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/reference.py +0 -0
  161. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/term.py +0 -0
  162. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/productions/variable.py +0 -0
  163. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/exceptions.py +0 -0
  164. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/expression_encoder.py +0 -0
  165. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/expression_utility.py +0 -0
  166. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/lark_print_parser.py +0 -0
  167. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/print_parser.py +0 -0
  168. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/matching/util/runtime_data_collector.py +0 -0
  169. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/explain_mode.py +0 -0
  170. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/files_mode.py +0 -0
  171. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/logic_mode.py +0 -0
  172. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/mode_controller.py +0 -0
  173. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/print_mode.py +0 -0
  174. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/return_mode.py +0 -0
  175. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/run_mode.py +0 -0
  176. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/source_mode.py +0 -0
  177. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/transfer_mode.py +0 -0
  178. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/unmatched_mode.py +0 -0
  179. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/modes/validation_mode.py +0 -0
  180. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/__init__.py +0 -0
  181. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/exceptions.py +0 -0
  182. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/parser.out +0 -0
  183. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/parsetab.py +0 -0
  184. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/scanner.py +0 -0
  185. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/scanning/scanning_lexer.py +0 -0
  186. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/cache.py +0 -0
  187. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/class_loader.py +0 -0
  188. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/config_exception.py +0 -0
  189. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/error.py +0 -0
  190. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/exceptions.py +0 -0
  191. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/file_readers.py +0 -0
  192. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/last_line_stats.py +0 -0
  193. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/line_counter.py +0 -0
  194. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/line_monitor.py +0 -0
  195. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/line_spooler.py +0 -0
  196. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/log_utility.py +0 -0
  197. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/metadata_parser.py +0 -0
  198. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/pandas_data_reader.py +0 -0
  199. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/printer.py +0 -0
  200. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/reference_parser.py +0 -0
  201. {csvpath-0.0.498 → csvpath-0.0.499}/csvpath/util/s3_data_reader.py +0 -0
  202. {csvpath-0.0.498 → csvpath-0.0.499}/docs/asbool.md +0 -0
  203. {csvpath-0.0.498 → csvpath-0.0.499}/docs/assignment.md +0 -0
  204. {csvpath-0.0.498 → csvpath-0.0.499}/docs/comments.md +0 -0
  205. {csvpath-0.0.498 → csvpath-0.0.499}/docs/examples.md +0 -0
  206. {csvpath-0.0.498 → csvpath-0.0.499}/docs/files.md +0 -0
  207. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/above.md +0 -0
  208. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/advance.md +0 -0
  209. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/after_blank.md +0 -0
  210. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/all.md +0 -0
  211. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/andor.md +0 -0
  212. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/any.md +0 -0
  213. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/average.md +0 -0
  214. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/between.md +0 -0
  215. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/collect.md +0 -0
  216. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/correlate.md +0 -0
  217. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/count.md +0 -0
  218. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/count_bytes.md +0 -0
  219. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/count_headers.md +0 -0
  220. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/counter.md +0 -0
  221. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/date.md +0 -0
  222. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/empty.md +0 -0
  223. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/empty_stack.md +0 -0
  224. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/end.md +0 -0
  225. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/every.md +0 -0
  226. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/fail.md +0 -0
  227. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/fingerprint.md +0 -0
  228. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/first.md +0 -0
  229. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/get.md +0 -0
  230. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/has_dups.md +0 -0
  231. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/has_matches.md +0 -0
  232. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/header.md +0 -0
  233. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/header_name.md +0 -0
  234. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/header_names_mismatch.md +0 -0
  235. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/implementing_functions.md +0 -0
  236. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/import.md +0 -0
  237. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/in.md +0 -0
  238. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/increment.md +0 -0
  239. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/intf.md +0 -0
  240. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/jinja.md +0 -0
  241. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/last.md +0 -0
  242. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/line.md +0 -0
  243. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/line_number.md +0 -0
  244. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/max.md +0 -0
  245. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/metaphone.md +0 -0
  246. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/mismatch.md +0 -0
  247. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/no.md +0 -0
  248. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/not.md +0 -0
  249. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/now.md +0 -0
  250. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/percent_unique.md +0 -0
  251. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/pop.md +0 -0
  252. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/print.md +0 -0
  253. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/print_line.md +0 -0
  254. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/print_queue.md +0 -0
  255. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/random.md +0 -0
  256. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/regex.md +0 -0
  257. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/replace.md +0 -0
  258. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/reset_headers.md +0 -0
  259. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/stdev.md +0 -0
  260. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/stop.md +0 -0
  261. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/string_functions.md +0 -0
  262. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/subtotal.md +0 -0
  263. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/subtract.md +0 -0
  264. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/sum.md +0 -0
  265. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/tally.md +0 -0
  266. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/total_lines.md +0 -0
  267. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/track.md +0 -0
  268. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/types.md +0 -0
  269. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/variables.md +0 -0
  270. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions/variables_and_headers.md +0 -0
  271. {csvpath-0.0.498 → csvpath-0.0.499}/docs/functions.md +0 -0
  272. {csvpath-0.0.498 → csvpath-0.0.499}/docs/grammar.md +0 -0
  273. {csvpath-0.0.498 → csvpath-0.0.499}/docs/headers.md +0 -0
  274. {csvpath-0.0.498 → csvpath-0.0.499}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
  275. {csvpath-0.0.498 → csvpath-0.0.499}/docs/images/logo-wordmark-white-trimmed.png +0 -0
  276. {csvpath-0.0.498 → csvpath-0.0.499}/docs/paths.md +0 -0
  277. {csvpath-0.0.498 → csvpath-0.0.499}/docs/printing.md +0 -0
  278. {csvpath-0.0.498 → csvpath-0.0.499}/docs/qualifiers.md +0 -0
  279. {csvpath-0.0.498 → csvpath-0.0.499}/docs/references.md +0 -0
  280. {csvpath-0.0.498 → csvpath-0.0.499}/docs/terms.md +0 -0
  281. {csvpath-0.0.498 → csvpath-0.0.499}/docs/variables.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: csvpath
3
- Version: 0.0.498
3
+ Version: 0.0.499
4
4
  Summary: A declarative language for validating CSV, Excel, and other tabular data files
5
5
  Author: David Kershaw
6
6
  Author-email: dk107dk@hotmail.com
@@ -43,15 +43,16 @@ Project-URL: Github, https://github.com/csvpath/csvpath.git
43
43
  Description-Content-Type: text/markdown
44
44
 
45
45
 
46
- # <a href='https://www.csvpath.org/'><img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/></a> About CsvPath
46
+ # <a href='https://www.csvpath.org/'><img src='https://github.com/csvpath/csvpath/blob/main/docs/images/csvpath-icon-sm.png'/></a> About CsvPath
47
47
 
48
- CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
48
+ The CsvPath language defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
49
49
 
50
- CsvPath's goal is to make it easy to setup a Collect, Store, Validate-pattern flat-file landing zone that:
51
- - Analyzes the content and structure of flat files
52
- - Validates that files match expectations
53
- - Reports on content validity
54
- - Creates new derived files using copy-on-write
50
+ The CsvPath library's goal is to make it easy to set up a <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> flat-file landing zone that:
51
+ - Registers files
52
+ - Validates that the data matches expectations
53
+ - Reports on content validity and other metadata
54
+ - Shapes files for consistency using copy-on-write
55
+ - And stages the results for loading into a data lake
55
56
 
56
57
  And does it all in an automation-friendly way.
57
58
 
@@ -59,9 +60,15 @@ CsvPath's validation is inspired by:
59
60
  - XPath for XML files
60
61
  - The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
61
62
 
62
- CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
63
+ CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create. CsvPath can stream events to an OpenLineage server, such as the open source Marquez server.
63
64
 
64
- Read more about CsvPath and see realistic CSV and Excel validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
65
+ <a href='https://openlineage.io' target='_blank'>
66
+ <img src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
67
+ <a href='https://peppy-sprite-186812.netlify.app/' target='_blank'>
68
+ <img src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
69
+
70
+
71
+ Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
65
72
 
66
73
  If you need help, use the <a href='https://www.csvpath.org/getting-started/get-help'>contact form</a> or the <a href='https://github.com/csvpath/csvpath/issues'>issue tracker</a> or talk to one of our [sponsors](#sponsors).
67
74
 
@@ -99,11 +106,13 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
99
106
 
100
107
  CSV files are everywhere!
101
108
 
102
- A surprisingly large number of companies depend on CSV processing for significant amounts of revenue. Research organizations are awash in CSV. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use day to day uses CSV for sharing. CSV is the lowest of common dominators. Many CSVs are invalid or broken in some way. Often times a lot of manual effort goes into finding problems and fixing them.
109
+ The majority of companies depend on file processing for significant revenue operations. Research organizations and archives are awash in CSVs. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use spits out CSV or Excel files for sharing. Delimited and tabular files are the lowest common denominator. Many are invalid or broken in some way. Oftentimes a lot of manual effort goes into finding problems and fixing them.
110
+
111
+ CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract and shape data and create reports.
103
112
 
104
- CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract data, create reports, and do other useful things.
113
+ The CsvPath library implements the CsvPath language, but goes far beyond it to provide a full <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> framework for landing flat files, registering them, validating them, shaping them to a consistent and comparable form, and staging them for a data lake. In that way, CsvPath fills the gap commonly found between an organization's MFT (managed file transfer) and a typical data lake architecture.
105
114
 
106
- CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
115
+ CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library and framework, not a system, so it relies on being easy to integrate with other DataOps tools.
107
116
 
108
117
 
109
118
  <a name="install"></a>
@@ -217,8 +226,7 @@ The simplest way to get started is using the CLI. <a href='https://www.csvpath.o
217
226
  When you're ready to think about automation, you'll want to start with a simple driver. This is a very basic programmatic use of CsvPath.
218
227
 
219
228
  ```python
220
- path = CsvPath()
221
- path.parse("""
229
+ path = CsvPath().parse("""
222
230
  $test.csv[5-25][
223
231
  #firstname == "Frog"
224
232
  @lastname.onmatch = "Bat"
@@ -1,13 +1,14 @@
1
1
 
2
- # <a href='https://www.csvpath.org/'><img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/></a> About CsvPath
2
+ # <a href='https://www.csvpath.org/'><img src='https://github.com/csvpath/csvpath/blob/main/docs/images/csvpath-icon-sm.png'/></a> About CsvPath
3
3
 
4
- CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
4
+ The CsvPath language defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
5
5
 
6
- CsvPath's goal is to make it easy to setup a Collect, Store, Validate-pattern flat-file landing zone that:
7
- - Analyzes the content and structure of flat files
8
- - Validates that files match expectations
9
- - Reports on content validity
10
- - Creates new derived files using copy-on-write
6
+ The CsvPath library's goal is to make it easy to set up a <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> flat-file landing zone that:
7
+ - Registers files
8
+ - Validates that the data matches expectations
9
+ - Reports on content validity and other metadata
10
+ - Shapes files for consistency using copy-on-write
11
+ - And stages the results for loading into a data lake
11
12
 
12
13
  And does it all in an automation-friendly way.
13
14
 
@@ -15,9 +16,15 @@ CsvPath's validation is inspired by:
15
16
  - XPath for XML files
16
17
  - The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
17
18
 
18
- CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
19
+ CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create. CsvPath can stream events to an OpenLineage server, such as the open source Marquez server.
19
20
 
20
- Read more about CsvPath and see realistic CSV and Excel validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
21
+ <a href='https://openlineage.io' >
22
+ <img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
23
+ <a href='https://peppy-sprite-186812.netlify.app/' >
24
+ <img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
25
+
26
+
27
+ Read more about CsvPath and see CSV, Excel, and data frame validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
21
28
 
22
29
  If you need help, use the <a href='https://www.csvpath.org/getting-started/get-help'>contact form</a> or the <a href='https://github.com/csvpath/csvpath/issues'>issue tracker</a> or talk to one of our [sponsors](#sponsors).
23
30
 
@@ -55,11 +62,13 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
55
62
 
56
63
  CSV files are everywhere!
57
64
 
58
- A surprisingly large number of companies depend on CSV processing for significant amounts of revenue. Research organizations are awash in CSV. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use day to day uses CSV for sharing. CSV is the lowest of common dominators. Many CSVs are invalid or broken in some way. Often times a lot of manual effort goes into finding problems and fixing them.
65
+ The majority of companies depend on file processing for significant revenue operations. Research organizations and archives are awash in CSVs. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use spits out CSV or Excel files for sharing. Delimited and tabular files are the lowest common denominator. Many are invalid or broken in some way. Oftentimes a lot of manual effort goes into finding problems and fixing them.
66
+
67
+ CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract and shape data and create reports.
59
68
 
60
- CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract data, create reports, and do other useful things.
69
+ The CsvPath library implements the CsvPath language, but goes far beyond it to provide a full <a href='https://www.atestaanalytics.com/s/The-Collect-Store-Validate-Pattern-Atesta-Analytics.pdf'>Collect, Store, Validate Pattern</a> framework for landing flat files, registering them, validating them, shaping them to a consistent and comparable form, and staging them for a data lake. In that way, CsvPath fills the gap commonly found between an organization's MFT (managed file transfer) and a typical data lake architecture.
61
70
 
62
- CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
71
+ CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library and framework, not a system, so it relies on being easy to integrate with other DataOps tools.
63
72
 
64
73
 
65
74
  <a name="install"></a>
@@ -173,8 +182,7 @@ The simplest way to get started is using the CLI. <a href='https://www.csvpath.o
173
182
  When you're ready to think about automation, you'll want to start with a simple driver. This is a very basic programmatic use of CsvPath.
174
183
 
175
184
  ```python
176
- path = CsvPath()
177
- path.parse("""
185
+ path = CsvPath().parse("""
178
186
  $test.csv[5-25][
179
187
  #firstname == "Frog"
180
188
  @lastname.onmatch = "Bat"
@@ -25,13 +25,21 @@ path = cache
25
25
  imports = config/functions.imports
26
26
 
27
27
  [listeners]
28
+ #uncomment for OpenLineage events to a local Marquez
29
+ #file = from csvpath.managers.files.file_listener_ol import OpenLineageFileListener
30
+ #paths = from csvpath.managers.paths.paths_listener_ol import OpenLineagePathsListener
31
+ #result = from csvpath.managers.results.result_listener_ol import OpenLineageResultListener
32
+ #results = from csvpath.managers.results.results_listener_ol import OpenLineageResultsListener
28
33
 
29
34
  [marquez]
30
35
  base_url = http://localhost:5000
36
+ endpoint = api/v1/lineage
37
+ api_key = "none"
38
+ timeout = 5
39
+ verify = False
31
40
 
32
41
  [results]
33
42
  archive = archive
34
- transfers = transfers
35
43
 
36
44
  [inputs]
37
45
  files = inputs/named_files
@@ -0,0 +1,6 @@
1
+ from ..ol.ol_listener import OpenLineageListener
2
+
3
+
4
+ class OpenLineageFileListener(OpenLineageListener):
5
+ def __init__(self, config=None, client=None):
6
+ super().__init__(config=config, client=client)
@@ -1,10 +1,25 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from .metadata import Metadata
3
+ from ..util.config import Config
3
4
 
4
5
 
5
6
  class Listener(ABC):
6
7
  def __init__(self, config=None) -> None:
7
- self.config = config
8
+ super().__init__()
9
+ self._config = config
10
+
11
+ @property
12
+ def config(self):
13
+ if not self._config:
14
+ #
15
+ # this really should never happen. but perhaps in testing?
16
+ #
17
+ self._config = Config()
18
+ return self._config
19
+
20
+ @config.setter
21
+ def config(self, c):
22
+ self._config = c
8
23
 
9
24
  @abstractmethod
10
25
  def metadata_update(self, mdata: Metadata) -> None:
@@ -42,53 +42,10 @@ class EventBuilder:
42
42
  # do we want to support this one, if it comes?
43
43
  return None
44
44
 
45
- """
46
- def get_identities_facet(self, mdata):
47
- mp = f"{mdata.base_path}{os.sep}{mdata.named_paths_root}{os.sep}{mdata.named_paths_name}/manifest.json"
48
- j = []
49
- with open(mp, "r", encoding="utf-8") as file:
50
- j = json.load(file)
51
- d = j[len(j)-1]
52
- ps = d["named_paths"]
53
- fields=[]
54
- for p in d:
55
- f = schema_dataset.SchemaDatasetFacetFields(
56
- name=p, type="CsvPath", description=""
57
- )
58
- fields.append(f)
59
- csvpaths = self.dataset(
60
- f"{mdata.archive_name}/{mdata.named_paths_name}",
61
- schema_dataset.SchemaDatasetFacet(fields=fields),
62
- mdata.archive_name
63
- )
64
- return csvpaths
65
-
66
- def dummy_facets(self):
67
- print(">>> creating dataset data #{i}" )
68
- user_history = self.dataset(
69
- "archive",
70
- schema_dataset.SchemaDatasetFacet(
71
- fields=[
72
- schema_dataset.SchemaDatasetFacetFields(
73
- name="id", type="BIGINT", description="the user id"
74
- ),
75
- schema_dataset.SchemaDatasetFacetFields(
76
- name="email_domain", type="VARCHAR", description="the user id"
77
- ),
78
- schema_dataset.SchemaDatasetFacetFields(
79
- name="status", type="BIGINT", description="the user id"
80
- ),
81
- ]
82
- ),
83
- "archive"
84
- )
85
- return user_history
86
- """
87
-
88
45
  def _build_results_event(self, mdata: Metadata, job, run, facets, inputs):
89
46
  file = InputDataset(
90
47
  namespace=mdata.archive_name, name=f"{mdata.named_file_name}"
91
- ) # , inputFacets=inputfacets
48
+ )
92
49
  path = InputDataset(
93
50
  namespace=mdata.archive_name, name=f"{mdata.named_paths_name}"
94
51
  )
@@ -0,0 +1,7 @@
1
+ from ..metadata import Metadata
2
+ from .sender import Sender
3
+
4
+
5
+ class OpenLineageListener(Sender):
6
+ def __init__(self, config=None, client=None):
7
+ super().__init__(config=config, client=client)
@@ -0,0 +1,38 @@
1
+ from abc import ABC
2
+ from openlineage.client import OpenLineageClient
3
+ from openlineage.client.transport.http import (
4
+ ApiKeyTokenProvider,
5
+ HttpConfig,
6
+ HttpCompression,
7
+ HttpTransport,
8
+ )
9
+ from ..metadata import Metadata
10
+ from ..ol.event import EventBuilder
11
+ from ..listener import Listener
12
+
13
+
14
+ class Sender(Listener):
15
+ def __init__(self, *, config=None, client=None):
16
+ super().__init__(config)
17
+ self._client = client
18
+
19
+ @property
20
+ def client(self):
21
+ if self._client is None:
22
+ h = HttpConfig(
23
+ url=self.config._get("marquez", "base_url", "https://backend:5000"),
24
+ endpoint=self.config._get("marquez", "endpoint", "api/v1/lineage"),
25
+ timeout=int(self.config._get("marquez", "timeout", 5)),
26
+ verify=bool(self.config._get("marquez", "verify", False)) is True,
27
+ auth=ApiKeyTokenProvider(
28
+ {"apiKey": self.config._get("marquez", "api_key", "none")}
29
+ ),
30
+ compression=HttpCompression.GZIP,
31
+ )
32
+ self._client = OpenLineageClient(transport=HttpTransport(h))
33
+ return self._client
34
+
35
+ def metadata_update(self, mdata: Metadata) -> None:
36
+ es = EventBuilder().build(mdata)
37
+ for e in es:
38
+ self.client.emit(e)
@@ -0,0 +1,11 @@
1
+ from openlineage.client import OpenLineageClient
2
+
3
+ from ..metadata import Metadata
4
+ from ..ol.event import EventBuilder
5
+ from ..ol.sender import Sender
6
+ from ..ol.ol_listener import OpenLineageListener
7
+
8
+
9
+ class OpenLineagePathsListener(OpenLineageListener):
10
+ def __init__(self, config=None, client=None):
11
+ super().__init__(config=config, client=client)
@@ -0,0 +1,6 @@
1
+ from ..ol.ol_listener import OpenLineageListener
2
+
3
+
4
+ class OpenLineageResultListener(OpenLineageListener):
5
+ def __init__(self, config=None, client=None):
6
+ super().__init__(config=config, client=client)
@@ -0,0 +1,6 @@
1
+ from ..ol.ol_listener import OpenLineageListener
2
+
3
+
4
+ class OpenLineageResultsListener(OpenLineageListener):
5
+ def __init__(self, config=None, client=None):
6
+ super().__init__(config=config, client=client)
@@ -0,0 +1,6 @@
1
+ from ..ol.ol_listener import OpenLineageListener
2
+
3
+
4
+ class OpenLineageResultsListener(OpenLineageListener):
5
+ def __init__(self, config=None, client=None):
6
+ super().__init__(config=config, client=client)
@@ -107,7 +107,7 @@ class Config:
107
107
  def config_path(self) -> str:
108
108
  return self._configpath
109
109
 
110
- def _get(self, section: str, name: str):
110
+ def _get(self, section: str, name: str, default=None):
111
111
  if self._config is None:
112
112
  raise ConfigurationException("No config object available")
113
113
  try:
@@ -121,7 +121,7 @@ class Config:
121
121
  except KeyError:
122
122
  if self.csvpath_log_level == LogLevels.DEBUG:
123
123
  print(f"Check config at {self.config_path} for [{section}][{name}]")
124
- return None
124
+ return default
125
125
 
126
126
  def add_to_config(self, section, key, value) -> None:
127
127
  if not self._config.has_section(section):
@@ -176,6 +176,10 @@ path =
176
176
  #results = from csvpath.managers.results.results_listener_ol import OpenLineageResultsListener
177
177
  #[marquez]
178
178
  #base_url = http://localhost:5000
179
+ #endpoint = api/v1/lineage
180
+ #api_key = "none"
181
+ #timeout = 5
182
+ #verify = False
179
183
  #
180
184
  [results]
181
185
  archive = archive
@@ -0,0 +1,109 @@
1
+
2
+ # Config
3
+
4
+ CsvPaths has a few config options. By default, the config options are in `./config/config.ini`. You can change the location of your .ini file in two ways:
5
+ - Set a `CSVPATH_CONFIG_FILE` env var pointing to your file
6
+ - Create an instance of CsvPathConfig, set its CONFIG property, and call the `reload()` method
7
+
8
+ The config options, at this time, are about:
9
+ - File system locations
10
+ - File extensions
11
+ - Error handling
12
+ - Logging
13
+ - Event listeners
14
+ - Custom functions
15
+
16
+ ## File System Locations
17
+
18
+ CsvPath stores files in three places:
19
+ - The data staging location
20
+ - The csvpath files location
21
+ - An archive or namespace of results
22
+
23
+ The first two are in the `[inputs]` section as `files` and `csvpaths`. The default location for data files and csvpath files is under the `./inputs` directory. Each has its own folder. You can move these two locations anywhere you like.
24
+
25
+ The archive is set in the `[results]` section as `archive`. By default it is a directory named `archive`. You can name the archive anything you like. Keep in mind that as well as simply storing files, the archive is also a namespacing tool. If you have many data partners or separate data operations you may want to have separate archives. If you do use separate archives and you are running OpenLineage events you will see your events namespaced by archive name. See below for configuring OpenLineage event listeners.
26
+
27
+ In addition, there are cache, config, and log file locations. They have sensible defaults but can be moved, if needed.
28
+
29
+ ## File Extensions
30
+
31
+ There are two types of files you can set extensions for:
32
+ - CSV files
33
+ - CsvPath files
34
+
35
+ The defaults for these are:
36
+
37
+ ```ini
38
+ [csvpath_files]
39
+ extensions = txt, csvpath
40
+
41
+ [csv_files]
42
+ extensions = txt, csv, tsv, dat, tab, psv, ssv
43
+ ```
44
+
45
+ ## Error Handling
46
+
47
+ The error settings are for when CsvPath or CsvPaths instances encounter problems. The options are:
48
+ - `stop` - Halt processing; the CsvPath stopped property is set to True
49
+ - `fail` - Mark the currently running CsvPath as having failed
50
+ - `raise` - Raise the exception in as noisy a way as possible
51
+ - `quiet` - Do nothing that affects the system out; this protects command line redirection of `print()` output. Logging is also minimized such that errors that would release a lot of metadata are slimmed down.
52
+ - `collect` - Collect the errors in the error results for the CsvPath. This option is available with and without a CsvPaths instance.
53
+ - `print` - Prints the errors using the Printer interface to whatever printers are available. By default this goes to standard out.
54
+
55
+ Several of these settings can be configured together. `quiet` and `raise` do not coexist well; likewise `quiet` and `print`. `raise` will win over `quiet` because seeing problems lets you fix them. `print` is most useful in getting simple inline error messages when `raise` is off.
56
+
57
+ ## Logging
58
+
59
+ Logging levels are set at the major-component level. The components are:
60
+ - `csvpath`
61
+ - `csvpaths`
62
+ - `matcher`
63
+ - `scanner`
64
+
65
+ Four levels are available:
66
+ - `error`
67
+ - `warning`
68
+ - `debug`
69
+ - `info`
70
+
71
+ The levels are intended for the same functionality as their Python equivalents.
72
+
73
+ CsvPath logs are directed to a file. The log file settings are:
74
+ - `log_file` - a path to the log
75
+ - `log_files_to_keep` - a number of logs, 1 to 100, kept in rotation before being deleted
76
+ - `log_file_size` - an indication of roughly when a log file will be rotated
77
+
78
+ As an example:
79
+ ```ini
80
+ log_file = logs/csvpath.log
81
+ log_files_to_keep = 100
82
+ log_file_size = 52428800
83
+ ```
84
+
85
+ ## Listeners
86
+
87
+ CsvPath generates events that it converts to manifest files full of asset and runtime metadata. You can add OpenLineage listeners that will send results to an OpenLineage server like Marquez. In principle any OpenLineage API could receive CsvPath events, but only Marquez is tested and supported.
88
+
89
+ Be aware that OpenLineage events are currently handled inline and synchronously, not out of band or asynchronously. That means there is a small performance hit. Typically this would not be noticeable, but in certain instances it could be a factor. For example, CsvPath's hundreds of unit tests run slower when OpenLineage events are fired. This small performance hit may be remediated in the future if it becomes an issue.
90
+
91
+ The settings are:
92
+ ```ini
93
+ [listeners]
94
+ #uncomment for OpenLineage events to a Marquez server
95
+ #file = from csvpath.managers.files.file_listener_ol import OpenLineageFileListener
96
+ #paths = from csvpath.managers.paths.paths_listener_ol import OpenLineagePathsListener
97
+ #result = from csvpath.managers.results.result_listener_ol import OpenLineageResultListener
98
+ #results = from csvpath.managers.results.results_listener_ol import OpenLineageResultsListener
99
+
100
+ [marquez]
101
+ base_url = http://localhost:5000
102
+ ```
103
+
104
+ ## Custom Functions
105
+
106
+ <a href='https://github.com/csvpath/csvpath/blob/main/docs/functions/implementing_functions.md'>See this page for how to create and run custom functions</a>
107
+
108
+
109
+
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "csvpath"
3
- version = "0.0.498"
3
+ version = "0.0.499"
4
4
  description = "A declarative language for validating CSV, Excel, and other tabular data files"
5
5
  authors = ["David Kershaw <dk107dk@hotmail.com>"]
6
6
  readme = "README.md"
@@ -1,27 +0,0 @@
1
- from openlineage.client import OpenLineageClient
2
-
3
- from ..metadata import Metadata
4
- from ..listener import Listener
5
- from ..ol.event import EventBuilder
6
- from ..ol.job import JobBuilder
7
-
8
-
9
- class OpenLineageFileListener(Listener):
10
- def __init__(self, config=None):
11
- super().__init__(config)
12
- self.ol_client = None
13
-
14
- def metadata_update(self, mdata: Metadata) -> None:
15
- if self.ol_client is None:
16
- client_url = self.config._get("marquez", "base_url")
17
- if client_url is None:
18
- print(
19
- "WARNING: OpenLineage listeners are live but there is no Marquez API URL"
20
- )
21
- return
22
- # client_url = "http://localhost:5000"
23
- self.ol_client = OpenLineageClient(url=client_url)
24
-
25
- es = EventBuilder().build(mdata)
26
- for e in es:
27
- self.ol_client.emit(e)
@@ -1,27 +0,0 @@
1
- from openlineage.client.client import OpenLineageClient
2
-
3
- from ..metadata import Metadata
4
- from ..listener import Listener
5
- from ..ol.event import EventBuilder
6
- from ..ol.job import JobBuilder
7
-
8
-
9
- class OpenLineagePathsListener(Listener):
10
- def __init__(self, config=None):
11
- super().__init__(config)
12
- self.ol_client = None
13
-
14
- def metadata_update(self, mdata: Metadata) -> None:
15
- if self.ol_client is None:
16
- client_url = self.config._get("marquez", "base_url")
17
- if client_url is None:
18
- print(
19
- "WARNING: OpenLineage listeners are live but there is no Marquez API URL"
20
- )
21
- return
22
- # client_url = "http://localhost:5000"
23
- self.ol_client = OpenLineageClient(url=client_url)
24
-
25
- es = EventBuilder().build(mdata)
26
- for e in es:
27
- self.ol_client.emit(e)
@@ -1,27 +0,0 @@
1
- from openlineage.client.client import OpenLineageClient
2
-
3
- from ..metadata import Metadata
4
- from ..listener import Listener
5
- from ..ol.event import EventBuilder
6
- from ..ol.job import JobBuilder
7
-
8
-
9
- class OpenLineageResultListener(Listener):
10
- def __init__(self, config=None):
11
- super().__init__(config)
12
- self.ol_client = None
13
-
14
- def metadata_update(self, mdata: Metadata) -> None:
15
- if self.ol_client is None:
16
- client_url = self.config._get("marquez", "base_url")
17
- if client_url is None:
18
- print(
19
- "WARNING: OpenLineage listeners are live but there is no Marquez API URL"
20
- )
21
- return
22
- # client_url = "http://localhost:5000"
23
- self.ol_client = OpenLineageClient(url=client_url)
24
-
25
- es = EventBuilder().build(mdata)
26
- for e in es:
27
- self.ol_client.emit(e)
@@ -1,28 +0,0 @@
1
- from openlineage.client.client import OpenLineageClient
2
-
3
-
4
- from ..metadata import Metadata
5
- from ..listener import Listener
6
- from ..ol.event import EventBuilder
7
- from ..ol.job import JobBuilder
8
-
9
-
10
- class OpenLineageResultsListener(Listener):
11
- def __init__(self, config=None):
12
- super().__init__(config)
13
- self.ol_client = None
14
-
15
- def metadata_update(self, mdata: Metadata) -> None:
16
- if self.ol_client is None:
17
- client_url = self.config._get("marquez", "base_url")
18
- if client_url is None:
19
- print(
20
- "WARNING: OpenLineage listeners are live but there is no Marquez API URL"
21
- )
22
- return
23
- # client_url = "http://localhost:5000"
24
- self.ol_client = OpenLineageClient(url=client_url)
25
-
26
- es = EventBuilder().build(mdata)
27
- for e in es:
28
- self.ol_client.emit(e)
@@ -1,30 +0,0 @@
1
- from openlineage.client.client import OpenLineageClient
2
- from marquez_client import MarquezClient
3
-
4
- from ..metadata import Metadata
5
- from ..listener import Listener
6
- from ..ol.event import EventBuilder
7
- from ..ol.job import JobBuilder
8
- from ..ol.run import RunBuilder
9
-
10
-
11
- class OpenLineageRunListener(Listener):
12
- def __init__(self, config=None):
13
- super().__init__(config)
14
- self.client = None
15
- self.ol_client = None
16
-
17
- def metadata_update(self, mdata: Metadata) -> None:
18
- if self.client is None:
19
- client_url = self.config._get("marquez", "base_url")
20
- if client_url is None:
21
- print(
22
- "WARNING: OpenLineage listeners are live but there is no Marquez API URL"
23
- )
24
- return
25
- # client_url = "http://localhost:5000"
26
- self.client = MarquezClient(url=client_url)
27
- self.ol_client = OpenLineageClient(url=client_url)
28
- es = EventBuilder().build(mdata)
29
- for e in es:
30
- self.ol_client.emit(e)