csvpath 0.0.502__tar.gz → 0.0.504__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300)
  1. {csvpath-0.0.502 → csvpath-0.0.504}/PKG-INFO +8 -3
  2. {csvpath-0.0.502 → csvpath-0.0.504}/README.md +5 -2
  3. {csvpath-0.0.502 → csvpath-0.0.504}/config/config.ini +4 -2
  4. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/csvpath.py +4 -3
  5. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_cacher.py +6 -0
  6. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_manager.py +72 -47
  7. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_registrar.py +14 -13
  8. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/ckan.py +0 -1
  9. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/metadata.py +3 -2
  10. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/paths/paths_manager.py +29 -20
  11. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/paths/paths_registrar.py +16 -13
  12. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_errors_reader.py +5 -3
  13. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_printouts_reader.py +5 -3
  14. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/readers.py +0 -13
  15. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result.py +0 -1
  16. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_file_reader.py +8 -5
  17. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_metadata.py +0 -1
  18. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_registrar.py +27 -27
  19. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/result_serializer.py +19 -60
  20. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/results_manager.py +28 -21
  21. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/results_registrar.py +19 -16
  22. csvpath-0.0.504/csvpath/managers/run/run_registrar.py +59 -0
  23. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/args.py +0 -3
  24. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/scanning_lexer.py +1 -1
  25. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/cache.py +2 -0
  26. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/class_loader.py +4 -2
  27. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/config.py +4 -2
  28. csvpath-0.0.504/csvpath/util/file_info.py +29 -0
  29. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/file_readers.py +107 -30
  30. csvpath-0.0.504/csvpath/util/file_writers.py +97 -0
  31. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/line_spooler.py +31 -7
  32. csvpath-0.0.504/csvpath/util/nos.py +182 -0
  33. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/pandas_data_reader.py +10 -2
  34. csvpath-0.0.504/csvpath/util/s3/s3_data_reader.py +72 -0
  35. csvpath-0.0.504/csvpath/util/s3/s3_data_writer.py +44 -0
  36. csvpath-0.0.504/csvpath/util/s3/s3_fingerprinter.py +52 -0
  37. csvpath-0.0.504/csvpath/util/s3/s3_utils.py +66 -0
  38. csvpath-0.0.504/csvpath/util/s3/s3_xlsx_data_reader.py +37 -0
  39. csvpath-0.0.504/docs/images/ckan-logo-sm.png +0 -0
  40. {csvpath-0.0.502 → csvpath-0.0.504}/pyproject.toml +5 -3
  41. csvpath-0.0.502/csvpath/managers/run/run_registrar.py +0 -44
  42. csvpath-0.0.502/csvpath/util/s3_data_reader.py +0 -24
  43. {csvpath-0.0.502 → csvpath-0.0.504}/LICENSE +0 -0
  44. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/__init__.py +0 -0
  45. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/cli/__init__.py +0 -0
  46. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/cli/cli.py +0 -0
  47. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/cli/drill_down.py +0 -0
  48. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/csvpaths.py +0 -0
  49. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/__init__.py +0 -0
  50. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/files/file_metadata.py +0 -0
  51. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/ckan_listener.py +0 -0
  52. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/datafile.py +0 -0
  53. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ckan/dataset.py +0 -0
  54. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/event.py +0 -0
  55. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/event_result.py +0 -0
  56. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/file_listener_ol.py +0 -0
  57. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/job.py +0 -0
  58. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/ol_listener.py +0 -0
  59. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/paths_listener_ol.py +0 -0
  60. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/result_listener_ol.py +0 -0
  61. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/results_listener_ol.py +0 -0
  62. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/run.py +0 -0
  63. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/run_listener_ol.py +0 -0
  64. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/run_state.py +0 -0
  65. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/ol/sender.py +0 -0
  66. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/slack/event.py +0 -0
  67. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/integrations/slack/sender.py +0 -0
  68. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/listener.py +0 -0
  69. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/paths/paths_metadata.py +0 -0
  70. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/registrar.py +2 -2
  71. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_lines_reader.py +0 -0
  72. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/readers/file_unmatched_reader.py +0 -0
  73. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/results/results_metadata.py +0 -0
  74. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/run/run_listener_stdout.py +0 -0
  75. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/managers/run/run_metadata.py +0 -0
  76. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/__init__.py +0 -0
  77. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/__init__.py +0 -0
  78. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/all.py +0 -0
  79. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/andf.py +0 -0
  80. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/any.py +0 -0
  81. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/between.py +0 -0
  82. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/empty.py +0 -0
  83. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/exists.py +0 -0
  84. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/inf.py +0 -0
  85. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/no.py +0 -0
  86. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/notf.py +0 -0
  87. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/orf.py +0 -0
  88. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/boolean/yes.py +0 -0
  89. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count.py +0 -0
  90. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_bytes.py +0 -0
  91. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_headers.py +0 -0
  92. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_lines.py +0 -0
  93. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/count_scans.py +0 -0
  94. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/counter.py +0 -0
  95. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/every.py +0 -0
  96. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/has_matches.py +0 -0
  97. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/increment.py +0 -0
  98. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/tally.py +0 -0
  99. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/counting/total_lines.py +0 -0
  100. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/dates/now.py +0 -0
  101. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function.py +0 -0
  102. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function_factory.py +0 -0
  103. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function_finder.py +0 -0
  104. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/function_focus.py +0 -0
  105. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/append.py +0 -0
  106. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/collect.py +0 -0
  107. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/empty_stack.py +0 -0
  108. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/end.py +0 -0
  109. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/header_name.py +0 -0
  110. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
  111. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/headers.py +0 -0
  112. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/mismatch.py +0 -0
  113. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/replace.py +0 -0
  114. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/headers/reset_headers.py +0 -0
  115. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/advance.py +0 -0
  116. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/after_blank.py +0 -0
  117. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/dups.py +0 -0
  118. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/first.py +0 -0
  119. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/first_line.py +0 -0
  120. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/last.py +0 -0
  121. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/lines/stop.py +0 -0
  122. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/above.py +0 -0
  123. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/add.py +0 -0
  124. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/divide.py +0 -0
  125. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/equals.py +0 -0
  126. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/intf.py +0 -0
  127. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/mod.py +0 -0
  128. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/multiply.py +0 -0
  129. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/round.py +0 -0
  130. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/subtotal.py +0 -0
  131. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/subtract.py +0 -0
  132. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/math/sum.py +0 -0
  133. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/misc/fingerprint.py +0 -0
  134. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/misc/importf.py +0 -0
  135. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/misc/random.py +0 -0
  136. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/jinjaf.py +0 -0
  137. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/print_line.py +0 -0
  138. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/print_queue.py +0 -0
  139. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/printf.py +0 -0
  140. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/print/table.py +0 -0
  141. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/minf.py +0 -0
  142. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/percent.py +0 -0
  143. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/percent_unique.py +0 -0
  144. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/stats/stdev.py +0 -0
  145. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/concat.py +0 -0
  146. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/length.py +0 -0
  147. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/lower.py +0 -0
  148. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/metaphone.py +0 -0
  149. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/regex.py +0 -0
  150. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/starts_with.py +0 -0
  151. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/strip.py +0 -0
  152. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/substring.py +0 -0
  153. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/strings/upper.py +0 -0
  154. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/testing/debug.py +0 -0
  155. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/__init__.py +0 -0
  156. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/boolean.py +0 -0
  157. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/datef.py +0 -0
  158. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/decimal.py +0 -0
  159. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/nonef.py +0 -0
  160. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/string.py +0 -0
  161. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/types/type.py +0 -0
  162. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/validity/fail.py +0 -0
  163. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/validity/failed.py +0 -0
  164. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/validity/line.py +0 -0
  165. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/get.py +0 -0
  166. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/pushpop.py +0 -0
  167. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/put.py +0 -0
  168. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/track.py +0 -0
  169. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/functions/variables/variables.py +0 -0
  170. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/lark_parser.py +0 -0
  171. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/lark_transformer.py +0 -0
  172. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/matcher.py +0 -0
  173. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/__init__.py +0 -0
  174. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/equality.py +0 -0
  175. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/expression.py +0 -0
  176. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/header.py +0 -0
  177. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/matchable.py +0 -0
  178. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/qualified.py +0 -0
  179. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/reference.py +0 -0
  180. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/term.py +0 -0
  181. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/productions/variable.py +0 -0
  182. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/exceptions.py +0 -0
  183. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/expression_encoder.py +0 -0
  184. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/expression_utility.py +0 -0
  185. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/lark_print_parser.py +0 -0
  186. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/print_parser.py +0 -0
  187. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/matching/util/runtime_data_collector.py +0 -0
  188. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/explain_mode.py +0 -0
  189. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/files_mode.py +0 -0
  190. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/logic_mode.py +0 -0
  191. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/mode_controller.py +0 -0
  192. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/print_mode.py +0 -0
  193. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/return_mode.py +0 -0
  194. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/run_mode.py +0 -0
  195. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/source_mode.py +0 -0
  196. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/transfer_mode.py +0 -0
  197. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/unmatched_mode.py +0 -0
  198. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/modes/validation_mode.py +0 -0
  199. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/__init__.py +0 -0
  200. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/exceptions.py +0 -0
  201. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/parser.out +0 -0
  202. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/parsetab.py +0 -0
  203. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/scanning/scanner.py +0 -0
  204. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/config_exception.py +0 -0
  205. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/error.py +0 -0
  206. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/exceptions.py +0 -0
  207. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/last_line_stats.py +0 -0
  208. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/line_counter.py +0 -0
  209. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/line_monitor.py +0 -0
  210. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/log_utility.py +0 -0
  211. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/metadata_parser.py +0 -0
  212. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/printer.py +0 -0
  213. {csvpath-0.0.502 → csvpath-0.0.504}/csvpath/util/reference_parser.py +0 -0
  214. {csvpath-0.0.502 → csvpath-0.0.504}/docs/asbool.md +0 -0
  215. {csvpath-0.0.502 → csvpath-0.0.504}/docs/assignment.md +0 -0
  216. {csvpath-0.0.502 → csvpath-0.0.504}/docs/comments.md +0 -0
  217. {csvpath-0.0.502 → csvpath-0.0.504}/docs/config.md +0 -0
  218. {csvpath-0.0.502 → csvpath-0.0.504}/docs/examples.md +0 -0
  219. {csvpath-0.0.502 → csvpath-0.0.504}/docs/files.md +0 -0
  220. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/above.md +0 -0
  221. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/advance.md +0 -0
  222. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/after_blank.md +0 -0
  223. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/all.md +0 -0
  224. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/andor.md +0 -0
  225. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/any.md +0 -0
  226. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/average.md +0 -0
  227. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/between.md +0 -0
  228. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/collect.md +0 -0
  229. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/correlate.md +0 -0
  230. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/count.md +0 -0
  231. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/count_bytes.md +0 -0
  232. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/count_headers.md +0 -0
  233. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/counter.md +0 -0
  234. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/date.md +0 -0
  235. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/empty.md +0 -0
  236. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/empty_stack.md +0 -0
  237. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/end.md +0 -0
  238. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/every.md +0 -0
  239. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/fail.md +0 -0
  240. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/fingerprint.md +0 -0
  241. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/first.md +0 -0
  242. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/get.md +0 -0
  243. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/has_dups.md +0 -0
  244. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/has_matches.md +0 -0
  245. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/header.md +0 -0
  246. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/header_name.md +0 -0
  247. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/header_names_mismatch.md +0 -0
  248. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/implementing_functions.md +0 -0
  249. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/import.md +0 -0
  250. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/in.md +0 -0
  251. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/increment.md +0 -0
  252. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/intf.md +0 -0
  253. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/jinja.md +0 -0
  254. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/last.md +0 -0
  255. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/line.md +0 -0
  256. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/line_number.md +0 -0
  257. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/max.md +0 -0
  258. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/metaphone.md +0 -0
  259. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/mismatch.md +0 -0
  260. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/no.md +0 -0
  261. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/not.md +0 -0
  262. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/now.md +0 -0
  263. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/percent_unique.md +0 -0
  264. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/pop.md +0 -0
  265. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/print.md +0 -0
  266. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/print_line.md +0 -0
  267. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/print_queue.md +0 -0
  268. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/random.md +0 -0
  269. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/regex.md +0 -0
  270. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/replace.md +0 -0
  271. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/reset_headers.md +0 -0
  272. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/stdev.md +0 -0
  273. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/stop.md +0 -0
  274. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/string_functions.md +0 -0
  275. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/subtotal.md +0 -0
  276. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/subtract.md +0 -0
  277. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/sum.md +0 -0
  278. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/tally.md +0 -0
  279. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/total_lines.md +0 -0
  280. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/track.md +0 -0
  281. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/types.md +0 -0
  282. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/variables.md +0 -0
  283. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions/variables_and_headers.md +0 -0
  284. {csvpath-0.0.502 → csvpath-0.0.504}/docs/functions.md +0 -0
  285. {csvpath-0.0.502 → csvpath-0.0.504}/docs/grammar.md +0 -0
  286. {csvpath-0.0.502 → csvpath-0.0.504}/docs/headers.md +0 -0
  287. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/csvpath-icon-sm.png +0 -0
  288. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/csvpath-logo-wordmark-tight-2.svg +0 -0
  289. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-3.svg +0 -0
  290. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-4.svg +0 -0
  291. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
  292. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/logo-wordmark-white-trimmed.png +0 -0
  293. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/marquez-logo-sm.png +0 -0
  294. {csvpath-0.0.502 → csvpath-0.0.504}/docs/images/openlineage-logo-sm.png +0 -0
  295. {csvpath-0.0.502 → csvpath-0.0.504}/docs/paths.md +0 -0
  296. {csvpath-0.0.502 → csvpath-0.0.504}/docs/printing.md +0 -0
  297. {csvpath-0.0.502 → csvpath-0.0.504}/docs/qualifiers.md +0 -0
  298. {csvpath-0.0.502 → csvpath-0.0.504}/docs/references.md +0 -0
  299. {csvpath-0.0.502 → csvpath-0.0.504}/docs/terms.md +0 -0
  300. {csvpath-0.0.502 → csvpath-0.0.504}/docs/variables.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: csvpath
3
- Version: 0.0.502
3
+ Version: 0.0.504
4
4
  Summary: A declarative language for validating CSV, Excel, and other tabular data files
5
5
  Author: David Kershaw
6
6
  Author-email: dk107dk@hotmail.com
@@ -24,6 +24,7 @@ Classifier: Topic :: Text Processing
24
24
  Classifier: Topic :: Utilities
25
25
  Provides-Extra: pandas
26
26
  Provides-Extra: smartopen
27
+ Requires-Dist: boto3 (>=1.35.91,<2.0.0)
27
28
  Requires-Dist: bullet (>=2.2.0,<3.0.0)
28
29
  Requires-Dist: ckanapi (>=4.8,<5.0)
29
30
  Requires-Dist: inflect (>=7.4.0,<8.0.0)
@@ -38,6 +39,7 @@ Requires-Dist: pytest (>=8.3.3,<9.0.0)
38
39
  Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
39
40
  Requires-Dist: pytz (>=2024.2,<2025.0)
40
41
  Requires-Dist: requests (>=2.32.3,<3.0.0)
42
+ Requires-Dist: smart-open (>=7.1.0,<8.0.0) ; extra == "smartopen"
41
43
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
42
44
  Project-URL: Csvpath.org, https://www.csvpath.org
43
45
  Project-URL: Github, https://github.com/csvpath/csvpath.git
@@ -61,13 +63,16 @@ CsvPath's validation is inspired by:
61
63
  - XPath for XML files
62
64
  - The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
63
65
 
64
- CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create. CsvPath can stream events to an OpenLineage server, such as the open source Marquez server.
66
+ CsvPath is intended to fit tightly with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions and listeners are easy to create.
65
67
 
66
- <a href='https://openlineage.io' >
68
+ CsvPath can stream lineage events to an OpenLineage server, such as the open source Marquez server. Read about <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-openlineage" target="_blank">CsvPath and OpenLineage here</a>.
69
+ <br/><a href='https://openlineage.io' >
67
70
  <img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
68
71
  <a href='https://peppy-sprite-186812.netlify.app/' >
69
72
  <img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
70
73
 
74
+ Need to publish validated datasets to a CKAN data portal? <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-ckan" target="_blank">Read about how CsvPath is integrated with CKAN</a>.
75
+ <a href="https://ckan.org/" target="_blank"><img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/ckan-logo-sm.png" alt="CKAN Data Portal"/></a>
71
76
 
72
77
  Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
73
78
 
@@ -16,13 +16,16 @@ CsvPath's validation is inspired by:
16
16
  - XPath for XML files
17
17
  - The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
18
18
 
19
- CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create. CsvPath can stream events to an OpenLineage server, such as the open source Marquez server.
19
+ CsvPath is intended to fit tightly with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions and listeners are easy to create.
20
20
 
21
- <a href='https://openlineage.io' >
21
+ CsvPath can stream lineage events to an OpenLineage server, such as the open source Marquez server. Read about <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-openlineage" target="_blank">CsvPath and OpenLineage here</a>.
22
+ <br/><a href='https://openlineage.io' >
22
23
  <img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/openlineage-logo-sm.png" alt="OpenLineage"/></a>
23
24
  <a href='https://peppy-sprite-186812.netlify.app/' >
24
25
  <img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/marquez-logo-sm.png" alt="Marquez Server"/></a>
25
26
 
27
+ Need to publish validated datasets to a CKAN data portal? <a href="https://www.csvpath.org/getting-started/getting-started-with-csvpath-+-ckan" target="_blank">Read about how CsvPath is integrated with CKAN</a>.
28
+ <a href="https://ckan.org/" target="_blank"><img target='_blank' src="https://github.com/csvpath/csvpath/blob/main/docs/images/ckan-logo-sm.png" alt="CKAN Data Portal"/></a>
26
29
 
27
30
  Read more about CsvPath and see CSV, Excel, and Data Frames validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
28
31
 
@@ -29,8 +29,6 @@ groups =
29
29
  #slack, marquez, ckan
30
30
 
31
31
  # add ckan to the list of groups above to publish results to a CKAN data portal
32
- ckan.paths = from csvpath.managers.integrations.ckan.ckan_listener import CkanListener
33
- ckan.result = from csvpath.managers.integrations.ckan.ckan_listener import CkanListener
34
32
  ckan.results = from csvpath.managers.integrations.ckan.ckan_listener import CkanListener
35
33
 
36
34
  #add marquez to the list of groups above for OpenLineage events to a local Marquez
@@ -45,6 +43,10 @@ slack.paths = from csvpath.managers.integrations.slack.sender import SlackSender
45
43
  slack.result = from csvpath.managers.integrations.slack.sender import SlackSender
46
44
  slack.results = from csvpath.managers.integrations.slack.sender import SlackSender
47
45
 
46
+ [ckan]
47
+ server = http://localhost:80
48
+ api_token = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiI3akJwc1ZuSkVrZm1aNnBtVTJfTW5CNlJXZ211YjdOOHVXZ1l1cUFDa0Q4IiwiaWF0IjoxNzM0NzE4NDQ3fQ.QXWXoJoSxVES4NwXYBteYUD7enX9D5T2htmETLGFzrs
49
+
48
50
  [marquez]
49
51
  base_url = http://localhost:5000
50
52
  endpoint = api/v1/lineage
@@ -5,7 +5,7 @@ import time
5
5
  import os
6
6
  import hashlib
7
7
  from datetime import datetime, timezone
8
- from typing import List, Dict, Any
8
+ from typing import List, Dict, Any, Self
9
9
  from collections.abc import Iterator
10
10
  from abc import ABC, abstractmethod
11
11
  from .util.config import Config
@@ -72,7 +72,7 @@ class CsvPathPublic(ABC):
72
72
  """Advances the iteration by ff rows. -1 means to the end of the file."""
73
73
 
74
74
  @abstractmethod
75
- def fast_forward(self, csvpath: str = None) -> None: # pragma: no cover
75
+ def fast_forward(self, csvpath: str = None) -> Self: # pragma: no cover
76
76
  """Scans to the end of the CSV file. All scanned rows will be
77
77
  considered for match and variables and side effects will happen,
78
78
  but no rows will be returned or stored. -1 means to the end of
@@ -965,7 +965,7 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
965
965
  self.lines = None
966
966
  return lines
967
967
 
968
- def fast_forward(self, csvpath=None) -> None:
968
+ def fast_forward(self, csvpath=None) -> Self:
969
969
  """Runs the path for all rows of the file. Variables are collected
970
970
  and side effects like print happen. No lines are collected.
971
971
  """
@@ -973,6 +973,7 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
973
973
  self.parse(csvpath)
974
974
  for _ in self.next():
975
975
  pass
976
+ return self
976
977
 
977
978
  def next(self, csvpath=None):
978
979
  """Iterates over the lines in the CSV file returning those that match
@@ -13,6 +13,8 @@ class FileCacher:
13
13
  self.pathed_lines_and_headers = {}
14
14
 
15
15
  def get_new_line_monitor(self, filename: str) -> LineMonitor:
16
+ if filename is None:
17
+ raise ValueError("Filename cannot be None")
16
18
  if filename not in self.pathed_lines_and_headers:
17
19
  self._find_lines_and_headers(filename)
18
20
  lm = self.pathed_lines_and_headers[filename][0]
@@ -25,6 +27,8 @@ class FileCacher:
25
27
  return self.pathed_lines_and_headers[filename][1][:]
26
28
 
27
29
  def _find_lines_and_headers(self, filename: str) -> None:
30
+ if filename is None:
31
+ raise ValueError("Filename cannot be None")
28
32
  lm, headers = self._cached_lines_and_headers(filename)
29
33
  if lm is None or headers is None:
30
34
  lc = LineCounter(self.csvpaths)
@@ -33,6 +37,8 @@ class FileCacher:
33
37
  self.pathed_lines_and_headers[filename] = (lm, headers)
34
38
 
35
39
  def _cached_lines_and_headers(self, filename: str) -> Tuple[LineMonitor, List[str]]:
40
+ if filename is None:
41
+ raise ValueError("Filename cannot be None")
36
42
  lm = LineMonitor()
37
43
  json = self.cache.cached_text(filename, "json")
38
44
  if json is not None and not json.strip() == "":
@@ -1,21 +1,20 @@
1
1
  import os
2
2
  import json
3
3
  import csv
4
- import hashlib
5
- import shutil
6
4
  from json import JSONDecodeError
7
- from typing import Dict, List, Tuple
8
5
  from csvpath.util.error import ErrorHandler
9
6
  from csvpath.util.file_readers import DataFileReader
7
+ from csvpath.util.file_writers import DataFileWriter
10
8
  from csvpath.util.reference_parser import ReferenceParser
11
9
  from csvpath.util.exceptions import InputException, FileException
10
+ from csvpath.util.nos import Nos
12
11
  from .file_registrar import FileRegistrar
13
12
  from .file_cacher import FileCacher
14
13
  from .file_metadata import FileMetadata
15
14
 
16
15
 
17
16
  class FileManager:
18
- def __init__(self, *, named_files: Dict[str, str] = None, csvpaths=None):
17
+ def __init__(self, *, named_files: dict[str, str] = None, csvpaths=None):
19
18
  if named_files is None:
20
19
  named_files = {}
21
20
  self._csvpaths = csvpaths
@@ -43,24 +42,25 @@ class FileManager:
43
42
  def assure_named_file_home(self, name: str) -> str:
44
43
  home = self.named_file_home(name)
45
44
  if not os.path.exists(home):
46
- os.makedirs(home)
45
+ Nos(home).makedirs()
47
46
  return home
48
47
 
49
48
  #
50
49
  # file homes are paths to files like:
51
50
  # inputs/named_files/March-2024/March-2024.csv/March-2024.csv
52
- # which become paths to hash-named file versions like:
51
+ # which become paths to fingerprint-named file versions like:
53
52
  # inputs/named_files/March-2024/March-2024.csv/12467d811d1589ede586e3a42c41046641bedc1c73941f4c21e2fd2966f188b4.csv
54
53
  # once the files have been fingerprinted
55
54
  #
56
55
  def assure_file_home(self, name: str, path: str) -> str:
57
56
  if path.find("#") > -1:
58
57
  path = path[0 : path.find("#")]
59
- fname = path if path.rfind(os.sep) == -1 else path[path.rfind(os.sep) + 1 :]
58
+ sep = Nos(path).sep
59
+ fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
60
60
  home = self.named_file_home(name)
61
61
  home = os.path.join(home, fname)
62
- if not os.path.exists(home):
63
- os.makedirs(home)
62
+ if not Nos(home).exists():
63
+ Nos(home).makedirs()
64
64
  return home
65
65
 
66
66
  @property
@@ -70,28 +70,35 @@ class FileManager:
70
70
  @property
71
71
  def named_file_names(self) -> list:
72
72
  b = self.named_files_dir
73
- ns = [n for n in os.listdir(b) if not os.path.isfile(os.path.join(b, n))]
73
+ ns = [n for n in Nos(b).listdir() if not Nos(os.path.join(b, n)).isfile()]
74
74
  return ns
75
75
 
76
76
  def name_exists(self, name: str) -> bool:
77
77
  p = self.named_file_home(name)
78
- return os.path.exists(p)
78
+ b = Nos(p).dir_exists()
79
+ return b
79
80
 
80
81
  def remove_named_file(self, name: str) -> None:
81
82
  p = os.path.join(self.named_files_dir, name)
82
- shutil.rmtree(p)
83
+ Nos(p).remove()
83
84
 
84
85
  def remove_all_named_files(self) -> None:
85
86
  names = self.named_file_names
86
87
  for name in names:
87
88
  self.remove_named_file(name)
88
89
 
89
- def set_named_files(self, nf: Dict[str, str]) -> None:
90
+ def set_named_files(self, nf: dict[str, str]) -> None:
90
91
  for k, v in nf.items():
91
92
  self.add_named_file(name=k, path=v)
92
93
 
93
94
  def set_named_files_from_json(self, filename: str) -> None:
95
+ """named-files from json files are always local"""
94
96
  try:
97
+ #
98
+ # TODO: named-files json files are always local. they should
99
+ # be able to be on s3 so that we are completely independent of
100
+ # the local disk with respect to the file manager
101
+ #
95
102
  with open(filename, "r", encoding="utf-8") as f:
96
103
  j = json.load(f)
97
104
  self.set_named_files(j)
@@ -99,7 +106,7 @@ class FileManager:
99
106
  ErrorHandler(csvpaths=self._csvpaths).handle_error(ex)
100
107
 
101
108
  def add_named_files_from_dir(self, dirname: str):
102
- dlist = os.listdir(dirname)
109
+ dlist = Nos(dirname).listdir()
103
110
  base = dirname
104
111
  for p in dlist:
105
112
  _ = p.lower()
@@ -115,7 +122,6 @@ class FileManager:
115
122
 
116
123
  #
117
124
  # -------------------------------------
118
- # move functions to this class and file_data_filesystem_storekeeper
119
125
  #
120
126
  def add_named_file(self, *, name: str, path: str) -> None:
121
127
  #
@@ -156,30 +162,41 @@ class FileManager:
156
162
  mdata.fingerprint = h
157
163
  mdata.file_path = rpath
158
164
  mdata.file_home = file_home
159
- mdata.file_name = file_home[file_home.rfind(os.sep) + 1 :]
165
+ mdata.file_name = file_home[file_home.rfind(Nos(file_home).sep) + 1 :]
160
166
  mdata.name_home = name_home
161
167
  mdata.mark = mark
162
168
  self.registrar.register_complete(mdata)
163
169
 
164
170
  def _copy_in(self, path, home) -> None:
165
- fname = path if path.rfind(os.sep) == -1 else path[path.rfind(os.sep) + 1 :]
171
+ sep = Nos(path).sep
172
+ fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
166
173
  # creates
167
174
  # a/file.csv -> named_files/name/file.csv/file.csv
168
175
  # the dir name matching the resulting file name is correct
169
176
  # once the file is landed and fingerprinted, the file
170
177
  # name is changed.
171
178
  temp = os.path.join(home, fname)
172
- if path.startswith("s3:"):
173
- self._copy_down(path, temp)
179
+ #
180
+ # this is another place that is too tied to s3 vs. local. we'll have
181
+ # other source/sinks to support.
182
+ #
183
+ if path.startswith("s3:") and not home.startswith("s3"):
184
+ self._copy_down(path, temp, mode="wb")
185
+ elif path.startswith("s3:") and home.startswith("s3"):
186
+ Nos(path).copy(temp)
187
+ elif not path.startswith("s3:") and not home.startswith("s3"):
188
+ self._copy_down(path, temp, mode="wb")
189
+ elif not path.startswith("s3:") and home.startswith("s3"):
190
+ self._copy_down(path, temp, mode="wb")
174
191
  else:
175
- shutil.copy(path, temp)
192
+ ... # not possible. just being explicit for the moment.
176
193
  return temp
177
194
 
178
- def _copy_down(self, path, temp) -> None:
179
- reader = DataFileReader(path)
180
- with open(temp, "w", encoding="utf-8") as file:
181
- for line in reader.next_raw():
182
- file.write(line)
195
+ def _copy_down(self, path, temp, mode="wb") -> None:
196
+ with DataFileReader(path) as reader:
197
+ with DataFileWriter(path=temp, mode=mode) as writer:
198
+ for line in reader.next_raw():
199
+ writer.append(line)
183
200
 
184
201
  #
185
202
  # can take a reference. the ref would only be expected to point
@@ -201,18 +218,21 @@ class FileManager:
201
218
  else:
202
219
  if not self.name_exists(name):
203
220
  return None
204
- ret = self.registrar.registered_file(self.named_file_home(name))
221
+ n = self.named_file_home(name)
222
+ ret = self.registrar.registered_file(n)
205
223
  return ret
206
224
 
207
225
  def get_fingerprint_for_name(self, name) -> str:
208
226
  if name.startswith("$"):
209
227
  # atm, we don't give fingerprints for references doing rewind/replay
210
228
  return ""
229
+ #
230
+ # note: this is not creating fingerprints, just getting existing ones.
231
+ #
211
232
  return self.registrar.get_fingerprint(self.named_file_home(name))
212
233
 
213
234
  #
214
235
  # -------------------------------------
215
- # move to file_data_filesystem_storekeeper?
216
236
  #
217
237
  def get_named_file_reader(self, name: str) -> DataFileReader:
218
238
  path = self.get_named_file(name)
@@ -228,7 +248,8 @@ class FileManager:
228
248
  )
229
249
 
230
250
  def _fingerprint(self, path) -> str:
231
- fname = path if path.rfind(os.sep) == -1 else path[path.rfind(os.sep) + 1 :]
251
+ sep = Nos(path).sep
252
+ fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
232
253
  t = None
233
254
  i = fname.find(".")
234
255
  if i > -1:
@@ -240,25 +261,29 @@ class FileManager:
240
261
  # creating the initial file name, where the file starts
241
262
  #
242
263
  fpath = os.path.join(path, fname)
243
- with open(fpath, "rb") as f:
244
- h = hashlib.file_digest(f, hashlib.sha256)
245
- h = h.hexdigest()
246
- #
247
- # creating the new path using the hash as filename
248
- #
249
- hpath = os.path.join(path, h)
250
- if t is not None:
251
- hpath = f"{hpath}.{t}"
252
- #
253
- # if we're re-adding the file we don't need to make
254
- # another copy of it. re-adds are fine.
255
- #
256
- b = os.path.exists(hpath)
257
- if b:
258
- os.remove(fpath)
259
- return hpath, h
264
+ h = None
260
265
  #
261
- # if a first add, rename the file to the hash + ext
266
+ # this version should work local and minimize traffic when in S3
262
267
  #
263
- os.rename(fpath, hpath)
268
+ with DataFileReader(fpath) as f:
269
+ h = f.fingerprint()
270
+ #
271
+ # creating the new path using the fingerprint as filename
272
+ #
273
+ hpath = os.path.join(path, h)
274
+ if t is not None:
275
+ hpath = f"{hpath}.{t}"
276
+ #
277
+ # if we're re-adding the file we don't need to make
278
+ # another copy of it. re-adds are fine.
279
+ #
280
+ # need an s3 way to do this
281
+ b = Nos(hpath).exists()
282
+ if b:
283
+ Nos(fpath).remove()
284
+ return hpath, h
285
+ #
286
+ # if a first add, rename the file to the fingerprint + ext
287
+ #
288
+ Nos(fpath).rename(hpath)
264
289
  return hpath, h
@@ -1,10 +1,10 @@
1
1
  import os
2
2
  import json
3
- import hashlib
4
- import shutil
5
3
  from datetime import datetime
6
4
  from csvpath.util.exceptions import InputException, FileException
7
5
  from csvpath.util.file_readers import DataFileReader
6
+ from csvpath.util.file_writers import DataFileWriter
7
+ from csvpath.util.nos import Nos
8
8
  from csvpath.managers.registrar import Registrar
9
9
  from csvpath.managers.listener import Listener
10
10
  from csvpath.managers.metadata import Metadata
@@ -30,17 +30,17 @@ class FileRegistrar(Registrar, Listener):
30
30
  return man[len(man) - 1]["fingerprint"]
31
31
 
32
32
  def manifest_path(self, home) -> str:
33
- if not os.path.exists(home):
33
+ if not Nos(home).dir_exists():
34
34
  raise InputException(f"Named file home does not exist: {home}")
35
35
  mf = os.path.join(home, "manifest.json")
36
- if not os.path.exists(mf):
37
- with open(mf, "w", encoding="utf-8") as file:
38
- file.write("[]")
36
+ if not Nos(mf).exists():
37
+ with DataFileWriter(path=mf, mode="w") as writer:
38
+ writer.append("[]")
39
39
  return mf
40
40
 
41
41
  def get_manifest(self, mpath) -> list:
42
- with open(mpath, "r", encoding="utf-8") as file:
43
- return json.load(file)
42
+ with DataFileReader(mpath) as reader:
43
+ return json.load(reader.source)
44
44
 
45
45
  def metadata_update(self, mdata: Metadata) -> None:
46
46
  path = mdata.origin_path
@@ -60,8 +60,8 @@ class FileRegistrar(Registrar, Listener):
60
60
  mani["mark"] = mark
61
61
  jdata = self.get_manifest(manifest_path)
62
62
  jdata.append(mani)
63
- with open(manifest_path, "w", encoding="utf-8") as file:
64
- json.dump(jdata, file, indent=2)
63
+ with DataFileWriter(path=manifest_path, mode="w") as writer:
64
+ json.dump(jdata, writer.sink, indent=2)
65
65
 
66
66
  def register_complete(self, mdata: Metadata) -> None:
67
67
  path = mdata.origin_path
@@ -75,7 +75,8 @@ class FileRegistrar(Registrar, Listener):
75
75
  raise InputException(
76
76
  f"File mgr and registrar marks should match: {mdata.mark}, {mark}"
77
77
  )
78
- if not path.startswith("s3:") and not os.path.exists(path):
78
+ if not path.startswith("s3:") and not Nos(path).exists():
79
+ # if not path.startswith("s3:") and not os.path.exists(path):
79
80
  #
80
81
  # try for a data reader in case we're smart-opening
81
82
  #
@@ -134,8 +135,8 @@ class FileRegistrar(Registrar, Listener):
134
135
 
135
136
  def registered_file(self, home: str) -> str:
136
137
  mpath = self.manifest_path(home)
137
- with open(mpath, "r", encoding="utf-8") as file:
138
- mdata = json.load(file)
138
+ with DataFileReader(mpath) as reader:
139
+ mdata = json.load(reader.source)
139
140
  if mdata is None or len(mdata) == 0:
140
141
  raise InputException(f"Manifest for {home} at {mpath} is empty")
141
142
  m = mdata[len(mdata) - 1]
@@ -4,7 +4,6 @@ from csvpath import CsvPaths
4
4
  from csvpath.util.config import Config
5
5
  from .dataset import Dataset
6
6
  from .datafile import Datafile
7
- import hashlib
8
7
  import os
9
8
 
10
9
 
@@ -14,6 +14,7 @@ class Metadata(ABC):
14
14
  self._uuid = uuid4()
15
15
  self.manifest_path: str = None
16
16
  self.archive_name: str = None
17
+ self.archive_path: str = None
17
18
  self._base_path = None
18
19
  self._named_files_root: str = None
19
20
  self._named_paths_root: str = None
@@ -54,6 +55,8 @@ class Metadata(ABC):
54
55
  self.manifest_path = m.get("manifest_path")
55
56
  if m.get("archive_name") is not None:
56
57
  self.archive_name = m.get("archive_name")
58
+ if m.get("archive_path") is not None:
59
+ self.archive_path = m.get("archive_path")
57
60
 
58
61
  @property
59
62
  def uuid(self) -> UUID:
@@ -68,7 +71,6 @@ class Metadata(ABC):
68
71
  @property
69
72
  def uuid_string(self) -> str:
70
73
  return str(self._uuid)
71
- # return self._uuid.hex
72
74
 
73
75
  @uuid_string.setter
74
76
  def uuid_string(self, u: str) -> None:
@@ -128,7 +130,6 @@ class Metadata(ABC):
128
130
 
129
131
  @time_completed_string.setter
130
132
  def time_completed_string(self, s: str) -> None:
131
- # self._time_completed = datetime.date.fromisoformat(s)
132
133
  self._time_completed = parser.parse(s)
133
134
 
134
135
  @property
@@ -1,7 +1,6 @@
1
1
  # pylint: disable=C0114
2
2
  import os
3
3
  import json
4
- import shutil
5
4
  from typing import NewType
6
5
  from json import JSONDecodeError
7
6
  from csvpath import CsvPath
@@ -9,6 +8,9 @@ from csvpath.util.exceptions import InputException
9
8
  from csvpath.util.error import ErrorHandler
10
9
  from csvpath.util.metadata_parser import MetadataParser
11
10
  from csvpath.util.reference_parser import ReferenceParser
11
+ from csvpath.util.file_readers import DataFileReader
12
+ from csvpath.util.file_writers import DataFileWriter
13
+ from csvpath.util.nos import Nos
12
14
  from .paths_registrar import PathsRegistrar
13
15
  from .paths_metadata import PathsMetadata
14
16
 
@@ -37,8 +39,8 @@ class PathsManager:
37
39
 
38
40
  def named_paths_home(self, name: NamedPathsName) -> str:
39
41
  home = os.path.join(self.named_paths_dir, name)
40
- if not os.path.exists(home):
41
- os.makedirs(home)
42
+ if not Nos(home).exists():
43
+ Nos(home).makedirs()
42
44
  return home
43
45
 
44
46
  @property
@@ -61,8 +63,8 @@ class PathsManager:
61
63
  if directory is None:
62
64
  ie = InputException("Named paths collection name needed")
63
65
  ErrorHandler(csvpaths=self.csvpaths).handle_error(ie)
64
- if os.path.isdir(directory):
65
- dlist = os.listdir(directory)
66
+ if not Nos(directory).isfile():
67
+ dlist = Nos(directory).listdir()
66
68
  base = directory
67
69
  for p in dlist:
68
70
  if p[0] == ".":
@@ -142,8 +144,9 @@ class PathsManager:
142
144
  mdata = PathsMetadata(self.csvpaths.config)
143
145
  mdata.archive_name = self.csvpaths.config.archive_name
144
146
  mdata.named_paths_name = name
145
- mdata.named_paths_home = f"{mdata.named_paths_root}{os.sep}{name}"
146
- mdata.group_file_path = f"{mdata.named_paths_home}{os.sep}group.csvpaths"
147
+ sep = Nos(mdata.named_paths_root).sep
148
+ mdata.named_paths_home = f"{mdata.named_paths_root}{sep}{name}"
149
+ mdata.group_file_path = f"{mdata.named_paths_home}{sep}group.csvpaths"
147
150
  mdata.named_paths = paths
148
151
  mdata.named_paths_identities = ids
149
152
  mdata.named_paths_count = len(ids)
@@ -193,15 +196,15 @@ class PathsManager:
193
196
  def store_json_paths_file(self, name: str, jsonpath: str) -> None:
194
197
  home = self.named_paths_home(name)
195
198
  j = ""
196
- with open(jsonpath, "r", encoding="utf-8") as file:
199
+ with DataFileReader(jsonpath) as file:
197
200
  j = file.read()
198
- with open(os.path.join(home, "definition.json"), "w", encoding="utf-8") as file:
199
- file.write(j)
201
+ with DataFileWriter(path=os.path.join(home, "definition.json")) as writer:
202
+ writer.write(j)
200
203
 
201
204
  @property
202
205
  def named_paths_names(self) -> list[str]:
203
206
  path = self.named_paths_dir
204
- names = [n for n in os.listdir(path) if not n.startswith(".")]
207
+ names = [n for n in Nos(path).listdir() if not n.startswith(".")]
205
208
  return names
206
209
 
207
210
  def remove_named_paths(self, name: NamedPathsName, strict: bool = False) -> None:
@@ -210,7 +213,7 @@ class PathsManager:
210
213
  if not self.has_named_paths(name):
211
214
  return
212
215
  home = self.named_paths_home(name)
213
- shutil.rmtree(home)
216
+ Nos(home).remove()
214
217
 
215
218
  def remove_all_named_paths(self) -> None:
216
219
  names = self.named_paths_names
@@ -219,7 +222,7 @@ class PathsManager:
219
222
 
220
223
  def has_named_paths(self, name: NamedPathsName) -> bool:
221
224
  path = os.path.join(self.named_paths_dir, name)
222
- return os.path.exists(path)
225
+ return Nos(path).dir_exists()
223
226
 
224
227
  def number_of_named_paths(self, name: NamedPathsName) -> int:
225
228
  return len(self._get_named_paths(name))
@@ -237,9 +240,9 @@ class PathsManager:
237
240
  s = ""
238
241
  path = self.named_paths_home(name)
239
242
  grp = os.path.join(path, "group.csvpaths")
240
- if os.path.exists(grp):
241
- with open(grp, "r", encoding="utf-8") as file:
242
- s = file.read()
243
+ if Nos(grp).exists():
244
+ with DataFileReader(grp) as reader:
245
+ s = reader.read()
243
246
  cs = s.split("---- CSVPATH ----")
244
247
  cs = [s for s in cs if s.strip() != ""]
245
248
  #
@@ -258,8 +261,11 @@ class PathsManager:
258
261
 
259
262
  def _copy_in(self, name, csvpathstr) -> None:
260
263
  temp = self._group_file_path(name)
261
- with open(temp, "w", encoding="utf-8") as file:
262
- file.write(csvpathstr)
264
+ #
265
+ # NOTE: written with DataFileWriter, not open(), so the group file is not assumed to be local (S3 is the intended alternative).
266
+ #
267
+ with DataFileWriter(path=temp, mode="w") as writer:
268
+ writer.append(csvpathstr)
263
269
  return temp
264
270
 
265
271
  def _group_file_path(self, name: NamedPathsName) -> str:
@@ -267,8 +273,11 @@ class PathsManager:
267
273
  return temp
268
274
 
269
275
  def _get_csvpaths_from_file(self, file_path: str) -> list[str]:
270
- with open(file_path, "r", encoding="utf-8") as f:
271
- cp = f.read()
276
+ #
277
+ # NOTE: read with DataFileReader, not open(), so the file is not assumed to be local (S3 is the intended alternative).
278
+ #
279
+ with DataFileReader(file_path) as reader:
280
+ cp = reader.read()
272
281
  _ = [
273
282
  apath.strip()
274
283
  for apath in cp.split(PathsManager.MARKER)