csvpath 0.0.488__tar.gz → 0.0.490__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. {csvpath-0.0.488 → csvpath-0.0.490}/PKG-INFO +66 -36
  2. {csvpath-0.0.488 → csvpath-0.0.490}/README.md +60 -33
  3. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/csvpath.py +102 -20
  4. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/csvpaths.py +66 -22
  5. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/csvpaths_manager.py +161 -29
  6. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/file_manager.py +73 -20
  7. csvpath-0.0.490/csvpath/managers/file_registrar.py +204 -0
  8. csvpath-0.0.490/csvpath/managers/paths_registrar.py +98 -0
  9. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/result.py +30 -1
  10. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/result_serializer.py +31 -7
  11. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/results_manager.py +139 -1
  12. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function_factory.py +7 -2
  13. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function_finder.py +4 -0
  14. csvpath-0.0.490/csvpath/matching/functions/headers/append.py +56 -0
  15. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/collect.py +1 -0
  16. csvpath-0.0.490/csvpath/matching/functions/misc/fingerprint.py +71 -0
  17. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/matcher.py +1 -0
  18. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/print_parser.py +0 -83
  19. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/runtime_data_collector.py +21 -3
  20. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/cache.py +1 -1
  21. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/config.py +60 -0
  22. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/error.py +11 -11
  23. csvpath-0.0.490/csvpath/util/file_readers.py +180 -0
  24. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/line_counter.py +2 -2
  25. csvpath-0.0.490/csvpath/util/pandas_data_reader.py +48 -0
  26. csvpath-0.0.490/csvpath/util/reference_parser.py +158 -0
  27. csvpath-0.0.490/csvpath/util/s3_data_reader.py +24 -0
  28. csvpath-0.0.490/docs/functions/fingerprint.md +38 -0
  29. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions.md +1 -0
  30. {csvpath-0.0.488 → csvpath-0.0.490}/pyproject.toml +9 -3
  31. csvpath-0.0.488/config/config copy.ini +0 -29
  32. csvpath-0.0.488/csvpath/matching/functions/headers/append.py +0 -31
  33. csvpath-0.0.488/csvpath/util/file_readers.py +0 -86
  34. {csvpath-0.0.488 → csvpath-0.0.490}/LICENSE +0 -0
  35. {csvpath-0.0.488 → csvpath-0.0.490}/config/config.ini +0 -0
  36. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/__init__.py +0 -0
  37. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/managers/__init__.py +0 -0
  38. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/__init__.py +0 -0
  39. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/__init__.py +0 -0
  40. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/args.py +0 -0
  41. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/all.py +0 -0
  42. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/andf.py +0 -0
  43. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/any.py +0 -0
  44. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/between.py +0 -0
  45. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/empty.py +0 -0
  46. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/exists.py +0 -0
  47. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/inf.py +0 -0
  48. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/no.py +0 -0
  49. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/notf.py +0 -0
  50. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/orf.py +0 -0
  51. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/boolean/yes.py +0 -0
  52. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count.py +0 -0
  53. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count_headers.py +0 -0
  54. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count_lines.py +0 -0
  55. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/count_scans.py +0 -0
  56. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/counter.py +0 -0
  57. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/every.py +0 -0
  58. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/has_matches.py +0 -0
  59. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/increment.py +0 -0
  60. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/tally.py +0 -0
  61. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/counting/total_lines.py +0 -0
  62. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/dates/now.py +0 -0
  63. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function.py +0 -0
  64. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/function_focus.py +0 -0
  65. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/empty_stack.py +0 -0
  66. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/end.py +0 -0
  67. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/header_name.py +0 -0
  68. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/header_names_mismatch.py +0 -0
  69. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/headers.py +0 -0
  70. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/mismatch.py +0 -0
  71. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/replace.py +0 -0
  72. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/headers/reset_headers.py +0 -0
  73. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/advance.py +0 -0
  74. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/after_blank.py +0 -0
  75. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/dups.py +0 -0
  76. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/first.py +0 -0
  77. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/first_line.py +0 -0
  78. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/last.py +0 -0
  79. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/lines/stop.py +0 -0
  80. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/above.py +0 -0
  81. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/add.py +0 -0
  82. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/divide.py +0 -0
  83. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/equals.py +0 -0
  84. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/intf.py +0 -0
  85. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/mod.py +0 -0
  86. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/multiply.py +0 -0
  87. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/round.py +0 -0
  88. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/subtotal.py +0 -0
  89. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/subtract.py +0 -0
  90. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/math/sum.py +0 -0
  91. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/misc/importf.py +0 -0
  92. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/misc/random.py +0 -0
  93. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/jinjaf.py +0 -0
  94. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/print_line.py +0 -0
  95. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/print_queue.py +0 -0
  96. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/printf.py +0 -0
  97. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/print/table.py +0 -0
  98. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/minf.py +0 -0
  99. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/percent.py +0 -0
  100. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/percent_unique.py +0 -0
  101. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/stats/stdev.py +0 -0
  102. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/concat.py +0 -0
  103. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/length.py +0 -0
  104. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/lower.py +0 -0
  105. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/metaphone.py +0 -0
  106. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/regex.py +0 -0
  107. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/starts_with.py +0 -0
  108. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/strip.py +0 -0
  109. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/substring.py +0 -0
  110. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/strings/upper.py +0 -0
  111. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/testing/debug.py +0 -0
  112. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/__init__.py +0 -0
  113. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/boolean.py +0 -0
  114. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/datef.py +0 -0
  115. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/decimal.py +0 -0
  116. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/nonef.py +0 -0
  117. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/string.py +0 -0
  118. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/types/type.py +0 -0
  119. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/validity/fail.py +0 -0
  120. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/validity/failed.py +0 -0
  121. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/validity/line.py +0 -0
  122. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/get.py +0 -0
  123. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/pushpop.py +0 -0
  124. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/put.py +0 -0
  125. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/track.py +0 -0
  126. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/functions/variables/variables.py +0 -0
  127. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/lark_parser.py +0 -0
  128. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/lark_transformer.py +0 -0
  129. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/__init__.py +0 -0
  130. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/equality.py +0 -0
  131. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/expression.py +0 -0
  132. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/header.py +0 -0
  133. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/matchable.py +0 -0
  134. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/qualified.py +0 -0
  135. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/reference.py +0 -0
  136. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/term.py +0 -0
  137. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/productions/variable.py +0 -0
  138. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/exceptions.py +0 -0
  139. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/expression_encoder.py +0 -0
  140. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/expression_utility.py +0 -0
  141. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/matching/util/lark_print_parser.py +0 -0
  142. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/__init__.py +0 -0
  143. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/exceptions.py +0 -0
  144. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/parser.out +0 -0
  145. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/parsetab.py +0 -0
  146. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/scanner.py +0 -0
  147. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/scanning/scanning_lexer.py +0 -0
  148. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/config_exception.py +0 -0
  149. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/exceptions.py +0 -0
  150. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/last_line_stats.py +0 -0
  151. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/line_monitor.py +0 -0
  152. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/log_utility.py +0 -0
  153. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/metadata_parser.py +0 -0
  154. {csvpath-0.0.488 → csvpath-0.0.490}/csvpath/util/printer.py +0 -0
  155. {csvpath-0.0.488 → csvpath-0.0.490}/docs/asbool.md +0 -0
  156. {csvpath-0.0.488 → csvpath-0.0.490}/docs/assignment.md +0 -0
  157. {csvpath-0.0.488 → csvpath-0.0.490}/docs/comments.md +0 -0
  158. {csvpath-0.0.488 → csvpath-0.0.490}/docs/config.md +0 -0
  159. {csvpath-0.0.488 → csvpath-0.0.490}/docs/examples.md +0 -0
  160. {csvpath-0.0.488 → csvpath-0.0.490}/docs/files.md +0 -0
  161. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/above.md +0 -0
  162. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/advance.md +0 -0
  163. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/after_blank.md +0 -0
  164. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/all.md +0 -0
  165. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/andor.md +0 -0
  166. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/any.md +0 -0
  167. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/average.md +0 -0
  168. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/between.md +0 -0
  169. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/collect.md +0 -0
  170. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/correlate.md +0 -0
  171. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/count.md +0 -0
  172. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/count_headers.md +0 -0
  173. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/counter.md +0 -0
  174. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/date.md +0 -0
  175. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/empty.md +0 -0
  176. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/empty_stack.md +0 -0
  177. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/end.md +0 -0
  178. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/every.md +0 -0
  179. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/fail.md +0 -0
  180. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/first.md +0 -0
  181. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/get.md +0 -0
  182. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/has_dups.md +0 -0
  183. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/has_matches.md +0 -0
  184. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/header.md +0 -0
  185. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/header_name.md +0 -0
  186. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/header_names_mismatch.md +0 -0
  187. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/implementing_functions.md +0 -0
  188. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/import.md +0 -0
  189. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/in.md +0 -0
  190. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/increment.md +0 -0
  191. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/intf.md +0 -0
  192. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/jinja.md +0 -0
  193. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/last.md +0 -0
  194. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/line.md +0 -0
  195. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/line_number.md +0 -0
  196. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/max.md +0 -0
  197. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/metaphone.md +0 -0
  198. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/mismatch.md +0 -0
  199. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/no.md +0 -0
  200. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/not.md +0 -0
  201. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/now.md +0 -0
  202. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/percent_unique.md +0 -0
  203. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/pop.md +0 -0
  204. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/print.md +0 -0
  205. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/print_line.md +0 -0
  206. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/print_queue.md +0 -0
  207. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/random.md +0 -0
  208. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/regex.md +0 -0
  209. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/replace.md +0 -0
  210. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/reset_headers.md +0 -0
  211. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/stdev.md +0 -0
  212. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/stop.md +0 -0
  213. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/string_functions.md +0 -0
  214. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/subtotal.md +0 -0
  215. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/subtract.md +0 -0
  216. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/sum.md +0 -0
  217. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/tally.md +0 -0
  218. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/total_lines.md +0 -0
  219. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/track.md +0 -0
  220. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/types.md +0 -0
  221. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/variables.md +0 -0
  222. {csvpath-0.0.488 → csvpath-0.0.490}/docs/functions/variables_and_headers.md +0 -0
  223. {csvpath-0.0.488 → csvpath-0.0.490}/docs/grammar.md +0 -0
  224. {csvpath-0.0.488 → csvpath-0.0.490}/docs/headers.md +0 -0
  225. {csvpath-0.0.488 → csvpath-0.0.490}/docs/images/logo-wordmark-white-on-black-trimmed-padded.png +0 -0
  226. {csvpath-0.0.488 → csvpath-0.0.490}/docs/images/logo-wordmark-white-trimmed.png +0 -0
  227. {csvpath-0.0.488 → csvpath-0.0.490}/docs/paths.md +0 -0
  228. {csvpath-0.0.488 → csvpath-0.0.490}/docs/printing.md +0 -0
  229. {csvpath-0.0.488 → csvpath-0.0.490}/docs/qualifiers.md +0 -0
  230. {csvpath-0.0.488 → csvpath-0.0.490}/docs/references.md +0 -0
  231. {csvpath-0.0.488 → csvpath-0.0.490}/docs/terms.md +0 -0
  232. {csvpath-0.0.488 → csvpath-0.0.490}/docs/variables.md +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: csvpath
3
- Version: 0.0.488
4
- Summary: A declarative language for validation of CSV files
3
+ Version: 0.0.490
4
+ Summary: A declarative language for validating CSV, Excel, and other tabular data files
5
5
  Author: David Kershaw
6
6
  Author-email: dk107dk@hotmail.com
7
7
  Requires-Python: >=3.9,<4.0
@@ -21,37 +21,40 @@ Classifier: Topic :: Software Development :: Quality Assurance
21
21
  Classifier: Topic :: Software Development :: Testing
22
22
  Classifier: Topic :: Text Processing
23
23
  Classifier: Topic :: Utilities
24
+ Provides-Extra: pandas
25
+ Provides-Extra: smartopen
24
26
  Requires-Dist: inflect (>=7.3.1,<8.0.0)
25
27
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
26
28
  Requires-Dist: lark (>=1.2.2,<2.0.0)
27
29
  Requires-Dist: metaphone (>=0.6,<0.7)
30
+ Requires-Dist: pandas (>=2.2.3,<3.0.0) ; extra == "pandas"
28
31
  Requires-Dist: ply (>=3.11,<4.0)
29
32
  Requires-Dist: pylightxl (>=1.61,<2.0)
30
33
  Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
31
- Requires-Dist: smart-open[s3] (>=7.0.5,<8.0.0)
34
+ Requires-Dist: smart-open[s3] (>=7.0.5,<8.0.0) ; extra == "smartopen"
32
35
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
33
36
  Project-URL: Csvpath.org, https://www.csvpath.org
34
37
  Project-URL: Github, https://github.com/csvpath/csvpath.git
35
38
  Description-Content-Type: text/markdown
36
39
 
37
40
 
38
- # <img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/> About CsvPath
41
+ # <a href='https://www.csvpath.org/'><img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/></a> About CsvPath
39
42
 
40
- CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files.
43
+ CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
41
44
 
42
- CsvPath' goal is to make it easy to:
43
- - Analyze the content and structure of a CSV or Excel file
44
- - Validate that the file matches expectations
45
- - Report on the content or validity
46
- - Create new derived CSV files
45
+ CsvPath's goal is to make it easy to set up a Collect, Store, Validate-pattern flat-file landing zone that:
46
+ - Analyzes the content and structure of flat files
47
+ - Validates that files match expectations
48
+ - Reports on content validity
49
+ - Creates new derived files using copy-on-write
47
50
 
48
- And do it all in an automation-friendly way.
51
+ And does it all in an automation-friendly way.
49
52
 
50
- CsvPath is inspired by:
53
+ CsvPath's validation is inspired by:
51
54
  - XPath for XML files
52
55
  - The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
53
56
 
54
- CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
57
+ CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
55
58
 
56
59
  Read more about CsvPath and see realistic CSV and Excel validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
57
60
 
@@ -63,6 +66,7 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
63
66
  # Contents
64
67
 
65
68
  - [Motivation](#motivation)
69
+ - [Install](#install)
66
70
  - [High-level Description](#description)
67
71
  - [Running CsvPath](#running)
68
72
  - [Validation](#validating)
@@ -92,10 +96,37 @@ CSV files are everywhere!
92
96
 
93
97
  A surprisingly large number of companies depend on CSV processing for significant amounts of revenue. Research organizations are awash in CSV. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use day to day uses CSV for sharing. CSV is the lowest common denominator. Many CSVs are invalid or broken in some way. Oftentimes a lot of manual effort goes into finding problems and fixing them.
94
98
 
95
- CsvPath is first and foremost a validation language. It is intended to describe CSV files in simple declarative rules that indicate if a file is as expected. CsvPath can also extract data, create reports, and in other ways have useful side effects.
99
+ CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract data, create reports, and do other useful things.
96
100
 
97
101
  CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
98
102
 
103
+
104
+ <a name="install"></a>
105
+ # Install
106
+
107
+ <a href='https://pypi.org/project/csvpath/'>CsvPath is available on PyPI</a>. Install with
108
+ ```
109
+ pip install csvpath
110
+ ```
111
+
112
+ CsvPath has two optional dependencies:
113
+
114
+ - <a target='_blank' href='https://pypi.org/project/pandas/'>Pandas</a>
115
+ - <a target='_blank' href='https://pypi.org/project/smart-open/'>Smart-open</a>
116
+
117
+ Pandas data frames can be used as a data source, much like Excel or CSV files. Install CsvPath with the Pandas option:
118
+ ```
119
+ pip install csvpath[pandas]
120
+ ```
121
+
122
+ Smart-open is an option for loading data files directly from S3. Install the Smart Open extra with:
123
+ ```
124
+ pip install csvpath[smartopen]
125
+ ```
126
+
127
+ Both of these optional dependencies can make it harder to use CsvPath in certain specific use cases. For example, using Pandas in an AWS Lambda layer may be less straightforward. If you need the capabilities, they are easy to install, but if you don't, CsvPath is lighter weight without them.
128
+
129
+
99
130
  # Description
100
131
  <a name="description"></a>
101
132
 
@@ -152,26 +183,25 @@ Two classes provide the functionality: CsvPath and CsvPaths. Each has only a few
152
183
  ### CsvPath
153
184
  (<a href='https://github.com/csvpath/csvpath/blob/main/csvpath/csvpath.py'>code</a>)
154
185
  The CsvPath class is the basic entry point for running csvpaths.
155
- |method |function |
156
- |----------------------------|----------------------------------------------------------------|
157
- | parse(csvpath) | applies a csvpath |
158
- | next() | iterates over matched rows returning each matched row as a list|
159
- | fast_forward() | iterates over the file collecting variables and side effects |
160
- | advance(n) | skips forward n rows from within a `for row in path.next()` loop|
161
- | collect(n) | processes n rows and collects the lines that matched as lists |
186
+ |method |function |
187
+ |----------------------------|-----------------------------------------------------------------|
188
+ | next() | iterates over matched rows returning each matched row as a list |
189
+ | fast_forward() | iterates over the file collecting variables and side effects |
190
+ | advance() | skips forward n rows from within a `for row in path.next()` loop|
191
+ | collect() | processes n rows and collects the lines that matched as lists |
162
192
 
163
193
  ### CsvPaths
164
194
  (<a href='https://github.com/dk107dk/csvpath/blob/main/csvpath/csvpaths.py'>code</a>)
165
195
  The CsvPaths class helps you manage validations of multiple files and/or multiple csvpaths. It coordinates the work of multiple CsvPath instances.
166
- |method |function |
167
- |----------------------|-----------------------------------------------------------------|
168
- | csvpath() | gets a CsvPath object that knows all the file names available |
169
- | collect_paths() | Same as CsvPath.collect() but for all paths sequentially |
170
- | fast_forward_paths() | Same as CsvPath.fast_forward() but for all paths sequentially |
171
- | next_paths() | Same as CsvPath.next() but for all paths sequentially |
172
- | collect_by_line() | Same as CsvPath.collect() but for all paths breadth first |
173
- | fast_forward_by_line()| Same as CsvPath.fast_forward() but for all paths breadth first |
174
- | next_by_line() | Same as CsvPath.next() but for all paths breadth first |
196
+ |method |function |
197
+ |------------------------|-----------------------------------------------------------------|
198
+ | csvpath() | gets a CsvPath object that knows all the file names available |
199
+ | collect_paths() | Same as CsvPath.collect() but for all paths sequentially |
200
+ | fast_forward_paths() | Same as CsvPath.fast_forward() but for all paths sequentially |
201
+ | next_paths() | Same as CsvPath.next() but for all paths sequentially |
202
+ | collect_by_line() | Same as CsvPath.collect() but for all paths breadth first |
203
+ | fast_forward_by_line() | Same as CsvPath.fast_forward() but for all paths breadth first |
204
+ | next_by_line() | Same as CsvPath.next() but for all paths breadth first |
175
205
 
176
206
  To be clear, the purpose of `CsvPaths` is to apply multiple csvpaths per CSV file. Its breadth-first versions of the `collect()`, `fast_forward()`, and `next()` methods attempt to match each csvpath to each row of a CSV file before continuing to the next row. As you can imagine, for very large files this approach is a must.
177
207
 
@@ -307,9 +337,9 @@ The match part is also bracketed. Matches have space separated components or "va
307
337
  ## Term
308
338
  A string, number, or regular expression value.
309
339
 
310
- |Returns | Matches | Examples |
311
- |--------|---------|---------------|
312
- |A value | Always true | `"a value"` |
340
+ |Returns | Matches | Examples |
341
+ |--------|-------------|-----------------|
342
+ |A value | Always true | `"a value"` |
313
343
 
314
344
  <a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
315
345
 
@@ -317,9 +347,9 @@ A string, number, or regular expression value.
317
347
  ## Function
318
348
  A composable unit of functionality called once for every row scanned.
319
349
 
320
- |Returns | Matches | Examples |
321
- |--------|---------|---------------|
322
- |Calculated | Calculated | `count()` |
350
+ |Returns | Matches | Examples |
351
+ |-----------|------------|---------------|
352
+ |Calculated | Calculated | `count()` |
323
353
 
324
354
  <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
325
355
 
@@ -1,21 +1,21 @@
1
1
 
2
- # <img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/> About CsvPath
2
+ # <a href='https://www.csvpath.org/'><img src='https://www.csvpath.org/~gitbook/image?url=https%3A%2F%2F3739708663-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Forganizations%252FMXTJeGvaEsqwNG39F37h%252Fsites%252Fsite_SPBqJ%252Ficon%252FMCSxo7k6rXWnqoPE204u%252Fcsvpath-icon.png%3Falt%3Dmedia%26token%3D28869fdd-d54e-400e-8917-b8097f935f42&width=32&dpr=2&quality=100&sign=71ca9f3e&sv=1'/></a> About CsvPath
3
3
 
4
- CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files.
4
+ CsvPath defines a declarative syntax for inspecting and validating CSV and Excel files, and other tabular data.
5
5
 
6
- CsvPath' goal is to make it easy to:
7
- - Analyze the content and structure of a CSV or Excel file
8
- - Validate that the file matches expectations
9
- - Report on the content or validity
10
- - Create new derived CSV files
6
+ CsvPath's goal is to make it easy to set up a Collect, Store, Validate-pattern flat-file landing zone that:
7
+ - Analyzes the content and structure of flat files
8
+ - Validates that files match expectations
9
+ - Reports on content validity
10
+ - Creates new derived files using copy-on-write
11
11
 
12
- And do it all in an automation-friendly way.
12
+ And does it all in an automation-friendly way.
13
13
 
14
- CsvPath is inspired by:
14
+ CsvPath's validation is inspired by:
15
15
  - XPath for XML files
16
16
  - The ISO standard <a href='https://schematron.com/'>Schematron validation</a>
17
17
 
18
- CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
18
+ CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. Metadata is plentiful. New functions are easy to create.
19
19
 
20
20
  Read more about CsvPath and see realistic CSV and Excel validation examples at <a href='https://www.csvpath.org'>https://www.csvpath.org</a>.
21
21
 
@@ -27,6 +27,7 @@ If you need help, use the <a href='https://www.csvpath.org/getting-started/get-h
27
27
  # Contents
28
28
 
29
29
  - [Motivation](#motivation)
30
+ - [Install](#install)
30
31
  - [High-level Description](#description)
31
32
  - [Running CsvPath](#running)
32
33
  - [Validation](#validating)
@@ -56,10 +57,37 @@ CSV files are everywhere!
56
57
 
57
58
  A surprisingly large number of companies depend on CSV processing for significant amounts of revenue. Research organizations are awash in CSV. And everyone's favorite issue tracker, database GUI, spreadsheet, APM platform, and most any other type of tool we use day to day uses CSV for sharing. CSV is the lowest common denominator. Many CSVs are invalid or broken in some way. Oftentimes a lot of manual effort goes into finding problems and fixing them.
58
59
 
59
- CsvPath is first and foremost a validation language. It is intended to describe CSV files in simple declarative rules that indicate if a file is as expected. CsvPath can also extract data, create reports, and in other ways have useful side effects.
60
+ CsvPath is first and foremost a validation language. It describes tabular data in simple declarative rules that define what valid means for that data. CsvPath can also extract data, create reports, and do other useful things.
60
61
 
61
62
  CsvPath's goal is to make simple validations almost trivial and more complex situations more manageable. It is a library, not a system, so it relies on being easy to integrate with other DataOps tools.
62
63
 
64
+
65
+ <a name="install"></a>
66
+ # Install
67
+
68
+ <a href='https://pypi.org/project/csvpath/'>CsvPath is available on PyPI</a>. Install with:
69
+ ```
70
+ pip install csvpath
71
+ ```
72
+
73
+ CsvPath has two optional dependencies:
74
+
75
+ - <a target='_blank' href='https://pypi.org/project/pandas/'>Pandas</a>
76
+ - <a target='_blank' href='https://pypi.org/project/smart-open/'>Smart-open</a>
77
+
78
+ Pandas data frames can be used as a data source, much like Excel or CSV files. Install CsvPath with the Pandas option:
79
+ ```
80
+ pip install csvpath[pandas]
81
+ ```
82
+
83
+ Smart-open is an option for loading data files directly from S3. Install the Smart Open extra with:
84
+ ```
85
+ pip install csvpath[smart-open]
86
+ ```
87
+
88
+ Both of these optional dependencies can make it harder to use CsvPath in certain specific use cases. For example, using Pandas in an AWS Lambda layer may be less straightforward. If you need the capabilities, they are easy to install, but if you don't, CsvPath is lighter weight without them.
89
+
90
+
63
91
  # Description
64
92
  <a name="description"></a>
65
93
 
@@ -116,26 +144,25 @@ Two classes provide the functionality: CsvPath and CsvPaths. Each has only a few
116
144
  ### CsvPath
117
145
  (<a href='https://github.com/csvpath/csvpath/blob/main/csvpath/csvpath.py'>code</a>)
118
146
  The CsvPath class is the basic entry point for running csvpaths.
119
- |method |function |
120
- |----------------------------|----------------------------------------------------------------|
121
- | parse(csvpath) | applies a csvpath |
122
- | next() | iterates over matched rows returning each matched row as a list|
123
- | fast_forward() | iterates over the file collecting variables and side effects |
124
- | advance(n) | skips forward n rows from within a `for row in path.next()` loop|
125
- | collect(n) | processes n rows and collects the lines that matched as lists |
147
+ |method |function |
148
+ |----------------------------|-----------------------------------------------------------------|
149
+ | next() | iterates over matched rows returning each matched row as a list |
150
+ | fast_forward() | iterates over the file collecting variables and side effects |
151
+ | advance() | skips forward n rows from within a `for row in path.next()` loop|
152
+ | collect() | processes n rows and collects the lines that matched as lists |
126
153
 
127
154
  ### CsvPaths
128
155
  (<a href='https://github.com/dk107dk/csvpath/blob/main/csvpath/csvpaths.py'>code</a>)
129
156
  The CsvPaths class helps you manage validations of multiple files and/or multiple csvpaths. It coordinates the work of multiple CsvPath instances.
130
- |method |function |
131
- |----------------------|-----------------------------------------------------------------|
132
- | csvpath() | gets a CsvPath object that knows all the file names available |
133
- | collect_paths() | Same as CsvPath.collect() but for all paths sequentially |
134
- | fast_forward_paths() | Same as CsvPath.fast_forward() but for all paths sequentially |
135
- | next_paths() | Same as CsvPath.next() but for all paths sequentially |
136
- | collect_by_line() | Same as CsvPath.collect() but for all paths breadth first |
137
- | fast_forward_by_line()| Same as CsvPath.fast_forward() but for all paths breadth first |
138
- | next_by_line() | Same as CsvPath.next() but for all paths breadth first |
157
+ |method |function |
158
+ |------------------------|-----------------------------------------------------------------|
159
+ | csvpath() | gets a CsvPath object that knows all the file names available |
160
+ | collect_paths() | Same as CsvPath.collect() but for all paths sequentially |
161
+ | fast_forward_paths() | Same as CsvPath.fast_forward() but for all paths sequentially |
162
+ | next_paths() | Same as CsvPath.next() but for all paths sequentially |
163
+ | collect_by_line() | Same as CsvPath.collect() but for all paths breadth first |
164
+ | fast_forward_by_line() | Same as CsvPath.fast_forward() but for all paths breadth first |
165
+ | next_by_line() | Same as CsvPath.next() but for all paths breadth first |
139
166
 
140
167
  To be clear, the purpose of `CsvPaths` is to apply multiple csvpaths per CSV file. Its breadth-first versions of the `collect()`, `fast_forward()`, and `next()` methods attempt to match each csvpath to each row of a CSV file before continuing to the next row. As you can imagine, for very large files this approach is a must.
141
168
 
@@ -271,9 +298,9 @@ The match part is also bracketed. Matches have space separated components or "va
271
298
  ## Term
272
299
  A string, number, or regular expression value.
273
300
 
274
- |Returns | Matches | Examples |
275
- |--------|---------|---------------|
276
- |A value | Always true | `"a value"` |
301
+ |Returns | Matches | Examples |
302
+ |--------|-------------|-----------------|
303
+ |A value | Always true | `"a value"` |
277
304
 
278
305
  <a href='https://github.com/dk107dk/csvpath/blob/main/docs/terms.md'>Read about terms here</a>.
279
306
 
@@ -281,9 +308,9 @@ A string, number, or regular expression value.
281
308
  ## Function
282
309
  A composable unit of functionality called once for every row scanned.
283
310
 
284
- |Returns | Matches | Examples |
285
- |--------|---------|---------------|
286
- |Calculated | Calculated | `count()` |
311
+ |Returns | Matches | Examples |
312
+ |-----------|------------|---------------|
313
+ |Calculated | Calculated | `count()` |
287
314
 
288
315
  <a href='https://github.com/dk107dk/csvpath/blob/main/docs/functions.md'>Read about functions here</a>.
289
316
 
@@ -27,7 +27,7 @@ from .util.exceptions import (
27
27
  )
28
28
  from .matching.util.exceptions import MatchException
29
29
  from csvpath.util.printer import Printer
30
- from csvpath.util.file_readers import CsvDataFileReader
30
+ from csvpath.util.file_readers import DataFileReader
31
31
 
32
32
 
33
33
  class CsvPathPublic(ABC):
@@ -35,7 +35,10 @@ class CsvPathPublic(ABC):
35
35
 
36
36
  @abstractmethod
37
37
  def parse(self, csvpath): # pragma: no cover
38
- """Reads a csvpath prepares to match against CSV file lines"""
38
+ """Reads a csvpath prepares to match against CSV file lines. This
39
+ method is an alternative to simply passing the csvpath string to the
40
+ collect, next, or fast_forward methods. You don't do both.
41
+ """
39
42
 
40
43
  @abstractmethod
41
44
  def parse_named_path(
@@ -55,24 +58,31 @@ class CsvPathPublic(ABC):
55
58
  using the stop() function"""
56
59
 
57
60
  @abstractmethod
58
- def collect(self, nexts: int = -1) -> List[List[Any]]: # pragma: no cover
59
- """Returns the lines of a CSV file that match the csvpath"""
61
+ def collect(
62
+ self, csvpath: str = None, *, nexts: int = -1
63
+ ) -> List[List[Any]]: # pragma: no cover
64
+ """Returns the lines of a CSV file that match the csvpath. Pass
65
+ nexts to limit a run to collecting only N lines; the default
66
+ is -1 for collecting all. If you do not pass the csvpath
67
+ string here you must first use the parse method."""
60
68
 
61
69
  @abstractmethod
62
70
  def advance(self, ff: int = -1) -> None: # pragma: no cover
63
71
  """Advances the iteration by ff rows. -1 means to the end of the file."""
64
72
 
65
73
  @abstractmethod
66
- def fast_forward(self) -> None: # pragma: no cover
74
+ def fast_forward(self, csvpath: str = None) -> None: # pragma: no cover
67
75
  """Scans to the end of the CSV file. All scanned rows will be
68
76
  considered for match and variables and side effects will happen,
69
77
  but no rows will be returned or stored. -1 means to the end of
70
- the file."""
78
+ the file. If you do not pass the csvpath string here you must first
79
+ use the parse method."""
71
80
 
72
81
  @abstractmethod
73
- def next(self): # pragma: no cover
82
+ def next(self, csvpath: str = None): # pragma: no cover
74
83
  """A generator function that steps through the CSV file returning
75
- matching rows"""
84
+ matching rows. If you do not pass the csvpath string here you must
85
+ first use the parse method."""
76
86
 
77
87
 
78
88
  class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902, R0904
@@ -290,6 +300,52 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
290
300
  self._created_at = datetime.now()
291
301
  self._run_started_at = None
292
302
 
303
+ self._collecting = False
304
+ self._unmatched = None
305
+ self._unmatched_available = False
306
+ self._data_from_preceding = False
307
+
308
+ @property
309
+ def data_from_preceding(self) -> bool:
310
+ return self._data_from_preceding
311
+
312
+ @data_from_preceding.setter
313
+ def data_from_preceding(self, dfp: bool) -> None:
314
+ self._data_from_preceding = dfp
315
+
316
+ @property
317
+ def unmatched(self) -> list[list[Any]]:
318
+ return self._unmatched
319
+
320
+ @unmatched.setter
321
+ def unmatched(self, lines: list[list[Any]]) -> None:
322
+ self._unmatched = lines
323
+
324
+ @property
325
+ def collecting(self) -> bool:
326
+ return self._collecting
327
+
328
+ @collecting.setter
329
+ def collecting(self, c: bool) -> None:
330
+ self._collecting = c
331
+
332
+ def set_unmatched_availability(self) -> None:
333
+ um = self.metadata.get("unmatched-mode")
334
+ if um is not None and um.find("no-keep") > -1:
335
+ self.unmatched_available = False
336
+ elif um is not None and um.find("keep") > -1:
337
+ self.unmatched_available = True
338
+ else:
339
+ self.unmatched_available = False
340
+
341
+ @property
342
+ def unmatched_available(self) -> bool:
343
+ return self._unmatched_available
344
+
345
+ @unmatched_available.setter
346
+ def unmatched_available(self, ua: bool) -> None:
347
+ self._unmatched_available = ua
348
+
293
349
  @property
294
350
  def created_at(self) -> datetime:
295
351
  return self._created_at
@@ -652,6 +708,9 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
652
708
  # - return-mode: matches | no-matches
653
709
  # - print-mode: default | no-default
654
710
  # - validation-mode: (no-)print | log | (no-)raise | quiet | (no-)match
711
+ # - run-mode: no-run | run
712
+ # - unmatched-mode: no-keep | keep
713
+ # - source-mode: preceding | origin
655
714
  #
656
715
  self.update_logic_mode_if()
657
716
  self.update_run_mode_if()
@@ -659,6 +718,18 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
659
718
  self.update_print_mode_if()
660
719
  self.update_explain_mode_if()
661
720
  self.update_arg_validation_mode_if()
721
+ self.update_unmatched_mode_if()
722
+ self.update_data_from_preceding_if()
723
+
724
+ def update_data_from_preceding_if(self) -> None:
725
+ if self.metadata and "source-mode" in self.metadata:
726
+ dfp = self.metadata["source-mode"]
727
+ self.data_from_preceding = dfp == "preceding"
728
+ else:
729
+ self.data_from_preceding = False
730
+
731
+ def update_unmatched_mode_if(self) -> None:
732
+ self.set_unmatched_availability()
662
733
 
663
734
  def update_arg_validation_mode_if(self) -> None:
664
735
  if self.metadata and "validation-mode" in self.metadata:
@@ -938,13 +1009,16 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
938
1009
  # collect(), fast_forward(), and next() are the central methods of CsvPath.
939
1010
  #
940
1011
  #
941
- def collect(self, nexts: int = -1) -> List[List[Any]]:
1012
+ def collect(self, csvpath: str = None, *, nexts: int = -1) -> List[List[Any]]:
942
1013
  """Runs the csvpath forward and returns the matching lines seen as
943
1014
  a list of lists"""
1015
+ if self.scanner is None and csvpath is not None:
1016
+ self.parse(csvpath)
944
1017
  if nexts < -1:
945
1018
  raise ProcessingException(
946
1019
  "Input must be >= -1. -1 means collect to the end of the file."
947
1020
  )
1021
+ self.collecting = True
948
1022
  lines = []
949
1023
  for _ in self.next():
950
1024
  _ = _[:]
@@ -957,17 +1031,21 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
957
1031
  break
958
1032
  return lines
959
1033
 
960
- def fast_forward(self) -> None:
1034
+ def fast_forward(self, csvpath=None) -> None:
961
1035
  """Runs the path for all rows of the file. Variables are collected
962
1036
  and side effects like print happen. No lines are collected.
963
1037
  """
1038
+ if self.scanner is None and csvpath is not None:
1039
+ self.parse(csvpath)
964
1040
  for _ in self.next():
965
1041
  pass
966
1042
 
967
- def next(self):
1043
+ def next(self, csvpath=None):
968
1044
  """Iterates over the lines in the CSV file returning those that match
969
1045
  the csvpath. collect() and fast_forward() call next() behind the scenes.
970
1046
  """
1047
+ if self.scanner is None and csvpath is not None:
1048
+ self.parse(csvpath)
971
1049
  start = time.time()
972
1050
  if self.run_mode is True:
973
1051
  for line in self._next_line():
@@ -983,6 +1061,12 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
983
1061
  self.logger.error(msg)
984
1062
  raise MatchException(msg)
985
1063
  yield line
1064
+ elif self.collecting and self.unmatched_available:
1065
+ if self.unmatched is None:
1066
+ self.unmatched = []
1067
+ line = self.limit_collection(line)
1068
+ # we aren't None and 0 checking as above. needed?
1069
+ self.unmatched.append(line)
986
1070
  if self.stopped:
987
1071
  self.logger.info(
988
1072
  "CsvPath has been stopped at line %s",
@@ -1018,14 +1102,7 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
1018
1102
  #
1019
1103
  if self.scanner.filename is None:
1020
1104
  raise FileException("There is no filename")
1021
- """
1022
- with open(self.scanner.filename, "r", encoding="utf-8") as file:
1023
- reader = csv.reader(
1024
- file, delimiter=self.delimiter, quotechar=self.quotechar
1025
- )
1026
- for line in reader:
1027
- """
1028
- reader = CsvDataFileReader(
1105
+ reader = DataFileReader(
1029
1106
  self.scanner.filename, delimiter=self.delimiter, quotechar=self.quotechar
1030
1107
  )
1031
1108
  for line in reader.next():
@@ -1173,7 +1250,12 @@ class CsvPath(CsvPathPublic, ErrorCollector, Printer): # pylint: disable=R0902,
1173
1250
  return line
1174
1251
  ls = []
1175
1252
  for k in self.limit_collection_to:
1176
- ls.append(line[k])
1253
+ if k is None or k >= len(line):
1254
+ raise InputException(
1255
+ f"[{self.identity}] Line {self.line_monitor.physical_line_number}: unknown header name: {k}"
1256
+ )
1257
+ else:
1258
+ ls.append(line[k])
1177
1259
  return ls
1178
1260
 
1179
1261
  def advance(self, ff: int = -1) -> None: