etlplus 0.12.9__tar.gz → 0.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. {etlplus-0.12.9/etlplus.egg-info → etlplus-0.13.0}/PKG-INFO +94 -33
  2. {etlplus-0.12.9 → etlplus-0.13.0}/README.md +93 -32
  3. {etlplus-0.12.9 → etlplus-0.13.0}/docs/pipeline-guide.md +10 -0
  4. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/jobs.py +14 -4
  5. etlplus-0.13.0/etlplus/dag.py +103 -0
  6. etlplus-0.13.0/etlplus/file/accdb.py +78 -0
  7. etlplus-0.13.0/etlplus/file/arrow.py +78 -0
  8. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/avro.py +13 -1
  9. etlplus-0.13.0/etlplus/file/bson.py +77 -0
  10. etlplus-0.13.0/etlplus/file/cbor.py +78 -0
  11. etlplus-0.13.0/etlplus/file/cfg.py +79 -0
  12. etlplus-0.13.0/etlplus/file/conf.py +80 -0
  13. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/csv.py +13 -1
  14. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/dat.py +13 -1
  15. etlplus-0.13.0/etlplus/file/dta.py +77 -0
  16. etlplus-0.13.0/etlplus/file/duckdb.py +78 -0
  17. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/enums.py +11 -5
  18. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/feather.py +13 -1
  19. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/fwf.py +12 -1
  20. etlplus-0.13.0/etlplus/file/hbs.py +78 -0
  21. etlplus-0.13.0/etlplus/file/hdf5.py +78 -0
  22. etlplus-0.13.0/etlplus/file/ini.py +79 -0
  23. etlplus-0.13.0/etlplus/file/ion.py +78 -0
  24. etlplus-0.13.0/etlplus/file/jinja2.py +78 -0
  25. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/json.py +13 -1
  26. etlplus-0.13.0/etlplus/file/log.py +78 -0
  27. etlplus-0.13.0/etlplus/file/mat.py +78 -0
  28. etlplus-0.13.0/etlplus/file/mdb.py +78 -0
  29. etlplus-0.13.0/etlplus/file/msgpack.py +78 -0
  30. etlplus-0.13.0/etlplus/file/mustache.py +78 -0
  31. etlplus-0.13.0/etlplus/file/nc.py +78 -0
  32. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/ndjson.py +12 -6
  33. etlplus-0.13.0/etlplus/file/numbers.py +75 -0
  34. etlplus-0.13.0/etlplus/file/ods.py +79 -0
  35. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/orc.py +13 -1
  36. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/parquet.py +13 -1
  37. etlplus-0.13.0/etlplus/file/pb.py +78 -0
  38. etlplus-0.13.0/etlplus/file/pbf.py +77 -0
  39. etlplus-0.13.0/etlplus/file/properties.py +78 -0
  40. etlplus-0.13.0/etlplus/file/proto.py +77 -0
  41. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/psv.py +14 -1
  42. etlplus-0.13.0/etlplus/file/rda.py +78 -0
  43. etlplus-0.13.0/etlplus/file/rds.py +78 -0
  44. etlplus-0.13.0/etlplus/file/sas7bdat.py +78 -0
  45. etlplus-0.13.0/etlplus/file/sav.py +77 -0
  46. etlplus-0.13.0/etlplus/file/sqlite.py +78 -0
  47. etlplus-0.13.0/etlplus/file/sylk.py +77 -0
  48. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/tab.py +3 -4
  49. etlplus-0.13.0/etlplus/file/toml.py +78 -0
  50. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/tsv.py +14 -1
  51. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/txt.py +11 -1
  52. etlplus-0.13.0/etlplus/file/vm.py +78 -0
  53. etlplus-0.13.0/etlplus/file/wks.py +77 -0
  54. etlplus-0.13.0/etlplus/file/xlsm.py +79 -0
  55. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/xml.py +12 -1
  56. etlplus-0.13.0/etlplus/file/xpt.py +78 -0
  57. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/yaml.py +12 -1
  58. etlplus-0.13.0/etlplus/file/zsav.py +77 -0
  59. {etlplus-0.12.9 → etlplus-0.13.0/etlplus.egg-info}/PKG-INFO +94 -33
  60. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/SOURCES.txt +38 -0
  61. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/file/test_u_file_core.py +38 -0
  62. {etlplus-0.12.9 → etlplus-0.13.0}/.coveragerc +0 -0
  63. {etlplus-0.12.9 → etlplus-0.13.0}/.editorconfig +0 -0
  64. {etlplus-0.12.9 → etlplus-0.13.0}/.gitattributes +0 -0
  65. {etlplus-0.12.9 → etlplus-0.13.0}/.github/actions/python-bootstrap/action.yml +0 -0
  66. {etlplus-0.12.9 → etlplus-0.13.0}/.github/workflows/ci.yml +0 -0
  67. {etlplus-0.12.9 → etlplus-0.13.0}/.gitignore +0 -0
  68. {etlplus-0.12.9 → etlplus-0.13.0}/.pre-commit-config.yaml +0 -0
  69. {etlplus-0.12.9 → etlplus-0.13.0}/.ruff.toml +0 -0
  70. {etlplus-0.12.9 → etlplus-0.13.0}/CODE_OF_CONDUCT.md +0 -0
  71. {etlplus-0.12.9 → etlplus-0.13.0}/CONTRIBUTING.md +0 -0
  72. {etlplus-0.12.9 → etlplus-0.13.0}/DEMO.md +0 -0
  73. {etlplus-0.12.9 → etlplus-0.13.0}/LICENSE +0 -0
  74. {etlplus-0.12.9 → etlplus-0.13.0}/MANIFEST.in +0 -0
  75. {etlplus-0.12.9 → etlplus-0.13.0}/Makefile +0 -0
  76. {etlplus-0.12.9 → etlplus-0.13.0}/REFERENCES.md +0 -0
  77. {etlplus-0.12.9 → etlplus-0.13.0}/SECURITY.md +0 -0
  78. {etlplus-0.12.9 → etlplus-0.13.0}/SUPPORT.md +0 -0
  79. {etlplus-0.12.9 → etlplus-0.13.0}/docs/README.md +0 -0
  80. {etlplus-0.12.9 → etlplus-0.13.0}/docs/snippets/installation_version.md +0 -0
  81. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/README.md +0 -0
  82. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/__init__.py +0 -0
  83. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/__main__.py +0 -0
  84. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/__version__.py +0 -0
  85. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/README.md +0 -0
  86. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/__init__.py +0 -0
  87. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/auth.py +0 -0
  88. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/config.py +0 -0
  89. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/endpoint_client.py +0 -0
  90. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/errors.py +0 -0
  91. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/__init__.py +0 -0
  92. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/client.py +0 -0
  93. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/config.py +0 -0
  94. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/paginator.py +0 -0
  95. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/rate_limiting/__init__.py +0 -0
  96. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/rate_limiting/config.py +0 -0
  97. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
  98. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/request_manager.py +0 -0
  99. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/retry_manager.py +0 -0
  100. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/transport.py +0 -0
  101. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/types.py +0 -0
  102. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/README.md +0 -0
  103. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/__init__.py +0 -0
  104. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/commands.py +0 -0
  105. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/constants.py +0 -0
  106. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/handlers.py +0 -0
  107. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/io.py +0 -0
  108. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/main.py +0 -0
  109. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/options.py +0 -0
  110. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/state.py +0 -0
  111. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/types.py +0 -0
  112. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/README.md +0 -0
  113. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/__init__.py +0 -0
  114. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/connector.py +0 -0
  115. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/pipeline.py +0 -0
  116. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/profile.py +0 -0
  117. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/types.py +0 -0
  118. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/utils.py +0 -0
  119. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/README.md +0 -0
  120. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/__init__.py +0 -0
  121. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/ddl.py +0 -0
  122. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/engine.py +0 -0
  123. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/orm.py +0 -0
  124. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/schema.py +0 -0
  125. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/types.py +0 -0
  126. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/enums.py +0 -0
  127. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/extract.py +0 -0
  128. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/README.md +0 -0
  129. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/__init__.py +0 -0
  130. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/_imports.py +0 -0
  131. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/_io.py +0 -0
  132. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/core.py +0 -0
  133. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/gz.py +0 -0
  134. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/stub.py +0 -0
  135. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/xls.py +0 -0
  136. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/xlsx.py +0 -0
  137. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/zip.py +0 -0
  138. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/load.py +0 -0
  139. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/mixins.py +0 -0
  140. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/py.typed +0 -0
  141. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/run.py +0 -0
  142. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/run_helpers.py +0 -0
  143. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/README.md +0 -0
  144. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/__init__.py +0 -0
  145. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/ddl.sql.j2 +0 -0
  146. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/view.sql.j2 +0 -0
  147. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/transform.py +0 -0
  148. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/types.py +0 -0
  149. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/utils.py +0 -0
  150. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validate.py +0 -0
  151. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validation/README.md +0 -0
  152. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validation/__init__.py +0 -0
  153. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validation/utils.py +0 -0
  154. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/dependency_links.txt +0 -0
  155. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/entry_points.txt +0 -0
  156. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/requires.txt +0 -0
  157. {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/top_level.txt +0 -0
  158. {etlplus-0.12.9 → etlplus-0.13.0}/examples/README.md +0 -0
  159. {etlplus-0.12.9 → etlplus-0.13.0}/examples/configs/ddl_spec.yml +0 -0
  160. {etlplus-0.12.9 → etlplus-0.13.0}/examples/configs/pipeline.yml +0 -0
  161. {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.csv +0 -0
  162. {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.json +0 -0
  163. {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.xml +0 -0
  164. {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.xsd +0 -0
  165. {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.yaml +0 -0
  166. {etlplus-0.12.9 → etlplus-0.13.0}/examples/quickstart_python.py +0 -0
  167. {etlplus-0.12.9 → etlplus-0.13.0}/pyproject.toml +0 -0
  168. {etlplus-0.12.9 → etlplus-0.13.0}/pytest.ini +0 -0
  169. {etlplus-0.12.9 → etlplus-0.13.0}/setup.cfg +0 -0
  170. {etlplus-0.12.9 → etlplus-0.13.0}/setup.py +0 -0
  171. {etlplus-0.12.9 → etlplus-0.13.0}/tests/__init__.py +0 -0
  172. {etlplus-0.12.9 → etlplus-0.13.0}/tests/conftest.py +0 -0
  173. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/conftest.py +0 -0
  174. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_cli.py +0 -0
  175. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_examples_data_parity.py +0 -0
  176. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_pagination_strategy.py +0 -0
  177. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_pipeline_smoke.py +0 -0
  178. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
  179. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_run.py +0 -0
  180. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
  181. {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
  182. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/conftest.py +0 -0
  183. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_auth.py +0 -0
  184. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_config.py +0 -0
  185. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_endpoint_client.py +0 -0
  186. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_mocks.py +0 -0
  187. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_pagination_client.py +0 -0
  188. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_pagination_config.py +0 -0
  189. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_paginator.py +0 -0
  190. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_rate_limit_config.py +0 -0
  191. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_rate_limiter.py +0 -0
  192. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_request_manager.py +0 -0
  193. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_retry_manager.py +0 -0
  194. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_transport.py +0 -0
  195. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_types.py +0 -0
  196. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/conftest.py +0 -0
  197. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_handlers.py +0 -0
  198. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_io.py +0 -0
  199. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_main.py +0 -0
  200. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_state.py +0 -0
  201. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_config_utils.py +0 -0
  202. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_connector.py +0 -0
  203. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_jobs.py +0 -0
  204. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_pipeline.py +0 -0
  205. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/conftest.py +0 -0
  206. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_ddl.py +0 -0
  207. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_engine.py +0 -0
  208. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_orm.py +0 -0
  209. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_schema.py +0 -0
  210. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/file/test_u_file_enums.py +0 -0
  211. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/file/test_u_file_yaml.py +0 -0
  212. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_enums.py +0 -0
  213. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_extract.py +0 -0
  214. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_load.py +0 -0
  215. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_main.py +0 -0
  216. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_mixins.py +0 -0
  217. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_run.py +0 -0
  218. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_run_helpers.py +0 -0
  219. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_transform.py +0 -0
  220. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_utils.py +0 -0
  221. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_validate.py +0 -0
  222. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_version.py +0 -0
  223. {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/validation/test_u_validation_utils.py +0 -0
  224. {etlplus-0.12.9 → etlplus-0.13.0}/tools/update_demo_snippets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.12.9
3
+ Version: 0.13.0
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -77,8 +77,12 @@ package and command-line interface for data extraction, validation, transformati
77
77
  - [Semi-Structured Text](#semi-structured-text)
78
78
  - [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
79
79
  - [Binary Serialization and Interchange](#binary-serialization-and-interchange)
80
+ - [Databases and Embedded Storage](#databases-and-embedded-storage)
80
81
  - [Spreadsheets](#spreadsheets)
82
+ - [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
83
+ - [Logs and Event Streams](#logs-and-event-streams)
81
84
  - [Data Archives](#data-archives)
85
+ - [Templates](#templates)
82
86
  - [Usage](#usage)
83
87
  - [Command Line Interface](#command-line-interface)
84
88
  - [Argument Order and Required Options](#argument-order-and-required-options)
@@ -219,65 +223,122 @@ DDL can be rendered from table specs for migrations or schema checks.
219
223
 
220
224
  ### Files (`file`)
221
225
 
222
- File formats are grouped as in `FileFormat`. Support is marked as:
226
+ Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
223
227
 
224
228
  - **Y**: implemented (may require optional dependencies)
225
229
  - **N**: stubbed or not yet implemented
226
230
 
227
231
  #### Stubbed / Placeholder
228
232
 
229
- | Format | Supported | Description |
230
- | --- | --- | --- |
233
+ | Format | Read | Write | Description |
234
+ | --- | --- | --- | --- |
231
235
  | `stub` | N | N | Placeholder format for tests and future connectors. |
232
236
 
233
237
  #### Tabular & Delimited Text
234
238
 
235
- | Format | Supported | Description |
236
- | --- | --- | --- |
237
- | `csv` | Y | Comma-Separated Values |
238
- | `fwf` | N | Fixed-Width Fields |
239
- | `dat` | N | Generic data file, often delimited or fixed-width |
240
- | `psv` | N | Pipe-Separated Values |
241
- | `tab` | N | Often synonymous with TSV |
242
- | `tsv` | Y | Tab-Separated Values |
243
- | `txt` | Y | Plain text, often delimited or fixed-width |
239
+ | Format | Read | Write | Description |
240
+ | --- | --- | --- | --- |
241
+ | `csv` | Y | Y | Comma-Separated Values |
242
+ | `dat` | N | N | Generic data file, often delimited or fixed-width |
243
+ | `fwf` | N | N | Fixed-Width Fields |
244
+ | `psv` | N | N | Pipe-Separated Values |
245
+ | `tab` | N | N | Often synonymous with TSV |
246
+ | `tsv` | Y | Y | Tab-Separated Values |
247
+ | `txt` | Y | Y | Plain text, often delimited or fixed-width |
244
248
 
245
249
  #### Semi-Structured Text
246
250
 
247
- | Format | Supported | Description |
248
- | --- | --- | --- |
249
- | `json` | Y | JavaScript Object Notation |
250
- | `ndjson` | Y | Newline-Delimited JSON |
251
- | `xml` | Y | Extensible Markup Language |
252
- | `yaml` | Y | YAML Ain't Markup Language |
251
+ | Format | Read | Write | Description |
252
+ | --- | --- | --- | --- |
253
+ | `cfg` | N | N | Config-style key-value pairs |
254
+ | `conf` | N | N | Config-style key-value pairs |
255
+ | `ini` | N | N | Config-style key-value pairs |
256
+ | `json` | Y | Y | JavaScript Object Notation |
257
+ | `ndjson` | Y | Y | Newline-Delimited JSON |
258
+ | `properties` | N | N | Java-style key-value pairs |
259
+ | `toml` | N | N | Tom's Obvious Minimal Language |
260
+ | `xml` | Y | Y | Extensible Markup Language |
261
+ | `yaml` | Y | Y | YAML Ain't Markup Language |
253
262
 
254
263
  #### Columnar / Analytics-Friendly
255
264
 
256
- | Format | Supported | Description |
257
- | --- | --- | --- |
258
- | `feather` | Y | Apache Arrow Feather |
259
- | `orc` | Y | Optimized Row Columnar; common in Hadoop |
260
- | `parquet` | Y | Apache Parquet; common in Big Data |
265
+ | Format | Read | Write | Description |
266
+ | --- | --- | --- | --- |
267
+ | `arrow` | N | N | Apache Arrow IPC |
268
+ | `feather` | Y | Y | Apache Arrow Feather |
269
+ | `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
270
+ | `parquet` | Y | Y | Apache Parquet; common in Big Data |
261
271
 
262
272
  #### Binary Serialization and Interchange
263
273
 
264
- | Format | Supported | Description |
265
- | --- | --- | --- |
266
- | `avro` | Y | Apache Avro |
274
+ | Format | Read | Write | Description |
275
+ | --- | --- | --- | --- |
276
+ | `avro` | Y | Y | Apache Avro |
277
+ | `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
278
+ | `cbor` | N | N | Concise Binary Object Representation |
279
+ | `ion` | N | N | Amazon Ion |
280
+ | `msgpack` | N | N | MessagePack |
281
+ | `pb` | N | N | Protocol Buffers (Google Protobuf) |
282
+ | `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
283
+ | `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
284
+
285
+ #### Databases and Embedded Storage
286
+
287
+ | Format | Read | Write | Description |
288
+ | --- | --- | --- | --- |
289
+ | `accdb` | N | N | Microsoft Access (newer format) |
290
+ | `duckdb` | N | N | DuckDB |
291
+ | `mdb` | N | N | Microsoft Access (older format) |
292
+ | `sqlite` | N | N | SQLite |
267
293
 
268
294
  #### Spreadsheets
269
295
 
296
+ | Format | Read | Write | Description |
297
+ | --- | --- | --- | --- |
298
+ | `numbers` | N | N | Apple Numbers |
299
+ | `ods` | N | N | OpenDocument |
300
+ | `wks` | N | N | Lotus 1-2-3 |
301
+ | `xls` | Y | Y | Microsoft Excel (BIFF) |
302
+ | `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
303
+ | `xlsx` | Y | Y | Microsoft Excel (Open XML) |
304
+
305
+ #### Statistical / Scientific / Numeric Computing
306
+
307
+ | Format | Read | Write | Description |
308
+ | --- | --- | --- | --- |
309
+ | `dta` | N | N | Stata |
310
+ | `hdf5` | N | N | Hierarchical Data Format |
311
+ | `mat` | N | N | MATLAB |
312
+ | `nc` | N | N | NetCDF |
313
+ | `rda` | N | N | RData workspace/object |
314
+ | `rds` | N | N | R data |
315
+ | `sas7bdat` | N | N | SAS data |
316
+ | `sav` | N | N | SPSS data |
317
+ | `sylk` | N | N | Symbolic Link |
318
+ | `xpt` | N | N | SAS Transport |
319
+ | `zsav` | N | N | Compressed SPSS data |
320
+
321
+ #### Logs and Event Streams
322
+
270
323
  | Format | Read | Write | Description |
271
324
  | --- | --- | --- | --- |
272
- | `xls` | Y | Microsoft Excel (BIFF); read-only |
273
- | `xlsx` | Y | Microsoft Excel (Open XML) |
325
+ | `log` | N | N | Generic log file |
274
326
 
275
327
  #### Data Archives
276
328
 
277
- | Format | Supported | Description |
278
- | --- | --- | --- |
279
- | `gz` | Y | Gzip-compressed file |
280
- | `zip` | Y | ZIP archive |
329
+ | Format | Read | Write | Description |
330
+ | --- | --- | --- | --- |
331
+ | `gz` | Y | Y | Gzip-compressed file |
332
+ | `zip` | Y | Y | ZIP archive |
333
+
334
+ #### Templates
335
+
336
+ | Format | Read | Write | Description |
337
+ | --- | --- | --- | --- |
338
+ | `hbs` | N | N | Handlebars |
339
+ | `jinja2` | N | N | Jinja2 |
340
+ | `mustache` | N | N | Mustache |
341
+ | `vm` | N | N | Apache Velocity |
281
342
 
282
343
  ## Usage
283
344
 
@@ -27,8 +27,12 @@ package and command-line interface for data extraction, validation, transformati
27
27
  - [Semi-Structured Text](#semi-structured-text)
28
28
  - [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
29
29
  - [Binary Serialization and Interchange](#binary-serialization-and-interchange)
30
+ - [Databases and Embedded Storage](#databases-and-embedded-storage)
30
31
  - [Spreadsheets](#spreadsheets)
32
+ - [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
33
+ - [Logs and Event Streams](#logs-and-event-streams)
31
34
  - [Data Archives](#data-archives)
35
+ - [Templates](#templates)
32
36
  - [Usage](#usage)
33
37
  - [Command Line Interface](#command-line-interface)
34
38
  - [Argument Order and Required Options](#argument-order-and-required-options)
@@ -169,65 +173,122 @@ DDL can be rendered from table specs for migrations or schema checks.
169
173
 
170
174
  ### Files (`file`)
171
175
 
172
- File formats are grouped as in `FileFormat`. Support is marked as:
176
+ Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
173
177
 
174
178
  - **Y**: implemented (may require optional dependencies)
175
179
  - **N**: stubbed or not yet implemented
176
180
 
177
181
  #### Stubbed / Placeholder
178
182
 
179
- | Format | Supported | Description |
180
- | --- | --- | --- |
183
+ | Format | Read | Write | Description |
184
+ | --- | --- | --- | --- |
181
185
  | `stub` | N | N | Placeholder format for tests and future connectors. |
182
186
 
183
187
  #### Tabular & Delimited Text
184
188
 
185
- | Format | Supported | Description |
186
- | --- | --- | --- |
187
- | `csv` | Y | Comma-Separated Values |
188
- | `fwf` | N | Fixed-Width Fields |
189
- | `dat` | N | Generic data file, often delimited or fixed-width |
190
- | `psv` | N | Pipe-Separated Values |
191
- | `tab` | N | Often synonymous with TSV |
192
- | `tsv` | Y | Tab-Separated Values |
193
- | `txt` | Y | Plain text, often delimited or fixed-width |
189
+ | Format | Read | Write | Description |
190
+ | --- | --- | --- | --- |
191
+ | `csv` | Y | Y | Comma-Separated Values |
192
+ | `dat` | N | N | Generic data file, often delimited or fixed-width |
193
+ | `fwf` | N | N | Fixed-Width Fields |
194
+ | `psv` | N | N | Pipe-Separated Values |
195
+ | `tab` | N | N | Often synonymous with TSV |
196
+ | `tsv` | Y | Y | Tab-Separated Values |
197
+ | `txt` | Y | Y | Plain text, often delimited or fixed-width |
194
198
 
195
199
  #### Semi-Structured Text
196
200
 
197
- | Format | Supported | Description |
198
- | --- | --- | --- |
199
- | `json` | Y | JavaScript Object Notation |
200
- | `ndjson` | Y | Newline-Delimited JSON |
201
- | `xml` | Y | Extensible Markup Language |
202
- | `yaml` | Y | YAML Ain't Markup Language |
201
+ | Format | Read | Write | Description |
202
+ | --- | --- | --- | --- |
203
+ | `cfg` | N | N | Config-style key-value pairs |
204
+ | `conf` | N | N | Config-style key-value pairs |
205
+ | `ini` | N | N | Config-style key-value pairs |
206
+ | `json` | Y | Y | JavaScript Object Notation |
207
+ | `ndjson` | Y | Y | Newline-Delimited JSON |
208
+ | `properties` | N | N | Java-style key-value pairs |
209
+ | `toml` | N | N | Tom's Obvious Minimal Language |
210
+ | `xml` | Y | Y | Extensible Markup Language |
211
+ | `yaml` | Y | Y | YAML Ain't Markup Language |
203
212
 
204
213
  #### Columnar / Analytics-Friendly
205
214
 
206
- | Format | Supported | Description |
207
- | --- | --- | --- |
208
- | `feather` | Y | Apache Arrow Feather |
209
- | `orc` | Y | Optimized Row Columnar; common in Hadoop |
210
- | `parquet` | Y | Apache Parquet; common in Big Data |
215
+ | Format | Read | Write | Description |
216
+ | --- | --- | --- | --- |
217
+ | `arrow` | N | N | Apache Arrow IPC |
218
+ | `feather` | Y | Y | Apache Arrow Feather |
219
+ | `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
220
+ | `parquet` | Y | Y | Apache Parquet; common in Big Data |
211
221
 
212
222
  #### Binary Serialization and Interchange
213
223
 
214
- | Format | Supported | Description |
215
- | --- | --- | --- |
216
- | `avro` | Y | Apache Avro |
224
+ | Format | Read | Write | Description |
225
+ | --- | --- | --- | --- |
226
+ | `avro` | Y | Y | Apache Avro |
227
+ | `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
228
+ | `cbor` | N | N | Concise Binary Object Representation |
229
+ | `ion` | N | N | Amazon Ion |
230
+ | `msgpack` | N | N | MessagePack |
231
+ | `pb` | N | N | Protocol Buffers (Google Protobuf) |
232
+ | `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
233
+ | `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
234
+
235
+ #### Databases and Embedded Storage
236
+
237
+ | Format | Read | Write | Description |
238
+ | --- | --- | --- | --- |
239
+ | `accdb` | N | N | Microsoft Access (newer format) |
240
+ | `duckdb` | N | N | DuckDB |
241
+ | `mdb` | N | N | Microsoft Access (older format) |
242
+ | `sqlite` | N | N | SQLite |
217
243
 
218
244
  #### Spreadsheets
219
245
 
246
+ | Format | Read | Write | Description |
247
+ | --- | --- | --- | --- |
248
+ | `numbers` | N | N | Apple Numbers |
249
+ | `ods` | N | N | OpenDocument |
250
+ | `wks` | N | N | Lotus 1-2-3 |
251
+ | `xls` | Y | Y | Microsoft Excel (BIFF) |
252
+ | `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
253
+ | `xlsx` | Y | Y | Microsoft Excel (Open XML) |
254
+
255
+ #### Statistical / Scientific / Numeric Computing
256
+
257
+ | Format | Read | Write | Description |
258
+ | --- | --- | --- | --- |
259
+ | `dta` | N | N | Stata |
260
+ | `hdf5` | N | N | Hierarchical Data Format |
261
+ | `mat` | N | N | MATLAB |
262
+ | `nc` | N | N | NetCDF |
263
+ | `rda` | N | N | RData workspace/object |
264
+ | `rds` | N | N | R data |
265
+ | `sas7bdat` | N | N | SAS data |
266
+ | `sav` | N | N | SPSS data |
267
+ | `sylk` | N | N | Symbolic Link |
268
+ | `xpt` | N | N | SAS Transport |
269
+ | `zsav` | N | N | Compressed SPSS data |
270
+
271
+ #### Logs and Event Streams
272
+
220
273
  | Format | Read | Write | Description |
221
274
  | --- | --- | --- | --- |
222
- | `xls` | Y | Microsoft Excel (BIFF); read-only |
223
- | `xlsx` | Y | Microsoft Excel (Open XML) |
275
+ | `log` | N | N | Generic log file |
224
276
 
225
277
  #### Data Archives
226
278
 
227
- | Format | Supported | Description |
228
- | --- | --- | --- |
229
- | `gz` | Y | Gzip-compressed file |
230
- | `zip` | Y | ZIP archive |
279
+ | Format | Read | Write | Description |
280
+ | --- | --- | --- | --- |
281
+ | `gz` | Y | Y | Gzip-compressed file |
282
+ | `zip` | Y | Y | ZIP archive |
283
+
284
+ #### Templates
285
+
286
+ | Format | Read | Write | Description |
287
+ | --- | --- | --- | --- |
288
+ | `hbs` | N | N | Handlebars |
289
+ | `jinja2` | N | N | Jinja2 |
290
+ | `mustache` | N | N | Mustache |
291
+ | `vm` | N | N | Apache Velocity |
231
292
 
232
293
  ## Usage
233
294
 
@@ -390,10 +390,20 @@ target:
390
390
  ```yaml
391
391
  jobs:
392
392
  - name: file_to_file_customers
393
+ depends_on: [seed_customers]
393
394
  extract: { source: customers_csv }
394
395
  validate: { ruleset: customers_basic }
395
396
  transform: { pipeline: clean_customers }
396
397
  load: { target: customers_json_out }
398
+ - name: seed_customers
399
+ extract: { source: seed_customers_csv }
400
+ load: { target: customers_db_out }
401
+ ```
402
+
403
+ Notes:
404
+
405
+ - `depends_on` is optional and can be a string or list of job names.
406
+ - Jobs without dependencies run first when ordered as a DAG.
397
407
 
398
408
  ## Running pipelines (CLI and Python)
399
409
 
@@ -34,10 +34,7 @@ __all__ = [
34
34
  ]
35
35
 
36
36
 
37
- # SECTION: TYPE ALIASES ===================================================== #
38
-
39
-
40
- # SECTION: CLASSES ========================================================== #
37
+ # SECTION: DATA CLASSES ===================================================== #
41
38
 
42
39
 
43
40
  @dataclass(kw_only=True, slots=True)
@@ -100,6 +97,8 @@ class JobConfig:
100
97
  Unique job name.
101
98
  description : str | None
102
99
  Optional human-friendly description.
100
+ depends_on : list[str]
101
+ Optional job dependency list. Dependencies must refer to other jobs.
103
102
  extract : ExtractRef | None
104
103
  Extraction reference.
105
104
  validate : ValidationRef | None
@@ -114,6 +113,7 @@ class JobConfig:
114
113
 
115
114
  name: str
116
115
  description: str | None = None
116
+ depends_on: list[str] = field(default_factory=list)
117
117
  extract: ExtractRef | None = None
118
118
  validate: ValidationRef | None = None
119
119
  transform: TransformRef | None = None
@@ -149,9 +149,19 @@ class JobConfig:
149
149
  if description is not None and not isinstance(description, str):
150
150
  description = str(description)
151
151
 
152
+ depends_raw = data.get('depends_on')
153
+ depends_on: list[str] = []
154
+ if isinstance(depends_raw, str):
155
+ depends_on = [depends_raw]
156
+ elif isinstance(depends_raw, list):
157
+ for entry in depends_raw:
158
+ if isinstance(entry, str):
159
+ depends_on.append(entry)
160
+
152
161
  return cls(
153
162
  name=name,
154
163
  description=description,
164
+ depends_on=depends_on,
155
165
  extract=ExtractRef.from_obj(data.get('extract')),
156
166
  validate=ValidationRef.from_obj(data.get('validate')),
157
167
  transform=TransformRef.from_obj(data.get('transform')),
@@ -0,0 +1,103 @@
1
+ """
2
+ :mod:`etlplus.dag` module.
3
+
4
+ Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
5
+ ``depends_on``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import deque
11
+ from dataclasses import dataclass
12
+
13
+ from .config.jobs import JobConfig
14
+
15
+ # SECTION: EXPORTS ========================================================== #
16
+
17
+
18
+ __all__ = [
19
+ 'DagError',
20
+ 'topological_sort_jobs',
21
+ ]
22
+
23
+
24
+ # SECTION: ERRORS =========================================================== #
25
+
26
+
27
+ @dataclass(slots=True)
28
+ class DagError(ValueError):
29
+ """
30
+ Raised when the job dependency graph is invalid.
31
+
32
+ Attributes
33
+ ----------
34
+ message : str
35
+ Error message.
36
+ """
37
+
38
+ # -- Attributes -- #
39
+
40
+ message: str
41
+
42
+ # -- Magic Methods (Object Representation) -- #
43
+
44
+ def __str__(self) -> str:
45
+ return self.message
46
+
47
+
48
+ # SECTION: FUNCTIONS ======================================================== #
49
+
50
+
51
def topological_sort_jobs(
    jobs: list[JobConfig],
) -> list[JobConfig]:
    """
    Return jobs in topological order based on ``depends_on``.

    Jobs with no unmet dependencies are emitted first. Ties are broken by
    job name so the resulting order is deterministic.

    Parameters
    ----------
    jobs : list[JobConfig]
        List of job configurations to sort.

    Returns
    -------
    list[JobConfig]
        Jobs sorted in topological order.

    Raises
    ------
    DagError
        If two jobs share a name, if a dependency is missing or
        self-referential, or if a dependency cycle is detected.
    """
    # Build the name index explicitly: a dict comprehension would silently
    # collapse duplicate names, which would later surface as a misleading
    # "cycle" error because fewer names than jobs get ordered.
    index: dict[str, JobConfig] = {}
    for job in jobs:
        if job.name in index:
            raise DagError(f'Duplicate job name "{job.name}"')
        index[job.name] = job

    # edges[dep] holds the jobs that must wait for ``dep``; indegree[name]
    # counts the distinct dependencies ``name`` is still waiting on.
    edges: dict[str, set[str]] = {name: set() for name in index}
    indegree: dict[str, int] = dict.fromkeys(index, 0)

    for job in jobs:
        for dep in job.depends_on:
            if dep not in index:
                raise DagError(
                    f'Unknown dependency "{dep}" in job "{job.name}"',
                )
            if dep == job.name:
                raise DagError(f'Job "{job.name}" depends on itself')
            # A dependency listed twice must only count once.
            if job.name not in edges[dep]:
                edges[dep].add(job.name)
                indegree[job.name] += 1

    # Kahn's algorithm: repeatedly emit jobs whose dependencies are all met.
    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
    ordered: list[str] = []

    while queue:
        name = queue.popleft()
        ordered.append(name)
        for child in sorted(edges[name]):
            indegree[child] -= 1
            if indegree[child] == 0:
                queue.append(child)

    # Any job never emitted still has an unmet dependency, i.e. it is part
    # of (or downstream of) a cycle.
    if len(ordered) != len(index):
        raise DagError('Dependency cycle detected')

    return [index[name] for name in ordered]
@@ -0,0 +1,78 @@
1
+ """
2
+ :mod:`etlplus.file.accdb` module.
3
+
4
+ Helpers for reading/writing newer Microsoft Access database (ACCDB) files.
5
+
6
+ Notes
7
+ -----
8
+ - An ACCDB file is a proprietary database file format used by Microsoft Access
9
+ 2007 and later.
10
+ - Common cases:
11
+ - Storing relational data for small to medium-sized applications.
12
+ - Desktop database applications.
13
+ - Data management for non-enterprise solutions.
14
+ - Rule of thumb:
15
+ - If the file follows the ACCDB specification, use this module for reading
16
+ and writing.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from pathlib import Path
22
+
23
+ from ..types import JSONData
24
+ from ..types import JSONList
25
+ from . import stub
26
+
27
+ # SECTION: EXPORTS ========================================================== #
28
+
29
+
30
+ __all__ = [
31
+ 'read',
32
+ 'write',
33
+ ]
34
+
35
+
36
+ # SECTION: FUNCTIONS ======================================================== #
37
+
38
+
39
def read(
    path: Path,
) -> JSONList:
    """
    Load the ACCDB file at ``path``.

    Delegates to :func:`stub.read` with ``format_name='ACCDB'``.

    Parameters
    ----------
    path : Path
        Location of the ACCDB file on disk.

    Returns
    -------
    JSONList
        Whatever :func:`stub.read` returns for the ACCDB format.
    """
    records: JSONList = stub.read(path, format_name='ACCDB')
    return records
56
+
57
+
58
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Store ``data`` as ACCDB at ``path`` and report the record count.

    Delegates to :func:`stub.write` with ``format_name='ACCDB'``.

    Parameters
    ----------
    path : Path
        Location of the ACCDB file on disk.
    data : JSONData
        Payload to write; expected to be a list of dictionaries or a single
        dictionary.

    Returns
    -------
    int
        The row count reported by :func:`stub.write`.
    """
    written: int = stub.write(path, data, format_name='ACCDB')
    return written
@@ -0,0 +1,78 @@
1
+ """
2
+ :mod:`etlplus.file.arrow` module.
3
+
4
+ Helpers for reading/writing Apache Arrow (ARROW) files.
5
+
6
+ Notes
7
+ -----
8
+ - An ARROW file is a binary file format designed for efficient
9
+ columnar data storage and processing.
10
+ - Common cases:
11
+ - High-performance data analytics.
12
+ - Interoperability between different data processing systems.
13
+ - In-memory data representation for fast computations.
14
+ - Rule of thumb:
15
+ - If the file follows the Apache Arrow specification, use this module for
16
+ reading and writing.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from pathlib import Path
22
+
23
+ from ..types import JSONData
24
+ from ..types import JSONList
25
+ from . import stub
26
+
27
+ # SECTION: EXPORTS ========================================================== #
28
+
29
+
30
+ __all__ = [
31
+ 'read',
32
+ 'write',
33
+ ]
34
+
35
+
36
+ # SECTION: FUNCTIONS ======================================================== #
37
+
38
+
39
def read(
    path: Path,
) -> JSONList:
    """
    Load the Apache Arrow (ARROW) file at ``path``.

    Delegates to :func:`stub.read` with ``format_name='ARROW'``.

    Parameters
    ----------
    path : Path
        Location of the ARROW file on disk.

    Returns
    -------
    JSONList
        Whatever :func:`stub.read` returns for the ARROW format.
    """
    records: JSONList = stub.read(path, format_name='ARROW')
    return records
56
+
57
+
58
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Store ``data`` as ARROW at ``path`` and report the record count.

    Delegates to :func:`stub.write` with ``format_name='ARROW'``.

    Parameters
    ----------
    path : Path
        Location of the ARROW file on disk.
    data : JSONData
        Payload to write; expected to be a list of dictionaries or a single
        dictionary.

    Returns
    -------
    int
        The row count reported by :func:`stub.write`.
    """
    written: int = stub.write(path, data, format_name='ARROW')
    return written