etlplus 0.11.12__tar.gz → 0.12.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. {etlplus-0.11.12/etlplus.egg-info → etlplus-0.12.3}/PKG-INFO +6 -1
  2. etlplus-0.12.3/etlplus/file/_io.py +120 -0
  3. etlplus-0.12.3/etlplus/file/_pandas.py +58 -0
  4. etlplus-0.12.3/etlplus/file/avro.py +186 -0
  5. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/core.py +3 -3
  6. etlplus-0.12.3/etlplus/file/csv.py +67 -0
  7. etlplus-0.12.3/etlplus/file/feather.py +99 -0
  8. etlplus-0.12.3/etlplus/file/gz.py +123 -0
  9. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/json.py +13 -2
  10. etlplus-0.12.3/etlplus/file/ndjson.py +109 -0
  11. etlplus-0.12.3/etlplus/file/orc.py +99 -0
  12. etlplus-0.12.3/etlplus/file/parquet.py +101 -0
  13. etlplus-0.12.3/etlplus/file/tsv.py +67 -0
  14. etlplus-0.12.3/etlplus/file/txt.py +99 -0
  15. etlplus-0.12.3/etlplus/file/xls.py +88 -0
  16. etlplus-0.12.3/etlplus/file/xlsx.py +99 -0
  17. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/xml.py +12 -3
  18. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/yaml.py +13 -2
  19. etlplus-0.12.3/etlplus/file/zip.py +175 -0
  20. {etlplus-0.11.12 → etlplus-0.12.3/etlplus.egg-info}/PKG-INFO +6 -1
  21. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus.egg-info/SOURCES.txt +2 -0
  22. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus.egg-info/requires.txt +5 -0
  23. {etlplus-0.11.12 → etlplus-0.12.3}/pyproject.toml +5 -0
  24. {etlplus-0.11.12 → etlplus-0.12.3}/setup.py +5 -0
  25. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/file/test_u_file_core.py +254 -57
  26. etlplus-0.11.12/etlplus/file/avro.py +0 -59
  27. etlplus-0.11.12/etlplus/file/csv.py +0 -82
  28. etlplus-0.11.12/etlplus/file/feather.py +0 -59
  29. etlplus-0.11.12/etlplus/file/gz.py +0 -59
  30. etlplus-0.11.12/etlplus/file/ndjson.py +0 -59
  31. etlplus-0.11.12/etlplus/file/orc.py +0 -59
  32. etlplus-0.11.12/etlplus/file/parquet.py +0 -59
  33. etlplus-0.11.12/etlplus/file/tsv.py +0 -59
  34. etlplus-0.11.12/etlplus/file/txt.py +0 -59
  35. etlplus-0.11.12/etlplus/file/xls.py +0 -59
  36. etlplus-0.11.12/etlplus/file/xlsx.py +0 -59
  37. etlplus-0.11.12/etlplus/file/zip.py +0 -49
  38. {etlplus-0.11.12 → etlplus-0.12.3}/.coveragerc +0 -0
  39. {etlplus-0.11.12 → etlplus-0.12.3}/.editorconfig +0 -0
  40. {etlplus-0.11.12 → etlplus-0.12.3}/.gitattributes +0 -0
  41. {etlplus-0.11.12 → etlplus-0.12.3}/.github/actions/python-bootstrap/action.yml +0 -0
  42. {etlplus-0.11.12 → etlplus-0.12.3}/.github/workflows/ci.yml +0 -0
  43. {etlplus-0.11.12 → etlplus-0.12.3}/.gitignore +0 -0
  44. {etlplus-0.11.12 → etlplus-0.12.3}/.pre-commit-config.yaml +0 -0
  45. {etlplus-0.11.12 → etlplus-0.12.3}/.ruff.toml +0 -0
  46. {etlplus-0.11.12 → etlplus-0.12.3}/CODE_OF_CONDUCT.md +0 -0
  47. {etlplus-0.11.12 → etlplus-0.12.3}/CONTRIBUTING.md +0 -0
  48. {etlplus-0.11.12 → etlplus-0.12.3}/DEMO.md +0 -0
  49. {etlplus-0.11.12 → etlplus-0.12.3}/LICENSE +0 -0
  50. {etlplus-0.11.12 → etlplus-0.12.3}/MANIFEST.in +0 -0
  51. {etlplus-0.11.12 → etlplus-0.12.3}/Makefile +0 -0
  52. {etlplus-0.11.12 → etlplus-0.12.3}/README.md +0 -0
  53. {etlplus-0.11.12 → etlplus-0.12.3}/REFERENCES.md +0 -0
  54. {etlplus-0.11.12 → etlplus-0.12.3}/SECURITY.md +0 -0
  55. {etlplus-0.11.12 → etlplus-0.12.3}/SUPPORT.md +0 -0
  56. {etlplus-0.11.12 → etlplus-0.12.3}/docs/README.md +0 -0
  57. {etlplus-0.11.12 → etlplus-0.12.3}/docs/pipeline-guide.md +0 -0
  58. {etlplus-0.11.12 → etlplus-0.12.3}/docs/snippets/installation_version.md +0 -0
  59. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/README.md +0 -0
  60. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/__init__.py +0 -0
  61. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/__main__.py +0 -0
  62. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/__version__.py +0 -0
  63. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/README.md +0 -0
  64. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/__init__.py +0 -0
  65. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/auth.py +0 -0
  66. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/config.py +0 -0
  67. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/endpoint_client.py +0 -0
  68. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/errors.py +0 -0
  69. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/pagination/__init__.py +0 -0
  70. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/pagination/client.py +0 -0
  71. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/pagination/config.py +0 -0
  72. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/pagination/paginator.py +0 -0
  73. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/rate_limiting/__init__.py +0 -0
  74. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/rate_limiting/config.py +0 -0
  75. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
  76. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/request_manager.py +0 -0
  77. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/retry_manager.py +0 -0
  78. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/transport.py +0 -0
  79. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/api/types.py +0 -0
  80. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/README.md +0 -0
  81. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/__init__.py +0 -0
  82. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/commands.py +0 -0
  83. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/constants.py +0 -0
  84. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/handlers.py +0 -0
  85. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/io.py +0 -0
  86. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/main.py +0 -0
  87. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/options.py +0 -0
  88. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/state.py +0 -0
  89. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/cli/types.py +0 -0
  90. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/README.md +0 -0
  91. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/__init__.py +0 -0
  92. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/connector.py +0 -0
  93. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/jobs.py +0 -0
  94. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/pipeline.py +0 -0
  95. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/profile.py +0 -0
  96. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/types.py +0 -0
  97. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/config/utils.py +0 -0
  98. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/README.md +0 -0
  99. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/__init__.py +0 -0
  100. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/ddl.py +0 -0
  101. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/engine.py +0 -0
  102. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/orm.py +0 -0
  103. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/schema.py +0 -0
  104. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/database/types.py +0 -0
  105. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/enums.py +0 -0
  106. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/extract.py +0 -0
  107. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/README.md +0 -0
  108. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/__init__.py +0 -0
  109. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/file/enums.py +0 -0
  110. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/load.py +0 -0
  111. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/mixins.py +0 -0
  112. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/py.typed +0 -0
  113. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/run.py +0 -0
  114. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/run_helpers.py +0 -0
  115. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/templates/README.md +0 -0
  116. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/templates/__init__.py +0 -0
  117. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/templates/ddl.sql.j2 +0 -0
  118. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/templates/view.sql.j2 +0 -0
  119. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/transform.py +0 -0
  120. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/types.py +0 -0
  121. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/utils.py +0 -0
  122. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/validate.py +0 -0
  123. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/validation/README.md +0 -0
  124. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/validation/__init__.py +0 -0
  125. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus/validation/utils.py +0 -0
  126. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus.egg-info/dependency_links.txt +0 -0
  127. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus.egg-info/entry_points.txt +0 -0
  128. {etlplus-0.11.12 → etlplus-0.12.3}/etlplus.egg-info/top_level.txt +0 -0
  129. {etlplus-0.11.12 → etlplus-0.12.3}/examples/README.md +0 -0
  130. {etlplus-0.11.12 → etlplus-0.12.3}/examples/configs/ddl_spec.yml +0 -0
  131. {etlplus-0.11.12 → etlplus-0.12.3}/examples/configs/pipeline.yml +0 -0
  132. {etlplus-0.11.12 → etlplus-0.12.3}/examples/data/sample.csv +0 -0
  133. {etlplus-0.11.12 → etlplus-0.12.3}/examples/data/sample.json +0 -0
  134. {etlplus-0.11.12 → etlplus-0.12.3}/examples/data/sample.xml +0 -0
  135. {etlplus-0.11.12 → etlplus-0.12.3}/examples/data/sample.xsd +0 -0
  136. {etlplus-0.11.12 → etlplus-0.12.3}/examples/data/sample.yaml +0 -0
  137. {etlplus-0.11.12 → etlplus-0.12.3}/examples/quickstart_python.py +0 -0
  138. {etlplus-0.11.12 → etlplus-0.12.3}/pytest.ini +0 -0
  139. {etlplus-0.11.12 → etlplus-0.12.3}/setup.cfg +0 -0
  140. {etlplus-0.11.12 → etlplus-0.12.3}/tests/__init__.py +0 -0
  141. {etlplus-0.11.12 → etlplus-0.12.3}/tests/conftest.py +0 -0
  142. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/conftest.py +0 -0
  143. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_cli.py +0 -0
  144. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_examples_data_parity.py +0 -0
  145. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_pagination_strategy.py +0 -0
  146. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_pipeline_smoke.py +0 -0
  147. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
  148. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_run.py +0 -0
  149. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
  150. {etlplus-0.11.12 → etlplus-0.12.3}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
  151. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/conftest.py +0 -0
  152. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_auth.py +0 -0
  153. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_config.py +0 -0
  154. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_endpoint_client.py +0 -0
  155. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_mocks.py +0 -0
  156. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_pagination_client.py +0 -0
  157. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_pagination_config.py +0 -0
  158. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_paginator.py +0 -0
  159. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_rate_limit_config.py +0 -0
  160. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_rate_limiter.py +0 -0
  161. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_request_manager.py +0 -0
  162. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_retry_manager.py +0 -0
  163. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_transport.py +0 -0
  164. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/api/test_u_types.py +0 -0
  165. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/cli/conftest.py +0 -0
  166. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_handlers.py +0 -0
  167. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_io.py +0 -0
  168. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_main.py +0 -0
  169. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_state.py +0 -0
  170. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/config/test_u_config_utils.py +0 -0
  171. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/config/test_u_connector.py +0 -0
  172. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/config/test_u_jobs.py +0 -0
  173. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/config/test_u_pipeline.py +0 -0
  174. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/conftest.py +0 -0
  175. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/database/test_u_database_ddl.py +0 -0
  176. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/database/test_u_database_engine.py +0 -0
  177. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/database/test_u_database_orm.py +0 -0
  178. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/database/test_u_database_schema.py +0 -0
  179. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/file/test_u_file_enums.py +0 -0
  180. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/file/test_u_file_yaml.py +0 -0
  181. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_enums.py +0 -0
  182. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_extract.py +0 -0
  183. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_load.py +0 -0
  184. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_main.py +0 -0
  185. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_mixins.py +0 -0
  186. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_run.py +0 -0
  187. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_run_helpers.py +0 -0
  188. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_transform.py +0 -0
  189. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_utils.py +0 -0
  190. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_validate.py +0 -0
  191. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/test_u_version.py +0 -0
  192. {etlplus-0.11.12 → etlplus-0.12.3}/tests/unit/validation/test_u_validation_utils.py +0 -0
  193. {etlplus-0.11.12 → etlplus-0.12.3}/tools/update_demo_snippets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.11.12
3
+ Version: 0.12.3
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
17
17
  Requires-Python: >=3.13,<3.15
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: fastavro>=1.12.1
20
21
  Requires-Dist: jinja2>=3.1.6
22
+ Requires-Dist: openpyxl>=3.1.5
21
23
  Requires-Dist: pyodbc>=5.3.0
24
+ Requires-Dist: pyarrow>=22.0.0
22
25
  Requires-Dist: python-dotenv>=1.2.1
23
26
  Requires-Dist: pandas>=2.3.3
24
27
  Requires-Dist: pydantic>=2.12.5
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
26
29
  Requires-Dist: requests>=2.32.5
27
30
  Requires-Dist: SQLAlchemy>=2.0.45
28
31
  Requires-Dist: typer>=0.21.0
32
+ Requires-Dist: xlrd>=2.0.2
33
+ Requires-Dist: xlwt>=1.3.0
29
34
  Provides-Extra: dev
30
35
  Requires-Dist: black>=25.9.0; extra == "dev"
31
36
  Requires-Dist: build>=1.2.2; extra == "dev"
@@ -0,0 +1,120 @@
1
+ """
2
+ :mod:`etlplus.file._io` module.
3
+
4
+ Shared helpers for record normalization and delimited text formats.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ from pathlib import Path
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+
17
+ # SECTION: FUNCTIONS ======================================================== #
18
+
19
+
20
def normalize_records(
    data: JSONData,
    format_name: str,
) -> JSONList:
    """
    Coerce a payload into a list of record dictionaries.

    Parameters
    ----------
    data : JSONData
        Payload to normalize. A list is returned as-is (the same object)
        after validation; anything else is wrapped in a one-item list.
    format_name : str
        Human-readable format name used as the prefix of the error message.

    Returns
    -------
    JSONList
        The payload expressed as a list of dictionaries.

    Raises
    ------
    TypeError
        If ``data`` is a list holding at least one non-dict item.
    """
    # Non-list payloads are wrapped without further inspection.
    if not isinstance(data, list):
        return [cast('JSONDict', data)]

    for item in data:
        if not isinstance(item, dict):
            raise TypeError(
                f'{format_name} payloads must contain only objects (dicts)',
            )
    return cast('JSONList', data)
51
+
52
+
53
def read_delimited(path: Path, *, delimiter: str) -> JSONList:
    """
    Read delimited content from ``path``.

    The first row is used as the header. Rows whose values are all empty
    (for example a line holding only delimiters) are skipped.

    Parameters
    ----------
    path : Path
        Path to the delimited file on disk.
    delimiter : str
        Delimiter character for parsing.

    Returns
    -------
    JSONList
        The list of dictionaries read from the delimited file.
    """
    # ``utf-8-sig`` decodes plain UTF-8 unchanged but strips a leading
    # byte-order mark, which would otherwise be glued to the first header
    # name (e.g. ``'\ufeffid'``) for files exported by spreadsheet tools.
    with path.open('r', encoding='utf-8-sig', newline='') as handle:
        reader = csv.DictReader(handle, delimiter=delimiter)
        return [
            cast('JSONDict', dict(row))
            for row in reader
            if any(row.values())
        ]
80
+
81
+
82
def write_delimited(path: Path, data: JSONData, *, delimiter: str) -> int:
    """
    Write ``data`` to a delimited file and return record count.

    The header is the sorted union of all keys found in the rows; a key
    missing from a given row is written as an empty cell.

    Parameters
    ----------
    path : Path
        Path to the delimited file on disk. Missing parent directories are
        created.
    data : JSONData
        Data to write as delimited rows. For a list payload, items that are
        not dictionaries are silently dropped; any other payload is treated
        as a single row.
    delimiter : str
        Delimiter character for writing.

    Returns
    -------
    int
        The number of rows written. ``0`` means there were no rows and no
        file was created.
    """
    rows: list[JSONDict]
    if isinstance(data, list):
        rows = [row for row in data if isinstance(row, dict)]
    else:
        rows = [data]

    if not rows:
        return 0

    fieldnames = sorted({key for row in rows for key in row})

    # Create the destination directory first so that writing to a new
    # directory does not fail with ``FileNotFoundError``.
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('w', encoding='utf-8', newline='') as handle:
        writer = csv.DictWriter(
            handle,
            fieldnames=fieldnames,
            delimiter=delimiter,
        )
        writer.writeheader()
        # Project each row onto the full header so extra or missing keys
        # never make ``DictWriter`` raise.
        writer.writerows(
            {field: row.get(field) for field in fieldnames} for row in rows
        )

    return len(rows)
@@ -0,0 +1,58 @@
1
+ """
2
+ :mod:`etlplus.file._pandas` module.
3
+
4
+ Shared helpers for optional pandas usage.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ # SECTION: EXPORTS ========================================================== #
12
+
13
+
14
+ __all__ = [
15
+ 'get_pandas',
16
+ ]
17
+
18
+ # SECTION: INTERNAL CONSTANTS =============================================== #
19
+
20
+
21
+ _PANDAS_CACHE: dict[str, Any] = {}
22
+
23
+
24
+ # SECTION: FUNCTIONS ======================================================== #
25
+
26
+
27
def get_pandas(format_name: str) -> Any:
    """
    Lazily import pandas and hand back the module object.

    The module is stored in ``_PANDAS_CACHE`` after the first successful
    import so later calls skip the import machinery.

    Parameters
    ----------
    format_name : str
        Human-readable format name for error messages.

    Returns
    -------
    Any
        The pandas module.

    Raises
    ------
    ImportError
        If the optional dependency is missing.
    """
    cached = _PANDAS_CACHE.get('mod')
    if cached is None:
        try:
            cached = __import__('pandas')
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                f'{format_name} support requires optional dependency "pandas".\n'
                'Install with: pip install pandas',
            ) from exc
        _PANDAS_CACHE['mod'] = cached
    return cached
@@ -0,0 +1,186 @@
1
+ """
2
+ :mod:`etlplus.file.avro` module.
3
+
4
+ Helpers for reading/writing Avro files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+ from ._io import normalize_records
17
+
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
27
+ # SECTION: INTERNAL CONSTANTS =============================================== #
28
+
29
+
30
+ _FASTAVRO_CACHE: dict[str, Any] = {}
31
+
32
+
33
+ _PRIMITIVE_TYPES: tuple[type, ...] = (
34
+ bool,
35
+ int,
36
+ float,
37
+ str,
38
+ bytes,
39
+ bytearray,
40
+ )
41
+
42
+
43
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
44
+
45
+
46
def _get_fastavro() -> Any:
    """
    Lazily import fastavro and hand back the module object.

    The module is stored in ``_FASTAVRO_CACHE`` after the first successful
    import. A missing dependency is reported as an ``ImportError`` that
    includes an install hint.
    """
    cached = _FASTAVRO_CACHE.get('mod')
    if cached is None:
        try:
            cached = __import__('fastavro')
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                'AVRO support requires optional dependency "fastavro".\n'
                'Install with: pip install fastavro',
            ) from exc
        _FASTAVRO_CACHE['mod'] = cached
    return cached
65
+
66
+
67
def _infer_schema(records: JSONList) -> dict[str, Any]:
    """
    Build a flat Avro record schema from the given records.

    Fields are the sorted union of all record keys. A key that is absent
    from a record (or mapped to ``None``) contributes the ``null`` type for
    that field. Any non-primitive value raises ``TypeError``.
    """
    schema_fields: list[dict[str, Any]] = []
    for field_name in sorted({key for record in records for key in record}):
        observed: list[str] = []
        for record in records:
            value = record.get(field_name)
            if value is None:
                observed.append('null')
            elif isinstance(value, dict | list) or not isinstance(
                value,
                _PRIMITIVE_TYPES,
            ):
                # Containers and unknown objects are both rejected with the
                # same message.
                raise TypeError(
                    'AVRO payloads must contain only primitive values',
                )
            else:
                observed.append(cast(str, _infer_value_type(value)))
        schema_fields.append(
            {'name': field_name, 'type': _merge_types(observed)},
        )

    return {
        'name': 'etlplus_record',
        'type': 'record',
        'fields': schema_fields,
    }
98
+
99
+
100
+ def _infer_value_type(value: object) -> str | list[str]:
101
+ """
102
+ Infer the Avro type for a primitive value.
103
+
104
+ Raises TypeError for unsupported types.
105
+ """
106
+ if value is None:
107
+ return 'null'
108
+ if isinstance(value, bool):
109
+ return 'boolean'
110
+ if isinstance(value, int):
111
+ return 'long'
112
+ if isinstance(value, float):
113
+ return 'double'
114
+ if isinstance(value, str):
115
+ return 'string'
116
+ if isinstance(value, (bytes, bytearray)):
117
+ return 'bytes'
118
+ raise TypeError('AVRO payloads must contain only primitive values')
119
+
120
+
121
+ def _merge_types(types: list[str]) -> str | list[str]:
122
+ """Return a stable Avro type union for a list of types."""
123
+ unique = list(dict.fromkeys(types))
124
+ if len(unique) == 1:
125
+ return unique[0]
126
+ ordered = ['null'] + sorted(t for t in unique if t != 'null')
127
+ return ordered
128
+
129
+
130
+ # SECTION: FUNCTIONS ======================================================== #
131
+
132
+
133
def read(
    path: Path,
) -> JSONList:
    """
    Load every record stored in the AVRO file at ``path``.

    Parameters
    ----------
    path : Path
        Path to the AVRO file on disk.

    Returns
    -------
    JSONList
        The records yielded by the fastavro reader, in file order.
    """
    avro = _get_fastavro()
    rows: JSONList = []
    with path.open('rb') as stream:
        for record in avro.reader(stream):
            rows.append(cast(JSONDict, record))
    return rows
153
+
154
+
155
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Serialize ``data`` as AVRO at ``path`` and report the record count.

    The schema is inferred from the records themselves. An empty payload
    writes nothing and returns ``0``.

    Parameters
    ----------
    path : Path
        Path to the AVRO file on disk. Missing parent directories are
        created.
    data : JSONData
        Data to write.

    Returns
    -------
    int
        Number of records written.
    """
    rows = normalize_records(data, 'AVRO')
    if rows:
        avro = _get_fastavro()
        parsed = avro.parse_schema(_infer_schema(rows))

        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open('wb') as stream:
            avro.writer(stream, parsed, rows)

    return len(rows)
@@ -25,7 +25,7 @@ from . import xls
25
25
  from . import xlsx
26
26
  from . import xml
27
27
  from . import yaml
28
- from . import zip
28
+ from . import zip as zip_
29
29
  from .enums import FileFormat
30
30
  from .enums import infer_file_format_and_compression
31
31
 
@@ -222,7 +222,7 @@ class File:
222
222
  case FileFormat.YAML:
223
223
  return yaml.read(self.path)
224
224
  case FileFormat.ZIP:
225
- return zip.read(self.path)
225
+ return zip_.read(self.path)
226
226
  raise ValueError(f'Unsupported format: {fmt}')
227
227
 
228
228
  def write(
@@ -283,5 +283,5 @@ class File:
283
283
  case FileFormat.YAML:
284
284
  return yaml.write(self.path, data)
285
285
  case FileFormat.ZIP:
286
- return zip.write(self.path, data)
286
+ return zip_.write(self.path, data)
287
287
  raise ValueError(f'Unsupported format: {fmt}')
@@ -0,0 +1,67 @@
1
+ """
2
+ :mod:`etlplus.file.csv` module.
3
+
4
+ Helpers for reading/writing CSV files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ from ..types import JSONData
12
+ from ..types import JSONList
13
+ from ._io import read_delimited
14
+ from ._io import write_delimited
15
+
16
+ # SECTION: EXPORTS ========================================================== #
17
+
18
+
19
+ __all__ = [
20
+ 'read',
21
+ 'write',
22
+ ]
23
+
24
+
25
+ # SECTION: FUNCTIONS ======================================================== #
26
+
27
+
28
def read(
    path: Path,
) -> JSONList:
    """
    Load the CSV file at ``path`` as a list of row dictionaries.

    Parameters
    ----------
    path : Path
        Path to the CSV file on disk.

    Returns
    -------
    JSONList
        The list of dictionaries read from the CSV file.
    """
    # CSV is the comma-delimited case of the shared delimited reader.
    rows = read_delimited(path, delimiter=',')
    return rows
45
+
46
+
47
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Save ``data`` as CSV at ``path`` and report the row count.

    Parameters
    ----------
    path : Path
        Path to the CSV file on disk.
    data : JSONData
        Data to write as CSV. Should be a list of dictionaries or a
        single dictionary.

    Returns
    -------
    int
        The number of rows written to the CSV file.
    """
    # CSV is the comma-delimited case of the shared delimited writer.
    written = write_delimited(path, data, delimiter=',')
    return written
@@ -0,0 +1,99 @@
1
+ """
2
+ :mod:`etlplus.file.feather` module.
3
+
4
+ Helpers for reading/writing Feather files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import cast
11
+
12
+ from ..types import JSONData
13
+ from ..types import JSONList
14
+ from ._io import normalize_records
15
+ from ._pandas import get_pandas
16
+
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: FUNCTIONS ======================================================== #
27
+
28
+
29
def read(
    path: Path,
) -> JSONList:
    """
    Load the Feather file at ``path`` as a list of row dictionaries.

    Parameters
    ----------
    path : Path
        Path to the Feather file on disk.

    Returns
    -------
    JSONList
        The list of dictionaries read from the Feather file.

    Raises
    ------
    ImportError
        When optional dependency "pyarrow" is missing.
    """
    pd = get_pandas('Feather')
    try:
        table = pd.read_feather(path)
    except ImportError as exc:  # pragma: no cover
        # Re-raise with an install hint naming pyarrow.
        message = (
            'Feather support requires optional dependency "pyarrow".\n'
            'Install with: pip install pyarrow'
        )
        raise ImportError(message) from exc

    rows = table.to_dict(orient='records')
    return cast(JSONList, rows)
59
+
60
+
61
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Save ``data`` as Feather at ``path`` and report the record count.

    An empty payload writes nothing and returns ``0``.

    Parameters
    ----------
    path : Path
        Path to the Feather file on disk. Missing parent directories are
        created.
    data : JSONData
        Data to write.

    Returns
    -------
    int
        Number of records written.

    Raises
    ------
    ImportError
        When optional dependency "pyarrow" is missing.
    """
    rows = normalize_records(data, 'Feather')
    if rows:
        pd = get_pandas('Feather')
        path.parent.mkdir(parents=True, exist_ok=True)
        table = pd.DataFrame.from_records(rows)
        try:
            table.to_feather(path)
        except ImportError as exc:  # pragma: no cover
            # Re-raise with an install hint naming pyarrow.
            message = (
                'Feather support requires optional dependency "pyarrow".\n'
                'Install with: pip install pyarrow'
            )
            raise ImportError(message) from exc

    return len(rows)
@@ -0,0 +1,123 @@
1
+ """
2
+ :mod:`etlplus.file.gz` module.
3
+
4
+ Helpers for reading/writing GZ files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import gzip
10
+ import tempfile
11
+ from pathlib import Path
12
+
13
+ from ..types import JSONData
14
+ from .enums import CompressionFormat
15
+ from .enums import FileFormat
16
+ from .enums import infer_file_format_and_compression
17
+
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
27
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
28
+
29
+
30
def _resolve_format(
    path: Path,
) -> FileFormat:
    """
    Determine which format is wrapped inside a gzip-compressed file.

    Parameters
    ----------
    path : Path
        Path to the GZ file on disk.

    Returns
    -------
    FileFormat
        The inner file format reported by
        ``infer_file_format_and_compression``.

    Raises
    ------
    ValueError
        If the path is not recognized as gzip-compressed, or if no inner
        format could be inferred for it.
    """
    inner_format, codec = infer_file_format_and_compression(path)
    if codec is CompressionFormat.GZ:
        if inner_format is not None:
            return inner_format
        raise ValueError(
            f'Cannot infer file format from compressed file {path!r}',
        )
    raise ValueError(f'Not a gzip file: {path}')
59
+
60
+
61
+ # SECTION: FUNCTIONS ======================================================== #
62
+
63
+
64
def read(
    path: Path,
) -> JSONData:
    """
    Read GZ content from ``path`` and parse the inner payload.

    The archive is decompressed into a temporary file named after the inner
    format, and that file is then read through :class:`File`.

    Parameters
    ----------
    path : Path
        Path to the GZ file on disk.

    Returns
    -------
    JSONData
        Parsed payload.

    Raises
    ------
    ValueError
        If the inner format cannot be resolved from ``path`` (raised by
        ``_resolve_format``).
    """
    import shutil

    # Imported lazily, as in the original code -- presumably to avoid a
    # circular import with ``.core``; confirm before hoisting.
    from .core import File

    fmt = _resolve_format(path)
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
        # Stream the decompressed bytes in chunks instead of holding the
        # whole payload in memory at once.
        with gzip.open(path, 'rb') as source, tmp_path.open('wb') as target:
            shutil.copyfileobj(source, target)

        return File(tmp_path, fmt).read()
90
+
91
+
92
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Write ``data`` to GZ at ``path`` and return record count.

    The payload is first written in its inner format to a temporary file
    through :class:`File`, and that file is then gzip-compressed to ``path``.

    Parameters
    ----------
    path : Path
        Path to the GZ file on disk. Missing parent directories are
        created.
    data : JSONData
        Data to write.

    Returns
    -------
    int
        Number of records written, as reported by the inner writer. If the
        inner writer produced no file, no archive is created.

    Raises
    ------
    ValueError
        If the inner format cannot be resolved from ``path`` (raised by
        ``_resolve_format``).
    """
    import shutil

    # Imported lazily, as in the original code -- presumably to avoid a
    # circular import with ``.core``; confirm before hoisting.
    from .core import File

    fmt = _resolve_format(path)
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
        count = File(tmp_path, fmt).write(data)

        # Some writers return early without creating a file when there is
        # nothing to write; reading the missing temp file back would raise
        # FileNotFoundError, so mirror their "no output" behavior instead.
        if not tmp_path.exists():
            return count

        path.parent.mkdir(parents=True, exist_ok=True)
        # Stream the payload in chunks instead of loading it all in memory.
        with tmp_path.open('rb') as source, gzip.open(path, 'wb') as target:
            shutil.copyfileobj(source, target)

    return count