etlplus 0.11.12__tar.gz → 0.12.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. {etlplus-0.11.12/etlplus.egg-info → etlplus-0.12.2}/PKG-INFO +6 -1
  2. etlplus-0.12.2/etlplus/file/avro.py +198 -0
  3. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/core.py +3 -3
  4. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/csv.py +12 -3
  5. etlplus-0.12.2/etlplus/file/feather.py +144 -0
  6. etlplus-0.12.2/etlplus/file/gz.py +123 -0
  7. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/json.py +13 -2
  8. etlplus-0.12.2/etlplus/file/ndjson.py +109 -0
  9. etlplus-0.12.2/etlplus/file/orc.py +142 -0
  10. etlplus-0.12.2/etlplus/file/parquet.py +146 -0
  11. etlplus-0.12.2/etlplus/file/tsv.py +91 -0
  12. etlplus-0.12.2/etlplus/file/txt.py +99 -0
  13. etlplus-0.12.2/etlplus/file/xls.py +132 -0
  14. etlplus-0.12.2/etlplus/file/xlsx.py +142 -0
  15. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/xml.py +12 -3
  16. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/yaml.py +13 -2
  17. etlplus-0.12.2/etlplus/file/zip.py +175 -0
  18. {etlplus-0.11.12 → etlplus-0.12.2/etlplus.egg-info}/PKG-INFO +6 -1
  19. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus.egg-info/requires.txt +5 -0
  20. {etlplus-0.11.12 → etlplus-0.12.2}/pyproject.toml +5 -0
  21. {etlplus-0.11.12 → etlplus-0.12.2}/setup.py +5 -0
  22. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/file/test_u_file_core.py +254 -57
  23. etlplus-0.11.12/etlplus/file/avro.py +0 -59
  24. etlplus-0.11.12/etlplus/file/feather.py +0 -59
  25. etlplus-0.11.12/etlplus/file/gz.py +0 -59
  26. etlplus-0.11.12/etlplus/file/ndjson.py +0 -59
  27. etlplus-0.11.12/etlplus/file/orc.py +0 -59
  28. etlplus-0.11.12/etlplus/file/parquet.py +0 -59
  29. etlplus-0.11.12/etlplus/file/tsv.py +0 -59
  30. etlplus-0.11.12/etlplus/file/txt.py +0 -59
  31. etlplus-0.11.12/etlplus/file/xls.py +0 -59
  32. etlplus-0.11.12/etlplus/file/xlsx.py +0 -59
  33. etlplus-0.11.12/etlplus/file/zip.py +0 -49
  34. {etlplus-0.11.12 → etlplus-0.12.2}/.coveragerc +0 -0
  35. {etlplus-0.11.12 → etlplus-0.12.2}/.editorconfig +0 -0
  36. {etlplus-0.11.12 → etlplus-0.12.2}/.gitattributes +0 -0
  37. {etlplus-0.11.12 → etlplus-0.12.2}/.github/actions/python-bootstrap/action.yml +0 -0
  38. {etlplus-0.11.12 → etlplus-0.12.2}/.github/workflows/ci.yml +0 -0
  39. {etlplus-0.11.12 → etlplus-0.12.2}/.gitignore +0 -0
  40. {etlplus-0.11.12 → etlplus-0.12.2}/.pre-commit-config.yaml +0 -0
  41. {etlplus-0.11.12 → etlplus-0.12.2}/.ruff.toml +0 -0
  42. {etlplus-0.11.12 → etlplus-0.12.2}/CODE_OF_CONDUCT.md +0 -0
  43. {etlplus-0.11.12 → etlplus-0.12.2}/CONTRIBUTING.md +0 -0
  44. {etlplus-0.11.12 → etlplus-0.12.2}/DEMO.md +0 -0
  45. {etlplus-0.11.12 → etlplus-0.12.2}/LICENSE +0 -0
  46. {etlplus-0.11.12 → etlplus-0.12.2}/MANIFEST.in +0 -0
  47. {etlplus-0.11.12 → etlplus-0.12.2}/Makefile +0 -0
  48. {etlplus-0.11.12 → etlplus-0.12.2}/README.md +0 -0
  49. {etlplus-0.11.12 → etlplus-0.12.2}/REFERENCES.md +0 -0
  50. {etlplus-0.11.12 → etlplus-0.12.2}/SECURITY.md +0 -0
  51. {etlplus-0.11.12 → etlplus-0.12.2}/SUPPORT.md +0 -0
  52. {etlplus-0.11.12 → etlplus-0.12.2}/docs/README.md +0 -0
  53. {etlplus-0.11.12 → etlplus-0.12.2}/docs/pipeline-guide.md +0 -0
  54. {etlplus-0.11.12 → etlplus-0.12.2}/docs/snippets/installation_version.md +0 -0
  55. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/README.md +0 -0
  56. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/__init__.py +0 -0
  57. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/__main__.py +0 -0
  58. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/__version__.py +0 -0
  59. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/README.md +0 -0
  60. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/__init__.py +0 -0
  61. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/auth.py +0 -0
  62. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/config.py +0 -0
  63. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/endpoint_client.py +0 -0
  64. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/errors.py +0 -0
  65. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/pagination/__init__.py +0 -0
  66. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/pagination/client.py +0 -0
  67. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/pagination/config.py +0 -0
  68. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/pagination/paginator.py +0 -0
  69. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/rate_limiting/__init__.py +0 -0
  70. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/rate_limiting/config.py +0 -0
  71. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
  72. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/request_manager.py +0 -0
  73. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/retry_manager.py +0 -0
  74. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/transport.py +0 -0
  75. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/api/types.py +0 -0
  76. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/README.md +0 -0
  77. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/__init__.py +0 -0
  78. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/commands.py +0 -0
  79. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/constants.py +0 -0
  80. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/handlers.py +0 -0
  81. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/io.py +0 -0
  82. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/main.py +0 -0
  83. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/options.py +0 -0
  84. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/state.py +0 -0
  85. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/cli/types.py +0 -0
  86. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/README.md +0 -0
  87. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/__init__.py +0 -0
  88. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/connector.py +0 -0
  89. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/jobs.py +0 -0
  90. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/pipeline.py +0 -0
  91. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/profile.py +0 -0
  92. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/types.py +0 -0
  93. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/config/utils.py +0 -0
  94. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/README.md +0 -0
  95. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/__init__.py +0 -0
  96. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/ddl.py +0 -0
  97. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/engine.py +0 -0
  98. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/orm.py +0 -0
  99. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/schema.py +0 -0
  100. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/database/types.py +0 -0
  101. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/enums.py +0 -0
  102. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/extract.py +0 -0
  103. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/README.md +0 -0
  104. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/__init__.py +0 -0
  105. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/file/enums.py +0 -0
  106. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/load.py +0 -0
  107. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/mixins.py +0 -0
  108. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/py.typed +0 -0
  109. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/run.py +0 -0
  110. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/run_helpers.py +0 -0
  111. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/templates/README.md +0 -0
  112. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/templates/__init__.py +0 -0
  113. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/templates/ddl.sql.j2 +0 -0
  114. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/templates/view.sql.j2 +0 -0
  115. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/transform.py +0 -0
  116. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/types.py +0 -0
  117. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/utils.py +0 -0
  118. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/validate.py +0 -0
  119. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/validation/README.md +0 -0
  120. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/validation/__init__.py +0 -0
  121. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus/validation/utils.py +0 -0
  122. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus.egg-info/SOURCES.txt +0 -0
  123. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus.egg-info/dependency_links.txt +0 -0
  124. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus.egg-info/entry_points.txt +0 -0
  125. {etlplus-0.11.12 → etlplus-0.12.2}/etlplus.egg-info/top_level.txt +0 -0
  126. {etlplus-0.11.12 → etlplus-0.12.2}/examples/README.md +0 -0
  127. {etlplus-0.11.12 → etlplus-0.12.2}/examples/configs/ddl_spec.yml +0 -0
  128. {etlplus-0.11.12 → etlplus-0.12.2}/examples/configs/pipeline.yml +0 -0
  129. {etlplus-0.11.12 → etlplus-0.12.2}/examples/data/sample.csv +0 -0
  130. {etlplus-0.11.12 → etlplus-0.12.2}/examples/data/sample.json +0 -0
  131. {etlplus-0.11.12 → etlplus-0.12.2}/examples/data/sample.xml +0 -0
  132. {etlplus-0.11.12 → etlplus-0.12.2}/examples/data/sample.xsd +0 -0
  133. {etlplus-0.11.12 → etlplus-0.12.2}/examples/data/sample.yaml +0 -0
  134. {etlplus-0.11.12 → etlplus-0.12.2}/examples/quickstart_python.py +0 -0
  135. {etlplus-0.11.12 → etlplus-0.12.2}/pytest.ini +0 -0
  136. {etlplus-0.11.12 → etlplus-0.12.2}/setup.cfg +0 -0
  137. {etlplus-0.11.12 → etlplus-0.12.2}/tests/__init__.py +0 -0
  138. {etlplus-0.11.12 → etlplus-0.12.2}/tests/conftest.py +0 -0
  139. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/conftest.py +0 -0
  140. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_cli.py +0 -0
  141. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_examples_data_parity.py +0 -0
  142. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_pagination_strategy.py +0 -0
  143. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_pipeline_smoke.py +0 -0
  144. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
  145. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_run.py +0 -0
  146. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
  147. {etlplus-0.11.12 → etlplus-0.12.2}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
  148. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/conftest.py +0 -0
  149. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_auth.py +0 -0
  150. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_config.py +0 -0
  151. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_endpoint_client.py +0 -0
  152. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_mocks.py +0 -0
  153. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_pagination_client.py +0 -0
  154. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_pagination_config.py +0 -0
  155. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_paginator.py +0 -0
  156. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_rate_limit_config.py +0 -0
  157. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_rate_limiter.py +0 -0
  158. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_request_manager.py +0 -0
  159. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_retry_manager.py +0 -0
  160. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_transport.py +0 -0
  161. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/api/test_u_types.py +0 -0
  162. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/cli/conftest.py +0 -0
  163. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/cli/test_u_cli_handlers.py +0 -0
  164. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/cli/test_u_cli_io.py +0 -0
  165. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/cli/test_u_cli_main.py +0 -0
  166. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/cli/test_u_cli_state.py +0 -0
  167. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/config/test_u_config_utils.py +0 -0
  168. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/config/test_u_connector.py +0 -0
  169. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/config/test_u_jobs.py +0 -0
  170. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/config/test_u_pipeline.py +0 -0
  171. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/conftest.py +0 -0
  172. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/database/test_u_database_ddl.py +0 -0
  173. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/database/test_u_database_engine.py +0 -0
  174. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/database/test_u_database_orm.py +0 -0
  175. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/database/test_u_database_schema.py +0 -0
  176. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/file/test_u_file_enums.py +0 -0
  177. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/file/test_u_file_yaml.py +0 -0
  178. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_enums.py +0 -0
  179. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_extract.py +0 -0
  180. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_load.py +0 -0
  181. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_main.py +0 -0
  182. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_mixins.py +0 -0
  183. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_run.py +0 -0
  184. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_run_helpers.py +0 -0
  185. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_transform.py +0 -0
  186. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_utils.py +0 -0
  187. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_validate.py +0 -0
  188. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/test_u_version.py +0 -0
  189. {etlplus-0.11.12 → etlplus-0.12.2}/tests/unit/validation/test_u_validation_utils.py +0 -0
  190. {etlplus-0.11.12 → etlplus-0.12.2}/tools/update_demo_snippets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.11.12
3
+ Version: 0.12.2
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
17
17
  Requires-Python: >=3.13,<3.15
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: fastavro>=1.12.1
20
21
  Requires-Dist: jinja2>=3.1.6
22
+ Requires-Dist: openpyxl>=3.1.5
21
23
  Requires-Dist: pyodbc>=5.3.0
24
+ Requires-Dist: pyarrow>=22.0.0
22
25
  Requires-Dist: python-dotenv>=1.2.1
23
26
  Requires-Dist: pandas>=2.3.3
24
27
  Requires-Dist: pydantic>=2.12.5
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
26
29
  Requires-Dist: requests>=2.32.5
27
30
  Requires-Dist: SQLAlchemy>=2.0.45
28
31
  Requires-Dist: typer>=0.21.0
32
+ Requires-Dist: xlrd>=2.0.2
33
+ Requires-Dist: xlwt>=1.3.0
29
34
  Provides-Extra: dev
30
35
  Requires-Dist: black>=25.9.0; extra == "dev"
31
36
  Requires-Dist: build>=1.2.2; extra == "dev"
@@ -0,0 +1,198 @@
1
+ """
2
+ :mod:`etlplus.file.avro` module.
3
+
4
+ Helpers for reading/writing Avro files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: INTERNAL CONSTANTS =============================================== #
27
+
28
+
29
+ _FASTAVRO_CACHE: dict[str, Any] = {}
30
+
31
+
32
+ _PRIMITIVE_TYPES: tuple[type, ...] = (
33
+ bool,
34
+ int,
35
+ float,
36
+ str,
37
+ bytes,
38
+ bytearray,
39
+ )
40
+
41
+
42
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
43
+
44
+
45
+ def _get_fastavro() -> Any:
46
+ """
47
+ Return the fastavro module, importing it on first use.
48
+
49
+ Raises an informative ImportError if the optional dependency is missing.
50
+ """
51
+ mod = _FASTAVRO_CACHE.get('mod')
52
+ if mod is not None: # pragma: no cover - tiny branch
53
+ return mod
54
+ try:
55
+ _fastavro = __import__('fastavro') # type: ignore[assignment]
56
+ except ImportError as e: # pragma: no cover
57
+ raise ImportError(
58
+ 'AVRO support requires optional dependency "fastavro".\n'
59
+ 'Install with: pip install fastavro',
60
+ ) from e
61
+ _FASTAVRO_CACHE['mod'] = _fastavro
62
+
63
+ return _fastavro
64
+
65
+
66
+ def _normalize_records(data: JSONData) -> JSONList:
67
+ """
68
+ Normalize JSON payloads into a list of dictionaries.
69
+
70
+ Raises TypeError when payloads contain non-dict items.
71
+ """
72
+ if isinstance(data, list):
73
+ if not all(isinstance(item, dict) for item in data):
74
+ raise TypeError('AVRO payloads must contain only objects (dicts)')
75
+ return cast(JSONList, data)
76
+ return [cast(JSONDict, data)]
77
+
78
+
79
+ def _infer_value_type(value: object) -> str | list[str]:
80
+ """
81
+ Infer the Avro type for a primitive value.
82
+
83
+ Raises TypeError for unsupported types.
84
+ """
85
+ if value is None:
86
+ return 'null'
87
+ if isinstance(value, bool):
88
+ return 'boolean'
89
+ if isinstance(value, int):
90
+ return 'long'
91
+ if isinstance(value, float):
92
+ return 'double'
93
+ if isinstance(value, str):
94
+ return 'string'
95
+ if isinstance(value, (bytes, bytearray)):
96
+ return 'bytes'
97
+ raise TypeError('AVRO payloads must contain only primitive values')
98
+
99
+
100
+ def _merge_types(types: list[str]) -> str | list[str]:
101
+ """Return a stable Avro type union for a list of types."""
102
+ unique = list(dict.fromkeys(types))
103
+ if len(unique) == 1:
104
+ return unique[0]
105
+ ordered = ['null'] + sorted(t for t in unique if t != 'null')
106
+ return ordered
107
+
108
+
109
+ def _infer_schema(records: JSONList) -> dict[str, Any]:
110
+ """
111
+ Infer a basic Avro schema from record payloads.
112
+
113
+ Only primitive field values are supported; complex values raise TypeError.
114
+ """
115
+ field_names = sorted({key for record in records for key in record})
116
+ fields: list[dict[str, Any]] = []
117
+ for name in field_names:
118
+ types: list[str] = []
119
+ for record in records:
120
+ value = record.get(name)
121
+ if value is None:
122
+ types.append('null')
123
+ continue
124
+ if isinstance(value, dict | list):
125
+ raise TypeError(
126
+ 'AVRO payloads must contain only primitive values',
127
+ )
128
+ if not isinstance(value, _PRIMITIVE_TYPES):
129
+ raise TypeError(
130
+ 'AVRO payloads must contain only primitive values',
131
+ )
132
+ types.append(cast(str, _infer_value_type(value)))
133
+ fields.append({'name': name, 'type': _merge_types(types)})
134
+
135
+ return {
136
+ 'name': 'etlplus_record',
137
+ 'type': 'record',
138
+ 'fields': fields,
139
+ }
140
+
141
+
142
+ # SECTION: FUNCTIONS ======================================================== #
143
+
144
+
145
+ def read(
146
+ path: Path,
147
+ ) -> JSONList:
148
+ """
149
+ Read AVRO content from ``path``.
150
+
151
+ Parameters
152
+ ----------
153
+ path : Path
154
+ Path to the AVRO file on disk.
155
+
156
+ Returns
157
+ -------
158
+ JSONList
159
+ The list of dictionaries read from the AVRO file.
160
+ """
161
+ fastavro = _get_fastavro()
162
+ with path.open('rb') as handle:
163
+ reader = fastavro.reader(handle)
164
+ return [cast(JSONDict, record) for record in reader]
165
+
166
+
167
+ def write(
168
+ path: Path,
169
+ data: JSONData,
170
+ ) -> int:
171
+ """
172
+ Write ``data`` to AVRO at ``path`` and return record count.
173
+
174
+ Parameters
175
+ ----------
176
+ path : Path
177
+ Path to the AVRO file on disk.
178
+ data : JSONData
179
+ Data to write.
180
+
181
+ Returns
182
+ -------
183
+ int
184
+ Number of records written.
185
+ """
186
+ records = _normalize_records(data)
187
+ if not records:
188
+ return 0
189
+
190
+ fastavro = _get_fastavro()
191
+ schema = _infer_schema(records)
192
+ parsed_schema = fastavro.parse_schema(schema)
193
+
194
+ path.parent.mkdir(parents=True, exist_ok=True)
195
+ with path.open('wb') as handle:
196
+ fastavro.writer(handle, parsed_schema, records)
197
+
198
+ return len(records)
@@ -25,7 +25,7 @@ from . import xls
25
25
  from . import xlsx
26
26
  from . import xml
27
27
  from . import yaml
28
- from . import zip
28
+ from . import zip as zip_
29
29
  from .enums import FileFormat
30
30
  from .enums import infer_file_format_and_compression
31
31
 
@@ -222,7 +222,7 @@ class File:
222
222
  case FileFormat.YAML:
223
223
  return yaml.read(self.path)
224
224
  case FileFormat.ZIP:
225
- return zip.read(self.path)
225
+ return zip_.read(self.path)
226
226
  raise ValueError(f'Unsupported format: {fmt}')
227
227
 
228
228
  def write(
@@ -283,5 +283,5 @@ class File:
283
283
  case FileFormat.YAML:
284
284
  return yaml.write(self.path, data)
285
285
  case FileFormat.ZIP:
286
- return zip.write(self.path, data)
286
+ return zip_.write(self.path, data)
287
287
  raise ValueError(f'Unsupported format: {fmt}')
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.csv` module.
3
3
 
4
- CSV read/write helpers.
4
+ Helpers for reading/writing CSV files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -14,6 +14,15 @@ from ..types import JSONData
14
14
  from ..types import JSONDict
15
15
  from ..types import JSONList
16
16
 
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
17
26
  # SECTION: FUNCTIONS ======================================================== #
18
27
 
19
28
 
@@ -21,7 +30,7 @@ def read(
21
30
  path: Path,
22
31
  ) -> JSONList:
23
32
  """
24
- Load CSV content as a list of dictionaries.
33
+ Read CSV content from ``path``.
25
34
 
26
35
  Parameters
27
36
  ----------
@@ -48,7 +57,7 @@ def write(
48
57
  data: JSONData,
49
58
  ) -> int:
50
59
  """
51
- Write CSV rows to ``path`` and return the number of rows.
60
+ Write ``data`` to CSV at ``path`` and return record count.
52
61
 
53
62
  Parameters
54
63
  ----------
@@ -0,0 +1,144 @@
1
+ """
2
+ :mod:`etlplus.file.feather` module.
3
+
4
+ Helpers for reading/writing Feather files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: INTERNAL CONSTANTS =============================================== #
27
+
28
+
29
+ _PANDAS_CACHE: dict[str, Any] = {}
30
+
31
+
32
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
33
+
34
+
35
+ def _get_pandas() -> Any:
36
+ """
37
+ Return the pandas module, importing it on first use.
38
+
39
+ Raises an informative ImportError if the optional dependency is missing.
40
+ """
41
+ mod = _PANDAS_CACHE.get('mod')
42
+ if mod is not None: # pragma: no cover - tiny branch
43
+ return mod
44
+ try:
45
+ _pd = __import__('pandas') # type: ignore[assignment]
46
+ except ImportError as e: # pragma: no cover
47
+ raise ImportError(
48
+ 'Feather support requires optional dependency "pandas".\n'
49
+ 'Install with: pip install pandas',
50
+ ) from e
51
+ _PANDAS_CACHE['mod'] = _pd
52
+
53
+ return _pd
54
+
55
+
56
+ def _normalize_records(data: JSONData) -> JSONList:
57
+ """
58
+ Normalize JSON payloads into a list of dictionaries.
59
+
60
+ Raises TypeError when payloads contain non-dict items.
61
+ """
62
+ if isinstance(data, list):
63
+ if not all(isinstance(item, dict) for item in data):
64
+ raise TypeError(
65
+ 'Feather payloads must contain only objects (dicts)',
66
+ )
67
+ return cast(JSONList, data)
68
+ return [cast(JSONDict, data)]
69
+
70
+
71
+ # SECTION: FUNCTIONS ======================================================== #
72
+
73
+
74
+ def read(
75
+ path: Path,
76
+ ) -> JSONList:
77
+ """
78
+ Read Feather content from ``path``.
79
+
80
+ Parameters
81
+ ----------
82
+ path : Path
83
+ Path to the Feather file on disk.
84
+
85
+ Returns
86
+ -------
87
+ JSONList
88
+ The list of dictionaries read from the Feather file.
89
+
90
+ Raises
91
+ ------
92
+ ImportError
93
+ When optional dependency "pyarrow" is missing.
94
+ """
95
+ pandas = _get_pandas()
96
+ try:
97
+ frame = pandas.read_feather(path)
98
+ except ImportError as e: # pragma: no cover
99
+ raise ImportError(
100
+ 'Feather support requires optional dependency "pyarrow".\n'
101
+ 'Install with: pip install pyarrow',
102
+ ) from e
103
+ return cast(JSONList, frame.to_dict(orient='records'))
104
+
105
+
106
+ def write(
107
+ path: Path,
108
+ data: JSONData,
109
+ ) -> int:
110
+ """
111
+ Write ``data`` to Feather at ``path`` and return record count.
112
+
113
+ Parameters
114
+ ----------
115
+ path : Path
116
+ Path to the Feather file on disk.
117
+ data : JSONData
118
+ Data to write.
119
+
120
+ Returns
121
+ -------
122
+ int
123
+ Number of records written.
124
+
125
+ Raises
126
+ ------
127
+ ImportError
128
+ When optional dependency "pyarrow" is missing.
129
+ """
130
+ records = _normalize_records(data)
131
+ if not records:
132
+ return 0
133
+
134
+ pandas = _get_pandas()
135
+ path.parent.mkdir(parents=True, exist_ok=True)
136
+ frame = pandas.DataFrame.from_records(records)
137
+ try:
138
+ frame.to_feather(path)
139
+ except ImportError as e: # pragma: no cover
140
+ raise ImportError(
141
+ 'Feather support requires optional dependency "pyarrow".\n'
142
+ 'Install with: pip install pyarrow',
143
+ ) from e
144
+ return len(records)
@@ -0,0 +1,123 @@
1
+ """
2
+ :mod:`etlplus.file.gz` module.
3
+
4
+ Helpers for reading/writing GZ files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import gzip
10
+ import tempfile
11
+ from pathlib import Path
12
+
13
+ from ..types import JSONData
14
+ from .enums import CompressionFormat
15
+ from .enums import FileFormat
16
+ from .enums import infer_file_format_and_compression
17
+
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
27
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
28
+
29
+
30
+ def _resolve_format(
31
+ path: Path,
32
+ ) -> FileFormat:
33
+ """
34
+ Resolve the inner file format from a .gz filename.
35
+
36
+ Parameters
37
+ ----------
38
+ path : Path
39
+ Path to the GZ file on disk.
40
+
41
+ Returns
42
+ -------
43
+ FileFormat
44
+ The inferred inner file format.
45
+
46
+ Raises
47
+ ------
48
+ ValueError
49
+ If the file format cannot be inferred from the filename.
50
+ """
51
+ fmt, compression = infer_file_format_and_compression(path)
52
+ if compression is not CompressionFormat.GZ:
53
+ raise ValueError(f'Not a gzip file: {path}')
54
+ if fmt is None:
55
+ raise ValueError(
56
+ f'Cannot infer file format from compressed file {path!r}',
57
+ )
58
+ return fmt
59
+
60
+
61
+ # SECTION: FUNCTIONS ======================================================== #
62
+
63
+
64
+ def read(
65
+ path: Path,
66
+ ) -> JSONData:
67
+ """
68
+ Read GZ content from ``path`` and parse the inner payload.
69
+
70
+ Parameters
71
+ ----------
72
+ path : Path
73
+ Path to the GZ file on disk.
74
+
75
+ Returns
76
+ -------
77
+ JSONData
78
+ Parsed payload.
79
+ """
80
+ fmt = _resolve_format(path)
81
+ with gzip.open(path, 'rb') as handle:
82
+ payload = handle.read()
83
+
84
+ with tempfile.TemporaryDirectory() as tmpdir:
85
+ tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
86
+ tmp_path.write_bytes(payload)
87
+ from .core import File
88
+
89
+ return File(tmp_path, fmt).read()
90
+
91
+
92
+ def write(
93
+ path: Path,
94
+ data: JSONData,
95
+ ) -> int:
96
+ """
97
+ Write ``data`` to GZ at ``path`` and return record count.
98
+
99
+ Parameters
100
+ ----------
101
+ path : Path
102
+ Path to the GZ file on disk.
103
+ data : JSONData
104
+ Data to write.
105
+
106
+ Returns
107
+ -------
108
+ int
109
+ Number of records written.
110
+ """
111
+ fmt = _resolve_format(path)
112
+ with tempfile.TemporaryDirectory() as tmpdir:
113
+ tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
114
+ from .core import File
115
+
116
+ count = File(tmp_path, fmt).write(data)
117
+ payload = tmp_path.read_bytes()
118
+
119
+ path.parent.mkdir(parents=True, exist_ok=True)
120
+ with gzip.open(path, 'wb') as handle:
121
+ handle.write(payload)
122
+
123
+ return count
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.json` module.
3
3
 
4
- JSON read/write helpers.
4
+ Helpers for reading/writing JSON files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -15,6 +15,15 @@ from ..types import JSONDict
15
15
  from ..types import JSONList
16
16
  from ..utils import count_records
17
17
 
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
18
27
  # SECTION: FUNCTIONS ======================================================== #
19
28
 
20
29
 
@@ -22,7 +31,9 @@ def read(
22
31
  path: Path,
23
32
  ) -> JSONData:
24
33
  """
25
- Load and validate JSON payloads from ``path``.
34
+ Read JSON content from ``path``.
35
+
36
+ Validates that the JSON root is a dict or a list of dicts.
26
37
 
27
38
  Parameters
28
39
  ----------
@@ -0,0 +1,109 @@
1
+ """
2
+ :mod:`etlplus.file.ndjson` module.
3
+
4
+ Helpers for reading/writing NDJSON files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+ from ..utils import count_records
17
+
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
27
+ # SECTION: FUNCTIONS ======================================================== #
28
+
29
+
30
+ def read(
31
+ path: Path,
32
+ ) -> JSONList:
33
+ """
34
+ Read NDJSON content from ``path``.
35
+
36
+ Parameters
37
+ ----------
38
+ path : Path
39
+ Path to the NDJSON file on disk.
40
+
41
+ Returns
42
+ -------
43
+ JSONList
44
+ The list of dictionaries read from the NDJSON file.
45
+
46
+ Raises
47
+ ------
48
+ TypeError
49
+ If any line in the NDJSON file is not a JSON object (dict).
50
+ """
51
+ rows: JSONList = []
52
+ with path.open('r', encoding='utf-8') as handle:
53
+ for idx, line in enumerate(handle, start=1):
54
+ text = line.strip()
55
+ if not text:
56
+ continue
57
+ payload = json.loads(text)
58
+ if not isinstance(payload, dict):
59
+ raise TypeError(
60
+ f'NDJSON lines must be objects (dicts) (line {idx})',
61
+ )
62
+ rows.append(cast(JSONDict, payload))
63
+ return rows
64
+
65
+
66
+ def write(
67
+ path: Path,
68
+ data: JSONData,
69
+ ) -> int:
70
+ """
71
+ Write ``data`` to NDJSON at ``path``.
72
+
73
+ Parameters
74
+ ----------
75
+ path : Path
76
+ Path to the NDJSON file on disk.
77
+ data : JSONData
78
+ Data to write.
79
+
80
+ Returns
81
+ -------
82
+ int
83
+ Number of records written.
84
+
85
+ Raises
86
+ ------
87
+ TypeError
88
+ If ``data`` is a list containing non-dict items.
89
+ """
90
+ rows: JSONList
91
+ if isinstance(data, list):
92
+ if not all(isinstance(item, dict) for item in data):
93
+ raise TypeError(
94
+ 'NDJSON payloads must contain only objects (dicts)',
95
+ )
96
+ rows = cast(JSONList, data)
97
+ else:
98
+ rows = [cast(JSONDict, data)]
99
+
100
+ if not rows:
101
+ return 0
102
+
103
+ path.parent.mkdir(parents=True, exist_ok=True)
104
+ with path.open('w', encoding='utf-8') as handle:
105
+ for row in rows:
106
+ handle.write(json.dumps(row, ensure_ascii=False))
107
+ handle.write('\n')
108
+
109
+ return count_records(rows)