etlplus 0.11.8__tar.gz → 0.12.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. {etlplus-0.11.8 → etlplus-0.12.4}/PKG-INFO +58 -14
  2. etlplus-0.11.8/etlplus.egg-info/PKG-INFO → etlplus-0.12.4/README.md +52 -58
  3. etlplus-0.12.4/SECURITY.md +15 -0
  4. etlplus-0.12.4/SUPPORT.md +18 -0
  5. etlplus-0.12.4/etlplus/README.md +37 -0
  6. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/README.md +20 -3
  7. etlplus-0.12.4/etlplus/cli/README.md +40 -0
  8. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/handlers.py +1 -1
  9. etlplus-0.12.4/etlplus/config/README.md +52 -0
  10. etlplus-0.12.4/etlplus/database/README.md +48 -0
  11. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/database/ddl.py +1 -1
  12. etlplus-0.12.4/etlplus/file/README.md +105 -0
  13. etlplus-0.12.4/etlplus/file/_io.py +120 -0
  14. etlplus-0.12.4/etlplus/file/_pandas.py +58 -0
  15. etlplus-0.12.4/etlplus/file/avro.py +186 -0
  16. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/file/core.py +104 -45
  17. etlplus-0.12.4/etlplus/file/csv.py +67 -0
  18. etlplus-0.12.4/etlplus/file/feather.py +99 -0
  19. etlplus-0.12.4/etlplus/file/gz.py +123 -0
  20. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/file/json.py +13 -2
  21. etlplus-0.12.4/etlplus/file/ndjson.py +109 -0
  22. etlplus-0.12.4/etlplus/file/orc.py +99 -0
  23. etlplus-0.12.4/etlplus/file/parquet.py +101 -0
  24. etlplus-0.12.4/etlplus/file/tsv.py +67 -0
  25. etlplus-0.12.4/etlplus/file/txt.py +99 -0
  26. etlplus-0.12.4/etlplus/file/xls.py +88 -0
  27. etlplus-0.12.4/etlplus/file/xlsx.py +99 -0
  28. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/file/xml.py +12 -3
  29. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/file/yaml.py +13 -2
  30. etlplus-0.12.4/etlplus/file/zip.py +175 -0
  31. etlplus-0.12.4/etlplus/templates/README.md +46 -0
  32. etlplus-0.12.4/etlplus/validation/README.md +50 -0
  33. etlplus-0.11.8/README.md → etlplus-0.12.4/etlplus.egg-info/PKG-INFO +102 -13
  34. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus.egg-info/SOURCES.txt +22 -0
  35. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus.egg-info/requires.txt +5 -0
  36. {etlplus-0.11.8 → etlplus-0.12.4}/pyproject.toml +5 -0
  37. {etlplus-0.11.8 → etlplus-0.12.4}/setup.py +5 -0
  38. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_examples_data_parity.py +2 -2
  39. etlplus-0.12.4/tests/unit/file/test_u_file_core.py +493 -0
  40. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/file/test_u_file_enums.py +27 -18
  41. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/file/test_u_file_yaml.py +7 -8
  42. etlplus-0.11.8/etlplus/file/csv.py +0 -82
  43. etlplus-0.11.8/tests/unit/file/test_u_file_core.py +0 -261
  44. {etlplus-0.11.8 → etlplus-0.12.4}/.coveragerc +0 -0
  45. {etlplus-0.11.8 → etlplus-0.12.4}/.editorconfig +0 -0
  46. {etlplus-0.11.8 → etlplus-0.12.4}/.gitattributes +0 -0
  47. {etlplus-0.11.8 → etlplus-0.12.4}/.github/actions/python-bootstrap/action.yml +0 -0
  48. {etlplus-0.11.8 → etlplus-0.12.4}/.github/workflows/ci.yml +0 -0
  49. {etlplus-0.11.8 → etlplus-0.12.4}/.gitignore +0 -0
  50. {etlplus-0.11.8 → etlplus-0.12.4}/.pre-commit-config.yaml +0 -0
  51. {etlplus-0.11.8 → etlplus-0.12.4}/.ruff.toml +0 -0
  52. {etlplus-0.11.8 → etlplus-0.12.4}/CODE_OF_CONDUCT.md +0 -0
  53. {etlplus-0.11.8 → etlplus-0.12.4}/CONTRIBUTING.md +0 -0
  54. {etlplus-0.11.8 → etlplus-0.12.4}/DEMO.md +0 -0
  55. {etlplus-0.11.8 → etlplus-0.12.4}/LICENSE +0 -0
  56. {etlplus-0.11.8 → etlplus-0.12.4}/MANIFEST.in +0 -0
  57. {etlplus-0.11.8 → etlplus-0.12.4}/Makefile +0 -0
  58. {etlplus-0.11.8 → etlplus-0.12.4}/REFERENCES.md +0 -0
  59. {etlplus-0.11.8 → etlplus-0.12.4}/docs/README.md +0 -0
  60. {etlplus-0.11.8 → etlplus-0.12.4}/docs/pipeline-guide.md +0 -0
  61. {etlplus-0.11.8 → etlplus-0.12.4}/docs/snippets/installation_version.md +0 -0
  62. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/__init__.py +0 -0
  63. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/__main__.py +0 -0
  64. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/__version__.py +0 -0
  65. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/__init__.py +0 -0
  66. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/auth.py +0 -0
  67. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/config.py +0 -0
  68. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/endpoint_client.py +0 -0
  69. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/errors.py +0 -0
  70. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/pagination/__init__.py +0 -0
  71. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/pagination/client.py +0 -0
  72. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/pagination/config.py +0 -0
  73. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/pagination/paginator.py +0 -0
  74. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/rate_limiting/__init__.py +0 -0
  75. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/rate_limiting/config.py +0 -0
  76. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
  77. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/request_manager.py +0 -0
  78. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/retry_manager.py +0 -0
  79. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/transport.py +0 -0
  80. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/api/types.py +0 -0
  81. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/__init__.py +0 -0
  82. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/commands.py +0 -0
  83. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/constants.py +0 -0
  84. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/io.py +0 -0
  85. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/main.py +0 -0
  86. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/options.py +0 -0
  87. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/state.py +0 -0
  88. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/cli/types.py +0 -0
  89. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/__init__.py +0 -0
  90. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/connector.py +0 -0
  91. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/jobs.py +0 -0
  92. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/pipeline.py +0 -0
  93. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/profile.py +0 -0
  94. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/types.py +0 -0
  95. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/config/utils.py +0 -0
  96. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/database/__init__.py +0 -0
  97. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/database/engine.py +0 -0
  98. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/database/orm.py +0 -0
  99. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/database/schema.py +0 -0
  100. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/database/types.py +0 -0
  101. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/enums.py +0 -0
  102. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/extract.py +0 -0
  103. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/file/__init__.py +0 -0
  104. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/file/enums.py +0 -0
  105. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/load.py +0 -0
  106. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/mixins.py +0 -0
  107. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/py.typed +0 -0
  108. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/run.py +0 -0
  109. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/run_helpers.py +0 -0
  110. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/templates/__init__.py +0 -0
  111. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/templates/ddl.sql.j2 +0 -0
  112. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/templates/view.sql.j2 +0 -0
  113. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/transform.py +0 -0
  114. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/types.py +0 -0
  115. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/utils.py +0 -0
  116. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/validate.py +0 -0
  117. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/validation/__init__.py +0 -0
  118. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus/validation/utils.py +0 -0
  119. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus.egg-info/dependency_links.txt +0 -0
  120. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus.egg-info/entry_points.txt +0 -0
  121. {etlplus-0.11.8 → etlplus-0.12.4}/etlplus.egg-info/top_level.txt +0 -0
  122. {etlplus-0.11.8 → etlplus-0.12.4}/examples/README.md +0 -0
  123. {etlplus-0.11.8 → etlplus-0.12.4}/examples/configs/ddl_spec.yml +0 -0
  124. {etlplus-0.11.8 → etlplus-0.12.4}/examples/configs/pipeline.yml +0 -0
  125. {etlplus-0.11.8 → etlplus-0.12.4}/examples/data/sample.csv +0 -0
  126. {etlplus-0.11.8 → etlplus-0.12.4}/examples/data/sample.json +0 -0
  127. {etlplus-0.11.8 → etlplus-0.12.4}/examples/data/sample.xml +0 -0
  128. {etlplus-0.11.8 → etlplus-0.12.4}/examples/data/sample.xsd +0 -0
  129. {etlplus-0.11.8 → etlplus-0.12.4}/examples/data/sample.yaml +0 -0
  130. {etlplus-0.11.8 → etlplus-0.12.4}/examples/quickstart_python.py +0 -0
  131. {etlplus-0.11.8 → etlplus-0.12.4}/pytest.ini +0 -0
  132. {etlplus-0.11.8 → etlplus-0.12.4}/setup.cfg +0 -0
  133. {etlplus-0.11.8 → etlplus-0.12.4}/tests/__init__.py +0 -0
  134. {etlplus-0.11.8 → etlplus-0.12.4}/tests/conftest.py +0 -0
  135. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/conftest.py +0 -0
  136. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_cli.py +0 -0
  137. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_pagination_strategy.py +0 -0
  138. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_pipeline_smoke.py +0 -0
  139. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
  140. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_run.py +0 -0
  141. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
  142. {etlplus-0.11.8 → etlplus-0.12.4}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
  143. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/conftest.py +0 -0
  144. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_auth.py +0 -0
  145. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_config.py +0 -0
  146. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_endpoint_client.py +0 -0
  147. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_mocks.py +0 -0
  148. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_pagination_client.py +0 -0
  149. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_pagination_config.py +0 -0
  150. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_paginator.py +0 -0
  151. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_rate_limit_config.py +0 -0
  152. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_rate_limiter.py +0 -0
  153. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_request_manager.py +0 -0
  154. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_retry_manager.py +0 -0
  155. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_transport.py +0 -0
  156. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/api/test_u_types.py +0 -0
  157. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/cli/conftest.py +0 -0
  158. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/cli/test_u_cli_handlers.py +0 -0
  159. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/cli/test_u_cli_io.py +0 -0
  160. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/cli/test_u_cli_main.py +0 -0
  161. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/cli/test_u_cli_state.py +0 -0
  162. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/config/test_u_config_utils.py +0 -0
  163. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/config/test_u_connector.py +0 -0
  164. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/config/test_u_jobs.py +0 -0
  165. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/config/test_u_pipeline.py +0 -0
  166. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/conftest.py +0 -0
  167. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/database/test_u_database_ddl.py +0 -0
  168. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/database/test_u_database_engine.py +0 -0
  169. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/database/test_u_database_orm.py +0 -0
  170. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/database/test_u_database_schema.py +0 -0
  171. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_enums.py +0 -0
  172. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_extract.py +0 -0
  173. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_load.py +0 -0
  174. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_main.py +0 -0
  175. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_mixins.py +0 -0
  176. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_run.py +0 -0
  177. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_run_helpers.py +0 -0
  178. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_transform.py +0 -0
  179. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_utils.py +0 -0
  180. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_validate.py +0 -0
  181. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/test_u_version.py +0 -0
  182. {etlplus-0.11.8 → etlplus-0.12.4}/tests/unit/validation/test_u_validation_utils.py +0 -0
  183. {etlplus-0.11.8 → etlplus-0.12.4}/tools/update_demo_snippets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.11.8
3
+ Version: 0.12.4
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
17
17
  Requires-Python: >=3.13,<3.15
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: fastavro>=1.12.1
20
21
  Requires-Dist: jinja2>=3.1.6
22
+ Requires-Dist: openpyxl>=3.1.5
21
23
  Requires-Dist: pyodbc>=5.3.0
24
+ Requires-Dist: pyarrow>=22.0.0
22
25
  Requires-Dist: python-dotenv>=1.2.1
23
26
  Requires-Dist: pandas>=2.3.3
24
27
  Requires-Dist: pydantic>=2.12.5
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
26
29
  Requires-Dist: requests>=2.32.5
27
30
  Requires-Dist: SQLAlchemy>=2.0.45
28
31
  Requires-Dist: typer>=0.21.0
32
+ Requires-Dist: xlrd>=2.0.2
33
+ Requires-Dist: xlwt>=1.3.0
29
34
  Provides-Extra: dev
30
35
  Requires-Dist: black>=25.9.0; extra == "dev"
31
36
  Requires-Dist: build>=1.2.2; extra == "dev"
@@ -59,6 +64,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
59
64
  package and command-line interface for data extraction, validation, transformation, and loading.
60
65
 
61
66
  - [ETLPlus](#etlplus)
67
+ - [Getting Started](#getting-started)
62
68
  - [Features](#features)
63
69
  - [Installation](#installation)
64
70
  - [Quickstart](#quickstart)
@@ -87,11 +93,27 @@ package and command-line interface for data extraction, validation, transformati
87
93
  - [Linting](#linting)
88
94
  - [Updating Demo Snippets](#updating-demo-snippets)
89
95
  - [Releasing to PyPI](#releasing-to-pypi)
90
- - [Links](#links)
91
96
  - [License](#license)
92
97
  - [Contributing](#contributing)
98
+ - [Documentation](#documentation)
99
+ - [Python Packages/Subpackage](#python-packagessubpackage)
100
+ - [Community Health](#community-health)
101
+ - [Other](#other)
93
102
  - [Acknowledgments](#acknowledgments)
94
103
 
104
+ ## Getting Started
105
+
106
+ ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
107
+ as a Python library or from the command line.
108
+
109
+ To get started:
110
+
111
+ - See [Installation](#installation) for setup instructions.
112
+ - Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
113
+ - Explore [Usage](#usage) for more detailed options and workflows.
114
+
115
+ ETLPlus supports Python 3.13 and 3.14.
116
+
95
117
  ## Features
96
118
 
97
119
  - **Check** data pipeline definitions before running them:
@@ -416,7 +438,7 @@ etlplus transform \
416
438
  # 3. Validate transformed data
417
439
  etlplus validate \
418
440
  --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
419
- temo/sample_transformed.json
441
+ temp/sample_transformed.json
420
442
 
421
443
  # 4. Load to CSV
422
444
  cat temp/sample_transformed.json \
@@ -603,17 +625,6 @@ git push origin v1.4.0
603
625
  If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
604
626
  this exercises the same build path the workflow uses.
605
627
 
606
- ## Links
607
-
608
- - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
609
- - Examples: [`examples/README.md`](examples/README.md)
610
- - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
611
- - Runner internals: [`docs/run-module.md`](docs/run-module.md)
612
- - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
613
- - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
614
- - Demo and walkthrough: [`DEMO.md`](DEMO.md)
615
- - Additional references: [`REFERENCES.md`](`REFERENCES.md)
616
-
617
628
  ## License
618
629
 
619
630
  This project is licensed under the [MIT License](LICENSE).
@@ -637,6 +648,39 @@ If you choose to be a code contributor, please first refer to these documents:
637
648
  - Typing philosophy (TypedDicts as editor hints, permissive runtime):
638
649
  [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
639
650
 
651
+ ## Documentation
652
+
653
+ ### Python Packages/Subpackage
654
+
655
+ Navigate to detailed documentation for each subpackage:
656
+
657
+ - [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
658
+ - [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
659
+ - [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
660
+ and profiles
661
+ - [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
662
+ - [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
663
+ - [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
664
+ - [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
665
+
666
+ ### Community Health
667
+
668
+ - [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
669
+ - [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
670
+ - [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
671
+ - [Support](SUPPORT.md): Where to get help
672
+
673
+ ### Other
674
+
675
+ - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
676
+ - Examples: [`examples/README.md`](examples/README.md)
677
+ - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
678
+ - Runner internals: [`docs/run-module.md`](docs/run-module.md)
679
+ - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
680
+ - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
681
+ - Demo and walkthrough: [`DEMO.md`](DEMO.md)
682
+ - Additional references: [`REFERENCES.md`](REFERENCES.md)
683
+
640
684
  ## Acknowledgments
641
685
 
642
686
  ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
@@ -1,48 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: etlplus
3
- Version: 0.11.8
4
- Summary: A Swiss Army knife for simple ETL operations
5
- Home-page: https://github.com/Dagitali/ETLPlus
6
- Author: ETLPlus Team
7
- License: MIT
8
- Project-URL: Homepage, https://github.com/Dagitali/ETLPlus
9
- Project-URL: Repository, https://github.com/Dagitali/ETLPlus
10
- Classifier: Development Status :: 3 - Alpha
11
- Classifier: Intended Audience :: Developers
12
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.13
16
- Classifier: Programming Language :: Python :: 3.14
17
- Requires-Python: >=3.13,<3.15
18
- Description-Content-Type: text/markdown
19
- License-File: LICENSE
20
- Requires-Dist: jinja2>=3.1.6
21
- Requires-Dist: pyodbc>=5.3.0
22
- Requires-Dist: python-dotenv>=1.2.1
23
- Requires-Dist: pandas>=2.3.3
24
- Requires-Dist: pydantic>=2.12.5
25
- Requires-Dist: PyYAML>=6.0.3
26
- Requires-Dist: requests>=2.32.5
27
- Requires-Dist: SQLAlchemy>=2.0.45
28
- Requires-Dist: typer>=0.21.0
29
- Provides-Extra: dev
30
- Requires-Dist: black>=25.9.0; extra == "dev"
31
- Requires-Dist: build>=1.2.2; extra == "dev"
32
- Requires-Dist: flake8>=7.3.0; extra == "dev"
33
- Requires-Dist: PyYAML>=6.0.3; extra == "dev"
34
- Requires-Dist: pydoclint>=0.8.1; extra == "dev"
35
- Requires-Dist: pydocstyle>=6.3.0; extra == "dev"
36
- Requires-Dist: pytest>=8.4.2; extra == "dev"
37
- Requires-Dist: pytest-cov>=7.0.0; extra == "dev"
38
- Requires-Dist: ruff>=0.14.4; extra == "dev"
39
- Provides-Extra: docs
40
- Requires-Dist: sphinx>=4.0.0; extra == "docs"
41
- Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
42
- Dynamic: home-page
43
- Dynamic: license-file
44
- Dynamic: requires-python
45
-
46
1
  # ETLPlus
47
2
 
48
3
  [![PyPI](https://img.shields.io/pypi/v/etlplus.svg)][PyPI package]
@@ -59,6 +14,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
59
14
  package and command-line interface for data extraction, validation, transformation, and loading.
60
15
 
61
16
  - [ETLPlus](#etlplus)
17
+ - [Getting Started](#getting-started)
62
18
  - [Features](#features)
63
19
  - [Installation](#installation)
64
20
  - [Quickstart](#quickstart)
@@ -87,11 +43,27 @@ package and command-line interface for data extraction, validation, transformati
87
43
  - [Linting](#linting)
88
44
  - [Updating Demo Snippets](#updating-demo-snippets)
89
45
  - [Releasing to PyPI](#releasing-to-pypi)
90
- - [Links](#links)
91
46
  - [License](#license)
92
47
  - [Contributing](#contributing)
48
+ - [Documentation](#documentation)
49
+ - [Python Packages/Subpackage](#python-packagessubpackage)
50
+ - [Community Health](#community-health)
51
+ - [Other](#other)
93
52
  - [Acknowledgments](#acknowledgments)
94
53
 
54
+ ## Getting Started
55
+
56
+ ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
57
+ as a Python library or from the command line.
58
+
59
+ To get started:
60
+
61
+ - See [Installation](#installation) for setup instructions.
62
+ - Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
63
+ - Explore [Usage](#usage) for more detailed options and workflows.
64
+
65
+ ETLPlus supports Python 3.13 and 3.14.
66
+
95
67
  ## Features
96
68
 
97
69
  - **Check** data pipeline definitions before running them:
@@ -416,7 +388,7 @@ etlplus transform \
416
388
  # 3. Validate transformed data
417
389
  etlplus validate \
418
390
  --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
419
- temo/sample_transformed.json
391
+ temp/sample_transformed.json
420
392
 
421
393
  # 4. Load to CSV
422
394
  cat temp/sample_transformed.json \
@@ -603,17 +575,6 @@ git push origin v1.4.0
603
575
  If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
604
576
  this exercises the same build path the workflow uses.
605
577
 
606
- ## Links
607
-
608
- - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
609
- - Examples: [`examples/README.md`](examples/README.md)
610
- - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
611
- - Runner internals: [`docs/run-module.md`](docs/run-module.md)
612
- - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
613
- - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
614
- - Demo and walkthrough: [`DEMO.md`](DEMO.md)
615
- - Additional references: [`REFERENCES.md`](`REFERENCES.md)
616
-
617
578
  ## License
618
579
 
619
580
  This project is licensed under the [MIT License](LICENSE).
@@ -637,6 +598,39 @@ If you choose to be a code contributor, please first refer to these documents:
637
598
  - Typing philosophy (TypedDicts as editor hints, permissive runtime):
638
599
  [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
639
600
 
601
+ ## Documentation
602
+
603
+ ### Python Packages/Subpackage
604
+
605
+ Navigate to detailed documentation for each subpackage:
606
+
607
+ - [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
608
+ - [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
609
+ - [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
610
+ and profiles
611
+ - [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
612
+ - [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
613
+ - [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
614
+ - [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
615
+
616
+ ### Community Health
617
+
618
+ - [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
619
+ - [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
620
+ - [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
621
+ - [Support](SUPPORT.md): Where to get help
622
+
623
+ ### Other
624
+
625
+ - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
626
+ - Examples: [`examples/README.md`](examples/README.md)
627
+ - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
628
+ - Runner internals: [`docs/run-module.md`](docs/run-module.md)
629
+ - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
630
+ - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
631
+ - Demo and walkthrough: [`DEMO.md`](DEMO.md)
632
+ - Additional references: [`REFERENCES.md`](REFERENCES.md)
633
+
640
634
  ## Acknowledgments
641
635
 
642
636
  ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
@@ -0,0 +1,15 @@
1
+ # Security Policy
2
+
3
+ ## Reporting a Vulnerability
4
+
5
+ If you discover a security vulnerability in ETLPlus, please report it responsibly:
6
+
7
+ - **Do not** open a public issue for security problems.
8
+ - Email the maintainers at security@dagitali.com (or use a private contact method listed in the
9
+ repository).
10
+ - Include as much detail as possible: affected versions, steps to reproduce, and any relevant logs
11
+ or code.
12
+ - We will acknowledge your report within 3 business days and work with you to resolve the issue
13
+ promptly.
14
+
15
+ Thank you for helping keep ETLPlus and its users safe!
@@ -0,0 +1,18 @@
1
+ # Support
2
+
3
+ Thank you for using ETLPlus!
4
+
5
+ ## Where to Get Help
6
+
7
+ - **Questions & Usage**: Please use [GitHub Discussions][discussions] for general questions, usage
8
+ help, and best practices.
9
+ - **Bugs & Feature Requests**: Open an issue in the [GitHub Issues][issues] tracker.
10
+ - **Security Issues**: See [SECURITY.md](SECURITY.md) for responsible disclosure.
11
+ - **Documentation**: See the [README](README.md) and [docs/](docs/) directory for guides and
12
+ references.
13
+
14
+ We aim to respond to questions and issues as quickly as possible. Community contributions and peer
15
+ support are also welcome!
16
+
17
+ [discussions]: https://github.com/Dagitali/ETLPlus/discussions
18
+ [issues]: https://github.com/Dagitali/ETLPlus/issues
@@ -0,0 +1,37 @@
1
+ # etlplus package
2
+
3
+ The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
4
+ validation, transformation, and loading of data from files, APIs, and databases.
5
+
6
+ - Top-level entry points for extract, validate, transform, and load
7
+ - Utilities for pipeline orchestration and helpers
8
+ - Exposes all subpackages for advanced usage
9
+
10
+ Back to project overview: see the top-level [README](../README.md).
11
+
12
+ ## Subpackages
13
+
14
+ - [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
15
+ - [etlplus.file](file/README.md): Unified file format support and helpers
16
+ - [etlplus.config](config/README.md): Configuration helpers for connectors, pipelines, jobs, and
17
+ profiles
18
+ - [etlplus.cli](cli/README.md): Command-line interface for ETLPlus workflows
19
+ - [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
20
+ - [etlplus.templates](templates/README.md): SQL and DDL template helpers
21
+ - [etlplus.validation](validation/README.md): Data validation utilities and helpers
22
+
23
+ ## Quickstart
24
+
25
+ ```python
26
+ from etlplus import extract, validate, transform, load
27
+
28
+ data = extract("file", "input.csv")
29
+ filtered = transform(data, {"filter": {"field": "age", "op": "gt", "value": 25}})
30
+ assert validate(filtered, {"age": {"type": "number", "min": 0}})["valid"]
31
+ load(filtered, "file", "output.json", file_format="json")
32
+ ```
33
+
34
+ ## See Also
35
+
36
+ - [Top-level project README](../README.md)
37
+ - [API reference](../docs/README.md)
@@ -1,7 +1,7 @@
1
- # etlplus.api module.
1
+ # etlplus.api subpackage
2
2
 
3
- Focused documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for
4
- paginated REST endpoints.
3
+ Documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for paginated
4
+ REST endpoints.
5
5
 
6
6
  - Provides a small `EndpointClient` for calling JSON APIs
7
7
  - Supports page-, offset-, and cursor-based pagination via `PaginationConfig`
@@ -12,6 +12,20 @@ paginated REST endpoints.
12
12
 
13
13
  Back to project overview: see the top-level [README](../../README.md).
14
14
 
15
+ - [etlplus.api subpackage](#etlplusapi-subpackage)
16
+ - [Installation](#installation)
17
+ - [Quickstart](#quickstart)
18
+ - [Overriding Rate Limits Per Call](#overriding-rate-limits-per-call)
19
+ - [Choosing `records_path` and `cursor_path`](#choosing-records_path-and-cursor_path)
20
+ - [Cursor-Based Pagination Example](#cursor-based-pagination-example)
21
+ - [Offset-based pagination example](#offset-based-pagination-example)
22
+ - [Authentication](#authentication)
23
+ - [Errors and Rate Limiting](#errors-and-rate-limiting)
24
+ - [Types and Transport](#types-and-transport)
25
+ - [Supporting Modules](#supporting-modules)
26
+ - [Minimal Contract](#minimal-contract)
27
+ - [See also](#see-also)
28
+
15
29
  ## Installation
16
30
 
17
31
  `etlplus.api` ships as part of the `etlplus` package. Install the package as usual:
@@ -233,3 +247,6 @@ providers can fall back to their own defaults. If you already possess a static t
233
247
  ## See also
234
248
 
235
249
  - Top-level CLI and library usage in the main [README](../../README.md)
250
+
251
+
252
+ [def]: #installation
@@ -0,0 +1,40 @@
1
+ # etlplus.cli subpackage
2
+
3
+ Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPlus workflows.
4
+
5
+ - Provides a CLI for running ETL pipelines, jobs, and utilities
6
+ - Supports commands for running, validating, and inspecting pipelines
7
+ - Includes options for configuration, state, and output control
8
+ - Exposes handlers for custom command integration
9
+
10
+ Back to project overview: see the top-level [README](../../README.md).
11
+
12
+ - [etlplus.cli subpackage](#etlpluscli-subpackage)
13
+ - [Available Commands](#available-commands)
14
+ - [Command Options](#command-options)
15
+ - [Example: Running a Pipeline](#example-running-a-pipeline)
16
+ - [See Also](#see-also)
17
+
18
+ ## Available Commands
19
+
20
+ - **run**: Execute a pipeline or job
21
+ - **validate**: Validate pipeline or config files
22
+ - **inspect**: Show pipeline/job details
23
+
24
+ ## Command Options
25
+
26
+ - `--config`: Path to config file
27
+ - `--state`: Path to state file
28
+ - `--output`: Output file or format
29
+
30
+ ## Example: Running a Pipeline
31
+
32
+ ```bash
33
+ etlplus run --config configs/pipeline.yml --output results.json
34
+ ```
35
+
36
+ ## See Also
37
+
38
+ - Top-level CLI and library usage in the main [README](../../README.md)
39
+ - Command handlers in [handlers.py](handlers.py)
40
+ - Command options in [options.py](options.py)
@@ -570,7 +570,7 @@ def transform_handler(
570
570
  data = transform(payload, cast(TransformOperations, operations_payload))
571
571
 
572
572
  if target and target != '-':
573
- File.from_path(target, file_format=target_format).write(data)
573
+ File(target, file_format=target_format).write(data)
574
574
  print(f'Data transformed and saved to {target}')
575
575
  return 0
576
576
 
@@ -0,0 +1,52 @@
1
+ # etlplus.config subpackage
2
+
3
+ Documentation for the `etlplus.config` subpackage: configuration helpers for connectors, pipelines,
4
+ jobs, and profiles.
5
+
6
+ - Provides classes and utilities for managing ETL pipeline configuration
7
+ - Supports YAML/JSON config loading and validation
8
+ - Includes helpers for connectors, jobs, pipelines, and profiles
9
+ - Exposes type definitions for config schemas
10
+
11
+ Back to project overview: see the top-level [README](../../README.md).
12
+
13
+ - [etlplus.config subpackage](#etlplusconfig-subpackage)
14
+ - [Supported Configuration Types](#supported-configuration-types)
15
+ - [Loading and Validating Configs](#loading-and-validating-configs)
16
+ - [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
17
+ - [See Also](#see-also)
18
+
19
+ ## Supported Configuration Types
20
+
21
+ - **Connector**: Connection details for databases, files, or APIs
22
+ - **Job**: ETL job definitions and scheduling
23
+ - **Pipeline**: End-to-end pipeline configuration
24
+ - **Profile**: User or environment-specific settings
25
+
26
+ ## Loading and Validating Configs
27
+
28
+ Use the provided classes to load and validate configuration files:
29
+
30
+ ```python
31
+ from etlplus.config import PipelineConfig
32
+
33
+ cfg = PipelineConfig.from_yaml("pipeline.yml")
34
+ ```
35
+
36
+ - Supports YAML and JSON formats
37
+ - Validates the loaded configuration against the expected schema
38
+
39
+ ## Example: Loading a Pipeline Config
40
+
41
+ ```python
42
+ from etlplus.config import PipelineConfig
43
+
44
+ pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
45
+ print(pipeline)
46
+ ```
47
+
48
+ ## See Also
49
+
50
+ - Top-level CLI and library usage in the main [README](../../README.md)
51
+ - Config type definitions in [types.py](types.py)
52
+ - Config utilities in [utils.py](utils.py)
@@ -0,0 +1,48 @@
1
+ # etlplus.database subpackage
2
+
3
+ Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
4
+
5
+ - Provides database engine and connection management
6
+ - Supports schema definition and DDL generation
7
+ - Includes lightweight ORM utilities for tabular data
8
+ - Exposes type definitions for database objects
9
+
10
+ Back to project overview: see the top-level [README](../../README.md).
11
+
12
+ - [etlplus.database subpackage](#etlplusdatabase-subpackage)
13
+ - [Database Engine and Connections](#database-engine-and-connections)
14
+ - [Schema and DDL Helpers](#schema-and-ddl-helpers)
15
+ - [ORM Utilities](#orm-utilities)
16
+ - [Example: Creating a Table](#example-creating-a-table)
17
+ - [See Also](#see-also)
18
+
19
+ ## Database Engine and Connections
20
+
21
+ - Manage connections to supported databases
22
+ - Configure engines for different backends
23
+
24
+ ## Schema and DDL Helpers
25
+
26
+ - Define table schemas and columns
27
+ - Generate DDL statements for supported databases
28
+
29
+ ## ORM Utilities
30
+
31
+ - Map rows to Python objects
32
+ - Simple CRUD helpers for tabular data
33
+
34
+ ## Example: Creating a Table
35
+
36
+ ```python
37
+ from etlplus.database import Schema, Engine
38
+
39
+ engine = Engine.connect("sqlite:///example.db")
40
+ schema = Schema.from_dict({"name": "users", "columns": [ ... ]})
41
+ engine.create_table(schema)
42
+ ```
43
+
44
+ ## See Also
45
+
46
+ - Top-level CLI and library usage in the main [README](../../README.md)
47
+ - Schema helpers in [schema.py](schema.py)
48
+ - ORM utilities in [orm.py](orm.py)
@@ -203,7 +203,7 @@ def load_table_spec(
203
203
  raise ValueError('Spec must be .json, .yml, or .yaml')
204
204
 
205
205
  try:
206
- spec = File.from_path(spec_path).read()
206
+ spec = File(spec_path).read()
207
207
  except ImportError as e:
208
208
  if suffix in {'.yml', '.yaml'}:
209
209
  raise RuntimeError(
@@ -0,0 +1,105 @@
1
+ # etlplus.file subpackage
2
+
3
+ Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
4
+ and writing data files.
5
+
6
+ - Provides a consistent interface for reading and writing files in various formats
7
+ - Supports all formats defined in `FileFormat` (see below)
8
+ - Includes helpers for inferring file format and compression from filenames, extensions, or MIME
9
+ types
10
+ - Exposes a `File` class with instance methods for reading and writing data
11
+
12
+ Back to project overview: see the top-level [README](../../README.md).
13
+
14
+ - [etlplus.file subpackage](#etlplusfile-subpackage)
15
+ - [Supported File Formats](#supported-file-formats)
16
+ - [Inferring File Format and Compression](#inferring-file-format-and-compression)
17
+ - [Reading and Writing Files](#reading-and-writing-files)
18
+ - [Reading a File](#reading-a-file)
19
+ - [Writing a File](#writing-a-file)
20
+ - [File Instance Methods](#file-instance-methods)
21
+ - [Example: Reading and Writing](#example-reading-and-writing)
22
+ - [See Also](#see-also)
23
+
24
+ ## Supported File Formats
25
+
26
+ The following formats are defined in `FileFormat` and supported for reading and writing:
27
+
28
+ | Format | Description |
29
+ |-----------|---------------------------------------------|
30
+ | avro | Apache Avro binary serialization |
31
+ | csv | Comma-separated values text files |
32
+ | feather | Apache Arrow Feather columnar format |
33
+ | gz | Gzip-compressed files (see the compression note below) |
34
+ | json | Standard JSON files |
35
+ | ndjson | Newline-delimited JSON (JSON Lines) |
36
+ | orc | Apache ORC columnar format |
37
+ | parquet | Apache Parquet columnar format |
38
+ | tsv | Tab-separated values text files |
39
+ | txt | Plain text files |
40
+ | xls | Microsoft Excel (legacy .xls) |
41
+ | xlsx | Microsoft Excel (modern .xlsx) |
42
+ | zip | ZIP-compressed files (see the compression note below) |
43
+ | xml | XML files |
44
+ | yaml | YAML files |
45
+
46
+ Compression formats (gz, zip) are also supported as wrappers for other formats.
47
+
48
+ ## Inferring File Format and Compression
49
+
50
+ Use `infer_file_format_and_compression(value, filename=None)` to infer the file format and
51
+ compression from a filename, extension, or MIME type. It returns a tuple `(file_format,
52
+ compression_format)`.
53
+
54
+ ## Reading and Writing Files
55
+
56
+ The `File` class is the main entry point for file operations; see the subsections below.
57
+
58
+ ### Reading a File
59
+
60
+ ```python
61
+ from etlplus.file import File
62
+
63
+ f = File("data/sample.csv")
64
+ data = f.read()
65
+ ```
66
+
67
+ - The `read()` method automatically detects the format and compression.
68
+ - Returns parsed data (e.g., a list of dicts for tabular formats).
69
+
70
+ ### Writing a File
71
+
72
+ ```python
73
+ from etlplus.file import File
74
+
75
+ f = File("output.json")
76
+ f.write(data)
77
+ ```
78
+
79
+ - The `write()` method serializes and writes data in the appropriate format.
80
+ - Supports all formats listed above.
81
+
82
+ ## File Instance Methods
83
+
84
+ - `read()`: Reads and parses the file, returning structured data.
85
+ - `write(data)`: Writes structured data to the file in the detected format.
86
+
87
+ ## Example: Reading and Writing
88
+
89
+ ```python
90
+ from etlplus.file import File
91
+
92
+ # Read CSV
93
+ csv_file = File("data.csv")
94
+ rows = csv_file.read()
95
+
96
+ # Write JSON
97
+ json_file = File("output.json")
98
+ json_file.write(rows)
99
+ ```
100
+
101
+ ## See Also
102
+
103
+ - Top-level CLI and library usage in the main [README](../../README.md)
104
+ - File format enums in [enums.py](enums.py)
105
+ - Compression format enums in [enums.py](enums.py)