etlplus 0.9.2__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +26 -1
- etlplus/api/README.md +3 -51
- etlplus/api/__init__.py +0 -10
- etlplus/api/config.py +28 -39
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +1 -13
- etlplus/api/rate_limiting/rate_limiter.py +11 -8
- etlplus/api/request_manager.py +6 -11
- etlplus/api/transport.py +2 -14
- etlplus/api/types.py +6 -96
- etlplus/cli/commands.py +43 -76
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +12 -40
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +7 -4
- etlplus/{workflow → config}/__init__.py +23 -10
- etlplus/{workflow → config}/connector.py +44 -58
- etlplus/{workflow → config}/jobs.py +32 -105
- etlplus/{workflow → config}/pipeline.py +51 -59
- etlplus/{workflow → config}/profile.py +5 -8
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +3 -19
- etlplus/database/orm.py +0 -2
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +288 -0
- etlplus/{ops/extract.py → extract.py} +99 -81
- etlplus/file.py +652 -0
- etlplus/{ops/load.py → load.py} +101 -78
- etlplus/{ops/run.py → run.py} +127 -159
- etlplus/{api/utils.py → run_helpers.py} +153 -209
- etlplus/{ops/transform.py → transform.py} +68 -75
- etlplus/types.py +4 -5
- etlplus/utils.py +2 -136
- etlplus/{ops/validate.py → validate.py} +12 -22
- etlplus/validation/__init__.py +44 -0
- etlplus/{ops → validation}/utils.py +17 -53
- {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/METADATA +17 -210
- etlplus-0.10.2.dist-info/RECORD +65 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/WHEEL +1 -1
- etlplus/README.md +0 -37
- etlplus/api/enums.py +0 -51
- etlplus/cli/README.md +0 -40
- etlplus/database/README.md +0 -48
- etlplus/file/README.md +0 -105
- etlplus/file/__init__.py +0 -25
- etlplus/file/_imports.py +0 -141
- etlplus/file/_io.py +0 -160
- etlplus/file/accdb.py +0 -78
- etlplus/file/arrow.py +0 -78
- etlplus/file/avro.py +0 -176
- etlplus/file/bson.py +0 -77
- etlplus/file/cbor.py +0 -78
- etlplus/file/cfg.py +0 -79
- etlplus/file/conf.py +0 -80
- etlplus/file/core.py +0 -322
- etlplus/file/csv.py +0 -79
- etlplus/file/dat.py +0 -78
- etlplus/file/dta.py +0 -77
- etlplus/file/duckdb.py +0 -78
- etlplus/file/enums.py +0 -343
- etlplus/file/feather.py +0 -111
- etlplus/file/fwf.py +0 -77
- etlplus/file/gz.py +0 -123
- etlplus/file/hbs.py +0 -78
- etlplus/file/hdf5.py +0 -78
- etlplus/file/ini.py +0 -79
- etlplus/file/ion.py +0 -78
- etlplus/file/jinja2.py +0 -78
- etlplus/file/json.py +0 -98
- etlplus/file/log.py +0 -78
- etlplus/file/mat.py +0 -78
- etlplus/file/mdb.py +0 -78
- etlplus/file/msgpack.py +0 -78
- etlplus/file/mustache.py +0 -78
- etlplus/file/nc.py +0 -78
- etlplus/file/ndjson.py +0 -108
- etlplus/file/numbers.py +0 -75
- etlplus/file/ods.py +0 -79
- etlplus/file/orc.py +0 -111
- etlplus/file/parquet.py +0 -113
- etlplus/file/pb.py +0 -78
- etlplus/file/pbf.py +0 -77
- etlplus/file/properties.py +0 -78
- etlplus/file/proto.py +0 -77
- etlplus/file/psv.py +0 -79
- etlplus/file/rda.py +0 -78
- etlplus/file/rds.py +0 -78
- etlplus/file/sas7bdat.py +0 -78
- etlplus/file/sav.py +0 -77
- etlplus/file/sqlite.py +0 -78
- etlplus/file/stub.py +0 -84
- etlplus/file/sylk.py +0 -77
- etlplus/file/tab.py +0 -81
- etlplus/file/toml.py +0 -78
- etlplus/file/tsv.py +0 -80
- etlplus/file/txt.py +0 -102
- etlplus/file/vm.py +0 -78
- etlplus/file/wks.py +0 -77
- etlplus/file/xls.py +0 -88
- etlplus/file/xlsm.py +0 -79
- etlplus/file/xlsx.py +0 -99
- etlplus/file/xml.py +0 -185
- etlplus/file/xpt.py +0 -78
- etlplus/file/yaml.py +0 -95
- etlplus/file/zip.py +0 -175
- etlplus/file/zsav.py +0 -77
- etlplus/ops/README.md +0 -50
- etlplus/ops/__init__.py +0 -61
- etlplus/templates/README.md +0 -46
- etlplus/workflow/README.md +0 -52
- etlplus/workflow/dag.py +0 -105
- etlplus/workflow/types.py +0 -115
- etlplus-0.9.2.dist-info/RECORD +0 -134
- {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.9.2
|
|
3
|
+
Version: 0.10.2
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -17,11 +17,8 @@ Classifier: Programming Language :: Python :: 3.14
|
|
|
17
17
|
Requires-Python: >=3.13,<3.15
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
-
Requires-Dist: fastavro>=1.12.1
|
|
21
20
|
Requires-Dist: jinja2>=3.1.6
|
|
22
|
-
Requires-Dist: openpyxl>=3.1.5
|
|
23
21
|
Requires-Dist: pyodbc>=5.3.0
|
|
24
|
-
Requires-Dist: pyarrow>=22.0.0
|
|
25
22
|
Requires-Dist: python-dotenv>=1.2.1
|
|
26
23
|
Requires-Dist: pandas>=2.3.3
|
|
27
24
|
Requires-Dist: pydantic>=2.12.5
|
|
@@ -29,8 +26,6 @@ Requires-Dist: PyYAML>=6.0.3
|
|
|
29
26
|
Requires-Dist: requests>=2.32.5
|
|
30
27
|
Requires-Dist: SQLAlchemy>=2.0.45
|
|
31
28
|
Requires-Dist: typer>=0.21.0
|
|
32
|
-
Requires-Dist: xlrd>=2.0.2
|
|
33
|
-
Requires-Dist: xlwt>=1.3.0
|
|
34
29
|
Provides-Extra: dev
|
|
35
30
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
36
31
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
@@ -64,25 +59,9 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
64
59
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
65
60
|
|
|
66
61
|
- [ETLPlus](#etlplus)
|
|
67
|
-
- [Getting Started](#getting-started)
|
|
68
62
|
- [Features](#features)
|
|
69
63
|
- [Installation](#installation)
|
|
70
64
|
- [Quickstart](#quickstart)
|
|
71
|
-
- [Data Connectors](#data-connectors)
|
|
72
|
-
- [REST APIs (`api`)](#rest-apis-api)
|
|
73
|
-
- [Databases (`database`)](#databases-database)
|
|
74
|
-
- [Files (`file`)](#files-file)
|
|
75
|
-
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
76
|
-
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
77
|
-
- [Semi-Structured Text](#semi-structured-text)
|
|
78
|
-
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
|
-
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
-
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
81
|
-
- [Spreadsheets](#spreadsheets)
|
|
82
|
-
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
83
|
-
- [Logs and Event Streams](#logs-and-event-streams)
|
|
84
|
-
- [Data Archives](#data-archives)
|
|
85
|
-
- [Templates](#templates)
|
|
86
65
|
- [Usage](#usage)
|
|
87
66
|
- [Command Line Interface](#command-line-interface)
|
|
88
67
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -108,27 +87,11 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
108
87
|
- [Linting](#linting)
|
|
109
88
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
110
89
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
90
|
+
- [Links](#links)
|
|
111
91
|
- [License](#license)
|
|
112
92
|
- [Contributing](#contributing)
|
|
113
|
-
- [Documentation](#documentation)
|
|
114
|
-
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
115
|
-
- [Community Health](#community-health)
|
|
116
|
-
- [Other](#other)
|
|
117
93
|
- [Acknowledgments](#acknowledgments)
|
|
118
94
|
|
|
119
|
-
## Getting Started
|
|
120
|
-
|
|
121
|
-
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
122
|
-
as a Python library or from the command line.
|
|
123
|
-
|
|
124
|
-
To get started:
|
|
125
|
-
|
|
126
|
-
- See [Installation](#installation) for setup instructions.
|
|
127
|
-
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
128
|
-
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
129
|
-
|
|
130
|
-
ETLPlus supports Python 3.13 and above.
|
|
131
|
-
|
|
132
95
|
## Features
|
|
133
96
|
|
|
134
97
|
- **Check** data pipeline definitions before running them:
|
|
@@ -196,7 +159,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
196
159
|
[Python API](#python-api):
|
|
197
160
|
|
|
198
161
|
```python
|
|
199
|
-
from etlplus.ops import extract, transform, validate, load
|
|
162
|
+
from etlplus import extract, transform, validate, load
|
|
200
163
|
|
|
201
164
|
data = extract("file", "input.csv")
|
|
202
165
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -206,140 +169,6 @@ assert validate(filtered, rules)["valid"]
|
|
|
206
169
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
207
170
|
```
|
|
208
171
|
|
|
209
|
-
## Data Connectors
|
|
210
|
-
|
|
211
|
-
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
212
|
-
are differentiated by their types, each of which is represented in the subsections below.
|
|
213
|
-
|
|
214
|
-
### REST APIs (`api`)
|
|
215
|
-
|
|
216
|
-
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
217
|
-
include GET for extract and PATCH/POST/PUT for load.
|
|
218
|
-
|
|
219
|
-
### Databases (`database`)
|
|
220
|
-
|
|
221
|
-
Database connectors use connection strings for extraction and loading, and
|
|
222
|
-
DDL can be rendered from table specs for migrations or schema checks.
|
|
223
|
-
|
|
224
|
-
### Files (`file`)
|
|
225
|
-
|
|
226
|
-
Recognized file formats are listed in the tables below. Support for reading to or writing from a recognized file format is marked as:
|
|
227
|
-
|
|
228
|
-
- **Y**: implemented (may require optional dependencies)
|
|
229
|
-
- **N**: stubbed or not yet implemented
|
|
230
|
-
|
|
231
|
-
#### Stubbed / Placeholder
|
|
232
|
-
|
|
233
|
-
| Format | Read | Write | Description |
|
|
234
|
-
| --- | --- | --- | --- |
|
|
235
|
-
| `stub` | N | Placeholder format for tests and future connectors. |
|
|
236
|
-
|
|
237
|
-
#### Tabular & Delimited Text
|
|
238
|
-
|
|
239
|
-
| Format | Read | Write | Description |
|
|
240
|
-
| --- | --- | --- | --- |
|
|
241
|
-
| `csv` | Y | Y | Comma-Separated Values |
|
|
242
|
-
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
243
|
-
| `fwf` | N | N | Fixed-Width Fields |
|
|
244
|
-
| `psv` | N | N | Pipe-Separated Values |
|
|
245
|
-
| `tab` | N | N | Often synonymous with TSV |
|
|
246
|
-
| `tsv` | Y | Y | Tab-Separated Values |
|
|
247
|
-
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
248
|
-
|
|
249
|
-
#### Semi-Structured Text
|
|
250
|
-
|
|
251
|
-
| Format | Read | Write | Description |
|
|
252
|
-
| --- | --- | --- | --- |
|
|
253
|
-
| `cfg` | N | N | Config-style key-value pairs |
|
|
254
|
-
| `conf` | N | N | Config-style key-value pairs |
|
|
255
|
-
| `ini` | N | N | Config-style key-value pairs |
|
|
256
|
-
| `json` | Y | Y | JavaScript Object Notation |
|
|
257
|
-
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
258
|
-
| `properties` | N | N | Java-style key-value pairs |
|
|
259
|
-
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
260
|
-
| `xml` | Y | Y | Extensible Markup Language |
|
|
261
|
-
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
262
|
-
|
|
263
|
-
#### Columnar / Analytics-Friendly
|
|
264
|
-
|
|
265
|
-
| Format | Read | Write | Description |
|
|
266
|
-
| --- | --- | --- | --- |
|
|
267
|
-
| `arrow` | N | N | Apache Arrow IPC |
|
|
268
|
-
| `feather` | Y | Y | Apache Arrow Feather |
|
|
269
|
-
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
270
|
-
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
271
|
-
|
|
272
|
-
#### Binary Serialization and Interchange
|
|
273
|
-
|
|
274
|
-
| Format | Read | Write | Description |
|
|
275
|
-
| --- | --- | --- | --- |
|
|
276
|
-
| `avro` | Y | Y | Apache Avro |
|
|
277
|
-
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
278
|
-
| `cbor` | N | N | Concise Binary Object Representation |
|
|
279
|
-
| `ion` | N | N | Amazon Ion |
|
|
280
|
-
| `msgpack` | N | N | MessagePack |
|
|
281
|
-
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
282
|
-
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
283
|
-
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
284
|
-
|
|
285
|
-
#### Databases and Embedded Storage
|
|
286
|
-
|
|
287
|
-
| Format | Read | Write | Description |
|
|
288
|
-
| --- | --- | --- | --- |
|
|
289
|
-
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
290
|
-
| `duckdb` | N | N | DuckDB |
|
|
291
|
-
| `mdb` | N | N | Microsoft Access (older format) |
|
|
292
|
-
| `sqlite` | N | N | SQLite |
|
|
293
|
-
|
|
294
|
-
#### Spreadsheets
|
|
295
|
-
|
|
296
|
-
| Format | Read | Write | Description |
|
|
297
|
-
| --- | --- | --- | --- |
|
|
298
|
-
| `numbers` | N | N | Apple Numbers |
|
|
299
|
-
| `ods` | N | N | OpenDocument |
|
|
300
|
-
| `wks` | N | N | Lotus 1-2-3 |
|
|
301
|
-
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
302
|
-
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
303
|
-
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
304
|
-
|
|
305
|
-
#### Statistical / Scientific / Numeric Computing
|
|
306
|
-
|
|
307
|
-
| Format | Read | Write | Description |
|
|
308
|
-
| --- | --- | --- | --- |
|
|
309
|
-
| `dta` | N | N | Stata |
|
|
310
|
-
| `hdf5` | N | N | Hierarchical Data Format |
|
|
311
|
-
| `mat` | N | N | MATLAB |
|
|
312
|
-
| `nc` | N | N | NetCDF |
|
|
313
|
-
| `rda` | N | N | RData workspace/object |
|
|
314
|
-
| `rds` | N | N | R data |
|
|
315
|
-
| `sas7bdat` | N | N | SAS data |
|
|
316
|
-
| `sav` | N | N | SPSS data |
|
|
317
|
-
| `sylk` | N | N | Symbolic Link |
|
|
318
|
-
| `xpt` | N | N | SAS Transport |
|
|
319
|
-
| `zsav` | N | N | Compressed SPSS data |
|
|
320
|
-
|
|
321
|
-
#### Logs and Event Streams
|
|
322
|
-
|
|
323
|
-
| Format | Supported | Description |
|
|
324
|
-
| --- | --- | --- |
|
|
325
|
-
| `log` | N | N | Generic log file |
|
|
326
|
-
|
|
327
|
-
#### Data Archives
|
|
328
|
-
|
|
329
|
-
| Format | Read | Write | Description |
|
|
330
|
-
| --- | --- | --- | --- |
|
|
331
|
-
| `gz` | Y | Y | Gzip-compressed file |
|
|
332
|
-
| `zip` | Y | Y | ZIP archive |
|
|
333
|
-
|
|
334
|
-
#### Templates
|
|
335
|
-
|
|
336
|
-
| Format | Read | Write | Description |
|
|
337
|
-
| --- | --- | --- | --- |
|
|
338
|
-
| `hbs` | N | N | Handlebars |
|
|
339
|
-
| `jinja2` | N | N | Jinja2 |
|
|
340
|
-
| `mustache` | N | N | Mustache |
|
|
341
|
-
| `vm` | N | N | Apache Velocity |
|
|
342
|
-
|
|
343
172
|
## Usage
|
|
344
173
|
|
|
345
174
|
### Command Line Interface
|
|
@@ -531,7 +360,7 @@ cat examples/data/sample.json \
|
|
|
531
360
|
Use ETLPlus as a Python library:
|
|
532
361
|
|
|
533
362
|
```python
|
|
534
|
-
from etlplus.ops import extract, validate, transform, load
|
|
363
|
+
from etlplus import extract, validate, transform, load
|
|
535
364
|
|
|
536
365
|
# Extract data
|
|
537
366
|
data = extract("file", "data.json")
|
|
@@ -587,7 +416,7 @@ etlplus transform \
|
|
|
587
416
|
# 3. Validate transformed data
|
|
588
417
|
etlplus validate \
|
|
589
418
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
590
|
-
|
|
419
|
+
temo/sample_transformed.json
|
|
591
420
|
|
|
592
421
|
# 4. Load to CSV
|
|
593
422
|
cat temp/sample_transformed.json \
|
|
@@ -726,7 +555,7 @@ We split tests into two layers:
|
|
|
726
555
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
727
556
|
fake clients.
|
|
728
557
|
|
|
729
|
-
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()` it’s integration by default. Full
|
|
558
|
+
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default. Full
|
|
730
559
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
731
560
|
|
|
732
561
|
### Code Coverage
|
|
@@ -774,6 +603,17 @@ git push origin v1.4.0
|
|
|
774
603
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
775
604
|
this exercises the same build path the workflow uses.
|
|
776
605
|
|
|
606
|
+
## Links
|
|
607
|
+
|
|
608
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
609
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
610
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
611
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
612
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
613
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
614
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
615
|
+
- Additional references: [`REFERENCES.md`](`REFERENCES.md)
|
|
616
|
+
|
|
777
617
|
## License
|
|
778
618
|
|
|
779
619
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -797,39 +637,6 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
797
637
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
798
638
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
799
639
|
|
|
800
|
-
## Documentation
|
|
801
|
-
|
|
802
|
-
### Python Packages/Subpackage
|
|
803
|
-
|
|
804
|
-
Navigate to detailed documentation for each subpackage:
|
|
805
|
-
|
|
806
|
-
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
807
|
-
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
808
|
-
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
809
|
-
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
810
|
-
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
811
|
-
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
812
|
-
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
813
|
-
profiles
|
|
814
|
-
|
|
815
|
-
### Community Health
|
|
816
|
-
|
|
817
|
-
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
818
|
-
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
819
|
-
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
820
|
-
- [Support](SUPPORT.md): Where to get help
|
|
821
|
-
|
|
822
|
-
### Other
|
|
823
|
-
|
|
824
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
825
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
826
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
827
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
828
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
829
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
830
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
831
|
-
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
832
|
-
|
|
833
640
|
## Acknowledgments
|
|
834
641
|
|
|
835
642
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
|
|
2
|
+
etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
|
|
3
|
+
etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
|
|
4
|
+
etlplus/enums.py,sha256=JvXdX_x_U3LNnGZdX3Uo9dqipoDdqnLtZBmZE4DSei8,15426
|
|
5
|
+
etlplus/extract.py,sha256=f44JdHhNTACxgn44USx05paKTwq7LQY-V4wANCW9hVM,6173
|
|
6
|
+
etlplus/file.py,sha256=B-zebTrIFDKaaKzA9Fq5-L0JwDNYa2T--_6veR3N03s,17939
|
|
7
|
+
etlplus/load.py,sha256=R_y0_vtsEo1bwxWVQu2bfhB5ZIJoIoWu2ycCdvY4RnE,8737
|
|
8
|
+
etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
|
|
9
|
+
etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
etlplus/run.py,sha256=X4kp5FQlIWVf1_d9oSrchKau7BFDCE1Zkscvu7WPaWw,12340
|
|
11
|
+
etlplus/run_helpers.py,sha256=bj6MkaeFxjl3CeKG1HoXKx5DwAlXNERVW-GX-z1P_qQ,24373
|
|
12
|
+
etlplus/transform.py,sha256=uAUVDDHYCgx7GpVez9IK3OAZM-CnCuMa9iox3vwGGJA,25296
|
|
13
|
+
etlplus/types.py,sha256=1hsDlnF6r76zAwaUYay-i6pCM-Y0IU5nP7Crj8PLCQ4,6157
|
|
14
|
+
etlplus/utils.py,sha256=BMLTWAvCJj3zLEcffBgURYnu0UGhhXsfH2WWpAt7fV8,13363
|
|
15
|
+
etlplus/validate.py,sha256=7rJoEI_SIILdPpoBqqh2UJqg9oeReDz34mYSlc3t7Qg,12989
|
|
16
|
+
etlplus/api/README.md,sha256=ZiyjxLz0LfFCzeYKXwtH8yY1OJ4hXCju7t2ICroFoU8,7215
|
|
17
|
+
etlplus/api/__init__.py,sha256=P2JUYFy6Ep4t6xnsBiCBfQCkQLHYYhA-yXPXCobS8Y0,4295
|
|
18
|
+
etlplus/api/auth.py,sha256=GOO5on-LoMS1GXTAhtK9rFcfpjbBcNeA6NE5UZwIq0g,12158
|
|
19
|
+
etlplus/api/config.py,sha256=wRpOaZ31sPReVzEMme0jKl_37nqgraESwuYSNxP_xDo,17397
|
|
20
|
+
etlplus/api/endpoint_client.py,sha256=PxCvBsvFhTIjEbY6drIIvciynHXQEvKu47Pi63Gxwqs,30693
|
|
21
|
+
etlplus/api/errors.py,sha256=XjI2xW-sypMUNUbqfc2S57-IGyWnH3oCDFhCmKYYI_Q,4648
|
|
22
|
+
etlplus/api/request_manager.py,sha256=YkDz803HM3BBzamsEZdSdE9fbVT0avMbTaLAgar9Wzo,18481
|
|
23
|
+
etlplus/api/retry_manager.py,sha256=0GDhJVyIlb1Ww35JUWlYoa8QYUPjKLBtxQeZj3TdLbY,11306
|
|
24
|
+
etlplus/api/transport.py,sha256=LRsQEPxIYrvXQQMvgPPkIl_57YCmanzsWNEnSYdP_d8,9164
|
|
25
|
+
etlplus/api/types.py,sha256=687JigIf3qfYxgGTNBaMNsQsrza5Pja6DcK5llM9oRU,4591
|
|
26
|
+
etlplus/api/pagination/__init__.py,sha256=a4UX2J0AG8RMvmHt_CCofUm5vSmFo6GAfkb8XnSXypM,1395
|
|
27
|
+
etlplus/api/pagination/client.py,sha256=42cG442od3mQkw_JsvGvxT_w7y9J4HPM5PB4tFFU6EQ,5383
|
|
28
|
+
etlplus/api/pagination/config.py,sha256=3dXDJ-nMbO9Zk6i344n4roBFbUlHsa294D1_plPmm6E,13579
|
|
29
|
+
etlplus/api/pagination/paginator.py,sha256=wtdY_er4yfjx5yTUQJ1gPq-IuWmpLAHeG5buBQZJm54,24453
|
|
30
|
+
etlplus/api/rate_limiting/__init__.py,sha256=ZySB1dZettEDnWvI1EHf_TZ9L08M_kKsNR-Y_lbU6kI,1070
|
|
31
|
+
etlplus/api/rate_limiting/config.py,sha256=2b4wIynblN-1EyMqI4aXa71SljzSjXYh5N1Nngr3jOg,9406
|
|
32
|
+
etlplus/api/rate_limiting/rate_limiter.py,sha256=Uxozqd_Ej5Lsj-M-mLT2WexChgWh7x35_YP10yqYPQA,7159
|
|
33
|
+
etlplus/cli/__init__.py,sha256=J97-Rv931IL1_b4AXnB7Fbbd7HKnHBpx18NQfC_kE6c,299
|
|
34
|
+
etlplus/cli/commands.py,sha256=BK2qmFsser6AXOgEvpiadrYMIiwviAzqkSxMlBhRXRw,24670
|
|
35
|
+
etlplus/cli/constants.py,sha256=KIZj7J2tNf5mJbkqAdZmu5FXYW2FQmxwgeOKWc3-3Hg,1944
|
|
36
|
+
etlplus/cli/handlers.py,sha256=K0GazvrPgocJ-63HZqF0xhyJk8TB1Gcj-eIbWltXKRU,17759
|
|
37
|
+
etlplus/cli/io.py,sha256=7sldiZz4-Geomge5IO_XYykXPa6UiORfUWzLCdQePG8,7846
|
|
38
|
+
etlplus/cli/main.py,sha256=IgeqxypixfwLHR-QcpgVMQ7vMZ865bXOh2oO9v-BWeM,5234
|
|
39
|
+
etlplus/cli/options.py,sha256=vfXT3YLh7wG1iC-aTdSg6ItMC8l6n0Lozmy53XjqLbA,1199
|
|
40
|
+
etlplus/cli/state.py,sha256=Pfd8ru0wYIN7eGp1_A0tioqs1LiCDZCuJ6AnjZb6yYQ,8027
|
|
41
|
+
etlplus/cli/types.py,sha256=tclhKVJXDqHzlTQBYKARfqMgDOcuBJ-Zej2pvFy96WM,652
|
|
42
|
+
etlplus/config/__init__.py,sha256=VZWzOg7d2YR9NT6UwKTv44yf2FRUMjTHynkm1Dl5Qzo,1486
|
|
43
|
+
etlplus/config/connector.py,sha256=0-TIwevHbKRHVmucvyGpPd-3tB1dKHB-dj0yJ6kq5eY,9809
|
|
44
|
+
etlplus/config/jobs.py,sha256=hmzRCqt0OvCEZZR4ONKrd3lvSv0OmayjLc4yOBk3ug8,7399
|
|
45
|
+
etlplus/config/pipeline.py,sha256=Va4MQY6KEyKqHGMKPmh09ZcGpx95br-iNUjpkqtzVbw,9500
|
|
46
|
+
etlplus/config/profile.py,sha256=Ss2zedQGjkaGSpvBLTD4SZaWViMJ7TJPLB8Q2_BTpPg,1898
|
|
47
|
+
etlplus/config/types.py,sha256=a0epJ3z16HQ5bY3Ctf8s_cQPa3f0HHcwdOcjCP2xoG4,4954
|
|
48
|
+
etlplus/config/utils.py,sha256=4SUHMkt5bKBhMhiJm-DrnmE2Q4TfOgdNCKz8PJDS27o,3443
|
|
49
|
+
etlplus/database/__init__.py,sha256=AKJsDl2RHuRGPS-eXgNJeh4aSncJP5Y0yLApBF6i7i8,1052
|
|
50
|
+
etlplus/database/ddl.py,sha256=z9KvHi1MPhPBLHxMDdqJgLTp3A2-lcz0gqhZ7HIE6kU,7916
|
|
51
|
+
etlplus/database/engine.py,sha256=7rr7YndA8LwyWJL8k1YhQbqxxmW4gWEUQjp0NwQcYtc,4061
|
|
52
|
+
etlplus/database/orm.py,sha256=gCSqH-CjQz6tV9133-VqgiwokK5ylun0BwXaIWfImAo,10008
|
|
53
|
+
etlplus/database/schema.py,sha256=HNTgglI8qvQLInr7gq--2lLmLKHzAZTL2MJUOIw9DlY,7025
|
|
54
|
+
etlplus/database/types.py,sha256=_pkQyC14TzAlgyeIqZG4F5LWYknZbHw3TW68Auk7Ya0,795
|
|
55
|
+
etlplus/templates/__init__.py,sha256=tsniN7XJYs3NwYxJ6c2HD5upHP3CDkLx-bQCMt97UOM,106
|
|
56
|
+
etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,4734
|
|
57
|
+
etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
|
|
58
|
+
etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
|
|
59
|
+
etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
|
|
60
|
+
etlplus-0.10.2.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
61
|
+
etlplus-0.10.2.dist-info/METADATA,sha256=rz0pmbVOfFoJ4HWJuSiNm-oMVbZ8Uf44bcgFRlXxBxU,21036
|
|
62
|
+
etlplus-0.10.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
63
|
+
etlplus-0.10.2.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
64
|
+
etlplus-0.10.2.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
65
|
+
etlplus-0.10.2.dist-info/RECORD,,
|
etlplus/README.md
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# `etlplus` Package
|
|
2
|
-
|
|
3
|
-
The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
|
|
4
|
-
validation, transformation, and loading of data from files, APIs, and databases.
|
|
5
|
-
|
|
6
|
-
- Top-level entry points for extract, validate, transform, and load
|
|
7
|
-
- Utilities for pipeline orchestration and helpers
|
|
8
|
-
- Exposes all subpackages for advanced usage
|
|
9
|
-
|
|
10
|
-
Back to project overview: see the top-level [README](../README.md).
|
|
11
|
-
|
|
12
|
-
## Subpackages
|
|
13
|
-
|
|
14
|
-
- [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
15
|
-
- [etlplus.file](file/README.md): Unified file format support and helpers
|
|
16
|
-
- [etlplus.cli](cli/README.md): Command-line interface definitions for `etlplus`
|
|
17
|
-
- [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
|
|
18
|
-
- [etlplus.templates](templates/README.md): SQL and DDL template helpers
|
|
19
|
-
- [etlplus.validation](validation/README.md): Data validation utilities and helpers
|
|
20
|
-
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
21
|
-
profiles
|
|
22
|
-
|
|
23
|
-
## Quickstart
|
|
24
|
-
|
|
25
|
-
```python
|
|
26
|
-
from etlplus.ops import extract, validate, transform, load
|
|
27
|
-
|
|
28
|
-
data = extract("file", "input.csv")
|
|
29
|
-
filtered = transform(data, {"filter": {"field": "age", "op": "gt", "value": 25}})
|
|
30
|
-
assert validate(filtered, {"age": {"type": "number", "min": 0}})["valid"]
|
|
31
|
-
load(filtered, "file", "output.json", file_format="json")
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
## See Also
|
|
35
|
-
|
|
36
|
-
- [Top-level project README](../README.md)
|
|
37
|
-
- [API reference](../docs/README.md)
|
etlplus/api/enums.py
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
:mod:`etlplus.api.enums` module.
|
|
3
|
-
|
|
4
|
-
File-specific REST API-aligned enums and helpers.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from __future__ import annotations
|
|
8
|
-
|
|
9
|
-
from ..enums import CoercibleStrEnum
|
|
10
|
-
|
|
11
|
-
# SECTION: EXPORTS ========================================================= #
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
__all__ = [
|
|
15
|
-
# Enums
|
|
16
|
-
'HttpMethod',
|
|
17
|
-
]
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
# SECTION: ENUMS ============================================================ #
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class HttpMethod(CoercibleStrEnum):
|
|
24
|
-
"""Supported HTTP verbs that accept JSON payloads."""
|
|
25
|
-
|
|
26
|
-
# -- Constants -- #
|
|
27
|
-
|
|
28
|
-
CONNECT = 'connect'
|
|
29
|
-
DELETE = 'delete'
|
|
30
|
-
GET = 'get'
|
|
31
|
-
HEAD = 'head'
|
|
32
|
-
OPTIONS = 'options'
|
|
33
|
-
PATCH = 'patch'
|
|
34
|
-
POST = 'post'
|
|
35
|
-
PUT = 'put'
|
|
36
|
-
TRACE = 'trace'
|
|
37
|
-
|
|
38
|
-
# -- Getters -- #
|
|
39
|
-
|
|
40
|
-
@property
|
|
41
|
-
def allows_body(self) -> bool:
|
|
42
|
-
"""
|
|
43
|
-
Whether the method typically allows a request body.
|
|
44
|
-
|
|
45
|
-
Notes
|
|
46
|
-
-----
|
|
47
|
-
- RFCs do not strictly forbid bodies on some other methods (e.g.,
|
|
48
|
-
``DELETE``), but many servers/clients do not expect them. We mark
|
|
49
|
-
``POST``, ``PUT``, and ``PATCH`` as True.
|
|
50
|
-
"""
|
|
51
|
-
return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
|
etlplus/cli/README.md
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
# `etlplus.cli` Subpackage
|
|
2
|
-
|
|
3
|
-
Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPlus workflows.
|
|
4
|
-
|
|
5
|
-
- Provides a CLI for running ETL pipelines, jobs, and utilities
|
|
6
|
-
- Supports commands for running, validating, and inspecting pipelines
|
|
7
|
-
- Includes options for configuration, state, and output control
|
|
8
|
-
- Exposes handlers for custom command integration
|
|
9
|
-
|
|
10
|
-
Back to project overview: see the top-level [README](../../README.md).
|
|
11
|
-
|
|
12
|
-
- [`etlplus.cli` Subpackage](#etlpluscli-subpackage)
|
|
13
|
-
- [Available Commands](#available-commands)
|
|
14
|
-
- [Command Options](#command-options)
|
|
15
|
-
- [Example: Running a Pipeline](#example-running-a-pipeline)
|
|
16
|
-
- [See Also](#see-also)
|
|
17
|
-
|
|
18
|
-
## Available Commands
|
|
19
|
-
|
|
20
|
-
- **run**: Execute a pipeline or job
|
|
21
|
-
- **validate**: Validate pipeline or config files
|
|
22
|
-
- **inspect**: Show pipeline/job details
|
|
23
|
-
|
|
24
|
-
## Command Options
|
|
25
|
-
|
|
26
|
-
- `--config`: Path to config file
|
|
27
|
-
- `--state`: Path to state file
|
|
28
|
-
- `--output`: Output file or format
|
|
29
|
-
|
|
30
|
-
## Example: Running a Pipeline
|
|
31
|
-
|
|
32
|
-
```bash
|
|
33
|
-
etlplus run --config configs/pipeline.yml --output results.json
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
## See Also
|
|
37
|
-
|
|
38
|
-
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
39
|
-
- Command handlers in [handlers.py](handlers.py)
|
|
40
|
-
- Command options in [options.py](options.py)
|
etlplus/database/README.md
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
# `etlplus.database` Subpackage
|
|
2
|
-
|
|
3
|
-
Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
|
|
4
|
-
|
|
5
|
-
- Provides database engine and connection management
|
|
6
|
-
- Supports schema definition and DDL generation
|
|
7
|
-
- Includes lightweight ORM utilities for tabular data
|
|
8
|
-
- Exposes type definitions for database objects
|
|
9
|
-
|
|
10
|
-
Back to project overview: see the top-level [README](../../README.md).
|
|
11
|
-
|
|
12
|
-
- [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
|
|
13
|
-
- [Database Engine and Connections](#database-engine-and-connections)
|
|
14
|
-
- [Schema and DDL Helpers](#schema-and-ddl-helpers)
|
|
15
|
-
- [ORM Utilities](#orm-utilities)
|
|
16
|
-
- [Example: Creating a Table](#example-creating-a-table)
|
|
17
|
-
- [See Also](#see-also)
|
|
18
|
-
|
|
19
|
-
## Database Engine and Connections
|
|
20
|
-
|
|
21
|
-
- Manage connections to supported databases
|
|
22
|
-
- Configure engines for different backends
|
|
23
|
-
|
|
24
|
-
## Schema and DDL Helpers
|
|
25
|
-
|
|
26
|
-
- Define table schemas and columns
|
|
27
|
-
- Generate DDL statements for supported databases
|
|
28
|
-
|
|
29
|
-
## ORM Utilities
|
|
30
|
-
|
|
31
|
-
- Map rows to Python objects
|
|
32
|
-
- Simple CRUD helpers for tabular data
|
|
33
|
-
|
|
34
|
-
## Example: Creating a Table
|
|
35
|
-
|
|
36
|
-
```python
|
|
37
|
-
from etlplus.database import Schema, Engine
|
|
38
|
-
|
|
39
|
-
engine = Engine.connect("sqlite:///example.db")
|
|
40
|
-
schema = Schema.from_dict({"name": "users", "columns": [ ... ]})
|
|
41
|
-
engine.create_table(schema)
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## See Also
|
|
45
|
-
|
|
46
|
-
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
47
|
-
- Schema helpers in [schema.py](schema.py)
|
|
48
|
-
- ORM utilities in [orm.py](orm.py)
|
etlplus/file/README.md
DELETED
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
# `etlplus.file` Subpackage
|
|
2
|
-
|
|
3
|
-
Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
|
|
4
|
-
and writing data files.
|
|
5
|
-
|
|
6
|
-
- Provides a consistent interface for reading and writing files in various formats
|
|
7
|
-
- Supports all formats defined in `FileFormat` (see below)
|
|
8
|
-
- Includes helpers for inferring file format and compression from filenames, extensions, or MIME
|
|
9
|
-
types
|
|
10
|
-
- Exposes a `File` class with instance methods for reading and writing data
|
|
11
|
-
|
|
12
|
-
Back to project overview: see the top-level [README](../../README.md).
|
|
13
|
-
|
|
14
|
-
- [`etlplus.file` Subpackage](#etlplusfile-subpackage)
|
|
15
|
-
- [Supported File Formats](#supported-file-formats)
|
|
16
|
-
- [Inferring File Format and Compression](#inferring-file-format-and-compression)
|
|
17
|
-
- [Reading and Writing Files](#reading-and-writing-files)
|
|
18
|
-
- [Reading a File](#reading-a-file)
|
|
19
|
-
- [Writing a File](#writing-a-file)
|
|
20
|
-
- [File Instance Methods](#file-instance-methods)
|
|
21
|
-
- [Example: Reading and Writing](#example-reading-and-writing)
|
|
22
|
-
- [See Also](#see-also)
|
|
23
|
-
|
|
24
|
-
## Supported File Formats
|
|
25
|
-
|
|
26
|
-
The following formats are defined in `FileFormat` and supported for reading and writing:
|
|
27
|
-
|
|
28
|
-
| Format | Description |
|
|
29
|
-
|-----------|---------------------------------------------|
|
|
30
|
-
| avro | Apache Avro binary serialization |
|
|
31
|
-
| csv | Comma-separated values text files |
|
|
32
|
-
| feather | Apache Arrow Feather columnar format |
|
|
33
|
-
| gz | Gzip-compressed files (see the compression note below the table) |
|
|
34
|
-
| json | Standard JSON files |
|
|
35
|
-
| ndjson | Newline-delimited JSON (JSON Lines) |
|
|
36
|
-
| orc | Apache ORC columnar format |
|
|
37
|
-
| parquet | Apache Parquet columnar format |
|
|
38
|
-
| tsv | Tab-separated values text files |
|
|
39
|
-
| txt | Plain text files |
|
|
40
|
-
| xls | Microsoft Excel (legacy .xls) |
|
|
41
|
-
| xlsx | Microsoft Excel (modern .xlsx) |
|
|
42
|
-
| zip | ZIP-compressed files (see the compression note below the table) |
|
|
43
|
-
| xml | XML files |
|
|
44
|
-
| yaml | YAML files |
|
|
45
|
-
|
|
46
|
-
Compression formats (gz, zip) are also supported as wrappers for other formats.
|
|
47
|
-
|
|
48
|
-
## Inferring File Format and Compression
|
|
49
|
-
|
|
50
|
-
Use `infer_file_format_and_compression(value, filename=None)` to infer the file format and
|
|
51
|
-
compression from a filename, extension, or MIME type. It returns a tuple `(file_format,
|
|
52
|
-
compression_format)`.
|
|
53
|
-
|
|
54
|
-
## Reading and Writing Files
|
|
55
|
-
|
|
56
|
-
The main entry point for file operations is the `File` class. To read or write files:
|
|
57
|
-
|
|
58
|
-
### Reading a File
|
|
59
|
-
|
|
60
|
-
```python
|
|
61
|
-
from etlplus.file import File
|
|
62
|
-
|
|
63
|
-
f = File("data/sample.csv")
|
|
64
|
-
data = f.read()
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
- The `read()` method automatically detects the format and compression.
|
|
68
|
-
- Returns parsed data (e.g., list of dicts for tabular formats).
|
|
69
|
-
|
|
70
|
-
### Writing a File
|
|
71
|
-
|
|
72
|
-
```python
|
|
73
|
-
from etlplus.file import File
|
|
74
|
-
|
|
75
|
-
f = File("output.json")
|
|
76
|
-
f.write(data)
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
- The `write()` method serializes and writes data in the appropriate format.
|
|
80
|
-
- Supports all formats listed above.
|
|
81
|
-
|
|
82
|
-
## File Instance Methods
|
|
83
|
-
|
|
84
|
-
- `read()`: Reads and parses the file, returning structured data.
|
|
85
|
-
- `write(data)`: Writes structured data to the file in the detected format.
|
|
86
|
-
|
|
87
|
-
## Example: Reading and Writing
|
|
88
|
-
|
|
89
|
-
```python
|
|
90
|
-
from etlplus.file import File
|
|
91
|
-
|
|
92
|
-
# Read CSV
|
|
93
|
-
csv_file = File("data.csv")
|
|
94
|
-
rows = csv_file.read()
|
|
95
|
-
|
|
96
|
-
# Write JSON
|
|
97
|
-
json_file = File("output.json")
|
|
98
|
-
json_file.write(rows)
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
## See Also
|
|
102
|
-
|
|
103
|
-
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
104
|
-
- File format enums in [enums.py](enums.py)
|
|
105
|
-
- Compression format enums in [enums.py](enums.py)
|