etlplus 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,616 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: etlplus
|
|
3
|
+
Version: 0.5.4
|
|
4
|
+
Summary: A Swiss Army knife for simple ETL operations
|
|
5
|
+
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
|
+
Author: ETLPlus Team
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/Dagitali/ETLPlus
|
|
9
|
+
Project-URL: Repository, https://github.com/Dagitali/ETLPlus
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Requires-Python: >=3.13,<3.15
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: jinja2>=3.1.6
|
|
21
|
+
Requires-Dist: pyodbc>=5.3.0
|
|
22
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
23
|
+
Requires-Dist: pandas>=2.3.3
|
|
24
|
+
Requires-Dist: requests>=2.32.5
|
|
25
|
+
Requires-Dist: typer>=0.21.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
28
|
+
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
29
|
+
Requires-Dist: flake8>=7.3.0; extra == "dev"
|
|
30
|
+
Requires-Dist: PyYAML>=6.0.3; extra == "dev"
|
|
31
|
+
Requires-Dist: pydoclint>=0.8.1; extra == "dev"
|
|
32
|
+
Requires-Dist: pydocstyle>=6.3.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest>=8.4.2; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-cov>=7.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.14.4; extra == "dev"
|
|
36
|
+
Provides-Extra: docs
|
|
37
|
+
Requires-Dist: sphinx>=4.0.0; extra == "docs"
|
|
38
|
+
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
|
|
39
|
+
Dynamic: home-page
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
Dynamic: requires-python
|
|
42
|
+
|
|
43
|
+
# ETLPlus
|
|
44
|
+
|
|
45
|
+
[][PyPI package]
|
|
46
|
+
[][GitHub release]
|
|
47
|
+
[][PyPI package]
|
|
48
|
+
[](LICENSE)
|
|
49
|
+
[][GitHub Actions CI workflow]
|
|
50
|
+
[][Codecov project]
|
|
51
|
+
[][GitHub issues]
|
|
52
|
+
[][GitHub PRs]
|
|
53
|
+
[][GitHub contributors]
|
|
54
|
+
|
|
55
|
+
ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offering both a Python
|
|
56
|
+
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
57
|
+
|
|
58
|
+
- [ETLPlus](#etlplus)
|
|
59
|
+
- [Features](#features)
|
|
60
|
+
- [Installation](#installation)
|
|
61
|
+
- [Quickstart](#quickstart)
|
|
62
|
+
- [Usage](#usage)
|
|
63
|
+
- [Command Line Interface](#command-line-interface)
|
|
64
|
+
- [Check Pipelines](#check-pipelines)
|
|
65
|
+
- [Render SQL DDL](#render-sql-ddl)
|
|
66
|
+
- [Extract Data](#extract-data)
|
|
67
|
+
- [Validate Data](#validate-data)
|
|
68
|
+
- [Transform Data](#transform-data)
|
|
69
|
+
- [Load Data](#load-data)
|
|
70
|
+
- [Python API](#python-api)
|
|
71
|
+
- [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
|
|
72
|
+
- [Format Overrides](#format-overrides)
|
|
73
|
+
- [Transformation Operations](#transformation-operations)
|
|
74
|
+
- [Filter Operations](#filter-operations)
|
|
75
|
+
- [Aggregation Functions](#aggregation-functions)
|
|
76
|
+
- [Validation Rules](#validation-rules)
|
|
77
|
+
- [Development](#development)
|
|
78
|
+
- [API Client Docs](#api-client-docs)
|
|
79
|
+
- [Runner Internals and Connectors](#runner-internals-and-connectors)
|
|
80
|
+
- [Running Tests](#running-tests)
|
|
81
|
+
- [Test Layers](#test-layers)
|
|
82
|
+
- [Code Coverage](#code-coverage)
|
|
83
|
+
- [Linting](#linting)
|
|
84
|
+
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
85
|
+
- [Releasing to PyPI](#releasing-to-pypi)
|
|
86
|
+
- [Links](#links)
|
|
87
|
+
- [License](#license)
|
|
88
|
+
- [Contributing](#contributing)
|
|
89
|
+
- [Acknowledgments](#acknowledgments)
|
|
90
|
+
|
|
91
|
+
## Features
|
|
92
|
+
|
|
93
|
+
- **Check** data pipeline definitions before running them:
|
|
94
|
+
- Summarize jobs, sources, targets, and transforms
|
|
95
|
+
- Confirm configuration changes by printing focused sections on demand
|
|
96
|
+
|
|
97
|
+
- **Render** SQL DDL from shared table specs:
|
|
98
|
+
- Generate CREATE TABLE or view statements
|
|
99
|
+
- Swap templates or direct output to files for database migrations
|
|
100
|
+
|
|
101
|
+
- **Extract** data from multiple sources:
|
|
102
|
+
- Files (CSV, JSON, XML, YAML)
|
|
103
|
+
- Databases (connection string support)
|
|
104
|
+
- REST APIs (GET)
|
|
105
|
+
|
|
106
|
+
- **Validate** data with flexible rules:
|
|
107
|
+
- Type checking
|
|
108
|
+
- Required fields
|
|
109
|
+
- Value ranges (min/max)
|
|
110
|
+
- String length constraints
|
|
111
|
+
- Pattern matching
|
|
112
|
+
- Enum validation
|
|
113
|
+
|
|
114
|
+
- **Transform** data with powerful operations:
|
|
115
|
+
- Filter records
|
|
116
|
+
- Map/rename fields
|
|
117
|
+
- Select specific fields
|
|
118
|
+
- Sort data
|
|
119
|
+
- Aggregate functions (avg, count, max, min, sum)
|
|
120
|
+
|
|
121
|
+
- **Load** data to multiple targets:
|
|
122
|
+
- Files (CSV, JSON, XML, YAML)
|
|
123
|
+
- Databases (connection string support)
|
|
124
|
+
- REST APIs (PATCH, POST, PUT)
|
|
125
|
+
|
|
126
|
+
## Installation
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
pip install etlplus
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
For development:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
pip install -e ".[dev]"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Quickstart
|
|
139
|
+
|
|
140
|
+
Get up and running in under a minute.
|
|
141
|
+
|
|
142
|
+
[Command line interface](#command-line-interface):
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
# Inspect help and version
|
|
146
|
+
etlplus --help
|
|
147
|
+
etlplus --version
|
|
148
|
+
|
|
149
|
+
# One-liner: extract CSV, filter, select, and write JSON
|
|
150
|
+
etlplus extract file examples/data/sample.csv \
|
|
151
|
+
| etlplus transform - --operations '{"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}' \
|
|
152
|
+
-o temp/sample_output.json
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
[Python API](#python-api):
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from etlplus import extract, transform, validate, load
|
|
159
|
+
|
|
160
|
+
data = extract("file", "input.csv")
|
|
161
|
+
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
162
|
+
filtered = transform(data, ops)
|
|
163
|
+
rules = {"name": {"type": "string", "required": True}, "email": {"type": "string", "required": True}}
|
|
164
|
+
assert validate(filtered, rules)["valid"]
|
|
165
|
+
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Usage
|
|
169
|
+
|
|
170
|
+
### Command Line Interface
|
|
171
|
+
|
|
172
|
+
ETLPlus provides a powerful CLI for ETL operations:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# Show help
|
|
176
|
+
etlplus --help
|
|
177
|
+
|
|
178
|
+
# Show version
|
|
179
|
+
etlplus --version
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
#### Check Pipelines
|
|
183
|
+
|
|
184
|
+
Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
|
|
185
|
+
job names, summarize configured sources and targets, or drill into specific sections.
|
|
186
|
+
|
|
187
|
+
List jobs and show a pipeline summary:
|
|
188
|
+
```bash
|
|
189
|
+
etlplus check --config examples/configs/pipeline.yml --jobs
|
|
190
|
+
etlplus check --config examples/configs/pipeline.yml --summary
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Show sources or transforms for troubleshooting:
|
|
194
|
+
```bash
|
|
195
|
+
etlplus check --config examples/configs/pipeline.yml --sources
|
|
196
|
+
etlplus check --config examples/configs/pipeline.yml --transforms
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
#### Render SQL DDL
|
|
200
|
+
|
|
201
|
+
Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
|
|
202
|
+
or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
|
|
203
|
+
own).
|
|
204
|
+
|
|
205
|
+
Render all tables defined in a pipeline:
|
|
206
|
+
```bash
|
|
207
|
+
etlplus render --config examples/configs/pipeline.yml --template ddl
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Render a single table in that pipeline:
|
|
211
|
+
```bash
|
|
212
|
+
etlplus render --config examples/configs/pipeline.yml --table customers --template view
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Render from a standalone table spec to a file:
|
|
216
|
+
```bash
|
|
217
|
+
etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
#### Extract Data
|
|
221
|
+
|
|
222
|
+
Note: For file sources, the format is normally inferred from the filename extension. Use
|
|
223
|
+
`--source-format` to override inference when a file lacks an extension or when you want to force a
|
|
224
|
+
specific parser.
|
|
225
|
+
|
|
226
|
+
Extract from JSON file:
|
|
227
|
+
```bash
|
|
228
|
+
etlplus extract file examples/data/sample.json
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Extract from CSV file:
|
|
232
|
+
```bash
|
|
233
|
+
etlplus extract file examples/data/sample.csv
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
Extract from XML file:
|
|
237
|
+
```bash
|
|
238
|
+
etlplus extract file examples/data/sample.xml
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
Extract from REST API:
|
|
242
|
+
```bash
|
|
243
|
+
etlplus extract api https://api.example.com/data
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Save extracted data to file:
|
|
247
|
+
```bash
|
|
248
|
+
etlplus extract file examples/data/sample.csv -o temp/sample_output.json
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
#### Validate Data
|
|
252
|
+
|
|
253
|
+
Validate data from file or JSON string:
|
|
254
|
+
```bash
|
|
255
|
+
etlplus validate '{"name": "John", "age": 30}' --rules '{"name": {"type": "string", "required": true}, "age": {"type": "number", "min": 0, "max": 150}}'
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
Validate from file:
|
|
259
|
+
```bash
|
|
260
|
+
etlplus validate examples/data/sample.json --rules '{"email": {"type": "string", "pattern": "^[\\w.-]+@[\\w.-]+\\.\\w+$"}}'
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
#### Transform Data
|
|
264
|
+
|
|
265
|
+
When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
|
|
266
|
+
`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
|
|
267
|
+
control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
|
|
268
|
+
paths continue to infer formats from their extensions. Use `--from` to override the inferred source
|
|
269
|
+
connector type and `--to` to override the inferred target connector type, matching the `etlplus
|
|
270
|
+
extract`/`etlplus load` behavior.
|
|
271
|
+
|
|
272
|
+
Transform file inputs while overriding connector types:
|
|
273
|
+
```bash
|
|
274
|
+
etlplus transform --from file examples/data/sample.json \
|
|
275
|
+
--operations '{"select": ["name", "email"]}' \
|
|
276
|
+
--to file -o temp/selected_output.json
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
Filter and select fields:
|
|
280
|
+
```bash
|
|
281
|
+
etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
|
|
282
|
+
--operations '{"filter": {"field": "age", "op": "gt", "value": 26}, "select": ["name"]}'
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
Sort data:
|
|
286
|
+
```bash
|
|
287
|
+
etlplus transform examples/data/sample.json --operations '{"sort": {"field": "age", "reverse": true}}'
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Aggregate data:
|
|
291
|
+
```bash
|
|
292
|
+
etlplus transform examples/data/sample.json --operations '{"aggregate": {"field": "age", "func": "sum"}}'
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
Map/rename fields:
|
|
296
|
+
```bash
|
|
297
|
+
etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_name"}}'
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
#### Load Data
|
|
301
|
+
|
|
302
|
+
`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
|
|
303
|
+
|
|
304
|
+
Load to JSON file:
|
|
305
|
+
```bash
|
|
306
|
+
etlplus extract file examples/data/sample.json \
|
|
307
|
+
| etlplus load --to file temp/sample_output.json
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
Load to CSV file:
|
|
311
|
+
```bash
|
|
312
|
+
etlplus extract file examples/data/sample.csv \
|
|
313
|
+
| etlplus load --to file temp/sample_output.csv
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
Load to REST API:
|
|
317
|
+
```bash
|
|
318
|
+
cat examples/data/sample.json \
|
|
319
|
+
| etlplus load --to api https://api.example.com/endpoint
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Python API
|
|
323
|
+
|
|
324
|
+
Use ETLPlus as a Python library:
|
|
325
|
+
|
|
326
|
+
```python
|
|
327
|
+
from etlplus import extract, validate, transform, load
|
|
328
|
+
|
|
329
|
+
# Extract data
|
|
330
|
+
data = extract("file", "data.json")
|
|
331
|
+
|
|
332
|
+
# Validate data
|
|
333
|
+
validation_rules = {
|
|
334
|
+
"name": {"type": "string", "required": True},
|
|
335
|
+
"age": {"type": "number", "min": 0, "max": 150}
|
|
336
|
+
}
|
|
337
|
+
result = validate(data, validation_rules)
|
|
338
|
+
if result["valid"]:
|
|
339
|
+
print("Data is valid!")
|
|
340
|
+
|
|
341
|
+
# Transform data
|
|
342
|
+
operations = {
|
|
343
|
+
"filter": {"field": "age", "op": "gt", "value": 18},
|
|
344
|
+
"select": ["name", "email"]
|
|
345
|
+
}
|
|
346
|
+
transformed = transform(data, operations)
|
|
347
|
+
|
|
348
|
+
# Load data
|
|
349
|
+
load(transformed, "file", "temp/sample_output.json", file_format="json")
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
For YAML-driven pipelines executed end-to-end (extract → validate → transform → load), see:
|
|
353
|
+
|
|
354
|
+
- Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
355
|
+
- Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
|
|
356
|
+
|
|
357
|
+
CLI quick reference for pipelines:
|
|
358
|
+
|
|
359
|
+
```bash
|
|
360
|
+
# List jobs or show a pipeline summary
|
|
361
|
+
etlplus check --config examples/configs/pipeline.yml --jobs
|
|
362
|
+
etlplus check --config examples/configs/pipeline.yml --summary
|
|
363
|
+
|
|
364
|
+
# Run a job
|
|
365
|
+
etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
|
|
366
|
+
|
|
367
|
+
# Deprecated shim (will be removed): etlplus pipeline
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Complete ETL Pipeline Example
|
|
371
|
+
|
|
372
|
+
```bash
|
|
373
|
+
# 1. Extract from CSV
|
|
374
|
+
etlplus extract file examples/data/sample.csv -o temp/sample_extracted.json
|
|
375
|
+
|
|
376
|
+
# 2. Transform (filter and select fields)
|
|
377
|
+
etlplus transform temp/sample_extracted.json \
|
|
378
|
+
--operations '{"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}' \
|
|
379
|
+
-o temp/sample_transformed.json
|
|
380
|
+
|
|
381
|
+
# 3. Validate transformed data
|
|
382
|
+
etlplus validate temp/sample_transformed.json \
|
|
383
|
+
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
|
|
384
|
+
|
|
385
|
+
# 4. Load to CSV
|
|
386
|
+
cat temp/sample_transformed.json \
|
|
387
|
+
| etlplus load --to file temp/sample_output.csv
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
### Format Overrides
|
|
391
|
+
|
|
392
|
+
`--source-format` and `--target-format` override whichever format would normally be inferred from a
|
|
393
|
+
file extension. This is useful when an input's extension is missing or misleading (for example, `records.txt` that
|
|
394
|
+
actually contains CSV) or when you intentionally want to treat a file as another format.
|
|
395
|
+
|
|
396
|
+
Examples (zsh):
|
|
397
|
+
|
|
398
|
+
```zsh
|
|
399
|
+
# Force CSV parsing for a file whose extension does not indicate CSV
|
|
400
|
+
etlplus extract --from file data.txt --source-format csv
|
|
401
|
+
|
|
402
|
+
# Write CSV to a file without the .csv suffix
|
|
403
|
+
etlplus load --to file output.bin --target-format csv < data.json
|
|
404
|
+
|
|
405
|
+
# Leave the flags off when extensions already match the desired format
|
|
406
|
+
etlplus extract --from file data.csv
|
|
407
|
+
etlplus load --to file data.json < data.json
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
## Transformation Operations
|
|
411
|
+
|
|
412
|
+
### Filter Operations
|
|
413
|
+
|
|
414
|
+
Supported operators:
|
|
415
|
+
- `eq`: Equal
|
|
416
|
+
- `ne`: Not equal
|
|
417
|
+
- `gt`: Greater than
|
|
418
|
+
- `gte`: Greater than or equal
|
|
419
|
+
- `lt`: Less than
|
|
420
|
+
- `lte`: Less than or equal
|
|
421
|
+
- `in`: Value in list
|
|
422
|
+
- `contains`: List/string contains value
|
|
423
|
+
|
|
424
|
+
Example:
|
|
425
|
+
```json
|
|
426
|
+
{
|
|
427
|
+
"filter": {
|
|
428
|
+
"field": "status",
|
|
429
|
+
"op": "in",
|
|
430
|
+
"value": ["active", "pending"]
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Aggregation Functions
|
|
436
|
+
|
|
437
|
+
Supported functions:
|
|
438
|
+
- `sum`: Sum of values
|
|
439
|
+
- `avg`: Average of values
|
|
440
|
+
- `min`: Minimum value
|
|
441
|
+
- `max`: Maximum value
|
|
442
|
+
- `count`: Count of values
|
|
443
|
+
|
|
444
|
+
Example:
|
|
445
|
+
```json
|
|
446
|
+
{
|
|
447
|
+
"aggregate": {
|
|
448
|
+
"field": "revenue",
|
|
449
|
+
"func": "sum"
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
## Validation Rules
|
|
455
|
+
|
|
456
|
+
Supported validation rules:
|
|
457
|
+
- `type`: Data type (string, number, integer, boolean, array, object)
|
|
458
|
+
- `required`: Field is required (true/false)
|
|
459
|
+
- `min`: Minimum value for numbers
|
|
460
|
+
- `max`: Maximum value for numbers
|
|
461
|
+
- `minLength`: Minimum length for strings
|
|
462
|
+
- `maxLength`: Maximum length for strings
|
|
463
|
+
- `pattern`: Regex pattern for strings
|
|
464
|
+
- `enum`: List of allowed values
|
|
465
|
+
|
|
466
|
+
Example:
|
|
467
|
+
```json
|
|
468
|
+
{
|
|
469
|
+
"email": {
|
|
470
|
+
"type": "string",
|
|
471
|
+
"required": true,
|
|
472
|
+
"pattern": "^[\\w.-]+@[\\w.-]+\\.\\w+$"
|
|
473
|
+
},
|
|
474
|
+
"age": {
|
|
475
|
+
"type": "number",
|
|
476
|
+
"min": 0,
|
|
477
|
+
"max": 150
|
|
478
|
+
},
|
|
479
|
+
"status": {
|
|
480
|
+
"type": "string",
|
|
481
|
+
"enum": ["active", "inactive", "pending"]
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
## Development
|
|
487
|
+
|
|
488
|
+
### API Client Docs
|
|
489
|
+
|
|
490
|
+
Looking for the HTTP client and pagination helpers? See the dedicated docs in
|
|
491
|
+
`etlplus/api/README.md` for:
|
|
492
|
+
|
|
493
|
+
- Quickstart with `EndpointClient`
|
|
494
|
+
- Authentication via `EndpointCredentialsBearer`
|
|
495
|
+
- Pagination with `PaginationConfig` (page and cursor styles)
|
|
496
|
+
- Tips on `records_path` and `cursor_path`
|
|
497
|
+
|
|
498
|
+
### Runner Internals and Connectors
|
|
499
|
+
|
|
500
|
+
Curious how the pipeline runner composes API requests, pagination, and load calls?
|
|
501
|
+
|
|
502
|
+
- Runner overview and helpers: [`docs/run-module.md`](docs/run-module.md)
|
|
503
|
+
- Unified "connector" vocabulary (API/File/DB): `etlplus/config/connector.py`
|
|
504
|
+
- API/file targets reuse the same shapes as sources; API targets typically set a `method`.
|
|
505
|
+
|
|
506
|
+
### Running Tests
|
|
507
|
+
|
|
508
|
+
```bash
|
|
509
|
+
pytest tests/ -v
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
#### Test Layers
|
|
513
|
+
|
|
514
|
+
We split tests into two layers:
|
|
515
|
+
|
|
516
|
+
- **Unit (`tests/unit/`)**: single function or class, no real I/O, fast, uses stubs/monkeypatch
|
|
517
|
+
(e.g. `etlplus.cli.create_parser`, transform + validate helpers).
|
|
518
|
+
- **Integration (`tests/integration/`)**: end-to-end flows (CLI `main()`, pipeline `run()`,
|
|
519
|
+
pagination + rate limit defaults, file/API connector interactions); may touch temp files and use
|
|
520
|
+
fake clients.
|
|
521
|
+
|
|
522
|
+
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default. Full
|
|
523
|
+
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
524
|
+
|
|
525
|
+
### Code Coverage
|
|
526
|
+
|
|
527
|
+
```bash
|
|
528
|
+
pytest tests/ --cov=etlplus --cov-report=html
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
### Linting
|
|
532
|
+
|
|
533
|
+
```bash
|
|
534
|
+
flake8 etlplus/
|
|
535
|
+
black etlplus/
|
|
536
|
+
```
|
|
537
|
+
|
|
538
|
+
### Updating Demo Snippets
|
|
539
|
+
|
|
540
|
+
`DEMO.md` shows the real output of `etlplus --version` captured from a freshly built wheel. Regenerate
|
|
541
|
+
the snippet (and the companion file [docs/snippets/installation_version.md](docs/snippets/installation_version.md)) after changing anything that affects the version string:
|
|
542
|
+
|
|
543
|
+
```bash
|
|
544
|
+
make demo-snippets
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
The helper script in [tools/update_demo_snippets.py](tools/update_demo_snippets.py) builds the wheel,
|
|
548
|
+
installs it into a throwaway virtual environment, runs `etlplus --version`, and rewrites the snippet
|
|
549
|
+
between the markers in [DEMO.md](DEMO.md).
|
|
550
|
+
|
|
551
|
+
### Releasing to PyPI
|
|
552
|
+
|
|
553
|
+
`setuptools-scm` derives the package version from Git tags, so publishing is now entirely tag
|
|
554
|
+
driven—no hand-editing `pyproject.toml`, `setup.py`, or `etlplus/__version__.py`.
|
|
555
|
+
|
|
556
|
+
1. Ensure `main` is green and the changelog/docs are up to date.
|
|
557
|
+
2. Create and push a SemVer tag matching the `v*.*.*` pattern:
|
|
558
|
+
|
|
559
|
+
```bash
|
|
560
|
+
git tag -a v1.4.0 -m "Release v1.4.0"
|
|
561
|
+
git push origin v1.4.0
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
3. GitHub Actions fetches tags, builds the sdist/wheel, and publishes to PyPI via the `publish` job
|
|
565
|
+
in [.github/workflows/ci.yml](.github/workflows/ci.yml).
|
|
566
|
+
|
|
567
|
+
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
568
|
+
this exercises the same build path the workflow uses.
|
|
569
|
+
|
|
570
|
+
## Links
|
|
571
|
+
|
|
572
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
573
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
574
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
575
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
576
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
577
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
578
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
579
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
580
|
+
|
|
581
|
+
## License
|
|
582
|
+
|
|
583
|
+
This project is licensed under the [MIT License](LICENSE).
|
|
584
|
+
|
|
585
|
+
## Contributing
|
|
586
|
+
|
|
587
|
+
Code and codeless contributions are welcome! If you’d like to add a new feature, fix a bug, or
|
|
588
|
+
improve the documentation, please feel free to submit a pull request as follows:
|
|
589
|
+
|
|
590
|
+
1. Fork this repository.
|
|
591
|
+
2. Create a new feature branch for your changes (`git checkout -b feature/feature-name`).
|
|
592
|
+
3. Commit your changes (`git commit -m "Add feature"`).
|
|
593
|
+
4. Push to your branch (`git push origin feature/feature-name`).
|
|
594
|
+
5. Submit a pull request with a detailed description.
|
|
595
|
+
|
|
596
|
+
If you choose to be a code contributor, please first refer to these documents:
|
|
597
|
+
|
|
598
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
599
|
+
- Design notes (Mapping inputs, dict outputs):
|
|
600
|
+
[`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
601
|
+
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
602
|
+
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
603
|
+
|
|
604
|
+
## Acknowledgments
|
|
605
|
+
|
|
606
|
+
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
607
|
+
Python development, aiming to increase productivity and reduce boilerplate code. Feedback and
|
|
608
|
+
contributions are always appreciated!
|
|
609
|
+
|
|
610
|
+
[Codecov project]: https://codecov.io/github/Dagitali/ETLPlus?branch=main
|
|
611
|
+
[GitHub Actions CI workflow]: https://github.com/Dagitali/ETLPlus/actions/workflows/ci.yml
|
|
612
|
+
[GitHub contributors]: https://github.com/Dagitali/ETLPlus/graphs/contributors
|
|
613
|
+
[GitHub issues]: https://github.com/Dagitali/ETLPlus/issues
|
|
614
|
+
[GitHub PRs]: https://github.com/Dagitali/ETLPlus/pulls
|
|
615
|
+
[GitHub release]: https://github.com/Dagitali/ETLPlus/releases
|
|
616
|
+
[PyPI package]: https://pypi.org/project/etlplus/
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
|
|
2
|
+
etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
|
|
3
|
+
etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
|
|
4
|
+
etlplus/ddl.py,sha256=uYkiMTx1uDlUypnXCYy0K5ARnHRMHFVzzg8PizBQRLg,5306
|
|
5
|
+
etlplus/enums.py,sha256=V_j18Ud2BCXpFsBk2pZGrvCVrvAMJ7uja1z9fppFGso,10175
|
|
6
|
+
etlplus/extract.py,sha256=f44JdHhNTACxgn44USx05paKTwq7LQY-V4wANCW9hVM,6173
|
|
7
|
+
etlplus/file.py,sha256=RxIAsGDN4f_vNA2B5-ct88JNd_ISAyYbooIRE5DstS8,17972
|
|
8
|
+
etlplus/load.py,sha256=BwF3gT4gIr-5CvNMz_aLTCl-w2ihWSTxNVd4X92XFwI,8737
|
|
9
|
+
etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
|
|
10
|
+
etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
etlplus/run.py,sha256=zl_Yx35spcgaa9Xx7-kcJEb1CAYyMIiqtPlsSrYxRfs,12448
|
|
12
|
+
etlplus/run_helpers.py,sha256=bj6MkaeFxjl3CeKG1HoXKx5DwAlXNERVW-GX-z1P_qQ,24373
|
|
13
|
+
etlplus/transform.py,sha256=uAUVDDHYCgx7GpVez9IK3OAZM-CnCuMa9iox3vwGGJA,25296
|
|
14
|
+
etlplus/types.py,sha256=SJiZ7wJiSnV4CEvF-9E5nSFLBo4DT9OqHQqj1GSHkv8,6042
|
|
15
|
+
etlplus/utils.py,sha256=_fn8b-SAdxiw28VX-Ugr8sZUPZI9mEkWKAGExlgxhJA,13993
|
|
16
|
+
etlplus/validate.py,sha256=7rJoEI_SIILdPpoBqqh2UJqg9oeReDz34mYSlc3t7Qg,12989
|
|
17
|
+
etlplus/api/README.md,sha256=UkK5PiZWXbbnMNP0MaPa56S88PjSqOwhMNCyswOhvKc,7329
|
|
18
|
+
etlplus/api/__init__.py,sha256=P2JUYFy6Ep4t6xnsBiCBfQCkQLHYYhA-yXPXCobS8Y0,4295
|
|
19
|
+
etlplus/api/auth.py,sha256=GOO5on-LoMS1GXTAhtK9rFcfpjbBcNeA6NE5UZwIq0g,12158
|
|
20
|
+
etlplus/api/config.py,sha256=wRpOaZ31sPReVzEMme0jKl_37nqgraESwuYSNxP_xDo,17397
|
|
21
|
+
etlplus/api/endpoint_client.py,sha256=PxCvBsvFhTIjEbY6drIIvciynHXQEvKu47Pi63Gxwqs,30693
|
|
22
|
+
etlplus/api/errors.py,sha256=XjI2xW-sypMUNUbqfc2S57-IGyWnH3oCDFhCmKYYI_Q,4648
|
|
23
|
+
etlplus/api/request_manager.py,sha256=YkDz803HM3BBzamsEZdSdE9fbVT0avMbTaLAgar9Wzo,18481
|
|
24
|
+
etlplus/api/retry_manager.py,sha256=0GDhJVyIlb1Ww35JUWlYoa8QYUPjKLBtxQeZj3TdLbY,11306
|
|
25
|
+
etlplus/api/transport.py,sha256=LRsQEPxIYrvXQQMvgPPkIl_57YCmanzsWNEnSYdP_d8,9164
|
|
26
|
+
etlplus/api/types.py,sha256=687JigIf3qfYxgGTNBaMNsQsrza5Pja6DcK5llM9oRU,4591
|
|
27
|
+
etlplus/api/pagination/__init__.py,sha256=a4UX2J0AG8RMvmHt_CCofUm5vSmFo6GAfkb8XnSXypM,1395
|
|
28
|
+
etlplus/api/pagination/client.py,sha256=42cG442od3mQkw_JsvGvxT_w7y9J4HPM5PB4tFFU6EQ,5383
|
|
29
|
+
etlplus/api/pagination/config.py,sha256=3dXDJ-nMbO9Zk6i344n4roBFbUlHsa294D1_plPmm6E,13579
|
|
30
|
+
etlplus/api/pagination/paginator.py,sha256=wtdY_er4yfjx5yTUQJ1gPq-IuWmpLAHeG5buBQZJm54,24453
|
|
31
|
+
etlplus/api/rate_limiting/__init__.py,sha256=ZySB1dZettEDnWvI1EHf_TZ9L08M_kKsNR-Y_lbU6kI,1070
|
|
32
|
+
etlplus/api/rate_limiting/config.py,sha256=2b4wIynblN-1EyMqI4aXa71SljzSjXYh5N1Nngr3jOg,9406
|
|
33
|
+
etlplus/api/rate_limiting/rate_limiter.py,sha256=Uxozqd_Ej5Lsj-M-mLT2WexChgWh7x35_YP10yqYPQA,7159
|
|
34
|
+
etlplus/cli/__init__.py,sha256=J97-Rv931IL1_b4AXnB7Fbbd7HKnHBpx18NQfC_kE6c,299
|
|
35
|
+
etlplus/cli/app.py,sha256=nJkvc8fx7lCPN3qXdQ3Zb0modT3lC7f9ook1zKtS0oE,35829
|
|
36
|
+
etlplus/cli/handlers.py,sha256=3Hy-cERg7grXrBYOh7Q_FmEIYnXrUsVIX5-MH8r5e2Y,18894
|
|
37
|
+
etlplus/cli/main.py,sha256=j9WuG0J8vHJdE8Y6QHhgwo0frFdAfelaOWg9DY4hI-E,16512
|
|
38
|
+
etlplus/config/__init__.py,sha256=VZWzOg7d2YR9NT6UwKTv44yf2FRUMjTHynkm1Dl5Qzo,1486
|
|
39
|
+
etlplus/config/connector.py,sha256=0-TIwevHbKRHVmucvyGpPd-3tB1dKHB-dj0yJ6kq5eY,9809
|
|
40
|
+
etlplus/config/jobs.py,sha256=hmzRCqt0OvCEZZR4ONKrd3lvSv0OmayjLc4yOBk3ug8,7399
|
|
41
|
+
etlplus/config/pipeline.py,sha256=Va4MQY6KEyKqHGMKPmh09ZcGpx95br-iNUjpkqtzVbw,9500
|
|
42
|
+
etlplus/config/profile.py,sha256=Ss2zedQGjkaGSpvBLTD4SZaWViMJ7TJPLB8Q2_BTpPg,1898
|
|
43
|
+
etlplus/config/types.py,sha256=a0epJ3z16HQ5bY3Ctf8s_cQPa3f0HHcwdOcjCP2xoG4,4954
|
|
44
|
+
etlplus/config/utils.py,sha256=4SUHMkt5bKBhMhiJm-DrnmE2Q4TfOgdNCKz8PJDS27o,3443
|
|
45
|
+
etlplus/templates/__init__.py,sha256=tsniN7XJYs3NwYxJ6c2HD5upHP3CDkLx-bQCMt97UOM,106
|
|
46
|
+
etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,4734
|
|
47
|
+
etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
|
|
48
|
+
etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
|
|
49
|
+
etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
|
|
50
|
+
etlplus-0.5.4.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
51
|
+
etlplus-0.5.4.dist-info/METADATA,sha256=P7z86XFEsrqx1vVJG7NvNf4FS7h3-4oo3EwEXE6LAD0,19288
|
|
52
|
+
etlplus-0.5.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
53
|
+
etlplus-0.5.4.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
54
|
+
etlplus-0.5.4.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
55
|
+
etlplus-0.5.4.dist-info/RECORD,,
|