aptdata 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. aptdata-0.0.2/LICENSE +21 -0
  2. aptdata-0.0.2/PKG-INFO +330 -0
  3. aptdata-0.0.2/README.md +285 -0
  4. aptdata-0.0.2/aptdata/__init__.py +3 -0
  5. aptdata-0.0.2/aptdata/cli/__init__.py +5 -0
  6. aptdata-0.0.2/aptdata/cli/app.py +247 -0
  7. aptdata-0.0.2/aptdata/cli/commands/__init__.py +9 -0
  8. aptdata-0.0.2/aptdata/cli/commands/config_cmd.py +128 -0
  9. aptdata-0.0.2/aptdata/cli/commands/mesh_cmd.py +435 -0
  10. aptdata-0.0.2/aptdata/cli/commands/plugin_cmd.py +107 -0
  11. aptdata-0.0.2/aptdata/cli/commands/system_cmd.py +90 -0
  12. aptdata-0.0.2/aptdata/cli/commands/telemetry_cmd.py +57 -0
  13. aptdata-0.0.2/aptdata/cli/completions.py +56 -0
  14. aptdata-0.0.2/aptdata/cli/interactive.py +269 -0
  15. aptdata-0.0.2/aptdata/cli/rendering/__init__.py +31 -0
  16. aptdata-0.0.2/aptdata/cli/rendering/console.py +119 -0
  17. aptdata-0.0.2/aptdata/cli/rendering/logger.py +26 -0
  18. aptdata-0.0.2/aptdata/cli/rendering/panels.py +87 -0
  19. aptdata-0.0.2/aptdata/cli/rendering/tables.py +81 -0
  20. aptdata-0.0.2/aptdata/cli/scaffold.py +1089 -0
  21. aptdata-0.0.2/aptdata/config/__init__.py +13 -0
  22. aptdata-0.0.2/aptdata/config/parser.py +136 -0
  23. aptdata-0.0.2/aptdata/config/schema.py +27 -0
  24. aptdata-0.0.2/aptdata/config/secrets.py +60 -0
  25. aptdata-0.0.2/aptdata/core/__init__.py +46 -0
  26. aptdata-0.0.2/aptdata/core/context.py +31 -0
  27. aptdata-0.0.2/aptdata/core/dataset.py +39 -0
  28. aptdata-0.0.2/aptdata/core/lineage.py +213 -0
  29. aptdata-0.0.2/aptdata/core/state.py +27 -0
  30. aptdata-0.0.2/aptdata/core/system.py +317 -0
  31. aptdata-0.0.2/aptdata/core/workflow.py +372 -0
  32. aptdata-0.0.2/aptdata/mcp/__init__.py +5 -0
  33. aptdata-0.0.2/aptdata/mcp/server.py +198 -0
  34. aptdata-0.0.2/aptdata/plugins/__init__.py +77 -0
  35. aptdata-0.0.2/aptdata/plugins/ai/__init__.py +6 -0
  36. aptdata-0.0.2/aptdata/plugins/ai/chunking.py +66 -0
  37. aptdata-0.0.2/aptdata/plugins/ai/embeddings.py +56 -0
  38. aptdata-0.0.2/aptdata/plugins/base.py +57 -0
  39. aptdata-0.0.2/aptdata/plugins/dataset.py +62 -0
  40. aptdata-0.0.2/aptdata/plugins/governance/__init__.py +32 -0
  41. aptdata-0.0.2/aptdata/plugins/governance/catalog.py +115 -0
  42. aptdata-0.0.2/aptdata/plugins/governance/classification.py +44 -0
  43. aptdata-0.0.2/aptdata/plugins/governance/lineage_store.py +49 -0
  44. aptdata-0.0.2/aptdata/plugins/governance/rules.py +180 -0
  45. aptdata-0.0.2/aptdata/plugins/local_fs.py +241 -0
  46. aptdata-0.0.2/aptdata/plugins/manager.py +142 -0
  47. aptdata-0.0.2/aptdata/plugins/postgres.py +113 -0
  48. aptdata-0.0.2/aptdata/plugins/quality/__init__.py +39 -0
  49. aptdata-0.0.2/aptdata/plugins/quality/contract.py +128 -0
  50. aptdata-0.0.2/aptdata/plugins/quality/expectations.py +310 -0
  51. aptdata-0.0.2/aptdata/plugins/quality/report.py +94 -0
  52. aptdata-0.0.2/aptdata/plugins/quality/validator.py +139 -0
  53. aptdata-0.0.2/aptdata/plugins/rest.py +135 -0
  54. aptdata-0.0.2/aptdata/plugins/transform/__init__.py +14 -0
  55. aptdata-0.0.2/aptdata/plugins/transform/pandas.py +129 -0
  56. aptdata-0.0.2/aptdata/plugins/transform/spark.py +134 -0
  57. aptdata-0.0.2/aptdata/plugins/vector/__init__.py +6 -0
  58. aptdata-0.0.2/aptdata/plugins/vector/base.py +19 -0
  59. aptdata-0.0.2/aptdata/plugins/vector/qdrant.py +41 -0
  60. aptdata-0.0.2/aptdata/telemetry/__init__.py +5 -0
  61. aptdata-0.0.2/aptdata/telemetry/instrumentation.py +164 -0
  62. aptdata-0.0.2/aptdata/tui/__init__.py +5 -0
  63. aptdata-0.0.2/aptdata/tui/monitor.py +279 -0
  64. aptdata-0.0.2/pyproject.toml +82 -0
aptdata-0.0.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 strondata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
aptdata-0.0.2/PKG-INFO ADDED
@@ -0,0 +1,330 @@
1
+ Metadata-Version: 2.4
2
+ Name: aptdata
3
+ Version: 0.0.2
4
+ Summary: A declarative, extensible framework for building smart data pipelines in Python
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: data-pipeline,framework,etl,pydantic,data-engineering
8
+ Author: strondata
9
+ Requires-Python: >=3.10,<4.0
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
20
+ Provides-Extra: all
21
+ Provides-Extra: pandas
22
+ Provides-Extra: plugins
23
+ Provides-Extra: spark
24
+ Requires-Dist: httpx (>=0.27,<0.28) ; extra == "plugins" or extra == "all"
25
+ Requires-Dist: mcp (>=1.26.0,<2.0.0)
26
+ Requires-Dist: opentelemetry-api (>=1.40.0,<2.0.0)
27
+ Requires-Dist: opentelemetry-sdk (>=1.40.0,<2.0.0)
28
+ Requires-Dist: pandas (>=2.2,<3.0) ; extra == "pandas" or extra == "all"
29
+ Requires-Dist: psycopg2-binary (>=2.9,<3.0) ; extra == "plugins" or extra == "all"
30
+ Requires-Dist: pyarrow (>=15.0,<16.0) ; extra == "plugins" or extra == "all"
31
+ Requires-Dist: pydantic (>=2.0,<3.0)
32
+ Requires-Dist: pyspark (>=3.5,<4.0) ; extra == "spark" or extra == "all"
33
+ Requires-Dist: python-dotenv (>=1.0,<2.0)
34
+ Requires-Dist: pyyaml (>=6.0,<7.0)
35
+ Requires-Dist: questionary (>=2.0)
36
+ Requires-Dist: rich (>=13.0,<14.0)
37
+ Requires-Dist: sqlalchemy (>=2.0,<3.0) ; extra == "plugins" or extra == "all"
38
+ Requires-Dist: textual (>=0.60,<0.61)
39
+ Requires-Dist: typer[all] (>=0.15,<0.16)
40
+ Project-URL: Documentation, https://strondata.github.io/smart-data
41
+ Project-URL: Homepage, https://strondata.github.io/smart-data
42
+ Project-URL: Repository, https://github.com/strondata/smart-data
43
+ Description-Content-Type: text/markdown
44
+
45
+ # aptdata
46
+
47
+ > **v0.0.2** · A declarative, extensible framework for building smart data pipelines in Python.
48
+
49
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/)
50
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
51
+ [![Version](https://img.shields.io/badge/version-0.0.2-orange)](CHANGELOG.md)
52
+
53
+ ---
54
+
55
+ ## Overview
56
+
57
+ **aptdata** is built around three universal abstractions — **System**,
58
+ **Flow**, and **Component** — that cover every data-processing paradigm in a
59
+ single, coherent model:
60
+
61
+ ```mermaid
62
+ flowchart TD
63
+ I["IComponent / IFlow / ISystem\n@dataclass + ABC — pure interfaces"]
64
+ B["BaseComponent / BaseFlow / BaseSystem\n@pydantic_dataclass — validated fields"]
65
+ Y["Your concrete implementations"]
66
+
67
+ I --> B --> Y
68
+ ```
69
+
70
+ Datasets remain the fundamental data-exchange contract (`IDataset` /
71
+ `BaseDataset`). Every outcome from the CLI is emitted as a machine-readable
72
+ JSON line, making aptdata a natural fit for AI orchestrators, CI/CD
73
+ pipelines and scripted workflows.
74
+
75
+ ---
76
+
77
+ ## Requirements
78
+
79
+ - Python ≥ 3.10
80
+ - [Poetry](https://python-poetry.org/) (for development)
81
+
82
+ ---
83
+
84
+ ## Installation
85
+
86
+ ### From PyPI
87
+
88
+ ```bash
89
+ pip install aptdata
90
+ ```
91
+
92
+ ### Optional extras
93
+
94
+ ```bash
95
+ pip install aptdata[pandas] # pandas support
96
+ pip install aptdata[spark] # PySpark support
97
+ pip install aptdata[plugins] # REST, PostgreSQL, Parquet I/O
98
+ pip install aptdata[all] # everything
99
+ ```
100
+
101
+ ### From source (development)
102
+
103
+ ```bash
104
+ git clone https://github.com/strondata/smart-data.git
105
+ cd smart-data
106
+ poetry install
107
+ ```
108
+
109
+ ---
110
+
111
+ ## Quick start
112
+
113
+ ```python
114
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
115
+ from aptdata.core import (
116
+ BaseDataset, IDataset,
117
+ BaseComponent, ComponentMeta, ComponentKind,
118
+ BaseFlow, IFlow,
119
+ BaseSystem,
120
+ )
121
+
122
+ @pydantic_dataclass
123
+ class MemoryDataset(BaseDataset):
124
+ def __post_init__(self): self._data = None
125
+ def read(self): return self._data
126
+ def write(self, data): self._data = data
127
+
128
+ @pydantic_dataclass
129
+ class DoubleComponent(BaseComponent):
130
+ def validate_inputs(self, inputs: list[IDataset]) -> bool:
131
+ return len(inputs) == 1
132
+ def execute(self, inputs: list[IDataset]) -> list[IDataset]:
133
+ out = MemoryDataset(uri="memory://out")
134
+ out.write([x * 2 for x in inputs[0].read()])
135
+ return [out]
136
+
137
+ @pydantic_dataclass
138
+ class ETLFlow(BaseFlow):
139
+ def __post_init__(self):
140
+ self._nodes = {}
141
+ self._edges = []
142
+ self._compiled = False
143
+ def add_component(self, c): self._nodes[c.component_id] = c
144
+ def connect(self, src, tgt, condition=None): ...
145
+ def compile(self): self._compiled = True
146
+ def run(self, inputs): return inputs # wire your logic here
147
+
148
+ @pydantic_dataclass
149
+ class MySystem(BaseSystem):
150
+ def __post_init__(self): self._flows: list[IFlow] = []
151
+ def register_flow(self, flow): self._flows.append(flow)
152
+ def run(self):
153
+ for flow in self._flows:
154
+ flow.run([])
155
+
156
+ # Register and run via CLI
157
+ from aptdata.plugins import registry
158
+ registry.register("my_system", MySystem)
159
+ ```
160
+
161
+ ```bash
162
+ aptdata run my_system
163
+ # {"event": "pipeline.started", "pipeline": "my_system", "env": "dev", "dry_run": false, "trace_id": null}
164
+ # {"event": "pipeline.completed", "pipeline": "my_system", "env": "dev", "dry_run": false, "elapsed_seconds": 0.001, "trace_id": null}
165
+ ```
166
+
167
+ ---
168
+
169
+ ## CLI reference
170
+
171
+ ```
172
+ aptdata run SYSTEM_NAME [--env ENV] [--dry-run]
173
+ aptdata monitor [--refresh SECONDS]
174
+ aptdata scaffold PROJECT_NAME [--template TEMPLATE] [--output PATH]
175
+ aptdata schema export --output schema.json
176
+ aptdata system list [--json]
177
+ aptdata system info NAME [--json]
178
+ aptdata system validate NAME
179
+ aptdata plugin list [--json]
180
+ aptdata plugin inspect NAME [--json]
181
+ aptdata plugin preview READER [--limit N]
182
+ aptdata plugin load MODULE_PATH
183
+ aptdata config validate PATH
184
+ aptdata config init [--output PATH]
185
+ aptdata config show PATH
186
+ aptdata config run PATH [--env ENV]
187
+ aptdata telemetry status [--json]
188
+ aptdata telemetry export [--format json]
189
+ aptdata mesh list [--dir DIR] [--json]
190
+ aptdata mesh run COMPONENT [--dir DIR] [--dry-run] [--json]
191
+ aptdata mesh build COMPONENT [--dir DIR] [--json]
192
+ aptdata mcp-start [--transport TRANSPORT]
193
+ aptdata interactive
194
+ ```
195
+
196
+ Every static command supports `--json` for machine-readable JSON line output
197
+ (backward compatible). Without `--json`, commands render Rich tables, panels,
198
+ and syntax-highlighted output.
199
+
200
+ ### Scaffold templates
201
+
202
+ | Template | Description |
203
+ |-----------------------|-----------------------------------------------------|
204
+ | `hello-world` | Minimal pandas pipeline (default) |
205
+ | `medallion` | Bronze → Silver → Gold data lakehouse |
206
+ | `rag-ingestion` | RAG pipeline: extract → chunk → embed → load |
207
+ | `data-quality-test` | Schema contract + expectation suite |
208
+ | `job-wheel` | Python wheel executor for portable job packaging |
209
+ | `docker-compose-app` | Multi-service Docker Compose application |
210
+
211
+ ```bash
212
+ aptdata scaffold my_lakehouse --template medallion
213
+ aptdata scaffold my_job --template job-wheel
214
+ aptdata scaffold my_service --template docker-compose-app
215
+ ```
216
+
217
+ ---
218
+
219
+ ## Processing Engines
220
+
221
+ Engine-agnostic transformation wrappers for pandas and PySpark:
222
+
223
+ ```python
224
+ from aptdata.plugins.transform import PandasTransformer
225
+
226
+ def clean(df):
227
+ return df.dropna().drop_duplicates()
228
+
229
+ transformer = PandasTransformer("clean", clean)
230
+ result = transformer.transform(my_dataset)
231
+ ```
232
+
233
+ See [Transform Engines docs](docs/transform-engines.md) for PySpark usage.
234
+
235
+ ---
236
+
237
+ ## Data Quality & Contracts
238
+
239
+ ```python
240
+ from aptdata.plugins.quality import (
241
+ EnforcementMode, ExpectColumnToNotBeNull,
242
+ QualityValidator, SchemaContract,
243
+ )
244
+
245
+ validator = QualityValidator(
246
+ expectations=[ExpectColumnToNotBeNull("id")],
247
+ enforcement=EnforcementMode.ABORT,
248
+ )
249
+ clean_data = validator.validate(raw_df)
250
+ ```
251
+
252
+ See [Quality docs](docs/quality.md) for all built-in expectations.
253
+
254
+ ---
255
+
256
+ ## Data Governance
257
+
258
+ ```python
259
+ from aptdata.plugins.governance import (
260
+ BusinessRule, DatasetCatalog, DatasetCatalogEntry, LineageStore,
261
+ )
262
+ from aptdata.core.lineage import LineageGraph, LineageNode, LineageEventType
263
+
264
+ # Lineage tracking
265
+ graph = LineageGraph(run_id="run-1", workflow_name="etl")
266
+ graph.add_node(LineageNode(dataset_uri="s3://raw/data", event_type=LineageEventType.READ))
267
+
268
+ store = LineageStore()
269
+ store.save(graph)
270
+ ```
271
+
272
+ See [Governance docs](docs/governance.md) for the full API.
273
+
274
+ ---
275
+
276
+ ## Release process
277
+
278
+ Releases are automated via the [Release workflow](.github/workflows/release.yml).
279
+ After a PR is merged into `main`, the CI reads its labels and bumps the version
280
+ accordingly.
281
+
282
+ | Label | Effect |
283
+ |---|---|
284
+ | `release:patch` | `0.0.1 → 0.0.2` |
285
+ | `release:minor` | `0.0.1 → 0.1.0` |
286
+ | `release:major` | `0.0.1 → 1.0.0` |
287
+ | `release:skip` | no release (explicit opt-out) |
288
+ | *(no label)* | no release (silent skip) |
289
+
290
+ The workflow will:
291
+ 1. Detect the merged PR and its labels.
292
+ 2. Run `bump-my-version bump <part>` to update `pyproject.toml` and
293
+ `aptdata/__init__.py`.
294
+ 3. Create a `chore(release): bump version to X.Y.Z` commit and a `vX.Y.Z` tag.
295
+ 4. Push the commit and tag to `main`.
296
+ 5. The tag push automatically triggers the **Publish to PyPI** workflow.
297
+
298
+ > **Branch protection note:** GitHub Actions must have *read and write
299
+ > permissions* (Settings → Actions → General → Workflow permissions) and, if
300
+ > branch protection is enabled on `main`, the rule must allow GitHub Actions
301
+ > to bypass it.
302
+
303
+ ---
304
+
305
+ ## Development
306
+
307
+ ```bash
308
+ make install # install all dependencies
309
+ make test # run the test suite
310
+ make lint # lint with ruff
311
+ make docs # build the documentation
312
+ ```
313
+
314
+ ---
315
+
316
+ ## Documentation
317
+
318
+ Full documentation is available in the [`docs/`](docs/) directory and can be
319
+ served locally with:
320
+
321
+ ```bash
322
+ mkdocs serve
323
+ ```
324
+
325
+ ---
326
+
327
+ ## License
328
+
329
+ [MIT](LICENSE)
330
+
@@ -0,0 +1,285 @@
1
+ # aptdata
2
+
3
+ > **v0.0.2** · A declarative, extensible framework for building smart data pipelines in Python.
4
+
5
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/)
6
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
7
+ [![Version](https://img.shields.io/badge/version-0.0.2-orange)](CHANGELOG.md)
8
+
9
+ ---
10
+
11
+ ## Overview
12
+
13
+ **aptdata** is built around three universal abstractions — **System**,
14
+ **Flow**, and **Component** — that cover every data-processing paradigm in a
15
+ single, coherent model:
16
+
17
+ ```mermaid
18
+ flowchart TD
19
+ I["IComponent / IFlow / ISystem\n@dataclass + ABC — pure interfaces"]
20
+ B["BaseComponent / BaseFlow / BaseSystem\n@pydantic_dataclass — validated fields"]
21
+ Y["Your concrete implementations"]
22
+
23
+ I --> B --> Y
24
+ ```
25
+
26
+ Datasets remain the fundamental data-exchange contract (`IDataset` /
27
+ `BaseDataset`). Every outcome from the CLI is emitted as a machine-readable
28
+ JSON line, making aptdata a natural fit for AI orchestrators, CI/CD
29
+ pipelines and scripted workflows.
30
+
31
+ ---
32
+
33
+ ## Requirements
34
+
35
+ - Python ≥ 3.10
36
+ - [Poetry](https://python-poetry.org/) (for development)
37
+
38
+ ---
39
+
40
+ ## Installation
41
+
42
+ ### From PyPI
43
+
44
+ ```bash
45
+ pip install aptdata
46
+ ```
47
+
48
+ ### Optional extras
49
+
50
+ ```bash
51
+ pip install aptdata[pandas] # pandas support
52
+ pip install aptdata[spark] # PySpark support
53
+ pip install aptdata[plugins] # REST, PostgreSQL, Parquet I/O
54
+ pip install aptdata[all] # everything
55
+ ```
56
+
57
+ ### From source (development)
58
+
59
+ ```bash
60
+ git clone https://github.com/strondata/smart-data.git
61
+ cd smart-data
62
+ poetry install
63
+ ```
64
+
65
+ ---
66
+
67
+ ## Quick start
68
+
69
+ ```python
70
+ from pydantic.dataclasses import dataclass as pydantic_dataclass
71
+ from aptdata.core import (
72
+ BaseDataset, IDataset,
73
+ BaseComponent, ComponentMeta, ComponentKind,
74
+ BaseFlow, IFlow,
75
+ BaseSystem,
76
+ )
77
+
78
+ @pydantic_dataclass
79
+ class MemoryDataset(BaseDataset):
80
+ def __post_init__(self): self._data = None
81
+ def read(self): return self._data
82
+ def write(self, data): self._data = data
83
+
84
+ @pydantic_dataclass
85
+ class DoubleComponent(BaseComponent):
86
+ def validate_inputs(self, inputs: list[IDataset]) -> bool:
87
+ return len(inputs) == 1
88
+ def execute(self, inputs: list[IDataset]) -> list[IDataset]:
89
+ out = MemoryDataset(uri="memory://out")
90
+ out.write([x * 2 for x in inputs[0].read()])
91
+ return [out]
92
+
93
+ @pydantic_dataclass
94
+ class ETLFlow(BaseFlow):
95
+ def __post_init__(self):
96
+ self._nodes = {}
97
+ self._edges = []
98
+ self._compiled = False
99
+ def add_component(self, c): self._nodes[c.component_id] = c
100
+ def connect(self, src, tgt, condition=None): ...
101
+ def compile(self): self._compiled = True
102
+ def run(self, inputs): return inputs # wire your logic here
103
+
104
+ @pydantic_dataclass
105
+ class MySystem(BaseSystem):
106
+ def __post_init__(self): self._flows: list[IFlow] = []
107
+ def register_flow(self, flow): self._flows.append(flow)
108
+ def run(self):
109
+ for flow in self._flows:
110
+ flow.run([])
111
+
112
+ # Register and run via CLI
113
+ from aptdata.plugins import registry
114
+ registry.register("my_system", MySystem)
115
+ ```
116
+
117
+ ```bash
118
+ aptdata run my_system
119
+ # {"event": "pipeline.started", "pipeline": "my_system", "env": "dev", "dry_run": false, "trace_id": null}
120
+ # {"event": "pipeline.completed", "pipeline": "my_system", "env": "dev", "dry_run": false, "elapsed_seconds": 0.001, "trace_id": null}
121
+ ```
122
+
123
+ ---
124
+
125
+ ## CLI reference
126
+
127
+ ```
128
+ aptdata run SYSTEM_NAME [--env ENV] [--dry-run]
129
+ aptdata monitor [--refresh SECONDS]
130
+ aptdata scaffold PROJECT_NAME [--template TEMPLATE] [--output PATH]
131
+ aptdata schema export --output schema.json
132
+ aptdata system list [--json]
133
+ aptdata system info NAME [--json]
134
+ aptdata system validate NAME
135
+ aptdata plugin list [--json]
136
+ aptdata plugin inspect NAME [--json]
137
+ aptdata plugin preview READER [--limit N]
138
+ aptdata plugin load MODULE_PATH
139
+ aptdata config validate PATH
140
+ aptdata config init [--output PATH]
141
+ aptdata config show PATH
142
+ aptdata config run PATH [--env ENV]
143
+ aptdata telemetry status [--json]
144
+ aptdata telemetry export [--format json]
145
+ aptdata mesh list [--dir DIR] [--json]
146
+ aptdata mesh run COMPONENT [--dir DIR] [--dry-run] [--json]
147
+ aptdata mesh build COMPONENT [--dir DIR] [--json]
148
+ aptdata mcp-start [--transport TRANSPORT]
149
+ aptdata interactive
150
+ ```
151
+
152
+ Every static command supports `--json` for machine-readable JSON line output
153
+ (backward compatible). Without `--json`, commands render Rich tables, panels,
154
+ and syntax-highlighted output.
155
+
156
+ ### Scaffold templates
157
+
158
+ | Template | Description |
159
+ |-----------------------|-----------------------------------------------------|
160
+ | `hello-world` | Minimal pandas pipeline (default) |
161
+ | `medallion` | Bronze → Silver → Gold data lakehouse |
162
+ | `rag-ingestion` | RAG pipeline: extract → chunk → embed → load |
163
+ | `data-quality-test` | Schema contract + expectation suite |
164
+ | `job-wheel` | Python wheel executor for portable job packaging |
165
+ | `docker-compose-app` | Multi-service Docker Compose application |
166
+
167
+ ```bash
168
+ aptdata scaffold my_lakehouse --template medallion
169
+ aptdata scaffold my_job --template job-wheel
170
+ aptdata scaffold my_service --template docker-compose-app
171
+ ```
172
+
173
+ ---
174
+
175
+ ## Processing Engines
176
+
177
+ Engine-agnostic transformation wrappers for pandas and PySpark:
178
+
179
+ ```python
180
+ from aptdata.plugins.transform import PandasTransformer
181
+
182
+ def clean(df):
183
+ return df.dropna().drop_duplicates()
184
+
185
+ transformer = PandasTransformer("clean", clean)
186
+ result = transformer.transform(my_dataset)
187
+ ```
188
+
189
+ See [Transform Engines docs](docs/transform-engines.md) for PySpark usage.
190
+
191
+ ---
192
+
193
+ ## Data Quality & Contracts
194
+
195
+ ```python
196
+ from aptdata.plugins.quality import (
197
+ EnforcementMode, ExpectColumnToNotBeNull,
198
+ QualityValidator, SchemaContract,
199
+ )
200
+
201
+ validator = QualityValidator(
202
+ expectations=[ExpectColumnToNotBeNull("id")],
203
+ enforcement=EnforcementMode.ABORT,
204
+ )
205
+ clean_data = validator.validate(raw_df)
206
+ ```
207
+
208
+ See [Quality docs](docs/quality.md) for all built-in expectations.
209
+
210
+ ---
211
+
212
+ ## Data Governance
213
+
214
+ ```python
215
+ from aptdata.plugins.governance import (
216
+ BusinessRule, DatasetCatalog, DatasetCatalogEntry, LineageStore,
217
+ )
218
+ from aptdata.core.lineage import LineageGraph, LineageNode, LineageEventType
219
+
220
+ # Lineage tracking
221
+ graph = LineageGraph(run_id="run-1", workflow_name="etl")
222
+ graph.add_node(LineageNode(dataset_uri="s3://raw/data", event_type=LineageEventType.READ))
223
+
224
+ store = LineageStore()
225
+ store.save(graph)
226
+ ```
227
+
228
+ See [Governance docs](docs/governance.md) for the full API.
229
+
230
+ ---
231
+
232
+ ## Release process
233
+
234
+ Releases are automated via the [Release workflow](.github/workflows/release.yml).
235
+ After a PR is merged into `main`, the CI reads its labels and bumps the version
236
+ accordingly.
237
+
238
+ | Label | Effect |
239
+ |---|---|
240
+ | `release:patch` | `0.0.1 → 0.0.2` |
241
+ | `release:minor` | `0.0.1 → 0.1.0` |
242
+ | `release:major` | `0.0.1 → 1.0.0` |
243
+ | `release:skip` | no release (explicit opt-out) |
244
+ | *(no label)* | no release (silent skip) |
245
+
246
+ The workflow will:
247
+ 1. Detect the merged PR and its labels.
248
+ 2. Run `bump-my-version bump <part>` to update `pyproject.toml` and
249
+ `aptdata/__init__.py`.
250
+ 3. Create a `chore(release): bump version to X.Y.Z` commit and a `vX.Y.Z` tag.
251
+ 4. Push the commit and tag to `main`.
252
+ 5. The tag push automatically triggers the **Publish to PyPI** workflow.
253
+
254
+ > **Branch protection note:** GitHub Actions must have *read and write
255
+ > permissions* (Settings → Actions → General → Workflow permissions) and, if
256
+ > branch protection is enabled on `main`, the rule must allow GitHub Actions
257
+ > to bypass it.
258
+
259
+ ---
260
+
261
+ ## Development
262
+
263
+ ```bash
264
+ make install # install all dependencies
265
+ make test # run the test suite
266
+ make lint # lint with ruff
267
+ make docs # build the documentation
268
+ ```
269
+
270
+ ---
271
+
272
+ ## Documentation
273
+
274
+ Full documentation is available in the [`docs/`](docs/) directory and can be
275
+ served locally with:
276
+
277
+ ```bash
278
+ mkdocs serve
279
+ ```
280
+
281
+ ---
282
+
283
+ ## License
284
+
285
+ [MIT](LICENSE)
@@ -0,0 +1,3 @@
1
+ """aptdata: A framework for smart data pipelines."""
2
+
3
+ __version__ = "0.0.2"
@@ -0,0 +1,5 @@
1
+ """Static CLI entry-point for aptdata (Typer-based)."""
2
+
3
+ from aptdata.cli.app import app
4
+
5
+ __all__ = ["app"]