jerry-thomas 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/PKG-INFO +23 -23
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/README.md +22 -22
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/pyproject.toml +1 -1
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/app.py +19 -19
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/link.py +4 -4
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/bootstrap.py +1 -1
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/project_paths.py +2 -2
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/source.py +2 -2
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/README.md +13 -15
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/config/contracts/time_linear.yaml +1 -1
- {jerry_thomas-0.1.0/src/datapipeline/templates/plugin_skeleton/config/recipes → jerry_thomas-0.2.0/src/datapipeline/templates/plugin_skeleton/config/datasets}/default/project.yaml +2 -2
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/pyproject.toml +2 -2
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/jerry_thomas.egg-info/PKG-INFO +23 -23
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/jerry_thomas.egg-info/SOURCES.txt +3 -3
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/LICENSE +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/setup.cfg +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/analysis/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/analysis/vector_analyzer.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/domain.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/filter.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/inspect.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/list_.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/plugin.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/run.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/commands/source.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/visual_source.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/cli/visuals.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/common/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/common/geo.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/catalog.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/dataset/dataset.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/dataset/feature.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/dataset/loader.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/dataset/normalize.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/config/project.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/domain/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/domain/feature.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/domain/record.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/domain/vector.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/filters/filters.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/integrations/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/integrations/ml.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/mappers/noop.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/mappers/synthetic/time.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/parsers/identity.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/pipelines.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/stages.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/utils/keygen.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/utils/memory_sort.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/utils/ordering.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/utils/transform_utils.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/plugins.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/registries/registries.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/registries/registry.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/constants.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/entrypoints.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/factories.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/paths.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/domain.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/filter.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/mappers.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/plugin.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/services/scaffold/templates.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/composed_loader.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/decoders.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/factory.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/base.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/generator.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/loader.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/parser.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/source.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/models/synthetic.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/synthetic/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/synthetic/time/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/synthetic/time/loader.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/synthetic/time/parser.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/transports.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.yaml +0 -0
- /jerry_thomas-0.1.0/src/datapipeline/templates/plugin_skeleton/config/recipes/default/recipe.yaml → /jerry_thomas-0.2.0/src/datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -0
- {jerry_thomas-0.1.0/src/datapipeline/templates/plugin_skeleton/config/distilleries → jerry_thomas-0.2.0/src/datapipeline/templates/plugin_skeleton/config/sources}/time_ticks.yaml +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/dto.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/filter.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/loader_synthetic.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/mapper.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/parser.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/parser_custom.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/record.py.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/source.yaml.j2 +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/debug/identity.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/debug/lint.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/feature/model.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/feature/scaler.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/filter.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/record/floor_time.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/record/lag.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/sequence.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/stream/ensure_ticks.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/stream/fill.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/stream/granularity.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/utils.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/vector.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/vector_utils.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/utils/__init__.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/utils/load.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/utils/paths.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/utils/time.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/jerry_thomas.egg-info/dependency_links.txt +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/jerry_thomas.egg-info/entry_points.txt +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/jerry_thomas.egg-info/requires.txt +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/jerry_thomas.egg-info/top_level.txt +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/tests/test_config_pipeline.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/tests/test_regression_vectors.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/tests/test_transforms.py +0 -0
- {jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/tests/test_vector_analyzer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: jerry-thomas
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Jerry-Thomas: a stream-first, plugin-friendly data pipeline (mixology-themed CLI)
|
|
5
5
|
Author: Anders Skott Lind
|
|
6
6
|
License: MIT
|
|
@@ -121,9 +121,9 @@ values—they are interpolated into downstream YAML specs during bootstrap
|
|
|
121
121
|
```yaml
|
|
122
122
|
version: 1
|
|
123
123
|
paths:
|
|
124
|
-
sources: ../../
|
|
124
|
+
sources: ../../sources
|
|
125
125
|
streams: ../../contracts
|
|
126
|
-
dataset:
|
|
126
|
+
dataset: dataset.yaml
|
|
127
127
|
globals:
|
|
128
128
|
opening_time: "2024-01-01T16:00:00Z"
|
|
129
129
|
last_call: "2024-01-02T02:00:00Z"
|
|
@@ -134,13 +134,13 @@ globals:
|
|
|
134
134
|
|
|
135
135
|
### 3. Stock the bottles (raw sources)
|
|
136
136
|
|
|
137
|
-
Create `config/
|
|
137
|
+
Create `config/sources/<alias>.yaml` files. Each must expose a `parser` and `loader`
|
|
138
138
|
pointing at entry points plus any constructor arguments
|
|
139
139
|
(`src/datapipeline/services/bootstrap.py`). Here is a synthetic clock source that feels
|
|
140
140
|
like a drip of barrel-aged bitters:
|
|
141
141
|
|
|
142
142
|
```yaml
|
|
143
|
-
# config/
|
|
143
|
+
# config/sources/time_ticks.yaml
|
|
144
144
|
parser:
|
|
145
145
|
entrypoint: "synthetic.time"
|
|
146
146
|
args: {}
|
|
@@ -182,7 +182,7 @@ are grouped (`src/datapipeline/config/dataset/dataset.py`). A minimal hourly men
|
|
|
182
182
|
look like:
|
|
183
183
|
|
|
184
184
|
```yaml
|
|
185
|
-
# config/recipes/default/
|
|
185
|
+
# config/datasets/default/dataset.yaml
|
|
186
186
|
group_by:
|
|
187
187
|
keys:
|
|
188
188
|
- type: time
|
|
@@ -241,9 +241,9 @@ materialize all registered sources and streams
|
|
|
241
241
|
### Prep any station (with visuals)
|
|
242
242
|
|
|
243
243
|
```bash
|
|
244
|
-
jerry prep pour --project config/
|
|
245
|
-
jerry prep build --project config/
|
|
246
|
-
jerry prep stir --project config/
|
|
244
|
+
jerry prep pour --project config/datasets/default/project.yaml --limit 20
|
|
245
|
+
jerry prep build --project config/datasets/default/project.yaml --limit 20
|
|
246
|
+
jerry prep stir --project config/datasets/default/project.yaml --limit 20
|
|
247
247
|
```
|
|
248
248
|
|
|
249
249
|
- `prep pour` shows the record-stage ingredients headed for each feature.
|
|
@@ -260,9 +260,9 @@ loaders. The CLI wires up `build_record_pipeline`, `build_feature_pipeline` and
|
|
|
260
260
|
### Serve the flights (production mode)
|
|
261
261
|
|
|
262
262
|
```bash
|
|
263
|
-
jerry serve --project config/
|
|
264
|
-
jerry serve --project config/
|
|
265
|
-
jerry serve --project config/
|
|
263
|
+
jerry serve --project config/datasets/default/project.yaml --output print
|
|
264
|
+
jerry serve --project config/datasets/default/project.yaml --output stream
|
|
265
|
+
jerry serve --project config/datasets/default/project.yaml --output exports/batch.pt
|
|
266
266
|
```
|
|
267
267
|
|
|
268
268
|
Production mode skips the bar flair and focuses on throughput. `print` writes tasting
|
|
@@ -313,26 +313,26 @@ or feature store SDKs—without adding opinionated glue to the runtime itself.
|
|
|
313
313
|
|
|
314
314
|
Use the inspect helpers for different outputs:
|
|
315
315
|
|
|
316
|
-
- `jerry inspect report --project config/
|
|
316
|
+
- `jerry inspect report --project config/datasets/default/project.yaml` — print a
|
|
317
317
|
human-readable quality report (totals, keep/below lists, optional partition detail).
|
|
318
|
-
- `jerry inspect coverage --project config/
|
|
318
|
+
- `jerry inspect coverage --project config/datasets/default/project.yaml` — persist the
|
|
319
319
|
coverage summary to `build/coverage.json` (keep/below feature and partition lists plus
|
|
320
320
|
coverage percentages).
|
|
321
|
-
- `jerry inspect matrix --project config/
|
|
321
|
+
- `jerry inspect matrix --project config/datasets/default/project.yaml --format html` —
|
|
322
322
|
export availability matrices (CSV or HTML) for deeper analysis.
|
|
323
|
-
- `jerry inspect partitions --project config/
|
|
323
|
+
- `jerry inspect partitions --project config/datasets/default/project.yaml` — write the
|
|
324
324
|
observed partition manifest to `build/partitions.json` for use in configs.
|
|
325
325
|
|
|
326
326
|
Note: `jerry prep taste` has been removed; use `jerry inspect report` and friends.
|
|
327
327
|
|
|
328
328
|
---
|
|
329
329
|
|
|
330
|
-
## Extending the
|
|
330
|
+
## Extending the CLI
|
|
331
331
|
|
|
332
332
|
### Scaffold a plugin package
|
|
333
333
|
|
|
334
334
|
```bash
|
|
335
|
-
jerry
|
|
335
|
+
jerry plugin init --name my_datapipeline --out .
|
|
336
336
|
```
|
|
337
337
|
|
|
338
338
|
The generator copies a ready-made skeleton (pyproject, README, package directory) and
|
|
@@ -346,15 +346,15 @@ transforms.
|
|
|
346
346
|
Use the CLI helpers to scaffold boilerplate code in your plugin workspace:
|
|
347
347
|
|
|
348
348
|
```bash
|
|
349
|
-
jerry
|
|
350
|
-
jerry
|
|
349
|
+
jerry source add --provider dmi --dataset metobs --transport fs --format csv
|
|
350
|
+
jerry domain add --domain metobs
|
|
351
351
|
jerry contract
|
|
352
352
|
```
|
|
353
353
|
|
|
354
|
-
The
|
|
355
|
-
YAML file in `config/
|
|
354
|
+
The source command writes DTO/parser stubs, updates entry points and drops a matching
|
|
355
|
+
YAML file in `config/sources/` pre-filled with composed-loader defaults for the chosen
|
|
356
356
|
transport (`src/datapipeline/cli/app.py`, `src/datapipeline/services/scaffold/source.py`).
|
|
357
|
-
`jerry
|
|
357
|
+
`jerry domain add` now always scaffolds `TimeSeriesRecord` domains so every mapper carries
|
|
358
358
|
an explicit timestamp alongside its value, and `jerry contract` wires that source/domain
|
|
359
359
|
pair up for canonical stream generation.
|
|
360
360
|
|
|
@@ -102,9 +102,9 @@ values—they are interpolated into downstream YAML specs during bootstrap
|
|
|
102
102
|
```yaml
|
|
103
103
|
version: 1
|
|
104
104
|
paths:
|
|
105
|
-
sources: ../../
|
|
105
|
+
sources: ../../sources
|
|
106
106
|
streams: ../../contracts
|
|
107
|
-
dataset:
|
|
107
|
+
dataset: dataset.yaml
|
|
108
108
|
globals:
|
|
109
109
|
opening_time: "2024-01-01T16:00:00Z"
|
|
110
110
|
last_call: "2024-01-02T02:00:00Z"
|
|
@@ -115,13 +115,13 @@ globals:
|
|
|
115
115
|
|
|
116
116
|
### 3. Stock the bottles (raw sources)
|
|
117
117
|
|
|
118
|
-
Create `config/
|
|
118
|
+
Create `config/sources/<alias>.yaml` files. Each must expose a `parser` and `loader`
|
|
119
119
|
pointing at entry points plus any constructor arguments
|
|
120
120
|
(`src/datapipeline/services/bootstrap.py`). Here is a synthetic clock source that feels
|
|
121
121
|
like a drip of barrel-aged bitters:
|
|
122
122
|
|
|
123
123
|
```yaml
|
|
124
|
-
# config/
|
|
124
|
+
# config/sources/time_ticks.yaml
|
|
125
125
|
parser:
|
|
126
126
|
entrypoint: "synthetic.time"
|
|
127
127
|
args: {}
|
|
@@ -163,7 +163,7 @@ are grouped (`src/datapipeline/config/dataset/dataset.py`). A minimal hourly men
|
|
|
163
163
|
look like:
|
|
164
164
|
|
|
165
165
|
```yaml
|
|
166
|
-
# config/recipes/default/
|
|
166
|
+
# config/datasets/default/dataset.yaml
|
|
167
167
|
group_by:
|
|
168
168
|
keys:
|
|
169
169
|
- type: time
|
|
@@ -222,9 +222,9 @@ materialize all registered sources and streams
|
|
|
222
222
|
### Prep any station (with visuals)
|
|
223
223
|
|
|
224
224
|
```bash
|
|
225
|
-
jerry prep pour --project config/
|
|
226
|
-
jerry prep build --project config/
|
|
227
|
-
jerry prep stir --project config/
|
|
225
|
+
jerry prep pour --project config/datasets/default/project.yaml --limit 20
|
|
226
|
+
jerry prep build --project config/datasets/default/project.yaml --limit 20
|
|
227
|
+
jerry prep stir --project config/datasets/default/project.yaml --limit 20
|
|
228
228
|
```
|
|
229
229
|
|
|
230
230
|
- `prep pour` shows the record-stage ingredients headed for each feature.
|
|
@@ -241,9 +241,9 @@ loaders. The CLI wires up `build_record_pipeline`, `build_feature_pipeline` and
|
|
|
241
241
|
### Serve the flights (production mode)
|
|
242
242
|
|
|
243
243
|
```bash
|
|
244
|
-
jerry serve --project config/
|
|
245
|
-
jerry serve --project config/
|
|
246
|
-
jerry serve --project config/
|
|
244
|
+
jerry serve --project config/datasets/default/project.yaml --output print
|
|
245
|
+
jerry serve --project config/datasets/default/project.yaml --output stream
|
|
246
|
+
jerry serve --project config/datasets/default/project.yaml --output exports/batch.pt
|
|
247
247
|
```
|
|
248
248
|
|
|
249
249
|
Production mode skips the bar flair and focuses on throughput. `print` writes tasting
|
|
@@ -294,26 +294,26 @@ or feature store SDKs—without adding opinionated glue to the runtime itself.
|
|
|
294
294
|
|
|
295
295
|
Use the inspect helpers for different outputs:
|
|
296
296
|
|
|
297
|
-
- `jerry inspect report --project config/
|
|
297
|
+
- `jerry inspect report --project config/datasets/default/project.yaml` — print a
|
|
298
298
|
human-readable quality report (totals, keep/below lists, optional partition detail).
|
|
299
|
-
- `jerry inspect coverage --project config/
|
|
299
|
+
- `jerry inspect coverage --project config/datasets/default/project.yaml` — persist the
|
|
300
300
|
coverage summary to `build/coverage.json` (keep/below feature and partition lists plus
|
|
301
301
|
coverage percentages).
|
|
302
|
-
- `jerry inspect matrix --project config/
|
|
302
|
+
- `jerry inspect matrix --project config/datasets/default/project.yaml --format html` —
|
|
303
303
|
export availability matrices (CSV or HTML) for deeper analysis.
|
|
304
|
-
- `jerry inspect partitions --project config/
|
|
304
|
+
- `jerry inspect partitions --project config/datasets/default/project.yaml` — write the
|
|
305
305
|
observed partition manifest to `build/partitions.json` for use in configs.
|
|
306
306
|
|
|
307
307
|
Note: `jerry prep taste` has been removed; use `jerry inspect report` and friends.
|
|
308
308
|
|
|
309
309
|
---
|
|
310
310
|
|
|
311
|
-
## Extending the
|
|
311
|
+
## Extending the CLI
|
|
312
312
|
|
|
313
313
|
### Scaffold a plugin package
|
|
314
314
|
|
|
315
315
|
```bash
|
|
316
|
-
jerry
|
|
316
|
+
jerry plugin init --name my_datapipeline --out .
|
|
317
317
|
```
|
|
318
318
|
|
|
319
319
|
The generator copies a ready-made skeleton (pyproject, README, package directory) and
|
|
@@ -327,15 +327,15 @@ transforms.
|
|
|
327
327
|
Use the CLI helpers to scaffold boilerplate code in your plugin workspace:
|
|
328
328
|
|
|
329
329
|
```bash
|
|
330
|
-
jerry
|
|
331
|
-
jerry
|
|
330
|
+
jerry source add --provider dmi --dataset metobs --transport fs --format csv
|
|
331
|
+
jerry domain add --domain metobs
|
|
332
332
|
jerry contract
|
|
333
333
|
```
|
|
334
334
|
|
|
335
|
-
The
|
|
336
|
-
YAML file in `config/
|
|
335
|
+
The source command writes DTO/parser stubs, updates entry points and drops a matching
|
|
336
|
+
YAML file in `config/sources/` pre-filled with composed-loader defaults for the chosen
|
|
337
337
|
transport (`src/datapipeline/cli/app.py`, `src/datapipeline/services/scaffold/source.py`).
|
|
338
|
-
`jerry
|
|
338
|
+
`jerry domain add` now always scaffolds `TimeSeriesRecord` domains so every mapper carries
|
|
339
339
|
an explicit timestamp alongside its value, and `jerry contract` wires that source/domain
|
|
340
340
|
pair up for canonical stream generation.
|
|
341
341
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "jerry-thomas"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Jerry-Thomas: a stream-first, plugin-friendly data pipeline (mixology-themed CLI)"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -40,7 +40,7 @@ def main() -> None:
|
|
|
40
40
|
p_prep.add_argument(
|
|
41
41
|
"--project",
|
|
42
42
|
"-p",
|
|
43
|
-
default="config/
|
|
43
|
+
default="config/datasets/default/project.yaml",
|
|
44
44
|
help="path to project.yaml",
|
|
45
45
|
)
|
|
46
46
|
p_prep.add_argument("--limit", "-n", type=int, default=20)
|
|
@@ -54,7 +54,7 @@ def main() -> None:
|
|
|
54
54
|
p_prep_stage.add_argument(
|
|
55
55
|
"--project",
|
|
56
56
|
"-p",
|
|
57
|
-
default="config/
|
|
57
|
+
default="config/datasets/default/project.yaml",
|
|
58
58
|
help="path to project.yaml",
|
|
59
59
|
)
|
|
60
60
|
p_prep_stage.add_argument("--limit", "-n", type=int, default=20)
|
|
@@ -69,7 +69,7 @@ def main() -> None:
|
|
|
69
69
|
p_serve.add_argument(
|
|
70
70
|
"--project",
|
|
71
71
|
"-p",
|
|
72
|
-
default="config/
|
|
72
|
+
default="config/datasets/default/project.yaml",
|
|
73
73
|
help="path to project.yaml",
|
|
74
74
|
)
|
|
75
75
|
p_serve.add_argument(
|
|
@@ -81,9 +81,9 @@ def main() -> None:
|
|
|
81
81
|
help="output destination: 'print', 'stream', or a file ending in .pt",
|
|
82
82
|
)
|
|
83
83
|
|
|
84
|
-
#
|
|
84
|
+
# source
|
|
85
85
|
p_dist = sub.add_parser(
|
|
86
|
-
"
|
|
86
|
+
"source",
|
|
87
87
|
help="add or list raw sources",
|
|
88
88
|
parents=[common],
|
|
89
89
|
)
|
|
@@ -116,9 +116,9 @@ def main() -> None:
|
|
|
116
116
|
)
|
|
117
117
|
dist_sub.add_parser("list", help="list known sources")
|
|
118
118
|
|
|
119
|
-
#
|
|
119
|
+
# domain
|
|
120
120
|
p_spirit = sub.add_parser(
|
|
121
|
-
"
|
|
121
|
+
"domain",
|
|
122
122
|
help="add or list domains",
|
|
123
123
|
parents=[common],
|
|
124
124
|
)
|
|
@@ -134,13 +134,13 @@ def main() -> None:
|
|
|
134
134
|
# contract (link source ↔ domain)
|
|
135
135
|
p_contract = sub.add_parser(
|
|
136
136
|
"contract",
|
|
137
|
-
help="link a
|
|
137
|
+
help="link a source to a domain",
|
|
138
138
|
parents=[common],
|
|
139
139
|
)
|
|
140
140
|
|
|
141
|
-
#
|
|
141
|
+
# plugin (plugin scaffolding)
|
|
142
142
|
p_bar = sub.add_parser(
|
|
143
|
-
"
|
|
143
|
+
"plugin",
|
|
144
144
|
help="scaffold plugin workspaces",
|
|
145
145
|
parents=[common],
|
|
146
146
|
)
|
|
@@ -176,7 +176,7 @@ def main() -> None:
|
|
|
176
176
|
p_inspect_report.add_argument(
|
|
177
177
|
"--project",
|
|
178
178
|
"-p",
|
|
179
|
-
default="config/
|
|
179
|
+
default="config/datasets/default/project.yaml",
|
|
180
180
|
help="path to project.yaml",
|
|
181
181
|
)
|
|
182
182
|
p_inspect_report.add_argument(
|
|
@@ -207,7 +207,7 @@ def main() -> None:
|
|
|
207
207
|
p_inspect_cov.add_argument(
|
|
208
208
|
"--project",
|
|
209
209
|
"-p",
|
|
210
|
-
default="config/
|
|
210
|
+
default="config/datasets/default/project.yaml",
|
|
211
211
|
help="path to project.yaml",
|
|
212
212
|
)
|
|
213
213
|
p_inspect_cov.add_argument(
|
|
@@ -244,7 +244,7 @@ def main() -> None:
|
|
|
244
244
|
p_inspect_matrix.add_argument(
|
|
245
245
|
"--project",
|
|
246
246
|
"-p",
|
|
247
|
-
default="config/
|
|
247
|
+
default="config/datasets/default/project.yaml",
|
|
248
248
|
help="path to project.yaml",
|
|
249
249
|
)
|
|
250
250
|
p_inspect_matrix.add_argument(
|
|
@@ -297,7 +297,7 @@ def main() -> None:
|
|
|
297
297
|
p_inspect_parts.add_argument(
|
|
298
298
|
"--project",
|
|
299
299
|
"-p",
|
|
300
|
-
default="config/
|
|
300
|
+
default="config/datasets/default/project.yaml",
|
|
301
301
|
help="path to project.yaml",
|
|
302
302
|
)
|
|
303
303
|
p_inspect_parts.add_argument(
|
|
@@ -318,7 +318,7 @@ def main() -> None:
|
|
|
318
318
|
if args.cmd == "prep":
|
|
319
319
|
from datapipeline.cli.commands.run import handle_prep_stage
|
|
320
320
|
handle_prep_stage(
|
|
321
|
-
project=getattr(args, "project", "config/
|
|
321
|
+
project=getattr(args, "project", "config/datasets/default/project.yaml"),
|
|
322
322
|
stage=getattr(args, "num", 0),
|
|
323
323
|
limit=getattr(args, "limit", 20),
|
|
324
324
|
)
|
|
@@ -337,7 +337,7 @@ def main() -> None:
|
|
|
337
337
|
subcmd = getattr(args, "inspect_cmd", None)
|
|
338
338
|
if subcmd in (None, "report"):
|
|
339
339
|
handle_inspect_report(
|
|
340
|
-
project=getattr(args, "project", "config/
|
|
340
|
+
project=getattr(args, "project", "config/datasets/default/project.yaml"),
|
|
341
341
|
output=None,
|
|
342
342
|
threshold=getattr(args, "threshold", 0.95),
|
|
343
343
|
match_partition=getattr(args, "match_partition", "base"),
|
|
@@ -385,7 +385,7 @@ def main() -> None:
|
|
|
385
385
|
)
|
|
386
386
|
return
|
|
387
387
|
|
|
388
|
-
if args.cmd == "
|
|
388
|
+
if args.cmd == "source":
|
|
389
389
|
if args.dist_cmd == "list":
|
|
390
390
|
handle_list(subcmd="sources")
|
|
391
391
|
else:
|
|
@@ -398,7 +398,7 @@ def main() -> None:
|
|
|
398
398
|
)
|
|
399
399
|
return
|
|
400
400
|
|
|
401
|
-
if args.cmd == "
|
|
401
|
+
if args.cmd == "domain":
|
|
402
402
|
if args.spirit_cmd == "list":
|
|
403
403
|
handle_list(subcmd="domains")
|
|
404
404
|
else:
|
|
@@ -412,7 +412,7 @@ def main() -> None:
|
|
|
412
412
|
handle_link()
|
|
413
413
|
return
|
|
414
414
|
|
|
415
|
-
if args.cmd == "
|
|
415
|
+
if args.cmd == "plugin":
|
|
416
416
|
handle_bar(
|
|
417
417
|
subcmd=args.bar_cmd,
|
|
418
418
|
name=getattr(args, "name", None),
|
|
@@ -29,8 +29,8 @@ def handle() -> None:
|
|
|
29
29
|
root_dir, name, pyproject = pkg_root(None)
|
|
30
30
|
|
|
31
31
|
# Discover sources by scanning sources_dir YAMLs
|
|
32
|
-
# Default to
|
|
33
|
-
proj_path = root_dir / "config" / "
|
|
32
|
+
# Default to dataset-scoped project config
|
|
33
|
+
proj_path = root_dir / "config" / "datasets" / "default" / "project.yaml"
|
|
34
34
|
# Ensure a minimal project scaffold so we can resolve dirs interactively
|
|
35
35
|
ensure_project_scaffold(proj_path)
|
|
36
36
|
sources_dir = resolve_sources_dir(proj_path)
|
|
@@ -38,7 +38,7 @@ def handle() -> None:
|
|
|
38
38
|
if sources_dir.exists():
|
|
39
39
|
source_options = sorted(p.stem for p in sources_dir.glob("*.y*ml"))
|
|
40
40
|
if not source_options:
|
|
41
|
-
print("❗ No sources found. Create one first (jerry
|
|
41
|
+
print("❗ No sources found. Create one first (jerry source add ...)")
|
|
42
42
|
raise SystemExit(2)
|
|
43
43
|
|
|
44
44
|
src_key = _pick_from_list("Select a source to link:", source_options)
|
|
@@ -64,7 +64,7 @@ def handle() -> None:
|
|
|
64
64
|
domain_options = sorted(
|
|
65
65
|
read_group_entries(pyproject, FILTERS_GROUP).keys())
|
|
66
66
|
if not domain_options:
|
|
67
|
-
print("❗ No domains found. Create one first (jerry
|
|
67
|
+
print("❗ No domains found. Create one first (jerry domain add ...)")
|
|
68
68
|
raise SystemExit(2)
|
|
69
69
|
|
|
70
70
|
dom_name = _pick_from_list("Select a domain to link to:", domain_options)
|
|
@@ -117,7 +117,7 @@ def _load_sources_from_dir(project_yaml: Path, vars_: dict[str, str]) -> dict:
|
|
|
117
117
|
if isinstance(data.get(SRC_PARSER_KEY), dict) and isinstance(data.get(SRC_LOADER_KEY), dict):
|
|
118
118
|
alias = data.get(SOURCE_ID_KEY)
|
|
119
119
|
if not alias:
|
|
120
|
-
raise ValueError(f"Missing 'source_id' in
|
|
120
|
+
raise ValueError(f"Missing 'source_id' in source file: {fname}")
|
|
121
121
|
out[alias] = _interpolate(data, vars_)
|
|
122
122
|
continue
|
|
123
123
|
return out
|
|
@@ -49,8 +49,8 @@ def ensure_project_scaffold(project_yaml: Path) -> None:
|
|
|
49
49
|
"version: 1\n"
|
|
50
50
|
"paths:\n"
|
|
51
51
|
" streams: ../../contracts\n"
|
|
52
|
-
" sources: ../../
|
|
53
|
-
" dataset:
|
|
52
|
+
" sources: ../../sources\n"
|
|
53
|
+
" dataset: dataset.yaml\n"
|
|
54
54
|
"globals:\n"
|
|
55
55
|
" start_time: 2021-01-01T00:00:00Z\n"
|
|
56
56
|
" end_time: 2021-12-31T23:00:00Z\n"
|
|
@@ -131,10 +131,10 @@ def create_source(*, provider: str, dataset: str, transport: str,
|
|
|
131
131
|
alias = _source_alias(provider, dataset)
|
|
132
132
|
loader_ep, loader_args = _loader_ep_and_args(transport, format, ep_key)
|
|
133
133
|
|
|
134
|
-
# Resolve sources directory from a single
|
|
134
|
+
# Resolve sources directory from a single dataset-scoped project config.
|
|
135
135
|
# If not present or invalid, let the exception bubble up to prompt the user
|
|
136
136
|
# to provide a valid project path.
|
|
137
|
-
proj_yaml = root_dir / "config" / "
|
|
137
|
+
proj_yaml = root_dir / "config" / "datasets" / "default" / "project.yaml"
|
|
138
138
|
# Best-effort: create a minimal project scaffold if missing
|
|
139
139
|
ensure_project_scaffold(proj_yaml)
|
|
140
140
|
sources_dir = resolve_sources_dir(proj_yaml).resolve()
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/README.md
RENAMED
|
@@ -4,22 +4,20 @@ Minimal plugin skeleton for the Jerry Thomas (datapipeline) framework.
|
|
|
4
4
|
|
|
5
5
|
Quick start
|
|
6
6
|
- Initialize a plugin (already done if you’re reading this here):
|
|
7
|
-
- `jerry
|
|
7
|
+
- `jerry plugin init --name {{PACKAGE_NAME}}`
|
|
8
8
|
- Add a source via CLI (transport-specific placeholders are scaffolded):
|
|
9
|
-
- File data: `jerry
|
|
10
|
-
- URL data: `jerry
|
|
11
|
-
- Synthetic: `jerry
|
|
12
|
-
- Edit the generated `config/
|
|
9
|
+
- File data: `jerry source add -p <provider> -d <dataset> -t fs -f <csv|json|json-lines>`
|
|
10
|
+
- URL data: `jerry source add -p <provider> -d <dataset> -t url -f <json|json-lines|csv>`
|
|
11
|
+
- Synthetic: `jerry source add -p <provider> -d <dataset> -t synthetic`
|
|
12
|
+
- Edit the generated `config/sources/*.yaml` to fill in the `path`, delimiter, etc.
|
|
13
13
|
- Reinstall after EP changes (pyproject.toml) and restart Python processes:
|
|
14
14
|
- Core: `cd lib/datapipeline && python -m pip install -e .`
|
|
15
15
|
- This plugin: `python -m pip install -e .`
|
|
16
16
|
|
|
17
17
|
Folder layout
|
|
18
18
|
- `config/`
|
|
19
|
-
- `
|
|
19
|
+
- `sources/*.yaml` — raw source definitions (one file per source)
|
|
20
20
|
- `contracts/*.yaml` — canonical stream definitions
|
|
21
|
-
- `recipes/<name>/` — experiment configs (each directory holds a `project.yaml`,
|
|
22
|
-
`recipe.yaml`, and a `build/` folder for generated artifacts)
|
|
23
21
|
- `src/{{PACKAGE_NAME}}/`
|
|
24
22
|
- `sources/<provider>/<dataset>/dto.py` — DTO model for the source
|
|
25
23
|
- `sources/<provider>/<dataset>/parser.py` — parse raw → DTO
|
|
@@ -36,15 +34,15 @@ How loaders work
|
|
|
36
34
|
- Synthetic sources generate data in-process and keep a small loader stub.
|
|
37
35
|
|
|
38
36
|
Run data flows
|
|
39
|
-
- Records: `jerry prep pour -p config/
|
|
40
|
-
- Features: `jerry prep build -p config/
|
|
41
|
-
- Vectors: `jerry prep stir -p config/
|
|
37
|
+
- Records: `jerry prep pour -p config/datasets/default/project.yaml -n 100`
|
|
38
|
+
- Features: `jerry prep build -p config/datasets/default/project.yaml -n 100`
|
|
39
|
+
- Vectors: `jerry prep stir -p config/datasets/default/project.yaml -n 100`
|
|
42
40
|
|
|
43
41
|
Analyze vectors
|
|
44
|
-
- `jerry inspect report --project config/
|
|
45
|
-
- `jerry inspect coverage --project config/
|
|
46
|
-
- `jerry inspect matrix --project config/
|
|
47
|
-
- `jerry inspect partitions --project config/
|
|
42
|
+
- `jerry inspect report --project config/datasets/default/project.yaml` (console only)
|
|
43
|
+
- `jerry inspect coverage --project config/datasets/default/project.yaml` (writes build/coverage.json)
|
|
44
|
+
- `jerry inspect matrix --project config/datasets/default/project.yaml --format html` (writes build/matrix.html)
|
|
45
|
+
- `jerry inspect partitions --project config/datasets/default/project.yaml` (writes build/partitions.json)
|
|
48
46
|
- Use `vector_transforms` to keep coverage high (history/horizontal fills, constants, or
|
|
49
47
|
drop rules) before serving vectors.
|
|
50
48
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
source_id: time_ticks # raw source alias (see config/
|
|
1
|
+
source_id: time_ticks # raw source alias (see config/sources)
|
|
2
2
|
stream_id: time_linear # this stream id used by recipes
|
|
3
3
|
|
|
4
4
|
mapper: # normalize/reshape DTO -> TemporalRecord; if not implemented, the identity mapper is used
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/plugin_skeleton/pyproject.toml
RENAMED
|
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "{{PACKAGE_NAME}}"
|
|
7
|
-
version = "0.1
|
|
7
|
+
version = "0.0.1"
|
|
8
8
|
description = "A DataPipeline plugin for the {{PACKAGE_NAME}} domain"
|
|
9
9
|
dependencies = [
|
|
10
|
-
"jerry-thomas>=0.
|
|
10
|
+
"jerry-thomas>=0.2.0",
|
|
11
11
|
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: jerry-thomas
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Jerry-Thomas: a stream-first, plugin-friendly data pipeline (mixology-themed CLI)
|
|
5
5
|
Author: Anders Skott Lind
|
|
6
6
|
License: MIT
|
|
@@ -121,9 +121,9 @@ values—they are interpolated into downstream YAML specs during bootstrap
|
|
|
121
121
|
```yaml
|
|
122
122
|
version: 1
|
|
123
123
|
paths:
|
|
124
|
-
sources: ../../
|
|
124
|
+
sources: ../../sources
|
|
125
125
|
streams: ../../contracts
|
|
126
|
-
dataset:
|
|
126
|
+
dataset: dataset.yaml
|
|
127
127
|
globals:
|
|
128
128
|
opening_time: "2024-01-01T16:00:00Z"
|
|
129
129
|
last_call: "2024-01-02T02:00:00Z"
|
|
@@ -134,13 +134,13 @@ globals:
|
|
|
134
134
|
|
|
135
135
|
### 3. Stock the bottles (raw sources)
|
|
136
136
|
|
|
137
|
-
Create `config/
|
|
137
|
+
Create `config/sources/<alias>.yaml` files. Each must expose a `parser` and `loader`
|
|
138
138
|
pointing at entry points plus any constructor arguments
|
|
139
139
|
(`src/datapipeline/services/bootstrap.py`). Here is a synthetic clock source that feels
|
|
140
140
|
like a drip of barrel-aged bitters:
|
|
141
141
|
|
|
142
142
|
```yaml
|
|
143
|
-
# config/
|
|
143
|
+
# config/sources/time_ticks.yaml
|
|
144
144
|
parser:
|
|
145
145
|
entrypoint: "synthetic.time"
|
|
146
146
|
args: {}
|
|
@@ -182,7 +182,7 @@ are grouped (`src/datapipeline/config/dataset/dataset.py`). A minimal hourly men
|
|
|
182
182
|
look like:
|
|
183
183
|
|
|
184
184
|
```yaml
|
|
185
|
-
# config/recipes/default/
|
|
185
|
+
# config/datasets/default/dataset.yaml
|
|
186
186
|
group_by:
|
|
187
187
|
keys:
|
|
188
188
|
- type: time
|
|
@@ -241,9 +241,9 @@ materialize all registered sources and streams
|
|
|
241
241
|
### Prep any station (with visuals)
|
|
242
242
|
|
|
243
243
|
```bash
|
|
244
|
-
jerry prep pour --project config/
|
|
245
|
-
jerry prep build --project config/
|
|
246
|
-
jerry prep stir --project config/
|
|
244
|
+
jerry prep pour --project config/datasets/default/project.yaml --limit 20
|
|
245
|
+
jerry prep build --project config/datasets/default/project.yaml --limit 20
|
|
246
|
+
jerry prep stir --project config/datasets/default/project.yaml --limit 20
|
|
247
247
|
```
|
|
248
248
|
|
|
249
249
|
- `prep pour` shows the record-stage ingredients headed for each feature.
|
|
@@ -260,9 +260,9 @@ loaders. The CLI wires up `build_record_pipeline`, `build_feature_pipeline` and
|
|
|
260
260
|
### Serve the flights (production mode)
|
|
261
261
|
|
|
262
262
|
```bash
|
|
263
|
-
jerry serve --project config/
|
|
264
|
-
jerry serve --project config/
|
|
265
|
-
jerry serve --project config/
|
|
263
|
+
jerry serve --project config/datasets/default/project.yaml --output print
|
|
264
|
+
jerry serve --project config/datasets/default/project.yaml --output stream
|
|
265
|
+
jerry serve --project config/datasets/default/project.yaml --output exports/batch.pt
|
|
266
266
|
```
|
|
267
267
|
|
|
268
268
|
Production mode skips the bar flair and focuses on throughput. `print` writes tasting
|
|
@@ -313,26 +313,26 @@ or feature store SDKs—without adding opinionated glue to the runtime itself.
|
|
|
313
313
|
|
|
314
314
|
Use the inspect helpers for different outputs:
|
|
315
315
|
|
|
316
|
-
- `jerry inspect report --project config/
|
|
316
|
+
- `jerry inspect report --project config/datasets/default/project.yaml` — print a
|
|
317
317
|
human-readable quality report (totals, keep/below lists, optional partition detail).
|
|
318
|
-
- `jerry inspect coverage --project config/
|
|
318
|
+
- `jerry inspect coverage --project config/datasets/default/project.yaml` — persist the
|
|
319
319
|
coverage summary to `build/coverage.json` (keep/below feature and partition lists plus
|
|
320
320
|
coverage percentages).
|
|
321
|
-
- `jerry inspect matrix --project config/
|
|
321
|
+
- `jerry inspect matrix --project config/datasets/default/project.yaml --format html` —
|
|
322
322
|
export availability matrices (CSV or HTML) for deeper analysis.
|
|
323
|
-
- `jerry inspect partitions --project config/
|
|
323
|
+
- `jerry inspect partitions --project config/datasets/default/project.yaml` — write the
|
|
324
324
|
observed partition manifest to `build/partitions.json` for use in configs.
|
|
325
325
|
|
|
326
326
|
Note: `jerry prep taste` has been removed; use `jerry inspect report` and friends.
|
|
327
327
|
|
|
328
328
|
---
|
|
329
329
|
|
|
330
|
-
## Extending the
|
|
330
|
+
## Extending the CLI
|
|
331
331
|
|
|
332
332
|
### Scaffold a plugin package
|
|
333
333
|
|
|
334
334
|
```bash
|
|
335
|
-
jerry
|
|
335
|
+
jerry plugin init --name my_datapipeline --out .
|
|
336
336
|
```
|
|
337
337
|
|
|
338
338
|
The generator copies a ready-made skeleton (pyproject, README, package directory) and
|
|
@@ -346,15 +346,15 @@ transforms.
|
|
|
346
346
|
Use the CLI helpers to scaffold boilerplate code in your plugin workspace:
|
|
347
347
|
|
|
348
348
|
```bash
|
|
349
|
-
jerry
|
|
350
|
-
jerry
|
|
349
|
+
jerry source add --provider dmi --dataset metobs --transport fs --format csv
|
|
350
|
+
jerry domain add --domain metobs
|
|
351
351
|
jerry contract
|
|
352
352
|
```
|
|
353
353
|
|
|
354
|
-
The
|
|
355
|
-
YAML file in `config/
|
|
354
|
+
The source command writes DTO/parser stubs, updates entry points and drops a matching
|
|
355
|
+
YAML file in `config/sources/` pre-filled with composed-loader defaults for the chosen
|
|
356
356
|
transport (`src/datapipeline/cli/app.py`, `src/datapipeline/services/scaffold/source.py`).
|
|
357
|
-
`jerry
|
|
357
|
+
`jerry domain add` now always scaffolds `TimeSeriesRecord` domains so every mapper carries
|
|
358
358
|
an explicit timestamp alongside its value, and `jerry contract` wires that source/domain
|
|
359
359
|
pair up for canonical stream generation.
|
|
360
360
|
|
|
@@ -77,9 +77,9 @@ src/datapipeline/templates/plugin_skeleton/README.md
|
|
|
77
77
|
src/datapipeline/templates/plugin_skeleton/pyproject.toml
|
|
78
78
|
src/datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.yaml
|
|
79
79
|
src/datapipeline/templates/plugin_skeleton/config/contracts/time_linear.yaml
|
|
80
|
-
src/datapipeline/templates/plugin_skeleton/config/
|
|
81
|
-
src/datapipeline/templates/plugin_skeleton/config/
|
|
82
|
-
src/datapipeline/templates/plugin_skeleton/config/
|
|
80
|
+
src/datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml
|
|
81
|
+
src/datapipeline/templates/plugin_skeleton/config/datasets/default/project.yaml
|
|
82
|
+
src/datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml
|
|
83
83
|
src/datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py
|
|
84
84
|
src/datapipeline/templates/stubs/dto.py.j2
|
|
85
85
|
src/datapipeline/templates/stubs/filter.py.j2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/pipeline/utils/transform_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/sources/synthetic/time/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/loader_synthetic.py.j2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/templates/stubs/parser_custom.py.j2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{jerry_thomas-0.1.0 → jerry_thomas-0.2.0}/src/datapipeline/transforms/stream/ensure_ticks.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|