jerry-thomas 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datapipeline/analysis/vector/collector.py +275 -0
- datapipeline/analysis/vector/matrix.py +527 -0
- datapipeline/analysis/vector/report.py +317 -0
- datapipeline/analysis/vector_analyzer.py +3 -694
- datapipeline/build/__init__.py +6 -0
- datapipeline/build/state.py +52 -0
- datapipeline/build/tasks.py +186 -0
- datapipeline/cli/app.py +125 -56
- datapipeline/cli/commands/build.py +39 -0
- datapipeline/cli/commands/domain.py +1 -1
- datapipeline/cli/commands/filter.py +1 -2
- datapipeline/cli/commands/inspect.py +77 -26
- datapipeline/cli/commands/link.py +11 -12
- datapipeline/cli/commands/plugin.py +1 -1
- datapipeline/cli/commands/run.py +234 -110
- datapipeline/cli/commands/source.py +3 -3
- datapipeline/cli/commands/writers.py +138 -0
- datapipeline/cli/visuals/__init__.py +14 -0
- datapipeline/cli/{visuals.py → visuals/labels.py} +35 -24
- datapipeline/cli/visuals/sources.py +138 -0
- datapipeline/config/build.py +64 -0
- datapipeline/config/dataset/dataset.py +1 -2
- datapipeline/config/dataset/loader.py +1 -81
- datapipeline/config/postprocess.py +14 -0
- datapipeline/config/project.py +13 -1
- datapipeline/config/run.py +116 -0
- datapipeline/config/split.py +35 -0
- datapipeline/domain/vector.py +0 -9
- datapipeline/filters/filters.py +1 -1
- datapipeline/integrations/ml/__init__.py +16 -0
- datapipeline/integrations/ml/adapter.py +120 -0
- datapipeline/integrations/ml/pandas_support.py +46 -0
- datapipeline/integrations/ml/rows.py +82 -0
- datapipeline/integrations/ml/torch_support.py +94 -0
- datapipeline/pipeline/context.py +69 -0
- datapipeline/pipeline/pipelines.py +21 -23
- datapipeline/pipeline/split.py +171 -0
- datapipeline/pipeline/stages.py +54 -15
- datapipeline/pipeline/utils/keygen.py +2 -2
- datapipeline/pipeline/utils/transform_utils.py +64 -23
- datapipeline/plugins.py +1 -1
- datapipeline/runtime.py +73 -0
- datapipeline/services/artifacts.py +96 -0
- datapipeline/services/bootstrap/__init__.py +12 -0
- datapipeline/services/bootstrap/config.py +141 -0
- datapipeline/services/bootstrap/core.py +186 -0
- datapipeline/services/constants.py +5 -0
- datapipeline/services/entrypoints.py +1 -1
- datapipeline/services/factories.py +5 -2
- datapipeline/services/paths.py +1 -1
- datapipeline/services/project_paths.py +21 -0
- datapipeline/services/scaffold/domain.py +1 -2
- datapipeline/services/scaffold/filter.py +1 -2
- datapipeline/services/scaffold/mappers.py +1 -1
- datapipeline/services/scaffold/plugin.py +31 -5
- datapipeline/services/scaffold/source.py +2 -4
- datapipeline/sources/models/generator.py +6 -2
- datapipeline/sources/models/loader.py +0 -3
- datapipeline/sources/models/synthetic.py +1 -1
- datapipeline/sources/synthetic/time/loader.py +10 -2
- datapipeline/templates/plugin_skeleton/README.md +52 -7
- datapipeline/templates/plugin_skeleton/config/contracts/{time_hour_sin.yaml → time_hour_sin.synthetic.yaml} +3 -3
- datapipeline/templates/plugin_skeleton/config/contracts/{time_linear.yaml → time_linear.synthetic.yaml} +3 -3
- datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +9 -0
- datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +3 -18
- datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +13 -0
- datapipeline/templates/plugin_skeleton/config/datasets/default/project.yaml +12 -0
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +10 -0
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +10 -0
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +10 -0
- datapipeline/templates/plugin_skeleton/pyproject.toml +2 -2
- datapipeline/templates/stubs/dto.py.j2 +2 -0
- datapipeline/templates/stubs/mapper.py.j2 +5 -3
- datapipeline/templates/stubs/parser.py.j2 +1 -0
- datapipeline/transforms/feature/scaler.py +127 -62
- datapipeline/transforms/filter.py +5 -2
- datapipeline/transforms/stream/fill.py +3 -25
- datapipeline/transforms/utils.py +16 -0
- datapipeline/transforms/vector.py +62 -78
- datapipeline/transforms/vector_utils.py +19 -67
- datapipeline/utils/load.py +2 -2
- datapipeline/utils/pickle_model.py +30 -0
- datapipeline/utils/placeholders.py +35 -0
- jerry_thomas-0.3.0.dist-info/METADATA +502 -0
- jerry_thomas-0.3.0.dist-info/RECORD +139 -0
- datapipeline/cli/visual_source.py +0 -32
- datapipeline/common/__init__.py +0 -0
- datapipeline/common/geo.py +0 -13
- datapipeline/integrations/ml.py +0 -319
- datapipeline/registries/registries.py +0 -15
- datapipeline/services/bootstrap.py +0 -191
- jerry_thomas-0.2.0.dist-info/METADATA +0 -402
- jerry_thomas-0.2.0.dist-info/RECORD +0 -112
- {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/WHEEL +0 -0
- {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/entry_points.txt +0 -0
- {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/top_level.txt +0 -0
jerry_thomas-0.2.0.dist-info/METADATA (removed)
@@ -1,402 +0,0 @@
-Metadata-Version: 2.4
-Name: jerry-thomas
-Version: 0.2.0
-Summary: Jerry-Thomas: a stream-first, plugin-friendly data pipeline (mixology-themed CLI)
-Author: Anders Skott Lind
-License: MIT
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: numpy<3.0,>=1.24
-Requires-Dist: pydantic>=2.0
-Requires-Dist: PyYAML>=5.4
-Requires-Dist: tqdm>=4.0
-Requires-Dist: jinja2>=3.0
-Provides-Extra: ml
-Requires-Dist: pandas>=2.0; extra == "ml"
-Requires-Dist: torch>=2.0; extra == "ml"
-Dynamic: license-file
-
-# Jerry Thomas
-
-Time‑Series First
-- This runtime is time‑series‑first. Every domain record must include a timezone‑aware `time` and a `value`.
-- Grouping is defined by time buckets only (`group_by.keys: [ { type: time, ... } ]`).
-- Feature streams are sorted by time; sequence transforms assume ordered series.
-- Categorical dimensions (e.g., station, zone, ticker) belong in `partition_by` so they become partitions of the same time series.
-- Non‑temporal grouping is not supported.
-
-Jerry Thomas turns the datapipeline runtime into a cocktail program. You still install the
-same Python package (`datapipeline`) and tap into the plugin architecture, but every CLI
-dance step nods to a craft bar. Declarative YAML menus describe projects, sources and
-datasets, pipelines move payloads through record/feature/vector stations, and setuptools
-entry points keep the back bar stocked with new ingredients.
-
----
-
-## How the bar is set up
-
-```text
-raw source → canonical stream → record stage → feature stage → vector stage
-```
-
-1. **Raw sources (bottles on the shelf)** bundle a loader + parser recipe. Loaders handle
-   the I/O (files, URLs or synthetic runs) and parsers map rows into typed records while
-   skimming the dregs (`src/datapipeline/sources/models/loader.py`,
-   `src/datapipeline/sources/models/source.py`). The bootstrapper registers each source under
-   an alias so you can order it later in the service flow (`src/datapipeline/streams/raw.py`,
-   `src/datapipeline/services/bootstrap.py`).
-2. **Canonical streams (house infusions)** optionally apply a mapper on top of a raw
-   source to normalize payloads before the dataset drinks them
-   (`src/datapipeline/streams/canonical.py`, `src/datapipeline/services/factories.py`).
-3. **Dataset stages (prep stations)** read the configured canonical streams. Record stages
-   are your strainers and shakers, feature stages bottle the clarified spirits into keyed
-   features (with optional sequence transforms), and vector stages line up the flights ready
-   for service (`src/datapipeline/pipeline/pipelines.py`, `src/datapipeline/pipeline/stages.py`,
-   `src/datapipeline/config/dataset/feature.py`).
-4. **Vectors (tasting flights)** carry grouped feature values; downstream tasters can
-   inspect them for balance and completeness
-   (`src/datapipeline/domain/vector.py`, `src/datapipeline/analysis/vector_analyzer.py`).
-
----
-
-## Bar back cheat sheet
-
-| Path | What lives here |
-| ---- | --------------- |
-| `src/datapipeline/cli` | Argparse-powered bar program with commands for running pipelines, inspecting pours, scaffolding plugins and projecting service flow (`cli/app.py`, `cli/openers.py`, `cli/visuals.py`). |
-| `src/datapipeline/services` | Bootstrapping (project loading, YAML interpolation), runtime factories and scaffolding helpers for new bar tools (`services/bootstrap.py`, `services/factories.py`, `services/scaffold/plugin.py`). |
-| `src/datapipeline/pipeline` | Pure functions that build record/feature/vector iterators plus supporting utilities for ordering and transform wiring (`pipeline/pipelines.py`, `pipeline/utils/transform_utils.py`). |
-| `src/datapipeline/domain` | Data structures representing records, feature records and vectors coming off the line (`domain/record.py`, `domain/feature.py`, `domain/vector.py`). |
-| `src/datapipeline/transforms` & `src/datapipeline/filters` | Built-in transforms (lagging timestamps, scaling, sliding windows) and filter helpers exposed through entry points (`transforms/record.py`, `transforms/feature.py`, `transforms/sequence.py`, `filters/filters.py`). |
-| `src/datapipeline/sources/synthetic/time` | Example synthetic time-series loader/parser pair plus helper mappers for experimentation while the real spirits arrive (`sources/synthetic/time/loader.py`, `sources/synthetic/time/parser.py`, `mappers/synthetic/time.py`). |
-
----
-
-## Built-in DSL identifiers
-
-The YAML DSL resolves filters and transforms by entry-point name. These ship with the
-template out of the box:
-
-| Kind | Identifiers | Notes |
-| ---- | ----------- | ----- |
-| Filters | `eq`/`equals`, `ne`/`not_equal`, `lt`, `le`, `gt`, `ge`, `in`/`contains`, `nin`/`not_in` | Use as `- gt: { field: value }` or `- in: { field: [values...] }`. Synonyms map to the same implementation. |
-| Record transforms | `time_lag`, `drop_missing` | `time_lag` expects a duration string (e.g. `1h`), `drop_missing` removes `None`/`NaN` records. |
-| Feature transforms | `standard_scale` | Options: `with_mean`, `with_std`, optional `statistics`. |
-| Sequence transforms | `time_window`, `time_fill_mean`, `time_fill_median` | `time_window` builds sliding windows; the fill transforms impute missing values from running mean/median with optional `window`/`min_samples`. |
-| Vector transforms | `fill_history`, `fill_horizontal`, `fill_constant`, `drop_missing` | History fill uses prior buckets, horizontal fill aggregates sibling partitions, constant sets a default, and drop removes vectors below coverage thresholds. |
-
-Extend `pyproject.toml` with additional entry points to register custom logic under your
-own identifiers.
-
----
-
-## Opening the bar
-
-### 1. Install the tools
-
-```bash
-python -m venv .venv
-source .venv/bin/activate  # Windows: .venv\Scripts\activate
-python -m pip install --upgrade pip
-pip install jerry-thomas
-```
-
-The published wheel exposes the `jerry` CLI (backed by the `datapipeline` package) and
-pulls in core dependencies like Pydantic, PyYAML, tqdm and Jinja2 (see
-`pyproject.toml`). Prefer `pip install -e .` only when you are actively developing this
-repository. Double-check the back bar is reachable:
-
-```bash
-python -c "import datapipeline; print('bar ready')"
-```
-
-### 2. Draft your bar book
-
-Create a `config/recipes/<name>/project.yaml` so the runtime knows where to find
-ingredients, infusions and the tasting menu. Globals are optional but handy for sharing
-values—they are interpolated into downstream YAML specs during bootstrap
-(`src/datapipeline/config/project.py`, `src/datapipeline/services/bootstrap.py`).
-
-```yaml
-version: 1
-paths:
-  sources: ../../sources
-  streams: ../../contracts
-  dataset: dataset.yaml
-globals:
-  opening_time: "2024-01-01T16:00:00Z"
-  last_call: "2024-01-02T02:00:00Z"
-```
-
-> Helper functions in `src/datapipeline/services/project_paths.py` resolve relative paths
-> against the project root and ensure the mise en place folders exist.
-
-### 3. Stock the bottles (raw sources)
-
-Create `config/sources/<alias>.yaml` files. Each must expose a `parser` and `loader`
-pointing at entry points plus any constructor arguments
-(`src/datapipeline/services/bootstrap.py`). Here is a synthetic clock source that feels
-like a drip of barrel-aged bitters:
-
-```yaml
-# config/sources/time_ticks.yaml
-parser:
-  entrypoint: "synthetic.time"
-  args: {}
-loader:
-  entrypoint: "synthetic.time"
-  args:
-    start: "${opening_time}"
-    end: "${last_call}"
-    frequency: "1h"
-```
-
-That file wires up the built-in `TimeTicksGenerator` + parser pair that yields
-timezone-aware timestamps (`sources/synthetic/time/loader.py`,
-`sources/synthetic/time/parser.py`).
-
-### 4. Mix house infusions (canonical streams)
-
-Canonical specs live under `config/contracts/` and reference a raw source alias plus an
-optional mapper entry point (`src/datapipeline/services/bootstrap.py`,
-`src/datapipeline/streams/canonical.py`). This example turns each timestamp into a citrus
-spritz feature:
-
-```yaml
-# config/contracts/time/encode.yaml
-source: time_ticks
-mapper:
-  entrypoint: "synthetic.time.encode"
-  args:
-    mode: spritz
-```
-
-The mapper uses the provided mode to create a new `TimeSeriesRecord` stream ready for the
-feature stage (`mappers/synthetic/time.py`).
-
-### 5. Script the tasting menu (dataset)
-
-Datasets describe which canonical streams should be read at each station and how flights
-are grouped (`src/datapipeline/config/dataset/dataset.py`). A minimal hourly menu might
-look like:
-
-```yaml
-# config/recipes/default/dataset.yaml
-group_by:
-  keys:
-    - type: time
-      field: time
-      resolution: 1h
-features:
-  - id: hour_spritz
-    stream: time.encode
-    transforms:
-      - record:
-          transform: time_lag
-          args: 0h
-      - feature:
-          transform: standard_scale
-          with_mean: true
-          with_std: true
-      - sequence:
-          transform: time_window
-          size: 4
-          stride: 1
-      - sequence:
-          transform: time_fill_mean
-          window: 24
-          min_samples: 6
-```
-
-Use the sample `dataset` template as a starting point if you prefer scaffolding before
-pouring concrete values. Group keys now require explicit time bucketing (with automatic
-flooring to the requested resolution) so every pipeline is clock-driven. You can attach
-feature or sequence transforms—such as the sliding `TimeWindowTransformer` or the
-`time_fill_mean`/`time_fill_median` imputers—directly in the YAML by referencing their
-entry point names (`src/datapipeline/transforms/sequence.py`).
-
-When vectors are assembled you can optionally apply `vector_transforms` to enforce schema
-guarantees. The built-ins cover:
-
-- `fill_history` – use running means/medians from prior buckets (per partition) with
-  configurable window/minimum samples.
-- `fill_horizontal` – aggregate sibling partitions at the same timestamp (e.g. other
-  stations) using mean/median.
-- `fill_constant` – provide a constant default for missing features/partitions.
-- `drop_missing` – drop vectors that fall below a coverage threshold or omit required
-  features.
-
-Transforms accept either an explicit `expected` list or a manifest path to discover the
-full partition set (`build/partitions.json` produced by `jerry inspect partitions`).
-
-Once the book is ready, run the bootstrapper (the CLI does this automatically) to
-materialize all registered sources and streams
-(`src/datapipeline/services/bootstrap.py`).
-
----
-
-## Running service
-
-### Prep any station (with visuals)
-
-```bash
-jerry prep pour --project config/datasets/default/project.yaml --limit 20
-jerry prep build --project config/datasets/default/project.yaml --limit 20
-jerry prep stir --project config/datasets/default/project.yaml --limit 20
-```
-
-- `prep pour` shows the record-stage ingredients headed for each feature.
-- `prep build` highlights `FeatureRecord` entries after the shake/strain sequence.
-- `prep stir` emits grouped vectors—the tasting flight before it leaves the pass.
-
-All variants respect `--limit` and display tqdm-powered progress bars for the underlying
-loaders. The CLI wires up `build_record_pipeline`, `build_feature_pipeline` and
-`build_vector_pipeline`, so what you see mirrors the service line
-(`src/datapipeline/cli/app.py`, `src/datapipeline/cli/commands/run.py`,
-`src/datapipeline/cli/openers.py`, `src/datapipeline/cli/visuals.py`,
-`src/datapipeline/pipeline/pipelines.py`).
-
-### Serve the flights (production mode)
-
-```bash
-jerry serve --project config/datasets/default/project.yaml --output print
-jerry serve --project config/datasets/default/project.yaml --output stream
-jerry serve --project config/datasets/default/project.yaml --output exports/batch.pt
-```
-
-Production mode skips the bar flair and focuses on throughput. `print` writes tasting
-notes to stdout, `stream` emits newline-delimited JSON (with values coerced to strings when
-necessary), and a `.pt` destination stores a pickle-compatible payload for later pours.
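The removed README above only says the `.pt` output is a "pickle-compatible payload"; it does not document the object stored inside. A minimal sketch of reading it back, assuming the `exports/batch.pt` path from the serve example and an unspecified payload structure:

```python
# Sketch: load a payload written by `jerry serve ... --output exports/batch.pt`.
# The payload's internal structure (list of vectors, dict, etc.) is an assumption;
# inspect it before relying on any particular schema.
import pickle

with open("exports/batch.pt", "rb") as handle:
    payload = pickle.load(handle)

print(type(payload))  # see what the pipeline actually stored
```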
-
-## Funnel vectors into ML projects
-
-Data scientists rarely want to shell out to the CLI; they need a programmatic
-hand-off that plugs vectors straight into notebooks, feature stores or training
-loops. The `datapipeline.integrations` package wraps the existing iterator
-builders with ML-friendly adapters without pulling pandas or torch into the
-core runtime.
-
-```python
-from datapipeline.integrations import (
-    VectorAdapter,
-    dataframe_from_vectors,
-    iter_vector_rows,
-    torch_dataset,
-)
-
-# Bootstrap once and stream ready-to-use rows.
-adapter = VectorAdapter.from_project("config/project.yaml")
-for row in adapter.iter_rows(limit=32, flatten_sequences=True):
-    send_to_feature_store(row)
-
-# Helper functions cover ad-hoc jobs as well.
-rows = iter_vector_rows(
-    "config/project.yaml",
-    include_group=True,
-    group_format="mapping",
-    flatten_sequences=True,
-)
-
-# Optional extras materialize into common ML containers if installed.
-df = dataframe_from_vectors("config/project.yaml")  # Requires pandas
-dataset = torch_dataset("config/project.yaml", dtype=torch.float32)  # Requires torch
-```
-
-Everything still flows through `build_vector_pipeline`; the integration layer
-normalizes group keys, optionally flattens sequence features and demonstrates
-how to turn the iterator into DataFrames or `torch.utils.data.Dataset`
-instances. ML teams can fork the same pattern for their own stacks—Spark, NumPy
-or feature store SDKs—without adding opinionated glue to the runtime itself.
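Since the removed README states that `torch_dataset` produces `torch.utils.data.Dataset` instances, a minimal sketch of feeding it into a standard `DataLoader` could look like the following; the batch size and the assumption that rows collate cleanly (no ragged sequence features) are mine, not documented guarantees:

```python
# Sketch: wrap the dataset from torch_dataset(...) in a plain DataLoader.
# If sequence features have variable length, a custom collate_fn would be needed.
import torch
from torch.utils.data import DataLoader

from datapipeline.integrations import torch_dataset

dataset = torch_dataset("config/project.yaml", dtype=torch.float32)
loader = DataLoader(dataset, batch_size=32, shuffle=False)

for batch in loader:
    print(batch)  # hand the batch to a training or evaluation step instead
    break
```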
-
-### Inspect the balance (vector quality)
-
-Use the inspect helpers for different outputs:
-
-- `jerry inspect report --project config/datasets/default/project.yaml` — print a
-  human-readable quality report (totals, keep/below lists, optional partition detail).
-- `jerry inspect coverage --project config/datasets/default/project.yaml` — persist the
-  coverage summary to `build/coverage.json` (keep/below feature and partition lists plus
-  coverage percentages).
-- `jerry inspect matrix --project config/datasets/default/project.yaml --format html` —
-  export availability matrices (CSV or HTML) for deeper analysis.
-- `jerry inspect partitions --project config/datasets/default/project.yaml` — write the
-  observed partition manifest to `build/partitions.json` for use in configs.
-
-Note: `jerry prep taste` has been removed; use `jerry inspect report` and friends.
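A minimal sketch of consuming the `build/coverage.json` summary described above; the README only says it holds keep/below feature and partition lists plus coverage percentages, so the key names below are illustrative guesses rather than a documented schema:

```python
# Sketch: read the coverage summary written by `jerry inspect coverage`.
import json
from pathlib import Path

summary = json.loads(Path("build/coverage.json").read_text())
print(sorted(summary))                             # discover the actual top-level keys first
print(summary.get("keep"), summary.get("below"))   # hypothetical keys for the two lists
```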
-
----
-
-## Extending the CLI
-
-### Scaffold a plugin package
-
-```bash
-jerry plugin init --name my_datapipeline --out .
-```
-
-The generator copies a ready-made skeleton (pyproject, README, package directory) and
-swaps placeholders for your package name so you can start adding new spirits immediately
-(`src/datapipeline/cli/app.py`, `src/datapipeline/services/scaffold/plugin.py`). Install the
-resulting project in editable mode to expose your loaders, parsers, mappers and
-transforms.
-
-### Create new sources, domains and contracts
-
-Use the CLI helpers to scaffold boilerplate code in your plugin workspace:
-
-```bash
-jerry source add --provider dmi --dataset metobs --transport fs --format csv
-jerry domain add --domain metobs
-jerry contract
-```
-
-The source command writes DTO/parser stubs, updates entry points and drops a matching
-YAML file in `config/sources/` pre-filled with composed-loader defaults for the chosen
-transport (`src/datapipeline/cli/app.py`, `src/datapipeline/services/scaffold/source.py`).
-`jerry domain add` now always scaffolds `TimeSeriesRecord` domains so every mapper carries
-an explicit timestamp alongside its value, and `jerry contract` wires that source/domain
-pair up for canonical stream generation.
-
-### Add custom filters or transforms
-
-Register new functions/classes under the appropriate entry point group in your plugin’s
-`pyproject.toml`. The runtime resolves them through `load_ep`, applies record filters first,
-then record/feature/sequence transforms in the order declared in the dataset config
-(`pyproject.toml`, `src/datapipeline/utils/load.py`,
-`src/datapipeline/pipeline/utils/transform_utils.py`). Built-in helpers cover common
-comparisons (including timezone-aware checks) and time-based transforms (lags, sliding
-windows) if you need quick wins (`src/datapipeline/filters/filters.py`,
-`src/datapipeline/transforms/record.py`, `src/datapipeline/transforms/feature.py`,
-`src/datapipeline/transforms/sequence.py`).
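A hypothetical sketch of such a custom record filter is shown below; the callable signature and the entry-point group name are assumptions, since the removed README does not spell them out:

```python
# Hypothetical custom filter for a plugin package. The real signature expected by the
# runtime and the exact entry-point group names are not documented in this README,
# so treat both as placeholders to adapt.

def value_above(record, threshold: float = 0.0) -> bool:
    """Keep records whose `value` exceeds a threshold (placeholder predicate)."""
    value = getattr(record, "value", None)
    return value is not None and value > threshold

# Registration would then go in the plugin's pyproject.toml under the appropriate
# entry-point group, e.g. (group name illustrative):
#   [project.entry-points."<filter entry-point group>"]
#   value_above = "my_datapipeline.filters:value_above"
```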
-
-### Prototype with synthetic time-series data
-
-Need sample pours while wiring up transforms? Reuse the bundled synthetic time loader +
-parser and season it with the `encode_time` mapper for engineered temporal features
-(`src/datapipeline/sources/synthetic/time/loader.py`,
-`src/datapipeline/sources/synthetic/time/parser.py`,
-`src/datapipeline/mappers/synthetic/time.py`). Pair it with the `time_window` sequence
-transform to build sliding-window feature flights without external datasets
-(`src/datapipeline/transforms/sequence.py`).
-
----
-
-## Data model tasting notes
-
-| Type | Description |
-| ---- | ----------- |
-| `TimeSeriesRecord` | Canonical record with `time` (tz-aware, normalized to UTC) and `value`; the pipeline treats streams as ordered series (`src/datapipeline/domain/record.py`). |
-| `FeatureRecord` | Links a record (or list of records from sequence transforms) to a `feature_id` and `group_key` (`src/datapipeline/domain/feature.py`). |
-| `Vector` | Final grouped payload: a mapping of feature IDs to scalars or ordered lists plus helper methods for shape/key access (`src/datapipeline/domain/vector.py`). |
-
----
-
-## Developer shift checklist
-
-These commands mirror the tooling used in CI and are useful while iterating locally:
-
-```bash
-pip install -e .[dev]
-pytest
-```
jerry_thomas-0.2.0.dist-info/RECORD (removed)
@@ -1,112 +0,0 @@
-datapipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/plugins.py,sha256=RPdzS1TUDLVsuLal-EMiADHwkn1TVJhiG1ukEcCH5LE,837
-datapipeline/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/analysis/vector_analyzer.py,sha256=rZEAZC3xqsI83-VikKbCf3tHbhpCtEVYvmD9c6ui39U,27158
-datapipeline/cli/app.py,sha256=iSHcwKvBFPwjSxsAoG-dzpiaC118WJ_8CQ3VNJ44DIc,13921
-datapipeline/cli/visual_source.py,sha256=8Nl8KKwQh1apNkj-OzsteaA54K6AvX5daItHBAvqBsU,1174
-datapipeline/cli/visuals.py,sha256=9OXMJh8B1a5_6_9sS7mgY4UeSu6fUhSuxsauVm8HTkU,2462
-datapipeline/cli/commands/domain.py,sha256=w1xd19wtVWslqg_AFTh0m9uIR3Zr341Rt_8YZpRosf8,304
-datapipeline/cli/commands/filter.py,sha256=IeAsp9KHm98y65oGGcQuoZxzVDz243If6l0dkN1F39s,304
-datapipeline/cli/commands/inspect.py,sha256=kmXTqPOMg2mYic6lmZuoc9lxhs0-XCVExco7aVU0wnQ,5885
-datapipeline/cli/commands/link.py,sha256=KsL1-V0o15DrFSYLNttAqQPJqKkhtrzsif9EWZdva_0,5031
-datapipeline/cli/commands/list_.py,sha256=cCrx35CPNMyAaOVvVxkZje4DAx7K7HdGpngzZwhNn40,943
-datapipeline/cli/commands/plugin.py,sha256=Ab24t0DwTIqGGjPcVfI0a0yue37cLb4ff-ZcI0ZLj9g,327
-datapipeline/cli/commands/run.py,sha256=8CQR-DsNqzEm0oWr8efKh7_i8Bj2Rh0BVwGFW_lfd3Y,5211
-datapipeline/cli/commands/source.py,sha256=bCIY15rYGx7aYensnQ4WbInRopLee458KF0pHCk-sqg,810
-datapipeline/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/common/geo.py,sha256=oFWPhU9V3jJUloPVktLTxHJttdZxJh97RFUOm0B0Kfw,292
-datapipeline/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/config/catalog.py,sha256=ezGSy2yFybnYZ2EHU9IjDdeFVHgHhQbePAHGXFPY9b8,1012
-datapipeline/config/project.py,sha256=23t5UVNQXzJa5hy6ungCi3ipWllKyFQXId55uLhqRVA,502
-datapipeline/config/dataset/dataset.py,sha256=an7S1CAEZ5bYNkgO5uHTnKJfSrfWSE4CjCMM4FN-L-s,629
-datapipeline/config/dataset/feature.py,sha256=2Hxz0FXZskLI4ICXhmlG6b1Vvxzh0Ql9e6BwjMRtzSs,346
-datapipeline/config/dataset/loader.py,sha256=mCOXorU6g2UbPWQjkln7N24b8NPJju4Fg6C8u1pDri4,4187
-datapipeline/config/dataset/normalize.py,sha256=66yvPvbQyef6qQtyJOGTkAo5Q5ldDqpk-XIHx9hYH4c,825
-datapipeline/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/domain/feature.py,sha256=7BOI4H458BKU8B9vqdfez7WOO1YKiF6lt0oy7PMbqrQ,295
-datapipeline/domain/record.py,sha256=Q-QjuR3FbQ01_vJ5LT9k8E40C-oYmOvWdbqtwtfIHPM,709
-datapipeline/domain/vector.py,sha256=1DKa1eqigz966itN-6noc8E2d67D-73u55YCE8WhYsU,1278
-datapipeline/filters/filters.py,sha256=oU4iu8JeJsbsKtLWJNeBBXOqwBk3uKaPzvVqkh8yw-Y,2650
-datapipeline/integrations/__init__.py,sha256=tjTLsIa6NRWKI05wjwPAUuXozDA-gP98SccFJ9lYHs8,410
-datapipeline/integrations/ml.py,sha256=fqzdF3JcLV_tazST4sicPsXjG6ZyDcKiNMKyRVgllpw,10483
-datapipeline/mappers/noop.py,sha256=L8bH1QVbLH-ogIam0ppYdx7KuWQ7Dj44lvD8tvNlY0Q,111
-datapipeline/mappers/synthetic/time.py,sha256=lt1pC0May6Y4E8bZO4sERm3D04_r-qv63Y5fwrtCaBQ,639
-datapipeline/parsers/identity.py,sha256=pdGuz0SSQGfySPpvZSnLgfTXTkC36x-7dQMMei3XhsU,321
-datapipeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/pipeline/pipelines.py,sha256=j1TioHie927r4q8dILCjnUJsanpnfrbw2AlP-lnIIYc,2865
-datapipeline/pipeline/stages.py,sha256=ESgBNck0POKgNtFWfwvuJtQ01qm6pzyd6QP0lZcNGvg,4402
-datapipeline/pipeline/utils/keygen.py,sha256=0gqgfxBAWNwfBHN37G5dIfurdfS25jLsPWu5SKjh2gg,1402
-datapipeline/pipeline/utils/memory_sort.py,sha256=iPWcD81xtZZ8SXEX_Ph-hUCnpKlJMFybbxPZVOQdSs8,735
-datapipeline/pipeline/utils/ordering.py,sha256=ZX18I7GFtvyMFJB96vWQpTOGwljjeJ6ppCg5a3Av3es,1450
-datapipeline/pipeline/utils/transform_utils.py,sha256=05Udkl8K4Mot_nZ_Ih3q0oNmpnTthjgXyXE3QC0Xi4s,1826
-datapipeline/registries/registries.py,sha256=2d6sut7AgmmxZIWr_3FbzDs5UxPdrOQHKuZNNwJVpak,774
-datapipeline/registries/registry.py,sha256=MWWOHz2wT1oHQmovodtEreEuQhvH-i11Y2yXUUgZJhQ,641
-datapipeline/services/bootstrap.py,sha256=UNYoMJOJ2wCMlo0ZWVeIdpqdY6Uz7T8TUzpIIoIyHI8,6426
-datapipeline/services/constants.py,sha256=ZeTjk1mmxKVsKmRm4BJvnCnQ3Rwqh8ICUP_-VHFzNWE,336
-datapipeline/services/entrypoints.py,sha256=ZmIh2Oq0M2Jy32Iqyfif69MjcYm8SatGts-zh4n33YE,2505
-datapipeline/services/factories.py,sha256=Nmy5gXJOVquhkdXgJlQauUc4bvqCsFWh4T-Tj0qQznU,854
-datapipeline/services/paths.py,sha256=6rjGaqHa37H8ylN9lD4nvPYKk8lxkWak1JcQ1qwhFxk,962
-datapipeline/services/project_paths.py,sha256=IHcGr8RqRAEWmQnR6IHuMCRqokHu6XLL69lwYafFLEE,2507
-datapipeline/services/scaffold/__init__.py,sha256=PaQNtYki9Kc7mQPnUtKDPU-rKohLHoXLvFVwdHdbXNM,68
-datapipeline/services/scaffold/domain.py,sha256=2xmBv1dpEB-ZnOguQB0EptIN4gNDFs-3QttKn5wtLX4,959
-datapipeline/services/scaffold/filter.py,sha256=FmlnmZKLu_BbjBxqSekI5OWD2iTkLhGx0XWPpjj2F1o,1059
-datapipeline/services/scaffold/mappers.py,sha256=559IBU_-pIoqwzwVy8dFpsTVd3uBXPEJtrvC6ETYfqk,1991
-datapipeline/services/scaffold/plugin.py,sha256=1hTM8hBHm05_7uYABeAvyVAD_wlf8DpgK7QqUKfm7MI,874
-datapipeline/services/scaffold/source.py,sha256=HWYBcIpKeau6Slhu1cJWfcg7DPWEEABtVXwvKG7vv00,5610
-datapipeline/services/scaffold/templates.py,sha256=B3YnZpFUZLynijJosTNxZQLXnPP_Y_t1RHqfI1lGOxU,634
-datapipeline/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/sources/composed_loader.py,sha256=dUmdAYncL6IJqwLhKpQ3AVyFNyut-_K55isSTr-iDXo,1350
-datapipeline/sources/decoders.py,sha256=OEozYeV9bAA0yiWebPRvq_qKt0KXArk5jBJk_2sr7iI,1970
-datapipeline/sources/factory.py,sha256=J177Y1TJsAJZimd54bKkJl1c7-rq8e2cs04ie3tHY8E,2097
-datapipeline/sources/transports.py,sha256=yBfRLlZxxP4INQ5uFSR8zJecjUx_XlwU6KMQqJUYx44,2080
-datapipeline/sources/models/__init__.py,sha256=rS3Rc8L2eXSd2CW9ejRConk9pmV8Zv7QCz3ekSIdcLQ,397
-datapipeline/sources/models/base.py,sha256=MAUawd11fII-mxxuSPM4f6H1t1tbyZX_QWhoAgeYUcU,238
-datapipeline/sources/models/generator.py,sha256=JK5o2k3aoNR8hVq2RP7WOyAmoBz6leV95cMgrxuvtzw,545
-datapipeline/sources/models/loader.py,sha256=NbmRSNM1eU-6A30qsoNllFXA9HCDF4Shg14y3b_Fc0I,1092
-datapipeline/sources/models/parser.py,sha256=Ts31aksHLDCw5ovF2D99w9g_j-NnEiZ8x0JHtUxmmXs,226
-datapipeline/sources/models/source.py,sha256=PBtbJVdyuRABPGFSwkyDaSmT1BuHk2faL-lUvRFpOAo,796
-datapipeline/sources/models/synthetic.py,sha256=uGi46h8b-bV0S0bArcs5RhiTvqqguWZjkq6X1Hir7QQ,290
-datapipeline/sources/synthetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/sources/synthetic/time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/sources/synthetic/time/loader.py,sha256=3bIlRSCmEJlES-MUX3ICyU1kLJoY6BCcyDEyVH0xSWw,1229
-datapipeline/sources/synthetic/time/parser.py,sha256=d3GZMQ7L1Qi4LeEm7U3y0_pk0RdhskioQukYyqyoqic,343
-datapipeline/templates/plugin_skeleton/README.md,sha256=5OtDUSM2pexqrwVAlvl54kE8ARv-V-9UJy97UPdnpVI,2805
-datapipeline/templates/plugin_skeleton/pyproject.toml,sha256=knrTg5zYPiTzj7jpCaO4LMWzdHLgxtZaDHF3czckwOc,265
-datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.yaml,sha256=PLbmLw2fKbpMsSRgiM2BKEzD5WYcD-60BpCVnLU_j70,581
-datapipeline/templates/plugin_skeleton/config/contracts/time_linear.yaml,sha256=OgYm92pe3DY_QrcoM2XTqbiu8jvBJI1BbmHu1brVHYk,1304
-datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml,sha256=3uuRS72Wy9iRUEtNkm6frq-AS-YA6oB6BprJc6CNzvs,764
-datapipeline/templates/plugin_skeleton/config/datasets/default/project.yaml,sha256=03XiBqNqVHg2aysjYt-C1adeI_aWz1C4xeU--HkE4Ic,171
-datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml,sha256=7tie6CqEmOOK8M629f2WZDWKaUPr9bePMC6Oj8RsqB8,190
-datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/templates/stubs/dto.py.j2,sha256=nenydq0t1PmXi4ChD7o8yVp6fRbpKu-NNUgddM6WiLQ,734
-datapipeline/templates/stubs/filter.py.j2,sha256=3LgRgAL_HRaENOOqQx8NdeM1AUy-T0rtHVTA7N2oWOs,466
-datapipeline/templates/stubs/loader_synthetic.py.j2,sha256=9SQBeTBGlZmKs6nSYBKd8nbOPcFHgDx17Mh8xOEQnvs,1285
-datapipeline/templates/stubs/mapper.py.j2,sha256=sF3ME7IVREKR6jDsRfBNbDt5ppoIsfHTvCYGQauTKpU,713
-datapipeline/templates/stubs/parser.py.j2,sha256=CWWsPKWH56kOQ5X-N4NsSKrN__HYVs1pHHRy2FbVYuI,632
-datapipeline/templates/stubs/parser_custom.py.j2,sha256=0Nytq43JdTZoyRj-4Mz6HWdMTmOP3VlFuYOB_A_13Vg,580
-datapipeline/templates/stubs/record.py.j2,sha256=bzI8Jt0hcInLpn-IlyL2a8-q4VQY4gyZ2Z5BAMB70k4,660
-datapipeline/templates/stubs/source.yaml.j2,sha256=mPOfYD3Hyvaw-lSgJslfsP2VqOp7qsg_ePOLvyTeSRw,416
-datapipeline/transforms/filter.py,sha256=Jb4SIqic5xrCoyY9zQoABwhtckp55q4xkFN-g2On-qA,1294
-datapipeline/transforms/sequence.py,sha256=5i-0w1jQcSHy12rhztBhzyhJ2FdnVbD35NWcPXPi_kQ,3059
-datapipeline/transforms/utils.py,sha256=4ad3v0fhyl6sgHg9EHyf8C8U-46c8CLOZwBGK9vi-aY,194
-datapipeline/transforms/vector.py,sha256=sOksLOa60oGOX75RTwsnhdqJZXmaA1tjx9d8Pm296us,8043
-datapipeline/transforms/vector_utils.py,sha256=4rKzcAADE9OKXSrqr4X_eEya30cULymUPoSB_JVGbBk,2420
-datapipeline/transforms/debug/identity.py,sha256=6bwnEYhMBYw0YPrMccrZPXDOQM4r_-odsKo8Hhpbz10,2515
-datapipeline/transforms/debug/lint.py,sha256=6EBzGOfYjJbHzcZIIzVixlvW5RVr7liw6DieuWwxNUM,4057
-datapipeline/transforms/feature/model.py,sha256=gB-GP80_P7bzEKJFSM4leRke75yiD4-S5eJ1p8g3JU8,382
-datapipeline/transforms/feature/scaler.py,sha256=tExlpsVK8TNMC_qpPx5QdyX6AAMbUPmdvBNZAMZMS8E,3315
-datapipeline/transforms/record/floor_time.py,sha256=dKxLjnmBNJmDClPQfuKBEM_lrW-356v8XfQtLog5K2k,627
-datapipeline/transforms/record/lag.py,sha256=5wrPyVNFvidvdQddnK6ZeUOI5I8rfXEbzIg6tzKiJu4,536
-datapipeline/transforms/stream/ensure_ticks.py,sha256=Q0AwKuRY2nRIOUKoaAINeAWUEuoOzSh30Ug5k8296Kw,1170
-datapipeline/transforms/stream/fill.py,sha256=-NJhPD3LP_G7E2oLMBvNOFtdhGhjgCSMR9hM3_QPMAo,4230
-datapipeline/transforms/stream/granularity.py,sha256=PzHDGDwyn8P07BCbcFZaorS_7lbAbEdMLqD9Wy61y0M,3376
-datapipeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datapipeline/utils/load.py,sha256=uQUqwyyAXXVVcBKjzX0RjoP_Nuc3zgBN1BplLZpJIYw,1282
-datapipeline/utils/paths.py,sha256=5Y5rhNbjTiybUHfq9VfRMJ4gUfN9UltonM-4MABEG8w,798
-datapipeline/utils/time.py,sha256=vOqa2arqwEqbDo-JWEhOFPMnI1E4Ib3i1L-Rt-cGH8c,1072
-jerry_thomas-0.2.0.dist-info/licenses/LICENSE,sha256=pkBMylAJF5yChHAkdxwFhEptLGx13i-XFEKh-Sh6DkM,1073
-jerry_thomas-0.2.0.dist-info/METADATA,sha256=LUh6IrWqfk1fjG-MMSO4d-GViqZ8qzc4sPhKkjZo3zw,19202
-jerry_thomas-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-jerry_thomas-0.2.0.dist-info/entry_points.txt,sha256=7GKSNCgwbzIqI3_LdbOro2eiAbBvbpoOxAuh2XcqBN0,1669
-jerry_thomas-0.2.0.dist-info/top_level.txt,sha256=N8aoNPdPyHefODO4YAm7tqTaUcw0e8LDcqycFTf8TbM,13
-jerry_thomas-0.2.0.dist-info/RECORD,,
- File without changes: {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/WHEEL
- File without changes: {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/entry_points.txt
- File without changes: {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/licenses/LICENSE
- File without changes: {jerry_thomas-0.2.0.dist-info → jerry_thomas-0.3.0.dist-info}/top_level.txt