jerry-thomas 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. datapipeline/analysis/vector/collector.py +120 -17
  2. datapipeline/analysis/vector/matrix.py +33 -8
  3. datapipeline/analysis/vector/report.py +162 -32
  4. datapipeline/build/tasks/__init__.py +11 -0
  5. datapipeline/build/tasks/config.py +74 -0
  6. datapipeline/build/tasks/metadata.py +170 -0
  7. datapipeline/build/tasks/scaler.py +73 -0
  8. datapipeline/build/tasks/schema.py +60 -0
  9. datapipeline/build/tasks/utils.py +169 -0
  10. datapipeline/cli/app.py +304 -127
  11. datapipeline/cli/commands/build.py +240 -16
  12. datapipeline/cli/commands/contract.py +367 -0
  13. datapipeline/cli/commands/domain.py +8 -3
  14. datapipeline/cli/commands/inspect.py +401 -149
  15. datapipeline/cli/commands/list_.py +30 -7
  16. datapipeline/cli/commands/plugin.py +5 -1
  17. datapipeline/cli/commands/run.py +227 -241
  18. datapipeline/cli/commands/run_config.py +101 -0
  19. datapipeline/cli/commands/serve_pipeline.py +156 -0
  20. datapipeline/cli/commands/source.py +44 -8
  21. datapipeline/cli/visuals/__init__.py +4 -2
  22. datapipeline/cli/visuals/common.py +239 -0
  23. datapipeline/cli/visuals/labels.py +15 -15
  24. datapipeline/cli/visuals/runner.py +66 -0
  25. datapipeline/cli/visuals/sections.py +20 -0
  26. datapipeline/cli/visuals/sources.py +132 -119
  27. datapipeline/cli/visuals/sources_basic.py +260 -0
  28. datapipeline/cli/visuals/sources_off.py +76 -0
  29. datapipeline/cli/visuals/sources_rich.py +414 -0
  30. datapipeline/config/catalog.py +37 -3
  31. datapipeline/config/context.py +214 -0
  32. datapipeline/config/dataset/loader.py +21 -4
  33. datapipeline/config/dataset/normalize.py +4 -4
  34. datapipeline/config/metadata.py +43 -0
  35. datapipeline/config/postprocess.py +2 -2
  36. datapipeline/config/project.py +3 -2
  37. datapipeline/config/resolution.py +129 -0
  38. datapipeline/config/tasks.py +309 -0
  39. datapipeline/config/workspace.py +155 -0
  40. datapipeline/domain/__init__.py +12 -0
  41. datapipeline/domain/record.py +11 -0
  42. datapipeline/domain/sample.py +54 -0
  43. datapipeline/integrations/ml/adapter.py +34 -20
  44. datapipeline/integrations/ml/pandas_support.py +0 -2
  45. datapipeline/integrations/ml/rows.py +1 -6
  46. datapipeline/integrations/ml/torch_support.py +1 -3
  47. datapipeline/io/factory.py +112 -0
  48. datapipeline/io/output.py +132 -0
  49. datapipeline/io/protocols.py +21 -0
  50. datapipeline/io/serializers.py +219 -0
  51. datapipeline/io/sinks/__init__.py +23 -0
  52. datapipeline/io/sinks/base.py +2 -0
  53. datapipeline/io/sinks/files.py +79 -0
  54. datapipeline/io/sinks/rich.py +57 -0
  55. datapipeline/io/sinks/stdout.py +18 -0
  56. datapipeline/io/writers/__init__.py +14 -0
  57. datapipeline/io/writers/base.py +28 -0
  58. datapipeline/io/writers/csv_writer.py +25 -0
  59. datapipeline/io/writers/jsonl.py +52 -0
  60. datapipeline/io/writers/pickle_writer.py +30 -0
  61. datapipeline/pipeline/artifacts.py +58 -0
  62. datapipeline/pipeline/context.py +66 -7
  63. datapipeline/pipeline/observability.py +65 -0
  64. datapipeline/pipeline/pipelines.py +65 -13
  65. datapipeline/pipeline/split.py +11 -10
  66. datapipeline/pipeline/stages.py +127 -16
  67. datapipeline/pipeline/utils/keygen.py +20 -7
  68. datapipeline/pipeline/utils/memory_sort.py +22 -10
  69. datapipeline/pipeline/utils/transform_utils.py +22 -0
  70. datapipeline/runtime.py +5 -2
  71. datapipeline/services/artifacts.py +12 -6
  72. datapipeline/services/bootstrap/config.py +25 -0
  73. datapipeline/services/bootstrap/core.py +52 -37
  74. datapipeline/services/constants.py +6 -5
  75. datapipeline/services/factories.py +123 -1
  76. datapipeline/services/project_paths.py +43 -16
  77. datapipeline/services/runs.py +208 -0
  78. datapipeline/services/scaffold/domain.py +3 -2
  79. datapipeline/services/scaffold/filter.py +3 -2
  80. datapipeline/services/scaffold/mappers.py +9 -6
  81. datapipeline/services/scaffold/plugin.py +54 -10
  82. datapipeline/services/scaffold/source.py +93 -56
  83. datapipeline/sources/{composed_loader.py → data_loader.py} +9 -9
  84. datapipeline/sources/decoders.py +83 -18
  85. datapipeline/sources/factory.py +26 -16
  86. datapipeline/sources/models/__init__.py +2 -2
  87. datapipeline/sources/models/generator.py +0 -7
  88. datapipeline/sources/models/loader.py +3 -3
  89. datapipeline/sources/models/parsing_error.py +24 -0
  90. datapipeline/sources/models/source.py +6 -6
  91. datapipeline/sources/synthetic/time/loader.py +14 -2
  92. datapipeline/sources/transports.py +74 -37
  93. datapipeline/templates/plugin_skeleton/README.md +76 -30
  94. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +31 -0
  95. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +30 -0
  96. datapipeline/templates/plugin_skeleton/example/dataset.yaml +18 -0
  97. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +29 -0
  98. datapipeline/templates/plugin_skeleton/{config/datasets/default → example}/project.yaml +11 -8
  99. datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml +12 -0
  100. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +3 -0
  101. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +9 -0
  102. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +2 -0
  103. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +4 -0
  104. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +28 -0
  105. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +4 -0
  106. datapipeline/templates/plugin_skeleton/jerry.yaml +34 -0
  107. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +31 -0
  108. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +30 -0
  109. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +18 -0
  110. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +29 -0
  111. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +22 -0
  112. datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +12 -0
  113. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +3 -0
  114. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +9 -0
  115. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +2 -0
  116. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +4 -0
  117. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +28 -0
  118. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +4 -0
  119. datapipeline/templates/stubs/dto.py.j2 +2 -0
  120. datapipeline/templates/stubs/mapper.py.j2 +5 -4
  121. datapipeline/templates/stubs/parser.py.j2 +2 -0
  122. datapipeline/templates/stubs/record.py.j2 +2 -0
  123. datapipeline/templates/stubs/source.yaml.j2 +2 -3
  124. datapipeline/transforms/debug/lint.py +26 -41
  125. datapipeline/transforms/feature/scaler.py +89 -13
  126. datapipeline/transforms/record/floor_time.py +4 -4
  127. datapipeline/transforms/sequence.py +2 -35
  128. datapipeline/transforms/stream/dedupe.py +24 -0
  129. datapipeline/transforms/stream/ensure_ticks.py +7 -6
  130. datapipeline/transforms/vector/__init__.py +5 -0
  131. datapipeline/transforms/vector/common.py +98 -0
  132. datapipeline/transforms/vector/drop/__init__.py +4 -0
  133. datapipeline/transforms/vector/drop/horizontal.py +79 -0
  134. datapipeline/transforms/vector/drop/orchestrator.py +59 -0
  135. datapipeline/transforms/vector/drop/vertical.py +182 -0
  136. datapipeline/transforms/vector/ensure_schema.py +184 -0
  137. datapipeline/transforms/vector/fill.py +87 -0
  138. datapipeline/transforms/vector/replace.py +62 -0
  139. datapipeline/utils/load.py +24 -3
  140. datapipeline/utils/rich_compat.py +38 -0
  141. datapipeline/utils/window.py +76 -0
  142. jerry_thomas-1.0.1.dist-info/METADATA +825 -0
  143. jerry_thomas-1.0.1.dist-info/RECORD +199 -0
  144. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/entry_points.txt +9 -8
  145. datapipeline/build/tasks.py +0 -186
  146. datapipeline/cli/commands/link.py +0 -128
  147. datapipeline/cli/commands/writers.py +0 -138
  148. datapipeline/config/build.py +0 -64
  149. datapipeline/config/run.py +0 -116
  150. datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml +0 -24
  151. datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml +0 -23
  152. datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +0 -9
  153. datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -14
  154. datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +0 -13
  155. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +0 -10
  156. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +0 -10
  157. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +0 -10
  158. datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml +0 -11
  159. datapipeline/transforms/vector.py +0 -210
  160. jerry_thomas-0.3.0.dist-info/METADATA +0 -502
  161. jerry_thomas-0.3.0.dist-info/RECORD +0 -139
  162. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/WHEEL +0 -0
  163. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/licenses/LICENSE +0 -0
  164. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,502 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: jerry-thomas
3
- Version: 0.3.0
4
- Summary: Jerry-Thomas: a stream-first, plugin-friendly data pipeline (mixology-themed CLI)
5
- Author: Anders Skott Lind
6
- License: MIT
7
- Requires-Python: >=3.10
8
- Description-Content-Type: text/markdown
9
- License-File: LICENSE
10
- Requires-Dist: numpy<3.0,>=1.24
11
- Requires-Dist: pydantic>=2.0
12
- Requires-Dist: PyYAML>=5.4
13
- Requires-Dist: tqdm>=4.0
14
- Requires-Dist: jinja2>=3.0
15
- Provides-Extra: ml
16
- Requires-Dist: pandas>=2.0; extra == "ml"
17
- Requires-Dist: torch>=2.0; extra == "ml"
18
- Dynamic: license-file
19
-
20
- # Datapipeline Runtime
21
-
22
- Jerry Thomas is a time-series-first data pipeline runtime. It turns declarative
23
- YAML projects into iterators that stream records, engineered features, and
24
- model-ready vectors. The CLI lets you preview every stage, build deterministic
25
- artifacts, inspect coverage, and scaffold plugins for custom loaders, parsers,
26
- transforms, and filters.
27
-
28
- > **Core assumptions**
29
- >
30
- > - Every record carries a timezone-aware `time` attribute and a numeric
31
- > `value`.
32
- > - Grouping is purely temporal. Dimensional splits belong in `partition_by`.
33
-
34
- ---
35
-
36
- ## Why You Might Use It
37
-
38
- - Materialize canonical time-series datasets from disparate sources.
39
- - Preview and debug each stage of the pipeline without writing ad-hoc scripts.
40
- - Enforce coverage/quality gates and publish artifacts (expected IDs, scaler
41
- stats) for downstream ML teams.
42
- - Extend the runtime with entry-point driven plugins for domain-specific I/O or
43
- feature engineering.
44
- - Consume vectors directly from Python via iterators, Pandas DataFrames, or
45
- `torch.utils.data.Dataset`.
46
-
47
- ---
48
-
49
- ## Quick Start
50
-
51
- ```bash
52
- # 1. Install in editable mode (with optional dev extras for testing).
53
- pip install -e .[dev]
54
-
55
- # 2. Bootstrap a project (scaffolds configs, plugin package, and templates).
56
- jerry plugin init --name my_datapipeline --out .
57
-
58
- # 3. Create a source & domain scaffold, then declare a canonical stream.
59
- jerry source add --provider demo --dataset weather --transport fs --format csv
60
- jerry domain add --domain weather
61
- # (edit config/contracts/<alias>.yaml to point at your mapper and policies)
62
-
63
- # 4. Configure dataset/postprocess/build files in config/datasets/<name>/.
64
- # Then preview the pipeline and serve a few vectors:
65
- jerry serve --project config/datasets/default/project.yaml --stage 2 --limit 5
66
- jerry serve --project config/datasets/default/project.yaml --output print --limit 3
67
-
68
- # 5. Inspect coverage and build artifacts:
69
- jerry inspect report --project config/datasets/default/project.yaml
70
- jerry build --project config/datasets/default/project.yaml
71
- ```
72
-
73
- The skeleton project in `src/datapipeline/templates/plugin_skeleton/` mirrors the
74
- paths expected by the CLI. Copy it or run `jerry plugin init` to get a ready-made
75
- layout with `config/`, `src/<package>/`, and entry-point stubs.
76
-
77
- ---
78
-
79
- ## Pipeline Architecture
80
-
81
- ```text
82
- raw source ──▶ canonical stream ──▶ record stage ──▶ feature stage ──▶ vector stage
83
- ```
84
-
85
- 1. **Raw sources** pair a loader with a parser. Loaders fetch bytes (file system,
86
- HTTP, synthetic generators). Parsers turn those bytes into typed DTOs.
87
- Register them via entry points (`loaders`, `parsers`) and declaratively wire
88
- them in `config/sources/*.yaml`.
89
- 2. **Canonical streams** decorate raw sources with mappers and per-stream
90
- policies. Contract files under `config/contracts/` define record transforms,
91
- feature transforms, sort hints, and partitioning.
92
- 3. **Record stage** applies canonical policies to DTOs, turning them into
93
- `TemporalRecord` instances (tz-aware timestamp + numeric value).
94
- 4. **Feature stage** wraps records into `FeatureRecord`s, handles per-feature
95
- sorting, optional scaling, and sequence windows (`FeatureRecordSequence`).
96
- 5. **Vector stage** merges all feature streams, buckets them using `group_by`
97
- cadence (e.g., `1h`), and emits `(group_key, Vector)` pairs ready for
98
- downstream consumers.
99
-
100
- The runtime (`src/datapipeline/runtime.py`) hosts registries for sources,
101
- transforms, artifacts, and postprocess rules. The CLI constructs lightweight
102
- `PipelineContext` objects to build iterators without mutating global state.
103
-
104
- ---
105
-
106
- ## Configuration Files
107
-
108
- Per-dataset configuration lives under `config/datasets/<name>/` by default; sources and contracts are shared under `config/sources/` and `config/contracts/`.
109
-
110
- ### `project.yaml`
111
-
112
- ```yaml
113
- version: 1
114
- name: default
115
- paths:
116
- streams: ../../contracts
117
- sources: ../../sources
118
- dataset: dataset.yaml
119
- postprocess: postprocess.yaml
120
- artifacts: ../../build/datasets/${project_name}
121
- build: build.yaml
122
- run: run.yaml
123
- globals:
124
- start_time: 2021-01-01T00:00:00Z
125
- end_time: 2023-01-03T23:00:00Z
126
- split:
127
- mode: hash # hash | time
128
- key: group # group | feature:<id>
129
- seed: 42
130
- ratios: { train: 0.8, val: 0.1, test: 0.1 }
131
- ```
132
-
133
- - `name` provides a stable identifier you can reuse inside config files via `${project_name}`.
134
- - `paths.*` are resolved relative to the project file unless absolute; they also support `${var}` interpolation.
135
- - `globals` provide values for `${var}` interpolation across YAML files. Datetime
136
- values are normalized to strict UTC `YYYY-MM-DDTHH:MM:SSZ`.
137
- - `split` config defines how labels are assigned; the active label is selected by `run.yaml` or CLI `--keep`.
138
- - `paths.run` may point to a single file (default) or a directory. When it is a directory,
139
- every `*.yaml` file inside is treated as a run config; `jerry serve` executes them
140
- sequentially in alphabetical order unless you pass `--run <name>` (filename stem).
141
- - Label names are free-form: match whatever keys you declare in `split.ratios` (hash) or `split.labels` (time).
142
-
143
- ### `run.yaml`
144
-
145
- ```yaml
146
- version: 1
147
- keep: train # set to any label defined in globals.split (null disables filtering)
148
- output: print # override to 'stream' or a .pt path for binary dumps
149
- limit: 100 # cap vectors per serve run (null = unlimited)
150
- include_targets: false
151
- throttle_ms: null # sleep between vectors (milliseconds)
152
- log_level: INFO # DEBUG=progress bars, INFO=spinner, WARNING=quiet (null inherits CLI)
153
- ```
154
-
155
- - `keep` selects the currently served split. This file is referenced by `project.paths.run`.
156
- - `output`, `limit`, `include_targets`, `throttle_ms`, and `log_level` provide defaults for `jerry serve`; CLI flags still win per invocation.
157
- - Override `keep` (and other fields) per invocation via `jerry serve ... --keep val` etc.
158
- - To manage multiple runs, point `project.paths.run` at a folder (e.g., `config/datasets/default/runs/`)
159
- and drop additional `*.yaml` files there. `jerry serve` will run each file in order; pass
160
- `--run train` to execute only `runs/train.yaml`.
161
-
162
- ### `config/sources/<alias>.yaml`
163
-
164
- Each file defines a loader/parser pair exposed under `<alias>` (also the
165
- `source_id` the rest of the pipeline references).
166
-
167
- ```yaml
168
- source_id: demo_weather
169
- loader:
170
- entrypoint: demo.csv_loader
171
- args:
172
- path: data/weather.csv
173
- parser:
174
- entrypoint: demo.weather_parser
175
- args:
176
- timezone: UTC
177
- ```
178
-
179
- ### `config/contracts/<alias>.yaml`
180
-
181
- Canonical stream contracts describe how the runtime should map and prepare a
182
- source. `alias` normally matches the source alias; use folders to organize by
183
- domain.
184
-
185
- ```yaml
186
- source_id: demo_weather
187
- stream_id: demo_weather
188
-
189
- mapper:
190
- entrypoint: weather.domain.mapper
191
- args: {}
192
-
193
- partition_by: station
194
- sort_batch_size: 50000
195
-
196
- record:
197
- - filter: { operator: ge, field: time, comparand: "${start_time}" }
198
- - filter: { operator: lt, field: time, comparand: "${end_time}" }
199
- - floor_time: { resolution: 10m }
200
-
201
- stream:
202
- - ensure_ticks: { tick: 10m }
203
- - granularity: { mode: mean }
204
- - fill: { statistic: median, window: 6, min_samples: 2 }
205
-
206
- debug:
207
- - lint: { mode: warn, tick: 10m }
208
- ```
209
-
210
- - `record`: ordered record-level transforms (filters, floor/lag, custom
211
- transforms registered under the `record` entry-point group).
212
- - `stream`: transforms applied after feature wrapping, still per base feature.
213
- - `debug`: instrumentation-only transforms (linters, assertions).
214
- - `partition_by`: optional keys used to suffix feature IDs (e.g., `temp__station=XYZ`).
215
- - `sort_batch_size`: chunk size used by the in-memory sorter when normalizing
216
- order before stream transforms.
217
-
218
- ### `dataset.yaml`
219
-
220
- Defines which canonical streams become features/targets and the vector bucketing.
221
-
222
- ```yaml
223
- group_by: 1h
224
-
225
- features:
226
- - id: temp_c
227
- record_stream: demo_weather
228
- scale: true
229
- sequence: { size: 6, stride: 1, tick: 10m }
230
-
231
- targets:
232
- - id: precip
233
- record_stream: demo_weather
234
- ```
235
-
236
- - `group_by` controls the cadence for vector partitioning (accepts `Xm|min|Xh`
237
- — minutes or hours).
238
- - `scale: true` inserts the standard scaler feature transform (requires scaler
239
- stats artifact or inline statistics).
240
- - `sequence` emits `FeatureRecordSequence` windows (size, stride, optional
241
- cadence enforcement via `tick`).
242
-
243
- ### `postprocess.yaml`
244
-
245
- Project-scoped vector transforms that run after assembly and before serving.
246
-
247
- ```yaml
248
- - drop_missing:
249
- required: [temp_c__station=001]
250
- min_coverage: 0.95
251
- - fill_constant: { value: 0.0 }
252
- - fill_history:
253
- statistic: median
254
- window: 48
255
- min_samples: 6
256
- - fill_horizontal:
257
- statistic: mean
258
- min_samples: 2
259
- ```
260
-
261
- - Vector transforms rely on artifacts (expected IDs, scaler stats) to decide
262
- which features should be present.
263
- - When no transforms are configured the stream passes through unchanged.
264
-
265
- ### `build.yaml`
266
-
267
- Declares which artifacts the build step should materialize.
268
-
269
- ```yaml
270
- version: 1
271
- partitioned_ids:
272
- output: expected.txt
273
- include_targets: false
274
- scaler:
275
- enabled: true
276
- output: scaler.pkl
277
- include_targets: false
278
- split_label: train
279
- ```
280
-
281
- - `expected.txt` lists every fully partitioned feature ID observed in the latest
282
- run (used by vector postprocess transforms).
283
- - `scaler.pkl` is a pickled standard scaler fitted on the requested split.
284
-
285
- ---
286
-
287
- ## CLI Reference
288
-
289
- All commands live under the `jerry` entry point (`src/datapipeline/cli/app.py`).
290
- Pass `--help` on any command for flags.
291
-
292
- ### Preview Stages
293
-
294
- - `jerry serve --project <project.yaml> --stage <0-7> --limit N [--log-level LEVEL]`
295
- - Stage 0: raw DTOs
296
- - Stage 1: domain `TemporalRecord`s
297
- - Stage 2: record transforms applied
298
- - Stage 3: feature records (before sort/regularization)
299
- - Stage 4: feature regularization (post stream transforms)
300
- - Stage 5: feature transforms/sequence outputs
301
- - Stage 6: vectors assembled (no postprocess)
302
- - Stage 7: vectors + postprocess transforms
303
- - Use `--log-level DEBUG` for progress bars, `--log-level INFO` for spinner + prints, or the default (`WARNING`) for minimal output.
304
- - `jerry serve --project <project.yaml> --output print|stream|path.pt|path.csv|path.jsonl.gz --limit N [--include-targets] [--log-level LEVEL] [--run name]`
305
- - Applies postprocess transforms and optional dataset split before emitting.
306
- - Set `--log-level DEBUG` (or set `run.yaml` -> `log_level: DEBUG`) to reuse the tqdm progress bars when previewing stages.
307
- - When `project.paths.run` is a directory, add `--run val` (filename stem) to target a single config; otherwise every run file is executed sequentially.
308
- - Argument precedence: CLI flags > run.yaml > built-in defaults.
309
-
310
- ### Build & Quality
311
-
312
- - `jerry inspect report --project <project.yaml> [--threshold 0.95] [--include-targets]`
313
- - Prints coverage summary (keep/below lists) and writes `coverage.json` under
314
- the artifacts directory.
315
- - Add `--matrix csv|html` to persist an availability matrix.
316
- - `jerry inspect partitions --project <project.yaml> [--include-targets]`
317
- - Writes discovered partition suffixes to `partitions.json`.
318
- - `jerry inspect expected --project <project.yaml> [--include-targets]`
319
- - Writes the full set of observed feature IDs to `expected.txt`.
320
- - `jerry build --project <project.yaml> [--force]`
321
- - Regenerates artifacts declared in `build.yaml` if configuration hash changed.
322
-
323
- ### Scaffolding & Reference
324
-
325
- - `jerry plugin init --name <package> --out <dir>`
326
- - Generates a plugin project (pyproject, package skeleton, config templates).
327
- - `jerry source add --provider <name> --dataset <slug> --transport fs|url|synthetic --format csv|json|json-lines`
328
- - Creates loader/parser stubs, updates entry points, and drops a matching
329
- source YAML.
330
- - `jerry domain add --domain <name>`
331
- - Adds a `domains/<name>/` package with a `model.py` stub.
332
- - `jerry filter create --name <identifier>`
333
- - Scaffolds an entry-point-ready filter (helpful for custom record predicates).
334
- - `jerry list sources|domains`
335
- - Introspect configured source aliases or domain packages.
336
-
337
- ---
338
-
339
- ## Transform & Filter Library
340
-
341
- ### Record Filters (`config/contracts[].record`)
342
-
343
- - Binary comparisons: `eq`, `ne`, `lt`, `le`, `gt`, `ge` (timezone-aware for ISO
344
- or datetime literals).
345
- - Membership: `in`, `nin`.
346
- ```yaml
347
- - filter: { operator: ge, field: time, comparand: "${start_time}" }
348
- - filter: { operator: in, field: station, comparand: [a, b, c] }
349
- ```
350
-
351
- ### Record Transforms
352
-
353
- - `floor_time`: snap timestamps down to the nearest resolution (`10m`, `1h`, …).
354
- - `lag`: add lagged copies of records (see `src/datapipeline/transforms/record/lag.py` for options).
355
-
356
- ### Stream (Feature) Transforms
357
-
358
- - `ensure_ticks`: backfill missing ticks with `value=None` records to enforce a
359
- strict cadence.
360
- - `granularity`: merge duplicate timestamps using `first|last|mean|median`.
361
- - `fill`: rolling statistic-based imputation within each feature stream.
362
- - Custom transforms can be registered under the `stream` entry-point group.
363
-
364
- ### Feature Transforms
365
-
366
- - `scale`: wraps `StandardScalerTransform`. Read statistics from the build
367
- artifact or accept inline `statistics`.
368
- ```yaml
369
- scale:
370
- with_mean: true
371
- with_std: true
372
- statistics:
373
- temp_c__station=001: { mean: 10.3, std: 2.1 }
374
- ```
375
-
376
- ### Sequence Transforms
377
-
378
- - `sequence`: sliding window generator (`size`, `stride`, optional `tick` to
379
- enforce the cadence). Emits `FeatureRecordSequence` payloads with `.records`.
380
-
381
- ### Vector (Postprocess) Transforms
382
-
383
- - `drop_missing`: drop vectors that do not meet required IDs or coverage ratio.
384
- - `fill_constant`: seed absent IDs with a constant.
385
- - `fill_history`: impute using rolling statistics from prior vectors.
386
- - `fill_horizontal`: aggregate sibling partitions in the same timestamp.
387
-
388
- All transforms share a consistent entry-point signature and accept their config
389
- dict as keyword arguments. Register new ones in `pyproject.toml` under the
390
- appropriate group (`record`, `stream`, `feature`, `sequence`, `vector`,
391
- `filters`, `debug`).
392
-
393
- ---
394
-
395
- ## Artifacts & Postprocess
396
-
397
- - `expected.txt`: newline-delimited full feature IDs. Required by drop/fill
398
- transforms to know the target feature universe.
399
- - `scaler.pkl`: pickled standard scaler fitted on the configured split. Loaded
400
- lazily by feature transforms at runtime.
401
- - Build state is tracked in `artifacts/build/state.json`; config hashes avoid
402
- redundant runs.
403
-
404
- If a postprocess transform needs an artifact and it is missing, the runtime will
405
- raise a descriptive error suggesting `jerry build`.
406
-
407
- ---
408
-
409
- ## Splitting & Serving
410
-
411
- If `project.globals.split` is present, `jerry serve` filters vectors at the
412
- end of the pipeline:
413
-
414
- - `mode: hash` – deterministic entity hash using either the group key or a
415
- specified feature ID.
416
- - `mode: time` – boundary-based slicing using timestamp labels.
417
- - `run.keep` (or CLI `--keep`) selects the active slice; use any label name defined in your split config.
418
-
419
- The split configuration never mutates stored artifacts; it is only applied when
420
- serving vectors (either via CLI or the Python integrations).
421
-
422
- ---
423
-
424
- ## Python Integrations
425
-
426
- `datapipeline.integrations.ml` demonstrates how to reuse the runtime from
427
- application code:
428
-
429
- - `VectorAdapter.from_project(project_yaml)` – bootstrap once, then stream
430
- vectors or row dicts.
431
- - `stream_vectors(project_yaml, limit=...)` – iterator matching `jerry serve`.
432
- - `iter_vector_rows` / `collect_vector_rows` – handy for Pandas or custom sinks.
433
- - `dataframe_from_vectors` – eager helper that returns a Pandas DataFrame
434
- (requires `pandas`).
435
- - `torch_dataset` – builds a `torch.utils.data.Dataset` that yields tensors. See
436
- `examples/minimal_project/run_torch.py` for usage.
437
-
438
- ---
439
-
440
- ## Extending the Runtime
441
-
442
- ### Entry Points
443
-
444
- Register custom components in your plugin’s `pyproject.toml`:
445
-
446
- ```toml
447
- [project.entry-points."datapipeline.loaders"]
448
- demo.csv_loader = "my_datapipeline.loaders.csv:CsvLoader"
449
-
450
- [project.entry-points."datapipeline.parsers"]
451
- demo.weather_parser = "my_datapipeline.parsers.weather:WeatherParser"
452
-
453
- [project.entry-points."datapipeline.mappers"]
454
- weather.domain.mapper = "my_datapipeline.mappers.weather:DomainMapper"
455
-
456
- [project.entry-points."datapipeline.stream"]
457
- weather.fill = "my_datapipeline.transforms.weather:CustomFill"
458
- ```
459
-
460
- Loader, parser, mapper, and transform classes should provide a callable
461
- interface (usually `__call__`) matching the runtime expectations. Refer to the
462
- built-in implementations in `src/datapipeline/sources/`, `src/datapipeline/transforms/`,
463
- and `src/datapipeline/filters/`.
464
-
465
- ### Scaffolding Helpers
466
-
467
- - `datapipeline.services.scaffold.plugin.scaffold_plugin` – invoked by
468
- `jerry plugin init`.
469
- - `datapipeline.services.scaffold.source.create_source` – writes loader/parser
470
- stubs and updates entry points.
471
- - `datapipeline.services.scaffold.domain.create_domain` – domain DTO skeleton.
472
- - `datapipeline.services.scaffold.filter.create_filter` – custom filter stub.
473
- - `datapipeline.services.scaffold.mappers.attach_source_to_domain` – helper for
474
- programmatically wiring sources to domain mappers and emitting stream
475
- contracts (useful in custom automation or tests).
476
-
477
- ---
478
-
479
- ## Development Workflow
480
-
481
- - Install dependencies: `pip install -e .[dev]`.
482
- - Run tests: `pytest`.
483
- - When iterating on configs, use `jerry serve --stage <n>` to peek into problematic
484
- stages.
485
- - After tuning transforms, refresh artifacts: `jerry build`.
486
- - Use `jerry inspect report --include-targets` to ensure targets meet coverage
487
- gates before handing vectors to downstream consumers.
488
-
489
- ---
490
-
491
- ## Additional Resources
492
-
493
- - `src/datapipeline/analysis/vector_analyzer.py` – quality metrics collected by
494
- the inspect commands.
495
- - `src/datapipeline/pipeline/` – pure functions that wire each stage.
496
- - `src/datapipeline/services/bootstrap/` – runtime initialization and
497
- registry population (see `core.py`).
498
- - `examples/minimal_project/` – runnable demo showing config layout and Torch
499
- integration.
500
-
501
- Happy shipping! Build, inspect, and serve consistent time-series features with
502
- confidence.
@@ -1,139 +0,0 @@
1
- datapipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- datapipeline/plugins.py,sha256=Y0QfI313t5_w_m1ayQVEuac3lJ4YR_OSIYZol35ZOTk,838
3
- datapipeline/runtime.py,sha256=zNZmJNA6OTDl9NXGJikm5pRM7EDGAFtv4B_ErzDxVsY,2484
4
- datapipeline/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- datapipeline/analysis/vector_analyzer.py,sha256=D6eDW0H55QGnWiULXJEirMjw6MeBcwjJ4zfS7M6tx98,175
6
- datapipeline/analysis/vector/collector.py,sha256=S-CHdKfWeRP7TuxQdi22z9tJ9rt8VtbN48uJa-dgvBg,10233
7
- datapipeline/analysis/vector/matrix.py,sha256=mbMJoj7rzf6wQMV-1HbS4OvriWcfkF40ox8bYet-0zg,17861
8
- datapipeline/analysis/vector/report.py,sha256=lPL1he-tf5MeO2TFPteOiFLov6c6JgEStsvwWFvjrao,11205
9
- datapipeline/build/__init__.py,sha256=XbuHhJzIRTNnOOJMYa_BHiA0P8yPrEk4tuVXnD2NQbI,109
10
- datapipeline/build/state.py,sha256=XsL2CtQl7x80OpE1SJE42D-ig3lBMPr7_HSlpq5xwU4,1826
11
- datapipeline/build/tasks.py,sha256=j2DzVTOjfkcEHb4HQA1fMC5_7o-4i_ffyv_fZMw71TE,6367
12
- datapipeline/cli/app.py,sha256=V2vuFfkOJemKEImN21uUENBYqTODNY_JcxS5WwTyZFE,16416
13
- datapipeline/cli/commands/build.py,sha256=N1ekzK4lO6W4Dfuw58jvOHtHsdMM8rjZqD5CgGVDSmo,1641
14
- datapipeline/cli/commands/domain.py,sha256=KZ0dn08k7wdjBWmJPgeQ0ogI7fQfhCmkcckIpTlMYD0,308
15
- datapipeline/cli/commands/filter.py,sha256=vhoCIETJNUJmiI37ZdBNaeJAm6O4AU_tveJxVj47S8A,307
16
- datapipeline/cli/commands/inspect.py,sha256=YXQmLAij3tkOlXp0TLioB3v_rBOpuH0ZeJPVLtI07Bk,7916
17
- datapipeline/cli/commands/link.py,sha256=vvGlVrEXQtqIcxETwMXvQq1NWVHuGsNCDD8VxIoV57c,4907
18
- datapipeline/cli/commands/list_.py,sha256=cCrx35CPNMyAaOVvVxkZje4DAx7K7HdGpngzZwhNn40,943
19
- datapipeline/cli/commands/plugin.py,sha256=9AKFnebNbUkuZ5RzFBAPfKp9NERwd4uhrhPctiPLtYM,331
20
- datapipeline/cli/commands/run.py,sha256=525eqyeT8of4jG6O41VHtfu-kJrpL1OYMUXMMqBElGE,9049
21
- datapipeline/cli/commands/source.py,sha256=U21LAOiX10dXSmBGDGC2gjJC_GoIQ8Pmbq_E6Gil-kE,822
22
- datapipeline/cli/commands/writers.py,sha256=oLlMutWH6xHYQjR_zP59dJw4tDGFgexgN8RubeAICYk,4287
23
- datapipeline/cli/visuals/__init__.py,sha256=3VAWQt8tEKN_mIyhPAKqBqBy38fulA_DFVtfIPWaCnM,297
24
- datapipeline/cli/visuals/labels.py,sha256=3Dk7RoCO4hyxpEiBV9ltmZd9Fr9i8lroxnBjNGbyO-M,2604
25
- datapipeline/cli/visuals/sources.py,sha256=Z6pjPq46n0KbDXJfmVpLxV-z7e9wQhJZ03nQREA8yKQ,4160
26
- datapipeline/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- datapipeline/config/build.py,sha256=HcwOjuIfwM9PbMPaHncfpIhhI1WvfXWvgeRvrJKt_5c,1999
28
- datapipeline/config/catalog.py,sha256=ezGSy2yFybnYZ2EHU9IjDdeFVHgHhQbePAHGXFPY9b8,1012
29
- datapipeline/config/postprocess.py,sha256=E9Ic_yPCXVOmCRpmsFsHVI1t8JdIrT9nNqHn9H7kPg0,415
30
- datapipeline/config/project.py,sha256=GymhVVRbL-k0l7POKCRFfRUt6OMIJGpxp0b2i5JbuNw,988
31
- datapipeline/config/run.py,sha256=6SLmXC_wFJtyNKKUa2L3ZclQ6DR_OdG0h9vq-ylfnsU,3726
32
- datapipeline/config/split.py,sha256=VFYRF6Fz5xLTqqxIt3RVGB4kwlnHH8CxjOddEAJYG5Q,1048
33
- datapipeline/config/dataset/dataset.py,sha256=Q9cb5QoDtyPb4pbD9mSTZcJmXQhdEWwDLS52xKAcqXg,562
34
- datapipeline/config/dataset/feature.py,sha256=2Hxz0FXZskLI4ICXhmlG6b1Vvxzh0Ql9e6BwjMRtzSs,346
35
- datapipeline/config/dataset/loader.py,sha256=C_2zewx6Hk1MsQQSwctZaH2d3KPDzjnq5L08g5JMdLU,650
36
- datapipeline/config/dataset/normalize.py,sha256=66yvPvbQyef6qQtyJOGTkAo5Q5ldDqpk-XIHx9hYH4c,825
37
- datapipeline/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- datapipeline/domain/feature.py,sha256=7BOI4H458BKU8B9vqdfez7WOO1YKiF6lt0oy7PMbqrQ,295
39
- datapipeline/domain/record.py,sha256=Q-QjuR3FbQ01_vJ5LT9k8E40C-oYmOvWdbqtwtfIHPM,709
40
- datapipeline/domain/vector.py,sha256=apK1iu7tca2k2xgNGJAAQfKhirno7ZKZ3pDheKf9euM,1041
41
- datapipeline/filters/filters.py,sha256=dM6U-QpGCQQ4-CMBTJgWZp2zH2TVTk6uYOqGPC5NBCY,2649
42
- datapipeline/integrations/__init__.py,sha256=tjTLsIa6NRWKI05wjwPAUuXozDA-gP98SccFJ9lYHs8,410
43
- datapipeline/integrations/ml/__init__.py,sha256=oflJXnjQEn1Zv0Vho10mc2y3D6UkKusNZwE5yUtatb8,463
44
- datapipeline/integrations/ml/adapter.py,sha256=Xy4VXJ0pi5Ydsv_0MAYXpXGf_GY84SPwSseMfHsrVk8,3979
45
- datapipeline/integrations/ml/pandas_support.py,sha256=pSpSOFpstxD9gOwOCd-XuQ8bfkVPv6VRkxnfXTgJUWI,1305
46
- datapipeline/integrations/ml/rows.py,sha256=KyGl36LI7F-LhjZQnal0bhlR2TPB9gDchKtOwDyPgt8,2219
47
- datapipeline/integrations/ml/torch_support.py,sha256=RBuvEYD-YV-daC63yth6QIC4SnEWY_3KCV5ifvwHCmw,2799
48
- datapipeline/mappers/noop.py,sha256=L8bH1QVbLH-ogIam0ppYdx7KuWQ7Dj44lvD8tvNlY0Q,111
49
- datapipeline/mappers/synthetic/time.py,sha256=lt1pC0May6Y4E8bZO4sERm3D04_r-qv63Y5fwrtCaBQ,639
50
- datapipeline/parsers/identity.py,sha256=pdGuz0SSQGfySPpvZSnLgfTXTkC36x-7dQMMei3XhsU,321
51
- datapipeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- datapipeline/pipeline/context.py,sha256=0dlm8hRZn6fqXvgGg0zydHaibKwUQoxBM6seh2lmfTU,1914
53
- datapipeline/pipeline/pipelines.py,sha256=7X0BpJQcxHG6UCE4mCVjAvD2adyqQyO4wp_K5GFfAbo,2882
54
- datapipeline/pipeline/split.py,sha256=tvyerkvjZBF4WKI8VKEmx8deE-SFIG1bSUMgj62gBWk,6122
55
- datapipeline/pipeline/stages.py,sha256=Ehx2TeuIWdMW1fGI3uG6cowMUhOFhw7j2RYdWaI49d0,5704
56
- datapipeline/pipeline/utils/keygen.py,sha256=wg7dBlHUp9az4BNwiiYZ84A1VIbohUKoqL7ShC73V60,1400
57
- datapipeline/pipeline/utils/memory_sort.py,sha256=iPWcD81xtZZ8SXEX_Ph-hUCnpKlJMFybbxPZVOQdSs8,735
58
- datapipeline/pipeline/utils/ordering.py,sha256=ZX18I7GFtvyMFJB96vWQpTOGwljjeJ6ppCg5a3Av3es,1450
59
- datapipeline/pipeline/utils/transform_utils.py,sha256=CfQGSzM4BCbX5NR1ERP-uRkSqG9dIQLvHnD1-vquCv8,3202
60
- datapipeline/registries/registry.py,sha256=MWWOHz2wT1oHQmovodtEreEuQhvH-i11Y2yXUUgZJhQ,641
61
- datapipeline/services/artifacts.py,sha256=qHDVeDdeMUlYPpe_g0xuo9xUl5Y_aqqFgIPK9ywP_yc,2847
62
- datapipeline/services/constants.py,sha256=LpV5KKCLWSxsKk0sR4236s1aF01oVCi1Xqn60tmtu0M,488
63
- datapipeline/services/entrypoints.py,sha256=NKcSbhGRtBLQXGf-TdujwbVSRH1zb5J-S2jxFPnk6HQ,2504
64
- datapipeline/services/factories.py,sha256=yod9bGp5ErfVbeDym4TC13lez84F8P3wnnyTRuJKBVs,1009
65
- datapipeline/services/paths.py,sha256=xHxos62Y2gjhLggrnrmRqPiLMseK10OX17NJjnVk8wE,966
66
- datapipeline/services/project_paths.py,sha256=_Td5sFLRqtfXBVwYxCeuBuh_3s5V8ymo3GPgLv4aXI8,3249
67
- datapipeline/services/bootstrap/__init__.py,sha256=Mc2w2S69kU1hnzCvsGMhFqyNoNMXPwQtxprAkGN-sYE,245
68
- datapipeline/services/bootstrap/config.py,sha256=rIhp0J8j4ZQIaJ_8Y7ioa8sYGj4O-e9Q6q8MG8L9RmU,4207
69
- datapipeline/services/bootstrap/core.py,sha256=M9c8DujiJlaWXYyIGdLda3YktKrM4riGCWEQcBW-1ag,6497
70
- datapipeline/services/scaffold/__init__.py,sha256=PaQNtYki9Kc7mQPnUtKDPU-rKohLHoXLvFVwdHdbXNM,68
71
- datapipeline/services/scaffold/domain.py,sha256=POYlFTRak3eDjB6u4DtjijGfNzpX3SrVw_BZYcH6G4E,918
72
- datapipeline/services/scaffold/filter.py,sha256=fVb6PokoDnJxDsA9nlaHp-rnZlOA4aAnVJ76jJQQBhs,1060
73
- datapipeline/services/scaffold/mappers.py,sha256=Kef04q2YA1hJ6OG_Fbc_SGVU9CrUwcRjpHEjFz1nt40,1993
74
- datapipeline/services/scaffold/plugin.py,sha256=KZM_mKZsKy_KyyiJjioBzzgD05tt1kCpP2QBF9JFQVU,1701
75
- datapipeline/services/scaffold/source.py,sha256=_kJO4Kpvlpr_93TzLa4JJd2B7nEErwE-xZa1RtVOPRc,5538
76
- datapipeline/services/scaffold/templates.py,sha256=B3YnZpFUZLynijJosTNxZQLXnPP_Y_t1RHqfI1lGOxU,634
77
- datapipeline/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- datapipeline/sources/composed_loader.py,sha256=dUmdAYncL6IJqwLhKpQ3AVyFNyut-_K55isSTr-iDXo,1350
79
- datapipeline/sources/decoders.py,sha256=OEozYeV9bAA0yiWebPRvq_qKt0KXArk5jBJk_2sr7iI,1970
80
- datapipeline/sources/factory.py,sha256=J177Y1TJsAJZimd54bKkJl1c7-rq8e2cs04ie3tHY8E,2097
81
- datapipeline/sources/transports.py,sha256=yBfRLlZxxP4INQ5uFSR8zJecjUx_XlwU6KMQqJUYx44,2080
82
- datapipeline/sources/models/__init__.py,sha256=rS3Rc8L2eXSd2CW9ejRConk9pmV8Zv7QCz3ekSIdcLQ,397
83
- datapipeline/sources/models/base.py,sha256=MAUawd11fII-mxxuSPM4f6H1t1tbyZX_QWhoAgeYUcU,238
84
- datapipeline/sources/models/generator.py,sha256=9hzZ_Tvdc8w_VnFMny_C0lLvhjpPgW22y2aXJGKSvdw,661
85
- datapipeline/sources/models/loader.py,sha256=WWQhyatf0dr6pIgh2Dbtif1r09kbCKq9G4-EX4PN9Fg,1025
86
- datapipeline/sources/models/parser.py,sha256=Ts31aksHLDCw5ovF2D99w9g_j-NnEiZ8x0JHtUxmmXs,226
87
- datapipeline/sources/models/source.py,sha256=PBtbJVdyuRABPGFSwkyDaSmT1BuHk2faL-lUvRFpOAo,796
88
- datapipeline/sources/models/synthetic.py,sha256=FLF2Jvdc06VCriTCliThuQTUXd6NrXIQpksIL8gBIH8,288
89
- datapipeline/sources/synthetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
- datapipeline/sources/synthetic/time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- datapipeline/sources/synthetic/time/loader.py,sha256=KSkLbaN4snFsA8U-ZjPoiUlTHX7u6sTH6L5kN828sEQ,1540
92
- datapipeline/sources/synthetic/time/parser.py,sha256=d3GZMQ7L1Qi4LeEm7U3y0_pk0RdhskioQukYyqyoqic,343
93
- datapipeline/templates/plugin_skeleton/README.md,sha256=5bdNVR9gqtXf2gZlTaXFLJrWadsakqqS-L9JJuesBaY,5982
94
- datapipeline/templates/plugin_skeleton/pyproject.toml,sha256=0lmO5Aia9tB81Ez4SxP56DGisekx-palMmGCUzmAl4E,259
95
- datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml,sha256=FrSy5PPbnEGmZcduRuAdj5UiA7YOpJKSGzgVrsVB5x0,575
96
- datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml,sha256=3qSV6HZOPRZjQnLZqx2VbQsNzpwhjoYxhRQIcBUDcp4,1250
97
- datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml,sha256=zl4WCMc5M4Mke0B-pJD2CsdOsdUUQkfuDFDW9JH6bYs,431
98
- datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml,sha256=3LOOVBY36ECCqUf88RyVctlaZyggoQZaJp7Pok6FsBg,297
99
- datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml,sha256=q-9VhMjB2j-Le4vZ3pZEH-N98DZEzDqXI3RvJXW-ung,310
100
- datapipeline/templates/plugin_skeleton/config/datasets/default/project.yaml,sha256=jsujHRmJ1mxXFGtjxN_wWzHEMfxp8UGY5fvLitXflu4,686
101
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml,sha256=9_ZU0Zd1DYZoE3kd_PMZ1xIs6cv4NvbgxA2LVTbRzaw,580
102
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml,sha256=fXy9RnBG2I_8VeQrfgQ5aJibqtwi6H0o754xQGgLJ4s,582
103
- datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml,sha256=nqZ_h5jrUhnCzgvhKyv8Wb3hSxfcGF7p5N1VLE15sg0,578
104
- datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml,sha256=7tie6CqEmOOK8M629f2WZDWKaUPr9bePMC6Oj8RsqB8,190
105
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
- datapipeline/templates/stubs/dto.py.j2,sha256=0tEtjGmCu1ddZee_fjmKyRrx357u3Yo6MKxXIpU8224,877
107
- datapipeline/templates/stubs/filter.py.j2,sha256=3LgRgAL_HRaENOOqQx8NdeM1AUy-T0rtHVTA7N2oWOs,466
108
- datapipeline/templates/stubs/loader_synthetic.py.j2,sha256=9SQBeTBGlZmKs6nSYBKd8nbOPcFHgDx17Mh8xOEQnvs,1285
109
- datapipeline/templates/stubs/mapper.py.j2,sha256=5pKaGUEiUWAgLU4fVTXdx4LNFsIQqI4LX5FYKcayxIY,762
110
- datapipeline/templates/stubs/parser.py.j2,sha256=KDWO_kDogHosqFt4MGki9QncEpdeD-KCkAWqrhLe7Lo,672
111
- datapipeline/templates/stubs/parser_custom.py.j2,sha256=0Nytq43JdTZoyRj-4Mz6HWdMTmOP3VlFuYOB_A_13Vg,580
112
- datapipeline/templates/stubs/record.py.j2,sha256=bzI8Jt0hcInLpn-IlyL2a8-q4VQY4gyZ2Z5BAMB70k4,660
113
- datapipeline/templates/stubs/source.yaml.j2,sha256=mPOfYD3Hyvaw-lSgJslfsP2VqOp7qsg_ePOLvyTeSRw,416
114
- datapipeline/transforms/filter.py,sha256=Jt8wTEIqWqe34s7GVVekcR8OdRozs317sj7Uw08GNOA,1433
115
- datapipeline/transforms/sequence.py,sha256=5i-0w1jQcSHy12rhztBhzyhJ2FdnVbD35NWcPXPi_kQ,3059
116
- datapipeline/transforms/utils.py,sha256=ts6dULY2Pc5fFs7AMd3goN4hDzQkv-6CDLdRH41lG9I,721
117
- datapipeline/transforms/vector.py,sha256=sXDNMlVcqpRFXcMw_oC5nEM1mMQa59AnZG6e_1PrnBQ,7609
118
- datapipeline/transforms/vector_utils.py,sha256=PcStTwRaaunONKZJuwv79bjdfaDcamLcwNLRHjZ5yXw,927
119
- datapipeline/transforms/debug/identity.py,sha256=6bwnEYhMBYw0YPrMccrZPXDOQM4r_-odsKo8Hhpbz10,2515
120
- datapipeline/transforms/debug/lint.py,sha256=6EBzGOfYjJbHzcZIIzVixlvW5RVr7liw6DieuWwxNUM,4057
121
- datapipeline/transforms/feature/model.py,sha256=gB-GP80_P7bzEKJFSM4leRke75yiD4-S5eJ1p8g3JU8,382
122
- datapipeline/transforms/feature/scaler.py,sha256=pcIb-dstNsZQ-T4ZsONkKwkg1fVWpJDV19FyQPM9HpM,5265
123
- datapipeline/transforms/record/floor_time.py,sha256=dKxLjnmBNJmDClPQfuKBEM_lrW-356v8XfQtLog5K2k,627
124
- datapipeline/transforms/record/lag.py,sha256=5wrPyVNFvidvdQddnK6ZeUOI5I8rfXEbzIg6tzKiJu4,536
125
- datapipeline/transforms/stream/ensure_ticks.py,sha256=Q0AwKuRY2nRIOUKoaAINeAWUEuoOzSh30Ug5k8296Kw,1170
126
- datapipeline/transforms/stream/fill.py,sha256=N_ybLUCvaMVvKsFP8-HcGuKqV9hXAnYmV7zyUB-Ugys,3500
127
- datapipeline/transforms/stream/granularity.py,sha256=PzHDGDwyn8P07BCbcFZaorS_7lbAbEdMLqD9Wy61y0M,3376
128
- datapipeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- datapipeline/utils/load.py,sha256=7BJIx11DPiEtDaLf-ZOClCQxJ7c8oFcc3JdcnGjl4ig,1327
130
- datapipeline/utils/paths.py,sha256=5Y5rhNbjTiybUHfq9VfRMJ4gUfN9UltonM-4MABEG8w,798
131
- datapipeline/utils/pickle_model.py,sha256=Uyd4AajInyTUpWfSJDDEGLinXeQkHjQUNnyla0owtA4,854
132
- datapipeline/utils/placeholders.py,sha256=epZQ7NifUWI7_7hZKGEkCBDOaMnN9LiqJdI2gvBAEgE,890
133
- datapipeline/utils/time.py,sha256=vOqa2arqwEqbDo-JWEhOFPMnI1E4Ib3i1L-Rt-cGH8c,1072
134
- jerry_thomas-0.3.0.dist-info/licenses/LICENSE,sha256=pkBMylAJF5yChHAkdxwFhEptLGx13i-XFEKh-Sh6DkM,1073
135
- jerry_thomas-0.3.0.dist-info/METADATA,sha256=_zSaetgFZLkGnfG3Gd8UDDWQBZAwKlup-VAN_I-aNJ0,18413
136
- jerry_thomas-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
137
- jerry_thomas-0.3.0.dist-info/entry_points.txt,sha256=7GKSNCgwbzIqI3_LdbOro2eiAbBvbpoOxAuh2XcqBN0,1669
138
- jerry_thomas-0.3.0.dist-info/top_level.txt,sha256=N8aoNPdPyHefODO4YAm7tqTaUcw0e8LDcqycFTf8TbM,13
139
- jerry_thomas-0.3.0.dist-info/RECORD,,