jerry-thomas 1.0.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. datapipeline/analysis/vector/collector.py +0 -1
  2. datapipeline/build/tasks/config.py +0 -2
  3. datapipeline/build/tasks/metadata.py +0 -2
  4. datapipeline/build/tasks/scaler.py +0 -2
  5. datapipeline/build/tasks/schema.py +0 -2
  6. datapipeline/build/tasks/utils.py +0 -2
  7. datapipeline/cli/app.py +201 -81
  8. datapipeline/cli/commands/contract.py +145 -283
  9. datapipeline/cli/commands/demo.py +13 -0
  10. datapipeline/cli/commands/domain.py +4 -4
  11. datapipeline/cli/commands/dto.py +11 -0
  12. datapipeline/cli/commands/filter.py +2 -2
  13. datapipeline/cli/commands/inspect.py +0 -68
  14. datapipeline/cli/commands/list_.py +30 -13
  15. datapipeline/cli/commands/loader.py +11 -0
  16. datapipeline/cli/commands/mapper.py +82 -0
  17. datapipeline/cli/commands/parser.py +45 -0
  18. datapipeline/cli/commands/run_config.py +1 -3
  19. datapipeline/cli/commands/serve_pipeline.py +5 -7
  20. datapipeline/cli/commands/source.py +106 -18
  21. datapipeline/cli/commands/stream.py +292 -0
  22. datapipeline/cli/visuals/common.py +0 -2
  23. datapipeline/cli/visuals/sections.py +0 -2
  24. datapipeline/cli/workspace_utils.py +0 -3
  25. datapipeline/config/context.py +0 -2
  26. datapipeline/config/dataset/feature.py +1 -0
  27. datapipeline/config/metadata.py +0 -2
  28. datapipeline/config/project.py +0 -2
  29. datapipeline/config/resolution.py +10 -2
  30. datapipeline/config/tasks.py +9 -9
  31. datapipeline/domain/feature.py +3 -0
  32. datapipeline/domain/record.py +7 -7
  33. datapipeline/domain/sample.py +0 -2
  34. datapipeline/domain/vector.py +6 -8
  35. datapipeline/integrations/ml/adapter.py +0 -2
  36. datapipeline/integrations/ml/pandas_support.py +0 -2
  37. datapipeline/integrations/ml/rows.py +0 -2
  38. datapipeline/integrations/ml/torch_support.py +0 -2
  39. datapipeline/io/output.py +0 -2
  40. datapipeline/io/serializers.py +26 -16
  41. datapipeline/mappers/synthetic/time.py +9 -2
  42. datapipeline/pipeline/artifacts.py +3 -5
  43. datapipeline/pipeline/observability.py +0 -2
  44. datapipeline/pipeline/pipelines.py +118 -34
  45. datapipeline/pipeline/stages.py +54 -18
  46. datapipeline/pipeline/utils/spool_cache.py +142 -0
  47. datapipeline/pipeline/utils/transform_utils.py +27 -2
  48. datapipeline/services/artifacts.py +1 -4
  49. datapipeline/services/constants.py +1 -0
  50. datapipeline/services/factories.py +4 -6
  51. datapipeline/services/paths.py +10 -1
  52. datapipeline/services/project_paths.py +0 -2
  53. datapipeline/services/runs.py +0 -2
  54. datapipeline/services/scaffold/contract_yaml.py +76 -0
  55. datapipeline/services/scaffold/demo.py +141 -0
  56. datapipeline/services/scaffold/discovery.py +115 -0
  57. datapipeline/services/scaffold/domain.py +21 -13
  58. datapipeline/services/scaffold/dto.py +31 -0
  59. datapipeline/services/scaffold/filter.py +2 -1
  60. datapipeline/services/scaffold/layout.py +96 -0
  61. datapipeline/services/scaffold/loader.py +61 -0
  62. datapipeline/services/scaffold/mapper.py +116 -0
  63. datapipeline/services/scaffold/parser.py +56 -0
  64. datapipeline/services/scaffold/plugin.py +14 -2
  65. datapipeline/services/scaffold/source_yaml.py +91 -0
  66. datapipeline/services/scaffold/stream_plan.py +129 -0
  67. datapipeline/services/scaffold/utils.py +187 -0
  68. datapipeline/sources/data_loader.py +0 -2
  69. datapipeline/sources/decoders.py +49 -8
  70. datapipeline/sources/factory.py +9 -6
  71. datapipeline/sources/foreach.py +18 -3
  72. datapipeline/sources/synthetic/time/parser.py +1 -1
  73. datapipeline/sources/transports.py +10 -4
  74. datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
  75. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
  76. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
  77. datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
  78. datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
  79. datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
  80. datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
  81. datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
  82. datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
  83. datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
  84. datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
  85. datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
  86. datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
  87. datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
  88. datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
  89. datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
  90. datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
  91. datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
  92. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
  93. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
  94. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
  95. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  96. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
  97. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
  98. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
  99. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  100. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
  101. datapipeline/templates/plugin_skeleton/README.md +57 -136
  102. datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
  103. datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
  104. datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
  105. datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
  106. datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
  107. datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
  108. datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
  109. datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
  110. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
  111. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
  112. datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
  113. datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
  114. datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
  115. datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
  116. datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
  117. datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
  118. datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
  119. datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
  120. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
  121. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  122. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
  123. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
  124. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  125. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
  126. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
  127. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +9 -11
  128. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
  129. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
  130. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
  131. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
  132. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
  133. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
  134. datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
  135. datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
  136. datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +15 -0
  137. datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
  138. datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
  139. datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
  140. datapipeline/templates/stubs/dto.py.j2 +2 -2
  141. datapipeline/templates/stubs/filter.py.j2 +1 -1
  142. datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
  143. datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
  144. datapipeline/templates/stubs/mappers/ingest.py.j2 +20 -0
  145. datapipeline/templates/stubs/parser.py.j2 +5 -1
  146. datapipeline/templates/stubs/record.py.j2 +1 -1
  147. datapipeline/templates/stubs/source.yaml.j2 +1 -1
  148. datapipeline/transforms/debug/identity.py +34 -16
  149. datapipeline/transforms/debug/lint.py +14 -11
  150. datapipeline/transforms/feature/scaler.py +5 -12
  151. datapipeline/transforms/filter.py +73 -17
  152. datapipeline/transforms/interfaces.py +58 -0
  153. datapipeline/transforms/record/floor_time.py +10 -7
  154. datapipeline/transforms/record/lag.py +8 -10
  155. datapipeline/transforms/sequence.py +2 -3
  156. datapipeline/transforms/stream/dedupe.py +5 -7
  157. datapipeline/transforms/stream/ensure_ticks.py +39 -24
  158. datapipeline/transforms/stream/fill.py +34 -25
  159. datapipeline/transforms/stream/filter.py +25 -0
  160. datapipeline/transforms/stream/floor_time.py +16 -0
  161. datapipeline/transforms/stream/granularity.py +52 -30
  162. datapipeline/transforms/stream/lag.py +17 -0
  163. datapipeline/transforms/stream/rolling.py +72 -0
  164. datapipeline/transforms/utils.py +42 -10
  165. datapipeline/transforms/vector/drop/horizontal.py +0 -3
  166. datapipeline/transforms/vector/drop/orchestrator.py +0 -3
  167. datapipeline/transforms/vector/drop/vertical.py +0 -2
  168. datapipeline/transforms/vector/ensure_schema.py +0 -2
  169. datapipeline/utils/paths.py +0 -2
  170. datapipeline/utils/placeholders.py +0 -2
  171. datapipeline/utils/rich_compat.py +0 -3
  172. datapipeline/utils/window.py +0 -2
  173. jerry_thomas-2.0.1.dist-info/METADATA +269 -0
  174. jerry_thomas-2.0.1.dist-info/RECORD +264 -0
  175. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/WHEEL +1 -1
  176. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/entry_points.txt +7 -3
  177. datapipeline/services/scaffold/mappers.py +0 -55
  178. datapipeline/services/scaffold/source.py +0 -191
  179. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
  180. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
  181. datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
  182. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
  183. datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
  184. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
  185. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
  186. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
  187. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
  188. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
  189. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
  190. datapipeline/templates/stubs/mapper.py.j2 +0 -22
  191. jerry_thomas-1.0.3.dist-info/METADATA +0 -827
  192. jerry_thomas-1.0.3.dist-info/RECORD +0 -198
  193. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/licenses/LICENSE +0 -0
  194. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,29 +0,0 @@
1
- #### example combination of postprocessing steps ######
2
- #### making sure data is complete after these combinations ######
3
- - drop: # example of dropping sparse partitions/vertical-axis for targets
4
- axis: vertical
5
- payload: targets
6
- threshold: 0.9
7
-
8
- - drop: # example of dropping sparse partitions for features
9
- axis: vertical
10
- payload: features
11
- threshold: 0.9
12
-
13
- - drop: # dropping vectors/horizontal-axis that has features which none
14
- axis: horizontal
15
- payload: features
16
- threshold: 1
17
-
18
- - drop:
19
- axis: horizontal
20
- payload: targets
21
- threshold: 1
22
- ######
23
- # - fill:
24
- # statistic: median
25
- # window: 48
26
- # min_samples: 6
27
- # - replace:
28
- # payload: targets
29
- # value: 0.0
@@ -1,23 +0,0 @@
1
- version: 1
2
- name: example
3
- paths:
4
- streams: ./contracts
5
- sources: ./sources
6
- dataset: dataset.yaml
7
- postprocess: postprocess.yaml
8
- artifacts: ../artifacts/${project_name}/v${version}
9
- tasks: ./tasks
10
- globals:
11
- # Globals to use in your .yaml files via ${var_name}.
12
- # Primary dataset cadence; referenced from dataset.yaml (group_by)
13
- # and contracts via ${group_by}.
14
- group_by: 1h
15
- start_time: 2021-01-01T00:00:00Z
16
- end_time: 2021-01-02T00:00:00Z
17
- # Configure deterministic dataset split here (applied at serve time, after postprocess).
18
- # Adjust `ratios` as needed; the active split is selected via serve tasks or CLI.
19
- split:
20
- mode: hash # hash | time (time uses boundaries/labels)
21
- key: group # group | feature:<id> (entity-stable split)
22
- seed: 42 # deterministic hash seed
23
- ratios: { train: 0.8, val: 0.1, test: 0.1 }
@@ -1,3 +0,0 @@
1
- kind: metadata
2
- # window_mode: intersection # union|intersection|strict|relaxed (default: intersection)
3
-
@@ -1,9 +0,0 @@
1
- kind: scaler
2
-
3
- # Output path is relative to project.paths.artifacts; defaults to "scaler.pkl".
4
- # output: scaler.pkl
5
-
6
- # Split label to use when fitting scaler statistics.
7
- # Must match a label from globals.split.ratios.
8
- split_label: train
9
-
@@ -1,2 +0,0 @@
1
- kind: schema
2
-
@@ -1,4 +0,0 @@
1
- kind: serve
2
- name: test
3
- keep: test
4
-
@@ -1,28 +0,0 @@
1
- kind: serve
2
-
3
- # Optional identifier for this serve task; defaults to filename stem.
4
- name: train
5
-
6
- # Active split label to serve; must match a label from globals.split.ratios.
7
- # Set to null to disable split filtering.
8
- keep: train
9
- #output:
10
- # transport: stdout | fs
11
- # format: print | json-lines | json | csv | pickle
12
- # When using fs transport, set a directory (and optionally filename) for outputs:
13
- # directory: artifacts/serve
14
- # filename: vectors.train
15
-
16
- # Default max number of vectors to emit (null = unlimited).
17
- # limit: 5
18
- # Optional pipeline stage preview (0-7); null lets the CLI decide.
19
- # stage: 7
20
-
21
- # Optional pacing between emitted vectors (milliseconds).
22
- # throttle_ms: null
23
-
24
- # Visuals/logging knobs (inherit CLI or jerry.yaml defaults when omitted):
25
- # visuals: AUTO # AUTO | TQDM | RICH | OFF
26
- # progress: AUTO # AUTO | SPINNER | BARS | OFF
27
- # log_level: INFO # CRITICAL | ERROR | WARNING | INFO | DEBUG
28
-
@@ -1,4 +0,0 @@
1
- kind: serve
2
- name: val
3
- keep: val
4
-
@@ -1,22 +0,0 @@
1
- from typing import Any, Iterator
2
-
3
- from {{PACKAGE_NAME}}.domains.{{TARGET_DOMAIN}}.model import {{DomainRecord}}
4
- from {{PACKAGE_NAME}}.sources.{{ORIGIN}}.{{DATASET}}.dto import {{OriginDTO}}
5
-
6
-
7
- def {{FUNCTION_NAME}}(
8
- stream: Iterator[{{OriginDTO}}],
9
- **params: Any,
10
- ) -> Iterator[{{DomainRecord}}]:
11
- """Map raw {{ORIGIN}} DTOs to domain-level {{TARGET_DOMAIN}} records.
12
-
13
- - Required on domain record: time and value.
14
- - Additional options may be passed via kwargs (e.g., variant="..." or mode="...").
15
- """
16
- for dto in stream:
17
- # TODO: construct {{DomainRecord}} from dto fields
18
- yield {{DomainRecord}}(
19
- time=dto.time, # must include time
20
- value=dto.value, # must include value
21
- # additional fields...
22
- )