jerry-thomas 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. datapipeline/analysis/vector/collector.py +120 -17
  2. datapipeline/analysis/vector/matrix.py +33 -8
  3. datapipeline/analysis/vector/report.py +162 -32
  4. datapipeline/build/tasks/__init__.py +11 -0
  5. datapipeline/build/tasks/config.py +74 -0
  6. datapipeline/build/tasks/metadata.py +170 -0
  7. datapipeline/build/tasks/scaler.py +73 -0
  8. datapipeline/build/tasks/schema.py +60 -0
  9. datapipeline/build/tasks/utils.py +169 -0
  10. datapipeline/cli/app.py +304 -127
  11. datapipeline/cli/commands/build.py +240 -16
  12. datapipeline/cli/commands/contract.py +367 -0
  13. datapipeline/cli/commands/domain.py +8 -3
  14. datapipeline/cli/commands/inspect.py +401 -149
  15. datapipeline/cli/commands/list_.py +30 -7
  16. datapipeline/cli/commands/plugin.py +1 -1
  17. datapipeline/cli/commands/run.py +227 -241
  18. datapipeline/cli/commands/run_config.py +101 -0
  19. datapipeline/cli/commands/serve_pipeline.py +156 -0
  20. datapipeline/cli/commands/source.py +44 -8
  21. datapipeline/cli/visuals/__init__.py +4 -2
  22. datapipeline/cli/visuals/common.py +239 -0
  23. datapipeline/cli/visuals/labels.py +15 -15
  24. datapipeline/cli/visuals/runner.py +66 -0
  25. datapipeline/cli/visuals/sections.py +20 -0
  26. datapipeline/cli/visuals/sources.py +132 -119
  27. datapipeline/cli/visuals/sources_basic.py +260 -0
  28. datapipeline/cli/visuals/sources_off.py +76 -0
  29. datapipeline/cli/visuals/sources_rich.py +414 -0
  30. datapipeline/config/catalog.py +37 -3
  31. datapipeline/config/context.py +214 -0
  32. datapipeline/config/dataset/loader.py +21 -4
  33. datapipeline/config/dataset/normalize.py +4 -4
  34. datapipeline/config/metadata.py +43 -0
  35. datapipeline/config/postprocess.py +2 -2
  36. datapipeline/config/project.py +3 -2
  37. datapipeline/config/resolution.py +129 -0
  38. datapipeline/config/tasks.py +309 -0
  39. datapipeline/config/workspace.py +155 -0
  40. datapipeline/domain/__init__.py +12 -0
  41. datapipeline/domain/record.py +11 -0
  42. datapipeline/domain/sample.py +54 -0
  43. datapipeline/integrations/ml/adapter.py +34 -20
  44. datapipeline/integrations/ml/pandas_support.py +0 -2
  45. datapipeline/integrations/ml/rows.py +1 -6
  46. datapipeline/integrations/ml/torch_support.py +1 -3
  47. datapipeline/io/factory.py +112 -0
  48. datapipeline/io/output.py +132 -0
  49. datapipeline/io/protocols.py +21 -0
  50. datapipeline/io/serializers.py +219 -0
  51. datapipeline/io/sinks/__init__.py +23 -0
  52. datapipeline/io/sinks/base.py +2 -0
  53. datapipeline/io/sinks/files.py +79 -0
  54. datapipeline/io/sinks/rich.py +57 -0
  55. datapipeline/io/sinks/stdout.py +18 -0
  56. datapipeline/io/writers/__init__.py +14 -0
  57. datapipeline/io/writers/base.py +28 -0
  58. datapipeline/io/writers/csv_writer.py +25 -0
  59. datapipeline/io/writers/jsonl.py +52 -0
  60. datapipeline/io/writers/pickle_writer.py +30 -0
  61. datapipeline/pipeline/artifacts.py +58 -0
  62. datapipeline/pipeline/context.py +66 -7
  63. datapipeline/pipeline/observability.py +65 -0
  64. datapipeline/pipeline/pipelines.py +65 -13
  65. datapipeline/pipeline/split.py +11 -10
  66. datapipeline/pipeline/stages.py +127 -16
  67. datapipeline/pipeline/utils/keygen.py +20 -7
  68. datapipeline/pipeline/utils/memory_sort.py +22 -10
  69. datapipeline/pipeline/utils/transform_utils.py +22 -0
  70. datapipeline/runtime.py +5 -2
  71. datapipeline/services/artifacts.py +12 -6
  72. datapipeline/services/bootstrap/config.py +25 -0
  73. datapipeline/services/bootstrap/core.py +52 -37
  74. datapipeline/services/constants.py +6 -5
  75. datapipeline/services/factories.py +123 -1
  76. datapipeline/services/project_paths.py +43 -16
  77. datapipeline/services/runs.py +208 -0
  78. datapipeline/services/scaffold/domain.py +3 -2
  79. datapipeline/services/scaffold/filter.py +3 -2
  80. datapipeline/services/scaffold/mappers.py +9 -6
  81. datapipeline/services/scaffold/plugin.py +3 -3
  82. datapipeline/services/scaffold/source.py +93 -56
  83. datapipeline/sources/{composed_loader.py → data_loader.py} +9 -9
  84. datapipeline/sources/decoders.py +83 -18
  85. datapipeline/sources/factory.py +26 -16
  86. datapipeline/sources/models/__init__.py +2 -2
  87. datapipeline/sources/models/generator.py +0 -7
  88. datapipeline/sources/models/loader.py +3 -3
  89. datapipeline/sources/models/parsing_error.py +24 -0
  90. datapipeline/sources/models/source.py +6 -6
  91. datapipeline/sources/synthetic/time/loader.py +14 -2
  92. datapipeline/sources/transports.py +74 -37
  93. datapipeline/templates/plugin_skeleton/README.md +74 -30
  94. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +31 -0
  95. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +30 -0
  96. datapipeline/templates/plugin_skeleton/example/dataset.yaml +18 -0
  97. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +29 -0
  98. datapipeline/templates/plugin_skeleton/{config/datasets/default → example}/project.yaml +11 -8
  99. datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml +12 -0
  100. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +3 -0
  101. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +9 -0
  102. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +2 -0
  103. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +4 -0
  104. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +28 -0
  105. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +4 -0
  106. datapipeline/templates/plugin_skeleton/jerry.yaml +28 -0
  107. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +31 -0
  108. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +30 -0
  109. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +18 -0
  110. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +29 -0
  111. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +22 -0
  112. datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +12 -0
  113. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +3 -0
  114. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +9 -0
  115. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +2 -0
  116. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +4 -0
  117. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +28 -0
  118. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +4 -0
  119. datapipeline/templates/stubs/dto.py.j2 +2 -0
  120. datapipeline/templates/stubs/mapper.py.j2 +5 -4
  121. datapipeline/templates/stubs/parser.py.j2 +2 -0
  122. datapipeline/templates/stubs/record.py.j2 +2 -0
  123. datapipeline/templates/stubs/source.yaml.j2 +2 -3
  124. datapipeline/transforms/debug/lint.py +26 -41
  125. datapipeline/transforms/feature/scaler.py +89 -13
  126. datapipeline/transforms/record/floor_time.py +4 -4
  127. datapipeline/transforms/sequence.py +2 -35
  128. datapipeline/transforms/stream/dedupe.py +24 -0
  129. datapipeline/transforms/stream/ensure_ticks.py +7 -6
  130. datapipeline/transforms/vector/__init__.py +5 -0
  131. datapipeline/transforms/vector/common.py +98 -0
  132. datapipeline/transforms/vector/drop/__init__.py +4 -0
  133. datapipeline/transforms/vector/drop/horizontal.py +79 -0
  134. datapipeline/transforms/vector/drop/orchestrator.py +59 -0
  135. datapipeline/transforms/vector/drop/vertical.py +182 -0
  136. datapipeline/transforms/vector/ensure_schema.py +184 -0
  137. datapipeline/transforms/vector/fill.py +87 -0
  138. datapipeline/transforms/vector/replace.py +62 -0
  139. datapipeline/utils/load.py +24 -3
  140. datapipeline/utils/rich_compat.py +38 -0
  141. datapipeline/utils/window.py +76 -0
  142. jerry_thomas-1.0.0.dist-info/METADATA +825 -0
  143. jerry_thomas-1.0.0.dist-info/RECORD +199 -0
  144. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/entry_points.txt +9 -8
  145. datapipeline/build/tasks.py +0 -186
  146. datapipeline/cli/commands/link.py +0 -128
  147. datapipeline/cli/commands/writers.py +0 -138
  148. datapipeline/config/build.py +0 -64
  149. datapipeline/config/run.py +0 -116
  150. datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml +0 -24
  151. datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml +0 -23
  152. datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +0 -9
  153. datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -14
  154. datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +0 -13
  155. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +0 -10
  156. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +0 -10
  157. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +0 -10
  158. datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml +0 -11
  159. datapipeline/transforms/vector.py +0 -210
  160. jerry_thomas-0.3.0.dist-info/METADATA +0 -502
  161. jerry_thomas-0.3.0.dist-info/RECORD +0 -139
  162. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/WHEEL +0 -0
  163. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/licenses/LICENSE +0 -0
  164. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+
5
+ from datapipeline.services.artifacts import (
6
+ ArtifactNotRegisteredError,
7
+ VECTOR_METADATA_SPEC,
8
+ VECTOR_SCHEMA_SPEC,
9
+ )
10
+ from datapipeline.config.metadata import VectorMetadata
11
+ from datapipeline.utils.time import parse_datetime
12
+ from datapipeline.runtime import Runtime
13
+
14
+
15
def resolve_window_bounds(runtime: Runtime, rectangular_required: bool) -> tuple[datetime | None, datetime | None]:
    """Resolve the ``(start, end)`` window bounds for *runtime*.

    Sources are consulted in this order:

    1. A two-item tuple stored on ``runtime.window_bounds``. It is returned
       as-is when both entries are set, or when at least one entry is set
       and a rectangular output is not required.
    2. The ``window`` of the vector metadata artifact, validated through
       ``VectorMetadata``.
    3. The ``window`` (or ``meta.window``) mapping of the vector schema
       artifact, used only to fill in a bound that is still missing.

    Failures while loading or validating an artifact are swallowed and
    treated as "no bounds available from that source".

    Args:
        runtime: Object exposing ``artifacts.load(spec)`` and, optionally,
            a ``window_bounds`` attribute.
        rectangular_required: When true, both bounds must be known.

    Returns:
        The ``(start, end)`` pair; either entry may be ``None`` when
        ``rectangular_required`` is false.

    Raises:
        RuntimeError: If ``rectangular_required`` is true and either bound
            could not be resolved.
    """
    cached = getattr(runtime, "window_bounds", None)
    if isinstance(cached, tuple) and len(cached) == 2:
        lower, upper = cached
        has_both = lower is not None and upper is not None
        has_any = lower is not None or upper is not None
        # A partial cache entry is only good enough for non-rectangular output.
        if has_both or (has_any and not rectangular_required):
            return lower, upper

    start = end = None

    # Window bounds are derived from artifacts (metadata/schema) only.
    metadata_doc = _load_artifact_dict(runtime, VECTOR_METADATA_SPEC)
    if metadata_doc is not None:
        try:
            meta_window = VectorMetadata.model_validate(metadata_doc).window
            if meta_window is not None:
                start, end = meta_window.start, meta_window.end
        except Exception:
            # Invalid metadata: discard anything partially read from it.
            start = end = None

    # Fallback: try schema/spec window if metadata missing or invalid.
    if start is None or end is None:
        schema_doc = _load_artifact_dict(runtime, VECTOR_SCHEMA_SPEC)
        try:
            if schema_doc is not None:
                schema_window = schema_doc.get("window") or schema_doc.get("meta", {}).get("window")
                if isinstance(schema_window, dict):
                    # Keep an already-resolved bound when the schema lacks one.
                    start = _parse_dt(schema_window.get("start")) or start
                    end = _parse_dt(schema_window.get("end")) or end
        except Exception:
            pass

    if rectangular_required and (start is None or end is None):
        raise RuntimeError(
            "Window bounds unavailable (rebuild metadata to materialize metadata.json with a window); rectangular output required."
        )
    return start, end


def _load_artifact_dict(runtime, spec):
    """Return ``runtime.artifacts.load(spec)`` if it yields a dict, else ``None``.

    Any error raised while loading (including an unregistered artifact) is
    swallowed and reported as ``None``.
    """
    try:
        loaded = runtime.artifacts.load(spec)
    except (ArtifactNotRegisteredError, Exception):
        return None
    return loaded if isinstance(loaded, dict) else None
66
+
67
+
68
def _parse_dt(value) -> datetime | None:
    """Best-effort conversion of *value* to a ``datetime``.

    A ``datetime`` instance is returned unchanged. ``None`` yields ``None``.
    Anything else is converted with ``str()`` and handed to
    ``parse_datetime``; if that raises, ``None`` is returned instead of
    propagating the error.
    """
    if isinstance(value, datetime):
        return value
    if value is not None:
        try:
            return parse_datetime(str(value))
        except Exception:
            # Unparseable input is reported as "no value".
            pass
    return None