jerry-thomas 1.0.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. datapipeline/analysis/vector/collector.py +0 -1
  2. datapipeline/build/tasks/config.py +0 -2
  3. datapipeline/build/tasks/metadata.py +0 -2
  4. datapipeline/build/tasks/scaler.py +0 -2
  5. datapipeline/build/tasks/schema.py +0 -2
  6. datapipeline/build/tasks/utils.py +0 -2
  7. datapipeline/cli/app.py +201 -81
  8. datapipeline/cli/commands/contract.py +145 -283
  9. datapipeline/cli/commands/demo.py +13 -0
  10. datapipeline/cli/commands/domain.py +4 -4
  11. datapipeline/cli/commands/dto.py +11 -0
  12. datapipeline/cli/commands/filter.py +2 -2
  13. datapipeline/cli/commands/inspect.py +0 -68
  14. datapipeline/cli/commands/list_.py +30 -13
  15. datapipeline/cli/commands/loader.py +11 -0
  16. datapipeline/cli/commands/mapper.py +82 -0
  17. datapipeline/cli/commands/parser.py +45 -0
  18. datapipeline/cli/commands/run_config.py +1 -3
  19. datapipeline/cli/commands/serve_pipeline.py +5 -7
  20. datapipeline/cli/commands/source.py +106 -18
  21. datapipeline/cli/commands/stream.py +292 -0
  22. datapipeline/cli/visuals/common.py +0 -2
  23. datapipeline/cli/visuals/sections.py +0 -2
  24. datapipeline/cli/workspace_utils.py +0 -3
  25. datapipeline/config/context.py +0 -2
  26. datapipeline/config/dataset/feature.py +1 -0
  27. datapipeline/config/metadata.py +0 -2
  28. datapipeline/config/project.py +0 -2
  29. datapipeline/config/resolution.py +10 -2
  30. datapipeline/config/tasks.py +9 -9
  31. datapipeline/domain/feature.py +3 -0
  32. datapipeline/domain/record.py +7 -7
  33. datapipeline/domain/sample.py +0 -2
  34. datapipeline/domain/vector.py +6 -8
  35. datapipeline/integrations/ml/adapter.py +0 -2
  36. datapipeline/integrations/ml/pandas_support.py +0 -2
  37. datapipeline/integrations/ml/rows.py +0 -2
  38. datapipeline/integrations/ml/torch_support.py +0 -2
  39. datapipeline/io/output.py +0 -2
  40. datapipeline/io/serializers.py +26 -16
  41. datapipeline/mappers/synthetic/time.py +9 -2
  42. datapipeline/pipeline/artifacts.py +3 -5
  43. datapipeline/pipeline/observability.py +0 -2
  44. datapipeline/pipeline/pipelines.py +118 -34
  45. datapipeline/pipeline/stages.py +54 -18
  46. datapipeline/pipeline/utils/spool_cache.py +142 -0
  47. datapipeline/pipeline/utils/transform_utils.py +27 -2
  48. datapipeline/services/artifacts.py +1 -4
  49. datapipeline/services/constants.py +1 -0
  50. datapipeline/services/factories.py +4 -6
  51. datapipeline/services/paths.py +10 -1
  52. datapipeline/services/project_paths.py +0 -2
  53. datapipeline/services/runs.py +0 -2
  54. datapipeline/services/scaffold/contract_yaml.py +76 -0
  55. datapipeline/services/scaffold/demo.py +141 -0
  56. datapipeline/services/scaffold/discovery.py +115 -0
  57. datapipeline/services/scaffold/domain.py +21 -13
  58. datapipeline/services/scaffold/dto.py +31 -0
  59. datapipeline/services/scaffold/filter.py +2 -1
  60. datapipeline/services/scaffold/layout.py +96 -0
  61. datapipeline/services/scaffold/loader.py +61 -0
  62. datapipeline/services/scaffold/mapper.py +116 -0
  63. datapipeline/services/scaffold/parser.py +56 -0
  64. datapipeline/services/scaffold/plugin.py +14 -2
  65. datapipeline/services/scaffold/source_yaml.py +91 -0
  66. datapipeline/services/scaffold/stream_plan.py +129 -0
  67. datapipeline/services/scaffold/utils.py +187 -0
  68. datapipeline/sources/data_loader.py +0 -2
  69. datapipeline/sources/decoders.py +49 -8
  70. datapipeline/sources/factory.py +9 -6
  71. datapipeline/sources/foreach.py +18 -3
  72. datapipeline/sources/synthetic/time/parser.py +1 -1
  73. datapipeline/sources/transports.py +10 -4
  74. datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
  75. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
  76. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
  77. datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
  78. datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
  79. datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
  80. datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
  81. datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
  82. datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
  83. datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
  84. datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
  85. datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
  86. datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
  87. datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
  88. datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
  89. datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
  90. datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
  91. datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
  92. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
  93. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
  94. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
  95. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  96. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
  97. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
  98. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
  99. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  100. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
  101. datapipeline/templates/plugin_skeleton/README.md +57 -136
  102. datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
  103. datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
  104. datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
  105. datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
  106. datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
  107. datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
  108. datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
  109. datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
  110. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
  111. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
  112. datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
  113. datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
  114. datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
  115. datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
  116. datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
  117. datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
  118. datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
  119. datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
  120. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
  121. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  122. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
  123. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
  124. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  125. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
  126. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
  127. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +9 -11
  128. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
  129. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
  130. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
  131. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
  132. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
  133. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
  134. datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
  135. datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
  136. datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +15 -0
  137. datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
  138. datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
  139. datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
  140. datapipeline/templates/stubs/dto.py.j2 +2 -2
  141. datapipeline/templates/stubs/filter.py.j2 +1 -1
  142. datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
  143. datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
  144. datapipeline/templates/stubs/mappers/ingest.py.j2 +20 -0
  145. datapipeline/templates/stubs/parser.py.j2 +5 -1
  146. datapipeline/templates/stubs/record.py.j2 +1 -1
  147. datapipeline/templates/stubs/source.yaml.j2 +1 -1
  148. datapipeline/transforms/debug/identity.py +34 -16
  149. datapipeline/transforms/debug/lint.py +14 -11
  150. datapipeline/transforms/feature/scaler.py +5 -12
  151. datapipeline/transforms/filter.py +73 -17
  152. datapipeline/transforms/interfaces.py +58 -0
  153. datapipeline/transforms/record/floor_time.py +10 -7
  154. datapipeline/transforms/record/lag.py +8 -10
  155. datapipeline/transforms/sequence.py +2 -3
  156. datapipeline/transforms/stream/dedupe.py +5 -7
  157. datapipeline/transforms/stream/ensure_ticks.py +39 -24
  158. datapipeline/transforms/stream/fill.py +34 -25
  159. datapipeline/transforms/stream/filter.py +25 -0
  160. datapipeline/transforms/stream/floor_time.py +16 -0
  161. datapipeline/transforms/stream/granularity.py +52 -30
  162. datapipeline/transforms/stream/lag.py +17 -0
  163. datapipeline/transforms/stream/rolling.py +72 -0
  164. datapipeline/transforms/utils.py +42 -10
  165. datapipeline/transforms/vector/drop/horizontal.py +0 -3
  166. datapipeline/transforms/vector/drop/orchestrator.py +0 -3
  167. datapipeline/transforms/vector/drop/vertical.py +0 -2
  168. datapipeline/transforms/vector/ensure_schema.py +0 -2
  169. datapipeline/utils/paths.py +0 -2
  170. datapipeline/utils/placeholders.py +0 -2
  171. datapipeline/utils/rich_compat.py +0 -3
  172. datapipeline/utils/window.py +0 -2
  173. jerry_thomas-2.0.1.dist-info/METADATA +269 -0
  174. jerry_thomas-2.0.1.dist-info/RECORD +264 -0
  175. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/WHEEL +1 -1
  176. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/entry_points.txt +7 -3
  177. datapipeline/services/scaffold/mappers.py +0 -55
  178. datapipeline/services/scaffold/source.py +0 -191
  179. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
  180. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
  181. datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
  182. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
  183. datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
  184. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
  185. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
  186. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
  187. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
  188. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
  189. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
  190. datapipeline/templates/stubs/mapper.py.j2 +0 -22
  191. jerry_thomas-1.0.3.dist-info/METADATA +0 -827
  192. jerry_thomas-1.0.3.dist-info/RECORD +0 -198
  193. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/licenses/LICENSE +0 -0
  194. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/top_level.txt +0 -0
datapipeline/cli/commands/contract.py
@@ -3,30 +3,63 @@ from pathlib import Path
 
 from datapipeline.config.workspace import WorkspaceContext
 from datapipeline.cli.workspace_utils import resolve_default_project_yaml
-from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
-from datapipeline.services.entrypoints import read_group_entries, inject_ep
-from datapipeline.services.constants import FILTERS_GROUP, MAPPERS_GROUP
-from datapipeline.services.project_paths import (
-    sources_dir as resolve_sources_dir,
-    streams_dir as resolve_streams_dir,
-    ensure_project_scaffold,
-    resolve_project_yaml_path,
+from datapipeline.services.paths import pkg_root
+from datapipeline.services.entrypoints import read_group_entries
+from datapipeline.services.constants import FILTERS_GROUP
+from datapipeline.services.project_paths import resolve_project_yaml_path
+from datapipeline.services.scaffold.contract_yaml import (
+    write_ingest_contract,
+    write_composed_contract,
+    compose_inputs,
 )
-from datapipeline.services.scaffold.mappers import attach_source_to_domain
-import re
-
-
-def _pick_from_list(prompt: str, options: list[str]) -> str:
-    print(prompt, file=sys.stderr)
-    for i, opt in enumerate(options, 1):
-        print(f" [{i}] {opt}", file=sys.stderr)
-    while True:
-        sel = input("> ").strip()
-        if sel.isdigit():
-            idx = int(sel)
-            if 1 <= idx <= len(options):
-                return options[idx - 1]
-        print("Please enter a number from the list.", file=sys.stderr)
+from datapipeline.services.scaffold.discovery import (
+    list_domains,
+    list_mappers,
+    list_sources,
+    list_streams,
+)
+from datapipeline.services.scaffold.utils import (
+    info,
+    status,
+    error_exit,
+    pick_from_menu,
+    pick_from_list,
+    pick_multiple_from_list,
+    choose_name,
+)
+from datapipeline.services.scaffold.layout import default_stream_id
+from datapipeline.cli.commands.mapper import handle as handle_mapper
+from datapipeline.services.scaffold.mapper import create_composed_mapper
+
+
+def _select_mapper(*, allow_identity: bool, allow_create: bool, root: Path | None) -> str:
+    mappers = list_mappers(root=root)
+    options: list[tuple[str, str]] = []
+    if allow_create:
+        options.append(("create", "Create new mapper (default)"))
+    if mappers:
+        options.append(("existing", "Select existing mapper"))
+    if allow_identity:
+        options.append(("identity", "Identity mapper"))
+    options.append(("custom", "Custom mapper"))
+
+    if not options:
+        error_exit("No mapper options available")
+
+    choice = pick_from_menu("Mapper:", options)
+    if choice == "existing":
+        return pick_from_menu(
+            "Select mapper entrypoint:",
+            [(k, k) for k in sorted(mappers.keys())],
+        )
+    if choice == "create":
+        return handle_mapper(name=None, plugin_root=root)
+    if choice == "identity":
+        return "identity"
+    ep = input("Mapper entrypoint: ").strip()
+    if not ep:
+        error_exit("Mapper entrypoint is required")
+    return ep
 
 
 def handle(
@@ -38,20 +71,19 @@ def handle(
     root_dir, name, pyproject = pkg_root(plugin_root)
     default_project = resolve_default_project_yaml(workspace)
     # Select contract type: Ingest (source->stream) or Composed (streams->stream)
-    print("Select contract type:", file=sys.stderr)
-    print(" [1] Ingest (source → stream)", file=sys.stderr)
-    print(" [2] Composed (streams → stream)", file=sys.stderr)
+    info("Contract type:")
+    info(" [1] Ingest (source → stream)")
+    info(" [2] Composed (streams → stream)")
     sel = input("> ").strip()
     if sel == "2":
         if use_identity:
-            print("[error] --identity is only supported for ingest contracts.", file=sys.stderr)
-            raise SystemExit(2)
+            error_exit("--identity is only supported for ingest contracts.")
         # Defer to composed scaffolder (fully interactive)
         scaffold_conflux(
             stream_id=None,
             inputs=None,
             mapper_path=None,
-            with_mapper_stub=True,
+            with_mapper_stub=False,
             plugin_root=plugin_root,
             project_yaml=default_project,
         )
@@ -60,129 +92,47 @@ def handle(
     # Discover sources by scanning sources_dir YAMLs
     # Default to dataset-scoped project config
     proj_path = default_project or resolve_project_yaml_path(root_dir)
-    # Ensure a minimal project scaffold so we can resolve dirs interactively
-    ensure_project_scaffold(proj_path)
-    sources_dir = resolve_sources_dir(proj_path)
-    source_options: list[str] = []
-    if sources_dir.exists():
-        # Recursively scan YAMLs and read declared source id (alias)
-        from datapipeline.utils.load import load_yaml
-        from datapipeline.services.constants import PARSER_KEY, LOADER_KEY, SOURCE_ID_KEY
-        for p in sorted(sources_dir.rglob("*.y*ml")):
-            try:
-                data = load_yaml(p)
-            except Exception:
-                continue
-            if isinstance(data, dict) and isinstance(data.get(PARSER_KEY), dict) and isinstance(data.get(LOADER_KEY), dict):
-                alias = data.get(SOURCE_ID_KEY)
-                if isinstance(alias, str):
-                    source_options.append(alias)
-    source_options = sorted(set(source_options))
+    source_options = list_sources(proj_path)
     if not source_options:
-        print("[error] No sources found. Create one first (jerry source add ...)")
-        raise SystemExit(2)
+        error_exit("No sources found. Create one first (jerry source create ...)")
 
-    src_key = _pick_from_list(
-        "Select a source for the contract:", source_options)
+    src_key = pick_from_list("Select source:", source_options)
     # Expect aliases as 'provider.dataset' (from source file's id)
     parts = src_key.split(".", 1)
     if len(parts) != 2:
-        print("[error] Source alias must be 'provider.dataset' (from source file's id)", file=sys.stderr)
-        raise SystemExit(2)
+        error_exit("Source alias must be 'provider.dataset' (from source file's id)")
     provider, dataset = parts[0], parts[1]
 
-    # Discover domains by scanning the package, fallback to EPs if needed
-    base = resolve_base_pkg_dir(root_dir, name)
-    domain_options = []
-    for dirname in ("domains",):
-        dom_dir = base / dirname
-        if dom_dir.exists():
-            domain_options.extend(
-                [p.name for p in dom_dir.iterdir() if p.is_dir()
-                 and (p / "model.py").exists()]
-            )
-    domain_options = sorted(set(domain_options))
+    domain_options = list_domains(root=plugin_root)
     if not domain_options:
         domain_options = sorted(
            read_group_entries(pyproject, FILTERS_GROUP).keys())
     if not domain_options:
-        print("[error] No domains found. Create one first (jerry domain add ...)")
-        raise SystemExit(2)
+        error_exit("No domains found. Create one first (jerry domain create ...)")
 
-    dom_name = _pick_from_list(
-        "Select a domain to contract with:", domain_options)
-
-    def _slug(s: str) -> str:
-        s = s.strip().lower()
-        s = re.sub(r"[^a-z0-9]+", "_", s)
-        return s.strip("_")
+    dom_name = pick_from_list("Select domain:", domain_options)
 
     if use_identity:
         mapper_ep = "identity"
-        print("[ok] Using built-in mapper entry point 'identity'.")
+        status("ok", "Using built-in mapper entry point 'identity'.")
     else:
-        # create mapper + EP (domain.origin)
-        attach_source_to_domain(
-            domain=dom_name,
-            provider=provider,
-            dataset=dataset,
+        mapper_ep = _select_mapper(
+            allow_identity=True,
+            allow_create=True,
             root=plugin_root,
         )
-        ep_key = f"{_slug(dom_name)}.{_slug(dataset)}"
-        print(f"[ok] Registered mapper entry point as '{ep_key}'.")
-        mapper_ep = ep_key
 
     # Derive canonical stream id as domain.dataset[.variant]
-    print("Optional variant suffix (press Enter to skip):", file=sys.stderr)
+    info("Optional variant suffix (press Enter to skip):")
     variant = input("> ").strip()
-    if variant:
-        canonical_alias = f"{_slug(dom_name)}.{_slug(dataset)}.{_slug(variant)}"
-    else:
-        canonical_alias = f"{_slug(dom_name)}.{_slug(dataset)}"
+    stream_id = choose_name("Stream id", default=default_stream_id(dom_name, dataset, variant or None))
 
-    # Inject per-file canonical stream into streams directory
-    streams_path = resolve_streams_dir(proj_path)
-
-    # canonical_alias and mapper_ep defined above
-    # Write a single-file canonical spec into streams directory, matching
-    # ContractConfig schema with helpful commented placeholders per stage.
-    try:
-        # Ensure streams_path is a directory path
-        streams_dir = streams_path if streams_path.is_dir() else streams_path.parent
-        streams_dir.mkdir(parents=True, exist_ok=True)
-        cfile = streams_dir / f"{canonical_alias}.yaml"
-        # Build a richer scaffold as YAML text to preserve comments
-        scaffold = f"""
-kind: ingest
-source: {src_key}
-id: {canonical_alias} # format: domain.dataset.(variant)
-
-mapper:
-  entrypoint: {mapper_ep}
-  args: {{}}
-
-# partition_by: <field or [fields]>
-# sort_batch_size: 100000 # in-memory sort chunk size
-
-record: # record-level transforms
-  - filter: {{ operator: ge, field: time, comparand: "${{start_time}}" }}
-  - filter: {{ operator: le, field: time, comparand: "${{end_time}}" }}
-  # - floor_time: {{ resolution: 10m }}
-  # - lag: {{ lag: 10m }}
-
-# stream: # per-feature transforms (input sorted by id,time)
-#   - ensure_ticks: {{ tick: 10m }}
-#   - granularity: {{ mode: first }}
-#   - fill: {{ statistic: median, window: 6, min_samples: 1 }}
-
-# debug: # optional validation-only checks
-#   - lint: {{ mode: warn, tick: 10m }}
-"""
-        with cfile.open("w", encoding="utf-8") as f:
-            f.write(scaffold)
-        print(f"[new] canonical spec: {cfile}")
-    except Exception as e:
-        print(f"[error] Failed to write canonical spec: {e}", file=sys.stderr)
+    write_ingest_contract(
+        project_yaml=proj_path,
+        stream_id=stream_id,
+        source=src_key,
+        mapper_entrypoint=mapper_ep,
+    )
 
 
 def scaffold_conflux(
@@ -200,174 +150,86 @@ def scaffold_conflux(
     mapper_path default: <pkg>.domains.<domain>:mapper where domain = stream_id.split('.')[0]
     """
     root_dir, name, _ = pkg_root(plugin_root)
-    # Resolve default project path early for interactive selections
     proj_path = project_yaml or resolve_project_yaml_path(root_dir)
-    ensure_project_scaffold(proj_path)
-    # Defer target domain selection until after choosing inputs
-
-    # We will write the contract after selecting inputs and target domain
-    # Build inputs string first: interactive select, then target domain
     if not inputs:
-        # Interactive selection of canonical streams (scan recursively, read ids)
-        streams: list[str] = []
-        sdir = resolve_streams_dir(proj_path)
-        if sdir.exists():
-            from datapipeline.utils.load import load_yaml
-            from datapipeline.services.constants import STREAM_ID_KEY
-            for p in sorted(sdir.rglob("*.y*ml")):
-                try:
-                    data = load_yaml(p)
-                except Exception:
-                    continue
-                if isinstance(data, dict) and data.get("kind") in {"ingest", "composed"}:
-                    sid = data.get(STREAM_ID_KEY)
-                    if isinstance(sid, str) and sid:
-                        streams.append(sid)
-        streams = sorted(set(streams))
+        streams = list_streams(proj_path)
         if not streams:
-            print(
-                "[error] No canonical streams found. Create them first via 'jerry contract' (ingest).", file=sys.stderr)
-            raise SystemExit(2)
-        print(
-            "Select one or more input streams (comma-separated numbers):", file=sys.stderr)
-        for i, sid in enumerate(streams, 1):
-            print(f" [{i}] {sid}", file=sys.stderr)
-        sel = input("> ").strip()
-        try:
-            idxs = [int(x) for x in sel.split(',') if x.strip()]
-        except ValueError:
-            print("[error] Invalid selection.", file=sys.stderr)
-            raise SystemExit(2)
-        picked = []
-        for i in idxs:
-            if 1 <= i <= len(streams):
-                picked.append(streams[i-1])
-        if not picked:
-            print("[error] No inputs selected.", file=sys.stderr)
-            raise SystemExit(2)
-        # Build default aliases using domain+variant to avoid collisions.
-        # Stream id format: domain.dataset.variant (variant optional)
-        built = []
-        for ref in picked:
-            parts = ref.split(".")
-            if len(parts) >= 3:
-                domain, variant = parts[0], parts[-1]
-                alias = f"{domain}_{variant}"
-            elif len(parts) == 2:
-                # No explicit variant -> use domain as alias
-                alias = parts[0]
-            else:
-                # Fallback to full ref if unexpected
-                alias = ref
-            built.append(f"{alias}={ref}")
-        inputs = ",".join(built)
-
-    # YAML list items do not need commas; avoid embedding commas in item text
-    inputs_list = "\n - ".join(
-        s.strip() for s in inputs.split(",") if s.strip()
-    )
+            error_exit("No canonical streams found. Create them first via 'jerry contract' (ingest).")
+        picked = pick_multiple_from_list(
+            "Select one or more input streams (comma-separated numbers):",
+            streams,
+        )
+        inputs_list, driver_key = compose_inputs(picked)
+    else:
+        inputs_list = "\n - ".join(s.strip() for s in inputs.split(",") if s.strip())
+        driver_key = inputs.split(",")[0].split("=")[0].strip()
 
     # If no stream_id, select target domain now and derive stream id (mirror ingest flow)
     if not stream_id:
-        base = resolve_base_pkg_dir(root_dir, name)
-        domain_options: list[str] = []
-        dom_dir = base / "domains"
-        if dom_dir.exists():
-            domain_options.extend(
-                [p.name for p in dom_dir.iterdir() if p.is_dir()
-                 and (p / "model.py").exists()]
-            )
-        domain_options = sorted(set(domain_options))
+        domain_options = list_domains(root=plugin_root)
         if not domain_options:
-            print("[error] No domains found. Create one first (jerry domain add ...)")
-            raise SystemExit(2)
-        print("Select a target domain for the composed stream:", file=sys.stderr)
+            error_exit("No domains found. Create one first (jerry domain create ...)")
+        info("Select domain:")
         for i, opt in enumerate(domain_options, 1):
-            print(f" [{i}] {opt}", file=sys.stderr)
+            info(f" [{i}] {opt}")
         sel = input("> ").strip()
        try:
            idx = int(sel)
            if idx < 1 or idx > len(domain_options):
                raise ValueError
        except Exception:
-            print("[error] Invalid selection.", file=sys.stderr)
-            raise SystemExit(2)
+            error_exit("Invalid selection.")
        domain = domain_options[idx - 1]
        stream_id = f"{domain}.processed"
-        # Default mapper path uses import-safe package dir, not project name
-        pkg_base = resolve_base_pkg_dir(root_dir, name).name
-        mapper_path = mapper_path or f"{pkg_base}.mappers.{domain}:mapper"
    else:
-        domain = stream_id.split('.')[0]
-        pkg_base = resolve_base_pkg_dir(root_dir, name).name
-        mapper_path = mapper_path or f"{pkg_base}.mappers.{domain}:mapper"
-
-    # Optional mapper stub under mappers/
-    if with_mapper_stub:
-        base = resolve_base_pkg_dir(root_dir, name)
-        map_pkg_dir = base / "mappers"
-        map_pkg_dir.mkdir(parents=True, exist_ok=True)
-        (map_pkg_dir / "__init__.py").touch(exist_ok=True)
-        mapper_file = map_pkg_dir / f"{domain}.py"
-        if not mapper_file.exists():
-            mapper_file.write_text(
-                """
-from typing import Iterator, Mapping
-from datapipeline.domain.record import TemporalRecord
-
-
-def mapper(
-    inputs: Mapping[str, Iterator[TemporalRecord]],
-    *, driver: str | None = None, aux: Mapping[str, Iterator[TemporalRecord]] | None = None, context=None, **params
-) -> Iterator[TemporalRecord]:
-    # TODO: implement domain math; inputs are ordered/regularized; aux is raw
-    key = driver or next(iter(inputs.keys()))
-    for rec in inputs[key]:
-        yield rec # replace with your dataclass and computation
-""".lstrip()
+        domain = stream_id.split(".")[0]
+
+    # Mapper selection for composed contracts (no identity)
+    if not mapper_path:
+        mappers = list_mappers(root=plugin_root)
+        if mappers:
+            choice = pick_from_menu(
+                "Mapper:",
+                [
+                    ("create", "Create new composed mapper (default)"),
+                    ("existing", "Select existing mapper"),
+                    ("custom", "Custom mapper"),
+                ],
            )
-            print(f"[new] {mapper_file}")
-        # Register mapper entry point under datapipeline.mappers
-        # Choose EP name equal to stream_id for clarity/reuse
-        ep_key = stream_id
-        # If mapper_path looks like a dotted target (module:attr), use it; otherwise build default target
-        package_name = base.name # filesystem package dir is import-safe (underscored)
-        default_target = f"{package_name}.mappers.{domain}:mapper"
-        ep_target = mapper_path if (
-            mapper_path and ":" in mapper_path) else default_target
-        pyproj_path = root_dir / "pyproject.toml"
-        try:
-            toml_text = pyproj_path.read_text()
-            updated = inject_ep(toml_text, MAPPERS_GROUP, ep_key, ep_target)
-            if updated != toml_text:
-                pyproj_path.write_text(updated)
-                print(
-                    f"[ok] Registered mapper entry point '{ep_key}' -> {ep_target}")
-        except FileNotFoundError:
-            print(
-                "[info] pyproject.toml not found; skipping entry point registration", file=sys.stderr)
-        # From here on, reference the EP name in the YAML
-        mapper_path = ep_key
-    # Contract file path (now that stream_id is known)
-    ensure_project_scaffold(proj_path)
-    streams_path = resolve_streams_dir(proj_path)
-    streams_dir = streams_path if streams_path.is_dir() else streams_path.parent
-    streams_dir.mkdir(parents=True, exist_ok=True)
-    cfile = streams_dir / f"{stream_id}.yaml"
-    if cfile.exists():
-        print(f"[info] Contract already exists, skipping: {cfile}")
-        return
-
-    yaml_text = f"""
-kind: composed
-id: {stream_id} # format: domain.dataset.(variant)
-# partition_by: <field or [fields]>
-inputs:
- - {inputs_list}
-
-mapper:
-  entrypoint: {mapper_path}
-  args: {{ driver: {(inputs.split(',')[0].split('=')[0].strip() if '=' in inputs.split(',')[0] else inputs.split(',')[0].strip())} }}
-"""
-    cfile.write_text(yaml_text.strip() + "\n", encoding="utf-8")
-    print(f"[new] composed contract: {cfile}")
+        else:
+            choice = pick_from_menu(
+                "Mapper:",
+                [
+                    ("create", "Create new composed mapper (default)"),
+                    ("custom", "Custom mapper"),
+                ],
+            )
+        if choice == "existing":
+            mapper_path = pick_from_menu(
+                "Select mapper entrypoint:",
+                [(k, k) for k in sorted(mappers.keys())],
+            )
+            with_mapper_stub = False
+        elif choice == "create":
+            with_mapper_stub = True
+        else:
+            mapper_path = input("Mapper entrypoint: ").strip()
+            if not mapper_path:
+                error_exit("Mapper entrypoint is required")
+            with_mapper_stub = False
+
+    # Optional mapper stub under mappers/ (composed signature)
+    if with_mapper_stub:
+        mapper_path = create_composed_mapper(
+            domain=domain,
+            stream_id=stream_id,
+            root=plugin_root,
+            mapper_path=mapper_path,
+        )
+    write_composed_contract(
+        project_yaml=proj_path,
+        stream_id=stream_id,
+        inputs_list=inputs_list,
+        mapper_entrypoint=mapper_path,
+        driver_key=driver_key,
+    )
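Note: the alias-building loop removed from scaffold_conflux above now lives behind compose_inputs in datapipeline.services.scaffold.contract_yaml, which is not part of this excerpt. The sketch below is a hypothetical reconstruction based only on that removed code (alias = domain_variant, or just the domain when the stream id has no variant; the first alias doubles as the driver key); the shipped implementation may differ.

# Hypothetical sketch of compose_inputs, inferred from the removed
# alias-building code in scaffold_conflux; not the actual 2.0.1 source.
def compose_inputs(picked: list[str]) -> tuple[str, str]:
    built: list[str] = []
    for ref in picked:
        parts = ref.split(".")
        if len(parts) >= 3:      # domain.dataset.variant
            alias = f"{parts[0]}_{parts[-1]}"
        elif len(parts) == 2:    # no variant -> use the domain as the alias
            alias = parts[0]
        else:                    # unexpected shape -> fall back to the full ref
            alias = ref
        built.append(f"{alias}={ref}")
    # Render as YAML list items (one per line) and treat the first alias as the driver.
    inputs_list = "\n - ".join(built)
    driver_key = built[0].split("=")[0] if built else ""
    return inputs_list, driver_key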
datapipeline/cli/commands/demo.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.demo import scaffold_demo
+from datapipeline.services.scaffold.plugin import scaffold_plugin
+
+
+def handle(subcmd: str, *, out: str | None = None) -> None:
+    if subcmd != "init":
+        raise SystemExit(f"Unknown demo subcommand: {subcmd}")
+    demo_name = "demo"
+    target_root = Path(out or ".")
+    scaffold_plugin(demo_name, target_root)
+    scaffold_demo(target_root / demo_name)
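Assuming the expanded entry_points.txt and cli/app.py changes wire this handler to a new demo subcommand, bootstrapping the bundled demo workspace presumably reduces to one call. The snippet below only restates the handler shown above and is not taken from the package's own docs.

# Hypothetical usage, equivalent to what a "jerry demo init" invocation would do:
from datapipeline.cli.commands.demo import handle

handle("init", out=".")  # scaffolds ./demo as a plugin, then overlays the demo workspace files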
datapipeline/cli/commands/domain.py
@@ -1,14 +1,14 @@
 from pathlib import Path
 
 from datapipeline.services.scaffold.domain import create_domain
+from datapipeline.services.scaffold.utils import error_exit
 
 
 def handle(subcmd: str, domain: str | None, *, plugin_root: Path | None = None) -> None:
-    if subcmd in {"create", "add"}:
+    if subcmd == "create":
        if not domain:
-            print(
-                "[error] Domain name is required. Use 'jerry domain add <name>' "
+            error_exit(
+                "Domain name is required. Use 'jerry domain create <name>' "
                "or pass -n/--name."
            )
-            raise SystemExit(2)
        create_domain(domain=domain, root=plugin_root)
datapipeline/cli/commands/dto.py
@@ -0,0 +1,11 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.dto import create_dto
+from datapipeline.services.scaffold.utils import status, prompt_required
+
+
+def handle(name: str | None, *, plugin_root: Path | None = None) -> None:
+    if not name:
+        name = prompt_required("DTO class name")
+    create_dto(name=name, root=plugin_root)
+    status("ok", "DTO ready.")
datapipeline/cli/commands/filter.py
@@ -1,9 +1,9 @@
 from datapipeline.services.scaffold.filter import create_filter
+from datapipeline.services.scaffold.utils import error_exit
 
 
 def handle(subcmd: str, name: str | None) -> None:
     if subcmd == "create":
         if not name:
-            print("[error] --name is required for filter create")
-            raise SystemExit(2)
+            error_exit("--name is required for filter create")
         create_filter(name=name, root=None)
datapipeline/cli/commands/inspect.py
@@ -402,71 +402,3 @@ def partitions(
         work=_work,
     )
 
-
-def expected(
-    project: str,
-    *,
-    output: str | None = None,
-    visuals: str | None = None,
-    progress: str | None = None,
-    log_level: int | None = None,
-    workspace=None,
-) -> None:
-    """Discover complete set of observed full feature IDs and write a list.
-
-    Writes newline-separated ids to `<paths.artifacts>/expected.txt` by default.
-    """
-
-    _prepare_inspect_build(
-        project,
-        visuals=visuals,
-        progress=progress,
-        workspace=workspace,
-    )
-
-    def _work(dataset_ctx, progress_style):
-        project_path = dataset_ctx.project
-        dataset = dataset_ctx.dataset
-        feature_cfgs = list(dataset.features or [])
-        target_cfgs = list(dataset.targets or [])
-
-        context = dataset_ctx.pipeline_context
-        vectors = build_vector_pipeline(
-            context,
-            feature_cfgs,
-            dataset.group_by,
-            target_configs=target_cfgs,
-        )
-        vector_iter = _iter_with_progress(
-            vectors,
-            progress_style=progress_style,
-            label="Processing vectors",
-        )
-        ids: set[str] = set()
-        for sample in vector_iter:
-            ids.update(sample.features.values.keys())
-            if sample.targets:
-                ids.update(sample.targets.values.keys())
-
-        try:
-            default_path = artifacts_root(project_path) / "expected.txt"
-        except Exception as e:
-            raise RuntimeError(
-                f"{e}. Set `paths.artifacts` in your project.yaml to a writable directory."
-            )
-        output_path = Path(output) if output else default_path
-        ensure_parent(output_path)
-        with output_path.open("w", encoding="utf-8") as fh:
-            for fid in sorted(ids):
-                fh.write(f"{fid}\n")
-        print(f"[write] Saved expected feature list to {output_path} ({len(ids)} ids)")
-
-    _run_inspect_job(
-        project,
-        visuals=visuals,
-        progress=progress,
-        log_level=log_level,
-        label="Inspect expected ids",
-        section="expected",
-        work=_work,
-    )
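The rewritten commands above (contract, domain, dto, filter) all route their prompts and failures through datapipeline.services.scaffold.utils (+187 lines, not included in this excerpt). Below is a minimal sketch of how those helpers presumably behave, inferred only from the call sites and the prompt code they replaced; the names are real but the bodies are assumptions.

import sys

# Hypothetical reconstruction of the scaffold.utils helpers; the shipped module may differ.
def info(msg: str) -> None:
    # Prompts and menus go to stderr so stdout stays clean for piped output.
    print(msg, file=sys.stderr)

def status(tag: str, msg: str) -> None:
    # e.g. status("ok", "DTO ready.") -> "[ok] DTO ready."
    print(f"[{tag}] {msg}", file=sys.stderr)

def error_exit(msg: str) -> None:
    # Replaces the repeated print("[error] ...") + raise SystemExit(2) pattern.
    print(f"[error] {msg}", file=sys.stderr)
    raise SystemExit(2)

def prompt_required(label: str) -> str:
    # Re-prompt until the user supplies a non-empty value.
    while True:
        value = input(f"{label}: ").strip()
        if value:
            return value
        info(f"{label} is required.")

def pick_from_list(prompt: str, options: list[str]) -> str:
    # Numbered menu over plain strings, mirroring the removed _pick_from_list.
    info(prompt)
    for i, opt in enumerate(options, 1):
        info(f" [{i}] {opt}")
    while True:
        sel = input("> ").strip()
        if sel.isdigit() and 1 <= int(sel) <= len(options):
            return options[int(sel) - 1]
        info("Please enter a number from the list.")

def pick_from_menu(prompt: str, options: list[tuple[str, str]]) -> str:
    # Options are (key, label) pairs; returns the key of the chosen label.
    labels = [label for _, label in options]
    return options[labels.index(pick_from_list(prompt, labels))][0]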