jerry-thomas 1.0.3__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. datapipeline/analysis/vector/collector.py +0 -1
  2. datapipeline/build/tasks/config.py +0 -2
  3. datapipeline/build/tasks/metadata.py +0 -2
  4. datapipeline/build/tasks/scaler.py +0 -2
  5. datapipeline/build/tasks/schema.py +0 -2
  6. datapipeline/build/tasks/utils.py +0 -2
  7. datapipeline/cli/app.py +201 -81
  8. datapipeline/cli/commands/contract.py +145 -283
  9. datapipeline/cli/commands/demo.py +13 -0
  10. datapipeline/cli/commands/domain.py +4 -4
  11. datapipeline/cli/commands/dto.py +11 -0
  12. datapipeline/cli/commands/filter.py +2 -2
  13. datapipeline/cli/commands/inspect.py +0 -68
  14. datapipeline/cli/commands/list_.py +30 -13
  15. datapipeline/cli/commands/loader.py +11 -0
  16. datapipeline/cli/commands/mapper.py +82 -0
  17. datapipeline/cli/commands/parser.py +45 -0
  18. datapipeline/cli/commands/run_config.py +1 -3
  19. datapipeline/cli/commands/serve_pipeline.py +5 -7
  20. datapipeline/cli/commands/source.py +106 -18
  21. datapipeline/cli/commands/stream.py +286 -0
  22. datapipeline/cli/visuals/common.py +0 -2
  23. datapipeline/cli/visuals/sections.py +0 -2
  24. datapipeline/cli/workspace_utils.py +0 -3
  25. datapipeline/config/context.py +0 -2
  26. datapipeline/config/dataset/feature.py +1 -0
  27. datapipeline/config/metadata.py +0 -2
  28. datapipeline/config/project.py +0 -2
  29. datapipeline/config/resolution.py +10 -2
  30. datapipeline/config/tasks.py +9 -9
  31. datapipeline/domain/feature.py +3 -0
  32. datapipeline/domain/record.py +7 -7
  33. datapipeline/domain/sample.py +0 -2
  34. datapipeline/domain/vector.py +6 -8
  35. datapipeline/integrations/ml/adapter.py +0 -2
  36. datapipeline/integrations/ml/pandas_support.py +0 -2
  37. datapipeline/integrations/ml/rows.py +0 -2
  38. datapipeline/integrations/ml/torch_support.py +0 -2
  39. datapipeline/io/output.py +0 -2
  40. datapipeline/io/serializers.py +26 -16
  41. datapipeline/mappers/synthetic/time.py +9 -2
  42. datapipeline/pipeline/artifacts.py +3 -5
  43. datapipeline/pipeline/observability.py +0 -2
  44. datapipeline/pipeline/pipelines.py +118 -34
  45. datapipeline/pipeline/stages.py +42 -17
  46. datapipeline/pipeline/utils/spool_cache.py +142 -0
  47. datapipeline/pipeline/utils/transform_utils.py +27 -2
  48. datapipeline/services/artifacts.py +1 -4
  49. datapipeline/services/constants.py +1 -0
  50. datapipeline/services/factories.py +4 -6
  51. datapipeline/services/project_paths.py +0 -2
  52. datapipeline/services/runs.py +0 -2
  53. datapipeline/services/scaffold/contract_yaml.py +76 -0
  54. datapipeline/services/scaffold/demo.py +141 -0
  55. datapipeline/services/scaffold/discovery.py +115 -0
  56. datapipeline/services/scaffold/domain.py +21 -13
  57. datapipeline/services/scaffold/dto.py +31 -0
  58. datapipeline/services/scaffold/filter.py +2 -1
  59. datapipeline/services/scaffold/layout.py +96 -0
  60. datapipeline/services/scaffold/loader.py +61 -0
  61. datapipeline/services/scaffold/mapper.py +116 -0
  62. datapipeline/services/scaffold/parser.py +56 -0
  63. datapipeline/services/scaffold/plugin.py +14 -2
  64. datapipeline/services/scaffold/source_yaml.py +91 -0
  65. datapipeline/services/scaffold/stream_plan.py +110 -0
  66. datapipeline/services/scaffold/utils.py +187 -0
  67. datapipeline/sources/data_loader.py +0 -2
  68. datapipeline/sources/decoders.py +49 -8
  69. datapipeline/sources/factory.py +9 -6
  70. datapipeline/sources/foreach.py +18 -3
  71. datapipeline/sources/synthetic/time/parser.py +1 -1
  72. datapipeline/sources/transports.py +10 -4
  73. datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
  74. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
  75. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
  76. datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
  77. datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
  78. datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
  79. datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
  80. datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
  81. datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
  82. datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
  83. datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
  84. datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
  85. datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
  86. datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
  87. datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
  88. datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
  89. datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
  90. datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
  91. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
  92. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
  93. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
  94. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  95. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
  96. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
  97. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
  98. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  99. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
  100. datapipeline/templates/plugin_skeleton/README.md +57 -136
  101. datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
  102. datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
  103. datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
  104. datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
  105. datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
  106. datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
  107. datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
  108. datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
  109. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
  110. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
  111. datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
  112. datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
  113. datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
  114. datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
  115. datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
  116. datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
  117. datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
  118. datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
  119. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
  120. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  121. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
  122. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
  123. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  124. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
  125. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
  126. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +7 -10
  127. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
  128. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
  129. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
  130. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
  131. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
  132. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
  133. datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
  134. datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
  135. datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +14 -0
  136. datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
  137. datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
  138. datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
  139. datapipeline/templates/stubs/dto.py.j2 +1 -1
  140. datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
  141. datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
  142. datapipeline/templates/stubs/mappers/ingest.py.j2 +17 -0
  143. datapipeline/templates/stubs/parser.py.j2 +4 -0
  144. datapipeline/templates/stubs/record.py.j2 +0 -1
  145. datapipeline/templates/stubs/source.yaml.j2 +1 -1
  146. datapipeline/transforms/debug/identity.py +34 -16
  147. datapipeline/transforms/debug/lint.py +14 -11
  148. datapipeline/transforms/feature/scaler.py +5 -12
  149. datapipeline/transforms/filter.py +73 -17
  150. datapipeline/transforms/interfaces.py +58 -0
  151. datapipeline/transforms/record/floor_time.py +10 -7
  152. datapipeline/transforms/record/lag.py +8 -10
  153. datapipeline/transforms/sequence.py +2 -3
  154. datapipeline/transforms/stream/dedupe.py +5 -7
  155. datapipeline/transforms/stream/ensure_ticks.py +39 -24
  156. datapipeline/transforms/stream/fill.py +34 -25
  157. datapipeline/transforms/stream/filter.py +25 -0
  158. datapipeline/transforms/stream/floor_time.py +16 -0
  159. datapipeline/transforms/stream/granularity.py +52 -30
  160. datapipeline/transforms/stream/lag.py +17 -0
  161. datapipeline/transforms/stream/rolling.py +72 -0
  162. datapipeline/transforms/utils.py +42 -10
  163. datapipeline/transforms/vector/drop/horizontal.py +0 -3
  164. datapipeline/transforms/vector/drop/orchestrator.py +0 -3
  165. datapipeline/transforms/vector/drop/vertical.py +0 -2
  166. datapipeline/transforms/vector/ensure_schema.py +0 -2
  167. datapipeline/utils/paths.py +0 -2
  168. datapipeline/utils/placeholders.py +0 -2
  169. datapipeline/utils/rich_compat.py +0 -3
  170. datapipeline/utils/window.py +0 -2
  171. jerry_thomas-2.0.0.dist-info/METADATA +282 -0
  172. jerry_thomas-2.0.0.dist-info/RECORD +264 -0
  173. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.0.dist-info}/WHEEL +1 -1
  174. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.0.dist-info}/entry_points.txt +7 -3
  175. datapipeline/services/scaffold/mappers.py +0 -55
  176. datapipeline/services/scaffold/source.py +0 -191
  177. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
  178. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
  179. datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
  180. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
  181. datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
  182. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
  183. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
  184. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
  185. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
  186. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
  187. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
  188. datapipeline/templates/stubs/mapper.py.j2 +0 -22
  189. jerry_thomas-1.0.3.dist-info/METADATA +0 -827
  190. jerry_thomas-1.0.3.dist-info/RECORD +0 -198
  191. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.0.dist-info}/licenses/LICENSE +0 -0
  192. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.0.dist-info}/top_level.txt +0 -0
datapipeline/cli/commands/list_.py

@@ -1,7 +1,17 @@
 from pathlib import Path
 
+from datapipeline.config.workspace import WorkspaceContext
+from datapipeline.cli.workspace_utils import resolve_default_project_yaml
 from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
 from datapipeline.services.bootstrap.core import load_streams
+from datapipeline.services.scaffold.discovery import (
+    list_domains,
+    list_dtos,
+    list_loaders,
+    list_mappers,
+    list_parsers,
+)
+from datapipeline.services.scaffold.utils import error_exit
 
 
 def _default_project_path(root_dir: Path) -> Path | None:
@@ -19,27 +29,34 @@ def _default_project_path(root_dir: Path) -> Path | None:
     return None
 
 
-def handle(subcmd: str) -> None:
+def handle(subcmd: str, *, workspace: WorkspaceContext | None = None) -> None:
     root_dir, name, pyproject = pkg_root(None)
     if subcmd == "sources":
         # Discover sources by scanning sources_dir for YAML files
-        proj_path = _default_project_path(root_dir)
+        proj_path = resolve_default_project_yaml(workspace) if workspace is not None else None
         if proj_path is None:
-            print("[error] No project.yaml found under config/.")
-            return
+            proj_path = _default_project_path(root_dir)
+            if proj_path is None:
+                error_exit("No project.yaml found under config/.")
         try:
             streams = load_streams(proj_path)
         except FileNotFoundError as exc:
-            print(f"[error] {exc}")
-            return
+            error_exit(str(exc))
         aliases = sorted(streams.raw.keys())
         for alias in aliases:
             print(alias)
     elif subcmd == "domains":
-        base = resolve_base_pkg_dir(root_dir, name)
-        dom_dir = base / "domains"
-        if dom_dir.exists():
-            names = sorted(p.name for p in dom_dir.iterdir()
-                           if p.is_dir() and (p / "model.py").exists())
-            for k in names:
-                print(k)
+        for k in list_domains():
+            print(k)
+    elif subcmd == "parsers":
+        for k in sorted(list_parsers().keys()):
+            print(k)
+    elif subcmd == "mappers":
+        for k in sorted(list_mappers().keys()):
+            print(k)
+    elif subcmd == "loaders":
+        for k in sorted(list_loaders().keys()):
+            print(k)
+    elif subcmd == "dtos":
+        for k in sorted(list_dtos().keys()):
+            print(k)
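The rewritten handler routes every failure through error_exit from the new scaffold.utils module (+187 lines, not expanded in this diff). The call sites pin down its contract: it replaces a print("[error] ...") plus return/raise SystemExit(2) pair, and nothing ever runs after it, so it cannot return. A minimal sketch consistent with those call sites; the body and exit code are assumptions:

from typing import NoReturn


def error_exit(message: str, code: int = 2) -> NoReturn:
    # Assumed shape: 1.0.3 printed an "[error] "-prefixed message and exited
    # with status 2; the 2.0.0 call sites rely on this never returning.
    print(f"[error] {message}")
    raise SystemExit(code)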
datapipeline/cli/commands/loader.py (new file)

@@ -0,0 +1,11 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.loader import create_loader
+from datapipeline.services.scaffold.utils import choose_name, status
+
+
+def handle(name: str | None, *, plugin_root: Path | None = None) -> None:
+    if not name:
+        name = choose_name("Loader name", default="custom_loader")
+    ep = create_loader(name=name, root=plugin_root)
+    status("ok", f"Registered loader entry point '{ep}'.")
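choose_name is likewise visible only at its call sites: a prompt label plus a keyword default, returning the name to use. A plausible sketch, assuming it keeps the default on empty input and skips the prompt when stdin is not a TTY (the source wizard further down makes the same isatty() distinction):

import sys


def choose_name(label: str, *, default: str) -> str:
    # Assumption: non-interactive runs take the default, mirroring the
    # sys.stdin.isatty() fallback in the source command below.
    if not sys.stdin.isatty():
        return default
    answer = input(f"{label} [{default}]: ").strip()
    return answer or default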
datapipeline/cli/commands/mapper.py (new file)

@@ -0,0 +1,82 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.discovery import list_domains, list_dtos
+from datapipeline.services.scaffold.dto import create_dto
+from datapipeline.services.scaffold.domain import create_domain
+from datapipeline.services.scaffold.mapper import create_mapper
+from datapipeline.services.scaffold.utils import (
+    choose_existing_or_create,
+    choose_name,
+    error_exit,
+    info,
+    status,
+    pick_from_menu,
+    pick_from_list,
+)
+from datapipeline.services.scaffold.layout import (
+    default_mapper_name,
+    LABEL_DTO_FOR_MAPPER,
+    LABEL_DOMAIN_TO_MAP,
+    LABEL_MAPPER_INPUT,
+    default_mapper_name_for_identity,
+)
+
+
+def handle(name: str | None, *, plugin_root: Path | None = None) -> str:
+    input_class = None
+    input_module = None
+
+    input_choice = pick_from_menu(
+        f"{LABEL_MAPPER_INPUT}:",
+        [
+            ("dto", "DTO (default)"),
+            ("identity", "Any"),
+        ],
+    )
+    info("Mapper output (select domain):")
+
+    dto_map = list_dtos(root=plugin_root)
+    if input_choice == "dto":
+        dto_class = choose_existing_or_create(
+            label=LABEL_DTO_FOR_MAPPER,
+            existing=sorted(dto_map.keys()),
+            create_label="Create new DTO",
+            create_fn=create_dto,
+            prompt_new="DTO class name",
+            root=plugin_root,
+        )
+        dto_module = list_dtos(root=plugin_root).get(dto_class)
+        if not dto_module:
+            error_exit("Failed to resolve DTO module.")
+        input_class = dto_class
+        input_module = dto_module
+    else:
+        input_module = "typing"
+        input_class = "Any"
+
+    domains = list_domains(root=plugin_root)
+    domain = choose_existing_or_create(
+        label=LABEL_DOMAIN_TO_MAP,
+        existing=domains,
+        create_label="Create new domain",
+        create_fn=lambda name, root: create_domain(domain=name, root=root),
+        prompt_new="Domain name",
+        root=plugin_root,
+    )
+
+    if not name:
+        if input_choice == "identity":
+            name = choose_name(
+                "Mapper name", default=default_mapper_name_for_identity(domain))
+        else:
+            name = choose_name(
+                "Mapper name", default=default_mapper_name(input_module, domain))
+
+    ep = create_mapper(
+        name=name,
+        input_class=input_class,
+        input_module=input_module,
+        domain=domain,
+        root=plugin_root,
+    )
+    return ep
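Both this wizard and the parser wizard below lean on choose_existing_or_create, called with the same keyword set each time: offer the existing entries, offer a create option that calls create_fn(name, root), and return the chosen or newly created name. A self-contained hypothetical sketch of that contract (the real implementation sits in the unexpanded scaffold/utils.py):

from pathlib import Path
from typing import Callable, Sequence


def choose_existing_or_create(
    *,
    label: str,
    existing: Sequence[str],
    create_label: str,
    create_fn: Callable[[str, Path | None], object],
    prompt_new: str,
    root: Path | None,
    default_new: str | None = None,
) -> str:
    # Hypothetical menu loop; only the signature and the "return the chosen
    # or newly created name" contract are pinned down by this diff.
    for idx, entry in enumerate(existing, start=1):
        print(f"  {idx}) {entry}")
    print(f"  {len(existing) + 1}) {create_label}")
    raw = input(f"{label}: ").strip()
    if raw.isdigit() and 1 <= int(raw) <= len(existing):
        return existing[int(raw) - 1]
    new_name = input(f"{prompt_new} [{default_new or ''}]: ").strip() or (default_new or "")
    create_fn(new_name, root)
    return new_name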
datapipeline/cli/commands/parser.py (new file)

@@ -0,0 +1,45 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.dto import create_dto
+from datapipeline.services.scaffold.discovery import list_dtos
+from datapipeline.services.scaffold.parser import create_parser
+from datapipeline.services.scaffold.utils import (
+    choose_existing_or_create,
+    choose_name,
+    error_exit,
+    status,
+)
+from datapipeline.services.scaffold.layout import default_parser_name, LABEL_DTO_FOR_PARSER
+
+
+def handle(
+    name: str | None,
+    *,
+    plugin_root: Path | None = None,
+    default_dto: str | None = None,
+) -> str:
+    dto_map = list_dtos(root=plugin_root)
+    dto_class = choose_existing_or_create(
+        label=LABEL_DTO_FOR_PARSER,
+        existing=sorted(dto_map.keys()),
+        create_label="Create new DTO",
+        create_fn=create_dto,
+        prompt_new="DTO class name",
+        root=plugin_root,
+        default_new=default_dto or (f"{name}DTO" if name else None),
+    )
+    dto_module = list_dtos(root=plugin_root).get(dto_class)
+    if not dto_module:
+        error_exit("Failed to resolve DTO module.")
+
+    if not name:
+        name = choose_name("Parser class name", default=default_parser_name(dto_class))
+
+    ep = create_parser(
+        name=name,
+        dto_class=dto_class,
+        dto_module=dto_module,
+        root=plugin_root,
+    )
+    status("ok", f"Registered parser entry point '{ep}'.")
+    return ep
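The default names come from the new scaffold/layout.py (+96 lines, also not expanded here). One plausible reading of default_parser_name, guessed from the demo skeleton pairing SandboxOhlcvDTO with sandbox_ohlcv_dto_parser.py; the convention is illustrative only:

def default_parser_name(dto_class: str) -> str:
    # Guessed convention: strip a trailing "DTO" and append "Parser".
    base = dto_class[:-3] if dto_class.endswith("DTO") else dto_class
    return f"{base}Parser"


print(default_parser_name("SandboxOhlcvDTO"))  # SandboxOhlcvParser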
datapipeline/cli/commands/run_config.py

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import logging
 from pathlib import Path
 from typing import Iterator, List, NamedTuple, Optional, Sequence
@@ -93,7 +91,7 @@ def determine_preview_stage(
             return None, None
         stages.append(cfg_stage)
 
-    if not stages or any(stage > 5 for stage in stages):
+    if not stages or any(stage > 6 for stage in stages):
        return None, None
 
     if len(set(stages)) == 1:
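Every stage threshold in the CLI shifts up by one in 2.0.0 (5 becomes 6 here; 6 becomes 7 and 7 becomes 8 in serve_pipeline.py below), consistent with one extra stage, plausibly the new stream-transform stage (transforms/stream/*, spool_cache.py), being inserted ahead of the vector stages. Checking the new guard against hypothetical stage lists:

stages = [2, 6]
assert not (not stages or any(stage > 6 for stage in stages))  # 6 still previews

stages = [7]
assert not stages or any(stage > 6 for stage in stages)  # 7 now disables preview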
datapipeline/cli/commands/serve_pipeline.py

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import logging
 import time
 from itertools import islice
@@ -70,7 +68,7 @@ def report_serve(target: OutputTarget, count: int) -> None:
 
 
 def _is_full_pipeline_stage(stage: int | None) -> bool:
-    return stage is None or stage >= 6
+    return stage is None or stage >= 7
 
 
 def serve_with_runtime(
@@ -99,12 +97,12 @@ def serve_with_runtime(
         run_status = "success"
         return
 
-    rectangular = stage is None or stage > 5
+    rectangular = stage is None or stage > 6
 
-    if stage is not None and stage <= 5:
+    if stage is not None and stage <= 6:
         if target.payload != "sample":
             logger.warning(
-                "Ignoring payload '%s' for stage %s preview; preview outputs stream raw records.",
+                "Ignoring payload '%s' for stage %s preview; preview outputs record/feature streams.",
                 target.payload,
                 stage,
            )
@@ -129,7 +127,7 @@
         rectangular=rectangular,
     )
 
-    if stage in (None, 7):
+    if stage in (None, 8):
         vectors = post_process(context, vectors)
     if stage is None:
         vectors = apply_split_stage(runtime, vectors)
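The boundary behavior of the renumbered guard, taken verbatim from the hunk above and exercised with illustrative values:

def _is_full_pipeline_stage(stage: int | None) -> bool:
    return stage is None or stage >= 7


assert _is_full_pipeline_stage(None)   # no stage cap: run the full pipeline
assert not _is_full_pipeline_stage(6)  # 6 is the last preview stage in 2.0.0
assert _is_full_pipeline_stage(7)      # 7, the old post-process slot, now opens the full pipeline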
datapipeline/cli/commands/source.py

@@ -2,7 +2,19 @@ from pathlib import Path
 
 from datapipeline.config.workspace import WorkspaceContext
 from datapipeline.cli.workspace_utils import resolve_default_project_yaml
-from datapipeline.services.scaffold.source import create_source
+from datapipeline.services.scaffold.source_yaml import (
+    create_source_yaml,
+    default_loader_config,
+)
+from datapipeline.services.scaffold.discovery import list_loaders, list_parsers
+from datapipeline.services.scaffold.utils import (
+    error_exit,
+    info,
+    choose_name,
+    pick_from_menu,
+    prompt_required,
+)
+import sys
 
 
 def handle(
@@ -13,11 +25,13 @@ def handle(
     format: str | None = None,
     *,
     identity: bool = False,
+    loader: str | None = None,
+    parser: str | None = None,
     alias: str | None = None,
     plugin_root: Path | None = None,
     workspace: WorkspaceContext | None = None,
 ) -> None:
-    if subcmd in {"create", "add"}:
+    if subcmd == "create":
         # Allow: positional provider dataset, --provider/--dataset, --alias, or provider as 'prov.ds'
         if (not provider or not dataset):
             # Try alias flag first
@@ -26,33 +40,107 @@ def handle(
                 if len(parts) == 2 and all(parts):
                     provider, dataset = parts[0], parts[1]
                 else:
-                    print("[error] Alias must be 'provider.dataset'")
-                    raise SystemExit(2)
+                    error_exit("Alias must be 'provider.dataset'")
             # Try provider passed as 'prov.ds' positional/flag
             elif provider and ("." in provider) and not dataset:
                 parts = provider.split(".", 1)
                 if len(parts) == 2 and all(parts):
                     provider, dataset = parts[0], parts[1]
                 else:
-                    print("[error] Source must be specified as '<provider> <dataset>' or '<provider>.<dataset>'")
-                    raise SystemExit(2)
+                    error_exit("Source must be specified as '<provider> <dataset>' or '<provider>.<dataset>'")
 
         if not provider or not dataset:
-            print("[error] Source requires '<provider> <dataset>' (or -a/--alias provider.dataset)")
-            raise SystemExit(2)
-        if not transport:
-            print("[error] --transport is required (fs|http|synthetic)")
-            raise SystemExit(2)
-        if transport in {"fs", "http"} and not format:
-            print("[error] --format is required for fs/http transports (fs: csv|json|json-lines|pickle, http: csv|json|json-lines)")
-            raise SystemExit(2)
+            source_id = prompt_required("Source id (provider.dataset)")
+            parts = source_id.split(".", 1)
+            if len(parts) == 2 and all(parts):
+                provider, dataset = parts[0], parts[1]
+            else:
+                error_exit("Source id must be in the form 'provider.dataset'")
+
+        # Loader selection: either explicit loader EP or built-in transport defaults
+        loader_ep: str | None = loader
+        loader_args: dict = {}
+        if not loader_ep:
+            if not transport:
+                known_loaders = list_loaders(root=plugin_root)
+                options = [
+                    ("fs", "Built-in fs"),
+                    ("http", "Built-in http"),
+                    ("synthetic", "Built-in synthetic"),
+                ]
+                if known_loaders:
+                    options.append(("existing", "Select existing loader"))
+                options.append(("custom", "Custom loader"))
+                choice = pick_from_menu("Loader:", options)
+                if choice in {"fs", "http", "synthetic"}:
+                    transport = choice
+                elif choice == "existing":
+                    loader_ep = pick_from_menu(
+                        "Select loader entrypoint:",
+                        [(k, k) for k in sorted(known_loaders.keys())],
+                    )
+                elif choice == "custom":
+                    loader_ep = prompt_required("Loader entrypoint")
+            if not loader_ep:
+                if transport in {"fs", "http"} and not format:
+                    format_options = [
+                        ("csv", "csv"),
+                        ("json", "json"),
+                        ("json-lines", "json-lines"),
+                    ]
+                    if transport == "fs":
+                        format_options.append(("pickle", "pickle"))
+                    format = pick_from_menu("Format:", format_options)
+                if not transport:
+                    error_exit("--transport is required when no --loader is provided")
+                loader_ep, loader_args = default_loader_config(transport, format)
+
+        # Parser selection (no code generation)
+        if identity:
+            parser_ep = "identity"
+        elif parser:
+            parser_ep = parser
+        else:
+            interactive = sys.stdin.isatty()
+            if not interactive:
+                parser_ep = "identity"
+            else:
+                parsers = list_parsers(root=plugin_root)
+                if parsers:
+                    choice = pick_from_menu(
+                        "Parser:",
+                        [
+                            ("existing", "Select existing parser (default)"),
+                            ("identity", "Identity parser"),
+                            ("custom", "Custom parser"),
+                        ],
+                    )
+                    if choice == "existing":
+                        parser_ep = pick_from_menu(
+                            "Select parser entrypoint:",
+                            [(k, k) for k in sorted(parsers.keys())],
+                        )
+                    elif choice == "identity":
+                        parser_ep = "identity"
+                    else:
+                        parser_ep = prompt_required("Parser entrypoint")
+                else:
+                    choice = pick_from_menu(
+                        "Parser:",
+                        [
+                            ("identity", "Identity parser (default)"),
+                            ("custom", "Custom parser"),
+                        ],
+                    )
+                    parser_ep = "identity" if choice == "identity" else prompt_required("Parser entrypoint")
+
         project_yaml = resolve_default_project_yaml(workspace)
-        create_source(
+        create_source_yaml(
             provider=provider,
             dataset=dataset,
-            transport=transport,
-            format=format,
+            loader_ep=loader_ep,
+            loader_args=loader_args,
+            parser_ep=parser_ep,
             root=plugin_root,
-            identity=identity,
             **({"project_yaml": project_yaml} if project_yaml is not None else {}),
         )
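create_source_yaml and default_loader_config live in the new scaffold/source_yaml.py (+91 lines), which this view does not expand. The wizard above fixes the signature, though: default_loader_config(transport, format) must yield a loader entry-point name plus its argument mapping for the built-in fs/http/synthetic transports. A sketch under that assumption; the entry-point names and argument keys are guesses:

def default_loader_config(transport: str, format: str | None) -> tuple[str, dict]:
    # Hypothetical mapping: only the (loader_ep, loader_args) return shape is
    # visible from the caller above.
    if transport == "synthetic":
        return "synthetic", {}
    return transport, {"format": format}


loader_ep, loader_args = default_loader_config("fs", "csv")
print(loader_ep, loader_args)  # fs {'format': 'csv'}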