jerry-thomas 1.0.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (200)
  1. datapipeline/analysis/vector/collector.py +0 -1
  2. datapipeline/build/tasks/config.py +0 -2
  3. datapipeline/build/tasks/metadata.py +0 -2
  4. datapipeline/build/tasks/scaler.py +0 -2
  5. datapipeline/build/tasks/schema.py +0 -2
  6. datapipeline/build/tasks/utils.py +0 -2
  7. datapipeline/cli/app.py +205 -89
  8. datapipeline/cli/commands/contract.py +153 -285
  9. datapipeline/cli/commands/demo.py +13 -0
  10. datapipeline/cli/commands/domain.py +4 -4
  11. datapipeline/cli/commands/dto.py +11 -0
  12. datapipeline/cli/commands/filter.py +2 -2
  13. datapipeline/cli/commands/inspect.py +0 -68
  14. datapipeline/cli/commands/list_.py +30 -13
  15. datapipeline/cli/commands/loader.py +11 -0
  16. datapipeline/cli/commands/mapper.py +82 -0
  17. datapipeline/cli/commands/parser.py +45 -0
  18. datapipeline/cli/commands/run_config.py +1 -3
  19. datapipeline/cli/commands/serve_pipeline.py +5 -7
  20. datapipeline/cli/commands/source.py +111 -18
  21. datapipeline/cli/commands/stream.py +286 -0
  22. datapipeline/cli/visuals/common.py +57 -7
  23. datapipeline/cli/visuals/labels.py +8 -41
  24. datapipeline/cli/visuals/sections.py +0 -2
  25. datapipeline/cli/visuals/sources_rich.py +8 -3
  26. datapipeline/cli/workspace_utils.py +22 -0
  27. datapipeline/config/context.py +0 -2
  28. datapipeline/config/dataset/dataset.py +1 -1
  29. datapipeline/config/dataset/feature.py +1 -0
  30. datapipeline/config/dataset/normalize.py +9 -4
  31. datapipeline/config/metadata.py +0 -2
  32. datapipeline/config/project.py +0 -2
  33. datapipeline/config/resolution.py +10 -2
  34. datapipeline/config/tasks.py +9 -9
  35. datapipeline/config/workspace.py +15 -0
  36. datapipeline/domain/feature.py +3 -0
  37. datapipeline/domain/record.py +7 -7
  38. datapipeline/domain/sample.py +0 -2
  39. datapipeline/domain/vector.py +6 -8
  40. datapipeline/integrations/ml/adapter.py +0 -2
  41. datapipeline/integrations/ml/pandas_support.py +0 -2
  42. datapipeline/integrations/ml/rows.py +0 -2
  43. datapipeline/integrations/ml/torch_support.py +0 -2
  44. datapipeline/io/output.py +0 -2
  45. datapipeline/io/serializers.py +26 -16
  46. datapipeline/mappers/synthetic/time.py +9 -2
  47. datapipeline/pipeline/artifacts.py +3 -5
  48. datapipeline/pipeline/observability.py +0 -2
  49. datapipeline/pipeline/pipelines.py +118 -34
  50. datapipeline/pipeline/stages.py +42 -17
  51. datapipeline/pipeline/utils/spool_cache.py +142 -0
  52. datapipeline/pipeline/utils/transform_utils.py +27 -2
  53. datapipeline/services/artifacts.py +1 -4
  54. datapipeline/services/constants.py +1 -0
  55. datapipeline/services/factories.py +4 -6
  56. datapipeline/services/project_paths.py +0 -2
  57. datapipeline/services/runs.py +0 -2
  58. datapipeline/services/scaffold/contract_yaml.py +76 -0
  59. datapipeline/services/scaffold/demo.py +141 -0
  60. datapipeline/services/scaffold/discovery.py +115 -0
  61. datapipeline/services/scaffold/domain.py +21 -13
  62. datapipeline/services/scaffold/dto.py +31 -0
  63. datapipeline/services/scaffold/filter.py +2 -1
  64. datapipeline/services/scaffold/layout.py +96 -0
  65. datapipeline/services/scaffold/loader.py +61 -0
  66. datapipeline/services/scaffold/mapper.py +116 -0
  67. datapipeline/services/scaffold/parser.py +56 -0
  68. datapipeline/services/scaffold/plugin.py +14 -2
  69. datapipeline/services/scaffold/source_yaml.py +91 -0
  70. datapipeline/services/scaffold/stream_plan.py +110 -0
  71. datapipeline/services/scaffold/utils.py +187 -0
  72. datapipeline/sources/data_loader.py +0 -2
  73. datapipeline/sources/decoders.py +49 -8
  74. datapipeline/sources/factory.py +9 -6
  75. datapipeline/sources/foreach.py +166 -0
  76. datapipeline/sources/synthetic/time/parser.py +1 -1
  77. datapipeline/sources/transports.py +10 -4
  78. datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
  79. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
  80. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
  81. datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
  82. datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
  83. datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
  84. datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
  85. datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
  86. datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
  87. datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
  88. datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
  89. datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
  90. datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
  91. datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
  92. datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
  93. datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
  94. datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
  95. datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
  96. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
  97. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
  98. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
  99. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  100. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
  101. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
  102. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
  103. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  104. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
  105. datapipeline/templates/plugin_skeleton/README.md +57 -136
  106. datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
  107. datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
  108. datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
  109. datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
  110. datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
  111. datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
  112. datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
  113. datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
  114. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
  115. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
  116. datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
  117. datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
  118. datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
  119. datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
  120. datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
  121. datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
  122. datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
  123. datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
  124. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
  125. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  126. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
  127. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
  128. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  129. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
  130. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
  131. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +6 -9
  132. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
  133. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
  134. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
  135. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
  136. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
  137. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
  138. datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
  139. datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
  140. datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +14 -0
  141. datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
  142. datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
  143. datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
  144. datapipeline/templates/stubs/dto.py.j2 +1 -1
  145. datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
  146. datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
  147. datapipeline/templates/stubs/mappers/ingest.py.j2 +17 -0
  148. datapipeline/templates/stubs/parser.py.j2 +4 -0
  149. datapipeline/templates/stubs/record.py.j2 +0 -1
  150. datapipeline/templates/stubs/source.yaml.j2 +1 -1
  151. datapipeline/transforms/debug/identity.py +34 -16
  152. datapipeline/transforms/debug/lint.py +14 -11
  153. datapipeline/transforms/feature/scaler.py +5 -12
  154. datapipeline/transforms/filter.py +73 -17
  155. datapipeline/transforms/interfaces.py +58 -0
  156. datapipeline/transforms/record/floor_time.py +10 -7
  157. datapipeline/transforms/record/lag.py +8 -10
  158. datapipeline/transforms/sequence.py +2 -3
  159. datapipeline/transforms/stream/dedupe.py +5 -7
  160. datapipeline/transforms/stream/ensure_ticks.py +39 -24
  161. datapipeline/transforms/stream/fill.py +34 -25
  162. datapipeline/transforms/stream/filter.py +25 -0
  163. datapipeline/transforms/stream/floor_time.py +16 -0
  164. datapipeline/transforms/stream/granularity.py +52 -30
  165. datapipeline/transforms/stream/lag.py +17 -0
  166. datapipeline/transforms/stream/rolling.py +72 -0
  167. datapipeline/transforms/utils.py +42 -10
  168. datapipeline/transforms/vector/drop/horizontal.py +0 -3
  169. datapipeline/transforms/vector/drop/orchestrator.py +0 -3
  170. datapipeline/transforms/vector/drop/vertical.py +0 -2
  171. datapipeline/transforms/vector/ensure_schema.py +0 -2
  172. datapipeline/utils/paths.py +0 -2
  173. datapipeline/utils/placeholders.py +0 -2
  174. datapipeline/utils/rich_compat.py +0 -3
  175. datapipeline/utils/window.py +0 -2
  176. jerry_thomas-2.0.0.dist-info/METADATA +282 -0
  177. jerry_thomas-2.0.0.dist-info/RECORD +264 -0
  178. {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/WHEEL +1 -1
  179. {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/entry_points.txt +8 -3
  180. datapipeline/services/scaffold/mappers.py +0 -55
  181. datapipeline/services/scaffold/source.py +0 -190
  182. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
  183. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
  184. datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
  185. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
  186. datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
  187. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
  188. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
  189. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
  190. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
  191. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
  192. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
  193. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +0 -31
  194. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +0 -30
  195. datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +0 -12
  196. datapipeline/templates/stubs/mapper.py.j2 +0 -22
  197. jerry_thomas-1.0.2.dist-info/METADATA +0 -825
  198. jerry_thomas-1.0.2.dist-info/RECORD +0 -199
  199. {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/licenses/LICENSE +0 -0
  200. {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
1
- from __future__ import annotations
2
1
  from collections import Counter, defaultdict
3
2
  from typing import Any, Hashable, Iterable, Literal
4
3
  from datapipeline.transforms.vector_utils import base_id as _base_id
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  import hashlib
4
2
  from pathlib import Path
5
3
  from typing import Iterable
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  import json
4
2
  from collections import defaultdict
5
3
  from datetime import datetime, timezone
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  from pathlib import Path
4
2
  from typing import Dict, Iterator, Tuple
5
3
 
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  import json
4
2
  from datetime import datetime, timezone
5
3
  from pathlib import Path
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  from collections import Counter, OrderedDict
4
2
  from datetime import datetime
5
3
  from typing import Any
datapipeline/cli/app.py CHANGED
@@ -5,9 +5,15 @@ from typing import Optional, Tuple
5
5
 
6
6
  from datapipeline.cli.commands.run import handle_serve
7
7
  from datapipeline.cli.commands.plugin import bar as handle_bar
8
+ from datapipeline.cli.commands.demo import handle as handle_demo
8
9
  from datapipeline.cli.commands.source import handle as handle_source
9
10
  from datapipeline.cli.commands.domain import handle as handle_domain
10
11
  from datapipeline.cli.commands.contract import handle as handle_contract
12
+ from datapipeline.cli.commands.dto import handle as handle_dto
13
+ from datapipeline.cli.commands.parser import handle as handle_parser
14
+ from datapipeline.cli.commands.mapper import handle as handle_mapper
15
+ from datapipeline.cli.commands.loader import handle as handle_loader
16
+ from datapipeline.cli.commands.stream import handle as handle_stream
11
17
  from datapipeline.cli.commands.list_ import handle as handle_list
12
18
  from datapipeline.cli.commands.filter import handle as handle_filter
13
19
  from datapipeline.cli.commands.inspect import (
@@ -31,15 +37,9 @@ def _dataset_to_project_path(
31
37
  """Resolve a dataset selector (alias, folder, or file) into a project.yaml path."""
32
38
  # 1) Alias via jerry.yaml datasets (wins over local folders with same name)
33
39
  if workspace is not None:
34
- datasets = getattr(workspace.config, "datasets", {}) or {}
35
- raw = datasets.get(dataset)
36
- if raw:
37
- base = workspace.root
38
- candidate = Path(raw)
39
- candidate = candidate if candidate.is_absolute() else (base / candidate)
40
- if candidate.is_dir():
41
- candidate = candidate / "project.yaml"
42
- return str(candidate.resolve())
40
+ resolved = workspace.resolve_dataset_alias(dataset)
41
+ if resolved is not None:
42
+ return str(resolved)
43
43
 
44
44
  # 2) Direct file path
45
45
  path = Path(dataset)
@@ -166,9 +166,9 @@ def main() -> None:
166
166
  "--stage",
167
167
  "-s",
168
168
  type=int,
169
- choices=range(0, 8),
169
+ choices=range(0, 9),
170
170
  default=None,
171
- help="preview a specific pipeline stage (0-5 feature stages, 6 assembled vectors, 7 transformed vectors)",
171
+ help="preview a specific pipeline stage (0-6 record/feature stages, 7 assembled vectors, 8 transformed vectors)",
172
172
  )
173
173
  p_serve.add_argument(
174
174
  "--visuals",
@@ -191,7 +191,7 @@ def main() -> None:
191
191
  # build (materialize artifacts)
192
192
  p_build = sub.add_parser(
193
193
  "build",
194
- help="materialize project artifacts (expected ids, hashes, etc.)",
194
+ help="materialize project artifacts (schema, hashes, etc.)",
195
195
  parents=[common],
196
196
  )
197
197
  p_build.add_argument(
@@ -223,22 +223,56 @@ def main() -> None:
223
223
  help="progress display: auto (spinner unless DEBUG), spinner, bars, or off",
224
224
  )
225
225
 
226
+ # demo (optional demo dataset)
227
+ p_demo = sub.add_parser(
228
+ "demo",
229
+ help="create an optional demo dataset inside a plugin",
230
+ parents=[common],
231
+ )
232
+ demo_sub = p_demo.add_subparsers(dest="demo_cmd", required=True)
233
+ demo_init = demo_sub.add_parser(
234
+ "init",
235
+ help="create a standalone demo plugin named demo",
236
+ parents=[common],
237
+ )
238
+ demo_init.add_argument(
239
+ "--out",
240
+ "-o",
241
+ help="override parent directory (demo will be created inside)",
242
+ )
243
+
244
+ # list
245
+ p_list = sub.add_parser(
246
+ "list",
247
+ help="list known resources",
248
+ parents=[common],
249
+ )
250
+ list_sub = p_list.add_subparsers(dest="list_cmd", required=True)
251
+ list_sub.add_parser("sources", help="list sources")
252
+ list_sub.add_parser("domains", help="list domains")
253
+ list_sub.add_parser("parsers", help="list parsers")
254
+ list_sub.add_parser("dtos", help="list DTOs")
255
+ list_sub.add_parser("mappers", help="list mappers")
256
+ list_sub.add_parser("loaders", help="list loaders")
257
+
226
258
  # source
227
259
  p_source = sub.add_parser(
228
260
  "source",
229
- help="add or list raw sources",
261
+ help="create or list raw sources",
230
262
  parents=[common],
231
263
  )
232
264
  source_sub = p_source.add_subparsers(dest="source_cmd", required=True)
233
- p_source_add = source_sub.add_parser(
234
- "add",
235
- help="create a provider+dataset source",
265
+ p_source_create = source_sub.add_parser(
266
+ "create",
267
+ help="create a provider+dataset source (yaml only)",
236
268
  description=(
237
- "Scaffold a source using transport + format.\n\n"
269
+ "Create a source YAML using transport + format or a loader entrypoint.\n\n"
238
270
  "Usage:\n"
239
- " jerry source add <provider> <dataset> -t fs -f csv\n"
240
- " jerry source add <provider>.<dataset> -t http -f json\n"
241
- " jerry source add -p <provider> -d <dataset> -t synthetic\n\n"
271
+ " jerry source create <provider>.<dataset> -t fs -f csv\n"
272
+ " jerry source create <provider>.<dataset> -t http -f json\n"
273
+ " jerry source create <provider>.<dataset> -t synthetic\n\n"
274
+ " jerry source create <provider> <dataset> --loader mypkg.loaders.demo:Loader\n"
275
+ " jerry source create <provider> <dataset> --parser myparser\n\n"
242
276
  "Examples:\n"
243
277
  " fs CSV: -t fs -f csv\n"
244
278
  " fs NDJSON: -t fs -f json-lines\n"
@@ -250,38 +284,47 @@ def main() -> None:
250
284
  # Support simple positionals, plus flags for compatibility
251
285
  # Allow either positionals or flags. Use distinct dest names for flags
252
286
  # to avoid ambiguity when both forms are present in some environments.
253
- p_source_add.add_argument("provider", nargs="?", help="provider name")
254
- p_source_add.add_argument("dataset", nargs="?", help="dataset slug")
255
- p_source_add.add_argument("--provider", "-p", dest="provider_opt", metavar="PROVIDER", help="provider name")
256
- p_source_add.add_argument("--dataset", "-d", dest="dataset_opt", metavar="DATASET", help="dataset slug")
257
- p_source_add.add_argument("--alias", "-a", help="provider.dataset alias")
258
- p_source_add.add_argument(
287
+ for p in (p_source_create,):
288
+ p.add_argument("provider", nargs="?", help="provider name")
289
+ p.add_argument("dataset", nargs="?", help="dataset slug")
290
+ p.add_argument("--provider", "-p", dest="provider_opt", metavar="PROVIDER", help="provider name")
291
+ p.add_argument("--dataset", "-d", dest="dataset_opt", metavar="DATASET", help="dataset slug")
292
+ p.add_argument("--alias", "-a", help="provider.dataset alias")
293
+ p.add_argument(
259
294
  "--transport", "-t",
260
295
  choices=["fs", "http", "synthetic"],
261
- required=True,
296
+ required=False,
262
297
  help="how data is accessed: fs/http/synthetic",
263
- )
264
- p_source_add.add_argument(
265
- "--format", "-f",
266
- choices=["csv", "json", "json-lines", "pickle"],
267
- help="data format for fs/http transports (ignored otherwise)",
268
- )
269
- p_source_add.add_argument(
270
- "--identity",
271
- action="store_true",
272
- help="use the built-in identity parser (skips DTO/parser scaffolding)",
273
- )
298
+ )
299
+ p.add_argument(
300
+ "--format", "-f",
301
+ choices=["csv", "json", "json-lines", "pickle"],
302
+ help="data format for fs/http transports (ignored otherwise)",
303
+ )
304
+ p.add_argument(
305
+ "--loader",
306
+ help="loader entrypoint (overrides --transport/--format)",
307
+ )
308
+ p.add_argument(
309
+ "--parser",
310
+ help="parser entrypoint (defaults to identity)",
311
+ )
312
+ p.add_argument(
313
+ "--identity",
314
+ action="store_true",
315
+ help="use the built-in identity parser (alias for --parser identity)",
316
+ )
274
317
  source_sub.add_parser("list", help="list known sources")
275
318
 
276
319
  # domain
277
320
  p_domain = sub.add_parser(
278
321
  "domain",
279
- help="add or list domains",
322
+ help="create or list domains",
280
323
  parents=[common],
281
324
  )
282
325
  domain_sub = p_domain.add_subparsers(dest="domain_cmd", required=True)
283
326
  p_domain_add = domain_sub.add_parser(
284
- "add",
327
+ "create",
285
328
  help="create a domain",
286
329
  description="Create a time-aware domain package rooted in TemporalRecord.",
287
330
  )
@@ -292,13 +335,64 @@ def main() -> None:
292
335
  )
293
336
  domain_sub.add_parser("list", help="list known domains")
294
337
 
338
+ # dto
339
+ p_dto = sub.add_parser(
340
+ "dto",
341
+ help="create DTOs",
342
+ parents=[common],
343
+ )
344
+ dto_sub = p_dto.add_subparsers(dest="dto_cmd", required=True)
345
+ p_dto_create = dto_sub.add_parser("create", help="create a DTO")
346
+ p_dto_create.add_argument("name", nargs="?", help="DTO class name")
347
+
348
+ # parser
349
+ p_parser = sub.add_parser(
350
+ "parser",
351
+ help="create parsers",
352
+ parents=[common],
353
+ )
354
+ parser_sub = p_parser.add_subparsers(dest="parser_cmd", required=True)
355
+ p_parser_create = parser_sub.add_parser("create", help="create a parser")
356
+ p_parser_create.add_argument("name", nargs="?", help="Parser class name")
357
+
358
+ # mapper
359
+ p_mapper = sub.add_parser(
360
+ "mapper",
361
+ help="create mappers",
362
+ parents=[common],
363
+ )
364
+ mapper_sub = p_mapper.add_subparsers(dest="mapper_cmd", required=True)
365
+ p_mapper_create = mapper_sub.add_parser("create", help="create a mapper")
366
+ p_mapper_create.add_argument("name", nargs="?", help="Mapper function name")
367
+
368
+ # loader
369
+ p_loader = sub.add_parser(
370
+ "loader",
371
+ help="create loaders",
372
+ parents=[common],
373
+ )
374
+ loader_sub = p_loader.add_subparsers(dest="loader_cmd", required=True)
375
+ p_loader_create = loader_sub.add_parser("create", help="create a loader")
376
+ p_loader_create.add_argument("name", nargs="?", help="Loader name")
377
+
378
+ # inflow
379
+ p_inflow = sub.add_parser(
380
+ "inflow",
381
+ help="create end-to-end inflow scaffolds",
382
+ parents=[common],
383
+ )
384
+ inflow_sub = p_inflow.add_subparsers(dest="inflow_cmd", required=True)
385
+ inflow_sub.add_parser("create", help="create an inflow")
386
+
295
387
  # contract (interactive: ingest or composed)
296
388
  p_contract = sub.add_parser(
297
389
  "contract",
298
390
  help="manage stream contracts (ingest or composed)",
299
391
  parents=[common],
300
392
  )
301
- p_contract.add_argument(
393
+ contract_sub = p_contract.add_subparsers(dest="contract_cmd", required=True)
394
+ p_contract_create = contract_sub.add_parser("create", help="create a contract")
395
+ p_contract_create.add_argument(
302
396
  "--identity",
303
397
  action="store_true",
304
398
  help="use built-in identity mapper (skip mapper scaffolding)",
@@ -467,24 +561,6 @@ def main() -> None:
467
561
  help="partitions manifest path (defaults to build/partitions.json)",
468
562
  )
469
563
 
470
- # Expected IDs (newline list)
471
- p_inspect_expected = inspect_sub.add_parser(
472
- "expected",
473
- help="discover full feature ids and write a newline list",
474
- parents=[inspect_common],
475
- )
476
- p_inspect_expected.add_argument(
477
- "--project",
478
- "-p",
479
- default=None,
480
- help="path to project.yaml",
481
- )
482
- p_inspect_expected.add_argument(
483
- "--output",
484
- "-o",
485
- default=None,
486
- help="expected ids output path (defaults to build/datasets/<name>/expected.txt)",
487
- )
488
564
 
489
565
  workspace_context = load_workspace_context(Path.cwd())
490
566
  args = parser.parse_args()
@@ -614,50 +690,83 @@ def main() -> None:
614
690
  log_level=base_level,
615
691
  workspace=workspace_context,
616
692
  )
617
- elif subcmd == "expected":
618
- from datapipeline.cli.commands.inspect import expected as handle_inspect_expected
619
- handle_inspect_expected(
620
- project=args.project,
621
- output=getattr(args, "output", None),
622
- visuals=inspect_visual_provider,
623
- progress=inspect_progress_style,
624
- log_level=base_level,
625
- workspace=workspace_context,
626
- )
627
693
  return
628
694
 
629
695
  if args.cmd == "source":
630
696
  if args.source_cmd == "list":
631
- handle_list(subcmd="sources")
632
- else:
633
- # Merge positionals and flags for provider/dataset
634
- handle_source(
635
- subcmd="add",
636
- provider=(getattr(args, "provider", None) or getattr(args, "provider_opt", None)),
637
- dataset=(getattr(args, "dataset", None) or getattr(args, "dataset_opt", None)),
638
- transport=getattr(args, "transport", None),
639
- format=getattr(args, "format", None),
640
- alias=getattr(args, "alias", None),
641
- identity=getattr(args, "identity", False),
642
- plugin_root=plugin_root,
643
- )
697
+ handle_list(subcmd="sources", workspace=workspace_context)
698
+ return
699
+ # Merge positionals and flags for provider/dataset
700
+ handle_source(
701
+ subcmd=args.source_cmd,
702
+ provider=(getattr(args, "provider", None) or getattr(args, "provider_opt", None)),
703
+ dataset=(getattr(args, "dataset", None) or getattr(args, "dataset_opt", None)),
704
+ transport=getattr(args, "transport", None),
705
+ format=getattr(args, "format", None),
706
+ alias=getattr(args, "alias", None),
707
+ identity=getattr(args, "identity", False),
708
+ loader=getattr(args, "loader", None),
709
+ parser=getattr(args, "parser", None),
710
+ plugin_root=plugin_root,
711
+ workspace=workspace_context,
712
+ )
713
+ return
714
+
715
+ if args.cmd == "list":
716
+ handle_list(subcmd=args.list_cmd, workspace=workspace_context)
644
717
  return
645
718
 
646
719
  if args.cmd == "domain":
647
720
  if args.domain_cmd == "list":
648
721
  handle_list(subcmd="domains")
649
- else:
650
- handle_domain(
651
- subcmd="add",
652
- domain=getattr(args, "domain", None),
653
- plugin_root=plugin_root,
654
- )
722
+ return
723
+ handle_domain(
724
+ subcmd=args.domain_cmd,
725
+ domain=getattr(args, "domain", None),
726
+ plugin_root=plugin_root,
727
+ )
728
+ return
729
+
730
+ if args.cmd == "dto":
731
+ handle_dto(
732
+ name=getattr(args, "name", None),
733
+ plugin_root=plugin_root,
734
+ )
735
+ return
736
+
737
+ if args.cmd == "parser":
738
+ handle_parser(
739
+ name=getattr(args, "name", None),
740
+ plugin_root=plugin_root,
741
+ )
742
+ return
743
+
744
+ if args.cmd == "mapper":
745
+ handle_mapper(
746
+ name=getattr(args, "name", None),
747
+ plugin_root=plugin_root,
748
+ )
749
+ return
750
+
751
+ if args.cmd == "loader":
752
+ handle_loader(
753
+ name=getattr(args, "name", None),
754
+ plugin_root=plugin_root,
755
+ )
756
+ return
757
+
758
+ if args.cmd == "inflow":
759
+ handle_stream(
760
+ plugin_root=plugin_root,
761
+ workspace=workspace_context,
762
+ )
655
763
  return
656
764
 
657
765
  if args.cmd == "contract":
658
766
  handle_contract(
659
767
  plugin_root=plugin_root,
660
- use_identity=args.identity,
768
+ use_identity=getattr(args, "identity", False),
769
+ workspace=workspace_context,
661
770
  )
662
771
  return
663
772
 
@@ -669,6 +778,13 @@ def main() -> None:
669
778
  )
670
779
  return
671
780
 
781
+ if args.cmd == "demo":
782
+ handle_demo(
783
+ subcmd=args.demo_cmd,
784
+ out=getattr(args, "out", None),
785
+ )
786
+ return
787
+
672
788
  if args.cmd == "filter":
673
789
  handle_filter(subcmd=args.filter_cmd, name=getattr(args, "name", None))
674
790
  return