jerry-thomas 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. datapipeline/analysis/vector/collector.py +120 -17
  2. datapipeline/analysis/vector/matrix.py +33 -8
  3. datapipeline/analysis/vector/report.py +162 -32
  4. datapipeline/build/tasks/__init__.py +11 -0
  5. datapipeline/build/tasks/config.py +74 -0
  6. datapipeline/build/tasks/metadata.py +170 -0
  7. datapipeline/build/tasks/scaler.py +73 -0
  8. datapipeline/build/tasks/schema.py +60 -0
  9. datapipeline/build/tasks/utils.py +169 -0
  10. datapipeline/cli/app.py +304 -127
  11. datapipeline/cli/commands/build.py +240 -16
  12. datapipeline/cli/commands/contract.py +367 -0
  13. datapipeline/cli/commands/domain.py +8 -3
  14. datapipeline/cli/commands/inspect.py +401 -149
  15. datapipeline/cli/commands/list_.py +30 -7
  16. datapipeline/cli/commands/plugin.py +1 -1
  17. datapipeline/cli/commands/run.py +227 -241
  18. datapipeline/cli/commands/run_config.py +101 -0
  19. datapipeline/cli/commands/serve_pipeline.py +156 -0
  20. datapipeline/cli/commands/source.py +44 -8
  21. datapipeline/cli/visuals/__init__.py +4 -2
  22. datapipeline/cli/visuals/common.py +239 -0
  23. datapipeline/cli/visuals/labels.py +15 -15
  24. datapipeline/cli/visuals/runner.py +66 -0
  25. datapipeline/cli/visuals/sections.py +20 -0
  26. datapipeline/cli/visuals/sources.py +132 -119
  27. datapipeline/cli/visuals/sources_basic.py +260 -0
  28. datapipeline/cli/visuals/sources_off.py +76 -0
  29. datapipeline/cli/visuals/sources_rich.py +414 -0
  30. datapipeline/config/catalog.py +37 -3
  31. datapipeline/config/context.py +214 -0
  32. datapipeline/config/dataset/loader.py +21 -4
  33. datapipeline/config/dataset/normalize.py +4 -4
  34. datapipeline/config/metadata.py +43 -0
  35. datapipeline/config/postprocess.py +2 -2
  36. datapipeline/config/project.py +3 -2
  37. datapipeline/config/resolution.py +129 -0
  38. datapipeline/config/tasks.py +309 -0
  39. datapipeline/config/workspace.py +155 -0
  40. datapipeline/domain/__init__.py +12 -0
  41. datapipeline/domain/record.py +11 -0
  42. datapipeline/domain/sample.py +54 -0
  43. datapipeline/integrations/ml/adapter.py +34 -20
  44. datapipeline/integrations/ml/pandas_support.py +0 -2
  45. datapipeline/integrations/ml/rows.py +1 -6
  46. datapipeline/integrations/ml/torch_support.py +1 -3
  47. datapipeline/io/factory.py +112 -0
  48. datapipeline/io/output.py +132 -0
  49. datapipeline/io/protocols.py +21 -0
  50. datapipeline/io/serializers.py +219 -0
  51. datapipeline/io/sinks/__init__.py +23 -0
  52. datapipeline/io/sinks/base.py +2 -0
  53. datapipeline/io/sinks/files.py +79 -0
  54. datapipeline/io/sinks/rich.py +57 -0
  55. datapipeline/io/sinks/stdout.py +18 -0
  56. datapipeline/io/writers/__init__.py +14 -0
  57. datapipeline/io/writers/base.py +28 -0
  58. datapipeline/io/writers/csv_writer.py +25 -0
  59. datapipeline/io/writers/jsonl.py +52 -0
  60. datapipeline/io/writers/pickle_writer.py +30 -0
  61. datapipeline/pipeline/artifacts.py +58 -0
  62. datapipeline/pipeline/context.py +66 -7
  63. datapipeline/pipeline/observability.py +65 -0
  64. datapipeline/pipeline/pipelines.py +65 -13
  65. datapipeline/pipeline/split.py +11 -10
  66. datapipeline/pipeline/stages.py +127 -16
  67. datapipeline/pipeline/utils/keygen.py +20 -7
  68. datapipeline/pipeline/utils/memory_sort.py +22 -10
  69. datapipeline/pipeline/utils/transform_utils.py +22 -0
  70. datapipeline/runtime.py +5 -2
  71. datapipeline/services/artifacts.py +12 -6
  72. datapipeline/services/bootstrap/config.py +25 -0
  73. datapipeline/services/bootstrap/core.py +52 -37
  74. datapipeline/services/constants.py +6 -5
  75. datapipeline/services/factories.py +123 -1
  76. datapipeline/services/project_paths.py +43 -16
  77. datapipeline/services/runs.py +208 -0
  78. datapipeline/services/scaffold/domain.py +3 -2
  79. datapipeline/services/scaffold/filter.py +3 -2
  80. datapipeline/services/scaffold/mappers.py +9 -6
  81. datapipeline/services/scaffold/plugin.py +3 -3
  82. datapipeline/services/scaffold/source.py +93 -56
  83. datapipeline/sources/{composed_loader.py → data_loader.py} +9 -9
  84. datapipeline/sources/decoders.py +83 -18
  85. datapipeline/sources/factory.py +26 -16
  86. datapipeline/sources/models/__init__.py +2 -2
  87. datapipeline/sources/models/generator.py +0 -7
  88. datapipeline/sources/models/loader.py +3 -3
  89. datapipeline/sources/models/parsing_error.py +24 -0
  90. datapipeline/sources/models/source.py +6 -6
  91. datapipeline/sources/synthetic/time/loader.py +14 -2
  92. datapipeline/sources/transports.py +74 -37
  93. datapipeline/templates/plugin_skeleton/README.md +74 -30
  94. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +31 -0
  95. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +30 -0
  96. datapipeline/templates/plugin_skeleton/example/dataset.yaml +18 -0
  97. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +29 -0
  98. datapipeline/templates/plugin_skeleton/{config/datasets/default → example}/project.yaml +11 -8
  99. datapipeline/templates/plugin_skeleton/example/sources/synthetic.ticks.yaml +12 -0
  100. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +3 -0
  101. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +9 -0
  102. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +2 -0
  103. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +4 -0
  104. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +28 -0
  105. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +4 -0
  106. datapipeline/templates/plugin_skeleton/jerry.yaml +28 -0
  107. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +31 -0
  108. datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +30 -0
  109. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +18 -0
  110. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +29 -0
  111. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +22 -0
  112. datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +12 -0
  113. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +3 -0
  114. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +9 -0
  115. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +2 -0
  116. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +4 -0
  117. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +28 -0
  118. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +4 -0
  119. datapipeline/templates/stubs/dto.py.j2 +2 -0
  120. datapipeline/templates/stubs/mapper.py.j2 +5 -4
  121. datapipeline/templates/stubs/parser.py.j2 +2 -0
  122. datapipeline/templates/stubs/record.py.j2 +2 -0
  123. datapipeline/templates/stubs/source.yaml.j2 +2 -3
  124. datapipeline/transforms/debug/lint.py +26 -41
  125. datapipeline/transforms/feature/scaler.py +89 -13
  126. datapipeline/transforms/record/floor_time.py +4 -4
  127. datapipeline/transforms/sequence.py +2 -35
  128. datapipeline/transforms/stream/dedupe.py +24 -0
  129. datapipeline/transforms/stream/ensure_ticks.py +7 -6
  130. datapipeline/transforms/vector/__init__.py +5 -0
  131. datapipeline/transforms/vector/common.py +98 -0
  132. datapipeline/transforms/vector/drop/__init__.py +4 -0
  133. datapipeline/transforms/vector/drop/horizontal.py +79 -0
  134. datapipeline/transforms/vector/drop/orchestrator.py +59 -0
  135. datapipeline/transforms/vector/drop/vertical.py +182 -0
  136. datapipeline/transforms/vector/ensure_schema.py +184 -0
  137. datapipeline/transforms/vector/fill.py +87 -0
  138. datapipeline/transforms/vector/replace.py +62 -0
  139. datapipeline/utils/load.py +24 -3
  140. datapipeline/utils/rich_compat.py +38 -0
  141. datapipeline/utils/window.py +76 -0
  142. jerry_thomas-1.0.0.dist-info/METADATA +825 -0
  143. jerry_thomas-1.0.0.dist-info/RECORD +199 -0
  144. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/entry_points.txt +9 -8
  145. datapipeline/build/tasks.py +0 -186
  146. datapipeline/cli/commands/link.py +0 -128
  147. datapipeline/cli/commands/writers.py +0 -138
  148. datapipeline/config/build.py +0 -64
  149. datapipeline/config/run.py +0 -116
  150. datapipeline/templates/plugin_skeleton/config/contracts/time_hour_sin.synthetic.yaml +0 -24
  151. datapipeline/templates/plugin_skeleton/config/contracts/time_linear.synthetic.yaml +0 -23
  152. datapipeline/templates/plugin_skeleton/config/datasets/default/build.yaml +0 -9
  153. datapipeline/templates/plugin_skeleton/config/datasets/default/dataset.yaml +0 -14
  154. datapipeline/templates/plugin_skeleton/config/datasets/default/postprocess.yaml +0 -13
  155. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_test.yaml +0 -10
  156. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_train.yaml +0 -10
  157. datapipeline/templates/plugin_skeleton/config/datasets/default/runs/run_val.yaml +0 -10
  158. datapipeline/templates/plugin_skeleton/config/sources/time_ticks.yaml +0 -11
  159. datapipeline/transforms/vector.py +0 -210
  160. jerry_thomas-0.3.0.dist-info/METADATA +0 -502
  161. jerry_thomas-0.3.0.dist-info/RECORD +0 -139
  162. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/WHEEL +0 -0
  163. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/licenses/LICENSE +0 -0
  164. {jerry_thomas-0.3.0.dist-info → jerry_thomas-1.0.0.dist-info}/top_level.txt +0 -0
datapipeline/cli/app.py CHANGED
@@ -1,17 +1,100 @@
  import argparse
  import logging
+ from pathlib import Path
+ from typing import Optional, Tuple

  from datapipeline.cli.commands.run import handle_serve
  from datapipeline.cli.commands.plugin import bar as handle_bar
  from datapipeline.cli.commands.source import handle as handle_source
  from datapipeline.cli.commands.domain import handle as handle_domain
- from datapipeline.cli.commands.link import handle as handle_link
+ from datapipeline.cli.commands.contract import handle as handle_contract
  from datapipeline.cli.commands.list_ import handle as handle_list
  from datapipeline.cli.commands.filter import handle as handle_filter
  from datapipeline.cli.commands.inspect import (
  report as handle_inspect_report,
  )
  from datapipeline.cli.commands.build import handle as handle_build
+ from datapipeline.config.workspace import (
+ WorkspaceContext,
+ load_workspace_context,
+ )
+ from datapipeline.config.resolution import resolve_visuals
+ from datapipeline.utils.rich_compat import suppress_file_proxy_shutdown_errors
+
+ suppress_file_proxy_shutdown_errors()
+
+
+ def _dataset_to_project_path(
+ dataset: str,
+ workspace: Optional[WorkspaceContext],
+ ) -> str:
+ """Resolve a dataset selector (alias, folder, or file) into a project.yaml path."""
+ # 1) Alias via jerry.yaml datasets (wins over local folders with same name)
+ if workspace is not None:
+ datasets = getattr(workspace.config, "datasets", {}) or {}
+ raw = datasets.get(dataset)
+ if raw:
+ base = workspace.root
+ candidate = Path(raw)
+ candidate = candidate if candidate.is_absolute() else (base / candidate)
+ if candidate.is_dir():
+ candidate = candidate / "project.yaml"
+ return str(candidate.resolve())
+
+ # 2) Direct file path
+ path = Path(dataset)
+ if path.suffix in {".yaml", ".yml"}:
+ return str(path if path.is_absolute() else (Path.cwd() / path).resolve())
+
+ # 3) Directory: assume project.yaml inside
+ if path.is_dir():
+ candidate = path / "project.yaml"
+ return str(candidate.resolve())
+
+ raise SystemExit(f"Unknown dataset '{dataset}'. Define it under datasets: in jerry.yaml or pass a valid path.")
+
+
+ def _resolve_project_from_args(
+ project: Optional[str],
+ dataset: Optional[str],
+ workspace: Optional[WorkspaceContext],
+ ) -> Tuple[Optional[str], Optional[str]]:
+ """Resolve final project path from --project / --dataset / jerry.yaml defaults.
+
+ Rules:
+ - If both project and dataset are explicitly given (and project != DEFAULT_PROJECT_PATH), error.
+ - If dataset is given, resolve it to a project path (alias, dir, or file).
+ - If neither is given (or project==DEFAULT_PROJECT_PATH), and jerry.yaml declares default_dataset,
+ resolve that alias.
+ - Otherwise fall back to legacy DEFAULT_PROJECT_PATH resolution.
+ """
+ explicit_project = project is not None
+ explicit_dataset = dataset is not None
+
+ if explicit_project and explicit_dataset:
+ raise SystemExit("Cannot use both --project and --dataset; pick one.")
+
+ # Prefer dataset when provided
+ if explicit_dataset:
+ resolved = _dataset_to_project_path(dataset, workspace)
+ return resolved, dataset
+
+ # No explicit dataset; use default_dataset from workspace when project is not explicitly set
+ if not explicit_project and workspace is not None:
+ default_ds = getattr(workspace.config, "default_dataset", None)
+ if default_ds:
+ resolved = _dataset_to_project_path(default_ds, workspace)
+ return resolved, default_ds
+
+ # If project was given explicitly, use it as-is (caller is responsible for validity).
+ if explicit_project:
+ return project, dataset
+
+ # Nothing resolved: require explicit selection.
+ raise SystemExit(
+ "No dataset/project selected. Use --dataset <name|path>, --project <path>, "
+ "or define default_dataset in jerry.yaml."
+ )


  def main() -> None:
@@ -37,10 +120,15 @@ def main() -> None:
  help="produce vectors with configurable logging",
  parents=[common],
  )
+ p_serve.add_argument(
+ "--dataset",
+ "-d",
+ help="dataset alias, folder, or project.yaml path",
+ )
  p_serve.add_argument(
  "--project",
  "-p",
- default="config/datasets/default/project.yaml",
+ default=None,
  help="path to project.yaml",
  )
  p_serve.add_argument(
@@ -48,22 +136,31 @@ def main() -> None:
  help="optional cap on the number of vectors to emit",
  )
  p_serve.add_argument(
- "--output", "-o", default=None,
- help="output destination: 'print', 'stream', or a file ending in .pt",
+ "--out-transport",
+ choices=["stdout", "fs"],
+ help="output transport (stdout or fs) for serve runs",
  )
  p_serve.add_argument(
- "--include-targets",
- action=argparse.BooleanOptionalAction,
- default=None,
- help="include dataset.targets in served vectors (use --no-include-targets to force disable)",
+ "--out-format",
+ choices=["print", "json-lines", "json", "csv", "pickle"],
+ help="output format (print/json-lines/csv/pickle) for serve runs",
+ )
+ p_serve.add_argument(
+ "--out-payload",
+ choices=["sample", "vector"],
+ help="payload structure: full sample (default) or vector-only body",
+ )
+ p_serve.add_argument(
+ "--out-path",
+ help="destination file path when using fs transport",
  )
  p_serve.add_argument(
  "--keep",
- help="split label to serve; overrides run.yaml and project globals",
+ help="split label to serve; overrides serve tasks and project globals",
  )
  p_serve.add_argument(
  "--run",
- help="select a specific run config by filename stem when project.paths.run points to a folder",
+ help="select a serve task by name when project.paths.tasks contains multiple entries",
  )
  p_serve.add_argument(
  "--stage",
@@ -73,6 +170,23 @@ def main() -> None:
  default=None,
  help="preview a specific pipeline stage (0-5 feature stages, 6 assembled vectors, 7 transformed vectors)",
  )
+ p_serve.add_argument(
+ "--visuals",
+ choices=["auto", "tqdm", "rich", "off"],
+ default=None,
+ help="visuals renderer: auto (default), tqdm, rich, or off",
+ )
+ p_serve.add_argument(
+ "--progress",
+ choices=["auto", "spinner", "bars", "off"],
+ default=None,
+ help="progress display: auto (spinner unless DEBUG), spinner, bars, or off",
+ )
+ p_serve.add_argument(
+ "--skip-build",
+ action="store_true",
+ help="skip the automatic build step (useful for quick feature previews)",
+ )

  # build (materialize artifacts)
  p_build = sub.add_parser(
@@ -80,10 +194,15 @@ def main() -> None:
  help="materialize project artifacts (expected ids, hashes, etc.)",
  parents=[common],
  )
+ p_build.add_argument(
+ "--dataset",
+ "-d",
+ help="dataset alias, folder, or project.yaml path",
+ )
  p_build.add_argument(
  "--project",
  "-p",
- default="config/datasets/default/project.yaml",
+ default=None,
  help="path to project.yaml",
  )
  p_build.add_argument(
@@ -91,63 +210,99 @@ def main() -> None:
  action="store_true",
  help="rebuild even when the configuration hash matches the last run",
  )
+ p_build.add_argument(
+ "--visuals",
+ choices=["auto", "tqdm", "rich", "off"],
+ default=None,
+ help="visuals renderer: auto (default), tqdm, rich, or off",
+ )
+ p_build.add_argument(
+ "--progress",
+ choices=["auto", "spinner", "bars", "off"],
+ default=None,
+ help="progress display: auto (spinner unless DEBUG), spinner, bars, or off",
+ )

  # source
- p_dist = sub.add_parser(
+ p_source = sub.add_parser(
  "source",
  help="add or list raw sources",
  parents=[common],
  )
- dist_sub = p_dist.add_subparsers(dest="dist_cmd", required=True)
- p_dist_add = dist_sub.add_parser(
+ source_sub = p_source.add_subparsers(dest="source_cmd", required=True)
+ p_source_add = source_sub.add_parser(
  "add",
  help="create a provider+dataset source",
  description=(
  "Scaffold a source using transport + format.\n\n"
+ "Usage:\n"
+ " jerry source add <provider> <dataset> -t fs -f csv\n"
+ " jerry source add <provider>.<dataset> -t http -f json\n"
+ " jerry source add -p <provider> -d <dataset> -t synthetic\n\n"
  "Examples:\n"
  " fs CSV: -t fs -f csv\n"
  " fs NDJSON: -t fs -f json-lines\n"
- " URL JSON: -t url -f json\n"
+ " HTTP JSON: -t http -f json\n"
  " Synthetic: -t synthetic\n\n"
  "Note: set 'glob: true' in the generated YAML if your 'path' contains wildcards."
  ),
  )
- p_dist_add.add_argument("--provider", "-p", required=True)
- p_dist_add.add_argument("--dataset", "-d", required=True)
- p_dist_add.add_argument(
+ # Support simple positionals, plus flags for compatibility
+ # Allow either positionals or flags. Use distinct dest names for flags
+ # to avoid ambiguity when both forms are present in some environments.
+ p_source_add.add_argument("provider", nargs="?", help="provider name")
+ p_source_add.add_argument("dataset", nargs="?", help="dataset slug")
+ p_source_add.add_argument("--provider", "-p", dest="provider_opt", metavar="PROVIDER", help="provider name")
+ p_source_add.add_argument("--dataset", "-d", dest="dataset_opt", metavar="DATASET", help="dataset slug")
+ p_source_add.add_argument("--alias", "-a", help="provider.dataset alias")
+ p_source_add.add_argument(
  "--transport", "-t",
- choices=["fs", "url", "synthetic"],
+ choices=["fs", "http", "synthetic"],
  required=True,
- help="how data is accessed: fs/url/synthetic",
+ help="how data is accessed: fs/http/synthetic",
  )
- p_dist_add.add_argument(
+ p_source_add.add_argument(
  "--format", "-f",
- choices=["csv", "json", "json-lines"],
- help="data format for fs/url transports (ignored otherwise)",
+ choices=["csv", "json", "json-lines", "pickle"],
+ help="data format for fs/http transports (ignored otherwise)",
+ )
+ p_source_add.add_argument(
+ "--identity",
+ action="store_true",
+ help="use the built-in identity parser (skips DTO/parser scaffolding)",
  )
- dist_sub.add_parser("list", help="list known sources")
+ source_sub.add_parser("list", help="list known sources")

  # domain
- p_spirit = sub.add_parser(
+ p_domain = sub.add_parser(
  "domain",
  help="add or list domains",
  parents=[common],
  )
- spirit_sub = p_spirit.add_subparsers(dest="spirit_cmd", required=True)
- p_spirit_add = spirit_sub.add_parser(
+ domain_sub = p_domain.add_subparsers(dest="domain_cmd", required=True)
+ p_domain_add = domain_sub.add_parser(
  "add",
  help="create a domain",
  description="Create a time-aware domain package rooted in TemporalRecord.",
  )
- p_spirit_add.add_argument("--domain", "-d", required=True)
- spirit_sub.add_parser("list", help="list known domains")
+ # Accept positional name, plus flags for flexibility and consistency.
+ p_domain_add.add_argument("domain", nargs="?", help="domain name")
+ p_domain_add.add_argument(
+ "--name", "-n", dest="domain", help="domain name"
+ )
+ domain_sub.add_parser("list", help="list known domains")

- # contract (link source <-> domain)
+ # contract (interactive: ingest or composed)
  p_contract = sub.add_parser(
  "contract",
- help="link a source to a domain",
+ help="manage stream contracts (ingest or composed)",
  parents=[common],
  )
+ p_contract.add_argument(
+ "--identity",
+ action="store_true",
+ help="use built-in identity mapper (skip mapper scaffolding)",
+ )

  # plugin (plugin scaffolding)
  p_bar = sub.add_parser(
@@ -158,7 +313,9 @@ def main() -> None:
  bar_sub = p_bar.add_subparsers(dest="bar_cmd", required=True)
  p_bar_init = bar_sub.add_parser(
  "init", help="create a plugin skeleton")
- p_bar_init.add_argument("--name", "-n", required=True)
+ # Accept positional name and flag for flexibility
+ p_bar_init.add_argument("name", nargs="?", help="plugin distribution name")
+ p_bar_init.add_argument("--name", "-n", dest="name", help="plugin distribution name")
  p_bar_init.add_argument("--out", "-o", default=".")

  # filter (unchanged helper)
@@ -171,11 +328,31 @@ def main() -> None:
  help="filter entrypoint name and function/module name",
  )

+ # Shared visuals/progress controls for inspect commands
+ inspect_common = argparse.ArgumentParser(add_help=False)
+ inspect_common.add_argument(
+ "--visuals",
+ choices=["auto", "tqdm", "rich", "off"],
+ default=None,
+ help="visuals renderer: auto (default), tqdm, rich, or off",
+ )
+ inspect_common.add_argument(
+ "--progress",
+ choices=["auto", "spinner", "bars", "off"],
+ default=None,
+ help="progress display: auto (spinner unless DEBUG), spinner, bars, or off",
+ )
+ inspect_common.add_argument(
+ "--dataset",
+ "-d",
+ help="dataset alias, folder, or project.yaml path",
+ )
+
  # inspect (metadata helpers)
  p_inspect = sub.add_parser(
  "inspect",
- help="inspect dataset metadata: report, coverage, matrix, partitions",
- parents=[common],
+ help="inspect dataset metadata: report, matrix, partitions",
+ parents=[common, inspect_common],
  )
  inspect_sub = p_inspect.add_subparsers(dest="inspect_cmd", required=False)

@@ -183,11 +360,12 @@ def main() -> None:
  p_inspect_report = inspect_sub.add_parser(
  "report",
  help="print a quality report to stdout",
+ parents=[inspect_common],
  )
  p_inspect_report.add_argument(
  "--project",
  "-p",
- default="config/datasets/default/project.yaml",
+ default=None,
  help="path to project.yaml",
  )
  p_inspect_report.add_argument(
@@ -210,62 +388,22 @@ def main() -> None:
  help="whether to apply postprocess transforms (final) or skip them (raw)",
  )
  p_inspect_report.add_argument(
- "--include-targets",
- action="store_true",
- help="include dataset.targets when computing report/matrix/coverage",
- )
-
- # Coverage (JSON file)
- p_inspect_cov = inspect_sub.add_parser(
- "coverage",
- help="write coverage summary JSON",
- )
- p_inspect_cov.add_argument(
- "--project",
- "-p",
- default="config/datasets/default/project.yaml",
- help="path to project.yaml",
- )
- p_inspect_cov.add_argument(
- "--output",
- "-o",
- default=None,
- help="coverage JSON path (defaults to build/coverage.json)",
- )
- p_inspect_cov.add_argument(
- "--threshold",
- "-t",
- type=float,
- default=0.95,
- help="coverage threshold (0-1) for keep/drop lists",
- )
- p_inspect_cov.add_argument(
- "--match-partition",
- choices=["base", "full"],
- default="base",
- help="match features by base id or full partition id",
- )
- p_inspect_cov.add_argument(
- "--mode",
- choices=["final", "raw"],
- default="final",
- help="whether to apply postprocess transforms (final) or skip them (raw)",
- )
- p_inspect_cov.add_argument(
- "--include-targets",
- action="store_true",
- help="include dataset.targets when computing coverage",
+ "--sort",
+ choices=["missing", "nulls"],
+ default="missing",
+ help="feature ranking metric in the report (missing or nulls)",
  )

  # Matrix export
  p_inspect_matrix = inspect_sub.add_parser(
  "matrix",
  help="export availability matrix",
+ parents=[inspect_common],
  )
  p_inspect_matrix.add_argument(
  "--project",
  "-p",
- default="config/datasets/default/project.yaml",
+ default=None,
  help="path to project.yaml",
  )
  p_inspect_matrix.add_argument(
@@ -309,21 +447,17 @@ def main() -> None:
  default="final",
  help="whether to apply postprocess transforms (final) or skip them (raw)",
  )
- p_inspect_matrix.add_argument(
- "--include-targets",
- action="store_true",
- help="include dataset.targets when exporting the matrix",
- )

  # Partitions manifest subcommand
  p_inspect_parts = inspect_sub.add_parser(
  "partitions",
  help="discover partitions and write a manifest JSON",
+ parents=[inspect_common],
  )
  p_inspect_parts.add_argument(
  "--project",
  "-p",
- default="config/datasets/default/project.yaml",
+ default=None,
  help="path to project.yaml",
  )
  p_inspect_parts.add_argument(
@@ -332,21 +466,17 @@ def main() -> None:
  default=None,
  help="partitions manifest path (defaults to build/partitions.json)",
  )
- p_inspect_parts.add_argument(
- "--include-targets",
- action="store_true",
- help="include dataset.targets when discovering partitions",
- )

  # Expected IDs (newline list)
  p_inspect_expected = inspect_sub.add_parser(
  "expected",
  help="discover full feature ids and write a newline list",
+ parents=[inspect_common],
  )
  p_inspect_expected.add_argument(
  "--project",
  "-p",
- default="config/datasets/default/project.yaml",
+ default=None,
  help="path to project.yaml",
  )
  p_inspect_expected.add_argument(
@@ -355,47 +485,87 @@ def main() -> None:
  default=None,
  help="expected ids output path (defaults to build/datasets/<name>/expected.txt)",
  )
- p_inspect_expected.add_argument(
- "--include-targets",
- action="store_true",
- help="include dataset.targets when discovering expected ids",
- )

+ workspace_context = load_workspace_context(Path.cwd())
  args = parser.parse_args()

+ # Resolve dataset/project selection for commands that use a project.
+ if hasattr(args, "project") or hasattr(args, "dataset"):
+ raw_project = getattr(args, "project", None)
+ raw_dataset = getattr(args, "dataset", None)
+ resolved_project, resolved_dataset = _resolve_project_from_args(
+ raw_project,
+ raw_dataset,
+ workspace_context,
+ )
+ if hasattr(args, "project"):
+ args.project = resolved_project
+ if hasattr(args, "dataset"):
+ args.dataset = resolved_dataset
+
  cli_level_arg = getattr(args, "log_level", None)
- base_level_name = (cli_level_arg or "WARNING").upper()
+ shared_defaults = workspace_context.config.shared if workspace_context else None
+ # Default logging level: CLI flag > jerry.yaml shared.log_level > INFO
+ default_level_name = (
+ shared_defaults.log_level.upper()
+ if shared_defaults and shared_defaults.log_level
+ else "INFO"
+ )
+ base_level_name = (cli_level_arg or default_level_name).upper()
  base_level = logging._nameToLevel.get(base_level_name, logging.WARNING)

  logging.basicConfig(level=base_level, format="%(message)s")
+ plugin_root = (
+ workspace_context.resolve_plugin_root() if workspace_context else None
+ )

  if args.cmd == "serve":
  handle_serve(
  project=args.project,
  limit=getattr(args, "limit", None),
- output=args.output,
- include_targets=args.include_targets,
  keep=getattr(args, "keep", None),
  run_name=getattr(args, "run", None),
  stage=getattr(args, "stage", None),
+ out_transport=getattr(args, "out_transport", None),
+ out_format=getattr(args, "out_format", None),
+ out_payload=getattr(args, "out_payload", None),
+ out_path=getattr(args, "out_path", None),
+ skip_build=getattr(args, "skip_build", False),
  cli_log_level=cli_level_arg,
  base_log_level=base_level_name,
+ cli_visuals=getattr(args, "visuals", None),
+ cli_progress=getattr(args, "progress", None),
+ workspace=workspace_context,
  )
  return
  if args.cmd == "build":
  handle_build(
  project=args.project,
  force=getattr(args, "force", False),
+ cli_visuals=getattr(args, "visuals", None),
+ cli_progress=getattr(args, "progress", None),
+ workspace=workspace_context,
  )
  return

  if args.cmd == "inspect":
  # Default to 'report' when no subcommand is given
  subcmd = getattr(args, "inspect_cmd", None)
+ shared_visuals_default = shared_defaults.visuals if shared_defaults else None
+ shared_progress_default = shared_defaults.progress if shared_defaults else None
+ inspect_visuals = resolve_visuals(
+ cli_visuals=getattr(args, "visuals", None),
+ config_visuals=None,
+ workspace_visuals=shared_visuals_default,
+ cli_progress=getattr(args, "progress", None),
+ config_progress=None,
+ workspace_progress=shared_progress_default,
+ )
+ inspect_visual_provider = inspect_visuals.visuals or "auto"
+ inspect_progress_style = inspect_visuals.progress or "auto"
  if subcmd in (None, "report"):
  handle_inspect_report(
- project=getattr(args, "project",
- "config/datasets/default/project.yaml"),
+ project=args.project,
  output=None,
  threshold=getattr(args, "threshold", 0.95),
  match_partition=getattr(args, "match_partition", "base"),
@@ -406,22 +576,11 @@ def main() -> None:
  quiet=False,
  write_coverage=False,
  apply_postprocess=(getattr(args, "mode", "final") == "final"),
- include_targets=getattr(args, "include_targets", False),
- )
- elif subcmd == "coverage":
- handle_inspect_report(
- project=args.project,
- output=getattr(args, "output", None),
- threshold=getattr(args, "threshold", 0.95),
- match_partition=getattr(args, "match_partition", "base"),
- matrix="none",
- matrix_output=None,
- rows=20,
- cols=10,
- quiet=True,
- write_coverage=True,
- apply_postprocess=(getattr(args, "mode", "final") == "final"),
- include_targets=getattr(args, "include_targets", False),
+ visuals=inspect_visual_provider,
+ progress=inspect_progress_style,
+ log_level=base_level,
+ sort=getattr(args, "sort", "missing"),
+ workspace=workspace_context,
  )
  elif subcmd == "matrix":
  handle_inspect_report(
@@ -436,49 +595,67 @@ def main() -> None:
  quiet=getattr(args, "quiet", False),
  write_coverage=False,
  apply_postprocess=(getattr(args, "mode", "final") == "final"),
- include_targets=getattr(args, "include_targets", False),
+ visuals=inspect_visual_provider,
+ progress=inspect_progress_style,
+ log_level=base_level,
+ sort=getattr(args, "sort", "missing"),
+ workspace=workspace_context,
  )
  elif subcmd == "partitions":
  from datapipeline.cli.commands.inspect import partitions as handle_inspect_partitions
  handle_inspect_partitions(
  project=args.project,
  output=getattr(args, "output", None),
- include_targets=getattr(args, "include_targets", False),
+ visuals=inspect_visual_provider,
+ progress=inspect_progress_style,
+ log_level=base_level,
+ workspace=workspace_context,
  )
  elif subcmd == "expected":
  from datapipeline.cli.commands.inspect import expected as handle_inspect_expected
  handle_inspect_expected(
  project=args.project,
  output=getattr(args, "output", None),
- include_targets=getattr(args, "include_targets", False),
+ visuals=inspect_visual_provider,
+ progress=inspect_progress_style,
+ log_level=base_level,
+ workspace=workspace_context,
  )
  return

  if args.cmd == "source":
- if args.dist_cmd == "list":
+ if args.source_cmd == "list":
  handle_list(subcmd="sources")
  else:
+ # Merge positionals and flags for provider/dataset
  handle_source(
  subcmd="add",
- provider=getattr(args, "provider", None),
- dataset=getattr(args, "dataset", None),
+ provider=(getattr(args, "provider", None) or getattr(args, "provider_opt", None)),
+ dataset=(getattr(args, "dataset", None) or getattr(args, "dataset_opt", None)),
  transport=getattr(args, "transport", None),
  format=getattr(args, "format", None),
+ alias=getattr(args, "alias", None),
+ identity=getattr(args, "identity", False),
+ plugin_root=plugin_root,
  )
  return

  if args.cmd == "domain":
- if args.spirit_cmd == "list":
+ if args.domain_cmd == "list":
  handle_list(subcmd="domains")
  else:
  handle_domain(
  subcmd="add",
  domain=getattr(args, "domain", None),
+ plugin_root=plugin_root,
  )
  return

  if args.cmd == "contract":
- handle_link()
+ handle_contract(
+ plugin_root=plugin_root,
+ use_identity=args.identity,
+ )
  return

  if args.cmd == "plugin":
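
Note on the dataset/project selection added in this diff: the new --dataset flag, the datasets: aliases, and default_dataset in jerry.yaml all funnel through the _resolve_project_from_args helper shown in the first hunk. The snippet below is a minimal sketch of that precedence only; the workspace object is a hypothetical stand-in mimicking the fields the helper reads, not the real WorkspaceContext from datapipeline.config.workspace.

# Sketch only: illustrates the selection order implemented by
# _resolve_project_from_args; the names below are illustrative stand-ins.
from pathlib import Path
from types import SimpleNamespace

workspace = SimpleNamespace(                 # mimics the attributes the helper reads
    root=Path("/repo"),                      # directory containing jerry.yaml
    config=SimpleNamespace(
        datasets={"example": "example"},     # alias -> folder holding project.yaml
        default_dataset="example",
    ),
)

# Precedence for commands that accept --project/--dataset:
#   --project X --dataset Y  -> SystemExit ("Cannot use both --project and --dataset; pick one.")
#   --dataset example        -> alias lookup relative to workspace.root; "project.yaml"
#                               is appended when the target is an existing directory
#   (no flags)               -> default_dataset from jerry.yaml, resolved the same way
#   --project path/to.yaml   -> used as-is
#   nothing resolvable       -> SystemExit asking for --dataset, --project, or default_dataset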