jerry-thomas 1.0.2-py3-none-any.whl → 2.0.0-py3-none-any.whl
This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.
- datapipeline/analysis/vector/collector.py +0 -1
- datapipeline/build/tasks/config.py +0 -2
- datapipeline/build/tasks/metadata.py +0 -2
- datapipeline/build/tasks/scaler.py +0 -2
- datapipeline/build/tasks/schema.py +0 -2
- datapipeline/build/tasks/utils.py +0 -2
- datapipeline/cli/app.py +205 -89
- datapipeline/cli/commands/contract.py +153 -285
- datapipeline/cli/commands/demo.py +13 -0
- datapipeline/cli/commands/domain.py +4 -4
- datapipeline/cli/commands/dto.py +11 -0
- datapipeline/cli/commands/filter.py +2 -2
- datapipeline/cli/commands/inspect.py +0 -68
- datapipeline/cli/commands/list_.py +30 -13
- datapipeline/cli/commands/loader.py +11 -0
- datapipeline/cli/commands/mapper.py +82 -0
- datapipeline/cli/commands/parser.py +45 -0
- datapipeline/cli/commands/run_config.py +1 -3
- datapipeline/cli/commands/serve_pipeline.py +5 -7
- datapipeline/cli/commands/source.py +111 -18
- datapipeline/cli/commands/stream.py +286 -0
- datapipeline/cli/visuals/common.py +57 -7
- datapipeline/cli/visuals/labels.py +8 -41
- datapipeline/cli/visuals/sections.py +0 -2
- datapipeline/cli/visuals/sources_rich.py +8 -3
- datapipeline/cli/workspace_utils.py +22 -0
- datapipeline/config/context.py +0 -2
- datapipeline/config/dataset/dataset.py +1 -1
- datapipeline/config/dataset/feature.py +1 -0
- datapipeline/config/dataset/normalize.py +9 -4
- datapipeline/config/metadata.py +0 -2
- datapipeline/config/project.py +0 -2
- datapipeline/config/resolution.py +10 -2
- datapipeline/config/tasks.py +9 -9
- datapipeline/config/workspace.py +15 -0
- datapipeline/domain/feature.py +3 -0
- datapipeline/domain/record.py +7 -7
- datapipeline/domain/sample.py +0 -2
- datapipeline/domain/vector.py +6 -8
- datapipeline/integrations/ml/adapter.py +0 -2
- datapipeline/integrations/ml/pandas_support.py +0 -2
- datapipeline/integrations/ml/rows.py +0 -2
- datapipeline/integrations/ml/torch_support.py +0 -2
- datapipeline/io/output.py +0 -2
- datapipeline/io/serializers.py +26 -16
- datapipeline/mappers/synthetic/time.py +9 -2
- datapipeline/pipeline/artifacts.py +3 -5
- datapipeline/pipeline/observability.py +0 -2
- datapipeline/pipeline/pipelines.py +118 -34
- datapipeline/pipeline/stages.py +42 -17
- datapipeline/pipeline/utils/spool_cache.py +142 -0
- datapipeline/pipeline/utils/transform_utils.py +27 -2
- datapipeline/services/artifacts.py +1 -4
- datapipeline/services/constants.py +1 -0
- datapipeline/services/factories.py +4 -6
- datapipeline/services/project_paths.py +0 -2
- datapipeline/services/runs.py +0 -2
- datapipeline/services/scaffold/contract_yaml.py +76 -0
- datapipeline/services/scaffold/demo.py +141 -0
- datapipeline/services/scaffold/discovery.py +115 -0
- datapipeline/services/scaffold/domain.py +21 -13
- datapipeline/services/scaffold/dto.py +31 -0
- datapipeline/services/scaffold/filter.py +2 -1
- datapipeline/services/scaffold/layout.py +96 -0
- datapipeline/services/scaffold/loader.py +61 -0
- datapipeline/services/scaffold/mapper.py +116 -0
- datapipeline/services/scaffold/parser.py +56 -0
- datapipeline/services/scaffold/plugin.py +14 -2
- datapipeline/services/scaffold/source_yaml.py +91 -0
- datapipeline/services/scaffold/stream_plan.py +110 -0
- datapipeline/services/scaffold/utils.py +187 -0
- datapipeline/sources/data_loader.py +0 -2
- datapipeline/sources/decoders.py +49 -8
- datapipeline/sources/factory.py +9 -6
- datapipeline/sources/foreach.py +166 -0
- datapipeline/sources/synthetic/time/parser.py +1 -1
- datapipeline/sources/transports.py +10 -4
- datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
- datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
- datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
- datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
- datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
- datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
- datapipeline/templates/plugin_skeleton/README.md +57 -136
- datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
- datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
- datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
- datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
- datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
- datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +6 -9
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +14 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
- datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
- datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
- datapipeline/templates/stubs/dto.py.j2 +1 -1
- datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
- datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
- datapipeline/templates/stubs/mappers/ingest.py.j2 +17 -0
- datapipeline/templates/stubs/parser.py.j2 +4 -0
- datapipeline/templates/stubs/record.py.j2 +0 -1
- datapipeline/templates/stubs/source.yaml.j2 +1 -1
- datapipeline/transforms/debug/identity.py +34 -16
- datapipeline/transforms/debug/lint.py +14 -11
- datapipeline/transforms/feature/scaler.py +5 -12
- datapipeline/transforms/filter.py +73 -17
- datapipeline/transforms/interfaces.py +58 -0
- datapipeline/transforms/record/floor_time.py +10 -7
- datapipeline/transforms/record/lag.py +8 -10
- datapipeline/transforms/sequence.py +2 -3
- datapipeline/transforms/stream/dedupe.py +5 -7
- datapipeline/transforms/stream/ensure_ticks.py +39 -24
- datapipeline/transforms/stream/fill.py +34 -25
- datapipeline/transforms/stream/filter.py +25 -0
- datapipeline/transforms/stream/floor_time.py +16 -0
- datapipeline/transforms/stream/granularity.py +52 -30
- datapipeline/transforms/stream/lag.py +17 -0
- datapipeline/transforms/stream/rolling.py +72 -0
- datapipeline/transforms/utils.py +42 -10
- datapipeline/transforms/vector/drop/horizontal.py +0 -3
- datapipeline/transforms/vector/drop/orchestrator.py +0 -3
- datapipeline/transforms/vector/drop/vertical.py +0 -2
- datapipeline/transforms/vector/ensure_schema.py +0 -2
- datapipeline/utils/paths.py +0 -2
- datapipeline/utils/placeholders.py +0 -2
- datapipeline/utils/rich_compat.py +0 -3
- datapipeline/utils/window.py +0 -2
- jerry_thomas-2.0.0.dist-info/METADATA +282 -0
- jerry_thomas-2.0.0.dist-info/RECORD +264 -0
- {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/WHEEL +1 -1
- {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/entry_points.txt +8 -3
- datapipeline/services/scaffold/mappers.py +0 -55
- datapipeline/services/scaffold/source.py +0 -190
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
- datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
- datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
- datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
- datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
- datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
- datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
- datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
- datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
- datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
- datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.hour_sin.yaml +0 -31
- datapipeline/templates/plugin_skeleton/your-dataset/contracts/time.ticks.linear.yaml +0 -30
- datapipeline/templates/plugin_skeleton/your-dataset/sources/synthetic.ticks.yaml +0 -12
- datapipeline/templates/stubs/mapper.py.j2 +0 -22
- jerry_thomas-1.0.2.dist-info/METADATA +0 -825
- jerry_thomas-1.0.2.dist-info/RECORD +0 -199
- {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {jerry_thomas-1.0.2.dist-info → jerry_thomas-2.0.0.dist-info}/top_level.txt +0 -0
datapipeline/cli/app.py
CHANGED
@@ -5,9 +5,15 @@ from typing import Optional, Tuple
 
 from datapipeline.cli.commands.run import handle_serve
 from datapipeline.cli.commands.plugin import bar as handle_bar
+from datapipeline.cli.commands.demo import handle as handle_demo
 from datapipeline.cli.commands.source import handle as handle_source
 from datapipeline.cli.commands.domain import handle as handle_domain
 from datapipeline.cli.commands.contract import handle as handle_contract
+from datapipeline.cli.commands.dto import handle as handle_dto
+from datapipeline.cli.commands.parser import handle as handle_parser
+from datapipeline.cli.commands.mapper import handle as handle_mapper
+from datapipeline.cli.commands.loader import handle as handle_loader
+from datapipeline.cli.commands.stream import handle as handle_stream
 from datapipeline.cli.commands.list_ import handle as handle_list
 from datapipeline.cli.commands.filter import handle as handle_filter
 from datapipeline.cli.commands.inspect import (
@@ -31,15 +37,9 @@ def _dataset_to_project_path(
     """Resolve a dataset selector (alias, folder, or file) into a project.yaml path."""
     # 1) Alias via jerry.yaml datasets (wins over local folders with same name)
     if workspace is not None:
-        … (3 removed lines not preserved in this diff rendering)
-        base = workspace.root
-        candidate = Path(raw)
-        candidate = candidate if candidate.is_absolute() else (base / candidate)
-        if candidate.is_dir():
-            candidate = candidate / "project.yaml"
-        return str(candidate.resolve())
+        resolved = workspace.resolve_dataset_alias(dataset)
+        if resolved is not None:
+            return str(resolved)
 
     # 2) Direct file path
     path = Path(dataset)
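Note: the inline alias lookup in _dataset_to_project_path moves behind a single call to workspace.resolve_dataset_alias. A hypothetical sketch of what that helper could look like, modeled on the removed lines above (the packaged implementation, added elsewhere in this release, may differ):

    from pathlib import Path
    from typing import Optional

    class Workspace:
        # hypothetical sketch, not the packaged class
        def __init__(self, root: Path, datasets: dict[str, str]):
            self.root = root          # directory containing jerry.yaml
            self.datasets = datasets  # alias -> dataset folder or project.yaml path

        def resolve_dataset_alias(self, dataset: str) -> Optional[Path]:
            raw = self.datasets.get(dataset)
            if raw is None:
                return None
            candidate = Path(raw)
            candidate = candidate if candidate.is_absolute() else (self.root / candidate)
            if candidate.is_dir():
                candidate = candidate / "project.yaml"
            return candidate.resolve()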
@@ -166,9 +166,9 @@ def main() -> None:
         "--stage",
         "-s",
         type=int,
-        choices=range(0, …
+        choices=range(0, 9),
         default=None,
-        help="preview a specific pipeline stage (0-…
+        help="preview a specific pipeline stage (0-6 record/feature stages, 7 assembled vectors, 8 transformed vectors)",
     )
     p_serve.add_argument(
         "--visuals",
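Note: range(0, 9) excludes its upper bound, so --stage accepts 0 through 8, matching the new help text. A standalone argparse sketch (not the packaged parser) illustrating the check:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--stage", "-s", type=int, choices=range(0, 9), default=None)

    print(p.parse_args(["--stage", "8"]).stage)  # 8 accepted (transformed vectors)
    # p.parse_args(["--stage", "9"])             # exits: invalid choice: 9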
@@ -191,7 +191,7 @@ def main() -> None:
     # build (materialize artifacts)
     p_build = sub.add_parser(
         "build",
-        help="materialize project artifacts (…
+        help="materialize project artifacts (schema, hashes, etc.)",
         parents=[common],
     )
     p_build.add_argument(
@@ -223,22 +223,56 @@ def main() -> None:
         help="progress display: auto (spinner unless DEBUG), spinner, bars, or off",
     )
 
+    # demo (optional demo dataset)
+    p_demo = sub.add_parser(
+        "demo",
+        help="create an optional demo dataset inside a plugin",
+        parents=[common],
+    )
+    demo_sub = p_demo.add_subparsers(dest="demo_cmd", required=True)
+    demo_init = demo_sub.add_parser(
+        "init",
+        help="create a standalone demo plugin named demo",
+        parents=[common],
+    )
+    demo_init.add_argument(
+        "--out",
+        "-o",
+        help="override parent directory (demo will be created inside)",
+    )
+
+    # list
+    p_list = sub.add_parser(
+        "list",
+        help="list known resources",
+        parents=[common],
+    )
+    list_sub = p_list.add_subparsers(dest="list_cmd", required=True)
+    list_sub.add_parser("sources", help="list sources")
+    list_sub.add_parser("domains", help="list domains")
+    list_sub.add_parser("parsers", help="list parsers")
+    list_sub.add_parser("dtos", help="list DTOs")
+    list_sub.add_parser("mappers", help="list mappers")
+    list_sub.add_parser("loaders", help="list loaders")
+
     # source
     p_source = sub.add_parser(
         "source",
-        help="…
+        help="create or list raw sources",
         parents=[common],
     )
     source_sub = p_source.add_subparsers(dest="source_cmd", required=True)
-    … (1 removed line not preserved in this diff rendering)
-        "…
-        help="create a provider+dataset source",
+    p_source_create = source_sub.add_parser(
+        "create",
+        help="create a provider+dataset source (yaml only)",
         description=(
-            "…
+            "Create a source YAML using transport + format or a loader entrypoint.\n\n"
             "Usage:\n"
-            " jerry source …
-            " jerry source …
-            " jerry source …
+            " jerry source create <provider>.<dataset> -t fs -f csv\n"
+            " jerry source create <provider>.<dataset> -t http -f json\n"
+            " jerry source create <provider>.<dataset> -t synthetic\n\n"
+            " jerry source create <provider> <dataset> --loader mypkg.loaders.demo:Loader\n"
+            " jerry source create <provider> <dataset> --parser myparser\n\n"
             "Examples:\n"
             " fs CSV: -t fs -f csv\n"
             " fs NDJSON: -t fs -f json-lines\n"
@@ -250,38 +284,47 @@ def main() -> None:
     # Support simple positionals, plus flags for compatibility
     # Allow either positionals or flags. Use distinct dest names for flags
     # to avoid ambiguity when both forms are present in some environments.
-    … (6 removed lines not preserved in this diff rendering)
+    for p in (p_source_create,):
+        p.add_argument("provider", nargs="?", help="provider name")
+        p.add_argument("dataset", nargs="?", help="dataset slug")
+        p.add_argument("--provider", "-p", dest="provider_opt", metavar="PROVIDER", help="provider name")
+        p.add_argument("--dataset", "-d", dest="dataset_opt", metavar="DATASET", help="dataset slug")
+        p.add_argument("--alias", "-a", help="provider.dataset alias")
+        p.add_argument(
             "--transport", "-t",
             choices=["fs", "http", "synthetic"],
-            required=…
+            required=False,
             help="how data is accessed: fs/http/synthetic",
-    … (11 removed lines not preserved in this diff rendering)
+        )
+        p.add_argument(
+            "--format", "-f",
+            choices=["csv", "json", "json-lines", "pickle"],
+            help="data format for fs/http transports (ignored otherwise)",
+        )
+        p.add_argument(
+            "--loader",
+            help="loader entrypoint (overrides --transport/--format)",
+        )
+        p.add_argument(
+            "--parser",
+            help="parser entrypoint (defaults to identity)",
+        )
+        p.add_argument(
+            "--identity",
+            action="store_true",
+            help="use the built-in identity parser (alias for --parser identity)",
+        )
     source_sub.add_parser("list", help="list known sources")
 
     # domain
     p_domain = sub.add_parser(
         "domain",
-        help="…
+        help="create or list domains",
         parents=[common],
     )
     domain_sub = p_domain.add_subparsers(dest="domain_cmd", required=True)
     p_domain_add = domain_sub.add_parser(
-        "…
+        "create",
         help="create a domain",
         description="Create a time-aware domain package rooted in TemporalRecord.",
     )
@@ -292,13 +335,64 @@ def main() -> None:
     )
     domain_sub.add_parser("list", help="list known domains")
 
+    # dto
+    p_dto = sub.add_parser(
+        "dto",
+        help="create DTOs",
+        parents=[common],
+    )
+    dto_sub = p_dto.add_subparsers(dest="dto_cmd", required=True)
+    p_dto_create = dto_sub.add_parser("create", help="create a DTO")
+    p_dto_create.add_argument("name", nargs="?", help="DTO class name")
+
+    # parser
+    p_parser = sub.add_parser(
+        "parser",
+        help="create parsers",
+        parents=[common],
+    )
+    parser_sub = p_parser.add_subparsers(dest="parser_cmd", required=True)
+    p_parser_create = parser_sub.add_parser("create", help="create a parser")
+    p_parser_create.add_argument("name", nargs="?", help="Parser class name")
+
+    # mapper
+    p_mapper = sub.add_parser(
+        "mapper",
+        help="create mappers",
+        parents=[common],
+    )
+    mapper_sub = p_mapper.add_subparsers(dest="mapper_cmd", required=True)
+    p_mapper_create = mapper_sub.add_parser("create", help="create a mapper")
+    p_mapper_create.add_argument("name", nargs="?", help="Mapper function name")
+
+    # loader
+    p_loader = sub.add_parser(
+        "loader",
+        help="create loaders",
+        parents=[common],
+    )
+    loader_sub = p_loader.add_subparsers(dest="loader_cmd", required=True)
+    p_loader_create = loader_sub.add_parser("create", help="create a loader")
+    p_loader_create.add_argument("name", nargs="?", help="Loader name")
+
+    # inflow
+    p_inflow = sub.add_parser(
+        "inflow",
+        help="create end-to-end inflow scaffolds",
+        parents=[common],
+    )
+    inflow_sub = p_inflow.add_subparsers(dest="inflow_cmd", required=True)
+    inflow_sub.add_parser("create", help="create an inflow")
+
     # contract (interactive: ingest or composed)
     p_contract = sub.add_parser(
         "contract",
         help="manage stream contracts (ingest or composed)",
         parents=[common],
     )
-    p_contract.…
+    contract_sub = p_contract.add_subparsers(dest="contract_cmd", required=True)
+    p_contract_create = contract_sub.add_parser("create", help="create a contract")
+    p_contract_create.add_argument(
         "--identity",
         action="store_true",
         help="use built-in identity mapper (skip mapper scaffolding)",
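Note: every new scaffolding noun (demo, dto, parser, mapper, loader, inflow) and the reworked source/domain/contract commands share the same nested-subparser shape: a top-level noun, a required sub-action (create, init, or list), and an optional name. A self-contained sketch of how one of these commands parses, simplified from the hunk above (the real CLI also threads parents=[common] and more options):

    import argparse

    parser = argparse.ArgumentParser(prog="jerry")
    sub = parser.add_subparsers(dest="cmd", required=True)

    p_dto = sub.add_parser("dto", help="create DTOs")
    dto_sub = p_dto.add_subparsers(dest="dto_cmd", required=True)
    p_dto_create = dto_sub.add_parser("create", help="create a DTO")
    p_dto_create.add_argument("name", nargs="?", help="DTO class name")

    args = parser.parse_args(["dto", "create", "MyDto"])
    print(args.cmd, args.dto_cmd, args.name)  # -> dto create MyDto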
@@ -467,24 +561,6 @@ def main() -> None:
         help="partitions manifest path (defaults to build/partitions.json)",
     )
 
-    # Expected IDs (newline list)
-    p_inspect_expected = inspect_sub.add_parser(
-        "expected",
-        help="discover full feature ids and write a newline list",
-        parents=[inspect_common],
-    )
-    p_inspect_expected.add_argument(
-        "--project",
-        "-p",
-        default=None,
-        help="path to project.yaml",
-    )
-    p_inspect_expected.add_argument(
-        "--output",
-        "-o",
-        default=None,
-        help="expected ids output path (defaults to build/datasets/<name>/expected.txt)",
-    )
 
     workspace_context = load_workspace_context(Path.cwd())
     args = parser.parse_args()
@@ -614,50 +690,83 @@ def main() -> None:
                 log_level=base_level,
                 workspace=workspace_context,
             )
-        elif subcmd == "expected":
-            from datapipeline.cli.commands.inspect import expected as handle_inspect_expected
-            handle_inspect_expected(
-                project=args.project,
-                output=getattr(args, "output", None),
-                visuals=inspect_visual_provider,
-                progress=inspect_progress_style,
-                log_level=base_level,
-                workspace=workspace_context,
-            )
         return
 
     if args.cmd == "source":
         if args.source_cmd == "list":
-            handle_list(subcmd="sources")
-            … (11 removed lines not preserved in this diff rendering)
-        )
+            handle_list(subcmd="sources", workspace=workspace_context)
+            return
+        # Merge positionals and flags for provider/dataset
+        handle_source(
+            subcmd=args.source_cmd,
+            provider=(getattr(args, "provider", None) or getattr(args, "provider_opt", None)),
+            dataset=(getattr(args, "dataset", None) or getattr(args, "dataset_opt", None)),
+            transport=getattr(args, "transport", None),
+            format=getattr(args, "format", None),
+            alias=getattr(args, "alias", None),
+            identity=getattr(args, "identity", False),
+            loader=getattr(args, "loader", None),
+            parser=getattr(args, "parser", None),
+            plugin_root=plugin_root,
+            workspace=workspace_context,
+        )
+        return
+
+    if args.cmd == "list":
+        handle_list(subcmd=args.list_cmd, workspace=workspace_context)
         return
 
     if args.cmd == "domain":
         if args.domain_cmd == "list":
             handle_list(subcmd="domains")
-            … (6 removed lines not preserved in this diff rendering)
+            return
+        handle_domain(
+            subcmd=args.domain_cmd,
+            domain=getattr(args, "domain", None),
+            plugin_root=plugin_root,
+        )
+        return
+
+    if args.cmd == "dto":
+        handle_dto(
+            name=getattr(args, "name", None),
+            plugin_root=plugin_root,
+        )
+        return
+
+    if args.cmd == "parser":
+        handle_parser(
+            name=getattr(args, "name", None),
+            plugin_root=plugin_root,
+        )
+        return
+
+    if args.cmd == "mapper":
+        handle_mapper(
+            name=getattr(args, "name", None),
+            plugin_root=plugin_root,
+        )
+        return
+
+    if args.cmd == "loader":
+        handle_loader(
+            name=getattr(args, "name", None),
+            plugin_root=plugin_root,
+        )
+        return
+
+    if args.cmd == "inflow":
+        handle_stream(
+            plugin_root=plugin_root,
+            workspace=workspace_context,
+        )
         return
 
     if args.cmd == "contract":
         handle_contract(
             plugin_root=plugin_root,
-            use_identity=args…
+            use_identity=getattr(args, "identity", False),
+            workspace=workspace_context,
         )
         return
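Note: the source dispatch above merges the positional and flag spellings of provider/dataset with a "first non-None wins" rule, which is why the flags are registered with separate dests (provider_opt, dataset_opt). A standalone sketch of that pattern (illustrative only, not the packaged CLI):

    import argparse

    p = argparse.ArgumentParser(prog="jerry source create")
    p.add_argument("provider", nargs="?", help="provider name")
    p.add_argument("dataset", nargs="?", help="dataset slug")
    p.add_argument("--provider", "-p", dest="provider_opt", metavar="PROVIDER")
    p.add_argument("--dataset", "-d", dest="dataset_opt", metavar="DATASET")

    args = p.parse_args(["--provider", "sandbox", "--dataset", "ohlcv"])
    provider = getattr(args, "provider", None) or getattr(args, "provider_opt", None)
    dataset = getattr(args, "dataset", None) or getattr(args, "dataset_opt", None)
    print(provider, dataset)  # -> sandbox ohlcv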
@@ -669,6 +778,13 @@ def main() -> None:
         )
         return
 
+    if args.cmd == "demo":
+        handle_demo(
+            subcmd=args.demo_cmd,
+            out=getattr(args, "out", None),
+        )
+        return
+
     if args.cmd == "filter":
         handle_filter(subcmd=args.filter_cmd, name=getattr(args, "name", None))
         return