jerry-thomas 1.0.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- datapipeline/analysis/vector/collector.py +0 -1
- datapipeline/build/tasks/config.py +0 -2
- datapipeline/build/tasks/metadata.py +0 -2
- datapipeline/build/tasks/scaler.py +0 -2
- datapipeline/build/tasks/schema.py +0 -2
- datapipeline/build/tasks/utils.py +0 -2
- datapipeline/cli/app.py +201 -81
- datapipeline/cli/commands/contract.py +145 -283
- datapipeline/cli/commands/demo.py +13 -0
- datapipeline/cli/commands/domain.py +4 -4
- datapipeline/cli/commands/dto.py +11 -0
- datapipeline/cli/commands/filter.py +2 -2
- datapipeline/cli/commands/inspect.py +0 -68
- datapipeline/cli/commands/list_.py +30 -13
- datapipeline/cli/commands/loader.py +11 -0
- datapipeline/cli/commands/mapper.py +82 -0
- datapipeline/cli/commands/parser.py +45 -0
- datapipeline/cli/commands/run_config.py +1 -3
- datapipeline/cli/commands/serve_pipeline.py +5 -7
- datapipeline/cli/commands/source.py +106 -18
- datapipeline/cli/commands/stream.py +292 -0
- datapipeline/cli/visuals/common.py +0 -2
- datapipeline/cli/visuals/sections.py +0 -2
- datapipeline/cli/workspace_utils.py +0 -3
- datapipeline/config/context.py +0 -2
- datapipeline/config/dataset/feature.py +1 -0
- datapipeline/config/metadata.py +0 -2
- datapipeline/config/project.py +0 -2
- datapipeline/config/resolution.py +10 -2
- datapipeline/config/tasks.py +9 -9
- datapipeline/domain/feature.py +3 -0
- datapipeline/domain/record.py +7 -7
- datapipeline/domain/sample.py +0 -2
- datapipeline/domain/vector.py +6 -8
- datapipeline/integrations/ml/adapter.py +0 -2
- datapipeline/integrations/ml/pandas_support.py +0 -2
- datapipeline/integrations/ml/rows.py +0 -2
- datapipeline/integrations/ml/torch_support.py +0 -2
- datapipeline/io/output.py +0 -2
- datapipeline/io/serializers.py +26 -16
- datapipeline/mappers/synthetic/time.py +9 -2
- datapipeline/pipeline/artifacts.py +3 -5
- datapipeline/pipeline/observability.py +0 -2
- datapipeline/pipeline/pipelines.py +118 -34
- datapipeline/pipeline/stages.py +54 -18
- datapipeline/pipeline/utils/spool_cache.py +142 -0
- datapipeline/pipeline/utils/transform_utils.py +27 -2
- datapipeline/services/artifacts.py +1 -4
- datapipeline/services/constants.py +1 -0
- datapipeline/services/factories.py +4 -6
- datapipeline/services/paths.py +10 -1
- datapipeline/services/project_paths.py +0 -2
- datapipeline/services/runs.py +0 -2
- datapipeline/services/scaffold/contract_yaml.py +76 -0
- datapipeline/services/scaffold/demo.py +141 -0
- datapipeline/services/scaffold/discovery.py +115 -0
- datapipeline/services/scaffold/domain.py +21 -13
- datapipeline/services/scaffold/dto.py +31 -0
- datapipeline/services/scaffold/filter.py +2 -1
- datapipeline/services/scaffold/layout.py +96 -0
- datapipeline/services/scaffold/loader.py +61 -0
- datapipeline/services/scaffold/mapper.py +116 -0
- datapipeline/services/scaffold/parser.py +56 -0
- datapipeline/services/scaffold/plugin.py +14 -2
- datapipeline/services/scaffold/source_yaml.py +91 -0
- datapipeline/services/scaffold/stream_plan.py +129 -0
- datapipeline/services/scaffold/utils.py +187 -0
- datapipeline/sources/data_loader.py +0 -2
- datapipeline/sources/decoders.py +49 -8
- datapipeline/sources/factory.py +9 -6
- datapipeline/sources/foreach.py +18 -3
- datapipeline/sources/synthetic/time/parser.py +1 -1
- datapipeline/sources/transports.py +10 -4
- datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
- datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
- datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
- datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
- datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
- datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
- datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
- datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
- datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
- datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
- datapipeline/templates/plugin_skeleton/README.md +57 -136
- datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
- datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
- datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
- datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
- datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
- datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
- datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
- datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
- datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
- datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
- datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
- datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +9 -11
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
- datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +15 -0
- datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
- datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
- datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
- datapipeline/templates/stubs/dto.py.j2 +2 -2
- datapipeline/templates/stubs/filter.py.j2 +1 -1
- datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
- datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
- datapipeline/templates/stubs/mappers/ingest.py.j2 +20 -0
- datapipeline/templates/stubs/parser.py.j2 +5 -1
- datapipeline/templates/stubs/record.py.j2 +1 -1
- datapipeline/templates/stubs/source.yaml.j2 +1 -1
- datapipeline/transforms/debug/identity.py +34 -16
- datapipeline/transforms/debug/lint.py +14 -11
- datapipeline/transforms/feature/scaler.py +5 -12
- datapipeline/transforms/filter.py +73 -17
- datapipeline/transforms/interfaces.py +58 -0
- datapipeline/transforms/record/floor_time.py +10 -7
- datapipeline/transforms/record/lag.py +8 -10
- datapipeline/transforms/sequence.py +2 -3
- datapipeline/transforms/stream/dedupe.py +5 -7
- datapipeline/transforms/stream/ensure_ticks.py +39 -24
- datapipeline/transforms/stream/fill.py +34 -25
- datapipeline/transforms/stream/filter.py +25 -0
- datapipeline/transforms/stream/floor_time.py +16 -0
- datapipeline/transforms/stream/granularity.py +52 -30
- datapipeline/transforms/stream/lag.py +17 -0
- datapipeline/transforms/stream/rolling.py +72 -0
- datapipeline/transforms/utils.py +42 -10
- datapipeline/transforms/vector/drop/horizontal.py +0 -3
- datapipeline/transforms/vector/drop/orchestrator.py +0 -3
- datapipeline/transforms/vector/drop/vertical.py +0 -2
- datapipeline/transforms/vector/ensure_schema.py +0 -2
- datapipeline/utils/paths.py +0 -2
- datapipeline/utils/placeholders.py +0 -2
- datapipeline/utils/rich_compat.py +0 -3
- datapipeline/utils/window.py +0 -2
- jerry_thomas-2.0.1.dist-info/METADATA +269 -0
- jerry_thomas-2.0.1.dist-info/RECORD +264 -0
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/WHEEL +1 -1
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/entry_points.txt +7 -3
- datapipeline/services/scaffold/mappers.py +0 -55
- datapipeline/services/scaffold/source.py +0 -191
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
- datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
- datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
- datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
- datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
- datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
- datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
- datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
- datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
- datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
- datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
- datapipeline/templates/stubs/mapper.py.j2 +0 -22
- jerry_thomas-1.0.3.dist-info/METADATA +0 -827
- jerry_thomas-1.0.3.dist-info/RECORD +0 -198
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,17 @@
 from pathlib import Path
 
+from datapipeline.config.workspace import WorkspaceContext
+from datapipeline.cli.workspace_utils import resolve_default_project_yaml
 from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
 from datapipeline.services.bootstrap.core import load_streams
+from datapipeline.services.scaffold.discovery import (
+    list_domains,
+    list_dtos,
+    list_loaders,
+    list_mappers,
+    list_parsers,
+)
+from datapipeline.services.scaffold.utils import error_exit
 
 
 def _default_project_path(root_dir: Path) -> Path | None:
@@ -19,27 +29,34 @@ def _default_project_path(root_dir: Path) -> Path | None:
     return None
 
 
-def handle(subcmd: str) -> None:
+def handle(subcmd: str, *, workspace: WorkspaceContext | None = None) -> None:
     root_dir, name, pyproject = pkg_root(None)
     if subcmd == "sources":
         # Discover sources by scanning sources_dir for YAML files
-        proj_path =
+        proj_path = resolve_default_project_yaml(workspace) if workspace is not None else None
         if proj_path is None:
-
-
+            proj_path = _default_project_path(root_dir)
+            if proj_path is None:
+                error_exit("No project.yaml found under config/.")
         try:
             streams = load_streams(proj_path)
         except FileNotFoundError as exc:
-
-            return
+            error_exit(str(exc))
         aliases = sorted(streams.raw.keys())
         for alias in aliases:
             print(alias)
     elif subcmd == "domains":
-
-
-
-
-
-
-
+        for k in list_domains():
+            print(k)
+    elif subcmd == "parsers":
+        for k in sorted(list_parsers().keys()):
+            print(k)
+    elif subcmd == "mappers":
+        for k in sorted(list_mappers().keys()):
+            print(k)
+    elif subcmd == "loaders":
+        for k in sorted(list_loaders().keys()):
+            print(k)
+    elif subcmd == "dtos":
+        for k in sorted(list_dtos().keys()):
+            print(k)
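For orientation, a minimal sketch of exercising the expanded list dispatch directly, based only on the `handle` signature shown above; it assumes it is run from inside a plugin package, since `pkg_root(None)` resolves the current project:

    from datapipeline.cli.commands import list_

    # Prints registered parser entry points; the other subcommands visible
    # in this hunk are "sources", "domains", "mappers", "loaders", "dtos".
    list_.handle("parsers")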
@@ -0,0 +1,11 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.loader import create_loader
+from datapipeline.services.scaffold.utils import choose_name, status
+
+
+def handle(name: str | None, *, plugin_root: Path | None = None) -> None:
+    if not name:
+        name = choose_name("Loader name", default="custom_loader")
+    ep = create_loader(name=name, root=plugin_root)
+    status("ok", f"Registered loader entry point '{ep}'.")
@@ -0,0 +1,82 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.discovery import list_domains, list_dtos
+from datapipeline.services.scaffold.dto import create_dto
+from datapipeline.services.scaffold.domain import create_domain
+from datapipeline.services.scaffold.mapper import create_mapper
+from datapipeline.services.scaffold.utils import (
+    choose_existing_or_create,
+    choose_name,
+    error_exit,
+    info,
+    status,
+    pick_from_menu,
+    pick_from_list,
+)
+from datapipeline.services.scaffold.layout import (
+    default_mapper_name,
+    LABEL_DTO_FOR_MAPPER,
+    LABEL_DOMAIN_TO_MAP,
+    LABEL_MAPPER_INPUT,
+    default_mapper_name_for_identity,
+)
+
+
+def handle(name: str | None, *, plugin_root: Path | None = None) -> str:
+    input_class = None
+    input_module = None
+
+    input_choice = pick_from_menu(
+        f"{LABEL_MAPPER_INPUT}:",
+        [
+            ("dto", "DTO (default)"),
+            ("identity", "Any"),
+        ],
+    )
+    info("Mapper output (select domain):")
+
+    dto_map = list_dtos(root=plugin_root)
+    if input_choice == "dto":
+        dto_class = choose_existing_or_create(
+            label=LABEL_DTO_FOR_MAPPER,
+            existing=sorted(dto_map.keys()),
+            create_label="Create new DTO",
+            create_fn=create_dto,
+            prompt_new="DTO class name",
+            root=plugin_root,
+        )
+        dto_module = list_dtos(root=plugin_root).get(dto_class)
+        if not dto_module:
+            error_exit("Failed to resolve DTO module.")
+        input_class = dto_class
+        input_module = dto_module
+    else:
+        input_module = "typing"
+        input_class = "Any"
+
+    domains = list_domains(root=plugin_root)
+    domain = choose_existing_or_create(
+        label=LABEL_DOMAIN_TO_MAP,
+        existing=domains,
+        create_label="Create new domain",
+        create_fn=lambda name, root: create_domain(domain=name, root=root),
+        prompt_new="Domain name",
+        root=plugin_root,
+    )
+
+    if not name:
+        if input_choice == "identity":
+            name = choose_name(
+                "Mapper name", default=default_mapper_name_for_identity(domain))
+        else:
+            name = choose_name(
+                "Mapper name", default=default_mapper_name(input_module, domain))
+
+    ep = create_mapper(
+        name=name,
+        input_class=input_class,
+        input_module=input_module,
+        domain=domain,
+        root=plugin_root,
+    )
+    return ep
@@ -0,0 +1,45 @@
+from pathlib import Path
+
+from datapipeline.services.scaffold.dto import create_dto
+from datapipeline.services.scaffold.discovery import list_dtos
+from datapipeline.services.scaffold.parser import create_parser
+from datapipeline.services.scaffold.utils import (
+    choose_existing_or_create,
+    choose_name,
+    error_exit,
+    status,
+)
+from datapipeline.services.scaffold.layout import default_parser_name, LABEL_DTO_FOR_PARSER
+
+
+def handle(
+    name: str | None,
+    *,
+    plugin_root: Path | None = None,
+    default_dto: str | None = None,
+) -> str:
+    dto_map = list_dtos(root=plugin_root)
+    dto_class = choose_existing_or_create(
+        label=LABEL_DTO_FOR_PARSER,
+        existing=sorted(dto_map.keys()),
+        create_label="Create new DTO",
+        create_fn=create_dto,
+        prompt_new="DTO class name",
+        root=plugin_root,
+        default_new=default_dto or (f"{name}DTO" if name else None),
+    )
+    dto_module = list_dtos(root=plugin_root).get(dto_class)
+    if not dto_module:
+        error_exit("Failed to resolve DTO module.")
+
+    if not name:
+        name = choose_name("Parser class name", default=default_parser_name(dto_class))
+
+    ep = create_parser(
+        name=name,
+        dto_class=dto_class,
+        dto_module=dto_module,
+        root=plugin_root,
+    )
+    status("ok", f"Registered parser entry point '{ep}'.")
+    return ep
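Taken together, the three new command modules are thin interactive front-ends over the scaffold services. A minimal sketch of driving them programmatically, using only the signatures reproduced above; the plugin path and DTO name are hypothetical:

    from pathlib import Path

    from datapipeline.cli.commands import loader, mapper, parser

    root = Path("./my-plugin")  # hypothetical plugin checkout
    loader.handle("custom_loader", plugin_root=root)
    mapper.handle(None, plugin_root=root)  # prompts for input DTO and target domain
    parser.handle(None, plugin_root=root, default_dto="MyDTO")  # "MyDTO" is illustrative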
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import logging
 from pathlib import Path
 from typing import Iterator, List, NamedTuple, Optional, Sequence
@@ -93,7 +91,7 @@ def determine_preview_stage(
             return None, None
         stages.append(cfg_stage)
 
-    if not stages or any(stage >
+    if not stages or any(stage > 6 for stage in stages):
         return None, None
 
     if len(set(stages)) == 1:
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import logging
 import time
 from itertools import islice
@@ -70,7 +68,7 @@ def report_serve(target: OutputTarget, count: int) -> None:
 
 
 def _is_full_pipeline_stage(stage: int | None) -> bool:
-    return stage is None or stage >=
+    return stage is None or stage >= 7
 
 
 def serve_with_runtime(
@@ -99,12 +97,12 @@ def serve_with_runtime(
         run_status = "success"
         return
 
-    rectangular = stage is None or stage >
+    rectangular = stage is None or stage > 6
 
-    if stage is not None and stage <=
+    if stage is not None and stage <= 6:
        if target.payload != "sample":
            logger.warning(
-                "Ignoring payload '%s' for stage %s preview; preview outputs
+                "Ignoring payload '%s' for stage %s preview; preview outputs record/feature streams.",
                target.payload,
                stage,
            )
@@ -129,7 +127,7 @@ def serve_with_runtime(
         rectangular=rectangular,
     )
 
-    if stage in (None,
+    if stage in (None, 8):
         vectors = post_process(context, vectors)
     if stage is None:
         vectors = apply_split_stage(runtime, vectors)
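For readability, a summary of the stage gating these hunks make explicit, inferred solely from the comparisons above (the diff does not show what each stage number means beyond this):

    # stage <= 6           -> preview: payload is forced to "sample" (record/feature streams)
    # stage is None or > 6 -> output is rectangular
    # stage in (None, 8)   -> post-processing runs
    # stage is None        -> the split stage runs as well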
@@ -2,7 +2,19 @@ from pathlib import Path
 
 from datapipeline.config.workspace import WorkspaceContext
 from datapipeline.cli.workspace_utils import resolve_default_project_yaml
-from datapipeline.services.scaffold.
+from datapipeline.services.scaffold.source_yaml import (
+    create_source_yaml,
+    default_loader_config,
+)
+from datapipeline.services.scaffold.discovery import list_loaders, list_parsers
+from datapipeline.services.scaffold.utils import (
+    error_exit,
+    info,
+    choose_name,
+    pick_from_menu,
+    prompt_required,
+)
+import sys
 
 
 def handle(
@@ -13,11 +25,13 @@ def handle(
     format: str | None = None,
     *,
     identity: bool = False,
+    loader: str | None = None,
+    parser: str | None = None,
     alias: str | None = None,
     plugin_root: Path | None = None,
     workspace: WorkspaceContext | None = None,
 ) -> None:
-    if subcmd
+    if subcmd == "create":
         # Allow: positional provider dataset, --provider/--dataset, --alias, or provider as 'prov.ds'
         if (not provider or not dataset):
             # Try alias flag first
@@ -26,33 +40,107 @@ def handle(
                 if len(parts) == 2 and all(parts):
                     provider, dataset = parts[0], parts[1]
                 else:
-
-                    raise SystemExit(2)
+                    error_exit("Alias must be 'provider.dataset'")
             # Try provider passed as 'prov.ds' positional/flag
             elif provider and ("." in provider) and not dataset:
                 parts = provider.split(".", 1)
                 if len(parts) == 2 and all(parts):
                     provider, dataset = parts[0], parts[1]
                 else:
-
-                    raise SystemExit(2)
+                    error_exit("Source must be specified as '<provider> <dataset>' or '<provider>.<dataset>'")
 
         if not provider or not dataset:
-
-
-
-
-
-
-
-
+            source_id = prompt_required("Source id (provider.dataset)")
+            parts = source_id.split(".", 1)
+            if len(parts) == 2 and all(parts):
+                provider, dataset = parts[0], parts[1]
+            else:
+                error_exit("Source id must be in the form 'provider.dataset'")
+
+        # Loader selection: either explicit loader EP or built-in transport defaults
+        loader_ep: str | None = loader
+        loader_args: dict = {}
+        if not loader_ep:
+            if not transport:
+                known_loaders = list_loaders(root=plugin_root)
+                options = [
+                    ("fs", "Built-in fs"),
+                    ("http", "Built-in http"),
+                    ("synthetic", "Built-in synthetic"),
+                ]
+                if known_loaders:
+                    options.append(("existing", "Select existing loader"))
+                options.append(("custom", "Custom loader"))
+                choice = pick_from_menu("Loader:", options)
+                if choice in {"fs", "http", "synthetic"}:
+                    transport = choice
+                elif choice == "existing":
+                    loader_ep = pick_from_menu(
+                        "Select loader entrypoint:",
+                        [(k, k) for k in sorted(known_loaders.keys())],
+                    )
+                elif choice == "custom":
+                    loader_ep = prompt_required("Loader entrypoint")
+            if not loader_ep:
+                if transport in {"fs", "http"} and not format:
+                    format_options = [
+                        ("csv", "csv"),
+                        ("json", "json"),
+                        ("json-lines", "json-lines"),
+                    ]
+                    if transport == "fs":
+                        format_options.append(("pickle", "pickle"))
+                    format = pick_from_menu("Format:", format_options)
+                if not transport:
+                    error_exit("--transport is required when no --loader is provided")
+                loader_ep, loader_args = default_loader_config(transport, format)
+
+        # Parser selection (no code generation)
+        if identity:
+            parser_ep = "identity"
+        elif parser:
+            parser_ep = parser
+        else:
+            interactive = sys.stdin.isatty()
+            if not interactive:
+                parser_ep = "identity"
+            else:
+                parsers = list_parsers(root=plugin_root)
+                if parsers:
+                    choice = pick_from_menu(
+                        "Parser:",
+                        [
+                            ("existing", "Select existing parser (default)"),
+                            ("identity", "Identity parser"),
+                            ("custom", "Custom parser"),
+                        ],
+                    )
+                    if choice == "existing":
+                        parser_ep = pick_from_menu(
+                            "Select parser entrypoint:",
+                            [(k, k) for k in sorted(parsers.keys())],
+                        )
+                    elif choice == "identity":
+                        parser_ep = "identity"
+                    else:
+                        parser_ep = prompt_required("Parser entrypoint")
+                else:
+                    choice = pick_from_menu(
+                        "Parser:",
+                        [
+                            ("identity", "Identity parser (default)"),
+                            ("custom", "Custom parser"),
+                        ],
+                    )
+                    parser_ep = "identity" if choice == "identity" else prompt_required("Parser entrypoint")
 
         project_yaml = resolve_default_project_yaml(workspace)
-
+        create_source_yaml(
             provider=provider,
             dataset=dataset,
-
-
+            loader_ep=loader_ep,
+            loader_args=loader_args,
+            parser_ep=parser_ep,
             root=plugin_root,
-            identity=identity,
             **({"project_yaml": project_yaml} if project_yaml is not None else {}),
         )