jerry-thomas 1.0.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. datapipeline/analysis/vector/collector.py +0 -1
  2. datapipeline/build/tasks/config.py +0 -2
  3. datapipeline/build/tasks/metadata.py +0 -2
  4. datapipeline/build/tasks/scaler.py +0 -2
  5. datapipeline/build/tasks/schema.py +0 -2
  6. datapipeline/build/tasks/utils.py +0 -2
  7. datapipeline/cli/app.py +201 -81
  8. datapipeline/cli/commands/contract.py +145 -283
  9. datapipeline/cli/commands/demo.py +13 -0
  10. datapipeline/cli/commands/domain.py +4 -4
  11. datapipeline/cli/commands/dto.py +11 -0
  12. datapipeline/cli/commands/filter.py +2 -2
  13. datapipeline/cli/commands/inspect.py +0 -68
  14. datapipeline/cli/commands/list_.py +30 -13
  15. datapipeline/cli/commands/loader.py +11 -0
  16. datapipeline/cli/commands/mapper.py +82 -0
  17. datapipeline/cli/commands/parser.py +45 -0
  18. datapipeline/cli/commands/run_config.py +1 -3
  19. datapipeline/cli/commands/serve_pipeline.py +5 -7
  20. datapipeline/cli/commands/source.py +106 -18
  21. datapipeline/cli/commands/stream.py +292 -0
  22. datapipeline/cli/visuals/common.py +0 -2
  23. datapipeline/cli/visuals/sections.py +0 -2
  24. datapipeline/cli/workspace_utils.py +0 -3
  25. datapipeline/config/context.py +0 -2
  26. datapipeline/config/dataset/feature.py +1 -0
  27. datapipeline/config/metadata.py +0 -2
  28. datapipeline/config/project.py +0 -2
  29. datapipeline/config/resolution.py +10 -2
  30. datapipeline/config/tasks.py +9 -9
  31. datapipeline/domain/feature.py +3 -0
  32. datapipeline/domain/record.py +7 -7
  33. datapipeline/domain/sample.py +0 -2
  34. datapipeline/domain/vector.py +6 -8
  35. datapipeline/integrations/ml/adapter.py +0 -2
  36. datapipeline/integrations/ml/pandas_support.py +0 -2
  37. datapipeline/integrations/ml/rows.py +0 -2
  38. datapipeline/integrations/ml/torch_support.py +0 -2
  39. datapipeline/io/output.py +0 -2
  40. datapipeline/io/serializers.py +26 -16
  41. datapipeline/mappers/synthetic/time.py +9 -2
  42. datapipeline/pipeline/artifacts.py +3 -5
  43. datapipeline/pipeline/observability.py +0 -2
  44. datapipeline/pipeline/pipelines.py +118 -34
  45. datapipeline/pipeline/stages.py +54 -18
  46. datapipeline/pipeline/utils/spool_cache.py +142 -0
  47. datapipeline/pipeline/utils/transform_utils.py +27 -2
  48. datapipeline/services/artifacts.py +1 -4
  49. datapipeline/services/constants.py +1 -0
  50. datapipeline/services/factories.py +4 -6
  51. datapipeline/services/paths.py +10 -1
  52. datapipeline/services/project_paths.py +0 -2
  53. datapipeline/services/runs.py +0 -2
  54. datapipeline/services/scaffold/contract_yaml.py +76 -0
  55. datapipeline/services/scaffold/demo.py +141 -0
  56. datapipeline/services/scaffold/discovery.py +115 -0
  57. datapipeline/services/scaffold/domain.py +21 -13
  58. datapipeline/services/scaffold/dto.py +31 -0
  59. datapipeline/services/scaffold/filter.py +2 -1
  60. datapipeline/services/scaffold/layout.py +96 -0
  61. datapipeline/services/scaffold/loader.py +61 -0
  62. datapipeline/services/scaffold/mapper.py +116 -0
  63. datapipeline/services/scaffold/parser.py +56 -0
  64. datapipeline/services/scaffold/plugin.py +14 -2
  65. datapipeline/services/scaffold/source_yaml.py +91 -0
  66. datapipeline/services/scaffold/stream_plan.py +129 -0
  67. datapipeline/services/scaffold/utils.py +187 -0
  68. datapipeline/sources/data_loader.py +0 -2
  69. datapipeline/sources/decoders.py +49 -8
  70. datapipeline/sources/factory.py +9 -6
  71. datapipeline/sources/foreach.py +18 -3
  72. datapipeline/sources/synthetic/time/parser.py +1 -1
  73. datapipeline/sources/transports.py +10 -4
  74. datapipeline/templates/demo_skeleton/demo/contracts/equity.ohlcv.yaml +33 -0
  75. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.hour_sin.yaml +22 -0
  76. datapipeline/templates/demo_skeleton/demo/contracts/time.ticks.linear.yaml +22 -0
  77. datapipeline/templates/demo_skeleton/demo/data/APPL.jsonl +19 -0
  78. datapipeline/templates/demo_skeleton/demo/data/MSFT.jsonl +19 -0
  79. datapipeline/templates/demo_skeleton/demo/dataset.yaml +19 -0
  80. datapipeline/templates/demo_skeleton/demo/postprocess.yaml +19 -0
  81. datapipeline/templates/demo_skeleton/demo/project.yaml +19 -0
  82. datapipeline/templates/demo_skeleton/demo/sources/sandbox.ohlcv.yaml +17 -0
  83. datapipeline/templates/{plugin_skeleton/example → demo_skeleton/demo}/sources/synthetic.ticks.yaml +1 -1
  84. datapipeline/templates/demo_skeleton/demo/tasks/metadata.yaml +2 -0
  85. datapipeline/templates/demo_skeleton/demo/tasks/scaler.yaml +3 -0
  86. datapipeline/templates/demo_skeleton/demo/tasks/schema.yaml +2 -0
  87. datapipeline/templates/demo_skeleton/demo/tasks/serve.test.yaml +4 -0
  88. datapipeline/templates/demo_skeleton/demo/tasks/serve.train.yaml +4 -0
  89. datapipeline/templates/demo_skeleton/demo/tasks/serve.val.yaml +4 -0
  90. datapipeline/templates/demo_skeleton/scripts/run_dataframe.py +20 -0
  91. datapipeline/templates/demo_skeleton/scripts/run_torch.py +23 -0
  92. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/__init__.py +0 -0
  93. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/__init__.py +0 -0
  94. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/domains/equity/model.py +18 -0
  95. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  96. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/dtos/sandbox_ohlcv_dto.py +14 -0
  97. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +0 -0
  98. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/mappers/map_sandbox_ohlcv_dto_to_equity.py +26 -0
  99. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  100. datapipeline/templates/demo_skeleton/src/{{PACKAGE_NAME}}/parsers/sandbox_ohlcv_dto_parser.py +46 -0
  101. datapipeline/templates/plugin_skeleton/README.md +57 -136
  102. datapipeline/templates/plugin_skeleton/jerry.yaml +12 -24
  103. datapipeline/templates/plugin_skeleton/reference/jerry.yaml +28 -0
  104. datapipeline/templates/plugin_skeleton/reference/reference/contracts/composed.reference.yaml +29 -0
  105. datapipeline/templates/plugin_skeleton/reference/reference/contracts/ingest.reference.yaml +31 -0
  106. datapipeline/templates/plugin_skeleton/reference/reference/contracts/overview.reference.yaml +34 -0
  107. datapipeline/templates/plugin_skeleton/reference/reference/dataset.yaml +29 -0
  108. datapipeline/templates/plugin_skeleton/reference/reference/postprocess.yaml +25 -0
  109. datapipeline/templates/plugin_skeleton/reference/reference/project.yaml +32 -0
  110. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.http.reference.yaml +24 -0
  111. datapipeline/templates/plugin_skeleton/reference/reference/sources/foreach.reference.yaml +21 -0
  112. datapipeline/templates/plugin_skeleton/reference/reference/sources/fs.reference.yaml +16 -0
  113. datapipeline/templates/plugin_skeleton/reference/reference/sources/http.reference.yaml +17 -0
  114. datapipeline/templates/plugin_skeleton/reference/reference/sources/overview.reference.yaml +18 -0
  115. datapipeline/templates/plugin_skeleton/reference/reference/sources/synthetic.reference.yaml +15 -0
  116. datapipeline/templates/plugin_skeleton/reference/reference/tasks/metadata.reference.yaml +11 -0
  117. datapipeline/templates/plugin_skeleton/reference/reference/tasks/scaler.reference.yaml +10 -0
  118. datapipeline/templates/plugin_skeleton/reference/reference/tasks/schema.reference.yaml +10 -0
  119. datapipeline/templates/plugin_skeleton/reference/reference/tasks/serve.reference.yaml +28 -0
  120. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/domains/__init__.py +2 -0
  121. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/dtos/__init__.py +0 -0
  122. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/loaders/__init__.py +0 -0
  123. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/mappers/__init__.py +1 -0
  124. datapipeline/templates/plugin_skeleton/src/{{PACKAGE_NAME}}/parsers/__init__.py +0 -0
  125. datapipeline/templates/plugin_skeleton/your-dataset/dataset.yaml +12 -11
  126. datapipeline/templates/plugin_skeleton/your-dataset/postprocess.yaml +4 -13
  127. datapipeline/templates/plugin_skeleton/your-dataset/project.yaml +9 -11
  128. datapipeline/templates/plugin_skeleton/your-dataset/tasks/metadata.yaml +1 -2
  129. datapipeline/templates/plugin_skeleton/your-dataset/tasks/scaler.yaml +1 -7
  130. datapipeline/templates/plugin_skeleton/your-dataset/tasks/schema.yaml +1 -1
  131. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.test.yaml +1 -1
  132. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.train.yaml +1 -25
  133. datapipeline/templates/plugin_skeleton/your-dataset/tasks/serve.val.yaml +1 -1
  134. datapipeline/templates/plugin_skeleton/your-interim-data-builder/dataset.yaml +9 -0
  135. datapipeline/templates/plugin_skeleton/your-interim-data-builder/postprocess.yaml +1 -0
  136. datapipeline/templates/plugin_skeleton/your-interim-data-builder/project.yaml +15 -0
  137. datapipeline/templates/plugin_skeleton/your-interim-data-builder/tasks/serve.all.yaml +8 -0
  138. datapipeline/templates/stubs/contracts/composed.yaml.j2 +10 -0
  139. datapipeline/templates/stubs/contracts/ingest.yaml.j2 +25 -0
  140. datapipeline/templates/stubs/dto.py.j2 +2 -2
  141. datapipeline/templates/stubs/filter.py.j2 +1 -1
  142. datapipeline/templates/stubs/loaders/basic.py.j2 +11 -0
  143. datapipeline/templates/stubs/mappers/composed.py.j2 +13 -0
  144. datapipeline/templates/stubs/mappers/ingest.py.j2 +20 -0
  145. datapipeline/templates/stubs/parser.py.j2 +5 -1
  146. datapipeline/templates/stubs/record.py.j2 +1 -1
  147. datapipeline/templates/stubs/source.yaml.j2 +1 -1
  148. datapipeline/transforms/debug/identity.py +34 -16
  149. datapipeline/transforms/debug/lint.py +14 -11
  150. datapipeline/transforms/feature/scaler.py +5 -12
  151. datapipeline/transforms/filter.py +73 -17
  152. datapipeline/transforms/interfaces.py +58 -0
  153. datapipeline/transforms/record/floor_time.py +10 -7
  154. datapipeline/transforms/record/lag.py +8 -10
  155. datapipeline/transforms/sequence.py +2 -3
  156. datapipeline/transforms/stream/dedupe.py +5 -7
  157. datapipeline/transforms/stream/ensure_ticks.py +39 -24
  158. datapipeline/transforms/stream/fill.py +34 -25
  159. datapipeline/transforms/stream/filter.py +25 -0
  160. datapipeline/transforms/stream/floor_time.py +16 -0
  161. datapipeline/transforms/stream/granularity.py +52 -30
  162. datapipeline/transforms/stream/lag.py +17 -0
  163. datapipeline/transforms/stream/rolling.py +72 -0
  164. datapipeline/transforms/utils.py +42 -10
  165. datapipeline/transforms/vector/drop/horizontal.py +0 -3
  166. datapipeline/transforms/vector/drop/orchestrator.py +0 -3
  167. datapipeline/transforms/vector/drop/vertical.py +0 -2
  168. datapipeline/transforms/vector/ensure_schema.py +0 -2
  169. datapipeline/utils/paths.py +0 -2
  170. datapipeline/utils/placeholders.py +0 -2
  171. datapipeline/utils/rich_compat.py +0 -3
  172. datapipeline/utils/window.py +0 -2
  173. jerry_thomas-2.0.1.dist-info/METADATA +269 -0
  174. jerry_thomas-2.0.1.dist-info/RECORD +264 -0
  175. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/WHEEL +1 -1
  176. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/entry_points.txt +7 -3
  177. datapipeline/services/scaffold/mappers.py +0 -55
  178. datapipeline/services/scaffold/source.py +0 -191
  179. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.hour_sin.yaml +0 -31
  180. datapipeline/templates/plugin_skeleton/example/contracts/time.ticks.linear.yaml +0 -30
  181. datapipeline/templates/plugin_skeleton/example/dataset.yaml +0 -18
  182. datapipeline/templates/plugin_skeleton/example/postprocess.yaml +0 -29
  183. datapipeline/templates/plugin_skeleton/example/project.yaml +0 -23
  184. datapipeline/templates/plugin_skeleton/example/tasks/metadata.yaml +0 -3
  185. datapipeline/templates/plugin_skeleton/example/tasks/scaler.yaml +0 -9
  186. datapipeline/templates/plugin_skeleton/example/tasks/schema.yaml +0 -2
  187. datapipeline/templates/plugin_skeleton/example/tasks/serve.test.yaml +0 -4
  188. datapipeline/templates/plugin_skeleton/example/tasks/serve.train.yaml +0 -28
  189. datapipeline/templates/plugin_skeleton/example/tasks/serve.val.yaml +0 -4
  190. datapipeline/templates/stubs/mapper.py.j2 +0 -22
  191. jerry_thomas-1.0.3.dist-info/METADATA +0 -827
  192. jerry_thomas-1.0.3.dist-info/RECORD +0 -198
  193. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/licenses/LICENSE +0 -0
  194. {jerry_thomas-1.0.3.dist-info → jerry_thomas-2.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,116 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
5
+ from datapipeline.services.scaffold.templates import render
6
+ from datapipeline.services.scaffold.utils import (
7
+ ensure_pkg_dir,
8
+ ep_key_from_name,
9
+ info,
10
+ status,
11
+ to_snake,
12
+ validate_identifier,
13
+ write_if_missing,
14
+ )
15
+ from datapipeline.services.scaffold.layout import (
16
+ DIR_MAPPERS,
17
+ TPL_MAPPER_COMPOSED,
18
+ TPL_MAPPER_INGEST,
19
+ domain_record_class,
20
+ entrypoint_target,
21
+ pyproject_path,
22
+ )
23
+ from datapipeline.services.entrypoints import inject_ep
24
+ from datapipeline.services.constants import MAPPERS_GROUP
25
+
26
+
27
def create_mapper(
    *,
    name: str,
    dto_class: str | None = None,
    dto_module: str | None = None,
    input_class: str | None = None,
    input_module: str | None = None,
    domain: str,
    root: Optional[Path],
) -> str:
    """Scaffold an ingest mapper module and register its entry point.

    Renders the ingest-mapper stub into ``<pkg>/mappers/<snake_name>.py``
    (skipping if the file exists) and idempotently injects an entry point
    into pyproject.toml under MAPPERS_GROUP.

    Args:
        name: Mapper name; must be a valid Python identifier.
        dto_class / dto_module: Legacy aliases for input_class / input_module.
        input_class / input_module: Class and module of the mapper's input DTO.
        domain: Domain package name; the mapper targets
            ``<pkg>.domains.<domain>.model``.
        root: Plugin root directory (None to auto-detect).

    Returns:
        The entry-point key derived from *name*.

    Raises:
        ValueError: If no input class/module can be resolved.
        SystemExit: If *name* is not a valid identifier.
    """
    validate_identifier(name, "Mapper name")

    root_dir, pkg_name, _ = pkg_root(root)
    base = resolve_base_pkg_dir(root_dir, pkg_name)
    package_name = base.name

    mappers_dir = ensure_pkg_dir(base, DIR_MAPPERS)
    module_name = to_snake(name)
    path = mappers_dir / f"{module_name}.py"

    domain_module = f"{package_name}.domains.{domain}.model"
    domain_record = domain_record_class(domain)

    # input_* takes precedence; dto_* is kept as a backward-compatible alias.
    resolved_class = input_class or dto_class
    resolved_module = input_module or dto_module
    if not resolved_class or not resolved_module:
        raise ValueError("Mapper input class/module is required")

    write_if_missing(
        path,
        render(
            TPL_MAPPER_INGEST,
            FUNCTION_NAME=module_name,
            INPUT_CLASS=resolved_class,
            INPUT_IMPORT=resolved_module,
            DOMAIN_MODULE=domain_module,
            DOMAIN_RECORD=domain_record,
        ),
        label="Mapper",
    )

    ep_key = ep_key_from_name(name)
    pyproject = pyproject_path(root_dir)
    try:
        toml_text = pyproject.read_text()
        # The generated function shares the module's name, so the target is
        # "<pkg>.mappers.<module>:<module>".
        updated = inject_ep(
            toml_text,
            MAPPERS_GROUP,
            ep_key,
            entrypoint_target(package_name, "mappers", module_name, module_name),
        )
        # Only touch pyproject.toml when the entry point actually changed.
        if updated != toml_text:
            pyproject.write_text(updated)
            status("ok", f"Registered mapper entry point '{ep_key}'.")
        else:
            status("skip", f"Mapper entry point already registered: '{ep_key}'.")
    except FileNotFoundError:
        # Best-effort: scaffolding still succeeds without a pyproject.toml.
        info("pyproject.toml not found; skipping entry point registration")
    return ep_key
86
+
87
+
88
def create_composed_mapper(
    *,
    domain: str,
    stream_id: str,
    root: Optional[Path],
    mapper_path: str | None = None,
) -> str:
    """Scaffold a composed mapper for *domain* and register it under *stream_id*.

    Writes ``<pkg>/mappers/<domain>.py`` from the composed-mapper stub when
    missing, then idempotently injects a MAPPERS_GROUP entry point keyed by
    *stream_id* into pyproject.toml.

    Args:
        domain: Domain name; also the mapper module's file name.
        stream_id: Entry-point key to register.
        root: Plugin root directory (None to auto-detect).
        mapper_path: Optional explicit "module:attr" target; ignored (falls
            back to the default) when it does not contain ":".

    Returns:
        The entry-point key (*stream_id*).
    """
    root_dir, name, _ = pkg_root(root)
    base = resolve_base_pkg_dir(root_dir, name)
    map_pkg_dir = ensure_pkg_dir(base, DIR_MAPPERS)
    mapper_file = map_pkg_dir / f"{domain}.py"
    if not mapper_file.exists():
        mapper_file.write_text(render(TPL_MAPPER_COMPOSED))
        status("new", str(mapper_file))

    ep_key = stream_id
    package_name = base.name
    default_target = f"{package_name}.mappers.{domain}:mapper"
    # A custom mapper_path is honored only when it looks like a "module:attr"
    # target; anything else silently falls back to the default target.
    ep_target = mapper_path if (mapper_path and ":" in mapper_path) else default_target
    pyproj_path = pyproject_path(root_dir)
    try:
        toml_text = pyproj_path.read_text()
        updated = inject_ep(toml_text, MAPPERS_GROUP, ep_key, ep_target)
        if updated != toml_text:
            pyproj_path.write_text(updated)
            status("ok", f"Registered mapper entry point '{ep_key}' -> {ep_target}")
        else:
            # Consistency fix: create_mapper() reports no-op registrations;
            # do the same here instead of finishing silently.
            status("skip", f"Mapper entry point already registered: '{ep_key}'.")
    except FileNotFoundError:
        info("pyproject.toml not found; skipping entry point registration")
    return ep_key
@@ -0,0 +1,56 @@
1
from pathlib import Path
from typing import Optional

from datapipeline.services.constants import PARSERS_GROUP
from datapipeline.services.entrypoints import inject_ep
from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
from datapipeline.services.scaffold.layout import (
    DIR_PARSERS,
    TPL_PARSER,
    entrypoint_target,
    pyproject_path,
)
from datapipeline.services.scaffold.templates import render
from datapipeline.services.scaffold.utils import (
    ensure_pkg_dir,
    ep_key_from_name,
    info,
    status,
    to_snake,
    validate_identifier,
    write_if_missing,
)
17
+
18
+
19
def create_parser(
    *,
    name: str,
    dto_class: str,
    dto_module: str,
    root: Optional[Path],
) -> str:
    """Scaffold a parser module and register it under PARSERS_GROUP.

    Renders the parser stub into ``<pkg>/parsers/<snake_name>.py`` (skipping
    when present) and idempotently injects an entry point into pyproject.toml.

    Args:
        name: Parser class name; must be a valid Python identifier.
        dto_class: Name of the DTO class the parser produces.
        dto_module: Import path of the DTO module.
        root: Plugin root directory (None to auto-detect).

    Returns:
        The entry-point key derived from *name*.

    Raises:
        SystemExit: If *name* is not a valid identifier.
    """
    validate_identifier(name, "Parser name")

    root_dir, pkg_name, _ = pkg_root(root)
    base = resolve_base_pkg_dir(root_dir, pkg_name)
    package_name = base.name

    parsers_dir = ensure_pkg_dir(base, DIR_PARSERS)
    module_name = to_snake(name)
    path = parsers_dir / f"{module_name}.py"

    write_if_missing(
        path,
        render(
            TPL_PARSER,
            CLASS_NAME=name,
            DTO_CLASS=dto_class,
            DTO_IMPORT=dto_module,
        ),
        label="Parser",
    )

    ep_key = ep_key_from_name(name)
    pyproject = pyproject_path(root_dir)
    try:
        toml_text = pyproject.read_text()
        updated = inject_ep(
            toml_text,
            PARSERS_GROUP,
            ep_key,
            entrypoint_target(package_name, "parsers", module_name, name),
        )
        # Consistency/robustness fix: mirror mapper.py's create_mapper() —
        # previously this read/wrote pyproject.toml unconditionally, crashing
        # with FileNotFoundError when no pyproject exists and rewriting the
        # file even when nothing changed.
        if updated != toml_text:
            pyproject.write_text(updated)
            status("ok", f"Registered parser entry point '{ep_key}'.")
        else:
            status("skip", f"Parser entry point already registered: '{ep_key}'.")
    except FileNotFoundError:
        info("pyproject.toml not found; skipping entry point registration")
    return ep_key
@@ -2,6 +2,7 @@ from importlib.resources import as_file, files
2
2
  from pathlib import Path
3
3
  import logging
4
4
  import os
5
+ import sys
5
6
 
6
7
  import yaml
7
8
 
@@ -11,7 +12,8 @@ from ..constants import DEFAULT_IO_LOADER_EP
11
12
 
12
13
  logger = logging.getLogger(__name__)
13
14
 
14
- _RESERVED_PACKAGE_NAMES = {"datapipeline"}
15
+ _RESERVED_PACKAGE_NAMES = {"datapipeline", "test", "tests"}
16
+ _STDLIB_MODULE_NAMES = getattr(sys, "stdlib_module_names", set())
15
17
 
16
18
 
17
19
  def _normalized_package_name(dist_name: str) -> str:
@@ -21,6 +23,12 @@ def _normalized_package_name(dist_name: str) -> str:
21
23
  "`datapipeline` is reserved for the core package. Choose a different plugin name."
22
24
  )
23
25
  raise SystemExit(1)
26
+ if package_name in _STDLIB_MODULE_NAMES:
27
+ logger.error(
28
+ "Plugin name '%s' conflicts with a Python standard library module. Choose a different name.",
29
+ package_name,
30
+ )
31
+ raise SystemExit(1)
24
32
  if not package_name.isidentifier():
25
33
  logger.error(
26
34
  "Plugin names must be valid Python identifiers once hyphens are replaced with underscores."
@@ -47,7 +55,11 @@ def scaffold_plugin(name: str, outdir: Path) -> None:
47
55
  "{{DIST_NAME}}": name,
48
56
  "{{DEFAULT_IO_LOADER_EP}}": DEFAULT_IO_LOADER_EP,
49
57
  }
50
- for p in (target / "pyproject.toml", target / "README.md"):
58
+ for p in target.rglob("*"):
59
+ if not p.is_file():
60
+ continue
61
+ if p.suffix not in {".py", ".toml", ".md", ".yaml", ".yml"}:
62
+ continue
51
63
  text = p.read_text()
52
64
  for placeholder, value in replacements.items():
53
65
  text = text.replace(placeholder, value)
@@ -0,0 +1,91 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ from datapipeline.services.paths import pkg_root
5
+ from datapipeline.services.project_paths import (
6
+ sources_dir as resolve_sources_dir,
7
+ ensure_project_scaffold,
8
+ resolve_project_yaml_path,
9
+ )
10
+ from datapipeline.services.scaffold.templates import render
11
+ from datapipeline.services.constants import (
12
+ DEFAULT_IO_LOADER_EP,
13
+ DEFAULT_SYNTHETIC_LOADER_EP,
14
+ )
15
+ from datapipeline.services.scaffold.utils import status
16
+
17
+
18
+ def _loader_args(transport: str, fmt: Optional[str]) -> dict:
19
+ if transport == "fs":
20
+ args = {
21
+ "transport": "fs",
22
+ "format": fmt or "<FORMAT (csv|json|json-lines|pickle)>",
23
+ "path": "<PATH OR GLOB>",
24
+ "glob": False,
25
+ "encoding": "utf-8",
26
+ }
27
+ if fmt == "csv":
28
+ args["delimiter"] = ","
29
+ return args
30
+ if transport == "http":
31
+ args = {
32
+ "transport": "http",
33
+ "format": fmt or "<FORMAT (json|json-lines|csv)>",
34
+ "url": "<https://api.example.com/data.json>",
35
+ "headers": {},
36
+ "params": {},
37
+ "encoding": "utf-8",
38
+ }
39
+ if fmt == "csv":
40
+ args["delimiter"] = ","
41
+ return args
42
+ if transport == "synthetic":
43
+ return {"start": "<ISO8601>", "end": "<ISO8601>", "frequency": "1h"}
44
+ return {}
45
+
46
+
47
def create_source_yaml(
    *,
    provider: str,
    dataset: str,
    loader_ep: str,
    loader_args: dict,
    parser_ep: str,
    parser_args: dict | None = None,
    root: Optional[Path],
    project_yaml: Optional[Path] = None,
) -> None:
    """Render a ``<provider>.<dataset>.yaml`` source config into the project's sources dir.

    Ensures the project scaffold exists first; never overwrites an existing
    file (reports a "skip" status instead).

    Args:
        provider: Source provider name; first half of the source alias.
        dataset: Dataset name; second half of the source alias.
        loader_ep: Loader entry-point key written into the YAML.
        loader_args: Loader arguments written into the YAML.
        parser_ep: Parser entry-point key written into the YAML.
        parser_args: Optional parser arguments (defaults to {}).
        root: Plugin root directory (None to auto-detect).
        project_yaml: Explicit project.yaml path; resolved from *root* when None.
    """
    root_dir, _, _ = pkg_root(root)
    alias = f"{provider}.{dataset}"
    parser_args = parser_args or {}

    # project.yaml anchors the sources directory; scaffold it when missing.
    proj_yaml = project_yaml.resolve() if project_yaml is not None else resolve_project_yaml_path(root_dir)
    ensure_project_scaffold(proj_yaml)
    sources_dir = resolve_sources_dir(proj_yaml).resolve()
    sources_dir.mkdir(parents=True, exist_ok=True)

    src_cfg_path = sources_dir / f"{alias}.yaml"
    if src_cfg_path.exists():
        status("skip", f"Source YAML already exists: {src_cfg_path.resolve()}")
        return

    src_cfg_path.write_text(
        render(
            "source.yaml.j2",
            id=alias,
            parser_ep=parser_ep,
            parser_args=parser_args,
            loader_ep=loader_ep,
            loader_args=loader_args,
            default_io_loader_ep=DEFAULT_IO_LOADER_EP,
        )
    )
    status("new", str(src_cfg_path.resolve()))
84
+
85
+
86
def default_loader_config(transport: str, fmt: Optional[str]) -> tuple[str, dict]:
    """Pick the default loader entry point and placeholder args for *transport*."""
    if transport == "synthetic":
        return DEFAULT_SYNTHETIC_LOADER_EP, _loader_args(transport, fmt)
    if transport in ("fs", "http"):
        return DEFAULT_IO_LOADER_EP, _loader_args(transport, fmt)
    # Unknown transports fall back to the IO loader with no arguments.
    return DEFAULT_IO_LOADER_EP, {}
@@ -0,0 +1,129 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+ from datapipeline.services.scaffold.domain import create_domain
5
+ from datapipeline.services.scaffold.dto import create_dto
6
+ from datapipeline.services.scaffold.parser import create_parser
7
+ from datapipeline.services.scaffold.mapper import create_mapper
8
+ from datapipeline.services.scaffold.source_yaml import create_source_yaml
9
+ from datapipeline.services.scaffold.contract_yaml import write_ingest_contract
10
+ from datapipeline.services.scaffold.discovery import list_dtos
11
+ from datapipeline.services.paths import pkg_root
12
+ from datapipeline.services.scaffold.utils import error_exit, status
13
+
14
+
15
+ @dataclass
16
+ class ParserPlan:
17
+ create: bool
18
+ create_dto: bool = False
19
+ dto_class: str | None = None
20
+ dto_module: str | None = None
21
+ parser_name: str | None = None
22
+ parser_ep: str | None = None
23
+
24
+
25
+ @dataclass
26
+ class MapperPlan:
27
+ create: bool
28
+ create_dto: bool = False
29
+ input_class: str | None = None
30
+ input_module: str | None = None
31
+ mapper_name: str | None = None
32
+ mapper_ep: str | None = None
33
+ domain: str | None = None
34
+
35
+
36
+ @dataclass
37
+ class StreamPlan:
38
+ provider: str
39
+ dataset: str
40
+ source_id: str
41
+ project_yaml: Path
42
+ stream_id: str
43
+ root: Path | None
44
+ create_source: bool
45
+ loader_ep: str | None = None
46
+ loader_args: dict | None = None
47
+ parser: ParserPlan | None = None
48
+ mapper: MapperPlan | None = None
49
+ domain: str | None = None
50
+ create_domain: bool = False
51
+
52
+
53
+ def execute_stream_plan(plan: StreamPlan) -> None:
54
+ pyproject_path = None
55
+ before_pyproject = None
56
+ try:
57
+ root_dir, _, pyproject = pkg_root(plan.root)
58
+ pyproject_path = pyproject
59
+ if pyproject_path.exists():
60
+ before_pyproject = pyproject_path.read_text()
61
+ except SystemExit:
62
+ pyproject_path = None
63
+ before_pyproject = None
64
+
65
+ if plan.create_domain and plan.domain:
66
+ create_domain(domain=plan.domain, root=plan.root)
67
+
68
+ parser_ep = None
69
+ if plan.parser:
70
+ if plan.parser.create:
71
+ if plan.parser.dto_class and plan.parser.create_dto:
72
+ create_dto(name=plan.parser.dto_class, root=plan.root)
73
+ dto_module = plan.parser.dto_module or list_dtos(root=plan.root).get(plan.parser.dto_class or "")
74
+ if not dto_module:
75
+ error_exit("Failed to resolve DTO module.")
76
+ parser_ep = create_parser(
77
+ name=plan.parser.parser_name or "parser",
78
+ dto_class=plan.parser.dto_class or "DTO",
79
+ dto_module=dto_module,
80
+ root=plan.root,
81
+ )
82
+ else:
83
+ parser_ep = plan.parser.parser_ep
84
+
85
+ mapper_ep = None
86
+ if plan.mapper:
87
+ if plan.mapper.create:
88
+ if plan.mapper.input_class and plan.mapper.create_dto:
89
+ create_dto(name=plan.mapper.input_class, root=plan.root)
90
+ input_module = plan.mapper.input_module
91
+ if not input_module and plan.mapper.input_class:
92
+ input_module = list_dtos(root=plan.root).get(plan.mapper.input_class)
93
+ if not input_module:
94
+ error_exit("Failed to resolve mapper input module.")
95
+ mapper_ep = create_mapper(
96
+ name=plan.mapper.mapper_name or "mapper",
97
+ input_class=plan.mapper.input_class or "Record",
98
+ input_module=input_module,
99
+ domain=plan.mapper.domain or plan.domain or "domain",
100
+ root=plan.root,
101
+ )
102
+ else:
103
+ mapper_ep = plan.mapper.mapper_ep
104
+
105
+ if plan.create_source and plan.loader_ep and plan.loader_args is not None:
106
+ create_source_yaml(
107
+ provider=plan.provider,
108
+ dataset=plan.dataset,
109
+ loader_ep=plan.loader_ep,
110
+ loader_args=plan.loader_args,
111
+ parser_ep=parser_ep or "identity",
112
+ root=plan.root,
113
+ project_yaml=plan.project_yaml,
114
+ )
115
+
116
+ write_ingest_contract(
117
+ project_yaml=plan.project_yaml,
118
+ stream_id=plan.stream_id,
119
+ source=plan.source_id,
120
+ mapper_entrypoint=mapper_ep or "identity",
121
+ )
122
+ status("ok", "Stream created.")
123
+ if pyproject_path and before_pyproject is not None:
124
+ after_pyproject = pyproject_path.read_text()
125
+ if after_pyproject != before_pyproject:
126
+ status(
127
+ "note",
128
+ f"Entry points updated; reinstall plugin: pip install -e {pyproject_path.parent}",
129
+ )
@@ -0,0 +1,187 @@
1
+ from pathlib import Path
2
+ import sys
3
+ import logging
4
+
5
+ from datapipeline.services.scaffold.layout import to_snake, slugify, ep_key_from_name
6
+
7
+ _LOGGER = logging.getLogger("datapipeline.cli")
8
+
9
+
10
def ensure_pkg_dir(base: Path, name: str) -> Path:
    """Create (if needed) the package subdirectory ``base/name`` and return it.

    The directory is created with parents, and an ``__init__.py`` is touched
    so the directory is importable as a package.
    """
    pkg_dir = base / name
    pkg_dir.mkdir(parents=True, exist_ok=True)
    init_marker = pkg_dir / "__init__.py"
    init_marker.touch(exist_ok=True)
    return pkg_dir
15
+
16
+
17
# Public helper surface of this module. Fix: pick_multiple_from_list and
# choose_existing_or_create_name are defined below but were missing here,
# so `from ... import *` (and the declared API) silently omitted them.
__all__ = [
    "ensure_pkg_dir",
    "to_snake",
    "slugify",
    "ep_key_from_name",
    "validate_identifier",
    "write_if_missing",
    "prompt_required",
    "prompt_optional",
    "choose_name",
    "status",
    "info",
    "error_exit",
    "pick_from_list",
    "pick_from_menu",
    "pick_multiple_from_list",
    "choose_existing_or_create",
    "choose_existing_or_create_name",
]
34
+
35
+
36
def validate_identifier(name: str, label: str) -> None:
    """Abort via error_exit() unless *name* is a non-empty Python identifier."""
    is_valid = bool(name) and name.isidentifier()
    if not is_valid:
        error_exit(f"{label} must be a valid Python identifier")
39
+
40
+
41
def write_if_missing(path: Path, text: str, *, label: str | None = None) -> bool:
    """Write *text* to *path* unless it already exists.

    Returns True when the file was written, False when it was skipped;
    either way the outcome is reported via status().
    """
    if not path.exists():
        path.write_text(text)
        status("new", str(path))
        return True
    status("skip", f"{label or 'File'} already exists: {path}")
    return False
48
+
49
+
50
def prompt_required(prompt: str) -> str:
    """Read one answer from stdin; abort via error_exit() when it is blank."""
    answer = input(f"{prompt}: ").strip()
    if answer:
        return answer
    error_exit(f"{prompt} is required")
55
+
56
+
57
def prompt_optional(prompt: str) -> str | None:
    """Read one answer from stdin; None when the (stripped) input is blank."""
    return input(f"{prompt}: ").strip() or None
60
+
61
+
62
def choose_name(label: str, *, default: str | None = None) -> str:
    """Ask for a name, offering *default* as the one-keystroke first choice.

    Without a default this degenerates to a required prompt. Empty input
    or "1" accepts the default; "2" asks for a custom name.
    """
    if not default:
        return prompt_required(label)
    info(f"{label}:")
    info(f" [1] {default} (default)")
    info(" [2] Custom name")
    while True:
        choice = input("> ").strip()
        if choice in ("", "1"):
            return default
        if choice == "2":
            return prompt_required(label)
        info("Please enter a number from the list.")
77
+
78
+
79
def info(message: str) -> None:
    """Emit a plain informational line through the CLI logger."""
    _LOGGER.log(logging.INFO, message)
81
+
82
+
83
def status(tag: str, message: str) -> None:
    """Emit a tagged progress line, e.g. ``[new] path/to/file``."""
    _LOGGER.log(logging.INFO, "[%s] %s", tag, message)
85
+
86
+
87
def error_exit(message: str, code: int = 2) -> None:
    """Log *message* as an error, then abort with ``SystemExit(code)``."""
    _LOGGER.log(logging.ERROR, message)
    raise SystemExit(code)
90
+
91
+
92
def pick_from_list(prompt: str, options: list[str]) -> str:
    """Show a numbered menu and block until a valid index is entered."""
    info(prompt)
    for number, option in enumerate(options, start=1):
        info(f" [{number}] {option}")
    while True:
        raw = input("> ").strip()
        if raw.isdigit() and 1 <= int(raw) <= len(options):
            return options[int(raw) - 1]
        info("Please enter a number from the list.")
103
+
104
+
105
def pick_from_menu(prompt: str, options: list[tuple[str, str]], *, allow_default: bool = True) -> str:
    """Numbered menu over (value, label) pairs; returns the chosen value.

    Empty input selects the first option when *allow_default* is True.
    """
    info(prompt)
    for number, (_, label) in enumerate(options, start=1):
        info(f" [{number}] {label}")
    while True:
        raw = input("> ").strip()
        if not raw and allow_default:
            return options[0][0]
        if raw.isdigit() and 1 <= int(raw) <= len(options):
            return options[int(raw) - 1][0]
        info("Please enter a number from the list.")
118
+
119
+
120
def pick_multiple_from_list(prompt: str, options: list[str]) -> list[str]:
    """Show a numbered menu and read a comma-separated multi-selection.

    Returns the selected options in the order typed (duplicates kept).

    Raises:
        SystemExit: Via error_exit() on non-numeric input, out-of-range
            indices, or an empty selection.
    """
    info(prompt)
    for i, opt in enumerate(options, 1):
        info(f" [{i}] {opt}")
    sel = input("> ").strip()
    try:
        idxs = [int(x) for x in sel.split(",") if x.strip()]
    except ValueError:
        error_exit("Invalid selection.")
    # Fix: out-of-range indices were previously dropped silently, so a typo
    # shrank the selection without warning. Treat them as input errors.
    if any(not 1 <= i <= len(options) for i in idxs):
        error_exit("Invalid selection.")
    picked = [options[i - 1] for i in idxs]
    if not picked:
        error_exit("No inputs selected.")
    return picked
133
+
134
+
135
def choose_existing_or_create(
    *,
    label: str,
    existing: list[str],
    create_label: str,
    create_fn,
    prompt_new: str,
    root: Path | None,
    default_new: str | None = None,
) -> str:
    """Interactively create a new item or pick from *existing*; returns its name.

    Option 1 (the default, taken on empty input) prompts for a new name and
    immediately calls ``create_fn(name=..., root=...)``. Option 2 selects
    from *existing*, aborting via error_exit() when the list is empty.
    """
    info(f"{label}:")
    info(f" [1] {create_label} (default)")
    info(f" [2] Select existing {label}")
    while True:
        sel = input("> ").strip()
        if sel == "":
            # Empty input takes the default (create) path.
            sel = "1"
        if sel == "1":
            name = choose_name(prompt_new, default=default_new)
            create_fn(name=name, root=root)
            return name
        if sel == "2":
            if not existing:
                error_exit(f"No existing {label} found.")
            return pick_from_list(f"Select {label}:", existing)
        info("Please enter a number from the list.")
161
+
162
+
163
def choose_existing_or_create_name(
    *,
    label: str,
    existing: list[str],
    create_label: str,
    prompt_new: str,
    default_new: str | None = None,
) -> tuple[str, bool]:
    """Return (name, created) without side effects.

    Same interaction as choose_existing_or_create(), but only reports the
    decision: ``created`` is True when the user chose a new name, and the
    caller is responsible for actually creating the item.
    """
    info(f"{label}:")
    info(f" [1] {create_label} (default)")
    info(f" [2] Select existing {label}")
    while True:
        sel = input("> ").strip()
        if sel == "":
            # Empty input takes the default (create) path.
            sel = "1"
        if sel == "1":
            name = choose_name(prompt_new, default=default_new)
            return name, True
        if sel == "2":
            if not existing:
                error_exit(f"No existing {label} found.")
            name = pick_from_list(f"Select {label}:", existing)
            return name, False
        info("Please enter a number from the list.")
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  from typing import Iterator, Any, Optional
4
2
  from .models.loader import BaseDataLoader
5
3
  from .transports import Transport, HttpTransport