jerry-thomas 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datapipeline/cli/app.py CHANGED
@@ -31,15 +31,9 @@ def _dataset_to_project_path(
     """Resolve a dataset selector (alias, folder, or file) into a project.yaml path."""
     # 1) Alias via jerry.yaml datasets (wins over local folders with same name)
     if workspace is not None:
-        datasets = getattr(workspace.config, "datasets", {}) or {}
-        raw = datasets.get(dataset)
-        if raw:
-            base = workspace.root
-            candidate = Path(raw)
-            candidate = candidate if candidate.is_absolute() else (base / candidate)
-            if candidate.is_dir():
-                candidate = candidate / "project.yaml"
-            return str(candidate.resolve())
+        resolved = workspace.resolve_dataset_alias(dataset)
+        if resolved is not None:
+            return str(resolved)
 
     # 2) Direct file path
     path = Path(dataset)
@@ -490,7 +484,10 @@ def main() -> None:
     args = parser.parse_args()
 
     # Resolve dataset/project selection for commands that use a project.
-    if hasattr(args, "project") or hasattr(args, "dataset"):
+    needs_project_resolution = args.cmd in {"serve", "build", "inspect"}
+    if needs_project_resolution and (
+        hasattr(args, "project") or hasattr(args, "dataset")
+    ):
         raw_project = getattr(args, "project", None)
         raw_dataset = getattr(args, "dataset", None)
         resolved_project, resolved_dataset = _resolve_project_from_args(
@@ -637,6 +634,7 @@ def main() -> None:
             alias=getattr(args, "alias", None),
             identity=getattr(args, "identity", False),
             plugin_root=plugin_root,
+            workspace=workspace_context,
         )
         return
 
@@ -655,6 +653,7 @@ def main() -> None:
         handle_contract(
             plugin_root=plugin_root,
             use_identity=args.identity,
+            workspace=workspace_context,
         )
         return
 
@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path
 
+from datapipeline.config.workspace import WorkspaceContext
+from datapipeline.cli.workspace_utils import resolve_default_project_yaml
 from datapipeline.services.paths import pkg_root, resolve_base_pkg_dir
 from datapipeline.services.entrypoints import read_group_entries, inject_ep
 from datapipeline.services.constants import FILTERS_GROUP, MAPPERS_GROUP
@@ -31,8 +33,10 @@ def handle(
     *,
     plugin_root: Path | None = None,
     use_identity: bool = False,
+    workspace: WorkspaceContext | None = None,
 ) -> None:
     root_dir, name, pyproject = pkg_root(plugin_root)
+    default_project = resolve_default_project_yaml(workspace)
     # Select contract type: Ingest (source->stream) or Composed (streams->stream)
     print("Select contract type:", file=sys.stderr)
     print(" [1] Ingest (source → stream)", file=sys.stderr)
@@ -49,12 +53,13 @@ def handle(
             mapper_path=None,
             with_mapper_stub=True,
             plugin_root=plugin_root,
+            project_yaml=default_project,
         )
         return
 
     # Discover sources by scanning sources_dir YAMLs
     # Default to dataset-scoped project config
-    proj_path = resolve_project_yaml_path(root_dir)
+    proj_path = default_project or resolve_project_yaml_path(root_dir)
     # Ensure a minimal project scaffold so we can resolve dirs interactively
     ensure_project_scaffold(proj_path)
     sources_dir = resolve_sources_dir(proj_path)
@@ -187,6 +192,7 @@ def scaffold_conflux(
     mapper_path: str | None,
     with_mapper_stub: bool,
     plugin_root: Path | None,
+    project_yaml: Path | None,
 ) -> None:
     """Scaffold a composed (multi-input) contract and optional mapper stub.
 
@@ -195,7 +201,7 @@ def scaffold_conflux(
     """
     root_dir, name, _ = pkg_root(plugin_root)
     # Resolve default project path early for interactive selections
-    proj_path = resolve_project_yaml_path(root_dir)
+    proj_path = project_yaml or resolve_project_yaml_path(root_dir)
     ensure_project_scaffold(proj_path)
     # Defer target domain selection until after choosing inputs
 
@@ -1,5 +1,7 @@
 from pathlib import Path
 
+from datapipeline.config.workspace import WorkspaceContext
+from datapipeline.cli.workspace_utils import resolve_default_project_yaml
 from datapipeline.services.scaffold.source import create_source
 
 
@@ -13,6 +15,7 @@ def handle(
     identity: bool = False,
     alias: str | None = None,
     plugin_root: Path | None = None,
+    workspace: WorkspaceContext | None = None,
 ) -> None:
     if subcmd in {"create", "add"}:
         # Allow: positional provider dataset, --provider/--dataset, --alias, or provider as 'prov.ds'
@@ -43,6 +46,7 @@ def handle(
         if transport in {"fs", "http"} and not format:
             print("[error] --format is required for fs/http transports (fs: csv|json|json-lines|pickle, http: csv|json|json-lines)")
             raise SystemExit(2)
+        project_yaml = resolve_default_project_yaml(workspace)
         create_source(
             provider=provider,
             dataset=dataset,
@@ -50,4 +54,5 @@ def handle(
             format=format,
             root=plugin_root,
             identity=identity,
+            **({"project_yaml": project_yaml} if project_yaml is not None else {}),
         )
@@ -7,6 +7,7 @@ from typing import Optional, Sequence
 
 from urllib.parse import urlparse
 from datapipeline.sources.transports import FsGlobTransport, FsFileTransport, HttpTransport
+from datapipeline.sources.foreach import ForeachLoader
 
 logger = logging.getLogger(__name__)
 
@@ -217,23 +218,74 @@ def current_transport_label(transport, *, glob_root: Optional[Path] = None) -> O
         current = getattr(transport, "current_path", None)
         if not current:
             return None
-        return relative_label(current, glob_root)
+        return f"\"{relative_label(current, glob_root)}\""
     if isinstance(transport, FsFileTransport):
         path = getattr(transport, "path", None)
         if not path:
             return None
         try:
-            return Path(path).name or str(path)
+            name = Path(path).name or str(path)
+            return f"\"{name}\""
         except Exception:
-            return str(path)
+            return f"\"{path}\""
     if isinstance(transport, HttpTransport):
         url = getattr(transport, "url", None)
         if not url:
             return None
         try:
             parts = urlparse(url)
-            name = Path(parts.path or "").name
-            return name or (parts.netloc or "http")
+            host = parts.netloc or "http"
+            return f"@{host}"
         except Exception:
             return None
     return None
+
+
+def current_loader_label(loader, transport, *, glob_root: Optional[Path] = None) -> Optional[str]:
+    """Return a human-friendly label for the loader's current unit of work."""
+    if isinstance(loader, ForeachLoader):
+        value = getattr(loader, "_current_value", None)
+        if value is None:
+            return None
+        idx = getattr(loader, "_current_index", None)
+        values = getattr(loader, "_values", None)
+        total = len(values) if isinstance(values, list) else None
+
+        item_label = f"\"{value}\""
+        status = None
+        if isinstance(idx, int) and isinstance(total, int) and total > 0:
+            status = f"({idx}/{total})"
+
+        def _with_item(action: str | None) -> str:
+            parts = []
+            if action:
+                parts.append(action)
+            parts.append(item_label)
+            if status:
+                parts.append(status)
+            return " ".join(parts)
+
+        spec = getattr(loader, "_loader_spec", None) or {}
+        entrypoint = spec.get("entrypoint", "") if isinstance(spec, dict) else ""
+        args = getattr(loader, "_current_args", None)
+        inner_transport = getattr(loader, "_current_transport", None)
+
+        if entrypoint == "core.io" and isinstance(args, dict):
+            t = args.get("transport")
+            if t == "http":
+                parts = urlparse(str(args.get("url", "")))
+                host = parts.netloc or "http"
+                return _with_item(f"Downloading @{host}")
+            if t == "fs":
+                inner_root = None
+                if isinstance(inner_transport, FsGlobTransport):
+                    inner_root = compute_glob_root(getattr(inner_transport, "files", []))
+                label = current_transport_label(inner_transport, glob_root=inner_root)
+                action = f"Loading {label}" if label else "Loading fs"
+                return _with_item(action)
+
+        if entrypoint:
+            return _with_item(f"via {entrypoint}")
+        return _with_item(None)
+
+    return current_transport_label(transport, glob_root=glob_root)
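For a sense of the strings this produces (an illustration, not from the diff): a ForeachLoader that is on its second of three values with an inner core.io http loader gets an action, the quoted current value, and a progress marker, while non-foreach loaders fall back to the quoted transport label. The host and value below are made up.

# Hypothetical label composition mirroring _with_item(): action, quoted item, then (index/total).
action, item_label, status = "Downloading @api.example.com", '"2023"', "(2/3)"
print(" ".join(part for part in (action, item_label, status) if part))
# -> Downloading @api.example.com "2023" (2/3)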
@@ -1,41 +1,9 @@
-from pathlib import Path
-from urllib.parse import urlparse
-
 from datapipeline.sources.models.loader import SyntheticLoader, BaseDataLoader
 from datapipeline.sources.data_loader import DataLoader
+from datapipeline.sources.foreach import ForeachLoader
 from datapipeline.sources.transports import FsFileTransport, FsGlobTransport, HttpTransport
 from datapipeline.sources.decoders import CsvDecoder, JsonDecoder, JsonLinesDecoder, PickleDecoder
 
-MAX_LABEL_LEN = 48
-GLOB_SEGMENTS = 3
-
-
-def _truncate_middle(text: str, max_len: int) -> str:
-    if len(text) <= max_len:
-        return text
-    if max_len <= 3:
-        return text[:max_len]
-    keep = max_len - 3
-    head = (keep + 1) // 2
-    tail = keep - head
-    suffix = text[-tail:] if tail > 0 else ""
-    return f"{text[:head]}...{suffix}"
-
-
-def _compact_path_label(name: str) -> str:
-    if not name:
-        return "fs"
-    normalized = name.replace("\\", "/").strip()
-    if not normalized:
-        return "fs"
-    parts = [part for part in normalized.split("/") if part]
-    if not parts:
-        return normalized
-    if len(parts) > GLOB_SEGMENTS:
-        parts = parts[-GLOB_SEGMENTS:]
-    label = "/".join(parts)
-    return _truncate_middle(label, MAX_LABEL_LEN)
-
 
 def unit_for_loader(loader) -> str:
     if isinstance(loader, SyntheticLoader):
@@ -56,18 +24,17 @@ def build_source_label(loader: BaseDataLoader) -> str:
         except Exception:
             gen_name = loader.__class__.__name__
         return "Generating data with " + gen_name
+    if isinstance(loader, ForeachLoader):
+        key = str(getattr(loader, "_key", "item"))
+        values = getattr(loader, "_values", None)
+        n = len(values) if isinstance(values, list) else "?"
+        return f"Fan-out {key}×{n}:"
     if isinstance(loader, DataLoader):
         transport = getattr(loader, "transport", None)
         if isinstance(transport, (FsFileTransport, FsGlobTransport)):
-            name = str(getattr(transport, "pattern", getattr(transport, "path", "")))
-            if isinstance(transport, FsFileTransport) and name and "*" not in name:
-                label = Path(name).name or "fs"
-            else:
-                label = _compact_path_label(name)
-            return f"Loading data from: {label}"
+            return "Loading"
         if isinstance(transport, HttpTransport):
-            host = urlparse(transport.url).netloc or "http"
-            return f"Downloading data from: @{host}"
+            return "Downloading"
     return loader.__class__.__name__
 
 
@@ -24,13 +24,14 @@ from rich.text import Text
 from .labels import progress_meta_for_loader
 from .common import (
     compute_glob_root,
-    current_transport_label,
+    current_loader_label,
     log_combined_stream,
     transport_debug_lines,
     transport_info_lines,
 )
 from datapipeline.runtime import Runtime
 from datapipeline.sources.models.source import Source
+from datapipeline.sources.foreach import ForeachLoader
 from datapipeline.sources.transports import FsGlobTransport, FsFileTransport, HttpTransport
 logger = logging.getLogger(__name__)
 
@@ -144,8 +145,12 @@ class _RichSourceProxy(Source):
         glob_root = compute_glob_root(
             getattr(transport, "files", []))
 
+        is_foreach_loader = isinstance(loader, ForeachLoader)
+
         def compose_text(name: Optional[str]) -> str:
             if name:
+                if is_foreach_loader:
+                    return str(name)
                 base = header if sep else desc
                 return f"{base} {name}".rstrip()
             if tail:
@@ -173,8 +178,8 @@ class _RichSourceProxy(Source):
 
         try:
             for item in self._inner.stream():
-                current_label = current_transport_label(
-                    transport, glob_root=glob_root
+                current_label = current_loader_label(
+                    loader, transport, glob_root=glob_root
                 )
                 # On first item: emit Start + transport details
                 if not started_logged:
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from datapipeline.config.workspace import WorkspaceContext
+
+
+def resolve_default_project_yaml(workspace: WorkspaceContext | None) -> Path | None:
+    """Resolve default_dataset from jerry.yaml into a project.yaml path.
+
+    Returns None when no workspace context or no default_dataset is configured.
+    Raises SystemExit when default_dataset is set but missing from datasets:.
+    """
+    if workspace is None:
+        return None
+    alias = workspace.config.default_dataset
+    if not alias:
+        return None
+    resolved = workspace.resolve_dataset_alias(alias)
+    if resolved is None:
+        raise SystemExit(
+            f"Unknown default_dataset '{alias}'. Define it under datasets: in jerry.yaml."
+        )
+    return resolved
+
@@ -10,6 +10,6 @@ class RecordDatasetConfig(BaseModel):
 
 
 class FeatureDatasetConfig(BaseModel):
-    group_by: str = Field(..., pattern=r"^\d+(m|min|h)$")
+    group_by: str = Field(..., pattern=r"^\d+(m|min|h|d)$")
     features: List[FeatureRecordConfig] = Field(default_factory=list)
     targets: List[FeatureRecordConfig] = Field(default_factory=list)
@@ -1,14 +1,14 @@
-from datetime import datetime
+from datetime import datetime, timedelta
 import re
 
 
 def floor_time_to_bucket(ts: datetime, bucket: str) -> datetime:
     """Floor a timestamp to the nearest bucket boundary.
 
-    Supports patterns like '10m', '10min', '1h', '2h'.
+    Supports patterns like '10m', '10min', '1h', '2h', '1d'.
     Minutes may be specified as 'm' or 'min'.
     """
-    m = re.fullmatch(r"^(\d+)(m|min|h)$", bucket)
+    m = re.fullmatch(r"^(\d+)(m|min|h|d)$", bucket)
     if not m:
         raise ValueError(f"Unsupported cadence: {bucket}")
     n = int(m.group(1))
@@ -19,6 +19,11 @@ def floor_time_to_bucket(ts: datetime, bucket: str) -> datetime:
     if unit in ("m", "min"):
         floored_minute = (ts.minute // n) * n
         return ts.replace(minute=floored_minute, second=0, microsecond=0)
-    else:  # 'h'
+    if unit == "h":
         floored_hour = (ts.hour // n) * n
         return ts.replace(hour=floored_hour, minute=0, second=0, microsecond=0)
+    base = ts.replace(hour=0, minute=0, second=0, microsecond=0)
+    if n == 1:
+        return base
+    remainder = (base.toordinal() - 1) % n
+    return base - timedelta(days=remainder)
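Worked example (not part of the diff): the new 'd' branch floors to midnight and, for multi-day cadences, snaps to an n-day grid anchored at ordinal day 1 via toordinal(). A minimal standalone sketch that mirrors the diffed arithmetic instead of importing the package:

from datetime import datetime, timedelta

def floor_days(ts: datetime, n: int) -> datetime:
    # Mirrors the new day branch: drop the time of day, then snap back to an
    # n-day boundary anchored at ordinal day 1 (0001-01-01, proleptic Gregorian).
    base = ts.replace(hour=0, minute=0, second=0, microsecond=0)
    if n == 1:
        return base
    remainder = (base.toordinal() - 1) % n
    return base - timedelta(days=remainder)

assert floor_days(datetime(2024, 5, 2, 13, 37), 1) == datetime(2024, 5, 2)
print(floor_days(datetime(2024, 5, 2, 13, 37), 7))  # snaps to the enclosing 7-day bucket start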
@@ -125,6 +125,21 @@ class WorkspaceContext:
     def root(self) -> Path:
         return self.file_path.parent
 
+    def resolve_dataset_alias(self, alias: str) -> Optional[Path]:
+        """Resolve a dataset alias from jerry.yaml into an absolute project.yaml path."""
+        raw = (self.config.datasets or {}).get(alias)
+        if not raw:
+            return None
+        candidate = Path(raw)
+        candidate = (
+            candidate.resolve()
+            if candidate.is_absolute()
+            else (self.root / candidate).resolve()
+        )
+        if candidate.is_dir():
+            candidate = candidate / "project.yaml"
+        return candidate.resolve()
+
     def resolve_plugin_root(self) -> Optional[Path]:
         raw = self.config.plugin_root
         if not raw:
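Illustration (hypothetical paths, not from the diff): with jerry.yaml at /ws/jerry.yaml containing datasets: {demo: datasets/demo}, resolve_dataset_alias("demo") yields /ws/datasets/demo/project.yaml when the entry points at a directory. A standalone sketch of the same rule, with the filesystem check stubbed out:

from pathlib import Path

def resolve_alias(root: Path, datasets: dict[str, str], alias: str,
                  is_dir=lambda p: True) -> Path | None:
    # Mirrors WorkspaceContext.resolve_dataset_alias: relative entries join the
    # workspace root; directory entries get "project.yaml" appended.
    raw = (datasets or {}).get(alias)
    if not raw:
        return None
    candidate = Path(raw)
    candidate = candidate if candidate.is_absolute() else root / candidate
    if is_dir(candidate):  # the real method checks the filesystem and resolves the path
        candidate = candidate / "project.yaml"
    return candidate

print(resolve_alias(Path("/ws"), {"demo": "datasets/demo"}, "demo"))  # /ws/datasets/demo/project.yaml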
@@ -98,6 +98,7 @@ def create_source(
     format: Optional[str],
     root: Optional[Path],
     identity: bool = False,
+    project_yaml: Optional[Path] = None,
 ) -> None:
     root_dir, name, _ = pkg_root(root)
     base = resolve_base_pkg_dir(root_dir, name)
@@ -169,7 +170,7 @@ def create_source(
     # Resolve sources directory from a single dataset-scoped project config.
     # If not present or invalid, let the exception bubble up to prompt the user
     # to provide a valid project path.
-    proj_yaml = resolve_project_yaml_path(root_dir)
+    proj_yaml = project_yaml.resolve() if project_yaml is not None else resolve_project_yaml_path(root_dir)
     # Best-effort: create a minimal project scaffold if missing
     ensure_project_scaffold(proj_yaml)
     sources_dir = resolve_sources_dir(proj_yaml).resolve()
@@ -0,0 +1,151 @@
+from __future__ import annotations
+
+import re
+from typing import Any, Iterator, Mapping
+
+from datapipeline.plugins import LOADERS_EP
+from datapipeline.sources.models.loader import BaseDataLoader
+from datapipeline.utils.load import load_ep
+from datapipeline.utils.placeholders import normalize_args, MissingInterpolation, is_missing
+
+
+_VAR_RE = re.compile(r"\$\{([^}]+)\}")
+
+
+def _interpolate(obj: Any, vars_: Mapping[str, Any]) -> Any:
+    if isinstance(obj, dict):
+        return {k: _interpolate(v, vars_) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_interpolate(v, vars_) for v in obj]
+    if isinstance(obj, str):
+        match = _VAR_RE.fullmatch(obj)
+        if match:
+            key = match.group(1)
+            if key in vars_:
+                value = vars_[key]
+                if value is None or is_missing(value):
+                    return MissingInterpolation(key)
+                return value
+            return obj
+
+        def repl(m):
+            key = m.group(1)
+            value = vars_.get(key, m.group(0))
+            if value is None or is_missing(value):
+                return m.group(0)
+            return str(value)
+
+        return _VAR_RE.sub(repl, obj)
+    return obj
+
+
+class ForeachLoader(BaseDataLoader):
+    """Expand a loader spec across a foreach map and concatenate results."""
+
+    def __init__(
+        self,
+        *,
+        foreach: Mapping[str, list[Any]],
+        loader: Mapping[str, Any],
+        inject_field: str | None = None,
+        inject: Mapping[str, Any] | None = None,
+    ):
+        self._key, self._values = self._normalize_foreach(foreach)
+        self._loader_spec = self._normalize_loader_spec(loader)
+        self._inject_field = inject_field
+        self._inject = inject
+        self._current_index: int | None = None
+        self._current_value: Any | None = None
+        self._current_args: dict[str, Any] | None = None
+        self._current_transport: Any | None = None
+
+        if inject_field and inject:
+            raise ValueError("core.foreach supports only one of inject_field or inject")
+        if inject_field and self._key is None:
+            raise ValueError("inject_field requires a non-empty foreach map")
+        if inject is not None and not isinstance(inject, Mapping):
+            raise TypeError("inject must be a mapping when provided")
+
+    def load(self) -> Iterator[Any]:
+        for i, value in enumerate(self._values, 1):
+            vars_ = {self._key: value}
+            loader_args = self._make_loader_args(vars_)
+            loader = self._build_loader(loader_args)
+            self._current_index = i
+            self._current_value = value
+            self._current_args = loader_args
+            self._current_transport = getattr(loader, "transport", None)
+            inject_map = self._build_inject(vars_)
+            for row in loader.load():
+                if inject_map:
+                    yield self._apply_inject(row, inject_map)
+                else:
+                    yield row
+
+    def count(self):
+        total = 0
+        for value in self._values:
+            vars_ = {self._key: value}
+            loader_args = self._make_loader_args(vars_)
+            loader = self._build_loader(loader_args)
+            c = loader.count()
+            if c is None:
+                return None
+            total += int(c)
+        return total
+
+    @staticmethod
+    def _normalize_foreach(foreach: Mapping[str, list[Any]]):
+        if not isinstance(foreach, Mapping) or not foreach:
+            raise ValueError("core.foreach requires a non-empty foreach mapping")
+        keys = list(foreach.keys())
+        if len(keys) != 1:
+            raise ValueError("core.foreach currently supports exactly one foreach key")
+        key = keys[0]
+        values = foreach[key]
+        if not isinstance(values, list):
+            raise TypeError("core.foreach foreach values must be a list")
+        return str(key), list(values)
+
+    @staticmethod
+    def _normalize_loader_spec(loader: Mapping[str, Any]) -> Mapping[str, Any]:
+        if not isinstance(loader, Mapping):
+            raise TypeError("core.foreach loader must be a mapping with entrypoint/args")
+        entrypoint = loader.get("entrypoint")
+        if not entrypoint or not isinstance(entrypoint, str):
+            raise ValueError("core.foreach loader.entrypoint must be a non-empty string")
+        args = loader.get("args")
+        if args is not None and not isinstance(args, Mapping):
+            raise TypeError("core.foreach loader.args must be a mapping when provided")
+        return dict(loader)
+
+    def _make_loader_args(self, vars_: Mapping[str, Any]) -> dict[str, Any]:
+        args = self._loader_spec.get("args") or {}
+        interpolated = _interpolate(args, vars_)
+        return normalize_args(interpolated)
+
+    def _build_loader(self, loader_args: dict[str, Any]) -> BaseDataLoader:
+        entrypoint = self._loader_spec["entrypoint"]
+        L = load_ep(LOADERS_EP, entrypoint)
+        return L(**loader_args)
+
+    def _build_inject(self, vars_: Mapping[str, Any]) -> Mapping[str, Any] | None:
+        if self._inject_field:
+            return {self._inject_field: vars_.get(self._key)}
+        if self._inject is None:
+            return None
+        interpolated = _interpolate(self._inject, vars_)
+        if not isinstance(interpolated, Mapping):
+            raise TypeError("core.foreach inject must resolve to a mapping")
+        return normalize_args(interpolated)
+
+    @staticmethod
+    def _apply_inject(row: Any, inject_map: Mapping[str, Any]) -> Any:
+        if isinstance(row, dict):
+            row.update(inject_map)
+            return row
+        if isinstance(row, Mapping):
+            out = dict(row)
+            out.update(inject_map)
+            return out
+        raise TypeError("core.foreach inject requires mapping rows")
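For orientation (an assumed usage sketch, not shipped code): constructing ForeachLoader directly shows the spec shape it expects. The "core.io" entrypoint name appears in the progress-label code above, but the fs args here are illustrative; load() resolves whatever entrypoint is named through load_ep, so this only runs where that loader is actually registered.

from datapipeline.sources.foreach import ForeachLoader

# Hypothetical fan-out over two years: ${year} is interpolated into the inner
# loader args for each value, and inject_field adds {"year": <value>} to every mapping row.
loader = ForeachLoader(
    foreach={"year": [2023, 2024]},
    loader={
        "entrypoint": "core.io",  # assumed registered loader entrypoint
        "args": {"transport": "fs", "path": "data/${year}/events.csv"},  # illustrative args
    },
    inject_field="year",
)
# rows = list(loader.load())  # one inner loader per value, rows concatenated in order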
@@ -7,5 +7,5 @@ name = "{{DIST_NAME}}"
 version = "0.0.1"
 description = "A DataPipeline plugin for the {{DIST_NAME}} domain"
 dependencies = [
-    "jerry-thomas>=0.2.0",
+    "jerry-thomas>=1.0.2",
 ]
@@ -7,7 +7,7 @@ paths:
   postprocess: postprocess.yaml
   artifacts: ../artifacts/${project_name}/v${version}
   tasks: ./tasks
-globals:
+globals: # Globals to use in your .yaml files via ${var_name}.
   # Primary dataset cadence; referenced from dataset.yaml (group_by)
   # and contracts via ${group_by}.
   group_by: <your-bucket-cadence>