interloper-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- interloper/__init__.py +140 -0
- interloper/assets/__init__.py +8 -0
- interloper/assets/base.py +594 -0
- interloper/assets/context.py +163 -0
- interloper/assets/decorator.py +92 -0
- interloper/assets/keys.py +22 -0
- interloper/backfillers/__init__.py +8 -0
- interloper/backfillers/base.py +254 -0
- interloper/backfillers/results.py +99 -0
- interloper/backfillers/serial.py +38 -0
- interloper/backfillers/state.py +141 -0
- interloper/cli/__init__.py +5 -0
- interloper/cli/config.py +50 -0
- interloper/cli/display.py +1068 -0
- interloper/cli/main.py +265 -0
- interloper/dag/__init__.py +6 -0
- interloper/dag/base.py +404 -0
- interloper/errors.py +155 -0
- interloper/events/__init__.py +29 -0
- interloper/events/base.py +480 -0
- interloper/events/server.py +148 -0
- interloper/io/__init__.py +21 -0
- interloper/io/adapter.py +106 -0
- interloper/io/base.py +73 -0
- interloper/io/context.py +31 -0
- interloper/io/csv.py +140 -0
- interloper/io/database.py +378 -0
- interloper/io/file.py +153 -0
- interloper/io/memory.py +149 -0
- interloper/normalizer/__init__.py +6 -0
- interloper/normalizer/base.py +228 -0
- interloper/normalizer/strategy.py +21 -0
- interloper/partitioning/__init__.py +21 -0
- interloper/partitioning/base.py +50 -0
- interloper/partitioning/time.py +83 -0
- interloper/rest/__init__.py +12 -0
- interloper/rest/auth.py +270 -0
- interloper/rest/client.py +66 -0
- interloper/rest/paginator.py +120 -0
- interloper/runners/__init__.py +14 -0
- interloper/runners/base.py +279 -0
- interloper/runners/multi_process.py +158 -0
- interloper/runners/multi_thread.py +100 -0
- interloper/runners/results.py +135 -0
- interloper/runners/serial.py +42 -0
- interloper/runners/state.py +229 -0
- interloper/schema/__init__.py +5 -0
- interloper/schema/base.py +179 -0
- interloper/serialization/__init__.py +21 -0
- interloper/serialization/asset.py +100 -0
- interloper/serialization/backfiller.py +29 -0
- interloper/serialization/base.py +43 -0
- interloper/serialization/config.py +45 -0
- interloper/serialization/dag.py +29 -0
- interloper/serialization/io.py +28 -0
- interloper/serialization/runner.py +29 -0
- interloper/serialization/source.py +68 -0
- interloper/source/__init__.py +7 -0
- interloper/source/base.py +389 -0
- interloper/source/config.py +20 -0
- interloper/source/decorator.py +77 -0
- interloper/utils/__init__.py +6 -0
- interloper/utils/imports.py +107 -0
- interloper/utils/text.py +94 -0
- interloper_core-0.2.0.dist-info/METADATA +18 -0
- interloper_core-0.2.0.dist-info/RECORD +68 -0
- interloper_core-0.2.0.dist-info/WHEEL +4 -0
- interloper_core-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
"""Asset definition and execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
import inspect
|
|
7
|
+
import traceback
|
|
8
|
+
import warnings
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel
|
|
14
|
+
|
|
15
|
+
from interloper.assets.context import ExecutionContext
|
|
16
|
+
from interloper.assets.keys import AssetDefinitionKey, AssetInstanceKey
|
|
17
|
+
from interloper.errors import AssetError, ConfigError, DependencyNotFoundError, PartitionError
|
|
18
|
+
from interloper.events import get_asset_event_metadata
|
|
19
|
+
from interloper.events.base import EventType, emit
|
|
20
|
+
from interloper.io.base import IO
|
|
21
|
+
from interloper.io.context import IOContext
|
|
22
|
+
from interloper.io.memory import MemoryIO
|
|
23
|
+
from interloper.partitioning.base import Partition, PartitionConfig, PartitionWindow
|
|
24
|
+
from interloper.serialization.asset import AssetSpec
|
|
25
|
+
from interloper.serialization.base import Serializable
|
|
26
|
+
from interloper.utils.imports import get_object_path
|
|
27
|
+
from interloper.utils.text import to_label, validate_name
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from interloper.dag.base import DAG
|
|
31
|
+
from interloper.normalizer.base import Normalizer
|
|
32
|
+
from interloper.normalizer.strategy import MaterializationStrategy
|
|
33
|
+
from interloper.source.base import Source, SourceDefinition
|
|
34
|
+
from interloper.source.config import Config
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class AssetDefinition:
    """Definition of an asset created by the @asset decorator.

    The definition is a frozen dataclass holding the decorated function and
    its declared options. Calling the definition builds a runtime ``Asset``
    with optional per-instance overrides.
    """

    func: Callable[..., Any]
    source_definition: SourceDefinition | None = None
    name: str = ""
    label: str = ""
    schema: type[BaseModel] | None = None
    config: type[Config] | None = None
    io: IO | None = None
    normalizer: Normalizer | None = None
    strategy: MaterializationStrategy | None = None
    tags: tuple[str, ...] = ()
    partitioning: PartitionConfig | None = None
    dataset: str | None = None
    requires: dict[str, AssetDefinitionKey] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Default ``name`` to the function name and ``label`` to a derived label."""
        # The dataclass is frozen, so defaults must be written through
        # object.__setattr__ rather than plain attribute assignment.
        if not self.name:
            object.__setattr__(self, "name", getattr(self.func, "__name__", "unknown"))

        validate_name(self.name)

        if not self.label:
            object.__setattr__(self, "label", to_label(self.name))

    @property
    def definition_key(self) -> AssetDefinitionKey:
        """Return the asset definition key.

        Format: ``{source-definition-name}:{asset-name}`` for source-bound assets,
        or just ``{asset-name}`` for standalone assets.
        """
        if self.source_definition:
            return AssetDefinitionKey(f"{self.source_definition.name}:{self.name}")
        return AssetDefinitionKey(self.name)

    def __call__(
        self,
        *,
        name: str | None = None,
        config: Config | None = None,
        io: IO | dict[str, IO] | None = None,
        deps: dict[str, AssetInstanceKey] | None = None,
        dataset: str | None = None,
        default_io_key: str | None = None,
        materializable: bool = True,
        strategy: MaterializationStrategy | None = None,
    ) -> Asset:
        """Instantiate an ``Asset`` from this definition with runtime overrides.

        Args:
            name: Override the asset name.
            config: Override the config instance.
            io: Override the IO backend (single or dict of named IOs).
            deps: Explicit dependency mapping (param name to asset instance key).
            dataset: Override the dataset name.
            default_io_key: Default IO key for multi-IO setups.
            materializable: Whether the asset can be materialized.
            strategy: Override the materialization strategy.

        Returns:
            A new Asset instance with the given overrides applied.

        Raises:
            ConfigError: If the provided config does not match the expected type,
                or if no config is given and the declared config type cannot be
                instantiated without arguments.
        """
        if name is not None:
            validate_name(name)

        if config is not None:
            if self.config is not None:
                # A config type is declared: the instance must match it.
                if not isinstance(config, self.config):
                    raise ConfigError(
                        f"Config provided to asset '{self.name}' must be of type {self.config.__name__}, "
                        f"got {type(config).__name__}."
                    )
            elif not self.source_definition:
                # No declared type on a standalone asset: accept the config but
                # tell the caller that it cannot be type-checked.
                warnings.warn(
                    f"Config provided to asset '{self.name}' but no config type is configured "
                    "on the @asset decorator. The config will be used but cannot be type-checked.",
                    UserWarning,
                    stacklevel=2,
                )

        resolved_config = config
        if resolved_config is None and self.config is not None:
            # No explicit config: instantiate the declared type with no arguments.
            # Any failure is converted into a ConfigError with guidance.
            try:
                resolved_config = self.config()
            except Exception as e:
                raise ConfigError(
                    f"Config {self.config.__name__} is configured but cannot be resolved. "
                    f"Provide config explicitly or set environment variables. Error: {e}"
                ) from e

        return Asset(
            func=self.func,
            name=name or self.name,
            schema=self.schema,
            config=resolved_config,
            io=io or self.io,
            normalizer=self.normalizer,
            strategy=strategy or self.strategy,
            partitioning=self.partitioning,
            dataset=dataset or self.dataset,
            default_io_key=default_io_key,
            deps=deps or {},
            definition=self,
            materializable=materializable,
        )
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass
class Asset(Serializable[AssetSpec]):
    """Runtime instance of an asset.

    Wraps the decorated function together with its resolved config, IO
    target(s), optional normalizer/schema and partitioning, and knows how to
    execute itself (``run``) and persist the result (``materialize``).
    """

    func: Callable
    definition: AssetDefinition
    name: str
    label: str = ""
    schema: type[BaseModel] | None = None
    config: Config | None = None
    io: IO | dict[str, IO] | None = None
    normalizer: Normalizer | None = None
    strategy: MaterializationStrategy | None = None
    partitioning: PartitionConfig | None = None
    dataset: str | None = None
    default_io_key: str | None = None
    deps: dict[str, AssetInstanceKey] = field(default_factory=dict)
    source: Source | None = field(default=None, init=False, repr=False)
    materializable: bool = True
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Apply defaults after initialization."""
        if not self.label:
            self.label = self.definition.label

        # Assets without an explicit IO fall back to the shared in-memory IO.
        if self.io is None:
            self.io = MemoryIO.singleton()

    @property
    def instance_key(self) -> AssetInstanceKey:
        """Return the unique key for this asset instance.

        Source-bound assets are prefixed with the source instance key.
        """
        if self.source:
            return AssetInstanceKey(f"{self.source.instance_key}:{self.name}")
        return AssetInstanceKey(self.name)

    @property
    def definition_key(self) -> AssetDefinitionKey:
        """Return the asset definition key."""
        return self.definition.definition_key

    def copy(
        self,
        config: Config | None = None,
        io: IO | dict[str, IO] | None = None,
        deps: dict[str, AssetInstanceKey] | None = None,
        dataset: str | None = None,
        materializable: bool | None = None,
    ) -> Asset:
        """Return a shallow copy of this asset with optional overrides.

        Only arguments that are not ``None`` replace the copied values.
        Attributes that are not overridden are shared with the original.
        """
        # copy.copy() keeps every attribute, including the init=False ``source``
        # field, and does not re-run __init__/__post_init__ the way
        # dataclasses.replace() would.
        asset = copy.copy(self)
        if config is not None:
            asset.config = config
        if io is not None:
            asset.io = io
        if deps is not None:
            asset.deps = deps
        if dataset is not None:
            asset.dataset = dataset
        if materializable is not None:
            asset.materializable = materializable
        return asset

    @property
    def path(self) -> str:
        """Return the fully-qualified path used to locate this asset.

        For source-bound assets: ``{source-class-path}:{asset-name}``.
        For standalone assets: the import path of the decorated function.
        """
        if self.source:
            return f"{get_object_path(self.source.definition.cls)}:{self.name}"
        return get_object_path(self.func)

    def run(
        self,
        partition_or_window: Partition | PartitionWindow | None = None,
        dag: DAG | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> Any:
        """Execute the asset and return the result without writing to IO.

        Resolves context, config, and upstream dependencies (via DAG), then
        runs the decorated function and applies normalization and schema
        validation. With a normalizer in the default strategy and no schema
        set, an inferred schema is stored on ``self.schema`` when the
        normalizer's ``infer`` flag is truthy.

        Args:
            partition_or_window: Partition or PartitionWindow for this run.
            dag: DAG for dependency resolution (required if asset has deps).
            metadata: Arbitrary metadata dict (e.g. run_id, backfill_id).

        Returns:
            The execution result (normalized when a normalizer is configured).

        Raises:
            AssetError: If a strategy requires a schema that is missing, or a
                dependency cannot be loaded.
            PartitionError: If partitioning requirements are not met.
        """
        if self.partitioning is None:
            if partition_or_window is not None:
                warnings.warn(
                    f"Asset '{self.name}' is not partitioned, partition/partition_window will be ignored",
                    stacklevel=2,
                )
        else:
            if partition_or_window is None:
                raise PartitionError(f"Asset '{self.name}' is partitioned, but no partition/partition_window provided")
            if isinstance(partition_or_window, PartitionWindow) and not self.partitioning.allow_window:
                raise PartitionError(
                    f"Asset '{self.instance_key}' does not support windowed runs (allow_window=False). "
                    "Use a partition window with backfill(windowed=False) to run one partition per run."
                )

        context = ExecutionContext(
            asset_key=self.instance_key,
            partition_or_window=partition_or_window,
            partitioning=self.partitioning,
            metadata=metadata,
        )

        # Every parameter other than ``context``/``config`` is loaded from upstream IO.
        kwargs = self._build_kwargs(context, partition_or_window, dag)

        exec_metadata = {
            **(metadata or {}),
            **get_asset_event_metadata(self),
            "partition_or_window": str(partition_or_window) if partition_or_window is not None else None,
        }
        emit(EventType.ASSET_EXEC_STARTED, metadata=exec_metadata)
        try:
            result = self.func(**kwargs)
        except Exception as e:
            emit(
                EventType.ASSET_EXEC_FAILED,
                metadata={
                    **exec_metadata,
                    "error": str(e),
                    "traceback": traceback.format_exc(),
                },
            )
            raise
        else:
            # Emitted outside the ``try`` body so that an error raised by the
            # event emitter is not reported as a failure of the asset function.
            emit(EventType.ASSET_EXEC_COMPLETED, metadata=exec_metadata)

        if self.normalizer is not None:
            # Imported lazily: at module level this name is only imported under TYPE_CHECKING.
            from interloper.normalizer.strategy import MaterializationStrategy

            result = self.normalizer.normalize(result)
            strategy = self.strategy or MaterializationStrategy.AUTO

            if strategy == MaterializationStrategy.RECONCILE:
                if self.schema is None:
                    raise AssetError(f"Asset '{self.name}': strategy='reconcile' requires a schema.")
                result = self.normalizer.reconcile(result, self.schema)

            elif strategy == MaterializationStrategy.STRICT:
                if self.schema is None:
                    raise AssetError(f"Asset '{self.name}': strategy='strict' requires a schema.")
                self.normalizer.validate_schema(result, self.schema, strict=True)

            else:
                # Default strategy: infer a schema when none is set and inference
                # is enabled, otherwise validate against the existing schema.
                if self.schema is None and self.normalizer.infer:
                    self.schema = self.normalizer.infer_schema(result)
                elif self.schema is not None:
                    self.normalizer.validate_schema(result, self.schema)

        elif self.schema is not None:
            self._validate_schema(result)

        return result

    def materialize(
        self,
        partition_or_window: Partition | PartitionWindow | None = None,
        dag: DAG | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> Any:
        """Execute the asset and write the result to all configured IOs.

        Equivalent to calling ``run()`` followed by writing to every IO target.

        Args:
            partition_or_window: Partition or PartitionWindow for this run.
            dag: DAG for dependency resolution (required if asset has deps).
            metadata: Arbitrary metadata dict (e.g. run_id, backfill_id).

        Returns:
            The execution result, or ``None`` if the asset is not materializable.
        """
        if not self.materializable:
            return None

        metadata = metadata or {}
        result = self.run(partition_or_window, dag, metadata)
        self._io_write(partition_or_window, metadata, result)
        return result

    def _io_write(
        self,
        partition_or_window: Partition | PartitionWindow | None,
        metadata: dict[str, Any],
        result: Any,
    ) -> None:
        """Write the execution result to all configured IO targets.

        Emits IO_WRITE_STARTED / IO_WRITE_COMPLETED / IO_WRITE_FAILED events
        around each write. The first failing write is re-raised, so later
        targets are not written.

        Args:
            partition_or_window: Partition or PartitionWindow for this run.
            metadata: Arbitrary metadata dict (e.g. run_id, backfill_id).
            result: The value to write.
        """
        if self.io is None:
            return

        # Unpartitioned assets never pass a partition to the IO layer.
        io_context = IOContext(
            asset=self,
            partition_or_window=partition_or_window if self.partitioning is not None else None,
            metadata=metadata,
        )

        # Normalise to a list of (io_key, io); a single IO has no key.
        if isinstance(self.io, dict):
            ios = list(self.io.items())
        else:
            ios = [(None, self.io)]

        partition_str = str(partition_or_window) if partition_or_window is not None else None

        for io_key, io in ios:
            io_metadata = {
                **metadata,
                **get_asset_event_metadata(self),
                "partition_or_window": partition_str,
                "io_key": io_key,
            }
            emit(EventType.IO_WRITE_STARTED, metadata=io_metadata)
            try:
                io.write(io_context, result)
            except Exception as e:
                emit(
                    EventType.IO_WRITE_FAILED,
                    metadata={
                        **io_metadata,
                        "error": str(e),
                        "traceback": traceback.format_exc(),
                    },
                )
                raise
            else:
                # Kept out of the ``try`` body so an emitter error is not
                # reported as a write failure.
                emit(EventType.IO_WRITE_COMPLETED, metadata=io_metadata)

    def _build_kwargs(
        self,
        context: ExecutionContext,
        partition_or_window: Partition | PartitionWindow | None,
        dag: DAG | None,
    ) -> dict[str, Any]:
        """Build kwargs for the asset function.

        Maps function parameters to their values: ``context`` and ``config``
        are injected directly, all other parameters are treated as upstream
        dependencies and loaded from IO via the DAG.

        Args:
            context: Execution context for this run.
            partition_or_window: Partition or PartitionWindow for this run.
            dag: DAG for dependency resolution.

        Returns:
            Keyword arguments to pass to the asset function.

        Raises:
            AssetError: If a dependency cannot be resolved or read.
            DependencyNotFoundError: If a dependency key is not present in the DAG.
        """
        kwargs: dict[str, Any] = {}

        for param_name in inspect.signature(self.func).parameters:
            if param_name == "context":
                kwargs["context"] = context
                continue
            if param_name == "config":
                kwargs["config"] = self.config
                continue

            # Any other parameter is an upstream dependency loaded through the DAG.
            if dag is None:
                raise AssetError(
                    f"Asset '{self.name}' has dependencies but no DAG provided. "
                    "Pass a DAG to run() or materialize() for dependency resolution."
                )

            upstream_key = dag.resolve_dependency_key(self, param_name)
            if upstream_key not in dag.asset_map:
                raise DependencyNotFoundError(
                    f"Dependency '{upstream_key}' not found in DAG for asset '{self.name}'"
                )
            upstream_asset = dag.asset_map[upstream_key]

            # Pick the IO to read from. Multi-IO upstreams are read through their
            # default_io_key; without one, no IO is selected and the read fails below.
            read_io = None
            read_io_key = None
            if isinstance(upstream_asset.io, dict):
                io_dict = cast(dict[str, IO], upstream_asset.io)
                read_io_key = upstream_asset.default_io_key
                if read_io_key:
                    if read_io_key not in io_dict:
                        # Report a misconfigured default key as AssetError instead
                        # of leaking a bare KeyError.
                        raise AssetError(
                            f"Default IO key '{read_io_key}' not found on upstream asset "
                            f"'{upstream_asset.name}'. Available keys: {sorted(io_dict)}"
                        )
                    read_io = io_dict[read_io_key]
            else:
                read_io = upstream_asset.io

            if read_io is None:
                raise AssetError(f"No IO found for upstream asset '{upstream_asset.name}'")

            # The partition is only forwarded when the upstream asset is itself partitioned.
            if upstream_asset.partitioning is not None:
                effective_partition_or_window = partition_or_window
            else:
                effective_partition_or_window = None

            io_context = IOContext(
                asset=upstream_asset,
                partition_or_window=effective_partition_or_window,
                metadata=context.metadata,
            )

            partition_str = (
                str(effective_partition_or_window) if effective_partition_or_window is not None else None
            )
            io_metadata = {
                **context.metadata,
                **get_asset_event_metadata(self),
                "partition_or_window": partition_str,
                "io_key": read_io_key,
            }
            emit(EventType.IO_READ_STARTED, metadata=io_metadata)
            try:
                kwargs[param_name] = read_io.read(io_context)
            except Exception as e:
                emit(
                    EventType.IO_READ_FAILED,
                    metadata={
                        **io_metadata,
                        "error": str(e),
                        "traceback": traceback.format_exc(),
                    },
                )
                raise AssetError(f"Failed to load data from upstream asset '{upstream_asset.name}': {e}") from e
            else:
                # Kept out of the ``try`` body so an emitter error is not
                # re-wrapped as an upstream read failure.
                emit(EventType.IO_READ_COMPLETED, metadata=io_metadata)

        return kwargs

    def _resolve_io(self, io_key: str | None = None) -> IO:
        """Resolve a single IO from this asset.

        Args:
            io_key: For multi-IO assets, the key identifying which IO to use.
                When ``None``, uses :attr:`default_io_key` or the first entry.

        Returns:
            The resolved IO instance.

        Raises:
            ConfigError: If *io_key* is not found or no IO is configured.
        """
        if isinstance(self.io, dict):
            if io_key is not None:
                if io_key not in self.io:
                    raise ConfigError(
                        f"IO key '{io_key}' not found on asset '{self.name}'. Available keys: {sorted(self.io.keys())}"
                    )
                return self.io[io_key]
            if self.default_io_key is not None and self.default_io_key in self.io:
                return self.io[self.default_io_key]
            # No usable key: fall back to the first configured IO.
            return next(iter(self.io.values()))

        if self.io is None:
            raise ConfigError(f"Asset '{self.name}' has no IO configured.")

        return self.io

    def partition_row_counts(self, *, io_key: str | None = None) -> dict[str, int]:
        """Return row counts grouped by this asset's partition column.

        Delegates to :meth:`IO.partition_row_counts` using the resolved IO.

        Args:
            io_key: For multi-IO assets, the IO key to query.

        Returns:
            Mapping from partition value (as string) to row count.

        Raises:
            PartitionError: If this asset is not partitioned.
        """
        if self.partitioning is None:
            raise PartitionError(
                f"Asset '{self.name}' is not partitioned. "
                "Cannot compute partition row counts without a partition column."
            )

        io = self._resolve_io(io_key)
        context = IOContext(asset=self)
        return io.partition_row_counts(context)

    def _validate_schema(self, data: Any) -> None:
        """Validate data against schema.

        Delegates to :func:`~interloper.schema.validate_schema` when data is a
        non-empty list whose first element is a dict; any other data is left
        unchecked.
        """
        if self.schema is None:
            return

        if isinstance(data, list) and data and isinstance(data[0], dict):
            from interloper.schema import validate_schema

            validate_schema(data, self.schema)

    def to_spec(self) -> AssetSpec:
        """Convert to serializable spec.

        Returns:
            An AssetSpec representing this asset.
        """
        # Multi-IO assets serialize to a dict of specs, single-IO assets to one spec.
        io_spec = None
        if isinstance(self.io, dict):
            io_spec = {k: v.to_spec() for k, v in self.io.items()}  # type: ignore[unresolved-attribute]
        elif self.io is not None:
            io_spec = self.io.to_spec()

        return AssetSpec(
            path=self.path,
            io=io_spec,
            materializable=self.materializable,
            config=self.config.model_dump() if self.config is not None else None,
        )
|