moose-lib 0.4.218__py3-none-any.whl → 0.4.220__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
moose_lib/dmv2.py DELETED
@@ -1,994 +0,0 @@
- """
- Moose Data Model v2 (dmv2) - Python Implementation
-
- This module provides the Python classes for defining Moose v2 data model resources,
- including OLAP tables, streams, ingestion/consumption APIs, pipelines, and SQL views.
- It mirrors the functionality of the TypeScript `dmv2` module, enabling the definition
- of data infrastructure using Python and Pydantic models.
- """
- from __future__ import annotations
-
- import dataclasses
- import datetime
- from typing import Any, Generic, Optional, TypeVar, Callable, Union, Literal, Awaitable
- from pydantic import BaseModel, ConfigDict, AliasGenerator
- from pydantic.alias_generators import to_camel
- from pydantic.fields import FieldInfo
- from pydantic.json_schema import JsonSchemaValue
-
- from moose_lib import ClickHouseEngines
-
- _tables: dict[str, "OlapTable"] = {}
- _streams: dict[str, "Stream"] = {}
- _ingest_apis: dict[str, "IngestApi"] = {}
- _egress_apis: dict[str, "ConsumptionApi"] = {}
- _sql_resources: dict[str, "SqlResource"] = {}
- _workflows: dict[str, "Workflow"] = {}
-
- T = TypeVar('T', bound=BaseModel)
- U = TypeVar('U', bound=BaseModel)
- T_none = TypeVar('T_none', bound=Union[BaseModel, None])
- U_none = TypeVar('U_none', bound=Union[BaseModel, None])
- type ZeroOrMany[T] = Union[T, list[T], None]
-
- type TaskRunFunc[T_none, U_none] = Union[
-     # Case 1: No input, no output
-     Callable[[], None],
-     # Case 2: No input, with output
-     Callable[[], Union[U_none, Awaitable[U_none]]],
-     # Case 3: With input, no output
-     Callable[[T_none], None],
-     # Case 4: With input, with output
-     Callable[[T_none], Union[U_none, Awaitable[U_none]]]
- ]
-
- class Columns(Generic[T]):
-     """Provides runtime checked column name access for Moose resources.
-
-     Instead of using string literals for column names, you can use attribute access
-     on this object, which will verify the name against the Pydantic model's fields.
-
-     Example:
-         >>> class MyModel(BaseModel):
-         ...     user_id: int
-         ...     event_name: str
-         >>> cols = Columns(MyModel)
-         >>> print(cols.user_id) # Output: user_id
-         >>> print(cols.non_existent) # Raises AttributeError
-
-     Args:
-         model: The Pydantic model type whose fields represent the columns.
-     """
-     _fields: dict[str, FieldInfo]
-
-     def __init__(self, model: type[T]):
-         self._fields = model.model_fields
-
-     def __getattr__(self, item: str) -> str:
-         if item in self._fields:
-             return item # or some Column representation
-         raise AttributeError(f"{item} is not a valid column name")
-
-
- class BaseTypedResource(Generic[T]):
-     """Base class for Moose resources that are typed with a Pydantic model.
-
-     Handles the association of a Pydantic model `T` with a Moose resource,
-     providing type validation and access to the model type.
-
-     Attributes:
-         name (str): The name of the Moose resource.
-     """
-     _t: type[T]
-     name: str
-
-     @classmethod
-     def _get_type(cls, keyword_args: dict):
-         t = keyword_args.get('t')
-         if t is None:
-             raise ValueError(f"Use `{cls.__name__}[T](name='...')` to supply the Pydantic model type`")
-         if not isinstance(t, type) or not issubclass(t, BaseModel):
-             raise ValueError(f"{t} is not a Pydantic model")
-         return t
-
-     @property
-     def model_type(self) -> type[T]:
-         """Get the Pydantic model type associated with this resource."""
-         return self._t
-
-     def _set_type(self, name: str, t: type[T]):
-         """Internal method to set the resource name and associated Pydantic type."""
-         self._t = t
-         self.name = name
-
-     def __class_getitem__(cls, item: type[BaseModel]):
-         def curried_constructor(*args, **kwargs):
-             return cls(t=item, *args, **kwargs)
-
-         return curried_constructor
-
-
- class TypedMooseResource(BaseTypedResource, Generic[T]):
-     """Base class for Moose resources that have columns derived from a Pydantic model.
-
-     Extends `BaseTypedResource` by adding a `Columns` helper for type-safe
-     column name access.
-
-     Attributes:
-         columns (Columns[T]): An object providing attribute access to column names.
-     """
-     columns: Columns[T]
-
-     def _set_type(self, name: str, t: type[T]):
-         super()._set_type(name, t)
-         self.columns = Columns[T](self._t)
-
-
- class OlapConfig(BaseModel):
-     """Configuration for OLAP tables (e.g., ClickHouse tables).
-
-     Attributes:
-         order_by_fields: List of column names to use for the ORDER BY clause.
-             Crucial for `ReplacingMergeTree` and performance.
-         deduplicate: If True, uses the ReplacingMergeTree engine for automatic
-             deduplication based on `order_by_fields`. Equivalent to
-             setting `engine=ClickHouseEngines.ReplacingMergeTree`.
-         engine: The ClickHouse table engine to use (e.g., MergeTree, ReplacingMergeTree).
-         version: Optional version string for tracking configuration changes.
-         metadata: Optional metadata for the table.
-     """
-     order_by_fields: list[str] = []
-     # equivalent to setting `engine=ClickHouseEngines.ReplacingMergeTree`
-     deduplicate: bool = False
-     engine: Optional[ClickHouseEngines] = None
-     version: Optional[str] = None
-     metadata: Optional[dict] = None
-
-
- class OlapTable(TypedMooseResource, Generic[T]):
-     """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
-
-     Args:
-         name: The name of the OLAP table.
-         config: Configuration options for the table engine, ordering, etc.
-         t: The Pydantic model defining the table schema (passed via `OlapTable[MyModel](...)`).
-
-     Attributes:
-         config (OlapConfig): The configuration settings for this table.
-         columns (Columns[T]): Helper for accessing column names safely.
-         name (str): The name of the table.
-         model_type (type[T]): The Pydantic model associated with this table.
-         kind: The kind of the table (e.g., "OlapTable").
-     """
-     config: OlapConfig
-     kind: str = "OlapTable"
-
-     def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs):
-         super().__init__()
-         self._set_type(name, self._get_type(kwargs))
-         self.config = config
-         self.metadata = config.metadata
-         _tables[name] = self
-
-
- class StreamConfig(BaseModel):
-     """Configuration for data streams (e.g., Redpanda topics).
-
-     Attributes:
-         parallelism: Number of partitions for the stream.
-         retention_period: Data retention period in seconds (default: 7 days).
-         destination: Optional `OlapTable` where stream messages should be automatically ingested.
-         version: Optional version string for tracking configuration changes.
-         metadata: Optional metadata for the stream.
-     """
-     parallelism: int = 1
-     retention_period: int = 60 * 60 * 24 * 7 # 7 days
-     destination: Optional[OlapTable[Any]] = None
-     version: Optional[str] = None
-     metadata: Optional[dict] = None
-
-
- class TransformConfig(BaseModel):
-     """Configuration for stream transformations.
-
-     Attributes:
-         version: Optional version string to identify a specific transformation.
-             Allows multiple transformations to the same destination if versions differ.
-     """
-     version: Optional[str] = None
-     dead_letter_queue: "Optional[DeadLetterQueue]" = None
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-     metadata: Optional[dict] = None
-
-
- class ConsumerConfig(BaseModel):
-     """Configuration for stream consumers.
-
-     Attributes:
-         version: Optional version string to identify a specific consumer.
-             Allows multiple consumers if versions differ.
-     """
-     version: Optional[str] = None
-     dead_letter_queue: "Optional[DeadLetterQueue]" = None
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-
- @dataclasses.dataclass
- class _RoutedMessage:
-     """Internal class representing a message routed to a specific stream."""
-     destination: "Stream[Any]"
-     values: ZeroOrMany[Any]
-
-
- @dataclasses.dataclass
- class ConsumerEntry(Generic[T]):
-     """Internal class representing a consumer with its configuration."""
-     consumer: Callable[[T], None]
-     config: ConsumerConfig
-
-
- @dataclasses.dataclass
- class TransformEntry(Generic[T]):
-     """Internal class representing a transformation with its configuration."""
-     destination: "Stream[Any]"
-     transformation: Callable[[T], ZeroOrMany[Any]]
-     config: TransformConfig
-
-
- class Stream(TypedMooseResource, Generic[T]):
-     """Represents a data stream (e.g., a Redpanda topic) typed with a Pydantic model.
-
-     Allows defining transformations to other streams and adding consumers.
-
-     Args:
-         name: The name of the stream.
-         config: Configuration options for the stream (parallelism, retention, destination).
-         t: The Pydantic model defining the stream message schema (passed via `Stream[MyModel](...)`).
-
-     Attributes:
-         config (StreamConfig): Configuration settings for this stream.
-         transformations (dict[str, list[TransformEntry[T]]]): Dictionary mapping destination stream names
-             to lists of transformation functions.
-         consumers (list[ConsumerEntry[T]]): List of consumers attached to this stream.
-         columns (Columns[T]): Helper for accessing message field names safely.
-         name (str): The name of the stream.
-         model_type (type[T]): The Pydantic model associated with this stream.
-     """
-     config: StreamConfig
-     transformations: dict[str, list[TransformEntry[T]]]
-     consumers: list[ConsumerEntry[T]]
-     _multipleTransformations: Optional[Callable[[T], list[_RoutedMessage]]] = None
-
-     def __init__(self, name: str, config: StreamConfig = StreamConfig(), **kwargs):
-         super().__init__()
-         self._set_type(name, self._get_type(kwargs))
-         self.config = config
-         self.metadata = config.metadata
-         self.consumers = []
-         self.transformations = {}
-         _streams[name] = self
-
-     def add_transform(self, destination: "Stream[U]", transformation: Callable[[T], ZeroOrMany[U]],
-                       config: TransformConfig = None):
-         """Adds a transformation step from this stream to a destination stream.
-
-         The transformation function receives a record of type `T` and should return
-         a record of type `U`, a list of `U` records, or `None` to filter.
-
-         Args:
-             destination: The target `Stream` for the transformed records.
-             transformation: A callable that performs the transformation.
-             config: Optional configuration, primarily for setting a version.
-         """
-         config = config or TransformConfig()
-         if destination.name in self.transformations:
-             existing_transforms = self.transformations[destination.name]
-             # Check if a transform with this version already exists
-             has_version = any(t.config.version == config.version for t in existing_transforms)
-             if not has_version:
-                 existing_transforms.append(
-                     TransformEntry(destination=destination, transformation=transformation, config=config))
-         else:
-             self.transformations[destination.name] = [
-                 TransformEntry(destination=destination, transformation=transformation, config=config)]
-
-     def add_consumer(self, consumer: Callable[[T], None], config: ConsumerConfig = None):
-         """Adds a consumer function to be executed for each record in the stream.
-
-         Consumers are typically used for side effects like logging or triggering external actions.
-
-         Args:
-             consumer: A callable that accepts a record of type `T`.
-             config: Optional configuration, primarily for setting a version.
-         """
-         config = config or ConsumerConfig()
-         has_version = any(c.config.version == config.version for c in self.consumers)
-         if not has_version:
-             self.consumers.append(ConsumerEntry(consumer=consumer, config=config))
-
-     def has_consumers(self) -> bool:
-         """Checks if any consumers have been added to this stream.
-
-         Returns:
-             True if the stream has one or more consumers, False otherwise.
-         """
-         return len(self.consumers) > 0
-
-     def routed(self, values: ZeroOrMany[T]) -> _RoutedMessage:
-         """Creates a `_RoutedMessage` for use in multi-transform functions.
-
-         Wraps the value(s) to be sent with this stream as the destination.
-
-         Args:
-             values: A single record, a list of records, or None.
-
-         Returns:
-             A `_RoutedMessage` object.
-         """
-         return _RoutedMessage(destination=self, values=values)
-
-     def set_multi_transform(self, transformation: Callable[[T], list[_RoutedMessage]]):
-         """Sets a transformation function capable of routing records to multiple streams.
-
-         The provided function takes a single input record (`T`) and must return a list
-         of `_RoutedMessage` objects, created using the `.routed()` method of the
-         target streams.
-
-         Example:
-             def my_multi_transform(record: InputModel) -> list[_RoutedMessage]:
-                 output1 = transform_for_stream1(record)
-                 output2 = transform_for_stream2(record)
-                 return [
-                     stream1.routed(output1),
-                     stream2.routed(output2)
-                 ]
-             input_stream.set_multi_transform(my_multi_transform)
-
-         Note: Only one multi-transform function can be set per stream.
-
-         Args:
-             transformation: The multi-routing transformation function.
-         """
-         self._multipleTransformations = transformation
-
-
- class DeadLetterModel(BaseModel, Generic[T]):
-     model_config = ConfigDict(alias_generator=AliasGenerator(
-         serialization_alias=to_camel,
-     ))
-     original_record: Any
-     error_message: str
-     error_type: str
-     failed_at: datetime.datetime
-     source: Literal["api", "transform", "table"]
-
-     def as_typed(self) -> T:
-         return self._t.model_validate(self.original_record)
-
-
- class DeadLetterQueue(Stream, Generic[T]):
-     """A specialized Stream for handling failed records.
-
-     Dead letter queues store records that failed during processing, along with
-     error information to help diagnose and potentially recover from failures.
-
-     Attributes:
-         All attributes inherited from Stream.
-     """
-
-     _model_type: type[T]
-
-     def __init__(self, name: str, config: StreamConfig = StreamConfig(), **kwargs):
-         """Initialize a new DeadLetterQueue.
-
-         Args:
-             name: The name of the dead letter queue stream.
-             config: Configuration for the stream.
-         """
-         self._model_type = self._get_type(kwargs)
-         kwargs["t"] = DeadLetterModel[self._model_type]
-         super().__init__(name, config, **kwargs)
-
-     def add_transform(self, destination: Stream[U], transformation: Callable[[DeadLetterModel[T]], ZeroOrMany[U]],
-                       config: TransformConfig = None):
-         def wrapped_transform(record: DeadLetterModel[T]):
-             record._t = self._model_type
-             return transformation(record)
-
-         config = config or TransformConfig()
-         super().add_transform(destination, wrapped_transform, config)
-
-     def add_consumer(self, consumer: Callable[[DeadLetterModel[T]], None], config: ConsumerConfig = None):
-         def wrapped_consumer(record: DeadLetterModel[T]):
-             record._t = self._model_type
-             return consumer(record)
-
-         config = config or ConsumerConfig()
-         super().add_consumer(wrapped_consumer, config)
-
-     def set_multi_transform(self, transformation: Callable[[DeadLetterModel[T]], list[_RoutedMessage]]):
-         def wrapped_transform(record: DeadLetterModel[T]):
-             record._t = self._model_type
-             return transformation(record)
-
-         super().set_multi_transform(wrapped_transform)
-
-
- class IngestConfig(BaseModel):
-     """Basic configuration for an ingestion point.
-
-     Attributes:
-         version: Optional version string.
-         metadata: Optional metadata for the ingestion point.
-     """
-     version: Optional[str] = None
-     metadata: Optional[dict] = None
-
-
- @dataclasses.dataclass
- class IngestConfigWithDestination[T: BaseModel]:
-     """Ingestion configuration that includes the mandatory destination stream.
-
-     Attributes:
-         destination: The `Stream` where ingested data will be sent.
-         version: Optional version string.
-         metadata: Optional metadata for the ingestion configuration.
-     """
-     destination: Stream[T]
-     dead_letter_queue: Optional[DeadLetterQueue[T]] = None
-     version: Optional[str] = None
-     metadata: Optional[dict] = None
-
-
- class IngestPipelineConfig(BaseModel):
-     """Configuration for creating a complete ingestion pipeline.
-
-     Defines which components (table, stream, ingest API) should be created.
-     Set a component to `True` for default settings, `False` to disable, or provide
-     a specific config object (`OlapConfig`, `StreamConfig`, `IngestConfig`).
-
-     Attributes:
-         table: Configuration for the OLAP table component.
-         stream: Configuration for the stream component.
-         ingest: Configuration for the ingest API component.
-         version: Optional version string applied to all created components.
-         metadata: Optional metadata for the ingestion pipeline.
-     """
-     table: bool | OlapConfig = True
-     stream: bool | StreamConfig = True
-     ingest: bool | IngestConfig = True
-     dead_letter_queue: bool | StreamConfig = True
-     version: Optional[str] = None
-     metadata: Optional[dict] = None
-
-
- class IngestApi(TypedMooseResource, Generic[T]):
-     """Represents an Ingestion API endpoint typed with a Pydantic model.
-
-     This endpoint receives data (matching schema `T`) and sends it to a configured
-     destination stream.
-
-     Args:
-         name: The name of the ingestion API endpoint.
-         config: Configuration specifying the destination stream and data format.
-         t: The Pydantic model defining the expected input data schema
-             (passed via `IngestApi[MyModel](...)`).
-
-     Attributes:
-         config (IngestConfigWithDestination[T]): The configuration for this API.
-         columns (Columns[T]): Helper for accessing input field names safely.
-         name (str): The name of the API.
-         model_type (type[T]): The Pydantic model associated with this API's input.
-     """
-     config: IngestConfigWithDestination[T]
-
-     def __init__(self, name: str, config: IngestConfigWithDestination[T], **kwargs):
-         super().__init__()
-         self._set_type(name, self._get_type(kwargs))
-         self.config = config
-         self.metadata = getattr(config, 'metadata', None)
-         _ingest_apis[name] = self
-
-
- class IngestPipeline(TypedMooseResource, Generic[T]):
-     """Creates and configures a linked Table, Stream, and Ingest API pipeline.
-
-     Simplifies the common pattern of ingesting data through an API, processing it
-     in a stream, and storing it in a table.
-
-     Args:
-         name: The base name used for all created components (table, stream, API).
-         config: Specifies which components to create and their configurations.
-         t: The Pydantic model defining the data schema for all components
-             (passed via `IngestPipeline[MyModel](...)`).
-
-     Attributes:
-         table: The created `OlapTable` instance, if configured.
-         stream: The created `Stream` instance, if configured.
-         ingest_api: The created `IngestApi` instance, if configured.
-         dead_letter_queue: The created `DeadLetterQueue` instance, if configured.
-         columns (Columns[T]): Helper for accessing data field names safely.
-         name (str): The base name of the pipeline.
-         model_type (type[T]): The Pydantic model associated with this pipeline.
-     """
-     table: Optional[OlapTable[T]] = None
-     stream: Optional[Stream[T]] = None
-     ingest_api: Optional[IngestApi[T]] = None
-     dead_letter_queue: Optional[DeadLetterQueue[T]] = None
-     metadata: Optional[dict] = None
-
-     def get_table(self) -> OlapTable[T]:
-         """Retrieves the pipeline's OLAP table component.
-
-         Raises:
-             ValueError: If the table was not configured for this pipeline.
-
-         Returns:
-             The `OlapTable` instance.
-         """
-         if self.table is None:
-             raise ValueError("Table was not configured for this pipeline")
-         return self.table
-
-     def get_stream(self) -> Stream[T]:
-         """Retrieves the pipeline's stream component.
-
-         Raises:
-             ValueError: If the stream was not configured for this pipeline.
-
-         Returns:
-             The `Stream` instance.
-         """
-         if self.stream is None:
-             raise ValueError("Stream was not configured for this pipeline")
-         return self.stream
-
-     def get_dead_letter_queue(self) -> Stream[T]:
-         """Retrieves the pipeline's dead letter queue.
-
-         Raises:
-             ValueError: If the dead letter queue was not configured for this pipeline.
-
-         Returns:
-             The `Stream` instance.
-         """
-         if self.dead_letter_queue is None:
-             raise ValueError("DLQ was not configured for this pipeline")
-         return self.dead_letter_queue
-
-     def get_ingest_api(self) -> IngestApi[T]:
-         """Retrieves the pipeline's Ingestion API component.
-
-         Raises:
-             ValueError: If the Ingest API was not configured for this pipeline.
-
-         Returns:
-             The `IngestApi` instance.
-         """
-         if self.ingest_api is None:
-             raise ValueError("Ingest API was not configured for this pipeline")
-         return self.ingest_api
-
-     def __init__(self, name: str, config: IngestPipelineConfig, **kwargs):
-         super().__init__()
-         self._set_type(name, self._get_type(kwargs))
-         self.metadata = config.metadata
-         table_metadata = config.metadata
-         stream_metadata = config.metadata
-         ingest_metadata = config.metadata
-         if config.table:
-             table_config = OlapConfig() if config.table is True else config.table
-             if config.version:
-                 table_config.version = config.version
-             table_config.metadata = table_metadata
-             self.table = OlapTable(name, table_config, t=self._t)
-         if config.stream:
-             stream_config = StreamConfig() if config.stream is True else config.stream
-             if config.table and stream_config.destination is not None:
-                 raise ValueError("The destination of the stream should be the table created in the IngestPipeline")
-             stream_config.destination = self.table
-             if config.version:
-                 stream_config.version = config.version
-             stream_config.metadata = stream_metadata
-             self.stream = Stream(name, stream_config, t=self._t)
-         if config.dead_letter_queue:
-             stream_config = StreamConfig() if config.dead_letter_queue is True else config.dead_letter_queue
-             if config.version:
-                 stream_config.version = config.version
-             stream_config.metadata = stream_metadata
-             self.dead_letter_queue = DeadLetterQueue(f"{name}DeadLetterQueue", stream_config, t=self._t)
-         if config.ingest:
-             if self.stream is None:
-                 raise ValueError("Ingest API needs a stream to write to.")
-             ingest_config_dict = (
-                 IngestConfig() if config.ingest is True else config.ingest
-             ).model_dump()
-             ingest_config_dict["destination"] = self.stream
-             if config.version:
-                 ingest_config_dict["version"] = config.version
-             if self.dead_letter_queue:
-                 ingest_config_dict["dead_letter_queue"] = self.dead_letter_queue
-             ingest_config_dict["metadata"] = ingest_metadata
-             ingest_config = IngestConfigWithDestination(**ingest_config_dict)
-             self.ingest_api = IngestApi(name, ingest_config, t=self._t)
-
-
- class EgressConfig(BaseModel):
-     """Configuration for Consumption (Egress) APIs.
-
-     Attributes:
-         version: Optional version string.
-         metadata: Optional metadata for the consumption API.
-     """
-     version: Optional[str] = None
-     metadata: Optional[dict] = None
-
-
- class ConsumptionApi(BaseTypedResource, Generic[T, U]):
-     """Represents a Consumption (Egress) API endpoint.
-
-     Allows querying data, typically powered by a user-defined function.
-     Requires two Pydantic models: `T` for query parameters and `U` for the response body.
-
-     Args:
-         name: The name of the consumption API endpoint.
-         query_function: The callable that executes the query logic.
-             It receives parameters matching model `T` (and potentially
-             other runtime utilities) and should return data matching model `U`.
-         config: Optional configuration (currently only `version`).
-         t: A tuple containing the input (`T`) and output (`U`) Pydantic models
-             (passed via `ConsumptionApi[InputModel, OutputModel](...)`).
-
-     Attributes:
-         config (EgressConfig): Configuration for the API.
-         query_function (Callable[..., U]): The handler function for the API.
-         name (str): The name of the API.
-         model_type (type[T]): The Pydantic model for the input/query parameters.
-         return_type (type[U]): The Pydantic model for the response body.
-     """
-     config: EgressConfig
-     query_function: Callable[..., U]
-     _u: type[U]
-
-     def __class_getitem__(cls, items):
-         # Handle two type parameters
-         if not isinstance(items, tuple) or len(items) != 2:
-             raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")
-         input_type, output_type = items
-
-         def curried_constructor(*args, **kwargs):
-             return cls(t=(input_type, output_type), *args, **kwargs)
-
-         return curried_constructor
-
-     def __init__(self, name: str, query_function: Callable[..., U], config: EgressConfig = EgressConfig(), **kwargs):
-         super().__init__()
-         self._set_type(name, self._get_type(kwargs))
-         self.config = config
-         self.query_function = query_function
-         self.metadata = config.metadata
-         _egress_apis[name] = self
-
-     @classmethod
-     def _get_type(cls, keyword_args: dict):
-         t = keyword_args.get('t')
-         if not isinstance(t, tuple) or len(t) != 2:
-             raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")
-
-         input_type, output_type = t
-         if not isinstance(input_type, type) or not issubclass(input_type, BaseModel):
-             raise ValueError(f"Input type {input_type} is not a Pydantic model")
-         if not isinstance(output_type, type) or not issubclass(output_type, BaseModel):
-             raise ValueError(f"Output type {output_type} is not a Pydantic model")
-         return t
-
-     def _set_type(self, name: str, t: tuple[type[T], type[U]]):
-         input_type, output_type = t
-         self._t = input_type
-         self._u = output_type
-         self.name = name
-
-     def return_type(self) -> type[U]:
-         """Get the Pydantic model type for the API's response body."""
-         return self._u
-
-     def get_response_schema(self) -> JsonSchemaValue:
-         """Generates the JSON schema for the API's response body model (`U`).
-
-         Returns:
-             A dictionary representing the JSON schema.
-         """
-         from pydantic.type_adapter import TypeAdapter
-         return TypeAdapter(self.return_type).json_schema(
-             ref_template='#/components/schemas/{model}'
-         )
-
-
- def _get_consumption_api(name: str) -> Optional[ConsumptionApi]:
-     """Internal function to retrieve a registered ConsumptionApi by name."""
-     return _egress_apis.get(name)
-
-
- # Removed BaseModel inheritance
- class SqlResource:
-     """Base class for SQL resources like Views and Materialized Views.
-
-     Handles the definition of setup (CREATE) and teardown (DROP) SQL commands
-     and tracks data dependencies.
-
-     Attributes:
-         name (str): The name of the SQL resource (e.g., view name).
-         setup (list[str]): SQL commands to create the resource.
-         teardown (list[str]): SQL commands to drop the resource.
-         pulls_data_from (list[SqlObject]): List of tables/views this resource reads from.
-         pushes_data_to (list[SqlObject]): List of tables/views this resource writes to.
-         kind: The kind of the SQL resource (e.g., "SqlResource").
-     """
-     setup: list[str]
-     teardown: list[str]
-     name: str
-     kind: str = "SqlResource"
-     pulls_data_from: list[Union[OlapTable, "SqlResource"]]
-     pushes_data_to: list[Union[OlapTable, "SqlResource"]]
-
-     def __init__(
-         self,
-         name: str,
-         setup: list[str],
-         teardown: list[str],
-         pulls_data_from: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
-         pushes_data_to: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
-         metadata: dict = None
-     ):
-         self.name = name
-         self.setup = setup
-         self.teardown = teardown
-         self.pulls_data_from = pulls_data_from or []
-         self.pushes_data_to = pushes_data_to or []
-         self.metadata = metadata
-         _sql_resources[name] = self
-
-
- class View(SqlResource):
-     """Represents a standard SQL database View.
-
-     Args:
-         name: The name of the view to be created.
-         select_statement: The SQL SELECT statement defining the view.
-         base_tables: A list of `OlapTable`, `View`, or `MaterializedView` objects
-             that this view depends on.
-     """
-
-     def __init__(self, name: str, select_statement: str, base_tables: list[Union[OlapTable, SqlResource]],
-                  metadata: dict = None):
-         setup = [
-             f"CREATE VIEW IF NOT EXISTS {name} AS {select_statement}".strip()
-         ]
-         teardown = [f"DROP VIEW IF EXISTS {name}"]
-         super().__init__(name, setup, teardown, pulls_data_from=base_tables, metadata=metadata)
-
-
- class MaterializedViewOptions(BaseModel):
-     """Configuration options for creating a Materialized View.
-
-     Attributes:
-         select_statement: The SQL SELECT statement defining the view's data.
-         select_tables: List of source tables/views the select statement reads from.
-         table_name: The name of the underlying target table storing the materialized data.
-         materialized_view_name: The name of the MATERIALIZED VIEW object itself.
-         engine: Optional ClickHouse engine for the target table.
-         order_by_fields: Optional ordering key for the target table (required for
-             engines like ReplacingMergeTree).
-         model_config: ConfigDict for Pydantic validation
-     """
-     select_statement: str
-     select_tables: list[Union[OlapTable, SqlResource]]
-     table_name: str
-     materialized_view_name: str
-     engine: Optional[ClickHouseEngines] = None
-     order_by_fields: Optional[list[str]] = None
-     metadata: Optional[dict] = None
-     # Ensure arbitrary types are allowed for Pydantic validation
-     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-
- class MaterializedView(SqlResource, BaseTypedResource, Generic[T]):
-     """Represents a ClickHouse Materialized View.
-
-     Encapsulates the MATERIALIZED VIEW definition and the underlying target `OlapTable`
-     that stores the data.
-
-     Args:
-         options: Configuration defining the select statement, names, and dependencies.
-         t: The Pydantic model defining the schema of the target table
-             (passed via `MaterializedView[MyModel](...)`).
-
-     Attributes:
-         target_table (OlapTable[T]): The `OlapTable` instance storing the materialized data.
-         config (MaterializedViewOptions): The configuration options used to create the view.
-         name (str): The name of the MATERIALIZED VIEW object.
-         model_type (type[T]): The Pydantic model associated with the target table.
-         setup (list[str]): SQL commands to create the view and populate the target table.
-         teardown (list[str]): SQL command to drop the view.
-         pulls_data_from (list[SqlObject]): Source tables/views.
-         pushes_data_to (list[SqlObject]): The target table.
-     """
-     target_table: OlapTable[T]
-     config: MaterializedViewOptions
-
-     def __init__(
-         self,
-         options: MaterializedViewOptions,
-         **kwargs
-     ):
-         self._set_type(options.materialized_view_name, self._get_type(kwargs))
-
-         setup = [
-             f"CREATE MATERIALIZED VIEW IF NOT EXISTS {options.materialized_view_name} TO {options.table_name} AS {options.select_statement}",
-             f"INSERT INTO {options.table_name} {options.select_statement}"
-         ]
-         teardown = [f"DROP VIEW IF EXISTS {options.materialized_view_name}"]
-
-         target_table = OlapTable(
-             name=options.table_name,
-             config=OlapConfig(
-                 order_by_fields=options.order_by_fields or [],
-                 engine=options.engine
-             ),
-             t=self._t
-         )
-
-         super().__init__(
-             options.materialized_view_name,
-             setup,
-             teardown,
-             pulls_data_from=options.select_tables,
-             pushes_data_to=[target_table],
-             metadata=options.metadata
-         )
-
-         self.target_table = target_table
-         self.config = options
-
-
- @dataclasses.dataclass
- class TaskConfig(Generic[T_none, U_none]):
-     """Configuration for a Task.
-
-     Attributes:
-         run: The handler function that executes the task logic.
-         on_complete: Optional list of tasks to run after this task completes.
-         timeout: Optional timeout string (e.g. "5m", "1h").
-         retries: Optional number of retry attempts.
-     """
-     run: TaskRunFunc[T_none, U_none]
-     on_complete: Optional[list["Task[U_none, Any]"]] = None
-     timeout: Optional[str] = None
-     retries: Optional[int] = None
-
-
- class Task(TypedMooseResource, Generic[T_none, U_none]):
-     """Represents a task that can be executed as part of a workflow.
-
-     Tasks are the basic unit of work in a workflow, with typed input and output.
-     They can be chained together using the on_complete configuration.
-
-     Args:
-         name: The name of the task.
-         config: Configuration specifying the task's behavior.
-         t: The Pydantic model defining the task's input schema
-             (passed via `Task[InputModel, OutputModel](...)`).
-             OutputModel can be None for tasks that don't return a value.
-
-     Attributes:
-         config (TaskConfig[T, U]): The configuration for this task.
-         columns (Columns[T]): Helper for accessing input field names safely.
-         name (str): The name of the task.
-         model_type (type[T]): The Pydantic model associated with this task's input.
-     """
-     config: TaskConfig[T_none, U_none]
-
-     def __init__(self, name: str, config: TaskConfig[T_none, U_none], **kwargs):
-         super().__init__()
-         self._set_type(name, self._get_type(kwargs))
-         self.config = config
-
-     @classmethod
-     def _get_type(cls, keyword_args: dict):
-         t = keyword_args.get('t')
-         if t is None:
-             raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")
-         if not isinstance(t, tuple) or len(t) != 2:
-             raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")
-
-         input_type, output_type = t
-         if input_type is not None and (not isinstance(input_type, type) or not issubclass(input_type, BaseModel)):
-             raise ValueError(f"Input type {input_type} is not a Pydantic model or None")
-         if output_type is not None and (not isinstance(output_type, type) or not issubclass(output_type, BaseModel)):
-             raise ValueError(f"Output type {output_type} is not a Pydantic model or None")
-         return t
-
-     def _set_type(self, name: str, t: tuple[type[T_none], type[U_none]]):
-         input_type, output_type = t
-         self._t = input_type
-         self._u = output_type
-         self.name = name
-
-
- @dataclasses.dataclass
- class WorkflowConfig:
-     """Configuration for a workflow.
-
-     Attributes:
-         starting_task: The first task to execute in the workflow.
-         retries: Optional number of retry attempts for the entire workflow.
-         timeout: Optional timeout string for the entire workflow.
-         schedule: Optional cron-like schedule string for recurring execution.
-     """
-     starting_task: Task[Any, Any]
-     retries: Optional[int] = None
-     timeout: Optional[str] = None
-     schedule: Optional[str] = None
-
-
- class Workflow:
-     """Represents a workflow composed of one or more tasks.
-
-     Workflows define a sequence of tasks to be executed, with optional
-     scheduling, retries, and timeouts at the workflow level.
-
-     Args:
-         name: The name of the workflow.
-         config: Configuration specifying the workflow's behavior.
-
-     Attributes:
-         name (str): The name of the workflow.
-         config (WorkflowConfig): The configuration for this workflow.
-     """
-     def __init__(self, name: str, config: WorkflowConfig):
-         self.name = name
-         self.config = config
-         # Register the workflow in the internal registry
-         _workflows[name] = self
-
-     def get_task_names(self) -> list[str]:
-         """Get a list of all task names in this workflow.
-
-         Returns:
-             list[str]: List of task names in the workflow, including all child tasks
-         """
-         def collect_task_names(task: Task) -> list[str]:
-             names = [task.name]
-             if task.config.on_complete:
-                 for child in task.config.on_complete:
-                     names.extend(collect_task_names(child))
-             return names
-
-         return collect_task_names(self.config.starting_task)
-
-     def get_task(self, task_name: str) -> Optional[Task]:
-         """Find a task in this workflow by name.
-
-         Args:
-             task_name: The name of the task to find
-
-         Returns:
-             Optional[Task]: The task if found, None otherwise
-         """
-         def find_task(task: Task) -> Optional[Task]:
-             if task.name == task_name:
-                 return task
-             if task.config.on_complete:
-                 for child in task.config.on_complete:
-                     found = find_task(child)
-                     if found:
-                         return found
-             return None
-
-         return find_task(self.config.starting_task)
-
- def _get_workflows() -> dict[str, Workflow]:
-     return _workflows
-
- def _get_workflow(name: str) -> Optional[Workflow]:
-     return _workflows.get(name)
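
For reviewers who have not used the removed module: its docstrings describe a declarative, Pydantic-driven API in which IngestPipeline wires an OlapTable, a Stream, an IngestApi, and a DeadLetterQueue from one model, and ConsumptionApi exposes typed egress endpoints. The sketch below is illustrative only; UserEvent, SummaryParams, and EventSummary are hypothetical models, and it assumes the pre-0.4.220 moose_lib.dmv2 API exactly as shown in the deleted file above.

# Hedged usage sketch (not part of the package or this diff); assumes moose-lib 0.4.218 is installed.
from pydantic import BaseModel
from moose_lib.dmv2 import ConsumptionApi, IngestPipeline, IngestPipelineConfig

class UserEvent(BaseModel):
    user_id: int
    event_name: str

class SummaryParams(BaseModel):
    event_name: str

class EventSummary(BaseModel):
    event_name: str
    count: int

# One Pydantic model drives the table, stream, ingest API, and dead letter queue.
pipeline = IngestPipeline[UserEvent]("user_events", IngestPipelineConfig())

# Consumers run for every record that arrives on the stream (side effects only).
pipeline.get_stream().add_consumer(lambda event: print(event.event_name))

# Typed egress endpoint: SummaryParams in, EventSummary out.
def summarize(params: SummaryParams) -> EventSummary:
    return EventSummary(event_name=params.event_name, count=1)  # hypothetical handler

summary_api = ConsumptionApi[SummaryParams, EventSummary]("event_summary", summarize)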