pytrilogy 0.0.1.102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
- pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
- pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
- pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
- pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
- pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
- trilogy/__init__.py +8 -0
- trilogy/compiler.py +0 -0
- trilogy/constants.py +30 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +3 -0
- trilogy/core/enums.py +270 -0
- trilogy/core/env_processor.py +33 -0
- trilogy/core/environment_helpers.py +156 -0
- trilogy/core/ergonomics.py +187 -0
- trilogy/core/exceptions.py +23 -0
- trilogy/core/functions.py +320 -0
- trilogy/core/graph_models.py +55 -0
- trilogy/core/internal.py +37 -0
- trilogy/core/models.py +3145 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +603 -0
- trilogy/core/processing/graph_utils.py +44 -0
- trilogy/core/processing/node_generators/__init__.py +25 -0
- trilogy/core/processing/node_generators/basic_node.py +71 -0
- trilogy/core/processing/node_generators/common.py +239 -0
- trilogy/core/processing/node_generators/concept_merge.py +152 -0
- trilogy/core/processing/node_generators/filter_node.py +83 -0
- trilogy/core/processing/node_generators/group_node.py +92 -0
- trilogy/core/processing/node_generators/group_to_node.py +99 -0
- trilogy/core/processing/node_generators/merge_node.py +148 -0
- trilogy/core/processing/node_generators/multiselect_node.py +189 -0
- trilogy/core/processing/node_generators/rowset_node.py +130 -0
- trilogy/core/processing/node_generators/select_node.py +328 -0
- trilogy/core/processing/node_generators/unnest_node.py +37 -0
- trilogy/core/processing/node_generators/window_node.py +85 -0
- trilogy/core/processing/nodes/__init__.py +76 -0
- trilogy/core/processing/nodes/base_node.py +251 -0
- trilogy/core/processing/nodes/filter_node.py +49 -0
- trilogy/core/processing/nodes/group_node.py +110 -0
- trilogy/core/processing/nodes/merge_node.py +326 -0
- trilogy/core/processing/nodes/select_node_v2.py +198 -0
- trilogy/core/processing/nodes/unnest_node.py +54 -0
- trilogy/core/processing/nodes/window_node.py +34 -0
- trilogy/core/processing/utility.py +278 -0
- trilogy/core/query_processor.py +331 -0
- trilogy/dialect/__init__.py +0 -0
- trilogy/dialect/base.py +679 -0
- trilogy/dialect/bigquery.py +80 -0
- trilogy/dialect/common.py +43 -0
- trilogy/dialect/config.py +55 -0
- trilogy/dialect/duckdb.py +83 -0
- trilogy/dialect/enums.py +95 -0
- trilogy/dialect/postgres.py +86 -0
- trilogy/dialect/presto.py +82 -0
- trilogy/dialect/snowflake.py +82 -0
- trilogy/dialect/sql_server.py +89 -0
- trilogy/docs/__init__.py +0 -0
- trilogy/engine.py +48 -0
- trilogy/executor.py +242 -0
- trilogy/hooks/__init__.py +0 -0
- trilogy/hooks/base_hook.py +37 -0
- trilogy/hooks/graph_hook.py +24 -0
- trilogy/hooks/query_debugger.py +133 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +176 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +2 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +1951 -0
- trilogy/parsing/render.py +483 -0
- trilogy/py.typed +0 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/trilogy.py +127 -0
- trilogy/utility.py +31 -0
trilogy/core/models.py
ADDED
|
@@ -0,0 +1,3145 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import difflib
|
|
3
|
+
import os
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import (
|
|
6
|
+
Dict,
|
|
7
|
+
TypeVar,
|
|
8
|
+
List,
|
|
9
|
+
Optional,
|
|
10
|
+
Union,
|
|
11
|
+
Set,
|
|
12
|
+
Any,
|
|
13
|
+
Sequence,
|
|
14
|
+
ValuesView,
|
|
15
|
+
Callable,
|
|
16
|
+
Annotated,
|
|
17
|
+
get_args,
|
|
18
|
+
Generic,
|
|
19
|
+
Tuple,
|
|
20
|
+
Type,
|
|
21
|
+
ItemsView,
|
|
22
|
+
)
|
|
23
|
+
from pydantic_core import core_schema
|
|
24
|
+
from pydantic.functional_validators import PlainValidator
|
|
25
|
+
from pydantic import (
|
|
26
|
+
BaseModel,
|
|
27
|
+
Field,
|
|
28
|
+
ConfigDict,
|
|
29
|
+
field_validator,
|
|
30
|
+
ValidationInfo,
|
|
31
|
+
ValidatorFunctionWrapHandler,
|
|
32
|
+
computed_field,
|
|
33
|
+
)
|
|
34
|
+
from lark.tree import Meta
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from trilogy.constants import logger, DEFAULT_NAMESPACE, ENV_CACHE_NAME, MagicConstants
|
|
37
|
+
from trilogy.core.constants import ALL_ROWS_CONCEPT, INTERNAL_NAMESPACE
|
|
38
|
+
from trilogy.core.enums import (
|
|
39
|
+
InfiniteFunctionArgs,
|
|
40
|
+
Purpose,
|
|
41
|
+
JoinType,
|
|
42
|
+
Ordering,
|
|
43
|
+
Modifier,
|
|
44
|
+
FunctionType,
|
|
45
|
+
FunctionClass,
|
|
46
|
+
BooleanOperator,
|
|
47
|
+
ComparisonOperator,
|
|
48
|
+
WindowOrder,
|
|
49
|
+
PurposeLineage,
|
|
50
|
+
SourceType,
|
|
51
|
+
WindowType,
|
|
52
|
+
ConceptSource,
|
|
53
|
+
DatePart,
|
|
54
|
+
ShowCategory,
|
|
55
|
+
Granularity,
|
|
56
|
+
)
|
|
57
|
+
from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
|
|
58
|
+
from trilogy.utility import unique
|
|
59
|
+
from collections import UserList
|
|
60
|
+
from trilogy.utility import string_to_hash
|
|
61
|
+
from functools import cached_property
|
|
62
|
+
from abc import ABC
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Tag identifying this module; presumably prefixed to its log messages
# (no usage is visible in this part of the file -- confirm).
LOGGER_PREFIX = "[MODELS]"

# Generic type variables; VT parameterizes ListWrapper.
KT = TypeVar("KT")
VT = TypeVar("VT")
LT = TypeVar("LT")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_version():
    """Return ``trilogy.__version__``.

    The package is imported when the function is called rather than when this
    module is loaded.
    """
    from trilogy import __version__ as package_version

    return package_version
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_concept_arguments(expr) -> List["Concept"]:
    """Collect the concepts referenced by ``expr``.

    Args:
        expr: Any value. A ``Concept`` is returned as a single-element list;
            expression objects that expose ``concept_arguments`` contribute
            those; anything else contributes nothing.

    Returns:
        A new list of concepts (possibly empty).
    """
    if isinstance(expr, Concept):
        return [expr]

    expression_types = (
        Comparison,
        Conditional,
        Function,
        Parenthetical,
        AggregateWrapper,
        CaseWhen,
        CaseElse,
    )
    if isinstance(expr, expression_types):
        # Copy so callers never hold the expression's own list.
        return list(expr.concept_arguments)

    return []
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Anything accepted where a type is expected: a primitive DataType, one of the
# container types, or a Concept (ListType.value_data_type resolves a Concept to
# its datatype). Members are forward references because the classes are defined
# later in the module.
ALL_TYPES = Union["DataType", "MapType", "ListType", "StructType", "Concept"]

# Union of the model classes named below.
# NOTE(review): presumably the classes that implement with_namespace -- confirm.
NAMESPACED_TYPES = Union[
    "WindowItem",
    "FilterItem",
    "Conditional",
    "Comparison",
    "Concept",
    "CaseWhen",
    "CaseElse",
    "Function",
    "AggregateWrapper",
    "Parenthetical",
]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class Namespaced(ABC):
    """Mixin for objects that can be re-homed under a namespace."""

    def with_namespace(self, namespace: str):
        """Return a namespaced version of this object.

        Subclasses must override this; the base implementation always raises.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise NotImplementedError
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class SelectGrain(ABC):
    """Mixin for objects that can be rebuilt against a given grain."""

    def with_select_grain(self, grain: "Grain"):
        """Return a version of this object bound to ``grain``.

        Subclasses must override this; the base implementation always raises.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise NotImplementedError
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class DataType(Enum):
    """Enumeration of the data types a value can have.

    Exposes ``data_type`` so that a plain ``DataType`` can be used
    interchangeably with the container types (``ListType``, ``MapType``,
    ``StructType``), which provide a property of the same name.
    """

    # Primitive types
    STRING = "string"
    BOOL = "bool"
    MAP = "map"
    LIST = "list"
    NUMBER = "number"
    FLOAT = "float"
    NUMERIC = "numeric"
    INTEGER = "int"
    BIGINT = "bigint"
    DATE = "date"
    DATETIME = "datetime"
    TIMESTAMP = "timestamp"
    ARRAY = "array"
    DATE_PART = "date_part"
    STRUCT = "struct"

    # Granular types
    UNIX_SECONDS = "unix_seconds"

    # Parsing placeholder
    UNKNOWN = "unknown"

    @property
    def data_type(self):
        """Return this member itself."""
        return self
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class ListType(BaseModel):
    """Immutable description of a list whose elements have type ``type``."""

    model_config = ConfigDict(frozen=True)
    # Element type; may be a Concept, whose datatype is then the element type.
    type: ALL_TYPES

    def __str__(self) -> str:
        return f"ListType<{self.type}>"

    @property
    def data_type(self):
        """The container's own data type: always ``DataType.LIST``."""
        return DataType.LIST

    @property
    def value(self):
        """String value of ``data_type``."""
        return self.data_type.value

    @property
    def value_data_type(self) -> DataType | StructType | MapType | ListType:
        """Data type of the list elements, resolving a Concept to its datatype."""
        element = self.type
        return element.datatype if isinstance(element, Concept) else element
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class MapType(BaseModel):
    """Description of a map with typed keys and typed contents."""

    # Type of the map keys.
    key_type: DataType
    # Type of the mapped values.
    content_type: ALL_TYPES

    @property
    def data_type(self):
        """The container's own data type: always ``DataType.MAP``."""
        return DataType.MAP

    @property
    def value(self):
        """String value of ``data_type``."""
        return self.data_type.value
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class StructType(BaseModel):
    """Description of a struct made up of typed fields."""

    # Types of the struct members.
    fields: List[ALL_TYPES]
    # Concepts keyed by a string; empty by default.
    # NOTE(review): presumably keyed by field name -- confirm against callers.
    fields_map: Dict[str, Concept] = Field(default_factory=dict)

    @property
    def data_type(self):
        """The container's own data type: always ``DataType.STRUCT``."""
        return DataType.STRUCT

    @property
    def value(self):
        """String value of ``data_type``."""
        return self.data_type.value
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class ListWrapper(Generic[VT], UserList):
    """List subclass marking list objects produced by parsing.

    Carries the data type of its elements in ``type`` so parsed lists can be
    told apart from ordinary Python lists.
    """

    def __init__(self, *args, type: DataType, **kwargs):
        """Build the list from ``*args``/``**kwargs`` and record ``type``."""
        super().__init__(*args, **kwargs)
        self.type = type

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a plain ``List`` first, then wrap the result via ``validate``."""
        type_args = get_args(source_type)
        # Reuse the parametrization (e.g. ListWrapper[int] -> List[int]) if any.
        list_annotation = List[type_args] if type_args else List  # type: ignore
        list_schema = handler(list_annotation)
        return core_schema.no_info_after_validator_function(cls.validate, list_schema)

    @classmethod
    def validate(cls, v):
        """Wrap an already-validated list, typing it from its first element.

        NOTE(review): indexes ``v[0]``, so an empty list raises ``IndexError``.
        """
        first_element_type = arg_to_datatype(v[0])
        return cls(v, type=first_element_type)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class Metadata(BaseModel):
    """Container for descriptive metadata attached to a model object.

    TODO: support arbitrary tags
    """

    # Free-text description; None when absent.
    description: Optional[str] = None
    # Line number associated with the object; None when unknown.
    line_number: Optional[int] = None
    # How the concept came to exist; defaults to manual.
    concept_source: ConceptSource = ConceptSource.MANUAL
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def lineage_validator(
    v: Any, handler: ValidatorFunctionWrapHandler, info: ValidationInfo
) -> Union[Function, WindowItem, FilterItem, AggregateWrapper]:
    """Check that a lineage value is one of the supported lineage types.

    Falsy values pass through unchanged. ``handler`` and ``info`` are accepted
    for the wrap-validator signature but are not used.

    Raises:
        ValueError: If ``v`` is truthy and not a ``Function``, ``WindowItem``,
            ``FilterItem`` or ``AggregateWrapper``.
    """
    if not v or isinstance(v, (Function, WindowItem, FilterItem, AggregateWrapper)):
        return v
    raise ValueError(v)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def empty_grain() -> Grain:
    """Return a new ``Grain`` that has no components."""
    no_components: list = []
    return Grain(components=no_components)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class Concept(Namespaced, SelectGrain, BaseModel):
    """A named, typed element of the model, optionally derived from others.

    A concept is identified by ``namespace`` and ``name`` (see ``address``).
    When ``lineage`` is set, the concept is computed from the expression it
    holds; ``derivation`` classifies that expression.
    """

    name: str
    datatype: DataType | ListType | StructType | MapType
    purpose: Purpose
    # Replaced with a default Metadata() by metadata_validation when falsy.
    metadata: Optional[Metadata] = Field(
        default_factory=lambda: Metadata(description=None, line_number=None),
        validate_default=True,
    )
    # Expression this concept is derived from; None when there is none.
    lineage: Optional[
        Union[
            Function,
            WindowItem,
            FilterItem,
            AggregateWrapper,
            RowsetItem,
            MultiSelectStatement | MergeStatement,
        ]
    ] = None
    # Falsy values are normalized to DEFAULT_NAMESPACE by namespace_validation.
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
    # Lists are coerced to tuples by keys_validator.
    keys: Optional[Tuple["Concept", ...]] = None
    # Always passes through parse_grain, which derives a grain when none is given.
    grain: "Grain" = Field(default=None, validate_default=True)
    modifiers: Optional[List[Modifier]] = Field(default_factory=list)

    def __hash__(self):
        # Hash follows __str__, i.e. namespace, name and grain component addresses.
        return hash(str(self))

    @field_validator("keys", mode="before")
    @classmethod
    def keys_validator(cls, v, info: ValidationInfo):
        """Accept ``None``, a tuple, or a list (converted to a tuple).

        Raises:
            ValueError: If ``v`` is any other type.
        """
        if v is None:
            return v
        if not isinstance(v, (list, tuple)):
            raise ValueError(f"Keys must be a list or tuple, got {type(v)}")
        if isinstance(v, list):
            return tuple(v)
        return v

    @field_validator("namespace", mode="plain")
    @classmethod
    def namespace_validation(cls, v):
        """Fall back to ``DEFAULT_NAMESPACE`` when no namespace is given."""
        return v or DEFAULT_NAMESPACE

    @field_validator("metadata")
    @classmethod
    def metadata_validation(cls, v):
        """Fall back to an empty ``Metadata`` when none is given."""
        return v or Metadata()

    @field_validator("purpose", mode="after")
    @classmethod
    def purpose_validation(cls, v):
        """Reject ``Purpose.AUTO``.

        Raises:
            ValueError: If ``v`` is ``Purpose.AUTO``.
        """
        if v == Purpose.AUTO:
            raise ValueError("Cannot set purpose to AUTO")
        return v

    @field_validator("grain", mode="before")
    @classmethod
    def parse_grain(cls, v, info: ValidationInfo) -> Grain:
        """Derive the grain from the fields validated before it.

        * no grain given and purpose is KEY: the concept is its own grain;
        * lineage is an ``AggregateWrapper`` with ``by`` set: grain is ``by``;
        * no grain given otherwise: an empty grain;
        * a single ``Concept`` given: a grain containing just that concept.

        Raises:
            SyntaxError: If the resulting grain is falsy.
        """
        # this is silly - rethink how we do grains
        values = info.data
        if not v and values.get("purpose", None) == Purpose.KEY:
            v = Grain(
                components=[
                    Concept(
                        namespace=values.get("namespace", DEFAULT_NAMESPACE),
                        name=values["name"],
                        datatype=values["datatype"],
                        purpose=values["purpose"],
                        grain=Grain(),
                    )
                ]
            )
        elif (
            "lineage" in values
            and isinstance(values["lineage"], AggregateWrapper)
            and values["lineage"].by
        ):
            v = Grain(components=values["lineage"].by)
        elif not v:
            v = Grain(components=[])
        elif isinstance(v, Concept):
            v = Grain(components=[v])
        if not v:
            raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
        return v

    def __eq__(self, other: object):
        """Compare against another concept, or against an address string.

        A string is equal when it matches ``address``. Two concepts are equal
        when name, datatype, purpose, namespace and grain all match (``keys``
        are deliberately not compared). Anything else is unequal.

        Note that ``__hash__`` is based on ``str(self)``, which includes the
        grain, so a concept does not hash like its address string.
        """
        if isinstance(other, str):
            # Compare with the given string, not the ``str`` type itself.
            return self.address == other
        if not isinstance(other, Concept):
            return False
        return (
            self.name == other.name
            and self.datatype == other.datatype
            and self.purpose == other.purpose
            and self.namespace == other.namespace
            and self.grain == other.grain
        )

    def __str__(self):
        grain = ",".join([str(c.address) for c in self.grain.components])
        return f"{self.namespace}.{self.name}<{grain}>"

    @property
    def address(self) -> str:
        """Qualified identifier: ``<namespace>.<name>``."""
        return f"{self.namespace}.{self.name}"

    @property
    def output(self) -> "Concept":
        """Return this concept itself."""
        return self

    @property
    def safe_address(self) -> str:
        """Address with dots replaced by underscores.

        The namespace prefix is omitted for the default (or an empty) namespace.
        """
        safe_name = self.name.replace(".", "_")
        if self.namespace and self.namespace != DEFAULT_NAMESPACE:
            return f"{self.namespace.replace('.','_')}_{safe_name}"
        return safe_name

    @property
    def grain_components(self) -> List["Concept"]:
        """A copy of the grain's component list (empty if the grain is falsy)."""
        return self.grain.components_copy if self.grain else []

    def _clone_with(self, **overrides) -> "Concept":
        """Build a new instance from this one's fields, replacing ``overrides``."""
        fields = dict(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=self.grain,
            namespace=self.namespace,
            keys=self.keys,
            modifiers=self.modifiers,
        )
        fields.update(overrides)
        return self.__class__(**fields)

    def _require_concept_keys(self) -> None:
        """Raise ``ValueError`` if any entry of ``keys`` is not a ``Concept``."""
        if not all(isinstance(x, Concept) for x in self.keys or []):
            raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")

    def with_namespace(self, namespace: str) -> "Concept":
        """Return a copy placed under ``namespace``.

        Lineage, grain and keys are re-namespaced as well. An existing
        non-default namespace that differs from ``namespace`` is kept as a
        suffix (``<namespace>.<existing>``); otherwise ``namespace`` replaces it.
        """
        keep_existing = (
            self.namespace
            and self.namespace != DEFAULT_NAMESPACE
            and self.namespace != namespace
        )
        return self._clone_with(
            lineage=self.lineage.with_namespace(namespace) if self.lineage else None,
            grain=(
                self.grain.with_namespace(namespace)
                if self.grain
                else Grain(components=[])
            ),
            namespace=namespace + "." + self.namespace if keep_existing else namespace,
            keys=(
                tuple([x.with_namespace(namespace) for x in self.keys])
                if self.keys
                else None
            ),
        )

    def with_select_grain(self, grain: Optional["Grain"] = None) -> "Concept":
        """Return a copy at ``grain`` (or the current grain when not given).

        The grain is also pushed into the lineage when the lineage supports it.

        Raises:
            ValueError: If ``keys`` contains a non-``Concept`` entry.
        """
        self._require_concept_keys()
        new_grain = grain or self.grain
        new_lineage = self.lineage
        if isinstance(self.lineage, SelectGrain):
            new_lineage = self.lineage.with_select_grain(new_grain)
        return self._clone_with(lineage=new_lineage, grain=new_grain)

    def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
        """Return a copy at ``grain``, or at an empty grain when not given.

        Unlike ``with_select_grain`` the lineage is left untouched.

        Raises:
            ValueError: If ``keys`` contains a non-``Concept`` entry.
        """
        self._require_concept_keys()
        return self._clone_with(grain=grain if grain else Grain(components=[]))

    def with_default_grain(self) -> "Concept":
        """Return a copy whose grain is the default for its purpose.

        * KEY: the concept itself (at an empty grain);
        * PROPERTY: its keys plus the sources of its concept lineage arguments;
        * METRIC: an empty grain;
        * CONSTANT: itself when not derived as a constant, else unchanged;
        * anything else: unchanged.
        """
        if self.purpose == Purpose.KEY:
            # we need to make this abstract
            grain = Grain(components=[self.with_grain(Grain())], nested=True)
        elif self.purpose == Purpose.PROPERTY:
            components = []
            if self.keys:
                components = [*self.keys]
            if self.lineage:
                for item in self.lineage.arguments:
                    if isinstance(item, Concept):
                        # Sources are added whether or not the argument's keys
                        # are already among the components.
                        components += item.sources
            grain = Grain(components=components)
        elif self.purpose == Purpose.METRIC:
            grain = Grain()
        elif self.purpose == Purpose.CONSTANT:
            if self.derivation != PurposeLineage.CONSTANT:
                grain = Grain(components=[self.with_grain(Grain())], nested=True)
            else:
                grain = self.grain
        else:
            grain = self.grain  # type: ignore
        return self._clone_with(grain=grain)

    @property
    def sources(self) -> List["Concept"]:
        """All concepts this one is derived from, found recursively via lineage.

        Raises:
            SyntaxError: If a direct lineage argument has this concept's address.
        """
        if not self.lineage:
            return []
        output = []
        for item in self.lineage.arguments:
            if isinstance(item, Concept):
                if item.address == self.address:
                    raise SyntaxError(f"Concept {self.address} references itself")
                output.append(item)
                output += item.sources
        return output

    @property
    def concept_arguments(self) -> List[Concept]:
        """The lineage's concept arguments, or an empty list without lineage."""
        return self.lineage.concept_arguments if self.lineage else []

    @property
    def input(self):
        """This concept followed by all of its ``sources``."""
        return [self] + self.sources

    @property
    def derivation(self) -> PurposeLineage:
        """Classify how this concept is produced, based on its lineage."""
        lineage = self.lineage
        if lineage:
            if isinstance(lineage, WindowItem):
                return PurposeLineage.WINDOW
            if isinstance(lineage, FilterItem):
                return PurposeLineage.FILTER
            if isinstance(lineage, AggregateWrapper):
                return PurposeLineage.AGGREGATE
            if isinstance(lineage, RowsetItem):
                return PurposeLineage.ROWSET
            if isinstance(lineage, MultiSelectStatement):
                return PurposeLineage.MULTISELECT
            if isinstance(lineage, MergeStatement):
                return PurposeLineage.MERGE
            if isinstance(lineage, Function):
                if lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
                    return PurposeLineage.AGGREGATE
                if lineage.operator == FunctionType.UNNEST:
                    return PurposeLineage.UNNEST
                if lineage.operator in FunctionClass.SINGLE_ROW.value:
                    return PurposeLineage.CONSTANT
                # A function over no concepts cannot vary by row.
                if not lineage.concept_arguments:
                    return PurposeLineage.CONSTANT
                return PurposeLineage.BASIC
        if self.purpose == Purpose.CONSTANT:
            return PurposeLineage.CONSTANT
        return PurposeLineage.ROOT

    @property
    def granularity(self) -> Granularity:
        """Used to determine if concepts need to be included in grain
        calculations."""
        if self.derivation == PurposeLineage.CONSTANT:
            # constants are a single row
            return Granularity.SINGLE_ROW
        elif self.derivation == PurposeLineage.AGGREGATE:
            # if it's an aggregate grouped over all rows
            # there is only one row left and it's fine to cross_join
            if all(x.name == ALL_ROWS_CONCEPT for x in self.grain.components):
                return Granularity.SINGLE_ROW
        elif self.namespace == INTERNAL_NAMESPACE and self.name == ALL_ROWS_CONCEPT:
            return Granularity.SINGLE_ROW
        elif (
            self.lineage
            and isinstance(self.lineage, Function)
            and self.lineage.operator == FunctionType.UNNEST
        ):
            return Granularity.MULTI_ROW
        elif self.lineage and all(
            x.granularity == Granularity.SINGLE_ROW
            for x in self.lineage.concept_arguments
        ):
            # Derived purely from single-row inputs.
            return Granularity.SINGLE_ROW
        return Granularity.MULTI_ROW
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
class Grain(BaseModel):
    """A set of concepts, compared and combined by concept address."""

    # When True, components are taken as given rather than being converted
    # with safe_concept()/with_default_grain() during validation.
    nested: bool = False
    components: List[Concept] = Field(default_factory=list, validate_default=True)

    @field_validator("components")
    @classmethod
    def component_validator(cls, v, info: ValidationInfo):
        """Normalize components: de-duplicate, prune, and sort by name.

        Properties and metrics whose keys are all present in the grain are
        dropped.
        """
        values = info.data
        if not values.get("nested", False):
            v2: List[Concept] = unique(
                [safe_concept(c).with_default_grain() for c in v], "address"
            )
        else:
            v2 = unique(v, "address")
        final = []
        for sub in v2:
            if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
                if all(c in v2 for c in sub.keys):
                    continue
            final.append(sub)
        return sorted(final, key=lambda x: x.name)

    @property
    def components_copy(self) -> List[Concept]:
        """A shallow copy of the component list."""
        return [*self.components]

    def __str__(self):
        addresses = ",".join([c.address for c in self.components])
        if self.abstract:
            return "Grain<Abstract" + addresses + ">"
        return "Grain<" + addresses + ">"

    def with_namespace(self, namespace: str) -> "Grain":
        """Return a copy with every component placed under ``namespace``."""
        return Grain(
            components=[c.with_namespace(namespace) for c in self.components],
            nested=self.nested,
        )

    @property
    def abstract(self):
        """True when empty or made up only of the all-rows concept."""
        return not self.components or all(
            c.name == ALL_ROWS_CONCEPT for c in self.components
        )

    @property
    def set(self):
        """The component addresses as a set."""
        return {c.address for c in self.components}

    def __eq__(self, other: object):
        """Equal to a ``Grain`` or a list of concepts with the same addresses."""
        if isinstance(other, list):
            return self.set == {c.address for c in other}
        if not isinstance(other, Grain):
            return False
        return self.set == other.set

    def issubset(self, other: "Grain"):
        """True when every address of this grain is also in ``other``."""
        return self.set.issubset(other.set)

    def union(self, other: "Grain"):
        """Return a grain built from the components of both grains."""
        # Duplicate addresses are removed by component_validator.
        return Grain(components=[*self.components, *other.components])

    def isdisjoint(self, other: "Grain"):
        """True when the two grains share no address."""
        return self.set.isdisjoint(other.set)

    def intersection(self, other: "Grain") -> "Grain":
        """Return a grain of this grain's components also present in ``other``."""
        shared = self.set.intersection(other.set)
        return Grain(components=[c for c in self.components if c.address in shared])

    def __add__(self, other: "Grain") -> "Grain":
        """Merge two grains.

        Keeps all keys, properties none of whose keys are among the merged
        keys, and constants that are not derived as constants.
        """
        components: List[Concept] = []
        for clist in (self.components_copy, other.components_copy):
            for component in clist:
                defaulted = component.with_default_grain()
                if defaulted in components:
                    continue
                components.append(defaulted)
        base_components = [c for c in components if c.purpose == Purpose.KEY]
        for c in components:
            if c.purpose == Purpose.PROPERTY and not any(
                key in base_components for key in (c.keys or [])
            ):
                base_components.append(c)
            elif (
                c.purpose == Purpose.CONSTANT
                and not c.derivation == PurposeLineage.CONSTANT
            ):
                base_components.append(c)
        return Grain(components=base_components)

    def __radd__(self, other) -> "Grain":
        """Support ``0 + grain`` (and therefore ``sum()`` over grains)."""
        if other == 0:
            return self
        return self.__add__(other)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
class RawColumnExpr(BaseModel):
    """Wrapper around a text expression; accepted as a ``ColumnAssignment`` alias."""

    # The expression text.
    text: str
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
class ColumnAssignment(BaseModel):
    """Binds a concept to an alias: a string, raw expression or function."""

    alias: str | RawColumnExpr | Function
    concept: Concept
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def is_complete(self) -> bool:
        """True unless the assignment carries ``Modifier.PARTIAL``."""
        return Modifier.PARTIAL not in self.modifiers

    def with_namespace(self, namespace: str) -> "ColumnAssignment":
        """Return a copy with the concept placed under ``namespace``.

        The alias is re-namespaced only when it is a ``Function``; string and
        raw-expression aliases are carried over unchanged.
        """
        new_alias = self.alias
        if isinstance(new_alias, Function):
            new_alias = new_alias.with_namespace(namespace)
        return ColumnAssignment(
            alias=new_alias,
            concept=self.concept.with_namespace(namespace),
            modifiers=self.modifiers,
        )
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
class Statement(BaseModel):
    """Empty base model; declares no fields of its own."""
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
class LooseConceptList(BaseModel):
    """A list of concepts compared only by address.

    Equality, containment and the set-style helpers all work on the set of
    concept addresses and ignore every other concept attribute.
    """

    concepts: List[Concept]

    @cached_property
    def addresses(self) -> set[str]:
        """Addresses of the contained concepts (computed once, then cached)."""
        return {s.address for s in self.concepts}

    @classmethod
    def validate(cls, v):
        """Build an instance from an iterable of concepts."""
        # Model fields must be passed by keyword; a positional argument
        # is rejected by BaseModel.__init__.
        return cls(concepts=v)

    def __str__(self) -> str:
        return f"lcl{str(self.addresses)}"

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses == other.addresses

    def issubset(self, other):
        """True when every address is also in ``other``; False for other types."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.issubset(other.addresses)

    def __contains__(self, other):
        """Membership test accepting either an address string or a ``Concept``."""
        if isinstance(other, str):
            return other in self.addresses
        if not isinstance(other, Concept):
            return False
        return other.address in self.addresses

    def difference(self, other):
        """Addresses present here but not in ``other``.

        Returns ``False`` (not a set) when ``other`` is not a
        ``LooseConceptList``.
        """
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.difference(other.addresses)

    def isdisjoint(self, other):
        """True when no address is shared; False for other types."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.isdisjoint(other.addresses)
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
# Expression node for a call to an operator: the operator itself, its
# arguments, and the datatype/purpose of the value the call produces.
class Function(Namespaced, SelectGrain, BaseModel):
    operator: FunctionType
    # Upper bound on len(arguments); parse_arguments skips the bound when this
    # equals InfiniteFunctionArgs.
    arg_count: int = Field(default=1)
    output_datatype: DataType | ListType | StructType | MapType
    output_purpose: Purpose
    # Either one set of datatypes applied to every argument position, or one
    # set per position. None / empty disables datatype checking.
    valid_inputs: Optional[
        Union[
            Set[DataType | ListType | StructType],
            List[Set[DataType | ListType | StructType]],
        ]
    ] = None
    # NOTE: declared last on purpose -- the "arguments" validator reads the
    # already-validated fields above through ValidationInfo.data.
    arguments: Sequence[
        Union[
            Concept,
            "AggregateWrapper",
            "Function",
            int,
            float,
            str,
            DataType,
            ListType,
            DatePart,
            "Parenthetical",
            CaseWhen,
            "CaseElse",
            ListWrapper[int],
            ListWrapper[str],
            ListWrapper[float],
        ]
    ]

    def __str__(self):
        """Render as ``<operator value>(arg1,arg2,...)``."""
        return f'{self.operator.value}({",".join([str(a) for a in self.arguments])})'

    @property
    def datatype(self):
        """Alias for ``output_datatype``."""
        return self.output_datatype

    def with_select_grain(self, grain: Grain) -> Function:
        """Return a copy whose SelectGrain arguments are rebound to ``grain``.

        Arguments that are not SelectGrain instances are passed through as-is.
        """
        rebound = [
            arg.with_select_grain(grain) if isinstance(arg, SelectGrain) else arg
            for arg in self.arguments
        ]
        return Function(
            operator=self.operator,
            arguments=rebound,
            output_datatype=self.output_datatype,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    @field_validator("arguments")
    @classmethod
    def parse_arguments(cls, v, info: ValidationInfo):
        """Validate argument count and argument datatypes.

        Raises:
            ParseError: more arguments were supplied than ``arg_count`` allows
                (unless ``arg_count`` is InfiniteFunctionArgs).
            TypeError: a Concept, Function or constant argument has a datatype
                that is not in the valid set for its position.
        """
        from trilogy.parsing.exceptions import ParseError

        values = info.data
        arg_count = len(v)
        target_arg_count = values["arg_count"]
        operator_name = values["operator"].name
        # valid_inputs is missing from info.data when it failed its own
        # validation; return early so that original error is the one surfaced.
        if "valid_inputs" not in values:
            return v
        valid_inputs = values["valid_inputs"]
        if arg_count > target_arg_count and target_arg_count != InfiniteFunctionArgs:
            raise ParseError(
                f"Incorrect argument count to {operator_name} function, expects"
                f" {target_arg_count}, got {arg_count}"
            )
        # A single set applies to every position: expand it to one set per
        # argument so the loop below can always index by position.
        if isinstance(valid_inputs, set):
            valid_inputs = [valid_inputs for _ in v]
        elif not valid_inputs:
            return v

        # Python constant type -> datatype it represents. bool is listed before
        # int because bool is a subclass of int: with int first, a bool constant
        # would always be classified as INTEGER and the BOOL entry would never
        # be reached.
        comparisons: List[Tuple[Type, DataType]] = [
            (bool, DataType.BOOL),
            (str, DataType.STRING),
            (int, DataType.INTEGER),
            (float, DataType.FLOAT),
            (DatePart, DataType.DATE_PART),
        ]
        for idx, arg in enumerate(v):
            allowed = valid_inputs[idx]
            if isinstance(arg, Concept) and arg.datatype.data_type not in allowed:
                # UNKNOWN datatypes are let through unchecked.
                if arg.datatype != DataType.UNKNOWN:
                    raise TypeError(
                        f"Invalid input datatype {arg.datatype.data_type} passed into position {idx}"
                        f" for {operator_name} from concept {arg.name}, valid is {valid_inputs[idx]}"
                    )
            if isinstance(arg, Function) and arg.output_datatype not in allowed:
                if arg.output_datatype != DataType.UNKNOWN:
                    raise TypeError(
                        f"Invalid input datatype {arg.output_datatype} passed into"
                        f" {operator_name} from function {arg.operator.name}"
                    )
            # check constants
            for ptype, dtype in comparisons:
                if not isinstance(arg, ptype):
                    continue
                if dtype in allowed:
                    # first matching, permitted type wins
                    break
                if ptype is bool:
                    # Backward compatibility: a bool is also an int, so let the
                    # INTEGER entry accept or reject it exactly as before.
                    continue
                raise TypeError(
                    f"Invalid {dtype} constant passed into {operator_name} {arg}, expecting one of {valid_inputs[idx]}"
                )
        return v

    def with_namespace(self, namespace: str) -> "Function":
        """Return a copy whose Namespaced arguments are moved into ``namespace``.

        Arguments that are not Namespaced instances are passed through as-is.
        """
        namespaced = [
            arg.with_namespace(namespace) if isinstance(arg, Namespaced) else arg
            for arg in self.arguments
        ]
        return Function(
            operator=self.operator,
            arguments=namespaced,
            output_datatype=self.output_datatype,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    @property
    def concept_arguments(self) -> List[Concept]:
        """Concatenate get_concept_arguments(arg) over every argument, in order."""
        collected = []
        for arg in self.arguments:
            collected += get_concept_arguments(arg)
        return collected

    @property
    def output_grain(self):
        """Grain of the produced value.

        Aggregate operators return an empty Grain; any other operator returns
        the sum of the grains of its concept arguments.
        """
        base_grain = Grain(components=[])
        if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
            return base_grain
        for concept in self.concept_arguments:
            base_grain += concept.grain
        return base_grain

    @property
    def output_keys(self) -> list[Concept]:
        """De-duplicated keys behind the concept arguments.

        A KEY-purpose argument contributes itself; any other argument
        contributes its ``keys`` when it has some. The result passes through a
        set, so its order is not defined.
        """
        components = []
        for concept in self.concept_arguments:
            if concept.purpose == Purpose.KEY:
                components.append(concept)
            elif concept.keys:
                components += concept.keys
        return list(set(components))
|
|
913
|
+
|
|
914
|
+
|
|
915
|
+
# Pairs a derivation (function, filter, window or aggregate wrapper) with the
# concept that derivation produces.
class ConceptTransform(Namespaced, BaseModel):
    function: Function | FilterItem | WindowItem | AggregateWrapper
    output: Concept
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def input(self) -> List[Concept]:
        """Arguments of ``function`` that are Concept instances, in order."""
        concept_args = []
        for candidate in self.function.arguments:
            if isinstance(candidate, Concept):
                concept_args.append(candidate)
        return concept_args

    def with_namespace(self, namespace: str) -> "ConceptTransform":
        """Return a copy with function and output moved into ``namespace``."""
        moved_function = self.function.with_namespace(namespace)
        moved_output = self.output.with_namespace(namespace)
        return ConceptTransform(
            function=moved_function,
            output=moved_output,
            modifiers=self.modifiers,
        )

    def with_filter(self, where: "WhereClause") -> "ConceptTransform":
        """Wrap the current output in a FilterItem restricted by ``where``.

        An anonymous concept (named from a hash of ``str(where)``) takes over
        the output's current lineage and becomes the filter's content.
        NOTE: this mutates ``self.output`` -- its lineage is replaced by the
        new FilterItem -- and the returned transform shares that same output.
        """
        target = self.output
        suffix = string_to_hash(str(where))
        filter_input = Concept(
            name=f"_anon_concept_transform_filter_input_{suffix}",
            datatype=target.datatype,
            purpose=target.purpose,
            lineage=target.lineage,
            namespace=DEFAULT_NAMESPACE,
            grain=target.grain,
            keys=target.keys,
        )
        filtered = FilterItem(content=filter_input, where=where)
        self.output.lineage = filtered
        return ConceptTransform(
            function=filtered,
            output=self.output,
            modifiers=self.modifiers,
        )
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
# A window specification: a row count plus the ordering it applies to.
class Window(BaseModel):
    count: int
    window_order: WindowOrder

    def __str__(self):
        """Render as ``Window<order>``; ``count`` is not part of the text."""
        order = self.window_order
        return f"Window<{order}>"
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
# Container for the concepts listed in a window's "over" clause.
class WindowItemOver(BaseModel):
    contents: List[Concept]
|
|
959
|
+
|
|
960
|
+
|
|
961
|
+
# Container for the order items listed in a window's ordering clause.
class WindowItemOrder(BaseModel):
    contents: List["OrderItem"]
|
|
963
|
+
|
|
964
|
+
|
|
965
|
+
# A window expression over a concept: the window type, the concept it is
# computed for, its ordering, and the optional "over" concepts.
class WindowItem(Namespaced, SelectGrain, BaseModel):
    type: WindowType
    content: Concept
    order_by: List["OrderItem"]
    over: List["Concept"] = Field(default_factory=list)

    def with_namespace(self, namespace: str) -> "WindowItem":
        """Return a copy with content, over and order_by moved into ``namespace``."""
        return WindowItem(
            type=self.type,
            content=self.content.with_namespace(namespace),
            over=[x.with_namespace(namespace) for x in self.over],
            order_by=[x.with_namespace(namespace) for x in self.order_by],
        )

    def with_select_grain(self, grain: Grain) -> "WindowItem":
        """Return a copy with content, over and order_by rebound to ``grain``."""
        return WindowItem(
            type=self.type,
            content=self.content.with_select_grain(grain),
            over=[x.with_select_grain(grain) for x in self.over],
            order_by=[x.with_select_grain(grain) for x in self.order_by],
        )

    @property
    def concept_arguments(self) -> List[Concept]:
        """Same list as ``arguments``."""
        return self.arguments

    @property
    def arguments(self) -> List[Concept]:
        """The content concept, then each order item's output, then ``over``."""
        return [
            self.content,
            *[order.output for order in self.order_by],
            *self.over,
        ]

    @property
    def output(self) -> Concept:
        """The concept produced: the transform's output when content is a
        ConceptTransform, otherwise content itself."""
        if isinstance(self.content, ConceptTransform):
            return self.content.output
        return self.content

    @output.setter
    def output(self, value):
        # Mirror of the getter: write through to the transform's output when
        # content is a ConceptTransform, otherwise replace content.
        if isinstance(self.content, ConceptTransform):
            self.content.output = value
        else:
            self.content = value

    @property
    def input(self) -> List[Concept]:
        """Inputs of content, followed by those of every order item and every
        ``over`` concept.

        The list returned by ``content.input`` is copied before extending it:
        ``+=`` on a list extends it in place, so without the copy whatever
        list object ``content.input`` handed back would be modified.
        """
        base = list(self.content.input)
        for order in self.order_by:
            base += order.input
        for concept in self.over:
            base += concept.input
        return base

    @property
    def output_datatype(self):
        """Datatype of the content concept."""
        return self.content.datatype

    @property
    def output_purpose(self):
        """Always ``Purpose.PROPERTY``."""
        return Purpose.PROPERTY
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
# A concept restricted by a where clause.
class FilterItem(Namespaced, SelectGrain, BaseModel):
    content: Concept
    where: "WhereClause"

    def __str__(self):
        """Render as ``<Filter: content where clause>``."""
        return f"<Filter: {str(self.content)} where {str(self.where)}>"

    def with_namespace(self, namespace: str) -> "FilterItem":
        """Return a copy with content and where moved into ``namespace``."""
        return FilterItem(
            content=self.content.with_namespace(namespace),
            where=self.where.with_namespace(namespace),
        )

    def with_select_grain(self, grain: Grain) -> FilterItem:
        """Return a copy with content and where rebound to ``grain``."""
        return FilterItem(
            content=self.content.with_select_grain(grain),
            where=self.where.with_select_grain(grain),
        )

    @property
    def arguments(self) -> List[Concept]:
        """The content concept followed by the where clause's inputs."""
        return [self.content, *self.where.input]

    @property
    def output(self) -> Concept:
        """The concept produced: the transform's output when content is a
        ConceptTransform, otherwise content itself."""
        if isinstance(self.content, ConceptTransform):
            return self.content.output
        return self.content

    @output.setter
    def output(self, value):
        # Mirror of the getter: write through to the transform's output when
        # content is a ConceptTransform, otherwise replace content.
        if isinstance(self.content, ConceptTransform):
            self.content.output = value
        else:
            self.content = value

    @property
    def input(self) -> List[Concept]:
        """Inputs of content followed by the where clause's inputs.

        The list returned by ``content.input`` is copied before extending it:
        ``+=`` on a list extends it in place, so without the copy whatever
        list object ``content.input`` handed back would be modified.
        """
        base = list(self.content.input)
        base += self.where.input
        return base

    @property
    def output_datatype(self):
        """Datatype of the content concept."""
        return self.content.datatype

    @property
    def output_purpose(self):
        """Purpose of the content concept."""
        return self.content.purpose

    @property
    def concept_arguments(self):
        """The content concept followed by the where clause's concept arguments."""
        return [self.content] + self.where.concept_arguments
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
# One entry of a select list: a concept or concept transform plus modifiers.
class SelectItem(Namespaced, BaseModel):
    content: Union[Concept, ConceptTransform]
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def output(self) -> Concept:
        """Concept this item yields: ``content.output`` for transforms and
        window items, otherwise content itself."""
        wrapped = isinstance(self.content, ConceptTransform) or isinstance(
            self.content, WindowItem
        )
        return self.content.output if wrapped else self.content

    @property
    def input(self) -> List[Concept]:
        """Delegates to ``content.input``."""
        return self.content.input

    def with_namespace(self, namespace: str) -> "SelectItem":
        """Return a copy with content moved into ``namespace``."""
        moved = self.content.with_namespace(namespace)
        return SelectItem(content=moved, modifiers=self.modifiers)
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
# A single ordering term: the concept to sort on and the sort direction.
class OrderItem(SelectGrain, Namespaced, BaseModel):
    expr: Concept
    order: Ordering

    def with_namespace(self, namespace: str) -> "OrderItem":
        """Return a copy with ``expr`` moved into ``namespace``."""
        moved = self.expr.with_namespace(namespace)
        return OrderItem(expr=moved, order=self.order)

    def with_select_grain(self, grain: Grain) -> "OrderItem":
        """Return a copy with ``expr`` rebound to ``grain``."""
        # NOTE(review): this calls expr.with_grain, while sibling classes call
        # with_select_grain on their concepts -- confirm this is intended.
        regrained = self.expr.with_grain(grain)
        return OrderItem(expr=regrained, order=self.order)

    @property
    def input(self):
        """Delegates to ``expr.input``."""
        return self.expr.input

    @property
    def output(self):
        """Delegates to ``expr.output``."""
        return self.expr.output
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
# An ORDER BY clause: an ordered list of OrderItem terms.
class OrderBy(Namespaced, BaseModel):
    items: List[OrderItem]

    def with_namespace(self, namespace: str) -> "OrderBy":
        """Return a copy with every item moved into ``namespace``."""
        moved = []
        for term in self.items:
            moved.append(term.with_namespace(namespace))
        return OrderBy(items=moved)
|
|
1135
|
+
|
|
1136
|
+
|
|
1137
|
+
# A select statement: the selected items plus optional where / order / limit.
class SelectStatement(Namespaced, BaseModel):
    selection: List[SelectItem]
    where_clause: Optional["WhereClause"] = None
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None

    def __str__(self):
        """Render the statement through trilogy.parsing.render.render_query."""
        # Imported here rather than at module level, as in the original.
        from trilogy.parsing.render import render_query

        return render_query(self)

    def __init__(self, *args, **kwargs) -> None:
        """Build the model, then pin grain-less aggregates to the select grain.

        Any directly selected Concept whose grain equals an empty Grain() and
        whose derivation is AGGREGATE is replaced by a copy at ``self.grain``.
        """
        super().__init__(*args, **kwargs)
        for entry in self.selection:
            if (
                isinstance(entry.content, Concept)
                and entry.content.grain == Grain()
                and entry.content.derivation == PurposeLineage.AGGREGATE
            ):
                entry.content = entry.content.with_grain(self.grain)

    @field_validator("selection", mode="before")
    @classmethod
    def selection_validation(cls, v):
        """Wrap bare Concept / ConceptTransform entries in a SelectItem."""
        return [
            SelectItem(content=item)
            if isinstance(item, (Concept, ConceptTransform))
            else item
            for item in v
        ]

    @property
    def input_components(self) -> List[Concept]:
        """Inputs of every selected item, then of the where clause.

        First occurrence wins and order is preserved.
        """
        # NOTE(review): de-duplication is keyed on concept.name, whereas other
        # code in this file de-duplicates on "address" -- confirm intended.
        candidates = [concept for item in self.selection for concept in item.input]
        if self.where_clause:
            candidates.extend(self.where_clause.input)
        seen_names = set()
        ordered = []
        for concept in candidates:
            if concept.name not in seen_names:
                seen_names.add(concept.name)
                ordered.append(concept)
        return ordered

    @property
    def output_components(self) -> List[Concept]:
        """The concept each selection entry yields, in selection order."""
        return [
            item if isinstance(item, Concept) else item.output
            for item in self.selection
        ]

    @property
    def hidden_components(self) -> List[Concept]:
        """Outputs of the SelectItems carrying the HIDDEN modifier."""
        return [
            item.output
            for item in self.selection
            if isinstance(item, SelectItem) and Modifier.HIDDEN in item.modifiers
        ]

    @property
    def all_components(self) -> List[Concept]:
        """Inputs, then outputs, then a copy of the grain components."""
        return (
            self.input_components + self.output_components + self.grain.components_copy
        )

    def to_datasource(
        self,
        namespace: str,
        identifier: str,
        address: Address,
        grain: Grain | None = None,
    ) -> Datasource:
        """Build a Datasource exposing this statement's output components.

        Each output concept gets a column aliased to its address with "."
        replaced by "_". The datasource uses ``grain`` when given (and truthy),
        otherwise this statement's grain; afterwards every column's concept is
        rebound to the grain the datasource ended up with.
        """
        assignments = []
        for concept in self.output_components:
            # TODO: replace hardcoded replacement here
            column_alias = concept.address.replace(".", "_")
            assignments.append(ColumnAssignment(alias=column_alias, concept=concept))
        result = Datasource(
            identifier=identifier,
            address=address,
            grain=grain or self.grain,
            columns=assignments,
            namespace=namespace,
        )
        for assignment in assignments:
            assignment.concept = assignment.concept.with_grain(result.grain)
        return result

    @property
    def grain(self) -> "Grain":
        """Grain of the statement, de-duplicated by address.

        Components are, in order: KEY-purpose outputs; KEY-purpose concept
        arguments of the where clause; then PROPERTY outputs, and CONSTANT
        outputs not derived as constants, whose own grain is either empty or
        not a subset of the components gathered so far.
        """
        output = [c for c in self.output_components if c.purpose == Purpose.KEY]
        if self.where_clause:
            output.extend(
                c
                for c in self.where_clause.concept_arguments
                if c.purpose == Purpose.KEY
            )
        # TODO: handle other grain cases
        # If a query already has the key of a property in its grain we group
        # to that grain and ignore the property (it is a derivation);
        # otherwise the property itself has to take part in the grouping.
        for item in self.output_components:
            if item.purpose == Purpose.PROPERTY:
                candidate = True
            elif item.purpose == Purpose.CONSTANT:
                candidate = item.derivation != PurposeLineage.CONSTANT
            else:
                candidate = False
            if not candidate or not item.grain:
                continue
            if not item.grain.components or not item.grain.issubset(
                Grain(components=unique(output, "address"))
            ):
                output.append(item)
        return Grain(components=unique(output, "address"))

    def with_namespace(self, namespace: str) -> "SelectStatement":
        """Return a copy with selection, where and order moved into ``namespace``."""
        moved_where = None
        if self.where_clause:
            moved_where = self.where_clause.with_namespace(namespace)
        moved_order = None
        if self.order_by:
            moved_order = self.order_by.with_namespace(namespace)
        return SelectStatement(
            selection=[c.with_namespace(namespace) for c in self.selection],
            where_clause=moved_where,
            order_by=moved_order,
            limit=self.limit,
        )
|
|
1289
|
+
|
|
1290
|
+
|
|
1291
|
+
# One alignment entry of a multi-select: the concepts exposed under one alias.
class AlignItem(Namespaced, BaseModel):
    alias: str
    concepts: List[Concept]
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)

    # Cached LooseConceptList view over ``concepts``.
    @computed_field  # type: ignore
    @cached_property
    def concepts_lcl(self) -> LooseConceptList:
        return LooseConceptList(concepts=self.concepts)

    def with_namespace(self, namespace: str) -> "AlignItem":
        """Return a copy with every concept moved into ``namespace``."""
        moved = [concept.with_namespace(namespace) for concept in self.concepts]
        return AlignItem(alias=self.alias, concepts=moved, namespace=namespace)

    def gen_concept(self, parent: MultiSelectStatement):
        """Create the merged concept named ``alias`` with ``parent`` as lineage.

        All member concepts must share one datatype. When their purposes
        differ the merged concept is a KEY; otherwise it keeps the shared
        purpose.

        Raises:
            InvalidSyntaxException: the member concepts have more than one
                distinct datatype.
        """
        datatypes = {concept.datatype for concept in self.concepts}
        purposes = {concept.purpose for concept in self.concepts}
        if len(datatypes) > 1:
            raise InvalidSyntaxException(
                f"Datatypes do not align for merged statements {self.alias}, have {datatypes}"
            )
        purpose = Purpose.KEY if len(purposes) > 1 else list(purposes)[0]
        return Concept(
            name=self.alias,
            datatype=datatypes.pop(),
            purpose=purpose,
            lineage=parent,
            namespace=parent.namespace,
        )
|
|
1327
|
+
|
|
1328
|
+
|
|
1329
|
+
# The align clause of a multi-select: a list of AlignItem entries.
class AlignClause(Namespaced, BaseModel):
    items: List[AlignItem]

    def with_namespace(self, namespace: str) -> "AlignClause":
        """Return a copy with every item moved into ``namespace``."""
        moved = []
        for entry in self.items:
            moved.append(entry.with_namespace(namespace))
        return AlignClause(items=moved)
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
# Several select statements merged through an align clause, with optional
# where / order / limit applying to the merged result.
class MultiSelectStatement(Namespaced, BaseModel):
    selects: List[SelectStatement]
    align: AlignClause
    namespace: str
    where_clause: Optional["WhereClause"] = None
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None

    def __repr__(self):
        return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"

    # Input components of every select, de-duplicated by address (cached).
    @computed_field  # type: ignore
    @cached_property
    def arguments(self) -> List[Concept]:
        output = []
        for select in self.selects:
            output += select.input_components
        return unique(output, "address")

    # Input components of every select plus the where clause's concept
    # arguments, de-duplicated by address (cached).
    @computed_field  # type: ignore
    @cached_property
    def concept_arguments(self) -> List[Concept]:
        output = []
        for select in self.selects:
            output += select.input_components
        if self.where_clause:
            output += self.where_clause.concept_arguments
        return unique(output, "address")

    def get_merge_concept(self, check: Concept):
        """Return the merged concept of the first align item containing
        ``check``, or None when no align item contains it."""
        for item in self.align.items:
            if check in item.concepts_lcl:
                return item.gen_concept(self)
        return None

    def with_namespace(self, namespace: str) -> "MultiSelectStatement":
        """Return a copy moved into ``namespace``.

        where_clause, order_by and limit are carried over (namespaced where
        applicable) so the copy keeps the statement's filter, ordering and
        limit instead of silently resetting them to None.
        """
        return MultiSelectStatement(
            selects=[c.with_namespace(namespace) for c in self.selects],
            align=self.align.with_namespace(namespace),
            namespace=namespace,
            where_clause=(
                self.where_clause.with_namespace(namespace)
                if self.where_clause
                else None
            ),
            order_by=self.order_by.with_namespace(namespace) if self.order_by else None,
            limit=self.limit,
        )

    @property
    def grain(self):
        """Sum of the grains of every select, starting from an empty Grain()."""
        base = Grain()
        for select in self.selects:
            base += select.grain
        return base

    # One merged concept per align item, in align order (cached).
    @computed_field  # type: ignore
    @cached_property
    def derived_concepts(self) -> List[Concept]:
        return [item.gen_concept(self) for item in self.align.items]

    def find_source(self, concept: Concept, cte: CTE) -> Concept:
        """Map a merged concept back to the one member concept ``cte`` outputs.

        Looks at the align items whose alias equals ``concept.name`` and
        collects the member concepts whose address is in ``cte.output_lcl``.

        Raises:
            SyntaxError: not exactly one member concept matched.
        """
        matches = [
            member
            for item in self.align.items
            if concept.name == item.alias
            for member in item.concepts
            if member.address in cte.output_lcl
        ]
        if len(matches) == 1:
            return matches[0]

        raise SyntaxError(
            f"Could not find upstream map for multiselect {str(concept)} on cte ({cte})"
        )

    @property
    def output_components(self) -> List[Concept]:
        """Derived (merged) concepts followed by every select's outputs,
        de-duplicated by address."""
        # Copy first: derived_concepts is a cached list, and ``+=`` would
        # otherwise append the select outputs to that cached list on every
        # access of this property.
        output = list(self.derived_concepts)
        for select in self.selects:
            output += select.output_components
        return unique(output, "address")

    # Hidden components of every select, concatenated (cached).
    @computed_field  # type: ignore
    @cached_property
    def hidden_components(self) -> List[Concept]:
        output = []
        for select in self.selects:
            output += select.hidden_components
        return output
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
# Wrapper around a location string; Datasource coerces plain-string
# addresses into this type.
class Address(BaseModel):
    location: str
|
|
1426
|
+
|
|
1427
|
+
|
|
1428
|
+
# Wrapper around a query's text.
class Query(BaseModel):
    text: str
|
|
1430
|
+
|
|
1431
|
+
|
|
1432
|
+
def safe_concept(v: Union[Dict, Concept]) -> Concept:
    """Validate a dict into a Concept; return any other value unchanged."""
    return Concept.model_validate(v) if isinstance(v, dict) else v
|
|
1436
|
+
|
|
1437
|
+
|
|
1438
|
+
# A window together with the concepts it is sorted on.
class GrainWindow(BaseModel):
    window: Window
    sort_concepts: List[Concept]

    def __str__(self):
        """Render as ``GrainWindow<addr1,addr2:<window>>``."""
        addresses = ",".join([concept.address for concept in self.sort_concepts])
        return f"GrainWindow<{addresses}:{self.window!s}>"
|
|
1448
|
+
|
|
1449
|
+
|
|
1450
|
+
def safe_grain(v) -> Grain:
    """Coerce ``v`` into a Grain.

    A dict is validated into a Grain, a Grain is returned as-is, and any other
    falsy value yields an empty Grain.

    Raises:
        ValueError: ``v`` is a truthy value that is neither a dict nor a Grain.
    """
    if isinstance(v, dict):
        return Grain.model_validate(v)
    if isinstance(v, Grain):
        return v
    if v:
        raise ValueError(f"Invalid input type to safe_grain {type(v)}")
    return Grain(components=[])
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
# Optional descriptive information attached to a Datasource.
class DatasourceMetadata(BaseModel):
    # Required field, but None is an accepted value.
    freshness_concept: Concept | None
    partition_fields: List[Concept] = Field(default_factory=list)
|
|
1464
|
+
|
|
1465
|
+
|
|
1466
|
+
# A merge of several concepts that share one datatype.
class MergeStatement(Namespaced, BaseModel):
    concepts: List[Concept]
    datatype: DataType | ListType | StructType | MapType

    @cached_property
    def concepts_lcl(self):
        """Cached LooseConceptList view over ``concepts``."""
        return LooseConceptList(concepts=self.concepts)

    @property
    def merge_concept(self) -> Concept:
        """Build the bridging PROPERTY concept for this merge.

        Its name is ``__merge_`` followed by the members' safe addresses
        joined with "_"; its keys are the merged concepts and its lineage is
        this statement.
        """
        joined_addresses = "_".join([member.safe_address for member in self.concepts])
        return Concept(
            name=f"__merge_{joined_addresses}",
            datatype=self.datatype,
            purpose=Purpose.PROPERTY,
            lineage=self,
            keys=tuple(self.concepts),
        )

    @property
    def arguments(self) -> List[Concept]:
        """The merged concepts."""
        return self.concepts

    @property
    def concept_arguments(self) -> List[Concept]:
        """The merged concepts."""
        return self.concepts

    def find_source(self, concept: Concept, cte: CTE) -> Concept:
        """Return the first output column of ``cte`` whose address equals that
        of a merged concept, scanning merged concepts in order.

        ``concept`` is only used in the error message.

        Raises:
            SyntaxError: no output column of ``cte`` matches a merged concept.
        """
        for member in self.concepts:
            hit = next(
                (
                    column
                    for column in cte.output_columns
                    if column.address == member.address
                ),
                None,
            )
            if hit is not None:
                return hit
        raise SyntaxError(
            f"Could not find upstream map for multiselect {str(concept)} on cte ({cte})"
        )

    def with_namespace(self, namespace: str) -> "MergeStatement":
        """Return a copy with every concept moved into ``namespace``."""
        moved = [member.with_namespace(namespace) for member in self.concepts]
        return MergeStatement(concepts=moved, datatype=self.datatype)
|
|
1507
|
+
|
|
1508
|
+
|
|
1509
|
+
# A physical data source: an address plus the columns (alias -> concept) it
# exposes, the grain of its rows, and optional metadata.
class Datasource(Namespaced, BaseModel):
    identifier: str
    columns: List[ColumnAssignment]
    address: Union[Address, str]
    grain: Grain = Field(
        default_factory=lambda: Grain(components=[]), validate_default=True
    )
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
    metadata: DatasourceMetadata = Field(
        default_factory=lambda: DatasourceMetadata(freshness_concept=None)
    )

    @cached_property
    def output_lcl(self) -> LooseConceptList:
        """Cached LooseConceptList over ``output_concepts``.

        add_column drops the cached value so it is rebuilt on next access.
        """
        return LooseConceptList(concepts=self.output_concepts)

    @field_validator("namespace", mode="plain")
    @classmethod
    def namespace_validation(cls, v):
        """Replace a falsy namespace with DEFAULT_NAMESPACE."""
        return v or DEFAULT_NAMESPACE

    @field_validator("address")
    @classmethod
    def address_enforcement(cls, v):
        """Wrap a plain-string address in an Address."""
        if isinstance(v, str):
            v = Address(location=v)
        return v

    @field_validator("grain", mode="plain")
    @classmethod
    def grain_enforcement(cls, v: Grain, info: ValidationInfo):
        """Coerce the grain and, when it has no components, default it to the
        KEY-purpose column concepts (each rebound to an empty Grain)."""
        values = info.data
        grain: Grain = safe_grain(v)
        if not grain.components:
            # "columns" is absent from info.data when it failed validation.
            columns: List[ColumnAssignment] = values.get("columns", [])
            grain = Grain(
                components=[
                    c.concept.with_grain(Grain())
                    for c in columns
                    if c.concept.purpose == Purpose.KEY
                ]
            )
        return grain

    def add_column(self, concept: Concept, alias: str, modifiers=None):
        """Append a column assignment and invalidate the cached output_lcl.

        Args:
            concept: concept exposed by the new column.
            alias: column alias in the underlying source.
            modifiers: modifiers for the column; None means no modifiers.
        """
        self.columns.append(
            # full_concepts / partial_concepts test membership in
            # column.modifiers, so never hand None through.
            ColumnAssignment(alias=alias, concept=concept, modifiers=modifiers or [])
        )
        # force refresh: cached_property keeps its value in the instance
        # __dict__. pop() is used instead of ``del self.output_lcl`` because
        # del raises AttributeError when the value has not been computed yet.
        self.__dict__.pop("output_lcl", None)

    def __add__(self, other):
        """Return self when ``other`` equals it.

        Raises:
            ValueError: the two datasources are not equal.
        """
        if not other == self:
            raise ValueError(
                "Attempted to add two datasources that are not identical, this should"
                " never happen"
            )
        return self

    def __str__(self):
        return f"{self.namespace}.{self.identifier}@<{self.grain}>"

    def __hash__(self):
        # Hash of namespace and identifier concatenated.
        return (self.namespace + self.identifier).__hash__()

    def with_namespace(self, namespace: str):
        """Return a copy nested under ``namespace``.

        A non-default existing namespace is kept as a suffix
        (``namespace.existing``); grain and columns are moved into
        ``namespace``.
        """
        # NOTE(review): ``metadata`` is not passed to the copy, so it falls
        # back to the field default -- confirm this is intended.
        new_namespace = (
            namespace + "." + self.namespace
            if self.namespace and self.namespace != DEFAULT_NAMESPACE
            else namespace
        )
        return Datasource(
            identifier=self.identifier,
            namespace=new_namespace,
            grain=self.grain.with_namespace(namespace),
            address=self.address,
            columns=[c.with_namespace(namespace) for c in self.columns],
        )

    @property
    def concepts(self) -> List[Concept]:
        """Concept of every column, in column order."""
        return [c.concept for c in self.columns]

    @property
    def group_required(self):
        """Always False."""
        return False

    @property
    def full_concepts(self) -> List[Concept]:
        """Concepts of the columns not marked with Modifier.PARTIAL."""
        return [c.concept for c in self.columns if Modifier.PARTIAL not in c.modifiers]

    @property
    def output_concepts(self) -> List[Concept]:
        """Same as ``concepts``."""
        return self.concepts

    @property
    def partial_concepts(self) -> List[Concept]:
        """Concepts of the columns marked with Modifier.PARTIAL."""
        return [c.concept for c in self.columns if Modifier.PARTIAL in c.modifiers]

    def get_alias(
        self, concept: Concept, use_raw_name: bool = True, force_alias: bool = False
    ) -> Optional[str | RawColumnExpr] | Function:
        """Return how ``concept`` is referenced on this datasource.

        A column matches when its concept equals ``concept`` directly or after
        being rebound to ``concept.grain``. For the first match the column
        alias is returned when ``use_raw_name`` is true, otherwise the
        concept's safe address. ``force_alias`` is accepted but not used.

        Raises:
            ValueError: no column matches ``concept``.
        """
        # 2022-01-22
        # this logic needs to be refined.
        for x in self.columns:
            if x.concept == concept or x.concept.with_grain(concept.grain) == concept:
                if use_raw_name:
                    return x.alias
                return concept.safe_address
        existing = [str(c.concept.with_grain(self.grain)) for c in self.columns]
        raise ValueError(
            f"{LOGGER_PREFIX} Concept {concept} not found on {self.identifier}; have"
            f" {existing}."
        )

    @property
    def name(self) -> str:
        """The identifier, without namespace."""
        # TODO: namespace all references
        return self.identifier

    @property
    def full_name(self) -> str:
        """``<namespace with "." replaced by "_">_<identifier>``, or just the
        identifier when there is no namespace."""
        if not self.namespace:
            return self.identifier
        namespace = self.namespace.replace(".", "_")
        return f"{namespace}_{self.identifier}"

    @cached_property
    def safe_location(self) -> str:
        """The address as a string: Address.location, or the raw address."""
        if isinstance(self.address, Address):
            return self.address.location
        return self.address
|
|
1644
|
+
|
|
1645
|
+
|
|
1646
|
+
# An unnest join over a concept, referenced under ``alias``.
class UnnestJoin(BaseModel):
    concept: Concept
    alias: str = "unnest"

    def __hash__(self):
        # Hash of the alias concatenated with the concept's address.
        key = self.alias + self.concept.address
        return hash(key)
|
|
1652
|
+
|
|
1653
|
+
|
|
1654
|
+
# Same fields as UnnestJoin (concept + alias); defines no __hash__ of its own.
class InstantiatedUnnestJoin(BaseModel):
    concept: Concept
    alias: str = "unnest"
|
|
1657
|
+
|
|
1658
|
+
|
|
1659
|
+
class BaseJoin(BaseModel):
    """A join between two datasources on a list of concepts.

    Construction validates the join and may rewrite ``concepts``:

    * joining a datasource to itself (same ``full_name``) raises SyntaxError;
    * every join concept must be an output of both sides, otherwise a
      SyntaxError is raised -- unless ``filter_to_mutual`` is set, in which
      case the concept is silently dropped;
    * if no concept survives, the join is still accepted with an empty key
      list when either side only outputs single-row concepts or only
      concepts at the empty grain; otherwise a SyntaxError is raised.
    """

    left_datasource: Union[Datasource, "QueryDatasource"]
    right_datasource: Union[Datasource, "QueryDatasource"]
    concepts: List[Concept]
    join_type: JoinType
    # When True, join concepts missing from either side are dropped
    # instead of raising.
    filter_to_mutual: bool = False

    def __init__(self, **data: Any):
        super().__init__(**data)
        if self.left_datasource.full_name == self.right_datasource.full_name:
            raise SyntaxError(
                f"Cannot join a datasource to itself, joining {self.left_datasource} and"
                f" {self.right_datasource}"
            )
        sides = [self.left_datasource, self.right_datasource]
        # Collect each side's output addresses once instead of once per
        # join concept.
        side_addresses = [[c.address for c in ds.output_concepts] for ds in sides]

        final_concepts = []
        for concept in self.concepts:
            include = True
            for ds, addresses in zip(sides, side_addresses):
                if concept.address in addresses:
                    continue
                if not self.filter_to_mutual:
                    raise SyntaxError(
                        f"Invalid join, missing {concept} on {ds.name}, have"
                        f" {addresses}"
                    )
                include = False
            if include:
                final_concepts.append(concept)

        if not final_concepts and self.concepts:
            # if one datasource only has constants
            # we can join on 1=1
            for ds in sides:
                outputs = ds.output_concepts
                # single rows
                if all(c.granularity == Granularity.SINGLE_ROW for c in outputs):
                    self.concepts = []
                    return
                # if everything is at abstract grain, we can skip joins
                if all(c.grain == Grain() for c in outputs):
                    self.concepts = []
                    return

            left_keys, right_keys = side_addresses
            match_concepts = [c.address for c in self.concepts]
            raise SyntaxError(
                "No mutual join keys found between"
                f" {self.left_datasource.identifier} and"
                f" {self.right_datasource.identifier}, left_keys {left_keys},"
                f" right_keys {right_keys},"
                f" provided join concepts {match_concepts}"
            )
        self.concepts = final_concepts

    @property
    def unique_id(self) -> str:
        """Key used to de-duplicate joins: left name + right name + join type."""
        return (
            self.left_datasource.name
            + self.right_datasource.name
            + self.join_type.value
        )

    def __str__(self):
        keys = ",".join([str(k) for k in self.concepts])
        return (
            f"{self.join_type.value} JOIN {self.left_datasource.identifier} and"
            f" {self.right_datasource.identifier} on"
            f" {keys}"
        )
|
|
1732
|
+
|
|
1733
|
+
|
|
1734
|
+
class QueryDatasource(BaseModel):
    """A derived datasource built from one or more parent datasources.

    Carries the input/output concepts, the per-concept ``source_map``, the
    joins between parents, an optional filter condition and the grain of
    the result. Field order matters: ``source_map`` validation reads the
    already-validated ``input_concepts`` and ``output_concepts``.
    """

    input_concepts: List[Concept]
    output_concepts: List[Concept]
    source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
    datasources: Sequence[Union[Datasource, "QueryDatasource"]]
    grain: Grain
    joins: List[BaseJoin | UnnestJoin]
    limit: Optional[int] = None
    condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = Field(
        default=None
    )
    filter_concepts: List[Concept] = Field(default_factory=list)
    source_type: SourceType = SourceType.SELECT
    partial_concepts: List[Concept] = Field(default_factory=list)
    join_derived_concepts: List[Concept] = Field(default_factory=list)
    # Tri-state override for group_required: True/False force the answer,
    # None defers to source_type.
    force_group: bool | None = None

    @property
    def non_partial_concept_addresses(self) -> List[str]:
        """Addresses of output concepts that are not listed as partial."""
        partial = {z.address for z in self.partial_concepts}
        return [c.address for c in self.output_concepts if c.address not in partial]

    @field_validator("joins")
    @classmethod
    def validate_joins(cls, v):
        """Reject any BaseJoin whose two sides share an identifier."""
        for join in v:
            if not isinstance(join, BaseJoin):
                continue
            if join.left_datasource.identifier == join.right_datasource.identifier:
                raise SyntaxError(
                    f"Cannot join a datasource to itself, joining {join.left_datasource}"
                )
        return v

    @field_validator("input_concepts")
    @classmethod
    def validate_inputs(cls, v):
        """De-duplicate input concepts by address."""
        return unique(v, "address")

    @field_validator("output_concepts")
    @classmethod
    def validate_outputs(cls, v):
        """De-duplicate output concepts by address."""
        return unique(v, "address")

    @field_validator("source_map")
    @classmethod
    def validate_source_map(cls, v, info: ValidationInfo):
        """Require a source_map key for every input and output concept.

        ``info`` is a required, annotated parameter so that pydantic always
        passes the validation context; the previous ``info=ValidationInfo``
        form made it an optional argument defaulting to the class itself.
        """
        values = info.data
        expected = {c.address for c in values["output_concepts"]}.union(
            c.address for c in values["input_concepts"]
        )
        seen = set(v)
        for x in expected:
            if x not in seen:
                raise SyntaxError(
                    f"source map missing {x} on (expected {expected}, have {seen})"
                )
        return v

    def __str__(self):
        return f"{self.identifier}@<{self.grain}>"

    def __hash__(self):
        return hash(self.identifier)

    @property
    def concepts(self):
        """Alias for ``output_concepts``."""
        return self.output_concepts

    @property
    def name(self):
        return self.identifier

    @property
    def full_name(self):
        return self.identifier

    @property
    def group_required(self) -> bool:
        """Whether this datasource must be grouped.

        An explicit ``force_group`` wins; otherwise only a GROUP source
        type requires grouping.
        """
        if self.force_group is True:
            return True
        if self.force_group is False:
            return False
        return bool(self.source_type) and self.source_type == SourceType.GROUP

    def __add__(self, other):
        """Merge two compatible query datasources into a new one.

        Both sides must agree on grain, source type, group_required,
        join-derived concepts and force_group. Parent datasources sharing a
        ``full_name`` are merged with ``+``; concepts and joins are
        de-duplicated.

        Raises:
            SyntaxError: when ``other`` is not a QueryDatasource or the two
                sides are incompatible.
        """
        # These are raised as SyntaxError on purpose so that they are not
        # caught by existing handlers (the original note is truncated;
        # presumably it refers to ValueError handlers such as get_alias).
        if not isinstance(other, QueryDatasource):
            raise SyntaxError("Can only merge two query datasources")
        if not other.grain == self.grain:
            raise SyntaxError(
                "Can only merge two query datasources with identical grain"
            )
        if not self.source_type == other.source_type:
            raise SyntaxError(
                "Can only merge two query datasources with identical source type"
            )
        if not self.group_required == other.group_required:
            raise SyntaxError(
                "can only merge two datasources if the group required flag is the same"
            )
        if not self.join_derived_concepts == other.join_derived_concepts:
            raise SyntaxError(
                "can only merge two datasources if the join derived concepts are the same"
            )
        if not self.force_group == other.force_group:
            raise SyntaxError(
                "can only merge two datasources if the force_group flag is the same"
            )
        logger.debug(
            f"{LOGGER_PREFIX} merging {self.name} with"
            f" {[c.address for c in self.output_concepts]} concepts and"
            f" {other.name} with {[c.address for c in other.output_concepts]} concepts"
        )

        merged_datasources = {}
        for ds in [*self.datasources, *other.datasources]:
            if ds.full_name in merged_datasources:
                merged_datasources[ds.full_name] = merged_datasources[ds.full_name] + ds
            else:
                merged_datasources[ds.full_name] = ds

        # Only combine conditions when both sides have one; with a single
        # condition use it directly rather than adding None to it.
        if self.condition and other.condition:
            merged_condition = self.condition + other.condition
        else:
            merged_condition = self.condition or other.condition or None

        # NOTE(review): de-duplicating joins by "unique_id" presumably
        # requires every join object to expose that attribute; UnnestJoin
        # does not define one -- confirm against `unique`.
        return QueryDatasource(
            input_concepts=unique(
                self.input_concepts + other.input_concepts, "address"
            ),
            output_concepts=unique(
                self.output_concepts + other.output_concepts, "address"
            ),
            source_map={**self.source_map, **other.source_map},
            datasources=list(merged_datasources.values()),
            grain=self.grain,
            joins=unique(self.joins + other.joins, "unique_id"),
            condition=merged_condition,
            source_type=self.source_type,
            partial_concepts=self.partial_concepts + other.partial_concepts,
            join_derived_concepts=self.join_derived_concepts,
            force_group=self.force_group,
        )

    @property
    def identifier(self) -> str:
        """Name built from parent names, grain and (if any) the condition.

        The condition contributes ``abs(hash(str(condition)))``.
        """
        filters = abs(hash(str(self.condition))) if self.condition else ""
        grain = "_".join(
            [str(c.address).replace(".", "_") for c in self.grain.components]
        )
        return (
            "_join_".join([d.name for d in self.datasources])
            + (f"_at_{grain}" if grain else "_at_abstract")
            + (f"_filtered_by_{filters}" if filters else "")
        )

    def get_alias(
        self, concept: Concept, use_raw_name: bool = False, force_alias: bool = False
    ):
        """Resolve the alias for ``concept`` from the first parent that has it.

        Each parent datasource is asked in order; a ValueError from a parent
        is logged at debug level and the next parent is tried. If no parent
        resolves it but the concept is one of this datasource's outputs (at
        this grain), the concept name is returned.

        Raises:
            ValueError: when the concept cannot be found at all.
        """
        # if we should use the raw datasource name to access
        use_raw_name = (
            len(self.datasources) == 1 or use_raw_name
        ) and not force_alias
        target = concept.with_grain(self.grain)
        for x in self.datasources:
            # query datasources should be referenced by their alias, always
            force_alias = isinstance(x, QueryDatasource)
            try:
                return x.get_alias(
                    target,
                    use_raw_name,
                    force_alias=force_alias,
                )
            except ValueError as e:
                from trilogy.constants import logger

                logger.debug(e)
                continue
        existing = [c.with_grain(self.grain) for c in self.output_concepts]
        if concept in existing:
            return concept.name

        existing_str = [str(c) for c in existing]
        datasources = [ds.identifier for ds in self.datasources]
        raise ValueError(
            f"{LOGGER_PREFIX} Concept {str(concept)} not found on {self.identifier};"
            f" have {existing_str} from {datasources}."
        )

    @property
    def safe_location(self):
        return self.identifier
|
|
1949
|
+
|
|
1950
|
+
|
|
1951
|
+
class Comment(BaseModel):
    """A comment, stored as its raw text."""

    text: str
|
|
1953
|
+
|
|
1954
|
+
|
|
1955
|
+
class CTE(BaseModel):
    """A named common table expression built on a QueryDatasource.

    Two CTEs with the same grain and condition can be merged in place with
    ``+``; see ``__add__``.
    """

    name: str
    source: "QueryDatasource"  # TODO: make recursive
    # output columns are what are selected/grouped by
    output_columns: List[Concept]
    source_map: Dict[str, str | list[str]]
    grain: Grain
    base: bool = False
    group_to_grain: bool = False
    parent_ctes: List["CTE"] = Field(default_factory=list)
    joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
    condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
    partial_concepts: List[Concept] = Field(default_factory=list)
    join_derived_concepts: List[Concept] = Field(default_factory=list)

    @computed_field  # type: ignore
    @property
    def output_lcl(self) -> LooseConceptList:
        """The output columns wrapped in a LooseConceptList."""
        return LooseConceptList(concepts=self.output_columns)

    @field_validator("output_columns")
    @classmethod
    def validate_output_columns(cls, v):
        """De-duplicate output columns by address."""
        return unique(v, "address")

    def __add__(self, other: "CTE"):
        """Merge ``other`` into this CTE in place and return ``self``.

        Note that this mutates ``self`` and ``self.source`` rather than
        building a new object.

        Raises:
            ValueError: when the two CTEs differ in grain or condition.
        """
        logger.info('Merging two copies of CTE "%s"', self.name)
        if not self.grain == other.grain:
            error = (
                "Attempting to merge two ctes of different grains"
                f" {self.name} {other.name} grains {self.grain} {other.grain}| {self.group_to_grain} {other.group_to_grain}| {self.output_lcl} {other.output_lcl}"
            )
            raise ValueError(error)
        if not self.condition == other.condition:
            error = (
                "Attempting to merge two ctes with different conditions"
                f" {self.name} {other.name} conditions {self.condition} {other.condition}"
            )
            raise ValueError(error)

        # Merged once; the original repeated this exact merge a second time.
        self.partial_concepts = unique(
            self.partial_concepts + other.partial_concepts, "address"
        )
        self.parent_ctes = merge_ctes(self.parent_ctes + other.parent_ctes)
        self.source_map = {**self.source_map, **other.source_map}
        self.output_columns = unique(
            self.output_columns + other.output_columns, "address"
        )
        # NOTE(review): de-duplication by "unique_id" presumably needs every
        # join to expose that attribute; InstantiatedUnnestJoin does not
        # define one -- confirm against `unique`.
        self.joins = unique(self.joins + other.joins, "unique_id")
        self.join_derived_concepts = unique(
            self.join_derived_concepts + other.join_derived_concepts, "address"
        )

        self.source.source_map = {**self.source.source_map, **other.source.source_map}
        self.source.output_concepts = unique(
            self.source.output_concepts + other.source.output_concepts, "address"
        )
        return self

    @property
    def relevant_base_ctes(self):
        """Currently identical to ``parent_ctes``."""
        return self.parent_ctes

    @property
    def base_name(self) -> str:
        """Name of the relation this CTE selects from.

        Raises:
            SyntaxError: when joins exist but every left-hand CTE also
                appears as a right-hand CTE, so no root can be chosen.
        """
        valid_joins: List[Join] = [
            join for join in self.joins if isinstance(join, Join)
        ]
        sources = self.source.datasources
        # if this cte selects from a single datasource, select right from it
        if len(sources) == 1 and isinstance(sources[0], Datasource):
            return sources[0].safe_location
        # if we have multiple joined CTEs, pick the base
        # as the root
        if len(sources) == 1 and len(self.parent_ctes) == 1:
            return self.parent_ctes[0].name
        if valid_joins:
            candidates = [x.left_cte.name for x in valid_joins]
            disallowed = [x.right_cte.name for x in valid_joins]
            # The root is the first left-hand CTE never used on the right.
            roots = [y for y in candidates if y not in disallowed]
            if not roots:
                raise SyntaxError(
                    f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
                )
            return roots[0]
        if self.relevant_base_ctes:
            return self.relevant_base_ctes[0].name
        if self.parent_ctes:
            raise SyntaxError(
                f"{self.name} has no relevant base CTEs, {self.source_map},"
                f" {[x.name for x in self.parent_ctes]}, outputs"
                f" {[x.address for x in self.output_columns]}"
            )
        return self.source.name

    @property
    def base_alias(self) -> str:
        """Alias of the base relation, chosen with the same precedence as
        a single raw datasource, then joins, then parent CTEs."""
        relevant_joins = [j for j in self.joins if isinstance(j, Join)]
        sources = self.source.datasources
        if len(sources) == 1 and isinstance(sources[0], Datasource):
            return sources[0].full_name.replace(".", "_")
        if relevant_joins:
            return relevant_joins[0].left_cte.name
        if self.relevant_base_ctes:
            return self.relevant_base_ctes[0].name
        if self.parent_ctes:
            return self.parent_ctes[0].name
        return self.name

    def get_alias(self, concept: Concept) -> str:
        """Return the column alias for ``concept`` within this CTE.

        Concepts output by a parent CTE use their ``safe_address``; anything
        else is resolved through the source. A failed lookup does not raise:
        it returns a string starting with ``INVALID_ALIAS:``.
        """
        for cte in self.parent_ctes:
            if any(concept.address == x.address for x in cte.output_columns):
                return concept.safe_address
        try:
            return self.source.get_alias(concept)
        except ValueError as e:
            return f"INVALID_ALIAS: {str(e)}"

    @property
    def render_from_clause(self) -> bool:
        """False only for a parentless, ungrouped CTE of pure constants."""
        constants_only = all(
            c.derivation == PurposeLineage.CONSTANT for c in self.output_columns
        )
        if constants_only and not self.parent_ctes and not self.group_to_grain:
            return False
        return True

    @property
    def sourced_concepts(self) -> List[Concept]:
        """Output columns that have an entry in ``source_map``."""
        return [c for c in self.output_columns if c.address in self.source_map]
|
|
2092
|
+
|
|
2093
|
+
|
|
2094
|
+
def merge_ctes(ctes: List[CTE]) -> List[CTE]:
    """Collapse CTEs that share a name into one entry each.

    The first CTE seen under a name is kept; each later CTE with the same
    name is combined into it with ``+``. Output order follows the first
    appearance of each name.
    """
    by_name: Dict[str, CTE] = {}
    for candidate in ctes:
        key = candidate.name
        if key in by_name:
            by_name[key] = by_name[key] + candidate
        else:
            by_name[key] = candidate
    return list(by_name.values())
|
|
2105
|
+
|
|
2106
|
+
|
|
2107
|
+
class CompiledCTE(BaseModel):
    """A CTE name paired with its statement text."""

    name: str
    statement: str
|
|
2110
|
+
|
|
2111
|
+
|
|
2112
|
+
class JoinKey(BaseModel):
    """Wrapper around a single concept used as a join key."""

    concept: Concept

    def __str__(self):
        # Render exactly as the wrapped concept does.
        return f"{self.concept}"
|
|
2117
|
+
|
|
2118
|
+
|
|
2119
|
+
class Join(BaseModel):
    """A typed join between two CTEs on a list of join keys."""

    left_cte: CTE
    right_cte: CTE
    jointype: JoinType
    joinkeys: List[JoinKey]

    @property
    def unique_id(self) -> str:
        """Key used to de-duplicate joins: left name + right name + type."""
        left_name = self.left_cte.name
        right_name = self.right_cte.name
        return left_name + right_name + self.jointype.value

    def __str__(self):
        rendered_keys = ",".join([str(k) for k in self.joinkeys])
        return (
            f"{self.jointype.value} JOIN {self.left_cte.name} and"
            f" {self.right_cte.name} on {rendered_keys}"
        )
|
|
2134
|
+
|
|
2135
|
+
|
|
2136
|
+
class UndefinedConcept(Concept):
    """Placeholder concept for a name that was looked up but not defined.

    Keeps a reference to the concept dictionary it came from and,
    optionally, the line number of the reference. Every copy method below
    rebuilds the instance through ``self.__class__`` and carries both of
    those along.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)
    name: str
    environment: "EnvironmentConceptDict"
    line_no: int | None = None
    datatype: DataType = DataType.UNKNOWN
    purpose: Purpose = Purpose.KEY

    def with_namespace(self, namespace: str) -> "UndefinedConcept":
        """Return a copy placed in ``namespace`` (lineage and grain too)."""
        return self.__class__(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage.with_namespace(namespace) if self.lineage else None,
            grain=(
                self.grain.with_namespace(namespace)
                if self.grain
                else Grain(components=[])
            ),
            namespace=namespace,
            keys=self.keys,
            environment=self.environment,
            line_no=self.line_no,
        )

    def with_select_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
        """Return a copy at ``grain`` (empty grain when omitted).

        A lineage that is a ``SelectGrain`` is re-grained as well; any other
        lineage is carried over unchanged.

        Raises:
            ValueError: when ``keys`` contains anything that is not a Concept.
        """
        if not all([isinstance(x, Concept) for x in self.keys or []]):
            raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
        new_grain = grain or Grain(components=[])
        new_lineage = self.lineage if self.lineage else None
        if new_lineage and isinstance(new_lineage, SelectGrain):
            new_lineage = new_lineage.with_select_grain(new_grain)
        return self.__class__(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=new_lineage,
            grain=new_grain,
            namespace=self.namespace,
            keys=self.keys,
            environment=self.environment,
            # Carried over like in the sibling copy methods; it was
            # previously dropped here, resetting it to None.
            line_no=self.line_no,
        )

    def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
        """Return a copy at ``grain`` (empty grain when omitted)."""
        return self.__class__(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=grain or Grain(components=[]),
            namespace=self.namespace,
            keys=self.keys,
            environment=self.environment,
            line_no=self.line_no,
        )

    def with_default_grain(self) -> "Concept":
        """Return a copy whose grain is derived from the concept's purpose.

        KEY: a nested grain of the concept itself at the empty grain.
        PROPERTY: its keys plus the sources of every Concept argument of
        its lineage. METRIC: the empty grain. Anything else keeps the
        current grain.
        """
        if self.purpose == Purpose.KEY:
            # we need to make this abstract
            grain = Grain(components=[self.with_grain(Grain())], nested=True)
        elif self.purpose == Purpose.PROPERTY:
            components: List[Concept] = [*self.keys] if self.keys else []
            if self.lineage:
                for item in self.lineage.arguments:
                    # The original branched on whether item.keys were
                    # already covered, but both branches were identical.
                    if isinstance(item, Concept):
                        components += item.sources
            grain = Grain(components=components)
        elif self.purpose == Purpose.METRIC:
            grain = Grain()
        else:
            grain = self.grain  # type: ignore
        return self.__class__(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=grain,
            keys=self.keys,
            namespace=self.namespace,
            environment=self.environment,
            line_no=self.line_no,
        )
|
|
2230
|
+
|
|
2231
|
+
|
|
2232
|
+
class EnvironmentConceptDict(dict):
    """Dictionary of concepts keyed by address, tolerant of missing keys.

    Lookups fall back between the bare name and the default-namespace
    qualified name. A key that is still missing either yields a recorded
    ``UndefinedConcept`` placeholder or, when ``fail_on_missing`` is set,
    raises ``UndefinedConceptException`` with close-match suggestions.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Forward the arguments as given. Passing ``self`` as an extra
        # positional argument made any positional initialiser fail with
        # "dict expected at most 1 argument".
        super().__init__(*args, **kwargs)
        self.undefined: dict[str, UndefinedConcept] = {}
        self.fail_on_missing: bool = False
        self.populate_default_concepts()

    def populate_default_concepts(self):
        """Insert every built-in default concept under its address."""
        from trilogy.core.internal import DEFAULT_CONCEPTS

        for concept in DEFAULT_CONCEPTS.values():
            self[concept.address] = concept

    def values(self) -> ValuesView[Concept]:  # type: ignore
        return super().values()

    def __getitem__(
        self, key, line_no: int | None = None
    ) -> Concept | UndefinedConcept:
        """Look up ``key``, falling back across the default namespace.

        Raises:
            UndefinedConceptException: when the key cannot be resolved and
                ``fail_on_missing`` is set.
        """
        try:
            return super(EnvironmentConceptDict, self).__getitem__(key)

        except KeyError:
            # Strip only the leading default-namespace segment so that
            # names which themselves contain dots stay intact.
            namespace, separator, remainder = key.partition(".")
            if separator and namespace == DEFAULT_NAMESPACE:
                return self.__getitem__(remainder, line_no)
            qualified = DEFAULT_NAMESPACE + "." + key
            if qualified in self:
                return self.__getitem__(qualified, line_no)
            if not self.fail_on_missing:
                undefined = UndefinedConcept(
                    name=key,
                    line_no=line_no,
                    environment=self,
                    datatype=DataType.UNKNOWN,
                    purpose=Purpose.KEY,
                )
                # Remember the placeholder under the key that was asked for.
                self.undefined[key] = undefined
                return undefined

        matches = self._find_similar_concepts(key)
        message = f"Undefined concept: {key}."
        if matches:
            message += f" Suggestions: {matches}"

        if line_no:
            raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
        raise UndefinedConceptException(message, matches)

    def _find_similar_concepts(self, concept_name):
        """Return existing keys that closely match ``concept_name``."""
        return difflib.get_close_matches(concept_name, self.keys())

    def items(self) -> ItemsView[str, Concept | UndefinedConcept]:  # type: ignore
        return super().items()
|
|
2285
|
+
|
|
2286
|
+
|
|
2287
|
+
class ImportStatement(BaseModel):
    """Record of an import: the alias it is bound to and its path."""

    alias: str
    path: str
    # environment: "Environment" | None = None
    # TODO: this might result in a lot of duplication
    # environment:"Environment"
|
|
2293
|
+
|
|
2294
|
+
|
|
2295
|
+
class EnvironmentOptions(BaseModel):
    """Behaviour switches for an Environment."""

    # When True, re-declaring an existing concept is accepted without error.
    allow_duplicate_declaration: bool = True
|
|
2297
|
+
|
|
2298
|
+
|
|
2299
|
+
def validate_concepts(v) -> EnvironmentConceptDict:
    """Coerce a value into an EnvironmentConceptDict.

    An existing EnvironmentConceptDict is returned as-is; a plain dict has
    each value validated as a Concept and is wrapped in a new
    EnvironmentConceptDict.

    Raises:
        ValueError: for any other input type.
    """
    if isinstance(v, EnvironmentConceptDict):
        return v
    if isinstance(v, dict):
        validated = {key: Concept.model_validate(raw) for key, raw in v.items()}
        return EnvironmentConceptDict(**validated)
    # Same exception type as before, now with a message naming the input.
    raise ValueError(
        f"concepts must be a dict or EnvironmentConceptDict, got {type(v).__name__}"
    )
|
|
2307
|
+
|
|
2308
|
+
|
|
2309
|
+
class Environment(BaseModel):
|
|
2310
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
|
|
2311
|
+
|
|
2312
|
+
concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
|
|
2313
|
+
Field(default_factory=EnvironmentConceptDict)
|
|
2314
|
+
)
|
|
2315
|
+
datasources: Dict[str, Datasource] = Field(default_factory=dict)
|
|
2316
|
+
functions: Dict[str, Function] = Field(default_factory=dict)
|
|
2317
|
+
data_types: Dict[str, DataType] = Field(default_factory=dict)
|
|
2318
|
+
imports: Dict[str, ImportStatement] = Field(default_factory=dict)
|
|
2319
|
+
namespace: str = DEFAULT_NAMESPACE
|
|
2320
|
+
working_path: str | Path = Field(default_factory=lambda: os.getcwd())
|
|
2321
|
+
environment_config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
|
|
2322
|
+
version: str = Field(default_factory=get_version)
|
|
2323
|
+
cte_name_map: Dict[str, str] = Field(default_factory=dict)
|
|
2324
|
+
|
|
2325
|
+
@classmethod
|
|
2326
|
+
def from_file(cls, path: str | Path) -> "Environment":
|
|
2327
|
+
with open(path, "r") as f:
|
|
2328
|
+
read = f.read()
|
|
2329
|
+
return Environment(working_path=Path(path).parent).parse(read)[0]
|
|
2330
|
+
|
|
2331
|
+
@classmethod
|
|
2332
|
+
def from_cache(cls, path) -> Optional["Environment"]:
|
|
2333
|
+
with open(path, "r") as f:
|
|
2334
|
+
read = f.read()
|
|
2335
|
+
base = cls.model_validate_json(read)
|
|
2336
|
+
version = get_version()
|
|
2337
|
+
if base.version != version:
|
|
2338
|
+
return None
|
|
2339
|
+
return base
|
|
2340
|
+
|
|
2341
|
+
def to_cache(self, path: Optional[str | Path] = None) -> Path:
|
|
2342
|
+
if not path:
|
|
2343
|
+
ppath = Path(self.working_path) / ENV_CACHE_NAME
|
|
2344
|
+
else:
|
|
2345
|
+
ppath = Path(path)
|
|
2346
|
+
with open(ppath, "w") as f:
|
|
2347
|
+
f.write(self.model_dump_json())
|
|
2348
|
+
return ppath
|
|
2349
|
+
|
|
2350
|
+
@property
|
|
2351
|
+
def materialized_concepts(self) -> List[Concept]:
|
|
2352
|
+
output = []
|
|
2353
|
+
for concept in self.concepts.values():
|
|
2354
|
+
found = False
|
|
2355
|
+
# basic concepts are effectively materialized
|
|
2356
|
+
# and can be found via join paths
|
|
2357
|
+
for datasource in self.datasources.values():
|
|
2358
|
+
if concept.address in [x.address for x in datasource.output_concepts]:
|
|
2359
|
+
found = True
|
|
2360
|
+
break
|
|
2361
|
+
if found:
|
|
2362
|
+
output.append(concept)
|
|
2363
|
+
return output
|
|
2364
|
+
|
|
2365
|
+
def validate_concept(self, lookup: str, meta: Meta | None = None):
|
|
2366
|
+
existing: Concept = self.concepts.get(lookup) # type: ignore
|
|
2367
|
+
if not existing:
|
|
2368
|
+
return
|
|
2369
|
+
elif existing and self.environment_config.allow_duplicate_declaration:
|
|
2370
|
+
return
|
|
2371
|
+
elif existing.metadata:
|
|
2372
|
+
# if the existing concept is auto derived, we can overwrite it
|
|
2373
|
+
if existing.metadata.concept_source == ConceptSource.AUTO_DERIVED:
|
|
2374
|
+
return
|
|
2375
|
+
elif meta and existing.metadata:
|
|
2376
|
+
raise ValueError(
|
|
2377
|
+
f"Assignment to concept '{lookup}' on line {meta.line} is a duplicate"
|
|
2378
|
+
f" declaration; '{lookup}' was originally defined on line"
|
|
2379
|
+
f" {existing.metadata.line_number}"
|
|
2380
|
+
)
|
|
2381
|
+
elif existing.metadata:
|
|
2382
|
+
raise ValueError(
|
|
2383
|
+
f"Assignment to concept '{lookup}' is a duplicate declaration;"
|
|
2384
|
+
f" '{lookup}' was originally defined on line"
|
|
2385
|
+
f" {existing.metadata.line_number}"
|
|
2386
|
+
)
|
|
2387
|
+
raise ValueError(
|
|
2388
|
+
f"Assignment to concept '{lookup}' is a duplicate declaration;"
|
|
2389
|
+
)
|
|
2390
|
+
|
|
2391
|
+
def add_import(self, alias: str, environment: Environment):
|
|
2392
|
+
self.imports[alias] = ImportStatement(
|
|
2393
|
+
alias=alias, path=str(environment.working_path)
|
|
2394
|
+
)
|
|
2395
|
+
for key, concept in environment.concepts.items():
|
|
2396
|
+
self.concepts[f"{alias}.{key}"] = concept.with_namespace(alias)
|
|
2397
|
+
for key, datasource in environment.datasources.items():
|
|
2398
|
+
self.datasources[f"{alias}.{key}"] = datasource.with_namespace(alias)
|
|
2399
|
+
|
|
2400
|
+
def parse(
|
|
2401
|
+
self, input: str, namespace: str | None = None, persist: bool = False
|
|
2402
|
+
) -> Tuple[Environment, list]:
|
|
2403
|
+
from trilogy import parse
|
|
2404
|
+
from trilogy.core.query_processor import process_persist
|
|
2405
|
+
|
|
2406
|
+
if namespace:
|
|
2407
|
+
new = Environment()
|
|
2408
|
+
_, queries = new.parse(input)
|
|
2409
|
+
self.add_import(namespace, new)
|
|
2410
|
+
return self, queries
|
|
2411
|
+
_, queries = parse(input, self)
|
|
2412
|
+
generatable = [
|
|
2413
|
+
x
|
|
2414
|
+
for x in queries
|
|
2415
|
+
if isinstance(
|
|
2416
|
+
x,
|
|
2417
|
+
(
|
|
2418
|
+
SelectStatement,
|
|
2419
|
+
PersistStatement,
|
|
2420
|
+
MultiSelectStatement,
|
|
2421
|
+
ShowStatement,
|
|
2422
|
+
),
|
|
2423
|
+
)
|
|
2424
|
+
]
|
|
2425
|
+
while generatable:
|
|
2426
|
+
t = generatable.pop(0)
|
|
2427
|
+
if isinstance(t, PersistStatement) and persist:
|
|
2428
|
+
processed = process_persist(self, t)
|
|
2429
|
+
self.add_datasource(processed.datasource)
|
|
2430
|
+
return self, queries
|
|
2431
|
+
|
|
2432
|
+
def add_concept(
    self,
    concept: Concept,
    meta: Meta | None = None,
    force: bool = False,
    add_derived: bool = True,
):
    """Register ``concept`` in this environment and return it.

    Unless ``force`` is set, the concept's address is first passed to
    ``validate_concept``. Concepts in the default namespace are stored under
    their bare name; all others are stored under their full address. When
    ``add_derived`` is true, ``generate_related_concepts`` is invoked for the
    newly added concept.
    """
    if not force:
        self.validate_concept(concept.address, meta=meta)

    in_default_namespace = concept.namespace == DEFAULT_NAMESPACE
    storage_key = concept.name if in_default_namespace else concept.address
    self.concepts[storage_key] = concept

    if not add_derived:
        return concept

    from trilogy.core.environment_helpers import generate_related_concepts

    generate_related_concepts(concept, self)
    return concept
|
|
2450
|
+
|
|
2451
|
+
def add_datasource(
    self,
    datasource: Datasource,
):
    """Register ``datasource`` in this environment and return it.

    Datasources in the default namespace, or with an empty namespace, are
    stored under their bare name; all others are stored under
    ``"<namespace>.<identifier>"``.
    """
    namespace = datasource.namespace
    if namespace == DEFAULT_NAMESPACE or not namespace:
        storage_key = datasource.name
    else:
        storage_key = namespace + "." + datasource.identifier
    self.datasources[storage_key] = datasource
    return datasource
|
|
2465
|
+
|
|
2466
|
+
|
|
2467
|
+
class LazyEnvironment(Environment):
    """Variant of environment to defer parsing of a path.

    The file at ``load_path`` is parsed the first time an attribute other
    than the pass-through names handled in ``__getattribute__`` is read; the
    parsed datasources, concepts and imports are then copied onto this
    instance.
    """

    load_path: Path
    loaded: bool = False

    def __getattribute__(self, name):
        """Return attribute ``name``, parsing ``load_path`` first if needed.

        Bookkeeping attributes and any name starting with an underscore are
        returned directly so that the loading logic itself (and the model
        machinery it relies on) never triggers a load.
        """
        if name in (
            "load_path",
            "loaded",
            "working_path",
            "model_config",
            "model_fields",
        ) or name.startswith("_"):
            return super().__getattribute__(name)
        if not self.loaded:
            import logging

            from trilogy import parse

            # report through logging rather than printing to stdout from
            # library code
            logging.getLogger(__name__).debug(
                "lazily evaluating load path %s to access %s", self.load_path, name
            )
            env = Environment(working_path=str(self.working_path))
            with open(self.load_path, "r") as f:
                parse(f.read(), env)
            # flag as loaded before copying state so the assignments below
            # cannot re-enter this branch
            self.loaded = True
            self.datasources = env.datasources
            self.concepts = env.concepts
            self.imports = env.imports
        return super().__getattribute__(name)
|
|
2494
|
+
|
|
2495
|
+
|
|
2496
|
+
class Comparison(Namespaced, SelectGrain, BaseModel):
    """A binary comparison of two operands using a ``ComparisonOperator``."""

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        Function,
        Concept,
        "Conditional",
        DataType,
        "Comparison",
        "Parenthetical",
        MagicConstants,
        WindowItem,
        AggregateWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        Concept,
        Function,
        "Conditional",
        DataType,
        "Comparison",
        "Parenthetical",
        MagicConstants,
        WindowItem,
        AggregateWrapper,
    ]
    operator: ComparisonOperator

    def __post_init__(self):
        """Raise ``ValueError`` when both operands resolve to different datatypes.

        NOTE(review): pydantic's ``BaseModel`` does not call ``__post_init__``
        on its own (its hook is ``model_post_init``), so this check presumably
        only runs when invoked explicitly -- confirm before relying on it.
        """
        if arg_to_datatype(self.left) != arg_to_datatype(self.right):
            raise ValueError(
                f"Cannot compare {self.left} and {self.right} of different types"
            )

    def __add__(self, other):
        """Combine with another condition using a boolean AND.

        Adding a condition equal to this one returns this comparison
        unchanged. Raises ``ValueError`` for any operand that is not a
        ``Comparison``, ``Conditional`` or ``Parenthetical``.
        """
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError("Cannot add Comparison to non-Comparison")
        if other == self:
            return self
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __repr__(self):
        return f"{str(self.left)} {self.operator.value} {str(self.right)}"

    def with_namespace(self, namespace: str):
        """Return a copy with every ``Namespaced`` operand moved into ``namespace``."""
        return Comparison(
            left=(
                self.left.with_namespace(namespace)
                if isinstance(self.left, Namespaced)
                else self.left
            ),
            right=(
                self.right.with_namespace(namespace)
                if isinstance(self.right, Namespaced)
                else self.right
            ),
            operator=self.operator,
        )

    def with_select_grain(self, grain: Grain):
        """Return a copy with ``grain`` applied to every ``SelectGrain`` operand."""
        return Comparison(
            left=(
                self.left.with_select_grain(grain)
                if isinstance(self.left, SelectGrain)
                else self.left
            ),
            right=(
                self.right.with_select_grain(grain)
                if isinstance(self.right, SelectGrain)
                else self.right
            ),
            operator=self.operator,
        )

    @property
    def input(self) -> List[Concept]:
        """Return the input concepts of both operands, left side first.

        A concept operand contributes itself; nested comparisons, conditionals
        and parentheticals contribute their own ``input``; filter items and
        functions contribute their ``concept_arguments``.
        """
        output: List[Concept] = []
        for side in (self.left, self.right):
            if isinstance(side, Concept):
                output += [side]
            # a nested Comparison is a permitted operand type, so its inputs
            # are collected the same way Conditional.input collects them
            if isinstance(side, (Comparison, Conditional, Parenthetical)):
                output += side.input
            if isinstance(side, (FilterItem, Function)):
                output += side.concept_arguments
        return output

    @property
    def concept_arguments(self) -> List[Concept]:
        """Return concepts directly referenced in where clause"""
        output = []
        output += get_concept_arguments(self.left)
        output += get_concept_arguments(self.right)
        return output
|
|
2606
|
+
|
|
2607
|
+
|
|
2608
|
+
class CaseWhen(Namespaced, SelectGrain, BaseModel):
    """One ``when`` branch of a case expression: a condition and its result."""

    comparison: Conditional | Comparison
    expr: "Expr"

    @property
    def concept_arguments(self):
        """Concept arguments of the condition followed by those of the result."""
        condition_args = get_concept_arguments(self.comparison)
        result_args = get_concept_arguments(self.expr)
        return condition_args + result_args

    def with_namespace(self, namespace: str) -> CaseWhen:
        """Return a copy with the condition and result moved into ``namespace``."""
        scoped_comparison = self.comparison.with_namespace(namespace)
        scoped_expr = self.expr
        if isinstance(scoped_expr, Namespaced):
            scoped_expr = scoped_expr.with_namespace(namespace)
        return CaseWhen(comparison=scoped_comparison, expr=scoped_expr)

    def with_select_grain(self, grain: Grain) -> CaseWhen:
        """Return a copy with ``grain`` applied to the condition and result."""
        grained_comparison = self.comparison.with_select_grain(grain)
        grained_expr = self.expr
        if isinstance(grained_expr, SelectGrain):
            grained_expr = grained_expr.with_select_grain(grain)
        return CaseWhen(comparison=grained_comparison, expr=grained_expr)
|
|
2638
|
+
|
|
2639
|
+
|
|
2640
|
+
class CaseElse(Namespaced, SelectGrain, BaseModel):
    """The fallback branch of a case expression."""

    expr: "Expr"
    # this ensures that it's easily differentiable from CaseWhen
    discriminant: ComparisonOperator = ComparisonOperator.ELSE

    @property
    def concept_arguments(self):
        """Concept arguments of the fallback expression."""
        return get_concept_arguments(self.expr)

    def with_select_grain(self, grain: Grain) -> CaseElse:
        """Return a copy with ``grain`` applied to the expression when supported."""
        grained_expr = self.expr
        if isinstance(grained_expr, SelectGrain):
            grained_expr = grained_expr.with_select_grain(grain)
        return CaseElse(discriminant=self.discriminant, expr=grained_expr)

    def with_namespace(self, namespace: str) -> CaseElse:
        """Return a copy with the expression moved into ``namespace`` when supported."""
        scoped_expr = self.expr
        if isinstance(scoped_expr, Namespaced):
            scoped_expr = scoped_expr.with_namespace(namespace)
        return CaseElse(discriminant=self.discriminant, expr=scoped_expr)
|
|
2674
|
+
|
|
2675
|
+
|
|
2676
|
+
class Conditional(Namespaced, SelectGrain, BaseModel):
    """Two operands joined by a ``BooleanOperator``."""

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        Concept,
        Comparison,
        "Conditional",
        "Parenthetical",
        Function,
        FilterItem,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        Concept,
        Comparison,
        "Conditional",
        "Parenthetical",
        Function,
    ]
    operator: BooleanOperator

    def __add__(self, other) -> "Conditional":
        """AND this conditional with ``other``; adding ``None`` is a no-op.

        Raises ``ValueError`` for any other operand type.
        """
        if other is None:
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{str(self.left)} {self.operator.value} {str(self.right)}"

    def with_namespace(self, namespace: str):
        """Return a copy with every ``Namespaced`` operand moved into ``namespace``."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Namespaced):
            lhs = lhs.with_namespace(namespace)
        if isinstance(rhs, Namespaced):
            rhs = rhs.with_namespace(namespace)
        return Conditional(left=lhs, right=rhs, operator=self.operator)

    def with_select_grain(self, grain: Grain):
        """Return a copy with ``grain`` applied to every ``SelectGrain`` operand."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, SelectGrain):
            lhs = lhs.with_select_grain(grain)
        if isinstance(rhs, SelectGrain):
            rhs = rhs.with_select_grain(grain)
        return Conditional(left=lhs, right=rhs, operator=self.operator)

    @property
    def input(self) -> List[Concept]:
        """Return concepts directly referenced in where clause"""
        collected = []
        for operand in (self.left, self.right):
            if isinstance(operand, (Concept, Comparison, Conditional)):
                # these operand types expose their inputs via ``input``
                collected.extend(operand.input)
            elif isinstance(operand, (Function, Parenthetical, FilterItem)):
                collected.extend(operand.concept_arguments)
        return collected

    @property
    def concept_arguments(self) -> List[Concept]:
        """Return concepts directly referenced in where clause"""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]
|
|
2768
|
+
|
|
2769
|
+
|
|
2770
|
+
class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
    """A function paired with the list of concepts (``by``) it is computed over."""

    function: Function
    by: List[Concept] = Field(default_factory=list)

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{str(self.function)}<{grain_str}>"

    @property
    def datatype(self):
        """Datatype of the wrapped function."""
        return self.function.datatype

    @property
    def concept_arguments(self) -> List[Concept]:
        """Concept arguments of the wrapped function followed by the ``by`` concepts."""
        function_args = self.function.concept_arguments
        return function_args + self.by

    @property
    def output_datatype(self):
        """Output datatype of the wrapped function."""
        return self.function.output_datatype

    @property
    def output_purpose(self):
        """Output purpose of the wrapped function."""
        return self.function.output_purpose

    @property
    def arguments(self):
        """Arguments of the wrapped function."""
        return self.function.arguments

    def with_namespace(self, namespace: str) -> "AggregateWrapper":
        """Return a copy with the function and ``by`` concepts moved into ``namespace``."""
        scoped_function = self.function.with_namespace(namespace)
        if self.by:
            scoped_by = [c.with_namespace(namespace) for c in self.by]
        else:
            scoped_by = []
        return AggregateWrapper(function=scoped_function, by=scoped_by)

    def with_select_grain(self, grain: Grain) -> AggregateWrapper:
        """Return a copy bound to ``grain``.

        An explicit ``by`` list is kept; an empty one is replaced with a copy
        of the grain's components.
        """
        by = self.by or grain.components_copy
        return AggregateWrapper(function=self.function.with_select_grain(grain), by=by)
|
|
2810
|
+
|
|
2811
|
+
|
|
2812
|
+
class WhereClause(Namespaced, SelectGrain, BaseModel):
    """Wrapper around the condition of a where clause."""

    conditional: Union[Comparison, Conditional, "Parenthetical"]

    @property
    def input(self) -> List[Concept]:
        """Input concepts of the wrapped condition."""
        return self.conditional.input

    @property
    def concept_arguments(self) -> List[Concept]:
        """Concept arguments of the wrapped condition."""
        return self.conditional.concept_arguments

    def with_namespace(self, namespace: str) -> WhereClause:
        """Return a copy with the condition moved into ``namespace``."""
        scoped = self.conditional.with_namespace(namespace)
        return WhereClause(conditional=scoped)

    def with_select_grain(self, grain: Grain) -> WhereClause:
        """Return a copy with ``grain`` applied to the condition."""
        grained = self.conditional.with_select_grain(grain)
        return WhereClause(conditional=grained)

    @property
    def grain(self) -> Grain:
        """Build a grain from the inputs of the condition.

        Key inputs contribute themselves; property inputs contribute the
        components of their own grain (if any). Duplicates are removed.
        """
        components = []
        for concept in self.input:
            purpose = concept.purpose
            if purpose == Purpose.KEY:
                components.append(concept)
            elif purpose == Purpose.PROPERTY:
                components.extend(concept.grain.components if concept.grain else [])
        unique_components = list(set(components))
        return Grain(components=unique_components)
|
|
2838
|
+
|
|
2839
|
+
|
|
2840
|
+
class MaterializedDataset(BaseModel):
    """Model holding the address of a materialized dataset."""

    # location of the dataset
    address: Address
|
|
2842
|
+
|
|
2843
|
+
|
|
2844
|
+
# TODO: combine with CTEs
|
|
2845
|
+
# CTE contains processed query?
|
|
2846
|
+
# or CTE references CTE?
|
|
2847
|
+
|
|
2848
|
+
|
|
2849
|
+
class ProcessedQuery(BaseModel):
    """Container for the pieces of a processed query."""

    # concepts returned by the query
    output_columns: List[Concept]
    # every CTE that is part of the query
    ctes: List[CTE]
    # the base CTE of the query
    base: CTE
    # joins that are part of the query
    joins: List[Join]
    grain: Grain
    # defaults to an empty list
    hidden_columns: List[Concept] = Field(default_factory=list)
    # optional clauses; None when absent
    limit: Optional[int] = None
    where_clause: Optional[WhereClause] = None
    order_by: Optional[OrderBy] = None
|
|
2859
|
+
|
|
2860
|
+
|
|
2861
|
+
class ProcessedQueryMixin(BaseModel):
    """Extra fields combined with ``ProcessedQuery`` by ``ProcessedQueryPersist``."""

    # materialized dataset the output is written to
    output_to: MaterializedDataset
    # datasource associated with the persisted output
    datasource: Datasource
    # base:Dataset
|
|
2865
|
+
|
|
2866
|
+
|
|
2867
|
+
class ProcessedQueryPersist(ProcessedQuery, ProcessedQueryMixin):
    """A processed query combined with the persist fields of ``ProcessedQueryMixin``."""

    pass
|
|
2869
|
+
|
|
2870
|
+
|
|
2871
|
+
class ProcessedShowStatement(BaseModel):
    """Container for the columns and values of a processed show statement."""

    output_columns: List[Concept]
    # each value is a concept, a datasource or a processed query
    output_values: List[Union[Concept, Datasource, ProcessedQuery]]
|
|
2874
|
+
|
|
2875
|
+
|
|
2876
|
+
class Limit(BaseModel):
    """Model wrapping an integer limit count."""

    count: int
|
|
2878
|
+
|
|
2879
|
+
|
|
2880
|
+
class ConceptDeclarationStatement(BaseModel):
    """Statement model wrapping a single declared concept."""

    concept: Concept
|
|
2882
|
+
|
|
2883
|
+
|
|
2884
|
+
class ConceptDerivation(BaseModel):
    """Model wrapping a single derived concept."""

    concept: Concept
|
|
2886
|
+
|
|
2887
|
+
|
|
2888
|
+
class RowsetDerivationStatement(Namespaced, BaseModel):
    """A named rowset defined by a select statement."""

    name: str
    select: SelectStatement | MultiSelectStatement
    namespace: str

    def __repr__(self):
        return f"RowsetDerivation<{str(self.select)}>"

    @property
    def derived_concepts(self) -> List[Concept]:
        """Build one rowset-scoped concept per output component of the select.

        Each new concept copies the source concept's name, datatype, purpose,
        metadata, grain and keys, and carries a ``RowsetItem`` lineage. Keys
        and grains are then remapped onto the new concepts where every
        referenced concept is itself part of the rowset.
        """
        derived: list[Concept] = []
        by_source_address: dict[str, Concept] = {}
        for source in self.select.output_components:
            if source.namespace != self.namespace:
                target_namespace = f"{self.name}.{source.namespace}"
            else:
                target_namespace = self.name
            rowset_concept = Concept(
                name=source.name,
                datatype=source.datatype,
                purpose=source.purpose,
                lineage=RowsetItem(
                    content=source, where=self.select.where_clause, rowset=self
                ),
                grain=source.grain,
                metadata=source.metadata,
                namespace=target_namespace,
                keys=source.keys,
            )
            by_source_address[source.address] = rowset_concept
            derived.append(rowset_concept)

        fallback_grain = Grain(components=list(derived))

        # remap everything to the properties of the rowset
        for concept in derived:
            if not concept.keys:
                continue
            if all(key.address in by_source_address for key in concept.keys):
                concept.keys = tuple(
                    by_source_address[key.address] for key in concept.keys
                )
            else:
                # TODO: fix this up
                concept.keys = tuple()

        for concept in derived:
            grain_components = concept.grain.components_copy
            if all(c.address in by_source_address for c in grain_components):
                concept.grain = Grain(
                    components=[by_source_address[c.address] for c in grain_components]
                )
            else:
                concept.grain = fallback_grain
        return derived

    @property
    def arguments(self) -> List[Concept]:
        """Output components of the underlying select."""
        return self.select.output_components

    def with_namespace(self, namespace: str) -> "RowsetDerivationStatement":
        """Return a copy with the select moved into ``namespace``."""
        scoped_select = self.select.with_namespace(namespace)
        return RowsetDerivationStatement(
            name=self.name,
            select=scoped_select,
            namespace=namespace,
        )
|
|
2949
|
+
|
|
2950
|
+
|
|
2951
|
+
class RowsetItem(Namespaced, BaseModel):
    """A concept taken from a rowset, with the optional where clause applied to it."""

    content: Concept
    rowset: RowsetDerivationStatement
    where: Optional["WhereClause"] = None

    def __repr__(self):
        return (
            f"<Rowset<{self.rowset.name}>: {str(self.content)} where {str(self.where)}>"
        )

    def with_namespace(self, namespace: str) -> "RowsetItem":
        """Return a copy with content, where clause and rowset moved into ``namespace``."""
        return RowsetItem(
            content=self.content.with_namespace(namespace),
            where=self.where.with_namespace(namespace) if self.where else None,
            rowset=self.rowset.with_namespace(namespace),
        )

    @property
    def arguments(self) -> List[Concept]:
        """The content concept followed by the inputs of the where clause, if any."""
        output = [self.content]
        if self.where:
            output += self.where.input
        return output

    @property
    def output(self) -> Concept:
        """The content concept, or its ``output`` when it is a ``ConceptTransform``."""
        if isinstance(self.content, ConceptTransform):
            return self.content.output
        return self.content

    @output.setter
    def output(self, value):
        if isinstance(self.content, ConceptTransform):
            self.content.output = value
        else:
            self.content = value

    @property
    def input(self) -> List[Concept]:
        """Inputs of the content concept followed by those of the where clause, if any."""
        # copy first: extending the list returned by ``content.input`` in
        # place would mutate a list this object does not own
        base = list(self.content.input)
        if self.where:
            base += self.where.input
        return base

    @property
    def output_datatype(self):
        """Datatype of the content concept."""
        return self.content.datatype

    @property
    def output_purpose(self):
        """Purpose of the content concept."""
        return self.content.purpose

    @property
    def concept_arguments(self):
        """The content concept followed by the where clause's concept arguments, if any."""
        if self.where:
            return [self.content] + self.where.concept_arguments
        return [self.content]
|
|
3008
|
+
|
|
3009
|
+
|
|
3010
|
+
class Parenthetical(Namespaced, SelectGrain, BaseModel):
    """An expression wrapped in parentheses."""

    content: "Expr"

    def __str__(self):
        return self.__repr__()

    def __add__(self, other) -> Union["Parenthetical", "Conditional"]:
        """AND this parenthetical with ``other``; adding ``None`` is a no-op.

        Raises ``ValueError`` for any other operand type.
        """
        if other is None:
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __repr__(self):
        return f"({str(self.content)})"

    def with_namespace(self, namespace: str):
        """Return a copy with the content moved into ``namespace`` when supported."""
        inner = self.content
        if isinstance(inner, Namespaced):
            inner = inner.with_namespace(namespace)
        return Parenthetical(content=inner)

    def with_select_grain(self, grain: Grain):
        """Return a copy with ``grain`` applied to the content when supported."""
        inner = self.content
        if isinstance(inner, SelectGrain):
            inner = inner.with_select_grain(grain)
        return Parenthetical(content=inner)

    @property
    def concept_arguments(self) -> List[Concept]:
        """Concept arguments of the content.

        Content exposing ``concept_arguments`` contributes those; otherwise a
        bare concept contributes itself; anything else contributes nothing.
        """
        inner = self.content
        if hasattr(inner, "concept_arguments"):
            return list(inner.concept_arguments)
        if isinstance(inner, Concept):
            return [inner]
        return []

    @property
    def input(self):
        """Inputs of the content, or an empty list when it exposes none."""
        inner = self.content
        if not hasattr(inner, "input"):
            return []
        return list(inner.input)
|
|
3061
|
+
|
|
3062
|
+
|
|
3063
|
+
class PersistStatement(BaseModel):
    """Statement pairing a select with the datasource it targets."""

    datasource: Datasource
    select: SelectStatement

    @property
    def identifier(self):
        """Identifier of the target datasource."""
        target = self.datasource
        return target.identifier

    @property
    def address(self):
        """Address of the target datasource."""
        target = self.datasource
        return target.address
|
|
3074
|
+
|
|
3075
|
+
|
|
3076
|
+
class ShowStatement(BaseModel):
    """Statement wrapping the thing to show."""

    # either a select, a persist statement, or a show category
    content: SelectStatement | PersistStatement | ShowCategory
|
|
3078
|
+
|
|
3079
|
+
|
|
3080
|
+
# Union of the value types accepted wherever a field is annotated with the
# forward reference "Expr" (e.g. CaseWhen.expr, CaseElse.expr, Parenthetical.content).
Expr = (
    bool
    | int
    | str
    | float
    | list
    | WindowItem
    | FilterItem
    | Concept
    | Comparison
    | Conditional
    | Parenthetical
    | Function
    | AggregateWrapper
)
|
|
3095
|
+
|
|
3096
|
+
|
|
3097
|
+
# Rebuild the models now that every class and the `Expr` alias above are defined,
# so that string (forward-reference) annotations such as "Conditional" or "Expr"
# can be resolved.
Concept.model_rebuild()
Grain.model_rebuild()
WindowItem.model_rebuild()
WindowItemOrder.model_rebuild()
FilterItem.model_rebuild()
Comparison.model_rebuild()
Conditional.model_rebuild()
Parenthetical.model_rebuild()
WhereClause.model_rebuild()
ImportStatement.model_rebuild()
CaseWhen.model_rebuild()
CaseElse.model_rebuild()
SelectStatement.model_rebuild()
CTE.model_rebuild()
BaseJoin.model_rebuild()
QueryDatasource.model_rebuild()
ProcessedQuery.model_rebuild()
ProcessedQueryPersist.model_rebuild()
InstantiatedUnnestJoin.model_rebuild()
UndefinedConcept.model_rebuild()
Function.model_rebuild()
# NOTE(review): Grain is rebuilt a second time here; presumably redundant with
# the call near the top of this list -- confirm before removing.
Grain.model_rebuild()
|
|
3119
|
+
|
|
3120
|
+
|
|
3121
|
+
def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
    """Return the datatype that ``arg`` evaluates to.

    Functions and aggregate wrappers report their function's output datatype,
    concepts their own datatype, Python scalars the matching ``DataType``
    member, and list wrappers a ``ListType`` of their element type.
    Parentheticals and window items are resolved through their content,
    except that rank and row-number windows are always integers.

    Raises ``ValueError`` for any other argument type.
    """
    if isinstance(arg, Function):
        return arg.output_datatype
    if isinstance(arg, Concept):
        return arg.datatype

    # bool is checked before int because bool is a subclass of int
    scalar_types = (
        (bool, DataType.BOOL),
        (int, DataType.INTEGER),
        (str, DataType.STRING),
        (float, DataType.FLOAT),
    )
    for python_type, datatype in scalar_types:
        if isinstance(arg, python_type):
            return datatype

    if isinstance(arg, ListWrapper):
        return ListType(type=arg.type)
    if isinstance(arg, AggregateWrapper):
        return arg.function.output_datatype
    if isinstance(arg, Parenthetical):
        return arg_to_datatype(arg.content)
    if isinstance(arg, WindowItem):
        if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
            return DataType.INTEGER
        return arg_to_datatype(arg.content)
    raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
|