pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1845 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +696 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +17 -22
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +181 -146
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +51 -45
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +13 -10
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +59 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +19 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +92 -77
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +55 -40
- trilogy/core/processing/nodes/merge_node.py +47 -38
- trilogy/core/processing/nodes/select_node_v2.py +54 -40
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +108 -80
- trilogy/core/query_processor.py +67 -49
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +152 -111
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +110 -93
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +303 -64
- trilogy/parsing/parse_engine.py +263 -617
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
trilogy/parsing/common.py
CHANGED
|
@@ -1,45 +1,51 @@
|
|
|
1
1
|
from datetime import date, datetime
|
|
2
|
-
from typing import List, Tuple
|
|
2
|
+
from typing import Iterable, List, Sequence, Tuple
|
|
3
|
+
|
|
4
|
+
from lark.tree import Meta
|
|
3
5
|
|
|
4
6
|
from trilogy.constants import (
|
|
5
7
|
VIRTUAL_CONCEPT_PREFIX,
|
|
6
8
|
)
|
|
9
|
+
from trilogy.core.constants import ALL_ROWS_CONCEPT
|
|
7
10
|
from trilogy.core.enums import (
|
|
11
|
+
ConceptSource,
|
|
12
|
+
Derivation,
|
|
13
|
+
FunctionClass,
|
|
8
14
|
FunctionType,
|
|
9
15
|
Granularity,
|
|
10
16
|
Modifier,
|
|
11
|
-
|
|
17
|
+
Purpose,
|
|
12
18
|
WindowType,
|
|
13
19
|
)
|
|
14
|
-
from trilogy.core.
|
|
15
|
-
from trilogy.core.
|
|
20
|
+
from trilogy.core.exceptions import InvalidSyntaxException
|
|
21
|
+
from trilogy.core.functions import function_args_to_output_purpose
|
|
22
|
+
from trilogy.core.models.author import (
|
|
16
23
|
AggregateWrapper,
|
|
24
|
+
AlignClause,
|
|
25
|
+
AlignItem,
|
|
17
26
|
Concept,
|
|
18
|
-
|
|
19
|
-
Environment,
|
|
27
|
+
ConceptRef,
|
|
20
28
|
FilterItem,
|
|
21
29
|
Function,
|
|
22
|
-
FunctionClass,
|
|
23
30
|
Grain,
|
|
31
|
+
HavingClause,
|
|
24
32
|
ListWrapper,
|
|
25
33
|
MapWrapper,
|
|
26
|
-
Meta,
|
|
27
34
|
Metadata,
|
|
35
|
+
MultiSelectLineage,
|
|
28
36
|
Parenthetical,
|
|
29
|
-
|
|
37
|
+
RowsetItem,
|
|
38
|
+
RowsetLineage,
|
|
39
|
+
UndefinedConcept,
|
|
40
|
+
WhereClause,
|
|
30
41
|
WindowItem,
|
|
31
42
|
)
|
|
43
|
+
from trilogy.core.models.core import DataType, arg_to_datatype
|
|
44
|
+
from trilogy.core.models.environment import Environment
|
|
45
|
+
from trilogy.core.statements.author import RowsetDerivationStatement, SelectStatement
|
|
32
46
|
from trilogy.utility import string_to_hash, unique
|
|
33
47
|
|
|
34
48
|
|
|
35
|
-
def get_upstream_modifiers(keys: List[Concept]) -> list[Modifier]:
|
|
36
|
-
modifiers = set()
|
|
37
|
-
for pkey in keys:
|
|
38
|
-
if pkey.modifiers:
|
|
39
|
-
modifiers.update(pkey.modifiers)
|
|
40
|
-
return list(modifiers)
|
|
41
|
-
|
|
42
|
-
|
|
43
49
|
def process_function_args(
|
|
44
50
|
args,
|
|
45
51
|
meta: Meta | None,
|
|
@@ -98,20 +104,43 @@ def process_function_args(
|
|
|
98
104
|
return final
|
|
99
105
|
|
|
100
106
|
|
|
107
|
+
def get_upstream_modifiers(
|
|
108
|
+
keys: Sequence[Concept | ConceptRef], environment: Environment
|
|
109
|
+
) -> list[Modifier]:
|
|
110
|
+
modifiers = set()
|
|
111
|
+
for pkey in keys:
|
|
112
|
+
if isinstance(pkey, ConceptRef):
|
|
113
|
+
pkey = environment.concepts[pkey.address]
|
|
114
|
+
if isinstance(pkey, UndefinedConcept):
|
|
115
|
+
continue
|
|
116
|
+
if pkey.modifiers:
|
|
117
|
+
modifiers.update(pkey.modifiers)
|
|
118
|
+
return list(modifiers)
|
|
119
|
+
|
|
120
|
+
|
|
101
121
|
def get_purpose_and_keys(
|
|
102
|
-
purpose: Purpose | None,
|
|
122
|
+
purpose: Purpose | None,
|
|
123
|
+
args: Tuple[ConceptRef | Concept, ...] | None,
|
|
124
|
+
environment: Environment,
|
|
103
125
|
) -> Tuple[Purpose, set[str] | None]:
|
|
104
126
|
local_purpose = purpose or function_args_to_output_purpose(args)
|
|
105
127
|
if local_purpose in (Purpose.PROPERTY, Purpose.METRIC) and args:
|
|
106
|
-
keys = concept_list_to_keys(args)
|
|
128
|
+
keys = concept_list_to_keys(args, environment)
|
|
107
129
|
else:
|
|
108
130
|
keys = None
|
|
109
131
|
return local_purpose, keys
|
|
110
132
|
|
|
111
133
|
|
|
112
|
-
def concept_list_to_keys(
|
|
134
|
+
def concept_list_to_keys(
|
|
135
|
+
concepts: Tuple[Concept | ConceptRef, ...], environment: Environment
|
|
136
|
+
) -> set[str]:
|
|
113
137
|
final_keys: List[str] = []
|
|
114
138
|
for concept in concepts:
|
|
139
|
+
|
|
140
|
+
if isinstance(concept, ConceptRef):
|
|
141
|
+
concept = environment.concepts[concept.address]
|
|
142
|
+
if isinstance(concept, UndefinedConcept):
|
|
143
|
+
continue
|
|
115
144
|
if concept.keys:
|
|
116
145
|
final_keys += list(concept.keys)
|
|
117
146
|
elif concept.purpose != Purpose.PROPERTY:
|
|
@@ -138,30 +167,73 @@ def constant_to_concept(
|
|
|
138
167
|
name=name,
|
|
139
168
|
datatype=const_function.output_datatype,
|
|
140
169
|
purpose=Purpose.CONSTANT,
|
|
170
|
+
granularity=Granularity.SINGLE_ROW,
|
|
171
|
+
derivation=Derivation.CONSTANT,
|
|
141
172
|
lineage=const_function,
|
|
142
|
-
grain=
|
|
173
|
+
grain=Grain(),
|
|
143
174
|
namespace=namespace,
|
|
144
175
|
metadata=fmetadata,
|
|
145
176
|
)
|
|
146
177
|
|
|
147
178
|
|
|
179
|
+
def concept_is_relevant(
|
|
180
|
+
concept: Concept | ConceptRef,
|
|
181
|
+
others: list[Concept | ConceptRef],
|
|
182
|
+
environment: Environment | None = None,
|
|
183
|
+
) -> bool:
|
|
184
|
+
if isinstance(concept, UndefinedConcept):
|
|
185
|
+
|
|
186
|
+
return False
|
|
187
|
+
if isinstance(concept, ConceptRef):
|
|
188
|
+
if environment:
|
|
189
|
+
concept = environment.concepts[concept.address]
|
|
190
|
+
else:
|
|
191
|
+
raise SyntaxError(
|
|
192
|
+
"Require environment to determine relevance of ConceptRef"
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
if concept.is_aggregate and not (
|
|
196
|
+
isinstance(concept.lineage, AggregateWrapper) and concept.lineage.by
|
|
197
|
+
):
|
|
198
|
+
|
|
199
|
+
return False
|
|
200
|
+
if concept.purpose in (Purpose.PROPERTY, Purpose.METRIC) and concept.keys:
|
|
201
|
+
if any([c in others for c in concept.keys]):
|
|
202
|
+
|
|
203
|
+
return False
|
|
204
|
+
if concept.purpose in (Purpose.METRIC,):
|
|
205
|
+
if all([c in others for c in concept.grain.components]):
|
|
206
|
+
return False
|
|
207
|
+
if concept.derivation in (Derivation.BASIC,):
|
|
208
|
+
|
|
209
|
+
return any(
|
|
210
|
+
concept_is_relevant(c, others, environment)
|
|
211
|
+
for c in concept.concept_arguments
|
|
212
|
+
)
|
|
213
|
+
if concept.granularity == Granularity.SINGLE_ROW:
|
|
214
|
+
return False
|
|
215
|
+
return True
|
|
216
|
+
|
|
217
|
+
|
|
148
218
|
def concepts_to_grain_concepts(
|
|
149
|
-
concepts:
|
|
219
|
+
concepts: Iterable[Concept | ConceptRef | str], environment: Environment | None
|
|
150
220
|
) -> list[Concept]:
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
221
|
+
pconcepts: list[Concept] = []
|
|
222
|
+
for c in concepts:
|
|
223
|
+
if isinstance(c, Concept):
|
|
224
|
+
pconcepts.append(c)
|
|
225
|
+
elif isinstance(c, ConceptRef) and environment:
|
|
226
|
+
pconcepts.append(environment.concepts[c.address])
|
|
227
|
+
elif isinstance(c, str) and environment:
|
|
228
|
+
pconcepts.append(environment.concepts[c])
|
|
229
|
+
else:
|
|
230
|
+
raise ValueError(
|
|
231
|
+
f"Unable to resolve input {c} without environment provided to concepts_to_grain call"
|
|
232
|
+
)
|
|
155
233
|
|
|
156
234
|
final: List[Concept] = []
|
|
157
235
|
for sub in pconcepts:
|
|
158
|
-
if
|
|
159
|
-
if any([c in pconcepts for c in sub.keys]):
|
|
160
|
-
continue
|
|
161
|
-
if sub.purpose in (Purpose.METRIC,):
|
|
162
|
-
if all([c in pconcepts for c in sub.grain.components]):
|
|
163
|
-
continue
|
|
164
|
-
if sub.granularity == Granularity.SINGLE_ROW:
|
|
236
|
+
if not concept_is_relevant(sub, pconcepts, environment): # type: ignore
|
|
165
237
|
continue
|
|
166
238
|
final.append(sub)
|
|
167
239
|
final = unique(final, "address")
|
|
@@ -178,19 +250,20 @@ def function_to_concept(
|
|
|
178
250
|
) -> Concept:
|
|
179
251
|
pkeys: List[Concept] = []
|
|
180
252
|
namespace = namespace or environment.namespace
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
253
|
+
concrete_args = [
|
|
254
|
+
x
|
|
255
|
+
for x in [environment.concepts[c.address] for c in parent.concept_arguments]
|
|
256
|
+
if not isinstance(x, UndefinedConcept)
|
|
257
|
+
]
|
|
258
|
+
|
|
259
|
+
pkeys += [x for x in concrete_args if not x.derivation == Derivation.CONSTANT]
|
|
187
260
|
grain: Grain | None = Grain()
|
|
188
261
|
for x in pkeys:
|
|
189
262
|
grain += x.grain
|
|
190
263
|
if parent.operator in FunctionClass.ONE_TO_MANY.value:
|
|
191
264
|
# if the function will create more rows, we don't know what grain this is at
|
|
192
265
|
grain = None
|
|
193
|
-
modifiers = get_upstream_modifiers(pkeys)
|
|
266
|
+
modifiers = get_upstream_modifiers(pkeys, environment)
|
|
194
267
|
key_grain: list[str] = []
|
|
195
268
|
for x in pkeys:
|
|
196
269
|
if x.keys:
|
|
@@ -203,8 +276,37 @@ def function_to_concept(
|
|
|
203
276
|
else:
|
|
204
277
|
purpose = parent.output_purpose
|
|
205
278
|
fmetadata = metadata or Metadata()
|
|
279
|
+
if parent.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
|
|
280
|
+
derivation = Derivation.AGGREGATE
|
|
281
|
+
if (
|
|
282
|
+
grain
|
|
283
|
+
and grain.components
|
|
284
|
+
and all(x.endswith(ALL_ROWS_CONCEPT) for x in grain.components)
|
|
285
|
+
):
|
|
286
|
+
granularity = Granularity.SINGLE_ROW
|
|
287
|
+
else:
|
|
288
|
+
granularity = Granularity.MULTI_ROW
|
|
289
|
+
elif parent.operator == FunctionType.UNION:
|
|
290
|
+
derivation = Derivation.UNION
|
|
291
|
+
granularity = Granularity.MULTI_ROW
|
|
292
|
+
elif parent.operator == FunctionType.UNNEST:
|
|
293
|
+
derivation = Derivation.UNNEST
|
|
294
|
+
granularity = Granularity.MULTI_ROW
|
|
295
|
+
elif parent.operator in FunctionClass.SINGLE_ROW.value:
|
|
296
|
+
derivation = Derivation.CONSTANT
|
|
297
|
+
granularity = Granularity.SINGLE_ROW
|
|
298
|
+
elif concrete_args and all(
|
|
299
|
+
x.derivation == Derivation.CONSTANT for x in concrete_args
|
|
300
|
+
):
|
|
301
|
+
derivation = Derivation.CONSTANT
|
|
302
|
+
granularity = Granularity.SINGLE_ROW
|
|
303
|
+
else:
|
|
304
|
+
derivation = Derivation.BASIC
|
|
305
|
+
granularity = Granularity.MULTI_ROW
|
|
306
|
+
# granularity = Concept.calculate_granularity(derivation, grain, parent)
|
|
307
|
+
|
|
206
308
|
if grain is not None:
|
|
207
|
-
|
|
309
|
+
r = Concept(
|
|
208
310
|
name=name,
|
|
209
311
|
datatype=parent.output_datatype,
|
|
210
312
|
purpose=purpose,
|
|
@@ -214,7 +316,10 @@ def function_to_concept(
|
|
|
214
316
|
modifiers=modifiers,
|
|
215
317
|
grain=grain,
|
|
216
318
|
metadata=fmetadata,
|
|
319
|
+
derivation=derivation,
|
|
320
|
+
granularity=granularity,
|
|
217
321
|
)
|
|
322
|
+
return r
|
|
218
323
|
|
|
219
324
|
return Concept(
|
|
220
325
|
name=name,
|
|
@@ -225,6 +330,8 @@ def function_to_concept(
|
|
|
225
330
|
keys=keys,
|
|
226
331
|
modifiers=modifiers,
|
|
227
332
|
metadata=fmetadata,
|
|
333
|
+
derivation=derivation,
|
|
334
|
+
granularity=granularity,
|
|
228
335
|
)
|
|
229
336
|
|
|
230
337
|
|
|
@@ -232,32 +339,36 @@ def filter_item_to_concept(
|
|
|
232
339
|
parent: FilterItem,
|
|
233
340
|
name: str,
|
|
234
341
|
namespace: str,
|
|
342
|
+
environment: Environment,
|
|
235
343
|
purpose: Purpose | None = None,
|
|
236
344
|
metadata: Metadata | None = None,
|
|
237
345
|
) -> Concept:
|
|
238
346
|
fmetadata = metadata or Metadata()
|
|
239
|
-
|
|
347
|
+
cparent = environment.concepts[parent.content.address]
|
|
348
|
+
modifiers = get_upstream_modifiers(
|
|
349
|
+
cparent.concept_arguments, environment=environment
|
|
350
|
+
)
|
|
351
|
+
grain = cparent.grain if cparent.purpose == Purpose.PROPERTY else Grain()
|
|
352
|
+
granularity = cparent.granularity
|
|
240
353
|
return Concept(
|
|
241
354
|
name=name,
|
|
242
|
-
datatype=
|
|
355
|
+
datatype=cparent.datatype,
|
|
243
356
|
purpose=Purpose.PROPERTY,
|
|
244
357
|
lineage=parent,
|
|
245
358
|
metadata=fmetadata,
|
|
246
359
|
namespace=namespace,
|
|
247
360
|
# filtered copies cannot inherit keys
|
|
248
361
|
keys=(
|
|
249
|
-
|
|
250
|
-
if
|
|
362
|
+
cparent.keys
|
|
363
|
+
if cparent.purpose == Purpose.PROPERTY
|
|
251
364
|
else {
|
|
252
|
-
|
|
365
|
+
cparent.address,
|
|
253
366
|
}
|
|
254
367
|
),
|
|
255
|
-
grain=
|
|
256
|
-
parent.content.grain
|
|
257
|
-
if parent.content.purpose == Purpose.PROPERTY
|
|
258
|
-
else Grain()
|
|
259
|
-
),
|
|
368
|
+
grain=grain,
|
|
260
369
|
modifiers=modifiers,
|
|
370
|
+
derivation=Derivation.FILTER,
|
|
371
|
+
granularity=granularity,
|
|
261
372
|
)
|
|
262
373
|
|
|
263
374
|
|
|
@@ -265,18 +376,24 @@ def window_item_to_concept(
|
|
|
265
376
|
parent: WindowItem,
|
|
266
377
|
name: str,
|
|
267
378
|
namespace: str,
|
|
379
|
+
environment: Environment,
|
|
268
380
|
purpose: Purpose | None = None,
|
|
269
381
|
metadata: Metadata | None = None,
|
|
270
382
|
) -> Concept:
|
|
271
383
|
fmetadata = metadata or Metadata()
|
|
272
|
-
|
|
384
|
+
bcontent = environment.concepts[parent.content.address]
|
|
385
|
+
if isinstance(bcontent, UndefinedConcept):
|
|
386
|
+
return UndefinedConcept(address=f"{namespace}.{name}", metadata=fmetadata)
|
|
387
|
+
local_purpose, keys = get_purpose_and_keys(purpose, (bcontent,), environment)
|
|
388
|
+
|
|
273
389
|
if parent.order_by:
|
|
274
|
-
|
|
390
|
+
grain_components = parent.over + [bcontent.output]
|
|
275
391
|
for item in parent.order_by:
|
|
276
|
-
|
|
392
|
+
grain_components += item.concept_arguments
|
|
277
393
|
else:
|
|
278
|
-
|
|
279
|
-
|
|
394
|
+
grain_components = parent.over + [bcontent.output]
|
|
395
|
+
final_grain = Grain.from_concepts(grain_components, environment)
|
|
396
|
+
modifiers = get_upstream_modifiers(bcontent.concept_arguments, environment)
|
|
280
397
|
datatype = parent.content.datatype
|
|
281
398
|
if parent.type in (
|
|
282
399
|
WindowType.RANK,
|
|
@@ -285,7 +402,6 @@ def window_item_to_concept(
|
|
|
285
402
|
WindowType.COUNT_DISTINCT,
|
|
286
403
|
):
|
|
287
404
|
datatype = DataType.INTEGER
|
|
288
|
-
|
|
289
405
|
return Concept(
|
|
290
406
|
name=name,
|
|
291
407
|
datatype=datatype,
|
|
@@ -293,10 +409,12 @@ def window_item_to_concept(
|
|
|
293
409
|
lineage=parent,
|
|
294
410
|
metadata=fmetadata,
|
|
295
411
|
# filters are implicitly at the grain of the base item
|
|
296
|
-
grain=
|
|
412
|
+
grain=final_grain,
|
|
297
413
|
namespace=namespace,
|
|
298
414
|
keys=keys,
|
|
299
415
|
modifiers=modifiers,
|
|
416
|
+
derivation=Derivation.WINDOW,
|
|
417
|
+
granularity=bcontent.granularity,
|
|
300
418
|
)
|
|
301
419
|
|
|
302
420
|
|
|
@@ -304,30 +422,135 @@ def agg_wrapper_to_concept(
|
|
|
304
422
|
parent: AggregateWrapper,
|
|
305
423
|
namespace: str,
|
|
306
424
|
name: str,
|
|
425
|
+
environment: Environment,
|
|
307
426
|
metadata: Metadata | None = None,
|
|
308
427
|
) -> Concept:
|
|
309
428
|
_, keys = get_purpose_and_keys(
|
|
310
|
-
Purpose.METRIC, tuple(parent.by) if parent.by else None
|
|
429
|
+
Purpose.METRIC, tuple(parent.by) if parent.by else None, environment=environment
|
|
311
430
|
)
|
|
312
431
|
# anything grouped to a grain should be a property
|
|
313
432
|
# at that grain
|
|
314
433
|
fmetadata = metadata or Metadata()
|
|
315
434
|
aggfunction = parent.function
|
|
316
|
-
modifiers = get_upstream_modifiers(parent.concept_arguments)
|
|
435
|
+
modifiers = get_upstream_modifiers(parent.concept_arguments, environment)
|
|
436
|
+
# derivation = Concept.calculate_derivation(parent, Purpose.PROPERTY)
|
|
437
|
+
grain = Grain.from_concepts(parent.by, environment) if parent.by else Grain()
|
|
438
|
+
granularity = Concept.calculate_granularity(Derivation.AGGREGATE, grain, parent)
|
|
317
439
|
out = Concept(
|
|
318
440
|
name=name,
|
|
319
441
|
datatype=aggfunction.output_datatype,
|
|
320
442
|
purpose=Purpose.METRIC,
|
|
321
443
|
metadata=fmetadata,
|
|
322
444
|
lineage=parent,
|
|
323
|
-
grain=
|
|
445
|
+
grain=grain,
|
|
324
446
|
namespace=namespace,
|
|
325
447
|
keys=set([x.address for x in parent.by]) if parent.by else keys,
|
|
326
448
|
modifiers=modifiers,
|
|
449
|
+
derivation=Derivation.AGGREGATE,
|
|
450
|
+
granularity=granularity,
|
|
327
451
|
)
|
|
328
452
|
return out
|
|
329
453
|
|
|
330
454
|
|
|
455
|
+
def align_item_to_concept(
|
|
456
|
+
parent: AlignItem,
|
|
457
|
+
align_clause: AlignClause,
|
|
458
|
+
selects: list[SelectStatement],
|
|
459
|
+
environment: Environment,
|
|
460
|
+
where: WhereClause | None = None,
|
|
461
|
+
having: HavingClause | None = None,
|
|
462
|
+
limit: int | None = None,
|
|
463
|
+
) -> Concept:
|
|
464
|
+
align = parent
|
|
465
|
+
datatypes = set([c.datatype for c in align.concepts])
|
|
466
|
+
if len(datatypes) > 1:
|
|
467
|
+
raise InvalidSyntaxException(
|
|
468
|
+
f"Datatypes do not align for merged statements {align.alias}, have {datatypes}"
|
|
469
|
+
)
|
|
470
|
+
|
|
471
|
+
new_selects = [x.as_lineage(environment) for x in selects]
|
|
472
|
+
multi_lineage = MultiSelectLineage(
|
|
473
|
+
selects=new_selects,
|
|
474
|
+
align=align_clause,
|
|
475
|
+
namespace=align.namespace,
|
|
476
|
+
where_clause=where,
|
|
477
|
+
having_clause=having,
|
|
478
|
+
limit=limit,
|
|
479
|
+
hidden_components=set(y for x in new_selects for y in x.hidden_components),
|
|
480
|
+
)
|
|
481
|
+
grain = Grain()
|
|
482
|
+
new = Concept(
|
|
483
|
+
name=align.alias,
|
|
484
|
+
datatype=datatypes.pop(),
|
|
485
|
+
purpose=Purpose.PROPERTY,
|
|
486
|
+
lineage=multi_lineage,
|
|
487
|
+
grain=grain,
|
|
488
|
+
namespace=align.namespace,
|
|
489
|
+
granularity=Granularity.MULTI_ROW,
|
|
490
|
+
derivation=Derivation.MULTISELECT,
|
|
491
|
+
keys=set(x.address for x in align.concepts),
|
|
492
|
+
)
|
|
493
|
+
return new
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environment):
|
|
497
|
+
pre_output: list[Concept] = []
|
|
498
|
+
orig: dict[str, Concept] = {}
|
|
499
|
+
orig_map: dict[str, Concept] = {}
|
|
500
|
+
for orig_address in rowset.select.output_components:
|
|
501
|
+
orig_concept = environment.concepts[orig_address.address]
|
|
502
|
+
name = orig_concept.name
|
|
503
|
+
if isinstance(orig_concept.lineage, FilterItem):
|
|
504
|
+
if orig_concept.lineage.where == rowset.select.where_clause:
|
|
505
|
+
name = environment.concepts[orig_concept.lineage.content.address].name
|
|
506
|
+
|
|
507
|
+
new_concept = Concept(
|
|
508
|
+
name=name,
|
|
509
|
+
datatype=orig_concept.datatype,
|
|
510
|
+
purpose=orig_concept.purpose,
|
|
511
|
+
lineage=None,
|
|
512
|
+
grain=orig_concept.grain,
|
|
513
|
+
# TODO: add proper metadata
|
|
514
|
+
metadata=Metadata(concept_source=ConceptSource.CTE),
|
|
515
|
+
namespace=(
|
|
516
|
+
f"{rowset.name}.{orig_concept.namespace}"
|
|
517
|
+
if orig_concept.namespace != rowset.namespace
|
|
518
|
+
else rowset.name
|
|
519
|
+
),
|
|
520
|
+
keys=orig_concept.keys,
|
|
521
|
+
derivation=Derivation.ROWSET,
|
|
522
|
+
granularity=orig_concept.granularity,
|
|
523
|
+
)
|
|
524
|
+
orig[orig_concept.address] = new_concept
|
|
525
|
+
orig_map[new_concept.address] = orig_concept
|
|
526
|
+
pre_output.append(new_concept)
|
|
527
|
+
select_lineage = rowset.select.as_lineage(environment)
|
|
528
|
+
for x in pre_output:
|
|
529
|
+
x.lineage = RowsetItem(
|
|
530
|
+
content=orig_map[x.address].reference,
|
|
531
|
+
# where=rowset.select.where_clause,
|
|
532
|
+
rowset=RowsetLineage(
|
|
533
|
+
name=rowset.name,
|
|
534
|
+
derived_concepts=[x.reference for x in pre_output],
|
|
535
|
+
select=select_lineage,
|
|
536
|
+
),
|
|
537
|
+
)
|
|
538
|
+
default_grain = Grain.from_concepts([*pre_output])
|
|
539
|
+
# remap everything to the properties of the rowset
|
|
540
|
+
for x in pre_output:
|
|
541
|
+
if x.keys:
|
|
542
|
+
if all([k in orig for k in x.keys]):
|
|
543
|
+
x.keys = set([orig[k].address if k in orig else k for k in x.keys])
|
|
544
|
+
else:
|
|
545
|
+
# TODO: fix this up
|
|
546
|
+
x.keys = set()
|
|
547
|
+
if all([c in orig for c in x.grain.components]):
|
|
548
|
+
x.grain = Grain(components={orig[c].address for c in x.grain.components})
|
|
549
|
+
else:
|
|
550
|
+
x.grain = default_grain
|
|
551
|
+
return pre_output
|
|
552
|
+
|
|
553
|
+
|
|
331
554
|
def arbitrary_to_concept(
|
|
332
555
|
parent: (
|
|
333
556
|
AggregateWrapper
|
|
@@ -350,15 +573,31 @@ def arbitrary_to_concept(
|
|
|
350
573
|
if isinstance(parent, AggregateWrapper):
|
|
351
574
|
if not name:
|
|
352
575
|
name = f"{VIRTUAL_CONCEPT_PREFIX}_agg_{parent.function.operator.value}_{string_to_hash(str(parent))}"
|
|
353
|
-
return agg_wrapper_to_concept(
|
|
576
|
+
return agg_wrapper_to_concept(
|
|
577
|
+
parent, namespace, name, metadata=metadata, environment=environment
|
|
578
|
+
)
|
|
354
579
|
elif isinstance(parent, WindowItem):
|
|
355
580
|
if not name:
|
|
356
581
|
name = f"{VIRTUAL_CONCEPT_PREFIX}_window_{parent.type.value}_{string_to_hash(str(parent))}"
|
|
357
|
-
return window_item_to_concept(
|
|
582
|
+
return window_item_to_concept(
|
|
583
|
+
parent,
|
|
584
|
+
name,
|
|
585
|
+
namespace,
|
|
586
|
+
environment=environment,
|
|
587
|
+
purpose=purpose,
|
|
588
|
+
metadata=metadata,
|
|
589
|
+
)
|
|
358
590
|
elif isinstance(parent, FilterItem):
|
|
359
591
|
if not name:
|
|
360
592
|
name = f"{VIRTUAL_CONCEPT_PREFIX}_filter_{parent.content.name}_{string_to_hash(str(parent))}"
|
|
361
|
-
return filter_item_to_concept(
|
|
593
|
+
return filter_item_to_concept(
|
|
594
|
+
parent,
|
|
595
|
+
name,
|
|
596
|
+
namespace,
|
|
597
|
+
environment=environment,
|
|
598
|
+
purpose=purpose,
|
|
599
|
+
metadata=metadata,
|
|
600
|
+
)
|
|
362
601
|
elif isinstance(parent, Function):
|
|
363
602
|
if not name:
|
|
364
603
|
name = f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{string_to_hash(str(parent))}"
|