pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.138.dist-info/METADATA +525 -0
- pytrilogy-0.3.138.dist-info/RECORD +182 -0
- pytrilogy-0.3.138.dist-info/WHEEL +5 -0
- pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +9 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +87 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +143 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2672 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +494 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +748 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +517 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +106 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1359 -0
- trilogy/dialect/bigquery.py +256 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +177 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +91 -0
- trilogy/dialect/presto.py +104 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +90 -0
- trilogy/dialect/sql_server.py +92 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +750 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +7 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +289 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +460 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/parallel_execution.py +483 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/trilogy.py +772 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,1069 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from typing import Iterable, List, Sequence, Tuple
|
|
3
|
+
|
|
4
|
+
from lark.tree import Meta
|
|
5
|
+
|
|
6
|
+
from trilogy.constants import DEFAULT_NAMESPACE, VIRTUAL_CONCEPT_PREFIX
|
|
7
|
+
from trilogy.core.constants import ALL_ROWS_CONCEPT
|
|
8
|
+
from trilogy.core.enums import (
|
|
9
|
+
ConceptSource,
|
|
10
|
+
Derivation,
|
|
11
|
+
FunctionClass,
|
|
12
|
+
FunctionType,
|
|
13
|
+
Granularity,
|
|
14
|
+
Modifier,
|
|
15
|
+
Purpose,
|
|
16
|
+
WindowType,
|
|
17
|
+
)
|
|
18
|
+
from trilogy.core.exceptions import InvalidSyntaxException
|
|
19
|
+
from trilogy.core.functions import function_args_to_output_purpose
|
|
20
|
+
from trilogy.core.models.author import (
|
|
21
|
+
AggregateWrapper,
|
|
22
|
+
AlignClause,
|
|
23
|
+
AlignItem,
|
|
24
|
+
CaseElse,
|
|
25
|
+
CaseWhen,
|
|
26
|
+
Comparison,
|
|
27
|
+
Concept,
|
|
28
|
+
ConceptArgs,
|
|
29
|
+
ConceptRef,
|
|
30
|
+
Conditional,
|
|
31
|
+
FilterItem,
|
|
32
|
+
Function,
|
|
33
|
+
FunctionCallWrapper,
|
|
34
|
+
Grain,
|
|
35
|
+
HavingClause,
|
|
36
|
+
ListWrapper,
|
|
37
|
+
MapWrapper,
|
|
38
|
+
Metadata,
|
|
39
|
+
MultiSelectLineage,
|
|
40
|
+
Parenthetical,
|
|
41
|
+
RowsetItem,
|
|
42
|
+
RowsetLineage,
|
|
43
|
+
SubselectComparison,
|
|
44
|
+
TraitDataType,
|
|
45
|
+
TupleWrapper,
|
|
46
|
+
UndefinedConcept,
|
|
47
|
+
WhereClause,
|
|
48
|
+
WindowItem,
|
|
49
|
+
address_with_namespace,
|
|
50
|
+
)
|
|
51
|
+
from trilogy.core.models.core import DataType, arg_to_datatype
|
|
52
|
+
from trilogy.core.models.environment import Environment
|
|
53
|
+
from trilogy.core.statements.author import RowsetDerivationStatement, SelectStatement
|
|
54
|
+
from trilogy.utility import string_to_hash, unique
|
|
55
|
+
|
|
56
|
+
# Union of the expression wrapper types and plain Python literal types that are
# grouped under a single alias for annotations.
# NOTE(review): presumably the accepted input types for arbitrary_to_concept,
# which is defined outside this view - confirm against its signature.
ARBITRARY_INPUTS = (
    AggregateWrapper
    | FunctionCallWrapper
    | WindowItem
    | FilterItem
    | Function
    | Parenthetical
    | ListWrapper
    | MapWrapper
    | Comparison
    | int
    | float
    | str
    | date
    | bool
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def process_function_arg(
    arg,
    meta: Meta | None,
    environment: Environment,
):
    """Normalize one function argument into a form usable as a Function input.

    Handling by argument type:

    * ``Parenthetical``: the content is processed recursively and wrapped in a
      ``FunctionType.PARENTHETICAL`` function.
    * ``Function`` whose operator is neither an aggregate nor a one-to-many
      function: returned unchanged.
    * Aggregate / one-to-many ``Function``, ``ListWrapper`` and ``MapWrapper``:
      materialized as a virtual concept named from a hash of ``str(arg)``,
      registered on ``environment`` and returned as a reference.
    * ``Concept``: its reference is returned.
    * ``ConceptRef``: re-resolved through ``environment.concepts`` and returned
      as a reference.
    * Anything else: returned unchanged.

    Args:
        arg: the raw argument to normalize.
        meta: optional parse metadata; when present its ``line`` is recorded on
            newly created virtual concepts, and it is forwarded to
            ``environment.add_concept``.
        environment: environment used to look up and register concepts.

    Returns:
        The normalized argument as described above.
    """
    if isinstance(arg, Parenthetical):
        processed = process_function_args([arg.content], meta, environment)
        return Function(
            operator=FunctionType.PARENTHETICAL,
            arguments=processed,
            output_datatype=arg_to_datatype(processed[0]),
            output_purpose=function_args_to_output_purpose(processed, environment),
        )

    if isinstance(arg, Function):
        # if it's not an aggregate function, we can skip the virtual concepts
        # to simplify anonymous function handling
        if (
            arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
            and arg.operator not in FunctionClass.ONE_TO_MANY.value
        ):
            return arg
        id_hash = string_to_hash(str(arg))
        name = f"{VIRTUAL_CONCEPT_PREFIX}_{arg.operator.value}_{id_hash}"
        builder = function_to_concept
    elif isinstance(arg, (ListWrapper, MapWrapper)):
        id_hash = string_to_hash(str(arg))
        name = f"{VIRTUAL_CONCEPT_PREFIX}_{id_hash}"
        builder = arbitrary_to_concept
    elif isinstance(arg, Concept):
        return arg.reference
    elif isinstance(arg, ConceptRef):
        return environment.concepts[arg.address].reference
    else:
        return arg

    # Shared tail: the argument must be represented by a virtual concept.
    address = f"{environment.namespace}.{name}"
    if address in environment.concepts:
        # Reuse the already registered virtual concept. Return its reference so
        # the cached path yields the same kind of value as the freshly created
        # path below (and as the ConceptRef branch above).
        return environment.concepts[address].reference
    concept = builder(
        arg,
        name=name,
        environment=environment,
    )
    # to satisfy mypy, concept will always have metadata
    if concept.metadata and meta:
        concept.metadata.line_number = meta.line
    environment.add_concept(concept, meta=meta)
    return concept.reference
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def process_function_args(
    args,
    meta: Meta | None,
    environment: Environment,
) -> List[ConceptRef | Function | str | int | float | date | datetime]:
    """Normalize every argument in ``args`` via ``process_function_arg``.

    Args:
        args: iterable of raw function arguments.
        meta: optional parse metadata forwarded to each per-argument call.
        environment: environment forwarded to each per-argument call.

    Returns:
        A new list with the processed arguments, in input order.
    """
    return [process_function_arg(item, meta, environment) for item in args]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def get_upstream_modifiers(
    keys: Sequence[Concept | ConceptRef], environment: Environment
) -> list[Modifier]:
    """Collect the distinct modifiers carried by the given concepts.

    ``ConceptRef`` entries are resolved through ``environment.concepts`` first;
    entries that are (or resolve to) an ``UndefinedConcept`` are ignored.

    Returns:
        The de-duplicated modifiers as a list (built from a set, so the order
        is not meaningful).
    """
    collected: set[Modifier] = set()
    for key in keys:
        resolved = (
            environment.concepts[key.address] if isinstance(key, ConceptRef) else key
        )
        if not isinstance(resolved, UndefinedConcept) and resolved.modifiers:
            collected.update(resolved.modifiers)
    return list(collected)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def get_purpose_and_keys(
    purpose: Purpose | None,
    args: Tuple[ConceptRef | Concept, ...] | None,
    environment: Environment,
) -> Tuple[Purpose, set[str] | None]:
    """Determine the purpose of a derived concept and, where applicable, its keys.

    Args:
        purpose: explicit purpose; when falsy it is derived from ``args`` via
            ``function_args_to_output_purpose``.
        args: the concepts feeding the derived concept, if any.
        environment: environment used for purpose derivation and key lookup.

    Returns:
        ``(purpose, keys)``. ``keys`` is computed with ``concept_list_to_keys``
        only for PROPERTY / METRIC purposes with non-empty ``args``; otherwise
        it is ``None``.
    """
    resolved = purpose or function_args_to_output_purpose(args, environment)
    wants_keys = resolved in (Purpose.PROPERTY, Purpose.METRIC) and args
    if not wants_keys:
        return resolved, None
    return resolved, concept_list_to_keys(args, environment)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def concept_list_to_keys(
    concepts: Tuple[Concept | ConceptRef, ...], environment: Environment
) -> set[str]:
    """Return the set of key addresses implied by ``concepts``.

    For each concept (``ConceptRef`` entries are resolved through
    ``environment.concepts``; undefined concepts are skipped):

    * if it declares ``keys``, those keys are used;
    * otherwise, unless its purpose is PROPERTY, its own address is used.
    """
    collected: set[str] = set()
    for item in concepts:
        resolved = (
            environment.concepts[item.address] if isinstance(item, ConceptRef) else item
        )
        if isinstance(resolved, UndefinedConcept):
            continue
        if resolved.keys:
            collected.update(resolved.keys)
        elif resolved.purpose != Purpose.PROPERTY:
            collected.add(resolved.address)
    return collected
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def constant_to_concept(
    parent: (
        ListWrapper | TupleWrapper | MapWrapper | int | float | str | date | datetime
    ),
    name: str,
    namespace: str,
    metadata: Metadata | None = None,
) -> Concept:
    """Wrap a literal value in a constant concept.

    The value becomes the single argument of a ``FunctionType.CONSTANT``
    function, which is used as the concept's lineage and supplies its datatype.

    Args:
        parent: the literal (or list/tuple/map wrapper) to expose as a concept.
        name: name of the resulting concept.
        namespace: namespace of the resulting concept.
        metadata: optional metadata; a fresh ``Metadata()`` is used when falsy.

    Returns:
        A single-row CONSTANT concept with an empty grain.
    """
    lineage: Function = Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(parent),
        output_purpose=Purpose.CONSTANT,
        arguments=[parent],
    )
    effective_metadata = metadata if metadata else Metadata()
    concept_fields = dict(
        name=name,
        namespace=namespace,
        datatype=lineage.output_datatype,
        lineage=lineage,
        purpose=Purpose.CONSTANT,
        derivation=Derivation.CONSTANT,
        granularity=Granularity.SINGLE_ROW,
        grain=Grain(),
        metadata=effective_metadata,
    )
    return Concept(**concept_fields)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def atom_is_relevant(
    atom,
    others: list[Concept | ConceptRef],
    environment: Environment | None = None,
):
    """Recursively decide whether an expression atom is relevant given ``others``.

    Concepts and concept references short-circuit to ``False`` when their
    address is already present in ``others``; otherwise the decision is
    delegated to ``concept_is_relevant``. Compound expressions are relevant
    when any of the sub-expressions inspected for their type is relevant.
    Atoms of any other type are never relevant.

    Args:
        atom: the expression node to inspect.
        others: the concepts the atom is being evaluated against.
        environment: optional environment, forwarded for reference resolution.

    Returns:
        ``True`` if the atom is relevant, ``False`` otherwise.
    """

    def check(node):
        # Local shorthand for the recursive call with the same context.
        return atom_is_relevant(node, others, environment)

    if isinstance(atom, (ConceptRef, Concept)):
        # when we are looking at atoms, if there is a concept that is in others
        # return directly
        if atom.address in others:
            return False
        return concept_is_relevant(atom, others, environment)

    if isinstance(atom, AggregateWrapper):
        # An aggregate without a `by` clause is never relevant; otherwise it
        # depends on the concepts it is grouped by.
        if not atom.by:
            return False
        return any(check(member) for member in atom.by)

    if isinstance(atom, Function):
        return any(check(argument) for argument in atom.arguments)

    if isinstance(atom, FunctionCallWrapper):
        # All parts are evaluated before combining.
        outcomes = [check(atom.content)]
        outcomes.extend([check(argument) for argument in atom.args])
        return any(outcomes)

    if isinstance(atom, CaseWhen):
        if check(atom.expr):
            return True
        return check(atom.comparison)

    if isinstance(atom, CaseElse):
        return check(atom.expr)

    if isinstance(atom, SubselectComparison):
        # Only the left-hand side is inspected for subselect comparisons; this
        # check must precede the generic Comparison handling below.
        return check(atom.left)

    if isinstance(atom, (Comparison, Conditional)):
        if check(atom.left):
            return True
        return check(atom.right)

    if isinstance(atom, Parenthetical):
        return check(atom.content)

    if isinstance(atom, ConceptArgs):
        # use atom is relevant here to trigger the early exit behavior for concepts in set
        return any([check(member) for member in atom.concept_arguments])

    return False
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def concept_is_relevant(
    concept: Concept | ConceptRef,
    others: list[Concept | ConceptRef],
    environment: Environment | None = None,
) -> bool:
    """Decide whether ``concept`` is relevant given the concepts in ``others``.

    The checks run in order and the first match wins:

    1. undefined concepts and concepts of UNKNOWN datatype are not relevant;
    2. a ``ConceptRef`` is resolved through ``environment``;
    3. constants are not relevant;
    4. aggregates are not relevant unless their lineage is an
       ``AggregateWrapper`` with a ``by`` clause;
    5. PROPERTY / METRIC / KEY concepts whose keys are all in ``others`` are
       not relevant;
    6. METRIC concepts whose grain components are all in ``others`` are not
       relevant;
    7. BASIC derivations are judged from their lineage (DATE_SPINE functions
       are always relevant);
    8. single-row concepts are not relevant; anything left is relevant.

    Raises:
        SyntaxError: if ``concept`` is a ``ConceptRef`` and no environment is
            given.
    """
    if isinstance(concept, UndefinedConcept):
        return False
    if concept.datatype == DataType.UNKNOWN:
        return False

    if isinstance(concept, ConceptRef):
        if not environment:
            raise SyntaxError(
                "Require environment to determine relevance of ConceptRef"
            )
        concept = environment.concepts[concept.address]

    if concept.derivation == Derivation.CONSTANT:
        return False

    if concept.is_aggregate and (
        not isinstance(concept.lineage, AggregateWrapper) or not concept.lineage.by
    ):
        return False

    if (
        concept.purpose in (Purpose.PROPERTY, Purpose.METRIC, Purpose.KEY)
        and concept.keys
        and all(key in others for key in concept.keys)
    ):
        return False

    if concept.purpose == Purpose.METRIC and all(
        component in others for component in concept.grain.components
    ):
        return False

    if concept.derivation == Derivation.BASIC:
        lineage = concept.lineage
        if isinstance(lineage, Function) and lineage.operator == FunctionType.DATE_SPINE:
            return True
        if isinstance(lineage, (Function, CaseWhen)):
            # Every argument is evaluated (no short-circuit) before combining.
            return any(
                [atom_is_relevant(arg, others, environment) for arg in lineage.arguments]
            )
        if isinstance(lineage, Parenthetical):
            return atom_is_relevant(lineage.content, others, environment)

    return concept.granularity != Granularity.SINGLE_ROW
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def concepts_to_grain_concepts(
    concepts: Iterable[Concept | ConceptRef | str],
    environment: Environment | None,
    local_concepts: dict[str, Concept] | None = None,
) -> set[str]:
    """Reduce a collection of concepts to the addresses that define its grain.

    Steps:

    1. Resolve every input to a ``Concept``. References and plain address
       strings are looked up in ``local_concepts`` first (when provided), then
       in ``environment.concepts``.
    2. Replace concepts whose lineage is a ``FunctionType.ALIAS`` function with
       the aliased concept, so grain is computed on the unaliased concept.
    3. Keep the address of every concept that ``concept_is_relevant`` reports
       as relevant against the full resolved list.

    Args:
        concepts: concepts, concept references or addresses to reduce.
        environment: environment used for lookups; may be ``None`` only when
            every input is already a ``Concept`` with no alias lineage.
        local_concepts: optional address -> concept overrides consulted before
            the environment.

    Returns:
        The set of addresses making up the grain.

    Raises:
        ValueError: if an input (or an alias target) cannot be resolved because
            no environment was provided, or the input type is unsupported.
    """
    preconcepts: list[Concept] = []
    for c in concepts:
        if isinstance(c, Concept):
            preconcepts.append(c)
        elif isinstance(c, (ConceptRef, str)) and environment:
            address = c.address if isinstance(c, ConceptRef) else c
            if local_concepts and address in local_concepts:
                preconcepts.append(local_concepts[address])
            else:
                preconcepts.append(environment.concepts[address])
        else:
            raise ValueError(
                f"Unable to resolve input {c} without environment provided to concepts_to_grain call"
            )

    pconcepts: list[Concept] = []
    for x in preconcepts:
        if (
            x.lineage
            and isinstance(x.lineage, Function)
            and x.lineage.operator == FunctionType.ALIAS
        ):
            # if the function is an alias, use the unaliased concept to calculate grain
            if environment is None:
                # Previously this dereferenced `None` and failed with an opaque
                # AttributeError; fail with the same kind of error as above.
                raise ValueError(
                    f"Unable to resolve alias target of {x.address} without environment provided to concepts_to_grain call"
                )
            pconcepts.append(environment.concepts[x.lineage.arguments[0].address])  # type: ignore
        else:
            pconcepts.append(x)

    seen: set[str] = set()
    for sub in pconcepts:
        if sub.address in seen:
            continue
        if not concept_is_relevant(sub, pconcepts, environment):  # type: ignore
            continue
        seen.add(sub.address)

    return seen
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
    """Collect the parent concept references of ``arg`` and a metric flag.

    Returns:
        ``(references, is_metric)``. The flag is ``True`` as soon as an
        ``AggregateWrapper`` is encountered. For an aggregate, only its ``by``
        concepts (if any) are reported as parents. References may contain
        duplicates; the public wrapper de-duplicates them.
    """
    # Imported at call time, as in the original (presumably to avoid a
    # circular import - confirm before hoisting).
    from trilogy.core.models.author import get_concept_arguments

    if isinstance(arg, Function):
        collected = []
        metric_found = False
        for child in arg.arguments:
            child_refs, child_flag = get_relevant_parent_concepts(child)
            collected += child_refs
            metric_found = metric_found or child_flag
        return collected, metric_found

    if isinstance(arg, AggregateWrapper):
        by_refs = [x.reference for x in arg.by] if arg.by else []
        return by_refs, True

    if isinstance(arg, FunctionCallWrapper):
        return get_relevant_parent_concepts(arg.content)

    if isinstance(arg, Comparison):
        left_refs, left_flag = get_relevant_parent_concepts(arg.left)
        right_refs, right_flag = get_relevant_parent_concepts(arg.right)
        return left_refs + right_refs, left_flag or right_flag

    return get_concept_arguments(arg), False
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
    """Return the parent concept references of ``arg``, de-duplicated by address.

    Thin wrapper over ``_get_relevant_parent_concepts`` that passes the
    references through ``unique(..., "address")`` and forwards the metric flag
    unchanged.
    """
    found = _get_relevant_parent_concepts(arg)
    return unique(found[0], "address"), found[1]
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def group_function_to_concept(
    parent: Function,
    name: str,
    environment: Environment,
    namespace: str | None = None,
    metadata: Metadata | None = None,
):
    """Build the concept produced by a grouping function.

    The non-constant parent concepts of ``parent`` provide the modifiers and,
    except for metrics without keys, the key addresses from which the grain is
    built.

    Args:
        parent: the function used as the concept's lineage.
        name: name of the resulting concept.
        environment: environment used to resolve parent concepts and grain.
        namespace: namespace override; defaults to ``environment.namespace``.
        metadata: optional metadata; a fresh ``Metadata()`` is used when falsy.

    Returns:
        A multi-row concept with ``Derivation.GROUP_TO`` when a grain could be
        built, otherwise one with ``Derivation.BASIC`` and no explicit grain.
    """
    namespace = namespace or environment.namespace
    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[ref.address] for ref in ref_args]
    pkeys: List[Concept] = [
        candidate
        for candidate in concrete_args
        if candidate.derivation != Derivation.CONSTANT
    ]
    modifiers = get_upstream_modifiers(pkeys, environment)

    # for a group to, if we have a dynamic metric, ignore it
    # it will end up with the group target grain
    keys = {
        candidate.address
        for candidate in pkeys
        if not (candidate.purpose == Purpose.METRIC and not candidate.keys)
    }
    grain = Grain.from_concepts(keys, environment)

    if is_metric:
        purpose = Purpose.METRIC
    elif pkeys:
        purpose = parent.output_purpose
    else:
        purpose = Purpose.CONSTANT

    shared_fields = dict(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        metadata=metadata or Metadata(),
        granularity=Granularity.MULTI_ROW,
    )
    if grain is None:
        return Concept(derivation=Derivation.BASIC, **shared_fields)

    # Rebuild the grain from its own components to de-duplicate it.
    deduplicated = Grain.from_concepts(grain.components, environment)
    return Concept(
        grain=deduplicated,
        derivation=Derivation.GROUP_TO,
        **shared_fields,
    )
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def function_to_concept(
    parent: Function,
    name: str,
    environment: Environment,
    namespace: str | None = None,
    metadata: Metadata | None = None,
) -> Concept:
    """Build the concept that represents the output of ``parent``.

    Parent concepts that are constants, or aggregates with an empty grain, are
    ignored when deriving grain, keys and modifiers. Derivation and granularity
    are chosen from the function operator (aggregate, union, one-to-many,
    recursive edge, single-row) and fall back to CONSTANT when every parent is
    constant, or BASIC otherwise.

    Args:
        parent: the function used as the concept's lineage.
        name: name of the resulting concept.
        environment: environment used to resolve parent concepts.
        namespace: namespace override; defaults to ``environment.namespace``.
        metadata: optional metadata; a fresh ``Metadata()`` is used when falsy.

    Returns:
        The derived concept. No explicit grain is passed for one-to-many
        functions, because their output grain is unknown.
    """
    namespace = namespace or environment.namespace
    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[ref.address] for ref in ref_args]
    pkeys: List[Concept] = [
        candidate
        for candidate in concrete_args
        if candidate.derivation != Derivation.CONSTANT
        and (
            candidate.derivation != Derivation.AGGREGATE or candidate.grain.components
        )
    ]

    grain: Grain | None = Grain()
    for candidate in pkeys:
        grain += candidate.grain
    operator = parent.operator
    if operator in FunctionClass.ONE_TO_MANY.value:
        # if the function will create more rows, we don't know what grain this is at
        grain = None

    modifiers = get_upstream_modifiers(pkeys, environment)

    key_grain: list[str] = []
    for candidate in pkeys:
        if not is_metric and candidate.keys:
            # for row ops, assume keys are transitive
            key_grain.extend(candidate.keys)
        else:
            # metrics will group to keys, so do not do key traversal
            key_grain.append(candidate.address)
    keys = set(key_grain)

    if is_metric:
        purpose = Purpose.METRIC
    elif pkeys:
        purpose = parent.output_purpose
    else:
        purpose = Purpose.CONSTANT

    fmetadata = metadata or Metadata()

    # Operator-driven classification; the order of these checks matters.
    granularity = Granularity.MULTI_ROW
    if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
        derivation = Derivation.AGGREGATE
        only_all_rows = (
            grain
            and grain.components
            and all(
                component.endswith(ALL_ROWS_CONCEPT) for component in grain.components
            )
        )
        if only_all_rows:
            granularity = Granularity.SINGLE_ROW
    elif operator == FunctionType.UNION:
        derivation = Derivation.UNION
    elif operator in FunctionClass.ONE_TO_MANY.value:
        derivation = Derivation.UNNEST
    elif operator == FunctionType.RECURSE_EDGE:
        derivation = Derivation.RECURSIVE
    elif operator in FunctionClass.SINGLE_ROW.value or (
        concrete_args
        and all(item.derivation == Derivation.CONSTANT for item in concrete_args)
    ):
        derivation = Derivation.CONSTANT
        granularity = Granularity.SINGLE_ROW
    else:
        derivation = Derivation.BASIC

    concept_fields = dict(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        metadata=fmetadata,
        derivation=derivation,
        granularity=granularity,
    )
    if grain is not None:
        # Only pass a grain when one is known.
        concept_fields["grain"] = grain
    return Concept(**concept_fields)
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def filter_item_to_concept(
    parent: FilterItem,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build the concept that represents a filtered expression.

    The filtered content is first resolved to a concept: a ``ConceptRef`` is
    looked up in the environment, while supported inline expressions and
    literals are converted with ``arbitrary_to_concept``. The result is a
    PROPERTY concept with ``Derivation.FILTER`` that copies the datatype and
    granularity of that content concept.

    Args:
        parent: the filter item used as the concept's lineage.
        name: name of the resulting concept.
        namespace: namespace of the resulting concept.
        environment: environment used to resolve the filtered content.
        metadata: optional metadata; a fresh ``Metadata()`` is used when falsy.

    Raises:
        NotImplementedError: if the filtered content is of an unsupported type.
    """
    fmetadata = metadata or Metadata()
    inline_types = (
        FilterItem,
        AggregateWrapper,
        FunctionCallWrapper,
        WindowItem,
        Function,
        ListWrapper,
        MapWrapper,
        int,
        str,
        float,
    )
    content = parent.content
    if isinstance(content, ConceptRef):
        source = environment.concepts[content.address]
        fallback_keys = {source.address}
    elif isinstance(content, inline_types):
        source = arbitrary_to_concept(content, environment, namespace=namespace)
        fallback_keys = set()
    else:
        raise NotImplementedError(
            f"Filter item with non ref content {parent.content} ({type(parent.content)}) not yet supported"
        )

    modifiers = get_upstream_modifiers(
        source.concept_arguments, environment=environment
    )
    # Only a property source passes its grain and keys through; otherwise the
    # grain is empty and the keys fall back to the source address (if any).
    if source.purpose == Purpose.PROPERTY:
        grain = source.grain
    else:
        grain = Grain()
    if source.purpose == Purpose.PROPERTY:
        keys = source.keys
    else:
        # filtered copies cannot inherit keys
        keys = fallback_keys

    return Concept(
        name=name,
        namespace=namespace,
        datatype=source.datatype,
        purpose=Purpose.PROPERTY,
        derivation=Derivation.FILTER,
        lineage=parent,
        metadata=fmetadata,
        keys=keys,
        grain=grain,
        modifiers=modifiers,
        granularity=source.granularity,
    )
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def window_item_to_concept(
    parent: WindowItem,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Create the WINDOW-derived concept for a window expression.

    Only windows over a ``ConceptRef`` are supported. If the referenced
    concept is an ``UndefinedConcept``, an ``UndefinedConcept`` addressed as
    ``{namespace}.{name}`` is returned instead of a full concept.

    Args:
        parent: The window item to wrap.
        name: Name of the new concept.
        namespace: Namespace of the new concept.
        environment: Environment used to resolve references.
        metadata: Optional metadata; a fresh ``Metadata()`` is used if falsy.

    Raises:
        NotImplementedError: If ``parent.content`` is not a ``ConceptRef``.
    """
    resolved_metadata = metadata or Metadata()
    if not isinstance(parent.content, ConceptRef):
        raise NotImplementedError(
            f"Window function with non ref content {parent.content} not yet supported"
        )
    bcontent = environment.concepts[parent.content.address]
    if isinstance(bcontent, UndefinedConcept):
        return UndefinedConcept(
            address=f"{namespace}.{name}", metadata=resolved_metadata
        )

    if bcontent.purpose == Purpose.METRIC:
        local_purpose, keys = get_purpose_and_keys(None, (bcontent,), environment)
    else:
        local_purpose = Purpose.PROPERTY
        key_addresses = [bcontent.address] + [y.address for y in parent.over]
        keys = Grain.from_concepts(key_addresses, environment).components

    # The grain always covers the partition concepts plus the windowed
    # concept's output; concepts used in the ORDER BY are added when present.
    grain_components = parent.over + [bcontent.output]
    if parent.order_by:
        for item in parent.order_by:
            relevant, _ = get_relevant_parent_concepts(item.expr)
            grain_components.extend(relevant)
    final_grain = Grain.from_concepts(grain_components, environment)

    modifiers = get_upstream_modifiers(bcontent.concept_arguments, environment)

    # Default to the content's datatype; counting/numbering windows are
    # integers, and RANK gets an integer carrying the "rank" trait.
    datatype = parent.content.datatype
    if parent.type == WindowType.RANK:
        datatype = TraitDataType(type=DataType.INTEGER, traits=["rank"])
    elif parent.type in (
        WindowType.ROW_NUMBER,
        WindowType.COUNT,
        WindowType.COUNT_DISTINCT,
    ):
        datatype = DataType.INTEGER

    return Concept(
        name=name,
        datatype=datatype,
        purpose=local_purpose,
        lineage=parent,
        metadata=resolved_metadata,
        grain=final_grain,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        derivation=Derivation.WINDOW,
        granularity=bcontent.granularity,
    )
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
def agg_wrapper_to_concept(
    parent: AggregateWrapper,
    namespace: str,
    name: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Create the AGGREGATE-derived metric concept for an aggregate wrapper.

    When ``parent.by`` is non-empty, the grain and keys come from those
    grouping concepts; otherwise the grain is empty and the keys are the
    ones returned by ``get_purpose_and_keys``.

    Args:
        parent: The aggregate wrapper to convert.
        namespace: Namespace of the new concept.
        name: Name of the new concept.
        environment: Environment used to resolve references.
        metadata: Optional metadata; a fresh ``Metadata()`` is used if falsy.

    Raises:
        InvalidSyntaxException: If one of the aggregate function's concept
            arguments has the same address as the concept being defined.
    """
    group_by = parent.by
    _, inferred_keys = get_purpose_and_keys(
        Purpose.METRIC, tuple(group_by) if group_by else None, environment=environment
    )
    modifiers = get_upstream_modifiers(parent.concept_arguments, environment)

    # anything grouped to a grain should be a property at that grain
    if group_by:
        grain = Grain.from_concepts(group_by, environment)
        concept_keys = {x.address for x in group_by}
    else:
        grain = Grain()
        concept_keys = inferred_keys
    granularity = Concept.calculate_granularity(Derivation.AGGREGATE, grain, parent)

    out = Concept(
        name=name,
        datatype=parent.function.output_datatype,
        purpose=Purpose.METRIC,
        metadata=metadata or Metadata(),
        lineage=parent,
        grain=grain,
        namespace=namespace,
        keys=concept_keys,
        modifiers=modifiers,
        derivation=Derivation.AGGREGATE,
        granularity=granularity,
    )
    # An aggregate may not take the concept it defines as one of its inputs.
    if any(arg.address == out.address for arg in parent.function.concept_arguments):
        raise InvalidSyntaxException(
            f"Aggregate concept {out.address} cannot reference itself. If defining a new concept in a select, use a new name."
        )
    return out
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def align_item_to_concept(
    parent: AlignItem,
    align_clause: AlignClause,
    selects: list[SelectStatement],
    environment: Environment,
    where: WhereClause | None = None,
    having: HavingClause | None = None,
    limit: int | None = None,
) -> Concept:
    """Create the MULTISELECT-derived concept for one align item.

    All concepts merged by the align item must share a single datatype. The
    returned property concept is named after the item's alias, has an empty
    grain, and uses a ``MultiSelectLineage`` built from ``selects`` as its
    lineage.

    Args:
        parent: The align item describing the merged concepts and alias.
        align_clause: The full align clause stored on the lineage.
        selects: The select statements taking part in the multi-select.
        environment: Environment used to convert selects to lineage.
        where: Optional where clause stored on the lineage.
        having: Optional having clause stored on the lineage.
        limit: Optional row limit stored on the lineage.

    Raises:
        InvalidSyntaxException: If the merged concepts have more than one
            distinct datatype.
    """
    align = parent
    datatypes = {c.datatype for c in align.concepts}
    if len(datatypes) > 1:
        raise InvalidSyntaxException(
            f"Datatypes do not align for merged statements {align.alias}, have {datatypes}"
        )

    lineage_selects = [select.as_lineage(environment) for select in selects]
    # Hidden components are the union of those of every participating select.
    hidden = {
        component
        for select_lineage in lineage_selects
        for component in select_lineage.hidden_components
    }
    multi_lineage = MultiSelectLineage(
        selects=lineage_selects,
        align=align_clause,
        namespace=align.namespace,
        where_clause=where,
        having_clause=having,
        limit=limit,
        hidden_components=hidden,
    )
    return Concept(
        name=align.alias,
        datatype=datatypes.pop(),
        purpose=Purpose.PROPERTY,
        lineage=multi_lineage,
        grain=Grain(),
        namespace=align.namespace,
        granularity=Granularity.MULTI_ROW,
        derivation=Derivation.MULTISELECT,
        keys={x.address for x in align.concepts},
    )
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def derive_item_to_concept(
    parent: ARBITRARY_INPUTS,
    name: str,
    lineage: MultiSelectLineage,
    namespace: str | None = None,
) -> Concept:
    """Create a MULTISELECT-derived property concept for a derived expression.

    The datatype is taken from ``arg_to_datatype(parent)``; the concept has an
    empty grain and the supplied multi-select ``lineage``.

    Args:
        parent: Expression whose datatype the new concept takes.
        name: Name of the new concept.
        lineage: Multi-select lineage attached to the concept.
        namespace: Namespace of the new concept; ``DEFAULT_NAMESPACE`` is
            used when this is falsy.
    """
    return Concept(
        name=name,
        datatype=arg_to_datatype(parent),
        purpose=Purpose.PROPERTY,
        lineage=lineage,
        grain=Grain(),
        namespace=namespace or DEFAULT_NAMESPACE,
        granularity=Granularity.MULTI_ROW,
        derivation=Derivation.MULTISELECT,
    )
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
def rowset_concept(
    orig_address: ConceptRef,
    environment: Environment,
    rowset: RowsetDerivationStatement,
    pre_output: list[Concept],
    orig: dict[str, Concept],
    orig_map: dict[str, Concept],
):
    """Create the rowset-scoped copy of one output concept and register it.

    The new concept is built without lineage (the caller assigns it later)
    and is recorded in three caller-owned collections: appended to
    ``pre_output``, stored in ``orig`` under the original concept's address,
    and used as the key in ``orig_map`` pointing back to the original. Each
    pseudonym of the original is also registered on ``environment`` (both
    ``concepts`` and ``alias_origin_lookup``) under the rowset namespace.

    Args:
        orig_address: Reference to the select output being copied.
        environment: Environment to resolve from and register into.
        rowset: The rowset statement providing name, namespace and select.
        pre_output: List that receives the new concept.
        orig: Mapping of original address -> new concept, updated in place.
        orig_map: Mapping of new address -> original concept, updated in place.
    """
    orig_concept = environment.concepts[orig_address.address]
    lineage = orig_concept.lineage

    # A filter whose condition equals the rowset's own where clause is
    # exposed under the name of the concept it filters.
    if (
        isinstance(lineage, FilterItem)
        and lineage.where == rowset.select.where_clause
        and isinstance(lineage.content, (ConceptRef, Concept))
    ):
        name = environment.concepts[lineage.content.address].name
    else:
        name = orig_concept.name

    # Concepts from another namespace keep it as a suffix of the rowset name.
    if orig_concept.namespace == rowset.namespace:
        base_namespace = rowset.name
    else:
        base_namespace = f"{rowset.name}.{orig_concept.namespace}"

    new_concept = Concept(
        name=name,
        datatype=orig_concept.datatype,
        purpose=orig_concept.purpose,
        lineage=None,
        grain=orig_concept.grain,
        metadata=Metadata(concept_source=ConceptSource.CTE),
        namespace=base_namespace,
        keys=orig_concept.keys,
        derivation=Derivation.ROWSET,
        granularity=orig_concept.granularity,
        pseudonyms={
            address_with_namespace(alias, rowset.name)
            for alias in orig_concept.pseudonyms
        },
    )

    for alias in orig_concept.pseudonyms:
        namespaced_alias = address_with_namespace(alias, rowset.name)
        origa = environment.alias_origin_lookup[alias]
        environment.concepts[namespaced_alias] = new_concept
        environment.alias_origin_lookup[namespaced_alias] = origa.model_copy(
            update={"namespace": f"{rowset.name}.{origa.namespace}"}
        )

    orig[orig_concept.address] = new_concept
    orig_map[new_concept.address] = orig_concept
    pre_output.append(new_concept)
|
|
849
|
+
|
|
850
|
+
|
|
851
|
+
def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environment):
    """Build the rowset-scoped concepts for every output of a rowset's select.

    Steps, in order:
      1. Create one concept per select output via ``rowset_concept``.
      2. Attach a ``RowsetItem`` lineage to each, pointing at its original.
      3. Remap keys and grain from original addresses to the new rowset
         addresses where every referenced address was an output of the
         select; otherwise keys are cleared and the grain falls back to the
         grain of all rowset concepts.

    Args:
        rowset: The rowset derivation statement being expanded.
        environment: Environment to resolve from and register into.

    Returns:
        The list of newly created rowset concepts.
    """
    pre_output: list[Concept] = []
    orig: dict[str, Concept] = {}
    orig_map: dict[str, Concept] = {}
    for output_ref in rowset.select.output_components:
        rowset_concept(output_ref, environment, rowset, pre_output, orig, orig_map)

    select_lineage = rowset.select.as_lineage(environment)
    for concept in pre_output:
        # Each concept gets its own RowsetLineage instance.
        rowset_lineage = RowsetLineage(
            name=rowset.name,
            derived_concepts=[sibling.reference for sibling in pre_output],
            select=select_lineage,
        )
        concept.lineage = RowsetItem(
            content=orig_map[concept.address].reference,
            rowset=rowset_lineage,
        )

    # Computed before any key/grain remapping below.
    default_grain = Grain.from_concepts([*pre_output])

    # remap everything to the properties of the rowset
    for concept in pre_output:
        if concept.keys:
            if all(key in orig for key in concept.keys):
                concept.keys = {orig[key].address for key in concept.keys}
            else:
                # TODO: fix this up
                concept.keys = set()
        if all(component in orig for component in concept.grain.components):
            concept.grain = Grain(
                components={orig[c].address for c in concept.grain.components}
            )
        else:
            concept.grain = default_grain
    return pre_output
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def generate_concept_name(
    parent: ARBITRARY_INPUTS,
) -> str:
    """Build the auto-generated name for a concept derived from ``parent``.

    Every name starts with ``VIRTUAL_CONCEPT_PREFIX`` and ends with
    ``string_to_hash(str(parent))``. Recognised expression types insert a
    descriptive segment in between (for example ``_agg_<operator>_`` or
    ``_window_<type>_``); anything else gets just prefix and hash.
    """
    # Every branch uses the same hash suffix, so compute it once.
    digest = string_to_hash(str(parent))

    if isinstance(parent, AggregateWrapper):
        return f"{VIRTUAL_CONCEPT_PREFIX}_agg_{parent.function.operator.value}_{digest}"
    if isinstance(parent, WindowItem):
        return f"{VIRTUAL_CONCEPT_PREFIX}_window_{parent.type.value}_{digest}"
    if isinstance(parent, FilterItem):
        # Filters over a plain reference include the referenced name.
        if isinstance(parent.content, ConceptRef):
            return f"{VIRTUAL_CONCEPT_PREFIX}_filter_{parent.content.name}_{digest}"
        return f"{VIRTUAL_CONCEPT_PREFIX}_filter_{digest}"
    if isinstance(parent, Function):
        if parent.operator == FunctionType.GROUP:
            return f"{VIRTUAL_CONCEPT_PREFIX}_group_to_{digest}"
        return f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{digest}"
    if isinstance(parent, Parenthetical):
        return f"{VIRTUAL_CONCEPT_PREFIX}_paren_{digest}"
    if isinstance(parent, FunctionCallWrapper):
        return f"{VIRTUAL_CONCEPT_PREFIX}_{parent.name}_{digest}"
    if isinstance(parent, Comparison):
        return f"{VIRTUAL_CONCEPT_PREFIX}_comp_{digest}"
    # ListWrapper, MapWrapper, or primitive types
    return f"{VIRTUAL_CONCEPT_PREFIX}_{digest}"
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def parenthetical_to_concept(
    parent: Parenthetical,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Create a concept for a parenthesised expression from its inner content.

    The parentheses add nothing of their own here: the content is passed
    straight to ``arbitrary_to_concept`` with the same name, namespace and
    metadata.

    Raises:
        NotImplementedError: If ``parent.content`` is not one of the
            ``ARBITRARY_INPUTS`` types.
    """
    if not isinstance(
        parent.content,
        ARBITRARY_INPUTS,
    ):
        raise NotImplementedError(
            f"Parenthetical with non-supported content {parent.content} ({type(parent.content)}) not yet supported"
        )
    return arbitrary_to_concept(
        parent.content, environment, namespace, name, metadata
    )
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
def comparison_to_concept(
    parent: Comparison,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
):
    """Create the BASIC-derived concept for a comparison expression.

    The concepts referenced by the comparison are resolved through
    ``environment``. Constants, and aggregates with an empty grain, are
    ignored when working out grain, keys, modifiers and purpose.

    Purpose is METRIC when ``get_relevant_parent_concepts`` flags the
    comparison as a metric, CONSTANT when no contributing concepts remain,
    and PROPERTY otherwise.

    Args:
        parent: The comparison to convert.
        name: Name of the new concept.
        namespace: Namespace of the new concept; ``environment.namespace``
            is used when this is falsy.
        environment: Environment used to resolve references.
        metadata: Optional metadata; a fresh ``Metadata()`` is used if falsy.

    Returns:
        The new ``Concept`` with ``parent`` as its lineage.
    """
    fmetadata = metadata or Metadata()
    namespace = namespace or environment.namespace

    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[ref.address] for ref in ref_args]
    # Drop constants and aggregates that have no grain components.
    pkeys: list[Concept] = [
        arg
        for arg in concrete_args
        if arg.derivation != Derivation.CONSTANT
        and (arg.derivation != Derivation.AGGREGATE or arg.grain.components)
    ]

    grain: Grain | None = Grain()
    for arg in pkeys:
        grain += arg.grain
    if parent.operator in FunctionClass.ONE_TO_MANY.value:
        # if the function will create more rows, we don't know what grain this is at
        grain = None

    modifiers = get_upstream_modifiers(pkeys, environment)

    key_addresses: list[str] = []
    for arg in pkeys:
        # Metrics group to keys, so no key traversal for them; for row
        # operations keys are assumed transitive when the argument has any.
        if not is_metric and arg.keys:
            key_addresses.extend(arg.keys)
        else:
            key_addresses.append(arg.address)
    keys = set(key_addresses)

    if is_metric:
        purpose = Purpose.METRIC
    elif pkeys:
        purpose = Purpose.PROPERTY
    else:
        purpose = Purpose.CONSTANT

    if grain is None:
        # Unknown grain: leave the grain argument off entirely.
        return Concept(
            name=name,
            datatype=parent.output_datatype,
            purpose=purpose,
            lineage=parent,
            namespace=namespace,
            keys=keys,
            modifiers=modifiers,
            metadata=fmetadata,
            derivation=Derivation.BASIC,
            granularity=Granularity.MULTI_ROW,
        )
    return Concept(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        grain=grain,
        metadata=fmetadata,
        derivation=Derivation.BASIC,
        granularity=Granularity.MULTI_ROW,
    )
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def arbitrary_to_concept(
    parent: ARBITRARY_INPUTS,
    environment: Environment,
    namespace: str | None = None,
    name: str | None = None,
    metadata: Metadata | None = None,
) -> Concept:
    """Convert an arbitrary expression or literal into a concept.

    Dispatches on the type of ``parent`` to the matching ``*_to_concept``
    builder. A ``FunctionCallWrapper`` is unwrapped first and its content is
    converted instead. Values matching none of the handled expression types
    are treated as constants.

    Args:
        parent: The expression or literal to convert.
        environment: Environment used to resolve references.
        namespace: Namespace of the new concept; ``environment.namespace``
            is used when this is falsy.
        name: Name of the new concept; generated with
            ``generate_concept_name`` when falsy.
        metadata: Optional metadata passed through to the builder.
    """
    namespace = namespace or environment.namespace

    # this is purely for the parse tree, discard from derivation
    if isinstance(parent, FunctionCallWrapper):
        return arbitrary_to_concept(
            parent.content, environment, namespace, name, metadata  # type: ignore
        )

    # Generate name if not provided
    name = name or generate_concept_name(parent)

    if isinstance(parent, AggregateWrapper):
        return agg_wrapper_to_concept(
            parent, namespace, name, metadata=metadata, environment=environment
        )
    if isinstance(parent, WindowItem):
        return window_item_to_concept(
            parent,
            name,
            namespace,
            environment=environment,
            metadata=metadata,
        )
    if isinstance(parent, FilterItem):
        return filter_item_to_concept(
            parent,
            name,
            namespace,
            environment=environment,
            metadata=metadata,
        )
    if isinstance(parent, Function):
        # GROUP functions have a dedicated builder.
        if parent.operator == FunctionType.GROUP:
            return group_function_to_concept(
                parent,
                name,
                environment=environment,
                namespace=namespace,
                metadata=metadata,
            )
        return function_to_concept(
            parent,
            name,
            metadata=metadata,
            environment=environment,
            namespace=namespace,
        )
    if isinstance(parent, ListWrapper):
        return constant_to_concept(parent, name, namespace, metadata)
    if isinstance(parent, Parenthetical):
        return parenthetical_to_concept(parent, name, namespace, environment, metadata)
    if isinstance(parent, Comparison):
        return comparison_to_concept(parent, name, namespace, environment, metadata)
    return constant_to_concept(parent, name, namespace, metadata)
|