pytrilogy 0.0.2.11__tar.gz → 0.0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.11/pytrilogy.egg-info → pytrilogy-0.0.2.13}/PKG-INFO +1 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/constants.py +5 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/enums.py +3 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/environment_helpers.py +44 -6
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/models.py +51 -27
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimization.py +31 -3
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/__init__.py +2 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/predicate_pushdown.py +60 -42
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/concept_strategies_v3.py +6 -4
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/basic_node.py +22 -9
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/common.py +13 -23
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/node_merge_node.py +22 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/unnest_node.py +10 -3
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/base_node.py +18 -11
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/group_node.py +0 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/merge_node.py +12 -5
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/unnest_node.py +13 -9
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/utility.py +3 -1
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/query_processor.py +14 -12
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/base.py +95 -52
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/common.py +3 -3
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/executor.py +8 -2
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/common.py +73 -2
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/parse_engine.py +88 -132
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/trilogy.lark +3 -3
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/README.md +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/pyproject.toml +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/pytrilogy.egg-info/SOURCES.txt +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/setup.cfg +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/setup.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_datatypes.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_models.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_parsing.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_select.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/base_optimization.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/inline_constant.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/inline_datasource.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/filter_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/group_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/select_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/window_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/filter_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/nodes/window_node.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/dialect/sql_server.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/parsing/render.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/utility.py +0 -0
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/enums.py
RENAMED

@@ -62,6 +62,8 @@ class Modifier(Enum):
         strval = str(value)
         if strval == "~":
             return Modifier.PARTIAL
+        elif strval == "?":
+            return Modifier.NULLABLE
         return super()._missing_(value=strval.capitalize())


@@ -229,7 +231,6 @@ class ComparisonOperator(Enum):
     ILIKE = "ilike"
     CONTAINS = "contains"
     ELSE = "else"
-    BETWEEN = "between"

     @classmethod
     def _missing_(cls, value):

@@ -274,6 +275,7 @@ class SourceType(Enum):
     CONSTANT = "constant"
     ROWSET = "rowset"
     MERGE = "merge"
+    BASIC = "basic"


 class ShowCategory(Enum):
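For context, the new `?` shorthand resolves through the enum's `_missing_` hook. A minimal standalone sketch of that behavior (the member values here are assumptions for illustration; only the `~`/`?` branches are taken from the hunk):

from enum import Enum


class Modifier(Enum):
    # member values assumed for illustration
    PARTIAL = "Partial"
    NULLABLE = "Nullable"

    @classmethod
    def _missing_(cls, value):
        # as in the hunk: map shorthand symbols before the capitalized fallback
        strval = str(value)
        if strval == "~":
            return Modifier.PARTIAL
        elif strval == "?":
            return Modifier.NULLABLE
        return super()._missing_(value=strval.capitalize())


assert Modifier("~") is Modifier.PARTIAL
assert Modifier("?") is Modifier.NULLABLE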
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/environment_helpers.py
RENAMED

@@ -1,6 +1,15 @@
-from trilogy.core.models import
+from trilogy.core.models import (
+    DataType,
+    Concept,
+    Environment,
+    Function,
+    Metadata,
+    StructType,
+)
+from trilogy.core.functions import AttrAccess
 from trilogy.core.enums import Purpose, FunctionType, ConceptSource
 from trilogy.constants import DEFAULT_NAMESPACE
+from trilogy.parsing.common import process_function_args, arg_to_datatype, Meta


 def generate_date_concepts(concept: Concept, environment: Environment):

@@ -142,15 +151,44 @@ def generate_key_concepts(concept: Concept, environment: Environment):
     environment.add_concept(new_concept, add_derived=False)


-def generate_related_concepts(concept: Concept, environment: Environment):
+def generate_related_concepts(
+    concept: Concept,
+    environment: Environment,
+    meta: Meta | None = None,
+    add_derived: bool = False,
+):
     """Auto populate common derived concepts on types"""
-    if concept.purpose == Purpose.KEY:
+    if concept.purpose == Purpose.KEY and add_derived:
         generate_key_concepts(concept, environment)
-    if concept.datatype == DataType.DATE:
+
+    # datatype types
+    if concept.datatype == DataType.DATE and add_derived:
         generate_date_concepts(concept, environment)
-    elif concept.datatype == DataType.DATETIME:
+    elif concept.datatype == DataType.DATETIME and add_derived:
         generate_date_concepts(concept, environment)
         generate_datetime_concepts(concept, environment)
-    elif concept.datatype == DataType.TIMESTAMP:
+    elif concept.datatype == DataType.TIMESTAMP and add_derived:
         generate_date_concepts(concept, environment)
         generate_datetime_concepts(concept, environment)
+
+    if isinstance(concept.datatype, StructType):
+        for key, value in concept.datatype.fields_map.items():
+            args = process_function_args(
+                [concept, key], meta=meta, environment=environment
+            )
+            auto = Concept(
+                name=key,
+                datatype=arg_to_datatype(value),
+                purpose=Purpose.PROPERTY,
+                namespace=(
+                    environment.namespace + "." + concept.name
+                    if environment.namespace
+                    and environment.namespace != DEFAULT_NAMESPACE
+                    else concept.name
+                ),
+                lineage=AttrAccess(args),
+            )
+            environment.add_concept(auto, meta=meta)
+            if isinstance(value, Concept):
+                environment.merge_concept(auto, value, modifiers=[])
+                assert value.pseudonyms is not None
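The struct branch above is new behavior: a struct-typed concept now auto-registers one property concept per field, with `AttrAccess` lineage. A rough standalone sketch of just the namespace rule from the hunk, assuming `DEFAULT_NAMESPACE` is `"local"` and using invented concept names:

DEFAULT_NAMESPACE = "local"  # assumed value for illustration


def derived_namespace(env_namespace: str, concept_name: str) -> str:
    # mirrors the conditional namespace expression in the hunk
    if env_namespace and env_namespace != DEFAULT_NAMESPACE:
        return env_namespace + "." + concept_name
    return concept_name


# a struct concept `payload` in namespace `orders` gets field concepts under
# orders.payload.<field>; in the default namespace, under payload.<field>
assert derived_namespace("orders", "payload") == "orders.payload"
assert derived_namespace(DEFAULT_NAMESPACE, "payload") == "payload"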
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/models.py
RENAMED

@@ -300,7 +300,7 @@ class MapType(BaseModel):

 class StructType(BaseModel):
     fields: List[ALL_TYPES]
-    fields_map: Dict[str, Concept | int | float | str]
+    fields_map: Dict[str, Concept | int | float | str]

     @property
     def data_type(self):

@@ -2119,16 +2119,19 @@ class Datasource(Namespaced, BaseModel):


 class UnnestJoin(BaseModel):
-    concept: Concept
+    concepts: list[Concept]
+    parent: Function
     alias: str = "unnest"
     rendering_required: bool = True

     def __hash__(self):
-        return (
+        return (
+            self.alias + "".join([str(s.address) for s in self.concepts])
+        ).__hash__()


 class InstantiatedUnnestJoin(BaseModel):
-    concept: Concept
+    concept_to_unnest: Concept
     alias: str = "unnest"


@@ -2261,6 +2264,7 @@ class QueryDatasource(BaseModel):
     @field_validator("joins")
     @classmethod
     def validate_joins(cls, v):
+        unique_pairs = set()
         for join in v:
             if not isinstance(join, BaseJoin):
                 continue

@@ -2268,6 +2272,16 @@ class QueryDatasource(BaseModel):
                 raise SyntaxError(
                     f"Cannot join a datasource to itself, joining {join.left_datasource}"
                 )
+            pairing = "".join(
+                sorted(
+                    [join.left_datasource.identifier, join.right_datasource.identifier]
+                )
+            )
+            if pairing in unique_pairs:
+                raise SyntaxError(
+                    f"Duplicate join {join.left_datasource.identifier} and {join.right_datasource.identifier}"
+                )
+            unique_pairs.add(pairing)
         return v

     @field_validator("input_concepts")
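The new `validate_joins` logic keys each join on an order-insensitive concatenation of the two datasource identifiers, so A-to-B and B-to-A count as the same pair. A standalone check of that keying scheme (identifiers invented):

def join_pair_key(left: str, right: str) -> str:
    # same order-insensitive pairing key as in validate_joins above
    return "".join(sorted([left, right]))


seen: set[str] = set()
seen.add(join_pair_key("orders", "customers"))
# the reversed pair hits the same key and would be rejected as a duplicate
assert join_pair_key("customers", "orders") in seen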
@@ -2287,8 +2301,13 @@ class QueryDatasource(BaseModel):
         for key in ("input_concepts", "output_concepts"):
             if not values.get(key):
                 continue
+            concept: Concept
             for concept in values[key]:
-                if concept.address not in v:
+                if (
+                    concept.address not in v
+                    and not any(x in v for x in concept.pseudonyms)
+                    and CONFIG.validate_missing
+                ):
                     raise SyntaxError(
                         f"Missing source map for {concept.address} on {key}, have {v}"
                     )

@@ -2377,6 +2396,11 @@ class QueryDatasource(BaseModel):
                 final_source_map[key] = other.source_map[key]
         for k, v in final_source_map.items():
             final_source_map[k] = set(merged_datasources[x.full_name] for x in list(v))
+        self_hidden = self.hidden_concepts or []
+        other_hidden = other.hidden_concepts or []
+        hidden = [
+            x for x in self_hidden if x.address in [y.address for y in other_hidden]
+        ]
         qds = QueryDatasource(
             input_concepts=unique(
                 self.input_concepts + other.input_concepts, "address"

@@ -2400,9 +2424,7 @@ class QueryDatasource(BaseModel):
             ),
             join_derived_concepts=self.join_derived_concepts,
             force_group=self.force_group,
-            hidden_concepts=unique(
-                self.hidden_concepts + other.hidden_concepts, "address"
-            ),
+            hidden_concepts=hidden,
         )

         return qds

@@ -2533,7 +2555,7 @@ class CTE(BaseModel):
             )
         ]
         for join in self.joins:
-            if isinstance(join, UnnestJoin) and concept == join.concept:
+            if isinstance(join, UnnestJoin) and concept in join.concepts:
                 join.rendering_required = False

         self.parent_ctes = [

@@ -2548,6 +2570,7 @@ class CTE(BaseModel):
     @property
     def comment(self) -> str:
         base = f"Target: {str(self.grain)}."
+        base += f" Source: {self.source.source_type}."
         if self.parent_ctes:
             base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
         if self.joins:

@@ -2556,6 +2579,11 @@ class CTE(BaseModel):
             base += (
                 f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
             )
+        base += f"\n-- Source Map: {self.source_map}."
+        base += f"\n-- Output: {', '.join([str(x) for x in self.output_columns])}."
+        if self.hidden_concepts:
+            base += f"\n-- Hidden: {', '.join([str(x) for x in self.hidden_concepts])}."
+
         return base

     def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:

@@ -2623,6 +2651,10 @@ class CTE(BaseModel):
                 f" {self.name} {other.name} conditions {self.condition} {other.condition}"
             )
             raise ValueError(error)
+        mutually_hidden = []
+        for concept in self.hidden_concepts:
+            if concept in other.hidden_concepts:
+                mutually_hidden.append(concept)
         self.partial_concepts = unique(
             self.partial_concepts + other.partial_concepts, "address"
         )

@@ -2645,9 +2677,7 @@ class CTE(BaseModel):
         self.source.output_concepts = unique(
             self.source.output_concepts + other.source.output_concepts, "address"
         )
-        self.hidden_concepts = unique(
-            self.hidden_concepts + other.hidden_concepts, "address"
-        )
+        self.hidden_concepts = mutually_hidden
         self.existence_source_map = {
             **self.existence_source_map,
             **other.existence_source_map,

@@ -2996,13 +3026,16 @@ class EnvironmentDatasourceDict(dict):
         except KeyError:
             if DEFAULT_NAMESPACE + "." + key in self:
                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
-            if "." in key and key.split(".")[0] == DEFAULT_NAMESPACE:
-                return self.__getitem__(key.split(".")[1])
+            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                return self.__getitem__(key.split(".", 1)[1])
             raise

     def values(self) -> ValuesView[Datasource]:  # type: ignore
         return super().values()

+    def items(self) -> ItemsView[str, Datasource]:  # type: ignore
+        return super().items()
+

 class EnvironmentConceptDict(dict):
     def __init__(self, *args, **kwargs) -> None:
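The repeated `split(".")` → `split(".", 1)` change in these dictionary classes fixes lookups for keys containing more than one dot: only the first separator should peel off the namespace prefix. A standalone illustration with a made-up key:

key = "local.order.id"  # namespace prefix plus a dotted concept name

# old behaviour dropped everything after the second dot
assert key.split(".")[1] == "order"

# maxsplit=1 keeps the remainder intact, so the recursive lookup still works
assert key.split(".", 1)[1] == "order.id"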
@@ -3027,8 +3060,8 @@ class EnvironmentConceptDict(dict):
                 return super(EnvironmentConceptDict, self).__getitem__(key)

         except KeyError:
-            if "." in key and key.split(".")[0] == DEFAULT_NAMESPACE:
-                return self.__getitem__(key.split(".")[1], line_no)
+            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                return self.__getitem__(key.split(".", 1)[1], line_no)
             if DEFAULT_NAMESPACE + "." + key in self:
                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
             if not self.fail_on_missing:

@@ -3293,10 +3326,9 @@ class Environment(BaseModel):
             self.concepts[concept.name] = concept
         else:
             self.concepts[concept.address] = concept
-
-        from trilogy.core.environment_helpers import generate_related_concepts
+        from trilogy.core.environment_helpers import generate_related_concepts

-        generate_related_concepts(concept, self)
+        generate_related_concepts(concept, self, meta=meta, add_derived=add_derived)
         self.gen_concept_list_caches()
         return concept


@@ -3423,14 +3455,6 @@ class Comparison(
                 raise SyntaxError(
                     f"Cannot compare {self.left} and {self.right} of different types"
                 )
-        if self.operator == ComparisonOperator.BETWEEN:
-            if (
-                not isinstance(self.right, ComparisonOperator)
-                and self.right.operator == BooleanOperator.AND
-            ):
-                raise SyntaxError(
-                    f"Between operator must have two operands with and, not {self.right}"
-                )

     def __add__(self, other):
         if other is None:
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimization.py
RENAMED

@@ -10,6 +10,7 @@ from trilogy.core.optimizations import (
     OptimizationRule,
     InlineConstant,
     PredicatePushdown,
+    PredicatePushdownRemove,
     InlineDatasource,
 )

@@ -34,6 +35,31 @@ MAX_OPTIMIZATION_LOOPS = 100
 # return parent


+def reorder_ctes(
+    input: list[CTE],
+):
+    import networkx as nx
+
+    # Create a directed graph
+    G = nx.DiGraph()
+    mapping: dict[str, CTE] = {}
+    for cte in input:
+        mapping[cte.name] = cte
+        for parent in cte.parent_ctes:
+            G.add_edge(parent.name, cte.name)
+    # Perform topological sort (only works for DAGs)
+    try:
+        topological_order = list(nx.topological_sort(G))
+        if not topological_order:
+            return input
+        return [mapping[x] for x in topological_order]
+    except nx.NetworkXUnfeasible as e:
+        print(
+            "The graph is not a DAG (contains cycles) and cannot be topologically sorted."
+        )
+        raise e
+
+
 def filter_irrelevant_ctes(
     input: list[CTE],
     root_cte: CTE,

@@ -169,20 +195,22 @@ def optimize_ctes(
         REGISTERED_RULES.append(InlineDatasource())
     if CONFIG.optimizations.predicate_pushdown:
         REGISTERED_RULES.append(PredicatePushdown())
-
+    if CONFIG.optimizations.predicate_pushdown:
+        REGISTERED_RULES.append(PredicatePushdownRemove())
     for rule in REGISTERED_RULES:
         loops = 0
         complete = False
         while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
            actions_taken = False
            # assume we go through all CTEs once
-            look_at = [root_cte, *input]
+            look_at = [root_cte, *reversed(input)]
            inverse_map = gen_inverse_map(look_at)
            for cte in look_at:
                opt = rule.optimize(cte, inverse_map)
                actions_taken = actions_taken or opt
            complete = not actions_taken
            loops += 1
+        input = reorder_ctes(filter_irrelevant_ctes(input, root_cte))
         logger.info(f"finished checking for {type(rule).__name__} in {loops} loops")

-    return filter_irrelevant_ctes(input, root_cte)
+    return reorder_ctes(filter_irrelevant_ctes(input, root_cte))
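`reorder_ctes` leans on networkx's topological sort so each CTE is emitted after every CTE it references. A minimal standalone demonstration of that ordering property (CTE names invented):

import networkx as nx

# edges point from a parent CTE to the CTE that selects from it
G = nx.DiGraph()
G.add_edge("base_orders", "filtered_orders")
G.add_edge("filtered_orders", "final")
G.add_edge("base_customers", "final")

order = list(nx.topological_sort(G))
# every parent appears before its dependents in the WITH clause
assert order.index("base_orders") < order.index("filtered_orders") < order.index("final")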
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/__init__.py
RENAMED

@@ -1,6 +1,6 @@
 from .inline_constant import InlineConstant
 from .inline_datasource import InlineDatasource
-from .predicate_pushdown import PredicatePushdown
+from .predicate_pushdown import PredicatePushdown, PredicatePushdownRemove
 from .base_optimization import OptimizationRule

 __all__ = [

@@ -8,4 +8,5 @@ __all__ = [
     "InlineConstant",
     "InlineDatasource",
     "PredicatePushdown",
+    "PredicatePushdownRemove",
 ]
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/optimizations/predicate_pushdown.py
RENAMED

@@ -114,48 +114,6 @@ class PredicatePushdown(OptimizationRule):
         if not cte.condition:
             self.debug(f"No CTE condition for {cte.name}")
             return False
-
-        parent_filter_status = {
-            parent.name: is_child_of(cte.condition, parent.condition)
-            for parent in cte.parent_ctes
-        }
-        # flatten existnce argument tuples to a list
-
-        flattened_existence = [
-            x.address for y in cte.condition.existence_arguments for x in y
-        ]
-
-        existence_only = [
-            parent.name
-            for parent in cte.parent_ctes
-            if all([x.address in flattened_existence for x in parent.output_columns])
-            and len(flattened_existence) > 0
-        ]
-        if all(
-            [
-                value
-                for key, value in parent_filter_status.items()
-                if key not in existence_only
-            ]
-        ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
-            self.log(
-                f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
-            )
-            cte.condition = None
-            # remove any "parent" CTEs that provided only existence inputs
-            if existence_only:
-                original = [y.name for y in cte.parent_ctes]
-                cte.parent_ctes = [
-                    x for x in cte.parent_ctes if x.name not in existence_only
-                ]
-                self.log(
-                    f"new parents for {cte.name} are {[x.name for x in cte.parent_ctes]}, vs {original}"
-                )
-            return True
-        else:
-            self.log(
-                f"Could not remove filter from {cte.name}, as not all parents have the same filter: {parent_filter_status}"
-            )
         if self.complete.get(cte.name):
             self.debug("Have done this CTE before")
             return False

@@ -197,3 +155,63 @@ class PredicatePushdown(OptimizationRule):

         self.complete[cte.name] = True
         return optimized
+
+
+class PredicatePushdownRemove(OptimizationRule):
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.complete: dict[str, bool] = {}
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        optimized = False
+
+        if not cte.parent_ctes:
+            self.debug(f"No parent CTEs for {cte.name}")
+
+            return False
+
+        if not cte.condition:
+            self.debug(f"No CTE condition for {cte.name}")
+            return False
+
+        parent_filter_status = {
+            parent.name: is_child_of(cte.condition, parent.condition)
+            for parent in cte.parent_ctes
+        }
+        # flatten existnce argument tuples to a list
+
+        flattened_existence = [
+            x.address for y in cte.condition.existence_arguments for x in y
+        ]
+
+        existence_only = [
+            parent.name
+            for parent in cte.parent_ctes
+            if all([x.address in flattened_existence for x in parent.output_columns])
+            and len(flattened_existence) > 0
+        ]
+        if all(
+            [
+                value
+                for key, value in parent_filter_status.items()
+                if key not in existence_only
+            ]
+        ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
+            self.log(
+                f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
+            )
+            cte.condition = None
+            # remove any "parent" CTEs that provided only existence inputs
+            if existence_only:
+                original = [y.name for y in cte.parent_ctes]
+                cte.parent_ctes = [
+                    x for x in cte.parent_ctes if x.name not in existence_only
+                ]
+                self.log(
+                    f"new parents for {cte.name} are {[x.name for x in cte.parent_ctes]}, vs {original}"
+                )
+            return True
+
+        self.complete[cte.name] = True
+        return optimized
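The block deleted from `PredicatePushdown.optimize` reappears verbatim as the new `PredicatePushdownRemove` rule, so condition removal now runs as its own pass after pushdown. A toy sketch of the two-phase idea (plain classes and string conditions, not the trilogy API):

# rough sketch, assuming a toy CTE shape
class ToyCTE:
    def __init__(self, name, condition=None, parents=()):
        self.name = name
        self.condition = condition
        self.parent_ctes = list(parents)


parent = ToyCTE("orders_base")
child = ToyCTE("orders_filtered", condition="status = 'open'", parents=[parent])

# phase 1 (pushdown): copy the child's condition into each parent
for p in child.parent_ctes:
    p.condition = child.condition

# phase 2 (removal): once every parent carries the same filter,
# the child's own copy is redundant and is dropped
if all(p.condition == child.condition for p in child.parent_ctes):
    child.condition = None

assert parent.condition == "status = 'open'" and child.condition is None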
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/concept_strategies_v3.py
RENAMED

@@ -180,7 +180,10 @@ def generate_candidates_restrictive(
     local_candidates = [
         x
         for x in list(candidates)
-        if x.address not in exhausted
+        if x.address not in exhausted
+        and x.granularity != Granularity.SINGLE_ROW
+        and x.address not in priority_concept.pseudonyms
+        and priority_concept.address not in x.pseudonyms
     ]
     combos: list[list[Concept]] = []
     grain_check = Grain(components=[*local_candidates]).components_copy

@@ -608,7 +611,7 @@ def _search_concepts(
     if len(stack) == 1:
         output = stack[0]
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)} with output {[x.address for x in output.output_concepts]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)} with output {[x.address for x in output.output_concepts]} and {output.resolve().source_map}"
         )
         return output

@@ -658,8 +661,7 @@ def _search_concepts(
                 if x.address not in [y.address for y in mandatory_list]
                 and x not in ex_resolve.grain.components
             ]
-            expanded.
-            expanded.rebuild_cache()
+            expanded.set_output_concepts(mandatory_list)

             logger.info(
                 f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition; removing extra {[c.address for c in extra]}"
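The tightened candidate filter above now also skips single-row-granularity concepts and anything that is a pseudonym of the priority concept, in either direction. A toy version of that predicate using plain dicts in place of Concept objects:

def keep(candidate: dict, priority: dict, exhausted: set) -> bool:
    # mirrors the four conditions in the hunk, with string stand-ins
    return (
        candidate["address"] not in exhausted
        and candidate["granularity"] != "single_row"
        and candidate["address"] not in priority["pseudonyms"]
        and priority["address"] not in candidate["pseudonyms"]
    )


priority = {"address": "order.id", "pseudonyms": {"order.identifier"}}
alias = {
    "address": "order.identifier",
    "granularity": "multi_row",
    "pseudonyms": set(),
}
# a pseudonym of the priority concept is no longer considered a candidate
assert not keep(alias, priority, exhausted=set())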
{pytrilogy-0.0.2.11 → pytrilogy-0.0.2.13}/trilogy/core/processing/node_generators/basic_node.py
RENAMED

@@ -10,6 +10,8 @@ from trilogy.core.processing.node_generators.common import (
 )
 from trilogy.utility import unique
 from trilogy.constants import logger
+from trilogy.core.enums import SourceType
+from itertools import combinations

 LOGGER_PREFIX = "[GEN_BASIC_NODE]"

@@ -31,12 +33,21 @@ def gen_basic_node(
     )

     local_optional_redundant = [x for x in local_optional if x in parent_concepts]
-    attempts = [(parent_concepts, [concept] + local_optional_redundant)]
-
+    attempts: List[tuple[list[Concept], list[Concept]]] = [
+        (parent_concepts, [concept] + local_optional_redundant)
+    ]
+    equivalent_optional = [
+        x
+        for x in local_optional
+        if x.lineage == concept.lineage and x.address != concept.address
+    ]
+    non_equivalent_optional = [
+        x for x in local_optional if x not in equivalent_optional
+    ]

     if local_optional:
-        for combo in range(1, len(local_optional) + 1):
-            combos = combinations(local_optional, combo)
+        for combo in range(1, len(non_equivalent_optional) + 1):
+            combos = combinations(non_equivalent_optional, combo)
             for optional_set in combos:
                 attempts.append(
                     (

@@ -55,8 +66,10 @@ def gen_basic_node(
             depth=depth + 1,
             history=history,
         )
+
        if not parent_node:
            continue
+        parent_node.source_type = SourceType.BASIC
        parents: List[StrategyNode] = [parent_node]
        for x in basic_output:
            sources = [p for p in parents if x in p.output_concepts]

@@ -64,13 +77,10 @@ def gen_basic_node(
                continue
            if all(x in source.partial_concepts for source in sources):
                partials.append(x)
-        outputs = parent_node.output_concepts + [concept]
-        logger.info(
-            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
-        )
-        # parents.resolve()

        parent_node.add_output_concept(concept)
+        for x in equivalent_optional:
+            parent_node.add_output_concept(x)

        parent_node.remove_output_concepts(
            [

@@ -79,6 +89,9 @@ def gen_basic_node(
                if x.address not in [y.address for y in basic_output]
            ]
        )
+        logger.info(
+            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in parent_node.output_concepts]}"
+        )
        return parent_node
    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"