pytrilogy-0.0.2.25-py3-none-any.whl → pytrilogy-0.0.2.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/RECORD +26 -26
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/graph_models.py +2 -2
- trilogy/core/models.py +205 -140
- trilogy/core/optimizations/inline_datasource.py +4 -4
- trilogy/core/processing/node_generators/common.py +0 -1
- trilogy/core/processing/node_generators/select_merge_node.py +56 -23
- trilogy/core/processing/nodes/base_node.py +3 -0
- trilogy/core/processing/nodes/merge_node.py +12 -12
- trilogy/core/processing/nodes/select_node_v2.py +6 -2
- trilogy/core/processing/utility.py +237 -258
- trilogy/core/query_processor.py +65 -53
- trilogy/dialect/base.py +1 -0
- trilogy/dialect/common.py +4 -25
- trilogy/executor.py +12 -3
- trilogy/hooks/query_debugger.py +5 -1
- trilogy/parsing/common.py +4 -6
- trilogy/parsing/parse_engine.py +20 -16
- trilogy/parsing/render.py +63 -21
- trilogy/parsing/trilogy.lark +6 -4
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/top_level.txt +0 -0
trilogy/core/models.py
CHANGED
|
@@ -73,6 +73,7 @@ from collections import UserList, UserDict
|
|
|
73
73
|
from functools import cached_property
|
|
74
74
|
from abc import ABC
|
|
75
75
|
from collections import defaultdict
|
|
76
|
+
import hashlib
|
|
76
77
|
|
|
77
78
|
LOGGER_PREFIX = "[MODELS]"
|
|
78
79
|
|
|
@@ -190,6 +191,13 @@ class ConstantInlineable(ABC):
|
|
|
190
191
|
raise NotImplementedError
|
|
191
192
|
|
|
192
193
|
|
|
194
|
+
class HasUUID(ABC):
|
|
195
|
+
|
|
196
|
+
@property
|
|
197
|
+
def uuid(self) -> str:
|
|
198
|
+
return hashlib.md5(str(self).encode()).hexdigest()
|
|
199
|
+
|
|
200
|
+
|
|
193
201
|
class SelectTypeMixin(BaseModel):
|
|
194
202
|
where_clause: Union["WhereClause", None] = Field(default=None)
|
|
195
203
|
having_clause: Union["HavingClause", None] = Field(default=None)
|
|
@@ -1606,7 +1614,7 @@ class RawSQLStatement(BaseModel):
|
|
|
1606
1614
|
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1607
1615
|
|
|
1608
1616
|
|
|
1609
|
-
class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
1617
|
+
class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
1610
1618
|
selection: List[SelectItem]
|
|
1611
1619
|
order_by: Optional[OrderBy] = None
|
|
1612
1620
|
limit: Optional[int] = None
|
|
@@ -1711,7 +1719,7 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
|
1711
1719
|
def to_datasource(
|
|
1712
1720
|
self,
|
|
1713
1721
|
namespace: str,
|
|
1714
|
-
|
|
1722
|
+
name: str,
|
|
1715
1723
|
address: Address,
|
|
1716
1724
|
grain: Grain | None = None,
|
|
1717
1725
|
) -> Datasource:
|
|
@@ -1724,19 +1732,34 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
|
1724
1732
|
# if the concept is a locally derived concept, it cannot ever be partial
|
|
1725
1733
|
# but if it's a concept pulled in from upstream and we have a where clause, it should be partial
|
|
1726
1734
|
ColumnAssignment(
|
|
1727
|
-
alias=
|
|
1735
|
+
alias=(
|
|
1736
|
+
c.name.replace(".", "_")
|
|
1737
|
+
if c.namespace == DEFAULT_NAMESPACE
|
|
1738
|
+
else c.address.replace(".", "_")
|
|
1739
|
+
),
|
|
1728
1740
|
concept=c,
|
|
1729
1741
|
modifiers=modifiers if c.address not in self.locally_derived else [],
|
|
1730
1742
|
)
|
|
1731
1743
|
for c in self.output_components
|
|
1732
1744
|
]
|
|
1733
1745
|
|
|
1746
|
+
condition = None
|
|
1747
|
+
if self.where_clause:
|
|
1748
|
+
condition = self.where_clause.conditional
|
|
1749
|
+
if self.having_clause:
|
|
1750
|
+
if condition:
|
|
1751
|
+
condition = self.having_clause.conditional + condition
|
|
1752
|
+
else:
|
|
1753
|
+
condition = self.having_clause.conditional
|
|
1754
|
+
|
|
1734
1755
|
new_datasource = Datasource(
|
|
1735
|
-
|
|
1756
|
+
name=name,
|
|
1736
1757
|
address=address,
|
|
1737
1758
|
grain=grain or self.grain,
|
|
1738
1759
|
columns=columns,
|
|
1739
1760
|
namespace=namespace,
|
|
1761
|
+
where=WhereClause(conditional=condition) if condition else None,
|
|
1762
|
+
non_partial_for=WhereClause(conditional=condition) if condition else None,
|
|
1740
1763
|
)
|
|
1741
1764
|
for column in columns:
|
|
1742
1765
|
column.concept = column.concept.with_grain(new_datasource.grain)
|
|
@@ -1859,7 +1882,7 @@ class AlignClause(Namespaced, BaseModel):
|
|
|
1859
1882
|
return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
|
|
1860
1883
|
|
|
1861
1884
|
|
|
1862
|
-
class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
|
|
1885
|
+
class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, BaseModel):
|
|
1863
1886
|
selects: List[SelectStatement]
|
|
1864
1887
|
align: AlignClause
|
|
1865
1888
|
namespace: str
|
|
@@ -2021,7 +2044,7 @@ class DatasourceMetadata(BaseModel):
|
|
|
2021
2044
|
line_no: int | None = None
|
|
2022
2045
|
|
|
2023
2046
|
|
|
2024
|
-
class MergeStatementV2(Namespaced, BaseModel):
|
|
2047
|
+
class MergeStatementV2(HasUUID, Namespaced, BaseModel):
|
|
2025
2048
|
source: Concept
|
|
2026
2049
|
target: Concept
|
|
2027
2050
|
modifiers: List[Modifier] = Field(default_factory=list)
|
|
@@ -2035,8 +2058,8 @@ class MergeStatementV2(Namespaced, BaseModel):
|
|
|
2035
2058
|
return new
|
|
2036
2059
|
|
|
2037
2060
|
|
|
2038
|
-
class Datasource(Namespaced, BaseModel):
|
|
2039
|
-
|
|
2061
|
+
class Datasource(HasUUID, Namespaced, BaseModel):
|
|
2062
|
+
name: str
|
|
2040
2063
|
columns: List[ColumnAssignment]
|
|
2041
2064
|
address: Union[Address, str]
|
|
2042
2065
|
grain: Grain = Field(
|
|
@@ -2047,6 +2070,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2047
2070
|
default_factory=lambda: DatasourceMetadata(freshness_concept=None)
|
|
2048
2071
|
)
|
|
2049
2072
|
where: Optional[WhereClause] = None
|
|
2073
|
+
non_partial_for: Optional[WhereClause] = None
|
|
2050
2074
|
|
|
2051
2075
|
def merge_concept(
|
|
2052
2076
|
self, source: Concept, target: Concept, modifiers: List[Modifier]
|
|
@@ -2070,10 +2094,14 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2070
2094
|
self.add_column(target, original[0].alias, modifiers)
|
|
2071
2095
|
|
|
2072
2096
|
@property
|
|
2073
|
-
def
|
|
2097
|
+
def identifier(self) -> str:
|
|
2074
2098
|
if not self.namespace or self.namespace == DEFAULT_NAMESPACE:
|
|
2075
|
-
return self.
|
|
2076
|
-
return f"{self.namespace}.{self.
|
|
2099
|
+
return self.name
|
|
2100
|
+
return f"{self.namespace}.{self.name}"
|
|
2101
|
+
|
|
2102
|
+
@property
|
|
2103
|
+
def safe_identifier(self) -> str:
|
|
2104
|
+
return self.identifier.replace(".", "_")
|
|
2077
2105
|
|
|
2078
2106
|
@property
|
|
2079
2107
|
def condition(self):
|
|
@@ -2142,13 +2170,13 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2142
2170
|
return self
|
|
2143
2171
|
|
|
2144
2172
|
def __repr__(self):
|
|
2145
|
-
return f"Datasource<{self.
|
|
2173
|
+
return f"Datasource<{self.identifier}@<{self.grain}>"
|
|
2146
2174
|
|
|
2147
2175
|
def __str__(self):
|
|
2148
2176
|
return self.__repr__()
|
|
2149
2177
|
|
|
2150
2178
|
def __hash__(self):
|
|
2151
|
-
return self.
|
|
2179
|
+
return self.identifier.__hash__()
|
|
2152
2180
|
|
|
2153
2181
|
def with_namespace(self, namespace: str):
|
|
2154
2182
|
new_namespace = (
|
|
@@ -2157,7 +2185,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2157
2185
|
else namespace
|
|
2158
2186
|
)
|
|
2159
2187
|
return Datasource(
|
|
2160
|
-
|
|
2188
|
+
name=self.name,
|
|
2161
2189
|
namespace=new_namespace,
|
|
2162
2190
|
grain=self.grain.with_namespace(namespace),
|
|
2163
2191
|
address=self.address,
|
|
@@ -2207,19 +2235,6 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2207
2235
|
f" {existing}."
|
|
2208
2236
|
)
|
|
2209
2237
|
|
|
2210
|
-
@property
|
|
2211
|
-
def name(self) -> str:
|
|
2212
|
-
return self.identifier
|
|
2213
|
-
# TODO: namespace all references
|
|
2214
|
-
# return f'{self.namespace}_{self.identifier}'
|
|
2215
|
-
|
|
2216
|
-
@property
|
|
2217
|
-
def full_name(self) -> str:
|
|
2218
|
-
if not self.namespace:
|
|
2219
|
-
return self.identifier
|
|
2220
|
-
namespace = self.namespace.replace(".", "_") if self.namespace else ""
|
|
2221
|
-
return f"{namespace}_{self.identifier}"
|
|
2222
|
-
|
|
2223
2238
|
@property
|
|
2224
2239
|
def safe_location(self) -> str:
|
|
2225
2240
|
if isinstance(self.address, Address):
|
|
@@ -2247,6 +2262,7 @@ class InstantiatedUnnestJoin(BaseModel):
|
|
|
2247
2262
|
class ConceptPair(BaseModel):
|
|
2248
2263
|
left: Concept
|
|
2249
2264
|
right: Concept
|
|
2265
|
+
existing_datasource: Union[Datasource, "QueryDatasource"]
|
|
2250
2266
|
modifiers: List[Modifier] = Field(default_factory=list)
|
|
2251
2267
|
|
|
2252
2268
|
@property
|
|
@@ -2258,17 +2274,23 @@ class ConceptPair(BaseModel):
|
|
|
2258
2274
|
return Modifier.NULLABLE in self.modifiers
|
|
2259
2275
|
|
|
2260
2276
|
|
|
2277
|
+
class CTEConceptPair(ConceptPair):
|
|
2278
|
+
cte: CTE
|
|
2279
|
+
|
|
2280
|
+
|
|
2261
2281
|
class BaseJoin(BaseModel):
|
|
2262
|
-
left_datasource: Union[Datasource, "QueryDatasource"]
|
|
2263
2282
|
right_datasource: Union[Datasource, "QueryDatasource"]
|
|
2264
|
-
concepts: List[Concept]
|
|
2265
2283
|
join_type: JoinType
|
|
2266
|
-
|
|
2284
|
+
concepts: Optional[List[Concept]] = None
|
|
2285
|
+
left_datasource: Optional[Union[Datasource, "QueryDatasource"]] = None
|
|
2267
2286
|
concept_pairs: list[ConceptPair] | None = None
|
|
2268
2287
|
|
|
2269
2288
|
def __init__(self, **data: Any):
|
|
2270
2289
|
super().__init__(**data)
|
|
2271
|
-
if
|
|
2290
|
+
if (
|
|
2291
|
+
self.left_datasource
|
|
2292
|
+
and self.left_datasource.identifier == self.right_datasource.identifier
|
|
2293
|
+
):
|
|
2272
2294
|
raise SyntaxError(
|
|
2273
2295
|
f"Cannot join a dataself to itself, joining {self.left_datasource} and"
|
|
2274
2296
|
f" {self.right_datasource}"
|
|
@@ -2278,8 +2300,10 @@ class BaseJoin(BaseModel):
|
|
|
2278
2300
|
# if we have a list of concept pairs
|
|
2279
2301
|
if self.concept_pairs:
|
|
2280
2302
|
return
|
|
2281
|
-
|
|
2282
|
-
|
|
2303
|
+
if self.concepts == []:
|
|
2304
|
+
return
|
|
2305
|
+
assert self.left_datasource and self.right_datasource
|
|
2306
|
+
for concept in self.concepts or []:
|
|
2283
2307
|
include = True
|
|
2284
2308
|
for ds in [self.left_datasource, self.right_datasource]:
|
|
2285
2309
|
synonyms = []
|
|
@@ -2289,13 +2313,10 @@ class BaseJoin(BaseModel):
|
|
|
2289
2313
|
concept.address not in [c.address for c in ds.output_concepts]
|
|
2290
2314
|
and concept.address not in synonyms
|
|
2291
2315
|
):
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
2297
|
-
f" {[c.address for c in ds.output_concepts]}"
|
|
2298
|
-
)
|
|
2316
|
+
raise SyntaxError(
|
|
2317
|
+
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
2318
|
+
f" {[c.address for c in ds.output_concepts]}"
|
|
2319
|
+
)
|
|
2299
2320
|
if include:
|
|
2300
2321
|
final_concepts.append(concept)
|
|
2301
2322
|
if not final_concepts and self.concepts:
|
|
@@ -2312,7 +2333,7 @@ class BaseJoin(BaseModel):
|
|
|
2312
2333
|
self.concepts = []
|
|
2313
2334
|
return
|
|
2314
2335
|
# if everything is at abstract grain, we can skip joins
|
|
2315
|
-
if all([c.grain
|
|
2336
|
+
if all([c.grain.abstract for c in ds.output_concepts]):
|
|
2316
2337
|
self.concepts = []
|
|
2317
2338
|
return
|
|
2318
2339
|
|
|
@@ -2330,21 +2351,27 @@ class BaseJoin(BaseModel):
|
|
|
2330
2351
|
|
|
2331
2352
|
@property
|
|
2332
2353
|
def unique_id(self) -> str:
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2354
|
+
return str(self)
|
|
2355
|
+
|
|
2356
|
+
@property
|
|
2357
|
+
def input_concepts(self) -> List[Concept]:
|
|
2358
|
+
base = []
|
|
2359
|
+
if self.concept_pairs:
|
|
2360
|
+
for pair in self.concept_pairs:
|
|
2361
|
+
base += [pair.left, pair.right]
|
|
2362
|
+
elif self.concepts:
|
|
2363
|
+
base += self.concepts
|
|
2364
|
+
return base
|
|
2339
2365
|
|
|
2340
2366
|
def __str__(self):
|
|
2341
2367
|
if self.concept_pairs:
|
|
2342
2368
|
return (
|
|
2343
|
-
f"{self.join_type.value} on"
|
|
2344
|
-
f" {','.join([str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
|
|
2369
|
+
f"{self.join_type.value} {self.right_datasource.name} on"
|
|
2370
|
+
f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
|
|
2345
2371
|
)
|
|
2346
2372
|
return (
|
|
2347
|
-
f"{self.join_type.value}
|
|
2373
|
+
f"{self.join_type.value} {self.right_datasource.name} on"
|
|
2374
|
+
f" {','.join([str(k) for k in self.concepts])}"
|
|
2348
2375
|
)
|
|
2349
2376
|
|
|
2350
2377
|
|
|
@@ -2374,6 +2401,10 @@ class QueryDatasource(BaseModel):
|
|
|
2374
2401
|
def __repr__(self):
|
|
2375
2402
|
return f"{self.identifier}@<{self.grain}>"
|
|
2376
2403
|
|
|
2404
|
+
@property
|
|
2405
|
+
def safe_identifier(self):
|
|
2406
|
+
return self.identifier.replace(".", "_")
|
|
2407
|
+
|
|
2377
2408
|
@property
|
|
2378
2409
|
def non_partial_concept_addresses(self) -> List[str]:
|
|
2379
2410
|
return [
|
|
@@ -2389,19 +2420,9 @@ class QueryDatasource(BaseModel):
|
|
|
2389
2420
|
for join in v:
|
|
2390
2421
|
if not isinstance(join, BaseJoin):
|
|
2391
2422
|
continue
|
|
2392
|
-
|
|
2393
|
-
raise SyntaxError(
|
|
2394
|
-
f"Cannot join a datasource to itself, joining {join.left_datasource}"
|
|
2395
|
-
)
|
|
2396
|
-
pairing = "".join(
|
|
2397
|
-
sorted(
|
|
2398
|
-
[join.left_datasource.identifier, join.right_datasource.identifier]
|
|
2399
|
-
)
|
|
2400
|
-
)
|
|
2423
|
+
pairing = str(join)
|
|
2401
2424
|
if pairing in unique_pairs:
|
|
2402
|
-
raise SyntaxError(
|
|
2403
|
-
f"Duplicate join {join.left_datasource.identifier} and {join.right_datasource.identifier}"
|
|
2404
|
-
)
|
|
2425
|
+
raise SyntaxError(f"Duplicate join {str(join)}")
|
|
2405
2426
|
unique_pairs.add(pairing)
|
|
2406
2427
|
return v
|
|
2407
2428
|
|
|
@@ -2448,10 +2469,6 @@ class QueryDatasource(BaseModel):
|
|
|
2448
2469
|
def name(self):
|
|
2449
2470
|
return self.identifier
|
|
2450
2471
|
|
|
2451
|
-
@property
|
|
2452
|
-
def full_name(self):
|
|
2453
|
-
return self.identifier
|
|
2454
|
-
|
|
2455
2472
|
@property
|
|
2456
2473
|
def group_required(self) -> bool:
|
|
2457
2474
|
if self.force_group is True:
|
|
@@ -2498,10 +2515,12 @@ class QueryDatasource(BaseModel):
|
|
|
2498
2515
|
merged_datasources = {}
|
|
2499
2516
|
|
|
2500
2517
|
for ds in [*self.datasources, *other.datasources]:
|
|
2501
|
-
if ds.
|
|
2502
|
-
merged_datasources[ds.
|
|
2518
|
+
if ds.safe_identifier in merged_datasources:
|
|
2519
|
+
merged_datasources[ds.safe_identifier] = (
|
|
2520
|
+
merged_datasources[ds.safe_identifier] + ds
|
|
2521
|
+
)
|
|
2503
2522
|
else:
|
|
2504
|
-
merged_datasources[ds.
|
|
2523
|
+
merged_datasources[ds.safe_identifier] = ds
|
|
2505
2524
|
|
|
2506
2525
|
final_source_map = defaultdict(set)
|
|
2507
2526
|
for key in self.source_map:
|
|
@@ -2512,7 +2531,9 @@ class QueryDatasource(BaseModel):
|
|
|
2512
2531
|
if key not in final_source_map:
|
|
2513
2532
|
final_source_map[key] = other.source_map[key]
|
|
2514
2533
|
for k, v in final_source_map.items():
|
|
2515
|
-
final_source_map[k] = set(
|
|
2534
|
+
final_source_map[k] = set(
|
|
2535
|
+
merged_datasources[x.safe_identifier] for x in list(v)
|
|
2536
|
+
)
|
|
2516
2537
|
self_hidden = self.hidden_concepts or []
|
|
2517
2538
|
other_hidden = other.hidden_concepts or []
|
|
2518
2539
|
hidden = [x for x in self_hidden if x.address in other_hidden]
|
|
@@ -2552,7 +2573,7 @@ class QueryDatasource(BaseModel):
|
|
|
2552
2573
|
)
|
|
2553
2574
|
# partial = "_".join([str(c.address).replace(".", "_") for c in self.partial_concepts])
|
|
2554
2575
|
return (
|
|
2555
|
-
"_join_".join([d.
|
|
2576
|
+
"_join_".join([d.identifier for d in self.datasources])
|
|
2556
2577
|
+ (f"_at_{grain}" if grain else "_at_abstract")
|
|
2557
2578
|
+ (f"_filtered_by_{filters}" if filters else "")
|
|
2558
2579
|
# + (f"_partial_{partial}" if partial else "")
|
|
@@ -2568,8 +2589,9 @@ class QueryDatasource(BaseModel):
|
|
|
2568
2589
|
for x in self.datasources:
|
|
2569
2590
|
# query datasources should be referenced by their alias, always
|
|
2570
2591
|
force_alias = isinstance(x, QueryDatasource)
|
|
2592
|
+
#
|
|
2571
2593
|
use_raw_name = isinstance(x, Datasource) and not force_alias
|
|
2572
|
-
if source and x.
|
|
2594
|
+
if source and x.safe_identifier != source:
|
|
2573
2595
|
continue
|
|
2574
2596
|
try:
|
|
2575
2597
|
return x.get_alias(
|
|
@@ -2623,6 +2645,14 @@ class CTE(BaseModel):
|
|
|
2623
2645
|
base_name_override: Optional[str] = None
|
|
2624
2646
|
base_alias_override: Optional[str] = None
|
|
2625
2647
|
|
|
2648
|
+
@property
|
|
2649
|
+
def identifier(self):
|
|
2650
|
+
return self.name
|
|
2651
|
+
|
|
2652
|
+
@property
|
|
2653
|
+
def safe_identifier(self):
|
|
2654
|
+
return self.name
|
|
2655
|
+
|
|
2626
2656
|
@computed_field # type: ignore
|
|
2627
2657
|
@property
|
|
2628
2658
|
def output_lcl(self) -> LooseConceptList:
|
|
@@ -2666,7 +2696,12 @@ class CTE(BaseModel):
|
|
|
2666
2696
|
isinstance(join, Join)
|
|
2667
2697
|
and (
|
|
2668
2698
|
join.right_cte.name != removed_cte
|
|
2669
|
-
and
|
|
2699
|
+
and any(
|
|
2700
|
+
[
|
|
2701
|
+
x.cte.name != removed_cte
|
|
2702
|
+
for x in (join.joinkey_pairs or [])
|
|
2703
|
+
]
|
|
2704
|
+
)
|
|
2670
2705
|
)
|
|
2671
2706
|
)
|
|
2672
2707
|
]
|
|
@@ -2715,7 +2750,7 @@ class CTE(BaseModel):
|
|
|
2715
2750
|
return False
|
|
2716
2751
|
if any(
|
|
2717
2752
|
[
|
|
2718
|
-
x.
|
|
2753
|
+
x.safe_identifier == ds_being_inlined.safe_identifier
|
|
2719
2754
|
for x in self.source.datasources
|
|
2720
2755
|
]
|
|
2721
2756
|
):
|
|
@@ -2726,35 +2761,49 @@ class CTE(BaseModel):
|
|
|
2726
2761
|
*[
|
|
2727
2762
|
x
|
|
2728
2763
|
for x in self.source.datasources
|
|
2729
|
-
if x.
|
|
2764
|
+
if x.safe_identifier != qds_being_inlined.safe_identifier
|
|
2730
2765
|
],
|
|
2731
2766
|
]
|
|
2732
2767
|
# need to identify this before updating joins
|
|
2733
2768
|
if self.base_name == parent.name:
|
|
2734
2769
|
self.base_name_override = ds_being_inlined.safe_location
|
|
2735
|
-
self.base_alias_override = ds_being_inlined.
|
|
2770
|
+
self.base_alias_override = ds_being_inlined.safe_identifier
|
|
2736
2771
|
|
|
2737
2772
|
for join in self.joins:
|
|
2738
2773
|
if isinstance(join, InstantiatedUnnestJoin):
|
|
2739
2774
|
continue
|
|
2740
|
-
if
|
|
2775
|
+
if (
|
|
2776
|
+
join.left_cte
|
|
2777
|
+
and join.left_cte.safe_identifier == parent.safe_identifier
|
|
2778
|
+
):
|
|
2741
2779
|
join.inline_cte(parent)
|
|
2742
|
-
if join.
|
|
2780
|
+
if join.joinkey_pairs:
|
|
2781
|
+
for pair in join.joinkey_pairs:
|
|
2782
|
+
if pair.cte and pair.cte.safe_identifier == parent.safe_identifier:
|
|
2783
|
+
join.inline_cte(parent)
|
|
2784
|
+
if join.right_cte.safe_identifier == parent.safe_identifier:
|
|
2743
2785
|
join.inline_cte(parent)
|
|
2744
2786
|
for k, v in self.source_map.items():
|
|
2745
2787
|
if isinstance(v, list):
|
|
2746
2788
|
self.source_map[k] = [
|
|
2747
|
-
|
|
2789
|
+
(
|
|
2790
|
+
ds_being_inlined.safe_identifier
|
|
2791
|
+
if x == parent.safe_identifier
|
|
2792
|
+
else x
|
|
2793
|
+
)
|
|
2794
|
+
for x in v
|
|
2748
2795
|
]
|
|
2749
|
-
elif v == parent.
|
|
2750
|
-
self.source_map[k] = [ds_being_inlined.
|
|
2796
|
+
elif v == parent.safe_identifier:
|
|
2797
|
+
self.source_map[k] = [ds_being_inlined.safe_identifier]
|
|
2751
2798
|
|
|
2752
2799
|
# zip in any required values for lookups
|
|
2753
2800
|
for k in ds_being_inlined.output_lcl.addresses:
|
|
2754
2801
|
if k in self.source_map and self.source_map[k]:
|
|
2755
2802
|
continue
|
|
2756
|
-
self.source_map[k] = [ds_being_inlined.
|
|
2757
|
-
self.parent_ctes = [
|
|
2803
|
+
self.source_map[k] = [ds_being_inlined.safe_identifier]
|
|
2804
|
+
self.parent_ctes = [
|
|
2805
|
+
x for x in self.parent_ctes if x.safe_identifier != parent.safe_identifier
|
|
2806
|
+
]
|
|
2758
2807
|
if force_group:
|
|
2759
2808
|
self.group_to_grain = True
|
|
2760
2809
|
return True
|
|
@@ -2961,55 +3010,50 @@ class JoinKey(BaseModel):
|
|
|
2961
3010
|
|
|
2962
3011
|
|
|
2963
3012
|
class Join(BaseModel):
|
|
2964
|
-
|
|
3013
|
+
|
|
2965
3014
|
right_cte: CTE
|
|
2966
3015
|
jointype: JoinType
|
|
2967
|
-
|
|
2968
|
-
joinkey_pairs: List[
|
|
3016
|
+
left_cte: CTE | None = None
|
|
3017
|
+
joinkey_pairs: List[CTEConceptPair] | None = None
|
|
2969
3018
|
inlined_ctes: set[str] = Field(default_factory=set)
|
|
2970
3019
|
|
|
2971
3020
|
def inline_cte(self, cte: CTE):
|
|
2972
3021
|
self.inlined_ctes.add(cte.name)
|
|
2973
3022
|
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
return self.left_cte.name
|
|
3023
|
+
def get_name(self, cte: CTE):
|
|
3024
|
+
if cte.identifier in self.inlined_ctes:
|
|
3025
|
+
return cte.source.datasources[0].safe_identifier
|
|
3026
|
+
return cte.safe_identifier
|
|
2979
3027
|
|
|
2980
3028
|
@property
|
|
2981
3029
|
def right_name(self) -> str:
|
|
2982
|
-
if self.right_cte.
|
|
2983
|
-
return self.right_cte.source.datasources[0].
|
|
2984
|
-
return self.right_cte.
|
|
2985
|
-
|
|
2986
|
-
@property
|
|
2987
|
-
def left_ref(self) -> str:
|
|
2988
|
-
if self.left_cte.name in self.inlined_ctes:
|
|
2989
|
-
return f"{self.left_cte.source.datasources[0].safe_location} as {self.left_cte.source.datasources[0].identifier}"
|
|
2990
|
-
return self.left_cte.name
|
|
3030
|
+
if self.right_cte.identifier in self.inlined_ctes:
|
|
3031
|
+
return self.right_cte.source.datasources[0].safe_identifier
|
|
3032
|
+
return self.right_cte.safe_identifier
|
|
2991
3033
|
|
|
2992
3034
|
@property
|
|
2993
3035
|
def right_ref(self) -> str:
|
|
2994
|
-
if self.right_cte.
|
|
2995
|
-
return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].
|
|
2996
|
-
return self.right_cte.
|
|
3036
|
+
if self.right_cte.identifier in self.inlined_ctes:
|
|
3037
|
+
return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
|
|
3038
|
+
return self.right_cte.safe_identifier
|
|
2997
3039
|
|
|
2998
3040
|
@property
|
|
2999
3041
|
def unique_id(self) -> str:
|
|
3000
|
-
return self
|
|
3042
|
+
return str(self)
|
|
3001
3043
|
|
|
3002
3044
|
def __str__(self):
|
|
3003
3045
|
if self.joinkey_pairs:
|
|
3004
3046
|
return (
|
|
3005
|
-
f"{self.jointype.value}
|
|
3047
|
+
f"{self.jointype.value} join"
|
|
3006
3048
|
f" {self.right_name} on"
|
|
3007
|
-
f" {','.join([str(k.left)+'='+str(k.right
|
|
3049
|
+
f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
|
|
3008
3050
|
)
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3051
|
+
elif self.left_cte:
|
|
3052
|
+
return (
|
|
3053
|
+
f"{self.jointype.value} JOIN {self.left_cte.name} and"
|
|
3054
|
+
f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
|
|
3055
|
+
)
|
|
3056
|
+
return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
|
|
3013
3057
|
|
|
3014
3058
|
|
|
3015
3059
|
class UndefinedConcept(Concept, Mergeable, Namespaced):
|
|
@@ -3227,7 +3271,7 @@ class EnvironmentConceptDict(dict):
|
|
|
3227
3271
|
return super().items()
|
|
3228
3272
|
|
|
3229
3273
|
|
|
3230
|
-
class ImportStatement(BaseModel):
|
|
3274
|
+
class ImportStatement(HasUUID, BaseModel):
|
|
3231
3275
|
alias: str
|
|
3232
3276
|
path: Path
|
|
3233
3277
|
environment: Union["Environment", None] = None
|
|
@@ -3270,7 +3314,9 @@ class Environment(BaseModel):
|
|
|
3270
3314
|
] = Field(default_factory=EnvironmentDatasourceDict)
|
|
3271
3315
|
functions: Dict[str, Function] = Field(default_factory=dict)
|
|
3272
3316
|
data_types: Dict[str, DataType] = Field(default_factory=dict)
|
|
3273
|
-
imports: Dict[str, ImportStatement] = Field(
|
|
3317
|
+
imports: Dict[str, list[ImportStatement]] = Field(
|
|
3318
|
+
default_factory=lambda: defaultdict(list)
|
|
3319
|
+
)
|
|
3274
3320
|
namespace: str = DEFAULT_NAMESPACE
|
|
3275
3321
|
working_path: str | Path = Field(default_factory=lambda: os.getcwd())
|
|
3276
3322
|
environment_config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
|
|
@@ -3384,14 +3430,28 @@ class Environment(BaseModel):
|
|
|
3384
3430
|
f"Assignment to concept '{lookup}' is a duplicate declaration;"
|
|
3385
3431
|
)
|
|
3386
3432
|
|
|
3387
|
-
def add_import(
|
|
3388
|
-
self
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3433
|
+
def add_import(
|
|
3434
|
+
self, alias: str, source: Environment, imp_stm: ImportStatement | None = None
|
|
3435
|
+
):
|
|
3436
|
+
exists = False
|
|
3437
|
+
existing = self.imports[alias]
|
|
3438
|
+
if imp_stm:
|
|
3439
|
+
if any([x.path == imp_stm.path for x in existing]):
|
|
3440
|
+
exists = True
|
|
3441
|
+
|
|
3442
|
+
else:
|
|
3443
|
+
if any([x.path == source.working_path for x in existing]):
|
|
3444
|
+
exists = True
|
|
3445
|
+
imp_stm = ImportStatement(alias=alias, path=Path(source.working_path))
|
|
3446
|
+
|
|
3447
|
+
if not exists:
|
|
3448
|
+
self.imports[alias].append(imp_stm)
|
|
3449
|
+
|
|
3450
|
+
for _, concept in source.concepts.items():
|
|
3451
|
+
self.add_concept(concept.with_namespace(alias), _ignore_cache=True)
|
|
3452
|
+
|
|
3453
|
+
for _, datasource in source.datasources.items():
|
|
3454
|
+
self.add_datasource(datasource.with_namespace(alias), _ignore_cache=True)
|
|
3395
3455
|
self.gen_concept_list_caches()
|
|
3396
3456
|
return self
|
|
3397
3457
|
|
|
@@ -3402,18 +3462,15 @@ class Environment(BaseModel):
|
|
|
3402
3462
|
apath[-1] = apath[-1] + ".preql"
|
|
3403
3463
|
|
|
3404
3464
|
target: Path = Path(self.working_path, *apath)
|
|
3465
|
+
if alias in self.imports:
|
|
3466
|
+
imports = self.imports[alias]
|
|
3467
|
+
for x in imports:
|
|
3468
|
+
if x.path == target:
|
|
3469
|
+
return imports
|
|
3405
3470
|
if env:
|
|
3406
|
-
self.imports[alias]
|
|
3407
|
-
alias=alias, path=target, environment=env
|
|
3471
|
+
self.imports[alias].append(
|
|
3472
|
+
ImportStatement(alias=alias, path=target, environment=env)
|
|
3408
3473
|
)
|
|
3409
|
-
|
|
3410
|
-
elif alias in self.imports:
|
|
3411
|
-
current = self.imports[alias]
|
|
3412
|
-
env = self.imports[alias].environment
|
|
3413
|
-
if current.path != target:
|
|
3414
|
-
raise ImportError(
|
|
3415
|
-
f"Attempted to import {target} with alias {alias} but {alias} is already imported from {current.path}"
|
|
3416
|
-
)
|
|
3417
3474
|
else:
|
|
3418
3475
|
try:
|
|
3419
3476
|
with open(target, "r", encoding="utf-8") as f:
|
|
@@ -3432,14 +3489,13 @@ class Environment(BaseModel):
|
|
|
3432
3489
|
f"Unable to import file {target.parent}, parsing error: {e}"
|
|
3433
3490
|
)
|
|
3434
3491
|
env = nparser.environment
|
|
3435
|
-
|
|
3436
|
-
|
|
3437
|
-
self.add_concept(concept.with_namespace(alias))
|
|
3492
|
+
for _, concept in env.concepts.items():
|
|
3493
|
+
self.add_concept(concept.with_namespace(alias))
|
|
3438
3494
|
|
|
3439
|
-
|
|
3440
|
-
|
|
3495
|
+
for _, datasource in env.datasources.items():
|
|
3496
|
+
self.add_datasource(datasource.with_namespace(alias))
|
|
3441
3497
|
imps = ImportStatement(alias=alias, path=target, environment=env)
|
|
3442
|
-
self.imports[alias]
|
|
3498
|
+
self.imports[alias].append(imps)
|
|
3443
3499
|
return imps
|
|
3444
3500
|
|
|
3445
3501
|
def parse(
|
|
@@ -3502,8 +3558,14 @@ class Environment(BaseModel):
|
|
|
3502
3558
|
meta: Meta | None = None,
|
|
3503
3559
|
_ignore_cache: bool = False,
|
|
3504
3560
|
):
|
|
3505
|
-
self.datasources[datasource.
|
|
3561
|
+
self.datasources[datasource.identifier] = datasource
|
|
3562
|
+
|
|
3563
|
+
eligible_to_promote_roots = datasource.non_partial_for is None
|
|
3564
|
+
# mark this as canonical source
|
|
3506
3565
|
for current_concept in datasource.output_concepts:
|
|
3566
|
+
if not eligible_to_promote_roots:
|
|
3567
|
+
continue
|
|
3568
|
+
|
|
3507
3569
|
current_derivation = current_concept.derivation
|
|
3508
3570
|
# TODO: refine this section;
|
|
3509
3571
|
# too hacky for maintainability
|
|
@@ -4223,6 +4285,9 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
4223
4285
|
class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
|
|
4224
4286
|
conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
|
|
4225
4287
|
|
|
4288
|
+
def __repr__(self):
|
|
4289
|
+
return str(self.conditional)
|
|
4290
|
+
|
|
4226
4291
|
@property
|
|
4227
4292
|
def input(self) -> List[Concept]:
|
|
4228
4293
|
return self.conditional.input
|
|
@@ -4341,7 +4406,7 @@ class Limit(BaseModel):
|
|
|
4341
4406
|
count: int
|
|
4342
4407
|
|
|
4343
4408
|
|
|
4344
|
-
class ConceptDeclarationStatement(BaseModel):
|
|
4409
|
+
class ConceptDeclarationStatement(HasUUID, BaseModel):
|
|
4345
4410
|
concept: Concept
|
|
4346
4411
|
|
|
4347
4412
|
|
|
@@ -4349,7 +4414,7 @@ class ConceptDerivation(BaseModel):
|
|
|
4349
4414
|
concept: Concept
|
|
4350
4415
|
|
|
4351
4416
|
|
|
4352
|
-
class RowsetDerivationStatement(Namespaced, BaseModel):
|
|
4417
|
+
class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
|
|
4353
4418
|
name: str
|
|
4354
4419
|
select: SelectStatement | MultiSelectStatement
|
|
4355
4420
|
namespace: str
|
|
@@ -4614,7 +4679,7 @@ class TupleWrapper(Generic[VT], tuple):
|
|
|
4614
4679
|
return cls(v, type=arg_to_datatype(v[0]))
|
|
4615
4680
|
|
|
4616
4681
|
|
|
4617
|
-
class PersistStatement(BaseModel):
|
|
4682
|
+
class PersistStatement(HasUUID, BaseModel):
|
|
4618
4683
|
datasource: Datasource
|
|
4619
4684
|
select: SelectStatement
|
|
4620
4685
|
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|