pytrilogy 0.0.2.23__py3-none-any.whl → 0.0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.23.dist-info → pytrilogy-0.0.2.26.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.23.dist-info → pytrilogy-0.0.2.26.dist-info}/RECORD +25 -25
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/env_processor.py +12 -6
- trilogy/core/environment_helpers.py +0 -1
- trilogy/core/models.py +163 -86
- trilogy/core/processing/concept_strategies_v3.py +23 -4
- trilogy/core/processing/node_generators/common.py +0 -1
- trilogy/core/processing/node_generators/node_merge_node.py +4 -4
- trilogy/core/processing/node_generators/select_merge_node.py +49 -22
- trilogy/core/processing/nodes/merge_node.py +2 -2
- trilogy/core/processing/utility.py +241 -259
- trilogy/core/query_processor.py +47 -39
- trilogy/dialect/base.py +6 -1
- trilogy/dialect/common.py +4 -25
- trilogy/executor.py +12 -3
- trilogy/parsing/common.py +4 -6
- trilogy/parsing/parse_engine.py +3 -2
- trilogy/parsing/render.py +41 -17
- trilogy/parsing/trilogy.lark +2 -2
- {pytrilogy-0.0.2.23.dist-info → pytrilogy-0.0.2.26.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.23.dist-info → pytrilogy-0.0.2.26.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.23.dist-info → pytrilogy-0.0.2.26.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.23.dist-info → pytrilogy-0.0.2.26.dist-info}/top_level.txt +0 -0
trilogy/core/models.py
CHANGED
|
@@ -73,6 +73,7 @@ from collections import UserList, UserDict
|
|
|
73
73
|
from functools import cached_property
|
|
74
74
|
from abc import ABC
|
|
75
75
|
from collections import defaultdict
|
|
76
|
+
import hashlib
|
|
76
77
|
|
|
77
78
|
LOGGER_PREFIX = "[MODELS]"
|
|
78
79
|
|
|
@@ -102,6 +103,12 @@ def get_version():
|
|
|
102
103
|
return __version__
|
|
103
104
|
|
|
104
105
|
|
|
106
|
+
def address_with_namespace(address: str, namespace: str) -> str:
|
|
107
|
+
if address.split(".", 1)[0] == DEFAULT_NAMESPACE:
|
|
108
|
+
return f"{namespace}.{address.split('.',1)[1]}"
|
|
109
|
+
return f"{namespace}.{address}"
|
|
110
|
+
|
|
111
|
+
|
|
105
112
|
def get_concept_arguments(expr) -> List["Concept"]:
|
|
106
113
|
output = []
|
|
107
114
|
if isinstance(expr, Concept):
|
|
@@ -184,6 +191,13 @@ class ConstantInlineable(ABC):
|
|
|
184
191
|
raise NotImplementedError
|
|
185
192
|
|
|
186
193
|
|
|
194
|
+
class HasUUID(ABC):
|
|
195
|
+
|
|
196
|
+
@property
|
|
197
|
+
def uuid(self) -> str:
|
|
198
|
+
return hashlib.md5(str(self).encode()).hexdigest()
|
|
199
|
+
|
|
200
|
+
|
|
187
201
|
class SelectTypeMixin(BaseModel):
|
|
188
202
|
where_clause: Union["WhereClause", None] = Field(default=None)
|
|
189
203
|
having_clause: Union["HavingClause", None] = Field(default=None)
|
|
@@ -436,7 +450,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
436
450
|
keys: Optional[Tuple["Concept", ...]] = None
|
|
437
451
|
grain: "Grain" = Field(default=None, validate_default=True)
|
|
438
452
|
modifiers: Optional[List[Modifier]] = Field(default_factory=list)
|
|
439
|
-
pseudonyms:
|
|
453
|
+
pseudonyms: set[str] = Field(default_factory=set)
|
|
440
454
|
_address_cache: str | None = None
|
|
441
455
|
|
|
442
456
|
def __hash__(self):
|
|
@@ -462,7 +476,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
462
476
|
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
|
|
463
477
|
if self.address == source.address:
|
|
464
478
|
new = target.with_grain(self.grain.with_merge(source, target, modifiers))
|
|
465
|
-
new.pseudonyms
|
|
479
|
+
new.pseudonyms.add(self.address)
|
|
466
480
|
return new
|
|
467
481
|
return self.__class__(
|
|
468
482
|
name=self.name,
|
|
@@ -616,9 +630,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
616
630
|
else None
|
|
617
631
|
),
|
|
618
632
|
modifiers=self.modifiers,
|
|
619
|
-
pseudonyms={
|
|
620
|
-
k: v.with_namespace(namespace) for k, v in self.pseudonyms.items()
|
|
621
|
-
},
|
|
633
|
+
pseudonyms={address_with_namespace(v, namespace) for v in self.pseudonyms},
|
|
622
634
|
)
|
|
623
635
|
|
|
624
636
|
def with_select_context(
|
|
@@ -862,7 +874,7 @@ class Grain(Mergeable, BaseModel):
|
|
|
862
874
|
)
|
|
863
875
|
else:
|
|
864
876
|
v2 = unique(v, "address")
|
|
865
|
-
final = []
|
|
877
|
+
final: List[Concept] = []
|
|
866
878
|
for sub in v2:
|
|
867
879
|
if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
|
|
868
880
|
if all([c in v2 for c in sub.keys]):
|
|
@@ -916,6 +928,20 @@ class Grain(Mergeable, BaseModel):
|
|
|
916
928
|
[c.name == ALL_ROWS_CONCEPT for c in self.components]
|
|
917
929
|
)
|
|
918
930
|
|
|
931
|
+
@property
|
|
932
|
+
def synonym_set(self) -> set[str]:
|
|
933
|
+
base = []
|
|
934
|
+
for x in self.components_copy:
|
|
935
|
+
if isinstance(x.lineage, RowsetItem):
|
|
936
|
+
base.append(x.lineage.content.address)
|
|
937
|
+
for c in x.lineage.content.pseudonyms:
|
|
938
|
+
base.append(c)
|
|
939
|
+
else:
|
|
940
|
+
base.append(x.address)
|
|
941
|
+
for c in x.pseudonyms:
|
|
942
|
+
base.append(c)
|
|
943
|
+
return set(base)
|
|
944
|
+
|
|
919
945
|
@cached_property
|
|
920
946
|
def set(self) -> set[str]:
|
|
921
947
|
base = []
|
|
@@ -931,7 +957,11 @@ class Grain(Mergeable, BaseModel):
|
|
|
931
957
|
return self.set == set([c.address for c in other])
|
|
932
958
|
if not isinstance(other, Grain):
|
|
933
959
|
return False
|
|
934
|
-
|
|
960
|
+
if self.set == other.set:
|
|
961
|
+
return True
|
|
962
|
+
elif self.synonym_set == other.synonym_set:
|
|
963
|
+
return True
|
|
964
|
+
return False
|
|
935
965
|
|
|
936
966
|
def issubset(self, other: "Grain"):
|
|
937
967
|
return self.set.issubset(other.set)
|
|
@@ -1584,14 +1614,7 @@ class RawSQLStatement(BaseModel):
|
|
|
1584
1614
|
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1585
1615
|
|
|
1586
1616
|
|
|
1587
|
-
class
|
|
1588
|
-
target: str
|
|
1589
|
-
target_type: IOType
|
|
1590
|
-
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1591
|
-
select: SelectStatement
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
1617
|
+
class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
1595
1618
|
selection: List[SelectItem]
|
|
1596
1619
|
order_by: Optional[OrderBy] = None
|
|
1597
1620
|
limit: Optional[int] = None
|
|
@@ -1709,19 +1732,34 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
|
1709
1732
|
# if the concept is a locally derived concept, it cannot ever be partial
|
|
1710
1733
|
# but if it's a concept pulled in from upstream and we have a where clause, it should be partial
|
|
1711
1734
|
ColumnAssignment(
|
|
1712
|
-
alias=
|
|
1735
|
+
alias=(
|
|
1736
|
+
c.name.replace(".", "_")
|
|
1737
|
+
if c.namespace == DEFAULT_NAMESPACE
|
|
1738
|
+
else c.address.replace(".", "_")
|
|
1739
|
+
),
|
|
1713
1740
|
concept=c,
|
|
1714
1741
|
modifiers=modifiers if c.address not in self.locally_derived else [],
|
|
1715
1742
|
)
|
|
1716
1743
|
for c in self.output_components
|
|
1717
1744
|
]
|
|
1718
1745
|
|
|
1746
|
+
condition = None
|
|
1747
|
+
if self.where_clause:
|
|
1748
|
+
condition = self.where_clause.conditional
|
|
1749
|
+
if self.having_clause:
|
|
1750
|
+
if condition:
|
|
1751
|
+
condition = self.having_clause.conditional + condition
|
|
1752
|
+
else:
|
|
1753
|
+
condition = self.having_clause.conditional
|
|
1754
|
+
|
|
1719
1755
|
new_datasource = Datasource(
|
|
1720
1756
|
identifier=identifier,
|
|
1721
1757
|
address=address,
|
|
1722
1758
|
grain=grain or self.grain,
|
|
1723
1759
|
columns=columns,
|
|
1724
1760
|
namespace=namespace,
|
|
1761
|
+
where=WhereClause(conditional=condition) if condition else None,
|
|
1762
|
+
non_partial_for=WhereClause(conditional=condition) if condition else None,
|
|
1725
1763
|
)
|
|
1726
1764
|
for column in columns:
|
|
1727
1765
|
column.concept = column.concept.with_grain(new_datasource.grain)
|
|
@@ -1789,6 +1827,16 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
|
1789
1827
|
)
|
|
1790
1828
|
|
|
1791
1829
|
|
|
1830
|
+
class CopyStatement(BaseModel):
|
|
1831
|
+
target: str
|
|
1832
|
+
target_type: IOType
|
|
1833
|
+
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1834
|
+
select: SelectStatement
|
|
1835
|
+
|
|
1836
|
+
def refresh_bindings(self, environment: Environment):
|
|
1837
|
+
self.select.refresh_bindings(environment)
|
|
1838
|
+
|
|
1839
|
+
|
|
1792
1840
|
class AlignItem(Namespaced, BaseModel):
|
|
1793
1841
|
alias: str
|
|
1794
1842
|
concepts: List[Concept]
|
|
@@ -1834,7 +1882,7 @@ class AlignClause(Namespaced, BaseModel):
|
|
|
1834
1882
|
return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
|
|
1835
1883
|
|
|
1836
1884
|
|
|
1837
|
-
class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
|
|
1885
|
+
class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, BaseModel):
|
|
1838
1886
|
selects: List[SelectStatement]
|
|
1839
1887
|
align: AlignClause
|
|
1840
1888
|
namespace: str
|
|
@@ -1996,7 +2044,7 @@ class DatasourceMetadata(BaseModel):
|
|
|
1996
2044
|
line_no: int | None = None
|
|
1997
2045
|
|
|
1998
2046
|
|
|
1999
|
-
class MergeStatementV2(Namespaced, BaseModel):
|
|
2047
|
+
class MergeStatementV2(HasUUID, Namespaced, BaseModel):
|
|
2000
2048
|
source: Concept
|
|
2001
2049
|
target: Concept
|
|
2002
2050
|
modifiers: List[Modifier] = Field(default_factory=list)
|
|
@@ -2010,7 +2058,7 @@ class MergeStatementV2(Namespaced, BaseModel):
|
|
|
2010
2058
|
return new
|
|
2011
2059
|
|
|
2012
2060
|
|
|
2013
|
-
class Datasource(Namespaced, BaseModel):
|
|
2061
|
+
class Datasource(HasUUID, Namespaced, BaseModel):
|
|
2014
2062
|
identifier: str
|
|
2015
2063
|
columns: List[ColumnAssignment]
|
|
2016
2064
|
address: Union[Address, str]
|
|
@@ -2022,6 +2070,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2022
2070
|
default_factory=lambda: DatasourceMetadata(freshness_concept=None)
|
|
2023
2071
|
)
|
|
2024
2072
|
where: Optional[WhereClause] = None
|
|
2073
|
+
non_partial_for: Optional[WhereClause] = None
|
|
2025
2074
|
|
|
2026
2075
|
def merge_concept(
|
|
2027
2076
|
self, source: Concept, target: Concept, modifiers: List[Modifier]
|
|
@@ -2222,6 +2271,7 @@ class InstantiatedUnnestJoin(BaseModel):
|
|
|
2222
2271
|
class ConceptPair(BaseModel):
|
|
2223
2272
|
left: Concept
|
|
2224
2273
|
right: Concept
|
|
2274
|
+
existing_datasource: Union[Datasource, "QueryDatasource"]
|
|
2225
2275
|
modifiers: List[Modifier] = Field(default_factory=list)
|
|
2226
2276
|
|
|
2227
2277
|
@property
|
|
@@ -2233,17 +2283,23 @@ class ConceptPair(BaseModel):
|
|
|
2233
2283
|
return Modifier.NULLABLE in self.modifiers
|
|
2234
2284
|
|
|
2235
2285
|
|
|
2286
|
+
class CTEConceptPair(ConceptPair):
|
|
2287
|
+
cte: CTE
|
|
2288
|
+
|
|
2289
|
+
|
|
2236
2290
|
class BaseJoin(BaseModel):
|
|
2237
|
-
left_datasource: Union[Datasource, "QueryDatasource"]
|
|
2238
2291
|
right_datasource: Union[Datasource, "QueryDatasource"]
|
|
2239
|
-
concepts: List[Concept]
|
|
2240
2292
|
join_type: JoinType
|
|
2241
|
-
|
|
2293
|
+
concepts: Optional[List[Concept]] = None
|
|
2294
|
+
left_datasource: Optional[Union[Datasource, "QueryDatasource"]] = None
|
|
2242
2295
|
concept_pairs: list[ConceptPair] | None = None
|
|
2243
2296
|
|
|
2244
2297
|
def __init__(self, **data: Any):
|
|
2245
2298
|
super().__init__(**data)
|
|
2246
|
-
if
|
|
2299
|
+
if (
|
|
2300
|
+
self.left_datasource
|
|
2301
|
+
and self.left_datasource.full_name == self.right_datasource.full_name
|
|
2302
|
+
):
|
|
2247
2303
|
raise SyntaxError(
|
|
2248
2304
|
f"Cannot join a dataself to itself, joining {self.left_datasource} and"
|
|
2249
2305
|
f" {self.right_datasource}"
|
|
@@ -2253,24 +2309,23 @@ class BaseJoin(BaseModel):
|
|
|
2253
2309
|
# if we have a list of concept pairs
|
|
2254
2310
|
if self.concept_pairs:
|
|
2255
2311
|
return
|
|
2256
|
-
|
|
2257
|
-
|
|
2312
|
+
if self.concepts == []:
|
|
2313
|
+
return
|
|
2314
|
+
assert self.left_datasource and self.right_datasource
|
|
2315
|
+
for concept in self.concepts or []:
|
|
2258
2316
|
include = True
|
|
2259
2317
|
for ds in [self.left_datasource, self.right_datasource]:
|
|
2260
2318
|
synonyms = []
|
|
2261
2319
|
for c in ds.output_concepts:
|
|
2262
|
-
synonyms += list(c.pseudonyms
|
|
2320
|
+
synonyms += list(c.pseudonyms)
|
|
2263
2321
|
if (
|
|
2264
2322
|
concept.address not in [c.address for c in ds.output_concepts]
|
|
2265
2323
|
and concept.address not in synonyms
|
|
2266
2324
|
):
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
2272
|
-
f" {[c.address for c in ds.output_concepts]}"
|
|
2273
|
-
)
|
|
2325
|
+
raise SyntaxError(
|
|
2326
|
+
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
2327
|
+
f" {[c.address for c in ds.output_concepts]}"
|
|
2328
|
+
)
|
|
2274
2329
|
if include:
|
|
2275
2330
|
final_concepts.append(concept)
|
|
2276
2331
|
if not final_concepts and self.concepts:
|
|
@@ -2287,7 +2342,7 @@ class BaseJoin(BaseModel):
|
|
|
2287
2342
|
self.concepts = []
|
|
2288
2343
|
return
|
|
2289
2344
|
# if everything is at abstract grain, we can skip joins
|
|
2290
|
-
if all([c.grain
|
|
2345
|
+
if all([c.grain.abstract for c in ds.output_concepts]):
|
|
2291
2346
|
self.concepts = []
|
|
2292
2347
|
return
|
|
2293
2348
|
|
|
@@ -2305,21 +2360,27 @@ class BaseJoin(BaseModel):
|
|
|
2305
2360
|
|
|
2306
2361
|
@property
|
|
2307
2362
|
def unique_id(self) -> str:
|
|
2308
|
-
|
|
2309
|
-
|
|
2310
|
-
|
|
2311
|
-
|
|
2312
|
-
|
|
2313
|
-
|
|
2363
|
+
return str(self)
|
|
2364
|
+
|
|
2365
|
+
@property
|
|
2366
|
+
def input_concepts(self) -> List[Concept]:
|
|
2367
|
+
base = []
|
|
2368
|
+
if self.concept_pairs:
|
|
2369
|
+
for pair in self.concept_pairs:
|
|
2370
|
+
base += [pair.left, pair.right]
|
|
2371
|
+
elif self.concepts:
|
|
2372
|
+
base += self.concepts
|
|
2373
|
+
return base
|
|
2314
2374
|
|
|
2315
2375
|
def __str__(self):
|
|
2316
2376
|
if self.concept_pairs:
|
|
2317
2377
|
return (
|
|
2318
|
-
f"{self.join_type.value} on"
|
|
2319
|
-
f" {','.join([str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
|
|
2378
|
+
f"{self.join_type.value} {self.right_datasource.name} on"
|
|
2379
|
+
f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
|
|
2320
2380
|
)
|
|
2321
2381
|
return (
|
|
2322
|
-
f"{self.join_type.value}
|
|
2382
|
+
f"{self.join_type.value} {self.right_datasource.name} on"
|
|
2383
|
+
f" {','.join([str(k) for k in self.concepts])}"
|
|
2323
2384
|
)
|
|
2324
2385
|
|
|
2325
2386
|
|
|
@@ -2364,19 +2425,9 @@ class QueryDatasource(BaseModel):
|
|
|
2364
2425
|
for join in v:
|
|
2365
2426
|
if not isinstance(join, BaseJoin):
|
|
2366
2427
|
continue
|
|
2367
|
-
|
|
2368
|
-
raise SyntaxError(
|
|
2369
|
-
f"Cannot join a datasource to itself, joining {join.left_datasource}"
|
|
2370
|
-
)
|
|
2371
|
-
pairing = "".join(
|
|
2372
|
-
sorted(
|
|
2373
|
-
[join.left_datasource.identifier, join.right_datasource.identifier]
|
|
2374
|
-
)
|
|
2375
|
-
)
|
|
2428
|
+
pairing = str(join)
|
|
2376
2429
|
if pairing in unique_pairs:
|
|
2377
|
-
raise SyntaxError(
|
|
2378
|
-
f"Duplicate join {join.left_datasource.identifier} and {join.right_datasource.identifier}"
|
|
2379
|
-
)
|
|
2430
|
+
raise SyntaxError(f"Duplicate join {str(join)}")
|
|
2380
2431
|
unique_pairs.add(pairing)
|
|
2381
2432
|
return v
|
|
2382
2433
|
|
|
@@ -2641,7 +2692,12 @@ class CTE(BaseModel):
|
|
|
2641
2692
|
isinstance(join, Join)
|
|
2642
2693
|
and (
|
|
2643
2694
|
join.right_cte.name != removed_cte
|
|
2644
|
-
and
|
|
2695
|
+
and any(
|
|
2696
|
+
[
|
|
2697
|
+
x.cte.name != removed_cte
|
|
2698
|
+
for x in (join.joinkey_pairs or [])
|
|
2699
|
+
]
|
|
2700
|
+
)
|
|
2645
2701
|
)
|
|
2646
2702
|
)
|
|
2647
2703
|
]
|
|
@@ -2712,8 +2768,12 @@ class CTE(BaseModel):
|
|
|
2712
2768
|
for join in self.joins:
|
|
2713
2769
|
if isinstance(join, InstantiatedUnnestJoin):
|
|
2714
2770
|
continue
|
|
2715
|
-
if join.left_cte.name == parent.name:
|
|
2771
|
+
if join.left_cte and join.left_cte.name == parent.name:
|
|
2716
2772
|
join.inline_cte(parent)
|
|
2773
|
+
if join.joinkey_pairs:
|
|
2774
|
+
for pair in join.joinkey_pairs:
|
|
2775
|
+
if pair.cte and pair.cte.name == parent.name:
|
|
2776
|
+
join.inline_cte(parent)
|
|
2717
2777
|
if join.right_cte.name == parent.name:
|
|
2718
2778
|
join.inline_cte(parent)
|
|
2719
2779
|
for k, v in self.source_map.items():
|
|
@@ -2834,9 +2894,21 @@ class CTE(BaseModel):
|
|
|
2834
2894
|
return self.parent_ctes[0].name
|
|
2835
2895
|
return self.name
|
|
2836
2896
|
|
|
2897
|
+
def get_concept(self, address: str) -> Concept | None:
|
|
2898
|
+
for cte in self.parent_ctes:
|
|
2899
|
+
if address in cte.output_columns:
|
|
2900
|
+
match = [x for x in cte.output_columns if x.address == address].pop()
|
|
2901
|
+
return match
|
|
2902
|
+
|
|
2903
|
+
for array in [self.source.input_concepts, self.source.output_concepts]:
|
|
2904
|
+
match_list = [x for x in array if x.address == address]
|
|
2905
|
+
if match_list:
|
|
2906
|
+
return match_list.pop()
|
|
2907
|
+
return None
|
|
2908
|
+
|
|
2837
2909
|
def get_alias(self, concept: Concept, source: str | None = None) -> str:
|
|
2838
2910
|
for cte in self.parent_ctes:
|
|
2839
|
-
if concept.address in
|
|
2911
|
+
if concept.address in cte.output_columns:
|
|
2840
2912
|
if source and source != cte.name:
|
|
2841
2913
|
continue
|
|
2842
2914
|
return concept.safe_address
|
|
@@ -2924,21 +2996,26 @@ class JoinKey(BaseModel):
|
|
|
2924
2996
|
|
|
2925
2997
|
|
|
2926
2998
|
class Join(BaseModel):
|
|
2927
|
-
|
|
2999
|
+
|
|
2928
3000
|
right_cte: CTE
|
|
2929
3001
|
jointype: JoinType
|
|
2930
|
-
|
|
2931
|
-
joinkey_pairs: List[
|
|
3002
|
+
left_cte: CTE | None = None
|
|
3003
|
+
joinkey_pairs: List[CTEConceptPair] | None = None
|
|
2932
3004
|
inlined_ctes: set[str] = Field(default_factory=set)
|
|
2933
3005
|
|
|
2934
3006
|
def inline_cte(self, cte: CTE):
|
|
2935
3007
|
self.inlined_ctes.add(cte.name)
|
|
2936
3008
|
|
|
2937
|
-
@property
|
|
2938
|
-
def left_name(self) -> str:
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
3009
|
+
# @property
|
|
3010
|
+
# def left_name(self) -> str:
|
|
3011
|
+
# if self.left_cte.name in self.inlined_ctes:
|
|
3012
|
+
# return self.left_cte.source.datasources[0].identifier
|
|
3013
|
+
# return self.left_cte.name
|
|
3014
|
+
|
|
3015
|
+
def get_name(self, cte: CTE):
|
|
3016
|
+
if cte.name in self.inlined_ctes:
|
|
3017
|
+
return cte.source.datasources[0].identifier
|
|
3018
|
+
return cte.name
|
|
2942
3019
|
|
|
2943
3020
|
@property
|
|
2944
3021
|
def right_name(self) -> str:
|
|
@@ -2946,12 +3023,6 @@ class Join(BaseModel):
|
|
|
2946
3023
|
return self.right_cte.source.datasources[0].identifier
|
|
2947
3024
|
return self.right_cte.name
|
|
2948
3025
|
|
|
2949
|
-
@property
|
|
2950
|
-
def left_ref(self) -> str:
|
|
2951
|
-
if self.left_cte.name in self.inlined_ctes:
|
|
2952
|
-
return f"{self.left_cte.source.datasources[0].safe_location} as {self.left_cte.source.datasources[0].identifier}"
|
|
2953
|
-
return self.left_cte.name
|
|
2954
|
-
|
|
2955
3026
|
@property
|
|
2956
3027
|
def right_ref(self) -> str:
|
|
2957
3028
|
if self.right_cte.name in self.inlined_ctes:
|
|
@@ -2960,19 +3031,21 @@ class Join(BaseModel):
|
|
|
2960
3031
|
|
|
2961
3032
|
@property
|
|
2962
3033
|
def unique_id(self) -> str:
|
|
2963
|
-
return self
|
|
3034
|
+
return str(self)
|
|
2964
3035
|
|
|
2965
3036
|
def __str__(self):
|
|
2966
3037
|
if self.joinkey_pairs:
|
|
2967
3038
|
return (
|
|
2968
|
-
f"{self.jointype.value}
|
|
3039
|
+
f"{self.jointype.value} join"
|
|
2969
3040
|
f" {self.right_name} on"
|
|
2970
|
-
f" {','.join([str(k.left)+'='+str(k.right
|
|
3041
|
+
f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
|
|
2971
3042
|
)
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
3043
|
+
elif self.left_cte:
|
|
3044
|
+
return (
|
|
3045
|
+
f"{self.jointype.value} JOIN {self.left_cte.name} and"
|
|
3046
|
+
f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
|
|
3047
|
+
)
|
|
3048
|
+
return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
|
|
2976
3049
|
|
|
2977
3050
|
|
|
2978
3051
|
class UndefinedConcept(Concept, Mergeable, Namespaced):
|
|
@@ -2988,7 +3061,7 @@ class UndefinedConcept(Concept, Mergeable, Namespaced):
|
|
|
2988
3061
|
) -> "UndefinedConcept" | Concept:
|
|
2989
3062
|
if self.address == source.address:
|
|
2990
3063
|
new = target.with_grain(self.grain.with_merge(source, target, modifiers))
|
|
2991
|
-
new.pseudonyms
|
|
3064
|
+
new.pseudonyms.add(self.address)
|
|
2992
3065
|
return new
|
|
2993
3066
|
return self.__class__(
|
|
2994
3067
|
name=self.name,
|
|
@@ -3190,7 +3263,7 @@ class EnvironmentConceptDict(dict):
|
|
|
3190
3263
|
return super().items()
|
|
3191
3264
|
|
|
3192
3265
|
|
|
3193
|
-
class ImportStatement(BaseModel):
|
|
3266
|
+
class ImportStatement(HasUUID, BaseModel):
|
|
3194
3267
|
alias: str
|
|
3195
3268
|
path: Path
|
|
3196
3269
|
environment: Union["Environment", None] = None
|
|
@@ -3520,6 +3593,7 @@ class Environment(BaseModel):
|
|
|
3520
3593
|
self, source: Concept, target: Concept, modifiers: List[Modifier]
|
|
3521
3594
|
):
|
|
3522
3595
|
replacements = {}
|
|
3596
|
+
|
|
3523
3597
|
# exit early if we've run this
|
|
3524
3598
|
if source.address in self.alias_origin_lookup:
|
|
3525
3599
|
if self.concepts[source.address] == target:
|
|
@@ -3528,11 +3602,11 @@ class Environment(BaseModel):
|
|
|
3528
3602
|
for k, v in self.concepts.items():
|
|
3529
3603
|
|
|
3530
3604
|
if v.address == target.address:
|
|
3531
|
-
v.pseudonyms
|
|
3605
|
+
v.pseudonyms.add(source.address)
|
|
3532
3606
|
if v.address == source.address:
|
|
3533
3607
|
replacements[k] = target
|
|
3534
3608
|
self.canonical_map[k] = target.address
|
|
3535
|
-
v.pseudonyms
|
|
3609
|
+
v.pseudonyms.add(target.address)
|
|
3536
3610
|
# we need to update keys and grains of all concepts
|
|
3537
3611
|
else:
|
|
3538
3612
|
replacements[k] = v.with_merge(source, target, modifiers)
|
|
@@ -4185,6 +4259,9 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
4185
4259
|
class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
|
|
4186
4260
|
conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
|
|
4187
4261
|
|
|
4262
|
+
def __repr__(self):
|
|
4263
|
+
return str(self.conditional)
|
|
4264
|
+
|
|
4188
4265
|
@property
|
|
4189
4266
|
def input(self) -> List[Concept]:
|
|
4190
4267
|
return self.conditional.input
|
|
@@ -4303,7 +4380,7 @@ class Limit(BaseModel):
|
|
|
4303
4380
|
count: int
|
|
4304
4381
|
|
|
4305
4382
|
|
|
4306
|
-
class ConceptDeclarationStatement(BaseModel):
|
|
4383
|
+
class ConceptDeclarationStatement(HasUUID, BaseModel):
|
|
4307
4384
|
concept: Concept
|
|
4308
4385
|
|
|
4309
4386
|
|
|
@@ -4311,7 +4388,7 @@ class ConceptDerivation(BaseModel):
|
|
|
4311
4388
|
concept: Concept
|
|
4312
4389
|
|
|
4313
4390
|
|
|
4314
|
-
class RowsetDerivationStatement(Namespaced, BaseModel):
|
|
4391
|
+
class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
|
|
4315
4392
|
name: str
|
|
4316
4393
|
select: SelectStatement | MultiSelectStatement
|
|
4317
4394
|
namespace: str
|
|
@@ -4576,7 +4653,7 @@ class TupleWrapper(Generic[VT], tuple):
|
|
|
4576
4653
|
return cls(v, type=arg_to_datatype(v[0]))
|
|
4577
4654
|
|
|
4578
4655
|
|
|
4579
|
-
class PersistStatement(BaseModel):
|
|
4656
|
+
class PersistStatement(HasUUID, BaseModel):
|
|
4580
4657
|
datasource: Datasource
|
|
4581
4658
|
select: SelectStatement
|
|
4582
4659
|
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
@@ -455,7 +455,20 @@ def generate_node(
|
|
|
455
455
|
if x.address not in [y.address for y in root_targets]
|
|
456
456
|
and x not in ex_resolve.grain.components
|
|
457
457
|
]
|
|
458
|
-
|
|
458
|
+
|
|
459
|
+
pseudonyms = [
|
|
460
|
+
x
|
|
461
|
+
for x in extra
|
|
462
|
+
if any(x.address in y.pseudonyms for y in root_targets)
|
|
463
|
+
]
|
|
464
|
+
# if we're only connected by a pseudonym, keep those in output
|
|
465
|
+
expanded.set_output_concepts(root_targets + pseudonyms)
|
|
466
|
+
# but hide them
|
|
467
|
+
if pseudonyms:
|
|
468
|
+
logger.info(
|
|
469
|
+
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
|
|
470
|
+
)
|
|
471
|
+
expanded.hide_output_concepts(pseudonyms)
|
|
459
472
|
|
|
460
473
|
logger.info(
|
|
461
474
|
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
|
|
@@ -480,6 +493,7 @@ def validate_concept(
|
|
|
480
493
|
found_map: dict[str, set[Concept]],
|
|
481
494
|
accept_partial: bool,
|
|
482
495
|
seen: set[str],
|
|
496
|
+
environment: Environment,
|
|
483
497
|
):
|
|
484
498
|
|
|
485
499
|
found_map[str(node)].add(concept)
|
|
@@ -500,10 +514,11 @@ def validate_concept(
|
|
|
500
514
|
if accept_partial:
|
|
501
515
|
found_addresses.add(concept.address)
|
|
502
516
|
found_map[str(node)].add(concept)
|
|
503
|
-
for
|
|
504
|
-
|
|
517
|
+
for v_address in concept.pseudonyms:
|
|
518
|
+
v = environment.concepts[v_address]
|
|
519
|
+
if v == concept.address:
|
|
505
520
|
return
|
|
506
|
-
if v
|
|
521
|
+
if v in seen:
|
|
507
522
|
return
|
|
508
523
|
validate_concept(
|
|
509
524
|
v,
|
|
@@ -515,10 +530,12 @@ def validate_concept(
|
|
|
515
530
|
found_map,
|
|
516
531
|
accept_partial,
|
|
517
532
|
seen=seen,
|
|
533
|
+
environment=environment,
|
|
518
534
|
)
|
|
519
535
|
|
|
520
536
|
|
|
521
537
|
def validate_stack(
|
|
538
|
+
environment: Environment,
|
|
522
539
|
stack: List[StrategyNode],
|
|
523
540
|
concepts: List[Concept],
|
|
524
541
|
mandatory_with_filter: List[Concept],
|
|
@@ -546,6 +563,7 @@ def validate_stack(
|
|
|
546
563
|
found_map,
|
|
547
564
|
accept_partial,
|
|
548
565
|
seen,
|
|
566
|
+
environment,
|
|
549
567
|
)
|
|
550
568
|
for concept in node.virtual_output_concepts:
|
|
551
569
|
if concept.address in non_partial_addresses:
|
|
@@ -807,6 +825,7 @@ def _search_concepts(
|
|
|
807
825
|
break
|
|
808
826
|
attempted.add(priority_concept.address)
|
|
809
827
|
complete, found, missing, partial, virtual = validate_stack(
|
|
828
|
+
environment,
|
|
810
829
|
stack,
|
|
811
830
|
mandatory_list,
|
|
812
831
|
completion_mandatory,
|
|
@@ -209,9 +209,9 @@ def resolve_weak_components(
|
|
|
209
209
|
for c in all_concepts
|
|
210
210
|
if "__preql_internal" not in c.address
|
|
211
211
|
]
|
|
212
|
-
synonyms:
|
|
212
|
+
synonyms: set[str] = set()
|
|
213
213
|
for x in all_concepts:
|
|
214
|
-
synonyms
|
|
214
|
+
synonyms = synonyms.union(x.pseudonyms)
|
|
215
215
|
while break_flag is not True:
|
|
216
216
|
count += 1
|
|
217
217
|
if count > AMBIGUITY_CHECK_LIMIT:
|
|
@@ -385,9 +385,9 @@ def gen_merge_node(
|
|
|
385
385
|
# one concept handling may need to be kicked to alias
|
|
386
386
|
if len(all_concepts) == 1:
|
|
387
387
|
concept = all_concepts[0]
|
|
388
|
-
for
|
|
388
|
+
for v in concept.pseudonyms:
|
|
389
389
|
test = subgraphs_to_merge_node(
|
|
390
|
-
[[concept, v]],
|
|
390
|
+
[[concept, environment.alias_origin_lookup[v]]],
|
|
391
391
|
g=g,
|
|
392
392
|
all_concepts=[concept],
|
|
393
393
|
environment=environment,
|