pytrilogy 0.0.1.102__py3-none-any.whl → 0.0.1.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.102.dist-info → pytrilogy-0.0.1.104.dist-info}/METADATA +2 -2
- {pytrilogy-0.0.1.102.dist-info → pytrilogy-0.0.1.104.dist-info}/RECORD +16 -16
- {pytrilogy-0.0.1.102.dist-info → pytrilogy-0.0.1.104.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/core/env_processor.py +5 -1
- trilogy/core/models.py +84 -29
- trilogy/core/processing/concept_strategies_v3.py +6 -4
- trilogy/core/processing/node_generators/filter_node.py +2 -0
- trilogy/core/processing/node_generators/rowset_node.py +10 -6
- trilogy/core/processing/node_generators/select_node.py +339 -103
- trilogy/core/processing/nodes/__init__.py +54 -1
- trilogy/parsing/parse_engine.py +23 -12
- trilogy/scripts/trilogy.py +1 -1
- {pytrilogy-0.0.1.102.dist-info → pytrilogy-0.0.1.104.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.102.dist-info → pytrilogy-0.0.1.104.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.102.dist-info → pytrilogy-0.0.1.104.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pytrilogy
|
|
3
|
-
Version: 0.0.1.102
|
|
3
|
+
Version: 0.0.1.104
|
|
4
4
|
Summary: Declarative, typed query language that compiles to SQL.
|
|
5
5
|
Home-page:
|
|
6
6
|
Author:
|
|
@@ -27,7 +27,7 @@ Requires-Dist: psycopg2-binary ; extra == 'postgres'
|
|
|
27
27
|
Provides-Extra: snowflake
|
|
28
28
|
Requires-Dist: snowflake-sqlalchemy ; extra == 'snowflake'
|
|
29
29
|
|
|
30
|
-
##Trilogy
|
|
30
|
+
## Trilogy
|
|
31
31
|
[](https://trilogydata.dev/)
|
|
32
32
|
[](https://discord.gg/Z4QSSuqGEd)
|
|
33
33
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
trilogy/__init__.py,sha256=
|
|
1
|
+
trilogy/__init__.py,sha256=SFMMJIYUKTFdaXEFolvT4DhBf1i_Id3pbS5lCdo1kE0,245
|
|
2
2
|
trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
trilogy/constants.py,sha256=MphpyZXP4URq7R1vP8EUtT_ZfGPSKuqKKFGMMjfMRtA,482
|
|
4
4
|
trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
|
|
@@ -9,33 +9,33 @@ trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
|
|
|
9
9
|
trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
trilogy/core/constants.py,sha256=LL8NLvxb3HRnAjvofyLRXqQJijLcYiXAQYQzGarVD-g,128
|
|
11
11
|
trilogy/core/enums.py,sha256=KEZQTzJ8tlGIukuUwQUIG1FTHOP1B4i0EeCgFjfsbDw,5394
|
|
12
|
-
trilogy/core/env_processor.py,sha256=
|
|
12
|
+
trilogy/core/env_processor.py,sha256=dfiUp5QxcrgG5YI3Py4xP4OTDRZqltWJGQp8PwukkfY,1401
|
|
13
13
|
trilogy/core/environment_helpers.py,sha256=mzBDHhdF9ssZ_-LY8CcaM_ddfJavkpRYrFImUd3cjXI,5972
|
|
14
14
|
trilogy/core/ergonomics.py,sha256=w3gwXdgrxNHCuaRdyKg73t6F36tj-wIjQf47WZkHmJk,1465
|
|
15
15
|
trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,561
|
|
16
16
|
trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
|
|
17
17
|
trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
|
|
18
18
|
trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
|
|
19
|
-
trilogy/core/models.py,sha256=
|
|
19
|
+
trilogy/core/models.py,sha256=I0RJVMRzRawznOFasphMfFa31WqJvZhXw5VtkWA-KgA,102941
|
|
20
20
|
trilogy/core/query_processor.py,sha256=x3fjs1Vhg_G1FHhjLomZ3kH16PU9RPcediRz-Lu_QZg,11625
|
|
21
21
|
trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
|
-
trilogy/core/processing/concept_strategies_v3.py,sha256=
|
|
22
|
+
trilogy/core/processing/concept_strategies_v3.py,sha256=L0lBiiZ1D3a1zsJfnXa0vNlV3EM6Nyp936OzzUT5u24,22368
|
|
23
23
|
trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
|
|
24
24
|
trilogy/core/processing/utility.py,sha256=Gk35HgyIG2SSUyI5OHZcB0bw1PZUVC_aNc9Sre6xPQU,10535
|
|
25
25
|
trilogy/core/processing/node_generators/__init__.py,sha256=1L1TWnGkrSQlLe9ZuMG8DMGfS755v0fjCdz_W1ofCJQ,747
|
|
26
26
|
trilogy/core/processing/node_generators/basic_node.py,sha256=tVPmg0r0kDdABkmn6z4sxsk1hKy9yTT_Xvl1eVN2Zck,2162
|
|
27
27
|
trilogy/core/processing/node_generators/common.py,sha256=o2nLcAv2jwjynOhgixoCFN46v_p3s5Zw_M8iDiMNvi8,8444
|
|
28
28
|
trilogy/core/processing/node_generators/concept_merge.py,sha256=oKxRuc7m6wnsrxAaLg3q2-dM2dyFgAvaXCp6nmG--jU,5113
|
|
29
|
-
trilogy/core/processing/node_generators/filter_node.py,sha256=
|
|
29
|
+
trilogy/core/processing/node_generators/filter_node.py,sha256=VCafGBvFQgBfkqclR-Y9TLNwZeOucsBkhXy3jLV_TPc,2511
|
|
30
30
|
trilogy/core/processing/node_generators/group_node.py,sha256=xWI1xNIXEOj6jlRGD9hcv2_vVNvY6lpzJl6pQ8HuFBE,2988
|
|
31
31
|
trilogy/core/processing/node_generators/group_to_node.py,sha256=BzPdYwzoo8gRMH7BDffTTXq4z-mjfCEzvfB5I-P0_nw,2941
|
|
32
32
|
trilogy/core/processing/node_generators/merge_node.py,sha256=gF1AELkkakP6NDdntQMAba7Efhyd4b3QS07aQXyQE-c,5359
|
|
33
33
|
trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
|
|
34
|
-
trilogy/core/processing/node_generators/rowset_node.py,sha256=
|
|
35
|
-
trilogy/core/processing/node_generators/select_node.py,sha256=
|
|
34
|
+
trilogy/core/processing/node_generators/rowset_node.py,sha256=K-aoLi0OSfNADXR5_vxDcNv2dJeFy30XNp_IaaWWJ6o,4684
|
|
35
|
+
trilogy/core/processing/node_generators/select_node.py,sha256=xeCqIUEubrf3u_QQfbGdf1BG4fO0HYQ64hiFur8NUqY,20080
|
|
36
36
|
trilogy/core/processing/node_generators/unnest_node.py,sha256=s1VXQZSf1LnX3ISeQ5JzmzmCKUw30-5OK_f0YTB9_48,1031
|
|
37
37
|
trilogy/core/processing/node_generators/window_node.py,sha256=ekazi5eXxnShpcp-qukXNG4DHFdULoXrX-YWUWLNEpM,2527
|
|
38
|
-
trilogy/core/processing/nodes/__init__.py,sha256=
|
|
38
|
+
trilogy/core/processing/nodes/__init__.py,sha256=ZkDGQksvsM5uNia5rhXFCUJcpTRhoYYFdyfJw-Eiu8s,3674
|
|
39
39
|
trilogy/core/processing/nodes/base_node.py,sha256=ovFM4r8QG2sy5d2X1MTCk5BiYUwvr0chxSaIopkO6Fc,8890
|
|
40
40
|
trilogy/core/processing/nodes/filter_node.py,sha256=DqSRv8voEajPZqzeeiIsxuv4ubvsmeQcCW6x_v2CmOk,1359
|
|
41
41
|
trilogy/core/processing/nodes/group_node.py,sha256=-xaZuAkCXHWP_K5BmoJ5jTM_53wYWtYPEkrr4LaWib0,3735
|
|
@@ -65,13 +65,13 @@ trilogy/parsing/common.py,sha256=wLxfqoLYdpAJ6I-6HB951-8kNOOUbnEhwgX61xlz0_0,479
|
|
|
65
65
|
trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
|
|
66
66
|
trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
|
|
67
67
|
trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
68
|
-
trilogy/parsing/parse_engine.py,sha256=
|
|
68
|
+
trilogy/parsing/parse_engine.py,sha256=2-wCQhJXaPhPtnnS31oR3NwLZ5MoH20N5IHo_RPG6KQ,65779
|
|
69
69
|
trilogy/parsing/render.py,sha256=OlkJs6LnQsSNAc94j_rcxT6KI3VU6wzZK1gDHgjL_Rs,16133
|
|
70
70
|
trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
|
-
trilogy/scripts/trilogy.py,sha256=
|
|
72
|
-
pytrilogy-0.0.1.
|
|
73
|
-
pytrilogy-0.0.1.
|
|
74
|
-
pytrilogy-0.0.1.
|
|
75
|
-
pytrilogy-0.0.1.
|
|
76
|
-
pytrilogy-0.0.1.
|
|
77
|
-
pytrilogy-0.0.1.
|
|
71
|
+
trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
|
|
72
|
+
pytrilogy-0.0.1.104.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
|
|
73
|
+
pytrilogy-0.0.1.104.dist-info/METADATA,sha256=RhbBJJRMUSJQW0csiwwJ-aE-HBtrpI0aOjrza5WGfvM,6883
|
|
74
|
+
pytrilogy-0.0.1.104.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
|
|
75
|
+
pytrilogy-0.0.1.104.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
|
|
76
|
+
pytrilogy-0.0.1.104.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
|
|
77
|
+
pytrilogy-0.0.1.104.dist-info/RECORD,,
|
trilogy/__init__.py
CHANGED
trilogy/core/env_processor.py
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
from trilogy.core.graph_models import
|
|
1
|
+
from trilogy.core.graph_models import (
|
|
2
|
+
ReferenceGraph,
|
|
3
|
+
concept_to_node,
|
|
4
|
+
datasource_to_node,
|
|
5
|
+
)
|
|
2
6
|
from trilogy.core.models import Environment
|
|
3
7
|
from trilogy.core.enums import PurposeLineage
|
|
4
8
|
|
trilogy/core/models.py
CHANGED
|
@@ -338,7 +338,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
338
338
|
|
|
339
339
|
def __eq__(self, other: object):
|
|
340
340
|
if isinstance(other, str):
|
|
341
|
-
if self.address ==
|
|
341
|
+
if self.address == other:
|
|
342
342
|
return True
|
|
343
343
|
if not isinstance(other, Concept):
|
|
344
344
|
return False
|
|
@@ -355,7 +355,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
355
355
|
grain = ",".join([str(c.address) for c in self.grain.components])
|
|
356
356
|
return f"{self.namespace}.{self.name}<{grain}>"
|
|
357
357
|
|
|
358
|
-
@property
|
|
358
|
+
@cached_property
|
|
359
359
|
def address(self) -> str:
|
|
360
360
|
return f"{self.namespace}.{self.name}"
|
|
361
361
|
|
|
@@ -436,7 +436,8 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
436
436
|
modifiers=self.modifiers,
|
|
437
437
|
)
|
|
438
438
|
|
|
439
|
-
|
|
439
|
+
@cached_property
|
|
440
|
+
def _with_default_grain(self) -> "Concept":
|
|
440
441
|
if self.purpose == Purpose.KEY:
|
|
441
442
|
# we need to make this abstract
|
|
442
443
|
grain = Grain(components=[self.with_grain(Grain())], nested=True)
|
|
@@ -473,6 +474,9 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
473
474
|
modifiers=self.modifiers,
|
|
474
475
|
)
|
|
475
476
|
|
|
477
|
+
def with_default_grain(self) -> "Concept":
|
|
478
|
+
return self._with_default_grain
|
|
479
|
+
|
|
476
480
|
@property
|
|
477
481
|
def sources(self) -> List["Concept"]:
|
|
478
482
|
if self.lineage:
|
|
@@ -610,7 +614,7 @@ class Grain(BaseModel):
|
|
|
610
614
|
[c.name == ALL_ROWS_CONCEPT for c in self.components]
|
|
611
615
|
)
|
|
612
616
|
|
|
613
|
-
@property
|
|
617
|
+
@cached_property
|
|
614
618
|
def set(self):
|
|
615
619
|
return set([c.address for c in self.components_copy])
|
|
616
620
|
|
|
@@ -1585,7 +1589,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1585
1589
|
columns=[c.with_namespace(namespace) for c in self.columns],
|
|
1586
1590
|
)
|
|
1587
1591
|
|
|
1588
|
-
@property
|
|
1592
|
+
@cached_property
|
|
1589
1593
|
def concepts(self) -> List[Concept]:
|
|
1590
1594
|
return [c.concept for c in self.columns]
|
|
1591
1595
|
|
|
@@ -1780,7 +1784,7 @@ class QueryDatasource(BaseModel):
|
|
|
1780
1784
|
|
|
1781
1785
|
@field_validator("source_map")
|
|
1782
1786
|
@classmethod
|
|
1783
|
-
def validate_source_map(cls, v, info
|
|
1787
|
+
def validate_source_map(cls, v, info: ValidationInfo):
|
|
1784
1788
|
values = info.data
|
|
1785
1789
|
expected = {c.address for c in values["output_concepts"]}.union(
|
|
1786
1790
|
c.address for c in values["input_concepts"]
|
|
@@ -1887,7 +1891,9 @@ class QueryDatasource(BaseModel):
|
|
|
1887
1891
|
else None
|
|
1888
1892
|
),
|
|
1889
1893
|
source_type=self.source_type,
|
|
1890
|
-
partial_concepts=
|
|
1894
|
+
partial_concepts=unique(
|
|
1895
|
+
self.partial_concepts + other.partial_concepts, "address"
|
|
1896
|
+
),
|
|
1891
1897
|
join_derived_concepts=self.join_derived_concepts,
|
|
1892
1898
|
force_group=self.force_group,
|
|
1893
1899
|
)
|
|
@@ -2286,8 +2292,8 @@ class EnvironmentConceptDict(dict):
|
|
|
2286
2292
|
|
|
2287
2293
|
class ImportStatement(BaseModel):
|
|
2288
2294
|
alias: str
|
|
2289
|
-
path:
|
|
2290
|
-
|
|
2295
|
+
path: Path
|
|
2296
|
+
environment: Union["Environment", None] = None
|
|
2291
2297
|
# TODO: this might result in a lot of duplication
|
|
2292
2298
|
# environment:"Environment"
|
|
2293
2299
|
|
|
@@ -2322,6 +2328,9 @@ class Environment(BaseModel):
|
|
|
2322
2328
|
version: str = Field(default_factory=get_version)
|
|
2323
2329
|
cte_name_map: Dict[str, str] = Field(default_factory=dict)
|
|
2324
2330
|
|
|
2331
|
+
materialized_concepts: List[Concept] = Field(default_factory=list)
|
|
2332
|
+
_parse_count: int = 0
|
|
2333
|
+
|
|
2325
2334
|
@classmethod
|
|
2326
2335
|
def from_file(cls, path: str | Path) -> "Environment":
|
|
2327
2336
|
with open(path, "r") as f:
|
|
@@ -2347,20 +2356,14 @@ class Environment(BaseModel):
|
|
|
2347
2356
|
f.write(self.model_dump_json())
|
|
2348
2357
|
return ppath
|
|
2349
2358
|
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
if concept.address in [x.address for x in datasource.output_concepts]:
|
|
2359
|
-
found = True
|
|
2360
|
-
break
|
|
2361
|
-
if found:
|
|
2362
|
-
output.append(concept)
|
|
2363
|
-
return output
|
|
2359
|
+
def gen_materialized_concepts(self) -> None:
|
|
2360
|
+
concrete_addresses = set()
|
|
2361
|
+
for datasource in self.datasources.values():
|
|
2362
|
+
for concept in datasource.output_concepts:
|
|
2363
|
+
concrete_addresses.add(concept.address)
|
|
2364
|
+
self.materialized_concepts = [
|
|
2365
|
+
c for c in self.concepts.values() if c.address in concrete_addresses
|
|
2366
|
+
]
|
|
2364
2367
|
|
|
2365
2368
|
def validate_concept(self, lookup: str, meta: Meta | None = None):
|
|
2366
2369
|
existing: Concept = self.concepts.get(lookup) # type: ignore
|
|
@@ -2390,12 +2393,61 @@ class Environment(BaseModel):
|
|
|
2390
2393
|
|
|
2391
2394
|
def add_import(self, alias: str, environment: Environment):
|
|
2392
2395
|
self.imports[alias] = ImportStatement(
|
|
2393
|
-
alias=alias, path=
|
|
2396
|
+
alias=alias, path=Path(environment.working_path)
|
|
2394
2397
|
)
|
|
2395
2398
|
for key, concept in environment.concepts.items():
|
|
2396
2399
|
self.concepts[f"{alias}.{key}"] = concept.with_namespace(alias)
|
|
2397
2400
|
for key, datasource in environment.datasources.items():
|
|
2398
2401
|
self.datasources[f"{alias}.{key}"] = datasource.with_namespace(alias)
|
|
2402
|
+
self.gen_materialized_concepts()
|
|
2403
|
+
return self
|
|
2404
|
+
|
|
2405
|
+
def add_file_import(self, path: str, alias: str, env: Environment | None = None):
|
|
2406
|
+
from trilogy.parsing.parse_engine import ParseToObjects, PARSER
|
|
2407
|
+
|
|
2408
|
+
apath = path.split(".")
|
|
2409
|
+
apath[-1] = apath[-1] + ".preql"
|
|
2410
|
+
|
|
2411
|
+
target: Path = Path(self.working_path, *apath)
|
|
2412
|
+
if env:
|
|
2413
|
+
self.imports[alias] = ImportStatement(
|
|
2414
|
+
alias=alias, path=target, environment=env
|
|
2415
|
+
)
|
|
2416
|
+
|
|
2417
|
+
elif alias in self.imports:
|
|
2418
|
+
current = self.imports[alias]
|
|
2419
|
+
env = self.imports[alias].environment
|
|
2420
|
+
if current.path != target:
|
|
2421
|
+
raise ImportError(
|
|
2422
|
+
f"Attempted to import {target} with alias {alias} but {alias} is already imported from {current.path}"
|
|
2423
|
+
)
|
|
2424
|
+
else:
|
|
2425
|
+
try:
|
|
2426
|
+
with open(target, "r", encoding="utf-8") as f:
|
|
2427
|
+
text = f.read()
|
|
2428
|
+
nparser = ParseToObjects(
|
|
2429
|
+
visit_tokens=True,
|
|
2430
|
+
text=text,
|
|
2431
|
+
environment=Environment(
|
|
2432
|
+
working_path=target.parent,
|
|
2433
|
+
),
|
|
2434
|
+
parse_address=str(target),
|
|
2435
|
+
)
|
|
2436
|
+
nparser.transform(PARSER.parse(text))
|
|
2437
|
+
except Exception as e:
|
|
2438
|
+
raise ImportError(
|
|
2439
|
+
f"Unable to import file {target.parent}, parsing error: {e}"
|
|
2440
|
+
)
|
|
2441
|
+
env = nparser.environment
|
|
2442
|
+
if env:
|
|
2443
|
+
for _, concept in env.concepts.items():
|
|
2444
|
+
self.add_concept(concept.with_namespace(alias))
|
|
2445
|
+
|
|
2446
|
+
for _, datasource in env.datasources.items():
|
|
2447
|
+
self.add_datasource(datasource.with_namespace(alias))
|
|
2448
|
+
imps = ImportStatement(alias=alias, path=target, environment=env)
|
|
2449
|
+
self.imports[alias] = imps
|
|
2450
|
+
return imps
|
|
2399
2451
|
|
|
2400
2452
|
def parse(
|
|
2401
2453
|
self, input: str, namespace: str | None = None, persist: bool = False
|
|
@@ -2446,21 +2498,22 @@ class Environment(BaseModel):
|
|
|
2446
2498
|
from trilogy.core.environment_helpers import generate_related_concepts
|
|
2447
2499
|
|
|
2448
2500
|
generate_related_concepts(concept, self)
|
|
2501
|
+
self.gen_materialized_concepts()
|
|
2449
2502
|
return concept
|
|
2450
2503
|
|
|
2451
2504
|
def add_datasource(
|
|
2452
2505
|
self,
|
|
2453
2506
|
datasource: Datasource,
|
|
2507
|
+
meta: Meta | None = None,
|
|
2454
2508
|
):
|
|
2455
|
-
if datasource.namespace == DEFAULT_NAMESPACE:
|
|
2456
|
-
self.datasources[datasource.name] = datasource
|
|
2457
|
-
return datasource
|
|
2458
|
-
if not datasource.namespace:
|
|
2509
|
+
if not datasource.namespace or datasource.namespace == DEFAULT_NAMESPACE:
|
|
2459
2510
|
self.datasources[datasource.name] = datasource
|
|
2511
|
+
self.gen_materialized_concepts()
|
|
2460
2512
|
return datasource
|
|
2461
2513
|
self.datasources[datasource.namespace + "." + datasource.identifier] = (
|
|
2462
2514
|
datasource
|
|
2463
2515
|
)
|
|
2516
|
+
self.gen_materialized_concepts()
|
|
2464
2517
|
return datasource
|
|
2465
2518
|
|
|
2466
2519
|
|
|
@@ -2530,7 +2583,7 @@ class Comparison(Namespaced, SelectGrain, BaseModel):
|
|
|
2530
2583
|
|
|
2531
2584
|
def __post_init__(self):
|
|
2532
2585
|
if arg_to_datatype(self.left) != arg_to_datatype(self.right):
|
|
2533
|
-
raise
|
|
2586
|
+
raise SyntaxError(
|
|
2534
2587
|
f"Cannot compare {self.left} and {self.right} of different types"
|
|
2535
2588
|
)
|
|
2536
2589
|
|
|
@@ -2704,6 +2757,8 @@ class Conditional(Namespaced, SelectGrain, BaseModel):
|
|
|
2704
2757
|
def __add__(self, other) -> "Conditional":
|
|
2705
2758
|
if other is None:
|
|
2706
2759
|
return self
|
|
2760
|
+
elif str(other) == str(self):
|
|
2761
|
+
return self
|
|
2707
2762
|
elif isinstance(other, (Comparison, Conditional, Parenthetical)):
|
|
2708
2763
|
return Conditional(left=self, right=other, operator=BooleanOperator.AND)
|
|
2709
2764
|
raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
|
|
@@ -23,7 +23,6 @@ from trilogy.core.processing.node_generators import (
|
|
|
23
23
|
gen_window_node,
|
|
24
24
|
gen_group_node,
|
|
25
25
|
gen_basic_node,
|
|
26
|
-
gen_select_node,
|
|
27
26
|
gen_unnest_node,
|
|
28
27
|
gen_merge_node,
|
|
29
28
|
gen_group_to_node,
|
|
@@ -208,7 +207,8 @@ def generate_node(
|
|
|
208
207
|
history: History | None = None,
|
|
209
208
|
) -> StrategyNode | None:
|
|
210
209
|
# first check in case there is a materialized_concept
|
|
211
|
-
|
|
210
|
+
history = history or History()
|
|
211
|
+
candidate = history.gen_select_node(
|
|
212
212
|
concept,
|
|
213
213
|
local_optional,
|
|
214
214
|
environment,
|
|
@@ -218,6 +218,7 @@ def generate_node(
|
|
|
218
218
|
accept_partial=accept_partial,
|
|
219
219
|
accept_partial_optional=False,
|
|
220
220
|
)
|
|
221
|
+
|
|
221
222
|
if candidate:
|
|
222
223
|
return candidate
|
|
223
224
|
|
|
@@ -318,9 +319,9 @@ def generate_node(
|
|
|
318
319
|
)
|
|
319
320
|
elif concept.derivation == PurposeLineage.ROOT:
|
|
320
321
|
logger.info(
|
|
321
|
-
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node"
|
|
322
|
+
f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating select node with optional {[x.address for x in local_optional]}"
|
|
322
323
|
)
|
|
323
|
-
return gen_select_node(
|
|
324
|
+
return history.gen_select_node(
|
|
324
325
|
concept,
|
|
325
326
|
local_optional,
|
|
326
327
|
environment,
|
|
@@ -328,6 +329,7 @@ def generate_node(
|
|
|
328
329
|
depth + 1,
|
|
329
330
|
fail_if_not_found=False,
|
|
330
331
|
accept_partial=accept_partial,
|
|
332
|
+
accept_partial_optional=True,
|
|
331
333
|
)
|
|
332
334
|
else:
|
|
333
335
|
raise ValueError(f"Unknown derivation {concept.derivation}")
|
|
@@ -35,26 +35,30 @@ def gen_rowset_node(
|
|
|
35
35
|
lineage: RowsetItem = concept.lineage
|
|
36
36
|
rowset: RowsetDerivationStatement = lineage.rowset
|
|
37
37
|
select: SelectStatement | MultiSelectStatement = lineage.rowset.select
|
|
38
|
+
if where := select.where_clause:
|
|
39
|
+
targets = select.output_components + where.conditional.concept_arguments
|
|
40
|
+
else:
|
|
41
|
+
targets = select.output_components
|
|
38
42
|
node: StrategyNode = source_concepts(
|
|
39
|
-
mandatory_list=
|
|
43
|
+
mandatory_list=targets,
|
|
40
44
|
environment=environment,
|
|
41
45
|
g=g,
|
|
42
46
|
depth=depth + 1,
|
|
43
47
|
history=history,
|
|
44
48
|
)
|
|
45
|
-
node.conditions = select.where_clause.conditional if select.where_clause else None
|
|
46
|
-
# rebuild any cached info with the new condition clause
|
|
47
|
-
node.rebuild_cache()
|
|
48
49
|
if not node:
|
|
49
50
|
logger.info(
|
|
50
51
|
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset node for {concept}"
|
|
51
52
|
)
|
|
52
53
|
return None
|
|
54
|
+
node.conditions = select.where_clause.conditional if select.where_clause else None
|
|
55
|
+
# rebuild any cached info with the new condition clause
|
|
56
|
+
node.rebuild_cache()
|
|
53
57
|
enrichment = set([x.address for x in local_optional])
|
|
54
58
|
rowset_relevant = [
|
|
55
59
|
x
|
|
56
60
|
for x in rowset.derived_concepts
|
|
57
|
-
if x.address == concept.address or x.address in enrichment
|
|
61
|
+
# if x.address == concept.address or x.address in enrichment
|
|
58
62
|
]
|
|
59
63
|
additional_relevant = [
|
|
60
64
|
x for x in select.output_components if x.address in enrichment
|
|
@@ -68,7 +72,7 @@ def gen_rowset_node(
|
|
|
68
72
|
for item in additional_relevant:
|
|
69
73
|
node.partial_concepts.append(item)
|
|
70
74
|
|
|
71
|
-
# assume grain to be
|
|
75
|
+
# assume grain to be output of select
|
|
72
76
|
# but don't include anything aggregate at this point
|
|
73
77
|
assert node.resolution_cache
|
|
74
78
|
node.resolution_cache.grain = concept_list_to_grain(
|
|
@@ -2,7 +2,13 @@ from itertools import combinations
|
|
|
2
2
|
from typing import List, Optional
|
|
3
3
|
|
|
4
4
|
from trilogy.core.enums import PurposeLineage
|
|
5
|
-
from trilogy.core.models import
|
|
5
|
+
from trilogy.core.models import (
|
|
6
|
+
Concept,
|
|
7
|
+
Environment,
|
|
8
|
+
Grain,
|
|
9
|
+
LooseConceptList,
|
|
10
|
+
Datasource,
|
|
11
|
+
)
|
|
6
12
|
from trilogy.core.processing.nodes import (
|
|
7
13
|
StrategyNode,
|
|
8
14
|
SelectNode,
|
|
@@ -15,10 +21,211 @@ import networkx as nx
|
|
|
15
21
|
from trilogy.core.graph_models import concept_to_node, datasource_to_node
|
|
16
22
|
from trilogy.constants import logger
|
|
17
23
|
from trilogy.core.processing.utility import padding
|
|
24
|
+
from dataclasses import dataclass
|
|
18
25
|
|
|
19
26
|
LOGGER_PREFIX = "[GEN_SELECT_NODE]"
|
|
20
27
|
|
|
21
28
|
|
|
29
|
+
@dataclass
|
|
30
|
+
class DatasourceMatch:
|
|
31
|
+
key: str
|
|
32
|
+
datasource: Datasource
|
|
33
|
+
matched: LooseConceptList
|
|
34
|
+
partial: LooseConceptList
|
|
35
|
+
|
|
36
|
+
def __repr__(self):
|
|
37
|
+
return f"DatasourceMatch({self.key}, {self.datasource.identifier}, {str(self.matched)}, {str(self.partial)})"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def dm_to_strategy_node(
|
|
41
|
+
dm: DatasourceMatch,
|
|
42
|
+
target_grain: Grain,
|
|
43
|
+
environment: Environment,
|
|
44
|
+
g: nx.DiGraph,
|
|
45
|
+
depth: int,
|
|
46
|
+
accept_partial: bool = False,
|
|
47
|
+
) -> StrategyNode:
|
|
48
|
+
datasource = dm.datasource
|
|
49
|
+
if target_grain and target_grain.issubset(datasource.grain):
|
|
50
|
+
if all([x in dm.matched for x in target_grain.components]):
|
|
51
|
+
force_group = False
|
|
52
|
+
# if we are not returning the grain
|
|
53
|
+
# we have to group
|
|
54
|
+
else:
|
|
55
|
+
logger.info(
|
|
56
|
+
f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(dm.matched)}, group to actual grain"
|
|
57
|
+
)
|
|
58
|
+
force_group = True
|
|
59
|
+
elif all([x in dm.matched for x in datasource.grain.components]):
|
|
60
|
+
logger.info(
|
|
61
|
+
f"{padding(depth)}{LOGGER_PREFIX} query output includes all grain components, no reason to group further"
|
|
62
|
+
)
|
|
63
|
+
force_group = False
|
|
64
|
+
else:
|
|
65
|
+
logger.info(
|
|
66
|
+
f"{padding(depth)}{LOGGER_PREFIX} target grain is not subset of datasource grain {datasource.grain}, required to group"
|
|
67
|
+
)
|
|
68
|
+
force_group = True
|
|
69
|
+
bcandidate: StrategyNode = SelectNode(
|
|
70
|
+
input_concepts=[c.concept for c in datasource.columns],
|
|
71
|
+
output_concepts=dm.matched.concepts,
|
|
72
|
+
environment=environment,
|
|
73
|
+
g=g,
|
|
74
|
+
parents=[],
|
|
75
|
+
depth=depth,
|
|
76
|
+
partial_concepts=dm.partial.concepts,
|
|
77
|
+
accept_partial=accept_partial,
|
|
78
|
+
datasource=datasource,
|
|
79
|
+
grain=Grain(components=dm.matched.concepts),
|
|
80
|
+
)
|
|
81
|
+
# we need to nest the group node one further
|
|
82
|
+
if force_group is True:
|
|
83
|
+
candidate: StrategyNode = GroupNode(
|
|
84
|
+
output_concepts=dm.matched.concepts,
|
|
85
|
+
input_concepts=dm.matched.concepts,
|
|
86
|
+
environment=environment,
|
|
87
|
+
g=g,
|
|
88
|
+
parents=[bcandidate],
|
|
89
|
+
depth=depth,
|
|
90
|
+
partial_concepts=bcandidate.partial_concepts,
|
|
91
|
+
)
|
|
92
|
+
else:
|
|
93
|
+
candidate = bcandidate
|
|
94
|
+
return candidate
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def gen_select_nodes_from_tables_v2(
|
|
98
|
+
mandatory_concept: Concept,
|
|
99
|
+
all_concepts: List[Concept],
|
|
100
|
+
g: nx.DiGraph,
|
|
101
|
+
environment: Environment,
|
|
102
|
+
depth: int,
|
|
103
|
+
target_grain: Grain,
|
|
104
|
+
accept_partial: bool = False,
|
|
105
|
+
) -> tuple[bool, list[Concept], list[StrategyNode]]:
|
|
106
|
+
# if we have only constants
|
|
107
|
+
# we don't need a table
|
|
108
|
+
# so verify nothing, select node will render
|
|
109
|
+
all_lcl = LooseConceptList(concepts=all_concepts)
|
|
110
|
+
if all([c.derivation == PurposeLineage.CONSTANT for c in all_lcl]):
|
|
111
|
+
logger.info(
|
|
112
|
+
f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_lcl]} are constants, returning constant node"
|
|
113
|
+
)
|
|
114
|
+
return (
|
|
115
|
+
True,
|
|
116
|
+
all_lcl.concepts,
|
|
117
|
+
[
|
|
118
|
+
ConstantNode(
|
|
119
|
+
output_concepts=all_lcl.concepts,
|
|
120
|
+
input_concepts=[],
|
|
121
|
+
environment=environment,
|
|
122
|
+
g=g,
|
|
123
|
+
parents=[],
|
|
124
|
+
depth=depth,
|
|
125
|
+
# no partial for constants
|
|
126
|
+
partial_concepts=[],
|
|
127
|
+
force_group=False,
|
|
128
|
+
)
|
|
129
|
+
],
|
|
130
|
+
)
|
|
131
|
+
# otherwise, we need to look for a table
|
|
132
|
+
nodes_to_find = [concept_to_node(x.with_default_grain()) for x in all_lcl.concepts]
|
|
133
|
+
matches: dict[str, DatasourceMatch] = {}
|
|
134
|
+
for k, datasource in environment.datasources.items():
|
|
135
|
+
matched = []
|
|
136
|
+
matched_paths = []
|
|
137
|
+
for idx, req_concept in enumerate(nodes_to_find):
|
|
138
|
+
try:
|
|
139
|
+
path = nx.shortest_path(
|
|
140
|
+
g,
|
|
141
|
+
source=datasource_to_node(datasource),
|
|
142
|
+
target=req_concept,
|
|
143
|
+
)
|
|
144
|
+
ds_valid = (
|
|
145
|
+
sum(
|
|
146
|
+
[
|
|
147
|
+
1 if g.nodes[node]["type"] == "datasource" else 0
|
|
148
|
+
for node in path
|
|
149
|
+
]
|
|
150
|
+
)
|
|
151
|
+
== 1
|
|
152
|
+
)
|
|
153
|
+
address_valid = (
|
|
154
|
+
sum(
|
|
155
|
+
[
|
|
156
|
+
(
|
|
157
|
+
1
|
|
158
|
+
if g.nodes[node]["type"] == "concept"
|
|
159
|
+
and g.nodes[node]["concept"].address
|
|
160
|
+
!= all_lcl.concepts[idx].address
|
|
161
|
+
else 0
|
|
162
|
+
)
|
|
163
|
+
for node in path
|
|
164
|
+
]
|
|
165
|
+
)
|
|
166
|
+
== 0
|
|
167
|
+
)
|
|
168
|
+
if ds_valid and address_valid:
|
|
169
|
+
matched_paths.append(path)
|
|
170
|
+
matched.append(all_lcl.concepts[idx])
|
|
171
|
+
except nx.NodeNotFound:
|
|
172
|
+
continue
|
|
173
|
+
except nx.exception.NetworkXNoPath:
|
|
174
|
+
continue
|
|
175
|
+
dm = DatasourceMatch(
|
|
176
|
+
key=k,
|
|
177
|
+
datasource=datasource,
|
|
178
|
+
matched=LooseConceptList(concepts=matched),
|
|
179
|
+
partial=LooseConceptList(
|
|
180
|
+
concepts=[
|
|
181
|
+
c.concept
|
|
182
|
+
for c in datasource.columns
|
|
183
|
+
if not c.is_complete and c.concept.address in all_lcl
|
|
184
|
+
]
|
|
185
|
+
),
|
|
186
|
+
)
|
|
187
|
+
if not matched:
|
|
188
|
+
continue
|
|
189
|
+
if mandatory_concept.address not in dm.matched:
|
|
190
|
+
continue
|
|
191
|
+
if not accept_partial and dm.partial.addresses:
|
|
192
|
+
continue
|
|
193
|
+
matches[k] = dm
|
|
194
|
+
found: set[str] = set()
|
|
195
|
+
all_found = False
|
|
196
|
+
all_checked = False
|
|
197
|
+
to_return: list[StrategyNode] = []
|
|
198
|
+
if not matches:
|
|
199
|
+
return False, [], []
|
|
200
|
+
while not all_found and not all_checked:
|
|
201
|
+
final_key: str = max(
|
|
202
|
+
matches,
|
|
203
|
+
key=lambda x: len(
|
|
204
|
+
[m for m in matches[x].matched.addresses if m not in found]
|
|
205
|
+
)
|
|
206
|
+
- 0.1 * len(matches[x].partial.addresses),
|
|
207
|
+
)
|
|
208
|
+
final: DatasourceMatch = matches[final_key]
|
|
209
|
+
candidate = dm_to_strategy_node(
|
|
210
|
+
final,
|
|
211
|
+
target_grain=Grain(
|
|
212
|
+
components=[
|
|
213
|
+
x for x in target_grain.components if x.address in final.matched
|
|
214
|
+
]
|
|
215
|
+
),
|
|
216
|
+
environment=environment,
|
|
217
|
+
g=g,
|
|
218
|
+
depth=depth,
|
|
219
|
+
accept_partial=accept_partial,
|
|
220
|
+
)
|
|
221
|
+
to_return.append(candidate)
|
|
222
|
+
del matches[final_key]
|
|
223
|
+
found = found.union(final.matched.addresses)
|
|
224
|
+
all_found = all_lcl.addresses.issubset(found)
|
|
225
|
+
all_checked = len(matches) == 0
|
|
226
|
+
return all_found, [x for x in all_concepts if x.address in found], to_return
|
|
227
|
+
|
|
228
|
+
|
|
22
229
|
def gen_select_node_from_table(
|
|
23
230
|
target_concept: Concept,
|
|
24
231
|
all_concepts: List[Concept],
|
|
@@ -50,26 +257,21 @@ def gen_select_node_from_table(
|
|
|
50
257
|
candidates: dict[str, StrategyNode] = {}
|
|
51
258
|
scores: dict[str, int] = {}
|
|
52
259
|
# otherwise, we need to look for a table
|
|
260
|
+
nodes_to_find = [concept_to_node(x.with_default_grain()) for x in all_concepts]
|
|
53
261
|
for datasource in environment.datasources.values():
|
|
54
262
|
all_found = True
|
|
55
|
-
for
|
|
56
|
-
# look for connection to abstract grain
|
|
57
|
-
req_concept = raw_concept.with_default_grain()
|
|
58
|
-
# if we don't have a concept in the graph
|
|
59
|
-
# exit early
|
|
60
|
-
if concept_to_node(req_concept) not in g.nodes:
|
|
61
|
-
raise ValueError(concept_to_node(req_concept))
|
|
263
|
+
for idx, req_concept in enumerate(nodes_to_find):
|
|
62
264
|
try:
|
|
63
265
|
path = nx.shortest_path(
|
|
64
266
|
g,
|
|
65
267
|
source=datasource_to_node(datasource),
|
|
66
|
-
target=
|
|
268
|
+
target=req_concept,
|
|
67
269
|
)
|
|
68
270
|
except nx.NodeNotFound as e:
|
|
69
271
|
# just to provide better error
|
|
70
272
|
ncandidates = [
|
|
71
273
|
datasource_to_node(datasource),
|
|
72
|
-
|
|
274
|
+
req_concept,
|
|
73
275
|
]
|
|
74
276
|
for ncandidate in ncandidates:
|
|
75
277
|
try:
|
|
@@ -94,81 +296,152 @@ def gen_select_node_from_table(
|
|
|
94
296
|
for node in path:
|
|
95
297
|
if g.nodes[node]["type"] == "datasource":
|
|
96
298
|
continue
|
|
97
|
-
if g.nodes[node]["concept"].address ==
|
|
299
|
+
if g.nodes[node]["concept"].address == all_concepts[idx].address:
|
|
98
300
|
continue
|
|
99
301
|
all_found = False
|
|
100
302
|
break
|
|
101
303
|
|
|
102
|
-
if all_found:
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
304
|
+
if not all_found:
|
|
305
|
+
# skip to next node
|
|
306
|
+
continue
|
|
307
|
+
partial_concepts = [
|
|
308
|
+
c.concept
|
|
309
|
+
for c in datasource.columns
|
|
310
|
+
if not c.is_complete and c.concept in all_lcl
|
|
311
|
+
]
|
|
312
|
+
partial_lcl = LooseConceptList(concepts=partial_concepts)
|
|
313
|
+
if not accept_partial and target_concept in partial_lcl:
|
|
314
|
+
continue
|
|
315
|
+
logger.info(
|
|
316
|
+
f"{padding(depth)}{LOGGER_PREFIX} target grain is {str(target_grain)}"
|
|
317
|
+
)
|
|
318
|
+
if target_grain and target_grain.issubset(datasource.grain):
|
|
115
319
|
|
|
116
|
-
|
|
117
|
-
force_group = False
|
|
118
|
-
# if we are not returning the grain
|
|
119
|
-
# we have to group
|
|
120
|
-
else:
|
|
121
|
-
logger.info(
|
|
122
|
-
f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(all_lcl)}, group to actual grain"
|
|
123
|
-
)
|
|
124
|
-
force_group = True
|
|
125
|
-
elif all([x in all_lcl for x in datasource.grain.components]):
|
|
126
|
-
logger.info(
|
|
127
|
-
f"{padding(depth)}{LOGGER_PREFIX} query output includes all grain components, no reason to group further"
|
|
128
|
-
)
|
|
320
|
+
if all([x in all_lcl for x in target_grain.components]):
|
|
129
321
|
force_group = False
|
|
322
|
+
# if we are not returning the grain
|
|
323
|
+
# we have to group
|
|
130
324
|
else:
|
|
131
325
|
logger.info(
|
|
132
|
-
f"{padding(depth)}{LOGGER_PREFIX}
|
|
326
|
+
f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(all_lcl)}, group to actual grain"
|
|
133
327
|
)
|
|
134
328
|
force_group = True
|
|
329
|
+
elif all([x in all_lcl for x in datasource.grain.components]):
|
|
330
|
+
logger.info(
|
|
331
|
+
f"{padding(depth)}{LOGGER_PREFIX} query output includes all grain components, no reason to group further"
|
|
332
|
+
)
|
|
333
|
+
force_group = False
|
|
334
|
+
else:
|
|
335
|
+
logger.info(
|
|
336
|
+
f"{padding(depth)}{LOGGER_PREFIX} target grain is not subset of datasource grain {datasource.grain}, required to group"
|
|
337
|
+
)
|
|
338
|
+
force_group = True
|
|
135
339
|
|
|
136
|
-
|
|
137
|
-
|
|
340
|
+
bcandidate: StrategyNode = SelectNode(
|
|
341
|
+
input_concepts=[c.concept for c in datasource.columns],
|
|
342
|
+
output_concepts=all_concepts,
|
|
343
|
+
environment=environment,
|
|
344
|
+
g=g,
|
|
345
|
+
parents=[],
|
|
346
|
+
depth=depth,
|
|
347
|
+
partial_concepts=[c for c in all_concepts if c in partial_lcl],
|
|
348
|
+
accept_partial=accept_partial,
|
|
349
|
+
datasource=datasource,
|
|
350
|
+
grain=Grain(components=all_concepts),
|
|
351
|
+
)
|
|
352
|
+
# we need to nest the group node one further
|
|
353
|
+
if force_group is True:
|
|
354
|
+
candidate: StrategyNode = GroupNode(
|
|
138
355
|
output_concepts=all_concepts,
|
|
356
|
+
input_concepts=all_concepts,
|
|
139
357
|
environment=environment,
|
|
140
358
|
g=g,
|
|
141
|
-
parents=[],
|
|
359
|
+
parents=[bcandidate],
|
|
142
360
|
depth=depth,
|
|
143
|
-
partial_concepts=
|
|
144
|
-
accept_partial=accept_partial,
|
|
145
|
-
datasource=datasource,
|
|
146
|
-
grain=Grain(components=all_concepts),
|
|
147
|
-
)
|
|
148
|
-
# we need to ntest the group node one further
|
|
149
|
-
if force_group is True:
|
|
150
|
-
candidate: StrategyNode = GroupNode(
|
|
151
|
-
output_concepts=all_concepts,
|
|
152
|
-
input_concepts=all_concepts,
|
|
153
|
-
environment=environment,
|
|
154
|
-
g=g,
|
|
155
|
-
parents=[bcandidate],
|
|
156
|
-
depth=depth,
|
|
157
|
-
partial_concepts=bcandidate.partial_concepts,
|
|
158
|
-
)
|
|
159
|
-
else:
|
|
160
|
-
candidate = bcandidate
|
|
161
|
-
logger.info(
|
|
162
|
-
f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, returning {candidate.output_lcl}"
|
|
361
|
+
partial_concepts=bcandidate.partial_concepts,
|
|
163
362
|
)
|
|
164
|
-
|
|
165
|
-
|
|
363
|
+
else:
|
|
364
|
+
candidate = bcandidate
|
|
365
|
+
logger.info(
|
|
366
|
+
f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, returning {candidate.output_lcl}"
|
|
367
|
+
)
|
|
368
|
+
candidates[datasource.identifier] = candidate
|
|
369
|
+
scores[datasource.identifier] = -len(partial_concepts)
|
|
166
370
|
if not candidates:
|
|
167
371
|
return None
|
|
168
372
|
final = max(candidates, key=lambda x: scores[x])
|
|
169
373
|
return candidates[final]
|
|
170
374
|
|
|
171
375
|
|
|
376
|
+
def gen_select_nodes_from_tables(
|
|
377
|
+
local_optional: List[Concept],
|
|
378
|
+
depth: int,
|
|
379
|
+
concept: Concept,
|
|
380
|
+
environment: Environment,
|
|
381
|
+
g: nx.DiGraph,
|
|
382
|
+
accept_partial: bool,
|
|
383
|
+
all_concepts: List[Concept],
|
|
384
|
+
) -> tuple[bool, list[Concept], list[StrategyNode]]:
|
|
385
|
+
parents: List[StrategyNode] = []
|
|
386
|
+
found: List[Concept] = []
|
|
387
|
+
logger.info(
|
|
388
|
+
f"{padding(depth)}{LOGGER_PREFIX} looking for multiple sources that can satisfy"
|
|
389
|
+
)
|
|
390
|
+
all_found = False
|
|
391
|
+
unreachable: list[str] = []
|
|
392
|
+
# first pass
|
|
393
|
+
for opt_con in local_optional:
|
|
394
|
+
ds = gen_select_node_from_table(
|
|
395
|
+
concept,
|
|
396
|
+
[concept, opt_con],
|
|
397
|
+
g=g,
|
|
398
|
+
environment=environment,
|
|
399
|
+
depth=depth + 1,
|
|
400
|
+
accept_partial=accept_partial,
|
|
401
|
+
target_grain=Grain(components=all_concepts),
|
|
402
|
+
)
|
|
403
|
+
if not ds:
|
|
404
|
+
unreachable.append(opt_con.address)
|
|
405
|
+
all_found = False
|
|
406
|
+
for x in reversed(range(1, len(local_optional) + 1)):
|
|
407
|
+
if all_found:
|
|
408
|
+
break
|
|
409
|
+
for combo in combinations(local_optional, x):
|
|
410
|
+
if all_found:
|
|
411
|
+
break
|
|
412
|
+
# filter to just the original ones we need to get
|
|
413
|
+
local_combo = [
|
|
414
|
+
x for x in combo if x not in found and x.address not in unreachable
|
|
415
|
+
]
|
|
416
|
+
# skip if nothing new in this combo
|
|
417
|
+
if not local_combo:
|
|
418
|
+
continue
|
|
419
|
+
# include core concept as join
|
|
420
|
+
all_concepts = [concept, *local_combo]
|
|
421
|
+
|
|
422
|
+
ds = gen_select_node_from_table(
|
|
423
|
+
concept,
|
|
424
|
+
all_concepts,
|
|
425
|
+
g=g,
|
|
426
|
+
environment=environment,
|
|
427
|
+
depth=depth + 1,
|
|
428
|
+
accept_partial=accept_partial,
|
|
429
|
+
target_grain=Grain(components=all_concepts),
|
|
430
|
+
)
|
|
431
|
+
if ds:
|
|
432
|
+
logger.info(
|
|
433
|
+
f"{padding(depth)}{LOGGER_PREFIX} found a source with {[x.address for x in all_concepts]}"
|
|
434
|
+
)
|
|
435
|
+
parents.append(ds)
|
|
436
|
+
found += [x for x in ds.output_concepts if x != concept]
|
|
437
|
+
if {x.address for x in found} == {c.address for c in local_optional}:
|
|
438
|
+
logger.info(
|
|
439
|
+
f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]}"
|
|
440
|
+
)
|
|
441
|
+
all_found = True
|
|
442
|
+
return all_found, found, parents
|
|
443
|
+
|
|
444
|
+
|
|
172
445
|
def gen_select_node(
|
|
173
446
|
concept: Concept,
|
|
174
447
|
local_optional: List[Concept],
|
|
@@ -221,46 +494,9 @@ def gen_select_node(
|
|
|
221
494
|
)
|
|
222
495
|
return ds
|
|
223
496
|
# if we cannot find a match
|
|
224
|
-
parents
|
|
225
|
-
|
|
226
|
-
logger.info(
|
|
227
|
-
f"{padding(depth)}{LOGGER_PREFIX} looking for multiple sources that can satisfy"
|
|
497
|
+
all_found, found, parents = gen_select_nodes_from_tables_v2(
|
|
498
|
+
concept, all_concepts, g, environment, depth, target_grain, accept_partial
|
|
228
499
|
)
|
|
229
|
-
all_found = False
|
|
230
|
-
for x in reversed(range(1, len(local_optional) + 1)):
|
|
231
|
-
if all_found:
|
|
232
|
-
break
|
|
233
|
-
for combo in combinations(local_optional, x):
|
|
234
|
-
if all_found:
|
|
235
|
-
break
|
|
236
|
-
# filter to just the original ones we need to get
|
|
237
|
-
local_combo = [x for x in combo if x not in found]
|
|
238
|
-
# skip if nothing new in this combo
|
|
239
|
-
if not local_combo:
|
|
240
|
-
continue
|
|
241
|
-
# include core concept as join
|
|
242
|
-
all_concepts = [concept, *local_combo]
|
|
243
|
-
|
|
244
|
-
ds = gen_select_node_from_table(
|
|
245
|
-
concept,
|
|
246
|
-
all_concepts,
|
|
247
|
-
g=g,
|
|
248
|
-
environment=environment,
|
|
249
|
-
depth=depth + 1,
|
|
250
|
-
accept_partial=accept_partial,
|
|
251
|
-
target_grain=Grain(components=all_concepts),
|
|
252
|
-
)
|
|
253
|
-
if ds:
|
|
254
|
-
logger.info(
|
|
255
|
-
f"{padding(depth)}{LOGGER_PREFIX} found a source with {[x.address for x in all_concepts]}"
|
|
256
|
-
)
|
|
257
|
-
parents.append(ds)
|
|
258
|
-
found += [x for x in ds.output_concepts if x != concept]
|
|
259
|
-
if {x.address for x in found} == {c.address for c in local_optional}:
|
|
260
|
-
logger.info(
|
|
261
|
-
f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]}"
|
|
262
|
-
)
|
|
263
|
-
all_found = True
|
|
264
500
|
if parents and (all_found or accept_partial_optional):
|
|
265
501
|
if all_found:
|
|
266
502
|
logger.info(
|
|
@@ -268,7 +504,7 @@ def gen_select_node(
|
|
|
268
504
|
)
|
|
269
505
|
else:
|
|
270
506
|
logger.info(
|
|
271
|
-
f"{padding(depth)}{LOGGER_PREFIX} found some optional
|
|
507
|
+
f"{padding(depth)}{LOGGER_PREFIX} found some optional, returning"
|
|
272
508
|
)
|
|
273
509
|
all_partial = [
|
|
274
510
|
c
|
|
@@ -6,11 +6,12 @@ from .window_node import WindowNode
|
|
|
6
6
|
from .base_node import StrategyNode, NodeJoin
|
|
7
7
|
from .unnest_node import UnnestNode
|
|
8
8
|
from pydantic import BaseModel, Field, ConfigDict
|
|
9
|
-
from trilogy.core.models import Concept
|
|
9
|
+
from trilogy.core.models import Concept, Environment
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class History(BaseModel):
|
|
13
13
|
history: dict[str, StrategyNode | None] = Field(default_factory=dict)
|
|
14
|
+
select_history: dict[str, StrategyNode | None] = Field(default_factory=dict)
|
|
14
15
|
started: set[str] = Field(default_factory=set)
|
|
15
16
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
16
17
|
|
|
@@ -60,6 +61,58 @@ class History(BaseModel):
|
|
|
60
61
|
in self.started
|
|
61
62
|
)
|
|
62
63
|
|
|
64
|
+
def _select_concepts_to_lookup(
|
|
65
|
+
self,
|
|
66
|
+
main: Concept,
|
|
67
|
+
search: list[Concept],
|
|
68
|
+
accept_partial: bool,
|
|
69
|
+
fail_if_not_found: bool,
|
|
70
|
+
accept_partial_optional: bool,
|
|
71
|
+
) -> str:
|
|
72
|
+
return (
|
|
73
|
+
str(main.address)
|
|
74
|
+
+ "|"
|
|
75
|
+
+ "-".join([c.address for c in search])
|
|
76
|
+
+ str(accept_partial)
|
|
77
|
+
+ str(fail_if_not_found)
|
|
78
|
+
+ str(accept_partial_optional)
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
def gen_select_node(
|
|
82
|
+
self,
|
|
83
|
+
concept: Concept,
|
|
84
|
+
local_optional: list[Concept],
|
|
85
|
+
environment: Environment,
|
|
86
|
+
g,
|
|
87
|
+
depth: int,
|
|
88
|
+
fail_if_not_found: bool = False,
|
|
89
|
+
accept_partial: bool = False,
|
|
90
|
+
accept_partial_optional: bool = False,
|
|
91
|
+
) -> StrategyNode | None:
|
|
92
|
+
from trilogy.core.processing.node_generators.select_node import gen_select_node
|
|
93
|
+
|
|
94
|
+
fingerprint = self._select_concepts_to_lookup(
|
|
95
|
+
concept,
|
|
96
|
+
local_optional,
|
|
97
|
+
accept_partial,
|
|
98
|
+
fail_if_not_found,
|
|
99
|
+
accept_partial_optional,
|
|
100
|
+
)
|
|
101
|
+
if fingerprint in self.select_history:
|
|
102
|
+
return self.select_history[fingerprint]
|
|
103
|
+
gen = gen_select_node(
|
|
104
|
+
concept,
|
|
105
|
+
local_optional,
|
|
106
|
+
environment,
|
|
107
|
+
g,
|
|
108
|
+
depth + 1,
|
|
109
|
+
fail_if_not_found=fail_if_not_found,
|
|
110
|
+
accept_partial=accept_partial,
|
|
111
|
+
accept_partial_optional=accept_partial_optional,
|
|
112
|
+
)
|
|
113
|
+
self.select_history[fingerprint] = gen
|
|
114
|
+
return gen
|
|
115
|
+
|
|
63
116
|
|
|
64
117
|
__all__ = [
|
|
65
118
|
"FilterNode",
|
trilogy/parsing/parse_engine.py
CHANGED
|
@@ -9,6 +9,7 @@ from lark.exceptions import (
|
|
|
9
9
|
UnexpectedToken,
|
|
10
10
|
VisitError,
|
|
11
11
|
)
|
|
12
|
+
from pathlib import Path
|
|
12
13
|
from lark.tree import Meta
|
|
13
14
|
from pydantic import ValidationError
|
|
14
15
|
from trilogy.core.internal import INTERNAL_NAMESPACE, ALL_ROWS_CONCEPT
|
|
@@ -466,25 +467,34 @@ class ParseToObjects(Transformer):
|
|
|
466
467
|
text,
|
|
467
468
|
environment: Environment,
|
|
468
469
|
parse_address: str | None = None,
|
|
469
|
-
parsed: dict | None = None,
|
|
470
|
+
parsed: dict[str, "ParseToObjects"] | None = None,
|
|
470
471
|
):
|
|
471
472
|
Transformer.__init__(self, visit_tokens)
|
|
472
473
|
self.text = text
|
|
473
474
|
self.environment: Environment = environment
|
|
474
|
-
self.imported: set[str] = set()
|
|
475
475
|
self.parse_address = parse_address or "root"
|
|
476
476
|
self.parsed: dict[str, ParseToObjects] = parsed if parsed else {}
|
|
477
477
|
# we do a second pass to pick up circular dependencies
|
|
478
478
|
# after initial parsing
|
|
479
479
|
self.pass_count = 1
|
|
480
|
+
self._results_stash = None
|
|
481
|
+
|
|
482
|
+
def transform(self, tree):
|
|
483
|
+
results = super().transform(tree)
|
|
484
|
+
self._results_stash = results
|
|
485
|
+
self.environment._parse_count += 1
|
|
486
|
+
return results
|
|
480
487
|
|
|
481
488
|
def hydrate_missing(self):
|
|
482
489
|
self.pass_count = 2
|
|
483
490
|
for k, v in self.parsed.items():
|
|
491
|
+
|
|
484
492
|
if v.pass_count == 2:
|
|
485
493
|
continue
|
|
486
494
|
v.hydrate_missing()
|
|
487
495
|
self.environment.concepts.fail_on_missing = True
|
|
496
|
+
# if not self.environment.concepts.undefined:
|
|
497
|
+
# return self._results_stash
|
|
488
498
|
reparsed = self.transform(PARSER.parse(self.text))
|
|
489
499
|
self.environment.concepts.undefined = {}
|
|
490
500
|
return reparsed
|
|
@@ -932,7 +942,7 @@ class ParseToObjects(Transformer):
|
|
|
932
942
|
)
|
|
933
943
|
for column in columns:
|
|
934
944
|
column.concept = column.concept.with_grain(datasource.grain)
|
|
935
|
-
self.environment.
|
|
945
|
+
self.environment.add_datasource(datasource, meta=meta)
|
|
936
946
|
return datasource
|
|
937
947
|
|
|
938
948
|
@v_args(meta=True)
|
|
@@ -1046,12 +1056,11 @@ class ParseToObjects(Transformer):
|
|
|
1046
1056
|
self.environment.add_concept(new, meta=meta)
|
|
1047
1057
|
return merge
|
|
1048
1058
|
|
|
1049
|
-
def import_statement(self, args: list[str]):
|
|
1059
|
+
def import_statement(self, args: list[str]) -> ImportStatement:
|
|
1050
1060
|
alias = args[-1]
|
|
1051
1061
|
path = args[0].split(".")
|
|
1052
1062
|
|
|
1053
1063
|
target = join(self.environment.working_path, *path) + ".preql"
|
|
1054
|
-
self.imported.add(target)
|
|
1055
1064
|
if target in self.parsed:
|
|
1056
1065
|
nparser = self.parsed[target]
|
|
1057
1066
|
else:
|
|
@@ -1070,21 +1079,23 @@ class ParseToObjects(Transformer):
|
|
|
1070
1079
|
)
|
|
1071
1080
|
nparser.transform(PARSER.parse(text))
|
|
1072
1081
|
self.parsed[target] = nparser
|
|
1082
|
+
# add the parsed objects of the import in
|
|
1083
|
+
self.parsed = {**self.parsed, **nparser.parsed}
|
|
1073
1084
|
except Exception as e:
|
|
1074
1085
|
raise ImportError(
|
|
1075
1086
|
f"Unable to import file {dirname(target)}, parsing error: {e}"
|
|
1076
1087
|
)
|
|
1077
1088
|
|
|
1078
|
-
for
|
|
1079
|
-
# self.environment.concepts[f"{alias}.{key}"] = concept.with_namespace(new_namespace)
|
|
1089
|
+
for _, concept in nparser.environment.concepts.items():
|
|
1080
1090
|
self.environment.add_concept(concept.with_namespace(alias))
|
|
1081
1091
|
|
|
1082
|
-
for
|
|
1092
|
+
for _, datasource in nparser.environment.datasources.items():
|
|
1083
1093
|
self.environment.add_datasource(datasource.with_namespace(alias))
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1094
|
+
imps = ImportStatement(
|
|
1095
|
+
alias=alias, path=Path(args[0]), environment=nparser.environment
|
|
1096
|
+
)
|
|
1097
|
+
self.environment.imports[alias] = imps
|
|
1098
|
+
return imps
|
|
1088
1099
|
|
|
1089
1100
|
@v_args(meta=True)
|
|
1090
1101
|
def show_category(self, meta: Meta, args) -> ShowCategory:
|
trilogy/scripts/trilogy.py
CHANGED
|
@@ -115,7 +115,7 @@ def run(ctx, input, dialect: str, conn_args):
|
|
|
115
115
|
|
|
116
116
|
print_tabulate(results, tabulate.tabulate)
|
|
117
117
|
except ImportError:
|
|
118
|
-
print(
|
|
118
|
+
print("Install tabulate (pip install tabulate) for a prettier output")
|
|
119
119
|
print(", ".join(results.keys()))
|
|
120
120
|
for row in results:
|
|
121
121
|
print(row)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|