pytrilogy 0.0.2.25__py3-none-any.whl → 0.0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of pytrilogy might be problematic; refer to the registry's advisory page for more details.

@@ -63,14 +63,14 @@ class InlineDatasource(OptimizationRule):
63
63
  for replaceable in to_inline:
64
64
  if replaceable.name not in self.candidates[cte.name]:
65
65
  self.candidates[cte.name].add(replaceable.name)
66
- self.count[replaceable.source.name] += 1
66
+ self.count[replaceable.source.identifier] += 1
67
67
  return True
68
68
  if (
69
- self.count[replaceable.source.name]
69
+ self.count[replaceable.source.identifier]
70
70
  > CONFIG.optimizations.constant_inline_cutoff
71
71
  ):
72
72
  self.log(
73
- f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
73
+ f"Skipping inlining raw datasource {replaceable.source.identifier} ({replaceable.name}) due to multiple references"
74
74
  )
75
75
  continue
76
76
  if not replaceable.source.datasources[0].grain.issubset(replaceable.grain):
@@ -81,7 +81,7 @@ class InlineDatasource(OptimizationRule):
81
81
  result = cte.inline_parent_datasource(replaceable, force_group=force_group)
82
82
  if result:
83
83
  self.log(
84
- f"Inlined parent {replaceable.name} with {replaceable.source.name}"
84
+ f"Inlined parent {replaceable.name} with {replaceable.source.identifier}"
85
85
  )
86
86
  optimized = True
87
87
  else:
@@ -1,6 +1,5 @@
1
1
  from typing import List, Tuple, Callable
2
2
 
3
-
4
3
  from trilogy.core.enums import PurposeLineage, Purpose
5
4
  from trilogy.core.models import (
6
5
  Concept,
@@ -28,14 +28,18 @@ def extract_address(node: str):
28
28
  return node.split("~")[1].split("@")[0]
29
29
 
30
30
 
31
- def get_graph_partial_nodes(g: nx.DiGraph) -> dict[str, list[str]]:
31
+ def get_graph_partial_nodes(
32
+ g: nx.DiGraph, conditions: WhereClause | None
33
+ ) -> dict[str, list[str]]:
32
34
  datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
33
35
  partial: dict[str, list[str]] = {}
34
36
  for node in g.nodes:
35
37
  if node in datasources:
36
- partial[node] = [
37
- concept_to_node(c) for c in datasources[node].partial_concepts
38
- ]
38
+ ds = datasources[node]
39
+ partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
40
+ if ds.non_partial_for and conditions == ds.non_partial_for:
41
+ partial[node] = []
42
+
39
43
  return partial
40
44
 
41
45
 
@@ -49,7 +53,10 @@ def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
49
53
 
50
54
 
51
55
  def create_pruned_concept_graph(
52
- g: nx.DiGraph, all_concepts: List[Concept], accept_partial: bool = False
56
+ g: nx.DiGraph,
57
+ all_concepts: List[Concept],
58
+ accept_partial: bool = False,
59
+ conditions: WhereClause | None = None,
53
60
  ) -> nx.DiGraph:
54
61
  orig_g = g
55
62
  g = g.copy()
@@ -66,11 +73,7 @@ def create_pruned_concept_graph(
66
73
  relevent_datasets: list[str] = []
67
74
  if not accept_partial:
68
75
  partial = {}
69
- for node in g.nodes:
70
- if node in datasources:
71
- partial[node] = [
72
- concept_to_node(c) for c in datasources[node].partial_concepts
73
- ]
76
+ partial = get_graph_partial_nodes(g, conditions)
74
77
  to_remove = []
75
78
  for edge in g.edges:
76
79
  if (
@@ -133,31 +136,53 @@ def create_pruned_concept_graph(
133
136
  return g
134
137
 
135
138
 
136
- def resolve_subgraphs(g: nx.DiGraph) -> dict[str, list[str]]:
139
+ def resolve_subgraphs(
140
+ g: nx.DiGraph, conditions: WhereClause | None
141
+ ) -> dict[str, list[str]]:
137
142
  datasources = [n for n in g.nodes if n.startswith("ds~")]
138
- subgraphs = {ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources}
139
- partial_map = get_graph_partial_nodes(g)
143
+ subgraphs: dict[str, list[str]] = {
144
+ ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
145
+ }
146
+ partial_map = get_graph_partial_nodes(g, conditions)
140
147
  grain_length = get_graph_grain_length(g)
141
- non_partial = {
142
- ds: [c for c in subgraphs[ds] if c not in partial_map[ds]] for ds in datasources
148
+ concepts: dict[str, Concept] = nx.get_node_attributes(g, "concept")
149
+ non_partial_map = {
150
+ ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
151
+ for ds in datasources
152
+ }
153
+ concept_map = {
154
+ ds: [concepts[c].address for c in subgraphs[ds]] for ds in datasources
143
155
  }
144
156
  pruned_subgraphs = {}
145
- for key, value in subgraphs.items():
157
+ for key, nodes in subgraphs.items():
158
+ value = non_partial_map[key]
159
+ all_concepts = concept_map[key]
146
160
  is_subset = False
147
161
  matches = set()
148
162
  # Compare current list with other lists
149
- for other_key, other_value in non_partial.items():
150
- if key != other_key and set(value).issubset(set(other_value)):
163
+ for other_key, other_all_concepts in concept_map.items():
164
+ other_value = non_partial_map[other_key]
165
+ # needs to be a subset of non partial and a subset of all
166
+ if (
167
+ key != other_key
168
+ and set(value).issubset(set(other_value))
169
+ and set(all_concepts).issubset(set(other_all_concepts))
170
+ ):
151
171
  if len(value) < len(other_value):
152
172
  is_subset = True
173
+ logger.debug(
174
+ f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
175
+ )
153
176
  break
154
- elif len(value) == len(other_value):
177
+ elif len(value) == len(other_value) and len(all_concepts) == len(
178
+ other_all_concepts
179
+ ):
155
180
  matches.add(other_key)
156
181
  matches.add(key)
157
182
  if matches:
158
183
  is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
159
184
  if not is_subset:
160
- pruned_subgraphs[key] = value
185
+ pruned_subgraphs[key] = nodes
161
186
  return pruned_subgraphs
162
187
 
163
188
 
@@ -168,6 +193,7 @@ def create_select_node(
168
193
  g,
169
194
  environment: Environment,
170
195
  depth: int,
196
+ conditions: WhereClause | None = None,
171
197
  ) -> StrategyNode:
172
198
  ds_name = ds_name.split("~")[1]
173
199
  all_concepts = [
@@ -206,6 +232,7 @@ def create_select_node(
206
232
  c.concept for c in datasource.columns if c.is_nullable and c.concept in all_lcl
207
233
  ]
208
234
  nullable_lcl = LooseConceptList(concepts=nullable_concepts)
235
+ partial_is_full = conditions and (conditions == datasource.non_partial_for)
209
236
 
210
237
  bcandidate: StrategyNode = SelectNode(
211
238
  input_concepts=[c.concept for c in datasource.columns],
@@ -214,12 +241,15 @@ def create_select_node(
214
241
  g=g,
215
242
  parents=[],
216
243
  depth=depth,
217
- partial_concepts=[c for c in all_concepts if c in partial_lcl],
244
+ partial_concepts=(
245
+ [] if partial_is_full else [c for c in all_concepts if c in partial_lcl]
246
+ ),
218
247
  nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
219
248
  accept_partial=accept_partial,
220
249
  datasource=datasource,
221
250
  grain=Grain(components=all_concepts),
222
251
  conditions=datasource.where.conditional if datasource.where else None,
252
+ render_condition=not partial_is_full,
223
253
  )
224
254
 
225
255
  # we need to nest the group node one further
@@ -261,7 +291,9 @@ def gen_select_merge_node(
261
291
  force_group=False,
262
292
  )
263
293
  for attempt in [False, True]:
264
- pruned_concept_graph = create_pruned_concept_graph(g, non_constant, attempt)
294
+ pruned_concept_graph = create_pruned_concept_graph(
295
+ g, non_constant, attempt, conditions
296
+ )
265
297
  if pruned_concept_graph:
266
298
  logger.info(
267
299
  f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
@@ -274,7 +306,7 @@ def gen_select_merge_node(
274
306
  )
275
307
  return None
276
308
 
277
- sub_nodes = resolve_subgraphs(pruned_concept_graph)
309
+ sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
278
310
 
279
311
  logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
280
312
  parents = [
@@ -285,6 +317,7 @@ def gen_select_merge_node(
285
317
  accept_partial=accept_partial,
286
318
  environment=environment,
287
319
  depth=depth,
320
+ conditions=conditions,
288
321
  )
289
322
  for k, subgraph in sub_nodes.items()
290
323
  ]
@@ -165,6 +165,7 @@ class StrategyNode:
165
165
  hidden_concepts: List[Concept] | None = None,
166
166
  existence_concepts: List[Concept] | None = None,
167
167
  virtual_output_concepts: List[Concept] | None = None,
168
+ render_condition: bool = True,
168
169
  ):
169
170
  self.input_concepts: List[Concept] = (
170
171
  unique(input_concepts, "address") if input_concepts else []
@@ -208,6 +209,7 @@ class StrategyNode:
208
209
  )
209
210
  self.validate_parents()
210
211
  self.log = True
212
+ self.render_condition = render_condition
211
213
 
212
214
  def add_parents(self, parents: list["StrategyNode"]):
213
215
  self.parents += parents
@@ -380,6 +382,7 @@ class StrategyNode:
380
382
  hidden_concepts=list(self.hidden_concepts),
381
383
  existence_concepts=list(self.existence_concepts),
382
384
  virtual_output_concepts=list(self.virtual_output_concepts),
385
+ render_condition=self.render_condition,
383
386
  )
384
387
 
385
388
 
@@ -89,8 +89,8 @@ def deduplicate_nodes_and_joins(
89
89
  joins = [
90
90
  j
91
91
  for j in joins
92
- if j.left_node.resolve().full_name not in removed
93
- and j.right_node.resolve().full_name not in removed
92
+ if j.left_node.resolve().identifier not in removed
93
+ and j.right_node.resolve().identifier not in removed
94
94
  ]
95
95
  return joins, merged
96
96
 
@@ -155,8 +155,8 @@ class MergeNode(StrategyNode):
155
155
  for join in node_joins:
156
156
  left = join.left_node.resolve()
157
157
  right = join.right_node.resolve()
158
- if left.full_name == right.full_name:
159
- raise SyntaxError(f"Cannot join node {left.full_name} to itself")
158
+ if left.identifier == right.identifier:
159
+ raise SyntaxError(f"Cannot join node {left.identifier} to itself")
160
160
  joins.append(
161
161
  BaseJoin(
162
162
  left_datasource=left,
@@ -168,7 +168,7 @@ class MergeNode(StrategyNode):
168
168
  )
169
169
  return joins
170
170
 
171
- def create_full_joins(self, dataset_list: List[QueryDatasource]):
171
+ def create_full_joins(self, dataset_list: List[QueryDatasource | Datasource]):
172
172
  joins = []
173
173
  seen = set()
174
174
  for left_value in dataset_list:
@@ -198,7 +198,7 @@ class MergeNode(StrategyNode):
198
198
  environment: Environment,
199
199
  ) -> List[BaseJoin | UnnestJoin]:
200
200
  # only finally, join between them for unique values
201
- dataset_list: List[QueryDatasource] = sorted(
201
+ dataset_list: List[QueryDatasource | Datasource] = sorted(
202
202
  final_datasets, key=lambda x: -len(x.grain.components_copy)
203
203
  )
204
204
 
@@ -215,7 +215,7 @@ class MergeNode(StrategyNode):
215
215
  logger.info(
216
216
  f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
217
217
  )
218
- joins = get_node_joins(dataset_list, grain.components, environment)
218
+ joins = get_node_joins(dataset_list, environment=environment)
219
219
  elif final_joins:
220
220
  logger.info(
221
221
  f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
@@ -238,13 +238,13 @@ class MergeNode(StrategyNode):
238
238
  merged: dict[str, QueryDatasource | Datasource] = {}
239
239
  final_joins: List[NodeJoin] | None = self.node_joins
240
240
  for source in parent_sources:
241
- if source.full_name in merged:
241
+ if source.identifier in merged:
242
242
  logger.info(
243
- f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.full_name} into existing"
243
+ f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.identifier} into existing"
244
244
  )
245
- merged[source.full_name] = merged[source.full_name] + source
245
+ merged[source.identifier] = merged[source.identifier] + source
246
246
  else:
247
- merged[source.full_name] = source
247
+ merged[source.identifier] = source
248
248
 
249
249
  # it's possible that we have more sources than we need
250
250
  final_joins, merged = deduplicate_nodes_and_joins(
@@ -314,7 +314,7 @@ class MergeNode(StrategyNode):
314
314
  full_join_concepts = []
315
315
  for join in joins:
316
316
  if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
317
- full_join_concepts += join.concepts
317
+ full_join_concepts += join.input_concepts
318
318
  if self.whole_grain:
319
319
  force_group = False
320
320
  elif self.force_group is False:
@@ -49,6 +49,7 @@ class SelectNode(StrategyNode):
49
49
  conditions: Conditional | Comparison | Parenthetical | None = None,
50
50
  preexisting_conditions: Conditional | Comparison | Parenthetical | None = None,
51
51
  hidden_concepts: List[Concept] | None = None,
52
+ render_condition: bool = True,
52
53
  ):
53
54
  super().__init__(
54
55
  input_concepts=input_concepts,
@@ -65,6 +66,7 @@ class SelectNode(StrategyNode):
65
66
  conditions=conditions,
66
67
  preexisting_conditions=preexisting_conditions,
67
68
  hidden_concepts=hidden_concepts,
69
+ render_condition=render_condition,
68
70
  )
69
71
  self.accept_partial = accept_partial
70
72
  self.datasource = datasource
@@ -120,7 +122,8 @@ class SelectNode(StrategyNode):
120
122
  ],
121
123
  nullable_concepts=[c.concept for c in datasource.columns if c.is_nullable],
122
124
  source_type=SourceType.DIRECT_SELECT,
123
- condition=self.conditions,
125
+ # we can skip rendering conditions
126
+ condition=self.conditions if self.render_condition else None,
124
127
  # select nodes should never group
125
128
  force_group=self.force_group,
126
129
  hidden_concepts=self.hidden_concepts,
@@ -128,7 +131,7 @@ class SelectNode(StrategyNode):
128
131
 
129
132
  def resolve_from_constant_datasources(self) -> QueryDatasource:
130
133
  datasource = Datasource(
131
- identifier=CONSTANT_DATASET, address=CONSTANT_DATASET, columns=[]
134
+ name=CONSTANT_DATASET, address=CONSTANT_DATASET, columns=[]
132
135
  )
133
136
  return QueryDatasource(
134
137
  input_concepts=[],
@@ -205,6 +208,7 @@ class SelectNode(StrategyNode):
205
208
  conditions=self.conditions,
206
209
  preexisting_conditions=self.preexisting_conditions,
207
210
  hidden_concepts=self.hidden_concepts,
211
+ render_condition=self.render_condition,
208
212
  )
209
213
 
210
214