pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
trilogy/core/processing/node_generators/node_merge_node.py
@@ -0,0 +1,653 @@
+from itertools import combinations
+from typing import Callable, List, Optional
+
+import networkx as nx
+from networkx.algorithms import approximation as ax
+
+from trilogy.constants import logger
+from trilogy.core.enums import Derivation, FunctionType
+from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
+from trilogy.core.graph_models import (
+    ReferenceGraph,
+    concept_to_node,
+    prune_sources_for_conditions,
+)
+from trilogy.core.models.build import (
+    BuildConcept,
+    BuildConditional,
+    BuildFunction,
+    BuildGrain,
+    BuildWhereClause,
+)
+from trilogy.core.models.build_environment import BuildEnvironment
+from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
+from trilogy.core.processing.utility import padding
+from trilogy.utility import unique
+
+LOGGER_PREFIX = "[GEN_MERGE_NODE]"
+AMBIGUITY_CHECK_LIMIT = 20
+
+
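+# Graph nodes are namespaced strings: datasource nodes are prefixed "ds~" and
+# concept nodes "c~"; pseudonym edges pair nodes whose concepts alias each other.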
+def filter_pseudonyms_for_source(
+    ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
+):
+    to_remove = set()
+    for edge in ds_graph.edges:
+        if edge in pseudonyms:
+            lengths = {}
+            for n in edge:
+                try:
+                    lengths[n] = nx.shortest_path_length(ds_graph, node, n)
+                except nx.NetworkXNoPath:
+                    lengths[n] = 999
+            # drop whichever end of the pseudonym edge is farther from the source
+            to_remove.add(max(lengths, key=lambda x: lengths.get(x, 0)))
+    for removal in to_remove:
+        ds_graph.remove_node(removal)
+
+
+def extract_address(node: str):
+    return node.split("~")[1].split("@")[0]
+
+
+def extract_concept(node: str, env: BuildEnvironment):
+    # removed as part of canonical mapping
+    # if node in env.alias_origin_lookup:
+    #     return env.alias_origin_lookup[node]
+    return env.canonical_concepts[node]
+
+
+def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
+    unique_graphs: list[set[str]] = []
+
+    # sort graphs from largest to smallest
+    graphs.sort(key=lambda x: len(x), reverse=True)
+    for graph in graphs:
+        if not any(set(graph).issubset(x) for x in unique_graphs):
+            unique_graphs.append(set(graph))
+
+    return [list(x) for x in unique_graphs]
+
+
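+# Split a resolved graph into per-datasource concept groups: take the ego graph
+# around each "ds~" node (with pseudonym duplicates pruned), drop groups that are
+# subsets of others, and give any still-uncovered concept a singleton group.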
+def extract_ds_components(
+    g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
+) -> list[list[str]]:
+    graphs = []
+    for node in g.nodes:
+        if node.startswith("ds~"):
+            local = g.copy()
+            filter_pseudonyms_for_source(local, node, pseudonyms)
+            ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
+            graphs.append(
+                [
+                    extract_address(x)
+                    for x in ds_graph.nodes
+                    if not str(x).startswith("ds~")
+                ]
+            )
+    # if we had no ego graphs, return all concepts
+    if not graphs:
+        return [[extract_address(node) for node in nodelist]]
+    graphs = filter_unique_graphs(graphs)
+    for node in nodelist:
+        parsed = extract_address(node)
+        if not any(parsed in x for x in graphs):
+            graphs.append([parsed])
+    return graphs
+
+
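+# e.g. keeping only "ds~" nodes turns the chain ds1 - c1 - ds2 into ds1 - ds2,
+# preserving datasource adjacency while hiding intermediate concept nodes.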
+def prune_and_merge(
+    G: nx.DiGraph,
+    keep_node_lambda: Callable[[str], bool],
+) -> nx.DiGraph:
+    """
+    Prune nodes of one type and create direct connections between remaining nodes.
+
+    Args:
+        G: NetworkX graph
+        keep_node_lambda: predicate returning True for nodes that should be kept
+
+    Returns:
+        New graph with only the kept nodes and merged connections
+    """
+    # Get nodes to keep
+    nodes_to_keep = [n for n in G.nodes if keep_node_lambda(n)]
+    # Create new graph
+    new_graph = G.subgraph(nodes_to_keep).copy()
+
+    # Find paths between nodes to keep through removed nodes
+    nodes_to_remove = [n for n in G.nodes() if n not in nodes_to_keep]
+
+    for node_pair in combinations(nodes_to_keep, 2):
+        n1, n2 = node_pair
+
+        # Check if there's a path through removed nodes
+        try:
+            path = nx.shortest_path(G, n1, n2)
+            # If path exists and goes through nodes we're removing
+            if len(path) > 2 or any(node in nodes_to_remove for node in path[1:-1]):
+                new_graph.add_edge(n1, n2)
+        except nx.NetworkXNoPath:
+            continue
+
+    return new_graph
+
+
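+# After a join path is found, inspect each adjacent pair of datasource nodes and
+# re-add shared join keys (at default grain) that the minimal tree dropped, so
+# later join resolution sees the full set of common keys.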
+def reinject_common_join_keys_v2(
+    G: ReferenceGraph,
+    final: nx.DiGraph,
+    nodelist: list[str],
+    synonyms: set[str] = set(),
+) -> bool:
+    # when we've discovered a concept join, for each pair of ds nodes
+    # check if they have more keys in common,
+    # and inject those into discovery as join conditions
+    def is_ds_node(n: str) -> bool:
+        return n.startswith("ds~")
+
+    ds_graph = prune_and_merge(final, is_ds_node)
+    injected = False
+
+    for datasource in ds_graph.nodes:
+        node1 = G.datasources[datasource]
+        neighbors = nx.all_neighbors(ds_graph, datasource)
+        for neighbor in neighbors:
+            node2 = G.datasources[neighbor]
+            common_concepts = set(
+                x.concept.address for x in node1.columns
+            ).intersection(set(x.concept.address for x in node2.columns))
+            concrete_concepts = [
+                x.concept for x in node1.columns if x.concept.address in common_concepts
+            ]
+            reduced = BuildGrain.from_concepts(concrete_concepts).components
+            existing_addresses = set()
+            for concrete in concrete_concepts:
+                cnode = concept_to_node(concrete.with_default_grain())
+                if cnode in final.nodes:
+                    existing_addresses.add(concrete.address)
+                    continue
+            for concrete in concrete_concepts:
+                if concrete.address in synonyms:
+                    continue
+                if concrete.address not in reduced:
+                    continue
+                if concrete.address in existing_addresses:
+                    continue
+                # skip anything that is already in the graph pseudonyms
+                if any(x in concrete.pseudonyms for x in existing_addresses):
+                    continue
+                cnode = concept_to_node(concrete.with_default_grain())
+                final.add_edge(datasource, cnode)
+                final.add_edge(neighbor, cnode)
+                logger.debug(
+                    f"{LOGGER_PREFIX} reinjecting common join key {cnode} to list {nodelist} between {datasource} and {neighbor}, existing {existing_addresses}"
+                )
+                injected = True
+    return injected
+
+
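+# Core discovery step: weight edges so paths through derived (BASIC) concepts are
+# expensive, prune aggregate/filter/constant derivations, then approximate a
+# minimal Steiner tree over the undirected graph connecting every requested node.
+# Returns None when the targets cannot be connected into one valid graph.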
+def determine_induced_minimal_nodes(
+    G: ReferenceGraph,
+    nodelist: list[str],
+    environment: BuildEnvironment,
+    filter_downstream: bool,
+    accept_partial: bool = False,
+    synonyms: set[str] = set(),
+) -> nx.DiGraph | None:
+    H: nx.Graph = nx.to_undirected(G).copy()
+
+    # Add weights to edges based on target node's derivation type
+    for edge in G.edges():
+        _, target = edge
+        target_lookup = G.concepts.get(target)
+
+        weight = 1  # default weight
+        # If the target is a BASIC derivation (other than attribute access),
+        # set a higher weight to discourage routing through it
+        if target_lookup and target_lookup.derivation == Derivation.BASIC:
+            if (
+                isinstance(target_lookup.lineage, BuildFunction)
+                and target_lookup.lineage.operator == FunctionType.ATTR_ACCESS
+            ):
+                weight = 1
+            else:
+                weight = 50
+
+        H.edges[edge]["weight"] = weight
+
+    nodes_to_remove = []
+    for node, lookup in G.concepts.items():
+        # inclusion of aggregates can create ambiguous node relation chains;
+        # there may be a better way to handle this -
+        # can be revisited if we need to connect a derived synonym based on an aggregate
+        if lookup.derivation in (
+            Derivation.CONSTANT,
+            Derivation.AGGREGATE,
+            Derivation.FILTER,
+        ):
+            nodes_to_remove.append(node)
+        # purge a node if we're already looking for all of its parents
+        if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
+            nodes_to_remove.append(node)
+    if nodes_to_remove:
+        # logger.debug(f"Removing nodes {nodes_to_remove} from graph")
+        H.remove_nodes_from(nodes_to_remove)
+    isolates = list(nx.isolates(H))
+    if isolates:
+        # logger.debug(f"Removing isolates {isolates} from graph")
+        H.remove_nodes_from(isolates)
+
+    zero_out = list(x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist)
+    while zero_out:
+        logger.debug(f"Removing zero out nodes {zero_out} from graph")
+        H.remove_nodes_from(zero_out)
+        zero_out = list(
+            x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist
+        )
+    try:
+        # Use weight attribute for Dijkstra pathfinding
+        paths = nx.multi_source_dijkstra_path(H, nodelist, weight="weight")
+        # logger.debug(f"Paths found for {nodelist} {paths}")
+    except nx.exception.NodeNotFound as e:
+        logger.debug(f"Unable to find paths for {nodelist} - {str(e)}")
+        return None
+    path_removals = list(x for x in H.nodes if x not in paths)
+    if path_removals:
+        # logger.debug(f"Removing paths {path_removals} from graph")
+        H.remove_nodes_from(path_removals)
+    # logger.debug(f"Graph after path removal {H.nodes}")
+    sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist, weight="weight").copy()
+    if not sG.nodes:
+        logger.debug(f"No Steiner tree found for nodes {nodelist}")
+        return None
+
+    logger.debug(f"Steiner tree found for nodes {nodelist} {sG.nodes}")
+    final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
+
+    for edge in G.edges:
+        if edge[1] in final.nodes and edge[0].startswith("ds~"):
+            ds_name = extract_address(edge[0])
+            ds = environment.datasources[ds_name]
+            concept = environment.canonical_concepts[extract_address(edge[1])]
+            if concept.address in [x.address for x in ds.partial_concepts]:
+                if not accept_partial:
+                    continue
+            final.add_edge(*edge)
+
+    reinject_common_join_keys_v2(G, final, nodelist, synonyms)
+
+    # all concept nodes must have a parent
+    if not all(
+        [
+            final.in_degree(node) > 0
+            for node in final.nodes
+            if node.startswith("c~") and node in nodelist
+        ]
+    ):
+        missing = [
+            node
+            for node in final.nodes
+            if node.startswith("c~") and final.in_degree(node) == 0
+        ]
+        logger.debug(f"Skipping graph for {nodelist} as no in_degree {missing}")
+        return None
+
+    if not all([node in final.nodes for node in nodelist]):
+        missing = [node for node in nodelist if node not in final.nodes]
+        logger.debug(
+            f"Skipping graph for initial list {nodelist} as missing nodes {missing} from final graph {final.nodes}"
+        )
+        return None
+    logger.debug(f"Found final graph {final.nodes}")
+    return final
+
+
+def canonicalize_addresses(
+    reduced_concept_set: set[str], environment: BuildEnvironment
+) -> set[str]:
+    """
+    Convert a set of concept addresses to their canonical form.
+
+    This is necessary to ensure that we can compare concepts correctly,
+    especially when dealing with aliases or pseudonyms.
+    """
+    return set(
+        environment.concepts[x].address if x in environment.concepts else x
+        for x in reduced_concept_set
+    )
+
+
+def detect_ambiguity_and_raise(
+    all_concepts: list[BuildConcept],
+    reduced_concept_sets_raw: list[set[str]],
+    environment: BuildEnvironment,
+) -> None:
+    final_candidates: list[set[str]] = []
+    common: set[str] = set()
+    # find all values that show up in every set of join additions
+    reduced_concept_sets = [
+        canonicalize_addresses(x, environment) for x in reduced_concept_sets_raw
+    ]
+    for ja in reduced_concept_sets:
+        if not common:
+            common = ja
+        else:
+            common = common.intersection(ja)
+        if all(set(ja).issubset(y) for y in reduced_concept_sets):
+            final_candidates.append(ja)
+    if not final_candidates:
+        filtered_paths = [x.difference(common) for x in reduced_concept_sets]
+        raise AmbiguousRelationshipResolutionException(
+            message=f"Multiple possible concept injections found to resolve {[x.address for x in all_concepts]}, have {' or '.join([str(x) for x in reduced_concept_sets])}: {filtered_paths}",
+            parents=filtered_paths,
+        )
+
+
+def has_synonym(concept: BuildConcept, others: list[list[BuildConcept]]) -> bool:
+    return any(
+        c.address in concept.pseudonyms or concept.address in c.pseudonyms
+        for sublist in others
+        for c in sublist
+    )
+
+
+def filter_relevant_subgraphs(
+    subgraphs: list[list[BuildConcept]],
+) -> list[list[BuildConcept]]:
+    return [
+        subgraph
+        for subgraph in subgraphs
+        if len(subgraph) > 1
+        or (
+            len(subgraph) == 1
+            and not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
+        )
+    ]
+
+
+# 2025-11-18 - removing this as it was causing us to drop
+# partial concept required parents,
+# but leaving here for possible future use
+# def filter_duplicate_subgraphs(
+#     subgraphs: list[list[BuildConcept]], environment
+# ) -> list[list[BuildConcept]]:
+#     seen: list[set[str]] = []
+#
+#     for graph in subgraphs:
+#         seen.append(
+#             canonicalize_addresses(set([x.address for x in graph]), environment)
+#         )
+#     final = []
+#     # sometimes we can get two subcomponents that are the same
+#     # due to alias resolution;
+#     # if so, drop any that are strict subsets.
+#     for graph in subgraphs:
+#         logger.info(f"{LOGGER_PREFIX} Checking graph {graph} for duplicates in {seen}")
+#         set_x = canonicalize_addresses(set([x.address for x in graph]), environment)
+#         if any([set_x.issubset(y) and set_x != y for y in seen]):
+#             continue
+#         final.append(graph)
+#     return final
+
+
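+# Repeatedly request a minimal connecting graph, removing each round's newly
+# injected concepts from the search graph before retrying (bounded by
+# AMBIGUITY_CHECK_LIMIT). Rounds that inject incomparable concept sets mean the
+# join path is ambiguous, and detect_ambiguity_and_raise aborts resolution.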
+def resolve_weak_components(
+    all_concepts: List[BuildConcept],
+    environment: BuildEnvironment,
+    environment_graph: ReferenceGraph,
+    filter_downstream: bool = True,
+    accept_partial: bool = False,
+    search_conditions: BuildWhereClause | None = None,
+) -> list[list[BuildConcept]] | None:
+    break_flag = False
+    found = []
+    search_graph = environment_graph.copy()
+    prune_sources_for_conditions(
+        search_graph, accept_partial, conditions=search_conditions
+    )
+    reduced_concept_sets: list[set[str]] = []
+
+    # prune properties
+    # to_remove = []
+    # for node in search_graph.nodes:
+    #     if not node.startswith("c~"):
+    #         continue
+    #     try:
+    #         concept = extract_concept(extract_address(node), environment)
+    #         if concept.purpose == Purpose.PROPERTY and concept.address not in all_concepts:
+    #             to_remove.append(node)
+    #     except Exception as e:
+    #         logger.error(f"Error extracting concept from node {node}: {e}")
+    #         raise ValueError("FIX THIS TO BE MORE PRECISE")
+    # for node in to_remove:
+    #     search_graph.remove_node(node)
+
+    count = 0
+    node_list = sorted(
+        [
+            concept_to_node(c.with_default_grain())
+            for c in all_concepts
+            if "__preql_internal" not in c.address
+        ]
+    )
+    synonyms: set[str] = set()
+    for x in all_concepts:
+        synonyms = synonyms.union(x.pseudonyms)
+    # from trilogy.hooks.graph_hook import GraphHook
+    # GraphHook().query_graph_built(search_graph, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+
+    # ensure g is bound even if the first resolution attempt raises
+    g: nx.DiGraph | None = None
+    # loop through, removing new nodes we find,
+    # to ensure there are no ambiguous discovery paths
+    # (if we did not care about raising ambiguity errors, we could just use the first one)
+    while break_flag is not True:
+        count += 1
+        if count > AMBIGUITY_CHECK_LIMIT:
+            break_flag = True
+        try:
+            g = determine_induced_minimal_nodes(
+                search_graph,
+                node_list,
+                filter_downstream=filter_downstream,
+                accept_partial=accept_partial,
+                environment=environment,
+                synonyms=synonyms,
+            )
+
+            if not g or not g.nodes:
+                break_flag = True
+                continue
+            if not nx.is_weakly_connected(g):
+                break_flag = True
+                continue
+            # from trilogy.hooks.graph_hook import GraphHook
+            # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+            all_graph_concepts = [
+                extract_concept(extract_address(node), environment)
+                for node in g.nodes
+                if node.startswith("c~")
+            ]
+            new = [x for x in all_graph_concepts if x.address not in all_concepts]
+
+            if not new:
+                break_flag = True
+            # remove our new nodes for the next search path
+            for n in new:
+                node = concept_to_node(n)
+                if node in search_graph:
+                    search_graph.remove_node(node)
+            # TODO: figure out better place for debugging
+            # from trilogy.hooks.graph_hook import GraphHook
+            # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+            found.append(g)
+            new_addresses = set([x.address for x in new if x.address not in synonyms])
+            reduced_concept_sets.append(new_addresses)
+
+        except nx.exception.NetworkXNoPath:
+            break_flag = True
+        if g and not g.nodes:
+            break_flag = True
+    if not found:
+        return None
+
+    detect_ambiguity_and_raise(all_concepts, reduced_concept_sets, environment)
+
+    # take our first one as the actual graph
+    g = found[0]
+
+    subgraphs: list[list[BuildConcept]] = []
+    # components = nx.strongly_connected_components(g)
+    node_list = [x for x in g.nodes if x.startswith("c~")]
+    components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
+    logger.debug(f"Extracted components {components} from {node_list}")
+    for component in components:
+        # we need to take unique again, as different addresses may map to the same concept
+        sub_component = unique(
+            # sorting here is required for reproducibility
+            # todo: we should sort in an optimized order
+            [extract_concept(x, environment) for x in sorted(component)],
+            "address",
+        )
+        if not sub_component:
+            continue
+        subgraphs.append(sub_component)
+
+    return subgraphs
+
+
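+# Resolve each concept subgraph to a parent StrategyNode via the source_concepts
+# callback; a single parent is returned as-is, while multiple parents are wrapped
+# in a MergeNode that joins their outputs.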
+def subgraphs_to_merge_node(
+    concept_subgraphs: list[list[BuildConcept]],
+    depth: int,
+    all_concepts: List[BuildConcept],
+    environment,
+    g,
+    source_concepts,
+    history,
+    conditions,
+    output_concepts: List[BuildConcept],
+    search_conditions: BuildWhereClause | None = None,
+    enable_early_exit: bool = True,
+):
+    parents: List[StrategyNode] = []
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {[[c.address for c in subgraph] for subgraph in concept_subgraphs]}"
+    )
+    for graph in concept_subgraphs:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
+        )
+
+        parent: StrategyNode | None = source_concepts(
+            mandatory_list=graph,
+            environment=environment,
+            g=g,
+            depth=depth + 1,
+            history=history,
+            # conditions=search_conditions,
+        )
+        if not parent:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
+            )
+            return None
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)} w/ {[c.address for c in parent.output_concepts]}"
+        )
+        parents.append(parent)
+    input_c = []
+    output_c = []
+    for x in parents:
+        for y in x.usable_outputs:
+            input_c.append(y)
+            if y in output_concepts:
+                output_c.append(y)
+            elif any(y.address in c.pseudonyms for c in output_concepts) or any(
+                c.address in y.pseudonyms for c in output_concepts
+            ):
+                output_c.append(y)
+
+    if len(parents) == 1 and enable_early_exit:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} only one parent node, exiting early w/ {[c.address for c in parents[0].output_concepts]}"
+        )
+        parent = parents[0]
+        return parent
+
+    rval = MergeNode(
+        input_concepts=unique(input_c, "address"),
+        output_concepts=output_concepts,
+        environment=environment,
+        parents=parents,
+        depth=depth,
+        # hidden_concepts=[]
+        # conditions=conditions,
+        # conditions=search_conditions.conditional,
+        # preexisting_conditions=search_conditions.conditional,
+        # node_joins=[]
+    )
+    return rval
+
+
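+# Entry point: extend the target concepts with row arguments from the search
+# conditions, then try resolution with filter_downstream=True and again with
+# False, returning the first merge node successfully constructed.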
+def gen_merge_node(
+    all_concepts: List[BuildConcept],
+    g: nx.DiGraph,
+    environment: BuildEnvironment,
+    depth: int,
+    source_concepts,
+    accept_partial: bool = False,
+    history: History | None = None,
+    conditions: BuildConditional | None = None,
+    search_conditions: BuildWhereClause | None = None,
+) -> Optional[MergeNode]:
+    # we do not actually APPLY these conditions anywhere,
+    # though we could look at doing that as an optimization.
+    # it's important to include them so the base discovery loop that was generating
+    # the merge node can then add them automatically,
+    # so we should not return a node with preexisting conditions
+    if search_conditions:
+        all_search_concepts = unique(
+            all_concepts + list(search_conditions.row_arguments), "address"
+        )
+    else:
+        all_search_concepts = all_concepts
+    all_search_concepts = sorted(all_search_concepts, key=lambda x: x.address)
+    break_set = set([x.address for x in all_search_concepts])
+    for filter_downstream in [True, False]:
+        weak_resolve = resolve_weak_components(
+            all_search_concepts,
+            environment,
+            g,
+            filter_downstream=filter_downstream,
+            accept_partial=accept_partial,
+            search_conditions=search_conditions,
+        )
+        if not weak_resolve:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} wasn't able to resolve graph through intermediate concept injection with accept_partial {accept_partial}, filter_downstream {filter_downstream}"
+            )
+            continue
+
+        log_graph = [[y.address for y in x] for x in weak_resolve]
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
+        )
+        for flat in log_graph:
+            if set(flat) == break_set:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
+                )
+                return None
+        return subgraphs_to_merge_node(
+            weak_resolve,
+            depth=depth,
+            all_concepts=all_search_concepts,
+            environment=environment,
+            g=g,
+            source_concepts=source_concepts,
+            history=history,
+            conditions=conditions,
+            search_conditions=search_conditions,
+            output_concepts=all_concepts,
+        )
+    return None
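
For orientation, a minimal sketch of how this module's entry point is invoked. The wrapper below is illustrative only: `concepts`, `build_env`, `ref_graph`, and the `source_concepts` discovery callback are assumed inputs constructed elsewhere in the package, not part of this diff.

    from trilogy.core.processing.node_generators.node_merge_node import gen_merge_node

    def plan_merge(concepts, build_env, ref_graph, source_concepts):
        # Hypothetical wrapper: ask the generator to connect concepts that span
        # multiple datasources; returns a MergeNode (or a single parent node),
        # or None when no unambiguous join path exists.
        return gen_merge_node(
            all_concepts=concepts,
            g=ref_graph,
            environment=build_env,
            depth=0,  # controls log indentation via padding()
            source_concepts=source_concepts,
            accept_partial=False,
        )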