pytrilogy 0.0.3.104__py3-none-any.whl → 0.0.3.106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/RECORD +12 -12
- trilogy/__init__.py +1 -1
- trilogy/core/processing/discovery_utility.py +25 -3
- trilogy/core/processing/node_generators/filter_node.py +4 -5
- trilogy/core/processing/node_generators/select_merge_node.py +66 -0
- trilogy/core/processing/node_generators/unnest_node.py +0 -5
- trilogy/core/processing/utility.py +145 -58
- {pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/top_level.txt +0 -0
{pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
-pytrilogy-0.0.3.
-trilogy/__init__.py,sha256=
+pytrilogy-0.0.3.106.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+trilogy/__init__.py,sha256=6_By-LphYYIXu7GSa5PwnchymrRabR6qiUwPJWX62EE,304
 trilogy/constants.py,sha256=g_zkVCNjGop6coZ1kM8eXXAzCnUN22ldx3TYFz0E9sc,1747
 trilogy/engine.py,sha256=3MiADf5MKcmxqiHBuRqiYdsXiLj7oitDfVvXvHrfjkA,2178
 trilogy/executor.py,sha256=KgCAQhHPT-j0rPkBbALX0f84W9-Q-bkjHayGuavg99w,16490
@@ -37,26 +37,26 @@ trilogy/core/optimizations/predicate_pushdown.py,sha256=5ubatgq1IwWQ4L2FDt4--y16
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/processing/concept_strategies_v3.py,sha256=AcMU1d5uCo8I1PFCkBtmcC6iFmM9vN6xSdKxSVMGfpA,23080
 trilogy/core/processing/discovery_node_factory.py,sha256=p23jiiHyhrW-Q8ndbnRlqMHJKT8ZqPOA89SzE4xaFFo,15445
-trilogy/core/processing/discovery_utility.py,sha256=
+trilogy/core/processing/discovery_utility.py,sha256=ZJTTWm34zpR30p-xzElUJCTdx-XT44skKG0-LBfEOg4,12525
 trilogy/core/processing/discovery_validation.py,sha256=eZ4HfHMpqZLI8MGG2jez8arS8THs6ceuVrQFIY6gXrU,5364
 trilogy/core/processing/graph_utils.py,sha256=8QUVrkE9j-9C1AyrCb1nQEh8daCe0u1HuXl-Te85lag,1205
-trilogy/core/processing/utility.py,sha256=
+trilogy/core/processing/utility.py,sha256=ESs6pKqVP2c9eMdfB2JNjw7D7YnoezVwbLFx1D6OUYA,26088
 trilogy/core/processing/node_generators/__init__.py,sha256=iVJ-crowPxYeut-hFjyEjfibKIDq7PfB4LEuDAUCjGY,943
 trilogy/core/processing/node_generators/basic_node.py,sha256=74LoVZXLinRvSzk2LmI1kwza96TnuH3ELoYRIbHB29A,5578
 trilogy/core/processing/node_generators/common.py,sha256=xF32Kf6B08dZgKs2SOow1HomptSiSC057GCUCHFlS5s,9464
 trilogy/core/processing/node_generators/constant_node.py,sha256=LfpDq2WrBRZ3tGsLxw77LuigKfhbteWWh9L8BGdMGwk,1146
-trilogy/core/processing/node_generators/filter_node.py,sha256=
+trilogy/core/processing/node_generators/filter_node.py,sha256=cJ5od1fAfvalaUDO2O4Y6Yrr2RukOCqey7f3zrKSBbI,10808
 trilogy/core/processing/node_generators/group_node.py,sha256=NdK1rl6Ze94XFWtgeC2dlRiL4pS3lh1ArKGPEltLtnw,8525
 trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=a505AEixjsjp5jI8Ng3H5KF_AaehkS6HfRfTef64l_o,7063
 trilogy/core/processing/node_generators/node_merge_node.py,sha256=hNcZxnDLTZyYJWfojg769zH9HB9PfZfESmpN1lcHWXg,23172
 trilogy/core/processing/node_generators/recursive_node.py,sha256=l5zdh0dURKwmAy8kK4OpMtZfyUEQRk6N-PwSWIyBpSM,2468
 trilogy/core/processing/node_generators/rowset_node.py,sha256=MuVNIexXhqGONho_mewqMOwaYXNUnjjvyPvk_RDGNYE,5943
-trilogy/core/processing/node_generators/select_merge_node.py,sha256=
+trilogy/core/processing/node_generators/select_merge_node.py,sha256=ORF9H7A-yT2wzQZYVex2asmm7_y0b2_lP6U0e48asNA,25290
 trilogy/core/processing/node_generators/select_node.py,sha256=Ta1G39V94gjX_AgyZDz9OqnwLz4BjY3D6Drx9YpziMQ,3555
 trilogy/core/processing/node_generators/synonym_node.py,sha256=AnAsa_Wj50NJ_IK0HSgab_7klYmKVrv0WI1uUe-GvEY,3766
 trilogy/core/processing/node_generators/union_node.py,sha256=NxQbnRRoYMI4WjMeph41yk4E6yipj53qdGuNt-Mozxw,2818
-trilogy/core/processing/node_generators/unnest_node.py,sha256=
+trilogy/core/processing/node_generators/unnest_node.py,sha256=u_hVHFYMz-ZylDdHH9mhFSRpxuKcTGvrrOP0rxrY_Xg,3901
 trilogy/core/processing/node_generators/window_node.py,sha256=A90linr4pkZtTNfn9k2YNLqrJ_SFII3lbHxB-BC6mI8,6688
 trilogy/core/processing/node_generators/select_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=m2YQ4OmG0N2O61a7NEq1ZzbTa7JsCC00lxB2ymjcYRI,8224
@@ -119,8 +119,8 @@ trilogy/std/money.preql,sha256=XWwvAV3WxBsHX9zfptoYRnBigcfYwrYtBHXTME0xJuQ,2082
 trilogy/std/net.preql,sha256=WZCuvH87_rZntZiuGJMmBDMVKkdhTtxeHOkrXNwJ1EE,416
 trilogy/std/ranking.preql,sha256=LDoZrYyz4g3xsII9XwXfmstZD-_92i1Eox1UqkBIfi8,83
 trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
+pytrilogy-0.0.3.106.dist-info/METADATA,sha256=NM64Zgq3r16YOeN1tu1QI2sM-NyoLm86gnecFfRGQVs,11839
+pytrilogy-0.0.3.106.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pytrilogy-0.0.3.106.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
+pytrilogy-0.0.3.106.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.3.106.dist-info/RECORD,,
trilogy/__init__.py
CHANGED

trilogy/core/processing/discovery_utility.py
CHANGED

@@ -27,18 +27,22 @@ def calculate_effective_parent_grain(
 ) -> BuildGrain:
     # calculate the effective grain of the parent node
     # this is the union of all parent grains
-    if isinstance(node,
+    if isinstance(node, QueryDatasource):
         grain = BuildGrain()
-        qds = node
+        qds = node
         if not qds.joins:
             return qds.datasources[0].grain
+        seen = set()
         for join in qds.joins:
             if isinstance(join, UnnestJoin):
+                grain += BuildGrain(components=set([x.address for x in join.concepts]))
                 continue
             pairs = join.concept_pairs or []
             for key in pairs:
                 left = key.existing_datasource
+                logger.info(f"adding left grain {left.grain} for join key {key.left}")
                 grain += left.grain
+                seen.add(left.name)
             keys = [key.right for key in pairs]
             join_grain = BuildGrain.from_concepts(keys)
             if join_grain == join.right_datasource.grain:
@@ -48,6 +52,24 @@ def calculate_effective_parent_grain(
                 f"join changes grain, adding {join.right_datasource.grain} to {grain}"
             )
             grain += join.right_datasource.grain
+            seen.add(join.right_datasource.name)
+        for x in qds.datasources:
+            # if we haven't seen it, it's still contributing to grain
+            # unless used ONLY in a subselect
+            # so the existence check is a [bad] proxy for that
+            if x.name not in seen and not (
+                qds.condition
+                and qds.condition.existence_arguments
+                and any(
+                    [
+                        c.address in block
+                        for c in x.output_concepts
+                        for block in qds.condition.existence_arguments
+                    ]
+                )
+            ):
+                logger.info(f"adding unjoined grain {x.grain} for datasource {x.name}")
+                grain += x.grain
         return grain
     else:
         return node.grain or BuildGrain()
@@ -75,7 +97,7 @@ def check_if_group_required(
     if comp_grain.issubset(target_grain):
 
         logger.info(
-            f"{padding}{LOGGER_PREFIX} Group requirement check:
+            f"{padding}{LOGGER_PREFIX} Group requirement check: {comp_grain}, target: {target_grain}, grain is subset of target, no group node required"
        )
         return GroupRequiredResponse(target_grain, comp_grain, False)
     # find out what extra is in the comp grain vs target grain
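
The new tail of calculate_effective_parent_grain folds in the grain of any datasource that never participated in a join, unless its outputs appear only inside an existence subselect. A minimal runnable sketch of that membership test, using simplified stand-in types (FakeConcept and FakeDatasource are illustrative, not trilogy's models):

from dataclasses import dataclass, field


@dataclass
class FakeConcept:
    address: str


@dataclass
class FakeDatasource:
    name: str
    output_concepts: list[FakeConcept]
    grain: set[str] = field(default_factory=set)


def used_only_in_existence(
    ds: FakeDatasource, existence_arguments: list[set[str]]
) -> bool:
    # mirrors the diff's check: any output concept that appears in an
    # existence block marks the datasource as subselect-only
    return any(
        c.address in block
        for c in ds.output_concepts
        for block in existence_arguments
    )


orders = FakeDatasource("orders", [FakeConcept("order.id")], {"order.id"})
flags = FakeDatasource("flags", [FakeConcept("flag.id")], {"flag.id"})
existence = [{"flag.id"}]  # e.g. a WHERE EXISTS subselect over flag.id

grain: set[str] = set()
for ds in [orders, flags]:
    if not used_only_in_existence(ds, existence):
        grain |= ds.grain  # only non-subselect sources contribute grain
print(grain)  # {'order.id'}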
trilogy/core/processing/node_generators/filter_node.py
CHANGED

@@ -4,7 +4,6 @@ from trilogy.constants import logger
 from trilogy.core.models.build import (
     BuildConcept,
     BuildFilterItem,
-    BuildGrain,
     BuildWhereClause,
 )
 from trilogy.core.models.build_environment import BuildEnvironment
@@ -238,10 +237,10 @@ def gen_filter_node(
         if not parent.preexisting_conditions == where.conditional:
             parent.add_condition(where.conditional)
             parent.add_existence_concepts(flattened_existence, False)
-            parent.grain = BuildGrain.from_concepts(
-                parent.output_concepts,
-                environment=environment,
-            )
+            # parent.grain = BuildGrain.from_concepts(
+            #     parent.output_concepts,
+            #     environment=environment,
+            # )
             parent.rebuild_cache()
             filter_node = parent
         else:
trilogy/core/processing/node_generators/select_merge_node.py
CHANGED

@@ -224,6 +224,72 @@ def create_pruned_concept_graph(
     return g
 
 
+# def deduplicate_nodes(subgraph: nx.DiGraph, nodes: list[str], partial_map: dict[str, list[str]], depth: int) -> list[str]:
+#     """
+#     Remove duplicate datasource nodes that are connected to the same concepts
+#     and have the same partial state, keeping the one with the most unique concepts.
+#
+#     Args:
+#         subgraph: NetworkX DiGraph containing the nodes and edges
+#         nodes: List of node names to deduplicate
+#         partial_map: Map of datasource to partial nodes
+#
+#     Returns:
+#         List of deduplicated node names
+#     """
+#     # Filter for datasource nodes only
+#     ds_nodes = [node for node in nodes if node.startswith("ds~")]
+#     non_ds_nodes = [node for node in nodes if not node.startswith("ds~")]
+#
+#     if len(ds_nodes) <= 1:
+#         return nodes  # No deduplication needed
+#
+#     # Build a map of each datasource to its connected concepts and partial state
+#     ds_info = {}
+#
+#     for ds_node in ds_nodes:
+#         # Get connected concept nodes (nodes starting with "c~")
+#         connected_concepts = set()
+#         for neighbor in subgraph.neighbors(ds_node):
+#             if neighbor.startswith("c~"):
+#                 connected_concepts.add(neighbor)
+#
+#         # Get partial state for this datasource
+#         partial_state = tuple(sorted(partial_map.get(ds_node, [])))
+#
+#         ds_info[ds_node] = {
+#             'concepts': connected_concepts,
+#             'partial_state': partial_state
+#         }
+#
+#     # Find datasources to remove (those that are subsets of others)
+#     nodes_to_remove = set()
+#     logger.info('LOOK HERE')
+#     logger.info(ds_info)
+#     for ds_a, info_a in ds_info.items():
+#         for ds_b, info_b in ds_info.items():
+#             if ds_a != ds_b and ds_a not in nodes_to_remove:
+#                 # Check if ds_a is a subset of ds_b (same partial state and concepts are subset)
+#                 if (info_a['partial_state'] == info_b['partial_state'] and
+#                     info_a['concepts'].issubset(info_b['concepts']) and
+#                     len(info_a['concepts']) < len(info_b['concepts'])):
+#                     # ds_a connects to fewer concepts than ds_b, so remove ds_a
+#                     nodes_to_remove.add(ds_a)
+#                 elif (info_a['partial_state'] == info_b['partial_state'] and
+#                       info_a['concepts'] == info_b['concepts']):
+#                     # Exact same concepts and partial state - keep one arbitrarily
+#                     # (keep the lexicographically smaller one for consistency)
+#                     if ds_a > ds_b:
+#                         nodes_to_remove.add(ds_a)
+#
+#     # Keep datasource nodes that weren't marked for removal
+#     logger.info(f"{padding(depth)}{LOGGER_PREFIX} Removing duplicate datasource nodes: {nodes_to_remove}")
+#     deduplicated_ds_nodes = [ds for ds in ds_nodes if ds not in nodes_to_remove]
+#
+#     # Return deduplicated datasource nodes plus all non-datasource nodes
+#     return deduplicated_ds_nodes + non_ds_nodes
+
+
 def resolve_subgraphs(
     g: ReferenceGraph,
     relevant: list[BuildConcept],
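
The commented-out helper above encodes a simple rule: a datasource node is redundant when another node with the same partial state covers a strict superset of its concepts, with ties broken lexicographically. A runnable sketch of that rule, assuming the diff's "ds~"/"c~" node-naming convention; the graph and names are invented:

import networkx as nx


def deduplicate_nodes(
    g: nx.Graph, nodes: list[str], partial_map: dict[str, list[str]]
) -> list[str]:
    ds_nodes = [n for n in nodes if n.startswith("ds~")]
    other = [n for n in nodes if not n.startswith("ds~")]
    # (partial state, connected concepts) per datasource node
    info = {
        ds: (
            tuple(sorted(partial_map.get(ds, []))),
            {n for n in g.neighbors(ds) if n.startswith("c~")},
        )
        for ds in ds_nodes
    }
    drop: set[str] = set()
    for a, (pa, ca) in info.items():
        for b, (pb, cb) in info.items():
            if a == b or a in drop or pa != pb:
                continue
            if ca < cb:  # strict subset: a is redundant
                drop.add(a)
            elif ca == cb and a > b:  # tie: keep the lexicographically smaller
                drop.add(a)
    return [ds for ds in ds_nodes if ds not in drop] + other


g = nx.Graph()
g.add_edges_from([("ds~small", "c~x"), ("ds~big", "c~x"), ("ds~big", "c~y")])
print(deduplicate_nodes(g, ["ds~small", "ds~big", "c~x", "c~y"], {}))
# ['ds~big', 'c~x', 'c~y']: ds~small covers a strict subset of ds~big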
trilogy/core/processing/node_generators/unnest_node.py
CHANGED

@@ -4,7 +4,6 @@ from trilogy.constants import logger
 from trilogy.core.models.build import (
     BuildConcept,
     BuildFunction,
-    BuildGrain,
     BuildWhereClause,
 )
 from trilogy.core.models.build_environment import BuildEnvironment
@@ -104,10 +103,6 @@ def gen_unnest_node(
         preexisting_conditions=(
             conditional if conditional and local_conditions is False else None
         ),
-        grain=BuildGrain.from_concepts(
-            concepts=base.output_concepts,
-            environment=environment,
-        ),
     )
     # qds = new.resolve()
     # assert qds.source_map[concept.address] == {base.resolve()}
trilogy/core/processing/utility.py
CHANGED

@@ -90,13 +90,86 @@ class GroupRequiredResponse:
     required: bool
 
 
+def find_all_connecting_concepts(g: nx.Graph, ds1: str, ds2: str) -> set[str]:
+    """Find all concepts that connect two datasources"""
+    concepts1 = set(g.neighbors(ds1))
+    concepts2 = set(g.neighbors(ds2))
+    return concepts1 & concepts2
+
+
+def get_connection_keys(
+    all_connections: dict[tuple[str, str], set[str]], left: str, right: str
+) -> set[str]:
+    """Get all concepts that connect two datasources"""
+    lookup = sorted([left, right])
+    key: tuple[str, str] = (lookup[0], lookup[1])
+    return all_connections.get(key, set())
+
+
+def get_join_type(
+    left: str,
+    right: str,
+    partials: dict[str, list[str]],
+    nullables: dict[str, list[str]],
+    all_connecting_keys: set[str],
+) -> JoinType:
+    left_is_partial = any(key in partials.get(left, []) for key in all_connecting_keys)
+    left_is_nullable = any(
+        key in nullables.get(left, []) for key in all_connecting_keys
+    )
+    right_is_partial = any(
+        key in partials.get(right, []) for key in all_connecting_keys
+    )
+    right_is_nullable = any(
+        key in nullables.get(right, []) for key in all_connecting_keys
+    )
+
+    if left_is_nullable and right_is_nullable:
+        join_type = JoinType.FULL
+    elif left_is_partial and right_is_partial:
+        join_type = JoinType.FULL
+    elif left_is_partial:
+        join_type = JoinType.FULL
+    elif right_is_nullable:
+        join_type = JoinType.RIGHT_OUTER
+    elif right_is_partial or left_is_nullable:
+        join_type = JoinType.LEFT_OUTER
+    # we can't inner join if the left was an outer join
+    else:
+        join_type = JoinType.INNER
+    return join_type
+
+
+def reduce_join_types(join_types: Set[JoinType]) -> JoinType:
+    final_join_type = JoinType.INNER
+    if any([x == JoinType.FULL for x in join_types]):
+        final_join_type = JoinType.FULL
+    elif any([x == JoinType.LEFT_OUTER for x in join_types]):
+        final_join_type = JoinType.LEFT_OUTER
+    elif any([x == JoinType.RIGHT_OUTER for x in join_types]):
+        final_join_type = JoinType.RIGHT_OUTER
+
+    return final_join_type
+
+
 def resolve_join_order_v2(
     g: nx.Graph, partials: dict[str, list[str]], nullables: dict[str, list[str]]
 ) -> list[JoinOrderOutput]:
     datasources = [x for x in g.nodes if x.startswith("ds~")]
     concepts = [x for x in g.nodes if x.startswith("c~")]
 
+    # Pre-compute all possible connections between datasources
+    all_connections: dict[tuple[str, str], set[str]] = {}
+    for i, ds1 in enumerate(datasources):
+        for ds2 in datasources[i + 1 :]:
+            connecting_concepts = find_all_connecting_concepts(g, ds1, ds2)
+            if connecting_concepts:
+                key = tuple(sorted([ds1, ds2]))
+                all_connections[key] = connecting_concepts
+
     output: list[JoinOrderOutput] = []
+
+    # create our map of pivots, or common join concepts
     pivot_map = {
         concept: [x for x in g.neighbors(concept) if x in datasources]
         for concept in concepts
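
resolve_join_order_v2 now pre-computes every pairwise datasource connection once, keyed by the sorted pair, so later lookups via get_connection_keys are dictionary hits rather than repeated graph walks. A runnable sketch of that pre-computation with an invented graph, reusing the diff's node-naming convention:

import networkx as nx


def find_all_connecting_concepts(g: nx.Graph, ds1: str, ds2: str) -> set[str]:
    return set(g.neighbors(ds1)) & set(g.neighbors(ds2))


g = nx.Graph()
g.add_edges_from(
    [
        ("ds~orders", "c~customer_id"),
        ("ds~customers", "c~customer_id"),
        ("ds~customers", "c~region"),
        ("ds~regions", "c~region"),
    ]
)
datasources = [n for n in g.nodes if n.startswith("ds~")]

# compute every pairwise connection once, keyed by the sorted pair,
# so either orientation of (left, right) finds the same entry later
all_connections: dict[tuple[str, str], set[str]] = {}
for i, ds1 in enumerate(datasources):
    for ds2 in datasources[i + 1 :]:
        common = find_all_connecting_concepts(g, ds1, ds2)
        if common:
            all_connections[tuple(sorted([ds1, ds2]))] = common

print(all_connections)
# {('ds~customers', 'ds~orders'): {'c~customer_id'},
#  ('ds~customers', 'ds~regions'): {'c~region'}}

The sorted-pair key mirrors get_connection_keys in the diff, which sorts (left, right) before lookup so a join candidate can be probed from either side.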
@@ -108,8 +181,9 @@ def resolve_join_order_v2(
         )
     )
     solo = [x for x in pivot_map if len(pivot_map[x]) == 1]
-    eligible_left = set()
+    eligible_left: set[str] = set()
 
+    # while we have pivots, keep joining them in
     while pivots:
         next_pivots = [
             x for x in pivots if any(y in eligible_left for y in pivot_map[x])
@@ -120,7 +194,7 @@ def resolve_join_order_v2(
         else:
             root = pivots.pop(0)
 
-        # sort so less partials is last and eligible lefts are
+        # sort so less partials is last and eligible lefts are first
         def score_key(x: str) -> tuple[int, int, str]:
             base = 1
             # if it's left, higher weight
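
The restored comment ("eligible lefts are first") describes score_key's ordering contract: sorted() is ascending and candidates are consumed with .pop() from the end, so the lowest scores (already-joined lefts) sort to the front and the preferred next join lands last. A hedged sketch of that idea; the real score_key's exact weighting is only partially visible in the diff:

def make_score_key(eligible_left: set[str], partials: dict[str, list[str]]):
    def score_key(x: str) -> tuple[int, int, str]:
        base = 1
        if x in eligible_left:
            base -= 1  # already-joined tables sort first
        if not partials.get(x):
            base += 1  # tables with no partial concepts sort last: popped first
        return (base, len(x), x)  # name length and name break ties

    return score_key


key = make_score_key({"ds~joined"}, {"ds~partial": ["c~x"]})
to_join = sorted(["ds~partial", "ds~clean"], key=key)
print(to_join)        # ['ds~partial', 'ds~clean']
print(to_join.pop())  # ds~clean: the non-partial candidate is joined first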
@@ -133,79 +207,56 @@ def resolve_join_order_v2(
                 base -= 1
             return (base, len(x), x)
 
-        # get
+        # get remaining un-joined datasets
         to_join = sorted(
             [x for x in pivot_map[root] if x not in eligible_left], key=score_key
         )
         while to_join:
             # need to sort this to ensure we join on the best match
-
-
-            )
+            # but check ALL left in case there are non-pivot keys to join on
+            base = sorted([x for x in eligible_left], key=score_key)
             if not base:
                 new = to_join.pop()
                 eligible_left.add(new)
                 base = [new]
             right = to_join.pop()
             # we already joined it
-            # this could happen if the same pivot is shared with multiple
+            # this could happen if the same pivot is shared with multiple DSes
             if right in eligible_left:
                 continue
+
             joinkeys: dict[str, set[str]] = {}
             # sorting puts the best candidate last for pop
             # so iterate over the reversed list
             join_types = set()
+
             for left_candidate in reversed(base):
-
+                # Get all concepts that connect these two datasources
+                all_connecting_keys = get_connection_keys(
+                    all_connections, left_candidate, right
+                )
 
-                if not
+                if not all_connecting_keys:
                     continue
+
+                # Check if we already have this exact set of keys
                 exists = False
                 for _, v in joinkeys.items():
-                    if v ==
+                    if v == all_connecting_keys:
                         exists = True
                 if exists:
                     continue
-                left_is_partial = any(
-                    key in partials.get(left_candidate, []) for key in common
-                )
-                left_is_nullable = any(
-                    key in nullables.get(left_candidate, []) for key in common
-                )
-                right_is_partial = any(key in partials.get(right, []) for key in common)
-                # we don't care if left is nullable for join type (just keys), but if we did
-                # left_is_nullable = any(
-                #     key in nullables.get(left_candidate, []) for key in common
-                # )
-                right_is_nullable = any(
-                    key in nullables.get(right, []) for key in common
-                )
-                if left_is_nullable and right_is_nullable:
-                    join_type = JoinType.FULL
-                elif left_is_partial and right_is_partial:
-                    join_type = JoinType.FULL
-                elif left_is_partial:
-                    join_type = JoinType.FULL
-                elif right_is_nullable:
-                    join_type = JoinType.RIGHT_OUTER
-                elif right_is_partial or left_is_nullable:
-                    join_type = JoinType.LEFT_OUTER
-                # we can't inner join if the left was an outer join
-                else:
-                    join_type = JoinType.INNER
 
+                join_type = get_join_type(
+                    left_candidate, right, partials, nullables, all_connecting_keys
+                )
                 join_types.add(join_type)
-                joinkeys[left_candidate] =
-
-
-
-            elif any([x == JoinType.LEFT_OUTER for x in join_types]):
-                final_join_type = JoinType.LEFT_OUTER
-            elif any([x == JoinType.RIGHT_OUTER for x in join_types]):
-                final_join_type = JoinType.RIGHT_OUTER
+                joinkeys[left_candidate] = all_connecting_keys
+
+            final_join_type = reduce_join_types(join_types)
+
             output.append(
                 JoinOrderOutput(
-                    # left=left_candidate,
                     right=right,
                     type=final_join_type,
                     keys=joinkeys,
@@ -216,7 +267,6 @@ def resolve_join_order_v2(
     for concept in solo:
         for ds in pivot_map[concept]:
             # if we already have it, skip it
-
             if ds in eligible_left:
                 continue
             # if we haven't had ANY left datasources yet
@@ -224,17 +274,39 @@ def resolve_join_order_v2(
             if not eligible_left:
                 eligible_left.add(ds)
                 continue
-            # otherwise do a full
-
-
-
-
-
-
-
+            # otherwise do a full outer join
+            # Try to find if there are any connecting keys with existing left tables
+            best_left = None
+            best_keys: set[str] = set()
+            for existing_left in eligible_left:
+                connecting_keys = get_connection_keys(
+                    all_connections, existing_left, ds
+                )
+                if connecting_keys and len(connecting_keys) > len(best_keys):
+                    best_left = existing_left
+                    best_keys = connecting_keys
+
+            if best_left and best_keys:
+                output.append(
+                    JoinOrderOutput(
+                        left=best_left,
+                        right=ds,
+                        type=JoinType.FULL,
+                        keys={best_left: best_keys},
+                    )
+                )
+            else:
+                output.append(
+                    JoinOrderOutput(
+                        # pick random one to be left
+                        left=list(eligible_left)[0],
+                        right=ds,
+                        type=JoinType.FULL,
+                        keys={},
+                    )
                 )
-            )
             eligible_left.add(ds)
+
     # only once we have all joins
     # do we know if some inners need to be left outers
     for review_join in output:
@@ -248,6 +320,7 @@ def resolve_join_order_v2(
             ]
         ):
             review_join.type = JoinType.LEFT_OUTER
+
     return output
 
 
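
The inline partial/nullable logic deleted above now lives in get_join_type, and reduce_join_types collapses the per-key-set results with a widest-join-wins rule. A self-contained sketch of both rules; JoinType here is a stand-in enum rather than the trilogy import:

from enum import Enum


class JoinType(Enum):
    INNER = "inner"
    LEFT_OUTER = "left outer"
    RIGHT_OUTER = "right outer"
    FULL = "full"


def get_join_type(
    left: str,
    right: str,
    partials: dict[str, list[str]],
    nullables: dict[str, list[str]],
    keys: set[str],
) -> JoinType:
    # a side is "partial"/"nullable" if any shared join key is flagged for it
    left_partial = any(k in partials.get(left, []) for k in keys)
    left_nullable = any(k in nullables.get(left, []) for k in keys)
    right_partial = any(k in partials.get(right, []) for k in keys)
    right_nullable = any(k in nullables.get(right, []) for k in keys)
    if left_nullable and right_nullable:
        return JoinType.FULL
    if left_partial and right_partial:
        return JoinType.FULL
    if left_partial:
        return JoinType.FULL
    if right_nullable:
        return JoinType.RIGHT_OUTER
    if right_partial or left_nullable:
        return JoinType.LEFT_OUTER
    return JoinType.INNER


def reduce_join_types(join_types: set[JoinType]) -> JoinType:
    # widest join wins across all candidate key-sets for one right table
    for t in (JoinType.FULL, JoinType.LEFT_OUTER, JoinType.RIGHT_OUTER):
        if t in join_types:
            return t
    return JoinType.INNER


partials = {"ds~b": ["c~customer_id"]}
print(get_join_type("ds~a", "ds~b", partials, {}, {"c~customer_id"}))
# JoinType.LEFT_OUTER: the right side covers the key only partially
print(reduce_join_types({JoinType.INNER, JoinType.FULL}))
# JoinType.FULL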
@@ -352,7 +425,9 @@ def resolve_instantiated_concept(
     )
 
 
-def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
+def reduce_concept_pairs(
+    input: list[ConceptPair], right_source: QueryDatasource | BuildDatasource
+) -> list[ConceptPair]:
     left_keys = set()
     right_keys = set()
     for pair in input:
@@ -361,7 +436,10 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
         if pair.right.purpose == Purpose.KEY:
             right_keys.add(pair.right.address)
     final: list[ConceptPair] = []
+    seen_right_keys = set()
     for pair in input:
+        if pair.right.address in seen_right_keys:
+            continue
         if (
             pair.left.purpose == Purpose.PROPERTY
             and pair.left.keys
@@ -374,7 +452,15 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
             and pair.right.keys.issubset(right_keys)
         ):
             continue
+
+        seen_right_keys.add(pair.right.address)
         final.append(pair)
+    all_keys = set([x.right.address for x in final])
+    if right_source.grain.components and right_source.grain.components.issubset(
+        all_keys
+    ):
+        return [x for x in final if x.right.address in right_source.grain.components]
+
     return final
 
 
@@ -443,7 +529,8 @@ def get_node_joins(
                     )
                     for k, v in j.keys.items()
                     for concept in v
-                ]
+                ],
+                ds_node_map[j.right],
             ),
         )
         for j in joins
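
reduce_concept_pairs now also deduplicates on the right-hand address and, when the right source's grain components are all present among the surviving join keys, trims the join down to the grain alone. A minimal sketch of that final reduction, with a simplified Pair standing in for trilogy's ConceptPair:

from dataclasses import dataclass


@dataclass
class Pair:
    left: str
    right: str  # address of the right-hand concept


def reduce_to_grain(pairs: list[Pair], grain_components: set[str]) -> list[Pair]:
    all_keys = {p.right for p in pairs}
    if grain_components and grain_components.issubset(all_keys):
        # every grain component is already a join key, so the other
        # (dependent) keys are redundant for row matching
        return [p for p in pairs if p.right in grain_components]
    return pairs


pairs = [Pair("a.id", "b.id"), Pair("a.name", "b.name")]
print(reduce_to_grain(pairs, {"b.id"}))
# [Pair(left='a.id', right='b.id')]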
{pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/WHEEL
File without changes

{pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/entry_points.txt
File without changes

{pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/licenses/LICENSE.md
File without changes

{pytrilogy-0.0.3.104.dist-info → pytrilogy-0.0.3.106.dist-info}/top_level.txt
File without changes