pytrilogy 0.0.3.105__py3-none-any.whl → 0.0.3.107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/RECORD +15 -15
- trilogy/__init__.py +1 -1
- trilogy/core/processing/concept_strategies_v3.py +12 -13
- trilogy/core/processing/discovery_utility.py +22 -3
- trilogy/core/processing/node_generators/group_node.py +1 -0
- trilogy/core/processing/node_generators/select_merge_node.py +66 -0
- trilogy/core/processing/node_generators/window_node.py +7 -5
- trilogy/core/processing/nodes/group_node.py +7 -0
- trilogy/core/processing/utility.py +145 -58
- trilogy/parsing/render.py +16 -1
- {pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/top_level.txt +0 -0
{pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
-pytrilogy-0.0.3.
-trilogy/__init__.py,sha256=
+pytrilogy-0.0.3.107.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+trilogy/__init__.py,sha256=TOWJnXypdm58AYp22QN4LJVjkesmn8ojjYSy-Hf9K9Q,304
 trilogy/constants.py,sha256=g_zkVCNjGop6coZ1kM8eXXAzCnUN22ldx3TYFz0E9sc,1747
 trilogy/engine.py,sha256=3MiADf5MKcmxqiHBuRqiYdsXiLj7oitDfVvXvHrfjkA,2178
 trilogy/executor.py,sha256=KgCAQhHPT-j0rPkBbALX0f84W9-Q-bkjHayGuavg99w,16490
@@ -35,35 +35,35 @@ trilogy/core/optimizations/hide_unused_concept.py,sha256=DbsP8NqQOxmPv9omDOoFNPU
 trilogy/core/optimizations/inline_datasource.py,sha256=2sWNRpoRInnTgo9wExVT_r9RfLAQHI57reEV5cGHUcg,4329
 trilogy/core/optimizations/predicate_pushdown.py,sha256=5ubatgq1IwWQ4L2FDt4--y168YLuGP-vwqH0m8IeTIw,9786
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/concept_strategies_v3.py,sha256
+trilogy/core/processing/concept_strategies_v3.py,sha256=-iC2CLALmSrOglMKZM4TslVncyOrJbUH0V_COmbqHIw,22681
 trilogy/core/processing/discovery_node_factory.py,sha256=p23jiiHyhrW-Q8ndbnRlqMHJKT8ZqPOA89SzE4xaFFo,15445
-trilogy/core/processing/discovery_utility.py,sha256=
+trilogy/core/processing/discovery_utility.py,sha256=KTWArF3zK2P2UfZxY1Y_L0-4SCx0UZlfp-VcjpSkhMI,13191
 trilogy/core/processing/discovery_validation.py,sha256=eZ4HfHMpqZLI8MGG2jez8arS8THs6ceuVrQFIY6gXrU,5364
 trilogy/core/processing/graph_utils.py,sha256=8QUVrkE9j-9C1AyrCb1nQEh8daCe0u1HuXl-Te85lag,1205
-trilogy/core/processing/utility.py,sha256=
+trilogy/core/processing/utility.py,sha256=ESs6pKqVP2c9eMdfB2JNjw7D7YnoezVwbLFx1D6OUYA,26088
 trilogy/core/processing/node_generators/__init__.py,sha256=iVJ-crowPxYeut-hFjyEjfibKIDq7PfB4LEuDAUCjGY,943
 trilogy/core/processing/node_generators/basic_node.py,sha256=74LoVZXLinRvSzk2LmI1kwza96TnuH3ELoYRIbHB29A,5578
 trilogy/core/processing/node_generators/common.py,sha256=xF32Kf6B08dZgKs2SOow1HomptSiSC057GCUCHFlS5s,9464
 trilogy/core/processing/node_generators/constant_node.py,sha256=LfpDq2WrBRZ3tGsLxw77LuigKfhbteWWh9L8BGdMGwk,1146
 trilogy/core/processing/node_generators/filter_node.py,sha256=cJ5od1fAfvalaUDO2O4Y6Yrr2RukOCqey7f3zrKSBbI,10808
-trilogy/core/processing/node_generators/group_node.py,sha256=
+trilogy/core/processing/node_generators/group_node.py,sha256=sIm1QYrF4EY6sk56A48B6MieCZqvaJLSQebih_aiKnQ,8567
 trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=a505AEixjsjp5jI8Ng3H5KF_AaehkS6HfRfTef64l_o,7063
 trilogy/core/processing/node_generators/node_merge_node.py,sha256=hNcZxnDLTZyYJWfojg769zH9HB9PfZfESmpN1lcHWXg,23172
 trilogy/core/processing/node_generators/recursive_node.py,sha256=l5zdh0dURKwmAy8kK4OpMtZfyUEQRk6N-PwSWIyBpSM,2468
 trilogy/core/processing/node_generators/rowset_node.py,sha256=MuVNIexXhqGONho_mewqMOwaYXNUnjjvyPvk_RDGNYE,5943
-trilogy/core/processing/node_generators/select_merge_node.py,sha256=
+trilogy/core/processing/node_generators/select_merge_node.py,sha256=ORF9H7A-yT2wzQZYVex2asmm7_y0b2_lP6U0e48asNA,25290
 trilogy/core/processing/node_generators/select_node.py,sha256=Ta1G39V94gjX_AgyZDz9OqnwLz4BjY3D6Drx9YpziMQ,3555
 trilogy/core/processing/node_generators/synonym_node.py,sha256=AnAsa_Wj50NJ_IK0HSgab_7klYmKVrv0WI1uUe-GvEY,3766
 trilogy/core/processing/node_generators/union_node.py,sha256=NxQbnRRoYMI4WjMeph41yk4E6yipj53qdGuNt-Mozxw,2818
 trilogy/core/processing/node_generators/unnest_node.py,sha256=u_hVHFYMz-ZylDdHH9mhFSRpxuKcTGvrrOP0rxrY_Xg,3901
-trilogy/core/processing/node_generators/window_node.py,sha256=
+trilogy/core/processing/node_generators/window_node.py,sha256=wNvmumGO6AIQ7C9bDUYYZ6LJvDvPQPfFVX82pTxjV-k,6767
 trilogy/core/processing/node_generators/select_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=m2YQ4OmG0N2O61a7NEq1ZzbTa7JsCC00lxB2ymjcYRI,8224
 trilogy/core/processing/nodes/__init__.py,sha256=zTge1EzwzEydlcMliIFO_TT7h7lS8l37lyZuQDir1h0,5487
 trilogy/core/processing/nodes/base_node.py,sha256=6LPQ5zP_dZJ6-k_dmX9ZSLsHaQMHgqiR5DEylpHYGZA,18478
 trilogy/core/processing/nodes/filter_node.py,sha256=5VtRfKbCORx0dV-vQfgy3gOEkmmscL9f31ExvlODwvY,2461
-trilogy/core/processing/nodes/group_node.py,sha256=
+trilogy/core/processing/nodes/group_node.py,sha256=Ku8El9KQvRiTiHCZDS_jX0DjErSDNv7IIQMcd1Gsk7I,7449
 trilogy/core/processing/nodes/merge_node.py,sha256=uc0tlz30Yt9SnCwLhMcWuPVbXLzm3dzy0XqbyirqqTo,16521
 trilogy/core/processing/nodes/recursive_node.py,sha256=k0rizxR8KE64ievfHx_GPfQmU8QAP118Laeyq5BLUOk,1526
 trilogy/core/processing/nodes/select_node_v2.py,sha256=IWyKyNgFlV8A2S1FUTPdIaogg6PzaHh-HmQo6v24sbg,8862
@@ -105,7 +105,7 @@ trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
 trilogy/parsing/exceptions.py,sha256=Xwwsv2C9kSNv2q-HrrKC1f60JNHShXcCMzstTSEbiCw,154
 trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 trilogy/parsing/parse_engine.py,sha256=T-3Q4UH256IB6cfX85crScZwZ6gAwslgv0fy3WKBdjc,81930
-trilogy/parsing/render.py,sha256=
+trilogy/parsing/render.py,sha256=FhSU3-bMA0YM3oEn6nfpfjbM74nvH2r1TtFgbWNzOsM,24204
 trilogy/parsing/trilogy.lark,sha256=6eBDD6d4D9N1Nnn4CtmaoB-NpOpjHrEn5oi0JykAlbE,16509
 trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/scripts/trilogy.py,sha256=1L0XrH4mVHRt1C9T1HnaDv2_kYEfbWTb5_-cBBke79w,3774
@@ -119,8 +119,8 @@ trilogy/std/money.preql,sha256=XWwvAV3WxBsHX9zfptoYRnBigcfYwrYtBHXTME0xJuQ,2082
 trilogy/std/net.preql,sha256=WZCuvH87_rZntZiuGJMmBDMVKkdhTtxeHOkrXNwJ1EE,416
 trilogy/std/ranking.preql,sha256=LDoZrYyz4g3xsII9XwXfmstZD-_92i1Eox1UqkBIfi8,83
 trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
+pytrilogy-0.0.3.107.dist-info/METADATA,sha256=WH1n7SdMf8X0M1jUWcT-UNZdT5NGMOANPo_uu9vBdFA,11839
+pytrilogy-0.0.3.107.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pytrilogy-0.0.3.107.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
+pytrilogy-0.0.3.107.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.3.107.dist-info/RECORD,,
trilogy/__init__.py
CHANGED

trilogy/core/processing/concept_strategies_v3.py
CHANGED

@@ -350,9 +350,6 @@ def check_for_early_exit(
 def generate_loop_completion(context: LoopContext, virtual: set[str]) -> StrategyNode:
     condition_required = True
     non_virtual = [c for c in context.completion_mandatory if c.address not in virtual]
-    non_virtual_output = [
-        c for c in context.original_mandatory if c.address not in virtual
-    ]
     non_virtual_different = len(context.completion_mandatory) != len(
         context.original_mandatory
     )
@@ -380,11 +377,12 @@ def generate_loop_completion(context: LoopContext, virtual: set[str]) -> Strateg
         logger.info(
             f"Condition {context.conditions} not required, parents included filtering! {parent_map}"
         )
+
     if len(context.stack) == 1:
         output: StrategyNode = context.stack[0]
         if non_virtual_different:
             logger.info(
-                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found
+                f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Found added non-virtual output concepts ({non_virtual_difference_values})"
             )
             # output.set_output_concepts(
             #     [
@@ -398,13 +396,6 @@ def generate_loop_completion(context: LoopContext, virtual: set[str]) -> Strateg
             #     )
             # output.set_output_concepts(context.original_mandatory)

-        # if isinstance(output, MergeNode):
-        #     output.force_group = True
-        #     output.rebuild_cache()
-
-        logger.info(
-            f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
-        )
     else:
         logger.info(
             f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} wrapping multiple parent nodes {[type(x) for x in context.stack]} in merge node"
@@ -441,10 +432,18 @@ def generate_loop_completion(context: LoopContext, virtual: set[str]) -> Strateg
             f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Conditions {context.conditions} were injected, checking if we need a group to restore grain"
         )
         return group_if_required_v2(
-            output,
+            output,
+            context.original_mandatory,
+            context.environment,
+            non_virtual_difference_values,
         )

-    return group_if_required_v2(
+    return group_if_required_v2(
+        output,
+        context.original_mandatory,
+        context.environment,
+        non_virtual_difference_values,
+    )


 def _search_concepts(
trilogy/core/processing/discovery_utility.py
CHANGED

@@ -180,8 +180,12 @@ def check_if_group_required(


 def group_if_required_v2(
-    root: StrategyNode,
+    root: StrategyNode,
+    final: List[BuildConcept],
+    environment: BuildEnvironment,
+    where_injected: set[str] | None = None,
 ):
+    where_injected = where_injected or set()
     required = check_if_group_required(
         downstream_concepts=final, parents=[root.resolve()], environment=environment
     )
@@ -197,8 +201,23 @@ def group_if_required_v2(
         root.rebuild_cache()
         return root
     elif isinstance(root, GroupNode):
-
-
+
+        if set(x.address for x in final) != set(
+            x.address for x in root.output_concepts
+        ):
+            allowed_outputs = [
+                x
+                for x in root.output_concepts
+                if not (
+                    x.address in where_injected
+                    and x.address not in (root.required_outputs or set())
+                )
+            ]
+
+            logger.info(
+                f"Adjusting group node outputs to remove injected concepts {where_injected}: remaining {allowed_outputs}"
+            )
+            root.set_output_concepts(allowed_outputs)
         return root
     return GroupNode(
         output_concepts=targets,
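The new branch above compares the requested final concepts against the GroupNode's current outputs and drops any concept that was only injected by a where clause, unless it is required to preserve grain. A minimal standalone sketch of that rule, with stub types standing in for trilogy's BuildConcept and GroupNode (names here are illustrative, not the library's):

from dataclasses import dataclass


@dataclass
class StubConcept:
    address: str


@dataclass
class StubGroupNode:
    output_concepts: list[StubConcept]
    required_outputs: list[StubConcept] | None = None


def trim_injected_outputs(
    node: StubGroupNode, final: list[StubConcept], where_injected: set[str]
) -> list[StubConcept]:
    # keep every output unless it was injected by a where clause
    # and is not needed to preserve the grain
    if {c.address for c in final} == {c.address for c in node.output_concepts}:
        return node.output_concepts
    required = {c.address for c in node.required_outputs or []}
    return [
        c
        for c in node.output_concepts
        if not (c.address in where_injected and c.address not in required)
    ]


node = StubGroupNode(
    output_concepts=[StubConcept("order.id"), StubConcept("order.status")],
    required_outputs=[StubConcept("order.id")],
)
trimmed = trim_injected_outputs(node, [StubConcept("order.id")], {"order.status"})
print([c.address for c in trimmed])  # ['order.id']; the injected filter concept is dropped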
trilogy/core/processing/node_generators/select_merge_node.py
CHANGED

@@ -224,6 +224,72 @@ def create_pruned_concept_graph(
     return g


+# def deduplicate_nodes(subgraph: nx.DiGraph, nodes: list[str], partial_map: dict[str, list[str]], depth: int) -> list[str]:
+#     """
+#     Remove duplicate datasource nodes that are connected to the same concepts
+#     and have the same partial state, keeping the one with the most unique concepts.
+
+#     Args:
+#         subgraph: NetworkX DiGraph containing the nodes and edges
+#         nodes: List of node names to deduplicate
+#         partial_map: Map of datasource to partial nodes
+
+#     Returns:
+#         List of deduplicated node names
+#     """
+#     # Filter for datasource nodes only
+#     ds_nodes = [node for node in nodes if node.startswith("ds~")]
+#     non_ds_nodes = [node for node in nodes if not node.startswith("ds~")]
+
+#     if len(ds_nodes) <= 1:
+#         return nodes  # No deduplication needed
+
+#     # Build a map of each datasource to its connected concepts and partial state
+#     ds_info = {}
+
+#     for ds_node in ds_nodes:
+#         # Get connected concept nodes (nodes starting with "c~")
+#         connected_concepts = set()
+#         for neighbor in subgraph.neighbors(ds_node):
+#             if neighbor.startswith("c~"):
+#                 connected_concepts.add(neighbor)
+
+#         # Get partial state for this datasource
+#         partial_state = tuple(sorted(partial_map.get(ds_node, [])))
+
+#         ds_info[ds_node] = {
+#             'concepts': connected_concepts,
+#             'partial_state': partial_state
+#         }
+
+#     # Find datasources to remove (those that are subsets of others)
+#     nodes_to_remove = set()
+#     logger.info('LOOK HERE')
+#     logger.info(ds_info)
+#     for ds_a, info_a in ds_info.items():
+#         for ds_b, info_b in ds_info.items():
+#             if ds_a != ds_b and ds_a not in nodes_to_remove:
+#                 # Check if ds_a is a subset of ds_b (same partial state and concepts are subset)
+#                 if (info_a['partial_state'] == info_b['partial_state'] and
+#                     info_a['concepts'].issubset(info_b['concepts']) and
+#                     len(info_a['concepts']) < len(info_b['concepts'])):
+#                     # ds_a connects to fewer concepts than ds_b, so remove ds_a
+#                     nodes_to_remove.add(ds_a)
+#                 elif (info_a['partial_state'] == info_b['partial_state'] and
+#                       info_a['concepts'] == info_b['concepts']):
+#                     # Exact same concepts and partial state - keep one arbitrarily
+#                     # (keep the lexicographically smaller one for consistency)
+#                     if ds_a > ds_b:
+#                         nodes_to_remove.add(ds_a)
+
+#     # Keep datasource nodes that weren't marked for removal
+#     logger.info(f"{padding(depth)}{LOGGER_PREFIX} Removing duplicate datasource nodes: {nodes_to_remove}")
+#     deduplicated_ds_nodes = [ds for ds in ds_nodes if ds not in nodes_to_remove]
+
+#     # Return deduplicated datasource nodes plus all non-datasource nodes
+#     return deduplicated_ds_nodes + non_ds_nodes
+
+
 def resolve_subgraphs(
     g: ReferenceGraph,
     relevant: list[BuildConcept],
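The new deduplicate_nodes helper ships fully commented out, so it has no runtime effect in this release. A runnable distillation of the subset test it describes, on a toy networkx graph with invented node names (and an assumed-empty partial state):

import networkx as nx

g = nx.Graph()
g.add_edges_from(
    [
        ("ds~orders_small", "c~order.id"),
        ("ds~orders_full", "c~order.id"),
        ("ds~orders_full", "c~order.status"),
    ]
)

# map each datasource to the concepts it can serve
info = {
    ds: {c for c in g.neighbors(ds) if c.startswith("c~")}
    for ds in g.nodes
    if ds.startswith("ds~")
}
# drop any datasource whose concept set is a strict subset of another's
removed = {a for a in info for b in info if a != b and info[a] < info[b]}
print(sorted(set(info) - removed))  # ['ds~orders_full']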
trilogy/core/processing/node_generators/window_node.py
CHANGED

@@ -27,7 +27,7 @@ WINDOW_TYPES = (BuildWindowItem,)


 def resolve_window_parent_concepts(
-    concept: BuildConcept, environment: BuildEnvironment
+    concept: BuildConcept, environment: BuildEnvironment, depth: int
 ) -> tuple[BuildConcept, List[BuildConcept]]:
     if not isinstance(concept.lineage, WINDOW_TYPES):
         raise ValueError
@@ -39,7 +39,9 @@ def resolve_window_parent_concepts(
         base += item.concept_arguments
     if concept.grain:
         for gitem in concept.grain.components:
-            logger.info(
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} appending grain item {gitem} to base"
+            )
             base.append(environment.concepts[gitem])
     return concept.lineage.content, unique(base, "address")
@@ -54,7 +56,7 @@ def gen_window_node(
     history: History,
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
-    base, parent_concepts = resolve_window_parent_concepts(concept, environment)
+    base, parent_concepts = resolve_window_parent_concepts(concept, environment, depth)
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} generating window node for {concept} with parents {[x.address for x in parent_concepts]} and optional {local_optional}"
     )
@@ -62,7 +64,7 @@ def gen_window_node(
         x
         for x in local_optional
         if isinstance(x.lineage, WINDOW_TYPES)
-        and resolve_window_parent_concepts(x, environment)[1] == parent_concepts
+        and resolve_window_parent_concepts(x, environment, depth)[1] == parent_concepts
     ]

     targets = [base]
@@ -79,7 +81,7 @@ def gen_window_node(
     if equivalent_optional:
         for x in equivalent_optional:
             assert isinstance(x.lineage, WINDOW_TYPES)
-            base, parents = resolve_window_parent_concepts(x, environment)
+            base, parents = resolve_window_parent_concepts(x, environment, depth)
             logger.info(
                 f"{padding(depth)}{LOGGER_PREFIX} found equivalent optional {x} with parents {parents}"
            )
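These changes mostly thread depth through resolve_window_parent_concepts so the new grain-item log line is indented to match the surrounding discovery output. A small sketch of the depth-prefixed logging pattern; the padding helper and prefix value below are assumptions for illustration, not trilogy's exact definitions:

import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger("discovery")

LOGGER_PREFIX = "[GEN_WINDOW_NODE]"  # assumed value for illustration


def padding(depth: int) -> str:
    # one tab per recursion level keeps nested discovery logs readable
    return "\t" * depth


logger.info(f"{padding(2)}{LOGGER_PREFIX} appending grain item order.id to base")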
trilogy/core/processing/nodes/group_node.py
CHANGED

@@ -49,6 +49,7 @@ class GroupNode(StrategyNode):
         existence_concepts: List[BuildConcept] | None = None,
         hidden_concepts: set[str] | None = None,
         ordering: BuildOrderBy | None = None,
+        required_outputs: List[BuildConcept] | None = None,
     ):
         super().__init__(
             input_concepts=input_concepts,
@@ -66,6 +67,9 @@ class GroupNode(StrategyNode):
             hidden_concepts=hidden_concepts,
             ordering=ordering,
         )
+        # the set of concepts required to preserve grain
+        # set by group by node generation with aggregates
+        self.required_outputs = required_outputs

     @classmethod
     def check_if_required(
@@ -184,4 +188,7 @@ class GroupNode(StrategyNode):
             existence_concepts=list(self.existence_concepts),
             hidden_concepts=set(self.hidden_concepts),
             ordering=self.ordering,
+            required_outputs=(
+                list(self.required_outputs) if self.required_outputs else None
+            ),
         )
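The copy method now round-trips the new required_outputs field, taking a list copy so a clone can be mutated independently. A minimal stub imitating just that behavior (not trilogy's full GroupNode):

from dataclasses import dataclass


@dataclass
class StubGroupNode:
    output_concepts: list[str]
    required_outputs: list[str] | None = None

    def copy(self) -> "StubGroupNode":
        return StubGroupNode(
            output_concepts=list(self.output_concepts),
            # copy the list so the clone can diverge from the original
            required_outputs=(
                list(self.required_outputs) if self.required_outputs else None
            ),
        )


node = StubGroupNode(["order.id", "order.total"], required_outputs=["order.id"])
clone = node.copy()
clone.required_outputs.append("order.total")
print(node.required_outputs)  # ['order.id']; original untouched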
trilogy/core/processing/utility.py
CHANGED

@@ -90,13 +90,86 @@ class GroupRequiredResponse:
     required: bool


+def find_all_connecting_concepts(g: nx.Graph, ds1: str, ds2: str) -> set[str]:
+    """Find all concepts that connect two datasources"""
+    concepts1 = set(g.neighbors(ds1))
+    concepts2 = set(g.neighbors(ds2))
+    return concepts1 & concepts2
+
+
+def get_connection_keys(
+    all_connections: dict[tuple[str, str], set[str]], left: str, right: str
+) -> set[str]:
+    """Get all concepts that connect two datasources"""
+    lookup = sorted([left, right])
+    key: tuple[str, str] = (lookup[0], lookup[1])
+    return all_connections.get(key, set())
+
+
+def get_join_type(
+    left: str,
+    right: str,
+    partials: dict[str, list[str]],
+    nullables: dict[str, list[str]],
+    all_connecting_keys: set[str],
+) -> JoinType:
+    left_is_partial = any(key in partials.get(left, []) for key in all_connecting_keys)
+    left_is_nullable = any(
+        key in nullables.get(left, []) for key in all_connecting_keys
+    )
+    right_is_partial = any(
+        key in partials.get(right, []) for key in all_connecting_keys
+    )
+    right_is_nullable = any(
+        key in nullables.get(right, []) for key in all_connecting_keys
+    )
+
+    if left_is_nullable and right_is_nullable:
+        join_type = JoinType.FULL
+    elif left_is_partial and right_is_partial:
+        join_type = JoinType.FULL
+    elif left_is_partial:
+        join_type = JoinType.FULL
+    elif right_is_nullable:
+        join_type = JoinType.RIGHT_OUTER
+    elif right_is_partial or left_is_nullable:
+        join_type = JoinType.LEFT_OUTER
+    # we can't inner join if the left was an outer join
+    else:
+        join_type = JoinType.INNER
+    return join_type
+
+
+def reduce_join_types(join_types: Set[JoinType]) -> JoinType:
+    final_join_type = JoinType.INNER
+    if any([x == JoinType.FULL for x in join_types]):
+        final_join_type = JoinType.FULL
+    elif any([x == JoinType.LEFT_OUTER for x in join_types]):
+        final_join_type = JoinType.LEFT_OUTER
+    elif any([x == JoinType.RIGHT_OUTER for x in join_types]):
+        final_join_type = JoinType.RIGHT_OUTER
+
+    return final_join_type
+
+
 def resolve_join_order_v2(
     g: nx.Graph, partials: dict[str, list[str]], nullables: dict[str, list[str]]
 ) -> list[JoinOrderOutput]:
     datasources = [x for x in g.nodes if x.startswith("ds~")]
     concepts = [x for x in g.nodes if x.startswith("c~")]
+
+    # Pre-compute all possible connections between datasources
+    all_connections: dict[tuple[str, str], set[str]] = {}
+    for i, ds1 in enumerate(datasources):
+        for ds2 in datasources[i + 1 :]:
+            connecting_concepts = find_all_connecting_concepts(g, ds1, ds2)
+            if connecting_concepts:
+                key = tuple(sorted([ds1, ds2]))
+                all_connections[key] = connecting_concepts
+
     output: list[JoinOrderOutput] = []
+
+    # create our map of pivots, or common join concepts
     pivot_map = {
         concept: [x for x in g.neighbors(concept) if x in datasources]
         for concept in concepts
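The join-type decision and its precedence reduction are now standalone helpers rather than inline logic. A self-contained sketch of both rules with a stand-in JoinType enum and invented datasource names (a condensed but equivalent form of the shipped branches):

from enum import Enum


class JoinType(Enum):  # stand-in for trilogy's JoinType
    INNER = "inner"
    LEFT_OUTER = "left outer"
    RIGHT_OUTER = "right outer"
    FULL = "full"


def get_join_type(left, right, partials, nullables, keys):
    left_partial = any(k in partials.get(left, []) for k in keys)
    left_nullable = any(k in nullables.get(left, []) for k in keys)
    right_partial = any(k in partials.get(right, []) for k in keys)
    right_nullable = any(k in nullables.get(right, []) for k in keys)
    # a partial left side, or nullable keys on both sides, force a FULL join
    if left_partial or (left_nullable and right_nullable):
        return JoinType.FULL
    if right_nullable:
        return JoinType.RIGHT_OUTER
    if right_partial or left_nullable:
        return JoinType.LEFT_OUTER
    return JoinType.INNER


def reduce_join_types(join_types):
    # widest join wins: FULL > LEFT_OUTER > RIGHT_OUTER > INNER
    for jt in (JoinType.FULL, JoinType.LEFT_OUTER, JoinType.RIGHT_OUTER):
        if jt in join_types:
            return jt
    return JoinType.INNER


print(get_join_type("ds~a", "ds~b", {"ds~a": ["c~k"]}, {}, {"c~k"}))  # JoinType.FULL
print(reduce_join_types({JoinType.INNER, JoinType.LEFT_OUTER}))  # JoinType.LEFT_OUTER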
@@ -108,8 +181,9 @@ def resolve_join_order_v2(
         )
     )
     solo = [x for x in pivot_map if len(pivot_map[x]) == 1]
-    eligible_left = set()
+    eligible_left: set[str] = set()

+    # while we have pivots, keep joining them in
     while pivots:
         next_pivots = [
             x for x in pivots if any(y in eligible_left for y in pivot_map[x])
@@ -120,7 +194,7 @@ def resolve_join_order_v2(
         else:
             root = pivots.pop(0)

-        # sort so less partials is last and eligible lefts are
+        # sort so less partials is last and eligible lefts are first
        def score_key(x: str) -> tuple[int, int, str]:
            base = 1
            # if it's left, higher weight
@@ -133,79 +207,56 @@ def resolve_join_order_v2(
             base -= 1
             return (base, len(x), x)

-        # get
+        # get remaining un-joined datasets
         to_join = sorted(
             [x for x in pivot_map[root] if x not in eligible_left], key=score_key
         )
         while to_join:
             # need to sort this to ensure we join on the best match
-
-
-            )
+            # but check ALL left in case there are non-pivt keys to join on
+            base = sorted([x for x in eligible_left], key=score_key)
             if not base:
                 new = to_join.pop()
                 eligible_left.add(new)
                 base = [new]
             right = to_join.pop()
             # we already joined it
-            # this could happen if the same pivot is shared with multiple
+            # this could happen if the same pivot is shared with multiple DSes
             if right in eligible_left:
                 continue
+
             joinkeys: dict[str, set[str]] = {}
             # sorting puts the best candidate last for pop
             # so iterate over the reversed list
             join_types = set()
+
             for left_candidate in reversed(base):
-
+                # Get all concepts that connect these two datasources
+                all_connecting_keys = get_connection_keys(
+                    all_connections, left_candidate, right
+                )

-                if not
+                if not all_connecting_keys:
                     continue
+
+                # Check if we already have this exact set of keys
                 exists = False
                 for _, v in joinkeys.items():
-                    if v ==
+                    if v == all_connecting_keys:
                         exists = True
                 if exists:
                     continue
-                left_is_partial = any(
-                    key in partials.get(left_candidate, []) for key in common
-                )
-                left_is_nullable = any(
-                    key in nullables.get(left_candidate, []) for key in common
-                )
-                right_is_partial = any(key in partials.get(right, []) for key in common)
-                # we don't care if left is nullable for join type (just keys), but if we did
-                # left_is_nullable = any(
-                #     key in nullables.get(left_candidate, []) for key in common
-                # )
-                right_is_nullable = any(
-                    key in nullables.get(right, []) for key in common
-                )
-                if left_is_nullable and right_is_nullable:
-                    join_type = JoinType.FULL
-                elif left_is_partial and right_is_partial:
-                    join_type = JoinType.FULL
-                elif left_is_partial:
-                    join_type = JoinType.FULL
-                elif right_is_nullable:
-                    join_type = JoinType.RIGHT_OUTER
-                elif right_is_partial or left_is_nullable:
-                    join_type = JoinType.LEFT_OUTER
-                # we can't inner join if the left was an outer join
-                else:
-                    join_type = JoinType.INNER

+                join_type = get_join_type(
+                    left_candidate, right, partials, nullables, all_connecting_keys
+                )
                 join_types.add(join_type)
-                joinkeys[left_candidate] =
-
-
-
-                elif any([x == JoinType.LEFT_OUTER for x in join_types]):
-                    final_join_type = JoinType.LEFT_OUTER
-                elif any([x == JoinType.RIGHT_OUTER for x in join_types]):
-                    final_join_type = JoinType.RIGHT_OUTER
+                joinkeys[left_candidate] = all_connecting_keys
+
+            final_join_type = reduce_join_types(join_types)
+
             output.append(
                 JoinOrderOutput(
-                    # left=left_candidate,
                     right=right,
                     type=final_join_type,
                     keys=joinkeys,
@@ -216,7 +267,6 @@ def resolve_join_order_v2(
     for concept in solo:
         for ds in pivot_map[concept]:
             # if we already have it, skip it
-
             if ds in eligible_left:
                 continue
             # if we haven't had ANY left datasources yet
@@ -224,17 +274,39 @@ def resolve_join_order_v2(
             if not eligible_left:
                 eligible_left.add(ds)
                 continue
-            # otherwise do a full
-
-
-
-
-
-
+            # otherwise do a full outer join
+            # Try to find if there are any connecting keys with existing left tables
+            best_left = None
+            best_keys: set[str] = set()
+            for existing_left in eligible_left:
+                connecting_keys = get_connection_keys(
+                    all_connections, existing_left, ds
+                )
+                if connecting_keys and len(connecting_keys) > len(best_keys):
+                    best_left = existing_left
+                    best_keys = connecting_keys
+
+            if best_left and best_keys:
+                output.append(
+                    JoinOrderOutput(
+                        left=best_left,
+                        right=ds,
+                        type=JoinType.FULL,
+                        keys={best_left: best_keys},
+                    )
+                )
+            else:
+                output.append(
+                    JoinOrderOutput(
+                        # pick random one to be left
+                        left=list(eligible_left)[0],
+                        right=ds,
+                        type=JoinType.FULL,
+                        keys={},
+                    )
                 )
-            )
             eligible_left.add(ds)
+
     # only once we have all joins
     # do we know if some inners need to be left outers
     for review_join in output:
@@ -248,6 +320,7 @@ def resolve_join_order_v2(
             ]
         ):
             review_join.type = JoinType.LEFT_OUTER
+
     return output


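For datasources that share no pivot, the fallback above now searches the already-joined tables for the one with the most connecting concepts before resorting to an arbitrary keyless FULL join. A toy sketch of that selection, with invented names:

all_connections = {
    ("ds~customers", "ds~orders"): {"c~customer.id"},
    ("ds~orders", "ds~refunds"): {"c~order.id", "c~order.date"},
}


def connection_keys(left: str, right: str) -> set[str]:
    # connections are stored under a sorted key, so lookup order is irrelevant
    a, b = sorted((left, right))
    return all_connections.get((a, b), set())


eligible_left = {"ds~customers", "ds~orders"}
ds = "ds~refunds"
best_left, best_keys = None, set()
for existing in sorted(eligible_left):
    keys = connection_keys(existing, ds)
    if keys and len(keys) > len(best_keys):
        best_left, best_keys = existing, keys

print(best_left, sorted(best_keys))  # ds~orders ['c~order.date', 'c~order.id']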
@@ -352,7 +425,9 @@ def resolve_instantiated_concept(
     )


-def reduce_concept_pairs(
+def reduce_concept_pairs(
+    input: list[ConceptPair], right_source: QueryDatasource | BuildDatasource
+) -> list[ConceptPair]:
     left_keys = set()
     right_keys = set()
     for pair in input:
@@ -361,7 +436,10 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
         if pair.right.purpose == Purpose.KEY:
             right_keys.add(pair.right.address)
     final: list[ConceptPair] = []
+    seen_right_keys = set()
     for pair in input:
+        if pair.right.address in seen_right_keys:
+            continue
         if (
             pair.left.purpose == Purpose.PROPERTY
             and pair.left.keys
@@ -374,7 +452,15 @@ def reduce_concept_pairs(input: list[ConceptPair]) -> list[ConceptPair]:
             and pair.right.keys.issubset(right_keys)
         ):
             continue
+
+        seen_right_keys.add(pair.right.address)
         final.append(pair)
+    all_keys = set([x.right.address for x in final])
+    if right_source.grain.components and right_source.grain.components.issubset(
+        all_keys
+    ):
+        return [x for x in final if x.right.address in right_source.grain.components]
+
     return final

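reduce_concept_pairs now skips repeat join conditions on the same right-hand concept and, when the right source's grain is fully covered by the remaining keys, reduces the join to the grain keys alone. A standalone sketch of those two additions with simplified stand-in types:

from dataclasses import dataclass


@dataclass(frozen=True)
class Pair:
    left: str
    right: str  # address of the right-hand concept


def reduce_pairs(pairs: list[Pair], grain_components: set[str]) -> list[Pair]:
    seen_right: set[str] = set()
    final: list[Pair] = []
    for pair in pairs:
        if pair.right in seen_right:
            continue  # one join condition per right concept
        seen_right.add(pair.right)
        final.append(pair)
    all_keys = {p.right for p in final}
    if grain_components and grain_components.issubset(all_keys):
        # joining on the grain alone is sufficient to align rows
        return [p for p in final if p.right in grain_components]
    return final


pairs = [Pair("a.id", "b.id"), Pair("a.id2", "b.id"), Pair("a.x", "b.x")]
print(reduce_pairs(pairs, {"b.id"}))  # [Pair(left='a.id', right='b.id')]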
@@ -443,7 +529,8 @@ def get_node_joins(
                 )
                 for k, v in j.keys.items()
                 for concept in v
-            ]
+            ],
+            ds_node_map[j.right],
         ),
     )
     for j in joins
trilogy/parsing/render.py
CHANGED

@@ -8,7 +8,14 @@ from typing import Any
 from jinja2 import Template

 from trilogy.constants import DEFAULT_NAMESPACE, VIRTUAL_CONCEPT_PREFIX, MagicConstants
-from trilogy.core.enums import
+from trilogy.core.enums import (
+    ConceptSource,
+    DatePart,
+    FunctionType,
+    Modifier,
+    Purpose,
+    ValidationScope,
+)
 from trilogy.core.models.author import (
     AggregateWrapper,
     AlignClause,
@@ -66,6 +73,7 @@ from trilogy.core.statements.author import (
     SelectItem,
     SelectStatement,
     TypeDeclaration,
+    ValidateStatement,
 )

 QUERY_TEMPLATE = Template(
@@ -445,6 +453,13 @@ class Renderer:
         final = "".join(prefixes)
         return f"{final}{self.to_string(arg.content)}"

+    @to_string.register
+    def _(self, arg: ValidateStatement):
+        targets = ",".join(arg.targets) if arg.targets else "*"
+        if arg.scope.value == ValidationScope.ALL:
+            return "validate all;"
+        return f"validate {arg.scope.value} {targets};"
+
     @to_string.register
     def _(self, arg: SelectStatement):
         with self.indented():
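A toy mirror of the new ValidateStatement renderer; the enum values and statement shape below are assumptions for illustration, not trilogy's definitions (note the shipped code compares arg.scope.value against the enum member, while this sketch compares the enum directly):

from dataclasses import dataclass
from enum import Enum


class ValidationScope(Enum):
    ALL = "all"
    DATASOURCE = "datasource"


@dataclass
class ValidateStatement:
    scope: ValidationScope
    targets: list[str] | None = None


def render(arg: ValidateStatement) -> str:
    targets = ",".join(arg.targets) if arg.targets else "*"
    if arg.scope == ValidationScope.ALL:
        return "validate all;"
    return f"validate {arg.scope.value} {targets};"


print(render(ValidateStatement(ValidationScope.ALL)))  # validate all;
print(render(ValidateStatement(ValidationScope.DATASOURCE, ["orders"])))  # validate datasource orders;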
{pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/WHEEL
File without changes

{pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/entry_points.txt
File without changes

{pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/licenses/LICENSE.md
File without changes

{pytrilogy-0.0.3.105.dist-info → pytrilogy-0.0.3.107.dist-info}/top_level.txt
File without changes