pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0

trilogy/core/processing/nodes/filter_node.py (new file)
@@ -0,0 +1,75 @@
+ from typing import List
+
+ from trilogy.core.enums import (
+     SourceType,
+ )
+ from trilogy.core.models.build import (
+     BuildComparison,
+     BuildConcept,
+     BuildConditional,
+     BuildGrain,
+     BuildParenthetical,
+ )
+ from trilogy.core.processing.nodes.base_node import StrategyNode
+
+
+ class FilterNode(StrategyNode):
+     """Filter nodes represent a restriction operation
+     on a concept that creates a new derived concept.
+
+     They should only output a concept and its filtered
+     version, but will have parents that provide all required
+     filtering keys as inputs.
+     """
+
+     source_type = SourceType.FILTER
+
+     def __init__(
+         self,
+         input_concepts: List[BuildConcept],
+         output_concepts: List[BuildConcept],
+         environment,
+         whole_grain: bool = False,
+         parents: List["StrategyNode"] | None = None,
+         depth: int = 0,
+         conditions: (
+             BuildConditional | BuildComparison | BuildParenthetical | None
+         ) = None,
+         preexisting_conditions: (
+             BuildConditional | BuildComparison | BuildParenthetical | None
+         ) = None,
+         partial_concepts: List[BuildConcept] | None = None,
+         force_group: bool | None = False,
+         grain: BuildGrain | None = None,
+         existence_concepts: List[BuildConcept] | None = None,
+     ):
+         super().__init__(
+             output_concepts=output_concepts,
+             environment=environment,
+             whole_grain=whole_grain,
+             parents=parents,
+             depth=depth,
+             input_concepts=input_concepts,
+             conditions=conditions,
+             preexisting_conditions=preexisting_conditions,
+             partial_concepts=partial_concepts,
+             force_group=force_group,
+             grain=grain,
+             existence_concepts=existence_concepts,
+         )
+
+     def copy(self) -> "FilterNode":
+         return FilterNode(
+             input_concepts=list(self.input_concepts),
+             output_concepts=list(self.output_concepts),
+             environment=self.environment,
+             whole_grain=self.whole_grain,
+             parents=self.parents,
+             depth=self.depth,
+             conditions=self.conditions,
+             preexisting_conditions=self.preexisting_conditions,
+             partial_concepts=list(self.partial_concepts),
+             force_group=self.force_group,
+             grain=self.grain,
+             existence_concepts=list(self.existence_concepts),
+         )
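
As a usage sketch: the planner constructs these nodes during query discovery. A minimal, hypothetical illustration of the constructor above; `env`, the concept objects, and `upstream` stand in for objects a planner would already hold and are not part of this diff:

    # Hypothetical names: revenue/region/filtered_revenue are BuildConcepts,
    # region_filter is a BuildComparison (e.g. region = 'EMEA'),
    # upstream is a parent StrategyNode.
    node = FilterNode(
        input_concepts=[revenue, region],             # parents supply all filtering keys
        output_concepts=[revenue, filtered_revenue],  # the concept and its filtered version
        environment=env,
        parents=[upstream],
        conditions=region_filter,
    )
    clone = node.copy()  # per copy() above: lists are re-created; parents and conditions are shared
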
trilogy/core/processing/nodes/group_node.py (new file)
@@ -0,0 +1,194 @@
+ from typing import List, Optional
+
+ from trilogy.constants import logger
+ from trilogy.core.enums import SourceType
+ from trilogy.core.models.build import (
+     BuildComparison,
+     BuildConcept,
+     BuildConditional,
+     BuildDatasource,
+     BuildOrderBy,
+     BuildParenthetical,
+ )
+ from trilogy.core.models.build_environment import BuildEnvironment
+ from trilogy.core.models.execute import QueryDatasource
+ from trilogy.core.processing.nodes.base_node import (
+     StrategyNode,
+     resolve_concept_map,
+ )
+ from trilogy.core.processing.utility import (
+     GroupRequiredResponse,
+     find_nullable_concepts,
+     is_scalar_condition,
+ )
+ from trilogy.utility import unique
+
+ LOGGER_PREFIX = "[CONCEPT DETAIL - GROUP NODE]"
+
+
+ class GroupNode(StrategyNode):
+     source_type = SourceType.GROUP
+
+     def __init__(
+         self,
+         output_concepts: List[BuildConcept],
+         input_concepts: List[BuildConcept],
+         environment: BuildEnvironment,
+         whole_grain: bool = False,
+         parents: List["StrategyNode"] | None = None,
+         depth: int = 0,
+         partial_concepts: Optional[List[BuildConcept]] = None,
+         nullable_concepts: Optional[List[BuildConcept]] = None,
+         force_group: bool | None = None,
+         conditions: (
+             BuildConditional | BuildComparison | BuildParenthetical | None
+         ) = None,
+         preexisting_conditions: (
+             BuildConditional | BuildComparison | BuildParenthetical | None
+         ) = None,
+         existence_concepts: List[BuildConcept] | None = None,
+         hidden_concepts: set[str] | None = None,
+         ordering: BuildOrderBy | None = None,
+         required_outputs: List[BuildConcept] | None = None,
+     ):
+         super().__init__(
+             input_concepts=input_concepts,
+             output_concepts=output_concepts,
+             environment=environment,
+             whole_grain=whole_grain,
+             parents=parents,
+             depth=depth,
+             partial_concepts=partial_concepts,
+             nullable_concepts=nullable_concepts,
+             force_group=force_group,
+             conditions=conditions,
+             existence_concepts=existence_concepts,
+             preexisting_conditions=preexisting_conditions,
+             hidden_concepts=hidden_concepts,
+             ordering=ordering,
+         )
+         # the set of concepts required to preserve grain;
+         # set by group-by node generation with aggregates
+         self.required_outputs = required_outputs
+
+     @classmethod
+     def check_if_required(
+         cls,
+         downstream_concepts: List[BuildConcept],
+         parents: list[QueryDatasource | BuildDatasource],
+         environment: BuildEnvironment,
+         depth: int = 0,
+     ) -> GroupRequiredResponse:
+         from trilogy.core.processing.discovery_utility import check_if_group_required
+
+         return check_if_group_required(downstream_concepts, parents, environment, depth)
+
+     def _resolve(self) -> QueryDatasource:
+         parent_sources: List[QueryDatasource | BuildDatasource] = [
+             p.resolve() for p in self.parents
+         ]
+
+         grains = self.check_if_required(
+             self.output_concepts, parent_sources, self.environment, self.depth
+         )
+         target_grain = grains.target
+         comp_grain = grains.upstream
+         # dynamically decide whether we need to group,
+         # because sometimes we are already at the required grain
+         if not grains.required and self.force_group is not True:
+             # no group by is needed, so just treat this as a select
+             source_type = SourceType.SELECT
+         else:
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} Group node has different grain than parents; group is required."
+                 f" Upstream grains {[str(source.grain) for source in parent_sources]}"
+                 f" with final grain {comp_grain} vs"
+                 f" target grain {target_grain}"
+                 f" delta: {comp_grain - target_grain}"
+             )
+             source_type = SourceType.GROUP
+         source_map = resolve_concept_map(
+             parent_sources,
+             targets=(
+                 unique(
+                     self.output_concepts + self.conditions.concept_arguments,
+                     "address",
+                 )
+                 if self.conditions
+                 else self.output_concepts
+             ),
+             inherited_inputs=self.input_concepts + self.existence_concepts,
+         )
+         nullable_addresses = find_nullable_concepts(
+             source_map=source_map, joins=[], datasources=parent_sources
+         )
+         nullable_concepts = [
+             x for x in self.output_concepts if x.address in nullable_addresses
+         ]
+         base = QueryDatasource(
+             input_concepts=self.input_concepts,
+             output_concepts=self.output_concepts,
+             datasources=parent_sources,
+             source_type=source_type,
+             source_map=source_map,
+             joins=[],
+             grain=target_grain,
+             partial_concepts=self.partial_concepts,
+             nullable_concepts=nullable_concepts,
+             hidden_concepts=self.hidden_concepts,
+             condition=self.conditions,
+             ordering=self.ordering,
+         )
+         # if there is a condition on a group node and it's not scalar,
+         # inject an additional CTE to apply it after the aggregation
+         if self.conditions and not is_scalar_condition(self.conditions):
+             base.condition = None
+             base.output_concepts = unique(
+                 list(base.output_concepts) + list(self.conditions.row_arguments),
+                 "address",
+             )
+             # make any hidden concepts that are now outputs visible again
+             base.hidden_concepts = set(
+                 [x for x in base.hidden_concepts if x not in base.output_concepts]
+             )
+             source_map = resolve_concept_map(
+                 [base],
+                 targets=self.output_concepts,
+                 inherited_inputs=base.output_concepts,
+             )
+             return QueryDatasource(
+                 input_concepts=base.output_concepts,
+                 output_concepts=self.output_concepts,
+                 datasources=[base],
+                 source_type=SourceType.SELECT,
+                 source_map=source_map,
+                 joins=[],
+                 grain=target_grain,
+                 nullable_concepts=base.nullable_concepts,
+                 partial_concepts=self.partial_concepts,
+                 condition=self.conditions,
+                 hidden_concepts=self.hidden_concepts,
+                 ordering=self.ordering,
+             )
+         return base
+
+     def copy(self) -> "GroupNode":
+         return GroupNode(
+             input_concepts=list(self.input_concepts),
+             output_concepts=list(self.output_concepts),
+             environment=self.environment,
+             whole_grain=self.whole_grain,
+             parents=self.parents,
+             depth=self.depth,
+             partial_concepts=list(self.partial_concepts),
+             nullable_concepts=list(self.nullable_concepts),
+             force_group=self.force_group,
+             conditions=self.conditions,
+             preexisting_conditions=self.preexisting_conditions,
+             existence_concepts=list(self.existence_concepts),
+             hidden_concepts=set(self.hidden_concepts),
+             ordering=self.ordering,
+             required_outputs=(
+                 list(self.required_outputs) if self.required_outputs else None
+             ),
+         )
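
The select-versus-group choice in `_resolve` above is at heart a grain comparison. A self-contained toy of that idea, reducing grains to sets of concept addresses (an assumption for illustration; the real check lives in `check_if_group_required` and operates on BuildGrain objects):

    upstream_grain = {"order.id", "order.line_id"}  # grain the parent sources produce
    target_grain = {"order.id"}                     # grain this node must output

    # already at the target grain -> plain SELECT; coarser target -> GROUP required
    group_required = target_grain != upstream_grain
    print(group_required)  # True: order lines must be collapsed to one row per order
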
trilogy/core/processing/nodes/merge_node.py (new file)
@@ -0,0 +1,420 @@
+ from typing import List, Optional, Tuple
+
+ from trilogy.constants import logger
+ from trilogy.core.enums import (
+     JoinType,
+     SourceType,
+ )
+ from trilogy.core.models.build import (
+     BuildComparison,
+     BuildConcept,
+     BuildConditional,
+     BuildDatasource,
+     BuildGrain,
+     BuildOrderBy,
+     BuildParenthetical,
+ )
+ from trilogy.core.models.build_environment import BuildEnvironment
+ from trilogy.core.models.execute import BaseJoin, QueryDatasource, UnnestJoin
+ from trilogy.core.processing.nodes.base_node import (
+     NodeJoin,
+     StrategyNode,
+     resolve_concept_map,
+ )
+ from trilogy.core.processing.utility import find_nullable_concepts, get_node_joins
+ from trilogy.utility import unique
+
+ LOGGER_PREFIX = "[CONCEPT DETAIL - MERGE NODE]"
+
+
+ def deduplicate_nodes(
+     merged: dict[str, QueryDatasource | BuildDatasource],
+     logging_prefix: str,
+     environment: BuildEnvironment,
+ ) -> tuple[bool, dict[str, QueryDatasource | BuildDatasource], set[str]]:
+     duplicates = False
+     removed: set[str] = set()
+     set_map: dict[str, set[str]] = {}
+     for k, v in merged.items():
+         unique_outputs = [
+             # the concept may be in a different environment for a rowset
+             (environment.concepts.get(x.address) or x).address
+             for x in v.output_concepts
+             if x not in v.partial_concepts
+         ]
+         set_map[k] = set(unique_outputs)
+     for k1, v1 in set_map.items():
+         found = False
+         for k2, v2 in set_map.items():
+             if k1 == k2:
+                 continue
+             if (
+                 v1.issubset(v2)
+                 and merged[k1].grain.issubset(merged[k2].grain)
+                 and not merged[k2].partial_concepts
+                 and not merged[k1].partial_concepts
+                 and not merged[k2].condition
+                 and not merged[k1].condition
+             ):
+                 og = merged[k1]
+                 subset_to = merged[k2]
+                 logger.info(
+                     f"{logging_prefix}{LOGGER_PREFIX} dropping extraneous parent node that is a subset of another parent node {og.grain.issubset(subset_to.grain)} {og.grain.components} {subset_to.grain.components}"
+                 )
+                 merged = {k: v for k, v in merged.items() if k != k1}
+                 removed.add(k1)
+                 duplicates = True
+                 found = True
+                 break
+         if found:
+             break
+
+     return duplicates, merged, removed
+
+
+ def deduplicate_nodes_and_joins(
+     joins: List[NodeJoin] | None,
+     merged: dict[str, QueryDatasource | BuildDatasource],
+     logging_prefix: str,
+     environment: BuildEnvironment,
+ ) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource | BuildDatasource]]:
+     # it's possible that we have more sources than we need;
+     # loop until no further subset nodes can be removed
+     duplicates = True
+     while duplicates:
+         duplicates, merged, removed = deduplicate_nodes(
+             merged, logging_prefix, environment=environment
+         )
+         # filter out any joins that reference removed nodes
+         if joins is not None:
+             joins = [
+                 j
+                 for j in joins
+                 if j.left_node.resolve().identifier not in removed
+                 and j.right_node.resolve().identifier not in removed
+             ]
+     return joins, merged
+
+
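
A self-contained toy of the subset test that drives `deduplicate_nodes`, reducing each parent to the set of addresses of its non-partial outputs (the real function additionally requires compatible grains and the absence of conditions on either node):

    sources = {
        "order_ids": {"order.id"},
        "orders_enriched": {"order.id", "order.customer_id", "order.total"},
    }

    # order_ids provides nothing that orders_enriched does not already cover,
    # so the merge can drop it and skip one join entirely.
    redundant = [
        k1
        for k1, v1 in sources.items()
        for k2, v2 in sources.items()
        if k1 != k2 and v1.issubset(v2)
    ]
    print(redundant)  # ['order_ids']
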
+ class MergeNode(StrategyNode):
+     source_type = SourceType.MERGE
+
+     def __init__(
+         self,
+         input_concepts: List[BuildConcept],
+         output_concepts: List[BuildConcept],
+         environment,
+         whole_grain: bool = False,
+         parents: List["StrategyNode"] | None = None,
+         node_joins: List[NodeJoin] | None = None,
+         join_concepts: Optional[List] = None,
+         force_join_type: Optional[JoinType] = None,
+         partial_concepts: Optional[List[BuildConcept]] = None,
+         nullable_concepts: Optional[List[BuildConcept]] = None,
+         force_group: bool | None = None,
+         depth: int = 0,
+         grain: BuildGrain | None = None,
+         conditions: (
+             BuildConditional | BuildComparison | BuildParenthetical | None
+         ) = None,
+         preexisting_conditions: (
+             BuildConditional | BuildComparison | BuildParenthetical | None
+         ) = None,
+         hidden_concepts: set[str] | None = None,
+         virtual_output_concepts: List[BuildConcept] | None = None,
+         existence_concepts: List[BuildConcept] | None = None,
+         ordering: BuildOrderBy | None = None,
+     ):
+         super().__init__(
+             input_concepts=input_concepts,
+             output_concepts=output_concepts,
+             environment=environment,
+             whole_grain=whole_grain,
+             parents=parents,
+             depth=depth,
+             partial_concepts=partial_concepts,
+             nullable_concepts=nullable_concepts,
+             force_group=force_group,
+             grain=grain,
+             conditions=conditions,
+             preexisting_conditions=preexisting_conditions,
+             hidden_concepts=hidden_concepts,
+             virtual_output_concepts=virtual_output_concepts,
+             existence_concepts=existence_concepts,
+             ordering=ordering,
+         )
+         self.join_concepts = join_concepts
+         self.force_join_type = force_join_type
+         self.node_joins: List[NodeJoin] | None = node_joins
+
+         if self.node_joins is not None:
+             # drop any self-joins from the provided joins
+             final_joins: List[NodeJoin] = []
+             for join in self.node_joins:
+                 if join.left_node.resolve().name == join.right_node.resolve().name:
+                     continue
+                 final_joins.append(join)
+             self.node_joins = final_joins
+
+     def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
+         joins = []
+         for join in node_joins:
+             left = join.left_node.resolve()
+             right = join.right_node.resolve()
+             if left.identifier == right.identifier:
+                 raise SyntaxError(f"Cannot join node {left.identifier} to itself")
+             joins.append(
+                 BaseJoin(
+                     left_datasource=left,
+                     right_datasource=right,
+                     join_type=join.join_type,
+                     concepts=join.concepts,
+                     concept_pairs=join.concept_pairs,
+                     modifiers=join.modifiers,
+                 )
+             )
+         return joins
+
+     def create_full_joins(self, dataset_list: List[QueryDatasource | BuildDatasource]):
+         joins = []
+         seen = set()
+         for left_value in dataset_list:
+             for right_value in dataset_list:
+                 if left_value.identifier == right_value.identifier:
+                     continue
+                 if left_value.identifier in seen and right_value.identifier in seen:
+                     continue
+                 joins.append(
+                     BaseJoin(
+                         left_datasource=left_value,
+                         right_datasource=right_value,
+                         join_type=JoinType.FULL,
+                         concepts=[],
+                     )
+                 )
+                 seen.add(left_value.identifier)
+                 seen.add(right_value.identifier)
+         return joins
+
+     def generate_joins(
+         self,
+         final_datasets,
+         final_joins: List[NodeJoin] | None,
+         pregrain: BuildGrain,
+         grain: BuildGrain,
+         environment: BuildEnvironment,
+     ) -> List[BaseJoin | UnnestJoin]:
+         # finally, join between the sources for unique values
+         dataset_list: List[QueryDatasource | BuildDatasource] = sorted(
+             final_datasets, key=lambda x: -len(x.grain.components)
+         )
+
+         logger.info(
+             f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has {len(dataset_list)} parents, starting merge"
+         )
+         if final_joins is None:
+             if not pregrain.components:
+                 logger.info(
+                     f"{self.logging_prefix}{LOGGER_PREFIX} no grain components, doing full join"
+                 )
+                 joins = self.create_full_joins(dataset_list)
+             else:
+                 logger.info(
+                     f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
+                 )
+                 joins = get_node_joins(dataset_list, environment=environment)
+         elif final_joins:
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
+             )
+             joins = self.translate_node_joins(final_joins)
+         else:
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} final joins {final_joins} is not None but is empty, skipping join generation"
+             )
+             return []
+         return joins
+
+     def _resolve(self) -> QueryDatasource:
+         parent_sources: List[QueryDatasource | BuildDatasource] = [
+             p.resolve() for p in self.parents
+         ]
+         merged: dict[str, QueryDatasource | BuildDatasource] = {}
+         final_joins: List[NodeJoin] | None = self.node_joins
+         for source in parent_sources:
+             if source.identifier in merged:
+                 logger.info(
+                     f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.identifier} into existing"
+                 )
+                 merged[source.identifier] = merged[source.identifier] + source
+             else:
+                 merged[source.identifier] = source
+
+         # it's possible that we have more sources than we need
+         final_joins, merged = deduplicate_nodes_and_joins(
+             final_joins, merged, self.logging_prefix, self.environment
+         )
+         final_datasets: List[QueryDatasource | BuildDatasource] = list(merged.values())
+
+         existence_final = [
+             x
+             for x in final_datasets
+             if all([y in self.existence_concepts for y in x.output_concepts])
+         ]
+         # early exit if we can just return the single remaining parent
+         if len(merged.keys()) == 1:
+             final: QueryDatasource | BuildDatasource = list(merged.values())[0]
+             if (
+                 set([c.address for c in final.output_concepts])
+                 == set([c.address for c in self.output_concepts])
+                 and not self.conditions
+                 and isinstance(final, QueryDatasource)
+             ):
+                 logger.info(
+                     f"{self.logging_prefix}{LOGGER_PREFIX} Merge node has only one parent with the same"
+                     " outputs as this merge node, dropping merge node"
+                 )
+                 # push the ordering up to the parent we return
+                 final.ordering = self.ordering
+                 return final
+
+         # if we have multiple candidates, see if one is good enough on its own
+         for dataset in final_datasets:
+             output_set = set(
+                 [
+                     c.address
+                     for c in dataset.output_concepts
+                     if c.address not in [x.address for x in dataset.partial_concepts]
+                 ]
+             )
+             if (
+                 all([c.address in output_set for c in self.all_concepts])
+                 and not self.conditions
+                 and isinstance(dataset, QueryDatasource)
+             ):
+                 logger.info(
+                     f"{self.logging_prefix}{LOGGER_PREFIX} Merge node not required as parent node {dataset.source_type}"
+                     f" has all required output properties with partial {[c.address for c in dataset.partial_concepts]}"
+                     f" and self has no conditions ({self.conditions})"
+                 )
+                 dataset.ordering = self.ordering
+                 return dataset
+
+         pregrain = BuildGrain()
+
+         for source in final_datasets:
+             if all(
+                 [x.address in self.existence_concepts for x in source.output_concepts]
+             ):
+                 logger.info(
+                     f"{self.logging_prefix}{LOGGER_PREFIX} skipping existence-only source with {source.output_concepts} from grain accumulation"
+                 )
+                 continue
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} adding source grain {source.grain} from source {source.identifier} to pregrain"
+             )
+             pregrain += source.grain
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} pregrain is now {pregrain}"
+             )
+
+         pregrain = BuildGrain.from_concepts(
+             pregrain.components, environment=self.environment
+         )
+
+         grain = self.grain if self.grain else pregrain
+         logger.info(
+             f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
+         )
+         join_candidates = [x for x in final_datasets if x not in existence_final]
+         if len(join_candidates) > 1:
+             joins: List[BaseJoin | UnnestJoin] = self.generate_joins(
+                 join_candidates, final_joins, pregrain, grain, self.environment
+             )
+         else:
+             joins = []
+
+         logger.info(
+             f"{self.logging_prefix}{LOGGER_PREFIX} Final join count for CTE parent count {len(join_candidates)} is {len(joins)}"
+         )
+         full_join_concepts = []
+         for join in joins:
+             if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
+                 full_join_concepts += join.input_concepts
+
+         if self.force_group is True:
+             force_group = True
+         elif self.whole_grain:
+             force_group = False
+         elif self.force_group is False:
+             force_group = False
+         elif not any(
+             [d.grain.issubset(grain) for d in final_datasets]
+         ) and not pregrain.issubset(grain):
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} no parents include full grain {grain} and pregrain {pregrain} does not match, assume must group to grain. Have {[str(d.grain) for d in final_datasets]}"
+             )
+             force_group = True
+         else:
+             force_group = None
+
+         qd_joins: List[BaseJoin | UnnestJoin] = [*joins]
+
+         source_map = resolve_concept_map(
+             final_datasets,
+             targets=self.output_concepts,
+             inherited_inputs=self.input_concepts + self.existence_concepts,
+             full_joins=full_join_concepts,
+         )
+         nullable_concepts = find_nullable_concepts(
+             source_map=source_map, joins=joins, datasources=final_datasets
+         )
+         if force_group:
+             grain = BuildGrain.from_concepts(
+                 self.output_concepts, environment=self.environment
+             )
+             logger.info(
+                 f"{self.logging_prefix}{LOGGER_PREFIX} forcing group by to achieve grain {grain}"
+             )
+         qds = QueryDatasource(
+             input_concepts=unique(self.input_concepts, "address"),
+             output_concepts=unique(self.output_concepts, "address"),
+             datasources=final_datasets,
+             source_type=self.source_type,
+             source_map=source_map,
+             joins=qd_joins,
+             grain=grain,
+             nullable_concepts=[
+                 x for x in self.output_concepts if x.address in nullable_concepts
+             ],
+             partial_concepts=self.partial_concepts,
+             force_group=force_group,
+             condition=self.conditions,
+             hidden_concepts=self.hidden_concepts,
+             ordering=self.ordering,
+         )
+         return qds
+
+     def copy(self) -> "MergeNode":
+         return MergeNode(
+             input_concepts=list(self.input_concepts),
+             output_concepts=list(self.output_concepts),
+             environment=self.environment,
+             whole_grain=self.whole_grain,
+             parents=self.parents,
+             depth=self.depth,
+             partial_concepts=list(self.partial_concepts),
+             force_group=self.force_group,
+             grain=self.grain,
+             conditions=self.conditions,
+             preexisting_conditions=self.preexisting_conditions,
+             nullable_concepts=list(self.nullable_concepts),
+             hidden_concepts=set(self.hidden_concepts),
+             virtual_output_concepts=list(self.virtual_output_concepts),
+             node_joins=list(self.node_joins) if self.node_joins else None,
+             join_concepts=list(self.join_concepts) if self.join_concepts else None,
+             force_join_type=self.force_join_type,
+             existence_concepts=list(self.existence_concepts),
+             ordering=self.ordering,
+         )
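
The force_group cascade near the end of `_resolve` reads as a small decision table. A hedged restatement as a standalone function, again reducing grains to address sets purely for illustration (the real code compares BuildGrain objects):

    def decide_force_group(
        force_group: bool | None,
        whole_grain: bool,
        parent_grains: list[set[str]],
        pregrain: set[str],
        grain: set[str],
    ) -> bool | None:
        if force_group is True:  # an explicit request always wins
            return True
        if whole_grain or force_group is False:
            return False
        # no parent is already at (or within) the target grain: must group down to it
        if not any(g.issubset(grain) for g in parent_grains) and not pregrain.issubset(grain):
            return True
        return None  # undetermined; defer to downstream processing

    # one parent at (order.id, item) must be grouped to reach grain (order.id)
    assert decide_force_group(None, False, [{"order.id", "item"}], {"order.id", "item"}, {"order.id"}) is True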