pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
trilogy/core/processing/utility.py
@@ -0,0 +1,823 @@
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from logging import Logger
from typing import Any, Dict, List, Set, Tuple

import networkx as nx

from trilogy.constants import MagicConstants
from trilogy.core.enums import (
    BooleanOperator,
    DatePart,
    Derivation,
    FunctionClass,
    Granularity,
    JoinType,
    Modifier,
    Purpose,
)
from trilogy.core.models.build import (
    BuildAggregateWrapper,
    BuildCaseElse,
    BuildCaseWhen,
    BuildComparison,
    BuildConcept,
    BuildConditional,
    BuildDatasource,
    BuildFilterItem,
    BuildFunction,
    BuildGrain,
    BuildParenthetical,
    BuildSubselectComparison,
    BuildWindowItem,
    LooseBuildConceptList,
)
from trilogy.core.models.build_environment import BuildEnvironment
from trilogy.core.models.core import (
    ArrayType,
    DataType,
    ListWrapper,
    MapType,
    MapWrapper,
    NumericType,
    TraitDataType,
    TupleWrapper,
)
from trilogy.core.models.execute import (
    CTE,
    BaseJoin,
    ConceptPair,
    QueryDatasource,
    UnionCTE,
    UnnestJoin,
)
from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
from trilogy.core.statements.execute import ProcessedQuery
from trilogy.utility import unique

AGGREGATE_TYPES = (BuildAggregateWrapper,)
SUBSELECT_TYPES = (BuildSubselectComparison,)
COMPARISON_TYPES = (BuildComparison,)
FUNCTION_TYPES = (BuildFunction,)
PARENTHETICAL_TYPES = (BuildParenthetical,)
CONDITIONAL_TYPES = (BuildConditional,)
CONCEPT_TYPES = (BuildConcept,)
WINDOW_TYPES = (BuildWindowItem,)


class NodeType(Enum):
    CONCEPT = 1
    NODE = 2


@dataclass
class JoinOrderOutput:
    right: str
    type: JoinType
    keys: dict[str, set[str]]
    left: str | None = None

    @property
    def lefts(self):
        return set(self.keys.keys())


@dataclass
class GroupRequiredResponse:
    target: BuildGrain
    upstream: BuildGrain
    required: bool


def find_all_connecting_concepts(g: nx.Graph, ds1: str, ds2: str) -> set[str]:
    """Find all concepts that connect two datasources."""
    concepts1 = set(g.neighbors(ds1))
    concepts2 = set(g.neighbors(ds2))
    return concepts1 & concepts2
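# Editorial sketch (not part of the released module): with the "ds~"/"c~" node
# naming used throughout this module, shared-concept discovery is a plain
# neighbor intersection:
#
#   >>> g = nx.Graph()
#   >>> g.add_edges_from([("ds~orders", "c~order_id"),
#   ...                   ("ds~orders", "c~customer_id"),
#   ...                   ("ds~customers", "c~customer_id")])
#   >>> find_all_connecting_concepts(g, "ds~orders", "ds~customers")
#   {'c~customer_id'}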
def get_connection_keys(
    all_connections: dict[tuple[str, str], set[str]], left: str, right: str
) -> set[str]:
    """Get all concepts that connect two datasources."""
    lookup = sorted([left, right])
    key: tuple[str, str] = (lookup[0], lookup[1])
    return all_connections.get(key, set())


def get_join_type(
    left: str,
    right: str,
    partials: dict[str, list[str]],
    nullables: dict[str, list[str]],
    all_connecting_keys: set[str],
) -> JoinType:
    left_is_partial = any(key in partials.get(left, []) for key in all_connecting_keys)
    left_is_nullable = any(
        key in nullables.get(left, []) for key in all_connecting_keys
    )
    right_is_partial = any(
        key in partials.get(right, []) for key in all_connecting_keys
    )
    right_is_nullable = any(
        key in nullables.get(right, []) for key in all_connecting_keys
    )

    left_complete = not left_is_partial and not left_is_nullable
    right_complete = not right_is_partial and not right_is_nullable

    if not left_complete and not right_complete:
        join_type = JoinType.FULL
    elif not left_complete and right_complete:
        join_type = JoinType.RIGHT_OUTER
    elif not right_complete and left_complete:
        join_type = JoinType.LEFT_OUTER
    else:
        join_type = JoinType.INNER
    return join_type
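# Editorial sketch (not part of the released module): a side that is complete
# (neither partial nor nullable) on every connecting key keeps all of its rows.
# E.g. if "ds~orders" carries c~customer_id only partially while "ds~customers"
# is complete on it, the right side must be preserved:
#
#   get_join_type("ds~orders", "ds~customers",
#                 partials={"ds~orders": ["c~customer_id"]}, nullables={},
#                 all_connecting_keys={"c~customer_id"})
#   # -> JoinType.RIGHT_OUTER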
def reduce_join_types(join_types: Set[JoinType]) -> JoinType:
    final_join_type = JoinType.INNER
    has_full = any([x == JoinType.FULL for x in join_types])

    if has_full:
        final_join_type = JoinType.FULL
        return final_join_type
    has_left = any([x == JoinType.LEFT_OUTER for x in join_types])
    has_right = any([x == JoinType.RIGHT_OUTER for x in join_types])
    if has_left and has_right:
        final_join_type = JoinType.FULL
    elif has_left:
        final_join_type = JoinType.LEFT_OUTER
    elif has_right:
        final_join_type = JoinType.RIGHT_OUTER

    return final_join_type
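# Editorial sketch (not part of the released module): when one right-hand source
# joins to several lefts, the per-candidate types collapse to the single most
# content-preserving type:
#
#   reduce_join_types({JoinType.INNER, JoinType.LEFT_OUTER})
#   # -> JoinType.LEFT_OUTER
#   reduce_join_types({JoinType.LEFT_OUTER, JoinType.RIGHT_OUTER})
#   # -> JoinType.FULL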
def ensure_content_preservation(joins: list[JoinOrderOutput]):
    # ensure that if prior joins could have introduced nulls on a join's
    # left-hand inputs, the join type is widened to control for that
    for idx, review_join in enumerate(joins):
        predecessors = joins[:idx]
        if review_join.type == JoinType.FULL:
            continue
        has_prior_left = False
        has_prior_right = False
        for pred in predecessors:
            if (
                pred.type in (JoinType.LEFT_OUTER, JoinType.FULL)
                and pred.right in review_join.lefts
            ):
                has_prior_left = True
            if pred.type in (JoinType.RIGHT_OUTER, JoinType.FULL) and any(
                x in review_join.lefts for x in pred.lefts
            ):
                has_prior_right = True
        if has_prior_left and has_prior_right:
            target = JoinType.FULL
        elif has_prior_left:
            target = (
                JoinType.LEFT_OUTER
                if review_join.type != JoinType.RIGHT_OUTER
                else JoinType.FULL
            )
        elif has_prior_right:
            target = (
                JoinType.RIGHT_OUTER
                if review_join.type != JoinType.LEFT_OUTER
                else JoinType.FULL
            )
        else:
            target = review_join.type
        if review_join.type != target:
            review_join.type = target


def resolve_join_order_v2(
    g: nx.Graph, partials: dict[str, list[str]], nullables: dict[str, list[str]]
) -> list[JoinOrderOutput]:
    datasources = [x for x in g.nodes if x.startswith("ds~")]
    concepts = [x for x in g.nodes if x.startswith("c~")]

    # pre-compute all possible connections between datasources
    all_connections: dict[tuple[str, str], set[str]] = {}
    for i, ds1 in enumerate(datasources):
        for ds2 in datasources[i + 1 :]:
            connecting_concepts = find_all_connecting_concepts(g, ds1, ds2)
            if connecting_concepts:
                key = tuple(sorted([ds1, ds2]))
                all_connections[key] = connecting_concepts

    output: list[JoinOrderOutput] = []

    # create our map of pivots, i.e. common join concepts
    pivot_map = {
        concept: [x for x in g.neighbors(concept) if x in datasources]
        for concept in concepts
    }
    pivots = list(
        sorted(
            [x for x in pivot_map if len(pivot_map[x]) > 1],
            key=lambda x: (len(pivot_map[x]), len(x), x),
        )
    )
    solo = [x for x in pivot_map if len(pivot_map[x]) == 1]
    eligible_left: set[str] = set()

    # while we have pivots, keep joining them in
    while pivots:
        next_pivots = [
            x for x in pivots if any(y in eligible_left for y in pivot_map[x])
        ]
        if next_pivots:
            root = next_pivots[0]
            pivots = [x for x in pivots if x != root]
        else:
            root = pivots.pop(0)

        # sort so candidates with fewer partials/nullables come last
        # (pop() takes from the end), weighting already-joined lefts up
        def score_key(x: str) -> tuple[int, int, str]:
            base = 1
            # if it's already an eligible left, higher weight
            if x in eligible_left:
                base += 3
            # if it has the concept as a partial or nullable, lower weight
            if root in partials.get(x, []):
                base -= 1
            if root in nullables.get(x, []):
                base -= 1
            return (base, len(x), x)

        # get the remaining un-joined datasources
        to_join = sorted(
            [x for x in pivot_map[root] if x not in eligible_left], key=score_key
        )
        while to_join:
            # sort to ensure we join on the best match,
            # but check ALL lefts in case there are non-pivot keys to join on
            base = sorted([x for x in eligible_left], key=score_key)
            if not base:
                new = to_join.pop()
                eligible_left.add(new)
                base = [new]
            right = to_join.pop()
            # skip if we already joined it; this can happen
            # when the same pivot is shared with multiple datasources
            if right in eligible_left:
                continue

            joinkeys: dict[str, set[str]] = {}
            # sorting puts the best candidate last for pop,
            # so iterate over the reversed list
            join_types = set()

            for left_candidate in reversed(base):
                # get all concepts that connect these two datasources
                all_connecting_keys = get_connection_keys(
                    all_connections, left_candidate, right
                )

                if not all_connecting_keys:
                    continue

                # check if we already have this exact set of keys
                exists = False
                for _, v in joinkeys.items():
                    if v == all_connecting_keys:
                        exists = True
                if exists:
                    continue

                join_type = get_join_type(
                    left_candidate, right, partials, nullables, all_connecting_keys
                )
                join_types.add(join_type)
                joinkeys[left_candidate] = all_connecting_keys

            final_join_type = reduce_join_types(join_types)

            output.append(
                JoinOrderOutput(
                    right=right,
                    type=final_join_type,
                    keys=joinkeys,
                )
            )
            eligible_left.add(right)

    for concept in solo:
        for ds in pivot_map[concept]:
            # if we already have it, skip it
            if ds in eligible_left:
                continue
            # if we haven't had ANY left datasources yet,
            # this one needs to become the first
            if not eligible_left:
                eligible_left.add(ds)
                continue
            # otherwise do a full outer join; try to find
            # connecting keys shared with an existing left table
            best_left = None
            best_keys: set[str] = set()
            for existing_left in eligible_left:
                connecting_keys = get_connection_keys(
                    all_connections, existing_left, ds
                )
                if connecting_keys and len(connecting_keys) > len(best_keys):
                    best_left = existing_left
                    best_keys = connecting_keys

            if best_left and best_keys:
                output.append(
                    JoinOrderOutput(
                        left=best_left,
                        right=ds,
                        type=JoinType.FULL,
                        keys={best_left: best_keys},
                    )
                )
            else:
                output.append(
                    JoinOrderOutput(
                        # pick an arbitrary existing source to be the left
                        left=list(eligible_left)[0],
                        right=ds,
                        type=JoinType.FULL,
                        keys={},
                    )
                )
            eligible_left.add(ds)

    # only once we have all joins do we know
    # if some inners need to become left outers
    ensure_content_preservation(output)

    return output
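# Editorial sketch (not part of the released module): a hypothetical three-source
# graph. c~customer_id is the only pivot (it touches two datasources), so orders
# and customers are joined on it first; ds~stores shares no concept, so it is
# appended last as a keyless FULL join against an arbitrary already-joined left.
#
#   g = nx.Graph()
#   g.add_edges_from([
#       ("ds~orders", "c~order_id"), ("ds~orders", "c~customer_id"),
#       ("ds~customers", "c~customer_id"), ("ds~stores", "c~store_id"),
#   ])
#   resolve_join_order_v2(g, partials={}, nullables={})
#   # -> [JoinOrderOutput(right="ds~orders", type=JoinType.INNER,
#   #                     keys={"ds~customers": {"c~customer_id"}}),
#   #     JoinOrderOutput(left=<arbitrary joined source>, right="ds~stores",
#   #                     type=JoinType.FULL, keys={})]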
def concept_to_relevant_joins(concepts: list[BuildConcept]) -> List[BuildConcept]:
    sub_props = LooseBuildConceptList(
        concepts=[
            x for x in concepts if x.keys and all([key in concepts for key in x.keys])
        ]
    )
    final = [c for c in concepts if c.address not in sub_props]
    return unique(final, "address")


def padding(x: int) -> str:
    return "\t" * x


def create_log_lambda(prefix: str, depth: int, logger: Logger):
    pad = padding(depth)

    def log_lambda(msg: str):
        logger.info(f"{pad}{prefix} {msg}")

    return log_lambda


def calculate_graph_relevance(
    g: nx.DiGraph, subset_nodes: set[str], concepts: set[BuildConcept]
) -> int:
    """Calculate the relevance of a subgraph.
    Relevance is used to prune irrelevant components from the graph.
    """
    relevance = 0
    for node in g.nodes:
        if node not in subset_nodes:
            continue
        if not g.nodes[node]["type"] == NodeType.CONCEPT:
            continue
        concept = [x for x in concepts if x.address == node].pop()
        # a single-row concept can always be cross-joined,
        # so it never makes a subgraph relevant on its own
        if concept.granularity == Granularity.SINGLE_ROW:
            continue
        if concept.derivation == Derivation.CONSTANT:
            continue
        # an aggregate up to an arbitrary grain can be joined in later
        # and can be ignored in the subgraph
        if concept.purpose == Purpose.METRIC:
            if not concept.grain:
                continue
            if len(concept.grain.components) == 0:
                continue
        if concept.grain and len(concept.grain.components) > 0:
            relevance += 1
            continue
        # Added 2023-10-18 since we seemed to be strangely dropping things
        relevance += 1
    return relevance
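# Editorial note (not part of the released module): a component whose concepts
# are all single-row, constant, or grain-less metrics scores 0 here and is
# treated as always-joinable, so get_disconnected_components below will not
# count it as a genuinely disconnected subgraph.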
def add_node_join_concept(
    graph: nx.DiGraph,
    concept: BuildConcept,
    concept_map: dict[str, BuildConcept],
    ds_node: str,
    environment: BuildEnvironment,
):
    name = f"c~{concept.address}"
    graph.add_node(name, type=NodeType.CONCEPT)
    graph.add_edge(ds_node, name)
    concept_map[name] = concept
    for v_address in concept.pseudonyms:
        v = environment.alias_origin_lookup.get(
            v_address, environment.concepts[v_address]
        )
        if f"c~{v.address}" in graph.nodes:
            continue
        if v.address != concept.address:
            add_node_join_concept(
                graph=graph,
                concept=v,
                concept_map=concept_map,
                ds_node=ds_node,
                environment=environment,
            )


def resolve_instantiated_concept(
    concept: BuildConcept, datasource: QueryDatasource | BuildDatasource
) -> BuildConcept:
    if concept.address in datasource.output_concepts:
        return concept
    for k in concept.pseudonyms:
        if k in datasource.output_concepts:
            return [x for x in datasource.output_concepts if x.address == k].pop()
        if any(k in x.pseudonyms for x in datasource.output_concepts):
            return [x for x in datasource.output_concepts if k in x.pseudonyms].pop()
    raise SyntaxError(
        f"Could not find {concept.address} in {datasource.identifier} output "
        f"{[c.address for c in datasource.output_concepts]}; acceptable synonyms: {concept.pseudonyms}"
    )


def reduce_concept_pairs(
    input: list[ConceptPair], right_source: QueryDatasource | BuildDatasource
) -> list[ConceptPair]:
    left_keys = set()
    right_keys = set()
    for pair in input:
        if pair.left.purpose == Purpose.KEY:
            left_keys.add(pair.left.address)
        if pair.right.purpose == Purpose.KEY:
            right_keys.add(pair.right.address)
    final: list[ConceptPair] = []
    seen_right_keys = set()
    for pair in input:
        if pair.right.address in seen_right_keys:
            continue
        if (
            pair.left.purpose == Purpose.PROPERTY
            and pair.left.keys
            and pair.left.keys.issubset(left_keys)
        ):
            continue
        if (
            pair.right.purpose == Purpose.PROPERTY
            and pair.right.keys
            and pair.right.keys.issubset(right_keys)
        ):
            continue

        seen_right_keys.add(pair.right.address)
        final.append(pair)
    all_keys = set([x.right.address for x in final])
    if right_source.grain.components and right_source.grain.components.issubset(
        all_keys
    ):
        return [x for x in final if x.right.address in right_source.grain.components]

    return final


def get_modifiers(
    concept: str,
    join: JoinOrderOutput,
    ds_node_map: dict[str, QueryDatasource | BuildDatasource],
):
    base = []

    if join.right and concept in ds_node_map[join.right].nullable_concepts:
        base.append(Modifier.NULLABLE)
    if join.left and concept in ds_node_map[join.left].nullable_concepts:
        base.append(Modifier.NULLABLE)
    return list(set(base))


def get_node_joins(
    datasources: List[QueryDatasource | BuildDatasource],
    environment: BuildEnvironment,
) -> List[BaseJoin]:
    graph = nx.Graph()
    partials: dict[str, list[str]] = {}
    nullables: dict[str, list[str]] = {}
    ds_node_map: dict[str, QueryDatasource | BuildDatasource] = {}
    concept_map: dict[str, BuildConcept] = {}
    for datasource in datasources:
        ds_node = f"ds~{datasource.identifier}"
        ds_node_map[ds_node] = datasource
        graph.add_node(ds_node, type=NodeType.NODE)
        partials[ds_node] = [f"c~{c.address}" for c in datasource.partial_concepts]
        nullables[ds_node] = [f"c~{c.address}" for c in datasource.nullable_concepts]
        for concept in datasource.output_concepts:
            if concept.address in datasource.hidden_concepts:
                continue
            add_node_join_concept(
                graph=graph,
                concept=concept,
                concept_map=concept_map,
                ds_node=ds_node,
                environment=environment,
            )

    joins = resolve_join_order_v2(graph, partials=partials, nullables=nullables)
    return [
        BaseJoin(
            left_datasource=ds_node_map[j.left] if j.left else None,
            right_datasource=ds_node_map[j.right],
            join_type=j.type,
            # preserve an explicit empty concept list for keyless joins
            concepts=[] if not j.keys else None,
            concept_pairs=reduce_concept_pairs(
                [
                    ConceptPair.model_construct(
                        left=resolve_instantiated_concept(
                            concept_map[concept], ds_node_map[k]
                        ),
                        right=resolve_instantiated_concept(
                            concept_map[concept], ds_node_map[j.right]
                        ),
                        existing_datasource=ds_node_map[k],
                        modifiers=get_modifiers(
                            concept_map[concept].address, j, ds_node_map
                        ),
                    )
                    for k, v in j.keys.items()
                    for concept in v
                ],
                ds_node_map[j.right],
            ),
        )
        for j in joins
    ]


def get_disconnected_components(
    concept_map: Dict[str, Set[BuildConcept]],
) -> Tuple[int, List]:
    """Find whether any of the datasources are not linked."""
    graph = nx.Graph()
    all_concepts = set()
    for datasource, concepts in concept_map.items():
        graph.add_node(datasource, type=NodeType.NODE)
        for concept in concepts:
            graph.add_node(concept.address, type=NodeType.CONCEPT)
            graph.add_edge(datasource, concept.address)
            all_concepts.add(concept)
    sub_graphs = list(nx.connected_components(graph))
    sub_graphs = [
        x for x in sub_graphs if calculate_graph_relevance(graph, x, all_concepts) > 0
    ]
    return len(sub_graphs), sub_graphs
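# Editorial sketch (not part of the released module), using hypothetical
# BuildConcept values (order_id_concept, customer_id_concept) assumed to be
# ordinary keyed concepts: two datasources that share no concept produce two
# relevant components, signalling that no join path exists between them.
#
#   count, components = get_disconnected_components({
#       "ds~orders": {order_id_concept},
#       "ds~customers": {customer_id_concept},
#   })
#   # count == 2 -> the query cannot be stitched together as-is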
def is_scalar_condition(
    element: (
        int
        | str
        | float
        | date
        | datetime
        | list[Any]
        | BuildConcept
        | BuildWindowItem
        | BuildFilterItem
        | BuildConditional
        | BuildComparison
        | BuildParenthetical
        | BuildFunction
        | BuildAggregateWrapper
        | BuildCaseWhen
        | BuildCaseElse
        | MagicConstants
        | TraitDataType
        | DataType
        | MapWrapper[Any, Any]
        | ArrayType
        | MapType
        | NumericType
        | DatePart
        | ListWrapper[Any]
        | TupleWrapper[Any]
    ),
    materialized: set[str] | None = None,
) -> bool:
    if isinstance(element, PARENTHETICAL_TYPES):
        return is_scalar_condition(element.content, materialized)
    elif isinstance(element, SUBSELECT_TYPES):
        return True
    elif isinstance(element, COMPARISON_TYPES):
        return is_scalar_condition(element.left, materialized) and is_scalar_condition(
            element.right, materialized
        )
    elif isinstance(element, FUNCTION_TYPES):
        if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
            return False
        return all([is_scalar_condition(x, materialized) for x in element.arguments])
    elif isinstance(element, CONCEPT_TYPES):
        if materialized and element.address in materialized:
            return True
        if element.lineage and isinstance(element.lineage, AGGREGATE_TYPES):
            return is_scalar_condition(element.lineage, materialized)
        if element.lineage and isinstance(element.lineage, FUNCTION_TYPES):
            return is_scalar_condition(element.lineage, materialized)
        return True
    elif isinstance(element, AGGREGATE_TYPES):
        return is_scalar_condition(element.function, materialized)
    elif isinstance(element, CONDITIONAL_TYPES):
        return is_scalar_condition(element.left, materialized) and is_scalar_condition(
            element.right, materialized
        )
    elif isinstance(element, (BuildCaseWhen,)):
        return is_scalar_condition(
            element.comparison, materialized
        ) and is_scalar_condition(element.expr, materialized)
    elif isinstance(element, (BuildCaseElse,)):
        return is_scalar_condition(element.expr, materialized)
    elif isinstance(element, MagicConstants):
        return True
    return True
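# Editorial sketch (not part of the released module): a condition is "scalar"
# when it can be evaluated row-by-row. Assuming order_id is a plain concept and
# total_revenue's lineage is an aggregate function:
#
#   is_scalar_condition(<comparison: order_id = 1>)
#   # -> True
#   is_scalar_condition(<comparison: total_revenue > 100>)
#   # -> False (the aggregate needs a grouped context)
#   is_scalar_condition(<comparison: total_revenue > 100>,
#                       materialized={"local.total_revenue"})
#   # -> True (the aggregate already exists as a physical column)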
CONDITION_TYPES = (
    BuildSubselectComparison,
    BuildComparison,
    BuildConditional,
    BuildParenthetical,
)


def decompose_condition(
    conditional: BuildConditional | BuildComparison | BuildParenthetical,
) -> list[
    BuildSubselectComparison | BuildComparison | BuildConditional | BuildParenthetical
]:
    chunks: list[
        BuildSubselectComparison
        | BuildComparison
        | BuildConditional
        | BuildParenthetical
    ] = []
    if not isinstance(conditional, BuildConditional):
        return [conditional]
    if conditional.operator == BooleanOperator.AND:
        if not (
            isinstance(conditional.left, CONDITION_TYPES)
            and isinstance(conditional.right, CONDITION_TYPES)
        ):
            chunks.append(conditional)
        else:
            for val in [conditional.left, conditional.right]:
                if isinstance(val, BuildConditional):
                    chunks.extend(decompose_condition(val))
                else:
                    chunks.append(val)
    else:
        chunks.append(conditional)
    return chunks
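# Editorial sketch (not part of the released module): AND chains decompose into
# independently pushable predicates; anything else is returned whole.
#
#   decompose_condition(cond)  # cond: a = 1 AND b = 2 AND c = 3
#   # -> [a = 1, b = 2, c = 3]
#   decompose_condition(cond)  # cond: a = 1 OR b = 2
#   # -> [a = 1 OR b = 2]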
def find_nullable_concepts(
    source_map: Dict[str, set[BuildDatasource | QueryDatasource | UnnestJoin]],
    datasources: List[BuildDatasource | QueryDatasource],
    joins: List[BaseJoin | UnnestJoin],
) -> List[str]:
    """Given a set of datasources and joins, find the concepts
    that may contain nulls in the output set.
    """
    nullable_datasources = set()
    datasource_map = {
        x.identifier: x
        for x in datasources
        if isinstance(x, (BuildDatasource, QueryDatasource))
    }
    for join in joins:
        is_on_nullable_condition = False
        if not isinstance(join, BaseJoin):
            continue
        if not join.concept_pairs:
            continue
        for pair in join.concept_pairs:
            if pair.right.address in [
                c.address
                for c in datasource_map[
                    join.right_datasource.identifier
                ].nullable_concepts
            ]:
                is_on_nullable_condition = True
                break
            left_check = (
                join.left_datasource.identifier
                if join.left_datasource is not None
                else pair.existing_datasource.identifier
            )
            if pair.left.address in [
                c.address for c in datasource_map[left_check].nullable_concepts
            ]:
                is_on_nullable_condition = True
                break
        if is_on_nullable_condition:
            nullable_datasources.add(datasource_map[join.right_datasource.identifier])
    final_nullable = set()

    for k, v in source_map.items():
        local_nullable = [
            x for x in datasources if k in [c.address for c in x.nullable_concepts]
        ]
        nullable_matches = [
            k in [c.address for c in x.nullable_concepts]
            for x in datasources
            if k in [z.address for z in x.output_concepts]
        ]
        if all(nullable_matches) and len(nullable_matches) > 0:
            final_nullable.add(k)
        all_ds = set([ds for ds in local_nullable]).union(nullable_datasources)
        if nullable_datasources:
            if set(v).issubset(all_ds):
                final_nullable.add(k)
    return list(sorted(final_nullable))


def sort_select_output_processed(
    cte: CTE | UnionCTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
) -> CTE | UnionCTE:
    if isinstance(query, ProcessedQuery):
        targets = query.output_columns
        hidden = query.hidden_columns
    else:
        targets = query.output_components
        hidden = query.hidden_components

    output_addresses = [c.address for c in targets]

    mapping = {x.address: x for x in cte.output_columns}

    new_output: list[BuildConcept] = []
    for x in targets:
        if x.address in mapping:
            new_output.append(mapping[x.address])
        for oc in cte.output_columns:
            if x.address in oc.pseudonyms:
                # create a wrapper BuildConcept to render the pseudonym
                # under the original name
                if any(x.address == y for y in mapping.keys()):
                    continue
                new_output.append(
                    BuildConcept(
                        name=x.name,
                        canonical_name=x.name,
                        namespace=x.namespace,
                        pseudonyms={oc.address},
                        datatype=oc.datatype,
                        purpose=oc.purpose,
                        grain=oc.grain,
                        build_is_aggregate=oc.build_is_aggregate,
                    )
                )
                break

    for oc in cte.output_columns:
        # add hidden columns back at the end
        if oc.address not in output_addresses:
            new_output.append(oc)

    cte.hidden_concepts = set(
        [
            c.address
            for c in cte.output_columns
            if (c.address not in output_addresses or c.address in hidden)
        ]
    )
    cte.output_columns = new_output
    return cte
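# Editorial sketch (not part of the released module): if a query declares output
# order [b, a] and the CTE produced columns [a, b, c], the CTE is rewritten so
# output_columns begins [b, a]; c is re-appended and marked hidden, and a target
# only reachable through a pseudonym is re-wrapped under the requested name.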
def sort_select_output(
    cte: CTE | UnionCTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
) -> CTE | UnionCTE:
    return sort_select_output_processed(cte, query)