pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (75)
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1845 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +696 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +180 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +57 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +19 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +65 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +146 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/base_hook.py +6 -4
  65. trilogy/hooks/query_debugger.py +110 -93
  66. trilogy/parser.py +1 -1
  67. trilogy/parsing/common.py +303 -64
  68. trilogy/parsing/parse_engine.py +263 -617
  69. trilogy/parsing/render.py +50 -26
  70. trilogy/scripts/trilogy.py +2 -1
  71. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  72. trilogy/core/models.py +0 -4960
  73. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
trilogy/core/models/execute.py (new file)
@@ -0,0 +1,931 @@
+from __future__ import annotations
+
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Set, Union
+
+from pydantic import BaseModel, Field, ValidationInfo, computed_field, field_validator
+
+from trilogy.constants import CONFIG, logger
+from trilogy.core.constants import CONSTANT_DATASET
+from trilogy.core.enums import (
+    Derivation,
+    FunctionType,
+    Granularity,
+    JoinType,
+    Modifier,
+    Purpose,
+    SourceType,
+)
+from trilogy.core.models.build import (
+    BuildComparison,
+    BuildConcept,
+    BuildConditional,
+    BuildDatasource,
+    BuildFunction,
+    BuildGrain,
+    BuildOrderBy,
+    BuildParenthetical,
+    BuildRowsetItem,
+    LooseBuildConceptList,
+)
+from trilogy.core.models.datasource import Address
+from trilogy.utility import unique
+
+LOGGER_PREFIX = "[MODELS_EXECUTE]"
+
+DATASOURCE_TYPES = (BuildDatasource, BuildDatasource)
+
+
+class CTE(BaseModel):
+    name: str
+    source: "QueryDatasource"
+    output_columns: List[BuildConcept]
+    source_map: Dict[str, list[str]]
+    grain: BuildGrain
+    base: bool = False
+    group_to_grain: bool = False
+    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
+    parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
+    joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
+    condition: Optional[
+        Union[BuildComparison, BuildConditional, BuildParenthetical]
+    ] = None
+    partial_concepts: List[BuildConcept] = Field(default_factory=list)
+    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
+    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
+    hidden_concepts: set[str] = Field(default_factory=set)
+    order_by: Optional[BuildOrderBy] = None
+    limit: Optional[int] = None
+    base_name_override: Optional[str] = None
+    base_alias_override: Optional[str] = None
+
+    @property
+    def identifier(self):
+        return self.name
+
+    @property
+    def safe_identifier(self):
+        return self.name
+
+    @computed_field  # type: ignore
+    @property
+    def output_lcl(self) -> LooseBuildConceptList:
+        return LooseBuildConceptList(concepts=self.output_columns)
+
+    @field_validator("output_columns")
+    def validate_output_columns(cls, v):
+        return unique(v, "address")
+
+    def inline_constant(self, concept: BuildConcept):
+        if not concept.derivation == Derivation.CONSTANT:
+            return False
+        if not isinstance(concept.lineage, BuildFunction):
+            return False
+        if not concept.lineage.operator == FunctionType.CONSTANT:
+            return False
+        # remove the constant
+        removed: set = set()
+        if concept.address in self.source_map:
+            removed = removed.union(self.source_map[concept.address])
+            del self.source_map[concept.address]
+
+        if self.condition:
+            self.condition = self.condition.inline_constant(concept)
+        # if we've entirely removed the need to join to someplace to get the concept
+        # drop the join as well.
+        for removed_cte in removed:
+            still_required = any(
+                [
+                    removed_cte in x
+                    for x in self.source_map.values()
+                    or self.existence_source_map.values()
+                ]
+            )
+            if not still_required:
+                self.joins = [
+                    join
+                    for join in self.joins
+                    if not isinstance(join, Join)
+                    or (
+                        isinstance(join, Join)
+                        and (
+                            join.right_cte.name != removed_cte
+                            and any(
+                                [
+                                    x.cte.name != removed_cte
+                                    for x in (join.joinkey_pairs or [])
+                                ]
+                            )
+                        )
+                    )
+                ]
+                for join in self.joins:
+                    if isinstance(join, UnnestJoin) and concept in join.concepts:
+                        join.rendering_required = False
+
+                self.parent_ctes = [
+                    x for x in self.parent_ctes if x.name != removed_cte
+                ]
+                if removed_cte == self.base_name_override:
+                    candidates = [x.name for x in self.parent_ctes]
+                    self.base_name_override = candidates[0] if candidates else None
+                    self.base_alias_override = candidates[0] if candidates else None
+        return True
+
+    @property
+    def comment(self) -> str:
+        base = f"Target: {str(self.grain)}. Group: {self.group_to_grain}"
+        base += f" Source: {self.source.source_type}."
+        if self.parent_ctes:
+            base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
+        if self.joins:
+            base += f"\n-- Joins: {', '.join([str(x) for x in self.joins])}."
+        if self.partial_concepts:
+            base += (
+                f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
+            )
+        base += f"\n-- Source Map: {self.source_map}."
+        base += f"\n-- Output: {', '.join([str(x) for x in self.output_columns])}."
+        if self.source.input_concepts:
+            base += f"\n-- Inputs: {', '.join([str(x) for x in self.source.input_concepts])}."
+        if self.hidden_concepts:
+            base += f"\n-- Hidden: {', '.join([str(x) for x in self.hidden_concepts])}."
+        if self.nullable_concepts:
+            base += (
+                f"\n-- Nullable: {', '.join([str(x) for x in self.nullable_concepts])}."
+            )
+
+        return base
+
+    def inline_parent_datasource(
+        self, parent: "CTE", force_group: bool = False
+    ) -> bool:
+        qds_being_inlined = parent.source
+        ds_being_inlined = qds_being_inlined.datasources[0]
+        if not isinstance(ds_being_inlined, DATASOURCE_TYPES):
+            return False
+        if any(
+            [
+                x.safe_identifier == ds_being_inlined.safe_identifier
+                for x in self.source.datasources
+            ]
+        ):
+            return False
+
+        self.source.datasources = [
+            ds_being_inlined,
+            *[
+                x
+                for x in self.source.datasources
+                if x.safe_identifier != qds_being_inlined.safe_identifier
+            ],
+        ]
+        # need to identify this before updating joins
+        if self.base_name == parent.name:
+            self.base_name_override = ds_being_inlined.safe_location
+            self.base_alias_override = ds_being_inlined.safe_identifier
+
+        for join in self.joins:
+            if isinstance(join, InstantiatedUnnestJoin):
+                continue
+            if (
+                join.left_cte
+                and join.left_cte.safe_identifier == parent.safe_identifier
+            ):
+                join.inline_cte(parent)
+            if join.joinkey_pairs:
+                for pair in join.joinkey_pairs:
+                    if pair.cte and pair.cte.safe_identifier == parent.safe_identifier:
+                        join.inline_cte(parent)
+            if join.right_cte.safe_identifier == parent.safe_identifier:
+                join.inline_cte(parent)
+        for k, v in self.source_map.items():
+            if isinstance(v, list):
+                self.source_map[k] = [
+                    (
+                        ds_being_inlined.safe_identifier
+                        if x == parent.safe_identifier
+                        else x
+                    )
+                    for x in v
+                ]
+            elif v == parent.safe_identifier:
+                self.source_map[k] = [ds_being_inlined.safe_identifier]
+
+        # zip in any required values for lookups
+        for k in ds_being_inlined.output_lcl.addresses:
+            if k in self.source_map and self.source_map[k]:
+                continue
+            self.source_map[k] = [ds_being_inlined.safe_identifier]
+        self.parent_ctes = [
+            x for x in self.parent_ctes if x.safe_identifier != parent.safe_identifier
+        ]
+        if force_group:
+            self.group_to_grain = True
+        return True
+
+    def __add__(self, other: "CTE" | "UnionCTE"):
+        if isinstance(other, UnionCTE):
+            raise ValueError("cannot merge CTE and union CTE")
+        logger.info('Merging two copies of CTE "%s"', self.name)
+        if not self.grain == other.grain:
+            error = (
+                "Attempting to merge two ctes of different grains"
+                f" {self.name} {other.name} grains {self.grain} {other.grain}| {self.group_to_grain} {other.group_to_grain}| {self.output_lcl} {other.output_lcl}"
+            )
+            raise ValueError(error)
+        if not self.condition == other.condition:
+            error = (
+                "Attempting to merge two ctes with different conditions"
+                f" {self.name} {other.name} conditions {self.condition} {other.condition}"
+            )
+            raise ValueError(error)
+        mutually_hidden = set()
+        for concept in self.hidden_concepts:
+            if concept in other.hidden_concepts:
+                mutually_hidden.add(concept)
+        self.partial_concepts = unique(
+            self.partial_concepts + other.partial_concepts, "address"
+        )
+        self.parent_ctes = merge_ctes(self.parent_ctes + other.parent_ctes)
+
+        self.source_map = {**self.source_map, **other.source_map}
+
+        self.output_columns = unique(
+            self.output_columns + other.output_columns, "address"
+        )
+        self.joins = unique(self.joins + other.joins, "unique_id")
+        self.partial_concepts = unique(
+            self.partial_concepts + other.partial_concepts, "address"
+        )
+        self.join_derived_concepts = unique(
+            self.join_derived_concepts + other.join_derived_concepts, "address"
+        )
+
+        self.source.source_map = {**self.source.source_map, **other.source.source_map}
+        self.source.output_concepts = unique(
+            self.source.output_concepts + other.source.output_concepts, "address"
+        )
+        self.nullable_concepts = unique(
+            self.nullable_concepts + other.nullable_concepts, "address"
+        )
+        self.hidden_concepts = mutually_hidden
+        self.existence_source_map = {
+            **self.existence_source_map,
+            **other.existence_source_map,
+        }
+        return self
+
+    @property
+    def relevant_base_ctes(self):
+        return self.parent_ctes
+
+    @property
+    def is_root_datasource(self) -> bool:
+        return (
+            len(self.source.datasources) == 1
+            and isinstance(self.source.datasources[0], DATASOURCE_TYPES)
+            and not self.source.datasources[0].name == CONSTANT_DATASET
+        )
+
+    @property
+    def base_name(self) -> str:
+        if self.base_name_override:
+            return self.base_name_override
+        # if this cte selects from a single datasource, select right from it
+        if self.is_root_datasource:
+            return self.source.datasources[0].safe_location
+
+        # if we have multiple joined CTEs, pick the base
+        # as the root
+        elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
+            return self.parent_ctes[0].name
+        elif self.relevant_base_ctes:
+            return self.relevant_base_ctes[0].name
+        return self.source.name
+
+    @property
+    def quote_address(self) -> bool:
+        if self.is_root_datasource:
+            candidate = self.source.datasources[0]
+            if isinstance(candidate, DATASOURCE_TYPES) and isinstance(
+                candidate.address, Address
+            ):
+                return candidate.address.quoted
+        return False
+
+    @property
+    def base_alias(self) -> str:
+        if self.base_alias_override:
+            return self.base_alias_override
+        if self.is_root_datasource:
+            return self.source.datasources[0].identifier
+        elif self.relevant_base_ctes:
+            return self.relevant_base_ctes[0].name
+        elif self.parent_ctes:
+            return self.parent_ctes[0].name
+        return self.name
+
+    def get_concept(self, address: str) -> BuildConcept | None:
+        for cte in self.parent_ctes:
+            if address in cte.output_columns:
+                match = [x for x in cte.output_columns if x.address == address].pop()
+                if match:
+                    return match
+
+        for array in [self.source.input_concepts, self.source.output_concepts]:
+            match_list = [x for x in array if x.address == address]
+            if match_list:
+                return match_list.pop()
+        match_list = [x for x in self.output_columns if x.address == address]
+        if match_list:
+            return match_list.pop()
+        return None
+
+    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
+        for cte in self.parent_ctes:
+            if concept.address in cte.output_columns:
+                if source and source != cte.name:
+                    continue
+                return concept.safe_address
+
+        try:
+            source = self.source.get_alias(concept, source=source)
+
+            if not source:
+                raise ValueError("No source found")
+            return source
+        except ValueError as e:
+            return f"INVALID_ALIAS: {str(e)}"
+
+    @property
+    def group_concepts(self) -> List[BuildConcept]:
+        def check_is_not_in_group(c: BuildConcept):
+            if len(self.source_map.get(c.address, [])) > 0:
+                return False
+            if c.derivation == Derivation.ROWSET:
+                assert isinstance(c.lineage, BuildRowsetItem)
+                return check_is_not_in_group(c.lineage.content)
+            if c.derivation == Derivation.CONSTANT:
+                return True
+            if c.purpose == Purpose.METRIC:
+                return True
+
+            if c.derivation == Derivation.BASIC and c.lineage:
+                if all([check_is_not_in_group(x) for x in c.lineage.concept_arguments]):
+                    return True
+                if (
+                    isinstance(c.lineage, BuildFunction)
+                    and c.lineage.operator == FunctionType.GROUP
+                ):
+                    return check_is_not_in_group(c.lineage.concept_arguments[0])
+            return False
+
+        return (
+            unique(
+                [c for c in self.output_columns if not check_is_not_in_group(c)],
+                "address",
+            )
+            if self.group_to_grain
+            else []
+        )
+
+    @property
+    def render_from_clause(self) -> bool:
+        if (
+            all([c.derivation == Derivation.CONSTANT for c in self.output_columns])
+            and not self.parent_ctes
+            and not self.group_to_grain
+        ):
+            return False
+        # if we don't need to source any concepts from anywhere
+        # render without from
+        # most likely to happen from inlining constants
+        if not any([v for v in self.source_map.values()]):
+            return False
+        if (
+            len(self.source.datasources) == 1
+            and self.source.datasources[0].name == CONSTANT_DATASET
+        ):
+            return False
+        return True
+
+    @property
+    def sourced_concepts(self) -> List[BuildConcept]:
+        return [c for c in self.output_columns if c.address in self.source_map]
+
+
+class ConceptPair(BaseModel):
+    left: BuildConcept
+    right: BuildConcept
+    existing_datasource: Union[BuildDatasource, "QueryDatasource"]
+    modifiers: List[Modifier] = Field(default_factory=list)
+
+    @property
+    def is_partial(self):
+        return Modifier.PARTIAL in self.modifiers
+
+    @property
+    def is_nullable(self):
+        return Modifier.NULLABLE in self.modifiers
+
+
+class CTEConceptPair(ConceptPair):
+    cte: CTE
+
+
+class InstantiatedUnnestJoin(BaseModel):
+    concept_to_unnest: BuildConcept
+    alias: str = "unnest"
+
+
+class UnnestJoin(BaseModel):
+    concepts: list[BuildConcept]
+    parent: BuildFunction
+    alias: str = "unnest"
+    rendering_required: bool = True
+
+    def __hash__(self):
+        return self.safe_identifier.__hash__()
+
+    @property
+    def safe_identifier(self) -> str:
+        return self.alias + "".join([str(s.address) for s in self.concepts])
+
+
+class BaseJoin(BaseModel):
+    right_datasource: Union[BuildDatasource, "QueryDatasource"]
+    join_type: JoinType
+    concepts: Optional[List[BuildConcept]] = None
+    left_datasource: Optional[Union[BuildDatasource, "QueryDatasource"]] = None
+    concept_pairs: list[ConceptPair] | None = None
+
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+        if (
+            self.left_datasource
+            and self.left_datasource.identifier == self.right_datasource.identifier
+        ):
+            raise SyntaxError(
+                f"Cannot join a dataself to itself, joining {self.left_datasource} and"
+                f" {self.right_datasource}"
+            )
+        final_concepts = []
+
+        # if we have a list of concept pairs
+        if self.concept_pairs:
+            return
+        if self.concepts == []:
+            return
+        assert self.left_datasource and self.right_datasource
+        for concept in self.concepts or []:
+            include = True
+            for ds in [self.left_datasource, self.right_datasource]:
+                synonyms = []
+                for c in ds.output_concepts:
+                    synonyms += list(c.pseudonyms)
+                if (
+                    concept.address not in [c.address for c in ds.output_concepts]
+                    and concept.address not in synonyms
+                ):
+                    raise SyntaxError(
+                        f"Invalid join, missing {concept} on {ds.name}, have"
+                        f" {[c.address for c in ds.output_concepts]}"
+                    )
+            if include:
+                final_concepts.append(concept)
+        if not final_concepts and self.concepts:
+            # if one datasource only has constants
+            # we can join on 1=1
+            for ds in [self.left_datasource, self.right_datasource]:
+                # single rows
+                if all(
+                    [
+                        c.granularity == Granularity.SINGLE_ROW
+                        for c in ds.output_concepts
+                    ]
+                ):
+                    self.concepts = []
+                    return
+                # if everything is at abstract grain, we can skip joins
+                if all([c.grain.abstract for c in ds.output_concepts]):
+                    self.concepts = []
+                    return
+
+            left_keys = [c.address for c in self.left_datasource.output_concepts]
+            right_keys = [c.address for c in self.right_datasource.output_concepts]
+            match_concepts = [c.address for c in self.concepts]
+            raise SyntaxError(
+                "No mutual join keys found between"
+                f" {self.left_datasource.identifier} and"
+                f" {self.right_datasource.identifier}, left_keys {left_keys},"
+                f" right_keys {right_keys},"
+                f" provided join concepts {match_concepts}"
+            )
+        self.concepts = final_concepts
+
+    @property
+    def unique_id(self) -> str:
+        return str(self)
+
+    @property
+    def input_concepts(self) -> List[BuildConcept]:
+        base = []
+        if self.concept_pairs:
+            for pair in self.concept_pairs:
+                base += [pair.left, pair.right]
+        elif self.concepts:
+            base += self.concepts
+        return base
+
+    def __str__(self):
+        if self.concept_pairs:
+            return (
+                f"{self.join_type.value} {self.right_datasource.name} on"
+                f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
+            )
+        return (
+            f"{self.join_type.value} {self.right_datasource.name} on"
+            f" {','.join([str(k) for k in self.concepts])}"
+        )
+
+
+class QueryDatasource(BaseModel):
+    input_concepts: List[BuildConcept]
+    output_concepts: List[BuildConcept]
+    datasources: List[Union[BuildDatasource, "QueryDatasource"]]
+    source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource", "UnnestJoin"]]]
+
+    grain: BuildGrain
+    joins: List[BaseJoin | UnnestJoin]
+    limit: Optional[int] = None
+    condition: Optional[
+        Union[BuildConditional, BuildComparison, BuildParenthetical]
+    ] = Field(default=None)
+    source_type: SourceType = SourceType.SELECT
+    partial_concepts: List[BuildConcept] = Field(default_factory=list)
+    hidden_concepts: set[str] = Field(default_factory=set)
+    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
+    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
+    force_group: bool | None = None
+    existence_source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource"]]] = (
+        Field(default_factory=dict)
+    )
+    ordering: BuildOrderBy | None = None
+
+    def __repr__(self):
+        return f"{self.identifier}@<{self.grain}>"
+
+    @property
+    def safe_identifier(self):
+        return self.identifier.replace(".", "_")
+
+    @property
+    def full_concepts(self) -> List[BuildConcept]:
+        return [
+            c
+            for c in self.output_concepts
+            if c.address not in [z.address for z in self.partial_concepts]
+        ]
+
+    @field_validator("joins")
+    @classmethod
+    def validate_joins(cls, v):
+        unique_pairs = set()
+        for join in v:
+            if not isinstance(join, BaseJoin):
+                continue
+            pairing = str(join)
+            if pairing in unique_pairs:
+                raise SyntaxError(f"Duplicate join {str(join)}")
+            unique_pairs.add(pairing)
+        return v
+
+    @field_validator("input_concepts")
+    @classmethod
+    def validate_inputs(cls, v):
+        return unique(v, "address")
+
+    @field_validator("output_concepts")
+    @classmethod
+    def validate_outputs(cls, v):
+        return unique(v, "address")
+
+    @field_validator("source_map")
+    @classmethod
+    def validate_source_map(cls, v: dict, info: ValidationInfo):
+        values = info.data
+        for key in ("input_concepts", "output_concepts"):
+            if not values.get(key):
+                continue
+            concept: BuildConcept
+            for concept in values[key]:
+                if (
+                    concept.address not in v
+                    and not any(x in v for x in concept.pseudonyms)
+                    and CONFIG.validate_missing
+                ):
+                    raise SyntaxError(
+                        f"On query datasource missing source map for {concept.address} on {key}, have {v}"
+                    )
+        return v
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __hash__(self):
+        return (self.identifier).__hash__()
+
+    @property
+    def concepts(self):
+        return self.output_concepts
+
+    @property
+    def name(self):
+        return self.identifier
+
+    @property
+    def group_required(self) -> bool:
+        if self.force_group is True:
+            return True
+        if self.force_group is False:
+            return False
+        if self.source_type:
+            if self.source_type in [
+                SourceType.FILTER,
+            ]:
+                return False
+            elif self.source_type in [
+                SourceType.GROUP,
+            ]:
+                return True
+        return False
+
+    def __add__(self, other) -> "QueryDatasource":
+        # these are syntax errors to avoid being caught by current
+        if not isinstance(other, QueryDatasource):
+            raise SyntaxError("Can only merge two query datasources")
+        if not other.grain == self.grain:
+            raise SyntaxError(
+                "Can only merge two query datasources with identical grain"
+            )
+        if not self.group_required == other.group_required:
+            raise SyntaxError(
+                "can only merge two datasources if the group required flag is the same"
+            )
+        if not self.join_derived_concepts == other.join_derived_concepts:
+            raise SyntaxError(
+                "can only merge two datasources if the join derived concepts are the same"
+            )
+        if not self.force_group == other.force_group:
+            raise SyntaxError(
+                "can only merge two datasources if the force_group flag is the same"
+            )
+        logger.debug(
+            f"{LOGGER_PREFIX} merging {self.name} with"
+            f" {[c.address for c in self.output_concepts]} concepts and"
+            f" {other.name} with {[c.address for c in other.output_concepts]} concepts"
+        )
+
+        merged_datasources: dict[str, Union[BuildDatasource, "QueryDatasource"]] = {}
+
+        for ds in [*self.datasources, *other.datasources]:
+            if ds.safe_identifier in merged_datasources:
+                merged_datasources[ds.safe_identifier] = (
+                    merged_datasources[ds.safe_identifier] + ds
+                )
+            else:
+                merged_datasources[ds.safe_identifier] = ds
+
+        final_source_map: defaultdict[
+            str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]
+        ] = defaultdict(set)
+
+        # add our sources
+        for key in self.source_map:
+            final_source_map[key] = self.source_map[key].union(
+                other.source_map.get(key, set())
+            )
+        # add their sources
+        for key in other.source_map:
+            if key not in final_source_map:
+                final_source_map[key] = other.source_map[key]
+
+        # if a ds was merged (to combine columns), we need to update the source map
+        # to use the merged item
+        for k, v in final_source_map.items():
+            final_source_map[k] = set(
+                merged_datasources.get(x.safe_identifier, x) for x in list(v)
+            )
+        self_hidden: set[str] = self.hidden_concepts or set()
+        other_hidden: set[str] = other.hidden_concepts or set()
+        # hidden is the minimum overlapping set
+        hidden = self_hidden.intersection(other_hidden)
+        qds = QueryDatasource(
+            input_concepts=unique(
+                self.input_concepts + other.input_concepts, "address"
+            ),
+            output_concepts=unique(
+                self.output_concepts + other.output_concepts, "address"
+            ),
+            source_map=final_source_map,
+            datasources=list(merged_datasources.values()),
+            grain=self.grain,
+            joins=unique(self.joins + other.joins, "unique_id"),
+            condition=(
+                self.condition + other.condition
+                if self.condition and other.condition
+                else self.condition or other.condition
+            ),
+            source_type=self.source_type,
+            partial_concepts=unique(
+                self.partial_concepts + other.partial_concepts, "address"
+            ),
+            join_derived_concepts=self.join_derived_concepts,
+            force_group=self.force_group,
+            hidden_concepts=hidden,
+            ordering=self.ordering,
+        )
+
+        return qds
+
+    @property
+    def identifier(self) -> str:
+        filters = abs(hash(str(self.condition))) if self.condition else ""
+        grain = "_".join([str(c).replace(".", "_") for c in self.grain.components])
+        return (
+            "_join_".join([d.identifier for d in self.datasources])
+            + (f"_at_{grain}" if grain else "_at_abstract")
+            + (f"_filtered_by_{filters}" if filters else "")
+        )
+
+    def get_alias(
+        self,
+        concept: BuildConcept,
+        use_raw_name: bool = False,
+        force_alias: bool = False,
+        source: str | None = None,
+    ):
+        for x in self.datasources:
+            # query datasources should be referenced by their alias, always
+            force_alias = isinstance(x, QueryDatasource)
+            #
+            use_raw_name = isinstance(x, DATASOURCE_TYPES) and not force_alias
+            if source and x.safe_identifier != source:
+                continue
+            try:
+                return x.get_alias(
+                    concept.with_grain(self.grain),
+                    use_raw_name,
+                    force_alias=force_alias,
+                )
+            except ValueError as e:
+                from trilogy.constants import logger
+
+                logger.debug(e)
+                continue
+        existing = [c.with_grain(self.grain) for c in self.output_concepts]
+        if concept in existing:
+            return concept.name
+
+        existing_str = [str(c) for c in existing]
+        datasources = [ds.identifier for ds in self.datasources]
+        raise ValueError(
+            f"{LOGGER_PREFIX} Concept {str(concept)} not found on {self.identifier};"
+            f" have {existing_str} from {datasources}."
+        )
+
+    @property
+    def safe_location(self):
+        return self.identifier
+
+
+class UnionCTE(BaseModel):
+    name: str
+    source: QueryDatasource
+    parent_ctes: list[CTE | UnionCTE]
+    internal_ctes: list[CTE | UnionCTE]
+    output_columns: List[BuildConcept]
+    grain: BuildGrain
+    operator: str = "UNION ALL"
+    order_by: Optional[BuildOrderBy] = None
+    limit: Optional[int] = None
+    hidden_concepts: set[str] = Field(default_factory=set)
+    partial_concepts: list[BuildConcept] = Field(default_factory=list)
+    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
+
+    @computed_field  # type: ignore
+    @property
+    def output_lcl(self) -> LooseBuildConceptList:
+        return LooseBuildConceptList(concepts=self.output_columns)
+
+    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
+        for cte in self.parent_ctes:
+            if concept.address in cte.output_columns:
+                if source and source != cte.name:
+                    continue
+                return concept.safe_address
+        return "INVALID_ALIAS"
+
+    def get_concept(self, address: str) -> BuildConcept | None:
+        for cte in self.internal_ctes:
+            if address in cte.output_columns:
+                match = [x for x in cte.output_columns if x.address == address].pop()
+                return match
+
+        match_list = [x for x in self.output_columns if x.address == address]
+        if match_list:
+            return match_list.pop()
+        return None
+
+    @property
+    def source_map(self):
+        return {x.address: [] for x in self.output_columns}
+
+    @property
+    def condition(self):
+        return None
+
+    @condition.setter
+    def condition(self, value):
+        raise NotImplementedError
+
+    @property
+    def safe_identifier(self):
+        return self.name
+
+    @property
+    def group_to_grain(self) -> bool:
+        return False
+
+    def __add__(self, other):
+        if not isinstance(other, UnionCTE) or not other.name == self.name:
+            raise SyntaxError("Cannot merge union CTEs")
+        return self
+
+
+class Join(BaseModel):
+    right_cte: CTE
+    jointype: JoinType
+    left_cte: CTE | None = None
+    joinkey_pairs: List[CTEConceptPair] | None = None
+    inlined_ctes: set[str] = Field(default_factory=set)
+
+    def inline_cte(self, cte: CTE):
+        self.inlined_ctes.add(cte.name)
+
+    def get_name(self, cte: CTE):
+        if cte.identifier in self.inlined_ctes:
+            return cte.source.datasources[0].safe_identifier
+        return cte.safe_identifier
+
+    @property
+    def right_name(self) -> str:
+        if self.right_cte.identifier in self.inlined_ctes:
+            return self.right_cte.source.datasources[0].safe_identifier
+        return self.right_cte.safe_identifier
+
+    @property
+    def right_ref(self) -> str:
+        if self.right_cte.identifier in self.inlined_ctes:
+            return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
+        return self.right_cte.safe_identifier
+
+    @property
+    def unique_id(self) -> str:
+        return str(self)
+
+    def __str__(self):
+        if self.joinkey_pairs:
+            return (
+                f"{self.jointype.value} join"
+                f" {self.right_name} on"
+                f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
+            )
+        elif self.left_cte:
+            return (
+                f"{self.jointype.value} JOIN {self.left_cte.name} and"
+                f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
+            )
+        return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
+
+
+def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
+    final_ctes_dict: Dict[str, CTE | UnionCTE] = {}
+    # merge CTEs
+    for cte in ctes:
+        if cte.name not in final_ctes_dict:
+            final_ctes_dict[cte.name] = cte
+        else:
+            final_ctes_dict[cte.name] = final_ctes_dict[cte.name] + cte
+
+    final_ctes = list(final_ctes_dict.values())
+    return final_ctes
+
+
+class CompiledCTE(BaseModel):
+    name: str
+    statement: str
+
+
+UnionCTE.model_rebuild()