pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182):
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
@@ -0,0 +1,2672 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from abc import ABC
5
+ from datetime import date, datetime
6
+ from functools import cached_property
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Iterable,
11
+ List,
12
+ Optional,
13
+ Self,
14
+ Sequence,
15
+ Set,
16
+ Tuple,
17
+ Type,
18
+ Union,
19
+ )
20
+
21
+ from pydantic import (
22
+ BaseModel,
23
+ ConfigDict,
24
+ Field,
25
+ ValidationInfo,
26
+ computed_field,
27
+ field_validator,
28
+ model_validator,
29
+ )
30
+
31
+ from trilogy.constants import DEFAULT_NAMESPACE, MagicConstants
32
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
33
+ from trilogy.core.enums import (
34
+ BooleanOperator,
35
+ ComparisonOperator,
36
+ ConceptSource,
37
+ DatePart,
38
+ Derivation,
39
+ FunctionClass,
40
+ FunctionType,
41
+ Granularity,
42
+ InfiniteFunctionArgs,
43
+ Modifier,
44
+ Ordering,
45
+ Purpose,
46
+ WindowOrder,
47
+ WindowType,
48
+ )
49
+ from trilogy.core.models.core import (
50
+ Addressable,
51
+ ArrayType,
52
+ DataType,
53
+ DataTyped,
54
+ ListWrapper,
55
+ MapType,
56
+ MapWrapper,
57
+ NumericType,
58
+ StructType,
59
+ TraitDataType,
60
+ TupleWrapper,
61
+ arg_to_datatype,
62
+ is_compatible_datatype,
63
+ )
64
+ from trilogy.utility import unique
65
+
66
+ # TODO: refactor to avoid these
67
+ if TYPE_CHECKING:
68
+ from trilogy.core.models.environment import Environment
69
+
70
+
71
class Namespaced(ABC):
    """Interface for objects that can be re-addressed under a namespace."""

    def with_namespace(self, namespace: str):
        """Return this object's counterpart under ``namespace``.

        The base implementation always raises ``NotImplementedError``;
        subclasses are expected to override it.
        """
        raise NotImplementedError
74
+
75
+
76
class Mergeable(ABC):
    """Interface for objects supporting concept merges and reference swaps."""

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy where ``source`` is merged into ``target``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def with_reference_replacement(self, source: str, target: Expr | ArgBinding):
        """Return a copy with references to ``source`` replaced by ``target``.

        The base implementation raises ``NotImplementedError`` and passes the
        concrete class as the exception argument, so the unsupported type
        shows up in the traceback.
        """
        raise NotImplementedError(type(self))
82
+
83
+
84
class ConceptArgs(ABC):
    """Interface exposing the concept references an expression depends on."""

    @property
    def concept_arguments(self) -> Sequence["ConceptRef"]:
        """Concept references used by this object; must be overridden."""
        raise NotImplementedError

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        """Groups of existence-check references; empty unless overridden."""
        return []

    @property
    def row_arguments(self) -> Sequence["ConceptRef"]:
        """Row-level references; falls back to ``concept_arguments``."""
        return self.concept_arguments
96
+
97
+
98
class HasUUID(ABC):
    """Mixin that derives an identifier from the object's string form."""

    @property
    def uuid(self) -> str:
        """Return the hexadecimal MD5 digest of ``str(self)``.

        Two objects with the same string representation yield the same value.
        """
        # The digest is used purely as an identifier, so mark it as
        # non-security usage; this keeps the call working on builds where
        # MD5 is otherwise blocked (e.g. FIPS mode). The digest is unchanged.
        return hashlib.md5(str(self).encode(), usedforsecurity=False).hexdigest()
102
+
103
+
104
class ConceptRef(Addressable, Namespaced, DataTyped, Mergeable, BaseModel):
    # Pointer to a concept, identified by its dotted address. Equality is
    # address-based and also accepts plain strings and Concept objects.
    address: str
    datatype: (
        DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
    ) = DataType.UNKNOWN
    metadata: Optional["Metadata"] = None

    @property
    def reference(self):
        """A reference is its own reference."""
        return self

    @property
    def line_no(self) -> int | None:
        """Line number taken from ``metadata``, or ``None`` if metadata is falsy."""
        return self.metadata.line_number if self.metadata else None

    def __repr__(self):
        return f"ref:{self.address}"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        """Compare by address against a string, a Concept, or another ConceptRef."""
        if isinstance(other, str):
            return self.address == other
        if isinstance(other, (Concept, ConceptRef)):
            return self.address == other.address
        return False

    @property
    def namespace(self):
        """Everything before the last ``.`` of the address."""
        head = self.address.rsplit(".", 1)
        return head[0]

    @property
    def name(self):
        """Everything after the last ``.`` of the address."""
        pieces = self.address.rsplit(".", 1)
        return pieces[1]

    @property
    def output_datatype(self):
        return self.datatype

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> ConceptRef:
        """Point at ``target`` when this reference addresses ``source``.

        Metadata is carried over from this reference; otherwise ``self`` is
        returned unchanged.
        """
        if self.address != source.address:
            return self
        return ConceptRef.model_construct(
            address=target.address, datatype=target.datatype, metadata=self.metadata
        )

    def with_namespace(self, namespace: str):
        """Return a new reference whose address is placed under ``namespace``."""
        namespaced_address = address_with_namespace(self.address, namespace)
        return ConceptRef.model_construct(
            address=namespaced_address,
            datatype=self.datatype,
            metadata=self.metadata,
        )

    def with_reference_replacement(self, source: str, target: Expr | ArgBinding):
        """Substitute ``target`` for this reference when it matches ``source``.

        A reference may point to an attribute of a struct that is bound late:
        when ``source`` is a parent in the access path and ``target`` resolves
        to a struct type, the reference is replaced by an attribute-access
        function call on ``target``. Otherwise ``self`` is returned.
        """
        # Try the address both with and without the default namespace prefix.
        for candidate in (f"{DEFAULT_NAMESPACE}.{self.address}", self.address):
            if candidate == source:
                return target
            if candidate.startswith(f"{source}."):
                attribute = self.address.rsplit(".", 1)[1]
                dtype = arg_to_datatype(target)
                if isinstance(dtype, StructType):
                    return Function(
                        arguments=[target, attribute],
                        operator=FunctionType.ATTR_ACCESS,
                        arg_count=2,
                        output_datatype=dtype.field_types.get(
                            attribute, DataType.UNKNOWN
                        ),
                        output_purpose=Purpose.PROPERTY,
                    )
        return self
187
+
188
+
189
class UndefinedConcept(ConceptRef):
    # A concept reference with no known definition: it acts as its own
    # reference and always reports an unknown purpose.

    @property
    def reference(self):
        return self

    @property
    def purpose(self):
        return Purpose.UNKNOWN
199
+
200
+
201
def address_with_namespace(address: str, namespace: str) -> str:
    """Return ``address`` placed under ``namespace``.

    * If the part after the first ``.`` (or the whole address when there is
      no ``.``) equals ``ALL_ROWS_CONCEPT``, the address is returned unchanged.
    * If the address starts with ``DEFAULT_NAMESPACE``, that leading segment
      is replaced by ``namespace``.
    * Otherwise ``namespace`` is prepended to the full address.
    """
    head, dot, tail = address.partition(".")
    leaf = tail if dot else address
    if leaf == ALL_ROWS_CONCEPT:
        return address
    if head == DEFAULT_NAMESPACE:
        return f"{namespace}.{leaf}"
    return f"{namespace}.{address}"
212
+
213
+
214
class Parenthetical(
    DataTyped,
    ConceptArgs,
    Mergeable,
    Namespaced,
    BaseModel,
):
    # Wraps a single expression; rendered as "(<content>)" and delegates
    # argument and datatype queries to the wrapped content.
    content: "Expr"

    @field_validator("content", mode="before")
    @classmethod
    def content_validator(cls, v, info: ValidationInfo):
        """Store a Concept as its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    def __add__(self, other) -> Union["Parenthetical", "Conditional"]:
        """AND this expression with ``other``; ``None`` leaves it unchanged."""
        if other is None:
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"({str(self.content)})"

    def with_namespace(self, namespace: str) -> Parenthetical:
        """Rebuild with the content namespaced, when the content supports it."""
        inner = self.content
        if isinstance(inner, Namespaced):
            inner = inner.with_namespace(namespace)
        return Parenthetical.model_construct(content=inner)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Rebuild with the merge applied to the content, when it is mergeable."""
        inner = self.content
        if isinstance(inner, Mergeable):
            inner = inner.with_merge(source, target, modifiers)
        return Parenthetical.model_construct(content=inner)

    def with_reference_replacement(self, source, target):
        """Rebuild with ``source`` replaced by ``target`` inside the content."""
        inner = self.content
        if isinstance(inner, Mergeable):
            inner = inner.with_reference_replacement(source, target)
        return Parenthetical.model_construct(content=inner)

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """References in the content: itself if a ConceptRef, else its arguments."""
        inner = self.content
        if isinstance(inner, ConceptRef):
            return [inner]
        if isinstance(inner, ConceptArgs):
            return list(inner.concept_arguments)
        return []

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        inner = self.content
        if not isinstance(inner, ConceptArgs):
            return self.concept_arguments
        return inner.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        inner = self.content
        if not isinstance(inner, ConceptArgs):
            return []
        return inner.existence_arguments

    @property
    def output_datatype(self):
        return arg_to_datatype(self.content)
295
+
296
+
297
class Conditional(Mergeable, ConceptArgs, Namespaced, DataTyped, BaseModel):
    # Boolean combination (``operator``) of two expressions.
    left: Expr
    right: Expr
    operator: BooleanOperator

    @field_validator("left", mode="before")
    @classmethod
    def left_validator(cls, v, info: ValidationInfo):
        """Store a Concept as its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    @field_validator("right", mode="before")
    @classmethod
    def right_validator(cls, v, info: ValidationInfo):
        """Store a Concept as its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    def __add__(self, other) -> "Conditional":
        """AND with ``other``; a no-op for ``None`` or an identically rendered operand."""
        if other is None or str(other) == str(self):
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional.model_construct(
            left=self, right=other, operator=BooleanOperator.AND
        )

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{str(self.left)} {self.operator.value} {str(self.right)}"

    def __eq__(self, other):
        if not isinstance(other, Conditional):
            return False
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    def with_namespace(self, namespace: str) -> "Conditional":
        """Rebuild with each namespace-aware side moved under ``namespace``."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Namespaced):
            lhs = lhs.with_namespace(namespace)
        if isinstance(rhs, Namespaced):
            rhs = rhs.with_namespace(namespace)
        return Conditional.model_construct(left=lhs, right=rhs, operator=self.operator)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "Conditional":
        """Rebuild with the merge applied to each mergeable side."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Mergeable):
            lhs = lhs.with_merge(source, target, modifiers)
        if isinstance(rhs, Mergeable):
            rhs = rhs.with_merge(source, target, modifiers)
        return Conditional.model_construct(left=lhs, right=rhs, operator=self.operator)

    def with_reference_replacement(self, source, target):
        """Rebuild with ``source`` replaced by ``target`` on each mergeable side."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Mergeable):
            lhs = lhs.with_reference_replacement(source, target)
        if isinstance(rhs, Mergeable):
            rhs = rhs.with_reference_replacement(source, target)
        return self.__class__.model_construct(
            left=lhs, right=rhs, operator=self.operator
        )

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Concept arguments of the left side followed by those of the right."""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        """Row arguments of the left side followed by those of the right."""
        return [
            *get_concept_row_arguments(self.left),
            *get_concept_row_arguments(self.right),
        ]

    @property
    def existence_arguments(self) -> Sequence[tuple[ConceptRef, ...]]:
        """Existence arguments gathered from whichever sides expose them."""
        collected: list[tuple[ConceptRef, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, ConceptArgs):
                collected += side.existence_arguments
        return collected

    @property
    def output_datatype(self):
        # a conditional is always a boolean
        return DataType.BOOL

    def decompose(self):
        """Flatten nested AND conditionals into a list of their operands.

        A conditional whose operator is not AND is returned as a single-item
        list containing itself.
        """
        if self.operator != BooleanOperator.AND:
            return [self]
        parts = []
        for side in (self.left, self.right):
            if isinstance(side, Conditional):
                parts.extend(side.decompose())
            else:
                parts.append(side)
        return parts
428
+
429
+
430
class WhereClause(Mergeable, ConceptArgs, Namespaced, BaseModel):
    # Thin wrapper around a single boolean expression; every query and
    # transformation is delegated to ``conditional``.
    conditional: Union[SubselectComparison, Comparison, Conditional, Parenthetical]

    def __repr__(self):
        return str(self.conditional)

    def __str__(self):
        return self.__repr__()

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        return self.conditional.concept_arguments

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        return self.conditional.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        return self.conditional.existence_arguments

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> Self:
        """Return a clause of the same class with the merge applied to the conditional."""
        merged = self.conditional.with_merge(source, target, modifiers)
        return self.__class__.model_construct(conditional=merged)

    def with_namespace(self, namespace: str) -> Self:
        """Return a clause of the same class with the conditional namespaced."""
        namespaced = self.conditional.with_namespace(namespace)
        return self.__class__.model_construct(conditional=namespaced)

    def with_reference_replacement(self, source, target):
        """Return a clause of the same class with ``source`` replaced by ``target``."""
        replaced = self.conditional.with_reference_replacement(source, target)
        return self.__class__.model_construct(conditional=replaced)
467
+
468
+
469
class HavingClause(WhereClause):
    # Adds nothing to WhereClause; exists as a distinct type.
    pass
471
+
472
+
473
class Grain(Namespaced, BaseModel):
    # A set of concept addresses plus an optional where clause.
    # ``_str`` and ``_abstract`` are private caches filled lazily by
    # ``__str__`` and ``abstract``.
    components: set[str] = Field(default_factory=set)
    where_clause: Optional["WhereClause"] = None
    _str: str | None = None
    _abstract: bool = False

    def without_condition(self):
        """Return a grain over the same components with no where clause."""
        return Grain(components=self.components)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a grain where ``source``'s address is replaced by ``target``'s.

        NOTE(review): the result carries no ``where_clause``, unlike
        ``with_namespace`` and ``__sub__`` which keep it - confirm this is
        intentional before relying on it.
        """
        return Grain.model_construct(
            components={
                target.address if c == source.address else c for c in self.components
            }
        )

    @classmethod
    def from_concepts(
        cls,
        concepts: Iterable[Concept | ConceptRef | str],
        environment: Environment | None = None,
        where_clause: WhereClause | None = None,
        local_concepts: dict[str, Concept] | None = None,
    ) -> Grain:
        """Build a grain from concepts via ``concepts_to_grain_concepts``.

        The result is created with ``model_construct``, so field validators
        are not run on it.
        """
        # Imported here rather than at module level, presumably to avoid a
        # circular import - TODO confirm.
        from trilogy.parsing.common import concepts_to_grain_concepts

        return Grain.model_construct(
            components=concepts_to_grain_concepts(
                concepts, environment=environment, local_concepts=local_concepts
            ),
            where_clause=where_clause,
        )

    def with_namespace(self, namespace: str) -> "Grain":
        """Return a grain with every component and the where clause namespaced."""
        return Grain.model_construct(
            components={address_with_namespace(c, namespace) for c in self.components},
            where_clause=(
                self.where_clause.with_namespace(namespace)
                if self.where_clause
                else None
            ),
        )

    @field_validator("components", mode="before")
    @classmethod
    def component_validator(cls, v, info: ValidationInfo):
        """Normalise ``components`` to a set of address strings.

        A list is converted element by element (``Addressable`` items
        contribute their ``_address``); any other input must already be a set.

        Raises:
            ValueError: if the result is not a set, or contains a non-string.
        """
        output = set()
        if isinstance(v, list):
            for vc in v:
                if isinstance(vc, Addressable):
                    output.add(vc._address)
                else:
                    output.add(vc)
        else:
            output = v
        if not isinstance(output, set):
            raise ValueError(f"Invalid grain component {output}, is not set")
        if not all(isinstance(x, str) for x in output):
            raise ValueError(f"Invalid component {output}")
        return output

    def __add__(self, other: "Grain") -> "Grain":
        """Union the components; differing where clauses are ANDed together."""
        if not other:
            return self
        where = self.where_clause
        if other.where_clause:
            if not self.where_clause:
                where = other.where_clause
            elif not other.where_clause == self.where_clause:
                where = WhereClause.model_construct(
                    conditional=Conditional(
                        left=self.where_clause.conditional,
                        right=other.where_clause.conditional,
                        operator=BooleanOperator.AND,
                    )
                )
        return Grain(
            components=self.components.union(other.components), where_clause=where
        )

    def __sub__(self, other: "Grain") -> "Grain":
        """Remove ``other``'s components, keeping this grain's where clause."""
        return Grain.model_construct(
            components=self.components.difference(other.components),
            where_clause=self.where_clause,
        )

    def _gen_abstract(self) -> bool:
        # Abstract when there are no components, or when every component
        # ends with the all-rows concept name.
        return not self.components or all(
            c.endswith(ALL_ROWS_CONCEPT) for c in self.components
        )

    @property
    def abstract(self):
        """Whether the grain is abstract (see ``_gen_abstract``).

        Only a ``True`` result is cached; a ``False`` result is recomputed on
        each access.
        """
        if not self._abstract:
            self._abstract = self._gen_abstract()
        return self._abstract

    def __eq__(self, other: object):
        """Compare by components only; the where clause is not considered.

        A list compares equal when all its items are ``Concept`` instances
        whose addresses match the components exactly.
        """
        if isinstance(other, list):
            if all(isinstance(c, Concept) for c in other):
                return self.components == {c.address for c in other}
            return False
        if not isinstance(other, Grain):
            return False
        return self.components == other.components

    def issubset(self, other: "Grain"):
        return self.components.issubset(other.components)

    def union(self, other: "Grain"):
        """Union of components, keeping only this grain's where clause."""
        addresses = self.components.union(other.components)
        return Grain(components=addresses, where_clause=self.where_clause)

    def isdisjoint(self, other: "Grain"):
        return self.components.isdisjoint(other.components)

    def intersection(self, other: "Grain") -> "Grain":
        """Grain over the shared components, without a where clause."""
        return Grain(components=self.components.intersection(other.components))

    def _gen_str(self) -> str:
        # Components are sorted so the rendering does not depend on set order.
        if self.abstract:
            base = "Grain<Abstract>"
        else:
            base = "Grain<" + ",".join(sorted(self.components)) + ">"
        if self.where_clause:
            base += f"|{str(self.where_clause)}"
        return base

    def __str__(self):
        if not self._str:
            self._str = self._gen_str()
        return self._str

    def __radd__(self, other) -> "Grain":
        # ``sum()`` starts from 0, so treat 0 as the additive identity.
        if other == 0:
            return self
        else:
            return self.__add__(other)
620
+
621
+
622
+ class Comparison(ConceptArgs, Mergeable, DataTyped, Namespaced, BaseModel):
623
+ left: Union[
624
+ int,
625
+ str,
626
+ float,
627
+ list,
628
+ bool,
629
+ datetime,
630
+ date,
631
+ Function,
632
+ ConceptRef,
633
+ Conditional,
634
+ DataType,
635
+ Comparison,
636
+ FunctionCallWrapper,
637
+ Parenthetical,
638
+ MagicConstants,
639
+ WindowItem,
640
+ AggregateWrapper,
641
+ FilterItem,
642
+ ]
643
+ right: Union[
644
+ int,
645
+ str,
646
+ float,
647
+ list,
648
+ bool,
649
+ date,
650
+ datetime,
651
+ ConceptRef,
652
+ Function,
653
+ Conditional,
654
+ DataType,
655
+ Comparison,
656
+ FunctionCallWrapper,
657
+ Parenthetical,
658
+ MagicConstants,
659
+ WindowItem,
660
+ AggregateWrapper,
661
+ TupleWrapper,
662
+ FilterItem,
663
+ ]
664
+ operator: ComparisonOperator
665
+
666
+ @field_validator("left", mode="before")
667
+ @classmethod
668
+ def left_validator(cls, v, info: ValidationInfo):
669
+ if isinstance(v, Concept):
670
+ return v.reference
671
+ return v
672
+
673
+ @field_validator("right", mode="before")
674
+ @classmethod
675
+ def right_validator(cls, v, info: ValidationInfo):
676
+ if isinstance(v, Concept):
677
+ return v.reference
678
+ return v
679
+
680
+ @model_validator(mode="after")
681
+ def validate_comparison(self):
682
+ left_type = arg_to_datatype(self.left)
683
+ right_type = arg_to_datatype(self.right)
684
+ left_name = (
685
+ left_type.name if isinstance(left_type, DataType) else str(left_type)
686
+ )
687
+ right_name = (
688
+ right_type.name if isinstance(right_type, DataType) else str(right_type)
689
+ )
690
+ if self.operator in (ComparisonOperator.IS, ComparisonOperator.IS_NOT):
691
+ if self.right != MagicConstants.NULL and DataType.BOOL != right_type:
692
+ raise SyntaxError(
693
+ f"Cannot use {self.operator.value} with non-null or boolean value {self.right}"
694
+ )
695
+ elif self.operator in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
696
+
697
+ if isinstance(right_type, ArrayType) and not is_compatible_datatype(
698
+ left_type, right_type.value_data_type
699
+ ):
700
+ raise SyntaxError(
701
+ f"Cannot compare {left_type} and {right_type} with operator {self.operator} in {str(self)}"
702
+ )
703
+ elif isinstance(self.right, Concept) and not is_compatible_datatype(
704
+ left_type, right_type
705
+ ):
706
+ raise SyntaxError(
707
+ f"Cannot compare {left_name} and {right_name} with operator {self.operator} in {str(self)}"
708
+ )
709
+ else:
710
+ if not is_compatible_datatype(left_type, right_type):
711
+ raise SyntaxError(
712
+ f"Cannot compare {left_name} ({self.left}) and {right_name} ({self.right}) of different types with operator {self.operator.value} in {str(self)}"
713
+ )
714
+
715
+ return self
716
+
717
+ def __add__(self, other):
718
+ if other is None:
719
+ return self
720
+ if not isinstance(other, (Comparison, Conditional, Parenthetical)):
721
+ raise ValueError("Cannot add Comparison to non-Comparison")
722
+ if other == self:
723
+ return self
724
+ return Conditional(left=self, right=other, operator=BooleanOperator.AND)
725
+
726
+ def __repr__(self):
727
+ if isinstance(self.left, Concept):
728
+ left = self.left.address
729
+ else:
730
+ left = str(self.left)
731
+ if isinstance(self.right, Concept):
732
+ right = self.right.address
733
+ else:
734
+ right = str(self.right)
735
+ return f"{left} {self.operator.value} {right}"
736
+
737
+ def __str__(self):
738
+ return self.__repr__()
739
+
740
+ def __eq__(self, other):
741
+ if not isinstance(other, Comparison):
742
+ return False
743
+ return (
744
+ self.left == other.left
745
+ and self.right == other.right
746
+ and self.operator == other.operator
747
+ )
748
+
749
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
750
+ return self.__class__.model_construct(
751
+ left=(
752
+ self.left.with_merge(source, target, modifiers)
753
+ if isinstance(self.left, Mergeable)
754
+ else self.left
755
+ ),
756
+ right=(
757
+ self.right.with_merge(source, target, modifiers)
758
+ if isinstance(self.right, Mergeable)
759
+ else self.right
760
+ ),
761
+ operator=self.operator,
762
+ )
763
+
764
def with_reference_replacement(self, source, target):
    """Return a copy with ``source`` replaced by ``target`` in each operand.

    Only ``Mergeable`` operands are rewritten; others are carried over
    unchanged. The operator is preserved.
    """
    left = self.left
    if isinstance(left, Mergeable):
        left = left.with_reference_replacement(source, target)
    right = self.right
    if isinstance(right, Mergeable):
        right = right.with_reference_replacement(source, target)
    return self.__class__.model_construct(
        left=left,
        right=right,
        operator=self.operator,
    )
778
+
779
def with_namespace(self, namespace: str):
    """Return a copy whose ``Namespaced`` operands are moved into ``namespace``.

    Operands that are not ``Namespaced`` are carried over unchanged; the
    operator is preserved.
    """
    left = self.left
    if isinstance(left, Namespaced):
        left = left.with_namespace(namespace)
    right = self.right
    if isinstance(right, Namespaced):
        right = right.with_namespace(namespace)
    return self.__class__.model_construct(
        left=left,
        right=right,
        operator=self.operator,
    )
793
+
794
@property
def concept_arguments(self) -> List[ConceptRef]:
    """Concept references found in the left operand, then the right operand."""
    left_refs = get_concept_arguments(self.left)
    right_refs = get_concept_arguments(self.right)
    return [*left_refs, *right_refs]
801
+
802
@property
def row_arguments(self) -> List[ConceptRef]:
    """Row-level concept references of the left operand, then the right."""
    left_refs = get_concept_row_arguments(self.left)
    right_refs = get_concept_row_arguments(self.right)
    return [*left_refs, *right_refs]
808
+
809
@property
def existence_arguments(self) -> List[Tuple[ConceptRef, ...]]:
    """Collect ``existence_arguments`` from each operand that is a ``ConceptArgs``.

    The left operand's entries come first; operands that are not
    ``ConceptArgs`` contribute nothing.
    """
    collected: List[Tuple[ConceptRef, ...]] = []
    for side in (self.left, self.right):
        if isinstance(side, ConceptArgs):
            collected += side.existence_arguments
    return collected
818
+
819
@property
def output_datatype(self):
    """Datatype of the evaluated comparison: always ``DataType.BOOL``."""
    result_type = DataType.BOOL
    return result_type
823
+
824
+
825
class SubselectComparison(Comparison):
    """Comparison variant with its own argument split.

    Row arguments come only from the left operand; the right operand's
    concept arguments are reported as a single existence tuple.
    """

    def __eq__(self, other):
        """Equal only to another ``SubselectComparison`` with matching parts."""
        if isinstance(other, SubselectComparison):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    @property
    def row_arguments(self) -> List[ConceptRef]:
        """Row-level concept references of the left operand only."""
        left_refs = get_concept_row_arguments(self.left)
        return left_refs

    @property
    def existence_arguments(self) -> list[tuple["ConceptRef", ...]]:
        """One tuple holding every concept reference of the right operand."""
        right_refs = get_concept_arguments(self.right)
        return [tuple(right_refs)]
844
+
845
+
846
+ class Concept(Addressable, DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
847
+ model_config = ConfigDict(
848
+ extra="forbid",
849
+ )
850
+ name: str
851
+ datatype: DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
852
+ purpose: Purpose
853
+ derivation: Derivation = Derivation.ROOT
854
+ granularity: Granularity = Granularity.MULTI_ROW
855
+ metadata: Metadata = Field(
856
+ default_factory=lambda: Metadata(description=None, line_number=None),
857
+ validate_default=True,
858
+ )
859
+ lineage: Optional[
860
+ Union[
861
+ Function,
862
+ WindowItem,
863
+ FilterItem,
864
+ AggregateWrapper,
865
+ RowsetItem,
866
+ MultiSelectLineage,
867
+ Comparison,
868
+ ]
869
+ ] = None
870
+ namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)
871
+ keys: Optional[set[str]] = None
872
+ grain: "Grain" = Field(default=None, validate_default=True) # type: ignore
873
+ modifiers: List[Modifier] = Field(default_factory=list) # type: ignore
874
+ pseudonyms: set[str] = Field(default_factory=set)
875
+
876
def duplicate(self) -> Concept:
    """Return a deep copy of this concept (``model_copy(deep=True)``)."""
    clone = self.model_copy(deep=True)
    return clone
878
+
879
def __hash__(self):
    """Hash a string built from name, datatype, purpose, lineage,
    namespace, grain and keys.

    NOTE(review): ``keys`` is a set rendered via ``str``; if two equal sets
    can render in different orders, equal concepts may hash differently --
    confirm whether that matters for callers.
    """
    fingerprint = f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
    return hash(fingerprint)
883
+
884
def __repr__(self):
    """Render as ``<address>@<grain>``."""
    return f"{self.address}@{self.grain}"
887
+
888
@property
def is_internal(self) -> bool:
    """True when the namespace or the name starts with an underscore."""
    if self.namespace.startswith("_"):
        return True
    return self.name.startswith("_")
891
+
892
@property
def reference(self) -> ConceptRef:
    """Build a ``ConceptRef`` for this concept.

    The reference carries the concept's address, output datatype and
    metadata, and is created with ``model_construct``.
    """
    fields = {
        "address": self.address,
        "datatype": self.output_datatype,
        "metadata": self.metadata,
    }
    return ConceptRef.model_construct(**fields)
899
+
900
@property
def output_datatype(self):
    """The concept's own ``datatype``."""
    declared = self.datatype
    return declared
903
+
904
@classmethod
def calculate_is_aggregate(cls, lineage):
    """Report whether ``lineage`` represents an aggregate computation.

    True when ``lineage`` is a ``Function`` whose operator is listed in
    ``FunctionClass.AGGREGATE_FUNCTIONS``, or an ``AggregateWrapper`` whose
    wrapped function has such an operator. False otherwise, including for
    a missing lineage.
    """
    if not lineage:
        return False
    if (
        isinstance(lineage, Function)
        and lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
    ):
        return True
    if (
        isinstance(lineage, AggregateWrapper)
        and lineage.function.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
    ):
        return True
    return False
916
+
917
@property
def is_aggregate(self):
    """Whether this concept's lineage is an aggregate, memoised per instance.

    The result of ``calculate_is_aggregate(self.lineage)`` is stored on the
    instance as ``_is_aggregate`` and reused on later accesses.
    """
    cached = getattr(self, "_is_aggregate", None)
    # Compare against None rather than truthiness so that a cached False is
    # reused too; a plain truthiness test would recompute every negative.
    if cached is not None:
        return cached
    result = self.calculate_is_aggregate(self.lineage)
    setattr(self, "_is_aggregate", result)
    return result
924
+
925
def with_merge(self, source: Self, target: Self, modifiers: List[Modifier]) -> Self:
    """Apply a ``source`` -> ``target`` concept merge to this concept.

    * If this concept *is* the source (same address), return ``target``
      re-grained with this concept's merged grain, and record this
      concept's address among the result's pseudonyms.
    * If this concept has no grain components, lineage or keys, nothing
      can reference the source, so ``self`` is returned as-is.
    * Otherwise return a copy whose lineage, grain and keys have the merge
      applied.
    """
    if self.address == source.address:
        replacement = target.with_grain(
            self.grain.with_merge(source, target, modifiers)
        )
        # NOTE(review): with_grain passes the pseudonyms set through, so this
        # add presumably also mutates target's set -- confirm that is intended.
        replacement.pseudonyms.add(self.address)
        return replacement

    untouched = not self.grain.components and not self.lineage and not self.keys
    if untouched:
        return self

    merged_lineage = None
    if self.lineage:
        merged_lineage = self.lineage.with_merge(source, target, modifiers)
    merged_grain = self.grain.with_merge(source, target, modifiers)
    merged_keys = None
    if self.keys:
        merged_keys = {
            target.address if key == source.address else key for key in self.keys
        }
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        metadata=self.metadata,
        derivation=self.derivation,
        granularity=self.granularity,
        lineage=merged_lineage,
        grain=merged_grain,
        namespace=self.namespace,
        keys=merged_keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
954
+
955
@field_validator("namespace", mode="plain")
@classmethod
def namespace_validation(cls, v):
    """Replace a falsy namespace with ``DEFAULT_NAMESPACE``."""
    if not v:
        return DEFAULT_NAMESPACE
    return v
959
+
960
@field_validator("metadata", mode="before")
@classmethod
def metadata_validation(cls, v):
    """Replace falsy metadata with a default ``Metadata()`` instance."""
    return v or Metadata()
965
+
966
@field_validator("purpose", mode="after")
@classmethod
def purpose_validation(cls, v):
    """Reject ``Purpose.AUTO``; any other purpose passes through.

    Raises:
        ValueError: if the purpose is ``Purpose.AUTO``.
    """
    if v != Purpose.AUTO:
        return v
    raise ValueError("Cannot set purpose to AUTO")
972
+
973
@field_validator("grain", mode="before")
@classmethod
def parse_grain(cls, v, info: ValidationInfo) -> Grain:
    """Normalise the ``grain`` input into a ``Grain``.

    Rules are applied in this order, reading already-validated fields from
    ``info.data``:

    1. No grain given and purpose is KEY: the concept is its own grain.
    2. No grain given and purpose is PROPERTY: the grain is the ``keys``.
    3. Lineage is an ``AggregateWrapper`` with a non-empty ``by``: the
       grain is the ``by`` addresses (this wins over a supplied grain).
    4. No grain given: empty grain.
    5. A ``Grain`` is kept, a ``Concept`` becomes a one-component grain,
       a ``dict`` is validated into a ``Grain``.

    Raises:
        SyntaxError: for any other input type.
    """
    # this is silly - rethink how we do grains
    values = info.data
    missing = not v

    if missing:
        purpose = values.get("purpose", None)
        if purpose == Purpose.KEY:
            return Grain(
                components={
                    f'{values.get("namespace", DEFAULT_NAMESPACE)}.{values["name"]}'
                }
            )
        if purpose == Purpose.PROPERTY:
            return Grain(components=values.get("keys", set()) or set())

    lineage = values.get("lineage")
    if isinstance(lineage, AggregateWrapper) and lineage.by:
        return Grain(components={c.address for c in lineage.by})

    if missing:
        return Grain(components=set())
    if isinstance(v, Grain):
        return v
    if isinstance(v, Concept):
        return Grain(components={v.address})
    if isinstance(v, dict):
        return Grain.model_validate(v)
    raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
1005
+
1006
def __eq__(self, other: object):
    """Compare against a string address, a ``ConceptRef`` or a ``Concept``.

    * A string is equal when it matches this concept's address.
    * A ``ConceptRef`` is equal when the addresses match.
    * Another ``Concept`` is equal when name, datatype, purpose, namespace,
      grain, derivation and granularity all match (``keys`` is not part of
      the comparison).
    * Anything else is not equal.
    """
    if isinstance(other, str) and self.address == other:
        return True
    if isinstance(other, ConceptRef):
        return self.address == other.address
    if not isinstance(other, Concept):
        return False
    same_definition = (
        self.name == other.name
        and self.datatype == other.datatype
        and self.purpose == other.purpose
        and self.namespace == other.namespace
    )
    return (
        same_definition
        and self.grain == other.grain
        and self.derivation == other.derivation
        and self.granularity == other.granularity
    )
1024
+
1025
def __str__(self):
    """Render as ``<namespace>.<name>@<grain>``; a falsy grain prints as ``Grain<>``."""
    if self.grain:
        grain_text = str(self.grain)
    else:
        grain_text = "Grain<>"
    return f"{self.namespace}.{self.name}@{grain_text}"
1028
+
1029
@property
def address(self) -> str:
    """Fully qualified name: ``<namespace>.<name>``."""
    namespace, name = self.namespace, self.name
    return f"{namespace}.{name}"
1032
+
1033
@property
def output(self) -> "Concept":
    """A concept is its own output; returns ``self``."""
    return self
1036
+
1037
@property
def safe_address(self) -> str:
    """Address with dots replaced by underscores.

    The namespace prefix is omitted when the namespace is empty or equals
    ``DEFAULT_NAMESPACE``.
    """
    if self.namespace == DEFAULT_NAMESPACE or not self.namespace:
        return self.name.replace(".", "_")
    return f"{self.namespace.replace('.','_')}_{self.name.replace('.','_')}"
1044
+
1045
def with_namespace(self, namespace: str) -> Self:
    """Return a copy of this concept moved under ``namespace``.

    Lineage, grain, keys and pseudonyms are re-addressed as well. A concept
    in ``DEFAULT_NAMESPACE`` takes ``namespace`` directly; any other
    concept gets ``namespace`` prepended to its current namespace.
    """
    moved_lineage = None
    if self.lineage:
        moved_lineage = self.lineage.with_namespace(namespace)

    if self.grain:
        moved_grain = self.grain.with_namespace(namespace)
    else:
        moved_grain = Grain(components=set())

    if self.namespace != DEFAULT_NAMESPACE:
        full_namespace = namespace + "." + self.namespace
    else:
        full_namespace = namespace

    moved_keys = None
    if self.keys:
        moved_keys = {address_with_namespace(key, namespace) for key in self.keys}

    moved_pseudonyms = {
        address_with_namespace(alias, namespace) for alias in self.pseudonyms
    }
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        granularity=self.granularity,
        derivation=self.derivation,
        metadata=self.metadata,
        lineage=moved_lineage,
        grain=moved_grain,
        namespace=full_namespace,
        keys=moved_keys,
        modifiers=self.modifiers,
        pseudonyms=moved_pseudonyms,
    )
1072
+
1073
def get_select_grain_and_keys(
    self, grain: Grain, environment: Environment
) -> Tuple[
    Function
    | WindowItem
    | FilterItem
    | AggregateWrapper
    | RowsetItem
    | MultiSelectLineage
    | Comparison
    | None,
    Grain,
    set[str] | None,
]:
    """Work out the lineage, grain and keys this concept has inside a select.

    Returns a ``(lineage, grain, keys)`` tuple:

    * An aggregate ``Function`` selected at a non-empty grain is wrapped in
      an ``AggregateWrapper`` grouped by the select grain.
    * An ``AggregateWrapper`` without a ``by`` is re-wrapped grouped by the
      select grain.
    * A BASIC derivation collects the keys of every defined parent concept
      and derives its grain from them.
    * Anything else keeps its lineage and keys; its grain is its own grain
      if that has components, otherwise the select grain.
    """
    lineage = self.lineage

    if self.is_aggregate and grain.components and isinstance(lineage, Function):
        by_refs: list[ConceptRef | Concept] = [
            environment.concepts[c].reference for c in grain.components
        ]
        wrapped = AggregateWrapper.model_construct(function=lineage, by=by_refs)
        return wrapped, grain, set(grain.components)

    if (
        grain
        and lineage
        and isinstance(lineage, AggregateWrapper)
        and not lineage.by
    ):
        by_refs = [environment.concepts[c].reference for c in grain.components]
        rewrapped = AggregateWrapper.model_construct(
            function=lineage.function, by=by_refs
        )
        return rewrapped, grain, {ref.address for ref in rewrapped.by}

    if self.derivation == Derivation.BASIC:
        parent_key_pool: set[str] = set()
        assert lineage
        for parent_ref in lineage.concept_arguments:
            parent = environment.concepts[parent_ref.address]
            if isinstance(parent, UndefinedConcept):
                # undefined parents cannot contribute keys
                continue
            _, _, parent_keys = parent.get_select_grain_and_keys(grain, environment)
            if parent_keys:
                parent_key_pool.update(parent_keys)
        # Grain.from_concepts receives the pooled keys; the resulting
        # components become this concept's keys.
        basic_grain = Grain.from_concepts(parent_key_pool, environment)
        return lineage, basic_grain, basic_grain.components

    if self.grain.components:
        default_grain = self.grain
    else:
        default_grain = grain
    return lineage, default_grain, self.keys
1131
+
1132
def set_select_grain(self, grain: Grain, environment: Environment) -> Self:
    """Return a copy of this concept carrying its select-specific
    lineage, grain and keys.

    The three values come from ``get_select_grain_and_keys``; every other
    field is copied from ``self``.
    """
    resolved = self.get_select_grain_and_keys(grain, environment)
    select_lineage, select_grain, select_keys = resolved
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        granularity=self.granularity,
        derivation=self.derivation,
        metadata=self.metadata,
        lineage=select_lineage,
        grain=select_grain,
        namespace=self.namespace,
        keys=select_keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
1151
+
1152
def with_grain(self, grain: Optional["Grain"] = None) -> Self:
    """Return a copy of this concept with ``grain`` substituted.

    A falsy ``grain`` is replaced by an empty ``Grain``. All other fields
    are passed through unchanged (not copied).
    """
    if grain:
        new_grain = grain
    else:
        new_grain = Grain.model_construct(components=set())
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        metadata=self.metadata,
        granularity=self.granularity,
        derivation=self.derivation,
        lineage=self.lineage,
        grain=new_grain,
        namespace=self.namespace,
        keys=self.keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
1168
+
1169
@cached_property
def sources(self) -> List["ConceptRef"]:
    """Concept references this concept's lineage depends on directly.

    Returns an empty list when there is no lineage. Only ``ConceptRef``
    items from the lineage's ``concept_arguments`` are included; the result
    is cached on the instance by ``cached_property``.

    Raises:
        SyntaxError: if the lineage references this concept's own address.
    """
    if not self.lineage:
        return []

    direct_sources: List[ConceptRef] = []
    for item in self.lineage.concept_arguments:
        if not isinstance(item, ConceptRef):
            continue
        if item.address == self.address:
            raise SyntaxError(f"Concept {self.address} references itself")
        direct_sources.append(item)
    return direct_sources
1200
+
1201
@property
def concept_arguments(self) -> List[ConceptRef]:
    """The lineage's ``concept_arguments``, or an empty list without lineage."""
    if self.lineage:
        return self.lineage.concept_arguments
    return []
1204
+
1205
@classmethod
def calculate_derivation(cls, lineage, purpose: Purpose) -> Derivation:
    """Classify how a concept is derived, based on its lineage and purpose.

    Args:
        lineage: the concept's lineage (author- or build-time object), or a
            falsy value when the concept has none.
        purpose: the concept's purpose; only consulted when the lineage
            does not decide the derivation.

    Returns:
        The ``Derivation`` for the first matching rule, checked in order:
        window, filter, aggregate wrapper, rowset, build-time comparison,
        multiselect, then ``Function`` lineages by operator. Without a
        deciding lineage the result is CONSTANT for ``Purpose.CONSTANT``
        and ROOT otherwise.
    """
    # Imported here, not at module level, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from trilogy.core.models.build import (
        BuildAggregateWrapper,
        BuildComparison,
        BuildFilterItem,
        BuildFunction,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        BuildWindowItem,
    )

    if lineage:
        if isinstance(lineage, (BuildWindowItem, WindowItem)):
            return Derivation.WINDOW
        if isinstance(lineage, (BuildFilterItem, FilterItem)):
            return Derivation.FILTER
        if isinstance(lineage, (BuildAggregateWrapper, AggregateWrapper)):
            return Derivation.AGGREGATE
        # NOTE: a Parenthetical -> Derivation.PARENTHETICAL rule used to sit
        # here but was commented out in the original code.
        if isinstance(lineage, (BuildRowsetItem, RowsetItem)):
            return Derivation.ROWSET
        # NOTE(review): only the build-time comparison is matched here; an
        # author-time Comparison falls through to the purpose check below.
        # Confirm whether that is intended.
        if isinstance(lineage, BuildComparison):
            return Derivation.BASIC
        if isinstance(lineage, (BuildMultiSelectLineage, MultiSelectLineage)):
            return Derivation.MULTISELECT

        if isinstance(lineage, (BuildFunction, Function)):
            operator = lineage.operator
            if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
                return Derivation.AGGREGATE
            if operator in FunctionClass.ONE_TO_MANY.value:
                return Derivation.UNNEST
            if operator == FunctionType.RECURSE_EDGE:
                return Derivation.RECURSIVE
            if operator == FunctionType.UNION:
                return Derivation.UNION
            if operator == FunctionType.GROUP:
                return Derivation.GROUP_TO
            if operator == FunctionType.ALIAS:
                # An alias of an aggregate stays an aggregate; any other
                # alias is a basic derivation.
                source = lineage.arguments[0].derivation
                if source == Derivation.AGGREGATE:
                    return Derivation.AGGREGATE
                return Derivation.BASIC
            if operator in FunctionClass.SINGLE_ROW.value:
                return Derivation.CONSTANT

            # Any other function is constant when it references no concepts
            # or only constant ones; all() over no arguments is True, which
            # covers the "no concept arguments" case as well.
            if all(
                x.derivation == Derivation.CONSTANT
                for x in lineage.concept_arguments
            ):
                return Derivation.CONSTANT
            return Derivation.BASIC

    if purpose == Purpose.CONSTANT:
        return Derivation.CONSTANT
    return Derivation.ROOT
1290
+
1291
@classmethod
def calculate_granularity(cls, derivation: Derivation, grain: Grain, lineage):
    """Decide whether a concept yields a single row or multiple rows.

    * CONSTANT derivations are single-row.
    * AGGREGATE derivations are single-row only when every grain component
      ends with ``ALL_ROWS_CONCEPT`` (an empty grain qualifies); otherwise
      multi-row.
    * UNNEST, UNION and DATE_SPINE functions are multi-row.
    * Any other lineage whose concept arguments are all single-row is
      single-row.
    * Everything else is multi-row.
    """
    from trilogy.core.models.build import BuildFunction

    if derivation == Derivation.CONSTANT:
        return Granularity.SINGLE_ROW

    if derivation == Derivation.AGGREGATE:
        whole_table = all([x.endswith(ALL_ROWS_CONCEPT) for x in grain.components])
        if whole_table:
            return Granularity.SINGLE_ROW
        return Granularity.MULTI_ROW

    row_expanding = (FunctionType.UNNEST, FunctionType.UNION, FunctionType.DATE_SPINE)
    if (
        lineage
        and isinstance(lineage, (Function, BuildFunction))
        and lineage.operator in row_expanding
    ):
        return Granularity.MULTI_ROW

    if lineage:
        parent_flags = [
            x.granularity == Granularity.SINGLE_ROW
            for x in lineage.concept_arguments
        ]
        if all(parent_flags):
            return Granularity.SINGLE_ROW
    return Granularity.MULTI_ROW
1312
+
1313
+ # @property
1314
+ # def granularity(self) -> Granularity:
1315
+ # return self.calculate_granularity(self.derivation, self.grain, self.lineage)
1316
+
1317
def with_filter(
    self,
    condition: Conditional | Comparison | Parenthetical,
    environment: Environment | None = None,
) -> "Concept":
    """Return a filtered variant of this concept.

    If this concept is already a ``FilterItem`` with the same condition it
    is returned unchanged. Otherwise a new concept named
    ``<name>_filter_<hash>`` is built whose lineage filters this concept by
    ``condition``; when ``environment`` is given the new concept is also
    registered there via ``add_concept``.
    """
    from trilogy.utility import string_to_hash

    already_filtered = self.lineage and isinstance(self.lineage, FilterItem)
    if already_filtered and self.lineage.where.conditional == condition:
        return self

    # Suffix derived from name + condition, so different conditions get
    # different concept names.
    suffix = string_to_hash(self.name + str(condition))
    filter_lineage = FilterItem(
        content=self.reference, where=WhereClause(conditional=condition)
    )
    filtered_keys = self.keys if self.purpose == Purpose.PROPERTY else None
    filtered_grain = self.grain if self.grain else Grain(components=set())
    filtered = Concept.model_construct(
        name=f"{self.name}_filter_{suffix}",
        datatype=self.datatype,
        purpose=self.purpose,
        derivation=self.calculate_derivation(filter_lineage, self.purpose),
        granularity=self.granularity,
        metadata=self.metadata,
        lineage=filter_lineage,
        keys=filtered_keys,
        grain=filtered_grain,
        namespace=self.namespace,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
    if environment:
        environment.add_concept(filtered)
    return filtered
1348
+
1349
+
1350
class UndefinedConceptFull(Concept, Mergeable, Namespaced):
    """Concept variant whose datatype and purpose default to UNKNOWN.

    Its ``reference`` is an ``UndefinedConcept`` rather than a plain
    ``ConceptRef``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)
    name: str
    # presumably the source line of the undefined reference -- TODO confirm
    line_no: Optional[int] = None
    datatype: Union[
        DataType, TraitDataType, ArrayType, StructType, MapType, NumericType
    ] = DataType.UNKNOWN
    purpose: Purpose = Purpose.UNKNOWN

    @property
    def reference(self) -> UndefinedConcept:
        """An ``UndefinedConcept`` pointing at this concept's address."""
        own_address = self.address
        return UndefinedConcept(address=own_address)
1362
+
1363
+
1364
class OrderItem(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """One ORDER BY entry: an expression plus its sort direction."""

    # this needs to be a full concept as it may not exist in environment
    expr: Expr
    order: Ordering

    @field_validator("expr", mode="before")
    def enforce_reference(cls, v):
        """Store a full ``Concept`` as its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    def with_namespace(self, namespace: str) -> "OrderItem":
        """Return a copy whose expression is moved under ``namespace`` when
        it is ``Namespaced``."""
        expr = self.expr
        if isinstance(expr, Namespaced):
            expr = expr.with_namespace(namespace)
        return OrderItem.model_construct(expr=expr, order=self.order)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "OrderItem":
        """Return a copy with the merge applied to a ``Mergeable`` expression."""
        expr = self.expr
        if isinstance(expr, Mergeable):
            expr = expr.with_merge(source, target, modifiers)
        return OrderItem.model_construct(expr=expr, order=self.order)

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` in a
        ``Mergeable`` expression."""
        expr = self.expr
        if isinstance(expr, Mergeable):
            expr = expr.with_reference_replacement(source, target)
        return OrderItem.model_construct(expr=expr, order=self.order)

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Concept references used by the ordering expression."""
        return get_concept_arguments(self.expr)

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        """The expression's row arguments when it is a ``ConceptArgs``,
        otherwise the same as ``concept_arguments``."""
        expr = self.expr
        if not isinstance(expr, ConceptArgs):
            return self.concept_arguments
        return expr.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        """The expression's existence arguments, or an empty list when the
        expression is not a ``ConceptArgs``."""
        expr = self.expr
        if not isinstance(expr, ConceptArgs):
            return []
        return expr.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the ordering expression, via ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)
1426
+
1427
+
1428
class WindowItem(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
    """A window computation: a window type applied to ``content``, with an
    ordering, optional ``over`` concepts and an optional index."""

    type: WindowType
    content: FuncArgs
    order_by: List["OrderItem"]
    over: List["ConceptRef"] = Field(default_factory=list)
    index: Optional[int] = None

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{self.type.value} {self.content} by {self.index} over {self.over} order {self.order_by}"

    @field_validator("content", mode="before")
    def enforce_concept_ref(cls, v):
        """Store a full ``Concept`` as a ``ConceptRef``; pass anything else through."""
        if isinstance(v, Concept):
            return ConceptRef(address=v.address, datatype=v.datatype)
        return v

    @field_validator("over", mode="before")
    def enforce_concept_ref_over(cls, v):
        """Convert every full ``Concept`` in ``over`` to a ``ConceptRef``."""
        return [
            (
                ConceptRef(address=item.address, datatype=item.datatype)
                if isinstance(item, Concept)
                else item
            )
            for item in v
        ]

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "WindowItem":
        """Return a copy with the merge applied to content, ``over`` and
        ``order_by``; non-``Mergeable`` content is kept as-is."""
        content = self.content
        if isinstance(content, Mergeable):
            content = content.with_merge(source, target, modifiers)
        return WindowItem.model_construct(
            type=self.type,
            content=content,
            over=[x.with_merge(source, target, modifiers) for x in self.over],
            order_by=[x.with_merge(source, target, modifiers) for x in self.order_by],
            index=self.index,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` throughout.

        Content is only rewritten when it is ``Mergeable``, matching the
        guards in ``with_merge`` and ``with_namespace``; other content has
        no ``with_reference_replacement`` to call and is kept as-is.
        """
        content = self.content
        if isinstance(content, Mergeable):
            content = content.with_reference_replacement(source, target)
        return WindowItem.model_construct(
            type=self.type,
            content=content,
            over=[x.with_reference_replacement(source, target) for x in self.over],
            order_by=[
                x.with_reference_replacement(source, target) for x in self.order_by
            ],
            index=self.index,
        )

    def with_namespace(self, namespace: str) -> "WindowItem":
        """Return a copy moved under ``namespace``; non-``Namespaced``
        content is kept as-is."""
        content = self.content
        if isinstance(content, Namespaced):
            content = content.with_namespace(namespace)
        return WindowItem.model_construct(
            type=self.type,
            content=content,
            over=[x.with_namespace(namespace) for x in self.over],
            order_by=[x.with_namespace(namespace) for x in self.order_by],
            index=self.index,
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Concept references of the content, then each ``order_by`` item,
        then each ``over`` item."""
        output = []
        output += get_concept_arguments(self.content)
        for order in self.order_by:
            output += get_concept_arguments(order)
        for item in self.over:
            output += get_concept_arguments(item)
        return output

    @property
    def output_datatype(self):
        """INTEGER for RANK and ROW_NUMBER windows; otherwise the content's
        own output datatype."""
        if self.type in (WindowType.RANK, WindowType.ROW_NUMBER):
            return DataType.INTEGER
        return self.content.output_datatype
1512
+
1513
+
1514
def get_basic_type(
    type: DataType | ArrayType | StructType | MapType | NumericType | TraitDataType,
) -> DataType:
    """Reduce a possibly complex type to its basic ``DataType``.

    Array, struct, map and numeric wrapper types map to the matching
    ``DataType`` member; a ``TraitDataType`` is unwrapped recursively via
    its ``type`` attribute; anything else is returned unchanged.
    """
    wrapper_to_basic = (
        (ArrayType, DataType.ARRAY),
        (StructType, DataType.STRUCT),
        (MapType, DataType.MAP),
        (NumericType, DataType.NUMERIC),
    )
    for wrapper_cls, basic in wrapper_to_basic:
        if isinstance(type, wrapper_cls):
            return basic
    if isinstance(type, TraitDataType):
        return get_basic_type(type.type)
    return type
1528
+
1529
+
1530
class CaseWhen(Namespaced, DataTyped, ConceptArgs, Mergeable, BaseModel):
    """A ``WHEN <comparison> THEN <expr>`` arm of a CASE expression."""

    comparison: Conditional | SubselectComparison | Comparison
    expr: "Expr"

    @field_validator("expr", mode="before")
    def enforce_reference(cls, v):
        """Store a full ``Concept`` as its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    @property
    def output_datatype(self):
        """Datatype of the THEN expression, via ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"WHEN {str(self.comparison)} THEN {str(self.expr)}"

    @property
    def concept_arguments(self):
        """Concept references of the comparison followed by the expression's."""
        from_comparison = get_concept_arguments(self.comparison)
        from_expr = get_concept_arguments(self.expr)
        return from_comparison + from_expr

    @property
    def concept_row_arguments(self):
        """Row-level concept references of the comparison, then the expression's.

        NOTE(review): sibling helpers read ``row_arguments``; confirm whether
        this differently named property is still used by callers.
        """
        from_comparison = get_concept_row_arguments(self.comparison)
        from_expr = get_concept_row_arguments(self.expr)
        return from_comparison + from_expr

    def with_namespace(self, namespace: str) -> CaseWhen:
        """Return a copy moved under ``namespace``; a non-``Namespaced``
        expression is kept as-is."""
        comparison = self.comparison.with_namespace(namespace)
        expr = self.expr
        if isinstance(expr, Namespaced):
            expr = expr.with_namespace(namespace)
        return CaseWhen.model_construct(comparison=comparison, expr=expr)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> CaseWhen:
        """Return a copy with the merge applied; a non-``Mergeable``
        expression is kept as-is."""
        comparison = self.comparison.with_merge(source, target, modifiers)
        expr = self.expr
        if isinstance(expr, Mergeable):
            expr = expr.with_merge(source, target, modifiers)
        return CaseWhen.model_construct(comparison=comparison, expr=expr)

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target``; a
        non-``Mergeable`` expression is kept as-is."""
        comparison = self.comparison.with_reference_replacement(source, target)
        expr = self.expr
        if isinstance(expr, Mergeable):
            expr = expr.with_reference_replacement(source, target)
        return CaseWhen.model_construct(comparison=comparison, expr=expr)
1594
+
1595
+
1596
class CaseElse(Namespaced, ConceptArgs, DataTyped, Mergeable, BaseModel):
    """The ``ELSE <expr>`` arm of a CASE expression."""

    expr: "Expr"
    # this ensures that it's easily differentiable from CaseWhen
    discriminant: ComparisonOperator = ComparisonOperator.ELSE

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"ELSE {str(self.expr)}"

    @property
    def output_datatype(self):
        """Datatype of the ELSE expression, via ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)

    @field_validator("expr", mode="before")
    def enforce_expr(cls, v):
        """Store a full ``Concept`` as a ``ConceptRef``; pass anything else through."""
        if not isinstance(v, Concept):
            return v
        return ConceptRef(address=v.address, datatype=v.datatype)

    @property
    def concept_arguments(self):
        """Concept references used by the ELSE expression."""
        return get_concept_arguments(self.expr)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> CaseElse:
        """Return a copy with the merge applied; a non-``Mergeable``
        expression is kept as-is."""
        expr = self.expr
        if isinstance(expr, Mergeable):
            expr = expr.with_merge(source, target, modifiers)
        return CaseElse.model_construct(discriminant=self.discriminant, expr=expr)

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target``; a
        non-``Mergeable`` expression is kept as-is."""
        expr = self.expr
        if isinstance(expr, Mergeable):
            expr = expr.with_reference_replacement(source, target)
        return CaseElse.model_construct(discriminant=self.discriminant, expr=expr)

    def with_namespace(self, namespace: str) -> CaseElse:
        """Return a copy moved under ``namespace``; a non-``Namespaced``
        expression is kept as-is."""
        expr = self.expr
        if isinstance(expr, Namespaced):
            expr = expr.with_namespace(namespace)
        return CaseElse.model_construct(discriminant=self.discriminant, expr=expr)
1658
+
1659
+
1660
def get_concept_row_arguments(expr) -> List["ConceptRef"]:
    """Return the row-level concept references of ``expr`` as a new list.

    A ``ConceptRef`` yields itself; a ``ConceptArgs`` yields its
    ``row_arguments``; anything else yields an empty list.
    """
    if isinstance(expr, ConceptRef):
        return [expr]
    if isinstance(expr, ConceptArgs):
        return list(expr.row_arguments)
    return []
1668
+
1669
+
1670
def get_concept_arguments(expr) -> List["ConceptRef"]:
    """Return the concept references of ``expr`` as a new list.

    A ``ConceptRef`` yields itself; a ``ConceptArgs`` yields its
    ``concept_arguments``; anything else yields an empty list.
    """
    if isinstance(expr, ConceptRef):
        return [expr]
    if isinstance(expr, ConceptArgs):
        return list(expr.concept_arguments)
    return []
1681
+
1682
+
1683
def args_to_pretty(input: set[DataType]) -> str:
    """Format datatypes as a sorted, comma-separated list of quoted values.

    ``DataType.UNKNOWN`` is left out of the listing.
    """
    quoted = [f"'{x.value}'" for x in input if x != DataType.UNKNOWN]
    quoted.sort()
    return ", ".join(quoted)
1685
+
1686
+
1687
+ class Function(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
1688
+ operator: FunctionType
1689
+ arg_count: int = Field(default=1)
1690
+ output_datatype: (
1691
+ DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
1692
+ )
1693
+ output_purpose: Purpose
1694
+ valid_inputs: Optional[
1695
+ Union[
1696
+ Set[DataType],
1697
+ List[Set[DataType]],
1698
+ ]
1699
+ ] = None
1700
+ arguments: Sequence[FuncArgs]
1701
+
1702
+ class Config:
1703
+ frozen = True
1704
+
1705
def __repr__(self):
    """Render as ``<operator>(<arg>,<arg>,...)``."""
    rendered_args = ",".join([str(a) for a in self.arguments])
    return f"{self.operator.value}({rendered_args})"
1707
+
1708
def __str__(self):
    """Return the same text as ``__repr__``."""
    text = self.__repr__()
    return text
1710
+
1711
@property
def datatype(self):
    """Alias for ``output_datatype``."""
    produced = self.output_datatype
    return produced
1714
+
1715
@field_validator("arguments", mode="before")
@classmethod
def parse_arguments(cls, v, info: ValidationInfo):
    """Normalise and type-check the arguments passed to a function.

    Full author-time ``Concept`` arguments are replaced by their
    references (``BuildConcept`` instances are left alone). When the
    already-validated fields in ``info.data`` include ``valid_inputs``,
    the argument count and each argument's type are checked against them.

    Returns:
        The normalised argument list.

    Raises:
        ParseError: if more arguments are supplied than ``arg_count``
            allows (unless ``arg_count`` is ``InfiniteFunctionArgs``).
        TypeError: if a concept, nested function or constant argument has
            a type not permitted at its position.
    """
    from trilogy.core.models.build import BuildConcept
    from trilogy.parsing.exceptions import ParseError

    values = info.data
    arg_count = len(v)
    v = [
        x.reference if isinstance(x, Concept) and not isinstance(x, BuildConcept) else x
        for x in v
    ]
    target_arg_count = values["arg_count"]
    operator_name = values["operator"].name
    # surface right error
    if "valid_inputs" not in values:
        return v
    valid_inputs = values["valid_inputs"]
    if arg_count > target_arg_count and target_arg_count != InfiniteFunctionArgs:
        raise ParseError(
            f"Incorrect argument count to {operator_name} function, expects"
            f" {target_arg_count}, got {arg_count}"
        )
    # if all arguments can be any of the set type
    # turn this into an array for validation
    if isinstance(valid_inputs, set):
        valid_inputs = [valid_inputs for _ in v]
    elif not valid_inputs:
        return v

    # Built once, not per argument. Order matters: the first matching Python
    # type decides which datatype a constant is checked as.
    constant_types: List[Tuple[Type, DataType]] = [
        (str, DataType.STRING),
        (int, DataType.INTEGER),
        (float, DataType.FLOAT),
        (bool, DataType.BOOL),
        (DatePart, DataType.DATE_PART),
    ]
    # Kept as a tuple so membership uses ==, exactly like the list it replaces.
    date_part_values = tuple(x.value for x in DatePart)

    for idx, arg in enumerate(v):
        allowed = valid_inputs[idx]
        if (
            isinstance(arg, ConceptRef)
            and get_basic_type(arg.datatype.data_type) not in allowed
        ):
            if arg.datatype != DataType.UNKNOWN:
                raise TypeError(
                    f"Invalid argument type '{arg.datatype.data_type.value}' passed into {operator_name} function in position {idx+1}"
                    f" from concept: {arg.name}. Valid: {args_to_pretty(valid_inputs[idx])}."
                )
        if (
            isinstance(arg, Function)
            and get_basic_type(arg.output_datatype) not in allowed
        ):
            if arg.output_datatype != DataType.UNKNOWN:
                raise TypeError(
                    f"Invalid argument type '{arg.output_datatype}' passed into"
                    f" {operator_name} function from function {arg.operator.name} in position {idx+1}. Valid: {args_to_pretty(valid_inputs[idx])}"
                )

        # check constants
        # bool is a subclass of int, so a bool constant would otherwise match
        # the (int, INTEGER) entry first and be rejected wherever only BOOL
        # is valid. Accept it up front; bools passed where INTEGER is valid
        # still go through the table below and behave as before.
        if isinstance(arg, bool) and DataType.BOOL in allowed:
            continue
        for ptype, dtype in constant_types:
            if not isinstance(arg, ptype):
                continue
            if get_basic_type(dtype) in allowed:
                # attempt to exit early to avoid checking all types
                break
            # A string naming a date part is acceptable where DATE_PART is.
            if (
                isinstance(arg, str)
                and DataType.DATE_PART in allowed
                and arg in date_part_values
            ):
                break
            raise TypeError(
                f'Invalid {dtype} constant passed into {operator_name} "{arg}", expecting one of {valid_inputs[idx]}'
            )
    return v
1793
+
1794
def with_reference_replacement(self, source: str, target: Expr | ArgBinding):
    """Return a copy of this function with ``source`` replaced by ``target``.

    The replacement is delegated to every ``Mergeable`` argument; other
    arguments are carried over untouched. When the current output datatype
    is ``DataType.UNKNOWN`` it is re-derived from the replaced arguments.
    """
    from trilogy.core.functions import arg_to_datatype, merge_datatypes

    replaced = []
    for argument in self.arguments:
        if isinstance(argument, Mergeable):
            argument = argument.with_reference_replacement(
                source,
                target,
            )
        replaced.append(argument)

    resolved_type = self.output_datatype
    if resolved_type == DataType.UNKNOWN:
        resolved_type = merge_datatypes([arg_to_datatype(item) for item in replaced])
        # attribute access on a struct resolves to the accessed field's type
        if self.operator == FunctionType.ATTR_ACCESS and isinstance(
            resolved_type, StructType
        ):
            resolved_type = resolved_type.field_types[str(replaced[1])]
    # this is not ideal - see hacky logic for datatypes above
    # we need to figure out how to patch properly
    # should use function factory, but does not have environment access
    # probably move all datatype resolution to build?
    return Function.model_construct(
        operator=self.operator,
        arguments=replaced,
        output_datatype=resolved_type,
        output_purpose=self.output_purpose,
        valid_inputs=self.valid_inputs,
        arg_count=self.arg_count,
    )
1831
+
1832
def with_namespace(self, namespace: str) -> "Function":
    """Return a copy of this function with ``Namespaced`` arguments moved into ``namespace``."""
    namespaced_args = []
    for argument in self.arguments:
        if isinstance(argument, Namespaced):
            namespaced_args.append(argument.with_namespace(namespace))
        else:
            namespaced_args.append(argument)
    return Function.model_construct(
        operator=self.operator,
        arguments=namespaced_args,
        output_datatype=self.output_datatype,
        output_purpose=self.output_purpose,
        valid_inputs=self.valid_inputs,
        arg_count=self.arg_count,
    )
1851
+
1852
def with_merge(
    self, source: Concept, target: Concept, modifiers: List[Modifier]
) -> "Function":
    """Return a copy of this function with the merge applied to every ``Mergeable`` argument."""
    merged_args = []
    for argument in self.arguments:
        if isinstance(argument, Mergeable):
            merged_args.append(argument.with_merge(source, target, modifiers))
        else:
            merged_args.append(argument)
    return Function.model_construct(
        operator=self.operator,
        arguments=merged_args,
        output_datatype=self.output_datatype,
        output_purpose=self.output_purpose,
        valid_inputs=self.valid_inputs,
        arg_count=self.arg_count,
    )
1873
+
1874
@property
def concept_arguments(self) -> List[ConceptRef]:
    """Concatenate the results of ``get_concept_arguments`` for each argument, in order."""
    collected = []
    for argument in self.arguments:
        collected.extend(get_concept_arguments(argument))
    return collected
1880
+
1881
+
1882
class FunctionCallWrapper(
    DataTyped,
    ConceptArgs,
    Mergeable,
    Namespaced,
    BaseModel,
):
    """An expression wrapped with the name and arguments of the call it came from.

    Rendered as ``@name(arg, ...)``. The datatype and concept arguments are
    taken from ``content``.
    """

    content: Expr
    name: str
    args: List[Expr]

    def __str__(self):
        return f'@{self.name}({",".join([str(x) for x in self.args])})'

    def with_namespace(self, namespace) -> "FunctionCallWrapper":
        """Return a copy with ``content`` and every ``Namespaced`` arg moved into ``namespace``."""
        return FunctionCallWrapper.model_construct(
            content=(
                self.content.with_namespace(namespace)
                if isinstance(self.content, Namespaced)
                else self.content
            ),
            name=self.name,
            args=[
                x.with_namespace(namespace) if isinstance(x, Namespaced) else x
                for x in self.args
            ],
        )

    def with_reference_replacement(self, source, target):
        """Reference replacement is not supported on a wrapped call.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Cannot reference replace")

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "FunctionCallWrapper":
        """Return a copy with the merge applied to ``content`` and every ``Mergeable`` arg."""
        return FunctionCallWrapper.model_construct(
            content=(
                self.content.with_merge(source, target, modifiers)
                if isinstance(self.content, Mergeable)
                else self.content
            ),
            name=self.name,
            args=[
                (
                    x.with_merge(source, target, modifiers)
                    if isinstance(x, Mergeable)
                    else x
                )
                for x in self.args
            ],
        )

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Concept references found in ``content`` (``args`` are not inspected)."""
        base: List[ConceptRef] = []
        x = self.content
        if isinstance(x, ConceptRef):
            base += [x]
        elif isinstance(x, ConceptArgs):
            base += x.concept_arguments
        return base

    @property
    def output_datatype(self):
        """Datatype of ``content`` as resolved by ``arg_to_datatype``."""
        return arg_to_datatype(self.content)
1947
+
1948
+
1949
class AggregateWrapper(Mergeable, DataTyped, ConceptArgs, Namespaced, BaseModel):
    """A function paired with the concepts it is grouped ``by``."""

    function: Function
    by: List[ConceptRef | Concept] = Field(default_factory=list)

    @field_validator("by", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Swap any ``Concept`` in ``by`` for its ``reference``."""
        return [entry.reference if isinstance(entry, Concept) else entry for entry in v]

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{str(self.function)}<{grain_str}>"

    @property
    def datatype(self):
        return self.function.datatype

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Concept arguments of the function followed by the references of ``by``."""
        grouping_refs = [x.reference for x in self.by]
        return self.function.concept_arguments + grouping_refs

    @property
    def output_datatype(self):
        return self.function.output_datatype

    @property
    def output_purpose(self):
        return self.function.output_purpose

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy with the merge applied to the function and each ``by`` entry."""
        merged_function = self.function.with_merge(source, target, modifiers=modifiers)
        merged_by = []
        if self.by:
            merged_by = [c.with_merge(source, target, modifiers) for c in self.by]
        return AggregateWrapper.model_construct(
            function=merged_function,
            by=merged_by,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` in the function and ``by``."""
        replaced_function = self.function.with_reference_replacement(source, target)
        replaced_by = []
        if self.by:
            replaced_by = [
                c.with_reference_replacement(source, target) for c in self.by
            ]
        return AggregateWrapper.model_construct(
            function=replaced_function,
            by=replaced_by,
        )

    def with_namespace(self, namespace: str) -> "AggregateWrapper":
        """Return a copy with the function and each ``by`` entry moved into ``namespace``."""
        namespaced_function = self.function.with_namespace(namespace)
        namespaced_by = []
        if self.by:
            namespaced_by = [c.with_namespace(namespace) for c in self.by]
        return AggregateWrapper.model_construct(
            function=namespaced_function,
            by=namespaced_by,
        )
2009
+
2010
+
2011
class FilterItem(DataTyped, Namespaced, ConceptArgs, BaseModel):
    """An expression (``content``) restricted by a ``where`` clause."""

    content: FuncArgs
    where: "WhereClause"

    @field_validator("content", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Store a ``Concept`` passed as content as a ``ConceptRef`` instead."""
        if isinstance(v, Concept):
            return ConceptRef(address=v.address, datatype=v.datatype)
        return v

    def __str__(self):
        return f"<Filter: {str(self.content)} where {str(self.where)}>"

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "FilterItem":
        """Return a copy with the merge applied to ``content`` (if ``Mergeable``) and ``where``."""
        return FilterItem.model_construct(
            content=(
                self.content.with_merge(source, target, modifiers)
                if isinstance(self.content, Mergeable)
                else self.content
            ),
            where=self.where.with_merge(source, target, modifiers),
        )

    def with_namespace(self, namespace: str) -> "FilterItem":
        """Return a copy with ``content`` (if ``Namespaced``) and ``where`` moved into ``namespace``."""
        return FilterItem.model_construct(
            content=(
                self.content.with_namespace(namespace)
                if isinstance(self.content, Namespaced)
                else self.content
            ),
            where=self.where.with_namespace(namespace),
        )

    @property
    def output_datatype(self):
        """Datatype of ``content`` as resolved by ``arg_to_datatype``."""
        return arg_to_datatype(self.content)

    @property
    def concept_arguments(self):
        """Concept references from ``content`` followed by those of the ``where`` clause."""
        if isinstance(self.content, ConceptRef):
            return [self.content] + self.where.concept_arguments
        elif isinstance(self.content, ConceptArgs):
            return self.content.concept_arguments + self.where.concept_arguments
        return self.where.concept_arguments
2057
+
2058
+
2059
class RowsetLineage(Namespaced, Mergeable, BaseModel):
    """A named rowset: the select it comes from and the concepts derived from it."""

    name: str
    derived_concepts: List[ConceptRef]
    select: SelectLineage | MultiSelectLineage

    def with_namespace(self, namespace: str):
        """Return a copy with the derived concepts and the select moved into ``namespace``."""
        namespaced_concepts = [
            ref.with_namespace(namespace) for ref in self.derived_concepts
        ]
        namespaced_select = self.select.with_namespace(namespace)
        return RowsetLineage.model_construct(
            name=self.name,
            derived_concepts=namespaced_concepts,
            select=namespaced_select,
        )

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy with the merge applied to the derived concepts and the select."""
        merged_concepts = [
            ref.with_merge(source, target, modifiers) for ref in self.derived_concepts
        ]
        merged_select = self.select.with_merge(source, target, modifiers)
        return RowsetLineage.model_construct(
            name=self.name,
            derived_concepts=merged_concepts,
            select=merged_select,
        )
2081
+
2082
+
2083
class RowsetItem(Mergeable, DataTyped, ConceptArgs, Namespaced, BaseModel):
    """A single concept reference exposed through a rowset."""

    content: ConceptRef
    rowset: RowsetLineage

    def __repr__(self):
        return f"<Rowset<{self.rowset.name}>: {str(self.content)}>"

    def __str__(self):
        return self.__repr__()

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy with the merge applied to ``content``; ``rowset`` is passed through as-is."""
        merged_content = self.content.with_merge(source, target, modifiers)
        return RowsetItem.model_construct(
            content=merged_content,
            rowset=self.rowset,
        )

    def with_namespace(self, namespace: str) -> "RowsetItem":
        """Return a copy with both ``content`` and ``rowset`` moved into ``namespace``."""
        namespaced_content = self.content.with_namespace(namespace)
        namespaced_rowset = self.rowset.with_namespace(namespace)
        return RowsetItem.model_construct(
            content=namespaced_content,
            rowset=namespaced_rowset,
        )

    @property
    def output(self) -> ConceptRef:
        return self.content

    @property
    def output_datatype(self):
        return self.content.datatype

    @property
    def concept_arguments(self):
        return [self.content]
2116
+
2117
+
2118
class OrderBy(Mergeable, Namespaced, BaseModel):
    """An ordered list of ``OrderItem`` entries."""

    items: List[OrderItem]

    def with_namespace(self, namespace: str) -> "OrderBy":
        """Return a copy with every item moved into ``namespace``."""
        namespaced_items = []
        for entry in self.items:
            namespaced_items.append(entry.with_namespace(namespace))
        return OrderBy.model_construct(items=namespaced_items)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "OrderBy":
        """Return a copy with the merge applied to every item."""
        merged_items = []
        for entry in self.items:
            merged_items.append(entry.with_merge(source, target, modifiers))
        return OrderBy.model_construct(items=merged_items)

    @property
    def concept_arguments(self):
        """Concatenated ``concept_arguments`` of every item, in order."""
        collected = []
        for entry in self.items:
            collected += entry.concept_arguments
        return collected
2139
+
2140
+
2141
class AlignClause(Namespaced, BaseModel):
    """A list of ``AlignItem`` entries."""

    items: List[AlignItem]

    def with_namespace(self, namespace: str) -> "AlignClause":
        """Return a copy with every item moved into ``namespace``."""
        namespaced_items = []
        for entry in self.items:
            namespaced_items.append(entry.with_namespace(namespace))
        return AlignClause.model_construct(items=namespaced_items)
2148
+
2149
+
2150
class DeriveItem(Namespaced, DataTyped, ConceptArgs, Mergeable, BaseModel):
    """A named expression derived under a namespace."""

    expr: Expr
    name: str
    namespace: str

    @property
    def derived_concept(self) -> str:
        """Address of the derived concept, formatted as ``namespace.name``."""
        return f"{self.namespace}.{self.name}"

    def with_namespace(self, namespace):
        """Return a copy placed in ``namespace``.

        Only a ``Namespaced`` expression is rewritten. Constants, including
        falsy ones such as ``0`` or ``False``, are carried over unchanged
        rather than dropped or called as if they had ``with_namespace``.
        """
        return DeriveItem.model_construct(
            expr=(
                self.expr.with_namespace(namespace)
                if isinstance(self.expr, Namespaced)
                else self.expr
            ),
            name=self.name,
            namespace=namespace,
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "DeriveItem":
        """Return a copy with the merge applied to ``expr`` when it is ``Mergeable``."""
        return DeriveItem.model_construct(
            expr=(
                self.expr.with_merge(source, target, modifiers)
                if isinstance(self.expr, Mergeable)
                else self.expr
            ),
            name=self.name,
            namespace=self.namespace,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` in ``expr`` when it is ``Mergeable``."""
        return DeriveItem.model_construct(
            expr=(
                self.expr.with_reference_replacement(source, target)
                if isinstance(self.expr, Mergeable)
                else self.expr
            ),
            name=self.name,
            namespace=self.namespace,
        )
2193
+
2194
+
2195
class DeriveClause(Mergeable, Namespaced, BaseModel):
    """A list of ``DeriveItem`` entries."""

    items: List[DeriveItem]

    def with_namespace(self, namespace: str) -> "DeriveClause":
        """Return a copy with every ``Namespaced`` item moved into ``namespace``."""
        namespaced_items = []
        for entry in self.items:
            if isinstance(entry, Namespaced):
                entry = entry.with_namespace(namespace)
            namespaced_items.append(entry)
        return DeriveClause.model_construct(items=namespaced_items)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "DeriveClause":
        """Return a copy with the merge applied to every ``Mergeable`` item."""
        merged_items = []
        for entry in self.items:
            if isinstance(entry, Mergeable):
                entry = entry.with_merge(source, target, modifiers)
            merged_items.append(entry)
        return DeriveClause.model_construct(items=merged_items)

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` in every ``Mergeable`` item."""
        replaced_items = []
        for entry in self.items:
            if isinstance(entry, Mergeable):
                entry = entry.with_reference_replacement(source, target)
            replaced_items.append(entry)
        return DeriveClause.model_construct(items=replaced_items)
2231
+
2232
+
2233
class SelectLineage(Mergeable, Namespaced, BaseModel):
    """Lineage record of a select: its outputs, local concepts, grain and clauses."""

    selection: List[ConceptRef]
    hidden_components: set[str]
    local_concepts: dict[str, Concept]
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = Field(default_factory=lambda: Metadata())
    grain: Grain = Field(default_factory=Grain)
    where_clause: Union["WhereClause", None] = Field(default=None)
    having_clause: Union["HavingClause", None] = Field(default=None)

    @property
    def output_components(self) -> List[ConceptRef]:
        return self.selection

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> SelectLineage:
        """Return a copy with the merge applied to every merge-aware member.

        ``hidden_components``, ``limit`` and ``meta`` are carried over as-is.
        """
        return SelectLineage.model_construct(
            selection=[x.with_merge(source, target, modifiers) for x in self.selection],
            hidden_components=self.hidden_components,
            local_concepts={
                x: y.with_merge(source, target, modifiers)
                for x, y in self.local_concepts.items()
            },
            order_by=(
                self.order_by.with_merge(source, target, modifiers)
                if self.order_by
                else None
            ),
            limit=self.limit,
            # keep the original metadata, as with_namespace does; omitting it
            # would make model_construct fall back to a fresh default Metadata
            meta=self.meta,
            grain=self.grain.with_merge(source, target, modifiers),
            where_clause=(
                self.where_clause.with_merge(source, target, modifiers)
                if self.where_clause
                else None
            ),
            having_clause=(
                self.having_clause.with_merge(source, target, modifiers)
                if self.having_clause
                else None
            ),
        )

    def with_namespace(self, namespace):
        """Return a copy with every namespace-aware member moved into ``namespace``.

        ``hidden_components``, ``limit`` and ``meta`` are carried over as-is.
        """
        return SelectLineage.model_construct(
            selection=[x.with_namespace(namespace) for x in self.selection],
            hidden_components=self.hidden_components,
            local_concepts={
                x: y.with_namespace(namespace) for x, y in self.local_concepts.items()
            },
            order_by=self.order_by.with_namespace(namespace) if self.order_by else None,
            limit=self.limit,
            meta=self.meta,
            grain=self.grain.with_namespace(namespace),
            where_clause=(
                self.where_clause.with_namespace(namespace)
                if self.where_clause
                else None
            ),
            having_clause=(
                self.having_clause.with_namespace(namespace)
                if self.having_clause
                else None
            ),
        )
2299
+
2300
+
2301
class MultiSelectLineage(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """Lineage record combining several selects through an align clause."""

    selects: List[SelectLineage]
    align: AlignClause

    namespace: str
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    where_clause: Union["WhereClause", None] = Field(default=None)
    having_clause: Union["HavingClause", None] = Field(default=None)
    derive: DeriveClause | None = None
    hidden_components: set[str]

    @property
    def grain(self):
        """Accumulate the grain of every select onto an empty ``Grain``."""
        combined = Grain()
        for member in self.selects:
            combined += member.grain
        return combined

    @property
    def output_components(self) -> list[ConceptRef]:
        """Derived concepts followed by each select's outputs, minus hidden addresses."""
        candidates = [
            ConceptRef.model_construct(address=x, datatype=DataType.UNKNOWN)
            for x in self.derived_concepts
        ]
        for member in self.selects:
            candidates.extend(member.output_components)
        visible = []
        for candidate in candidates:
            if candidate.address not in self.hidden_components:
                visible.append(candidate)
        return visible

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> MultiSelectLineage:
        """Return a copy with the merge applied to the selects, derive, order and filter clauses.

        ``align``, ``namespace``, ``hidden_components`` and ``limit`` are
        carried over as-is.
        """
        merged_selects = [s.with_merge(source, target, modifiers) for s in self.selects]
        merged_derive = None
        if self.derive:
            merged_derive = self.derive.with_merge(source, target, modifiers)
        merged_order = None
        if self.order_by:
            merged_order = self.order_by.with_merge(source, target, modifiers)
        merged_where = None
        if self.where_clause:
            merged_where = self.where_clause.with_merge(source, target, modifiers)
        merged_having = None
        if self.having_clause:
            merged_having = self.having_clause.with_merge(source, target, modifiers)
        return MultiSelectLineage.model_construct(
            selects=merged_selects,
            align=self.align,
            derive=merged_derive,
            namespace=self.namespace,
            hidden_components=self.hidden_components,
            order_by=merged_order,
            limit=self.limit,
            where_clause=merged_where,
            having_clause=merged_having,
        )

    def with_namespace(self, namespace: str) -> "MultiSelectLineage":
        """Return a copy with every namespace-aware member moved into ``namespace``."""
        namespaced_selects = [c.with_namespace(namespace) for c in self.selects]
        namespaced_align = self.align.with_namespace(namespace)
        namespaced_derive = None
        if self.derive:
            namespaced_derive = self.derive.with_namespace(namespace)
        namespaced_order = None
        if self.order_by:
            namespaced_order = self.order_by.with_namespace(namespace)
        namespaced_where = None
        if self.where_clause:
            namespaced_where = self.where_clause.with_namespace(namespace)
        namespaced_having = None
        if self.having_clause:
            namespaced_having = self.having_clause.with_namespace(namespace)
        return MultiSelectLineage.model_construct(
            selects=namespaced_selects,
            align=namespaced_align,
            derive=namespaced_derive,
            namespace=namespace,
            hidden_components=self.hidden_components,
            order_by=namespaced_order,
            limit=self.limit,
            where_clause=namespaced_where,
            having_clause=namespaced_having,
        )

    @property
    def derived_concepts(self) -> set[str]:
        """Addresses introduced by the align items and, when present, the derive items."""
        addresses = {item.aligned_concept for item in self.align.items}
        if self.derive:
            addresses.update(ditem.derived_concept for ditem in self.derive.items)
        return addresses

    @property
    def concept_arguments(self):
        """Output components of every select, de-duplicated by ``unique`` on ``address``."""
        gathered = []
        for member in self.selects:
            gathered.extend(member.output_components)
        return unique(gathered, "address")
2399
+
2400
+
2401
class LooseConceptList(BaseModel):
    """A collection of concepts compared by address.

    Equality and the set-style helpers operate on the set of ``address``
    values of the contained concepts, not on the concept objects.
    """

    concepts: Sequence[Concept | ConceptRef]

    @cached_property
    def addresses(self) -> set[str]:
        return {s.address for s in self.concepts}

    @classmethod
    def validate(cls, v):
        """Build an instance from a sequence of concepts.

        The sequence is passed as the ``concepts`` keyword because pydantic
        models do not accept positional field values.
        """
        return cls(concepts=v)

    @cached_property
    def sorted_addresses(self) -> List[str]:
        return sorted(list(self.addresses))

    def __str__(self) -> str:
        return f"lcl{str(self.sorted_addresses)}"

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses == other.addresses

    def issubset(self, other):
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.issubset(other.addresses)

    def __contains__(self, other):
        """Membership by address for an address string, ``Concept`` or ``ConceptRef``."""
        if isinstance(other, str):
            return other in self.addresses
        # the list may hold ConceptRef entries, so references are matched
        # by address just like full concepts
        if not isinstance(other, (Concept, ConceptRef)):
            return False
        return other.address in self.addresses

    def difference(self, other):
        """Addresses in this list but not in ``other``; ``False`` for any other type."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.difference(other.addresses)

    def isdisjoint(self, other):
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.isdisjoint(other.addresses)
2448
+
2449
+
2450
class AlignItem(Namespaced, BaseModel):
    """A group of concept references aligned under one alias."""

    alias: str
    concepts: List[ConceptRef]
    namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)

    @field_validator("concepts", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Swap any ``Concept`` in ``concepts`` for its ``reference``."""
        return [entry.reference if isinstance(entry, Concept) else entry for entry in v]

    @computed_field  # type: ignore
    @cached_property
    def concepts_lcl(self) -> LooseConceptList:
        return LooseConceptList(concepts=self.concepts)

    @property
    def aligned_concept(self) -> str:
        """Address of the aligned concept, formatted as ``namespace.alias``."""
        return f"{self.namespace}.{self.alias}"

    def with_namespace(self, namespace: str) -> "AlignItem":
        """Return a copy placed in ``namespace`` with every concept moved into it."""
        namespaced_concepts = []
        for ref in self.concepts:
            namespaced_concepts.append(ref.with_namespace(namespace))
        return AlignItem.model_construct(
            alias=self.alias,
            concepts=namespaced_concepts,
            namespace=namespace,
        )
2481
+
2482
+
2483
class CustomFunctionFactory:
    """Callable that instantiates a user-defined function body with concrete arguments."""

    def __init__(
        self,
        function: Expr,
        namespace: str,
        function_arguments: list[ArgBinding],
        name: str,
    ):
        self.namespace = namespace
        self.function = function
        self.function_arguments = function_arguments
        self.name = name

    def with_namespace(self, namespace: str):
        """Move this factory into ``namespace`` in place and return ``self``."""
        self.namespace = namespace
        self.function = (
            self.function.with_namespace(namespace)
            if isinstance(self.function, Namespaced)
            else self.function
        )
        self.function_arguments = [
            x.with_namespace(namespace) for x in self.function_arguments
        ]
        return self

    def __call__(self, *creation_args: ArgBinding | Expr):
        """Bind ``creation_args`` to the declared arguments and return the resulting expression.

        Missing trailing arguments are filled from the bindings' defaults.
        Arguments whose binding declares a known datatype are type-checked.

        Raises:
            ValueError: a missing argument has no default.
            TypeError: an argument's datatype or traits do not match its binding.
        """
        # work on a deep copy so the stored template is never modified
        nout = (
            self.function.model_copy(deep=True)
            if isinstance(self.function, BaseModel)
            else self.function
        )
        creation_arg_list: list[ArgBinding | Expr] = list(creation_args)
        if len(creation_args) < len(self.function_arguments):
            for binding in self.function_arguments[len(creation_arg_list) :]:
                if binding.default is None:
                    raise ValueError(f"Missing argument {binding.name}")

                creation_arg_list.append(binding.default)
        for arg_idx, arg in enumerate(self.function_arguments):
            if not arg.datatype or arg.datatype == DataType.UNKNOWN:
                continue
            # valid indexes are 0..len-1, so an index equal to the length
            # must also be skipped before indexing below
            if arg_idx >= len(creation_arg_list):
                continue
            comparison = arg_to_datatype(creation_arg_list[arg_idx])
            if comparison != arg.datatype:
                raise TypeError(
                    f"Invalid type passed into custom function @{self.name} in position {arg_idx+1} for argument {arg.name}, expected {arg.datatype}, got {comparison}"
                )
            if isinstance(arg.datatype, TraitDataType):
                if not (
                    isinstance(comparison, TraitDataType)
                    and all(x in comparison.traits for x in arg.datatype.traits)
                ):
                    raise TypeError(
                        f"Invalid argument type passed into custom function @{self.name} in position {arg_idx+1} for argument {arg.name}, expected traits {arg.datatype.traits}, got {comparison}"
                    )

        if isinstance(nout, Mergeable):
            for idx, x in enumerate(creation_arg_list):
                # in the default namespace the argument name is prefixed
                # with it; otherwise the binding name is used as-is
                if self.namespace == DEFAULT_NAMESPACE:
                    target = f"{DEFAULT_NAMESPACE}.{self.function_arguments[idx].name}"
                else:
                    target = self.function_arguments[idx].name
                nout = nout.with_reference_replacement(target, x)
        return nout
2548
+
2549
+
2550
class Metadata(BaseModel):
    """Metadata container object.

    All fields are optional; ``concept_source`` defaults to
    ``ConceptSource.MANUAL``.

    TODO: support arbitrary tags"""

    description: Optional[str] = None
    line_number: Optional[int] = None
    concept_source: ConceptSource = ConceptSource.MANUAL
2557
+
2558
+
2559
class Window(BaseModel):
    """A window described by a count and a ``WindowOrder``."""

    count: int
    window_order: WindowOrder

    def __str__(self):
        # only the order is rendered; ``count`` is not part of the string form
        return f"Window<{self.window_order}>"
2565
+
2566
+
2567
class WindowItemOver(BaseModel):
    """Container for a list of concept references (presumably the window's ``over`` clause; confirm against the parser)."""

    contents: List[ConceptRef]
2569
+
2570
+
2571
class WindowItemOrder(BaseModel):
    """Container for a list of order items (presumably the window's ordering clause; confirm against the parser)."""

    # "OrderItem" is given as a string forward reference
    contents: List["OrderItem"]
2573
+
2574
+
2575
class Comment(BaseModel):
    """A comment carrying its text."""

    text: str
2577
+
2578
+
2579
class ArgBinding(Namespaced, DataTyped, BaseModel):
    """A named function argument with an optional default and declared datatype."""

    name: str
    default: Expr | None = None
    datatype: (
        DataType | MapType | ArrayType | NumericType | StructType | TraitDataType
    ) = DataType.UNKNOWN

    def with_namespace(self, namespace):
        """Return a new binding whose name and ``Namespaced`` default are moved into ``namespace``.

        NOTE(review): ``datatype`` is not passed on, so the new binding falls
        back to ``DataType.UNKNOWN`` - confirm whether this is intended.
        """
        namespaced_name = address_with_namespace(self.name, namespace)
        if isinstance(self.default, Namespaced):
            namespaced_default = self.default.with_namespace(namespace)
        else:
            namespaced_default = self.default
        return ArgBinding(
            name=namespaced_name,
            default=namespaced_default,
        )

    @property
    def output_datatype(self):
        """Datatype of the default when one is set, otherwise the declared datatype."""
        if self.default is None:
            return self.datatype
        return arg_to_datatype(self.default)
2601
+
2602
+
2603
class CustomType(BaseModel):
    """A named custom type over one or more base datatypes."""

    name: str
    type: DataType | list[DataType]
    drop_on: list[FunctionType] = Field(default_factory=list)
    add_on: list[FunctionType] = Field(default_factory=list)

    def with_namespace(self, namespace: str) -> "CustomType":
        """Return a copy whose name is qualified with ``namespace``; other fields are unchanged."""
        qualified_name = address_with_namespace(self.name, namespace)
        return CustomType.model_construct(
            name=qualified_name,
            type=self.type,
            drop_on=self.drop_on,
            add_on=self.add_on,
        )
2616
+
2617
+
2618
# Union of the constant and model types accepted wherever a field is
# annotated as ``Expr`` (for example ``DeriveItem.expr`` and
# ``FunctionCallWrapper.content``).
Expr = (
    MagicConstants
    | bool
    | int
    | str
    | float
    | date
    | datetime
    | TupleWrapper
    | ListWrapper
    | MapWrapper
    | WindowItem
    | FilterItem
    | ConceptRef
    | Comparison
    | Conditional
    | FunctionCallWrapper
    | Parenthetical
    | Function
    | AggregateWrapper
    | CaseWhen
    | CaseElse
)
2641
+
2642
# Union of the types accepted wherever a field is annotated as ``FuncArgs``
# (for example ``FilterItem.content``). Unlike ``Expr`` it also admits
# datatype objects, ``DatePart``, ``ArgBinding`` and ``Ordering``.
FuncArgs = (
    ConceptRef
    | AggregateWrapper
    | Function
    | FunctionCallWrapper
    | Parenthetical
    | CaseWhen
    | CaseElse
    | WindowItem
    | FilterItem
    | bool
    | int
    | float
    | DatePart
    | str
    | date
    | datetime
    | MapWrapper[Any, Any]
    | TraitDataType
    | DataType
    | ArrayType
    | MapType
    | NumericType
    | ListWrapper[Any]
    | TupleWrapper[Any]
    | Comparison
    | Conditional
    | MagicConstants
    | ArgBinding
    | Ordering
)