pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
@@ -0,0 +1,2662 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from abc import ABC
5
+ from datetime import date, datetime
6
+ from functools import cached_property
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Iterable,
11
+ List,
12
+ Optional,
13
+ Self,
14
+ Sequence,
15
+ Set,
16
+ Tuple,
17
+ Type,
18
+ Union,
19
+ )
20
+
21
+ from pydantic import (
22
+ BaseModel,
23
+ ConfigDict,
24
+ Field,
25
+ ValidationInfo,
26
+ computed_field,
27
+ field_validator,
28
+ model_validator,
29
+ )
30
+
31
+ from trilogy.constants import DEFAULT_NAMESPACE, MagicConstants
32
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
33
+ from trilogy.core.enums import (
34
+ BooleanOperator,
35
+ ComparisonOperator,
36
+ ConceptSource,
37
+ DatePart,
38
+ Derivation,
39
+ FunctionClass,
40
+ FunctionType,
41
+ Granularity,
42
+ InfiniteFunctionArgs,
43
+ Modifier,
44
+ Ordering,
45
+ Purpose,
46
+ WindowOrder,
47
+ WindowType,
48
+ )
49
+ from trilogy.core.models.core import (
50
+ Addressable,
51
+ ArrayType,
52
+ DataType,
53
+ DataTyped,
54
+ ListWrapper,
55
+ MapType,
56
+ MapWrapper,
57
+ NumericType,
58
+ StructType,
59
+ TraitDataType,
60
+ TupleWrapper,
61
+ arg_to_datatype,
62
+ is_compatible_datatype,
63
+ )
64
+ from trilogy.utility import unique
65
+
66
+ # TODO: refactor to avoid these
67
+ if TYPE_CHECKING:
68
+ from trilogy.core.models.environment import Environment
69
+
70
+
71
class Namespaced(ABC):
    """Interface for objects that can be re-addressed under a namespace."""

    def with_namespace(self, namespace: str):
        """Produce the equivalent of this object under ``namespace``.

        The base implementation always raises; subclasses supply the logic.

        Raises:
            NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError()
74
+
75
+
76
class Mergeable(ABC):
    """Interface for objects whose concept references can be rewritten."""

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Produce a version of this object with ``source`` merged into ``target``.

        Raises:
            NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError()

    def with_reference_replacement(self, source: str, target: Expr | ArgBinding):
        """Produce a version of this object with ``source`` replaced by ``target``.

        Raises:
            NotImplementedError: always, unless overridden; the exception
                carries the concrete type so the missing override is easy
                to identify.
        """
        concrete_type = type(self)
        raise NotImplementedError(concrete_type)
82
+
83
+
84
class ConceptArgs(ABC):
    """Interface exposing the concept references an object depends on."""

    @property
    def concept_arguments(self) -> Sequence["ConceptRef"]:
        """Concept references used by this object; must be overridden."""
        raise NotImplementedError()

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        """Groups of existence-check references; empty unless overridden."""
        return list()

    @property
    def row_arguments(self) -> Sequence["ConceptRef"]:
        """Row-level references; falls back to ``concept_arguments``."""
        row_level = self.concept_arguments
        return row_level
96
+
97
+
98
class HasUUID(ABC):
    """Mixin that derives a stable identifier from the string form of an object."""

    @property
    def uuid(self) -> str:
        """Return the hex MD5 digest of ``str(self)``.

        The digest is used purely as a fingerprint, so the hash is created
        with ``usedforsecurity=False``; this keeps the property usable on
        builds where MD5 is blocked for security purposes, and does not
        change the resulting value.
        """
        payload = str(self).encode()
        return hashlib.md5(payload, usedforsecurity=False).hexdigest()
102
+
103
+
104
class ConceptRef(Addressable, Namespaced, DataTyped, Mergeable, BaseModel):
    """Reference to a concept, identified by its address.

    Compares equal to a ``Concept``, another ``ConceptRef`` or a plain string
    whenever the addresses match.
    """

    address: str
    datatype: (
        DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
    ) = DataType.UNKNOWN
    metadata: Optional["Metadata"] = None

    @property
    def reference(self):
        """A reference is already its own reference."""
        return self

    @property
    def line_no(self) -> int | None:
        """Line number taken from the metadata, or ``None`` without metadata."""
        return self.metadata.line_number if self.metadata else None

    def __repr__(self):
        return f"ref:{self.address}"

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        if isinstance(other, (Concept, ConceptRef)):
            return self.address == other.address
        if isinstance(other, str):
            return self.address == other
        return False

    @property
    def namespace(self):
        """Everything before the final ``.`` of the address."""
        pieces = self.address.rsplit(".", 1)
        return pieces[0]

    @property
    def name(self):
        """Everything after the final ``.`` of the address."""
        pieces = self.address.rsplit(".", 1)
        return pieces[1]

    @property
    def output_datatype(self):
        return self.datatype

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> ConceptRef:
        """Point at ``target`` if this reference currently points at ``source``.

        References to any other address are returned unchanged.
        """
        if self.address != source.address:
            return self
        return ConceptRef.model_construct(
            address=target.address, datatype=target.datatype, metadata=self.metadata
        )

    def with_namespace(self, namespace: str):
        """Return a reference whose address is placed under ``namespace``."""
        namespaced_address = address_with_namespace(self.address, namespace)
        return ConceptRef.model_construct(
            address=namespaced_address,
            datatype=self.datatype,
            metadata=self.metadata,
        )

    def with_reference_replacement(self, source: str, target: Expr | ArgBinding):
        """Swap this reference for ``target`` when it refers to ``source``.

        An exact address match returns ``target`` itself. When ``source`` is a
        parent in the access path and ``target`` is struct-typed, an attribute
        access on ``target`` is returned instead. Otherwise the reference is
        returned unchanged.
        """
        # a reference might be to an attribute of a struct that is bound late
        # if the replacement is a parent in the access path; replace reference
        # with an attribute access call
        parent_prefix = f"{source}."
        for candidate in (f"{DEFAULT_NAMESPACE}.{self.address}", self.address):
            if candidate == source:
                return target
            if candidate.startswith(parent_prefix):
                attribute = self.address.rsplit(".", 1)[1]
                target_type = arg_to_datatype(target)
                if isinstance(target_type, StructType):
                    return Function(
                        arguments=[target, attribute],
                        operator=FunctionType.ATTR_ACCESS,
                        arg_count=2,
                        output_datatype=target_type.field_types.get(
                            attribute, DataType.UNKNOWN
                        ),
                        output_purpose=Purpose.PROPERTY,
                    )
        return self
187
+
188
+
189
class UndefinedConcept(ConceptRef):
    """Concept reference variant that reports an unknown purpose."""

    @property
    def reference(self):
        return self

    @property
    def purpose(self):
        return Purpose.UNKNOWN
199
+
200
+
201
def address_with_namespace(address: str, namespace: str) -> str:
    """Return ``address`` placed under ``namespace``.

    The all-rows concept is returned untouched. An address whose first
    segment is the default namespace has that segment replaced; any other
    address is simply prefixed with ``namespace``.
    """
    head, dot, tail = address.partition(".")
    # without a dot the whole address doubles as the name
    leaf = tail if dot else address
    if leaf == ALL_ROWS_CONCEPT:
        return address
    if head == DEFAULT_NAMESPACE:
        return f"{namespace}.{leaf}"
    return f"{namespace}.{address}"
212
+
213
+
214
class Parenthetical(
    DataTyped,
    ConceptArgs,
    Mergeable,
    Namespaced,
    BaseModel,
):
    """An expression wrapped in parentheses."""

    content: "Expr"

    @field_validator("content", mode="before")
    @classmethod
    def content_validator(cls, v, info: ValidationInfo):
        # full concepts are stored as lightweight references
        return v.reference if isinstance(v, Concept) else v

    def __add__(self, other) -> Union["Parenthetical", "Conditional"]:
        """AND this expression with ``other``; ``None`` leaves it unchanged."""
        if other is None:
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return f"({str(self.content)})"

    def with_namespace(self, namespace: str) -> Parenthetical:
        """Apply ``namespace`` to the wrapped content when it supports it."""
        inner = self.content
        if isinstance(inner, Namespaced):
            inner = inner.with_namespace(namespace)
        return Parenthetical.model_construct(content=inner)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Apply the merge to the wrapped content when it supports it."""
        inner = self.content
        if isinstance(inner, Mergeable):
            inner = inner.with_merge(source, target, modifiers)
        return Parenthetical.model_construct(content=inner)

    def with_reference_replacement(self, source, target):
        """Apply the replacement to the wrapped content when it supports it."""
        inner = self.content
        if isinstance(inner, Mergeable):
            inner = inner.with_reference_replacement(source, target)
        return Parenthetical.model_construct(content=inner)

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """References used by the content, as a fresh list."""
        inner = self.content
        if isinstance(inner, ConceptRef):
            return [inner]
        if isinstance(inner, ConceptArgs):
            return list(inner.concept_arguments)
        return []

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        inner = self.content
        if not isinstance(inner, ConceptArgs):
            return self.concept_arguments
        return inner.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        inner = self.content
        if not isinstance(inner, ConceptArgs):
            return []
        return inner.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the wrapped content."""
        return arg_to_datatype(self.content)
295
+
296
+
297
class Conditional(Mergeable, ConceptArgs, Namespaced, DataTyped, BaseModel):
    """Two expressions joined by a boolean operator."""

    left: Expr
    right: Expr
    operator: BooleanOperator

    @field_validator("left", mode="before")
    @classmethod
    def left_validator(cls, v, info: ValidationInfo):
        # full concepts are stored as lightweight references
        return v.reference if isinstance(v, Concept) else v

    @field_validator("right", mode="before")
    @classmethod
    def right_validator(cls, v, info: ValidationInfo):
        # full concepts are stored as lightweight references
        return v.reference if isinstance(v, Concept) else v

    def __add__(self, other) -> "Conditional":
        """AND this condition with ``other``.

        ``None`` and anything rendering to the same string as this condition
        leave it unchanged.
        """
        if other is None or str(other) == str(self):
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional.model_construct(
            left=self, right=other, operator=BooleanOperator.AND
        )

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return f"{str(self.left)} {self.operator.value} {str(self.right)}"

    def __eq__(self, other):
        if isinstance(other, Conditional):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    def with_namespace(self, namespace: str) -> "Conditional":
        """Apply ``namespace`` to each side that supports it."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Namespaced):
            lhs = lhs.with_namespace(namespace)
        if isinstance(rhs, Namespaced):
            rhs = rhs.with_namespace(namespace)
        return Conditional.model_construct(left=lhs, right=rhs, operator=self.operator)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "Conditional":
        """Apply the merge to each side that supports it."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Mergeable):
            lhs = lhs.with_merge(source, target, modifiers)
        if isinstance(rhs, Mergeable):
            rhs = rhs.with_merge(source, target, modifiers)
        return Conditional.model_construct(left=lhs, right=rhs, operator=self.operator)

    def with_reference_replacement(self, source, target):
        """Apply the replacement to each side that supports it."""
        lhs, rhs = self.left, self.right
        if isinstance(lhs, Mergeable):
            lhs = lhs.with_reference_replacement(source, target)
        if isinstance(rhs, Mergeable):
            rhs = rhs.with_reference_replacement(source, target)
        return self.__class__.model_construct(
            left=lhs, right=rhs, operator=self.operator
        )

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """References from the left side followed by those from the right."""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        """Row-level references from the left side followed by the right."""
        return [
            *get_concept_row_arguments(self.left),
            *get_concept_row_arguments(self.right),
        ]

    @property
    def existence_arguments(self) -> Sequence[tuple[ConceptRef, ...]]:
        """Existence groups gathered from whichever sides expose them."""
        collected: list[tuple[ConceptRef, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, ConceptArgs):
                collected.extend(side.existence_arguments)
        return collected

    @property
    def output_datatype(self):
        # a conditional is always a boolean
        return DataType.BOOL

    def decompose(self):
        """Flatten nested AND conditions into a list of their operands.

        A condition using any other operator is returned as a single item.
        """
        if self.operator != BooleanOperator.AND:
            return [self]
        parts = []
        for side in (self.left, self.right):
            if isinstance(side, Conditional):
                parts.extend(side.decompose())
            else:
                parts.append(side)
        return parts
428
+
429
+
430
class WhereClause(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """Wrapper around the condition of a where clause.

    Every operation is delegated to the wrapped ``conditional``.
    """

    conditional: Union[SubselectComparison, Comparison, Conditional, Parenthetical]

    def __repr__(self):
        return str(self.conditional)

    def __str__(self):
        return repr(self)

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        condition = self.conditional
        return condition.concept_arguments

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        condition = self.conditional
        return condition.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        condition = self.conditional
        return condition.existence_arguments

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> Self:
        """Rebuild the clause, of the same class, around the merged condition."""
        merged = self.conditional.with_merge(source, target, modifiers)
        return self.__class__.model_construct(conditional=merged)

    def with_namespace(self, namespace: str) -> Self:
        """Rebuild the clause, of the same class, around the namespaced condition."""
        namespaced = self.conditional.with_namespace(namespace)
        return self.__class__.model_construct(conditional=namespaced)

    def with_reference_replacement(self, source, target):
        """Rebuild the clause, of the same class, around the rewritten condition."""
        replaced = self.conditional.with_reference_replacement(source, target)
        return self.__class__.model_construct(conditional=replaced)
467
+
468
+
469
class HavingClause(WhereClause):
    """Having clause; adds nothing to the behaviour of ``WhereClause``."""
471
+
472
+
473
class Grain(Namespaced, BaseModel):
    """Set of concept addresses, optionally paired with a where clause.

    Equality compares ``components`` only; ``where_clause`` is not considered.
    """

    components: set[str] = Field(default_factory=set)
    where_clause: Optional["WhereClause"] = None
    # lazily populated by ``__str__``
    _str: str | None = None
    # lazily populated by ``abstract``
    _abstract: bool = False

    def without_condition(self):
        """Return a grain with the same components and no where clause."""
        return Grain(components=self.components)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a grain in which ``source``'s address is replaced by ``target``'s."""
        # NOTE(review): the where clause is not carried over here, unlike in
        # with_namespace and __sub__ — confirm this is intended.
        merged = {
            target.address if component == source.address else component
            for component in self.components
        }
        return Grain.model_construct(components=merged)

    @classmethod
    def from_concepts(
        cls,
        concepts: Iterable[Concept | ConceptRef | str],
        environment: Environment | None = None,
        where_clause: WhereClause | None = None,
        local_concepts: dict[str, Concept] | None = None,
    ) -> Grain:
        """Build a grain from concepts, references or addresses.

        The components are computed by ``concepts_to_grain_concepts``; the
        result is constructed without validation.
        """
        # imported here rather than at module level, as in the original
        from trilogy.parsing.common import concepts_to_grain_concepts

        components = concepts_to_grain_concepts(
            concepts, environment=environment, local_concepts=local_concepts
        )
        return Grain.model_construct(components=components, where_clause=where_clause)

    def with_namespace(self, namespace: str) -> "Grain":
        """Return a grain with components and where clause under ``namespace``."""
        return Grain.model_construct(
            components={address_with_namespace(c, namespace) for c in self.components},
            where_clause=(
                self.where_clause.with_namespace(namespace)
                if self.where_clause
                else None
            ),
        )

    @field_validator("components", mode="before")
    @classmethod
    def component_validator(cls, v, info: ValidationInfo):
        """Normalise ``components`` to a set of address strings.

        A list is converted to a set, with addressable items replaced by
        their address; any other input must already be a set.

        Raises:
            ValueError: if the result is not a set, or holds a non-string.
        """
        if isinstance(v, list):
            output = {
                item._address if isinstance(item, Addressable) else item for item in v
            }
        else:
            output = v
        if not isinstance(output, set):
            raise ValueError(f"Invalid grain component {output}, is not set")
        if not all(isinstance(x, str) for x in output):
            raise ValueError(f"Invalid component {output}")
        return output

    def __add__(self, other: "Grain") -> "Grain":
        """Union the components of both grains and combine their where clauses."""
        if not other:
            return self
        where = self.where_clause
        if other.where_clause:
            if not self.where_clause:
                where = other.where_clause
            elif not other.where_clause == self.where_clause:
                # differing conditions are ANDed together rather than rejected
                where = WhereClause.model_construct(
                    conditional=Conditional(
                        left=self.where_clause.conditional,
                        right=other.where_clause.conditional,
                        operator=BooleanOperator.AND,
                    )
                )
        return Grain(
            components=self.components.union(other.components), where_clause=where
        )

    def __sub__(self, other: "Grain") -> "Grain":
        """Remove ``other``'s components, keeping this grain's where clause."""
        return Grain.model_construct(
            components=self.components.difference(other.components),
            where_clause=self.where_clause,
        )

    def _gen_abstract(self) -> bool:
        """True when there are no components or all end with the all-rows concept."""
        return not self.components or all(
            c.endswith(ALL_ROWS_CONCEPT) for c in self.components
        )

    @property
    def abstract(self):
        # only a True result is cached; a False result is recomputed on access
        if not self._abstract:
            self._abstract = self._gen_abstract()
        return self._abstract

    def __eq__(self, other: object):
        """Compare by components against a grain or a list of concepts."""
        if isinstance(other, list):
            if all(isinstance(c, Concept) for c in other):
                return self.components == {c.address for c in other}
            return False
        if not isinstance(other, Grain):
            return False
        return self.components == other.components

    def issubset(self, other: "Grain"):
        return self.components.issubset(other.components)

    def union(self, other: "Grain"):
        """Union of both component sets, keeping only this grain's where clause."""
        addresses = self.components.union(other.components)
        return Grain(components=addresses, where_clause=self.where_clause)

    def isdisjoint(self, other: "Grain"):
        return self.components.isdisjoint(other.components)

    def intersection(self, other: "Grain") -> "Grain":
        """Components shared by both grains; the result has no where clause."""
        shared = self.components.intersection(other.components)
        return Grain(components=shared)

    def _gen_str(self) -> str:
        """Render the grain: sorted components, then any where clause."""
        if self.abstract:
            base = "Grain<Abstract>"
        else:
            base = "Grain<" + ",".join(sorted(self.components)) + ">"
        if self.where_clause:
            base += f"|{str(self.where_clause)}"
        return base

    def __str__(self):
        # rendered once and cached
        if not self._str:
            self._str = self._gen_str()
        return self._str

    def __radd__(self, other) -> "Grain":
        """Treat a left operand of ``0`` (the default start of ``sum()``) as identity."""
        if other == 0:
            return self
        return self.__add__(other)
620
+
621
+
622
+ class Comparison(ConceptArgs, Mergeable, DataTyped, Namespaced, BaseModel):
623
+ left: Union[
624
+ int,
625
+ str,
626
+ float,
627
+ list,
628
+ bool,
629
+ datetime,
630
+ date,
631
+ Function,
632
+ ConceptRef,
633
+ Conditional,
634
+ DataType,
635
+ Comparison,
636
+ FunctionCallWrapper,
637
+ Parenthetical,
638
+ MagicConstants,
639
+ WindowItem,
640
+ AggregateWrapper,
641
+ FilterItem,
642
+ ]
643
+ right: Union[
644
+ int,
645
+ str,
646
+ float,
647
+ list,
648
+ bool,
649
+ date,
650
+ datetime,
651
+ ConceptRef,
652
+ Function,
653
+ Conditional,
654
+ DataType,
655
+ Comparison,
656
+ FunctionCallWrapper,
657
+ Parenthetical,
658
+ MagicConstants,
659
+ WindowItem,
660
+ AggregateWrapper,
661
+ TupleWrapper,
662
+ FilterItem,
663
+ ]
664
+ operator: ComparisonOperator
665
+
666
+ @field_validator("left", mode="before")
667
+ @classmethod
668
+ def left_validator(cls, v, info: ValidationInfo):
669
+ if isinstance(v, Concept):
670
+ return v.reference
671
+ return v
672
+
673
+ @field_validator("right", mode="before")
674
+ @classmethod
675
+ def right_validator(cls, v, info: ValidationInfo):
676
+ if isinstance(v, Concept):
677
+ return v.reference
678
+ return v
679
+
680
+ @model_validator(mode="after")
681
+ def validate_comparison(self):
682
+ left_type = arg_to_datatype(self.left)
683
+ right_type = arg_to_datatype(self.right)
684
+ left_name = (
685
+ left_type.name if isinstance(left_type, DataType) else str(left_type)
686
+ )
687
+ right_name = (
688
+ right_type.name if isinstance(right_type, DataType) else str(right_type)
689
+ )
690
+ if self.operator in (ComparisonOperator.IS, ComparisonOperator.IS_NOT):
691
+ if self.right != MagicConstants.NULL and DataType.BOOL != right_type:
692
+ raise SyntaxError(
693
+ f"Cannot use {self.operator.value} with non-null or boolean value {self.right}"
694
+ )
695
+ elif self.operator in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
696
+
697
+ if isinstance(right_type, ArrayType) and not is_compatible_datatype(
698
+ left_type, right_type.value_data_type
699
+ ):
700
+ raise SyntaxError(
701
+ f"Cannot compare {left_type} and {right_type} with operator {self.operator} in {str(self)}"
702
+ )
703
+ elif isinstance(self.right, Concept) and not is_compatible_datatype(
704
+ left_type, right_type
705
+ ):
706
+ raise SyntaxError(
707
+ f"Cannot compare {left_name} and {right_name} with operator {self.operator} in {str(self)}"
708
+ )
709
+ else:
710
+ if not is_compatible_datatype(left_type, right_type):
711
+ raise SyntaxError(
712
+ f"Cannot compare {left_name} ({self.left}) and {right_name} ({self.right}) of different types with operator {self.operator.value} in {str(self)}"
713
+ )
714
+
715
+ return self
716
+
717
+ def __add__(self, other):
718
+ if other is None:
719
+ return self
720
+ if not isinstance(other, (Comparison, Conditional, Parenthetical)):
721
+ raise ValueError("Cannot add Comparison to non-Comparison")
722
+ if other == self:
723
+ return self
724
+ return Conditional(left=self, right=other, operator=BooleanOperator.AND)
725
+
726
def __repr__(self):
    """Render as ``<left> <operator> <right>``; Concept operands show their address."""
    lhs = self.left.address if isinstance(self.left, Concept) else str(self.left)
    rhs = self.right.address if isinstance(self.right, Concept) else str(self.right)
    return f"{lhs} {self.operator.value} {rhs}"
736
+
737
def __str__(self):
    """Return the same text as ``__repr__``."""
    rendered = self.__repr__()
    return rendered
739
+
740
def __eq__(self, other):
    """Equal when ``other`` is a Comparison with the same left, right and operator."""
    if isinstance(other, Comparison):
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )
    return False
748
+
749
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
    """Return a copy with the merge applied to each ``Mergeable`` operand.

    Operands that are not ``Mergeable`` are carried over unchanged. The copy
    is built with ``model_construct``.
    """
    merged_left = self.left
    if isinstance(merged_left, Mergeable):
        merged_left = merged_left.with_merge(source, target, modifiers)
    merged_right = self.right
    if isinstance(merged_right, Mergeable):
        merged_right = merged_right.with_merge(source, target, modifiers)
    return self.__class__.model_construct(
        left=merged_left,
        right=merged_right,
        operator=self.operator,
    )
763
+
764
def with_reference_replacement(self, source, target):
    """Return a copy with the reference replacement applied to each ``Mergeable`` operand.

    Operands that are not ``Mergeable`` are carried over unchanged. The copy
    is built with ``model_construct``.
    """
    new_left = self.left
    if isinstance(new_left, Mergeable):
        new_left = new_left.with_reference_replacement(source, target)
    new_right = self.right
    if isinstance(new_right, Mergeable):
        new_right = new_right.with_reference_replacement(source, target)
    return self.__class__.model_construct(
        left=new_left,
        right=new_right,
        operator=self.operator,
    )
778
+
779
def with_namespace(self, namespace: str):
    """Return a copy with ``namespace`` applied to each ``Namespaced`` operand.

    Operands that are not ``Namespaced`` are carried over unchanged. The copy
    is built with ``model_construct``.
    """
    scoped_left = self.left
    if isinstance(scoped_left, Namespaced):
        scoped_left = scoped_left.with_namespace(namespace)
    scoped_right = self.right
    if isinstance(scoped_right, Namespaced):
        scoped_right = scoped_right.with_namespace(namespace)
    return self.__class__.model_construct(
        left=scoped_left,
        right=scoped_right,
        operator=self.operator,
    )
793
+
794
@property
def concept_arguments(self) -> List[ConceptRef]:
    """Concept references found in the left operand followed by those in the right."""
    return [
        *get_concept_arguments(self.left),
        *get_concept_arguments(self.right),
    ]
801
+
802
@property
def row_arguments(self) -> List[ConceptRef]:
    """Row-level concept references of the left operand followed by the right."""
    return [
        *get_concept_row_arguments(self.left),
        *get_concept_row_arguments(self.right),
    ]
808
+
809
@property
def existence_arguments(self) -> List[Tuple[ConceptRef, ...]]:
    """Existence-argument tuples gathered from whichever operands are ``ConceptArgs``."""
    collected: List[Tuple[ConceptRef, ...]] = []
    for operand in (self.left, self.right):
        if isinstance(operand, ConceptArgs):
            collected.extend(operand.existence_arguments)
    return collected
818
+
819
@property
def output_datatype(self):
    """Datatype produced by evaluating the comparison: always boolean."""
    return DataType.BOOL
823
+
824
+
825
class SubselectComparison(Comparison):
    # Comparison variant: only the left operand contributes row arguments,
    # and the right operand's concepts are reported as one existence tuple.

    def __eq__(self, other):
        """Equal only to another SubselectComparison with the same operands and operator."""
        if isinstance(other, SubselectComparison):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    @property
    def row_arguments(self) -> List[ConceptRef]:
        """Row-level concept references of the left operand only."""
        left_refs = get_concept_row_arguments(self.left)
        return left_refs

    @property
    def existence_arguments(self) -> list[tuple["ConceptRef", ...]]:
        """One tuple holding every concept referenced by the right operand."""
        right_refs = get_concept_arguments(self.right)
        return [tuple(right_refs)]
844
+
845
+
846
+ class Concept(Addressable, DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
847
+ model_config = ConfigDict(
848
+ extra="forbid",
849
+ )
850
+ name: str
851
+ datatype: DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
852
+ purpose: Purpose
853
+ derivation: Derivation = Derivation.ROOT
854
+ granularity: Granularity = Granularity.MULTI_ROW
855
+ metadata: Metadata = Field(
856
+ default_factory=lambda: Metadata(description=None, line_number=None),
857
+ validate_default=True,
858
+ )
859
+ lineage: Optional[
860
+ Union[
861
+ Function,
862
+ WindowItem,
863
+ FilterItem,
864
+ AggregateWrapper,
865
+ RowsetItem,
866
+ MultiSelectLineage,
867
+ Comparison,
868
+ ]
869
+ ] = None
870
+ namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)
871
+ keys: Optional[set[str]] = None
872
+ grain: "Grain" = Field(default=None, validate_default=True) # type: ignore
873
+ modifiers: List[Modifier] = Field(default_factory=list) # type: ignore
874
+ pseudonyms: set[str] = Field(default_factory=set)
875
+
876
def duplicate(self) -> Concept:
    """Return a deep copy of this concept (``model_copy(deep=True)``)."""
    clone = self.model_copy(deep=True)
    return clone
878
+
879
def __hash__(self):
    """Hash a string built from name, datatype, purpose, lineage, namespace, grain and keys."""
    fingerprint = f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
    return hash(fingerprint)
883
+
884
def __repr__(self):
    """Render as ``<address>@<grain>``."""
    return f"{self.address}@{self.grain}"
887
+
888
@property
def is_internal(self) -> bool:
    """True when the namespace or the name starts with an underscore."""
    if self.namespace.startswith("_"):
        return True
    return self.name.startswith("_")
891
+
892
@property
def reference(self) -> ConceptRef:
    """Build a ``ConceptRef`` carrying this concept's address, output datatype and metadata."""
    ref_fields = dict(
        address=self.address,
        datatype=self.output_datatype,
        metadata=self.metadata,
    )
    # model_construct is used, as in the rest of this class, to build the ref.
    return ConceptRef.model_construct(**ref_fields)
899
+
900
@property
def output_datatype(self):
    """Alias of ``datatype``."""
    declared = self.datatype
    return declared
903
+
904
@classmethod
def calculate_is_aggregate(cls, lineage):
    """Report whether ``lineage`` is an aggregate function or wraps one.

    True for a ``Function`` whose operator is in
    ``FunctionClass.AGGREGATE_FUNCTIONS`` and for an ``AggregateWrapper``
    whose inner function's operator is; False otherwise, including when
    there is no lineage.
    """
    if not lineage:
        return False
    aggregate_ops = FunctionClass.AGGREGATE_FUNCTIONS.value
    if isinstance(lineage, Function) and lineage.operator in aggregate_ops:
        return True
    return (
        isinstance(lineage, AggregateWrapper)
        and lineage.function.operator in aggregate_ops
    )
916
+
917
@cached_property
def is_aggregate(self):
    """Cached result of ``calculate_is_aggregate`` for this concept's lineage."""
    lineage = self.lineage
    return self.calculate_is_aggregate(lineage)
920
+
921
def with_merge(self, source: Self, target: Self, modifiers: List[Modifier]) -> Self:
    """Apply a ``source`` -> ``target`` merge to this concept.

    * If this concept is ``source`` (same address), return ``target`` at this
      concept's merged grain, with this address added to its pseudonyms.
    * If it has no grain components, lineage or keys, return ``self``.
    * Otherwise rebuild the concept with lineage, grain and keys rewritten.
    """
    if self.address == source.address:
        replacement = target.with_grain(
            self.grain.with_merge(source, target, modifiers)
        )
        # NOTE(review): with_grain passes the pseudonyms set through by
        # reference, so this add also appears to mutate target.pseudonyms;
        # confirm that is intended.
        replacement.pseudonyms.add(self.address)
        return replacement

    untouched = not self.grain.components and not self.lineage and not self.keys
    if untouched:
        return self

    merged_lineage = None
    if self.lineage:
        merged_lineage = self.lineage.with_merge(source, target, modifiers)
    merged_grain = self.grain.with_merge(source, target, modifiers)
    merged_keys = None
    if self.keys:
        # Swap the source address for the target address inside the key set.
        merged_keys = set(
            x if x != source.address else target.address for x in self.keys
        )
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        metadata=self.metadata,
        derivation=self.derivation,
        granularity=self.granularity,
        lineage=merged_lineage,
        grain=merged_grain,
        namespace=self.namespace,
        keys=merged_keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
950
+
951
@field_validator("namespace", mode="plain")
@classmethod
def namespace_validation(cls, v):
    """Substitute ``DEFAULT_NAMESPACE`` for a falsy namespace value."""
    if v:
        return v
    return DEFAULT_NAMESPACE
955
+
956
@field_validator("metadata", mode="before")
@classmethod
def metadata_validation(cls, v):
    """Substitute a fresh ``Metadata()`` for a falsy metadata value."""
    return v or Metadata()
961
+
962
@field_validator("purpose", mode="after")
@classmethod
def purpose_validation(cls, v):
    """Return the purpose unchanged, rejecting ``Purpose.AUTO``.

    Raises:
        ValueError: when the purpose is ``Purpose.AUTO``.
    """
    if v != Purpose.AUTO:
        return v
    raise ValueError("Cannot set purpose to AUTO")
968
+
969
@field_validator("grain", mode="before")
@classmethod
def parse_grain(cls, v, info: ValidationInfo) -> Grain:
    """Normalise the ``grain`` input into a ``Grain``.

    The branches are tried in this order:

    1. no grain given, purpose KEY: grain is the concept's own address;
    2. no grain given, purpose PROPERTY: grain is the concept's keys;
    3. lineage is an ``AggregateWrapper`` with a non-empty ``by``: grain is
       the ``by`` addresses (applies even when a grain was supplied);
    4. no grain given: empty grain;
    5. a ``Grain`` is kept, a ``Concept`` becomes a one-component grain, a
       dict is validated into a ``Grain``.

    Raises:
        SyntaxError: for any other input type.
    """
    # this is silly - rethink how we do grains
    values = info.data
    purpose = values.get("purpose", None)

    if not v and purpose == Purpose.KEY:
        return Grain(
            components={
                f'{values.get("namespace", DEFAULT_NAMESPACE)}.{values["name"]}'
            }
        )
    if not v and purpose == Purpose.PROPERTY:
        return Grain(components=values.get("keys", set()) or set())

    lineage = values.get("lineage")
    if isinstance(lineage, AggregateWrapper) and lineage.by:
        return Grain(components={c.address for c in lineage.by})

    if not v:
        return Grain(components=set())
    if isinstance(v, Grain):
        return v
    if isinstance(v, Concept):
        return Grain(components={v.address})
    if isinstance(v, dict):
        return Grain.model_validate(v)
    raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
1001
+
1002
def __eq__(self, other: object):
    """Compare against a string address, a ``ConceptRef`` or another ``Concept``.

    A string is equal when it matches ``address``; a ``ConceptRef`` when its
    address matches; a ``Concept`` when name, datatype, purpose, namespace,
    grain, derivation and granularity all match. Keys are not compared.
    """
    if isinstance(other, str) and self.address == other:
        return True
    if isinstance(other, ConceptRef):
        return self.address == other.address
    if not isinstance(other, Concept):
        return False
    return (
        self.name == other.name
        and self.datatype == other.datatype
        and self.purpose == other.purpose
        and self.namespace == other.namespace
        and self.grain == other.grain
        and self.derivation == other.derivation
        and self.granularity == other.granularity
    )
1020
+
1021
def __str__(self):
    """Render as ``<namespace>.<name>@<grain>``, using ``Grain<>`` for a falsy grain."""
    if self.grain:
        grain_text = str(self.grain)
    else:
        grain_text = "Grain<>"
    return f"{self.namespace}.{self.name}@{grain_text}"
1024
+
1025
@property
def address(self) -> str:
    """Address of the concept: ``<namespace>.<name>``."""
    return ".".join((f"{self.namespace}", f"{self.name}"))
1028
+
1029
@property
def output(self) -> "Concept":
    """The concept itself."""
    result = self
    return result
1032
+
1033
@property
def safe_address(self) -> str:
    """Address with every ``.`` replaced by ``_``.

    The namespace prefix is omitted when the namespace is
    ``DEFAULT_NAMESPACE`` or empty.
    """
    has_prefix = self.namespace != DEFAULT_NAMESPACE and bool(self.namespace)
    if has_prefix:
        return f"{self.namespace.replace('.','_')}_{self.name.replace('.','_')}"
    return self.name.replace(".", "_")
1040
+
1041
def with_namespace(self, namespace: str) -> Self:
    """Return a copy of this concept moved under ``namespace``.

    Lineage, grain, keys and pseudonyms are re-addressed. The new namespace
    is ``<namespace>.<current>`` unless the current one is
    ``DEFAULT_NAMESPACE``, in which case it is just ``namespace``.
    """
    scoped_lineage = None
    if self.lineage:
        scoped_lineage = self.lineage.with_namespace(namespace)

    if self.grain:
        scoped_grain = self.grain.with_namespace(namespace)
    else:
        scoped_grain = Grain(components=set())

    if self.namespace != DEFAULT_NAMESPACE:
        full_namespace = namespace + "." + self.namespace
    else:
        full_namespace = namespace

    scoped_keys = None
    if self.keys:
        scoped_keys = {address_with_namespace(x, namespace) for x in self.keys}

    scoped_pseudonyms = {
        address_with_namespace(v, namespace) for v in self.pseudonyms
    }
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        granularity=self.granularity,
        derivation=self.derivation,
        metadata=self.metadata,
        lineage=scoped_lineage,
        grain=scoped_grain,
        namespace=full_namespace,
        keys=scoped_keys,
        modifiers=self.modifiers,
        pseudonyms=scoped_pseudonyms,
    )
1068
+
1069
def get_select_grain_and_keys(
    self, grain: Grain, environment: Environment
) -> Tuple[
    Function
    | WindowItem
    | FilterItem
    | AggregateWrapper
    | RowsetItem
    | MultiSelectLineage
    | Comparison
    | None,
    Grain,
    set[str] | None,
]:
    """Work out the lineage, grain and keys this concept takes in a select.

    Args:
        grain: grain of the select.
        environment: used to resolve concept addresses.

    Returns:
        ``(lineage, grain, keys)``. Without lineage the concept keeps its
        own keys and its own grain (or the select grain when its grain is
        empty). Aggregates are bound to the select grain; BASIC derivations
        take the union of their parents' keys.
    """
    lineage = self.lineage
    resolved_grain = self.grain if self.grain.components else grain
    resolved_keys = self.keys
    if not lineage:
        return lineage, resolved_grain, resolved_keys

    if grain.components and isinstance(lineage, Function) and self.is_aggregate:
        # Bare aggregate function: wrap it so it groups by the select grain.
        by_refs: list[ConceptRef | Concept] = [
            environment.concepts[c].reference for c in grain.components
        ]
        wrapped = AggregateWrapper.model_construct(function=lineage, by=by_refs)
        return wrapped, grain, set(grain.components)

    if isinstance(lineage, AggregateWrapper) and not lineage.by:
        # Wrapper without an explicit ``by``: group by the select grain.
        by_refs = [environment.concepts[c].reference for c in grain.components]
        wrapped = AggregateWrapper.model_construct(
            function=lineage.function, by=by_refs
        )
        return wrapped, grain, {x.address for x in wrapped.by}

    if self.derivation == Derivation.BASIC:
        parent_key_union: set[str] = set()
        assert lineage
        for arg_ref in lineage.concept_arguments:
            parent = environment.concepts[arg_ref.address]
            if isinstance(parent, UndefinedConcept):
                continue
            _, _, parent_keys = parent.get_select_grain_and_keys(grain, environment)
            if parent_keys:
                parent_key_union.update(parent_keys)
        # Grain.from_concepts also deduplicates the collected keys.
        resolved_grain = Grain.from_concepts(parent_key_union, environment)
        resolved_keys = resolved_grain.components

    return lineage, resolved_grain, resolved_keys
1124
+
1125
def set_select_grain(self, grain: Grain, environment: Environment) -> Self:
    """Return a copy of this concept with the grain/keys appropriate for a select.

    Lineage, grain and keys come from ``get_select_grain_and_keys``; every
    other field is copied as-is.
    """
    select_lineage, select_grain, select_keys = self.get_select_grain_and_keys(
        grain, environment
    )
    fields = dict(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        granularity=self.granularity,
        derivation=self.derivation,
        metadata=self.metadata,
        lineage=select_lineage,
        grain=select_grain,
        namespace=self.namespace,
        keys=select_keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
    return self.__class__.model_construct(**fields)
1144
+
1145
def with_grain(self, grain: Optional["Grain"] = None) -> Self:
    """Return a copy of this concept at ``grain``.

    A falsy ``grain`` yields an empty grain. Every other field, including
    the ``pseudonyms`` set object, is passed through unchanged.
    """
    if grain:
        new_grain = grain
    else:
        new_grain = Grain.model_construct(components=set())
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        metadata=self.metadata,
        granularity=self.granularity,
        derivation=self.derivation,
        lineage=self.lineage,
        grain=new_grain,
        namespace=self.namespace,
        keys=self.keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
1161
+
1162
@cached_property
def sources(self) -> List["ConceptRef"]:
    """Concept references that this concept's lineage reads directly.

    Returns an empty list when there is no lineage. Only ``ConceptRef``
    arguments are collected.

    Raises:
        SyntaxError: when the lineage references this concept's own address.
    """
    if not self.lineage:
        return []

    direct_refs: List[ConceptRef] = []
    for candidate in self.lineage.concept_arguments:
        if not isinstance(candidate, ConceptRef):
            continue
        if candidate.address == self.address:
            raise SyntaxError(f"Concept {self.address} references itself")
        direct_refs.append(candidate)
    return direct_refs
1193
+
1194
@property
def concept_arguments(self) -> List[ConceptRef]:
    """The lineage's concept arguments, or an empty list without lineage."""
    if self.lineage:
        return self.lineage.concept_arguments
    return []
1197
+
1198
@classmethod
def calculate_derivation(cls, lineage, purpose: Purpose) -> Derivation:
    """Classify how a concept is derived from its lineage.

    Args:
        lineage: the concept's lineage object, or a falsy value for none.
        purpose: the concept's purpose; only consulted when the lineage
            does not determine the derivation.

    Returns:
        The ``Derivation`` for the lineage type (and, for functions, the
        operator). With no matching lineage: ``CONSTANT`` when ``purpose``
        is ``Purpose.CONSTANT``, else ``ROOT``.
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import with the build models).
    from trilogy.core.models.build import (
        BuildAggregateWrapper,
        BuildComparison,
        BuildFilterItem,
        BuildFunction,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        BuildWindowItem,
    )

    if lineage:
        if isinstance(lineage, (BuildWindowItem, WindowItem)):
            return Derivation.WINDOW
        if isinstance(lineage, (BuildFilterItem, FilterItem)):
            return Derivation.FILTER
        if isinstance(lineage, (BuildAggregateWrapper, AggregateWrapper)):
            return Derivation.AGGREGATE
        if isinstance(lineage, (BuildRowsetItem, RowsetItem)):
            return Derivation.ROWSET
        # NOTE(review): only BuildComparison is matched; an authoring-side
        # Comparison lineage falls through to the purpose check below.
        # Confirm whether that asymmetry is intended.
        if isinstance(lineage, BuildComparison):
            return Derivation.BASIC
        if isinstance(lineage, (BuildMultiSelectLineage, MultiSelectLineage)):
            return Derivation.MULTISELECT
        if isinstance(lineage, (BuildFunction, Function)):
            # Operator checks run in priority order; first match wins.
            operator = lineage.operator
            if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
                return Derivation.AGGREGATE
            if operator in FunctionClass.ONE_TO_MANY.value:
                return Derivation.UNNEST
            if operator == FunctionType.RECURSE_EDGE:
                return Derivation.RECURSIVE
            if operator == FunctionType.UNION:
                return Derivation.UNION
            if operator == FunctionType.GROUP:
                return Derivation.GROUP_TO
            if operator == FunctionType.ALIAS:
                return Derivation.BASIC
            if operator in FunctionClass.SINGLE_ROW.value:
                return Derivation.CONSTANT
            # A function with no concept inputs, or only constant inputs,
            # is itself constant.
            arguments = lineage.concept_arguments
            if not arguments:
                return Derivation.CONSTANT
            if all(x.derivation == Derivation.CONSTANT for x in arguments):
                return Derivation.CONSTANT
            return Derivation.BASIC

    if purpose == Purpose.CONSTANT:
        return Derivation.CONSTANT
    return Derivation.ROOT
1280
+
1281
@classmethod
def calculate_granularity(cls, derivation: Derivation, grain: Grain, lineage):
    """Decide whether a concept is single-row or multi-row.

    * CONSTANT derivations are single-row.
    * AGGREGATE derivations are single-row only when every grain component
      ends with ``ALL_ROWS_CONCEPT`` (an empty grain also qualifies).
    * UNNEST / UNION / DATE_SPINE functions are multi-row.
    * Any other lineage is single-row when all its concept arguments are.
    * Everything else is multi-row.
    """
    from trilogy.core.models.build import BuildFunction

    if derivation == Derivation.CONSTANT:
        return Granularity.SINGLE_ROW

    if derivation == Derivation.AGGREGATE:
        spans_all_rows = all([x.endswith(ALL_ROWS_CONCEPT) for x in grain.components])
        return Granularity.SINGLE_ROW if spans_all_rows else Granularity.MULTI_ROW

    if not lineage:
        return Granularity.MULTI_ROW

    row_expanding = (FunctionType.UNNEST, FunctionType.UNION, FunctionType.DATE_SPINE)
    if isinstance(lineage, (Function, BuildFunction)) and lineage.operator in row_expanding:
        return Granularity.MULTI_ROW

    inputs_single_row = all(
        [x.granularity == Granularity.SINGLE_ROW for x in lineage.concept_arguments]
    )
    if inputs_single_row:
        return Granularity.SINGLE_ROW
    return Granularity.MULTI_ROW
1302
+
1303
+ # @property
1304
+ # def granularity(self) -> Granularity:
1305
+ # return self.calculate_granularity(self.derivation, self.grain, self.lineage)
1306
+
1307
def with_filter(
    self,
    condition: Conditional | Comparison | Parenthetical,
    environment: Environment | None = None,
) -> "Concept":
    """Return a concept representing this one filtered by ``condition``.

    Args:
        condition: the filter condition.
        environment: when given, the new concept is registered in it via
            ``add_concept``.

    Returns:
        ``self`` when this concept is already a filter on the same
        condition; otherwise a new concept named
        ``<name>_filter_<hash of name + condition>`` whose lineage is a
        ``FilterItem`` over this concept's reference.
    """
    from trilogy.utility import string_to_hash

    if self.lineage and isinstance(self.lineage, FilterItem):
        if self.lineage.where.conditional == condition:
            return self

    # Named to avoid shadowing the ``hash`` builtin.
    condition_hash = string_to_hash(self.name + str(condition))
    new_lineage = FilterItem(
        content=self.reference, where=WhereClause(conditional=condition)
    )
    new = Concept.model_construct(
        name=f"{self.name}_filter_{condition_hash}",
        datatype=self.datatype,
        purpose=self.purpose,
        derivation=self.calculate_derivation(new_lineage, self.purpose),
        granularity=self.granularity,
        metadata=self.metadata,
        lineage=new_lineage,
        # Keys are only carried over for properties.
        keys=(self.keys if self.purpose == Purpose.PROPERTY else None),
        grain=self.grain if self.grain else Grain(components=set()),
        namespace=self.namespace,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
    if environment:
        environment.add_concept(new)
    return new
1338
+
1339
+
1340
class UndefinedConceptFull(Concept, Mergeable, Namespaced):
    # Concept subclass whose datatype and purpose default to UNKNOWN and
    # whose ``reference`` is an ``UndefinedConcept``.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    name: str
    line_no: int | None = None
    datatype: (
        DataType | TraitDataType | ArrayType | StructType | MapType | NumericType
    ) = DataType.UNKNOWN
    purpose: Purpose = Purpose.UNKNOWN

    @property
    def reference(self) -> UndefinedConcept:
        """Build an ``UndefinedConcept`` reference carrying only the address."""
        own_address = self.address
        return UndefinedConcept(address=own_address)
1352
+
1353
+
1354
class OrderItem(Mergeable, ConceptArgs, Namespaced, BaseModel):
    # One ordering term: an expression plus its sort direction.
    # this needs to be a full concept as it may not exist in environment
    expr: Expr
    order: Ordering

    @field_validator("expr", mode="before")
    def enforce_reference(cls, v):
        """Swap a full ``Concept`` for its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    def with_namespace(self, namespace: str) -> "OrderItem":
        """Return a copy whose expression, if ``Namespaced``, is moved under ``namespace``."""
        scoped = self.expr
        if isinstance(scoped, Namespaced):
            scoped = scoped.with_namespace(namespace)
        return OrderItem.model_construct(expr=scoped, order=self.order)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "OrderItem":
        """Return a copy whose expression, if ``Mergeable``, has the merge applied."""
        merged = self.expr
        if isinstance(merged, Mergeable):
            merged = merged.with_merge(source, target, modifiers)
        return OrderItem.model_construct(expr=merged, order=self.order)

    def with_reference_replacement(self, source, target):
        """Return a copy whose expression, if ``Mergeable``, has the reference replaced."""
        replaced = self.expr
        if isinstance(replaced, Mergeable):
            replaced = replaced.with_reference_replacement(source, target)
        return OrderItem.model_construct(expr=replaced, order=self.order)

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Concept references found in the ordering expression."""
        return get_concept_arguments(self.expr)

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        """The expression's row arguments, or ``concept_arguments`` when it has none to offer."""
        if not isinstance(self.expr, ConceptArgs):
            return self.concept_arguments
        return self.expr.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        """The expression's existence arguments, or an empty list."""
        if not isinstance(self.expr, ConceptArgs):
            return []
        return self.expr.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the ordering expression."""
        return arg_to_datatype(self.expr)
1416
+
1417
+
1418
class WindowItem(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
    # A window calculation: ``type`` applied to ``content``, optionally
    # partitioned by ``over`` and ordered by ``order_by``.
    type: WindowType
    content: FuncArgs
    order_by: List["OrderItem"]
    over: List["ConceptRef"] = Field(default_factory=list)
    index: Optional[int] = None

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()

    def __repr__(self):
        """Render the window type, content, index, partition and ordering."""
        return f"{self.type.value} {self.content} by {self.index} over {self.over} order {self.order_by}"

    @field_validator("content", mode="before")
    def enforce_concept_ref(cls, v):
        """Swap a full ``Concept`` for a ``ConceptRef``; pass anything else through."""
        if isinstance(v, Concept):
            return ConceptRef(address=v.address, datatype=v.datatype)
        return v

    @field_validator("over", mode="before")
    def enforce_concept_ref_over(cls, v):
        """Swap every full ``Concept`` in ``over`` for a ``ConceptRef``."""
        return [
            (
                ConceptRef(address=item.address, datatype=item.datatype)
                if isinstance(item, Concept)
                else item
            )
            for item in v
        ]

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "WindowItem":
        """Return a copy with the merge applied to content, ``over`` and ``order_by``."""
        return WindowItem.model_construct(
            type=self.type,
            content=(
                self.content.with_merge(source, target, modifiers)
                if isinstance(self.content, Mergeable)
                else self.content
            ),
            over=[x.with_merge(source, target, modifiers) for x in self.over],
            order_by=[x.with_merge(source, target, modifiers) for x in self.order_by],
            index=self.index,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with the reference replaced in content, ``over`` and ``order_by``.

        Content that is not ``Mergeable`` is carried over unchanged, matching
        ``with_merge`` and ``with_namespace``; previously such content raised
        ``AttributeError`` here.
        """
        return WindowItem.model_construct(
            type=self.type,
            content=(
                self.content.with_reference_replacement(source, target)
                if isinstance(self.content, Mergeable)
                else self.content
            ),
            over=[x.with_reference_replacement(source, target) for x in self.over],
            order_by=[
                x.with_reference_replacement(source, target) for x in self.order_by
            ],
            index=self.index,
        )

    def with_namespace(self, namespace: str) -> "WindowItem":
        """Return a copy with content, ``over`` and ``order_by`` moved under ``namespace``."""
        return WindowItem.model_construct(
            type=self.type,
            content=(
                self.content.with_namespace(namespace)
                if isinstance(self.content, Namespaced)
                else self.content
            ),
            over=[x.with_namespace(namespace) for x in self.over],
            order_by=[x.with_namespace(namespace) for x in self.order_by],
            index=self.index,
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Concept references from content, then each ordering term, then each ``over`` entry."""
        output = []
        output += get_concept_arguments(self.content)
        for order in self.order_by:
            output += get_concept_arguments(order)
        for item in self.over:
            output += get_concept_arguments(item)
        return output

    @property
    def output_datatype(self):
        """INTEGER for RANK / ROW_NUMBER windows, otherwise the content's datatype."""
        if self.type in (WindowType.RANK, WindowType.ROW_NUMBER):
            return DataType.INTEGER
        return self.content.output_datatype
1502
+
1503
+
1504
def get_basic_type(
    type: DataType | ArrayType | StructType | MapType | NumericType | TraitDataType,
) -> DataType:
    """Reduce a possibly complex datatype to its plain ``DataType``.

    Array, struct, map and numeric types map to the matching ``DataType``
    member; a ``TraitDataType`` is unwrapped to its inner ``type`` and
    resolved again; anything else is returned unchanged.
    """
    container_kinds = (
        (ArrayType, DataType.ARRAY),
        (StructType, DataType.STRUCT),
        (MapType, DataType.MAP),
        (NumericType, DataType.NUMERIC),
    )
    current = type
    while True:
        for wrapper_cls, basic in container_kinds:
            if isinstance(current, wrapper_cls):
                return basic
        if not isinstance(current, TraitDataType):
            return current
        # Peel one trait layer and classify what is underneath.
        current = current.type
1518
+
1519
+
1520
class CaseWhen(Namespaced, DataTyped, ConceptArgs, Mergeable, BaseModel):
    # One ``WHEN <comparison> THEN <expr>`` arm of a case expression.
    comparison: Conditional | SubselectComparison | Comparison
    expr: "Expr"

    @field_validator("expr", mode="before")
    def enforce_reference(cls, v):
        """Swap a full ``Concept`` for its reference; pass anything else through."""
        return v.reference if isinstance(v, Concept) else v

    @property
    def output_datatype(self):
        """Datatype of the THEN expression."""
        return arg_to_datatype(self.expr)

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()

    def __repr__(self):
        """Render as ``WHEN <comparison> THEN <expr>``."""
        return f"WHEN {str(self.comparison)} THEN {str(self.expr)}"

    @property
    def concept_arguments(self):
        """Concept references from the comparison followed by those from the expression."""
        from_comparison = get_concept_arguments(self.comparison)
        from_expr = get_concept_arguments(self.expr)
        return from_comparison + from_expr

    # NOTE(review): get_concept_row_arguments reads ``row_arguments``, not
    # ``concept_row_arguments``; confirm this property name is intended.
    @property
    def concept_row_arguments(self):
        """Row-level concept references from the comparison followed by the expression."""
        from_comparison = get_concept_row_arguments(self.comparison)
        from_expr = get_concept_row_arguments(self.expr)
        return from_comparison + from_expr

    def with_namespace(self, namespace: str) -> CaseWhen:
        """Return a copy with the comparison and any ``Namespaced`` expression moved under ``namespace``."""
        scoped_comparison = self.comparison.with_namespace(namespace)
        scoped_expr = self.expr
        if isinstance(scoped_expr, Namespaced):
            scoped_expr = scoped_expr.with_namespace(namespace)
        return CaseWhen.model_construct(
            comparison=scoped_comparison,
            expr=scoped_expr,
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> CaseWhen:
        """Return a copy with the merge applied to the comparison and any ``Mergeable`` expression."""
        merged_comparison = self.comparison.with_merge(source, target, modifiers)
        merged_expr = self.expr
        if isinstance(merged_expr, Mergeable):
            merged_expr = merged_expr.with_merge(source, target, modifiers)
        return CaseWhen.model_construct(
            comparison=merged_comparison,
            expr=merged_expr,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with the reference replaced in the comparison and any ``Mergeable`` expression."""
        new_comparison = self.comparison.with_reference_replacement(source, target)
        new_expr = self.expr
        if isinstance(new_expr, Mergeable):
            new_expr = new_expr.with_reference_replacement(source, target)
        return CaseWhen.model_construct(
            comparison=new_comparison,
            expr=new_expr,
        )
1584
+
1585
+
1586
class CaseElse(Namespaced, ConceptArgs, DataTyped, Mergeable, BaseModel):
    # The ``ELSE <expr>`` arm of a case expression.
    expr: "Expr"
    # this ensures that it's easily differentiable from CaseWhen
    discriminant: ComparisonOperator = ComparisonOperator.ELSE

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()

    def __repr__(self):
        """Render as ``ELSE <expr>``."""
        return f"ELSE {str(self.expr)}"

    @property
    def output_datatype(self):
        """Datatype of the ELSE expression."""
        return arg_to_datatype(self.expr)

    @field_validator("expr", mode="before")
    def enforce_expr(cls, v):
        """Swap a full ``Concept`` for a ``ConceptRef``; pass anything else through."""
        if not isinstance(v, Concept):
            return v
        return ConceptRef(address=v.address, datatype=v.datatype)

    @property
    def concept_arguments(self):
        """Concept references found in the ELSE expression."""
        return get_concept_arguments(self.expr)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> CaseElse:
        """Return a copy whose expression, if ``Mergeable``, has the merge applied."""
        kept_discriminant = self.discriminant
        merged_expr = self.expr
        if isinstance(merged_expr, Mergeable):
            merged_expr = merged_expr.with_merge(source, target, modifiers)
        return CaseElse.model_construct(
            discriminant=kept_discriminant,
            expr=merged_expr,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy whose expression, if ``Mergeable``, has the reference replaced."""
        kept_discriminant = self.discriminant
        new_expr = self.expr
        if isinstance(new_expr, Mergeable):
            new_expr = new_expr.with_reference_replacement(source, target)
        return CaseElse.model_construct(
            discriminant=kept_discriminant,
            expr=new_expr,
        )

    def with_namespace(self, namespace: str) -> CaseElse:
        """Return a copy whose expression, if ``Namespaced``, is moved under ``namespace``."""
        kept_discriminant = self.discriminant
        scoped_expr = self.expr
        if isinstance(scoped_expr, Namespaced):
            scoped_expr = scoped_expr.with_namespace(namespace)
        return CaseElse.model_construct(
            discriminant=kept_discriminant,
            expr=scoped_expr,
        )
1648
+
1649
+
1650
def get_concept_row_arguments(expr) -> List["ConceptRef"]:
    """Collect the row-level concept references of ``expr`` into a new list.

    A ``ConceptRef`` yields itself; a ``ConceptArgs`` yields its
    ``row_arguments``; anything else yields an empty list.
    """
    if isinstance(expr, ConceptRef):
        return [expr]
    if isinstance(expr, ConceptArgs):
        return list(expr.row_arguments)
    return []
1658
+
1659
+
1660
def get_concept_arguments(expr) -> List["ConceptRef"]:
    """Collect the concept references of ``expr`` into a new list.

    A ``ConceptRef`` yields itself; a ``ConceptArgs`` yields its
    ``concept_arguments``; anything else yields an empty list.
    """
    if isinstance(expr, ConceptRef):
        return [expr]
    if isinstance(expr, ConceptArgs):
        return list(expr.concept_arguments)
    return []
1671
+
1672
+
1673
def args_to_pretty(input: set[DataType]) -> str:
    """Format datatypes as a sorted, comma-separated list of quoted values.

    ``DataType.UNKNOWN`` entries are left out.
    """
    quoted = []
    for dtype in input:
        if dtype != DataType.UNKNOWN:
            quoted.append(f"'{dtype.value}'")
    quoted.sort()
    return ", ".join(quoted)
1675
+
1676
+
1677
class Function(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
    """A call to an operator (``FunctionType``) over a sequence of arguments.

    Arguments are normalized and type-checked by ``parse_arguments`` when the
    model is built through regular validation. The ``with_*`` helpers build
    copies with ``model_construct`` and therefore skip that validation.
    """

    operator: FunctionType
    # Upper bound on the number of arguments; InfiniteFunctionArgs disables the check.
    arg_count: int = Field(default=1)
    output_datatype: (
        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    )
    output_purpose: Purpose
    # Either one set applied to every argument, or one set per argument position.
    valid_inputs: Optional[
        Union[
            Set[DataType],
            List[Set[DataType]],
        ]
    ] = None
    arguments: Sequence[FuncArgs]

    class Config:
        frozen = True

    def __repr__(self):
        rendered_args = ",".join([str(a) for a in self.arguments])
        return f"{self.operator.value}({rendered_args})"

    def __str__(self):
        return self.__repr__()

    @property
    def datatype(self):
        """Alias for ``output_datatype``."""
        return self.output_datatype

    @field_validator("arguments", mode="before")
    @classmethod
    def parse_arguments(cls, v, info: ValidationInfo):
        """Normalize the raw argument list and check it against the operator.

        Plain ``Concept`` arguments are replaced by their ``reference``
        (``BuildConcept`` instances are kept as-is). The argument count is
        checked against ``arg_count`` and each argument's type against
        ``valid_inputs``.

        Raises:
            ParseError: more arguments than ``arg_count`` allows.
            TypeError: an argument whose known type is not in ``valid_inputs``.
        """
        from trilogy.core.models.build import BuildConcept
        from trilogy.parsing.exceptions import ParseError

        values = info.data
        arg_count = len(v)
        v = [
            (
                x.reference
                if isinstance(x, Concept) and not isinstance(x, BuildConcept)
                else x
            )
            for x in v
        ]
        # info.data only contains fields that validated successfully. If an
        # earlier field failed, bail out so that error is the one surfaced
        # rather than a KeyError raised from this validator.
        if any(
            key not in values for key in ("operator", "arg_count", "valid_inputs")
        ):
            return v
        target_arg_count = values["arg_count"]
        operator_name = values["operator"].name
        valid_inputs = values["valid_inputs"]
        if arg_count > target_arg_count and target_arg_count != InfiniteFunctionArgs:
            raise ParseError(
                f"Incorrect argument count to {operator_name} function, expects"
                f" {target_arg_count}, got {arg_count}"
            )
        # if all arguments can be any of the set type
        # turn this into an array for validation
        if isinstance(valid_inputs, set):
            valid_inputs = [valid_inputs for _ in v]
        elif not valid_inputs:
            return v

        # Python constant types and the datatype each one maps to.
        comparisons: List[Tuple[Type, DataType]] = [
            (str, DataType.STRING),
            (int, DataType.INTEGER),
            (float, DataType.FLOAT),
            (bool, DataType.BOOL),
            (DatePart, DataType.DATE_PART),
        ]
        for idx, arg in enumerate(v):
            allowed = valid_inputs[idx]
            if (
                isinstance(arg, ConceptRef)
                and get_basic_type(arg.datatype.data_type) not in allowed
            ):
                if arg.datatype != DataType.UNKNOWN:
                    raise TypeError(
                        f"Invalid argument type '{arg.datatype.data_type.value}' passed into {operator_name} function in position {idx+1}"
                        f" from concept: {arg.name}. Valid: {args_to_pretty(allowed)}."
                    )
            if (
                isinstance(arg, Function)
                and get_basic_type(arg.output_datatype) not in allowed
            ):
                if arg.output_datatype != DataType.UNKNOWN:
                    raise TypeError(
                        f"Invalid argument type '{arg.output_datatype}' passed into"
                        f" {operator_name} function from function {arg.operator.name} in position {idx+1}. Valid: {args_to_pretty(allowed)}"
                    )
            # check constants
            # A value can match several Python types (bool is also an int), so
            # accept it when ANY matching type is allowed instead of failing
            # on the first match.
            matched = [dtype for ptype, dtype in comparisons if isinstance(arg, ptype)]
            if not matched:
                continue
            if any(get_basic_type(dtype) in allowed for dtype in matched):
                continue
            # A plain string is acceptable where a date part is expected, as
            # long as it names one.
            if (
                isinstance(arg, str)
                and DataType.DATE_PART in allowed
                and arg in [x.value for x in DatePart]
            ):
                continue
            raise TypeError(
                f'Invalid {matched[0]} constant passed into {operator_name} "{arg}", expecting one of {allowed}'
            )
        return v

    def with_reference_replacement(self, source: str, target: Expr | ArgBinding):
        """Return a copy with ``source`` replaced by ``target`` in Mergeable arguments.

        When the current output datatype is UNKNOWN it is re-derived from the
        rewritten arguments; for ATTR_ACCESS over a struct, the type of the
        field named by the second argument is used.
        """
        from trilogy.core.functions import arg_to_datatype, merge_datatypes

        nargs = [
            (
                c.with_reference_replacement(source, target)
                if isinstance(c, Mergeable)
                else c
            )
            for c in self.arguments
        ]
        if self.output_datatype == DataType.UNKNOWN:
            new_output = merge_datatypes([arg_to_datatype(x) for x in nargs])

            if self.operator == FunctionType.ATTR_ACCESS:
                if isinstance(new_output, StructType):
                    new_output = new_output.field_types[str(nargs[1])]
        else:
            new_output = self.output_datatype
        # this is not ideal - see hacky logic for datatypes above
        # we need to figure out how to patch properly
        # should use function factory, but does not have environment access
        # probably move all datatype resolution to build?
        return Function.model_construct(
            operator=self.operator,
            arguments=nargs,
            output_datatype=new_output,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    def with_namespace(self, namespace: str) -> "Function":
        """Return a copy whose Namespaced arguments are moved into ``namespace``."""
        return Function.model_construct(
            operator=self.operator,
            arguments=[
                c.with_namespace(namespace) if isinstance(c, Namespaced) else c
                for c in self.arguments
            ],
            output_datatype=self.output_datatype,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "Function":
        """Return a copy with the merge applied to every Mergeable argument."""
        return Function.model_construct(
            operator=self.operator,
            arguments=[
                (
                    c.with_merge(source, target, modifiers)
                    if isinstance(c, Mergeable)
                    else c
                )
                for c in self.arguments
            ],
            output_datatype=self.output_datatype,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Concept references collected from every argument, in argument order."""
        base = []
        for arg in self.arguments:
            base += get_concept_arguments(arg)
        return base
1870
+
1871
+
1872
class FunctionCallWrapper(
    DataTyped,
    ConceptArgs,
    Mergeable,
    Namespaced,
    BaseModel,
):
    """A named function call, rendered as ``@name(args)``, wrapping an expression.

    ``content`` is the wrapped expression; ``args`` are the call arguments used
    for display and namespacing/merging.
    """

    content: Expr
    name: str
    args: List[Expr]

    def __str__(self):
        return f'@{self.name}({",".join([str(x) for x in self.args])})'

    def with_namespace(self, namespace) -> "FunctionCallWrapper":
        """Return a copy with Namespaced content and args moved into ``namespace``."""
        return FunctionCallWrapper.model_construct(
            content=(
                self.content.with_namespace(namespace)
                if isinstance(self.content, Namespaced)
                else self.content
            ),
            name=self.name,
            args=[
                x.with_namespace(namespace) if isinstance(x, Namespaced) else x
                for x in self.args
            ],
        )

    def with_reference_replacement(self, source, target):
        """Reference replacement is not supported for wrapped calls.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Cannot reference replace")

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "FunctionCallWrapper":
        """Return a copy with the merge applied to Mergeable content and args."""
        return FunctionCallWrapper.model_construct(
            content=(
                self.content.with_merge(source, target, modifiers)
                if isinstance(self.content, Mergeable)
                else self.content
            ),
            name=self.name,
            args=[
                (
                    x.with_merge(source, target, modifiers)
                    if isinstance(x, Mergeable)
                    else x
                )
                for x in self.args
            ],
        )

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Concept references found in ``content`` (``args`` are not inspected)."""
        base: List[ConceptRef] = []
        x = self.content
        if isinstance(x, ConceptRef):
            base += [x]
        elif isinstance(x, ConceptArgs):
            base += x.concept_arguments
        return base

    @property
    def output_datatype(self):
        """Datatype of the wrapped content, as resolved by ``arg_to_datatype``."""
        return arg_to_datatype(self.content)
1937
+
1938
+
1939
class AggregateWrapper(Mergeable, DataTyped, ConceptArgs, Namespaced, BaseModel):
    """A function paired with the concepts it is grouped by (``by``)."""

    function: Function
    by: List[ConceptRef | Concept] = Field(default_factory=list)

    @field_validator("by", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Swap any full ``Concept`` in the grouping list for its reference."""
        return [
            entry.reference if isinstance(entry, Concept) else entry for entry in v
        ]

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{str(self.function)}<{grain_str}>"

    @property
    def datatype(self):
        """Datatype reported by the wrapped function."""
        return self.function.datatype

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """The function's concept arguments followed by the grouping references."""
        grouping_refs = [x.reference for x in self.by]
        return self.function.concept_arguments + grouping_refs

    @property
    def output_datatype(self):
        """Output datatype of the wrapped function."""
        return self.function.output_datatype

    @property
    def output_purpose(self):
        """Output purpose of the wrapped function."""
        return self.function.output_purpose

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy with the merge applied to the function and grouping."""
        merged_function = self.function.with_merge(
            source, target, modifiers=modifiers
        )
        merged_by = []
        if self.by:
            merged_by = [c.with_merge(source, target, modifiers) for c in self.by]
        return AggregateWrapper.model_construct(
            function=merged_function,
            by=merged_by,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` throughout."""
        replaced_function = self.function.with_reference_replacement(source, target)
        replaced_by = []
        if self.by:
            replaced_by = [
                c.with_reference_replacement(source, target) for c in self.by
            ]
        return AggregateWrapper.model_construct(
            function=replaced_function,
            by=replaced_by,
        )

    def with_namespace(self, namespace: str) -> "AggregateWrapper":
        """Return a copy with the function and grouping moved into ``namespace``."""
        namespaced_function = self.function.with_namespace(namespace)
        namespaced_by = []
        if self.by:
            namespaced_by = [c.with_namespace(namespace) for c in self.by]
        return AggregateWrapper.model_construct(
            function=namespaced_function,
            by=namespaced_by,
        )
1999
+
2000
+
2001
class FilterItem(DataTyped, Namespaced, ConceptArgs, BaseModel):
    """An expression (``content``) restricted by a ``where`` clause."""

    content: FuncArgs
    where: "WhereClause"

    @field_validator("content", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Store a full ``Concept`` as a ``ConceptRef`` with its address and datatype."""
        if isinstance(v, Concept):
            return ConceptRef(address=v.address, datatype=v.datatype)
        return v

    def __str__(self):
        return f"<Filter: {str(self.content)} where {str(self.where)}>"

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "FilterItem":
        """Return a copy with the merge applied to the content and where clause."""
        return FilterItem.model_construct(
            content=(
                self.content.with_merge(source, target, modifiers)
                if isinstance(self.content, Mergeable)
                else self.content
            ),
            where=self.where.with_merge(source, target, modifiers),
        )

    def with_namespace(self, namespace: str) -> "FilterItem":
        """Return a copy with the content and where clause moved into ``namespace``."""
        return FilterItem.model_construct(
            content=(
                self.content.with_namespace(namespace)
                if isinstance(self.content, Namespaced)
                else self.content
            ),
            where=self.where.with_namespace(namespace),
        )

    @property
    def output_datatype(self):
        """Datatype of the filtered content, as resolved by ``arg_to_datatype``."""
        return arg_to_datatype(self.content)

    @property
    def concept_arguments(self):
        """Concepts referenced by the content, followed by those of the where clause."""
        if isinstance(self.content, ConceptRef):
            return [self.content] + self.where.concept_arguments
        elif isinstance(self.content, ConceptArgs):
            return self.content.concept_arguments + self.where.concept_arguments
        return self.where.concept_arguments
2047
+
2048
+
2049
class RowsetLineage(Namespaced, Mergeable, BaseModel):
    """Lineage of a named rowset: its derived concepts and the select behind it."""

    name: str
    derived_concepts: List[ConceptRef]
    select: SelectLineage | MultiSelectLineage

    def with_namespace(self, namespace: str):
        """Return a copy with derived concepts and select moved into ``namespace``."""
        moved_concepts = [
            ref.with_namespace(namespace) for ref in self.derived_concepts
        ]
        moved_select = self.select.with_namespace(namespace)
        return RowsetLineage.model_construct(
            name=self.name,
            derived_concepts=moved_concepts,
            select=moved_select,
        )

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy with the merge applied to derived concepts and select."""
        merged_concepts = [
            ref.with_merge(source, target, modifiers) for ref in self.derived_concepts
        ]
        merged_select = self.select.with_merge(source, target, modifiers)
        return RowsetLineage.model_construct(
            name=self.name,
            derived_concepts=merged_concepts,
            select=merged_select,
        )
2071
+
2072
+
2073
class RowsetItem(Mergeable, DataTyped, ConceptArgs, Namespaced, BaseModel):
    """A single concept reference exposed through a rowset."""

    content: ConceptRef
    rowset: RowsetLineage

    def __repr__(self):
        return f"<Rowset<{self.rowset.name}>: {str(self.content)}>"

    def __str__(self):
        return self.__repr__()

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a copy with the merge applied to ``content``; ``rowset`` is reused."""
        merged_content = self.content.with_merge(source, target, modifiers)
        return RowsetItem.model_construct(content=merged_content, rowset=self.rowset)

    def with_namespace(self, namespace: str) -> "RowsetItem":
        """Return a copy with both content and rowset moved into ``namespace``."""
        moved_content = self.content.with_namespace(namespace)
        moved_rowset = self.rowset.with_namespace(namespace)
        return RowsetItem.model_construct(content=moved_content, rowset=moved_rowset)

    @property
    def output(self) -> ConceptRef:
        """The wrapped concept reference."""
        return self.content

    @property
    def output_datatype(self):
        """Datatype of the wrapped concept reference."""
        return self.content.datatype

    @property
    def concept_arguments(self):
        """A one-element list holding the wrapped concept reference."""
        return [self.content]
2106
+
2107
+
2108
class OrderBy(Mergeable, Namespaced, BaseModel):
    """An ordered list of ``OrderItem`` entries."""

    items: List[OrderItem]

    def with_namespace(self, namespace: str) -> "OrderBy":
        """Return a copy with every item moved into ``namespace``."""
        moved = [entry.with_namespace(namespace) for entry in self.items]
        return OrderBy.model_construct(items=moved)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "OrderBy":
        """Return a copy with the merge applied to every item."""
        merged = [entry.with_merge(source, target, modifiers) for entry in self.items]
        return OrderBy.model_construct(items=merged)

    @property
    def concept_arguments(self):
        """Concept arguments of all items, concatenated in item order."""
        collected = []
        for entry in self.items:
            collected.extend(entry.concept_arguments)
        return collected
2129
+
2130
+
2131
class AlignClause(Namespaced, BaseModel):
    """A list of ``AlignItem`` entries."""

    items: List[AlignItem]

    def with_namespace(self, namespace: str) -> "AlignClause":
        """Return a copy with every align item moved into ``namespace``."""
        moved = [entry.with_namespace(namespace) for entry in self.items]
        return AlignClause.model_construct(items=moved)
2138
+
2139
+
2140
class DeriveItem(Namespaced, DataTyped, ConceptArgs, Mergeable, BaseModel):
    """A named expression derived inside a namespace."""

    expr: Expr
    name: str
    namespace: str

    @property
    def derived_concept(self) -> str:
        """Address of the derived concept, formatted as ``namespace.name``."""
        return f"{self.namespace}.{self.name}"

    def with_namespace(self, namespace):
        """Return a copy moved into ``namespace``.

        Only Namespaced expressions are rewritten; plain literals (including
        falsy ones such as ``0`` or ``""``) are carried over unchanged.
        """
        return DeriveItem.model_construct(
            expr=(
                self.expr.with_namespace(namespace)
                if isinstance(self.expr, Namespaced)
                else self.expr
            ),
            name=self.name,
            namespace=namespace,
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "DeriveItem":
        """Return a copy with the merge applied to a Mergeable expression."""
        return DeriveItem.model_construct(
            expr=(
                self.expr.with_merge(source, target, modifiers)
                if isinstance(self.expr, Mergeable)
                else self.expr
            ),
            name=self.name,
            namespace=self.namespace,
        )

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` in a Mergeable expression."""
        return DeriveItem.model_construct(
            expr=(
                self.expr.with_reference_replacement(source, target)
                if isinstance(self.expr, Mergeable)
                else self.expr
            ),
            name=self.name,
            namespace=self.namespace,
        )
2183
+
2184
+
2185
class DeriveClause(Mergeable, Namespaced, BaseModel):
    """A list of ``DeriveItem`` entries."""

    items: List[DeriveItem]

    def with_namespace(self, namespace: str) -> "DeriveClause":
        """Return a copy with every Namespaced item moved into ``namespace``."""
        moved = []
        for entry in self.items:
            if isinstance(entry, Namespaced):
                moved.append(entry.with_namespace(namespace))
            else:
                moved.append(entry)
        return DeriveClause.model_construct(items=moved)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "DeriveClause":
        """Return a copy with the merge applied to every Mergeable item."""
        merged = []
        for entry in self.items:
            if isinstance(entry, Mergeable):
                merged.append(entry.with_merge(source, target, modifiers))
            else:
                merged.append(entry)
        return DeriveClause.model_construct(items=merged)

    def with_reference_replacement(self, source, target):
        """Return a copy with ``source`` replaced by ``target`` in every Mergeable item."""
        replaced = []
        for entry in self.items:
            if isinstance(entry, Mergeable):
                replaced.append(entry.with_reference_replacement(source, target))
            else:
                replaced.append(entry)
        return DeriveClause.model_construct(items=replaced)
2221
+
2222
+
2223
class SelectLineage(Mergeable, Namespaced, BaseModel):
    """Lineage of a single select: its outputs, local concepts and clauses."""

    selection: List[ConceptRef]
    hidden_components: set[str]
    local_concepts: dict[str, Concept]
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = Field(default_factory=lambda: Metadata())
    grain: Grain = Field(default_factory=Grain)
    where_clause: Union["WhereClause", None] = Field(default=None)
    having_clause: Union["HavingClause", None] = Field(default=None)

    @property
    def output_components(self) -> List[ConceptRef]:
        """The selected concept references."""
        return self.selection

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> SelectLineage:
        """Return a copy with the merge applied to every component.

        ``hidden_components``, ``limit`` and ``meta`` are carried over as-is.
        """
        return SelectLineage.model_construct(
            selection=[x.with_merge(source, target, modifiers) for x in self.selection],
            hidden_components=self.hidden_components,
            local_concepts={
                x: y.with_merge(source, target, modifiers)
                for x, y in self.local_concepts.items()
            },
            order_by=(
                self.order_by.with_merge(source, target, modifiers)
                if self.order_by
                else None
            ),
            limit=self.limit,
            # model_construct fills omitted fields with their defaults, so meta
            # must be passed explicitly or it is replaced by a blank Metadata.
            meta=self.meta,
            grain=self.grain.with_merge(source, target, modifiers),
            where_clause=(
                self.where_clause.with_merge(source, target, modifiers)
                if self.where_clause
                else None
            ),
            having_clause=(
                self.having_clause.with_merge(source, target, modifiers)
                if self.having_clause
                else None
            ),
        )

    def with_namespace(self, namespace):
        """Return a copy with every component moved into ``namespace``.

        ``hidden_components``, ``limit`` and ``meta`` are carried over as-is.
        """
        return SelectLineage.model_construct(
            selection=[x.with_namespace(namespace) for x in self.selection],
            hidden_components=self.hidden_components,
            local_concepts={
                x: y.with_namespace(namespace) for x, y in self.local_concepts.items()
            },
            order_by=self.order_by.with_namespace(namespace) if self.order_by else None,
            limit=self.limit,
            meta=self.meta,
            grain=self.grain.with_namespace(namespace),
            where_clause=(
                self.where_clause.with_namespace(namespace)
                if self.where_clause
                else None
            ),
            having_clause=(
                self.having_clause.with_namespace(namespace)
                if self.having_clause
                else None
            ),
        )
2289
+
2290
+
2291
class MultiSelectLineage(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """Lineage of several selects combined through an align clause."""

    selects: List[SelectLineage]
    align: AlignClause

    namespace: str
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    where_clause: Union["WhereClause", None] = Field(default=None)
    having_clause: Union["HavingClause", None] = Field(default=None)
    derive: DeriveClause | None = None
    hidden_components: set[str]

    @property
    def grain(self):
        """Grain accumulated across all member selects, starting from an empty Grain."""
        combined = Grain()
        for member in self.selects:
            combined += member.grain
        return combined

    @property
    def output_components(self) -> list[ConceptRef]:
        """Derived concepts (typed UNKNOWN) plus each select's outputs, minus hidden ones."""
        candidates = [
            ConceptRef.model_construct(address=address, datatype=DataType.UNKNOWN)
            for address in self.derived_concepts
        ]
        for member in self.selects:
            candidates.extend(member.output_components)
        return [
            ref for ref in candidates if ref.address not in self.hidden_components
        ]

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> MultiSelectLineage:
        """Return a copy with the merge applied to selects, derive and clauses.

        ``align``, ``namespace``, ``hidden_components`` and ``limit`` are reused.
        """
        merged_selects = [s.with_merge(source, target, modifiers) for s in self.selects]
        merged_derive = None
        if self.derive:
            merged_derive = self.derive.with_merge(source, target, modifiers)
        merged_order = None
        if self.order_by:
            merged_order = self.order_by.with_merge(source, target, modifiers)
        merged_where = None
        if self.where_clause:
            merged_where = self.where_clause.with_merge(source, target, modifiers)
        merged_having = None
        if self.having_clause:
            merged_having = self.having_clause.with_merge(source, target, modifiers)
        return MultiSelectLineage.model_construct(
            selects=merged_selects,
            align=self.align,
            derive=merged_derive,
            namespace=self.namespace,
            hidden_components=self.hidden_components,
            order_by=merged_order,
            limit=self.limit,
            where_clause=merged_where,
            having_clause=merged_having,
        )

    def with_namespace(self, namespace: str) -> "MultiSelectLineage":
        """Return a copy with every component moved into ``namespace``.

        ``hidden_components`` and ``limit`` are reused.
        """
        moved_selects = [c.with_namespace(namespace) for c in self.selects]
        moved_align = self.align.with_namespace(namespace)
        moved_derive = None
        if self.derive:
            moved_derive = self.derive.with_namespace(namespace)
        moved_order = None
        if self.order_by:
            moved_order = self.order_by.with_namespace(namespace)
        moved_where = None
        if self.where_clause:
            moved_where = self.where_clause.with_namespace(namespace)
        moved_having = None
        if self.having_clause:
            moved_having = self.having_clause.with_namespace(namespace)
        return MultiSelectLineage.model_construct(
            selects=moved_selects,
            align=moved_align,
            derive=moved_derive,
            namespace=namespace,
            hidden_components=self.hidden_components,
            order_by=moved_order,
            limit=self.limit,
            where_clause=moved_where,
            having_clause=moved_having,
        )

    @property
    def derived_concepts(self) -> set[str]:
        """Addresses produced by the align items and, when present, the derive items."""
        addresses = {item.aligned_concept for item in self.align.items}
        if self.derive:
            addresses.update(ditem.derived_concept for ditem in self.derive.items)
        return addresses

    @property
    def concept_arguments(self):
        """Outputs of every member select, de-duplicated by address."""
        gathered = []
        for member in self.selects:
            gathered.extend(member.output_components)
        return unique(gathered, "address")
2389
+
2390
+
2391
class LooseConceptList(BaseModel):
    """A sequence of concepts compared and queried by address only."""

    concepts: Sequence[Concept | ConceptRef]

    @cached_property
    def addresses(self) -> set[str]:
        """Set of the addresses of all held concepts."""
        return {s.address for s in self.concepts}

    @classmethod
    def validate(cls, v):
        """Build an instance from a sequence of concepts."""
        # pydantic models only accept keyword arguments; cls(v) always raised.
        return cls(concepts=v)

    @cached_property
    def sorted_addresses(self) -> List[str]:
        """The held addresses in sorted order."""
        return sorted(self.addresses)

    def __str__(self) -> str:
        return f"lcl{str(self.sorted_addresses)}"

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        """Two lists are equal when they hold the same set of addresses."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses == other.addresses

    def issubset(self, other):
        """Whether every held address is also in ``other``; False for other types."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.issubset(other.addresses)

    def __contains__(self, other):
        """Membership by address; accepts an address string, Concept or ConceptRef."""
        if isinstance(other, str):
            return other in self.addresses
        # The list itself may hold ConceptRefs, so they must be findable too.
        if not isinstance(other, (Concept, ConceptRef)):
            return False
        return other.address in self.addresses

    def difference(self, other):
        """Addresses held here but not in ``other``; False for other types."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.difference(other.addresses)

    def isdisjoint(self, other):
        """Whether no address is shared with ``other``; False for other types."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.isdisjoint(other.addresses)
2438
+
2439
+
2440
class AlignItem(Namespaced, BaseModel):
    """An alias that a group of concepts is aligned under."""

    alias: str
    concepts: List[ConceptRef]
    namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)

    @field_validator("concepts", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Swap any full ``Concept`` in the input for its reference."""
        return [
            entry.reference if isinstance(entry, Concept) else entry for entry in v
        ]

    @computed_field  # type: ignore
    @cached_property
    def concepts_lcl(self) -> LooseConceptList:
        """The aligned concepts wrapped in a ``LooseConceptList``."""
        return LooseConceptList(concepts=self.concepts)

    @property
    def aligned_concept(self) -> str:
        """Address of the aligned concept, formatted as ``namespace.alias``."""
        return f"{self.namespace}.{self.alias}"

    def with_namespace(self, namespace: str) -> "AlignItem":
        """Return a copy with the concepts and the item itself moved into ``namespace``."""
        moved_concepts = [c.with_namespace(namespace) for c in self.concepts]
        return AlignItem.model_construct(
            alias=self.alias,
            concepts=moved_concepts,
            namespace=namespace,
        )
2471
+
2472
+
2473
class CustomFunctionFactory:
    """Callable that instantiates a user-defined function for concrete arguments.

    Holds the function body expression and its argument bindings; calling the
    factory substitutes the supplied arguments for the bound names.
    """

    def __init__(
        self,
        function: Expr,
        namespace: str,
        function_arguments: list[ArgBinding],
        name: str,
    ):
        self.namespace = namespace
        self.function = function
        self.function_arguments = function_arguments
        self.name = name

    def with_namespace(self, namespace: str):
        """Move this factory into ``namespace`` IN PLACE and return ``self``."""
        self.namespace = namespace
        self.function = (
            self.function.with_namespace(namespace)
            if isinstance(self.function, Namespaced)
            else self.function
        )
        self.function_arguments = [
            x.with_namespace(namespace) for x in self.function_arguments
        ]
        return self

    def __call__(self, *creation_args: ArgBinding | Expr):
        """Bind ``creation_args`` to the function's arguments and return the result.

        Missing trailing arguments are filled from binding defaults. Arguments
        whose binding declares a known datatype are type-checked.

        Raises:
            ValueError: a missing argument has no default.
            TypeError: a supplied argument does not match the declared datatype.
        """
        # Work on a deep copy so the stored template is never mutated.
        nout = (
            self.function.model_copy(deep=True)
            if isinstance(self.function, BaseModel)
            else self.function
        )
        creation_arg_list: list[ArgBinding | Expr] = list(creation_args)
        if len(creation_args) < len(self.function_arguments):
            for binding in self.function_arguments[len(creation_arg_list) :]:
                if binding.default is None:
                    raise ValueError(f"Missing argument {binding.name}")

                creation_arg_list.append(binding.default)
        for arg_idx, arg in enumerate(self.function_arguments):
            if not arg.datatype or arg.datatype == DataType.UNKNOWN:
                continue
            # Guard the index below: arg_idx == len(...) is already out of range.
            if arg_idx >= len(creation_arg_list):
                continue
            comparison = arg_to_datatype(creation_arg_list[arg_idx])
            if comparison != arg.datatype:
                raise TypeError(
                    f"Invalid type passed into custom function @{self.name} in position {arg_idx+1} for argument {arg.name}, expected {arg.datatype}, got {comparison}"
                )
            if isinstance(arg.datatype, TraitDataType):
                if not (
                    isinstance(comparison, TraitDataType)
                    and all(x in comparison.traits for x in arg.datatype.traits)
                ):
                    raise TypeError(
                        f"Invalid argument type passed into custom function @{self.name} in position {arg_idx+1} for argument {arg.name}, expected traits {arg.datatype.traits}, got {comparison}"
                    )

        if isinstance(nout, Mergeable):
            for idx, x in enumerate(creation_arg_list):
                # In the default namespace the bound name is addressed with the
                # namespace prefix; otherwise the binding name is used directly.
                if self.namespace == DEFAULT_NAMESPACE:
                    target = f"{DEFAULT_NAMESPACE}.{self.function_arguments[idx].name}"
                else:
                    target = self.function_arguments[idx].name
                nout = nout.with_reference_replacement(target, x)
        return nout
2538
+
2539
+
2540
class Metadata(BaseModel):
    """Metadata container object.

    Carries an optional description, an optional line number and the
    concept source (MANUAL unless set otherwise).

    TODO: support arbitrary tags"""

    description: Optional[str] = None
    line_number: Optional[int] = None
    concept_source: ConceptSource = ConceptSource.MANUAL
2547
+
2548
+
2549
class Window(BaseModel):
    """A window described by a count and a ``WindowOrder``."""

    count: int
    window_order: WindowOrder

    def __str__(self):
        # Only the ordering is rendered; count is not part of the string form.
        return f"Window<{self.window_order}>"
2555
+
2556
+
2557
class WindowItemOver(BaseModel):
    """Holds the list of concept references of a window item's "over" part."""

    contents: List[ConceptRef]
2559
+
2560
+
2561
class WindowItemOrder(BaseModel):
    """Holds the list of order items of a window item's ordering part."""

    # "OrderItem" is a forward reference resolved elsewhere in the module.
    contents: List["OrderItem"]
2563
+
2564
+
2565
class Comment(BaseModel):
    """A comment, stored as its raw text."""

    text: str
2567
+
2568
+
2569
class ArgBinding(Namespaced, DataTyped, BaseModel):
    """A named function argument with an optional default and declared datatype."""

    name: str
    default: Expr | None = None
    datatype: (
        DataType | MapType | ArrayType | NumericType | StructType | TraitDataType
    ) = DataType.UNKNOWN

    def with_namespace(self, namespace):
        """Return a copy whose name and Namespaced default are moved into ``namespace``."""
        return ArgBinding.model_construct(
            name=address_with_namespace(self.name, namespace),
            default=(
                self.default.with_namespace(namespace)
                if isinstance(self.default, Namespaced)
                else self.default
            ),
            # model_construct fills omitted fields with defaults; pass the
            # declared datatype through so it is not reset to UNKNOWN.
            datatype=self.datatype,
        )

    @property
    def output_datatype(self):
        """Datatype of the default when one is set, otherwise the declared datatype."""
        if self.default is not None:
            return arg_to_datatype(self.default)
        return self.datatype
2591
+
2592
+
2593
class CustomType(BaseModel):
    """A named custom type over one or more base datatypes."""

    name: str
    type: DataType | list[DataType]
    drop_on: list[FunctionType] = Field(default_factory=list)
    add_on: list[FunctionType] = Field(default_factory=list)

    def with_namespace(self, namespace: str) -> "CustomType":
        """Return a copy whose name is moved into ``namespace``; other fields are reused."""
        namespaced_name = address_with_namespace(self.name, namespace)
        return CustomType.model_construct(
            name=namespaced_name,
            type=self.type,
            drop_on=self.drop_on,
            add_on=self.add_on,
        )
2606
+
2607
+
2608
# Union of everything accepted as an expression: magic constants, Python
# literals, collection wrappers and the expression node models.
# NOTE(review): member order may influence how pydantic resolves the union —
# confirm before reordering.
Expr = (
    MagicConstants
    | bool
    | int
    | str
    | float
    | date
    | datetime
    | TupleWrapper
    | ListWrapper
    | MapWrapper
    | WindowItem
    | FilterItem
    | ConceptRef
    | Comparison
    | Conditional
    | FunctionCallWrapper
    | Parenthetical
    | Function
    | AggregateWrapper
    | CaseWhen
    | CaseElse
)
2631
+
2632
# Union of everything accepted as a function argument: expression nodes,
# Python literals, date parts, datatype descriptors, collection wrappers,
# argument bindings and orderings.
# NOTE(review): member order may influence how pydantic resolves the union —
# confirm before reordering.
FuncArgs = (
    ConceptRef
    | AggregateWrapper
    | Function
    | FunctionCallWrapper
    | Parenthetical
    | CaseWhen
    | CaseElse
    | WindowItem
    | FilterItem
    | bool
    | int
    | float
    | DatePart
    | str
    | date
    | datetime
    | MapWrapper[Any, Any]
    | TraitDataType
    | DataType
    | ArrayType
    | MapType
    | NumericType
    | ListWrapper[Any]
    | TupleWrapper[Any]
    | Comparison
    | Conditional
    | MagicConstants
    | ArgBinding
    | Ordering
)