pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
@@ -0,0 +1,1227 @@
1
+ from dataclasses import dataclass
2
+ from datetime import date, datetime
3
+ from typing import Any, Callable, Optional
4
+
5
+ from lark.tree import Meta
6
+
7
+ from trilogy.constants import MagicConstants
8
+ from trilogy.core.enums import (
9
+ DatePart,
10
+ FunctionClass,
11
+ FunctionType,
12
+ Granularity,
13
+ InfiniteFunctionArgs,
14
+ Purpose,
15
+ )
16
+ from trilogy.core.exceptions import InvalidSyntaxException
17
+ from trilogy.core.models.author import (
18
+ AggregateWrapper,
19
+ Concept,
20
+ ConceptRef,
21
+ Conditional,
22
+ CustomType,
23
+ Function,
24
+ Parenthetical,
25
+ UndefinedConcept,
26
+ WindowItem,
27
+ )
28
+ from trilogy.core.models.core import (
29
+ CONCRETE_TYPES,
30
+ ArrayType,
31
+ DataType,
32
+ MapType,
33
+ NumericType,
34
+ StructType,
35
+ TraitDataType,
36
+ arg_to_datatype,
37
+ merge_datatypes,
38
+ )
39
+ from trilogy.core.models.environment import Environment
40
+
41
# Union of the argument kinds referred to as "generic" function arguments:
# concepts (or references to them), nested functions, and plain literals.
GENERIC_ARGS = Concept | ConceptRef | Function | str | int | float | date | datetime


# Stand-in custom type with an UNKNOWN base type and no add/drop rules.
CUSTOM_PLACEHOLDER = CustomType(
    name="__placeholder__",
    type=DataType.UNKNOWN,
    drop_on=[],
    add_on=[],
)
47
+
48
+
49
@dataclass
class FunctionConfig:
    """Declarative description of a single function's arguments and output.

    Instances are used as the values of the function registry defined later
    in this module, keyed by ``FunctionType``.
    """

    # Number of arguments the function takes. Registry entries also pass -1 or
    # InfiniteFunctionArgs here (presumably meaning "variadic" - confirm
    # against the code that consumes this field).
    arg_count: int = 1
    # Accepted input datatypes: either one set applied to the arguments, or a
    # list with one set per argument position. None leaves it unspecified.
    valid_inputs: set[DataType] | list[set[DataType]] | None = None
    # Purpose assigned to the function's output, if fixed.
    output_purpose: Purpose | None = None
    # Fixed output datatype, when it does not depend on the arguments.
    output_type: (
        DataType | ArrayType | MapType | StructType | NumericType | TraitDataType | None
    ) = None
    # Callable taking the argument list and returning the output datatype,
    # for functions whose output type depends on their inputs.
    output_type_function: Callable | None = None
58
+
59
+
60
def get_unnest_output_type(args: list[Any]) -> CONCRETE_TYPES:
    """Return the datatype produced by unnesting the first argument.

    Array and map inputs yield their ``value_data_type``; any other input
    type falls back to ``DataType.STRING``.
    """
    source_type = arg_to_datatype(args[0])
    if not isinstance(source_type, (ArrayType, MapType)):
        return DataType.STRING
    return source_type.value_data_type
67
+
68
+
69
def get_coalesce_output_type(args: list[Any]) -> CONCRETE_TYPES:
    """Return the single datatype shared by the arguments to coalesce.

    Arguments equal to ``MagicConstants.NULL`` are ignored, and so is any
    remaining argument that is falsy. The surviving arguments must all
    resolve to exactly one datatype.

    Raises:
        InvalidSyntaxException: if the surviving arguments resolve to zero or
            to more than one distinct datatype.
    """
    candidate_types = []
    for arg in args:
        if arg == MagicConstants.NULL:
            continue
        if not arg:
            # Falsy arguments are skipped when inferring the type.
            continue
        candidate_types.append(arg_to_datatype(arg))

    if len(set(candidate_types)) != 1:
        # The message reports the types of *all* arguments, including the
        # ones skipped above.
        all_types = set(arg_to_datatype(x) for x in args)
        raise InvalidSyntaxException(
            f"All arguments to coalesce must be of the same type, have {all_types}"
        )
    return candidate_types[0]
77
+
78
+
79
def get_transform_output_type(args: list[Any]) -> CONCRETE_TYPES:
    """Return an array type whose element type is that of the third argument."""
    element_type = arg_to_datatype(args[2])
    return ArrayType(type=element_type)
81
+
82
+
83
def get_index_output_type(
    args: list[Any],
) -> CONCRETE_TYPES:
    """Return the datatype obtained by indexing into the first argument.

    Arrays and maps yield their ``value_data_type``; for any other type the
    argument's own datatype is returned unchanged.
    """
    container_type = arg_to_datatype(args[0])
    if isinstance(container_type, (ArrayType, MapType)):
        return container_type.value_data_type
    return container_type
93
+
94
+
95
def get_attr_datatype(
    args: list[Any],
) -> CONCRETE_TYPES:
    """Return the datatype of an attribute access on the first argument.

    ``args[0]`` is the value being accessed and ``args[1]`` is the lookup key.
    For struct types the result is ``field_types[lookup]``; for any other
    type the argument's own datatype is returned.
    """
    target, field_name = args[0], args[1]
    container_type = arg_to_datatype(target)
    if not isinstance(container_type, StructType):
        return container_type
    return container_type.field_types[field_name]
104
+
105
+
106
def get_cast_output_type(
    args: list[Any],
) -> DataType | TraitDataType:
    """Return the datatype produced by casting ``args[0]`` to ``args[1]``.

    Traits carried by the source value's datatype are kept on the result:

    * If the target is itself a ``TraitDataType``, the result uses the
      target's base type and the union of source and target traits.
    * Otherwise, if the source carried traits, the target type is wrapped in
      a ``TraitDataType`` with those traits.
    * Otherwise the target type is returned as-is.

    Args:
        args: ``[value, target_type]``.

    Returns:
        The target type, possibly wrapped in a ``TraitDataType``.
    """
    source_type = arg_to_datatype(args[0])
    inherited = source_type.traits if isinstance(source_type, TraitDataType) else []
    target = args[1]

    if isinstance(target, TraitDataType):
        # De-duplicate while preserving first-seen order. Going through a set
        # made the resulting trait order depend on string hash randomization,
        # so it could differ from one run to the next.
        merged = list(dict.fromkeys(inherited + target.traits))
        return TraitDataType(type=target.type, traits=merged)
    if inherited:
        return TraitDataType(type=target, traits=inherited)
    return target
121
+
122
+
123
def get_output_type_at_index(args, index: int):
    """Return the datatype of the argument at position ``index``."""
    selected = args[index]
    return arg_to_datatype(selected)
125
+
126
+
127
def validate_case_output(
    args: list[Any],
) -> DataType:
    """Return the common output datatype of a set of case branches.

    Each element of ``args`` must expose an ``expr`` attribute. Branches whose
    type is ``DataType.NULL`` are ignored, and ``DataType.UNKNOWN`` does not
    count towards the consistency check.

    Returns:
        The single known datatype, or ``DataType.UNKNOWN`` if none is known.

    Raises:
        SyntaxError: if the branches resolve to more than one known datatype.
    """
    seen_types = set()
    type_by_expr = {}
    for branch in args:
        branch_type = arg_to_datatype(branch.expr)
        if branch_type != DataType.NULL:
            seen_types.add(branch_type.data_type)
            # Kept only so the error message can show where each type came from.
            type_by_expr[str(branch.expr)] = branch_type

    known = [x for x in seen_types if x != DataType.UNKNOWN]
    if not known:
        return DataType.UNKNOWN
    if len(known) > 1:
        raise SyntaxError(
            f"All case expressions must have the same output datatype, got {seen_types} from {type_by_expr}"
        )
    return known[0]
145
+
146
+
147
def create_struct_output(
    args: list[Any],
) -> StructType:
    """Build the ``StructType`` for a struct constructor call.

    ``args`` is read as alternating entries: even positions are the field
    values and odd positions are the keys they are stored under in
    ``fields_map``.
    """
    values = args[::2]
    keys = args[1::2]
    fields_map = dict(zip(keys, values))
    field_types = [arg_to_datatype(value) for value in values]
    return StructType(fields=field_types, fields_map=fields_map)
153
+
154
+
155
def get_date_part_output(args: list[Any]):
    """Return the trait-tagged integer type for extracting a date part.

    ``args[1]`` is the requested date part. The result is an INTEGER
    ``TraitDataType`` carrying a single trait named after that part.

    Raises:
        InvalidSyntaxException: if the date part is not one of the supported
            values listed below.
    """
    target = args[1]
    # Checked in order with ``==`` so comparison semantics match a plain
    # if/elif chain (no hashing of ``target`` is required).
    supported = (
        (DatePart.YEAR, "year"),
        (DatePart.MONTH, "month"),
        (DatePart.DAY, "day"),
        (DatePart.HOUR, "hour"),
        (DatePart.MINUTE, "minute"),
        (DatePart.SECOND, "second"),
        (DatePart.WEEK, "week"),
        (DatePart.QUARTER, "quarter"),
        (DatePart.DAY_OF_WEEK, "day_of_week"),
    )
    for part, trait in supported:
        if target == part:
            return TraitDataType(type=DataType.INTEGER, traits=[trait])
    raise InvalidSyntaxException(f"Date part not supported for {target}")
177
+
178
+
179
def get_date_trunc_output(
    args: list[Any],
):
    """Return the datatype produced by truncating to the date part ``args[1]``.

    Year, month, day, week and quarter truncation yield ``DataType.DATE``;
    hour, minute and second truncation yield ``DataType.DATETIME``.

    Raises:
        InvalidSyntaxException: if the date part is not one of those above.
    """
    target: DatePart = args[1]
    # Checked in order with ``==`` so comparison semantics match a plain
    # if/elif chain.
    outputs = (
        (DatePart.YEAR, DataType.DATE),
        (DatePart.MONTH, DataType.DATE),
        (DatePart.DAY, DataType.DATE),
        (DatePart.HOUR, DataType.DATETIME),
        (DatePart.MINUTE, DataType.DATETIME),
        (DatePart.SECOND, DataType.DATETIME),
        (DatePart.WEEK, DataType.DATE),
        (DatePart.QUARTER, DataType.DATE),
    )
    for part, result_type in outputs:
        if target == part:
            return result_type
    raise InvalidSyntaxException(f"Date truncation not supported for {target}")
201
+
202
+
203
def get_map_key_type(arg):
    """Return the array type holding a map argument's keys.

    The element type is the map's ``key_data_type``; when the argument is not
    a map type the element type falls back to ``DataType.STRING``.
    """
    resolved = arg_to_datatype(arg)
    element_type = (
        resolved.key_data_type if isinstance(resolved, MapType) else DataType.STRING
    )
    return ArrayType(type=element_type)
208
+
209
+
210
def get_map_value_type(arg):
    """Return the array type holding a map argument's values.

    The element type is the map's ``value_data_type``; when the argument is
    not a map type the element type falls back to ``DataType.STRING``.
    """
    resolved = arg_to_datatype(arg)
    element_type = (
        resolved.value_data_type if isinstance(resolved, MapType) else DataType.STRING
    )
    return ArrayType(type=element_type)
215
+
216
+
217
+ FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
218
+ FunctionType.ALIAS: FunctionConfig(
219
+ arg_count=1,
220
+ ),
221
+ FunctionType.PARENTHETICAL: FunctionConfig(
222
+ arg_count=1,
223
+ ),
224
+ FunctionType.UNNEST: FunctionConfig(
225
+ valid_inputs={
226
+ DataType.ARRAY,
227
+ },
228
+ output_purpose=Purpose.KEY,
229
+ output_type_function=get_unnest_output_type,
230
+ arg_count=1,
231
+ ),
232
+ FunctionType.DATE_SPINE: FunctionConfig(
233
+ valid_inputs={
234
+ DataType.DATE,
235
+ },
236
+ output_purpose=Purpose.KEY,
237
+ output_type=DataType.DATE,
238
+ arg_count=2,
239
+ ),
240
+ FunctionType.RECURSE_EDGE: FunctionConfig(
241
+ arg_count=2,
242
+ ),
243
+ FunctionType.GROUP: FunctionConfig(
244
+ arg_count=-1,
245
+ output_type_function=lambda args: get_output_type_at_index(args, 0),
246
+ ),
247
+ FunctionType.COUNT: FunctionConfig(
248
+ output_purpose=Purpose.METRIC,
249
+ output_type=DataType.INTEGER,
250
+ arg_count=1,
251
+ ),
252
+ FunctionType.COUNT_DISTINCT: FunctionConfig(
253
+ output_purpose=Purpose.METRIC,
254
+ output_type=DataType.INTEGER,
255
+ arg_count=1,
256
+ ),
257
+ FunctionType.MAX: FunctionConfig(
258
+ valid_inputs={
259
+ DataType.INTEGER,
260
+ DataType.FLOAT,
261
+ DataType.NUMBER,
262
+ DataType.DATE,
263
+ DataType.DATETIME,
264
+ DataType.TIMESTAMP,
265
+ DataType.BOOL,
266
+ },
267
+ output_purpose=Purpose.METRIC,
268
+ arg_count=1,
269
+ ),
270
+ FunctionType.MIN: FunctionConfig(
271
+ valid_inputs={
272
+ DataType.INTEGER,
273
+ DataType.FLOAT,
274
+ DataType.NUMBER,
275
+ DataType.DATE,
276
+ DataType.DATETIME,
277
+ DataType.TIMESTAMP,
278
+ },
279
+ output_purpose=Purpose.METRIC,
280
+ arg_count=1,
281
+ ),
282
+ FunctionType.SPLIT: FunctionConfig(
283
+ valid_inputs={DataType.STRING},
284
+ output_purpose=Purpose.PROPERTY,
285
+ output_type=ArrayType(type=DataType.STRING),
286
+ arg_count=2,
287
+ ),
288
+ FunctionType.INDEX_ACCESS: FunctionConfig(
289
+ valid_inputs=[
290
+ {
291
+ DataType.ARRAY,
292
+ },
293
+ {
294
+ DataType.INTEGER,
295
+ },
296
+ ],
297
+ output_purpose=Purpose.PROPERTY,
298
+ output_type_function=get_index_output_type,
299
+ arg_count=2,
300
+ ),
301
+ FunctionType.MAP_KEYS: FunctionConfig(
302
+ valid_inputs={
303
+ DataType.MAP,
304
+ },
305
+ output_purpose=Purpose.PROPERTY,
306
+ output_type_function=lambda args: get_map_key_type(args[0]),
307
+ arg_count=1,
308
+ ),
309
+ FunctionType.MAP_VALUES: FunctionConfig(
310
+ valid_inputs={
311
+ DataType.MAP,
312
+ },
313
+ output_purpose=Purpose.PROPERTY,
314
+ output_type_function=lambda args: get_map_value_type(args[0]),
315
+ arg_count=1,
316
+ ),
317
+ FunctionType.GENERATE_ARRAY: FunctionConfig(
318
+ valid_inputs={
319
+ DataType.INTEGER,
320
+ DataType.INTEGER,
321
+ DataType.INTEGER,
322
+ },
323
+ output_purpose=Purpose.PROPERTY,
324
+ output_type=ArrayType(type=DataType.INTEGER),
325
+ arg_count=3,
326
+ ),
327
+ FunctionType.ARRAY_DISTINCT: FunctionConfig(
328
+ valid_inputs={
329
+ DataType.ARRAY,
330
+ },
331
+ output_purpose=Purpose.PROPERTY,
332
+ output_type_function=lambda args: get_output_type_at_index(args, 0),
333
+ arg_count=1,
334
+ ),
335
+ FunctionType.ARRAY_SORT: FunctionConfig(
336
+ valid_inputs=[
337
+ {DataType.ARRAY},
338
+ {DataType.STRING},
339
+ ],
340
+ output_purpose=Purpose.PROPERTY,
341
+ output_type_function=lambda args: get_output_type_at_index(args, 0),
342
+ arg_count=2,
343
+ ),
344
+ FunctionType.ARRAY_TRANSFORM: FunctionConfig(
345
+ valid_inputs=[
346
+ {
347
+ DataType.ARRAY,
348
+ },
349
+ {*DataType},
350
+ {*DataType},
351
+ ],
352
+ output_purpose=Purpose.PROPERTY,
353
+ output_type_function=get_transform_output_type,
354
+ arg_count=3,
355
+ ),
356
+ FunctionType.ARRAY_FILTER: FunctionConfig(
357
+ valid_inputs=[
358
+ {
359
+ DataType.ARRAY,
360
+ },
361
+ {*DataType},
362
+ {*DataType},
363
+ ],
364
+ output_purpose=Purpose.PROPERTY,
365
+ output_type_function=get_transform_output_type,
366
+ arg_count=3,
367
+ ),
368
+ FunctionType.ARRAY_TO_STRING: FunctionConfig(
369
+ valid_inputs={
370
+ DataType.ARRAY,
371
+ DataType.STRING,
372
+ },
373
+ output_purpose=Purpose.PROPERTY,
374
+ output_type=DataType.STRING,
375
+ arg_count=2,
376
+ ),
377
+ FunctionType.ARRAY_SUM: FunctionConfig(
378
+ valid_inputs={
379
+ DataType.ARRAY,
380
+ },
381
+ output_purpose=Purpose.PROPERTY,
382
+ output_type_function=get_index_output_type,
383
+ arg_count=1,
384
+ ),
385
+ FunctionType.MAP_ACCESS: FunctionConfig(
386
+ valid_inputs=[
387
+ {
388
+ DataType.MAP,
389
+ },
390
+ {
391
+ DataType.INTEGER,
392
+ DataType.STRING,
393
+ },
394
+ ],
395
+ output_purpose=Purpose.PROPERTY,
396
+ output_type_function=get_index_output_type,
397
+ arg_count=2,
398
+ ),
399
+ FunctionType.ATTR_ACCESS: FunctionConfig(
400
+ valid_inputs=[
401
+ {DataType.STRUCT},
402
+ {
403
+ DataType.STRING,
404
+ },
405
+ ],
406
+ output_purpose=Purpose.PROPERTY,
407
+ output_type_function=get_attr_datatype,
408
+ arg_count=2,
409
+ ),
410
+ FunctionType.ABS: FunctionConfig(
411
+ valid_inputs={DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
412
+ output_purpose=Purpose.PROPERTY,
413
+ arg_count=1,
414
+ ),
415
+ FunctionType.NULLIF: FunctionConfig(
416
+ valid_inputs={*DataType},
417
+ output_purpose=Purpose.PROPERTY,
418
+ output_type_function=lambda args: get_output_type_at_index(args, 0),
419
+ arg_count=2,
420
+ ),
421
+ FunctionType.COALESCE: FunctionConfig(
422
+ valid_inputs={*DataType},
423
+ output_purpose=Purpose.PROPERTY,
424
+ output_type=DataType.INTEGER,
425
+ arg_count=-1,
426
+ output_type_function=get_coalesce_output_type,
427
+ ),
428
+ FunctionType.CURRENT_DATE: FunctionConfig(
429
+ output_purpose=Purpose.CONSTANT,
430
+ output_type=DataType.DATE,
431
+ arg_count=0,
432
+ ),
433
+ FunctionType.CURRENT_DATETIME: FunctionConfig(
434
+ output_purpose=Purpose.CONSTANT,
435
+ output_type=DataType.DATETIME,
436
+ arg_count=0,
437
+ ),
438
+ FunctionType.CURRENT_TIMESTAMP: FunctionConfig(
439
+ output_purpose=Purpose.CONSTANT,
440
+ output_type=DataType.TIMESTAMP,
441
+ arg_count=0,
442
+ ),
443
+ FunctionType.BOOL: FunctionConfig(
444
+ output_purpose=Purpose.PROPERTY,
445
+ output_type=DataType.BOOL,
446
+ arg_count=1,
447
+ ),
448
+ FunctionType.STRPOS: FunctionConfig(
449
+ valid_inputs=[
450
+ {DataType.STRING},
451
+ {DataType.STRING},
452
+ ],
453
+ output_purpose=Purpose.PROPERTY,
454
+ output_type=DataType.INTEGER,
455
+ arg_count=2,
456
+ ),
457
+ FunctionType.REPLACE: FunctionConfig(
458
+ valid_inputs=[
459
+ {DataType.STRING},
460
+ {DataType.STRING},
461
+ {DataType.STRING},
462
+ ],
463
+ output_purpose=Purpose.PROPERTY,
464
+ output_type=DataType.STRING,
465
+ arg_count=3,
466
+ ),
467
+ FunctionType.CONTAINS: FunctionConfig(
468
+ valid_inputs=[
469
+ {DataType.STRING},
470
+ {DataType.STRING},
471
+ ],
472
+ output_purpose=Purpose.PROPERTY,
473
+ output_type=DataType.BOOL,
474
+ arg_count=2,
475
+ ),
476
+ FunctionType.TRIM: FunctionConfig(
477
+ valid_inputs=[
478
+ {DataType.STRING},
479
+ {DataType.STRING},
480
+ ],
481
+ output_purpose=Purpose.PROPERTY,
482
+ output_type=DataType.STRING,
483
+ arg_count=2,
484
+ ),
485
+ FunctionType.SUBSTRING: FunctionConfig(
486
+ valid_inputs=[{DataType.STRING}, {DataType.INTEGER}, {DataType.INTEGER}],
487
+ output_purpose=Purpose.PROPERTY,
488
+ output_type=DataType.STRING,
489
+ arg_count=3,
490
+ ),
491
+ FunctionType.UNION: FunctionConfig(
492
+ valid_inputs={*DataType},
493
+ output_purpose=Purpose.KEY,
494
+ arg_count=-1,
495
+ ),
496
+ FunctionType.LIKE: FunctionConfig(
497
+ valid_inputs={DataType.STRING},
498
+ output_purpose=Purpose.PROPERTY,
499
+ output_type=DataType.BOOL,
500
+ arg_count=2,
501
+ ),
502
+ FunctionType.ILIKE: FunctionConfig(
503
+ valid_inputs={DataType.STRING},
504
+ output_purpose=Purpose.PROPERTY,
505
+ output_type=DataType.BOOL,
506
+ arg_count=2,
507
+ ),
508
+ FunctionType.UPPER: FunctionConfig(
509
+ valid_inputs={DataType.STRING},
510
+ output_purpose=Purpose.PROPERTY,
511
+ output_type=DataType.STRING,
512
+ arg_count=1,
513
+ ),
514
+ FunctionType.LOWER: FunctionConfig(
515
+ valid_inputs={DataType.STRING},
516
+ output_purpose=Purpose.PROPERTY,
517
+ output_type=DataType.STRING,
518
+ arg_count=1,
519
+ ),
520
+ FunctionType.REGEXP_CONTAINS: FunctionConfig(
521
+ valid_inputs={DataType.STRING},
522
+ output_purpose=Purpose.PROPERTY,
523
+ output_type=DataType.BOOL,
524
+ arg_count=2,
525
+ ),
526
+ FunctionType.REGEXP_EXTRACT: FunctionConfig(
527
+ valid_inputs=[{DataType.STRING}, {DataType.STRING}, {DataType.INTEGER}],
528
+ output_purpose=Purpose.PROPERTY,
529
+ output_type=DataType.STRING,
530
+ arg_count=3,
531
+ ),
532
+ FunctionType.REGEXP_REPLACE: FunctionConfig(
533
+ valid_inputs={DataType.STRING},
534
+ output_purpose=Purpose.PROPERTY,
535
+ output_type=DataType.STRING,
536
+ arg_count=3,
537
+ ),
538
+ FunctionType.DATE: FunctionConfig(
539
+ valid_inputs={
540
+ DataType.DATE,
541
+ DataType.TIMESTAMP,
542
+ DataType.DATETIME,
543
+ DataType.STRING,
544
+ },
545
+ output_purpose=Purpose.PROPERTY,
546
+ output_type=DataType.DATE,
547
+ arg_count=1,
548
+ ),
549
+ FunctionType.DATE_TRUNCATE: FunctionConfig(
550
+ valid_inputs=[
551
+ {
552
+ DataType.DATE,
553
+ DataType.TIMESTAMP,
554
+ DataType.DATETIME,
555
+ DataType.STRING,
556
+ },
557
+ {DataType.DATE_PART},
558
+ ],
559
+ output_purpose=Purpose.PROPERTY,
560
+ # output_type=DataType.DATE,
561
+ output_type_function=get_date_trunc_output,
562
+ arg_count=2,
563
+ ),
564
+ FunctionType.DATE_PART: FunctionConfig(
565
+ valid_inputs=[
566
+ {
567
+ DataType.DATE,
568
+ DataType.TIMESTAMP,
569
+ DataType.DATETIME,
570
+ DataType.STRING,
571
+ },
572
+ {DataType.DATE_PART},
573
+ ],
574
+ output_purpose=Purpose.PROPERTY,
575
+ output_type_function=get_date_part_output,
576
+ arg_count=2,
577
+ ),
578
+ FunctionType.DATE_ADD: FunctionConfig(
579
+ valid_inputs=[
580
+ {
581
+ DataType.DATE,
582
+ DataType.TIMESTAMP,
583
+ DataType.DATETIME,
584
+ DataType.STRING,
585
+ },
586
+ {DataType.DATE_PART},
587
+ {DataType.INTEGER},
588
+ ],
589
+ output_purpose=Purpose.PROPERTY,
590
+ output_type=DataType.DATE,
591
+ arg_count=3,
592
+ ),
593
+ FunctionType.DATE_SUB: FunctionConfig(
594
+ valid_inputs=[
595
+ {
596
+ DataType.DATE,
597
+ DataType.TIMESTAMP,
598
+ DataType.DATETIME,
599
+ DataType.STRING,
600
+ },
601
+ {DataType.DATE_PART},
602
+ {DataType.INTEGER},
603
+ ],
604
+ output_purpose=Purpose.PROPERTY,
605
+ output_type=DataType.DATE,
606
+ arg_count=3,
607
+ ),
608
+ FunctionType.DATE_DIFF: FunctionConfig(
609
+ valid_inputs=[
610
+ {
611
+ DataType.DATE,
612
+ DataType.TIMESTAMP,
613
+ DataType.DATETIME,
614
+ DataType.STRING,
615
+ },
616
+ {
617
+ DataType.DATE,
618
+ DataType.TIMESTAMP,
619
+ DataType.DATETIME,
620
+ DataType.STRING,
621
+ },
622
+ {DataType.DATE_PART},
623
+ ],
624
+ output_purpose=Purpose.PROPERTY,
625
+ output_type=DataType.INTEGER,
626
+ arg_count=3,
627
+ ),
628
+ FunctionType.DATETIME: FunctionConfig(
629
+ valid_inputs={
630
+ DataType.DATE,
631
+ DataType.TIMESTAMP,
632
+ DataType.DATETIME,
633
+ DataType.STRING,
634
+ },
635
+ output_purpose=Purpose.PROPERTY,
636
+ output_type=DataType.DATETIME,
637
+ arg_count=1,
638
+ ),
639
+ FunctionType.TIMESTAMP: FunctionConfig(
640
+ valid_inputs={
641
+ DataType.DATE,
642
+ DataType.TIMESTAMP,
643
+ DataType.DATETIME,
644
+ DataType.STRING,
645
+ },
646
+ output_purpose=Purpose.PROPERTY,
647
+ output_type=DataType.TIMESTAMP,
648
+ arg_count=1,
649
+ ),
650
+ FunctionType.SECOND: FunctionConfig(
651
+ valid_inputs={
652
+ DataType.DATE,
653
+ DataType.TIMESTAMP,
654
+ DataType.DATETIME,
655
+ DataType.STRING,
656
+ },
657
+ output_purpose=Purpose.PROPERTY,
658
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["second"]),
659
+ arg_count=1,
660
+ ),
661
+ FunctionType.MINUTE: FunctionConfig(
662
+ valid_inputs={
663
+ DataType.DATE,
664
+ DataType.TIMESTAMP,
665
+ DataType.DATETIME,
666
+ DataType.STRING,
667
+ },
668
+ output_purpose=Purpose.PROPERTY,
669
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["minute"]),
670
+ arg_count=1,
671
+ ),
672
+ FunctionType.HOUR: FunctionConfig(
673
+ valid_inputs={
674
+ DataType.DATE,
675
+ DataType.TIMESTAMP,
676
+ DataType.DATETIME,
677
+ DataType.STRING,
678
+ },
679
+ output_purpose=Purpose.PROPERTY,
680
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["hour"]),
681
+ arg_count=1,
682
+ ),
683
+ FunctionType.DAY: FunctionConfig(
684
+ valid_inputs={
685
+ DataType.DATE,
686
+ DataType.TIMESTAMP,
687
+ DataType.DATETIME,
688
+ DataType.STRING,
689
+ },
690
+ output_purpose=Purpose.PROPERTY,
691
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["day"]),
692
+ arg_count=1,
693
+ ),
694
+ FunctionType.DAY_NAME: FunctionConfig(
695
+ valid_inputs={
696
+ DataType.DATE,
697
+ DataType.TIMESTAMP,
698
+ DataType.DATETIME,
699
+ # DataType.STRING,
700
+ },
701
+ output_purpose=Purpose.PROPERTY,
702
+ output_type=TraitDataType(type=DataType.STRING, traits=["day_name"]),
703
+ arg_count=1,
704
+ ),
705
+ FunctionType.WEEK: FunctionConfig(
706
+ valid_inputs={
707
+ DataType.DATE,
708
+ DataType.TIMESTAMP,
709
+ DataType.DATETIME,
710
+ DataType.STRING,
711
+ },
712
+ output_purpose=Purpose.PROPERTY,
713
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["week"]),
714
+ arg_count=1,
715
+ ),
716
+ FunctionType.MONTH: FunctionConfig(
717
+ valid_inputs={
718
+ DataType.DATE,
719
+ DataType.TIMESTAMP,
720
+ DataType.DATETIME,
721
+ DataType.STRING,
722
+ },
723
+ output_purpose=Purpose.PROPERTY,
724
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["month"]),
725
+ arg_count=1,
726
+ ),
727
+ FunctionType.MONTH_NAME: FunctionConfig(
728
+ valid_inputs={
729
+ DataType.DATE,
730
+ DataType.TIMESTAMP,
731
+ DataType.DATETIME,
732
+ # DataType.STRING,
733
+ },
734
+ output_purpose=Purpose.PROPERTY,
735
+ output_type=TraitDataType(type=DataType.STRING, traits=["month_name"]),
736
+ arg_count=1,
737
+ ),
738
+ FunctionType.QUARTER: FunctionConfig(
739
+ valid_inputs={
740
+ DataType.DATE,
741
+ DataType.TIMESTAMP,
742
+ DataType.DATETIME,
743
+ DataType.STRING,
744
+ },
745
+ output_purpose=Purpose.PROPERTY,
746
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["quarter"]),
747
+ arg_count=1,
748
+ ),
749
+ FunctionType.YEAR: FunctionConfig(
750
+ valid_inputs={
751
+ DataType.DATE,
752
+ DataType.TIMESTAMP,
753
+ DataType.DATETIME,
754
+ DataType.STRING,
755
+ },
756
+ output_purpose=Purpose.PROPERTY,
757
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["year"]),
758
+ arg_count=1,
759
+ ),
760
+ FunctionType.DAY_OF_WEEK: FunctionConfig(
761
+ valid_inputs={
762
+ DataType.DATE,
763
+ DataType.TIMESTAMP,
764
+ DataType.DATETIME,
765
+ DataType.STRING,
766
+ },
767
+ output_purpose=Purpose.PROPERTY,
768
+ output_type=TraitDataType(type=DataType.INTEGER, traits=["day_of_week"]),
769
+ arg_count=1,
770
+ ),
771
+ FunctionType.ADD: FunctionConfig(
772
+ valid_inputs={
773
+ DataType.INTEGER,
774
+ DataType.FLOAT,
775
+ DataType.NUMBER,
776
+ DataType.NUMERIC,
777
+ },
778
+ output_purpose=Purpose.PROPERTY,
779
+ arg_count=InfiniteFunctionArgs,
780
+ ),
781
+ FunctionType.SUBTRACT: FunctionConfig(
782
+ valid_inputs={
783
+ DataType.INTEGER,
784
+ DataType.FLOAT,
785
+ DataType.NUMBER,
786
+ DataType.NUMERIC,
787
+ },
788
+ output_purpose=Purpose.PROPERTY,
789
+ arg_count=InfiniteFunctionArgs,
790
+ ),
791
+ FunctionType.MULTIPLY: FunctionConfig(
792
+ valid_inputs={
793
+ DataType.INTEGER,
794
+ DataType.FLOAT,
795
+ DataType.NUMBER,
796
+ DataType.NUMERIC,
797
+ },
798
+ output_purpose=Purpose.PROPERTY,
799
+ arg_count=InfiniteFunctionArgs,
800
+ ),
801
+ FunctionType.POWER: FunctionConfig(
802
+ valid_inputs={
803
+ DataType.INTEGER,
804
+ DataType.FLOAT,
805
+ DataType.NUMBER,
806
+ DataType.NUMERIC,
807
+ },
808
+ output_purpose=Purpose.PROPERTY,
809
+ arg_count=2,
810
+ ),
811
+ FunctionType.DIVIDE: FunctionConfig(
812
+ valid_inputs={
813
+ DataType.INTEGER,
814
+ DataType.FLOAT,
815
+ DataType.NUMBER,
816
+ DataType.NUMERIC,
817
+ },
818
+ output_purpose=Purpose.PROPERTY,
819
+ arg_count=InfiniteFunctionArgs,
820
+ ),
821
+ FunctionType.MOD: FunctionConfig(
822
+ valid_inputs=[
823
+ {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER, DataType.NUMERIC},
824
+ {DataType.INTEGER},
825
+ ],
826
+ output_purpose=Purpose.PROPERTY,
827
+ output_type=DataType.INTEGER,
828
+ arg_count=2,
829
+ ),
830
+ FunctionType.SQRT: FunctionConfig(
831
+ valid_inputs=[
832
+ {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER, DataType.NUMERIC},
833
+ ],
834
+ output_purpose=Purpose.PROPERTY,
835
+ output_type=DataType.INTEGER,
836
+ arg_count=1,
837
+ ),
838
+ FunctionType.LOG: FunctionConfig(
839
+ valid_inputs=[
840
+ {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER, DataType.NUMERIC},
841
+ {DataType.INTEGER},
842
+ ],
843
+ output_purpose=Purpose.PROPERTY,
844
+ output_type=DataType.FLOAT,
845
+ arg_count=2,
846
+ ),
847
+ FunctionType.RANDOM: FunctionConfig(
848
+ valid_inputs=[],
849
+ output_purpose=Purpose.PROPERTY,
850
+ output_type=DataType.FLOAT,
851
+ arg_count=1,
852
+ ),
853
+ FunctionType.ROUND: FunctionConfig(
854
+ valid_inputs=[
855
+ {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER, DataType.NUMERIC},
856
+ {DataType.INTEGER},
857
+ ],
858
+ output_purpose=Purpose.PROPERTY,
859
+ output_type_function=lambda args: get_output_type_at_index(args, 0),
860
+ arg_count=2,
861
+ ),
862
+ FunctionType.FLOOR: FunctionConfig(
863
+ valid_inputs=[
864
+ {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER, DataType.NUMERIC},
865
+ ],
866
+ output_purpose=Purpose.PROPERTY,
867
+ output_type=DataType.INTEGER,
868
+ arg_count=1,
869
+ ),
870
+ FunctionType.CEIL: FunctionConfig(
871
+ valid_inputs=[
872
+ {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER, DataType.NUMERIC},
873
+ ],
874
+ output_purpose=Purpose.PROPERTY,
875
+ output_type=DataType.INTEGER,
876
+ arg_count=1,
877
+ ),
878
+ FunctionType.CUSTOM: FunctionConfig(
879
+ output_purpose=Purpose.PROPERTY,
880
+ arg_count=InfiniteFunctionArgs,
881
+ ),
882
+ FunctionType.CASE: FunctionConfig(
883
+ output_purpose=Purpose.PROPERTY,
884
+ output_type_function=validate_case_output,
885
+ arg_count=InfiniteFunctionArgs,
886
+ ),
887
+ FunctionType.CAST: FunctionConfig(
888
+ output_purpose=Purpose.PROPERTY,
889
+ arg_count=2,
890
+ output_type_function=get_cast_output_type,
891
+ ),
892
+ FunctionType.CONCAT: FunctionConfig(
893
+ valid_inputs={DataType.STRING},
894
+ output_purpose=Purpose.PROPERTY,
895
+ output_type=DataType.STRING,
896
+ arg_count=InfiniteFunctionArgs,
897
+ ),
898
+ FunctionType.CONSTANT: FunctionConfig(
899
+ output_purpose=Purpose.CONSTANT,
900
+ arg_count=1,
901
+ ),
902
+ FunctionType.TYPED_CONSTANT: FunctionConfig(
903
+ output_purpose=Purpose.CONSTANT,
904
+ output_type_function=get_cast_output_type,
905
+ arg_count=2,
906
+ ),
907
+ FunctionType.IS_NULL: FunctionConfig(
908
+ output_purpose=Purpose.PROPERTY,
909
+ output_type=DataType.BOOL,
910
+ arg_count=1,
911
+ ),
912
+ FunctionType.STRUCT: FunctionConfig(
913
+ output_purpose=Purpose.PROPERTY,
914
+ arg_count=InfiniteFunctionArgs,
915
+ output_type_function=create_struct_output,
916
+ ),
917
+ FunctionType.ARRAY: FunctionConfig(
918
+ output_purpose=Purpose.PROPERTY,
919
+ arg_count=InfiniteFunctionArgs,
920
+ output_type=ArrayType(type=DataType.STRING),
921
+ ),
922
+ FunctionType.LENGTH: FunctionConfig(
923
+ valid_inputs={DataType.STRING, DataType.ARRAY, DataType.MAP},
924
+ output_purpose=Purpose.PROPERTY,
925
+ output_type=DataType.INTEGER,
926
+ arg_count=1,
927
+ ),
928
+ FunctionType.SUM: FunctionConfig(
929
+ valid_inputs={
930
+ DataType.INTEGER,
931
+ DataType.FLOAT,
932
+ DataType.NUMBER,
933
+ DataType.NUMERIC,
934
+ DataType.BOOL,
935
+ },
936
+ output_purpose=Purpose.METRIC,
937
+ arg_count=1,
938
+ ),
939
+ FunctionType.ARRAY_AGG: FunctionConfig(
940
+ valid_inputs={*DataType},
941
+ output_purpose=Purpose.METRIC,
942
+ output_type_function=lambda args: ArrayType(
943
+ type=merge_datatypes([arg_to_datatype(x) for x in args])
944
+ ),
945
+ arg_count=1,
946
+ ),
947
+ FunctionType.ANY: FunctionConfig(
948
+ valid_inputs={*DataType},
949
+ output_purpose=Purpose.PROPERTY,
950
+ arg_count=1,
951
+ ),
952
+ FunctionType.BOOL_AND: FunctionConfig(
953
+ valid_inputs={DataType.BOOL},
954
+ output_purpose=Purpose.METRIC,
955
+ output_type=DataType.BOOL,
956
+ arg_count=1,
957
+ ),
958
+ FunctionType.BOOL_OR: FunctionConfig(
959
+ valid_inputs={DataType.BOOL},
960
+ output_purpose=Purpose.METRIC,
961
+ output_type=DataType.BOOL,
962
+ arg_count=1,
963
+ ),
964
+ FunctionType.AVG: FunctionConfig(
965
+ valid_inputs={
966
+ DataType.INTEGER,
967
+ DataType.FLOAT,
968
+ DataType.NUMBER,
969
+ DataType.NUMERIC,
970
+ },
971
+ output_purpose=Purpose.METRIC,
972
+ arg_count=1,
973
+ ),
974
+ FunctionType.UNIX_TO_TIMESTAMP: FunctionConfig(
975
+ valid_inputs={DataType.INTEGER},
976
+ output_purpose=Purpose.PROPERTY,
977
+ output_type=DataType.TIMESTAMP,
978
+ arg_count=1,
979
+ ),
980
+ FunctionType.HASH: FunctionConfig(
981
+ valid_inputs={
982
+ DataType.STRING,
983
+ },
984
+ output_purpose=Purpose.PROPERTY,
985
+ output_type=DataType.STRING,
986
+ arg_count=2,
987
+ ),
988
+ FunctionType.GEO_POINT: FunctionConfig(
989
+ valid_inputs={DataType.NUMERIC},
990
+ output_purpose=Purpose.PROPERTY,
991
+ output_type=DataType.GEOGRAPHY,
992
+ arg_count=2,
993
+ ),
994
+ FunctionType.GEO_DISTANCE: FunctionConfig(
995
+ valid_inputs={DataType.GEOGRAPHY},
996
+ output_purpose=Purpose.PROPERTY,
997
+ output_type=DataType.NUMERIC,
998
+ arg_count=2,
999
+ ),
1000
+ }
1001
+
1002
# Function types that are deliberately absent from FUNCTION_REGISTRY.
EXCLUDED_FUNCTIONS = {
    FunctionType.CUSTOM,
    # Temporary
    FunctionType.DATE_LITERAL,
    FunctionType.DATETIME_LITERAL,
    FunctionType.ARRAY,
}

# Import-time sanity check: every FunctionType member must either have a
# registry entry or be explicitly excluded above.
for k in FunctionType.__members__.values():
    if k in FUNCTION_REGISTRY or k in EXCLUDED_FUNCTIONS:
        continue
    raise InvalidSyntaxException(
        f"Function enum value {k} not in creation registry"
    )
1015
+
1016
+
1017
class FunctionFactory:
    """Build ``Function`` objects from the entries in ``FUNCTION_REGISTRY``.

    The optional environment is used to process arguments and to look up
    trait definitions; it is required whenever arguments are supplied.
    """

    def __init__(self, environment: Environment | None = None):
        self.environment = environment

    def create_function(
        self,
        args: list[Any],
        operator: FunctionType,
        meta: Meta | None = None,
    ):
        """Create a ``Function`` for ``operator`` applied to ``args``.

        Args:
            args: Raw arguments. When non-empty they are passed through
                ``process_function_args`` together with the environment.
            operator: Function type; must be a key of ``FUNCTION_REGISTRY``.
            meta: Optional metadata forwarded to argument processing.

        Returns:
            A ``Function`` carrying the processed arguments, the resolved
            output datatype and purpose, and the registry's valid inputs
            and argument count.

        Raises:
            ValueError: If ``operator`` is not registered, or if ``args`` is
                non-empty and the factory has no environment.
        """
        if operator not in FUNCTION_REGISTRY:
            raise ValueError(f"Function {operator} not in registry")
        config = FUNCTION_REGISTRY[operator]
        # No declared inputs means every datatype is accepted.
        valid_inputs: set[DataType] | list[set[DataType]] = (
            config.valid_inputs or set(DataType)
        )
        output_purpose = config.output_purpose
        base_output_type = config.output_type
        arg_count = config.arg_count

        if args:
            if not self.environment:
                raise ValueError("Environment required for function creation with args")
            # TODO: remove this dependency
            from trilogy.parsing.common import process_function_args

            full_args = process_function_args(
                args, environment=self.environment, meta=meta
            )
        else:
            full_args = []

        # Output type precedence: an explicit resolver function, then the
        # registry's static type, then a merge of the argument datatypes.
        final_output_type: CONCRETE_TYPES
        if config.output_type_function:
            final_output_type = config.output_type_function(full_args)
        elif base_output_type:
            final_output_type = base_output_type
        else:
            final_output_type = merge_datatypes([arg_to_datatype(x) for x in full_args])

        if isinstance(final_output_type, TraitDataType) and self.environment:
            known_types = self.environment.data_types
            # Keep only the traits whose definition does not list this
            # operator in ``drop_on``; unknown traits fall back to
            # CUSTOM_PLACEHOLDER.
            final_output_type = TraitDataType(
                type=final_output_type.type,
                traits=[
                    trait
                    for trait in final_output_type.traits
                    if operator
                    not in known_types.get(trait, CUSTOM_PLACEHOLDER).drop_on
                ],
            )

        if not output_purpose:
            if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
                output_purpose = Purpose.METRIC
            else:
                output_purpose = Purpose.PROPERTY

        return Function(
            operator=operator,
            arguments=full_args,  # type: ignore
            output_datatype=final_output_type,
            output_purpose=output_purpose,
            valid_inputs=valid_inputs,
            arg_count=arg_count,
        )
1086
+
1087
+
1088
def create_function_derived_concept(
    name: str,
    namespace: str,
    operator: FunctionType,
    arguments: list[Concept],
    environment: Environment,
    output_type: Optional[
        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    ] = None,
    output_purpose: Optional[Purpose] = None,
) -> Concept:
    """Create a concept whose lineage is ``operator`` applied to ``arguments``.

    When ``output_purpose`` is omitted it is derived from the arguments via
    ``function_args_to_output_purpose``. When ``output_type`` is omitted the
    datatype of the first argument is used, so ``arguments`` must then be
    non-empty.
    """
    if output_purpose is None:
        purpose = function_args_to_output_purpose(arguments, environment=environment)
    else:
        purpose = output_purpose

    if output_type is None:
        output_type = arg_to_datatype(arguments[0])

    lineage = Function(
        operator=operator,
        arguments=[x.reference for x in arguments],
        output_datatype=output_type,
        output_purpose=purpose,
        arg_count=len(arguments),
    )
    return Concept(
        name=name,
        namespace=namespace,
        datatype=output_type,
        purpose=purpose,
        lineage=lineage,
    )
1118
+
1119
+
1120
def argument_to_purpose(arg) -> Purpose:
    """Return the ``Purpose`` implied by a single function argument.

    Raises:
        ValueError: If ``arg`` is not one of the handled types.
    """
    # UndefinedConcept is tested before Concept, as in the original ordering.
    if isinstance(arg, UndefinedConcept):
        return Purpose.UNKNOWN
    if isinstance(arg, Function):
        return arg.output_purpose
    if isinstance(arg, AggregateWrapper):
        inner_purpose = arg.function.output_purpose
        # A metric aggregated with an explicit ``by`` is reported as a property.
        if arg.by and inner_purpose == Purpose.METRIC:
            return Purpose.PROPERTY
        return inner_purpose
    if isinstance(arg, Parenthetical):
        return argument_to_purpose(arg.content)
    if isinstance(arg, (WindowItem, Conditional)):
        return Purpose.PROPERTY
    if isinstance(arg, Concept):
        concept_purpose = arg.purpose
        grouped_metric = (
            isinstance(arg.lineage, AggregateWrapper)
            and arg.lineage.by
            and concept_purpose == Purpose.METRIC
        )
        return Purpose.PROPERTY if grouped_metric else concept_purpose
    if isinstance(
        arg,
        (
            int,
            float,
            str,
            bool,
            list,
            NumericType,
            DataType,
            DatePart,
            MagicConstants,
        ),
    ):
        return Purpose.CONSTANT
    raise ValueError(f"Cannot parse arg purpose for {arg} of type {type(arg)}")
1153
+
1154
+
1155
def function_args_to_output_purpose(args, environment: Environment) -> Purpose:
    """Derive the output purpose of a function from its arguments.

    Returns ``Purpose.CONSTANT`` when there are no arguments, or when every
    argument is constant and no ``Concept`` argument has a granularity other
    than ``SINGLE_ROW``. Otherwise returns ``Purpose.METRIC`` if any argument
    is a metric, else ``Purpose.PROPERTY``.
    """
    if not args:
        return Purpose.CONSTANT

    saw_metric = False
    all_constant = True
    for raw_arg in args:
        # Resolve references to their concept definitions before inspection.
        if isinstance(raw_arg, ConceptRef):
            resolved = environment.concepts[raw_arg.address]
        else:
            resolved = raw_arg
        purpose = argument_to_purpose(resolved)
        if purpose == Purpose.METRIC:
            saw_metric = True
        if purpose != Purpose.CONSTANT:
            all_constant = False
        if (
            isinstance(resolved, Concept)
            and resolved.granularity != Granularity.SINGLE_ROW
        ):
            all_constant = False

    if all_constant:
        return Purpose.CONSTANT
    return Purpose.METRIC if saw_metric else Purpose.PROPERTY
1176
+
1177
+
1178
def Count(args: list[Concept], environment: Environment) -> Function:
    """Build a ``FunctionType.COUNT`` function over ``args``."""
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.COUNT)
1182
+
1183
+
1184
def CountDistinct(args: list[Concept], environment: Environment) -> Function:
    """Build a ``FunctionType.COUNT_DISTINCT`` function over ``args``.

    Previously this built a plain ``COUNT``, so callers silently received a
    non-distinct count.
    """
    return FunctionFactory(environment).create_function(
        args=args, operator=FunctionType.COUNT_DISTINCT
    )
1188
+
1189
+
1190
def Max(args: list[Concept], environment: Environment) -> Function:
    """Build a ``FunctionType.MAX`` function over ``args``.

    Previously this built a ``COUNT`` function instead of a maximum.
    """
    return FunctionFactory(environment).create_function(
        args=args, operator=FunctionType.MAX
    )
1194
+
1195
+
1196
def Min(args: list[Concept], environment: Environment) -> Function:
    """Build a ``FunctionType.MIN`` function over ``args``.

    Previously this built a ``COUNT`` function instead of a minimum.
    """
    return FunctionFactory(environment).create_function(
        args=args, operator=FunctionType.MIN
    )
1200
+
1201
+
1202
def Split(args: list[Concept], environment: Environment) -> Function:
    """Build a ``FunctionType.SPLIT`` function over ``args``."""
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.SPLIT)
1206
+
1207
+
1208
def AttrAccess(args: list[ConceptRef | str | int], environment: Environment):
    """Build a ``FunctionType.ATTR_ACCESS`` function over ``args``."""
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.ATTR_ACCESS)
1212
+
1213
+
1214
def CurrentDate(
    args: list[Concept], environment: Environment | None = None
) -> Function:
    """Build a ``FunctionType.CURRENT_DATE`` function.

    ``environment`` may be omitted; it is forwarded unchanged to
    ``FunctionFactory``.
    """
    factory = FunctionFactory(environment)
    return factory.create_function(args=args, operator=FunctionType.CURRENT_DATE)
1220
+
1221
+
1222
def CurrentDatetime(
    args: list[Concept], environment: Environment | None = None
) -> Function:
    """Build a ``FunctionType.CURRENT_DATETIME`` function.

    ``environment`` may be omitted; it is forwarded unchanged to
    ``FunctionFactory``.
    """
    factory = FunctionFactory(environment)
    return factory.create_function(
        args=args, operator=FunctionType.CURRENT_DATETIME
    )