pytrilogy 0.3.142__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp312-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
@@ -0,0 +1,1069 @@
1
+ from datetime import date, datetime
2
+ from typing import Iterable, List, Sequence, Tuple
3
+
4
+ from lark.tree import Meta
5
+
6
+ from trilogy.constants import DEFAULT_NAMESPACE, VIRTUAL_CONCEPT_PREFIX
7
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
8
+ from trilogy.core.enums import (
9
+ ConceptSource,
10
+ Derivation,
11
+ FunctionClass,
12
+ FunctionType,
13
+ Granularity,
14
+ Modifier,
15
+ Purpose,
16
+ WindowType,
17
+ )
18
+ from trilogy.core.exceptions import InvalidSyntaxException
19
+ from trilogy.core.functions import function_args_to_output_purpose
20
+ from trilogy.core.models.author import (
21
+ AggregateWrapper,
22
+ AlignClause,
23
+ AlignItem,
24
+ CaseElse,
25
+ CaseWhen,
26
+ Comparison,
27
+ Concept,
28
+ ConceptArgs,
29
+ ConceptRef,
30
+ Conditional,
31
+ FilterItem,
32
+ Function,
33
+ FunctionCallWrapper,
34
+ Grain,
35
+ HavingClause,
36
+ ListWrapper,
37
+ MapWrapper,
38
+ Metadata,
39
+ MultiSelectLineage,
40
+ Parenthetical,
41
+ RowsetItem,
42
+ RowsetLineage,
43
+ SubselectComparison,
44
+ TraitDataType,
45
+ TupleWrapper,
46
+ UndefinedConcept,
47
+ WhereClause,
48
+ WindowItem,
49
+ address_with_namespace,
50
+ )
51
+ from trilogy.core.models.core import DataType, arg_to_datatype
52
+ from trilogy.core.models.environment import Environment
53
+ from trilogy.core.statements.author import RowsetDerivationStatement, SelectStatement
54
+ from trilogy.utility import string_to_hash, unique
55
+
56
# Type alias: union of the expression wrapper types and plain Python literal
# types listed below.
# NOTE(review): presumably the inputs accepted when building an anonymous
# concept from an arbitrary expression - confirm against the alias's consumers.
ARBITRARY_INPUTS = (
    AggregateWrapper | FunctionCallWrapper | WindowItem | FilterItem
    | Function | Parenthetical | ListWrapper | MapWrapper | Comparison
    | int | float | str | date | bool
)
72
+
73
+
74
def _get_or_create_virtual_concept(
    arg,
    name: str,
    meta: Meta | None,
    environment: Environment,
    builder,
):
    """Return a reference to the virtual concept called `name`.

    If a concept with that name already exists in the environment's current
    namespace its reference is returned; otherwise `builder` is used to
    create it from `arg`, it is registered on the environment, and the new
    concept's reference is returned.
    """
    address = f"{environment.namespace}.{name}"
    if address in environment.concepts:
        # Return a reference (not the Concept itself) so that a cache hit
        # and a freshly created concept yield the same kind of object.
        return environment.concepts[address].reference
    concept = builder(arg, name=name, environment=environment)
    # to satisfy mypy, concept will always have metadata
    if concept.metadata and meta:
        concept.metadata.line_number = meta.line
    environment.add_concept(concept, meta=meta)
    return concept.reference


def process_function_arg(
    arg,
    meta: Meta | None,
    environment: Environment,
):
    """Normalize one function argument.

    - Parenthetical: its content is processed recursively and wrapped in a
      PARENTHETICAL Function.
    - Function: returned unchanged unless its operator is an aggregate or a
      one-to-many function, in which case an implicit (virtual) concept is
      created or reused and its reference returned.
    - ListWrapper / MapWrapper: replaced by a reference to an implicit
      (virtual) concept.
    - Concept / ConceptRef: replaced by the concept's reference.
    - Anything else is returned as-is.

    `meta`, when provided, supplies the line number recorded on any newly
    created virtual concept.
    """
    if isinstance(arg, Parenthetical):
        processed = process_function_args([arg.content], meta, environment)
        return Function(
            operator=FunctionType.PARENTHETICAL,
            arguments=processed,
            output_datatype=arg_to_datatype(processed[0]),
            output_purpose=function_args_to_output_purpose(processed, environment),
        )
    if isinstance(arg, Function):
        # if it's not an aggregate (or one-to-many) function, we can skip the
        # virtual concepts to simplify anonymous function handling
        if (
            arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
            and arg.operator not in FunctionClass.ONE_TO_MANY.value
        ):
            return arg
        id_hash = string_to_hash(str(arg))
        name = f"{VIRTUAL_CONCEPT_PREFIX}_{arg.operator.value}_{id_hash}"
        return _get_or_create_virtual_concept(
            arg, name, meta, environment, function_to_concept
        )
    if isinstance(arg, (ListWrapper, MapWrapper)):
        id_hash = string_to_hash(str(arg))
        name = f"{VIRTUAL_CONCEPT_PREFIX}_{id_hash}"
        return _get_or_create_virtual_concept(
            arg, name, meta, environment, arbitrary_to_concept
        )
    if isinstance(arg, Concept):
        return arg.reference
    if isinstance(arg, ConceptRef):
        return environment.concepts[arg.address].reference
    return arg
133
+
134
+
135
def process_function_args(
    args,
    meta: Meta | None,
    environment: Environment,
) -> List[ConceptRef | Function | str | int | float | date | datetime]:
    """Apply `process_function_arg` to every item of `args`, in order.

    Returns a new list with one processed entry per input argument.
    """
    return [process_function_arg(item, meta, environment) for item in args]
144
+
145
+
146
def get_upstream_modifiers(
    keys: Sequence[Concept | ConceptRef], environment: Environment
) -> list[Modifier]:
    """Collect the distinct modifiers carried by `keys`.

    References are resolved through `environment.concepts` first; undefined
    concepts are skipped. The result is built from a set, so it contains no
    duplicates and its order is not guaranteed.
    """
    collected = set()
    for candidate in keys:
        resolved = candidate
        if isinstance(resolved, ConceptRef):
            resolved = environment.concepts[resolved.address]
        if isinstance(resolved, UndefinedConcept):
            continue
        if resolved.modifiers:
            collected.update(resolved.modifiers)
    return list(collected)
158
+
159
+
160
def get_purpose_and_keys(
    purpose: Purpose | None,
    args: Tuple[ConceptRef | Concept, ...] | None,
    environment: Environment,
) -> Tuple[Purpose, set[str] | None]:
    """Return the effective purpose and, where applicable, the key set.

    When `purpose` is not supplied it is derived from `args` via
    `function_args_to_output_purpose`. Keys are only computed for PROPERTY
    and METRIC purposes with non-empty `args`; otherwise keys is None.
    """
    resolved_purpose = (
        purpose if purpose else function_args_to_output_purpose(args, environment)
    )
    if resolved_purpose in (Purpose.PROPERTY, Purpose.METRIC) and args:
        return resolved_purpose, concept_list_to_keys(args, environment)
    return resolved_purpose, None
171
+
172
+
173
def concept_list_to_keys(
    concepts: Tuple[Concept | ConceptRef, ...], environment: Environment
) -> set[str]:
    """Derive the set of key addresses implied by `concepts`.

    A concept that declares keys contributes those keys; a concept without
    keys contributes its own address unless its purpose is PROPERTY.
    References are resolved through the environment and undefined concepts
    are ignored.
    """
    collected: set[str] = set()
    for item in concepts:
        resolved = item
        if isinstance(resolved, ConceptRef):
            resolved = environment.concepts[resolved.address]
        if isinstance(resolved, UndefinedConcept):
            continue
        if resolved.keys:
            collected.update(resolved.keys)
        elif resolved.purpose != Purpose.PROPERTY:
            collected.add(resolved.address)
    return collected
188
+
189
+
190
def constant_to_concept(
    parent: (
        ListWrapper | TupleWrapper | MapWrapper | int | float | str | date | datetime
    ),
    name: str,
    namespace: str,
    metadata: Metadata | None = None,
) -> Concept:
    """Wrap a literal value in a single-row CONSTANT concept.

    The value becomes the sole argument of a CONSTANT function, which is
    used as the concept's lineage and supplies its datatype. A fresh
    Metadata is created when none is passed in.
    """
    lineage: Function = Function(
        operator=FunctionType.CONSTANT,
        output_datatype=arg_to_datatype(parent),
        output_purpose=Purpose.CONSTANT,
        arguments=[parent],
    )
    concept_metadata = metadata if metadata else Metadata()
    return Concept(
        name=name,
        namespace=namespace,
        datatype=lineage.output_datatype,
        purpose=Purpose.CONSTANT,
        derivation=Derivation.CONSTANT,
        granularity=Granularity.SINGLE_ROW,
        lineage=lineage,
        grain=Grain(),
        metadata=concept_metadata,
    )
217
+
218
+
219
def atom_is_relevant(
    atom,
    others: list[Concept | ConceptRef],
    environment: Environment | None = None,
):
    """Decide whether an expression atom is relevant relative to `others`.

    Concepts (and references) already present in `others` are not relevant;
    otherwise the decision is delegated to `concept_is_relevant`. Composite
    expressions are relevant when any inspected sub-expression is relevant.
    An aggregate without a `by` clause is never relevant, and unrecognised
    atoms are not relevant.
    """

    def check(part):
        return atom_is_relevant(part, others, environment)

    if isinstance(atom, (ConceptRef, Concept)):
        # early exit: a concept that is already in the comparison set
        # is not relevant on its own
        if atom.address in others:
            return False
        return concept_is_relevant(atom, others, environment)

    if isinstance(atom, AggregateWrapper):
        if not atom.by:
            return False
        return any(check(member) for member in atom.by)

    if isinstance(atom, Function):
        # stops at the first relevant argument
        return any(check(argument) for argument in atom.arguments)

    if isinstance(atom, FunctionCallWrapper):
        # every part is evaluated before combining
        return any([check(part) for part in [atom.content, *atom.args]])

    if isinstance(atom, CaseWhen):
        return check(atom.expr) or check(atom.comparison)

    if isinstance(atom, CaseElse):
        return check(atom.expr)

    # must be tested before Comparison: only the left side is inspected here
    if isinstance(atom, SubselectComparison):
        return check(atom.left)

    if isinstance(atom, (Comparison, Conditional)):
        return check(atom.left) or check(atom.right)

    if isinstance(atom, Parenthetical):
        return check(atom.content)

    if isinstance(atom, ConceptArgs):
        # go through atom_is_relevant (not concept_is_relevant) to trigger
        # the early-exit behavior for concepts that are already in the set
        return any([check(argument) for argument in atom.concept_arguments])

    return False
276
+
277
+
278
def concept_is_relevant(
    concept: Concept | ConceptRef,
    others: list[Concept | ConceptRef],
    environment: Environment | None = None,
) -> bool:
    """Decide whether `concept` is relevant relative to `others`.

    Returns False for undefined or unknown-typed concepts, constants,
    aggregates that are not grouped with a `by` clause, concepts whose keys
    are all present in `others`, metrics whose grain components are all
    present in `others`, and single-row concepts. Basic derivations are
    judged by their lineage (DATE_SPINE functions are always relevant).

    Raises:
        SyntaxError: if `concept` is a ConceptRef and no environment is
            available to resolve it.
    """
    if isinstance(concept, UndefinedConcept) or concept.datatype == DataType.UNKNOWN:
        return False

    if isinstance(concept, ConceptRef):
        if not environment:
            raise SyntaxError(
                "Require environment to determine relevance of ConceptRef"
            )
        concept = environment.concepts[concept.address]

    if concept.derivation == Derivation.CONSTANT:
        return False

    if concept.is_aggregate and not (
        isinstance(concept.lineage, AggregateWrapper) and concept.lineage.by
    ):
        return False

    # a keyed concept adds nothing when all of its keys are already present
    if (
        concept.purpose in (Purpose.PROPERTY, Purpose.METRIC, Purpose.KEY)
        and concept.keys
        and all(key in others for key in concept.keys)
    ):
        return False

    if concept.purpose == Purpose.METRIC and all(
        component in others for component in concept.grain.components
    ):
        return False

    if concept.derivation == Derivation.BASIC:
        lineage = concept.lineage
        if isinstance(lineage, Function) and lineage.operator == FunctionType.DATE_SPINE:
            return True
        if isinstance(lineage, (Function, CaseWhen)):
            # every argument is evaluated before combining
            return any(
                [atom_is_relevant(arg, others, environment) for arg in lineage.arguments]
            )
        if isinstance(lineage, Parenthetical):
            return atom_is_relevant(lineage.content, others, environment)

    return concept.granularity != Granularity.SINGLE_ROW
335
+
336
+
337
def concepts_to_grain_concepts(
    concepts: Iterable[Concept | ConceptRef | str],
    environment: Environment | None,
    local_concepts: dict[str, Concept] | None = None,
) -> set[str]:
    """Reduce `concepts` to the set of addresses that are relevant to grain.

    Inputs may be concepts, references or address strings; the latter two
    are resolved from `local_concepts` first and then from the environment.
    Concepts whose lineage is an ALIAS function are replaced by the concept
    they alias. Only concepts for which `concept_is_relevant` holds are kept.

    Raises:
        ValueError: if an input is not a Concept and cannot be resolved
            (no environment, or an unsupported input type).
    """
    resolved: list[Concept] = []
    for item in concepts:
        if isinstance(item, Concept):
            resolved.append(item)
            continue
        if not isinstance(item, (ConceptRef, str)) or not environment:
            raise ValueError(
                f"Unable to resolve input {item} without environment provided to concepts_to_grain call"
            )
        address = item.address if isinstance(item, ConceptRef) else item
        if local_concepts and address in local_concepts:
            resolved.append(local_concepts[address])
        else:
            resolved.append(environment.concepts[address])

    candidates: list[Concept] = []
    for entry in resolved:
        is_alias = (
            entry.lineage
            and isinstance(entry.lineage, Function)
            and entry.lineage.operator == FunctionType.ALIAS
        )
        if is_alias:
            # if the function is an alias, use the unaliased concept to calculate grain
            candidates.append(environment.concepts[entry.lineage.arguments[0].address])  # type: ignore
        else:
            candidates.append(entry)

    relevant_addresses = set()
    for candidate in candidates:
        if candidate.address in relevant_addresses:
            continue
        if concept_is_relevant(candidate, candidates, environment):  # type: ignore
            relevant_addresses.add(candidate.address)

    return relevant_addresses
383
+
384
+
385
def _get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
    """Collect the parent concept references of `arg` and a metric flag.

    The flag is True when an AggregateWrapper is found in `arg` (directly, or
    through Function arguments, FunctionCallWrapper content or Comparison
    sides). An aggregate contributes only its `by` concepts. Results may
    contain duplicates; the public wrapper removes them.
    """
    # imported locally, as in the original implementation
    from trilogy.core.models.author import get_concept_arguments

    if isinstance(arg, Function):
        collected = []
        found_metric = False
        for child in arg.arguments:
            child_refs, child_is_metric = get_relevant_parent_concepts(child)
            collected += child_refs
            found_metric = found_metric or child_is_metric
        return collected, found_metric

    if isinstance(arg, AggregateWrapper):
        if not arg.by:
            return [], True
        return [member.reference for member in arg.by], True

    if isinstance(arg, FunctionCallWrapper):
        return get_relevant_parent_concepts(arg.content)

    if isinstance(arg, Comparison):
        left_refs, left_is_metric = get_relevant_parent_concepts(arg.left)
        right_refs, right_is_metric = get_relevant_parent_concepts(arg.right)
        return left_refs + right_refs, left_is_metric or right_is_metric

    return get_concept_arguments(arg), False
407
+
408
+
409
def get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
    """Return the parent concept references of `arg`, de-duplicated by
    address, together with the metric flag from the internal helper."""
    references, is_metric = _get_relevant_parent_concepts(arg)
    deduplicated = unique(references, "address")
    return deduplicated, is_metric
412
+
413
+
414
def group_function_to_concept(
    parent: Function,
    name: str,
    environment: Environment,
    namespace: str | None = None,
    metadata: Metadata | None = None,
):
    """Build the concept for a grouping function.

    Keys are the addresses of the non-constant parent concepts, ignoring
    metrics that have no keys of their own. The grain is derived from those
    keys. The purpose is METRIC when an aggregate is involved, CONSTANT when
    there are no non-constant parents, and the function's output purpose
    otherwise. The namespace defaults to the environment's namespace.
    """
    namespace = namespace or environment.namespace
    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[ref.address] for ref in ref_args]
    pkeys: List[Concept] = [
        candidate
        for candidate in concrete_args
        if candidate.derivation != Derivation.CONSTANT
    ]
    modifiers = get_upstream_modifiers(pkeys, environment)

    # for a group to, if we have a dynamic metric (a metric without keys),
    # ignore it; it will end up with the group target grain
    keys = {
        candidate.address
        for candidate in pkeys
        if not (candidate.purpose == Purpose.METRIC and not candidate.keys)
    }
    grain = Grain.from_concepts(keys, environment)

    if is_metric:
        purpose = Purpose.METRIC
    elif pkeys:
        purpose = parent.output_purpose
    else:
        purpose = Purpose.CONSTANT

    shared = dict(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        metadata=metadata or Metadata(),
        granularity=Granularity.MULTI_ROW,
    )

    if grain is not None:
        # deduplicate the grain components
        grain = Grain.from_concepts(grain.components, environment)
        return Concept(grain=grain, derivation=Derivation.GROUP_TO, **shared)

    return Concept(derivation=Derivation.BASIC, **shared)
482
+
483
+
484
def function_to_concept(
    parent: Function,
    name: str,
    environment: Environment,
    namespace: str | None = None,
    metadata: Metadata | None = None,
) -> Concept:
    """Build the concept that represents the output of `parent`.

    Parent concepts that are constants, or aggregates with an empty grain,
    do not contribute to grain, keys or modifiers. Derivation and
    granularity are chosen from the function's operator class; the grain is
    left unset for one-to-many functions. The namespace defaults to the
    environment's namespace.
    """
    namespace = namespace or environment.namespace
    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[ref.address] for ref in ref_args]

    pkeys: List[Concept] = []
    for candidate in concrete_args:
        if candidate.derivation == Derivation.CONSTANT:
            continue
        if candidate.derivation == Derivation.AGGREGATE and not candidate.grain.components:
            continue
        pkeys.append(candidate)

    grain: Grain | None = Grain()
    for contributor in pkeys:
        grain += contributor.grain
    if parent.operator in FunctionClass.ONE_TO_MANY.value:
        # if the function will create more rows, we don't know what grain this is at
        grain = None

    modifiers = get_upstream_modifiers(pkeys, environment)

    keys: set[str] = set()
    for contributor in pkeys:
        if not is_metric and contributor.keys:
            # for row operations, assume keys are transitive
            keys.update(contributor.keys)
        else:
            # metrics will group to keys, so do not do key traversal
            keys.add(contributor.address)

    if is_metric:
        purpose = Purpose.METRIC
    elif pkeys:
        purpose = parent.output_purpose
    else:
        purpose = Purpose.CONSTANT

    fmetadata = metadata or Metadata()

    operator = parent.operator
    granularity = Granularity.MULTI_ROW
    if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
        derivation = Derivation.AGGREGATE
        if (
            grain
            and grain.components
            and all(part.endswith(ALL_ROWS_CONCEPT) for part in grain.components)
        ):
            granularity = Granularity.SINGLE_ROW
    elif operator == FunctionType.UNION:
        derivation = Derivation.UNION
    elif operator in FunctionClass.ONE_TO_MANY.value:
        derivation = Derivation.UNNEST
    elif operator == FunctionType.RECURSE_EDGE:
        derivation = Derivation.RECURSIVE
    elif operator in FunctionClass.SINGLE_ROW.value or (
        concrete_args
        and all(item.derivation == Derivation.CONSTANT for item in concrete_args)
    ):
        derivation = Derivation.CONSTANT
        granularity = Granularity.SINGLE_ROW
    else:
        derivation = Derivation.BASIC

    concept_kwargs = dict(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        metadata=fmetadata,
        derivation=derivation,
        granularity=granularity,
    )
    # only pass a grain when one is known
    if grain is not None:
        concept_kwargs["grain"] = grain
    return Concept(**concept_kwargs)
586
+
587
+
588
def filter_item_to_concept(
    parent: FilterItem,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build the PROPERTY concept produced by a filter expression.

    The filtered content is either an existing concept (looked up by
    reference) or an arbitrary expression that is first turned into a
    concept. Datatype, granularity and - when the source is itself a
    property - keys and grain are taken from that source concept.

    Raises:
        NotImplementedError: if the filter content is of an unsupported type.
    """
    fmetadata = metadata or Metadata()
    content = parent.content
    fallback_keys = set()
    supported_expressions = (
        FilterItem,
        AggregateWrapper,
        FunctionCallWrapper,
        WindowItem,
        Function,
        ListWrapper,
        MapWrapper,
        int,
        str,
        float,
    )
    if isinstance(content, ConceptRef):
        cparent = environment.concepts[content.address]
        fallback_keys = {cparent.address}
    elif isinstance(content, supported_expressions):
        cparent = arbitrary_to_concept(content, environment, namespace=namespace)
    else:
        raise NotImplementedError(
            f"Filter item with non ref content {parent.content} ({type(parent.content)}) not yet supported"
        )

    modifiers = get_upstream_modifiers(
        cparent.concept_arguments, environment=environment
    )
    source_is_property = cparent.purpose == Purpose.PROPERTY
    if source_is_property:
        grain = cparent.grain
        keys = cparent.keys
    else:
        # filtered copies cannot inherit keys
        grain = Grain()
        keys = fallback_keys

    return Concept(
        name=name,
        namespace=namespace,
        datatype=cparent.datatype,
        purpose=Purpose.PROPERTY,
        derivation=Derivation.FILTER,
        granularity=cparent.granularity,
        lineage=parent,
        metadata=fmetadata,
        keys=keys,
        grain=grain,
        modifiers=modifiers,
    )
640
+
641
+
642
def window_item_to_concept(
    parent: WindowItem,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build the concept produced by a window expression.

    The window content must be a concept reference. If it resolves to an
    undefined concept, an UndefinedConcept is returned for the new address.
    The grain combines the `over` concepts, the content's output and any
    concepts referenced by the order-by expressions. ROW_NUMBER, COUNT and
    COUNT_DISTINCT yield integers, RANK yields an integer with the "rank"
    trait, and every other window type keeps the content's datatype.

    Raises:
        NotImplementedError: if the window content is not a ConceptRef.
    """
    fmetadata = metadata or Metadata()
    if not isinstance(parent.content, ConceptRef):
        raise NotImplementedError(
            f"Window function with non ref content {parent.content} not yet supported"
        )
    bcontent = environment.concepts[parent.content.address]
    if isinstance(bcontent, UndefinedConcept):
        return UndefinedConcept(address=f"{namespace}.{name}", metadata=fmetadata)

    if bcontent.purpose == Purpose.METRIC:
        local_purpose, keys = get_purpose_and_keys(None, (bcontent,), environment)
    else:
        local_purpose = Purpose.PROPERTY
        key_addresses = [bcontent.address] + [item.address for item in parent.over]
        keys = Grain.from_concepts(key_addresses, environment).components

    # list concatenation builds a new list, so parent.over is not mutated below
    grain_components = parent.over + [bcontent.output]
    if parent.order_by:
        # include the concepts used for ordering in the discovery grain
        for ordering in parent.order_by:
            order_refs, _ = get_relevant_parent_concepts(ordering.expr)
            grain_components += order_refs
    final_grain = Grain.from_concepts(grain_components, environment)

    modifiers = get_upstream_modifiers(bcontent.concept_arguments, environment)

    integer_windows = (
        WindowType.ROW_NUMBER,
        WindowType.COUNT,
        WindowType.COUNT_DISTINCT,
    )
    if parent.type == WindowType.RANK:
        datatype = TraitDataType(type=DataType.INTEGER, traits=["rank"])
    elif parent.type in integer_windows:
        datatype = DataType.INTEGER
    else:
        datatype = parent.content.datatype

    return Concept(
        name=name,
        namespace=namespace,
        datatype=datatype,
        purpose=local_purpose,
        derivation=Derivation.WINDOW,
        granularity=bcontent.granularity,
        lineage=parent,
        metadata=fmetadata,
        grain=final_grain,
        keys=keys,
        modifiers=modifiers,
    )
700
+
701
+
702
def agg_wrapper_to_concept(
    parent: AggregateWrapper,
    namespace: str,
    name: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build a METRIC concept for an aggregate, optionally grouped by ``parent.by``.

    Args:
        parent: the aggregate wrapper (function plus optional ``by`` list).
        namespace: namespace of the concept to create.
        name: name of the concept to create.
        environment: used to resolve keys, modifiers and the grain.
        metadata: optional metadata; a fresh ``Metadata()`` is used when falsy.

    Returns:
        A ``Concept`` with ``Derivation.AGGREGATE``.

    Raises:
        InvalidSyntaxException: when one of the aggregate function's concept
            arguments has the same address as the concept being created.
    """
    group_by = parent.by
    _, default_keys = get_purpose_and_keys(
        Purpose.METRIC, tuple(group_by) if group_by else None, environment=environment
    )
    fmetadata = metadata or Metadata()
    modifiers = get_upstream_modifiers(parent.concept_arguments, environment)

    # anything grouped to a grain should be a property at that grain
    if group_by:
        grain = Grain.from_concepts(group_by, environment)
        keys = {ref.address for ref in group_by}
    else:
        grain = Grain()
        keys = default_keys
    granularity = Concept.calculate_granularity(Derivation.AGGREGATE, grain, parent)

    out = Concept(
        name=name,
        datatype=parent.function.output_datatype,
        purpose=Purpose.METRIC,
        metadata=fmetadata,
        lineage=parent,
        grain=grain,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        derivation=Derivation.AGGREGATE,
        granularity=granularity,
    )

    # The address is only known once the concept exists, so the
    # self-reference check has to run after construction.
    if any(arg.address == out.address for arg in parent.function.concept_arguments):
        raise InvalidSyntaxException(
            f"Aggregate concept {out.address} cannot reference itself. If defining a new concept in a select, use a new name."
        )
    return out
739
+
740
+
741
def align_item_to_concept(
    parent: AlignItem,
    align_clause: AlignClause,
    selects: list[SelectStatement],
    environment: Environment,
    where: WhereClause | None = None,
    having: HavingClause | None = None,
    limit: int | None = None,
) -> Concept:
    """Build the merged output concept for one align item of a multi-select.

    Args:
        parent: the align item; its concepts must all share one datatype.
        align_clause: the full align clause, stored on the resulting lineage.
        selects: the select statements being merged.
        environment: passed to each select's ``as_lineage``.
        where: optional where clause stored on the lineage.
        having: optional having clause stored on the lineage.
        limit: optional row limit stored on the lineage.

    Returns:
        A PROPERTY concept with ``Derivation.MULTISELECT`` named after the
        align item's alias and keyed on the addresses of the aligned concepts.

    Raises:
        InvalidSyntaxException: when the aligned concepts have more than one
            distinct datatype.
    """
    align = parent
    datatypes = {member.datatype for member in align.concepts}
    if len(datatypes) > 1:
        raise InvalidSyntaxException(
            f"Datatypes do not align for merged statements {align.alias}, have {datatypes}"
        )

    select_lineages = [statement.as_lineage(environment) for statement in selects]
    hidden = {
        component
        for lineage in select_lineages
        for component in lineage.hidden_components
    }
    multi_lineage = MultiSelectLineage(
        selects=select_lineages,
        align=align_clause,
        namespace=align.namespace,
        where_clause=where,
        having_clause=having,
        limit=limit,
        hidden_components=hidden,
    )
    return Concept(
        name=align.alias,
        # at most one datatype remains after the check above
        datatype=datatypes.pop(),
        purpose=Purpose.PROPERTY,
        lineage=multi_lineage,
        grain=Grain(),
        namespace=align.namespace,
        granularity=Granularity.MULTI_ROW,
        derivation=Derivation.MULTISELECT,
        keys={member.address for member in align.concepts},
    )
780
+
781
+
782
def derive_item_to_concept(
    parent: ARBITRARY_INPUTS,
    name: str,
    lineage: MultiSelectLineage,
    namespace: str | None = None,
) -> Concept:
    """Build a PROPERTY concept derived from a multi-select.

    Args:
        parent: the expression whose datatype (via ``arg_to_datatype``) the
            new concept takes.
        name: name of the concept to create.
        lineage: the multi-select lineage attached to the concept.
        namespace: target namespace; ``DEFAULT_NAMESPACE`` is used when falsy.

    Returns:
        A ``Concept`` with ``Derivation.MULTISELECT``, an empty ``Grain`` and
        ``Granularity.MULTI_ROW``.
    """
    return Concept(
        name=name,
        datatype=arg_to_datatype(parent),
        purpose=Purpose.PROPERTY,
        lineage=lineage,
        grain=Grain(),
        namespace=namespace or DEFAULT_NAMESPACE,
        granularity=Granularity.MULTI_ROW,
        derivation=Derivation.MULTISELECT,
    )
801
+
802
+
803
def rowset_concept(
    orig_address: ConceptRef,
    environment: Environment,
    rowset: RowsetDerivationStatement,
    pre_output: list[Concept],
    orig: dict[str, Concept],
    orig_map: dict[str, Concept],
):
    """Create the rowset-scoped copy of one select output concept.

    The new concept is created with ``lineage=None`` and registered through
    the mutable arguments rather than returned:

    * ``pre_output`` receives the new concept,
    * ``orig`` maps the source concept's address to the new concept,
    * ``orig_map`` maps the new concept's address back to the source concept,
    * ``environment.concepts`` and ``environment.alias_origin_lookup`` gain an
      entry for every pseudonym of the source concept, re-addressed under the
      rowset name.
    """
    orig_concept = environment.concepts[orig_address.address]
    source_lineage = orig_concept.lineage

    # A filter whose condition equals the rowset's own where clause takes the
    # name of the concept being filtered instead of the filter concept's name.
    if (
        isinstance(source_lineage, FilterItem)
        and source_lineage.where == rowset.select.where_clause
        and isinstance(source_lineage.content, (ConceptRef, Concept))
    ):
        name = environment.concepts[source_lineage.content.address].name
    else:
        name = orig_concept.name

    if orig_concept.namespace == rowset.namespace:
        base_namespace = rowset.name
    else:
        base_namespace = f"{rowset.name}.{orig_concept.namespace}"

    new_concept = Concept(
        name=name,
        datatype=orig_concept.datatype,
        purpose=orig_concept.purpose,
        lineage=None,
        grain=orig_concept.grain,
        metadata=Metadata(concept_source=ConceptSource.CTE),
        namespace=base_namespace,
        keys=orig_concept.keys,
        derivation=Derivation.ROWSET,
        granularity=orig_concept.granularity,
        pseudonyms={
            address_with_namespace(alias, rowset.name)
            for alias in orig_concept.pseudonyms
        },
    )

    # Register each re-addressed pseudonym and carry over its alias origin,
    # with the origin's namespace prefixed by the rowset name.
    for alias in orig_concept.pseudonyms:
        new_address = address_with_namespace(alias, rowset.name)
        origa = environment.alias_origin_lookup[alias]
        environment.concepts[new_address] = new_concept
        environment.alias_origin_lookup[new_address] = origa.model_copy(
            update={"namespace": f"{rowset.name}.{origa.namespace}"}
        )

    orig[orig_concept.address] = new_concept
    orig_map[new_concept.address] = orig_concept
    pre_output.append(new_concept)
849
+
850
+
851
def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environment):
    """Create the rowset-scoped concepts for every output of a rowset's select.

    Each output component is copied via ``rowset_concept``, given a
    ``RowsetItem`` lineage pointing back at its source concept, and then has
    its keys and grain remapped onto the addresses of the new rowset concepts.

    Returns:
        The list of newly created concepts, in select output order.
    """
    derived: list[Concept] = []
    new_by_source: dict[str, Concept] = {}
    source_by_new: dict[str, Concept] = {}
    for component in rowset.select.output_components:
        rowset_concept(
            component, environment, rowset, derived, new_by_source, source_by_new
        )

    select_lineage = rowset.select.as_lineage(environment)
    for concept in derived:
        concept.lineage = RowsetItem(
            content=source_by_new[concept.address].reference,
            rowset=RowsetLineage(
                name=rowset.name,
                derived_concepts=[sibling.reference for sibling in derived],
                select=select_lineage,
            ),
        )

    fallback_grain = Grain.from_concepts(list(derived))
    # remap everything to the properties of the rowset
    for concept in derived:
        current_keys = concept.keys
        if current_keys:
            if all(key in new_by_source for key in current_keys):
                concept.keys = {new_by_source[key].address for key in current_keys}
            else:
                # TODO: fix this up
                concept.keys = set()

        grain_parts = concept.grain.components
        if all(part in new_by_source for part in grain_parts):
            concept.grain = Grain(
                components={new_by_source[part].address for part in grain_parts}
            )
        else:
            # some grain component is not a rowset output
            concept.grain = fallback_grain
    return derived
881
+
882
+
883
def generate_concept_name(
    parent: ARBITRARY_INPUTS,
) -> str:
    """Generate a name for a concept based on its parent type and content.

    Every name has the shape ``<VIRTUAL_CONCEPT_PREFIX><segment>_<hash>``,
    where ``<hash>`` is ``string_to_hash(str(parent))`` and ``<segment>`` is a
    type-specific label (empty for list/map wrappers and primitive values).
    """
    digest = string_to_hash(str(parent))

    if isinstance(parent, AggregateWrapper):
        segment = f"_agg_{parent.function.operator.value}"
    elif isinstance(parent, WindowItem):
        segment = f"_window_{parent.type.value}"
    elif isinstance(parent, FilterItem):
        if isinstance(parent.content, ConceptRef):
            segment = f"_filter_{parent.content.name}"
        else:
            segment = "_filter"
    elif isinstance(parent, Function):
        if parent.operator == FunctionType.GROUP:
            segment = "_group_to"
        else:
            segment = f"_func_{parent.operator.value}"
    elif isinstance(parent, Parenthetical):
        segment = "_paren"
    elif isinstance(parent, FunctionCallWrapper):
        segment = f"_{parent.name}"
    elif isinstance(parent, Comparison):
        segment = "_comp"
    else:  # ListWrapper, MapWrapper, or primitive types
        segment = ""

    return f"{VIRTUAL_CONCEPT_PREFIX}{segment}_{digest}"
909
+
910
+
911
def parenthetical_to_concept(
    parent: Parenthetical,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build a concept for a parenthesised expression by delegating to its content.

    Args:
        parent: the parenthetical; its ``content`` must be an instance of
            ``ARBITRARY_INPUTS``.
        name: name of the concept to create.
        namespace: namespace of the concept to create.
        environment: forwarded to ``arbitrary_to_concept``.
        metadata: optional metadata forwarded to ``arbitrary_to_concept``.

    Raises:
        NotImplementedError: when the content is not an ``ARBITRARY_INPUTS``
            instance.
    """
    if not isinstance(
        parent.content,
        ARBITRARY_INPUTS,
    ):
        raise NotImplementedError(
            f"Parenthetical with non-supported content {parent.content} ({type(parent.content)}) not yet supported"
        )
    return arbitrary_to_concept(parent.content, environment, namespace, name, metadata)
929
+
930
+
931
def comparison_to_concept(
    parent: Comparison,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build a BASIC-derivation concept for a comparison expression.

    Args:
        parent: the comparison expression, stored as the concept's lineage.
        name: name of the concept to create.
        namespace: target namespace; ``environment.namespace`` is used when
            falsy.
        environment: used to resolve the concepts the comparison references.
        metadata: optional metadata; a fresh ``Metadata()`` is used when falsy.

    Returns:
        A ``Concept`` whose purpose is METRIC when
        ``get_relevant_parent_concepts`` flags the expression as a metric,
        CONSTANT when no contributing parent concepts remain, and PROPERTY
        otherwise.
    """
    fmetadata = metadata or Metadata()
    namespace = namespace or environment.namespace

    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[c.address] for c in ref_args]
    # Skip constants and aggregates whose grain has no components.
    pkeys: list[Concept] = [
        x
        for x in concrete_args
        if x.derivation != Derivation.CONSTANT
        and not (x.derivation == Derivation.AGGREGATE and not x.grain.components)
    ]

    grain: Grain | None = Grain()
    for x in pkeys:
        grain += x.grain
    if parent.operator in FunctionClass.ONE_TO_MANY.value:
        # if the function will create more rows, we don't know what grain this is at
        grain = None

    modifiers = get_upstream_modifiers(pkeys, environment)

    key_grain: list[str] = []
    for x in pkeys:
        # metrics will group to keys, so do not do key traversal;
        # otherwise, for row ops, assume keys are transitive
        if is_metric or not x.keys:
            key_grain.append(x.address)
        else:
            key_grain += [*x.keys]
    keys = set(key_grain)

    if is_metric:
        purpose = Purpose.METRIC
    elif not pkeys:
        purpose = Purpose.CONSTANT
    else:
        purpose = Purpose.PROPERTY

    if grain is not None:
        return Concept(
            name=name,
            datatype=parent.output_datatype,
            purpose=purpose,
            lineage=parent,
            namespace=namespace,
            keys=keys,
            modifiers=modifiers,
            grain=grain,
            metadata=fmetadata,
            derivation=Derivation.BASIC,
            granularity=Granularity.MULTI_ROW,
        )

    # Grain unknown: omit it so the Concept's own default applies.
    return Concept(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        metadata=fmetadata,
        derivation=Derivation.BASIC,
        granularity=Granularity.MULTI_ROW,
    )
1005
+
1006
+
1007
def arbitrary_to_concept(
    parent: ARBITRARY_INPUTS,
    environment: Environment,
    namespace: str | None = None,
    name: str | None = None,
    metadata: Metadata | None = None,
) -> Concept:
    """Dispatch an arbitrary expression to the matching concept builder.

    Args:
        parent: the expression to turn into a concept.
        environment: used for the default namespace and passed to builders.
        namespace: target namespace; ``environment.namespace`` when falsy.
        name: concept name; generated with ``generate_concept_name`` when
            falsy.
        metadata: optional metadata forwarded to the selected builder.

    Returns:
        The concept created by the builder selected from ``parent``'s type;
        anything not matched by a specific builder is treated as a constant.
    """
    namespace = namespace or environment.namespace

    # this is purely for the parse tree, discard from derivation
    if isinstance(parent, FunctionCallWrapper):
        return arbitrary_to_concept(
            parent.content, environment, namespace, name, metadata  # type: ignore
        )

    # Generate name if not provided
    name = name or generate_concept_name(parent)

    if isinstance(parent, AggregateWrapper):
        return agg_wrapper_to_concept(
            parent, namespace, name, metadata=metadata, environment=environment
        )
    if isinstance(parent, WindowItem):
        return window_item_to_concept(
            parent, name, namespace, environment=environment, metadata=metadata
        )
    if isinstance(parent, FilterItem):
        return filter_item_to_concept(
            parent, name, namespace, environment=environment, metadata=metadata
        )
    if isinstance(parent, Function):
        # GROUP has a dedicated builder; every other function shares one.
        if parent.operator == FunctionType.GROUP:
            builder = group_function_to_concept
        else:
            builder = function_to_concept
        return builder(
            parent,
            name,
            environment=environment,
            namespace=namespace,
            metadata=metadata,
        )
    if isinstance(parent, ListWrapper):
        return constant_to_concept(parent, name, namespace, metadata)
    if isinstance(parent, Parenthetical):
        return parenthetical_to_concept(parent, name, namespace, environment, metadata)
    if isinstance(parent, Comparison):
        return comparison_to_concept(parent, name, namespace, environment, metadata)
    return constant_to_concept(parent, name, namespace, metadata)