pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
@@ -0,0 +1,239 @@
1
+ from dataclasses import dataclass
2
+ from typing import Iterator
3
+
4
+ from trilogy.core.enums import Derivation
5
+ from trilogy.core.graph_models import (
6
+ ReferenceGraph,
7
+ concept_to_node,
8
+ datasource_to_node,
9
+ )
10
+ from trilogy.core.models.build import BuildConcept, BuildDatasource
11
+ from trilogy.core.models.build_environment import BuildEnvironment
12
+
13
+
14
+ @dataclass(slots=True)
15
+ class BasicConceptGraph:
16
+ """Pre-computed dependency graph for BASIC derived concepts."""
17
+
18
+ # concept address -> concept
19
+ concepts: dict[str, BuildConcept]
20
+ # concept address -> set of input addresses required
21
+ dependencies: dict[str, frozenset[str]]
22
+ # input address -> concepts that depend on it
23
+ reverse_deps: dict[str, list[str]]
24
+ # concepts with no dependencies (can be computed from any base concept)
25
+ roots: list[str]
26
+
27
+
28
def build_basic_concept_graph(concepts: list[BuildConcept]) -> BasicConceptGraph:
    """Index the BASIC derived concepts in ``concepts`` by what they consume.

    Concepts whose derivation is not BASIC, or that have no concept
    arguments, are left out entirely. The resulting structure lets a caller
    work out, in one traversal, which derived concepts a datasource can
    additionally provide.
    """
    by_address: dict[str, BuildConcept] = {}
    needs: dict[str, frozenset[str]] = {}
    needed_by: dict[str, list[str]] = {}

    for candidate in concepts:
        if candidate.derivation != Derivation.BASIC:
            continue
        arguments = candidate.concept_arguments
        if not arguments:
            continue

        target = candidate.canonical_address
        by_address[target] = candidate
        inputs = frozenset(arg.canonical_address for arg in arguments)
        needs[target] = inputs

        # Record the inverse edge so traversal can walk from input to consumer.
        for source in inputs:
            needed_by.setdefault(source, []).append(target)

    # A root is a tracked concept none of whose inputs is itself tracked,
    # i.e. all of its inputs are non-BASIC.
    entry_points: list[str] = [
        target
        for target, inputs in needs.items()
        if not any(source in needs for source in inputs)
    ]

    return BasicConceptGraph(
        concepts=by_address,
        dependencies=needs,
        reverse_deps=needed_by,
        roots=entry_points,
    )
65
+
66
+
67
def get_derivable_concepts(
    graph: BasicConceptGraph,
    available: set[str],
    already_present: set[str],
) -> Iterator[BuildConcept]:
    """Lazily produce every concept that can be derived from ``available``.

    Traversal starts at the graph's roots. A concept is emitted once all of
    its dependencies are in ``available``; it is then added to ``available``
    itself so that concepts depending on it can be reached afterwards.

    Args:
        graph: Pre-computed dependency graph for BASIC concepts
        available: Canonical addresses of complete concepts. This set is
            mutated: every derivable address is added to it.
        already_present: Canonical addresses already in the datasource;
            these are still traversed but never yielded.
    """
    # Addresses that have been satisfied and expanded.
    done: set[str] = set()
    # Work stack, seeded with a copy of the roots.
    pending: list[str] = list(graph.roots)

    while pending:
        current = pending.pop()
        if current in done or not graph.dependencies[current].issubset(available):
            # Either finished, or not satisfiable yet; an unsatisfied concept
            # is pushed again if one of its inputs is derived later.
            continue

        done.add(current)
        available.add(current)

        if current not in already_present:
            yield graph.concepts[current]

        # Anything consuming this concept may have just become derivable.
        pending.extend(
            child
            for child in graph.reverse_deps.get(current, [])
            if child not in done
        )
108
+
109
+
110
def add_concept(
    concept: BuildConcept,
    g: ReferenceGraph,
    concept_mapping: dict[str, BuildConcept],
    default_concept_graph: dict[str, BuildConcept],
    seen: set[str],
    node_stash: dict[str, str] | None = None,
):
    """Add ``concept``, its inputs and its pseudonyms to the graph ``g``.

    The concept's node is registered, then each of its concept arguments is
    added recursively (at default grain) with an edge from the argument to
    the concept. Each pseudonym is resolved through ``concept_mapping``,
    linked to the concept with edges in both directions, and added
    recursively as well.

    Args:
        concept: The concept to add.
        g: Graph being populated; mutated in place.
        concept_mapping: Address -> concept lookup used to resolve pseudonyms.
        default_concept_graph: Cache of default-grain concepts keyed by
            address; may be extended by this call.
        seen: Node names already added; used to stop repeated work and
            recursion loops. Mutated in place.
        node_stash: Stash handed to ``concept_to_node``. When omitted, a new
            empty dict is created for this call and shared with its
            recursive calls.

    Raises:
        ValueError: If one of the concept's arguments is not a BuildConcept.
        SyntaxError: If a pseudonym address is missing from
            ``concept_mapping``.
    """
    # A literal ``{}`` default is built once at definition time and would be
    # shared by every call that omits the argument, leaking stashed node
    # names from one graph build into the next.
    if node_stash is None:
        node_stash = {}

    node_name = concept_to_node(concept, node_stash)
    if node_name in seen:
        return
    seen.add(node_name)
    g.concepts[node_name] = concept
    g.add_node(node_name)
    # Portion of the node name before the first "@"; used below to skip
    # pseudonyms that share it with this concept.
    root_name = node_name.split("@", 1)[0]

    # if we have sources, recursively add them
    if concept.concept_arguments:
        for source in concept.concept_arguments:
            if not isinstance(source, BuildConcept):
                raise ValueError(
                    f"Invalid non-build concept {source} passed into graph generation from {concept}"
                )
            generic = get_default_grain_concept(source, default_concept_graph)
            generic_node = concept_to_node(generic, stash=node_stash)
            add_concept(
                generic, g, concept_mapping, default_concept_graph, seen, node_stash
            )

            g.add_edge(generic_node, node_name)

    for ps_address in concept.pseudonyms:
        if ps_address not in concept_mapping:
            raise SyntaxError(f"Concept {concept} has invalid pseudonym {ps_address}")
        pseudonym = concept_mapping[ps_address]
        pseudonym = get_default_grain_concept(pseudonym, default_concept_graph)
        pseudonym_node = concept_to_node(pseudonym, stash=node_stash)
        # Both directions already linked: nothing left to do for this pair.
        if (pseudonym_node, node_name) in g.edges and (
            node_name,
            pseudonym_node,
        ) in g.edges:
            continue
        if pseudonym_node.split("@", 1)[0] == root_name:
            continue
        g.add_edge(pseudonym_node, node_name)
        g.add_edge(node_name, pseudonym_node)
        g.pseudonyms.add((pseudonym_node, node_name))
        g.pseudonyms.add((node_name, pseudonym_node))
        add_concept(
            pseudonym, g, concept_mapping, default_concept_graph, seen, node_stash
        )
160
+
161
+
162
def get_default_grain_concept(
    concept: BuildConcept, default_concept_graph: dict[str, BuildConcept]
) -> BuildConcept:
    """Return the default-grain version of ``concept``, memoised by address.

    ``default_concept_graph`` serves as the cache: on a miss the result of
    ``concept.with_default_grain()`` is stored under the concept's address
    and returned; on a hit the stored entry is returned unchanged.
    """
    address = concept.address
    if address not in default_concept_graph:
        default_concept_graph[address] = concept.with_default_grain()
    return default_concept_graph[address]
171
+
172
+
173
def generate_adhoc_graph(
    concepts: list[BuildConcept],
    datasources: list[BuildDatasource],
    default_concept_graph: dict[str, BuildConcept],
) -> ReferenceGraph:
    """Build a ReferenceGraph linking the given concepts and datasources.

    All concepts are added first (with their inputs and pseudonyms). Each
    datasource is then added and connected in both directions to every
    concept it exposes, plus any BASIC concept derivable from its complete
    columns. A concept that differs from its default-grain version is also
    connected in both directions to that default-grain node.

    Raises:
        ValueError: If any entry of ``concepts`` is not a BuildConcept.
    """
    graph = ReferenceGraph()
    by_address = {item.address: item for item in concepts}
    stash: dict[str, str] = {}
    visited: set[str] = set()

    # Validate the whole input before the graph is touched.
    for item in concepts:
        if not isinstance(item, BuildConcept):
            raise ValueError(f"Invalid non-build concept {item}")

    # add all parsed concepts
    for item in concepts:
        add_concept(item, graph, by_address, default_concept_graph, visited, stash)

    derivation_index = build_basic_concept_graph(concepts)

    for source in datasources:
        source_node = datasource_to_node(source)
        graph.datasources[source_node] = source
        graph.add_datasource_node(source_node, source)

        exposed = source.concepts
        present_addresses = {item.canonical_address for item in exposed}
        complete_addresses = {
            column.concept.canonical_address
            for column in source.columns
            if column.is_complete
        }
        # Extend the exposed list with everything derivable from complete columns.
        exposed.extend(
            get_derivable_concepts(
                derivation_index, complete_addresses, present_addresses
            )
        )

        for item in exposed:
            item_node = concept_to_node(item, stash)
            graph.concepts[item_node] = item
            graph.add_node(item_node)
            graph.add_edge(source_node, item_node)
            graph.add_edge(item_node, source_node)

            # if there is a key on a table at a different grain
            # add an FK edge to the canonical source, if it exists
            # for example, order ID on order product table
            canonical = get_default_grain_concept(item, default_concept_graph)
            if item != canonical:
                canonical_node = concept_to_node(canonical, stash)
                graph.concepts[canonical_node] = canonical
                graph.add_node(canonical_node)
                graph.add_edge(item_node, canonical_node)
                graph.add_edge(canonical_node, item_node)
    return graph
228
+
229
+
230
def generate_graph(
    environment: BuildEnvironment,
) -> ReferenceGraph:
    """Build the reference graph for a whole build environment.

    The concept list is the environment's concepts followed by the values of
    its alias-origin lookup; the default-grain cache starts out empty.
    """
    all_concepts = [
        *environment.concepts.values(),
        *environment.alias_origin_lookup.values(),
    ]
    all_datasources = list(environment.datasources.values())
    fresh_cache: dict[str, BuildConcept] = {}
    return generate_adhoc_graph(
        all_concepts,
        all_datasources,
        default_concept_graph=fresh_cache,
    )
@@ -0,0 +1,320 @@
1
+ from trilogy.constants import DEFAULT_NAMESPACE
2
+ from trilogy.core.enums import ConceptSource, DatePart, FunctionType, Purpose
3
+ from trilogy.core.functions import AttrAccess
4
+ from trilogy.core.models.author import Concept, Function, Grain, Metadata, TraitDataType
5
+ from trilogy.core.models.core import DataType, StructType, arg_to_datatype
6
+ from trilogy.core.models.environment import Environment
7
+ from trilogy.parsing.common import Meta
8
+
9
+
10
def generate_date_concepts(concept: Concept, environment: Environment):
    """Register auto-derived date-part concepts for ``concept``.

    For each of month, year, quarter, day and day_of_week a concept named
    ``<concept.name>.<part>`` is added, followed by ``month_start`` and
    ``year_start`` concepts built with DATE_TRUNCATE. Any address already
    present in ``environment.concepts`` is skipped. New concepts are tagged
    ``ConceptSource.AUTO_DERIVED`` and keyed on the source concept's address.

    Args:
        concept: The source concept the parts are derived from.
        environment: Environment that receives the new concepts.
    """
    if concept.metadata and concept.metadata.line_number:
        base_line_number = concept.metadata.line_number
    else:
        base_line_number = None

    # Resolved once, before either loop. It used to be bound only inside the
    # first loop, so if every date-part address already existed the
    # truncation loop below hit an unbound local.
    default_type = (
        Purpose.CONSTANT if concept.purpose == Purpose.CONSTANT else Purpose.PROPERTY
    )

    arg_tuples: list[tuple[FunctionType, TraitDataType]] = [
        (FunctionType.MONTH, TraitDataType(type=DataType.INTEGER, traits=["month"])),
        (FunctionType.YEAR, TraitDataType(type=DataType.INTEGER, traits=["year"])),
        (
            FunctionType.QUARTER,
            TraitDataType(type=DataType.INTEGER, traits=["quarter"]),
        ),
        (FunctionType.DAY, TraitDataType(type=DataType.INTEGER, traits=["day"])),
        (
            FunctionType.DAY_OF_WEEK,
            TraitDataType(type=DataType.INTEGER, traits=["day_of_week"]),
        ),
    ]
    for ftype, dtype in arg_tuples:
        fname = ftype.name.lower()
        address = concept.address + f".{fname}"
        if address in environment.concepts:
            continue
        function = Function.model_construct(
            operator=ftype,
            arguments=[concept.reference],
            output_datatype=dtype,
            output_purpose=default_type,
        )
        new_concept = Concept.model_construct(
            name=f"{concept.name}.{fname}",
            datatype=function.output_datatype,
            purpose=default_type,
            lineage=function,
            grain=concept.grain,
            namespace=concept.namespace,
            keys=set(
                [concept.address],
            ),
            metadata=Metadata(
                line_number=base_line_number,
                concept_source=ConceptSource.AUTO_DERIVED,
            ),
        )
        # add_derived=False: do not derive further concepts from a derived one
        environment.add_concept(new_concept, add_derived=False)

    for grain in [DatePart.MONTH, DatePart.YEAR]:
        address = concept.address + f".{grain.value}_start"
        if address in environment.concepts:
            continue
        function = Function.model_construct(
            operator=FunctionType.DATE_TRUNCATE,
            arguments=[concept.reference, grain],
            output_datatype=DataType.DATE,
            output_purpose=default_type,
            arg_count=2,
        )
        new_concept = Concept.model_construct(
            name=f"{concept.name}.{grain.value}_start",
            datatype=DataType.DATE,
            purpose=Purpose.PROPERTY,
            lineage=function,
            grain=concept.grain.model_copy(),
            namespace=concept.namespace,
            keys=set(
                [concept.address],
            ),
            metadata=Metadata(
                line_number=base_line_number,
                concept_source=ConceptSource.AUTO_DERIVED,
            ),
        )

        environment.add_concept(new_concept, add_derived=False)
89
+
90
+
91
def generate_datetime_concepts(concept: Concept, environment: Environment):
    """Register auto-derived date/hour/minute/second concepts for ``concept``.

    Each part becomes a concept named ``<concept.name>.<part>`` tagged
    ``ConceptSource.AUTO_DERIVED`` and keyed on the source concept's address.
    A part is skipped when its address, or its name, is already a key of
    ``environment.concepts``.
    """
    source_metadata = concept.metadata
    base_line_number = (
        source_metadata.line_number
        if source_metadata and source_metadata.line_number
        else None
    )
    # Derived parts of a constant stay constant; everything else is a property.
    derived_purpose = (
        Purpose.CONSTANT if concept.purpose == Purpose.CONSTANT else Purpose.PROPERTY
    )
    parts = (
        (FunctionType.DATE, DataType.DATE),
        (FunctionType.HOUR, TraitDataType(type=DataType.INTEGER, traits=["hour"])),
        (FunctionType.MINUTE, TraitDataType(type=DataType.INTEGER, traits=["minute"])),
        (FunctionType.SECOND, TraitDataType(type=DataType.INTEGER, traits=["second"])),
    )

    for operator, output_type in parts:
        suffix = operator.name.lower()
        derived_address = concept.address + f".{suffix}"
        if derived_address in environment.concepts:
            continue

        extraction = Function.model_construct(
            operator=operator,
            arguments=[concept.reference],
            output_datatype=output_type,
            output_purpose=derived_purpose,
        )
        derived = Concept.model_construct(
            name=f"{concept.name}.{suffix}",
            datatype=output_type,
            purpose=derived_purpose,
            lineage=extraction,
            grain=concept.grain.model_copy(),
            namespace=concept.namespace,
            keys={concept.address},
            metadata=Metadata(
                line_number=base_line_number,
                concept_source=ConceptSource.AUTO_DERIVED,
            ),
        )
        # Second guard, this time on the bare name rather than the address.
        if derived.name in environment.concepts:
            continue
        environment.add_concept(derived, add_derived=False)
136
+
137
+
138
def generate_key_concepts(concept: Concept, environment: Environment):
    """Register the auto-derived ``<concept.name>.count`` metric for ``concept``.

    The metric is an INTEGER with an empty grain and no keys, tagged
    ``ConceptSource.AUTO_DERIVED``. Nothing is added when the address is
    already present in ``environment.concepts``.
    """
    source_metadata = concept.metadata
    base_line_number = (
        source_metadata.line_number
        if source_metadata and source_metadata.line_number
        else None
    )

    for operator in (FunctionType.COUNT,):
        suffix = operator.name.lower()
        if concept.address + f".{suffix}" in environment.concepts:
            continue

        aggregate: Function = Function.model_construct(
            operator=operator,
            output_datatype=DataType.INTEGER,
            output_purpose=Purpose.METRIC,
            arguments=[concept.reference],
        )
        metric = Concept.model_construct(
            name=f"{concept.name}.{suffix}",
            datatype=DataType.INTEGER,
            purpose=Purpose.METRIC,
            lineage=aggregate,
            grain=Grain(),
            namespace=concept.namespace,
            keys=set(),
            metadata=Metadata(
                line_number=base_line_number,
                concept_source=ConceptSource.AUTO_DERIVED,
            ),
        )
        environment.add_concept(metric, add_derived=False)
170
+
171
+
172
def remove_date_concepts(concept: Concept, environment: Environment):
    """Drop the auto-generated date-part concepts that belong to ``concept``.

    A candidate is removed only if it is tagged AUTO_DERIVED and lists the
    source concept's address among its keys.
    """
    parent_address = concept.address
    suffixes = (
        "month",
        "year",
        "quarter",
        "day",
        "day_of_week",
        "month_start",
        "year_start",
    )

    for suffix in suffixes:
        candidate_address = parent_address + f".{suffix}"
        if candidate_address not in environment.concepts:
            continue

        candidate = environment.concepts[candidate_address]
        tagged_auto = (
            candidate.metadata
            and candidate.metadata.concept_source == ConceptSource.AUTO_DERIVED
        )
        if not tagged_auto:
            continue
        # Only remove if it was auto-derived from this concept
        if candidate.keys and parent_address in candidate.keys:
            environment.remove_concept(candidate_address)
190
+
191
+
192
def remove_datetime_concepts(concept: Concept, environment: Environment):
    """Drop the auto-generated date/hour/minute/second concepts of ``concept``.

    A candidate is removed only if it is tagged AUTO_DERIVED and lists the
    source concept's address among its keys.
    """
    parent_address = concept.address

    for suffix in ("date", "hour", "minute", "second"):
        candidate_address = parent_address + f".{suffix}"
        if candidate_address not in environment.concepts:
            continue

        candidate = environment.concepts[candidate_address]
        tagged_auto = (
            candidate.metadata
            and candidate.metadata.concept_source == ConceptSource.AUTO_DERIVED
        )
        if not tagged_auto:
            continue
        # Only remove if it was auto-derived from this concept
        if candidate.keys and parent_address in candidate.keys:
            environment.remove_concept(candidate_address)
209
+
210
+
211
def remove_key_concepts(concept: Concept, environment: Environment):
    """Drop the auto-generated ``.count`` concept that belongs to ``concept``.

    Unlike the date removers, the only condition is the AUTO_DERIVED tag;
    the candidate's keys are not inspected.
    """
    for suffix in ("count",):
        candidate_address = concept.address + f".{suffix}"
        if candidate_address not in environment.concepts:
            continue

        candidate_metadata = environment.concepts[candidate_address].metadata
        if (
            candidate_metadata
            and candidate_metadata.concept_source == ConceptSource.AUTO_DERIVED
        ):
            environment.remove_concept(candidate_address)
225
+
226
+
227
def remove_struct_concepts(concept: Concept, environment: Environment):
    """Drop the auto-generated field concepts of a struct-typed ``concept``.

    Does nothing unless the concept's datatype is a StructType. A concept is
    removed when it lives in the struct's field namespace, is tagged
    AUTO_DERIVED, and its lineage is an ATTR_ACCESS function whose first
    argument is a reference to ``concept``.
    """
    if not isinstance(concept.datatype, StructType):
        return

    env_namespace = environment.namespace
    if env_namespace and env_namespace != DEFAULT_NAMESPACE:
        field_namespace = env_namespace + "." + concept.name
    else:
        field_namespace = concept.name

    def _is_generated_field(candidate) -> bool:
        # Conditions are checked in order and short-circuit on the first miss.
        return bool(
            candidate.namespace == field_namespace
            and candidate.metadata
            and candidate.metadata.concept_source == ConceptSource.AUTO_DERIVED
            and isinstance(candidate.lineage, Function)
            and candidate.lineage.operator == FunctionType.ATTR_ACCESS
            and len(candidate.lineage.arguments) >= 1
            and candidate.lineage.arguments[0] == concept.reference
        )

    # Collect first, remove afterwards: the mapping must not change size
    # while it is being iterated.
    doomed = [
        address
        for address, candidate in environment.concepts.items()
        if _is_generated_field(candidate)
    ]
    for address in doomed:
        environment.remove_concept(address)
254
+
255
+
256
def remove_related_concepts(concept: Concept, environment: Environment):
    """Remove every auto-generated concept that was derived from ``concept``.

    Dispatches on the concept's purpose and datatype: keys lose their count
    concept, DATE loses its date parts, DATETIME and TIMESTAMP lose both date
    and time parts, and struct types lose their field concepts.
    """
    if concept.purpose == Purpose.KEY:
        remove_key_concepts(concept, environment)

    datatype = concept.datatype
    if datatype == DataType.DATE:
        cleanups = (remove_date_concepts,)
    elif datatype == DataType.DATETIME or datatype == DataType.TIMESTAMP:
        cleanups = (remove_date_concepts, remove_datetime_concepts)
    else:
        cleanups = ()
    for cleanup in cleanups:
        cleanup(concept, environment)

    if isinstance(concept.datatype, StructType):
        remove_struct_concepts(concept, environment)
276
+
277
+
278
def generate_related_concepts(
    concept: Concept,
    environment: Environment,
    meta: Meta | None = None,
    add_derived: bool = False,
):
    """Auto populate common derived concepts on types.

    Key, date and datetime derivations are generated only when
    ``add_derived`` is true. Struct field concepts are generated for any
    struct-typed concept regardless of that flag.
    """
    if add_derived:
        if concept.purpose == Purpose.KEY:
            generate_key_concepts(concept, environment)

        datatype = concept.datatype
        if datatype == DataType.DATE:
            generate_date_concepts(concept, environment)
        elif datatype == DataType.DATETIME or datatype == DataType.TIMESTAMP:
            generate_date_concepts(concept, environment)
            generate_datetime_concepts(concept, environment)

    if not isinstance(concept.datatype, StructType):
        return

    # Field concepts live in a namespace named after the struct concept,
    # prefixed by the environment namespace unless that is the default one.
    env_namespace = environment.namespace
    if env_namespace and env_namespace != DEFAULT_NAMESPACE:
        field_namespace = env_namespace + "." + concept.name
    else:
        field_namespace = concept.name

    for field_name, field_type in concept.datatype.fields_map.items():
        field_concept = Concept.model_construct(
            name=field_name,
            datatype=arg_to_datatype(field_type),
            purpose=Purpose.PROPERTY,
            namespace=field_namespace,
            lineage=AttrAccess([concept.reference, field_name], environment=environment),
            grain=concept.grain,
            metadata=Metadata(
                concept_source=ConceptSource.AUTO_DERIVED,
            ),
            keys=concept.keys,
        )
        environment.add_concept(field_concept, meta=meta)
        # A field declared as a concept is merged with its generated accessor.
        if isinstance(field_type, Concept):
            environment.merge_concept(field_concept, field_type, modifiers=[])