pytrilogy-0.3.138-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
trilogy/core/models/environment.py
@@ -0,0 +1,748 @@
+ from __future__ import annotations
+
+ import difflib
+ import os
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import (
+     TYPE_CHECKING,
+     Annotated,
+     Any,
+     Dict,
+     ItemsView,
+     List,
+     Never,
+     Optional,
+     Self,
+     Tuple,
+     ValuesView,
+ )
+
+ from lark.tree import Meta
+ from pydantic import BaseModel, ConfigDict, Field
+ from pydantic.functional_validators import PlainValidator
+
+ from trilogy.constants import DEFAULT_NAMESPACE, ENV_CACHE_NAME, logger
+ from trilogy.core.constants import (
+     INTERNAL_NAMESPACE,
+     WORKING_PATH_CONCEPT,
+ )
+ from trilogy.core.enums import (
+     ConceptSource,
+     Derivation,
+     FunctionType,
+     Granularity,
+     Modifier,
+     Purpose,
+ )
+ from trilogy.core.exceptions import (
+     FrozenEnvironmentException,
+     UndefinedConceptException,
+ )
+ from trilogy.core.models.author import (
+     Concept,
+     ConceptRef,
+     CustomFunctionFactory,
+     CustomType,
+     Function,
+     SelectLineage,
+     UndefinedConcept,
+     UndefinedConceptFull,
+     address_with_namespace,
+ )
+ from trilogy.core.models.core import DataType
+ from trilogy.core.models.datasource import Datasource, EnvironmentDatasourceDict
+
+ if TYPE_CHECKING:
+     from trilogy.core.models.build import BuildConcept, BuildEnvironment
+
+
+ @dataclass
+ class Import:
+     alias: str
+     path: Path
+     input_path: Path | None = (
+         None  # filepath where the text came from (path is the import path, but may be resolved from a dictionary for some resolvers)
+     )
+
+
+ class BaseImportResolver(BaseModel):
+     pass
+
+
+ class FileSystemImportResolver(BaseImportResolver):
+     pass
+
+
+ class DictImportResolver(BaseImportResolver):
+     content: Dict[str, str]
+
+
+ class EnvironmentOptions(BaseModel):
+     allow_duplicate_declaration: bool = True
+     import_resolver: BaseImportResolver = Field(
+         default_factory=FileSystemImportResolver
+     )
+
+     def copy_for_root(self, root: str | None) -> Self:
+         new = self.model_copy(deep=True)
+         if isinstance(new.import_resolver, DictImportResolver) and root:
+             new.import_resolver = DictImportResolver(
+                 content={
+                     k[len(root) + 1 :]: v
+                     for k, v in new.import_resolver.content.items()
+                     if k.startswith(f"{root}.")
+                 }
+             )
+         return new
+
+
+ class EnvironmentConceptDict(dict):
+     def __init__(self, *args, **kwargs) -> None:
+         super().__init__(self, *args, **kwargs)
+         self.undefined: dict[str, UndefinedConceptFull] = {}
+         self.fail_on_missing: bool = True
+         self.populate_default_concepts()
+
+     def duplicate(self) -> "EnvironmentConceptDict":
+         new = EnvironmentConceptDict()
+         new.update({k: v.duplicate() for k, v in self.items()})
+         new.undefined = self.undefined
+         new.fail_on_missing = self.fail_on_missing
+         return new
+
+     def populate_default_concepts(self):
+         from trilogy.core.internal import DEFAULT_CONCEPTS
+
+         for concept in DEFAULT_CONCEPTS.values():
+             self[concept.address] = concept
+
+     def values(self) -> ValuesView[Concept]:  # type: ignore
+         return super().values()
+
+     def get(self, key: str, default: Concept | None = None) -> Concept | None:  # type: ignore
+         try:
+             return self.__getitem__(key)
+         except UndefinedConceptException:
+             return default
+
+     def raise_undefined(
+         self, key: str, line_no: int | None = None, file: Path | str | None = None
+     ) -> Never:
+
+         matches = self._find_similar_concepts(key)
+         message = f"Undefined concept: {key}."
+         if matches:
+             message += f" Suggestions: {matches}"
+
+         if line_no:
+             if file:
+                 raise UndefinedConceptException(
+                     f"{file}: {line_no}: " + message, matches
+                 )
+             raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
+         raise UndefinedConceptException(message, matches)
+
+     def __getitem__(
+         self, key: str, line_no: int | None = None, file: Path | None = None
+     ) -> Concept | UndefinedConceptFull:
+         # fast access path
+         if key in self.keys():
+             return super(EnvironmentConceptDict, self).__getitem__(key)
+         if isinstance(key, ConceptRef):
+             return self.__getitem__(key.address, line_no=line_no, file=file)
+         try:
+             return super(EnvironmentConceptDict, self).__getitem__(key)
+         except KeyError:
+             if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                 return self.__getitem__(key.split(".", 1)[1], line_no)
+             if DEFAULT_NAMESPACE + "." + key in self:
+                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
+             if not self.fail_on_missing:
+                 if "." in key:
+                     ns, rest = key.rsplit(".", 1)
+                 else:
+                     ns = DEFAULT_NAMESPACE
+                     rest = key
+                 if key in self.undefined:
+                     return self.undefined[key]
+                 undefined = UndefinedConceptFull(
+                     line_no=line_no,
+                     datatype=DataType.UNKNOWN,
+                     name=rest,
+                     purpose=Purpose.UNKNOWN,
+                     namespace=ns,
+                 )
+                 self.undefined[key] = undefined
+                 return undefined
+             self.raise_undefined(key, line_no, file)
+
+     def _find_similar_concepts(self, concept_name: str):
+         def strip_local(input: str):
+             if input.startswith(f"{DEFAULT_NAMESPACE}."):
+                 return input[len(DEFAULT_NAMESPACE) + 1 :]
+             return input
+
+         matches = difflib.get_close_matches(
+             strip_local(concept_name), [strip_local(x) for x in self.keys()]
+         )
+         return matches
+
+     def items(self) -> ItemsView[str, Concept]:  # type: ignore
+         return super().items()
+
+
+ def validate_concepts(v) -> EnvironmentConceptDict:
+     if isinstance(v, EnvironmentConceptDict):
+         return v
+     elif isinstance(v, dict):
+         return EnvironmentConceptDict(
+             **{x: Concept.model_validate(y) for x, y in v.items()}
+         )
+     raise ValueError
+
+
+ def validate_datasources(v) -> EnvironmentDatasourceDict:
+     if isinstance(v, EnvironmentDatasourceDict):
+         return v
+     elif isinstance(v, dict):
+         return EnvironmentDatasourceDict(
+             **{x: Datasource.model_validate(y) for x, y in v.items()}
+         )
+     raise ValueError
+
+
+ def get_version():
+     from trilogy import __version__
+
+     return __version__
+
+
+ class Environment(BaseModel):
+     model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
+
+     concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
+         Field(default_factory=EnvironmentConceptDict)
+     )
+     datasources: Annotated[
+         EnvironmentDatasourceDict, PlainValidator(validate_datasources)
+     ] = Field(default_factory=EnvironmentDatasourceDict)
+     functions: Dict[str, CustomFunctionFactory] = Field(default_factory=dict)
+     data_types: Dict[str, CustomType] = Field(default_factory=dict)
+     named_statements: Dict[str, SelectLineage] = Field(default_factory=dict)
+     imports: defaultdict[str, list[Import]] = Field(
+         default_factory=lambda: defaultdict(list)  # type: ignore
+     )
+     namespace: str = DEFAULT_NAMESPACE
+     working_path: str | Path = Field(default_factory=lambda: os.getcwd())
+     config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
+     version: str = Field(default_factory=get_version)
+     cte_name_map: Dict[str, str] = Field(default_factory=dict)
+     alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
+     # TODO: support freezing environments to avoid mutation
+     frozen: bool = False
+     env_file_path: Path | str | None = None
+     parameters: Dict[str, Any] = Field(default_factory=dict)
+
+     def freeze(self):
+         self.frozen = True
+
+     def thaw(self):
+         self.frozen = False
+
+     def set_parameters(self, **kwargs) -> Self:
+
+         self.parameters.update(kwargs)
+         return self
+
+     def materialize_for_select(
+         self, local_concepts: dict[str, "BuildConcept"] | None = None
+     ) -> "BuildEnvironment":
+         """helper method"""
+         from trilogy.core.models.build import Factory
+
+         return Factory(self, local_concepts=local_concepts).build(self)
+
+     def add_rowset(self, name: str, lineage: SelectLineage):
+         self.named_statements[name] = lineage
+
+     def duplicate(self):
+         return Environment.model_construct(
+             datasources=self.datasources.duplicate(),
+             concepts=self.concepts.duplicate(),
+             functions=dict(self.functions),
+             data_types=dict(self.data_types),
+             imports=defaultdict(list, self.imports),
+             namespace=self.namespace,
+             working_path=self.working_path,
+             environment_config=self.config.model_copy(deep=True),
+             version=self.version,
+             cte_name_map=dict(self.cte_name_map),
+             alias_origin_lookup={
+                 k: v.duplicate() for k, v in self.alias_origin_lookup.items()
+             },
+             env_file_path=self.env_file_path,
+         )
+
+     def _add_path_concepts(self):
+         concept = Concept(
+             name=WORKING_PATH_CONCEPT,
+             namespace=self.namespace,
+             lineage=Function(
+                 operator=FunctionType.CONSTANT,
+                 arguments=[str(self.working_path)],
+                 output_datatype=DataType.STRING,
+                 output_purpose=Purpose.CONSTANT,
+             ),
+             datatype=DataType.STRING,
+             granularity=Granularity.SINGLE_ROW,
+             derivation=Derivation.CONSTANT,
+             purpose=Purpose.CONSTANT,
+         )
+         self.add_concept(concept)
+
+     def model_post_init(self, context: Any) -> None:
+         self._add_path_concepts()
+
+     @classmethod
+     def from_file(cls, path: str | Path) -> "Environment":
+         if isinstance(path, str):
+             path = Path(path)
+         with open(path, "r") as f:
+             read = f.read()
+         return Environment(working_path=path.parent, env_file_path=path).parse(read)[0]
+
+     @classmethod
+     def from_string(cls, input: str) -> "Environment":
+         return Environment().parse(input)[0]
+
+     @classmethod
+     def from_cache(cls, path) -> Optional["Environment"]:
+         with open(path, "r") as f:
+             read = f.read()
+         base = cls.model_validate_json(read)
+         version = get_version()
+         if base.version != version:
+             return None
+         return base
+
+     def to_cache(self, path: Optional[str | Path] = None) -> Path:
+         if not path:
+             ppath = Path(self.working_path) / ENV_CACHE_NAME
+         else:
+             ppath = Path(path)
+         with open(ppath, "w") as f:
+             f.write(self.model_dump_json())
+         return ppath
+
+     def validate_concept(
+         self, new_concept: Concept, meta: Meta | None = None
+     ) -> Concept | None:
+         lookup = new_concept.address
+         if lookup not in self.concepts:
+             return None
+         existing: Concept = self.concepts.get(lookup)  # type: ignore
+         if isinstance(existing, UndefinedConcept):
+             return None
+
+         def handle_currently_bound_sources():
+             if str(existing.lineage) == str(new_concept.lineage):
+                 return None
+
+             invalidated = False
+             for k, datasource in self.datasources.items():
+                 if existing.address in datasource.output_concepts:
+                     logger.warning(
+                         f"Removed concept for {existing} assignment from {k}"
+                     )
+                     clen = len(datasource.columns)
+                     datasource.columns = [
+                         x
+                         for x in datasource.columns
+                         if x.concept.address != existing.address
+                     ]
+                     assert len(datasource.columns) < clen
+                     invalidated = len(datasource.columns) < clen
+             if invalidated:
+                 logger.warning(
+                     f"Persisted concept {existing.address} lineage {str(existing.lineage)} did not match redeclaration {str(new_concept.lineage)}, invalidated current bound datasource."
+                 )
+             return None
+
+         if existing and self.config.allow_duplicate_declaration:
+             if (
+                 existing.metadata
+                 and existing.metadata.concept_source == ConceptSource.AUTO_DERIVED
+             ):
+                 # auto derived concepts will not have sources nad do not need to be checked
+                 return None
+             return handle_currently_bound_sources()
+         elif (
+             existing.metadata
+             and existing.metadata.concept_source == ConceptSource.AUTO_DERIVED
+         ):
+             return None
+         elif meta and existing.metadata:
+             raise ValueError(
+                 f"Assignment to concept '{lookup}' on line {meta.line} is a duplicate"
+                 f" declaration; '{lookup}' was originally defined on line"
+                 f" {existing.metadata.line_number}"
+             )
+         elif existing.metadata:
+             raise ValueError(
+                 f"Assignment to concept '{lookup}' is a duplicate declaration;"
+                 f" '{lookup}' was originally defined on line"
+                 f" {existing.metadata.line_number}"
+             )
+         raise ValueError(
+             f"Assignment to concept '{lookup}' is a duplicate declaration;"
+         )
+
+     def add_import(
+         self, alias: str, source: Environment, imp_stm: Import | None = None
+     ):
+         if self.frozen:
+             raise ValueError("Environment is frozen, cannot add imports")
+         exists = False
+         existing = self.imports[alias]
+         if imp_stm:
+             if any(
+                 [x.path == imp_stm.path and x.alias == imp_stm.alias for x in existing]
+             ):
+                 exists = True
+         else:
+             if any(
+                 [x.path == source.working_path and x.alias == alias for x in existing]
+             ):
+                 exists = True
+             imp_stm = Import(alias=alias, path=Path(source.working_path))
+         same_namespace = alias == DEFAULT_NAMESPACE
+
+         if not exists:
+             self.imports[alias].append(imp_stm)
+         # we can't exit early
+         # as there may be new concepts
+         iteration: list[tuple[str, Concept]] = list(source.concepts.items())
+         for k, concept in iteration:
+             # skip internal namespace
+             if INTERNAL_NAMESPACE in concept.address:
+                 continue
+             # don't overwrite working path
+             if concept.name == WORKING_PATH_CONCEPT:
+                 continue
+             if same_namespace:
+                 new = self.add_concept(concept, add_derived=False)
+             else:
+                 new = self.add_concept(concept.with_namespace(alias), add_derived=False)
+
+                 k = address_with_namespace(k, alias)
+             # set this explicitly, to handle aliasing
+             self.concepts[k] = new
+
+         for _, datasource in source.datasources.items():
+             if same_namespace:
+                 self.add_datasource(datasource)
+             else:
+                 self.add_datasource(datasource.with_namespace(alias))
+         for key, val in source.alias_origin_lookup.items():
+
+             if same_namespace:
+                 self.alias_origin_lookup[key] = val
+             else:
+                 self.alias_origin_lookup[address_with_namespace(key, alias)] = (
+                     val.with_namespace(alias)
+                 )
+
+         for key, function in source.functions.items():
+             if same_namespace:
+                 self.functions[key] = function
+             else:
+                 self.functions[address_with_namespace(key, alias)] = (
+                     function.with_namespace(alias)
+                 )
+         for key, type in source.data_types.items():
+             if same_namespace:
+                 self.data_types[key] = type
+             else:
+                 self.data_types[address_with_namespace(key, alias)] = (
+                     type.with_namespace(alias)
+                 )
+         return self
+
+     def add_file_import(
+         self, path: str | Path, alias: str, env: "Environment" | None = None
+     ):
+         if self.frozen:
+             raise ValueError("Environment is frozen, cannot add imports")
+         from trilogy.parsing.parse_engine import (
+             PARSER,
+             ParseToObjects,
+         )
+
+         if isinstance(path, str):
+             if path.endswith(".preql"):
+                 path = path.rsplit(".", 1)[0]
+             if "." not in path:
+                 target = Path(self.working_path, path)
+             else:
+                 target = Path(self.working_path, *path.split("."))
+             target = target.with_suffix(".preql")
+         else:
+             target = path
+         if not env:
+             import_keys = ["root", alias]
+             parse_address = "-".join(import_keys)
+             try:
+                 with open(target, "r", encoding="utf-8") as f:
+                     text = f.read()
+                 nenv = Environment(
+                     working_path=target.parent,
+                 )
+                 nenv.concepts.fail_on_missing = False
+                 nparser = ParseToObjects(
+                     environment=Environment(
+                         working_path=target.parent,
+                     ),
+                     parse_address=parse_address,
+                     token_address=target,
+                     import_keys=import_keys,
+                 )
+                 nparser.set_text(text)
+                 nparser.environment.concepts.fail_on_missing = False
+                 nparser.transform(PARSER.parse(text))
+                 nparser.run_second_parse_pass()
+                 nparser.environment.concepts.fail_on_missing = True
+
+             except Exception as e:
+                 raise ImportError(
+                     f"Unable to import file {target.parent}, parsing error: {e}"
+                 )
+             env = nparser.environment
+         imps = Import(alias=alias, path=target)
+         self.add_import(alias, source=env, imp_stm=imps)
+         return imps
+
+     def parse(
+         self, input: str, namespace: str | None = None, persist: bool = False
+     ) -> Tuple["Environment", list]:
+         from trilogy import parse
+         from trilogy.core.query_processor import process_persist
+         from trilogy.core.statements.author import (
+             MultiSelectStatement,
+             PersistStatement,
+             SelectStatement,
+             ShowStatement,
+         )
+
+         if namespace:
+             new = Environment()
+             _, queries = new.parse(input)
+             self.add_import(namespace, new)
+             return self, queries
+         _, queries = parse(input, self)
+         generatable = [
+             x
+             for x in queries
+             if isinstance(
+                 x,
+                 (
+                     SelectStatement,
+                     PersistStatement,
+                     MultiSelectStatement,
+                     ShowStatement,
+                 ),
+             )
+         ]
+         while generatable:
+             t = generatable.pop(0)
+             if isinstance(t, PersistStatement) and persist:
+                 processed = process_persist(self, t)
+                 self.add_datasource(processed.datasource)
+         return self, queries
+
+     def add_concept(
+         self,
+         concept: Concept,
+         meta: Meta | None = None,
+         force: bool = False,
+         add_derived: bool = True,
+     ):
+
+         if self.frozen:
+             raise FrozenEnvironmentException(
+                 "Environment is frozen, cannot add concepts"
+             )
+         if not force:
+             existing = self.validate_concept(concept, meta=meta)
+             if existing:
+                 concept = existing
+
+         self.concepts[concept.address] = concept
+
+         from trilogy.core.environment_helpers import generate_related_concepts
+
+         generate_related_concepts(concept, self, meta=meta, add_derived=add_derived)
+
+         return concept
+
+     def remove_concept(
+         self,
+         concept: Concept | str,
+     ) -> bool:
+         if self.frozen:
+             raise FrozenEnvironmentException(
+                 "Environment is frozen, cannot remove concepts"
+             )
+         if isinstance(concept, Concept):
+             address = concept.address
+             c_instance = concept
+         else:
+             address = concept
+             c_instance_check = self.concepts.get(address)
+             if not c_instance_check:
+                 return False
+             c_instance = c_instance_check
+         from trilogy.core.environment_helpers import remove_related_concepts
+
+         remove_related_concepts(c_instance, self)
+         if address in self.concepts:
+             del self.concepts[address]
+             return True
+         if address in self.alias_origin_lookup:
+             del self.alias_origin_lookup[address]
+
+         return False
+
+     def add_datasource(
+         self,
+         datasource: Datasource,
+         meta: Meta | None = None,
+     ):
+         if self.frozen:
+             raise FrozenEnvironmentException(
+                 "Environment is frozen, cannot add datasource"
+             )
+         self.datasources[datasource.identifier] = datasource
+         return datasource
+
+     def delete_datasource(
+         self,
+         address: str,
+         meta: Meta | None = None,
+     ) -> bool:
+         if self.frozen:
+             raise ValueError("Environment is frozen, cannot delete datsources")
+         if address in self.datasources:
+             del self.datasources[address]
+             # self.gen_concept_list_caches()
+             return True
+         return False
+
+     def merge_concept(
+         self,
+         source: Concept,
+         target: Concept,
+         modifiers: List[Modifier],
+         force: bool = False,
+     ) -> bool:
+         from trilogy.core.models.build import BuildConcept
+
+         if isinstance(source, BuildConcept):
+             raise SyntaxError(source)
+         elif isinstance(target, BuildConcept):
+             raise SyntaxError(target)
+         if self.frozen:
+             raise ValueError("Environment is frozen, cannot merge concepts")
+         replacements = {}
+
+         # exit early if we've run this
+         if source.address in self.alias_origin_lookup and not force:
+             if self.concepts[source.address] == target:
+                 return False
+
+         self.alias_origin_lookup[source.address] = source
+         self.alias_origin_lookup[source.address].pseudonyms.add(target.address)
+         for k, v in self.concepts.items():
+
+             if v.address == target.address:
+                 if source.address != target.address:
+                     v.pseudonyms.add(source.address)
+
+             if v.address == source.address:
+                 replacements[k] = target
+             # we need to update keys and grains of all concepts
+             else:
+                 if source.address in v.sources or source.address in v.grain.components:
+                     replacements[k] = v.with_merge(source, target, modifiers)
+         self.concepts.update(replacements)
+         for k, ds in self.datasources.items():
+             if source.address in ds.output_lcl:
+                 ds.merge_concept(source, target, modifiers=modifiers)
+
+         return True
+
+
+ class LazyEnvironment(Environment):
+     """Variant of environment to defer parsing of a path
+     until relevant attributes accessed."""
+
+     load_path: Path
+     setup_queries: list[Any] = Field(default_factory=list)
+     loaded: bool = False
+
+     @property
+     def setup_path(self) -> Path:
+         return self.load_path.parent / "setup.preql"
+
+     def __init__(self, **data):
+         if not data.get("working_path"):
+             data["working_path"] = data["load_path"].parent
+         super().__init__(**data)
+         assert self.working_path == self.load_path.parent
+
+     def _add_path_concepts(self):
+         pass
+
+     def _load(self):
+         if self.loaded:
+             return
+         from trilogy import parse
+
+         env = Environment(working_path=self.load_path.parent)
+         assert env.working_path == self.load_path.parent
+         with open(self.load_path, "r") as f:
+             env, _ = parse(f.read(), env)
+         if self.setup_path.exists():
+             with open(self.setup_path, "r") as f2:
+                 env, q = parse(f2.read(), env)
+                 for q in q:
+                     self.setup_queries.append(q)
+         self.loaded = True
+         self.datasources = env.datasources
+         self.concepts = env.concepts
+         self.imports = env.imports
+         self.alias_origin_lookup = env.alias_origin_lookup
+         self.functions = env.functions
+         self.data_types = env.data_types
+         self.cte_name_map = env.cte_name_map
+
+     def __getattr__(self, name):
+         return self.__getattribute__(name)
+
+     def __getattribute__(self, name):
+         if name not in (
+             "datasources",
+             "concepts",
+             "imports",
+             "functions",
+             "datatypes",
+             "cte_name_map",
+         ) or name.startswith("_"):
+             return super().__getattribute__(name)
+         if not self.loaded:
+             self._load()
+         return super().__getattribute__(name)
+
+
+ Environment.model_rebuild()
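
For orientation only (not part of the published wheel): a minimal sketch of how the Environment API defined in trilogy/core/models/environment.py is typically exercised. The from_string, parse, working_path, to_cache, and from_cache calls mirror the methods shown in the diff above; the Trilogy statements inside the example string are assumed syntax for illustration.

    from pathlib import Path

    from trilogy.core.models.environment import Environment

    # Parse a Trilogy script directly into a fresh environment (assumed example syntax).
    env = Environment.from_string("const pi <- 3.14; select pi;")

    # Environments can also be anchored to a working directory, serialized to a JSON
    # cache, and restored later; from_cache returns None when the cached version string
    # no longer matches the installed trilogy version.
    env2 = Environment(working_path=Path("."))
    cache_file = env2.to_cache()
    restored = Environment.from_cache(cache_file)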