pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76) hide show
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,701 @@
1
+ from __future__ import annotations
2
+
3
+ import difflib
4
+ import os
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Annotated,
11
+ Dict,
12
+ ItemsView,
13
+ List,
14
+ Never,
15
+ Optional,
16
+ Tuple,
17
+ ValuesView,
18
+ )
19
+
20
+ from lark.tree import Meta
21
+ from pydantic import BaseModel, ConfigDict, Field
22
+ from pydantic.functional_validators import PlainValidator
23
+
24
+ from trilogy.constants import DEFAULT_NAMESPACE, ENV_CACHE_NAME, logger
25
+ from trilogy.core.constants import INTERNAL_NAMESPACE, PERSISTED_CONCEPT_PREFIX
26
+ from trilogy.core.enums import (
27
+ ConceptSource,
28
+ Derivation,
29
+ FunctionType,
30
+ Granularity,
31
+ Modifier,
32
+ Purpose,
33
+ )
34
+ from trilogy.core.exceptions import (
35
+ FrozenEnvironmentException,
36
+ UndefinedConceptException,
37
+ )
38
+ from trilogy.core.models.author import (
39
+ Concept,
40
+ ConceptRef,
41
+ Function,
42
+ SelectLineage,
43
+ UndefinedConcept,
44
+ UndefinedConceptFull,
45
+ address_with_namespace,
46
+ )
47
+ from trilogy.core.models.core import DataType
48
+ from trilogy.core.models.datasource import Datasource, EnvironmentDatasourceDict
49
+
50
+ if TYPE_CHECKING:
51
+ from trilogy.core.models.build import BuildConcept, BuildEnvironment
52
+
53
+
54
@dataclass
class Import:
    """One import binding: the alias it is registered under and the path it refers to."""

    # Name the imported content is registered under.
    alias: str
    # Location associated with the import (a source file or a working directory).
    path: Path
58
+
59
+
60
class EnvironmentOptions(BaseModel):
    """Behaviour switches for an environment."""

    # Consulted when a concept is declared a second time: while True the
    # redeclaration is accepted instead of being rejected as a duplicate.
    allow_duplicate_declaration: bool = True
62
+
63
+
64
class EnvironmentConceptDict(dict):
    """Dict of concept address -> concept with namespace-aware lookup.

    Lookups that miss on the literal key are retried with the
    ``DEFAULT_NAMESPACE`` prefix removed or added. When ``fail_on_missing`` is
    False, a key that still cannot be found yields a placeholder
    ``UndefinedConceptFull`` (remembered in ``undefined``) instead of raising.
    """

    def __init__(self, *args, **kwargs) -> None:
        # super() already binds the instance; forwarding ``self`` again made
        # dict.__init__ see two positional arguments as soon as a caller
        # supplied an initial mapping, which raises TypeError.
        super().__init__(*args, **kwargs)
        self.undefined: dict[str, UndefinedConceptFull] = {}
        self.fail_on_missing: bool = True
        self.populate_default_concepts()

    def duplicate(self) -> "EnvironmentConceptDict":
        """Return a new dict whose values are ``duplicate()`` copies of ours.

        The ``undefined`` placeholder mapping is shared by reference with the
        original rather than copied.
        """
        new = EnvironmentConceptDict()
        new.update({k: v.duplicate() for k, v in self.items()})
        new.undefined = self.undefined
        new.fail_on_missing = self.fail_on_missing
        return new

    def populate_default_concepts(self):
        """Register every concept from ``DEFAULT_CONCEPTS`` under its address."""
        # Imported here rather than at module level (presumably to avoid an
        # import cycle - confirm before hoisting).
        from trilogy.core.internal import DEFAULT_CONCEPTS

        for concept in DEFAULT_CONCEPTS.values():
            self[concept.address] = concept

    def values(self) -> ValuesView[Concept]:  # type: ignore
        """Typed pass-through to ``dict.values``."""
        return super().values()

    def get(self, key: str, default: Concept | None = None) -> Concept | None:  # type: ignore
        """Look up ``key`` via ``__getitem__`` so the namespace fallbacks apply.

        Returns ``default`` only when the lookup raises
        ``UndefinedConceptException``; with ``fail_on_missing`` disabled the
        placeholder produced by ``__getitem__`` is returned instead.
        """
        try:
            return self.__getitem__(key)
        except UndefinedConceptException:
            return default

    def raise_undefined(
        self, key: str, line_no: int | None = None, file: Path | str | None = None
    ) -> Never:
        """Raise ``UndefinedConceptException`` for ``key`` with close-match suggestions.

        The message is prefixed with ``file: line_no`` when both are given and
        with ``line: line_no`` when only the line is known. ``file`` without a
        line number is not included.
        """
        matches = self._find_similar_concepts(key)
        message = f"Undefined concept: {key}."
        if matches:
            message += f" Suggestions: {matches}"

        if line_no:
            if file:
                raise UndefinedConceptException(
                    f"{file}: {line_no}: " + message, matches
                )
            raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
        raise UndefinedConceptException(message, matches)

    def __getitem__(
        self, key: str, line_no: int | None = None, file: Path | None = None
    ) -> Concept | UndefinedConceptFull:
        """Resolve ``key`` (an address or a ``ConceptRef``) to a concept.

        Resolution order after a literal miss:
        1. strip a leading ``DEFAULT_NAMESPACE.`` and retry;
        2. retry with ``DEFAULT_NAMESPACE.`` prepended, if that key exists;
        3. if ``fail_on_missing`` is False, return (and cache) a placeholder;
        4. otherwise raise via ``raise_undefined``.

        ``line_no`` and ``file`` are only used to enrich the error / placeholder.
        """
        if isinstance(key, ConceptRef):
            return self.__getitem__(key.address, line_no=line_no, file=file)
        try:
            return super(EnvironmentConceptDict, self).__getitem__(key)
        except KeyError:
            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
                # keep ``file`` so a failure after the fallback still reports it
                return self.__getitem__(
                    key.split(".", 1)[1], line_no=line_no, file=file
                )
            if DEFAULT_NAMESPACE + "." + key in self:
                return self.__getitem__(
                    DEFAULT_NAMESPACE + "." + key, line_no=line_no, file=file
                )
            if not self.fail_on_missing:
                if "." in key:
                    ns, rest = key.rsplit(".", 1)
                else:
                    ns = DEFAULT_NAMESPACE
                    rest = key
                if key in self.undefined:
                    return self.undefined[key]
                undefined = UndefinedConceptFull(
                    line_no=line_no,
                    datatype=DataType.UNKNOWN,
                    name=rest,
                    purpose=Purpose.UNKNOWN,
                    namespace=ns,
                )
                self.undefined[key] = undefined
                return undefined
        self.raise_undefined(key, line_no, file)

    def _find_similar_concepts(self, concept_name: str):
        """Return ``difflib`` close matches for ``concept_name`` among our keys.

        Both the query and the candidate keys are compared with any leading
        ``DEFAULT_NAMESPACE.`` prefix removed.
        """

        def strip_local(input: str):
            if input.startswith(f"{DEFAULT_NAMESPACE}."):
                return input[len(DEFAULT_NAMESPACE) + 1 :]
            return input

        matches = difflib.get_close_matches(
            strip_local(concept_name), [strip_local(x) for x in self.keys()]
        )
        return matches

    def items(self) -> ItemsView[str, Concept]:  # type: ignore
        """Typed pass-through to ``dict.items``."""
        return super().items()
154
+
155
+
156
def validate_concepts(v) -> EnvironmentConceptDict:
    """Coerce a field value into an ``EnvironmentConceptDict``.

    An existing ``EnvironmentConceptDict`` is returned untouched; a plain dict
    has each value run through ``Concept.model_validate`` and is wrapped.

    Raises:
        ValueError: if ``v`` is neither of those; the message names the
            offending type so the resulting validation error is actionable.
    """
    if isinstance(v, EnvironmentConceptDict):
        return v
    if isinstance(v, dict):
        return EnvironmentConceptDict(
            **{x: Concept.model_validate(y) for x, y in v.items()}
        )
    raise ValueError(
        f"concepts must be an EnvironmentConceptDict or dict, got {type(v).__name__}"
    )
164
+
165
+
166
def validate_datasources(v) -> EnvironmentDatasourceDict:
    """Coerce a field value into an ``EnvironmentDatasourceDict``.

    An existing ``EnvironmentDatasourceDict`` is returned untouched; a plain
    dict has each value run through ``Datasource.model_validate`` and is wrapped.

    Raises:
        ValueError: if ``v`` is neither of those; the message names the
            offending type so the resulting validation error is actionable.
    """
    if isinstance(v, EnvironmentDatasourceDict):
        return v
    if isinstance(v, dict):
        return EnvironmentDatasourceDict(
            **{x: Datasource.model_validate(y) for x, y in v.items()}
        )
    raise ValueError(
        "datasources must be an EnvironmentDatasourceDict or dict, "
        f"got {type(v).__name__}"
    )
174
+
175
+
176
def get_version():
    """Return ``trilogy.__version__``.

    The package is imported at call time rather than at module import
    (presumably to sidestep a circular import - confirm before hoisting).
    """
    from trilogy import __version__ as current_version

    return current_version
180
+
181
+
182
class Environment(BaseModel):
    """Pydantic model holding the concepts, datasources, functions, data types,
    named statements and imports that statements are parsed against.

    Most mutating methods refuse to run while ``frozen`` is True. Constructing
    an instance through ``__init__`` also registers an ``_env_working_path``
    constant concept (see ``_add_path_concepts``).
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)

    concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
        Field(default_factory=EnvironmentConceptDict)
    )
    datasources: Annotated[
        EnvironmentDatasourceDict, PlainValidator(validate_datasources)
    ] = Field(default_factory=EnvironmentDatasourceDict)
    functions: Dict[str, Function] = Field(default_factory=dict)
    data_types: Dict[str, DataType] = Field(default_factory=dict)
    # rowset name -> lineage, populated by add_rowset
    named_statements: Dict[str, SelectLineage] = Field(default_factory=dict)
    # alias -> every Import registered under that alias
    imports: Dict[str, list[Import]] = Field(
        default_factory=lambda: defaultdict(list)  # type: ignore
    )
    namespace: str = DEFAULT_NAMESPACE
    working_path: str | Path = Field(default_factory=lambda: os.getcwd())
    environment_config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
    version: str = Field(default_factory=get_version)
    cte_name_map: Dict[str, str] = Field(default_factory=dict)
    materialized_concepts: set[str] = Field(default_factory=set)
    # address of a merged-away concept -> the original concept object
    alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
    # TODO: support freezing environments to avoid mutation
    frozen: bool = False
    env_file_path: Path | None = None

    def freeze(self):
        """Mark the environment as frozen; guarded mutators will then raise."""
        self.frozen = True

    def thaw(self):
        """Clear the frozen flag so mutation is allowed again."""
        self.frozen = False

    def materialize_for_select(
        self, local_concepts: dict[str, "BuildConcept"] | None = None
    ) -> "BuildEnvironment":
        """Build and return a ``BuildEnvironment`` from this environment via ``Factory``."""
        from trilogy.core.models.build import Factory

        return Factory(self, local_concepts=local_concepts).build(self)

    def add_rowset(self, name: str, lineage: SelectLineage):
        """Register ``lineage`` under ``name`` in ``named_statements``."""
        self.named_statements[name] = lineage

    def duplicate(self):
        """Return an unfrozen copy of this environment.

        Concepts, datasources and alias origins are duplicated; the simple
        containers are shallow-copied. ``model_construct`` is used, so no
        validation runs and ``__init__`` is not invoked for the copy.
        """
        return Environment.model_construct(
            datasources=self.datasources.duplicate(),
            concepts=self.concepts.duplicate(),
            functions=dict(self.functions),
            data_types=dict(self.data_types),
            # previously omitted, which silently dropped registered rowsets
            named_statements=dict(self.named_statements),
            # keep the defaultdict behaviour and give the copy its own lists so
            # imports added to the copy do not leak into the original
            imports=defaultdict(
                list, {k: list(v) for k, v in self.imports.items()}
            ),
            namespace=self.namespace,
            working_path=self.working_path,
            environment_config=self.environment_config,
            version=self.version,
            cte_name_map=dict(self.cte_name_map),
            materialized_concepts=set(self.materialized_concepts),
            alias_origin_lookup={
                k: v.duplicate() for k, v in self.alias_origin_lookup.items()
            },
            # previously omitted; the copy still originates from the same file
            env_file_path=self.env_file_path,
        )

    def _add_path_concepts(self):
        """Register the ``_env_working_path`` string constant for this environment."""
        concept = Concept(
            name="_env_working_path",
            namespace=self.namespace,
            lineage=Function(
                operator=FunctionType.CONSTANT,
                arguments=[str(self.working_path)],
                output_datatype=DataType.STRING,
                output_purpose=Purpose.CONSTANT,
            ),
            datatype=DataType.STRING,
            granularity=Granularity.SINGLE_ROW,
            derivation=Derivation.CONSTANT,
            purpose=Purpose.CONSTANT,
        )
        self.add_concept(concept)

    def __init__(self, **data):
        super().__init__(**data)
        self._add_path_concepts()

    @classmethod
    def from_file(cls, path: str | Path) -> "Environment":
        """Parse the file at ``path`` into a new environment rooted at its directory."""
        if isinstance(path, str):
            path = Path(path)
        # read as UTF-8, matching how add_file_import reads source files
        with open(path, "r", encoding="utf-8") as f:
            read = f.read()
        return Environment(working_path=path.parent, env_file_path=path).parse(read)[0]

    @classmethod
    def from_string(cls, input: str) -> "Environment":
        """Parse ``input`` into a fresh default environment and return it."""
        return Environment().parse(input)[0]

    @classmethod
    def from_cache(cls, path) -> Optional["Environment"]:
        """Load an environment from its JSON dump at ``path``.

        Returns None when the stored ``version`` differs from the running
        package version.
        """
        with open(path, "r") as f:
            read = f.read()
        base = cls.model_validate_json(read)
        version = get_version()
        if base.version != version:
            return None
        return base

    def to_cache(self, path: Optional[str | Path] = None) -> Path:
        """Write the JSON dump of this environment and return where it went.

        Without ``path`` the file is ``working_path / ENV_CACHE_NAME``.
        """
        if not path:
            ppath = Path(self.working_path) / ENV_CACHE_NAME
        else:
            ppath = Path(path)
        with open(ppath, "w") as f:
            f.write(self.model_dump_json())
        return ppath

    def validate_concept(self, new_concept: Concept, meta: Meta | None = None):
        """Check ``new_concept`` against any concept already at its address.

        Returns:
            The existing concept when a persisted binding should be kept, else
            None (nothing exists yet, or the new concept may overwrite).

        Raises:
            ValueError: when duplicate declarations are disallowed and the
                existing concept is neither persisted nor auto-derived.
        """
        lookup = new_concept.address
        existing: Concept = self.concepts.get(lookup)  # type: ignore
        if not existing or isinstance(existing, UndefinedConcept):
            return

        def handle_persist():
            """Decide whether a persisted binding survives the redeclaration."""
            deriv_lookup = (
                f"{existing.namespace}.{PERSISTED_CONCEPT_PREFIX}_{existing.name}"
            )

            alt_source = self.alias_origin_lookup.get(deriv_lookup)
            if not alt_source:
                return None
            # if the new concept binding has no lineage
            # nothing to cause us to think a persist binding
            # needs to be invalidated
            if not new_concept.lineage:
                return existing
            if str(alt_source.lineage) == str(new_concept.lineage):
                logger.info(
                    f"Persisted concept {existing.address} matched redeclaration, keeping current persistence binding."
                )
                return existing
            logger.warning(
                f"Persisted concept {existing.address} lineage {str(alt_source.lineage)} did not match redeclaration {str(new_concept.lineage)}, overwriting and invalidating persist binding."
            )
            # strip the stale binding from every datasource that outputs it
            for k, datasource in self.datasources.items():
                if existing.address in datasource.output_concepts:
                    logger.warning(
                        f"Removed concept for {existing} assignment from {k}"
                    )
                    clen = len(datasource.columns)
                    datasource.columns = [
                        x
                        for x in datasource.columns
                        if x.concept.address != existing.address
                        and x.concept.address != deriv_lookup
                    ]
                    assert len(datasource.columns) < clen
                    for x in datasource.columns:
                        logger.info(x)

            return None

        # ``existing`` is known to be truthy past the guard above
        if self.environment_config.allow_duplicate_declaration:
            if existing.metadata.concept_source == ConceptSource.PERSIST_STATEMENT:
                return handle_persist()
            return None

        if existing.metadata:
            if existing.metadata.concept_source == ConceptSource.PERSIST_STATEMENT:
                return handle_persist()
            # if the existing concept is auto derived, we can overwrite it
            if existing.metadata.concept_source == ConceptSource.AUTO_DERIVED:
                return None
            # Report where the clash is. In the chained-elif layout these two
            # messages sat behind an identical earlier condition and could
            # never be reached.
            if meta:
                raise ValueError(
                    f"Assignment to concept '{lookup}' on line {meta.line} is a duplicate"
                    f" declaration; '{lookup}' was originally defined on line"
                    f" {existing.metadata.line_number}"
                )
            raise ValueError(
                f"Assignment to concept '{lookup}' is a duplicate declaration;"
                f" '{lookup}' was originally defined on line"
                f" {existing.metadata.line_number}"
            )
        raise ValueError(
            f"Assignment to concept '{lookup}' is a duplicate declaration;"
        )

    def add_import(
        self, alias: str, source: Environment, imp_stm: Import | None = None
    ):
        """Merge ``source`` into this environment under ``alias``.

        Concepts, datasources and alias origins are copied across, namespaced
        with ``alias`` unless it equals our own namespace. The ``Import``
        record is appended only if an equal one is not already registered, but
        the contents are always re-merged since ``source`` may have grown.

        Raises:
            ValueError: if the environment is frozen.
        """
        if self.frozen:
            raise ValueError("Environment is frozen, cannot add imports")
        # ``imports`` is a plain dict after from_cache(); indexing a new alias
        # would raise KeyError there, setdefault works for both dict flavours.
        existing = self.imports.setdefault(alias, [])
        if imp_stm:
            exists = any(
                x.path == imp_stm.path and x.alias == imp_stm.alias for x in existing
            )
        else:
            # Normalise to Path before comparing: working_path may be a str,
            # and a Path never compares equal to a str, so the duplicate check
            # used to miss and the same import was appended again each call.
            imp_stm = Import(alias=alias, path=Path(source.working_path))
            exists = any(
                x.path == imp_stm.path and x.alias == alias for x in existing
            )
        same_namespace = alias == self.namespace

        if not exists:
            existing.append(imp_stm)
        # we can't exit early
        # as there may be new concepts
        for k, concept in source.concepts.items():

            # skip internal namespace
            if INTERNAL_NAMESPACE in concept.address:
                continue
            if same_namespace:
                new = self.add_concept(concept)
            else:
                new = self.add_concept(concept.with_namespace(alias))

                k = address_with_namespace(k, alias)
            # set this explicitly, to handle aliasing
            self.concepts[k] = new

        for _, datasource in source.datasources.items():
            if same_namespace:
                self.add_datasource(datasource)
            else:
                self.add_datasource(datasource.with_namespace(alias))
        for key, val in source.alias_origin_lookup.items():

            if same_namespace:
                self.alias_origin_lookup[key] = val
            else:
                self.alias_origin_lookup[address_with_namespace(key, alias)] = (
                    val.with_namespace(alias)
                )

        return self

    def add_file_import(
        self, path: str | Path, alias: str, env: "Environment" | None = None
    ):
        """Import a ``.preql`` file under ``alias`` and return its ``Import`` record.

        A string ``path`` is resolved against ``working_path``; dots in it are
        treated as directory separators and a trailing ``.preql`` is optional.
        When ``env`` is supplied the file is not parsed and ``env`` is merged
        instead.

        Raises:
            ValueError: if the environment is frozen.
            ImportError: if reading or parsing the target file fails.
        """
        if self.frozen:
            raise ValueError("Environment is frozen, cannot add imports")
        from trilogy.parsing.parse_engine import (
            PARSER,
            ParseToObjects,
            gen_cache_lookup,
        )

        if isinstance(path, str):
            if path.endswith(".preql"):
                path = path.rsplit(".", 1)[0]
            if "." not in path:
                target = Path(self.working_path, path)
            else:
                target = Path(self.working_path, *path.split("."))
            target = target.with_suffix(".preql")
        else:
            target = path
        if not env:
            parse_address = gen_cache_lookup(str(target), alias, str(self.working_path))
            try:
                with open(target, "r", encoding="utf-8") as f:
                    text = f.read()
                # NOTE(review): ``nenv`` is configured with
                # fail_on_missing=False but the parser below is given a
                # separate fresh Environment - confirm which one is intended.
                nenv = Environment(
                    working_path=target.parent,
                )
                nenv.concepts.fail_on_missing = False
                nparser = ParseToObjects(
                    environment=Environment(
                        working_path=target.parent,
                    ),
                    parse_address=parse_address,
                    token_address=target,
                )
                nparser.set_text(text)
                nparser.transform(PARSER.parse(text))
                nparser.hydrate_missing()

            except Exception as e:
                # name the file itself (not its directory) and keep the cause
                raise ImportError(
                    f"Unable to import file {target}, parsing error: {e}"
                ) from e
            env = nparser.environment
        imps = Import(alias=alias, path=target)
        self.add_import(alias, source=env, imp_stm=imps)
        return imps

    def parse(
        self, input: str, namespace: str | None = None, persist: bool = False
    ) -> Tuple["Environment", list]:
        """Parse ``input`` against this environment.

        With ``namespace`` the text is parsed into a fresh environment which is
        then imported under that alias. Otherwise it is parsed in place and,
        if ``persist`` is True, every persist statement is processed and its
        datasource registered.

        Returns:
            ``(self, parsed_statements)``.
        """
        from trilogy import parse
        from trilogy.core.query_processor import process_persist
        from trilogy.core.statements.author import PersistStatement

        if namespace:
            new = Environment()
            _, queries = new.parse(input)
            self.add_import(namespace, new)
            return self, queries
        _, queries = parse(input, self)
        if persist:
            # only persist statements have an effect here; walk them in order
            for statement in queries:
                if isinstance(statement, PersistStatement):
                    processed = process_persist(self, statement)
                    self.add_datasource(processed.datasource)
        return self, queries

    def add_concept(
        self,
        concept: Concept,
        meta: Meta | None = None,
        force: bool = False,
        add_derived: bool = True,
    ):
        """Register ``concept`` under its address and generate related concepts.

        Unless ``force`` is set the concept is first checked with
        ``validate_concept``; if that returns an existing concept, the existing
        one is kept in place of ``concept``.

        Returns:
            The concept that ended up registered.

        Raises:
            FrozenEnvironmentException: if the environment is frozen.
        """
        if self.frozen:
            raise FrozenEnvironmentException(
                "Environment is frozen, cannot add concepts"
            )
        if not force:
            existing = self.validate_concept(concept, meta=meta)
            if existing:
                concept = existing
        self.concepts[concept.address] = concept

        from trilogy.core.environment_helpers import generate_related_concepts

        generate_related_concepts(concept, self, meta=meta, add_derived=add_derived)

        return concept

    def add_datasource(
        self,
        datasource: Datasource,
        meta: Meta | None = None,
    ):
        """Register ``datasource`` and rebind derived concepts it persists.

        For a datasource whose ``non_partial_for`` is None, each column bound
        to a known concept that is neither ROOT nor CONSTANT is treated as
        persisted: the original definition is re-registered under a
        ``PERSISTED_CONCEPT_PREFIX`` name, the concept itself becomes a
        lineage-free ROOT marked ``PERSIST_STATEMENT``, and the two are merged.

        Returns:
            The datasource passed in.

        Raises:
            FrozenEnvironmentException: if the environment is frozen.
        """
        if self.frozen:
            raise FrozenEnvironmentException(
                "Environment is frozen, cannot add datasource"
            )
        self.datasources[datasource.identifier] = datasource

        # the flag cannot change inside the loop, so decide once up front
        if datasource.non_partial_for is not None:
            return datasource

        # mark this as canonical source
        for c in datasource.columns:
            cref = c.concept
            if cref.address not in self.concepts:
                continue
            new_persisted_concept = self.concepts[cref.address]
            if isinstance(new_persisted_concept, UndefinedConcept):
                continue

            current_derivation = new_persisted_concept.derivation
            # TODO: refine this section;
            # too hacky for maintainability
            if current_derivation not in (Derivation.ROOT, Derivation.CONSTANT):
                logger.info(
                    f"A datasource has been added which will persist derived concept {new_persisted_concept.address}"
                )
                persisted = f"{PERSISTED_CONCEPT_PREFIX}_" + new_persisted_concept.name
                # override the current concept source to reflect that it's now coming from a datasource
                if (
                    new_persisted_concept.metadata.concept_source
                    != ConceptSource.PERSIST_STATEMENT
                ):
                    original_concept = new_persisted_concept.model_copy(
                        deep=True,
                        update={
                            "name": persisted,
                        },
                    )
                    self.add_concept(
                        original_concept,
                        meta=meta,
                        force=True,
                    )
                    new_persisted_concept = new_persisted_concept.model_copy(
                        deep=True,
                        update={
                            "lineage": None,
                            "metadata": new_persisted_concept.metadata.model_copy(
                                update={
                                    "concept_source": ConceptSource.PERSIST_STATEMENT
                                }
                            ),
                            "derivation": Derivation.ROOT,
                        },
                    )
                    self.add_concept(
                        new_persisted_concept,
                        meta=meta,
                        force=True,
                    )
                    self.merge_concept(original_concept, new_persisted_concept, [])
                else:
                    self.add_concept(
                        new_persisted_concept,
                        meta=meta,
                    )
        return datasource

    def delete_datasource(
        self,
        address: str,
        meta: Meta | None = None,
    ) -> bool:
        """Remove the datasource stored at ``address``.

        Returns:
            True if something was removed, False if the address was unknown.

        Raises:
            ValueError: if the environment is frozen.
        """
        if self.frozen:
            raise ValueError("Environment is frozen, cannot delete datsources")
        if address in self.datasources:
            del self.datasources[address]
            return True
        return False

    def merge_concept(
        self,
        source: Concept,
        target: Concept,
        modifiers: List[Modifier],
        force: bool = False,
    ) -> bool:
        """Replace ``source`` with ``target`` throughout the environment.

        ``source`` is recorded in ``alias_origin_lookup``, the two concepts
        become pseudonyms of each other, every stored concept is rewritten via
        ``with_merge`` and datasources outputting ``source`` are updated.

        Returns:
            False if this merge was already applied (and ``force`` is not
            set), True otherwise.

        Raises:
            SyntaxError: if either argument is a ``BuildConcept``.
            ValueError: if the environment is frozen.
        """
        from trilogy.core.models.build import BuildConcept

        if isinstance(source, BuildConcept):
            raise SyntaxError(source)
        elif isinstance(target, BuildConcept):
            raise SyntaxError(target)
        if self.frozen:
            raise ValueError("Environment is frozen, cannot merge concepts")
        replacements = {}

        # exit early if we've run this
        if source.address in self.alias_origin_lookup and not force:
            if self.concepts[source.address] == target:
                return False

        self.alias_origin_lookup[source.address] = source
        self.alias_origin_lookup[source.address].pseudonyms.add(target.address)
        for k, v in self.concepts.items():

            if v.address == target.address:
                if source.address != target.address:
                    v.pseudonyms.add(source.address)

            if v.address == source.address:
                replacements[k] = target
            # we need to update keys and grains of all concepts
            else:
                replacements[k] = v.with_merge(source, target, modifiers)
        # applied after the loop so the dict is not mutated while iterating
        self.concepts.update(replacements)
        for _, ds in self.datasources.items():
            if source.address in ds.output_lcl:
                ds.merge_concept(source, target, modifiers=modifiers)
        return True
660
+
661
+
662
class LazyEnvironment(Environment):
    """Variant of environment to defer parsing of a path
    until relevant attributes accessed."""

    # file parsed on first access to a non-exempt attribute
    load_path: Path
    # flipped to True once load_path has been parsed
    loaded: bool = False

    def __init__(self, **data):
        super().__init__(**data)

    def _add_path_concepts(self):
        # Deliberately a no-op: the base implementation would call
        # add_concept during construction, before the deferred load.
        pass

    def __getattribute__(self, name):
        """Return attribute ``name``, parsing ``load_path`` first if needed.

        The names listed below and anything starting with an underscore are
        served directly so the load itself (and pydantic internals) do not
        recurse into this hook.
        """
        if name in (
            "load_path",
            "loaded",
            "working_path",
            "model_config",
            "model_fields",
            "model_post_init",
        ) or name.startswith("_"):
            return super().__getattribute__(name)
        if not self.loaded:
            logger.info(
                f"lazily evaluating load path {self.load_path} to access {name}"
            )
            from trilogy import parse

            env = Environment(working_path=str(self.working_path))
            # read as UTF-8, consistent with how imported source files are read
            with open(self.load_path, "r", encoding="utf-8") as f:
                parse(f.read(), env)
            self.loaded = True
            # NOTE(review): only these three fields are taken from the parsed
            # environment; functions, named_statements, alias_origin_lookup
            # etc. are not copied - confirm that is intended.
            self.datasources = env.datasources
            self.concepts = env.concepts
            self.imports = env.imports
        return super().__getattribute__(name)
699
+
700
+
701
# Rebuild the pydantic model once the whole module has been defined
# (presumably so the postponed / self-referencing annotations used by
# Environment can be resolved - confirm against the pydantic documentation).
+ Environment.model_rebuild()