pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1845 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +696 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +180 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +57 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +19 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +65 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +146 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/base_hook.py +6 -4
  65. trilogy/hooks/query_debugger.py +110 -93
  66. trilogy/parser.py +1 -1
  67. trilogy/parsing/common.py +303 -64
  68. trilogy/parsing/parse_engine.py +263 -617
  69. trilogy/parsing/render.py +50 -26
  70. trilogy/scripts/trilogy.py +2 -1
  71. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  72. trilogy/core/models.py +0 -4960
  73. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,696 @@
1
+ from __future__ import annotations
2
+
3
+ import difflib
4
+ import os
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Annotated,
11
+ Dict,
12
+ ItemsView,
13
+ List,
14
+ Never,
15
+ Optional,
16
+ Tuple,
17
+ ValuesView,
18
+ )
19
+
20
+ from lark.tree import Meta
21
+ from pydantic import BaseModel, ConfigDict, Field
22
+ from pydantic.functional_validators import PlainValidator
23
+
24
+ from trilogy.constants import DEFAULT_NAMESPACE, ENV_CACHE_NAME, logger
25
+ from trilogy.core.constants import INTERNAL_NAMESPACE, PERSISTED_CONCEPT_PREFIX
26
+ from trilogy.core.enums import (
27
+ ConceptSource,
28
+ Derivation,
29
+ FunctionType,
30
+ Granularity,
31
+ Modifier,
32
+ Purpose,
33
+ )
34
+ from trilogy.core.exceptions import (
35
+ FrozenEnvironmentException,
36
+ UndefinedConceptException,
37
+ )
38
+ from trilogy.core.models.author import (
39
+ Concept,
40
+ ConceptRef,
41
+ Function,
42
+ UndefinedConcept,
43
+ UndefinedConceptFull,
44
+ address_with_namespace,
45
+ )
46
+ from trilogy.core.models.core import DataType
47
+ from trilogy.core.models.datasource import Datasource, EnvironmentDatasourceDict
48
+
49
+ if TYPE_CHECKING:
50
+ from trilogy.core.models.build import BuildConcept, BuildEnvironment
51
+
52
+
53
@dataclass
class Import:
    """A single import record: the alias it is bound to and the path it points at."""

    # name the imported environment is registered under
    alias: str
    # filesystem location associated with the import
    path: Path
57
+
58
+
59
class EnvironmentOptions(BaseModel):
    """Behaviour switches for an ``Environment``."""

    # when True (the default), ``Environment.validate_concept`` accepts a
    # redeclaration of an existing concept instead of raising ValueError
    allow_duplicate_declaration: bool = True
61
+
62
+
63
class EnvironmentConceptDict(dict):
    """Address-keyed concept mapping with namespace-aware lookup.

    Lookups fall back between the bare and ``DEFAULT_NAMESPACE``-qualified
    form of a key. When ``fail_on_missing`` is False, a missing key yields an
    ``UndefinedConceptFull`` placeholder (remembered in ``undefined``) instead
    of raising ``UndefinedConceptException``.
    """

    def __init__(self, *args, **kwargs) -> None:
        # ``self`` must not be forwarded as a positional argument: doing so
        # made ``EnvironmentConceptDict(mapping)`` raise TypeError, because
        # dict() accepts at most one positional argument.
        super().__init__(*args, **kwargs)
        self.undefined: dict[str, UndefinedConceptFull] = {}
        self.fail_on_missing: bool = True
        self.populate_default_concepts()

    def duplicate(self) -> "EnvironmentConceptDict":
        """Return a new dict whose values are ``duplicate()`` copies of ours."""
        new = EnvironmentConceptDict()
        new.update({k: v.duplicate() for k, v in self.items()})
        # NOTE(review): the placeholder registry is shared with the original,
        # not copied - confirm that this aliasing is intended.
        new.undefined = self.undefined
        new.fail_on_missing = self.fail_on_missing
        return new

    def populate_default_concepts(self):
        """Insert every concept from ``DEFAULT_CONCEPTS`` under its address."""
        # imported at call time rather than at module import
        from trilogy.core.internal import DEFAULT_CONCEPTS

        for concept in DEFAULT_CONCEPTS.values():
            self[concept.address] = concept

    def values(self) -> ValuesView[Concept]:  # type: ignore
        return super().values()

    def get(self, key: str, default: Concept | None = None) -> Concept | None:  # type: ignore
        """Like ``dict.get`` but routed through the namespace-aware ``__getitem__``.

        Only ``UndefinedConceptException`` is translated into ``default``; when
        ``fail_on_missing`` is False a placeholder is returned (and recorded)
        rather than ``default``.
        """
        try:
            return self.__getitem__(key)
        except UndefinedConceptException:
            return default

    def raise_undefined(
        self, key: str, line_no: int | None = None, file: Path | str | None = None
    ) -> Never:
        """Raise ``UndefinedConceptException`` for ``key``.

        The message lists close matches when there are any, and is prefixed
        with the line number (and file, when both are known).
        """
        matches = self._find_similar_concepts(key)
        message = f"Undefined concept: {key}."
        if matches:
            message += f" Suggestions: {matches}"

        # the file is only reported alongside a line number
        if line_no and file:
            message = f"{file}: {line_no}: " + message
        elif line_no:
            message = f"line: {line_no}: " + message
        raise UndefinedConceptException(message, matches)

    def __getitem__(
        self, key: str, line_no: int | None = None, file: Path | None = None
    ) -> Concept | UndefinedConceptFull:
        """Resolve ``key`` (an address or a ``ConceptRef``) to a concept.

        Resolution order: exact key; the key with a leading
        ``DEFAULT_NAMESPACE.`` removed; the key with ``DEFAULT_NAMESPACE.``
        prepended; then either a placeholder (``fail_on_missing`` False) or
        ``UndefinedConceptException``.
        """
        if isinstance(key, ConceptRef):
            return self.__getitem__(key.address, line_no=line_no, file=file)
        try:
            return super().__getitem__(key)
        except KeyError:
            head, dot, tail = key.partition(".")
            if dot and head == DEFAULT_NAMESPACE:
                # ``file`` is forwarded so a failure deeper in the fallback
                # chain still reports where the reference came from
                return self.__getitem__(tail, line_no, file)
            qualified = DEFAULT_NAMESPACE + "." + key
            if qualified in self:
                return self.__getitem__(qualified, line_no, file)
            if not self.fail_on_missing:
                if key in self.undefined:
                    return self.undefined[key]
                # everything before the last dot is the namespace
                if "." in key:
                    ns, rest = key.rsplit(".", 1)
                else:
                    ns, rest = DEFAULT_NAMESPACE, key
                undefined = UndefinedConceptFull(
                    line_no=line_no,
                    datatype=DataType.UNKNOWN,
                    name=rest,
                    purpose=Purpose.UNKNOWN,
                    namespace=ns,
                )
                self.undefined[key] = undefined
                return undefined
            self.raise_undefined(key, line_no, file)

    def _find_similar_concepts(self, concept_name: str):
        """Return close matches for ``concept_name`` among the known keys.

        The ``DEFAULT_NAMESPACE.`` prefix is ignored on both sides of the
        comparison.
        """
        local_prefix = f"{DEFAULT_NAMESPACE}."

        def strip_local(address: str):
            return address.removeprefix(local_prefix)

        return difflib.get_close_matches(
            strip_local(concept_name), [strip_local(x) for x in self]
        )

    def items(self) -> ItemsView[str, Concept]:  # type: ignore
        return super().items()
153
+
154
+
155
def validate_concepts(v) -> EnvironmentConceptDict:
    """Coerce ``v`` into an ``EnvironmentConceptDict``.

    An existing ``EnvironmentConceptDict`` is returned unchanged; a plain dict
    has each value passed through ``Concept.model_validate``.

    Raises:
        ValueError: if ``v`` is neither of those.
    """
    if isinstance(v, EnvironmentConceptDict):
        return v
    if isinstance(v, dict):
        validated = {x: Concept.model_validate(y) for x, y in v.items()}
        return EnvironmentConceptDict(**validated)
    # say what was wrong instead of raising an empty ValueError
    raise ValueError(
        f"concepts must be a dict or EnvironmentConceptDict, got {type(v).__name__}"
    )
163
+
164
+
165
def validate_datasources(v) -> EnvironmentDatasourceDict:
    """Coerce ``v`` into an ``EnvironmentDatasourceDict``.

    An existing ``EnvironmentDatasourceDict`` is returned unchanged; a plain
    dict has each value passed through ``Datasource.model_validate``.

    Raises:
        ValueError: if ``v`` is neither of those.
    """
    if isinstance(v, EnvironmentDatasourceDict):
        return v
    if isinstance(v, dict):
        validated = {x: Datasource.model_validate(y) for x, y in v.items()}
        return EnvironmentDatasourceDict(**validated)
    # say what was wrong instead of raising an empty ValueError
    raise ValueError(
        "datasources must be a dict or EnvironmentDatasourceDict, "
        f"got {type(v).__name__}"
    )
173
+
174
+
175
def get_version():
    """Return the ``__version__`` of the installed ``trilogy`` package."""
    # resolved at call time; presumably to avoid importing the package root
    # while this module is itself being imported - TODO confirm
    from trilogy import __version__ as current_version

    return current_version
179
+
180
+
181
+ class Environment(BaseModel):
182
+ model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
183
+
184
+ concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
185
+ Field(default_factory=EnvironmentConceptDict)
186
+ )
187
+ datasources: Annotated[
188
+ EnvironmentDatasourceDict, PlainValidator(validate_datasources)
189
+ ] = Field(default_factory=EnvironmentDatasourceDict)
190
+ functions: Dict[str, Function] = Field(default_factory=dict)
191
+ data_types: Dict[str, DataType] = Field(default_factory=dict)
192
+ imports: Dict[str, list[Import]] = Field(
193
+ default_factory=lambda: defaultdict(list) # type: ignore
194
+ )
195
+ namespace: str = DEFAULT_NAMESPACE
196
+ working_path: str | Path = Field(default_factory=lambda: os.getcwd())
197
+ environment_config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
198
+ version: str = Field(default_factory=get_version)
199
+ cte_name_map: Dict[str, str] = Field(default_factory=dict)
200
+ materialized_concepts: set[str] = Field(default_factory=set)
201
+ alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
202
+ # TODO: support freezing environments to avoid mutation
203
+ frozen: bool = False
204
+ env_file_path: Path | None = None
205
+
206
def freeze(self):
    """Mark the environment as frozen.

    The mutating methods of this class check ``frozen`` and raise when it is set.
    """
    self.frozen = True
208
+
209
def thaw(self):
    """Clear the frozen flag so the environment can be mutated again."""
    self.frozen = False
211
+
212
def materialize_for_select(
    self, local_concepts: dict[str, "BuildConcept"] | None = None
) -> "BuildEnvironment":
    """Build a ``BuildEnvironment`` from this environment via ``Factory``.

    ``local_concepts`` is handed to the ``Factory`` constructor unchanged.
    """
    # imported at call time rather than at module import
    from trilogy.core.models.build import Factory

    factory = Factory(self, local_concepts=local_concepts)
    return factory.build(self)
219
+
220
def duplicate(self):
    """Return a copy of this environment built with ``model_construct``.

    Concepts, datasources and alias origins are duplicated entry by entry;
    the plain containers are copied. Fields not passed here (``frozen``,
    ``env_file_path``) take their declared defaults on the copy.
    """
    # copy each per-alias list as well as the mapping, so an import added to
    # the copy is not appended to the original's list; keep it a
    # defaultdict(list) like the field's default factory
    copied_imports = defaultdict(
        list, {alias: list(stmts) for alias, stmts in self.imports.items()}
    )
    return Environment.model_construct(
        datasources=self.datasources.duplicate(),
        concepts=self.concepts.duplicate(),
        functions=dict(self.functions),
        data_types=dict(self.data_types),
        imports=copied_imports,
        namespace=self.namespace,
        working_path=self.working_path,
        environment_config=self.environment_config,
        version=self.version,
        cte_name_map=dict(self.cte_name_map),
        materialized_concepts=set(self.materialized_concepts),
        alias_origin_lookup={
            k: v.duplicate() for k, v in self.alias_origin_lookup.items()
        },
    )
237
+
238
def _add_path_concepts(self):
    """Register ``_env_working_path``: a string constant holding ``working_path``."""
    working_path_constant = Function(
        operator=FunctionType.CONSTANT,
        arguments=[str(self.working_path)],
        output_datatype=DataType.STRING,
        output_purpose=Purpose.CONSTANT,
    )
    self.add_concept(
        Concept(
            name="_env_working_path",
            namespace=self.namespace,
            lineage=working_path_constant,
            datatype=DataType.STRING,
            granularity=Granularity.SINGLE_ROW,
            derivation=Derivation.CONSTANT,
            purpose=Purpose.CONSTANT,
        )
    )
254
+
255
def __init__(self, **data):
    """Initialise the model from ``data``, then add the working-path concept."""
    super().__init__(**data)
    # runs after field initialisation: it reads namespace and working_path
    self._add_path_concepts()
258
+
259
@classmethod
def from_file(cls, path: str | Path) -> "Environment":
    """Parse the file at ``path`` into a new ``Environment``.

    The environment's ``working_path`` is the file's parent directory and its
    ``env_file_path`` is the file itself.
    """
    path = Path(path)
    # decode explicitly as UTF-8, as add_file_import does, rather than
    # depending on the platform default encoding
    with open(path, "r", encoding="utf-8") as f:
        read = f.read()
    return Environment(working_path=path.parent, env_file_path=path).parse(read)[0]
266
+
267
@classmethod
def from_string(cls, input: str) -> "Environment":
    """Parse ``input`` into a fresh ``Environment`` and return that environment."""
    parsed_env, _ = Environment().parse(input)
    return parsed_env
270
+
271
@classmethod
def from_cache(cls, path) -> Optional["Environment"]:
    """Load an environment from the JSON cache file at ``path``.

    Returns None when the cached ``version`` differs from ``get_version()``.
    """
    with open(path, "r") as handle:
        payload = handle.read()
    cached = cls.model_validate_json(payload)
    return cached if cached.version == get_version() else None
280
+
281
def to_cache(self, path: Optional[str | Path] = None) -> Path:
    """Write this environment as JSON and return the path written.

    With no ``path``, the file is ``ENV_CACHE_NAME`` inside ``working_path``.
    """
    destination = Path(path) if path else Path(self.working_path) / ENV_CACHE_NAME
    with open(destination, "w") as handle:
        handle.write(self.model_dump_json())
    return destination
289
+
290
def validate_concept(self, new_concept: Concept, meta: Meta | None = None):
    """Check ``new_concept`` against any concept already stored at its address.

    Returns:
        The existing concept when it should be kept in place of
        ``new_concept`` (a persisted binding the redeclaration does not
        invalidate); otherwise None.

    Raises:
        ValueError: duplicate declarations are disallowed and the existing
            concept is neither persisted nor auto-derived.
    """
    lookup = new_concept.address
    existing: Concept = self.concepts.get(lookup)  # type: ignore
    if not existing or isinstance(existing, UndefinedConcept):
        return

    def handle_persist():
        """Decide whether a persisted binding survives this redeclaration."""
        deriv_lookup = (
            f"{existing.namespace}.{PERSISTED_CONCEPT_PREFIX}_{existing.name}"
        )

        alt_source = self.alias_origin_lookup.get(deriv_lookup)
        if not alt_source:
            return None
        # if the new concept binding has no lineage
        # nothing to cause us to think a persist binding
        # needs to be invalidated
        if not new_concept.lineage:
            return existing
        if str(alt_source.lineage) == str(new_concept.lineage):
            logger.info(
                f"Persisted concept {existing.address} matched redeclaration, keeping current persistence binding."
            )
            return existing
        logger.warning(
            f"Persisted concept {existing.address} lineage {str(alt_source.lineage)} did not match redeclaration {str(new_concept.lineage)}, overwriting and invalidating persist binding."
        )
        # drop the columns that bound the concept (or its persisted alias)
        # from every datasource that outputs it
        for k, datasource in self.datasources.items():
            if existing.address in datasource.output_concepts:
                logger.warning(
                    f"Removed concept for {existing} assignment from {k}"
                )
                clen = len(datasource.columns)
                datasource.columns = [
                    x
                    for x in datasource.columns
                    if x.concept.address != existing.address
                    and x.concept.address != deriv_lookup
                ]
                assert len(datasource.columns) < clen
                for x in datasource.columns:
                    logger.info(x)

        return None

    if self.environment_config.allow_duplicate_declaration:
        if existing.metadata.concept_source == ConceptSource.PERSIST_STATEMENT:
            return handle_persist()
        return None

    metadata = existing.metadata
    if metadata:
        if metadata.concept_source == ConceptSource.PERSIST_STATEMENT:
            return handle_persist()
        # if the existing concept is auto derived, we can overwrite it
        if metadata.concept_source == ConceptSource.AUTO_DERIVED:
            return None
        # The detailed errors below sat in ``elif`` arms after an
        # ``elif existing.metadata`` testing the same condition, so read as
        # one chain they could never run. They are raised from here so the
        # original line number reaches the user.
        if meta:
            raise ValueError(
                f"Assignment to concept '{lookup}' on line {meta.line} is a duplicate"
                f" declaration; '{lookup}' was originally defined on line"
                f" {metadata.line_number}"
            )
        raise ValueError(
            f"Assignment to concept '{lookup}' is a duplicate declaration;"
            f" '{lookup}' was originally defined on line"
            f" {metadata.line_number}"
        )
    raise ValueError(
        f"Assignment to concept '{lookup}' is a duplicate declaration;"
    )
362
+
363
def add_import(
    self, alias: str, source: Environment, imp_stm: Import | None = None
):
    """Merge ``source`` into this environment under ``alias``.

    Concepts, datasources and alias origins are copied across, re-namespaced
    with ``alias`` unless ``alias`` equals this environment's own namespace.
    The import statement is recorded once per (path, alias) pair.

    Raises:
        ValueError: if the environment is frozen.
    """
    if self.frozen:
        raise ValueError("Environment is frozen, cannot add imports")
    # setdefault works whether or not ``imports`` is a defaultdict
    existing = self.imports.setdefault(alias, [])
    if imp_stm is None:
        imp_stm = Import(alias=alias, path=Path(source.working_path))
    # compare Path with Path: ``working_path`` may be a str, and a Path never
    # compares equal to a str, which let duplicate records accumulate
    exists = any(
        x.path == imp_stm.path and x.alias == imp_stm.alias for x in existing
    )
    same_namespace = alias == self.namespace

    if not exists:
        existing.append(imp_stm)
    # we can't exit early
    # as there may be new concepts
    for k, concept in source.concepts.items():
        # skip internal namespace
        if INTERNAL_NAMESPACE in concept.address:
            continue
        if same_namespace:
            new = self.add_concept(concept)
        else:
            new = self.add_concept(concept.with_namespace(alias))
            k = address_with_namespace(k, alias)
        # set this explicitly, to handle aliasing
        self.concepts[k] = new

    for _, datasource in source.datasources.items():
        if same_namespace:
            self.add_datasource(datasource)
        else:
            self.add_datasource(datasource.with_namespace(alias))

    for key, val in source.alias_origin_lookup.items():
        if same_namespace:
            self.alias_origin_lookup[key] = val
        else:
            self.alias_origin_lookup[address_with_namespace(key, alias)] = (
                val.with_namespace(alias)
            )

    return self
416
+
417
def add_file_import(
    self, path: str | Path, alias: str, env: "Environment" | None = None
):
    """Import the ``.preql`` file at ``path`` under ``alias``.

    A string ``path`` is treated as a dotted module path relative to
    ``working_path`` (a trailing ``.preql`` is accepted); a ``Path`` is used
    as given. Unless ``env`` is supplied, the file is parsed into a new
    environment first.

    Returns:
        The ``Import`` record that was registered.

    Raises:
        ValueError: if the environment is frozen.
        ImportError: if the file cannot be read or parsed.
    """
    if self.frozen:
        raise ValueError("Environment is frozen, cannot add imports")
    from trilogy.parsing.parse_engine import (
        PARSER,
        ParseToObjects,
        gen_cache_lookup,
    )

    if isinstance(path, str):
        # "a.b.c" (optionally "a.b.c.preql") -> <working_path>/a/b/c.preql
        dotted = path.removesuffix(".preql")
        target = Path(self.working_path, *dotted.split(".")).with_suffix(".preql")
    else:
        target = path
    if not env:
        parse_address = gen_cache_lookup(str(target), alias, str(self.working_path))
        try:
            with open(target, "r", encoding="utf-8") as f:
                text = f.read()
            nenv = Environment(
                working_path=target.parent,
            )
            nenv.concepts.fail_on_missing = False
            # Parse into the environment configured above. Previously a
            # second, unconfigured Environment was handed to the parser, so
            # the fail_on_missing setting never took effect.
            # NOTE(review): confirm hydrate_missing() resolves the
            # placeholders created while fail_on_missing is off.
            nparser = ParseToObjects(
                environment=nenv,
                parse_address=parse_address,
                token_address=target,
            )
            nparser.set_text(text)
            nparser.transform(PARSER.parse(text))
            nparser.hydrate_missing()

        except Exception as e:
            # name the file (not its directory) and keep the original
            # traceback attached
            raise ImportError(
                f"Unable to import file {target}, parsing error: {e}"
            ) from e
        env = nparser.environment
    imps = Import(alias=alias, path=target)
    self.add_import(alias, source=env, imp_stm=imps)
    return imps
466
+
467
def parse(
    self, input: str, namespace: str | None = None, persist: bool = False
) -> Tuple["Environment", list]:
    """Parse ``input`` into this environment and return ``(self, statements)``.

    With ``namespace``, the text is parsed into a separate new environment
    which is then imported under that name. With ``persist``, every parsed
    ``PersistStatement`` is processed and its datasource added.
    """
    from trilogy import parse
    from trilogy.core.query_processor import process_persist
    from trilogy.core.statements.author import PersistStatement

    if namespace:
        isolated = Environment()
        _, queries = isolated.parse(input)
        self.add_import(namespace, isolated)
        return self, queries

    _, queries = parse(input, self)
    # persist statements are the only ones acted on here, in source order
    for statement in queries:
        if isinstance(statement, PersistStatement) and persist:
            processed = process_persist(self, statement)
            self.add_datasource(processed.datasource)
    return self, queries
504
+
505
def add_concept(
    self,
    concept: Concept,
    meta: Meta | None = None,
    force: bool = False,
    add_derived: bool = True,
):
    """Store ``concept`` under its address and generate its related concepts.

    Unless ``force`` is set, ``validate_concept`` runs first; when it returns
    an existing concept, that one is stored and returned instead.

    Raises:
        FrozenEnvironmentException: if the environment is frozen.
    """
    if self.frozen:
        raise FrozenEnvironmentException(
            "Environment is frozen, cannot add concepts"
        )
    if not force:
        kept = self.validate_concept(concept, meta=meta)
        concept = kept if kept else concept
    self.concepts[concept.address] = concept

    # imported at call time rather than at module import
    from trilogy.core.environment_helpers import generate_related_concepts

    generate_related_concepts(concept, self, meta=meta, add_derived=add_derived)

    return concept
528
+
529
def add_datasource(
    self,
    datasource: Datasource,
    meta: Meta | None = None,
):
    """Register ``datasource`` and promote derived concepts it materialises.

    For each column whose concept is known, defined and derived (not ROOT or
    CONSTANT), and when the datasource has no ``non_partial_for`` condition:
    the derived definition is kept under a ``PERSISTED_CONCEPT_PREFIX`` name,
    the concept itself is re-added as a lineage-free ROOT concept marked
    ``PERSIST_STATEMENT``, and the two are merged.

    Raises:
        FrozenEnvironmentException: if the environment is frozen.
    """
    if self.frozen:
        raise FrozenEnvironmentException(
            "Environment is frozen, cannot add datasource"
        )
    self.datasources[datasource.identifier] = datasource

    eligible_to_promote_roots = datasource.non_partial_for is None
    # mark this as canonical source
    for column in datasource.columns:
        address = column.concept.address
        if address not in self.concepts:
            continue
        new_persisted_concept = self.concepts[address]
        if isinstance(new_persisted_concept, UndefinedConcept):
            continue
        if not eligible_to_promote_roots:
            continue

        # TODO: refine this section;
        # too hacky for maintainability
        if new_persisted_concept.derivation in (
            Derivation.ROOT,
            Derivation.CONSTANT,
        ):
            continue

        logger.info(
            f"A datasource has been added which will persist derived concept {new_persisted_concept.address}"
        )
        persisted = f"{PERSISTED_CONCEPT_PREFIX}_" + new_persisted_concept.name
        already_persisted = (
            new_persisted_concept.metadata.concept_source
            == ConceptSource.PERSIST_STATEMENT
        )
        # NOTE(review): the indentation of this section was not recoverable
        # from the source view; the re-add below is taken to be the ``else``
        # of the concept_source check, not of the derivation check - confirm
        # against the upstream file.
        if already_persisted:
            self.add_concept(
                new_persisted_concept,
                meta=meta,
            )
            continue

        # override the current concept source to reflect that it's now coming from a datasource
        original_concept = new_persisted_concept.model_copy(
            deep=True,
            update={
                "name": persisted,
            },
        )
        self.add_concept(
            original_concept,
            meta=meta,
            force=True,
        )
        persisted_metadata = new_persisted_concept.metadata.model_copy(
            update={"concept_source": ConceptSource.PERSIST_STATEMENT}
        )
        new_persisted_concept = new_persisted_concept.model_copy(
            deep=True,
            update={
                "lineage": None,
                "metadata": persisted_metadata,
                "derivation": Derivation.ROOT,
            },
        )
        self.add_concept(
            new_persisted_concept,
            meta=meta,
            force=True,
        )
        self.merge_concept(original_concept, new_persisted_concept, [])
    return datasource
601
+
602
def delete_datasource(
    self,
    address: str,
    meta: Meta | None = None,
) -> bool:
    """Remove the datasource stored under ``address``.

    Returns:
        True if a datasource was removed, False if none was registered.

    Raises:
        ValueError: if the environment is frozen.
    """
    if self.frozen:
        raise ValueError("Environment is frozen, cannot delete datsources")
    if address not in self.datasources:
        return False
    del self.datasources[address]
    return True
614
+
615
def merge_concept(
    self,
    source: Concept,
    target: Concept,
    modifiers: List[Modifier],
    force: bool = False,
) -> bool:
    """Merge ``source`` into ``target`` across concepts and datasources.

    ``source`` is recorded in ``alias_origin_lookup``; every concept entry is
    either redirected to ``target`` (entries that were ``source``) or rebuilt
    with ``with_merge``; datasources that output ``source`` are merged too.

    Returns:
        False when the merge had already been applied and ``force`` is not
        set, True otherwise.

    Raises:
        SyntaxError: if either concept is a ``BuildConcept``.
        ValueError: if the environment is frozen.
    """
    from trilogy.core.models.build import BuildConcept

    # the source is checked before the target
    for candidate in (source, target):
        if isinstance(candidate, BuildConcept):
            raise SyntaxError(candidate)
    if self.frozen:
        raise ValueError("Environment is frozen, cannot merge concepts")

    # exit early if we've run this
    previously_merged = source.address in self.alias_origin_lookup and not force
    if previously_merged and self.concepts[source.address] == target:
        return False

    self.alias_origin_lookup[source.address] = source
    source.pseudonyms.add(target.address)

    replacements = {}
    for key, current in self.concepts.items():
        if current.address == target.address and source.address != target.address:
            current.pseudonyms.add(source.address)

        if current.address == source.address:
            replacements[key] = target
        else:
            # we need to update keys and grains of all concepts
            replacements[key] = current.with_merge(source, target, modifiers)
    self.concepts.update(replacements)

    for _, ds in self.datasources.items():
        if source.address in ds.output_lcl:
            ds.merge_concept(source, target, modifiers=modifiers)
    return True
655
+
656
+
657
class LazyEnvironment(Environment):
    """Variant of environment to defer parsing of a path
    until relevant attributes accessed."""

    # file parsed on first access to a non-exempt attribute
    load_path: Path
    # set once ``load_path`` has been parsed
    loaded: bool = False

    def __init__(self, **data):
        super().__init__(**data)

    def _add_path_concepts(self):
        # deliberately a no-op: nothing is added at construction time
        pass

    def __getattribute__(self, name):
        """Parse ``load_path`` on the first access to a non-exempt attribute.

        The names listed below and anything starting with ``_`` bypass the
        load, which keeps the loader's own attribute reads from recursing.
        """
        if name in (
            "load_path",
            "loaded",
            "working_path",
            "model_config",
            "model_fields",
            "model_post_init",
        ) or name.startswith("_"):
            return super().__getattribute__(name)
        if not self.loaded:
            logger.info(
                f"lazily evaluating load path {self.load_path} to access {name}"
            )
            from trilogy import parse

            env = Environment(working_path=str(self.working_path))
            # decode explicitly as UTF-8, as Environment.add_file_import does
            with open(self.load_path, "r", encoding="utf-8") as f:
                parse(f.read(), env)
            # flip the flag before assigning, so the assignments below do
            # not re-enter this branch
            self.loaded = True
            self.datasources = env.datasources
            self.concepts = env.concepts
            self.imports = env.imports
        return super().__getattribute__(name)
694
+
695
+
696
# Rebuild the model schema now that the module is fully defined; presumably
# needed for annotations that refer to Environment itself - TODO confirm.
Environment.model_rebuild()