pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1845 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +696 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +14 -16
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +180 -145
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +50 -44
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +11 -8
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +57 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +19 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +82 -66
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +50 -35
- trilogy/core/processing/nodes/merge_node.py +45 -36
- trilogy/core/processing/nodes/select_node_v2.py +53 -39
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +103 -75
- trilogy/core/query_processor.py +65 -47
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +146 -106
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +110 -93
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +303 -64
- trilogy/parsing/parse_engine.py +263 -617
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
trilogy/core/models/environment.py (new file)

@@ -0,0 +1,696 @@

```python
from __future__ import annotations

import difflib
import os
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Annotated,
    Dict,
    ItemsView,
    List,
    Never,
    Optional,
    Tuple,
    ValuesView,
)

from lark.tree import Meta
from pydantic import BaseModel, ConfigDict, Field
from pydantic.functional_validators import PlainValidator

from trilogy.constants import DEFAULT_NAMESPACE, ENV_CACHE_NAME, logger
from trilogy.core.constants import INTERNAL_NAMESPACE, PERSISTED_CONCEPT_PREFIX
from trilogy.core.enums import (
    ConceptSource,
    Derivation,
    FunctionType,
    Granularity,
    Modifier,
    Purpose,
)
from trilogy.core.exceptions import (
    FrozenEnvironmentException,
    UndefinedConceptException,
)
from trilogy.core.models.author import (
    Concept,
    ConceptRef,
    Function,
    UndefinedConcept,
    UndefinedConceptFull,
    address_with_namespace,
)
from trilogy.core.models.core import DataType
from trilogy.core.models.datasource import Datasource, EnvironmentDatasourceDict

if TYPE_CHECKING:
    from trilogy.core.models.build import BuildConcept, BuildEnvironment


@dataclass
class Import:
    alias: str
    path: Path


class EnvironmentOptions(BaseModel):
    allow_duplicate_declaration: bool = True


class EnvironmentConceptDict(dict):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(self, *args, **kwargs)
        self.undefined: dict[str, UndefinedConceptFull] = {}
        self.fail_on_missing: bool = True
        self.populate_default_concepts()

    def duplicate(self) -> "EnvironmentConceptDict":
        new = EnvironmentConceptDict()
        new.update({k: v.duplicate() for k, v in self.items()})
        new.undefined = self.undefined
        new.fail_on_missing = self.fail_on_missing
        return new

    def populate_default_concepts(self):
        from trilogy.core.internal import DEFAULT_CONCEPTS

        for concept in DEFAULT_CONCEPTS.values():
            self[concept.address] = concept

    def values(self) -> ValuesView[Concept]:  # type: ignore
        return super().values()

    def get(self, key: str, default: Concept | None = None) -> Concept | None:  # type: ignore
        try:
            return self.__getitem__(key)
        except UndefinedConceptException:
            return default

    def raise_undefined(
        self, key: str, line_no: int | None = None, file: Path | str | None = None
    ) -> Never:
        matches = self._find_similar_concepts(key)
        message = f"Undefined concept: {key}."
        if matches:
            message += f" Suggestions: {matches}"

        if line_no:
            if file:
                raise UndefinedConceptException(
                    f"{file}: {line_no}: " + message, matches
                )
            raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
        raise UndefinedConceptException(message, matches)

    def __getitem__(
        self, key: str, line_no: int | None = None, file: Path | None = None
    ) -> Concept | UndefinedConceptFull:
        if isinstance(key, ConceptRef):
            return self.__getitem__(key.address, line_no=line_no, file=file)
        try:
            return super(EnvironmentConceptDict, self).__getitem__(key)
        except KeyError:
            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
                return self.__getitem__(key.split(".", 1)[1], line_no)
            if DEFAULT_NAMESPACE + "." + key in self:
                return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
            if not self.fail_on_missing:
                if "." in key:
                    ns, rest = key.rsplit(".", 1)
                else:
                    ns = DEFAULT_NAMESPACE
                    rest = key
                if key in self.undefined:
                    return self.undefined[key]
                undefined = UndefinedConceptFull(
                    line_no=line_no,
                    datatype=DataType.UNKNOWN,
                    name=rest,
                    purpose=Purpose.UNKNOWN,
                    namespace=ns,
                )
                self.undefined[key] = undefined
                return undefined
            self.raise_undefined(key, line_no, file)

    def _find_similar_concepts(self, concept_name: str):
        def strip_local(input: str):
            if input.startswith(f"{DEFAULT_NAMESPACE}."):
                return input[len(DEFAULT_NAMESPACE) + 1 :]
            return input

        matches = difflib.get_close_matches(
            strip_local(concept_name), [strip_local(x) for x in self.keys()]
        )
        return matches

    def items(self) -> ItemsView[str, Concept]:  # type: ignore
        return super().items()


def validate_concepts(v) -> EnvironmentConceptDict:
    if isinstance(v, EnvironmentConceptDict):
        return v
    elif isinstance(v, dict):
        return EnvironmentConceptDict(
            **{x: Concept.model_validate(y) for x, y in v.items()}
        )
    raise ValueError


def validate_datasources(v) -> EnvironmentDatasourceDict:
    if isinstance(v, EnvironmentDatasourceDict):
        return v
    elif isinstance(v, dict):
        return EnvironmentDatasourceDict(
            **{x: Datasource.model_validate(y) for x, y in v.items()}
        )
    raise ValueError


def get_version():
    from trilogy import __version__

    return __version__


class Environment(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)

    concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
        Field(default_factory=EnvironmentConceptDict)
    )
    datasources: Annotated[
        EnvironmentDatasourceDict, PlainValidator(validate_datasources)
    ] = Field(default_factory=EnvironmentDatasourceDict)
    functions: Dict[str, Function] = Field(default_factory=dict)
    data_types: Dict[str, DataType] = Field(default_factory=dict)
    imports: Dict[str, list[Import]] = Field(
        default_factory=lambda: defaultdict(list)  # type: ignore
    )
    namespace: str = DEFAULT_NAMESPACE
    working_path: str | Path = Field(default_factory=lambda: os.getcwd())
    environment_config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
    version: str = Field(default_factory=get_version)
    cte_name_map: Dict[str, str] = Field(default_factory=dict)
    materialized_concepts: set[str] = Field(default_factory=set)
    alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
    # TODO: support freezing environments to avoid mutation
    frozen: bool = False
    env_file_path: Path | None = None

    def freeze(self):
        self.frozen = True

    def thaw(self):
        self.frozen = False

    def materialize_for_select(
        self, local_concepts: dict[str, "BuildConcept"] | None = None
    ) -> "BuildEnvironment":
        """helper method"""
        from trilogy.core.models.build import Factory

        return Factory(self, local_concepts=local_concepts).build(self)

    def duplicate(self):
        return Environment.model_construct(
            datasources=self.datasources.duplicate(),
            concepts=self.concepts.duplicate(),
            functions=dict(self.functions),
            data_types=dict(self.data_types),
            imports=dict(self.imports),
            namespace=self.namespace,
            working_path=self.working_path,
            environment_config=self.environment_config,
            version=self.version,
            cte_name_map=dict(self.cte_name_map),
            materialized_concepts=set(self.materialized_concepts),
            alias_origin_lookup={
                k: v.duplicate() for k, v in self.alias_origin_lookup.items()
            },
        )

    def _add_path_concepts(self):
        concept = Concept(
            name="_env_working_path",
            namespace=self.namespace,
            lineage=Function(
                operator=FunctionType.CONSTANT,
                arguments=[str(self.working_path)],
                output_datatype=DataType.STRING,
                output_purpose=Purpose.CONSTANT,
            ),
            datatype=DataType.STRING,
            granularity=Granularity.SINGLE_ROW,
            derivation=Derivation.CONSTANT,
            purpose=Purpose.CONSTANT,
        )
        self.add_concept(concept)

    def __init__(self, **data):
        super().__init__(**data)
        self._add_path_concepts()

    @classmethod
    def from_file(cls, path: str | Path) -> "Environment":
        if isinstance(path, str):
            path = Path(path)
        with open(path, "r") as f:
            read = f.read()
        return Environment(working_path=path.parent, env_file_path=path).parse(read)[0]

    @classmethod
    def from_string(cls, input: str) -> "Environment":
        return Environment().parse(input)[0]

    @classmethod
    def from_cache(cls, path) -> Optional["Environment"]:
        with open(path, "r") as f:
            read = f.read()
            base = cls.model_validate_json(read)
            version = get_version()
            if base.version != version:
                return None
            return base

    def to_cache(self, path: Optional[str | Path] = None) -> Path:
        if not path:
            ppath = Path(self.working_path) / ENV_CACHE_NAME
        else:
            ppath = Path(path)
        with open(ppath, "w") as f:
            f.write(self.model_dump_json())
        return ppath

    def validate_concept(self, new_concept: Concept, meta: Meta | None = None):
        lookup = new_concept.address
        existing: Concept = self.concepts.get(lookup)  # type: ignore
        if not existing or isinstance(existing, UndefinedConcept):
            return

        def handle_persist():
            deriv_lookup = (
                f"{existing.namespace}.{PERSISTED_CONCEPT_PREFIX}_{existing.name}"
            )

            alt_source = self.alias_origin_lookup.get(deriv_lookup)
            if not alt_source:
                return None
            # del self.alias_origin_lookup[deriv_lookup]
            # del self.concepts[deriv_lookup]
            # if the new concept binding has no lineage
            # nothing to cause us to think a persist binding
            # needs to be invalidated
            if not new_concept.lineage:
                return existing
            if str(alt_source.lineage) == str(new_concept.lineage):
                logger.info(
                    f"Persisted concept {existing.address} matched redeclaration, keeping current persistence binding."
                )
                return existing
            logger.warning(
                f"Persisted concept {existing.address} lineage {str(alt_source.lineage)} did not match redeclaration {str(new_concept.lineage)}, overwriting and invalidating persist binding."
            )
            for k, datasource in self.datasources.items():
                if existing.address in datasource.output_concepts:
                    logger.warning(
                        f"Removed concept for {existing} assignment from {k}"
                    )
                    clen = len(datasource.columns)
                    datasource.columns = [
                        x
                        for x in datasource.columns
                        if x.concept.address != existing.address
                        and x.concept.address != deriv_lookup
                    ]
                    assert len(datasource.columns) < clen
                    for x in datasource.columns:
                        logger.info(x)

            return None

        if existing and self.environment_config.allow_duplicate_declaration:
            if existing.metadata.concept_source == ConceptSource.PERSIST_STATEMENT:
                return handle_persist()
            return
        elif existing.metadata:
            if existing.metadata.concept_source == ConceptSource.PERSIST_STATEMENT:
                return handle_persist()
            # if the existing concept is auto derived, we can overwrite it
            if existing.metadata.concept_source == ConceptSource.AUTO_DERIVED:
                return None
        elif meta and existing.metadata:
            raise ValueError(
                f"Assignment to concept '{lookup}' on line {meta.line} is a duplicate"
                f" declaration; '{lookup}' was originally defined on line"
                f" {existing.metadata.line_number}"
            )
        elif existing.metadata:
            raise ValueError(
                f"Assignment to concept '{lookup}' is a duplicate declaration;"
                f" '{lookup}' was originally defined on line"
                f" {existing.metadata.line_number}"
            )
        raise ValueError(
            f"Assignment to concept '{lookup}' is a duplicate declaration;"
        )

    def add_import(
        self, alias: str, source: Environment, imp_stm: Import | None = None
    ):
        if self.frozen:
            raise ValueError("Environment is frozen, cannot add imports")
        exists = False
        existing = self.imports[alias]
        if imp_stm:
            if any(
                [x.path == imp_stm.path and x.alias == imp_stm.alias for x in existing]
            ):
                exists = True
        else:
            if any(
                [x.path == source.working_path and x.alias == alias for x in existing]
            ):
                exists = True
            imp_stm = Import(alias=alias, path=Path(source.working_path))
        same_namespace = alias == self.namespace

        if not exists:
            self.imports[alias].append(imp_stm)
        # we can't exit early
        # as there may be new concepts
        for k, concept in source.concepts.items():
            # skip internal namespace
            if INTERNAL_NAMESPACE in concept.address:
                continue
            if same_namespace:
                new = self.add_concept(concept)
            else:
                new = self.add_concept(concept.with_namespace(alias))
                k = address_with_namespace(k, alias)
            # set this explicitly, to handle aliasing
            self.concepts[k] = new

        for _, datasource in source.datasources.items():
            if same_namespace:
                self.add_datasource(datasource)
            else:
                self.add_datasource(datasource.with_namespace(alias))
        for key, val in source.alias_origin_lookup.items():
            if same_namespace:
                self.alias_origin_lookup[key] = val
            else:
                self.alias_origin_lookup[address_with_namespace(key, alias)] = (
                    val.with_namespace(alias)
                )

        return self

    def add_file_import(
        self, path: str | Path, alias: str, env: "Environment" | None = None
    ):
        if self.frozen:
            raise ValueError("Environment is frozen, cannot add imports")
        from trilogy.parsing.parse_engine import (
            PARSER,
            ParseToObjects,
            gen_cache_lookup,
        )

        if isinstance(path, str):
            if path.endswith(".preql"):
                path = path.rsplit(".", 1)[0]
            if "." not in path:
                target = Path(self.working_path, path)
            else:
                target = Path(self.working_path, *path.split("."))
            target = target.with_suffix(".preql")
        else:
            target = path
        if not env:
            parse_address = gen_cache_lookup(str(target), alias, str(self.working_path))
            try:
                with open(target, "r", encoding="utf-8") as f:
                    text = f.read()
                nenv = Environment(
                    working_path=target.parent,
                )
                nenv.concepts.fail_on_missing = False
                nparser = ParseToObjects(
                    environment=Environment(
                        working_path=target.parent,
                    ),
                    parse_address=parse_address,
                    token_address=target,
                )
                nparser.set_text(text)
                nparser.transform(PARSER.parse(text))
                nparser.hydrate_missing()

            except Exception as e:
                raise ImportError(
                    f"Unable to import file {target.parent}, parsing error: {e}"
                )
            env = nparser.environment
        imps = Import(alias=alias, path=target)
        self.add_import(alias, source=env, imp_stm=imps)
        return imps

    def parse(
        self, input: str, namespace: str | None = None, persist: bool = False
    ) -> Tuple["Environment", list]:
        from trilogy import parse
        from trilogy.core.query_processor import process_persist
        from trilogy.core.statements.author import (
            MultiSelectStatement,
            PersistStatement,
            SelectStatement,
            ShowStatement,
        )

        if namespace:
            new = Environment()
            _, queries = new.parse(input)
            self.add_import(namespace, new)
            return self, queries
        _, queries = parse(input, self)
        generatable = [
            x
            for x in queries
            if isinstance(
                x,
                (
                    SelectStatement,
                    PersistStatement,
                    MultiSelectStatement,
                    ShowStatement,
                ),
            )
        ]
        while generatable:
            t = generatable.pop(0)
            if isinstance(t, PersistStatement) and persist:
                processed = process_persist(self, t)
                self.add_datasource(processed.datasource)
        return self, queries

    def add_concept(
        self,
        concept: Concept,
        meta: Meta | None = None,
        force: bool = False,
        add_derived: bool = True,
    ):
        if self.frozen:
            raise FrozenEnvironmentException(
                "Environment is frozen, cannot add concepts"
            )
        if not force:
            existing = self.validate_concept(concept, meta=meta)
            if existing:
                concept = existing
        self.concepts[concept.address] = concept

        from trilogy.core.environment_helpers import generate_related_concepts

        generate_related_concepts(concept, self, meta=meta, add_derived=add_derived)

        return concept

    def add_datasource(
        self,
        datasource: Datasource,
        meta: Meta | None = None,
    ):
        if self.frozen:
            raise FrozenEnvironmentException(
                "Environment is frozen, cannot add datasource"
            )
        self.datasources[datasource.identifier] = datasource

        eligible_to_promote_roots = datasource.non_partial_for is None
        # mark this as canonical source
        for c in datasource.columns:
            cref = c.concept
            if cref.address not in self.concepts:
                continue
            new_persisted_concept = self.concepts[cref.address]
            if isinstance(new_persisted_concept, UndefinedConcept):
                continue
            if not eligible_to_promote_roots:
                continue

            current_derivation = new_persisted_concept.derivation
            # TODO: refine this section;
            # too hacky for maintainability
            if current_derivation not in (Derivation.ROOT, Derivation.CONSTANT):
                logger.info(
                    f"A datasource has been added which will persist derived concept {new_persisted_concept.address}"
                )
                persisted = f"{PERSISTED_CONCEPT_PREFIX}_" + new_persisted_concept.name
                # override the current concept source to reflect that it's now coming from a datasource
                if (
                    new_persisted_concept.metadata.concept_source
                    != ConceptSource.PERSIST_STATEMENT
                ):
                    original_concept = new_persisted_concept.model_copy(
                        deep=True,
                        update={
                            "name": persisted,
                        },
                    )
                    self.add_concept(
                        original_concept,
                        meta=meta,
                        force=True,
                    )
                    new_persisted_concept = new_persisted_concept.model_copy(
                        deep=True,
                        update={
                            "lineage": None,
                            "metadata": new_persisted_concept.metadata.model_copy(
                                update={
                                    "concept_source": ConceptSource.PERSIST_STATEMENT
                                }
                            ),
                            "derivation": Derivation.ROOT,
                        },
                    )
                    self.add_concept(
                        new_persisted_concept,
                        meta=meta,
                        force=True,
                    )
                    # datasource.add_column(original_concept, alias=c.alias, modifiers = c.modifiers)
                    self.merge_concept(original_concept, new_persisted_concept, [])
            else:
                self.add_concept(
                    new_persisted_concept,
                    meta=meta,
                )
        return datasource

    def delete_datasource(
        self,
        address: str,
        meta: Meta | None = None,
    ) -> bool:
        if self.frozen:
            raise ValueError("Environment is frozen, cannot delete datsources")
        if address in self.datasources:
            del self.datasources[address]
            # self.gen_concept_list_caches()
            return True
        return False

    def merge_concept(
        self,
        source: Concept,
        target: Concept,
        modifiers: List[Modifier],
        force: bool = False,
    ) -> bool:
        from trilogy.core.models.build import BuildConcept

        if isinstance(source, BuildConcept):
            raise SyntaxError(source)
        elif isinstance(target, BuildConcept):
            raise SyntaxError(target)
        if self.frozen:
            raise ValueError("Environment is frozen, cannot merge concepts")
        replacements = {}

        # exit early if we've run this
        if source.address in self.alias_origin_lookup and not force:
            if self.concepts[source.address] == target:
                return False

        self.alias_origin_lookup[source.address] = source
        self.alias_origin_lookup[source.address].pseudonyms.add(target.address)
        for k, v in self.concepts.items():
            if v.address == target.address:
                if source.address != target.address:
                    v.pseudonyms.add(source.address)

            if v.address == source.address:
                replacements[k] = target
            # we need to update keys and grains of all concepts
            else:
                replacements[k] = v.with_merge(source, target, modifiers)
        self.concepts.update(replacements)
        for k, ds in self.datasources.items():
            if source.address in ds.output_lcl:
                ds.merge_concept(source, target, modifiers=modifiers)
        return True


class LazyEnvironment(Environment):
    """Variant of environment to defer parsing of a path
    until relevant attributes accessed."""

    load_path: Path
    loaded: bool = False

    def __init__(self, **data):
        super().__init__(**data)

    def _add_path_concepts(self):
        pass

    def __getattribute__(self, name):
        if name in (
            "load_path",
            "loaded",
            "working_path",
            "model_config",
            "model_fields",
            "model_post_init",
        ) or name.startswith("_"):
            return super().__getattribute__(name)
        if not self.loaded:
            logger.info(
                f"lazily evaluating load path {self.load_path} to access {name}"
            )
            from trilogy import parse

            env = Environment(working_path=str(self.working_path))
            with open(self.load_path, "r") as f:
                parse(f.read(), env)
            self.loaded = True
            self.datasources = env.datasources
            self.concepts = env.concepts
            self.imports = env.imports
        return super().__getattribute__(name)


Environment.model_rebuild()
```