pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.138.dist-info/METADATA +525 -0
- pytrilogy-0.3.138.dist-info/RECORD +182 -0
- pytrilogy-0.3.138.dist-info/WHEEL +5 -0
- pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +9 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +87 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +143 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2672 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +494 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +748 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +517 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +106 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1359 -0
- trilogy/dialect/bigquery.py +256 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +177 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +91 -0
- trilogy/dialect/presto.py +104 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +90 -0
- trilogy/dialect/sql_server.py +92 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +750 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +7 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +289 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +460 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/parallel_execution.py +483 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/trilogy.py +772 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import difflib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Annotated, Dict, ItemsView, Never, ValuesView
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
from pydantic.functional_validators import PlainValidator
|
|
9
|
+
|
|
10
|
+
from trilogy.constants import DEFAULT_NAMESPACE
|
|
11
|
+
from trilogy.core.exceptions import (
|
|
12
|
+
UndefinedConceptException,
|
|
13
|
+
)
|
|
14
|
+
from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildFunction
|
|
15
|
+
from trilogy.core.models.core import DataType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BuildEnvironmentConceptDict(dict):
|
|
19
|
+
def __init__(self, *args, **kwargs) -> None:
|
|
20
|
+
super().__init__(self, *args, **kwargs)
|
|
21
|
+
|
|
22
|
+
def values(self) -> ValuesView[BuildConcept]: # type: ignore
|
|
23
|
+
return super().values()
|
|
24
|
+
|
|
25
|
+
def get(self, key: str, default: BuildConcept | None = None) -> BuildConcept | None: # type: ignore
|
|
26
|
+
try:
|
|
27
|
+
return self.__getitem__(key)
|
|
28
|
+
except UndefinedConceptException:
|
|
29
|
+
return default
|
|
30
|
+
|
|
31
|
+
def raise_undefined(
|
|
32
|
+
self, key: str, line_no: int | None = None, file: Path | str | None = None
|
|
33
|
+
) -> Never:
|
|
34
|
+
# build environment should never check for missing values.
|
|
35
|
+
if line_no is not None:
|
|
36
|
+
message = f"Concept '{key}' not found in environment at line {line_no}."
|
|
37
|
+
else:
|
|
38
|
+
message = f"Concept '{key}' not found in environment."
|
|
39
|
+
raise UndefinedConceptException(message, [])
|
|
40
|
+
|
|
41
|
+
def __getitem__(
|
|
42
|
+
self, key: str, line_no: int | None = None, file: Path | None = None
|
|
43
|
+
) -> BuildConcept:
|
|
44
|
+
try:
|
|
45
|
+
return super(BuildEnvironmentConceptDict, self).__getitem__(key)
|
|
46
|
+
except KeyError:
|
|
47
|
+
if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
|
|
48
|
+
return self.__getitem__(key.split(".", 1)[1], line_no)
|
|
49
|
+
if DEFAULT_NAMESPACE + "." + key in self:
|
|
50
|
+
return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
|
|
51
|
+
self.raise_undefined(key, line_no, file)
|
|
52
|
+
|
|
53
|
+
def _find_similar_concepts(self, concept_name: str):
|
|
54
|
+
def strip_local(input: str):
|
|
55
|
+
if input.startswith(f"{DEFAULT_NAMESPACE}."):
|
|
56
|
+
return input[len(DEFAULT_NAMESPACE) + 1 :]
|
|
57
|
+
return input
|
|
58
|
+
|
|
59
|
+
matches = difflib.get_close_matches(
|
|
60
|
+
strip_local(concept_name), [strip_local(x) for x in self.keys()]
|
|
61
|
+
)
|
|
62
|
+
return matches
|
|
63
|
+
|
|
64
|
+
def items(self) -> ItemsView[str, BuildConcept]: # type: ignore
|
|
65
|
+
return super().items()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class BuildEnvironmentDatasourceDict(dict):
    """Datasource registry keyed by name.

    Lookups resolve the default namespace in both directions; note the
    resolution order differs from the concept dict (prefixed form is tried
    first here). Misses propagate as plain KeyError.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Bug fix: previously called super().__init__(self, *args, **kwargs),
        # which passed the instance as dict's only allowed positional
        # argument and raised TypeError for any positional mapping input.
        super().__init__(*args, **kwargs)

    def __getitem__(self, key: str) -> BuildDatasource:
        try:
            return super(BuildEnvironmentDatasourceDict, self).__getitem__(key)
        except KeyError:
            # bare "x" -> retry as "local.x"
            if DEFAULT_NAMESPACE + "." + key in self:
                return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
            # "local.x" -> retry as bare "x"
            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
                return self.__getitem__(key.split(".", 1)[1])
            raise

    def values(self) -> ValuesView[BuildDatasource]:  # type: ignore
        return super().values()

    def items(self) -> ItemsView[str, BuildDatasource]:  # type: ignore
        return super().items()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def validate_concepts(v) -> BuildEnvironmentConceptDict:
    """Pydantic PlainValidator: coerce input to a BuildEnvironmentConceptDict.

    An existing BuildEnvironmentConceptDict passes through untouched; any
    other dict is rebuilt into the specialized subclass; everything else is
    rejected with ValueError.
    """
    if isinstance(v, BuildEnvironmentConceptDict):
        return v
    if isinstance(v, dict):
        rebuilt = {address: concept for address, concept in v.items()}
        return BuildEnvironmentConceptDict(**rebuilt)
    raise ValueError
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
    """Pydantic PlainValidator: coerce input to a BuildEnvironmentDatasourceDict.

    Mirrors validate_concepts: pass through the specialized dict, rebuild a
    plain dict, reject anything else with ValueError.
    """
    if isinstance(v, BuildEnvironmentDatasourceDict):
        return v
    if isinstance(v, dict):
        rebuilt = {name: datasource for name, datasource in v.items()}
        return BuildEnvironmentDatasourceDict(**rebuilt)
    raise ValueError
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class BuildEnvironment(BaseModel):
    """Build-stage environment: concept and datasource registries, plus
    cached lookups of which concepts are materialized in datasources.

    The three `materialized_*` sets are caches populated by
    gen_concept_list_caches(), not maintained incrementally.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)

    # address -> concept; plain dicts are coerced via the PlainValidator
    concepts: Annotated[
        BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=BuildEnvironmentConceptDict)

    # canonical address -> concept
    canonical_concepts: Annotated[
        BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=BuildEnvironmentConceptDict)

    datasources: Annotated[
        BuildEnvironmentDatasourceDict, PlainValidator(validate_datasources)
    ] = Field(default_factory=BuildEnvironmentDatasourceDict)
    functions: Dict[str, BuildFunction] = Field(default_factory=dict)
    data_types: Dict[str, DataType] = Field(default_factory=dict)
    namespace: str = DEFAULT_NAMESPACE
    cte_name_map: Dict[str, str] = Field(default_factory=dict)
    # caches recomputed by gen_concept_list_caches()
    materialized_concepts: set[str] = Field(default_factory=set)
    materialized_canonical_concepts: set[str] = Field(default_factory=set)
    non_partial_materialized_canonical_concepts: set[str] = Field(default_factory=set)
    # alias address -> originating concept — presumably populated during
    # environment build; TODO confirm against the builder
    alias_origin_lookup: Dict[str, BuildConcept] = Field(default_factory=dict)

    def gen_concept_list_caches(self) -> None:
        """Recompute the materialized-concept caches from datasource columns.

        A concept is "materialized" when some datasource column binds it;
        "non-partial" additionally requires that column to be complete
        (column.is_complete). Both registered concepts and alias origins
        count toward materialization.
        """
        concrete_concepts: list[BuildConcept] = []
        non_partial_concrete_concepts: list[BuildConcept] = []
        for datasource in self.datasources.values():
            for column in datasource.columns:
                if column.is_complete:
                    non_partial_concrete_concepts.append(column.concept)
                concrete_concepts.append(column.concept)
        concrete_addresses = set([x.address for x in concrete_concepts])
        canonical_addresses = set([x.canonical_address for x in concrete_concepts])
        non_partial_canonical_addresses = set(
            [x.canonical_address for x in non_partial_concrete_concepts]
        )

        # materialized by exact address
        self.materialized_concepts = set(
            [
                c.address
                for c in self.concepts.values()
                if c.address in concrete_addresses
            ]
            + [
                c.address
                for c in self.alias_origin_lookup.values()
                if c.address in concrete_addresses
            ],
        )
        # materialized by canonical address
        self.materialized_canonical_concepts = set(
            [
                c.canonical_address
                for c in self.concepts.values()
                if c.canonical_address in canonical_addresses
            ]
            + [
                c.canonical_address
                for c in self.alias_origin_lookup.values()
                if c.canonical_address in canonical_addresses
            ],
        )
        # materialized by canonical address via a complete column only
        self.non_partial_materialized_canonical_concepts = set(
            [
                c.canonical_address
                for c in self.concepts.values()
                if c.canonical_address in non_partial_canonical_addresses
            ]
            + [
                c.canonical_address
                for c in self.alias_origin_lookup.values()
                if c.canonical_address in non_partial_canonical_addresses
            ],
        )


# Resolve forward references now that all dependent models are defined.
BuildEnvironment.model_rebuild()
|
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from collections import UserDict, UserList
|
|
5
|
+
from datetime import date, datetime
|
|
6
|
+
from decimal import Decimal
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import (
|
|
9
|
+
Any,
|
|
10
|
+
Callable,
|
|
11
|
+
Dict,
|
|
12
|
+
Generic,
|
|
13
|
+
List,
|
|
14
|
+
Sequence,
|
|
15
|
+
Tuple,
|
|
16
|
+
TypeVar,
|
|
17
|
+
Union,
|
|
18
|
+
get_args,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from pydantic import (
|
|
22
|
+
BaseModel,
|
|
23
|
+
ConfigDict,
|
|
24
|
+
field_validator,
|
|
25
|
+
)
|
|
26
|
+
from pydantic_core import core_schema
|
|
27
|
+
|
|
28
|
+
from trilogy.constants import (
|
|
29
|
+
MagicConstants,
|
|
30
|
+
)
|
|
31
|
+
from trilogy.core.enums import DatePart, Modifier, Ordering
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DataTyped(ABC):
    """Mixin for objects that can report the datatype of their output."""

    # this is not abstract
    # only because when it's a pydantic property, it fails validation
    @property
    def output_datatype(self) -> CONCRETE_TYPES:  # type: ignore
        """
        This is a huge hack to get property vs pydantic attribute inheritance to work.
        """
        # Subclasses that store the datatype as a pydantic field put it in
        # the instance __dict__; fall through to that value when present,
        # otherwise behave like an unimplemented abstract property.
        if "output_datatype" in self.__dict__:
            return self.__dict__["output_datatype"]
        raise NotImplementedError
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Addressable(ABC):
    """Mixin for objects identified by an address."""

    @property
    def _address(self):
        # Alias for `address`; subclasses are expected to define the
        # `address` attribute — TODO confirm against implementers.
        return self.address
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Types accepted wherever a type definition may appear, including
# DataTyped carriers whose concrete type must still be resolved.
TYPEDEF_TYPES = Union[
    "DataType",
    "MapType",
    "ArrayType",
    "NumericType",
    "StructType",
    "DataTyped",
    "TraitDataType",
]

# Fully-resolved types: TYPEDEF_TYPES minus the abstract DataTyped.
CONCRETE_TYPES = Union[
    "DataType",
    "MapType",
    "ArrayType",
    "NumericType",
    "StructType",
    "TraitDataType",
]

KT = TypeVar("KT")  # map key type
VT = TypeVar("VT")  # map value / list element type
LT = TypeVar("LT")  # not referenced in this file — TODO confirm usage
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class DataType(Enum):
|
|
80
|
+
# PRIMITIVES
|
|
81
|
+
STRING = "string"
|
|
82
|
+
BOOL = "bool"
|
|
83
|
+
MAP = "map"
|
|
84
|
+
NUMBER = "number"
|
|
85
|
+
FLOAT = "float"
|
|
86
|
+
NUMERIC = "numeric"
|
|
87
|
+
INTEGER = "int"
|
|
88
|
+
BIGINT = "bigint"
|
|
89
|
+
DATE = "date"
|
|
90
|
+
DATETIME = "datetime"
|
|
91
|
+
TIMESTAMP = "timestamp"
|
|
92
|
+
ARRAY = "array"
|
|
93
|
+
DATE_PART = "date_part"
|
|
94
|
+
STRUCT = "struct"
|
|
95
|
+
GEOGRAPHY = "geography"
|
|
96
|
+
NULL = "null"
|
|
97
|
+
|
|
98
|
+
# GRANULAR
|
|
99
|
+
UNIX_SECONDS = "unix_seconds"
|
|
100
|
+
|
|
101
|
+
# PARSING
|
|
102
|
+
UNKNOWN = "unknown"
|
|
103
|
+
ANY = "any"
|
|
104
|
+
|
|
105
|
+
@property
|
|
106
|
+
def data_type(self):
|
|
107
|
+
return self
|
|
108
|
+
|
|
109
|
+
def __str__(self) -> str:
|
|
110
|
+
return self.name
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class TraitDataType(BaseModel):
    """A base datatype tagged with a list of string traits.

    Compares equal to a bare DataType matching its base type (traits are
    ignored in that comparison), and hashes on the base type only so hash
    stays consistent with that equality.
    """

    type: DataType | NumericType | StructType | ArrayType | MapType
    traits: list[str]

    def __hash__(self):
        # Hash only the base type: consistent with __eq__ vs a bare DataType.
        return hash(self.type)

    def __str__(self) -> str:
        return f"Trait<{self.type}, {self.traits}>"

    def __eq__(self, other):
        if isinstance(other, DataType):
            # Traits are ignored when compared against a plain DataType.
            return self.type == other
        elif isinstance(other, TraitDataType):
            return self.type == other.type and self.traits == other.traits
        return False

    @property
    def data_type(self):
        # Unwrap to the underlying base type.
        return self.type

    @property
    def value(self):
        return self.data_type.value
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class NumericType(BaseModel):
    """A fixed-precision decimal: DataType.NUMERIC with precision and scale."""

    precision: int = 20
    scale: int = 5

    def __str__(self) -> str:
        return f"Numeric({self.precision},{self.scale})"

    @property
    def data_type(self):
        return DataType.NUMERIC

    @property
    def value(self):
        return self.data_type.value
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class ArrayType(BaseModel):
    """An array/list type parameterized by its element type.

    Frozen so instances are hashable and safe to use as cache keys.
    """

    model_config = ConfigDict(frozen=True)
    type: TYPEDEF_TYPES

    @field_validator("type", mode="plain")
    def validate_type(cls, v):
        # Plain-mode validator: accept the value as-is, bypassing pydantic's
        # union coercion for TYPEDEF_TYPES.
        return v

    def __str__(self) -> str:
        return f"ListType<{self.type}>"

    @property
    def data_type(self):
        return DataType.ARRAY

    @property
    def value(self) -> str:
        return self.data_type.value

    @property
    def value_data_type(
        self,
    ) -> CONCRETE_TYPES:
        """Element type, resolving a DataTyped carrier to its concrete type."""
        if isinstance(self.type, DataTyped):
            return self.type.output_datatype
        return self.type
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class MapType(BaseModel):
    """A map type parameterized by key and value types."""

    key_type: TYPEDEF_TYPES
    value_type: TYPEDEF_TYPES

    @field_validator("value_type", mode="plain")
    def validate_type(cls, v):
        # Accept as-is; skip union coercion.
        return v

    @field_validator("key_type", mode="plain")
    def validate_key_type(cls, v):
        # Accept as-is; skip union coercion.
        return v

    @property
    def data_type(self):
        return DataType.MAP

    @property
    def value(self):
        return self.data_type.value

    @property
    def value_data_type(
        self,
    ) -> CONCRETE_TYPES:
        """Value type, resolving a DataTyped carrier to its concrete type."""
        if isinstance(self.value_type, DataTyped):
            return self.value_type.output_datatype
        return self.value_type

    @property
    def key_data_type(
        self,
    ) -> CONCRETE_TYPES:
        """Key type, resolving a DataTyped carrier to its concrete type."""
        if isinstance(self.key_type, DataTyped):
            return self.key_type.output_datatype
        return self.key_type
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class StructComponent(BaseModel):
    """A single named field inside a StructType."""

    name: str
    type: TYPEDEF_TYPES
    modifiers: list[Modifier] = []

    @field_validator("type", mode="plain")
    def validate_type(cls, v):
        # Accept as-is; skip union coercion.
        return v
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class StructType(BaseModel):
    """A struct type: an ordered field sequence plus a name -> definition map."""

    fields: Sequence[StructComponent | TYPEDEF_TYPES]
    fields_map: Dict[str, DataTyped | int | float | str | StructComponent]

    def __repr__(self):
        # Only named StructComponent entries appear in the repr; bare
        # type entries are omitted.
        return "struct<{}>".format(
            ", ".join(
                f"{field.name}:{field.type.name}"
                for field in self.fields
                if isinstance(field, StructComponent)
            )
        )

    def __str__(self) -> str:
        return self.__repr__()

    @field_validator("fields", mode="plain")
    def validate_type(cls, v):
        # Accept entries as-is (no union coercion); copy into a fresh list.
        final = []
        for field in v:
            final.append(field)
        return final

    @field_validator("fields_map", mode="plain")
    def validate_fields_map(cls, v):
        # Accept as-is.
        return v

    @property
    def data_type(self):
        return DataType.STRUCT

    @property
    def value(self):
        return self.data_type.value

    @property
    def field_types(self) -> Dict[str, CONCRETE_TYPES]:
        """Map each field name to its resolved concrete type.

        Non-StructComponent fields are matched to fields_map keys by
        position, so the two collections are assumed to be index-aligned —
        TODO confirm at construction sites.
        """
        out: Dict[str, CONCRETE_TYPES] = {}
        keys = list(self.fields_map.keys())
        for idx, field in enumerate(self.fields):
            if isinstance(field, StructComponent):
                out[field.name] = arg_to_datatype(field.type)
            elif isinstance(field, DataTyped):
                out[keys[idx]] = field.output_datatype
            else:
                out[keys[idx]] = field
        return out

    def __hash__(self):
        # Hash the string form; consistent with repr-based identity.
        return hash(str(self))
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
class ListWrapper(Generic[VT], UserList):
    """Used to distinguish parsed list objects from other lists"""

    def __init__(self, *args, type: DataType, nullable: bool = False, **kwargs):
        super().__init__(*args, **kwargs)
        # inferred element datatype and whether NULL elements were present
        self.type = type
        self.nullable = nullable

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        # Validate as List[T] when parameterized (ListWrapper[int]),
        # otherwise as a bare list, then coerce through cls.validate.
        args = get_args(source_type)
        if args:
            schema = handler(List[args])  # type: ignore
        else:
            schema = handler(List)
        return core_schema.no_info_after_validator_function(cls.validate, schema)

    @classmethod
    def validate(cls, v):
        # Element type inferred from the first item; assumes v is non-empty
        # and homogeneous — TODO confirm callers guarantee this.
        return cls(v, type=arg_to_datatype(v[0]))
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class MapWrapper(Generic[KT, VT], UserDict):
    """Used to distinguish parsed map objects from other dicts"""

    def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
        super().__init__(*args, **kwargs)
        # inferred datatypes of the map's keys and values
        self.key_type = key_type
        self.value_type = value_type

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        # Validate as Dict[K, V] when parameterized, otherwise as a bare
        # dict, then coerce through cls.validate.
        args = get_args(source_type)
        if args:
            schema = handler(Dict[args])  # type: ignore
        else:
            schema = handler(Dict)
        return core_schema.no_info_after_validator_function(cls.validate, schema)

    @classmethod
    def validate(cls, v):
        # Types inferred from one arbitrary entry; assumes a non-empty,
        # homogeneous mapping — TODO confirm callers guarantee this.
        return cls(
            v,
            key_type=arg_to_datatype(list(v.keys()).pop()),
            value_type=arg_to_datatype(list(v.values()).pop()),
        )
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
class TupleWrapper(Generic[VT], tuple):
    """Used to distinguish parsed tuple objects from other tuples"""

    def __init__(self, val, type: DataType, nullable: bool = False, **kwargs):
        super().__init__()
        # inferred element datatype, original value, and NULL-element flag
        self.type = type
        self.val = val
        self.nullable = nullable

    def __getnewargs__(self):
        # Pickle/copy support: reconstruct via (val, type).
        return (self.val, self.type)

    def __new__(cls, val, type: DataType, **kwargs):
        # tuple is immutable, so the contents are fixed in __new__;
        # the extra attributes are attached in __init__.
        return super().__new__(cls, tuple(val))

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        # Validate as Tuple[...] when parameterized, otherwise as a bare
        # tuple, then coerce through cls.validate.
        args = get_args(source_type)
        if args:
            schema = handler(Tuple[args])  # type: ignore
        else:
            schema = handler(Tuple)
        return core_schema.no_info_after_validator_function(cls.validate, schema)

    @classmethod
    def validate(cls, v):
        # Element type inferred from the first item; assumes non-empty input.
        return cls(v, type=arg_to_datatype(v[0]))
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def list_to_wrapper(args):
    """Wrap a parsed literal list in a ListWrapper, inferring the element type.

    NULL elements are ignored for type inference but mark the result as
    nullable; mixed non-null element types are a syntax error.
    """
    inferred = [arg_to_datatype(item) for item in args]
    non_null = [dtype for dtype in inferred if dtype != DataType.NULL]
    if len(set(non_null)) != 1:
        raise SyntaxError(
            f"Cannot create a list with this set of types: {set(non_null)}"
        )
    return ListWrapper(args, type=non_null[0], nullable=DataType.NULL in inferred)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def tuple_to_wrapper(args):
    """Wrap a parsed literal tuple in a TupleWrapper, inferring the element type.

    NULL elements are ignored for type inference but mark the result as
    nullable; mixed non-null element types are a syntax error.
    """
    inferred = [arg_to_datatype(item) for item in args]
    non_null = [dtype for dtype in inferred if dtype != DataType.NULL]
    if len(set(non_null)) != 1:
        raise SyntaxError(
            f"Cannot create a tuple with this set of types: {set(non_null)}"
        )
    return TupleWrapper(args, type=non_null[0], nullable=DataType.NULL in inferred)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def dict_to_map_wrapper(arg):
    """Wrap a parsed literal dict in a MapWrapper, inferring key/value types.

    Requires homogeneous key types and homogeneous value types.
    """
    key_types = [arg_to_datatype(key) for key in arg.keys()]

    value_types = [arg_to_datatype(value) for value in arg.values()]
    assert len(set(key_types)) == 1
    # Bug fix: this second check previously re-validated key_types, so
    # heterogeneous value types were silently accepted.
    assert len(set(value_types)) == 1
    return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def merge_datatypes(
    inputs: list[
        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    ],
) -> DataType | ArrayType | StructType | MapType | NumericType | TraitDataType:
    """This is a temporary hack for doing between
    allowable datatype transformation matrix"""
    # single input: nothing to merge
    if len(inputs) == 1:
        return inputs[0]
    # numeric widening: int + float -> float, int + numeric -> numeric
    if set(inputs) == {DataType.INTEGER, DataType.FLOAT}:
        return DataType.FLOAT
    if set(inputs) == {DataType.INTEGER, DataType.NUMERIC}:
        return DataType.NUMERIC
    # a parameterized NumericType mixed only with numeric primitives wins,
    # preserving its precision/scale
    if any(isinstance(x, NumericType) for x in inputs) and all(
        isinstance(x, NumericType)
        or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
        for x in inputs
    ):
        candidate = next(x for x in inputs if isinstance(x, NumericType))
        return candidate
    # fallback: arbitrarily keep the first input's type
    return inputs[0]
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def is_compatible_datatype(left, right):
    """Return True when values of types `left` and `right` may be compared.

    Either side may be a list of candidate types, in which case any
    compatible pairing suffices. ANY and UNKNOWN are compatible with
    everything (except a NumericType paired with a non-numeric, which is
    rejected before the UNKNOWN check).
    """
    # for unknown types, we can't make any assumptions
    if isinstance(left, list):
        return any(is_compatible_datatype(ltype, right) for ltype in left)
    if isinstance(right, list):
        return any(is_compatible_datatype(left, rtype) for rtype in right)
    if left == DataType.ANY or right == DataType.ANY:
        return True
    # all-numeric pairings (NumericType, INTEGER, FLOAT, NUMERIC) are
    # mutually compatible
    if all(
        isinstance(x, NumericType)
        or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
        for x in (left, right)
    ):
        return True
    elif isinstance(left, NumericType) or isinstance(right, NumericType):
        # NumericType paired with anything non-numeric: incompatible
        return False
    if right == DataType.UNKNOWN or left == DataType.UNKNOWN:
        return True
    if left == right:
        return True
    # NOTE(review): these pairwise numeric checks are already covered by the
    # all-numeric branch above and appear unreachable
    if {left, right} == {DataType.NUMERIC, DataType.FLOAT}:
        return True

    if {left, right} == {DataType.NUMERIC, DataType.INTEGER}:
        return True
    if {left, right} == {DataType.FLOAT, DataType.INTEGER}:
        return True
    return False
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def arg_to_datatype(arg) -> CONCRETE_TYPES:
    """Map a literal / parsed value or type object to its concrete datatype.

    Branch order is load-bearing: bool before int (bool subclasses int),
    wrapper classes before their builtin bases, datetime before date
    (datetime subclasses date).

    Raises ValueError for unrecognized inputs.
    """

    if isinstance(arg, MagicConstants):
        if arg == MagicConstants.NULL:
            return DataType.NULL
        raise ValueError(f"Cannot parse arg datatype for arg of type {arg}")
    elif isinstance(arg, bool):
        # must precede int: isinstance(True, int) is True
        return DataType.BOOL
    elif isinstance(arg, Ordering):
        return DataType.STRING  # TODO: revisit
    elif isinstance(arg, int):
        return DataType.INTEGER
    elif isinstance(arg, str):
        return DataType.STRING
    elif isinstance(arg, float):
        return DataType.FLOAT
    elif isinstance(arg, Decimal):
        return DataType.NUMERIC
    elif isinstance(arg, DataType):
        return arg
    elif isinstance(arg, NumericType):
        return arg
    elif isinstance(arg, TraitDataType):
        return arg
    elif isinstance(arg, ListWrapper):
        # must precede the plain-list branch below
        return ArrayType(type=arg.type)
    elif isinstance(arg, ArrayType):
        return arg
    elif isinstance(arg, MapType):
        return arg
    elif isinstance(arg, DataTyped):
        return arg.output_datatype
    elif isinstance(arg, TupleWrapper):
        return ArrayType(type=arg.type)
    elif isinstance(arg, list):
        # unwrapped literal list: infer the element type first
        wrapper = list_to_wrapper(arg)
        return ArrayType(type=wrapper.type)
    elif isinstance(arg, MapWrapper):
        return MapType(key_type=arg.key_type, value_type=arg.value_type)
    elif isinstance(arg, datetime):
        # must precede date: datetime is a subclass of date
        return DataType.DATETIME
    elif isinstance(arg, date):
        return DataType.DATE
    elif isinstance(arg, StructComponent):
        return arg_to_datatype(arg.type)
    elif isinstance(arg, DatePart):
        return DataType.DATE_PART
    else:
        raise ValueError(
            f"Cannot parse arg datatype for arg of raw type {type(arg)} value {arg}"
        )
|