pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1845 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +696 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +17 -22
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +181 -146
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +51 -45
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +13 -10
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +59 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +19 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +92 -77
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +55 -40
- trilogy/core/processing/nodes/merge_node.py +47 -38
- trilogy/core/processing/nodes/select_node_v2.py +54 -40
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +108 -80
- trilogy/core/query_processor.py +67 -49
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +152 -111
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +110 -93
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +303 -64
- trilogy/parsing/parse_engine.py +263 -617
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import difflib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Annotated, Dict, ItemsView, Never, ValuesView
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
from pydantic.functional_validators import PlainValidator
|
|
9
|
+
|
|
10
|
+
from trilogy.constants import DEFAULT_NAMESPACE
|
|
11
|
+
from trilogy.core.exceptions import (
|
|
12
|
+
UndefinedConceptException,
|
|
13
|
+
)
|
|
14
|
+
from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildFunction
|
|
15
|
+
from trilogy.core.models.core import DataType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BuildEnvironmentConceptDict(dict):
|
|
19
|
+
def __init__(self, *args, **kwargs) -> None:
|
|
20
|
+
super().__init__(self, *args, **kwargs)
|
|
21
|
+
|
|
22
|
+
def values(self) -> ValuesView[BuildConcept]: # type: ignore
|
|
23
|
+
return super().values()
|
|
24
|
+
|
|
25
|
+
def get(self, key: str, default: BuildConcept | None = None) -> BuildConcept | None: # type: ignore
|
|
26
|
+
try:
|
|
27
|
+
return self.__getitem__(key)
|
|
28
|
+
except UndefinedConceptException:
|
|
29
|
+
return default
|
|
30
|
+
|
|
31
|
+
def raise_undefined(
|
|
32
|
+
self, key: str, line_no: int | None = None, file: Path | str | None = None
|
|
33
|
+
) -> Never:
|
|
34
|
+
|
|
35
|
+
matches = self._find_similar_concepts(key)
|
|
36
|
+
message = f"Undefined concept: {key}."
|
|
37
|
+
if matches:
|
|
38
|
+
message += f" Suggestions: {matches}"
|
|
39
|
+
|
|
40
|
+
if line_no:
|
|
41
|
+
if file:
|
|
42
|
+
raise UndefinedConceptException(
|
|
43
|
+
f"{file}: {line_no}: " + message, matches
|
|
44
|
+
)
|
|
45
|
+
raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
|
|
46
|
+
raise UndefinedConceptException(message, matches)
|
|
47
|
+
|
|
48
|
+
def __getitem__(
|
|
49
|
+
self, key: str, line_no: int | None = None, file: Path | None = None
|
|
50
|
+
) -> BuildConcept:
|
|
51
|
+
try:
|
|
52
|
+
return super(BuildEnvironmentConceptDict, self).__getitem__(key)
|
|
53
|
+
except KeyError:
|
|
54
|
+
if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
|
|
55
|
+
return self.__getitem__(key.split(".", 1)[1], line_no)
|
|
56
|
+
if DEFAULT_NAMESPACE + "." + key in self:
|
|
57
|
+
return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
|
|
58
|
+
self.raise_undefined(key, line_no, file)
|
|
59
|
+
|
|
60
|
+
def _find_similar_concepts(self, concept_name: str):
|
|
61
|
+
def strip_local(input: str):
|
|
62
|
+
if input.startswith(f"{DEFAULT_NAMESPACE}."):
|
|
63
|
+
return input[len(DEFAULT_NAMESPACE) + 1 :]
|
|
64
|
+
return input
|
|
65
|
+
|
|
66
|
+
matches = difflib.get_close_matches(
|
|
67
|
+
strip_local(concept_name), [strip_local(x) for x in self.keys()]
|
|
68
|
+
)
|
|
69
|
+
return matches
|
|
70
|
+
|
|
71
|
+
def items(self) -> ItemsView[str, BuildConcept]: # type: ignore
|
|
72
|
+
return super().items()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class BuildEnvironmentDatasourceDict(dict):
    """Dict of datasource name -> BuildDatasource with namespace-aware lookup.

    Missing keys are retried with the default namespace prepended or
    stripped before the original KeyError is re-raised.
    """

    def __init__(self, *args, **kwargs) -> None:
        # BUG FIX: the original called super().__init__(self, *args, **kwargs),
        # passing `self` as an extra positional mapping to dict.__init__ and
        # raising TypeError whenever a positional mapping was supplied.
        super().__init__(*args, **kwargs)

    def __getitem__(self, key: str) -> BuildDatasource:
        try:
            return super(BuildEnvironmentDatasourceDict, self).__getitem__(key)
        except KeyError:
            # Retry with the default namespace prepended, then stripped.
            if DEFAULT_NAMESPACE + "." + key in self:
                return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
                return self.__getitem__(key.split(".", 1)[1])
            raise

    def values(self) -> ValuesView[BuildDatasource]:  # type: ignore
        return super().values()

    def items(self) -> ItemsView[str, BuildDatasource]:  # type: ignore
        return super().items()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def validate_concepts(v) -> BuildEnvironmentConceptDict:
    """Coerce input into a BuildEnvironmentConceptDict.

    Existing concept dicts pass through untouched; plain dicts have every
    value validated as a BuildConcept. Anything else is rejected.
    """
    if isinstance(v, BuildEnvironmentConceptDict):
        return v
    if isinstance(v, dict):
        validated = {key: BuildConcept.model_validate(raw) for key, raw in v.items()}
        return BuildEnvironmentConceptDict(**validated)
    raise ValueError
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
    """Coerce input into a BuildEnvironmentDatasourceDict.

    Existing datasource dicts pass through untouched; plain dicts have every
    value validated as a BuildDatasource. Anything else is rejected.
    """
    if isinstance(v, BuildEnvironmentDatasourceDict):
        return v
    if isinstance(v, dict):
        validated = {
            key: BuildDatasource.model_validate(raw) for key, raw in v.items()
        }
        return BuildEnvironmentDatasourceDict(**validated)
    raise ValueError
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class BuildEnvironment(BaseModel):
    """Fully-resolved build environment: validated concepts, datasources,
    functions, and lookup caches used during query processing."""

    model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)

    # Concept/datasource maps run through PlainValidators so plain dicts
    # are coerced into the namespace-aware dict subclasses.
    concepts: Annotated[
        BuildEnvironmentConceptDict, PlainValidator(validate_concepts)
    ] = Field(default_factory=BuildEnvironmentConceptDict)
    datasources: Annotated[
        BuildEnvironmentDatasourceDict, PlainValidator(validate_datasources)
    ] = Field(default_factory=BuildEnvironmentDatasourceDict)
    functions: Dict[str, BuildFunction] = Field(default_factory=dict)
    data_types: Dict[str, DataType] = Field(default_factory=dict)
    namespace: str = DEFAULT_NAMESPACE
    cte_name_map: Dict[str, str] = Field(default_factory=dict)
    # Addresses of concepts concretely bound to a datasource output.
    materialized_concepts: set[str] = Field(default_factory=set)
    alias_origin_lookup: Dict[str, BuildConcept] = Field(default_factory=dict)

    def gen_concept_list_caches(self) -> None:
        """Rebuild `materialized_concepts` from the current datasources.

        A concept (or alias origin) is materialized when its address appears
        among some datasource's output concepts.
        """
        concrete_addresses: set[str] = set()
        for datasource in self.datasources.values():
            for concept in datasource.output_concepts:
                concrete_addresses.add(concept.address)
        candidates = list(self.concepts.values()) + list(
            self.alias_origin_lookup.values()
        )
        self.materialized_concepts = {
            c.address for c in candidates if c.address in concrete_addresses
        }
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# Resolve forward references in the model now that all dependent types
# in this module are defined.
BuildEnvironment.model_rebuild()
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from collections import UserDict, UserList
|
|
5
|
+
from datetime import date, datetime
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import (
|
|
8
|
+
Any,
|
|
9
|
+
Callable,
|
|
10
|
+
Dict,
|
|
11
|
+
Generic,
|
|
12
|
+
List,
|
|
13
|
+
Sequence,
|
|
14
|
+
Tuple,
|
|
15
|
+
TypeVar,
|
|
16
|
+
Union,
|
|
17
|
+
get_args,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from pydantic import (
|
|
21
|
+
BaseModel,
|
|
22
|
+
ConfigDict,
|
|
23
|
+
field_validator,
|
|
24
|
+
)
|
|
25
|
+
from pydantic_core import core_schema
|
|
26
|
+
|
|
27
|
+
from trilogy.constants import (
|
|
28
|
+
MagicConstants,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DataTyped(ABC):
    """Mixin for objects that expose a resolved `output_datatype`."""

    # Not declared @abstractmethod: when the subclass supplies this as a
    # pydantic attribute instead of a property, abstract enforcement breaks
    # pydantic validation.
    @property
    def output_datatype(self) -> CONCRETE_TYPES:  # type: ignore
        """Return the instance's stored datatype, if one was assigned.

        Pydantic models keep field values in __dict__, so an attribute
        assignment shadows this property's storage; subclasses without
        such an attribute must override the property.
        """
        stored = self.__dict__
        if "output_datatype" in stored:
            return stored["output_datatype"]
        raise NotImplementedError
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Addressable(ABC):
    """Mixin exposing a `_address` alias for the `address` attribute that
    implementing classes are expected to provide."""

    @property
    def _address(self):
        # Pure delegation to the subclass-supplied attribute/property.
        return self.address
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Types accepted wherever a type *definition* may appear, including
# DataTyped objects whose concrete datatype is resolved later.
TYPEDEF_TYPES = Union[
    "DataType", "MapType", "ListType", "NumericType", "StructType", "DataTyped"
]

# Fully-resolved types only — no deferred DataTyped members.
CONCRETE_TYPES = Union[
    "DataType",
    "MapType",
    "ListType",
    "NumericType",
    "StructType",
]

# Generic type variables: map key, container value, and list element.
KT = TypeVar("KT")
VT = TypeVar("VT")
LT = TypeVar("LT")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class DataType(Enum):
    """Canonical datatypes shared across parsing, planning, and rendering."""

    # PRIMITIVES
    STRING = "string"
    BOOL = "bool"
    MAP = "map"
    LIST = "list"
    NUMBER = "number"
    FLOAT = "float"
    NUMERIC = "numeric"
    INTEGER = "int"
    BIGINT = "bigint"
    DATE = "date"
    DATETIME = "datetime"
    TIMESTAMP = "timestamp"
    ARRAY = "array"
    DATE_PART = "date_part"
    STRUCT = "struct"
    NULL = "null"

    # GRANULAR
    UNIX_SECONDS = "unix_seconds"

    # PARSING
    UNKNOWN = "unknown"

    @property
    def data_type(self):
        # Mirrors .data_type on the parametrized types (ListType, MapType,
        # NumericType, StructType) so all type objects share one interface.
        return self
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class NumericType(BaseModel):
    """Parametrized fixed-precision (decimal) numeric type."""

    # Total significant digits and digits after the decimal point.
    precision: int = 20
    scale: int = 5

    @property
    def data_type(self):
        # Generic DataType bucket this parametrized type belongs to.
        return DataType.NUMERIC

    @property
    def value(self):
        # Mirrors Enum.value so parametrized types render like DataType members.
        return self.data_type.value
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class ListType(BaseModel):
    """Container type describing a homogeneous list of `type` elements."""

    model_config = ConfigDict(frozen=True)
    type: TYPEDEF_TYPES

    @field_validator("type", mode="plain")
    def validate_type(cls, v):
        # Accept the inner type verbatim; resolution happens downstream.
        return v

    def __str__(self) -> str:
        return f"ListType<{self.type}>"

    @property
    def data_type(self):
        return DataType.LIST

    @property
    def value(self):
        # Mirrors Enum.value so parametrized types render like DataType members.
        return self.data_type.value

    @property
    def value_data_type(self) -> CONCRETE_TYPES:
        """Concrete element type, resolving DataTyped members."""
        inner = self.type
        return inner.output_datatype if isinstance(inner, DataTyped) else inner
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class MapType(BaseModel):
    """Container type describing a map from `key_type` to `value_type`."""

    key_type: DataType
    value_type: TYPEDEF_TYPES

    @field_validator("value_type", mode="plain")
    def validate_type(cls, v):
        # Accept the value type verbatim; resolution happens downstream.
        return v

    @property
    def data_type(self):
        return DataType.MAP

    @property
    def value(self):
        # Mirrors Enum.value so parametrized types render like DataType members.
        return self.data_type.value

    @property
    def value_data_type(self) -> CONCRETE_TYPES:
        """Concrete value type, resolving DataTyped members."""
        vt = self.value_type
        return vt.output_datatype if isinstance(vt, DataTyped) else vt

    @property
    def key_data_type(self) -> CONCRETE_TYPES:
        """Concrete key type, resolving DataTyped members."""
        kt = self.key_type
        return kt.output_datatype if isinstance(kt, DataTyped) else kt
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class StructType(BaseModel):
    """Composite type: ordered member types plus a name -> member mapping."""

    fields: Sequence[TYPEDEF_TYPES]
    fields_map: Dict[str, DataTyped | int | float | str]

    @field_validator("fields", mode="plain")
    def validate_type(cls, v):
        # The original copied elements one-by-one into a new list with a
        # no-op loop; a plain list() copy produces the identical result.
        return list(v)

    @field_validator("fields_map", mode="plain")
    def validate_fields_map(cls, v):
        # Accept the mapping verbatim; members are validated downstream.
        return v

    @property
    def data_type(self):
        return DataType.STRUCT

    @property
    def value(self):
        # Mirrors Enum.value so parametrized types render like DataType members.
        return self.data_type.value
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class ListWrapper(Generic[VT], UserList):
    """Used to distinguish parsed list objects from other lists.

    Carries the element DataType alongside the data.
    """

    def __init__(self, *args, type: DataType, **kwargs):
        super().__init__(*args, **kwargs)
        self.type = type

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a plain list (typed when parametrized), then wrap."""
        type_args = get_args(source_type)
        inner = List[type_args] if type_args else List  # type: ignore
        return core_schema.no_info_after_validator_function(
            cls.validate, handler(inner)
        )

    @classmethod
    def validate(cls, v):
        # Element type is inferred from the first entry.
        return cls(v, type=arg_to_datatype(v[0]))
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class MapWrapper(Generic[KT, VT], UserDict):
    """Used to distinguish parsed map objects from other dicts.

    Carries the key and value DataTypes alongside the data.
    """

    def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
        super().__init__(*args, **kwargs)
        self.key_type = key_type
        self.value_type = value_type

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a plain dict (typed when parametrized), then wrap."""
        type_args = get_args(source_type)
        inner = Dict[type_args] if type_args else Dict  # type: ignore
        return core_schema.no_info_after_validator_function(
            cls.validate, handler(inner)
        )

    @classmethod
    def validate(cls, v):
        # Key/value types are inferred from the last entry in each view.
        return cls(
            v,
            key_type=arg_to_datatype(list(v.keys()).pop()),
            value_type=arg_to_datatype(list(v.values()).pop()),
        )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class TupleWrapper(Generic[VT], tuple):
    """Used to distinguish parsed tuple objects from other tuples.

    Carries the element DataType alongside the data.
    """

    def __new__(cls, val, type: DataType, **kwargs):
        # tuple is immutable, so the contents are fixed here; the metadata
        # attributes are attached afterwards in __init__.
        return super().__new__(cls, tuple(val))

    def __init__(self, val, type: DataType, **kwargs):
        super().__init__()
        self.type = type
        self.val = val

    def __getnewargs__(self):
        # Support copy/pickle round-trips that must re-supply the extra
        # constructor arguments.
        return (self.val, self.type)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
    ) -> core_schema.CoreSchema:
        """Validate as a plain tuple (typed when parametrized), then wrap."""
        type_args = get_args(source_type)
        inner = Tuple[type_args] if type_args else Tuple  # type: ignore
        return core_schema.no_info_after_validator_function(
            cls.validate, handler(inner)
        )

    @classmethod
    def validate(cls, v):
        # Element type is inferred from the first entry.
        return cls(v, type=arg_to_datatype(v[0]))
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def list_to_wrapper(args):
    """Wrap a homogeneous list in a ListWrapper tagged with its element type.

    Asserts that every element shares a single datatype.
    """
    element_types = {arg_to_datatype(item) for item in args}
    assert len(element_types) == 1
    return ListWrapper(args, type=element_types.pop())
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def tuple_to_wrapper(args):
    """Wrap a homogeneous tuple in a TupleWrapper tagged with its element type.

    Asserts that every element shares a single datatype.
    """
    element_types = {arg_to_datatype(item) for item in args}
    assert len(element_types) == 1
    return TupleWrapper(args, type=element_types.pop())
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def dict_to_map_wrapper(arg):
    """Wrap a homogeneous dict in a MapWrapper tagged with key/value types.

    Asserts that all keys share one datatype and all values share one
    datatype.
    """
    # Avoid shadowing the parameter inside the comprehensions, as the
    # original did.
    key_types = [arg_to_datatype(k) for k in arg.keys()]
    value_types = [arg_to_datatype(v) for v in arg.values()]
    assert len(set(key_types)) == 1
    # BUG FIX: the original asserted key_types twice, so mixed value
    # datatypes were never rejected.
    assert len(set(value_types)) == 1
    return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def merge_datatypes(
    inputs: list[DataType | ListType | StructType | MapType | NumericType],
) -> DataType | ListType | StructType | MapType | NumericType:
    """This is a temporary hack for doing between
    allowable datatype transformation matrix"""
    if len(inputs) == 1:
        return inputs[0]
    distinct = set(inputs)
    # int combined with float widens to float; int with numeric to numeric.
    if distinct == {DataType.INTEGER, DataType.FLOAT}:
        return DataType.FLOAT
    if distinct == {DataType.INTEGER, DataType.NUMERIC}:
        return DataType.NUMERIC
    # When a parametrized NumericType appears among purely numeric inputs,
    # the first parametrized member wins.
    parametrized = [x for x in inputs if isinstance(x, NumericType)]
    if parametrized and all(
        isinstance(x, NumericType)
        or x in (DataType.INTEGER, DataType.FLOAT, DataType.NUMERIC)
        for x in inputs
    ):
        return parametrized[0]
    # Otherwise fall back to the first input's type.
    return inputs[0]
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def is_compatible_datatype(left, right):
    """Return True when `left` and `right` datatypes are comparable.

    Unknown types are treated permissively; the numeric family
    (INTEGER/FLOAT/NUMERIC) members are mutually compatible.
    """
    # for unknown types, we can't make any assumptions
    if left == DataType.UNKNOWN or right == DataType.UNKNOWN:
        return True
    if left == right:
        return True
    numeric_pairs = (
        {DataType.NUMERIC, DataType.FLOAT},
        {DataType.NUMERIC, DataType.INTEGER},
        {DataType.FLOAT, DataType.INTEGER},
    )
    return {left, right} in numeric_pairs
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def arg_to_datatype(arg) -> CONCRETE_TYPES:
    """Infer the concrete datatype for a literal or typed object.

    Raises ValueError for values whose datatype cannot be determined.
    The isinstance checks are order-sensitive: bool before int,
    the wrappers before DataTyped/list, and datetime before date.
    """
    if isinstance(arg, MagicConstants):
        # Only the NULL magic constant maps to a datatype.
        if arg == MagicConstants.NULL:
            return DataType.NULL
        raise ValueError(f"Cannot parse arg datatype for arg of type {arg}")
    if isinstance(arg, bool):
        # Checked before int: bool is an int subclass.
        return DataType.BOOL
    if isinstance(arg, int):
        return DataType.INTEGER
    if isinstance(arg, str):
        return DataType.STRING
    if isinstance(arg, float):
        return DataType.FLOAT
    if isinstance(arg, NumericType):
        # Parametrized numerics pass through unchanged.
        return arg
    if isinstance(arg, ListWrapper):
        return ListType(type=arg.type)
    if isinstance(arg, DataTyped):
        return arg.output_datatype
    if isinstance(arg, TupleWrapper):
        # Tuples are treated as lists for datatype purposes.
        return ListType(type=arg.type)
    if isinstance(arg, list):
        wrapper = list_to_wrapper(arg)
        return ListType(type=wrapper.type)
    if isinstance(arg, MapWrapper):
        return MapType(key_type=arg.key_type, value_type=arg.value_type)
    if isinstance(arg, datetime):
        # Checked before date: datetime is a date subclass.
        return DataType.DATETIME
    if isinstance(arg, date):
        return DataType.DATE
    raise ValueError(
        f"Cannot parse arg datatype for arg of raw type {type(arg)} value {arg}"
    )
|