altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +1 -5
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
|
@@ -0,0 +1,582 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import inspect
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
import typing as t
|
|
9
|
+
from collections.abc import Collection, Set
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
from copy import copy
|
|
12
|
+
from difflib import get_close_matches
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from itertools import count
|
|
15
|
+
|
|
16
|
+
if t.TYPE_CHECKING:
|
|
17
|
+
from sqlglot import exp
|
|
18
|
+
from sqlglot._typing import A, E, T
|
|
19
|
+
from sqlglot.dialects.dialect import DialectType
|
|
20
|
+
from sqlglot.expressions import Expression
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
CAMEL_CASE_PATTERN = re.compile("(?<!^)(?=[A-Z])")
|
|
24
|
+
PYTHON_VERSION = sys.version_info[:2]
|
|
25
|
+
logger = logging.getLogger("sqlglot")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AutoName(Enum):
    """
    Enum base class where `auto()` assigns each member's own name as its value,
    e.g. `FOO = auto()` yields `FOO.value == "FOO"`.

    Reference: https://docs.python.org/3/howto/enum.html#using-automatic-values
    """

    def _generate_next_value_(name, start, count, last_values):
        # Hook consulted by `auto()`: the member's name becomes its value.
        return name
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class classproperty(property):
    """
    Property variant that is accessible on the class itself (not just instances).
    """

    def __get__(self, obj: t.Any, owner: t.Any = None) -> t.Any:
        # Wrap the stored getter as a classmethod bound to `owner`, then invoke it.
        bound = classmethod(self.fget).__get__(None, owner)  # type: ignore
        return bound()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def suggest_closest_match_and_fail(
    kind: str,
    word: str,
    possibilities: t.Iterable[str],
) -> None:
    """
    Raises a `ValueError` for an unknown `word`, suggesting the closest match among
    `possibilities` if one is found.

    Args:
        kind: Human-readable category used in the error message (e.g. "dialect").
        word: The unrecognized value.
        possibilities: Known valid values to search for a suggestion.

    Raises:
        ValueError: Always.
    """
    matches = get_close_matches(word, possibilities, n=1)
    suggestion = f" Did you mean {matches[0]}?" if matches else ""
    raise ValueError(f"Unknown {kind} '{word}'.{suggestion}")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def seq_get(seq: "t.Sequence[T]", index: int) -> "t.Optional[T]":
    """Returns the value in `seq` at position `index`, or `None` if `index` is out of bounds."""
    # Negative indices count from the end, mirroring normal sequence indexing.
    if -len(seq) <= index < len(seq):
        return seq[index]
    return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@t.overload
def ensure_list(value: "t.Collection[T]") -> "t.List[T]": ...


@t.overload
def ensure_list(value: None) -> t.List: ...


@t.overload
def ensure_list(value: "T") -> "t.List[T]": ...


def ensure_list(value):
    """
    Ensures that a value is a list, otherwise casts or wraps it into one.

    Args:
        value: The value of interest.

    Returns:
        An empty list for `None`, `list(value)` for lists and tuples, and `[value]`
        for anything else.
    """
    if value is None:
        return []

    return list(value) if isinstance(value, (list, tuple)) else [value]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@t.overload
def ensure_collection(value: "t.Collection[T]") -> "t.Collection[T]": ...


@t.overload
def ensure_collection(value: "T") -> "t.Collection[T]": ...


def ensure_collection(value):
    """
    Ensures that a value is a collection (excluding `str` and `bytes`), otherwise wraps it into a list.

    Args:
        value: The value of interest.

    Returns:
        The value itself if it's already a (non-string) collection, or else `[value]`
        (`[]` for `None`).
    """
    if value is None:
        return []
    if isinstance(value, Collection) and not isinstance(value, (str, bytes)):
        return value
    return [value]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def csv(*args: str, sep: str = ", ") -> str:
    """
    Formats any number of string arguments as CSV, skipping empty ones.

    Args:
        args: The string arguments to format.
        sep: The argument separator.

    Returns:
        The non-empty arguments joined by `sep`.
    """
    # `filter(None, ...)` drops falsy (empty) strings, matching the truthiness check.
    return sep.join(filter(None, args))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def subclasses(
    module_name: str,
    classes: "t.Type | t.Tuple[t.Type, ...]",
    exclude: "t.Type | t.Tuple[t.Type, ...]" = (),
) -> "t.List[t.Type]":
    """
    Returns all subclasses for a collection of classes, possibly excluding some of them.

    Args:
        module_name: The name of the module to search for subclasses in.
        classes: Class(es) we want to find the subclasses of.
        exclude: Class(es) we want to exclude from the returned list.

    Returns:
        The target subclasses, in the (name-sorted) order `inspect.getmembers` yields them.
    """

    def _is_target(member: t.Any) -> bool:
        # A member qualifies if it's a class, derives from `classes`, and isn't excluded.
        return inspect.isclass(member) and issubclass(member, classes) and member not in exclude

    return [member for _name, member in inspect.getmembers(sys.modules[module_name], _is_target)]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def apply_index_offset(
    this: exp.Expression,
    expressions: t.List[E],
    offset: int,
    dialect: DialectType = None,
) -> t.List[E]:
    """
    Applies an offset to a given integer literal expression.

    Args:
        this: The target of the index.
        expressions: The expression the offset will be applied to, wrapped in a list.
        offset: The offset that will be applied.
        dialect: the dialect of interest.

    Returns:
        The original expression with the offset applied to it, wrapped in a list. If the provided
        `expressions` argument contains more than one expression, it's returned unaffected.
    """
    # Only single-expression indexes with a non-zero offset are adjusted.
    if not offset or len(expressions) != 1:
        return expressions

    expression = expressions[0]

    # Imported locally — presumably to avoid circular imports at module load; confirm.
    from sqlglot import exp
    from sqlglot.optimizer.annotate_types import annotate_types
    from sqlglot.optimizer.simplify import simplify

    # Make sure the index target has a type annotation before inspecting it.
    if not this.type:
        annotate_types(this, dialect=dialect)

    # Only apply the offset when indexing into an ARRAY (or an UNKNOWN-typed value).
    if t.cast(exp.DataType, this.type).this not in (
        exp.DataType.Type.UNKNOWN,
        exp.DataType.Type.ARRAY,
    ):
        return expressions

    if not expression.type:
        annotate_types(expression, dialect=dialect)

    # Only integer indexes can be shifted; any other index type is left untouched.
    if t.cast(exp.DataType, expression.type).this in exp.DataType.INTEGER_TYPES:
        logger.info("Applying array index offset (%s)", offset)
        # `expression + offset` builds an Add node; `simplify` folds it to a literal when possible.
        expression = simplify(expression + offset)
        return [expression]

    return expressions
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def camel_to_snake_case(name: str) -> str:
    """Converts `name` from camelCase to upper-cased snake case (e.g. "fooBar" -> "FOO_BAR")."""
    # Insert "_" before every uppercase letter that isn't at the start, then upper-case everything.
    return re.sub("(?<!^)(?=[A-Z])", "_", name).upper()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def while_changing(expression: Expression, func: t.Callable[[Expression], E]) -> E:
    """
    Applies a transformation to a given expression until a fix point is reached.

    Args:
        expression: The expression to be transformed.
        func: The transformation to be applied.

    Returns:
        The transformed expression.

    Note:
        Node hashes are cached on the private `_hash` attribute during iteration so that
        whole-tree equality can be checked via `hash(expression)` cheaply; the caches are
        cleared again before returning so later mutations can't leave them stale.
    """
    end_hash: t.Optional[int] = None

    while True:
        # No need to walk the AST– we've already cached the hashes in the previous iteration
        if end_hash is None:
            # Hash in reversed walk order — presumably so child hashes are cached before
            # their parents consume them; depends on `walk()` yielding parents first (confirm).
            for n in reversed(tuple(expression.walk())):
                n._hash = hash(n)

        start_hash = hash(expression)
        expression = func(expression)

        expression_nodes = tuple(expression.walk())

        # Uncache previous caches so we can recompute them
        for n in reversed(expression_nodes):
            n._hash = None
            n._hash = hash(n)

        end_hash = hash(expression)

        if start_hash == end_hash:
            # ... and reset the hash so we don't risk it becoming out of date if a mutation happens
            for n in expression_nodes:
                n._hash = None

            break

    return expression
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def tsort(dag: "t.Dict[T, t.Set[T]]") -> "t.List[T]":
    """
    Sorts a given directed acyclic graph in topological order.

    Note: `dag` is consumed (mutated) in the process.

    Args:
        dag: The graph to be sorted, as a node -> dependency-set mapping.

    Returns:
        A list that contains all of the graph's nodes in topological order.

    Raises:
        ValueError: If the graph contains a cycle.
    """
    # Ensure every referenced dependency has its own (possibly empty) entry.
    for deps in tuple(dag.values()):
        for dep in deps:
            dag.setdefault(dep, set())

    ordered = []

    while dag:
        # Nodes with no remaining dependencies are ready to be emitted.
        ready = {node for node, deps in dag.items() if not deps}

        if not ready:
            raise ValueError("Cycle error")

        for node in ready:
            dag.pop(node)

        for deps in dag.values():
            deps -= ready

        # Sort within each "layer" for deterministic output.
        ordered.extend(sorted(ready))  # type: ignore

    return ordered
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def open_file(file_name: str) -> t.TextIO:
    """Open a file that may be compressed as gzip and return it in universal newline mode."""
    # Sniff the first two bytes for the gzip magic number.
    with open(file_name, "rb") as raw:
        is_gzip = raw.read(2) == b"\x1f\x8b"

    if not is_gzip:
        return open(file_name, encoding="utf-8", newline="")

    import gzip

    return gzip.open(file_name, "rt", newline="")
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@contextmanager
def csv_reader(read_csv: "exp.ReadCSV") -> t.Any:
    """
    Returns a csv reader given the expression `READ_CSV(name, ['delimiter', '|', ...])`.

    Args:
        read_csv: A `ReadCSV` function call.

    Yields:
        A python csv reader.
    """
    file = open_file(read_csv.name)

    # The trailing args come in flat key/value pairs, e.g. ['delimiter', '|'];
    # zipping an iterator against itself pairs them up.
    delimiter = ","
    names = iter(arg.name for arg in read_csv.expressions)
    for key, val in zip(names, names):
        if key == "delimiter":
            delimiter = val

    try:
        import csv as csv_

        yield csv_.reader(file, delimiter=delimiter)
    finally:
        # Always release the file handle, even if the consumer raises.
        file.close()
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def find_new_name(taken: t.Collection[str], base: str) -> str:
    """
    Searches for a new name.

    Args:
        taken: A collection of taken names.
        base: Base name to alter.

    Returns:
        `base` itself if free, otherwise the first free name of the form `{base}_{i}` with i >= 2.
    """
    if base not in taken:
        return base

    suffix = 2
    while f"{base}_{suffix}" in taken:
        suffix += 1

    return f"{base}_{suffix}"
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def is_int(text: str) -> bool:
    """Returns True if `text` parses as an `int`."""
    try:
        int(text)
    except ValueError:
        return False
    return True
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def is_float(text: str) -> bool:
    """Returns True if `text` parses as a `float`."""
    try:
        float(text)
    except ValueError:
        return False
    return True
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def is_type(text: str, target_type: t.Type) -> bool:
    """Returns True if `text` can be converted to `target_type` without raising `ValueError`."""
    try:
        target_type(text)
    except ValueError:
        return False
    return True
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def name_sequence(prefix: str) -> t.Callable[[], str]:
    """Returns a name generator given a prefix (e.g. a0, a1, a2, ... if the prefix is "a")."""
    counter = count()

    def _next_name() -> str:
        return f"{prefix}{next(counter)}"

    return _next_name
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def object_to_dict(obj: t.Any, **kwargs) -> t.Dict:
    """Returns a dictionary created from an object's attributes, merged with `kwargs`."""
    result: t.Dict = {}
    for key, attr in vars(obj).items():
        # Shallow-copy each attribute so callers can't mutate the source object's state.
        result[key] = attr.copy() if hasattr(attr, "copy") else copy(attr)

    # Explicit keyword overrides win over copied attributes.
    result.update(kwargs)
    return result
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def split_num_words(
    value: str, sep: str, min_num_words: int, fill_from_start: bool = True
) -> t.List[t.Optional[str]]:
    """
    Perform a split on a value and return N words as a result with `None` used for words that don't exist.

    Args:
        value: The value to be split.
        sep: The value to use to split on.
        min_num_words: The minimum number of words that are going to be in the result.
        fill_from_start: Indicates that if `None` values should be inserted at the start or end of the list.

    Examples:
        >>> split_num_words("db.table", ".", 3)
        [None, 'db', 'table']
        >>> split_num_words("db.table", ".", 3, fill_from_start=False)
        ['db', 'table', None]
        >>> split_num_words("db.table", ".", 1)
        ['db', 'table']

    Returns:
        The list of words returned by `split`, possibly augmented by a number of `None` values.
    """
    words: t.List[t.Optional[str]] = value.split(sep)
    # A non-positive deficit yields an empty padding list.
    padding: t.List[t.Optional[str]] = [None] * (min_num_words - len(words))
    return padding + words if fill_from_start else words + padding
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def is_iterable(value: t.Any) -> bool:
    """
    Checks if the value is an iterable, excluding the types `str` and `bytes`.

    Examples:
        >>> is_iterable([1,2])
        True
        >>> is_iterable("test")
        False

    Args:
        value: The value to check if it is an iterable.

    Returns:
        A `bool` value indicating if it is an iterable.
    """
    from sqlglot import Expression

    # Strings, bytes and sqlglot Expressions are iterable but treated as scalars here.
    if isinstance(value, (str, bytes, Expression)):
        return False
    return hasattr(value, "__iter__")
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def flatten(values: t.Iterable[t.Iterable[t.Any] | t.Any]) -> t.Iterator[t.Any]:
    """
    Flattens an iterable that can contain both iterable and non-iterable elements. Objects of
    type `str` and `bytes` are not regarded as iterables.

    Examples:
        >>> list(flatten([[1, 2], 3, {4}, (5, "bla")]))
        [1, 2, 3, 4, 5, 'bla']
        >>> list(flatten([1, 2, 3]))
        [1, 2, 3]

    Args:
        values: The value to be flattened.

    Yields:
        Non-iterable elements in `values`.
    """
    for item in values:
        # Recurse into nested iterables; emit scalars (incl. str/bytes) as-is.
        if is_iterable(item):
            yield from flatten(item)
        else:
            yield item
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def dict_depth(d: t.Dict) -> int:
    """
    Get the nesting depth of a dictionary by following its first value chain.

    Example:
        >>> dict_depth(None)
        0
        >>> dict_depth({})
        1
        >>> dict_depth({"a": "b"})
        1
        >>> dict_depth({"a": {}})
        2
        >>> dict_depth({"a": {"b": {}}})
        3
    """
    depth = 0
    while True:
        try:
            d = next(iter(d.values()))
        except AttributeError:
            # `d` isn't dict-like, so it contributes no extra level.
            return depth
        except StopIteration:
            # An empty dict still counts as one level.
            return depth + 1
        depth += 1
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def first(it: "t.Iterable[T]") -> "T":
    """Returns the first element from an iterable (useful for sets)."""
    # Raises StopIteration on an empty iterable, same as the generator form.
    return next(iter(it))
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def to_bool(value: "t.Optional[str | bool]") -> "t.Optional[str | bool]":
    """
    Coerces the strings "true"/"1" and "false"/"0" (case-insensitive) to booleans.
    Booleans and `None` pass through; any other string is returned unchanged.
    """
    if value is None or isinstance(value, bool):
        return value

    lowered = value.lower()
    if lowered in ("true", "1"):
        return True
    if lowered in ("false", "0"):
        return False

    return value
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def merge_ranges(ranges: "t.List[t.Tuple[A, A]]") -> "t.List[t.Tuple[A, A]]":
    """
    Merges a sequence of ranges, represented as tuples (low, high) whose values
    belong to some totally-ordered set.

    Example:
        >>> merge_ranges([(1, 3), (2, 6)])
        [(1, 6)]
    """
    if not ranges:
        return []

    ordered = sorted(ranges)
    merged = [ordered[0]]

    for lo, hi in ordered[1:]:
        prev_lo, prev_hi = merged[-1]

        if lo <= prev_hi:
            # Overlapping (or touching) ranges: extend the previous one in place.
            merged[-1] = (prev_lo, max(prev_hi, hi))
        else:
            merged.append((lo, hi))

    return merged
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def is_iso_date(text: str) -> bool:
    """Returns True if `text` is a valid ISO-8601 date (e.g. "2021-01-01")."""
    try:
        datetime.date.fromisoformat(text)
    except ValueError:
        return False
    return True
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def is_iso_datetime(text: str) -> bool:
    """Returns True if `text` is a valid ISO-8601 datetime (e.g. "2021-01-01T00:00:00")."""
    try:
        datetime.datetime.fromisoformat(text)
    except ValueError:
        return False
    return True
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# Interval units that operate on date components
DATE_UNITS = {"day", "week", "month", "quarter", "year", "year_month"}


def is_date_unit(expression: "t.Optional[exp.Expression]") -> bool:
    """Returns True if `expression` names a date-component interval unit (case-insensitive)."""
    if expression is None:
        return False
    return expression.name.lower() in DATE_UNITS
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
K = t.TypeVar("K")
V = t.TypeVar("V")


class SingleValuedMapping(t.Mapping[K, V]):
    """
    A read-only mapping in which every key maps to one shared value.

    This rigamarole is meant to avoid copying keys, which was originally intended
    as an optimization while qualifying columns for tables with lots of columns.
    """

    def __init__(self, keys: t.Collection[K], value: V):
        # Reuse the input when it's already a set; otherwise build one for O(1) lookups.
        self._keys = keys if isinstance(keys, Set) else set(keys)
        self._value = value

    def __getitem__(self, key: K) -> V:
        if key not in self._keys:
            raise KeyError(key)
        return self._value

    def __len__(self) -> int:
        return len(self._keys)

    def __iter__(self) -> t.Iterator[K]:
        return iter(self._keys)
|