fakesnow 0.9.38__py3-none-any.whl → 0.9.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fakesnow/checks.py +2 -2
- fakesnow/copy_into.py +194 -29
- fakesnow/cursor.py +37 -10
- fakesnow/info_schema.py +45 -0
- fakesnow/macros.py +11 -0
- fakesnow/server.py +15 -0
- fakesnow/transforms/__init__.py +7 -3
- fakesnow/transforms/show.py +263 -173
- fakesnow/transforms/stage.py +163 -0
- fakesnow/transforms/transforms.py +31 -42
- fakesnow/variables.py +3 -1
- {fakesnow-0.9.38.dist-info → fakesnow-0.9.40.dist-info}/METADATA +2 -2
- {fakesnow-0.9.38.dist-info → fakesnow-0.9.40.dist-info}/RECORD +17 -16
- {fakesnow-0.9.38.dist-info → fakesnow-0.9.40.dist-info}/WHEEL +1 -1
- {fakesnow-0.9.38.dist-info → fakesnow-0.9.40.dist-info}/entry_points.txt +0 -0
- {fakesnow-0.9.38.dist-info → fakesnow-0.9.40.dist-info}/licenses/LICENSE +0 -0
- {fakesnow-0.9.38.dist-info → fakesnow-0.9.40.dist-info}/top_level.txt +0 -0
fakesnow/checks.py
CHANGED
@@ -39,8 +39,8 @@ def is_unqualified_table_expression(expression: exp.Expression) -> tuple[bool, b
            # "CREATE/DROP SCHEMA"
            no_database = not node.args.get("catalog")
            no_schema = False
-        elif parent_kind.upper() in {"TABLE", "VIEW"}:
-            # "CREATE/DROP TABLE/VIEW"
+        elif parent_kind.upper() in {"TABLE", "VIEW", "STAGE"}:
+            # "CREATE/DROP TABLE/VIEW/STAGE"
            no_database = not node.args.get("catalog")
            no_schema = not node.args.get("db")
        else:
fakesnow/copy_into.py
CHANGED
@@ -1,8 +1,9 @@
 from __future__ import annotations

+import datetime
 from collections.abc import Sequence
 from dataclasses import dataclass, field
-from typing import Any, Protocol, cast
+from typing import Any, NamedTuple, Protocol, cast
 from urllib.parse import urlparse, urlunparse

 import duckdb
@@ -10,30 +11,125 @@ import snowflake.connector.errors
 from duckdb import DuckDBPyConnection
 from sqlglot import exp

+import fakesnow.transforms.stage as stage
 from fakesnow import logger


+class LoadHistoryRecord(NamedTuple):
+    """Represents a record in the INFORMATION_SCHEMA.LOAD_HISTORY table."""
+
+    schema_name: str
+    file_name: str
+    table_name: str
+    last_load_time: str  # ISO8601 datetime with timezone
+    status: str
+    row_count: int
+    row_parsed: int
+    first_error_message: str | None
+    first_error_line_number: int | None
+    first_error_character_position: int | None
+    first_error_col_name: str | None
+    error_count: int
+    error_limit: int | None
+
+
 def copy_into(
-    duck_conn: DuckDBPyConnection, expr: exp.Copy, params: Sequence[Any] | dict[Any, Any] | None = None
+    duck_conn: DuckDBPyConnection,
+    current_database: str | None,
+    current_schema: str | None,
+    expr: exp.Copy,
+    params: Sequence[Any] | dict[Any, Any] | None = None,
 ) -> str:
     cparams = _params(expr)
-    urls = _source_urls(expr, cparams.files)
+    if isinstance(cparams.file_format, ReadParquet):
+        from_ = expr.args["files"][0]
+        # parquet must use MATCH_BY_COLUMN_NAME (TODO) or a copy transformation
+        # ie: the from clause in COPY INTO must be a subquery
+        if not isinstance(from_, exp.Subquery):
+            raise snowflake.connector.errors.ProgrammingError(
+                msg="SQL compilation error:\nPARQUET file format can produce one and only one column of type variant, object, or array. Load data into separate columns using the MATCH_BY_COLUMN_NAME copy option or copy with transformation.",  # noqa: E501
+                errno=2019,
+                sqlstate="0A000",
+            )
+
+    from_source = _from_source(expr)
+    source = (
+        stage_url_from_var(from_source, duck_conn, current_database, current_schema)
+        if from_source.startswith("@")
+        else from_source
+    )
+    urls = _source_urls(source, cparams.files)
+
     inserts = _inserts(expr, cparams, urls)
+    table = expr.this
+    if isinstance(expr.this, exp.Table):
+        table = expr.this
+    elif isinstance(expr.this, exp.Schema) and isinstance(expr.this.this, exp.Table):
+        table = expr.this.this
+    else:
+        raise AssertionError(f"copy into {expr.this.__class__} is not Table or Schema")
+
+    schema = table.db or current_schema
+    assert schema

-    results = []
+    histories: list[LoadHistoryRecord] = []
+    load_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
     try:
-
+        check_sql = "SELECT 1 FROM _fs_information_schema._fs_load_history WHERE FILE_NAME = ? LIMIT 1"
+
         for i, url in zip(inserts, urls):
-            sql = i.sql(dialect="duckdb")
-            logger.log_sql(sql, params)
+            # Check if file has been loaded into any table before
+            duck_conn.execute(check_sql, [url])
+            if duck_conn.fetchone() and not cparams.force:
+                affected_count = 0
+                status = "LOAD_SKIPPED"
+                error_limit = None
+                error_count = 1
+                first_error_message = "File was loaded before."
+            else:
+                sql = i.sql(dialect="duckdb")
+                logger.log_sql(sql, params)
+                duck_conn.execute(sql, params)
+                (affected_count,) = duck_conn.fetchall()[0]
+                status = "LOADED"
+                error_limit = 1
+                error_count = 0
+                first_error_message = None
+
+            history = LoadHistoryRecord(
+                schema_name=schema,
+                file_name=url,
+                table_name=table.name,
+                last_load_time=load_time,
+                status=status,
+                row_count=affected_count,
+                row_parsed=affected_count,
+                first_error_message=first_error_message,
+                first_error_line_number=None,
+                first_error_character_position=None,
+                first_error_col_name=None,
+                error_count=error_count,
+                error_limit=error_limit,
+            )
+            histories.append(history)
+
+        if insert_histories := [h for h in histories if h.status != "LOAD_SKIPPED"]:
+            values = "\n ,".join(str(tuple(history)).replace("None", "NULL") for history in insert_histories)
+            sql = f"INSERT INTO _fs_information_schema._fs_load_history VALUES {values}"
             duck_conn.execute(sql, params)
-            (affected_count,) = duck_conn.fetchall()[0]
-            results.append(f"('{url}', 'LOADED', {affected_count}, {affected_count}, 1, 0, NULL, NULL, NULL, NULL)")

-
-
-
-
+        columns = (
+            "file, status, rows_parsed, rows_loaded, error_limit, errors_seen, first_error, first_error_line, "
+            "first_error_character, first_error_column_name"
+        )
+        values = "\n, ".join(
+            f"('{h.file_name}', '{h.status}', {h.row_parsed}, {h.row_count}, "
+            f"{h.error_limit or 'NULL'}, {h.error_count}, "
+            f"{repr(h.first_error_message) if h.first_error_message else 'NULL'}, "
+            f"{h.first_error_line_number or 'NULL'}, {h.first_error_character_position or 'NULL'}, "
+            f"{h.first_error_col_name or 'NULL'})"
+            for h in histories
+        )
         sql = f"SELECT * FROM (VALUES\n {values}\n) AS t({columns})"
         duck_conn.execute(sql)
         return sql
@@ -62,6 +158,8 @@ def _params(expr: exp.Copy) -> Params:

             if var_type == "CSV":
                 kwargs["file_format"] = handle_csv(param.expressions)
+            elif var_type == "PARQUET":
+                kwargs["file_format"] = ReadParquet()
             else:
                 raise NotImplementedError(f"{var_type} FILE_FORMAT is not currently implemented")
         elif var == "FORCE":
@@ -71,28 +169,67 @@ def _params(expr: exp.Copy) -> Params:
         else:
             raise ValueError(f"Unknown copy parameter: {param.this}")

-    if not force:
-        raise NotImplementedError("COPY INTO with FORCE=false (default) is not currently implemented")
+    return Params(force=force, **kwargs)

-    return Params(**kwargs)

+def _from_source(expr: exp.Copy) -> str:
+    # NB: sqlglot parses the from clause as "files" strangely
+    from_ = expr.args["files"][0].this
+
+    if isinstance(from_, exp.Select):
+        from_table = from_.args["from"]
+        # if a subquery is used in the FROM clause it must be loaded from a stage not an external location
+        assert isinstance(from_table, exp.From), f"{from_table.__class__} is not a From"
+        assert isinstance(from_table.this, exp.Table), f"{from_table.__class__} is not a Table"
+        var = from_table.this.this
+        if not isinstance(var, exp.Var):
+            # not a very helpful message, but this is what Snowflake returns
+            raise snowflake.connector.errors.ProgrammingError(
+                msg=f"SQL compilation error:\ninvalid URL prefix found in: {from_table.this.this}",
+                errno=1011,
+                sqlstate="42601",
+            )
+        # return the name of the stage, eg: @stage1
+        return var.this
+
+    assert isinstance(from_, exp.Literal), f"{from_} is not a exp.Literal"
+    # return url
+    return from_.name
+
+
+def stage_url_from_var(
+    from_source: str, duck_conn: DuckDBPyConnection, current_database: str | None, current_schema: str | None
+) -> str:
+    database_name, schema_name, name = stage.parts_from_var(from_source, current_database, current_schema)
+
+    # Look up the stage URL
+    duck_conn.execute(
+        """
+        SELECT url FROM _fs_global._fs_information_schema._fs_stages
+        WHERE database_name = ? and schema_name = ? and name = ?
+        """,
+        (database_name, schema_name, name),
+    )
+    if result := duck_conn.fetchone():
+        return result[0]
+    else:
+        raise snowflake.connector.errors.ProgrammingError(
+            msg=f"SQL compilation error:\nStage '{database_name}.{schema_name}.{name}' does not exist or not authorized.",  # noqa: E501
+            errno=2003,
+            sqlstate="02000",
+        )

-def _source_urls(expr: exp.Copy, files: list[str]) -> list[str]:
-    """
-    Given a COPY statement and a list of files, return a list of URLs with each file appended as a fragment.
-    Checks that the source is a valid URL.
-    """
-    source = expr.args["files"][0].this
-    assert isinstance(source, exp.Literal), f"{source} is not a exp.Literal"

-
+def _source_urls(from_source: str, files: list[str]) -> list[str]:
+    """Convert from_source to a list of URLs."""
+    scheme, netloc, path, params, query, fragment = urlparse(from_source)
     if not scheme:
         raise snowflake.connector.errors.ProgrammingError(
-            msg=f"SQL compilation error:\ninvalid URL prefix found in: '{
+            msg=f"SQL compilation error:\ninvalid URL prefix found in: '{from_source}'", errno=1011, sqlstate="42601"
         )

     # rebuild url from components to ensure correct handling of host slash
-    return [_urlunparse(scheme, netloc, path, params, query, fragment, file) for file in files] or [
+    return [_urlunparse(scheme, netloc, path, params, query, fragment, file) for file in files] or [from_source]


 def _urlunparse(scheme: str, netloc: str, path: str, params: str, query: str, fragment: str, suffix: str) -> str:
@@ -111,9 +248,16 @@ def _urlunparse(scheme: str, netloc: str, path: str, params: str, query: str, fr
 def _inserts(expr: exp.Copy, params: Params, urls: list[str]) -> list[exp.Expression]:
     # INTO expression
     target = expr.this
-
-
-
+
+    from_ = expr.args["files"][0]
+    if isinstance(from_, exp.Subquery):
+        select = from_.this
+        assert isinstance(select, exp.Select), f"{select.__class__} is not a Select"
+        columns = _strip_json_extract(select).expressions
+    else:
+        columns = [exp.Column(this=exp.Identifier(this=f"column{i}")) for i in range(len(target.expressions))] or [
+            exp.Column(this=exp.Star())
+        ]

     return [
         exp.Insert(
@@ -124,6 +268,20 @@ def _inserts(expr: exp.Copy, params: Params, urls: list[str]) -> list[exp.Expres
     ]


+def _strip_json_extract(expr: exp.Select) -> exp.Select:
+    """
+    Strip $1 prefix from SELECT statement columns.
+    """
+    dollar1 = exp.Parameter(this=exp.Literal(this="1", is_string=False))
+
+    for p in expr.find_all(exp.Parameter):
+        if p == dollar1 and p.parent and (key := p.parent.expression.find(exp.JSONPathKey)):
+            assert p.parent.parent, expr
+            p.parent.parent.args["this"] = exp.Identifier(this=key.this)
+
+    return expr
+
+
 def handle_csv(expressions: list[exp.Property]) -> ReadCSV:
     skip_header = ReadCSV.skip_header
     quote = ReadCSV.quote
@@ -192,8 +350,15 @@ class ReadCSV(FileTypeHandler):
         return exp.func("read_csv", exp.Literal(this=url, is_string=True), *args)


+@dataclass
+class ReadParquet(FileTypeHandler):
+    def read_expression(self, url: str) -> exp.Expression:
+        return exp.func("read_parquet", exp.Literal(this=url, is_string=True))
+
+
 @dataclass
 class Params:
     files: list[str] = field(default_factory=list)
     # Snowflake defaults to CSV when no file format is specified
     file_format: FileTypeHandler = field(default_factory=ReadCSV)
+    force: bool = False
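
For context, a rough end-to-end sketch of the new stage-based COPY INTO path in copy_into.py, exercised through fakesnow's patched connector. The stage URL, table and file names are hypothetical, and the load itself assumes DuckDB can reach that location:

```python
import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect(database="db1", schema="s1")
    cur = conn.cursor()
    cur.execute("CREATE TABLE t1 (id INT, name VARCHAR)")
    # the new create_stage transform records the stage and its URL in _fs_stages
    cur.execute("CREATE STAGE stage1 URL='s3://mybucket/data/'")
    print(cur.fetchone())  # ('Stage area STAGE1 successfully created.',)
    # @stage1 is resolved to its URL via stage_url_from_var; each loaded file is
    # recorded in _fs_information_schema._fs_load_history, and repeating the same
    # COPY without FORCE=TRUE is reported as LOAD_SKIPPED rather than reloaded.
    cur.execute("COPY INTO t1 FROM @stage1 FILES=('file1.csv')")
```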
fakesnow/cursor.py
CHANGED
@@ -43,6 +43,7 @@ SQL_CREATED_DATABASE = Template("SELECT 'Database ${name} successfully created.'
 SQL_CREATED_SCHEMA = Template("SELECT 'Schema ${name} successfully created.' as 'status'")
 SQL_CREATED_TABLE = Template("SELECT 'Table ${name} successfully created.' as 'status'")
 SQL_CREATED_VIEW = Template("SELECT 'View ${name} successfully created.' as 'status'")
+SQL_CREATED_STAGE = Template("SELECT 'Stage area ${name} successfully created.' as status")
 SQL_DROPPED = Template("SELECT '${name} successfully dropped.' as 'status'")
 SQL_INSERTED_ROWS = Template("SELECT ${count} as 'number of rows inserted'")
 SQL_UPDATED_ROWS = Template("SELECT ${count} as 'number of rows updated', 0 as 'number of multi-joined rows updated'")
@@ -75,6 +76,7 @@ class FakeSnowflakeCursor:
         self._use_dict_result = use_dict_result
         self._last_sql = None
         self._last_params = None
+        self._last_transformed = None
         self._sqlstate = None
         self._arraysize = 1
         self._arrow_table = None
@@ -105,6 +107,7 @@ class FakeSnowflakeCursor:
     def close(self) -> bool:
         self._last_sql = None
         self._last_params = None
+        self._last_transformed = None
         return True

     def describe(self, command: str, *args: Any, **kwargs: Any) -> list[ResultMetadata]:
@@ -203,10 +206,8 @@ class FakeSnowflakeCursor:
             .transform(transforms.create_database, db_path=self._conn.db_path)
             .transform(transforms.extract_comment_on_table)
             .transform(transforms.extract_comment_on_columns)
-            .transform(transforms.information_schema_fs_columns)
+            .transform(transforms.information_schema_fs)
             .transform(transforms.information_schema_databases, current_schema=self._conn.schema)
-            .transform(transforms.information_schema_fs_tables)
-            .transform(transforms.information_schema_fs_views)
             .transform(transforms.drop_schema_cascade)
             .transform(transforms.tag)
             .transform(transforms.semi_structured_types)
@@ -240,6 +241,7 @@ class FakeSnowflakeCursor:
             .transform(transforms.identifier)
             .transform(transforms.array_agg_within_group)
             .transform(transforms.array_agg)
+            .transform(transforms.array_construct_etc)
             .transform(transforms.dateadd_date_cast)
             .transform(transforms.dateadd_string_literal_timestamp_cast)
             .transform(transforms.datediff_string_literal_timestamp_cast)
@@ -248,8 +250,9 @@ class FakeSnowflakeCursor:
             .transform(transforms.show_procedures)
             .transform(transforms.show_warehouses)
             .transform(lambda e: transforms.show_schemas(e, self._conn.database))
-            .transform(lambda e: transforms.show_tables_etc(e, self._conn.database))
-            .transform(lambda e: transforms.show_columns(e, self._conn.database))
+            .transform(lambda e: transforms.show_tables_etc(e, self._conn.database, self._conn.schema))
+            .transform(lambda e: transforms.show_columns(e, self._conn.database, self._conn.schema))
+            .transform(lambda e: transforms.show_stages(e, self._conn.database, self._conn.schema))
             # TODO collapse into a single show_keys function
             .transform(lambda e: transforms.show_keys(e, self._conn.database, kind="PRIMARY"))
             .transform(lambda e: transforms.show_keys(e, self._conn.database, kind="UNIQUE"))
@@ -260,6 +263,8 @@ class FakeSnowflakeCursor:
             .transform(transforms.create_clone)
             .transform(transforms.alias_in_join)
             .transform(transforms.alter_table_strip_cluster_by)
+            .transform(lambda e: transforms.create_stage(e, self._conn.database, self._conn.schema))
+            .transform(lambda e: transforms.put_stage(e, self._conn.database, self._conn.schema))
         )

     def _transform_explode(self, expression: exp.Expression) -> list[exp.Expression]:
@@ -287,17 +292,17 @@ class FakeSnowflakeCursor:

         try:
             if isinstance(transformed, exp.Copy):
-                sql = copy_into(self._duck_conn, transformed, params)
+                sql = copy_into(self._duck_conn, self._conn.database, self._conn.schema, transformed, params)
             else:
                 logger.log_sql(sql, params)
                 self._duck_conn.execute(sql, params)
         except duckdb.BinderException as e:
             msg = e.args[0]
-            raise snowflake.connector.errors.ProgrammingError(msg=msg, errno=2043, sqlstate="02000") from None
+            raise snowflake.connector.errors.ProgrammingError(msg=msg, errno=2043, sqlstate="02000") from e
         except duckdb.CatalogException as e:
             # minimal processing to make it look like a snowflake exception, message content may differ
             msg = cast(str, e.args[0]).split("\n")[0]
-            raise snowflake.connector.errors.ProgrammingError(msg=msg, errno=2003, sqlstate="42S02") from None
+            raise snowflake.connector.errors.ProgrammingError(msg=msg, errno=2003, sqlstate="42S02") from e
         except duckdb.TransactionException as e:
             if "cannot rollback - no transaction is active" in str(
                 e
@@ -307,9 +312,9 @@ class FakeSnowflakeCursor:
             else:
                 raise e
         except duckdb.ConnectionException as e:
-            raise snowflake.connector.errors.DatabaseError(msg=e.args[0], errno=250002, sqlstate="08003") from None
+            raise snowflake.connector.errors.DatabaseError(msg=e.args[0], errno=250002, sqlstate="08003") from e
         except duckdb.ParserException as e:
-            raise snowflake.connector.errors.ProgrammingError(msg=e.args[0], errno=1003, sqlstate="42000") from None
+            raise snowflake.connector.errors.ProgrammingError(msg=e.args[0], errno=1003, sqlstate="42000") from e

         affected_count = None

@@ -329,6 +334,15 @@ class FakeSnowflakeCursor:
             self._duck_conn.execute(info_schema.per_db_creation_sql(create_db_name))
             result_sql = SQL_CREATED_DATABASE.substitute(name=create_db_name)

+        elif stage_name := transformed.args.get("create_stage_name"):
+            if stage_name == "?":
+                assert isinstance(params, (tuple, list)) and len(params) == 1, (
+                    "Expected single parameter for create_stage_name"
+                )
+                result_sql = SQL_CREATED_STAGE.substitute(name=params[0].upper())
+            else:
+                result_sql = SQL_CREATED_STAGE.substitute(name=stage_name.upper())
+
         elif cmd == "INSERT":
             (affected_count,) = self._duck_conn.fetchall()[0]
             result_sql = SQL_INSERTED_ROWS.substitute(count=affected_count)
@@ -401,8 +415,21 @@ class FakeSnowflakeCursor:
         self._rowcount = affected_count or self._arrow_table.num_rows
         self._sfqid = str(uuid.uuid4())

+        if stage_name := transformed.args.get("put_stage_name"):
+            if stage_name == "?":
+                assert isinstance(params, (tuple, list)) and len(params) == 1, (
+                    "Expected single parameter for put_stage_name"
+                )
+            if self._arrow_table.num_rows != 1:
+                raise snowflake.connector.errors.ProgrammingError(
+                    msg=f"SQL compilation error:\nStage '{stage_name}' does not exist or not authorized.",
+                    errno=2003,
+                    sqlstate="02000",
+                )
+
         self._last_sql = result_sql or sql
         self._last_params = None if result_sql else params
+        self._last_transformed = transformed

     def executemany(
         self,
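
Continuing the sketch above, the new put_stage_name handling in cursor.py surfaces a PUT against an unknown stage as a Snowflake-style error (assuming the accompanying put_stage transform rewrites PUT into a lookup of _fs_stages that returns no rows for a missing stage):

```python
import snowflake.connector.errors

try:
    cur.execute("PUT file:///tmp/file1.csv @missing_stage")
except snowflake.connector.errors.ProgrammingError as e:
    # zero matching stages -> errno 2003, sqlstate 02000, per the new check above
    print(e.errno, e.sqlstate)
```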
fakesnow/info_schema.py
CHANGED
@@ -185,6 +185,49 @@ where database_name = '${catalog}'
 """
 )

+SQL_CREATE_LOAD_HISTORY_TABLE = Template(
+    """
+create table if not exists ${catalog}._fs_information_schema._fs_load_history (
+    SCHEMA_NAME VARCHAR,
+    FILE_NAME VARCHAR,
+    TABLE_NAME VARCHAR,
+    LAST_LOAD_TIME TIMESTAMPTZ,
+    STATUS VARCHAR,
+    ROW_COUNT INTEGER,
+    ROW_PARSED INTEGER,
+    FIRST_ERROR_MESSAGE VARCHAR,
+    FIRST_ERROR_LINE_NUMBER INTEGER,
+    FIRST_ERROR_CHARACTER_POSITION INTEGER,
+    FIRST_ERROR_COL_NAME VARCHAR,
+    ERROR_COUNT INTEGER,
+    ERROR_LIMIT INTEGER
+)
+"""
+)
+
+
+SQL_CREATE_GLOBAL_INFORMATION_SCHEMA_STAGES_TABLE = """
+CREATE TABLE IF NOT EXISTS _fs_global._fs_information_schema._fs_stages (
+    created_on TIMESTAMPTZ,
+    name TEXT,
+    database_name TEXT,
+    schema_name TEXT,
+    url TEXT,
+    has_credentials TEXT,
+    has_encryption_key TEXT,
+    owner TEXT,
+    comment TEXT,
+    region TEXT,
+    type TEXT,
+    cloud TEXT,
+    notification_channel TEXT,
+    storage_integration TEXT,
+    endpoint TEXT,
+    owner_role_type TEXT,
+    directory_enabled TEXT
+);
+"""
+

 def per_db_creation_sql(catalog: str) -> str:
     return f"""
@@ -193,6 +236,7 @@ def per_db_creation_sql(catalog: str) -> str:
         {SQL_CREATE_INFORMATION_SCHEMA_DATABASES_VIEW.substitute(catalog=catalog)};
         {SQL_CREATE_INFORMATION_SCHEMA_TABLES_VIEW.substitute(catalog=catalog)};
         {SQL_CREATE_INFORMATION_SCHEMA_VIEWS_VIEW.substitute(catalog=catalog)};
+        {SQL_CREATE_LOAD_HISTORY_TABLE.substitute(catalog=catalog)};
     """

@@ -203,6 +247,7 @@ def fs_global_creation_sql() -> str:
         {SQL_CREATE_GLOBAL_INFORMATION_SCHEMA_COLUMNS_EXT};
         {SQL_CREATE_GLOBAL_INFORMATION_SCHEMA_COLUMNS_VIEW};
         {SQL_CREATE_GLOBAL_INFORMATION_SCHEMA_USERS_TABLE};
+        {SQL_CREATE_GLOBAL_INFORMATION_SCHEMA_STAGES_TABLE}
     """

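
Continuing the sketch, the per-database load history created by SQL_CREATE_LOAD_HISTORY_TABLE can be inspected directly after a COPY INTO has run (column names as defined above; querying the backing _fs_information_schema table is an assumption about how fakesnow exposes it):

```python
cur.execute(
    "SELECT file_name, table_name, status, row_count"
    " FROM _fs_information_schema._fs_load_history"
)
print(cur.fetchall())
```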
fakesnow/macros.py
CHANGED
@@ -29,9 +29,20 @@ CREATE OR REPLACE MACRO ${catalog}._fs_flatten(input) AS TABLE
 """
 )

+# emulates https://docs.snowflake.com/en/sql-reference/functions/array_construct_compact
+# requires transforms.array_construct_compact
+ARRAY_CONSTRUCT_COMPACT = Template(
+    """
+CREATE OR REPLACE MACRO ${catalog}.array_construct_compact(list) AS (
+    SELECT ARRAY_AGG(x)::JSON FROM UNNEST(list) AS t(x) WHERE x IS NOT NULL
+);
+"""
+)
+

 def creation_sql(catalog: str) -> str:
     return f"""
         {EQUAL_NULL.substitute(catalog=catalog)};
         {FS_FLATTEN.substitute(catalog=catalog)};
+        {ARRAY_CONSTRUCT_COMPACT.substitute(catalog=catalog)};
     """
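
A small sketch of what the new macro emulates, reusing the cursor from the earlier sketch (the exact result representation depends on fakesnow's JSON handling and the array_construct_etc transform):

```python
cur.execute("SELECT ARRAY_CONSTRUCT_COMPACT(NULL, 'one', NULL, 2)")
print(cur.fetchone())  # NULLs dropped, e.g. an array holding just 'one' and 2
```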
fakesnow/server.py
CHANGED
@@ -9,6 +9,7 @@ from dataclasses import dataclass
 from typing import Any

 import snowflake.connector.errors
+from sqlglot import parse_one
 from starlette.applications import Starlette
 from starlette.concurrency import run_in_threadpool
 from starlette.requests import Request
@@ -63,6 +64,7 @@ async def login_request(request: Request) -> JSONResponse:
                 {"name": "AUTOCOMMIT", "value": True},
                 {"name": "CLIENT_SESSION_KEEP_ALIVE_HEARTBEAT_FREQUENCY", "value": 3600},
             ],
+            "sessionInfo": {},
         },
         "success": True,
     }
@@ -88,11 +90,24 @@ async def query_request(request: Request) -> JSONResponse:
     else:
         params = None

+    expr = parse_one(sql_text, read="snowflake")
+
     try:
         # only a single sql statement is sent at a time by the python snowflake connector
         cur = await run_in_threadpool(conn.cursor().execute, sql_text, binding_params=params)
         rowtype = describe_as_rowtype(cur._describe_last_sql())  # noqa: SLF001

+        expr = cur._last_transformed  # noqa: SLF001
+        assert expr
+        if put_stage_data := expr.args.get("put_stage_data"):
+            # this is a PUT command, so return the stage data
+            return JSONResponse(
+                {
+                    "data": put_stage_data,
+                    "success": True,
+                }
+            )
+
     except snowflake.connector.errors.ProgrammingError as e:
         logger.info(f"{sql_text=} ProgrammingError {e}")
         code = f"{e.errno:06d}"
fakesnow/transforms/__init__.py
CHANGED
@@ -8,16 +8,22 @@ from fakesnow.transforms.show import (
     show_keys as show_keys,
     show_procedures as show_procedures,
     show_schemas as show_schemas,
+    show_stages as show_stages,
     show_tables_etc as show_tables_etc,
     show_users as show_users,
     show_warehouses as show_warehouses,
 )
+from fakesnow.transforms.stage import (
+    create_stage as create_stage,
+    put_stage as put_stage,
+)
 from fakesnow.transforms.transforms import (
     SUCCESS_NOP as SUCCESS_NOP,
     alias_in_join as alias_in_join,
     alter_table_strip_cluster_by as alter_table_strip_cluster_by,
     array_agg as array_agg,
     array_agg_within_group as array_agg_within_group,
+    array_construct_etc as array_construct_etc,
     array_size as array_size,
     create_clone as create_clone,
     create_database as create_database,
@@ -36,9 +42,7 @@ from fakesnow.transforms.transforms import (
     identifier as identifier,
     indices_to_json_extract as indices_to_json_extract,
     information_schema_databases as information_schema_databases,
-    information_schema_fs_columns as information_schema_fs_columns,
-    information_schema_fs_tables as information_schema_fs_tables,
-    information_schema_fs_views as information_schema_fs_views,
+    information_schema_fs as information_schema_fs,
     integer_precision as integer_precision,
     json_extract_cased_as_varchar as json_extract_cased_as_varchar,
     json_extract_cast_as_varchar as json_extract_cast_as_varchar,