fakesnow 0.9.41__py3-none-any.whl → 0.9.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fakesnow/copy_into.py +77 -23
- fakesnow/cursor.py +56 -27
- fakesnow/expr.py +11 -0
- fakesnow/params.py +29 -0
- fakesnow/server.py +11 -5
- fakesnow/transforms/__init__.py +1 -0
- fakesnow/transforms/show.py +10 -27
- fakesnow/transforms/stage.py +151 -31
- fakesnow/transforms/transforms.py +50 -41
- {fakesnow-0.9.41.dist-info → fakesnow-0.9.43.dist-info}/METADATA +5 -4
- {fakesnow-0.9.41.dist-info → fakesnow-0.9.43.dist-info}/RECORD +15 -14
- {fakesnow-0.9.41.dist-info → fakesnow-0.9.43.dist-info}/WHEEL +0 -0
- {fakesnow-0.9.41.dist-info → fakesnow-0.9.43.dist-info}/entry_points.txt +0 -0
- {fakesnow-0.9.41.dist-info → fakesnow-0.9.43.dist-info}/licenses/LICENSE +0 -0
- {fakesnow-0.9.41.dist-info → fakesnow-0.9.43.dist-info}/top_level.txt +0 -0
fakesnow/copy_into.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import datetime
|
4
|
+
import os
|
4
5
|
from collections.abc import Sequence
|
5
6
|
from dataclasses import dataclass, field
|
6
|
-
from
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import Any, NamedTuple, Protocol, Union, cast
|
7
9
|
from urllib.parse import urlparse, urlunparse
|
8
10
|
|
9
11
|
import duckdb
|
@@ -13,6 +15,9 @@ from sqlglot import exp
|
|
13
15
|
|
14
16
|
import fakesnow.transforms.stage as stage
|
15
17
|
from fakesnow import logger
|
18
|
+
from fakesnow.params import MutableParams, pop_qmark_param
|
19
|
+
|
20
|
+
Params = Union[Sequence[Any], dict[Any, Any]]
|
16
21
|
|
17
22
|
|
18
23
|
class LoadHistoryRecord(NamedTuple):
|
@@ -38,9 +43,9 @@ def copy_into(
|
|
38
43
|
current_database: str | None,
|
39
44
|
current_schema: str | None,
|
40
45
|
expr: exp.Copy,
|
41
|
-
params:
|
46
|
+
params: MutableParams | None = None,
|
42
47
|
) -> str:
|
43
|
-
cparams = _params(expr)
|
48
|
+
cparams = _params(expr, params)
|
44
49
|
if isinstance(cparams.file_format, ReadParquet):
|
45
50
|
from_ = expr.args["files"][0]
|
46
51
|
# parquet must use MATCH_BY_COLUMN_NAME (TODO) or a copy transformation
|
@@ -54,11 +59,15 @@ def copy_into(
|
|
54
59
|
|
55
60
|
from_source = _from_source(expr)
|
56
61
|
source = (
|
57
|
-
stage_url_from_var(from_source, duck_conn, current_database, current_schema)
|
62
|
+
stage_url_from_var(from_source[1:], duck_conn, current_database, current_schema)
|
58
63
|
if from_source.startswith("@")
|
59
64
|
else from_source
|
60
65
|
)
|
61
|
-
urls = _source_urls(source, cparams.files)
|
66
|
+
urls = _source_urls(source, cparams.files) if cparams.files else _source_glob(source, duck_conn)
|
67
|
+
if not urls:
|
68
|
+
sql = "SELECT 'Copy executed with 0 files processed.' AS status"
|
69
|
+
duck_conn.execute(sql)
|
70
|
+
return sql
|
62
71
|
|
63
72
|
inserts = _inserts(expr, cparams, urls)
|
64
73
|
table = expr.this
|
@@ -95,6 +104,10 @@ def copy_into(
|
|
95
104
|
error_limit = 1
|
96
105
|
error_count = 0
|
97
106
|
first_error_message = None
|
107
|
+
path = urlparse(url).path
|
108
|
+
if cparams.purge and stage.is_internal(path):
|
109
|
+
# If the file is internal, we can remove it from the stage
|
110
|
+
os.remove(path)
|
98
111
|
|
99
112
|
history = LoadHistoryRecord(
|
100
113
|
schema_name=schema,
|
@@ -123,7 +136,7 @@ def copy_into(
|
|
123
136
|
"first_error_character, first_error_column_name"
|
124
137
|
)
|
125
138
|
values = "\n, ".join(
|
126
|
-
f"('{h.file_name}', '{h.status}', {h.row_parsed}, {h.row_count}, "
|
139
|
+
f"('{_result_file_name(h.file_name)}', '{h.status}', {h.row_parsed}, {h.row_count}, "
|
127
140
|
f"{h.error_limit or 'NULL'}, {h.error_count}, "
|
128
141
|
f"{repr(h.first_error_message) if h.first_error_message else 'NULL'}, "
|
129
142
|
f"{h.first_error_line_number or 'NULL'}, {h.first_error_character_position or 'NULL'}, "
|
@@ -132,6 +145,7 @@ def copy_into(
|
|
132
145
|
)
|
133
146
|
sql = f"SELECT * FROM (VALUES\n {values}\n) AS t({columns})"
|
134
147
|
duck_conn.execute(sql)
|
148
|
+
|
135
149
|
return sql
|
136
150
|
except duckdb.HTTPException as e:
|
137
151
|
raise snowflake.connector.errors.ProgrammingError(msg=e.args[0], errno=91016, sqlstate="22000") from None
|
@@ -139,13 +153,23 @@ def copy_into(
|
|
139
153
|
raise snowflake.connector.errors.ProgrammingError(msg=e.args[0], errno=100038, sqlstate="22018") from None
|
140
154
|
|
141
155
|
|
142
|
-
def
|
156
|
+
def _result_file_name(url: str) -> str:
    """Return the file name to report for ``url`` in the COPY INTO result.

    URLs whose path is not inside an internal stage are reported unchanged.
    For internal stage files the result is built from the last two
    ``/``-separated segments of ``url``: the stage name lower-cased, followed
    by the file name.
    """
    if stage.is_internal(urlparse(url).path):
        segments = url.split("/")
        stage_dir, file_name = segments[-2], segments[-1]
        return f"{stage_dir.lower()}/{file_name}"

    return url
|
163
|
+
|
164
|
+
|
165
|
+
def _params(expr: exp.Copy, params: MutableParams | None = None) -> CopyParams:
|
143
166
|
kwargs = {}
|
144
167
|
force = False
|
168
|
+
purge = False
|
169
|
+
on_error = "ABORT_STATEMENT"
|
145
170
|
|
146
|
-
|
147
|
-
|
148
|
-
for param in params:
|
171
|
+
cparams = CopyParams()
|
172
|
+
for param in cast(list[exp.CopyParameter], expr.args.get("params", [])):
|
149
173
|
assert isinstance(param.this, exp.Var), f"{param.this.__class__} is not a Var"
|
150
174
|
var = param.this.name.upper()
|
151
175
|
if var == "FILE_FORMAT":
|
@@ -166,10 +190,22 @@ def _params(expr: exp.Copy) -> Params:
|
|
166
190
|
force = True
|
167
191
|
elif var == "FILES":
|
168
192
|
kwargs["files"] = [lit.name for lit in param.find_all(exp.Literal)]
|
193
|
+
elif var == "PURGE":
|
194
|
+
purge = True
|
195
|
+
elif var == "ON_ERROR":
|
196
|
+
if isinstance(param.expression, exp.Var):
|
197
|
+
on_error = param.expression.name.upper()
|
198
|
+
elif isinstance(param.expression, exp.Placeholder):
|
199
|
+
on_error = pop_qmark_param(params, expr, param.expression)
|
200
|
+
else:
|
201
|
+
raise NotImplementedError(f"{param.expression.__class__=}")
|
202
|
+
|
203
|
+
if not (isinstance(on_error, str) and on_error.upper() == "ABORT_STATEMENT"):
|
204
|
+
raise NotImplementedError(param)
|
169
205
|
else:
|
170
206
|
raise ValueError(f"Unknown copy parameter: {param.this}")
|
171
207
|
|
172
|
-
return
|
208
|
+
return CopyParams(force=force, purge=purge, on_error=on_error, **kwargs)
|
173
209
|
|
174
210
|
|
175
211
|
def _from_source(expr: exp.Copy) -> str:
|
@@ -191,6 +227,9 @@ def _from_source(expr: exp.Copy) -> str:
|
|
191
227
|
)
|
192
228
|
# return the name of the stage, eg: @stage1
|
193
229
|
return var.this
|
230
|
+
elif isinstance(from_, exp.Var):
|
231
|
+
# return the name of the stage, eg: @stage1
|
232
|
+
return from_.this
|
194
233
|
|
195
234
|
assert isinstance(from_, exp.Literal), f"{from_} is not a exp.Literal"
|
196
235
|
# return url
|
@@ -198,9 +237,9 @@ def _from_source(expr: exp.Copy) -> str:
|
|
198
237
|
|
199
238
|
|
200
239
|
def stage_url_from_var(
|
201
|
-
|
240
|
+
var: str, duck_conn: DuckDBPyConnection, current_database: str | None, current_schema: str | None
|
202
241
|
) -> str:
|
203
|
-
database_name, schema_name, name = stage.parts_from_var(
|
242
|
+
database_name, schema_name, name = stage.parts_from_var(var, current_database, current_schema)
|
204
243
|
|
205
244
|
# Look up the stage URL
|
206
245
|
duck_conn.execute(
|
@@ -211,7 +250,9 @@ def stage_url_from_var(
|
|
211
250
|
(database_name, schema_name, name),
|
212
251
|
)
|
213
252
|
if result := duck_conn.fetchone():
|
214
|
-
|
253
|
+
# if no URL is found, it is an internal stage ie: local directory
|
254
|
+
url = result[0] or stage.internal_dir(f"{database_name}.{schema_name}.{name}")
|
255
|
+
return url
|
215
256
|
else:
|
216
257
|
raise snowflake.connector.errors.ProgrammingError(
|
217
258
|
msg=f"SQL compilation error:\nStage '{database_name}.{schema_name}.{name}' does not exist or not authorized.", # noqa: E501
|
@@ -220,16 +261,29 @@ def stage_url_from_var(
|
|
220
261
|
)
|
221
262
|
|
222
263
|
|
223
|
-
def _source_urls(
|
264
|
+
def _source_urls(source: str, files: list[str]) -> list[str]:
    """Expand a source location into the list of URLs to load.

    Args:
        source: URL of the location; it must include a scheme.
        files: Names handed to ``_urlunparse`` as the suffix, one URL per name.

    Returns:
        One URL per entry in ``files``, or ``[source]`` when ``files`` is empty.

    Raises:
        snowflake.connector.errors.ProgrammingError: If ``source`` has no scheme.
    """
    parsed = urlparse(source)
    if not parsed.scheme:
        raise snowflake.connector.errors.ProgrammingError(
            msg=f"SQL compilation error:\ninvalid URL prefix found in: '{source}'", errno=1011, sqlstate="42601"
        )

    # rebuild each url from its six components to ensure correct handling of host slash
    urls = [_urlunparse(*parsed, file) for file in files]
    return urls if urls else [source]
|
274
|
+
|
275
|
+
|
276
|
+
def _source_glob(source: str, duck_conn: DuckDBPyConnection) -> list[str]:
    """List files from the source using duckdb glob.

    Args:
        source: Either a location for which ``stage.is_internal`` is true
            (treated as a local directory) or a URL.
        duck_conn: Connection used to run the glob query.

    Returns:
        The ``file`` column of the glob result, one entry per match.
    """
    if stage.is_internal(source):
        source = Path(source).as_uri()  # convert local directory to a file URL

    # a file URL is globbed as a directory (its children), anything else as a prefix
    pattern = f"{source}/*" if urlparse(source).scheme == "file" else f"{source}*"

    # The pattern is embedded in a SQL string literal, so any single quote in the
    # location must be doubled; otherwise it would end the literal early and the
    # statement would be malformed.
    escaped = pattern.replace("'", "''")
    sql = f"SELECT file FROM glob('{escaped}')"
    logger.log_sql(sql)
    return [row[0] for row in duck_conn.execute(sql).fetchall()]
|
233
287
|
|
234
288
|
|
235
289
|
def _urlunparse(scheme: str, netloc: str, path: str, params: str, query: str, fragment: str, suffix: str) -> str:
|
@@ -245,7 +299,7 @@ def _urlunparse(scheme: str, netloc: str, path: str, params: str, query: str, fr
|
|
245
299
|
return urlunparse((scheme, netloc, path, params, query, fragment))
|
246
300
|
|
247
301
|
|
248
|
-
def _inserts(expr: exp.Copy, params:
|
302
|
+
def _inserts(expr: exp.Copy, params: CopyParams, urls: list[str]) -> list[exp.Expression]:
|
249
303
|
# INTO expression
|
250
304
|
target = expr.this
|
251
305
|
|
@@ -331,10 +385,8 @@ class ReadCSV(FileTypeHandler):
|
|
331
385
|
delimiter: str = ","
|
332
386
|
|
333
387
|
def read_expression(self, url: str) -> exp.Expression:
|
334
|
-
args = []
|
335
|
-
|
336
388
|
# don't parse header and use as column names, keep them as column0, column1, etc
|
337
|
-
args
|
389
|
+
args = [self.make_eq("header", False)]
|
338
390
|
|
339
391
|
if self.skip_header:
|
340
392
|
args.append(self.make_eq("skip", 1))
|
@@ -357,8 +409,10 @@ class ReadParquet(FileTypeHandler):
|
|
357
409
|
|
358
410
|
|
359
411
|
@dataclass
class CopyParams:
    """Options collected from the parameters of a COPY INTO statement."""

    # names listed in the FILES parameter; empty when FILES was not given
    files: list[str] = field(default_factory=list)
    # Snowflake defaults to CSV when no file format is specified
    file_format: FileTypeHandler = field(default_factory=ReadCSV)
    force: bool = False
    # True when the PURGE parameter is present
    purge: bool = False
    # _params only accepts ABORT_STATEMENT and raises NotImplementedError otherwise
    on_error: str = "ABORT_STATEMENT"  # Default to ABORT_STATEMENT
|
fakesnow/cursor.py
CHANGED
@@ -27,7 +27,9 @@ import fakesnow.info_schema as info_schema
|
|
27
27
|
import fakesnow.transforms as transforms
|
28
28
|
from fakesnow import logger
|
29
29
|
from fakesnow.copy_into import copy_into
|
30
|
+
from fakesnow.params import MutableParams
|
30
31
|
from fakesnow.rowtype import describe_as_result_metadata
|
32
|
+
from fakesnow.transforms import stage
|
31
33
|
|
32
34
|
if TYPE_CHECKING:
|
33
35
|
# don't require pandas at import time
|
@@ -41,6 +43,7 @@ SCHEMA_UNSET = "schema_unset"
|
|
41
43
|
SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
|
42
44
|
SQL_CREATED_DATABASE = Template("SELECT 'Database ${name} successfully created.' as 'status'")
|
43
45
|
SQL_CREATED_SCHEMA = Template("SELECT 'Schema ${name} successfully created.' as 'status'")
|
46
|
+
SQL_CREATED_SECRET = Template("SELECT 'Secret ${name} successfully created.' as 'status'")
|
44
47
|
SQL_CREATED_TABLE = Template("SELECT 'Table ${name} successfully created.' as 'status'")
|
45
48
|
SQL_CREATED_VIEW = Template("SELECT 'View ${name} successfully created.' as 'status'")
|
46
49
|
SQL_CREATED_STAGE = Template("SELECT 'Stage area ${name} successfully created.' as status")
|
@@ -146,7 +149,8 @@ class FakeSnowflakeCursor:
|
|
146
149
|
self._sqlstate = None
|
147
150
|
|
148
151
|
if os.environ.get("FAKESNOW_DEBUG") == "snowflake":
|
149
|
-
|
152
|
+
p = params or kwargs.get("binding_params")
|
153
|
+
print(f"{command};params={p}" if p else f"{command};", file=sys.stderr)
|
150
154
|
|
151
155
|
command = self._inline_variables(command)
|
152
156
|
if kwargs.get("binding_params"):
|
@@ -155,6 +159,10 @@ class FakeSnowflakeCursor:
|
|
155
159
|
else:
|
156
160
|
command, params = self._rewrite_with_params(command, params)
|
157
161
|
|
162
|
+
# convert tuple to mutable list
|
163
|
+
if not isinstance(params, (list, dict)) and params is not None:
|
164
|
+
params = list(params)
|
165
|
+
|
158
166
|
if self._conn.nop_regexes and any(re.match(p, command, re.IGNORECASE) for p in self._conn.nop_regexes):
|
159
167
|
transformed = transforms.SUCCESS_NOP
|
160
168
|
self._execute(transformed, params)
|
@@ -164,9 +172,12 @@ class FakeSnowflakeCursor:
|
|
164
172
|
self.check_db_and_schema(expression)
|
165
173
|
|
166
174
|
for exp in self._transform_explode(expression):
|
167
|
-
transformed = self._transform(exp)
|
175
|
+
transformed = self._transform(exp, params)
|
168
176
|
self._execute(transformed, params)
|
169
177
|
|
178
|
+
if not kwargs.get("server") and (put_stage_data := transformed.args.get("put_stage_data")): # pyright: ignore[reportPossiblyUnboundVariable]
|
179
|
+
self._put_files(put_stage_data)
|
180
|
+
|
170
181
|
return self
|
171
182
|
except snowflake.connector.errors.ProgrammingError as e:
|
172
183
|
self._sqlstate = e.sqlstate
|
@@ -180,6 +191,13 @@ class FakeSnowflakeCursor:
|
|
180
191
|
msg = f"{e} not implemented. Please raise an issue via https://github.com/tekumara/fakesnow/issues/new"
|
181
192
|
raise snowflake.connector.errors.ProgrammingError(msg=msg, errno=9999, sqlstate="99999") from e
|
182
193
|
|
194
|
+
def _put_files(self, put_stage_data: stage.UploadCommandDict) -> None:
    """Upload the files described by ``put_stage_data`` and expose the outcome as the cursor result.

    The rows returned by ``stage.upload_files`` become this cursor's arrow
    table, and the row count is set to the number of those rows.
    """
    # The local must keep the name _df: the query below refers to it by that
    # name (duckdb resolves it to this in-scope arrow table).
    _df = pyarrow.Table.from_pylist(stage.upload_files(put_stage_data))
    self._arrow_table = self._duck_conn.execute("select * from _df").fetch_arrow_table()
    self._rowcount = self._arrow_table.num_rows
|
200
|
+
|
183
201
|
def check_db_and_schema(self, expression: exp.Expression) -> None:
|
184
202
|
no_database, no_schema = checks.is_unqualified_table_expression(expression)
|
185
203
|
|
@@ -198,9 +216,10 @@ class FakeSnowflakeCursor:
|
|
198
216
|
sqlstate="22000",
|
199
217
|
)
|
200
218
|
|
201
|
-
def _transform(self, expression: exp.Expression) -> exp.Expression:
|
219
|
+
def _transform(self, expression: exp.Expression, params: MutableParams | None) -> exp.Expression:
|
202
220
|
return (
|
203
|
-
expression.transform(transforms.
|
221
|
+
expression.transform(lambda e: transforms.identifier(e, params))
|
222
|
+
.transform(transforms.upper_case_unquoted_identifiers)
|
204
223
|
.transform(transforms.update_variables, variables=self._conn.variables)
|
205
224
|
.transform(transforms.set_schema, current_database=self._conn.database)
|
206
225
|
.transform(transforms.create_database, db_path=self._conn.db_path)
|
@@ -238,7 +257,6 @@ class FakeSnowflakeCursor:
|
|
238
257
|
.transform(transforms.sample)
|
239
258
|
.transform(transforms.array_size)
|
240
259
|
.transform(transforms.random)
|
241
|
-
.transform(transforms.identifier)
|
242
260
|
.transform(transforms.array_agg_within_group)
|
243
261
|
.transform(transforms.array_agg)
|
244
262
|
.transform(transforms.array_construct_etc)
|
@@ -264,7 +282,8 @@ class FakeSnowflakeCursor:
|
|
264
282
|
.transform(transforms.alias_in_join)
|
265
283
|
.transform(transforms.alter_table_strip_cluster_by)
|
266
284
|
.transform(lambda e: transforms.create_stage(e, self._conn.database, self._conn.schema))
|
267
|
-
.transform(lambda e: transforms.
|
285
|
+
.transform(lambda e: transforms.list_stage(e, self._conn.database, self._conn.schema))
|
286
|
+
.transform(lambda e: transforms.put_stage(e, self._conn.database, self._conn.schema, params))
|
268
287
|
)
|
269
288
|
|
270
289
|
def _transform_explode(self, expression: exp.Expression) -> list[exp.Expression]:
|
@@ -272,7 +291,7 @@ class FakeSnowflakeCursor:
|
|
272
291
|
# Split transforms have limited support at the moment.
|
273
292
|
return transforms.merge(expression)
|
274
293
|
|
275
|
-
def _execute(self, transformed: exp.Expression, params:
|
294
|
+
def _execute(self, transformed: exp.Expression, params: MutableParams | None = None) -> None:
|
276
295
|
self._arrow_table = None
|
277
296
|
self._arrow_table_fetch_index = None
|
278
297
|
self._rowcount = None
|
@@ -335,13 +354,26 @@ class FakeSnowflakeCursor:
|
|
335
354
|
result_sql = SQL_CREATED_DATABASE.substitute(name=create_db_name)
|
336
355
|
|
337
356
|
elif stage_name := transformed.args.get("create_stage_name"):
|
338
|
-
|
339
|
-
|
340
|
-
|
357
|
+
(affected_count,) = self._duck_conn.fetchall()[0]
|
358
|
+
if affected_count == 0:
|
359
|
+
raise snowflake.connector.errors.ProgrammingError(
|
360
|
+
msg=f"SQL compilation error:\nObject '{stage_name}' already exists.",
|
361
|
+
errno=2002,
|
362
|
+
sqlstate="42710",
|
341
363
|
)
|
342
|
-
|
343
|
-
|
344
|
-
|
364
|
+
result_sql = SQL_CREATED_STAGE.substitute(name=stage_name)
|
365
|
+
|
366
|
+
elif stage_name := transformed.args.get("list_stage_name") or transformed.args.get("put_stage_name"):
|
367
|
+
if self._duck_conn.fetch_arrow_table().num_rows != 1:
|
368
|
+
raise snowflake.connector.errors.ProgrammingError(
|
369
|
+
msg=f"SQL compilation error:\nStage '{stage_name}' does not exist or not authorized.",
|
370
|
+
errno=2003,
|
371
|
+
sqlstate="02000",
|
372
|
+
)
|
373
|
+
if transformed.args.get("list_stage_name"):
|
374
|
+
result_sql = stage.list_stage_files_sql(stage_name)
|
375
|
+
elif transformed.args.get("put_stage_name"):
|
376
|
+
result_sql = SQL_SUCCESS
|
345
377
|
|
346
378
|
elif cmd == "INSERT":
|
347
379
|
(affected_count,) = self._duck_conn.fetchall()[0]
|
@@ -365,8 +397,8 @@ class FakeSnowflakeCursor:
|
|
365
397
|
lambda e: transforms.describe_table(e, self._conn.database, self._conn.schema)
|
366
398
|
).sql(dialect="duckdb")
|
367
399
|
|
368
|
-
elif
|
369
|
-
ident = eid.
|
400
|
+
elif eid := transformed.find(exp.Identifier, bfs=False):
|
401
|
+
ident = eid.name
|
370
402
|
if cmd == "CREATE SCHEMA" and ident:
|
371
403
|
result_sql = SQL_CREATED_SCHEMA.substitute(name=ident)
|
372
404
|
|
@@ -389,6 +421,15 @@ class FakeSnowflakeCursor:
|
|
389
421
|
|
390
422
|
elif cmd == "DROP SCHEMA" and ident == self._conn.schema:
|
391
423
|
self._conn._schema = None # noqa: SLF001
|
424
|
+
elif (
|
425
|
+
cmd == "CREATE"
|
426
|
+
and isinstance(transformed, exp.Command)
|
427
|
+
and isinstance(transformed.expression, str)
|
428
|
+
and transformed.expression.upper().startswith(" SECRET")
|
429
|
+
):
|
430
|
+
match = re.search(r"SECRET\s+(\w+)\s*\(", transformed.expression, re.IGNORECASE)
|
431
|
+
secret_name = match[1].upper() if match else "UNKNOWN"
|
432
|
+
result_sql = SQL_CREATED_SECRET.substitute(name=secret_name)
|
392
433
|
|
393
434
|
if table_comment := cast(tuple[exp.Table, str], transformed.args.get("table_comment")):
|
394
435
|
# record table comment
|
@@ -415,18 +456,6 @@ class FakeSnowflakeCursor:
|
|
415
456
|
self._rowcount = affected_count or self._arrow_table.num_rows
|
416
457
|
self._sfqid = str(uuid.uuid4())
|
417
458
|
|
418
|
-
if stage_name := transformed.args.get("put_stage_name"):
|
419
|
-
if stage_name == "?":
|
420
|
-
assert isinstance(params, (tuple, list)) and len(params) == 1, (
|
421
|
-
"Expected single parameter for put_stage_name"
|
422
|
-
)
|
423
|
-
if self._arrow_table.num_rows != 1:
|
424
|
-
raise snowflake.connector.errors.ProgrammingError(
|
425
|
-
msg=f"SQL compilation error:\nStage '{stage_name}' does not exist or not authorized.",
|
426
|
-
errno=2003,
|
427
|
-
sqlstate="02000",
|
428
|
-
)
|
429
|
-
|
430
459
|
self._last_sql = result_sql or sql
|
431
460
|
self._last_params = None if result_sql else params
|
432
461
|
self._last_transformed = transformed
|
fakesnow/expr.py
CHANGED
@@ -30,3 +30,14 @@ def key_command(expression: exp.Expression) -> str:
|
|
30
30
|
key = expression.key.upper()
|
31
31
|
|
32
32
|
return key
|
33
|
+
|
34
|
+
|
35
|
+
def normalise_ident(name: str) -> str:
    """
    Return the identifier in its normalised form.

    A name wrapped in double quotes has the surrounding quotes removed and its
    case preserved; any other name is upper-cased. Snowflake treats quoted
    identifiers as case-sensitive and un-quoted identifiers as case-insensitive.
    """
    is_quoted = name[:1] == '"' and name[-1:] == '"'
    return name[1:-1] if is_quoted else name.upper()
|
fakesnow/params.py
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Any, Union
|
4
|
+
|
5
|
+
from sqlglot import exp
|
6
|
+
|
7
|
+
MutableParams = Union[list[Any], dict[Any, Any]]
|
8
|
+
|
9
|
+
|
10
|
+
def pop_qmark_param(params: MutableParams | None, expr: exp.Expression, pl: exp.Placeholder) -> Any:  # noqa: ANN401
    """Remove and return the bound value for the qmark placeholder ``pl``.

    The value is looked up by the position of ``pl`` among the placeholders of
    ``expr`` and is removed from ``params`` in place.

    Args:
        params: The bound parameters of the statement; must be a list.
        expr: The expression that contains ``pl``.
        pl: The placeholder whose value is wanted.

    Returns:
        The element of ``params`` at the placeholder's index.

    Raises:
        AssertionError: If ``params`` is not a list.
        ValueError: If ``pl`` is not one of the placeholders in ``expr``.
        IndexError: If ``params`` has no element at the placeholder's index.
    """
    assert isinstance(params, list), "params must be provided as a list or tuple to resolve qmarks"
    i = index_of_placeholder(expr, pl)
    if i < 0:
        # index_of_placeholder reports "not found" as -1, and list.pop(-1) would
        # silently hand back the last parameter instead of failing
        raise ValueError(f"placeholder {pl!r} not found in expression")
    return params.pop(i)
|
14
|
+
|
15
|
+
|
16
|
+
def index_of_placeholder(expr: exp.Expression, target: exp.Placeholder) -> int:
    """Count the number of prior placeholders to determine the index.

    Args:
        expr (exp.Expression): The expression to search.
        target (exp.Placeholder): The placeholder to find, matched by identity.

    Returns:
        int: The index of the placeholder, or -1 if not found.
    """
    placeholders = expr.find_all(exp.Placeholder, bfs=False)
    return next((position for position, candidate in enumerate(placeholders) if candidate is target), -1)
|
fakesnow/server.py
CHANGED
@@ -18,6 +18,7 @@ from starlette.routing import Route
|
|
18
18
|
|
19
19
|
from fakesnow.arrow import to_ipc, to_sf
|
20
20
|
from fakesnow.converter import from_binding
|
21
|
+
from fakesnow.expr import normalise_ident
|
21
22
|
from fakesnow.fakes import FakeSnowflakeConnection
|
22
23
|
from fakesnow.instance import FakeSnow
|
23
24
|
from fakesnow.rowtype import describe_as_rowtype
|
@@ -39,8 +40,8 @@ class ServerError(Exception):
|
|
39
40
|
|
40
41
|
|
41
42
|
async def login_request(request: Request) -> JSONResponse:
|
42
|
-
database = request.query_params.get("databaseName")
|
43
|
-
schema = request.query_params.get("schemaName")
|
43
|
+
database = (d := request.query_params.get("databaseName")) and normalise_ident(d)
|
44
|
+
schema = (s := request.query_params.get("schemaName")) and normalise_ident(s)
|
44
45
|
body = await request.body()
|
45
46
|
if request.headers.get("Content-Encoding") == "gzip":
|
46
47
|
body = gzip.decompress(body)
|
@@ -64,7 +65,10 @@ async def login_request(request: Request) -> JSONResponse:
|
|
64
65
|
{"name": "AUTOCOMMIT", "value": True},
|
65
66
|
{"name": "CLIENT_SESSION_KEEP_ALIVE_HEARTBEAT_FREQUENCY", "value": 3600},
|
66
67
|
],
|
67
|
-
"sessionInfo": {
|
68
|
+
"sessionInfo": {
|
69
|
+
"databaseName": database,
|
70
|
+
"schemaName": schema,
|
71
|
+
},
|
68
72
|
},
|
69
73
|
"success": True,
|
70
74
|
}
|
@@ -94,7 +98,7 @@ async def query_request(request: Request) -> JSONResponse:
|
|
94
98
|
|
95
99
|
try:
|
96
100
|
# only a single sql statement is sent at a time by the python snowflake connector
|
97
|
-
cur = await run_in_threadpool(conn.cursor().execute, sql_text, binding_params=params)
|
101
|
+
cur = await run_in_threadpool(conn.cursor().execute, sql_text, binding_params=params, server=True)
|
98
102
|
rowtype = describe_as_rowtype(cur._describe_last_sql()) # noqa: SLF001
|
99
103
|
|
100
104
|
expr = cur._last_transformed # noqa: SLF001
|
@@ -124,7 +128,7 @@ async def query_request(request: Request) -> JSONResponse:
|
|
124
128
|
)
|
125
129
|
except Exception as e:
|
126
130
|
# we have a bug or use of an unsupported feature
|
127
|
-
msg = f"{sql_text=} Unhandled exception"
|
131
|
+
msg = f"{sql_text=} {params=} Unhandled exception"
|
128
132
|
logger.error(msg, exc_info=e)
|
129
133
|
# my guess at mimicking a 500 error as per https://docs.snowflake.com/en/developer-guide/sql-api/reference
|
130
134
|
# and https://github.com/snowflakedb/gosnowflake/blob/8ed4c75ffd707dd712ad843f40189843ace683c4/restful.go#L318
|
@@ -147,6 +151,8 @@ async def query_request(request: Request) -> JSONResponse:
|
|
147
151
|
"total": cur._rowcount, # noqa: SLF001
|
148
152
|
"queryId": cur.sfqid,
|
149
153
|
"queryResultFormat": "arrow",
|
154
|
+
"finalDatabaseName": conn.database,
|
155
|
+
"finalSchemaName": conn.schema,
|
150
156
|
},
|
151
157
|
"success": True,
|
152
158
|
}
|
fakesnow/transforms/__init__.py
CHANGED
fakesnow/transforms/show.py
CHANGED
@@ -62,9 +62,7 @@ def show_columns(
|
|
62
62
|
|
63
63
|
See https://docs.snowflake.com/en/sql-reference/sql/show-columns
|
64
64
|
"""
|
65
|
-
if not (
|
66
|
-
isinstance(expression, exp.Show) and isinstance(expression.this, str) and expression.this.upper() == "COLUMNS"
|
67
|
-
):
|
65
|
+
if not (isinstance(expression, exp.Show) and expression.name.upper() == "COLUMNS"):
|
68
66
|
return expression
|
69
67
|
|
70
68
|
scope_kind = expression.args.get("scope_kind")
|
@@ -139,7 +137,7 @@ def show_databases(expression: exp.Expression) -> exp.Expression:
|
|
139
137
|
|
140
138
|
See https://docs.snowflake.com/en/sql-reference/sql/show-databases
|
141
139
|
"""
|
142
|
-
if isinstance(expression, exp.Show) and
|
140
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == "DATABASES":
|
143
141
|
return sqlglot.parse_one("SELECT * FROM _fs_global._fs_information_schema._fs_show_databases", read="duckdb")
|
144
142
|
|
145
143
|
return expression
|
@@ -177,7 +175,7 @@ def show_functions(expression: exp.Expression) -> exp.Expression:
|
|
177
175
|
|
178
176
|
See https://docs.snowflake.com/en/sql-reference/sql/show-functions
|
179
177
|
"""
|
180
|
-
if isinstance(expression, exp.Show) and
|
178
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == "FUNCTIONS":
|
181
179
|
return sqlglot.parse_one("SELECT * FROM _fs_global._fs_information_schema._fs_show_functions", read="duckdb")
|
182
180
|
|
183
181
|
return expression
|
@@ -197,11 +195,7 @@ def show_keys(
|
|
197
195
|
if kind == "FOREIGN":
|
198
196
|
snowflake_kind = "IMPORTED"
|
199
197
|
|
200
|
-
if (
|
201
|
-
isinstance(expression, exp.Show)
|
202
|
-
and isinstance(expression.this, str)
|
203
|
-
and expression.this.upper() == f"{snowflake_kind} KEYS"
|
204
|
-
):
|
198
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == f"{snowflake_kind} KEYS":
|
205
199
|
if kind == "FOREIGN":
|
206
200
|
statement = f"""
|
207
201
|
SELECT
|
@@ -298,11 +292,7 @@ def show_procedures(expression: exp.Expression) -> exp.Expression:
|
|
298
292
|
|
299
293
|
See https://docs.snowflake.com/en/sql-reference/sql/show-procedures
|
300
294
|
"""
|
301
|
-
if (
|
302
|
-
isinstance(expression, exp.Show)
|
303
|
-
and isinstance(expression.this, str)
|
304
|
-
and expression.this.upper() == "PROCEDURES"
|
305
|
-
):
|
295
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == "PROCEDURES":
|
306
296
|
return sqlglot.parse_one(
|
307
297
|
"SELECT * FROM _fs_global._fs_information_schema._fs_show_procedures",
|
308
298
|
read="duckdb",
|
@@ -333,7 +323,7 @@ def show_schemas(expression: exp.Expression, current_database: str | None) -> ex
|
|
333
323
|
|
334
324
|
See https://docs.snowflake.com/en/sql-reference/sql/show-schemas
|
335
325
|
"""
|
336
|
-
if isinstance(expression, exp.Show) and
|
326
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == "SCHEMAS":
|
337
327
|
if (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
|
338
328
|
database = ident.this
|
339
329
|
else:
|
@@ -350,9 +340,7 @@ def show_schemas(expression: exp.Expression, current_database: str | None) -> ex
|
|
350
340
|
|
351
341
|
def show_stages(expression: exp.Expression, current_database: str | None, current_schema: str | None) -> exp.Expression:
|
352
342
|
"""Transform SHOW STAGES to a select from the fake _fs_stages table."""
|
353
|
-
if not (
|
354
|
-
isinstance(expression, exp.Show) and isinstance(expression.this, str) and expression.this.upper() == "STAGES"
|
355
|
-
):
|
343
|
+
if not (isinstance(expression, exp.Show) and expression.name.upper() == "STAGES"):
|
356
344
|
return expression
|
357
345
|
|
358
346
|
scope_kind = expression.args.get("scope_kind")
|
@@ -480,8 +468,7 @@ def show_tables_etc(
|
|
480
468
|
"""Transform SHOW OBJECTS/TABLES/VIEWS to a query against the _fs_information_schema views."""
|
481
469
|
if not (
|
482
470
|
isinstance(expression, exp.Show)
|
483
|
-
and
|
484
|
-
and (show := expression.this.upper())
|
471
|
+
and (show := expression.name.upper())
|
485
472
|
and show in {"OBJECTS", "TABLES", "VIEWS"}
|
486
473
|
):
|
487
474
|
return expression
|
@@ -538,7 +525,7 @@ def show_users(expression: exp.Expression) -> exp.Expression:
|
|
538
525
|
|
539
526
|
https://docs.snowflake.com/en/sql-reference/sql/show-users
|
540
527
|
"""
|
541
|
-
if isinstance(expression, exp.Show) and
|
528
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == "USERS":
|
542
529
|
return sqlglot.parse_one("SELECT * FROM _fs_global._fs_information_schema._fs_users", read="duckdb")
|
543
530
|
|
544
531
|
return expression
|
@@ -593,11 +580,7 @@ def show_warehouses(expression: exp.Expression) -> exp.Expression:
|
|
593
580
|
|
594
581
|
See https://docs.snowflake.com/en/sql-reference/sql/show-warehouses
|
595
582
|
"""
|
596
|
-
if (
|
597
|
-
isinstance(expression, exp.Show)
|
598
|
-
and isinstance(expression.this, str)
|
599
|
-
and expression.this.upper() == "WAREHOUSES"
|
600
|
-
):
|
583
|
+
if isinstance(expression, exp.Show) and expression.name.upper() == "WAREHOUSES":
|
601
584
|
return sqlglot.parse_one(SQL_SHOW_WAREHOUSES, read="duckdb")
|
602
585
|
|
603
586
|
return expression
|
fakesnow/transforms/stage.py
CHANGED
@@ -1,18 +1,45 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import datetime
|
4
|
+
import os
|
5
|
+
import tempfile
|
6
|
+
from pathlib import PurePath
|
7
|
+
from typing import Any, TypedDict
|
4
8
|
from urllib.parse import urlparse
|
5
9
|
from urllib.request import url2pathname
|
6
10
|
|
7
11
|
import snowflake.connector.errors
|
8
12
|
import sqlglot
|
13
|
+
from snowflake.connector.file_util import SnowflakeFileUtil
|
9
14
|
from sqlglot import exp
|
10
15
|
|
11
|
-
|
16
|
+
from fakesnow.expr import normalise_ident
|
17
|
+
from fakesnow.params import MutableParams
|
18
|
+
|
19
|
+
# TODO: clean up temp files on exit
|
20
|
+
LOCAL_BUCKET_PATH = tempfile.mkdtemp(prefix="fakesnow_bucket_")
|
21
|
+
|
22
|
+
|
23
|
+
class StageInfoDict(TypedDict):
    """Stage location details, carried as the ``stageInfo`` entry of an ``UploadCommandDict``."""

    # NOTE(review): key names look like they mirror the Snowflake connector's
    # stage info payload — confirm against upload_files
    locationType: str
    location: str
    creds: dict[str, Any]
|
27
|
+
|
28
|
+
|
29
|
+
class UploadCommandDict(TypedDict):
    """Description of a file upload, as consumed by ``upload_files``."""

    # where the files are uploaded to
    stageInfo: StageInfoDict
    src_locations: list[str]
    parallel: int
    autoCompress: bool
    sourceCompression: str
    overwrite: bool
    command: str
|
12
37
|
|
13
38
|
|
14
39
|
def create_stage(
|
15
|
-
expression: exp.Expression,
|
40
|
+
expression: exp.Expression,
|
41
|
+
current_database: str | None,
|
42
|
+
current_schema: str | None,
|
16
43
|
) -> exp.Expression:
|
17
44
|
"""Transform CREATE STAGE to an INSERT statement for the fake stages table."""
|
18
45
|
if not (
|
@@ -24,15 +51,17 @@ def create_stage(
|
|
24
51
|
):
|
25
52
|
return expression
|
26
53
|
|
54
|
+
ident = table.this
|
55
|
+
if not isinstance(ident, exp.Identifier):
|
56
|
+
raise snowflake.connector.errors.ProgrammingError(
|
57
|
+
msg=f"SQL compilation error:\nInvalid identifier type {ident.__class__.__name__} for stage name.",
|
58
|
+
errno=1003,
|
59
|
+
sqlstate="42000",
|
60
|
+
)
|
61
|
+
|
27
62
|
catalog = table.catalog or current_database
|
28
63
|
schema = table.db or current_schema
|
29
|
-
|
30
|
-
if isinstance(ident, exp.Placeholder):
|
31
|
-
stage_name = "?"
|
32
|
-
elif isinstance(ident, exp.Identifier):
|
33
|
-
stage_name = ident.this if ident.quoted else ident.this.upper()
|
34
|
-
else:
|
35
|
-
raise ValueError(f"Invalid identifier type {ident.__class__.__name__} for stage name")
|
64
|
+
stage_name = ident.this
|
36
65
|
now = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
37
66
|
|
38
67
|
is_temp = False
|
@@ -55,17 +84,19 @@ def create_stage(
|
|
55
84
|
cloud = "AWS" if url.startswith("s3://") else None
|
56
85
|
|
57
86
|
stage_type = ("EXTERNAL" if url else "INTERNAL") + (" TEMPORARY" if is_temp else "")
|
58
|
-
stage_name_value = stage_name if stage_name == "?" else repr(stage_name)
|
59
87
|
|
60
88
|
insert_sql = f"""
|
61
89
|
INSERT INTO _fs_global._fs_information_schema._fs_stages
|
62
90
|
(created_on, name, database_name, schema_name, url, has_credentials, has_encryption_key, owner,
|
63
91
|
comment, region, type, cloud, notification_channel, storage_integration, endpoint, owner_role_type,
|
64
92
|
directory_enabled)
|
65
|
-
|
66
|
-
'{now}', {
|
93
|
+
SELECT
|
94
|
+
'{now}', '{stage_name}', '{catalog}', '{schema}', '{url}', 'N', 'N', 'SYSADMIN',
|
67
95
|
'', NULL, '{stage_type}', {f"'{cloud}'" if cloud else "NULL"}, NULL, NULL, NULL, 'ROLE',
|
68
96
|
'N'
|
97
|
+
WHERE NOT EXISTS (
|
98
|
+
SELECT 1 FROM _fs_global._fs_information_schema._fs_stages
|
99
|
+
WHERE name = '{stage_name}' AND database_name = '{catalog}' AND schema_name = '{schema}'
|
69
100
|
)
|
70
101
|
"""
|
71
102
|
transformed = sqlglot.parse_one(insert_sql, read="duckdb")
|
@@ -73,10 +104,44 @@ def create_stage(
|
|
73
104
|
return transformed
|
74
105
|
|
75
106
|
|
76
|
-
|
107
|
+
def list_stage(expression: exp.Expression, current_database: str | None, current_schema: str | None) -> exp.Expression:
|
108
|
+
"""Transform LIST to list file system operation.
|
109
|
+
|
110
|
+
See https://docs.snowflake.com/en/sql-reference/sql/list
|
111
|
+
"""
|
112
|
+
if not (
|
113
|
+
isinstance(expression, exp.Alias)
|
114
|
+
and isinstance(expression.this, exp.Column)
|
115
|
+
and isinstance(expression.this.this, exp.Identifier)
|
116
|
+
and isinstance(expression.this.this.this, str)
|
117
|
+
and expression.this.this.this.upper() == "LIST"
|
118
|
+
):
|
119
|
+
return expression
|
120
|
+
|
121
|
+
stage = expression.args["alias"].this
|
122
|
+
if not isinstance(stage, exp.Var):
|
123
|
+
raise ValueError(f"LIST command requires a stage name as a Var, got {stage}")
|
124
|
+
|
125
|
+
var = stage.text("this")
|
126
|
+
catalog, schema, stage_name = parts_from_var(var, current_database=current_database, current_schema=current_schema)
|
127
|
+
|
128
|
+
query = f"""
|
129
|
+
SELECT *
|
130
|
+
from _fs_global._fs_information_schema._fs_stages
|
131
|
+
where database_name = '{catalog}' and schema_name = '{schema}' and name = '{stage_name}'
|
132
|
+
"""
|
133
|
+
|
134
|
+
transformed = sqlglot.parse_one(query, read="duckdb")
|
135
|
+
transformed.args["list_stage_name"] = f"{catalog}.{schema}.{stage_name}"
|
136
|
+
return transformed
|
77
137
|
|
78
138
|
|
79
|
-
def put_stage(
|
139
|
+
def put_stage(
|
140
|
+
expression: exp.Expression,
|
141
|
+
current_database: str | None,
|
142
|
+
current_schema: str | None,
|
143
|
+
params: MutableParams | None,
|
144
|
+
) -> exp.Expression:
|
80
145
|
"""Transform PUT to a SELECT statement to locate the stage.
|
81
146
|
|
82
147
|
See https://docs.snowflake.com/en/sql-reference/sql/put
|
@@ -90,14 +155,20 @@ def put_stage(expression: exp.Expression, current_database: str | None, current_
|
|
90
155
|
target = expression.args["target"]
|
91
156
|
|
92
157
|
assert isinstance(target, exp.Var), f"{target} is not a exp.Var"
|
93
|
-
|
94
|
-
if
|
95
|
-
|
158
|
+
this = target.text("this")
|
159
|
+
if this == "?":
|
160
|
+
if not (isinstance(params, list) and len(params) == 1):
|
161
|
+
raise NotImplementedError("PUT requires a single parameter for the stage name")
|
162
|
+
this = params.pop(0)
|
163
|
+
if not this.startswith("@"):
|
164
|
+
msg = f"SQL compilation error:\n{this} does not start with @"
|
96
165
|
raise snowflake.connector.errors.ProgrammingError(
|
97
166
|
msg=msg,
|
98
167
|
errno=1003,
|
99
168
|
sqlstate="42000",
|
100
169
|
)
|
170
|
+
# strip leading @
|
171
|
+
var = this[1:]
|
101
172
|
catalog, schema, stage_name = parts_from_var(var, current_database=current_database, current_schema=current_schema)
|
102
173
|
|
103
174
|
query = f"""
|
@@ -107,12 +178,13 @@ def put_stage(expression: exp.Expression, current_database: str | None, current_
|
|
107
178
|
"""
|
108
179
|
|
109
180
|
transformed = sqlglot.parse_one(query, read="duckdb")
|
110
|
-
|
181
|
+
fqname = f"{catalog}.{schema}.{stage_name}"
|
182
|
+
transformed.args["put_stage_name"] = fqname
|
111
183
|
transformed.args["put_stage_data"] = {
|
112
184
|
"stageInfo": {
|
113
185
|
# use LOCAL_FS otherwise we need to mock S3 with HTTPS which requires a certificate
|
114
186
|
"locationType": "LOCAL_FS",
|
115
|
-
"location":
|
187
|
+
"location": internal_dir(fqname),
|
116
188
|
"creds": {},
|
117
189
|
},
|
118
190
|
"src_locations": [src_path],
|
@@ -127,19 +199,8 @@ def put_stage(expression: exp.Expression, current_database: str | None, current_
|
|
127
199
|
return transformed
|
128
200
|
|
129
201
|
|
130
|
-
def normalise_ident(name: str) -> str:
|
131
|
-
"""
|
132
|
-
Strip double quotes if present else return uppercased.
|
133
|
-
Snowflake treats quoted identifiers as case-sensitive and un-quoted identifiers as case-insensitive
|
134
|
-
"""
|
135
|
-
if name.startswith('"') and name.endswith('"'):
|
136
|
-
return name[1:-1] # Strip quotes
|
137
|
-
|
138
|
-
return name.upper()
|
139
|
-
|
140
|
-
|
141
202
|
def parts_from_var(var: str, current_database: str | None, current_schema: str | None) -> tuple[str, str, str]:
|
142
|
-
parts = var
|
203
|
+
parts = var.split(".")
|
143
204
|
if len(parts) == 3:
|
144
205
|
# Fully qualified name
|
145
206
|
database_name, schema_name, name = parts
|
@@ -161,3 +222,62 @@ def parts_from_var(var: str, current_database: str | None, current_schema: str |
|
|
161
222
|
name = normalise_ident(name)
|
162
223
|
|
163
224
|
return database_name, schema_name, name
|
225
|
+
|
226
|
+
|
227
|
+
def is_internal(s: str) -> bool:
|
228
|
+
return PurePath(s).is_relative_to(LOCAL_BUCKET_PATH)
|
229
|
+
|
230
|
+
|
231
|
+
def internal_dir(fqname: str) -> str:
|
232
|
+
"""
|
233
|
+
Given a fully qualified stage name, return the directory path where the stage files are stored.
|
234
|
+
"""
|
235
|
+
catalog, schema, stage_name = fqname.split(".")
|
236
|
+
return f"{LOCAL_BUCKET_PATH}/{catalog}/{schema}/{stage_name}/"
|
237
|
+
|
238
|
+
|
239
|
+
def list_stage_files_sql(stage_name: str) -> str:
|
240
|
+
"""
|
241
|
+
Generate SQL to list files in a stage directory, matching Snowflake's LIST output format.
|
242
|
+
"""
|
243
|
+
sdir = internal_dir(stage_name)
|
244
|
+
return f"""
|
245
|
+
select
|
246
|
+
lower(split_part(filename, '/', -2)) || '/' || split_part(filename, '/', -1) AS name,
|
247
|
+
size,
|
248
|
+
md5(content) as md5,
|
249
|
+
strftime(last_modified, '%a, %d %b %Y %H:%M:%S GMT') as last_modified
|
250
|
+
from read_blob('{sdir}/*')
|
251
|
+
"""
|
252
|
+
|
253
|
+
|
254
|
+
def upload_files(put_stage_data: UploadCommandDict) -> list[dict[str, Any]]:
|
255
|
+
results = []
|
256
|
+
for src in put_stage_data["src_locations"]:
|
257
|
+
basename = os.path.basename(src)
|
258
|
+
stage_dir = put_stage_data["stageInfo"]["location"]
|
259
|
+
|
260
|
+
os.makedirs(stage_dir, exist_ok=True)
|
261
|
+
gzip_file_name, target_size = SnowflakeFileUtil.compress_file_with_gzip(src, stage_dir)
|
262
|
+
|
263
|
+
# Rename to match expected .gz extension on upload
|
264
|
+
target_basename = basename + ".gz"
|
265
|
+
target = os.path.join(stage_dir, target_basename)
|
266
|
+
os.replace(gzip_file_name, target)
|
267
|
+
|
268
|
+
target_size = os.path.getsize(target)
|
269
|
+
source_size = os.path.getsize(src)
|
270
|
+
|
271
|
+
results.append(
|
272
|
+
{
|
273
|
+
"source": basename,
|
274
|
+
"target": target_basename,
|
275
|
+
"source_size": source_size,
|
276
|
+
"target_size": target_size,
|
277
|
+
"source_compression": "NONE",
|
278
|
+
"target_compression": "GZIP",
|
279
|
+
"status": "UPLOADED",
|
280
|
+
"message": "",
|
281
|
+
}
|
282
|
+
)
|
283
|
+
return results
|
@@ -7,6 +7,7 @@ from typing import ClassVar, cast
|
|
7
7
|
import sqlglot
|
8
8
|
from sqlglot import exp
|
9
9
|
|
10
|
+
from fakesnow.params import MutableParams, pop_qmark_param
|
10
11
|
from fakesnow.variables import Variables
|
11
12
|
|
12
13
|
SUCCESS_NOP = sqlglot.parse_one("SELECT 'Statement executed successfully.' as status")
|
@@ -230,7 +231,7 @@ def describe_table(
|
|
230
231
|
catalog = table.catalog or current_database
|
231
232
|
schema = table.db or current_schema
|
232
233
|
|
233
|
-
if schema
|
234
|
+
if schema == "_FS_INFORMATION_SCHEMA":
|
234
235
|
# describing an information_schema view
|
235
236
|
# (schema already transformed from information_schema -> _fs_information_schema)
|
236
237
|
return sqlglot.parse_one(SQL_DESCRIBE_INFO_SCHEMA.substitute(view=f"{schema}.{table.name}"), read="duckdb")
|
@@ -537,22 +538,47 @@ def float_to_double(expression: exp.Expression) -> exp.Expression:
|
|
537
538
|
return expression
|
538
539
|
|
539
540
|
|
540
|
-
def identifier(expression: exp.Expression) -> exp.Expression:
|
541
|
-
"""Convert identifier function to an identifier.
|
541
|
+
def identifier(expression: exp.Expression, params: MutableParams | None) -> exp.Expression:
|
542
|
+
"""Convert identifier function to an identifier or table.
|
542
543
|
|
543
544
|
See https://docs.snowflake.com/en/sql-reference/identifier-literal
|
544
545
|
"""
|
545
546
|
|
546
547
|
if (
|
547
|
-
isinstance(expression, exp.
|
548
|
-
and isinstance(expression.this,
|
549
|
-
and expression.this.
|
548
|
+
isinstance(expression, exp.Table)
|
549
|
+
and isinstance(expression.this, exp.Anonymous)
|
550
|
+
and isinstance(expression.this.this, str)
|
551
|
+
and expression.this.this.upper() == "IDENTIFIER"
|
550
552
|
):
|
551
|
-
arg = expression.expressions[0]
|
553
|
+
arg = expression.this.expressions[0]
|
554
|
+
|
552
555
|
# ? is parsed as exp.Placeholder
|
553
|
-
if isinstance(arg, exp.Placeholder)
|
554
|
-
|
555
|
-
|
556
|
+
val: str = pop_qmark_param(params, arg.root(), arg) if isinstance(arg, exp.Placeholder) else arg.this
|
557
|
+
|
558
|
+
# If the whole identifier is quoted, treat as a single quoted identifier inside a Table node
|
559
|
+
if val.startswith('"') and val.endswith('"'):
|
560
|
+
return exp.Table(this=exp.Identifier(this=val[1:-1], quoted=True))
|
561
|
+
|
562
|
+
# Split a dotted identifier string into parts, identifying and stripping quoted segments
|
563
|
+
parts = [(p[1:-1], True) if p.startswith('"') and p.endswith('"') else (p, False) for p in val.split(".")]
|
564
|
+
if len(parts) == 1:
|
565
|
+
return exp.Table(this=exp.Identifier(this=parts[0][0], quoted=parts[0][1]))
|
566
|
+
elif len(parts) == 2:
|
567
|
+
# db.table
|
568
|
+
return exp.Table(
|
569
|
+
this=exp.Identifier(this=parts[1][0], quoted=parts[1][1]),
|
570
|
+
db=exp.Identifier(this=parts[0][0], quoted=parts[0][1]),
|
571
|
+
)
|
572
|
+
elif len(parts) == 3:
|
573
|
+
# catalog.db.table
|
574
|
+
return exp.Table(
|
575
|
+
this=exp.Identifier(this=parts[2][0], quoted=parts[2][1]),
|
576
|
+
db=exp.Identifier(this=parts[1][0], quoted=parts[1][1]),
|
577
|
+
catalog=exp.Identifier(this=parts[0][0], quoted=parts[0][1]),
|
578
|
+
)
|
579
|
+
else:
|
580
|
+
# fallback: treat as a single identifier
|
581
|
+
return exp.Table(this=exp.Identifier(this=val, quoted=False))
|
556
582
|
return expression
|
557
583
|
|
558
584
|
|
@@ -600,10 +626,10 @@ def information_schema_fs(expression: exp.Expression) -> exp.Expression:
|
|
600
626
|
|
601
627
|
if (
|
602
628
|
isinstance(expression, exp.Table)
|
603
|
-
and expression.db
|
604
|
-
and expression.name
|
629
|
+
and expression.db == "INFORMATION_SCHEMA"
|
630
|
+
and expression.name in {"COLUMNS", "TABLES", "VIEWS", "LOAD_HISTORY"}
|
605
631
|
):
|
606
|
-
expression.set("this", exp.Identifier(this=f"_FS_{expression.name
|
632
|
+
expression.set("this", exp.Identifier(this=f"_FS_{expression.name}", quoted=False))
|
607
633
|
expression.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
|
608
634
|
|
609
635
|
return expression
|
@@ -615,11 +641,8 @@ def information_schema_databases(
|
|
615
641
|
) -> exp.Expression:
|
616
642
|
if (
|
617
643
|
isinstance(expression, exp.Table)
|
618
|
-
and (
|
619
|
-
|
620
|
-
or (current_schema and current_schema.upper() == "INFORMATION_SCHEMA")
|
621
|
-
)
|
622
|
-
and expression.name.upper() == "DATABASES"
|
644
|
+
and (expression.db == "INFORMATION_SCHEMA" or (current_schema and current_schema == "INFORMATION_SCHEMA"))
|
645
|
+
and expression.name == "DATABASES"
|
623
646
|
):
|
624
647
|
return exp.Table(
|
625
648
|
this=exp.Identifier(this="DATABASES", quoted=False),
|
@@ -978,11 +1001,7 @@ def to_date(expression: exp.Expression) -> exp.Expression:
|
|
978
1001
|
exp.Expression: The transformed expression.
|
979
1002
|
"""
|
980
1003
|
|
981
|
-
if (
|
982
|
-
isinstance(expression, exp.Anonymous)
|
983
|
-
and isinstance(expression.this, str)
|
984
|
-
and expression.this.upper() == "TO_DATE"
|
985
|
-
):
|
1004
|
+
if isinstance(expression, exp.Anonymous) and expression.name.upper() == "TO_DATE":
|
986
1005
|
return exp.Cast(
|
987
1006
|
this=expression.expressions[0],
|
988
1007
|
to=exp.DataType(this=exp.DataType.Type.DATE, nested=False, prefix=False),
|
@@ -1066,11 +1085,7 @@ def to_decimal(expression: exp.Expression) -> exp.Expression:
|
|
1066
1085
|
to=exp.DataType(this=exp.DataType.Type.DECIMAL, expressions=[precision, scale], nested=False, prefix=False),
|
1067
1086
|
)
|
1068
1087
|
|
1069
|
-
if (
|
1070
|
-
isinstance(expression, exp.Anonymous)
|
1071
|
-
and isinstance(expression.this, str)
|
1072
|
-
and expression.this.upper() in ["TO_DECIMAL", "TO_NUMERIC"]
|
1073
|
-
):
|
1088
|
+
if isinstance(expression, exp.Anonymous) and expression.name.upper() in ["TO_DECIMAL", "TO_NUMERIC"]:
|
1074
1089
|
return _to_decimal(expression, exp.Cast)
|
1075
1090
|
|
1076
1091
|
return expression
|
@@ -1081,11 +1096,11 @@ def try_to_decimal(expression: exp.Expression) -> exp.Expression:
|
|
1081
1096
|
See https://docs.snowflake.com/en/sql-reference/functions/try_to_decimal
|
1082
1097
|
"""
|
1083
1098
|
|
1084
|
-
if (
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1099
|
+
if isinstance(expression, exp.Anonymous) and expression.name.upper() in [
|
1100
|
+
"TRY_TO_DECIMAL",
|
1101
|
+
"TRY_TO_NUMBER",
|
1102
|
+
"TRY_TO_NUMERIC",
|
1103
|
+
]:
|
1089
1104
|
return _to_decimal(expression, exp.TryCast)
|
1090
1105
|
|
1091
1106
|
return expression
|
@@ -1111,9 +1126,7 @@ def to_timestamp_ntz(expression: exp.Expression) -> exp.Expression:
|
|
1111
1126
|
Because it's not yet supported by sqlglot, see https://github.com/tobymao/sqlglot/issues/2748
|
1112
1127
|
"""
|
1113
1128
|
|
1114
|
-
if isinstance(expression, exp.Anonymous) and (
|
1115
|
-
isinstance(expression.this, str) and expression.this.upper() == "TO_TIMESTAMP_NTZ"
|
1116
|
-
):
|
1129
|
+
if isinstance(expression, exp.Anonymous) and expression.name.upper() == "TO_TIMESTAMP_NTZ":
|
1117
1130
|
return exp.StrToTime(
|
1118
1131
|
this=expression.expressions[0],
|
1119
1132
|
format=exp.Literal(this="%Y-%m-%d %H:%M:%S", is_string=True),
|
@@ -1164,11 +1177,7 @@ def try_parse_json(expression: exp.Expression) -> exp.Expression:
|
|
1164
1177
|
exp.Expression: The transformed expression.
|
1165
1178
|
"""
|
1166
1179
|
|
1167
|
-
if (
|
1168
|
-
isinstance(expression, exp.Anonymous)
|
1169
|
-
and isinstance(expression.this, str)
|
1170
|
-
and expression.this.upper() == "TRY_PARSE_JSON"
|
1171
|
-
):
|
1180
|
+
if isinstance(expression, exp.Anonymous) and expression.name.upper() == "TRY_PARSE_JSON":
|
1172
1181
|
expressions = expression.expressions
|
1173
1182
|
return exp.TryCast(
|
1174
1183
|
this=expressions[0],
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: fakesnow
|
3
|
-
Version: 0.9.41
|
3
|
+
Version: 0.9.43
|
4
4
|
Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -420,7 +420,7 @@ Fully supported:
|
|
420
420
|
- Multiple databases
|
421
421
|
- [Parameter binding](https://docs.snowflake.com/en/user-guide/python-connector-example#binding-data) in queries
|
422
422
|
- Table comments
|
423
|
-
- Pandas integration including [write_pandas(..)](https://docs.snowflake.com/en/user-guide/python-connector-api#write_pandas)
|
423
|
+
- Pandas integration including [write_pandas(..)](https://docs.snowflake.com/en/user-guide/python-connector-api#write_pandas)
|
424
424
|
- Result batch retrieval via [get_result_batches()](https://docs.snowflake.com/en/user-guide/python-connector-api#get_result_batches)
|
425
425
|
- HTTP server for non-Python connectors
|
426
426
|
|
@@ -431,7 +431,8 @@ Partially supported:
|
|
431
431
|
- Semi-structured data operations
|
432
432
|
- Tags
|
433
433
|
- User management
|
434
|
-
-
|
434
|
+
- Stages and PUT
|
435
|
+
- `COPY INTO` from S3 sources and stages, see [COPY INTO](#copy-into)
|
435
436
|
|
436
437
|
Not yet implemented:
|
437
438
|
|
@@ -447,7 +448,7 @@ For more detail see the [test suite](tests/).
|
|
447
448
|
|
448
449
|
## COPY INTO
|
449
450
|
|
450
|
-
`COPY INTO` can be used from S3 sources. By default the standard AWS credential chain will be used. If you are getting an HTTP 403 or need to provide alternative S3 credentials you can use the duckdb [CREATE SECRET](https://duckdb.org/docs/stable/extensions/httpfs/s3api) statement. For an example of creating a secret to use a moto S3 endpoint see `s3_client` in [conftest.py](tests/conftest.py#L80)
|
451
|
+
`COPY INTO` can be used from S3 sources and stages. By default the standard AWS credential chain will be used. If you are getting an HTTP 403 or need to provide alternative S3 credentials you can use the duckdb [CREATE SECRET](https://duckdb.org/docs/stable/extensions/httpfs/s3api) statement. For an example of creating a secret to use a moto S3 endpoint see `s3_client` in [conftest.py](tests/conftest.py#L80)
|
451
452
|
|
452
453
|
## Contributing
|
453
454
|
|
@@ -5,9 +5,9 @@ fakesnow/checks.py,sha256=bOJPMp46AvjJV_bXXjx2njO2dXNjffLrznwRuKyYZ4g,2889
|
|
5
5
|
fakesnow/cli.py,sha256=9qfI-Ssr6mo8UmIlXkUAOz2z2YPBgDsrEVaZv9FjGFs,2201
|
6
6
|
fakesnow/conn.py,sha256=diCwcjaCBrlCn9PyjbScfIQTNQjqiPTkQanUTqcvblE,6009
|
7
7
|
fakesnow/converter.py,sha256=wPOfsFXIUJNJSx5oFNAxh13udxmAVIIHsLK8BiGkXGA,1635
|
8
|
-
fakesnow/copy_into.py,sha256=
|
9
|
-
fakesnow/cursor.py,sha256=
|
10
|
-
fakesnow/expr.py,sha256=
|
8
|
+
fakesnow/copy_into.py,sha256=utlV03RWHdWblIlKs92Q__9BWHi_gvqT76RV6tEMWTo,16006
|
9
|
+
fakesnow/cursor.py,sha256=A1jWLGkXzHcYbM0wFrbh2PiewzFFS7ZlfOm-x3AGHgg,25412
|
10
|
+
fakesnow/expr.py,sha256=tLcHC-mx2O6-vCt77RVlRQa3CovqUdu4vc5WOSmuQdk,1231
|
11
11
|
fakesnow/fakes.py,sha256=JQTiUkkwPeQrJ8FDWhPFPK6pGwd_aR2oiOrNzCWznlM,187
|
12
12
|
fakesnow/fixtures.py,sha256=2rj0MTZlaZc4PNWhaqC5IiiLa7E9G0QZT3g45YawsL0,633
|
13
13
|
fakesnow/info_schema.py,sha256=lqEYD5aWK2MamjALbj6ct7pz_1yyAq3tAk51kLa8NKk,9872
|
@@ -15,19 +15,20 @@ fakesnow/instance.py,sha256=OKoYXwaI6kL9HQpnHx44yzpON_xNfuIT_F4oJNF_XXQ,2114
|
|
15
15
|
fakesnow/logger.py,sha256=U6EjUENQuTrDeNYqER2hxazoySmXzLmZJ-t-SDZgjkg,363
|
16
16
|
fakesnow/macros.py,sha256=lxtznTCYryjecFkwswbqWMzCVamDLWyQZRKWtkWCWEk,1397
|
17
17
|
fakesnow/pandas_tools.py,sha256=wI203UQHC8JvDzxE_VjE1NeV4rThek2P-u52oTg2foo,3481
|
18
|
+
fakesnow/params.py,sha256=Hp7tBiDycdOh8LuRoISsOZycD7DufDAMKyF_hqEjh6M,929
|
18
19
|
fakesnow/py.typed,sha256=B-DLSjYBi7pkKjwxCSdpVj2J02wgfJr-E7B1wOUyxYU,80
|
19
20
|
fakesnow/rowtype.py,sha256=QUp8EaXD5LT0Xv8BXk5ze4WseEn52xoJ6R05pJjs5mM,2729
|
20
|
-
fakesnow/server.py,sha256=
|
21
|
+
fakesnow/server.py,sha256=NzhpVM_3L9QeG84bhn0_It6ABM9ifs1CYWPgKhWaOeE,7511
|
21
22
|
fakesnow/variables.py,sha256=BGnD4LAdVByfJ2GXL6qpGBaTF8ZJRjt3pdJsd9sIAcw,3134
|
22
|
-
fakesnow/transforms/__init__.py,sha256=
|
23
|
+
fakesnow/transforms/__init__.py,sha256=3dpcXjeLkfc4e9rO0G8Mnv9zZgmwsKSwQ7UJeyLYnOg,2837
|
23
24
|
fakesnow/transforms/merge.py,sha256=H2yYyzGsxjpggS6PY91rAUtYCFuOkBm8uGi0EO0r6T0,8375
|
24
|
-
fakesnow/transforms/show.py,sha256=
|
25
|
-
fakesnow/transforms/stage.py,sha256=
|
26
|
-
fakesnow/transforms/transforms.py,sha256=
|
27
|
-
fakesnow-0.9.
|
25
|
+
fakesnow/transforms/show.py,sha256=NIJDkf4-ns4VTw0DQphEn2DnwxQjXyTU2wS9VSUh6Wc,19919
|
26
|
+
fakesnow/transforms/stage.py,sha256=VRE122Twha1Dd7QC2rdAAWWG93OxIiJZv2DfvGSsMZ8,10047
|
27
|
+
fakesnow/transforms/transforms.py,sha256=ykzay7J28MsifhRKhm5qJJd__hMcLNVZTT14MSdWcJs,49102
|
28
|
+
fakesnow-0.9.43.dist-info/licenses/LICENSE,sha256=kW-7NWIyaRMQiDpryfSmF2DObDZHGR1cJZ39s6B1Svg,11344
|
28
29
|
tools/decode.py,sha256=kC5kUvLQxdCkMRsnH6BqCajlKxKeN77w6rwCKsY6gqU,1781
|
29
|
-
fakesnow-0.9.
|
30
|
-
fakesnow-0.9.
|
31
|
-
fakesnow-0.9.
|
32
|
-
fakesnow-0.9.
|
33
|
-
fakesnow-0.9.
|
30
|
+
fakesnow-0.9.43.dist-info/METADATA,sha256=tUAcWIfezqI1Lu64-sz8VToB4bFIyfaEFyOPCJpDREY,20684
|
31
|
+
fakesnow-0.9.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
32
|
+
fakesnow-0.9.43.dist-info/entry_points.txt,sha256=2riAUgu928ZIHawtO8EsfrMEJhi-EH-z_Vq7Q44xKPM,47
|
33
|
+
fakesnow-0.9.43.dist-info/top_level.txt,sha256=Yos7YveA3f03xVYuURqnBsfMV2DePXfu_yGcsj3pPzI,30
|
34
|
+
fakesnow-0.9.43.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|