duckdb-sqlalchemy 1.4.4.1-py3-none-any.whl → 1.4.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckdb_sqlalchemy/__init__.py +56 -33
- duckdb_sqlalchemy/_validation.py +38 -0
- duckdb_sqlalchemy/bulk.py +20 -6
- duckdb_sqlalchemy/config.py +7 -4
- duckdb_sqlalchemy/tests/test_basic.py +4 -2
- duckdb_sqlalchemy/tests/test_core_units.py +84 -3
- {duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/METADATA +34 -3
- {duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/RECORD +11 -10
- {duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/WHEEL +0 -0
- {duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/entry_points.txt +0 -0
- {duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/licenses/LICENSE.txt +0 -0
duckdb_sqlalchemy/__init__.py
CHANGED
@@ -15,6 +15,7 @@ from typing import (
     Sequence,
     Tuple,
     Type,
+    cast,
 )
 
 import duckdb
@@ -38,6 +39,7 @@ from sqlalchemy.sql import bindparam
 from sqlalchemy.sql.selectable import Select
 
 from ._supports import has_comment_support
+from ._validation import validate_extension_name
 from .bulk import copy_from_csv, copy_from_parquet, copy_from_rows
 from .capabilities import get_capabilities
 from .config import apply_config, get_core_config
@@ -56,11 +58,15 @@ from .olap import read_csv, read_csv_auto, read_parquet, table_function
 from .url import URL, make_url
 
 try:
-    from sqlalchemy.dialects.postgresql
+    from sqlalchemy.dialects.postgresql import base as _pg_base
 except ImportError:  # pragma: no cover - fallback for older SQLAlchemy
-
+    _PGExecutionContext = DefaultExecutionContext
+else:
+    _PGExecutionContext = getattr(
+        _pg_base, "PGExecutionContext", DefaultExecutionContext
+    )
 
-__version__ = "1.4.4.1"
+__version__ = "1.4.4.2"
 sqlalchemy_version = sqlalchemy.__version__
 SQLALCHEMY_VERSION = Version(sqlalchemy_version)
 SQLALCHEMY_2 = SQLALCHEMY_VERSION >= Version("2.0.0")
@@ -71,7 +77,9 @@ supports_user_agent: bool = _capabilities.supports_user_agent
 
 if TYPE_CHECKING:
     from sqlalchemy.engine import Connection
-
+
+    ReflectedCheckConstraint = Dict[str, Any]
+    ReflectedIndex = Dict[str, Any]
 
     from .capabilities import DuckDBCapabilities
 
@@ -318,7 +326,7 @@ class DuckDBArrowResult:
         return iter(self._result)
 
 
-class DuckDBExecutionContext(PGExecutionContext):
+class DuckDBExecutionContext(_PGExecutionContext):
     @classmethod
     def _init_compiled(
         cls,
@@ -369,8 +377,9 @@ class DuckDBExecutionContext(PGExecutionContext):
         arraysize = self.execution_options.get("duckdb_arraysize")
        if arraysize is None:
            arraysize = self.execution_options.get("arraysize")
-
-
+        cursor = getattr(self, "cursor", None)
+        if arraysize is not None and hasattr(cursor, "arraysize"):
+            cursor.arraysize = arraysize
         result = super()._setup_result_proxy()
         if self.execution_options.get("duckdb_arrow") and getattr(
             result, "returns_rows", False
@@ -607,7 +616,7 @@ class Dialect(PGDialect_psycopg2):
         conn = duckdb.connect(*cargs, **cparams)
 
         for extension in preload_extensions:
-            conn.execute(f"LOAD {extension}")
+            conn.execute(f"LOAD {validate_extension_name(extension)}")
 
         for filesystem in filesystems:
             conn.register_filesystem(filesystem)
@@ -875,7 +884,7 @@ class Dialect(PGDialect_psycopg2):
 
     @cache  # type: ignore[call-arg]
     def get_columns(  # type: ignore[no-untyped-def]
-        self, connection: "Connection", table_name: str, schema=None, **kw: Any
+        self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
     ):
         try:
             return super().get_columns(connection, table_name, schema=schema, **kw)
@@ -887,7 +896,7 @@ class Dialect(PGDialect_psycopg2):
 
     @cache  # type: ignore[call-arg]
    def get_foreign_keys(  # type: ignore[no-untyped-def]
-        self, connection: "Connection", table_name: str, schema=None, **kw: Any
+        self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
     ):
         try:
             return super().get_foreign_keys(connection, table_name, schema=schema, **kw)
@@ -898,7 +907,7 @@ class Dialect(PGDialect_psycopg2):
 
     @cache  # type: ignore[call-arg]
     def get_unique_constraints(  # type: ignore[no-untyped-def]
-        self, connection: "Connection", table_name: str, schema=None, **kw: Any
+        self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
     ):
         try:
             return super().get_unique_constraints(
@@ -911,7 +920,7 @@ class Dialect(PGDialect_psycopg2):
 
     @cache  # type: ignore[call-arg]
     def get_check_constraints(  # type: ignore[no-untyped-def]
-        self, connection: "Connection", table_name: str, schema=None, **kw: Any
+        self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
     ):
         try:
             return super().get_check_constraints(
@@ -1019,7 +1028,7 @@ class Dialect(PGDialect_psycopg2):
             import pandas as pd  # type: ignore[import-not-found]
 
             rows = parameters if isinstance(parameters, list) else list(parameters)
-            data = pd.DataFrame(rows, columns=column_names)
+            data = pd.DataFrame(rows, columns=cast(Any, column_names))
         except Exception:
             data = None
         if data is None:
@@ -1119,16 +1128,25 @@ class Dialect(PGDialect_psycopg2):
 
         self._execute_with_retry(cursor, statement, parameters, context, executor)
 
-    def do_execute_no_params(
-
-
-
-
-
+    def do_execute_no_params(self, cursor: Any, statement: str, *args: Any) -> None:
+        parameters: Any = None
+        context: Optional[Any] = None
+        if len(args) == 1:
+            context = cast(Optional[Any], args[0])
+        elif len(args) >= 2:
+            parameters = args[0]
+            context = cast(Optional[Any], args[1])
+
         def executor() -> Any:
-
+            if parameters is None:
+                return DefaultDialect.do_execute_no_params(
+                    self, cursor, statement, context
+                )
+            return DefaultDialect.do_execute(
+                self, cursor, statement, parameters, context
+            )
 
-        self._execute_with_retry(cursor, statement,
+        self._execute_with_retry(cursor, statement, parameters, context, executor)
 
     def _pg_class_filter_scope_schema(
         self,
@@ -1160,10 +1178,10 @@ class Dialect(PGDialect_psycopg2):
         # reflection to avoid Catalog Errors during SQLAlchemy 2.x reflection.
         from sqlalchemy.dialects.postgresql import base as pg_base
 
-        pg_catalog = pg_base
-        REGCLASS = pg_base
-        TEXT = pg_base
-        OID = pg_base
+        pg_catalog = getattr(pg_base, "pg_catalog")
+        REGCLASS = getattr(pg_base, "REGCLASS")
+        TEXT = getattr(pg_base, "TEXT")
+        OID = getattr(pg_base, "OID")
 
         server_version_info = self.server_version_info or (0,)
 
@@ -1241,7 +1259,7 @@ class Dialect(PGDialect_psycopg2):
 
         collate = sql.null().label("collation")
 
-        relkinds =
+        relkinds = getattr(super(), "_kind_to_relkinds")(kind)
         query = (
             select(
                 pg_catalog.pg_attribute.c.attname.label("name"),
@@ -1275,7 +1293,7 @@ class Dialect(PGDialect_psycopg2):
                     == pg_catalog.pg_attribute.c.attnum,
                 ),
             )
-            .where(
+            .where(getattr(super(), "_pg_class_relkind_condition")(relkinds))
            .order_by(pg_catalog.pg_class.c.relname, pg_catalog.pg_attribute.c.attnum)
         )
         query = self._pg_class_filter_scope_schema(query, schema, scope=scope)
@@ -1339,15 +1357,20 @@ class Dialect(PGDialect_psycopg2):
 
         # dictionary with (name, ) if default search path or (schema, name)
         # as keys
+        load_enums = getattr(self, "_load_enums")
+        try:
+            enum_records = load_enums(
+                connection, schema="*", info_cache=kw.get("info_cache")
+            )
+        except TypeError:
+            enum_records = load_enums(connection, schema="*")
         enums = dict(
             (
                 ((rec["name"],), rec)
                 if rec["visible"]
                 else ((rec["schema"], rec["name"]), rec)
             )
-            for rec in
-                connection, schema="*", info_cache=kw.get("info_cache")
-            )
+            for rec in enum_records
         )
 
         columns = self._get_columns_info(rows, domains, enums, schema)  # type: ignore[attr-defined]
@@ -1361,9 +1384,9 @@ class Dialect(PGDialect_psycopg2):
         self, schema: str, has_filter_names: bool, scope: Any, kind: Any
     ):
         if SQLALCHEMY_VERSION >= Version("2.0.36"):
-            from sqlalchemy.dialects.postgresql import
-
-            )
+            from sqlalchemy.dialects.postgresql import base as pg_base
+
+            pg_catalog = getattr(pg_base, "pg_catalog")
 
         if (
             hasattr(super(), "_kind_to_relkinds")

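As a rough illustration of the guarded preload path above, mirroring the call shape used by the new unit test later in this diff (the injected extension name is an example):

from duckdb_sqlalchemy import Dialect

# Names matching [A-Za-z0-9_]+ pass validate_extension_name() unchanged and are
# loaded exactly as before; anything else raises ValueError before the dialect
# issues a single "LOAD ..." statement.
try:
    Dialect().connect(
        database=":memory:",
        preload_extensions=["sqlite; CREATE TABLE pwned(i INTEGER); --"],
        config={},
    )
except ValueError as err:
    print(err)  # invalid extension name: 'sqlite; CREATE TABLE pwned(i INTEGER); --'
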
duckdb_sqlalchemy/_validation.py
ADDED
@@ -0,0 +1,38 @@
+import re
+from typing import Iterable
+
+IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
+EXTENSION_RE = re.compile(r"^[A-Za-z0-9_]+$")
+
+
+def validate_identifier(value: str, *, kind: str = "identifier") -> str:
+    if not isinstance(value, str):
+        raise ValueError(f"{kind} must be a string")
+    if not IDENTIFIER_RE.fullmatch(value):
+        raise ValueError(f"invalid {kind}: {value!r}")
+    return value
+
+
+def validate_dotted_identifier(value: str, *, kind: str = "identifier") -> str:
+    if not isinstance(value, str):
+        raise ValueError(f"{kind} must be a string")
+    parts = value.split(".")
+    if not parts or any(not part for part in parts):
+        raise ValueError(f"invalid {kind}: {value!r}")
+    for part in parts:
+        validate_identifier(part, kind=kind)
+    return value
+
+
+def validate_extension_name(value: str) -> str:
+    if not isinstance(value, str):
+        raise ValueError("extension name must be a string")
+    if not EXTENSION_RE.fullmatch(value):
+        raise ValueError(f"invalid extension name: {value!r}")
+    return value
+
+
+def validate_identifier_list(
+    values: Iterable[str], *, kind: str = "identifier"
+) -> tuple[str, ...]:
+    return tuple(validate_identifier(value, kind=kind) for value in values)

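Since the new helpers are pure functions, their contract is easy to see in isolation; a short sketch of accepted and rejected inputs:

from duckdb_sqlalchemy._validation import (
    validate_dotted_identifier,
    validate_extension_name,
    validate_identifier,
)

validate_identifier("my_table")              # returned unchanged
validate_dotted_identifier("main.my_table")  # every dot-separated part must be an identifier
validate_extension_name("httpfs")            # extension names are restricted to [A-Za-z0-9_]+

try:
    validate_identifier("users; DROP TABLE users; --")
except ValueError as err:
    print(err)  # invalid identifier: 'users; DROP TABLE users; --'
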
duckdb_sqlalchemy/bulk.py
CHANGED
@@ -3,6 +3,12 @@ import tempfile
 from pathlib import Path
 from typing import Any, Iterable, Mapping, Optional, Sequence, Tuple, Union
 
+from ._validation import (
+    validate_dotted_identifier,
+    validate_identifier,
+    validate_identifier_list,
+)
+
 TableLike = Union[str, Any]
 
 
@@ -25,7 +31,7 @@ def _format_copy_options(options: Mapping[str, Any]) -> str:
     for key, value in options.items():
         if value is None:
             continue
-        opt_key = str(key).upper()
+        opt_key = validate_identifier(str(key), kind="COPY option key").upper()
         if isinstance(value, (list, tuple)):
             inner = ", ".join(_quote_literal(v) for v in value)
             parts.append(f"{opt_key} ({inner})")
@@ -46,21 +52,28 @@ def _format_table(connection: Any, table: TableLike) -> str:
         schema = getattr(table, "schema", None)
         name = getattr(table, "name", None)
         if schema:
-
-
-
+            schema_name = validate_dotted_identifier(
+                str(schema), kind="table schema identifier"
+            )
+            table_name = validate_identifier(str(name), kind="table identifier")
+            return f"{schema_name}.{table_name}"
+        return validate_identifier(str(name), kind="table identifier")
+    table_name = str(table)
+    validate_dotted_identifier(table_name, kind="table identifier")
+    return table_name
 
 
 def _format_columns(connection: Any, columns: Optional[Sequence[str]]) -> str:
     if not columns:
         return ""
+    validated_columns = validate_identifier_list(columns, kind="column identifier")
     preparer = getattr(
         getattr(connection, "dialect", None), "identifier_preparer", None
     )
     if preparer is None:
-        cols = ", ".join(
+        cols = ", ".join(validated_columns)
     else:
-        cols = ", ".join(preparer.quote_identifier(col) for col in
+        cols = ", ".join(preparer.quote_identifier(col) for col in validated_columns)
     return f" ({cols})"
 
 
@@ -115,6 +128,7 @@ def _copy_from_file(
     columns: Optional[Sequence[str]] = None,
     **options: Any,
 ) -> Any:
+    validate_identifier(format_name, kind="COPY format")
     table_name = _format_table(connection, table)
     column_clause = _format_columns(connection, columns)
     path_literal = _quote_literal(path)

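Net effect of the bulk.py changes: table, column, and COPY option identifiers are validated before any SQL is assembled. A minimal sketch against a plain DuckDB connection, following the shape of the new unit tests:

import duckdb
from duckdb_sqlalchemy.bulk import copy_from_csv

conn = duckdb.connect(":memory:")
conn.execute("CREATE TABLE safe(i INTEGER)")

# Plain identifiers ("safe", "main.safe") pass validation and the COPY statement
# is built as before; a table argument carrying extra SQL is rejected up front.
try:
    copy_from_csv(conn, "safe; CREATE TABLE pwned(i INTEGER); --", "rows.csv")
except ValueError as err:
    print(err)  # invalid table identifier: ...

# Nothing was executed: the injected table never appears.
assert conn.execute(
    "SELECT COUNT(*) FROM duckdb_tables() WHERE table_name = 'pwned'"
).fetchone()[0] == 0
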
duckdb_sqlalchemy/config.py
CHANGED
@@ -1,13 +1,15 @@
 import os
 from decimal import Decimal
 from functools import lru_cache
-from typing import Dict, Set, Type, Union
+from typing import Any, Dict, Set, Type, Union
 
 import duckdb
 from sqlalchemy import Boolean, Float, Integer, String
 from sqlalchemy.engine import Dialect
 from sqlalchemy.sql.type_api import TypeEngine
 
+from ._validation import validate_identifier
+
 TYPES: Dict[Type, TypeEngine] = {
     bool: Boolean(),
     int: Integer(),
@@ -37,7 +39,7 @@ def get_core_config() -> Set[str]:
 
 def apply_config(
     dialect: Dialect,
-    conn:
+    conn: Any,
     ext: Dict[str, Union[str, int, bool, float, None]],
 ) -> None:
     # TODO: does sqlalchemy have something that could do this for us?
@@ -48,8 +50,9 @@ def apply_config(
     string_processor = String().literal_processor(dialect=dialect)
 
     for k, v in ext.items():
+        key = validate_identifier(k, kind="config key")
         if v is None:
-            conn.execute(f"SET {
+            conn.execute(f"SET {key} = NULL")
             continue
         if isinstance(v, os.PathLike):
             v = os.fspath(v)
@@ -67,4 +70,4 @@ def apply_config(
             v = str(v)
             process = string_processor
         assert process, f"Not able to configure {k} with {v}"
-        conn.execute(f"SET {
+        conn.execute(f"SET {key} = {process(v)}")

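A short sketch of the tightened apply_config contract, mirroring the new unit test ("threads" is a standard DuckDB setting, used here as an example key):

import duckdb
from duckdb_sqlalchemy import Dialect
from duckdb_sqlalchemy.config import apply_config

conn = duckdb.connect(":memory:")
dialect = Dialect()

# Keys must be plain identifiers before they are interpolated into "SET <key> = ...".
apply_config(dialect, conn, {"threads": 1})

# A key smuggling extra statements never reaches conn.execute().
try:
    apply_config(dialect, conn, {"threads = 4; CREATE TABLE pwned(i INTEGER); --": 1})
except ValueError as err:
    print(err)  # invalid config key: ...
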
duckdb_sqlalchemy/tests/test_basic.py
CHANGED
@@ -37,6 +37,7 @@ from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.exc import DBAPIError
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import Session, relationship, sessionmaker
+from sqlalchemy.pool import QueuePool
 
 from .. import Dialect, insert, supports_attach, supports_user_agent
 from .._supports import has_comment_support
@@ -572,7 +573,7 @@ def test_do_ping(tmp_path: Path, caplog: LogCaptureFixture) -> None:
         "duckdb:///" + str(tmp_path / "db"),
         pool_pre_ping=True,
         pool_size=1,
-        poolclass=
+        poolclass=QueuePool,
     )
 
     logger = cast(logging.Logger, engine.pool.logger)  # type: ignore
@@ -615,7 +616,8 @@ def test_361(engine: Engine) -> None:
 
         metadata = MetaData()
         metadata.reflect(bind=conn)
-
+        tables = cast(dict[str, Table], metadata.tables)
+        test = tables["test"]
         part = "year"
         date_part = func.date_part(part, test.c.dt)
 

duckdb_sqlalchemy/tests/test_core_units.py
CHANGED
@@ -1,4 +1,5 @@
-from
+from pathlib import Path
+from typing import Any, cast
 from urllib.parse import parse_qs
 
 import duckdb
@@ -7,6 +8,7 @@ from sqlalchemy import Integer, String, pool
 from sqlalchemy import exc as sa_exc
 from sqlalchemy.engine import URL as SAURL
 
+import duckdb_sqlalchemy
 from duckdb_sqlalchemy import (
     URL,
     ConnectionWrapper,
@@ -28,6 +30,7 @@ from duckdb_sqlalchemy import (
 )
 from duckdb_sqlalchemy import datatypes as dt
 from duckdb_sqlalchemy import motherduck as md
+from duckdb_sqlalchemy.bulk import copy_from_csv
 from duckdb_sqlalchemy.config import TYPES, apply_config, get_core_config
 
 
@@ -472,15 +475,93 @@ def test_struct_or_union_requires_fields() -> None:
     preparer = dialect.identifier_preparer
 
     with pytest.raises(sa_exc.CompileError):
-        dt.struct_or_union(dt.Struct(), compiler, preparer)
+        dt.struct_or_union(dt.Struct(), cast(Any, compiler), preparer)
 
     struct = dt.Struct({"first name": String, "age": Integer})
-    rendered = dt.struct_or_union(struct, compiler, preparer)
+    rendered = dt.struct_or_union(struct, cast(Any, compiler), preparer)
     assert rendered.startswith("(")
     assert rendered.endswith(")")
     assert '"first name"' in rendered
 
 
+def test_apply_config_rejects_invalid_key_no_side_effect() -> None:
+    conn = duckdb.connect(":memory:")
+    dialect = Dialect()
+    with pytest.raises(ValueError, match="invalid config key"):
+        apply_config(
+            dialect,
+            conn,
+            {"threads = 1; CREATE TABLE pwned_cfg(i INTEGER); --": "x"},
+        )
+
+    found = conn.execute(
+        "SELECT COUNT(*) FROM duckdb_tables() WHERE table_name='pwned_cfg'"
+    ).fetchone()
+    assert found is not None
+    assert found[0] == 0
+
+
+def test_connect_rejects_invalid_extension_before_execute(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    get_core_config()
+
+    class DummyConn:
+        def __init__(self) -> None:
+            self.executed: list[str] = []
+
+        def execute(self, statement: str) -> None:
+            self.executed.append(statement)
+
+        def register_filesystem(self, filesystem: object) -> None:
+            return None
+
+    dummy = DummyConn()
+    monkeypatch.setattr(duckdb_sqlalchemy.duckdb, "connect", lambda *a, **k: dummy)
+
+    with pytest.raises(ValueError, match="invalid extension name"):
+        Dialect().connect(
+            database=":memory:",
+            preload_extensions=["sqlite; CREATE TABLE pwned_ext(i INTEGER); --"],
+            config={},
+        )
+
+    assert dummy.executed == []
+
+
+def test_copy_from_csv_rejects_invalid_table_and_option_key(
+    tmp_path: Path,
+) -> None:
+    conn = duckdb.connect(":memory:")
+    conn.execute("CREATE TABLE safe(i INTEGER)")
+    csv_path = tmp_path / "rows.csv"
+    csv_path.write_text("1\n")
+
+    with pytest.raises(ValueError, match="invalid table identifier"):
+        copy_from_csv(
+            conn,
+            "safe FROM 'x'; CREATE TABLE pwned_bulk(i INTEGER); --",
+            csv_path,
+        )
+
+    with pytest.raises(ValueError, match="invalid COPY option key"):
+        bad_options: dict[str, Any] = {
+            "header); CREATE TABLE pwned_opt(i INTEGER); --": True
+        }
+        copy_from_csv(
+            conn,
+            "safe",
+            csv_path,
+            **bad_options,
+        )
+
+    found = conn.execute(
+        "SELECT COUNT(*) FROM duckdb_tables() WHERE table_name IN ('pwned_bulk', 'pwned_opt')"
+    ).fetchone()
+    assert found is not None
+    assert found[0] == 0
+
+
 def test_parse_register_params_dict_and_tuple() -> None:
     view_name, df = _parse_register_params({"view_name": "v", "df": "data"})
     assert view_name == "v"

{duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckdb-sqlalchemy
-Version: 1.4.4.1
+Version: 1.4.4.2
 Summary: DuckDB SQLAlchemy dialect for DuckDB and MotherDuck
 Project-URL: Bug Tracker, https://github.com/leonardovida/duckdb-sqlalchemy/issues
 Project-URL: Changelog, https://github.com/leonardovida/duckdb-sqlalchemy/releases
@@ -58,14 +58,45 @@ Description-Content-Type: text/markdown
 
 duckdb-sqlalchemy is a DuckDB SQLAlchemy dialect for DuckDB and MotherDuck. It supports SQLAlchemy Core and ORM APIs for local DuckDB and MotherDuck connections.
 
+For new projects, this repository is the recommended dialect when you want production-oriented defaults, explicit MotherDuck guidance, and a clear migration path from older package names.
+
 The dialect handles pooling defaults, bulk inserts, type mappings, and cloud-specific configuration.
 
-## Why
+## Why choose duckdb-sqlalchemy today
 
 - **SQLAlchemy compatibility**: Core, ORM, Alembic, and reflection.
 - **MotherDuck support**: Token handling, attach modes, session hints, and read scaling helpers.
 - **Operational defaults**: Pooling defaults, transient retry for reads, and bulk insert optimization via Arrow/DataFrame registration.
-- **
+- **Active release cadence**: Tracks current DuckDB releases with a long-term support posture.
+
+| Area | `duckdb-sqlalchemy` (this repo) | `duckdb_engine` |
+| --- | --- | --- |
+| Package/module name | `duckdb-sqlalchemy` / `duckdb_sqlalchemy` | `duckdb-engine` / `duckdb_engine` |
+| SQLAlchemy driver URL | `duckdb://` | `duckdb://` |
+| MotherDuck workflow coverage | Dedicated URL helper (`MotherDuckURL`), connection guidance, and examples | No dedicated MotherDuck usage section in the upstream README |
+| Operational guidance | Documented pooling defaults, read-scaling helpers, and bulk insert patterns | Basic configuration guidance in upstream README |
+| Migration path | Explicit migration guide from older package names | Migration to this package is documented in this repo |
+| Project direction | Release policy, changelog, roadmap, and docs site are maintained here | Upstream README focuses on the core driver usage |
+
+## Coming from duckdb_engine?
+
+If you already use `duckdb-engine`, migration is straightforward:
+
+- keep the SQLAlchemy URL scheme (`duckdb://`)
+- install `duckdb-sqlalchemy`
+- switch imports to `duckdb_sqlalchemy`
+
+See the full guide: [docs/migration-from-duckdb-engine.md](docs/migration-from-duckdb-engine.md).
+
+## Project lineage
+
+This project is a heavily modified fork of `Mause/duckdb_engine` and continues to preserve upstream history in `CHANGELOG.md`.
+
+Current direction in this repository:
+
+- package and module rename to `duckdb-sqlalchemy` / `duckdb_sqlalchemy`
+- production-oriented defaults for local DuckDB and MotherDuck deployments
+- docs-first maintenance with versioned release notes and a published docs site
 
 ## Compatibility
 

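In code, the migration described above amounts to a dependency and import swap; a minimal sketch (the duckdb:// dialect is resolved via the wheel's entry point, so the explicit import below only illustrates the renamed module):

# pip install duckdb-sqlalchemy  (in place of duckdb-engine)
from sqlalchemy import create_engine, text

import duckdb_sqlalchemy  # was: import duckdb_engine

engine = create_engine("duckdb:///:memory:")  # URL scheme is unchanged
with engine.connect() as conn:
    print(conn.execute(text("SELECT 42")).scalar())
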
{duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/RECORD
RENAMED
@@ -1,8 +1,9 @@
-duckdb_sqlalchemy/__init__.py,sha256=
+duckdb_sqlalchemy/__init__.py,sha256=B4iN5zezUQx8oqW6pA2Vt_txl2Gs5mcBEjZfsBWpjmg,50875
 duckdb_sqlalchemy/_supports.py,sha256=GCOH9nFB4MitnjYKx5V4BsDSCxIfTyXqm6W-BDkgbfE,598
-duckdb_sqlalchemy/
+duckdb_sqlalchemy/_validation.py,sha256=XbhxZAwG4gRX35cKNXrzWxlEVorQbhIbjFZ1g83qvoU,1263
+duckdb_sqlalchemy/bulk.py,sha256=0snlLkEefHTEvWfu0QSJjbKPEZX3zuzGzndOjd9Kavc,5894
 duckdb_sqlalchemy/capabilities.py,sha256=Y9l-FaVPMw9CTpsG-42tiqltXECFXqIeTQdXPfSuxPY,719
-duckdb_sqlalchemy/config.py,sha256=
+duckdb_sqlalchemy/config.py,sha256=7u8SOgz8dOGBfudAcAzRNFpB2Pix3HQQekNPPbwTZZU,2292
 duckdb_sqlalchemy/conftest.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 duckdb_sqlalchemy/datatypes.py,sha256=kVUe1gXZuUe9UpzY30CQfEP4VAz308HilCG16sRGmkU,8296
 duckdb_sqlalchemy/motherduck.py,sha256=m6g-Qsd6mfYbKJTuhRYVT7MU6QItDirx4N3IiK0CVuA,7101
@@ -12,8 +13,8 @@ duckdb_sqlalchemy/requirements.py,sha256=jDDZyI6Pt2IfqeMX83hb-IhQWFMDjHxoW_9j6Tx
 duckdb_sqlalchemy/url.py,sha256=y2rtgiHXXcgVpQt22eEIeP8MAeVKwBNC9tsh1i3j3OE,1539
 duckdb_sqlalchemy/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 duckdb_sqlalchemy/tests/conftest.py,sha256=GldGGf9wrY1hZvcl4hmzKmdirQYCltpZWfM3-WyOKqc,1498
-duckdb_sqlalchemy/tests/test_basic.py,sha256=
-duckdb_sqlalchemy/tests/test_core_units.py,sha256=
+duckdb_sqlalchemy/tests/test_basic.py,sha256=EFWtFf3R0N8FjIOjF7D_SvZdITVrwu4veoes68h9BKc,22055
+duckdb_sqlalchemy/tests/test_core_units.py,sha256=aRYxRK3RudiCwyEV5rIKsVwWSUpfDcvTUnKyLR_9VUk,20991
 duckdb_sqlalchemy/tests/test_datatypes.py,sha256=g7WwxP6Kq6rhhWdpFUs1g6NA0jNYuaJMiolsRpG0qI8,7144
 duckdb_sqlalchemy/tests/test_execution_options.py,sha256=ov0YVVQLdKdw1K8grdzkpIQMD863dsyB0SG4rkevGks,964
 duckdb_sqlalchemy/tests/test_helpers.py,sha256=9KGRmNVvTVPvcEN3mHCwXIIKhdFe_GXmyBnfWisYiCs,2216
@@ -24,8 +25,8 @@ duckdb_sqlalchemy/tests/util.py,sha256=1tN5UC7nRhxAeu1NmD-DWIC3bRyD3n0pl5L--b1tM
 duckdb_sqlalchemy/tests/snapshots/test_datatypes/test_interval/schema.sql,sha256=ZXscZo4xepli7WSjbhWqTufIciscCDLoRznaA6KGiOI,47
 duckdb_sqlalchemy/tests/sqlalchemy_suite/conftest.py,sha256=BVvwaWDIXobKa-ziFyhmjkIkCd5vz0TbT77AFOPCHHc,263
 duckdb_sqlalchemy/tests/sqlalchemy_suite/test_suite.py,sha256=O2O52uLfENDAU_xl2_iZZgigLP1DB8IYaULqCEOnIA8,58
-duckdb_sqlalchemy-1.4.4.
-duckdb_sqlalchemy-1.4.4.
-duckdb_sqlalchemy-1.4.4.
-duckdb_sqlalchemy-1.4.4.
-duckdb_sqlalchemy-1.4.4.
+duckdb_sqlalchemy-1.4.4.2.dist-info/METADATA,sha256=XdI-hASiG9GT9huia0yMa9nc9nsI9BHZZ0Cc-GwJovs,9467
+duckdb_sqlalchemy-1.4.4.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+duckdb_sqlalchemy-1.4.4.2.dist-info/entry_points.txt,sha256=MyXbmaqEhyBLIL2NnHrweY6EJ_Rke2HnVZR1wCz08cM,57
+duckdb_sqlalchemy-1.4.4.2.dist-info/licenses/LICENSE.txt,sha256=nhRQcy_ZV2R-xzl3MPltQuQ53bcURavT0N6mC3VdDE8,1076
+duckdb_sqlalchemy-1.4.4.2.dist-info/RECORD,,

{duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/WHEEL
RENAMED
File without changes

{duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/entry_points.txt
RENAMED
File without changes

{duckdb_sqlalchemy-1.4.4.1.dist-info → duckdb_sqlalchemy-1.4.4.2.dist-info}/licenses/LICENSE.txt
RENAMED
File without changes