duckdb-sqlalchemy 1.4.4__tar.gz → 1.4.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/CHANGELOG.md +23 -0
  2. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/PKG-INFO +34 -3
  3. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/README.md +33 -2
  4. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/configuration.md +3 -0
  5. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/migration-from-duckdb-engine.md +2 -1
  6. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/motherduck.md +7 -0
  7. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/olap.md +4 -0
  8. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/seo-checklist.md +1 -0
  9. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/types-and-caveats.md +8 -0
  10. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/__init__.py +56 -33
  11. duckdb_sqlalchemy-1.4.4.2/duckdb_sqlalchemy/_validation.py +38 -0
  12. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/bulk.py +20 -6
  13. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/config.py +7 -4
  14. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_basic.py +4 -2
  15. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_core_units.py +84 -3
  16. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/noxfile.py +3 -0
  17. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/pyproject.toml +1 -1
  18. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  19. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  20. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/dependabot.yml +0 -0
  21. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/workflows/lint.yml +0 -0
  22. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/workflows/pages.yml +0 -0
  23. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/workflows/publish.yaml +0 -0
  24. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.github/workflows/pythonapp.yaml +0 -0
  25. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.gitignore +0 -0
  26. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/.pre-commit-config.yaml +0 -0
  27. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/AGENTS.md +0 -0
  28. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/CLAUDE.md +0 -0
  29. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/CODE_OF_CONDUCT.md +0 -0
  30. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/LICENSE.txt +0 -0
  31. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/ROADMAP.md +0 -0
  32. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/codecov.yml +0 -0
  33. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/README.md +0 -0
  34. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/_config.yml +0 -0
  35. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/alembic.md +0 -0
  36. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/connection-urls.md +0 -0
  37. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/getting-started.md +0 -0
  38. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/index.md +0 -0
  39. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/overview.md +0 -0
  40. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/pandas-jupyter.md +0 -0
  41. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/docs/robots.txt +0 -0
  42. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/_supports.py +0 -0
  43. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/capabilities.py +0 -0
  44. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/conftest.py +0 -0
  45. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/datatypes.py +0 -0
  46. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/motherduck.py +0 -0
  47. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/olap.py +0 -0
  48. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/py.typed +0 -0
  49. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/requirements.py +0 -0
  50. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/__init__.py +0 -0
  51. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/conftest.py +0 -0
  52. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/snapshots/test_datatypes/test_interval/schema.sql +0 -0
  53. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/sqlalchemy_suite/conftest.py +0 -0
  54. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/sqlalchemy_suite/test_suite.py +0 -0
  55. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_datatypes.py +0 -0
  56. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_execution_options.py +0 -0
  57. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_helpers.py +0 -0
  58. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_integration.py +0 -0
  59. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_pandas.py +0 -0
  60. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/test_pyarrow.py +0 -0
  61. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/tests/util.py +0 -0
  62. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/duckdb_sqlalchemy/url.py +0 -0
  63. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/examples/motherduck_arrow_reads.py +0 -0
  64. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/examples/motherduck_attach_modes.py +0 -0
  65. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/examples/motherduck_multi_instance_pool.py +0 -0
  66. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/examples/motherduck_queuepool_high_concurrency.py +0 -0
  67. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/examples/motherduck_read_scaling_per_user.py +0 -0
  68. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/examples/sqlalchemy_example.py +0 -0
  69. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/llms.txt +0 -0
  70. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/renovate.json +0 -0
  71. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/test.cfg +0 -0
  72. {duckdb_sqlalchemy-1.4.4 → duckdb_sqlalchemy-1.4.4.2}/uv.lock +0 -0
@@ -6,6 +6,29 @@ preserved from the upstream project for historical context.
6
6
 
7
7
  ## Maintained in this fork
8
8
 
9
+ ## [1.4.4.2](https://github.com/leonardovida/duckdb-sqlalchemy/compare/v1.4.4...v1.4.4.2) (2026-02-05)
10
+
11
+ ### Security
12
+
13
+ * validate config keys before `SET` statements to block SQL injection payloads
14
+ * validate preload extension names before `LOAD`
15
+ * validate COPY helper table/column/option identifiers and reject SQL fragments
16
+
17
+ ### Testing
18
+
19
+ * gate pandas tests on supported pandas/SQLAlchemy combinations
20
+ * pin `pandas<2.2` in `nox` SQLAlchemy 1.x sessions for stable matrix runs
21
+
22
+ ### Typing
23
+
24
+ * align SQLAlchemy compatibility shims and test typing to satisfy `ty`
25
+
26
+ ## [1.4.4.1](https://github.com/leonardovida/duckdb-sqlalchemy/compare/v1.4.4...v1.4.4.1) (2026-02-05)
27
+
28
+ ### Documentation
29
+
30
+ * document DuckDB multiprocessing fork-safety caveat and `spawn`/`forkserver` workaround
31
+
9
32
  ## [1.4.4](https://github.com/leonardovida/duckdb-sqlalchemy/compare/v1.4.3...v1.4.4) (2026-02-03)
10
33
 
11
34
  ### Versioning
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckdb-sqlalchemy
3
- Version: 1.4.4
3
+ Version: 1.4.4.2
4
4
  Summary: DuckDB SQLAlchemy dialect for DuckDB and MotherDuck
5
5
  Project-URL: Bug Tracker, https://github.com/leonardovida/duckdb-sqlalchemy/issues
6
6
  Project-URL: Changelog, https://github.com/leonardovida/duckdb-sqlalchemy/releases
@@ -58,14 +58,45 @@ Description-Content-Type: text/markdown
58
58
 
59
59
  duckdb-sqlalchemy is a DuckDB SQLAlchemy dialect for DuckDB and MotherDuck. It supports SQLAlchemy Core and ORM APIs for local DuckDB and MotherDuck connections.
60
60
 
61
+ For new projects, this repository is the recommended dialect when you want production-oriented defaults, explicit MotherDuck guidance, and a clear migration path from older package names.
62
+
61
63
  The dialect handles pooling defaults, bulk inserts, type mappings, and cloud-specific configuration.
62
64
 
63
- ## Why this dialect
65
+ ## Why choose duckdb-sqlalchemy today
64
66
 
65
67
  - **SQLAlchemy compatibility**: Core, ORM, Alembic, and reflection.
66
68
  - **MotherDuck support**: Token handling, attach modes, session hints, and read scaling helpers.
67
69
  - **Operational defaults**: Pooling defaults, transient retry for reads, and bulk insert optimization via Arrow/DataFrame registration.
68
- - **Maintained**: Tracks current DuckDB releases with a long-term support posture.
70
+ - **Active release cadence**: Tracks current DuckDB releases with a long-term support posture.
71
+
72
+ | Area | `duckdb-sqlalchemy` (this repo) | `duckdb_engine` |
73
+ | --- | --- | --- |
74
+ | Package/module name | `duckdb-sqlalchemy` / `duckdb_sqlalchemy` | `duckdb-engine` / `duckdb_engine` |
75
+ | SQLAlchemy driver URL | `duckdb://` | `duckdb://` |
76
+ | MotherDuck workflow coverage | Dedicated URL helper (`MotherDuckURL`), connection guidance, and examples | No dedicated MotherDuck usage section in the upstream README |
77
+ | Operational guidance | Documented pooling defaults, read-scaling helpers, and bulk insert patterns | Basic configuration guidance in upstream README |
78
+ | Migration path | Explicit migration guide from older package names | Migration to this package is documented in this repo |
79
+ | Project direction | Release policy, changelog, roadmap, and docs site are maintained here | Upstream README focuses on the core driver usage |
80
+
81
+ ## Coming from duckdb_engine?
82
+
83
+ If you already use `duckdb-engine`, migration is straightforward:
84
+
85
+ - keep the SQLAlchemy URL scheme (`duckdb://`)
86
+ - install `duckdb-sqlalchemy`
87
+ - switch imports to `duckdb_sqlalchemy`
88
+
89
+ See the full guide: [docs/migration-from-duckdb-engine.md](docs/migration-from-duckdb-engine.md).
90
+
91
+ ## Project lineage
92
+
93
+ This project is a heavily modified fork of `Mause/duckdb_engine` and continues to preserve upstream history in `CHANGELOG.md`.
94
+
95
+ Current direction in this repository:
96
+
97
+ - package and module rename to `duckdb-sqlalchemy` / `duckdb_sqlalchemy`
98
+ - production-oriented defaults for local DuckDB and MotherDuck deployments
99
+ - docs-first maintenance with versioned release notes and a published docs site
69
100
 
70
101
  ## Compatibility
71
102
 
@@ -6,14 +6,45 @@
6
6
 
7
7
  duckdb-sqlalchemy is a DuckDB SQLAlchemy dialect for DuckDB and MotherDuck. It supports SQLAlchemy Core and ORM APIs for local DuckDB and MotherDuck connections.
8
8
 
9
+ For new projects, this repository is the recommended dialect when you want production-oriented defaults, explicit MotherDuck guidance, and a clear migration path from older package names.
10
+
9
11
  The dialect handles pooling defaults, bulk inserts, type mappings, and cloud-specific configuration.
10
12
 
11
- ## Why this dialect
13
+ ## Why choose duckdb-sqlalchemy today
12
14
 
13
15
  - **SQLAlchemy compatibility**: Core, ORM, Alembic, and reflection.
14
16
  - **MotherDuck support**: Token handling, attach modes, session hints, and read scaling helpers.
15
17
  - **Operational defaults**: Pooling defaults, transient retry for reads, and bulk insert optimization via Arrow/DataFrame registration.
16
- - **Maintained**: Tracks current DuckDB releases with a long-term support posture.
18
+ - **Active release cadence**: Tracks current DuckDB releases with a long-term support posture.
19
+
20
+ | Area | `duckdb-sqlalchemy` (this repo) | `duckdb_engine` |
21
+ | --- | --- | --- |
22
+ | Package/module name | `duckdb-sqlalchemy` / `duckdb_sqlalchemy` | `duckdb-engine` / `duckdb_engine` |
23
+ | SQLAlchemy driver URL | `duckdb://` | `duckdb://` |
24
+ | MotherDuck workflow coverage | Dedicated URL helper (`MotherDuckURL`), connection guidance, and examples | No dedicated MotherDuck usage section in the upstream README |
25
+ | Operational guidance | Documented pooling defaults, read-scaling helpers, and bulk insert patterns | Basic configuration guidance in upstream README |
26
+ | Migration path | Explicit migration guide from older package names | Migration to this package is documented in this repo |
27
+ | Project direction | Release policy, changelog, roadmap, and docs site are maintained here | Upstream README focuses on the core driver usage |
28
+
29
+ ## Coming from duckdb_engine?
30
+
31
+ If you already use `duckdb-engine`, migration is straightforward:
32
+
33
+ - keep the SQLAlchemy URL scheme (`duckdb://`)
34
+ - install `duckdb-sqlalchemy`
35
+ - switch imports to `duckdb_sqlalchemy`
36
+
37
+ See the full guide: [docs/migration-from-duckdb-engine.md](docs/migration-from-duckdb-engine.md).
38
+
39
+ ## Project lineage
40
+
41
+ This project is a heavily modified fork of `Mause/duckdb_engine` and continues to preserve upstream history in `CHANGELOG.md`.
42
+
43
+ Current direction in this repository:
44
+
45
+ - package and module rename to `duckdb-sqlalchemy` / `duckdb_sqlalchemy`
46
+ - production-oriented defaults for local DuckDB and MotherDuck deployments
47
+ - docs-first maintenance with versioned release notes and a published docs site
17
48
 
18
49
  ## Compatibility
19
50
 
@@ -57,6 +57,9 @@ engine = create_engine(
57
57
  )
58
58
  ```
59
59
 
60
+ For safety, extension names must be plain identifiers (`[A-Za-z0-9_]+`).
61
+ Values containing spaces, punctuation, or SQL fragments are rejected.
62
+
60
63
  ## Register filesystems
61
64
 
62
65
  You can register filesystems via `fsspec`:
@@ -5,7 +5,7 @@ title: Migration from duckdb_engine
5
5
 
6
6
  # Migration from duckdb_engine
7
7
 
8
- This project is the actively maintained DuckDB SQLAlchemy dialect. If you are coming from the older `duckdb_engine` package, migrate as follows:
8
+ `duckdb-sqlalchemy` is the recommended package name for new work in this repository. If you are coming from `duckdb_engine`, migrate as follows:
9
9
 
10
10
  ## Package and import rename
11
11
 
@@ -28,3 +28,4 @@ SQLAlchemy URLs use the `duckdb://` driver name in both packages. Existing URLs
28
28
  - The package name is now `duckdb-sqlalchemy` and the module is `duckdb_sqlalchemy`.
29
29
  - The dialect remains registered as `duckdb` for SQLAlchemy.
30
30
  - See `docs/motherduck.md` for MotherDuck-specific behavior.
31
+ - See `README.md` for project lineage, release policy, and roadmap links.
@@ -41,6 +41,13 @@ engine = create_engine(
41
41
  )
42
42
  ```
43
43
 
44
+ ## Multiprocessing (fork)
45
+
46
+ DuckDB's Python client is not fork-safe, so `multiprocessing` children created with
47
+ `fork` can fail when opening new connections (commonly observed with MotherDuck or
48
+ file-backed databases). Use the `spawn` or `forkserver` start methods and create
49
+ engines/connections inside the child process.
50
+
44
51
  ## Options
45
52
 
46
53
  ### Connection-string parameters (instance cache key)
@@ -113,6 +113,10 @@ with engine.begin() as conn:
113
113
  copy_from_csv(conn, "events", "data/events.csv", header=True)
114
114
  ```
115
115
 
116
+ For safety, string table names, column names, and COPY option keys must be
117
+ identifiers. Dotted table names like `schema.events` are supported, but SQL
118
+ fragments are rejected.
119
+
116
120
  For row iterables, you can stream to a temporary CSV in chunks:
117
121
 
118
122
  ```python
@@ -22,3 +22,4 @@ Use this list to validate indexability after each docs update or release.
22
22
 
23
23
  - Project name and description are consistent in README, docs, and PyPI metadata.
24
24
  - URLs in `pyproject.toml` match the docs site.
25
+ - README and docs clearly differentiate this fork's scope from upstream `duckdb_engine` content.
@@ -87,3 +87,11 @@ users = Table(
87
87
  ## Pandas chunksize
88
88
 
89
89
  Older DuckDB versions (< 0.5.0) may have issues with `pandas.read_sql(..., chunksize=...)`. If you hit errors, use `chunksize=None` or upgrade DuckDB.
90
+
91
+ ## Multiprocessing (fork)
92
+
93
+ DuckDB's Python bindings are not fork-safe. Creating a new connection in a
94
+ `multiprocessing` child process started with `fork` can raise runtime errors
95
+ (for example, `RuntimeError: thread::join failed: No such process`), especially
96
+ with MotherDuck or file-backed connections. Prefer `spawn` or `forkserver`, and
97
+ initialize engines/connections in the child process.
@@ -15,6 +15,7 @@ from typing import (
15
15
  Sequence,
16
16
  Tuple,
17
17
  Type,
18
+ cast,
18
19
  )
19
20
 
20
21
  import duckdb
@@ -38,6 +39,7 @@ from sqlalchemy.sql import bindparam
38
39
  from sqlalchemy.sql.selectable import Select
39
40
 
40
41
  from ._supports import has_comment_support
42
+ from ._validation import validate_extension_name
41
43
  from .bulk import copy_from_csv, copy_from_parquet, copy_from_rows
42
44
  from .capabilities import get_capabilities
43
45
  from .config import apply_config, get_core_config
@@ -56,11 +58,15 @@ from .olap import read_csv, read_csv_auto, read_parquet, table_function
56
58
  from .url import URL, make_url
57
59
 
58
60
  try:
59
- from sqlalchemy.dialects.postgresql.base import PGExecutionContext
61
+ from sqlalchemy.dialects.postgresql import base as _pg_base
60
62
  except ImportError: # pragma: no cover - fallback for older SQLAlchemy
61
- PGExecutionContext = DefaultExecutionContext
63
+ _PGExecutionContext = DefaultExecutionContext
64
+ else:
65
+ _PGExecutionContext = getattr(
66
+ _pg_base, "PGExecutionContext", DefaultExecutionContext
67
+ )
62
68
 
63
- __version__ = "1.4.4"
69
+ __version__ = "1.4.4.2"
64
70
  sqlalchemy_version = sqlalchemy.__version__
65
71
  SQLALCHEMY_VERSION = Version(sqlalchemy_version)
66
72
  SQLALCHEMY_2 = SQLALCHEMY_VERSION >= Version("2.0.0")
@@ -71,7 +77,9 @@ supports_user_agent: bool = _capabilities.supports_user_agent
71
77
 
72
78
  if TYPE_CHECKING:
73
79
  from sqlalchemy.engine import Connection
74
- from sqlalchemy.engine.reflection import ReflectedCheckConstraint, ReflectedIndex
80
+
81
+ ReflectedCheckConstraint = Dict[str, Any]
82
+ ReflectedIndex = Dict[str, Any]
75
83
 
76
84
  from .capabilities import DuckDBCapabilities
77
85
 
@@ -318,7 +326,7 @@ class DuckDBArrowResult:
318
326
  return iter(self._result)
319
327
 
320
328
 
321
- class DuckDBExecutionContext(PGExecutionContext):
329
+ class DuckDBExecutionContext(_PGExecutionContext):
322
330
  @classmethod
323
331
  def _init_compiled(
324
332
  cls,
@@ -369,8 +377,9 @@ class DuckDBExecutionContext(PGExecutionContext):
369
377
  arraysize = self.execution_options.get("duckdb_arraysize")
370
378
  if arraysize is None:
371
379
  arraysize = self.execution_options.get("arraysize")
372
- if arraysize is not None and hasattr(self.cursor, "arraysize"):
373
- self.cursor.arraysize = arraysize
380
+ cursor = getattr(self, "cursor", None)
381
+ if arraysize is not None and hasattr(cursor, "arraysize"):
382
+ cursor.arraysize = arraysize
374
383
  result = super()._setup_result_proxy()
375
384
  if self.execution_options.get("duckdb_arrow") and getattr(
376
385
  result, "returns_rows", False
@@ -607,7 +616,7 @@ class Dialect(PGDialect_psycopg2):
607
616
  conn = duckdb.connect(*cargs, **cparams)
608
617
 
609
618
  for extension in preload_extensions:
610
- conn.execute(f"LOAD {extension}")
619
+ conn.execute(f"LOAD {validate_extension_name(extension)}")
611
620
 
612
621
  for filesystem in filesystems:
613
622
  conn.register_filesystem(filesystem)
@@ -875,7 +884,7 @@ class Dialect(PGDialect_psycopg2):
875
884
 
876
885
  @cache # type: ignore[call-arg]
877
886
  def get_columns( # type: ignore[no-untyped-def]
878
- self, connection: "Connection", table_name: str, schema=None, **kw: Any
887
+ self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
879
888
  ):
880
889
  try:
881
890
  return super().get_columns(connection, table_name, schema=schema, **kw)
@@ -887,7 +896,7 @@ class Dialect(PGDialect_psycopg2):
887
896
 
888
897
  @cache # type: ignore[call-arg]
889
898
  def get_foreign_keys( # type: ignore[no-untyped-def]
890
- self, connection: "Connection", table_name: str, schema=None, **kw: Any
899
+ self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
891
900
  ):
892
901
  try:
893
902
  return super().get_foreign_keys(connection, table_name, schema=schema, **kw)
@@ -898,7 +907,7 @@ class Dialect(PGDialect_psycopg2):
898
907
 
899
908
  @cache # type: ignore[call-arg]
900
909
  def get_unique_constraints( # type: ignore[no-untyped-def]
901
- self, connection: "Connection", table_name: str, schema=None, **kw: Any
910
+ self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
902
911
  ):
903
912
  try:
904
913
  return super().get_unique_constraints(
@@ -911,7 +920,7 @@ class Dialect(PGDialect_psycopg2):
911
920
 
912
921
  @cache # type: ignore[call-arg]
913
922
  def get_check_constraints( # type: ignore[no-untyped-def]
914
- self, connection: "Connection", table_name: str, schema=None, **kw: Any
923
+ self, connection: "Connection", table_name: str, schema=None, **kw: "Any"
915
924
  ):
916
925
  try:
917
926
  return super().get_check_constraints(
@@ -1019,7 +1028,7 @@ class Dialect(PGDialect_psycopg2):
1019
1028
  import pandas as pd # type: ignore[import-not-found]
1020
1029
 
1021
1030
  rows = parameters if isinstance(parameters, list) else list(parameters)
1022
- data = pd.DataFrame(rows, columns=column_names)
1031
+ data = pd.DataFrame(rows, columns=cast(Any, column_names))
1023
1032
  except Exception:
1024
1033
  data = None
1025
1034
  if data is None:
@@ -1119,16 +1128,25 @@ class Dialect(PGDialect_psycopg2):
1119
1128
 
1120
1129
  self._execute_with_retry(cursor, statement, parameters, context, executor)
1121
1130
 
1122
- def do_execute_no_params(
1123
- self,
1124
- cursor: Any,
1125
- statement: str,
1126
- context: Optional[Any] = None,
1127
- ) -> None:
1131
+ def do_execute_no_params(self, cursor: Any, statement: str, *args: Any) -> None:
1132
+ parameters: Any = None
1133
+ context: Optional[Any] = None
1134
+ if len(args) == 1:
1135
+ context = cast(Optional[Any], args[0])
1136
+ elif len(args) >= 2:
1137
+ parameters = args[0]
1138
+ context = cast(Optional[Any], args[1])
1139
+
1128
1140
  def executor() -> Any:
1129
- return DefaultDialect.do_execute_no_params(self, cursor, statement, context)
1141
+ if parameters is None:
1142
+ return DefaultDialect.do_execute_no_params(
1143
+ self, cursor, statement, context
1144
+ )
1145
+ return DefaultDialect.do_execute(
1146
+ self, cursor, statement, parameters, context
1147
+ )
1130
1148
 
1131
- self._execute_with_retry(cursor, statement, None, context, executor)
1149
+ self._execute_with_retry(cursor, statement, parameters, context, executor)
1132
1150
 
1133
1151
  def _pg_class_filter_scope_schema(
1134
1152
  self,
@@ -1160,10 +1178,10 @@ class Dialect(PGDialect_psycopg2):
1160
1178
  # reflection to avoid Catalog Errors during SQLAlchemy 2.x reflection.
1161
1179
  from sqlalchemy.dialects.postgresql import base as pg_base
1162
1180
 
1163
- pg_catalog = pg_base.pg_catalog
1164
- REGCLASS = pg_base.REGCLASS
1165
- TEXT = pg_base.TEXT
1166
- OID = pg_base.OID
1181
+ pg_catalog = getattr(pg_base, "pg_catalog")
1182
+ REGCLASS = getattr(pg_base, "REGCLASS")
1183
+ TEXT = getattr(pg_base, "TEXT")
1184
+ OID = getattr(pg_base, "OID")
1167
1185
 
1168
1186
  server_version_info = self.server_version_info or (0,)
1169
1187
 
@@ -1241,7 +1259,7 @@ class Dialect(PGDialect_psycopg2):
1241
1259
 
1242
1260
  collate = sql.null().label("collation")
1243
1261
 
1244
- relkinds = self._kind_to_relkinds(kind)
1262
+ relkinds = getattr(super(), "_kind_to_relkinds")(kind)
1245
1263
  query = (
1246
1264
  select(
1247
1265
  pg_catalog.pg_attribute.c.attname.label("name"),
@@ -1275,7 +1293,7 @@ class Dialect(PGDialect_psycopg2):
1275
1293
  == pg_catalog.pg_attribute.c.attnum,
1276
1294
  ),
1277
1295
  )
1278
- .where(self._pg_class_relkind_condition(relkinds))
1296
+ .where(getattr(super(), "_pg_class_relkind_condition")(relkinds))
1279
1297
  .order_by(pg_catalog.pg_class.c.relname, pg_catalog.pg_attribute.c.attnum)
1280
1298
  )
1281
1299
  query = self._pg_class_filter_scope_schema(query, schema, scope=scope)
@@ -1339,15 +1357,20 @@ class Dialect(PGDialect_psycopg2):
1339
1357
 
1340
1358
  # dictionary with (name, ) if default search path or (schema, name)
1341
1359
  # as keys
1360
+ load_enums = getattr(self, "_load_enums")
1361
+ try:
1362
+ enum_records = load_enums(
1363
+ connection, schema="*", info_cache=kw.get("info_cache")
1364
+ )
1365
+ except TypeError:
1366
+ enum_records = load_enums(connection, schema="*")
1342
1367
  enums = dict(
1343
1368
  (
1344
1369
  ((rec["name"],), rec)
1345
1370
  if rec["visible"]
1346
1371
  else ((rec["schema"], rec["name"]), rec)
1347
1372
  )
1348
- for rec in self._load_enums( # type: ignore[attr-defined]
1349
- connection, schema="*", info_cache=kw.get("info_cache")
1350
- )
1373
+ for rec in enum_records
1351
1374
  )
1352
1375
 
1353
1376
  columns = self._get_columns_info(rows, domains, enums, schema) # type: ignore[attr-defined]
@@ -1361,9 +1384,9 @@ class Dialect(PGDialect_psycopg2):
1361
1384
  self, schema: str, has_filter_names: bool, scope: Any, kind: Any
1362
1385
  ):
1363
1386
  if SQLALCHEMY_VERSION >= Version("2.0.36"):
1364
- from sqlalchemy.dialects.postgresql import ( # type: ignore[attr-defined]
1365
- pg_catalog,
1366
- )
1387
+ from sqlalchemy.dialects.postgresql import base as pg_base
1388
+
1389
+ pg_catalog = getattr(pg_base, "pg_catalog")
1367
1390
 
1368
1391
  if (
1369
1392
  hasattr(super(), "_kind_to_relkinds")
@@ -0,0 +1,38 @@
1
+ import re
2
+ from typing import Iterable
3
+
4
+ IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
5
+ EXTENSION_RE = re.compile(r"^[A-Za-z0-9_]+$")
6
+
7
+
8
+ def validate_identifier(value: str, *, kind: str = "identifier") -> str:
9
+ if not isinstance(value, str):
10
+ raise ValueError(f"{kind} must be a string")
11
+ if not IDENTIFIER_RE.fullmatch(value):
12
+ raise ValueError(f"invalid {kind}: {value!r}")
13
+ return value
14
+
15
+
16
+ def validate_dotted_identifier(value: str, *, kind: str = "identifier") -> str:
17
+ if not isinstance(value, str):
18
+ raise ValueError(f"{kind} must be a string")
19
+ parts = value.split(".")
20
+ if not parts or any(not part for part in parts):
21
+ raise ValueError(f"invalid {kind}: {value!r}")
22
+ for part in parts:
23
+ validate_identifier(part, kind=kind)
24
+ return value
25
+
26
+
27
+ def validate_extension_name(value: str) -> str:
28
+ if not isinstance(value, str):
29
+ raise ValueError("extension name must be a string")
30
+ if not EXTENSION_RE.fullmatch(value):
31
+ raise ValueError(f"invalid extension name: {value!r}")
32
+ return value
33
+
34
+
35
+ def validate_identifier_list(
36
+ values: Iterable[str], *, kind: str = "identifier"
37
+ ) -> tuple[str, ...]:
38
+ return tuple(validate_identifier(value, kind=kind) for value in values)
@@ -3,6 +3,12 @@ import tempfile
3
3
  from pathlib import Path
4
4
  from typing import Any, Iterable, Mapping, Optional, Sequence, Tuple, Union
5
5
 
6
+ from ._validation import (
7
+ validate_dotted_identifier,
8
+ validate_identifier,
9
+ validate_identifier_list,
10
+ )
11
+
6
12
  TableLike = Union[str, Any]
7
13
 
8
14
 
@@ -25,7 +31,7 @@ def _format_copy_options(options: Mapping[str, Any]) -> str:
25
31
  for key, value in options.items():
26
32
  if value is None:
27
33
  continue
28
- opt_key = str(key).upper()
34
+ opt_key = validate_identifier(str(key), kind="COPY option key").upper()
29
35
  if isinstance(value, (list, tuple)):
30
36
  inner = ", ".join(_quote_literal(v) for v in value)
31
37
  parts.append(f"{opt_key} ({inner})")
@@ -46,21 +52,28 @@ def _format_table(connection: Any, table: TableLike) -> str:
46
52
  schema = getattr(table, "schema", None)
47
53
  name = getattr(table, "name", None)
48
54
  if schema:
49
- return f"{schema}.{name}"
50
- return str(name)
51
- return str(table)
55
+ schema_name = validate_dotted_identifier(
56
+ str(schema), kind="table schema identifier"
57
+ )
58
+ table_name = validate_identifier(str(name), kind="table identifier")
59
+ return f"{schema_name}.{table_name}"
60
+ return validate_identifier(str(name), kind="table identifier")
61
+ table_name = str(table)
62
+ validate_dotted_identifier(table_name, kind="table identifier")
63
+ return table_name
52
64
 
53
65
 
54
66
  def _format_columns(connection: Any, columns: Optional[Sequence[str]]) -> str:
55
67
  if not columns:
56
68
  return ""
69
+ validated_columns = validate_identifier_list(columns, kind="column identifier")
57
70
  preparer = getattr(
58
71
  getattr(connection, "dialect", None), "identifier_preparer", None
59
72
  )
60
73
  if preparer is None:
61
- cols = ", ".join(columns)
74
+ cols = ", ".join(validated_columns)
62
75
  else:
63
- cols = ", ".join(preparer.quote_identifier(col) for col in columns)
76
+ cols = ", ".join(preparer.quote_identifier(col) for col in validated_columns)
64
77
  return f" ({cols})"
65
78
 
66
79
 
@@ -115,6 +128,7 @@ def _copy_from_file(
115
128
  columns: Optional[Sequence[str]] = None,
116
129
  **options: Any,
117
130
  ) -> Any:
131
+ validate_identifier(format_name, kind="COPY format")
118
132
  table_name = _format_table(connection, table)
119
133
  column_clause = _format_columns(connection, columns)
120
134
  path_literal = _quote_literal(path)
@@ -1,13 +1,15 @@
1
1
  import os
2
2
  from decimal import Decimal
3
3
  from functools import lru_cache
4
- from typing import Dict, Set, Type, Union
4
+ from typing import Any, Dict, Set, Type, Union
5
5
 
6
6
  import duckdb
7
7
  from sqlalchemy import Boolean, Float, Integer, String
8
8
  from sqlalchemy.engine import Dialect
9
9
  from sqlalchemy.sql.type_api import TypeEngine
10
10
 
11
+ from ._validation import validate_identifier
12
+
11
13
  TYPES: Dict[Type, TypeEngine] = {
12
14
  bool: Boolean(),
13
15
  int: Integer(),
@@ -37,7 +39,7 @@ def get_core_config() -> Set[str]:
37
39
 
38
40
  def apply_config(
39
41
  dialect: Dialect,
40
- conn: duckdb.DuckDBPyConnection,
42
+ conn: Any,
41
43
  ext: Dict[str, Union[str, int, bool, float, None]],
42
44
  ) -> None:
43
45
  # TODO: does sqlalchemy have something that could do this for us?
@@ -48,8 +50,9 @@ def apply_config(
48
50
  string_processor = String().literal_processor(dialect=dialect)
49
51
 
50
52
  for k, v in ext.items():
53
+ key = validate_identifier(k, kind="config key")
51
54
  if v is None:
52
- conn.execute(f"SET {k} = NULL")
55
+ conn.execute(f"SET {key} = NULL")
53
56
  continue
54
57
  if isinstance(v, os.PathLike):
55
58
  v = os.fspath(v)
@@ -67,4 +70,4 @@ def apply_config(
67
70
  v = str(v)
68
71
  process = string_processor
69
72
  assert process, f"Not able to configure {k} with {v}"
70
- conn.execute(f"SET {k} = {process(v)}")
73
+ conn.execute(f"SET {key} = {process(v)}")
@@ -37,6 +37,7 @@ from sqlalchemy.engine.reflection import Inspector
37
37
  from sqlalchemy.exc import DBAPIError
38
38
  from sqlalchemy.ext.declarative import declarative_base
39
39
  from sqlalchemy.orm import Session, relationship, sessionmaker
40
+ from sqlalchemy.pool import QueuePool
40
41
 
41
42
  from .. import Dialect, insert, supports_attach, supports_user_agent
42
43
  from .._supports import has_comment_support
@@ -572,7 +573,7 @@ def test_do_ping(tmp_path: Path, caplog: LogCaptureFixture) -> None:
572
573
  "duckdb:///" + str(tmp_path / "db"),
573
574
  pool_pre_ping=True,
574
575
  pool_size=1,
575
- poolclass=sqlalchemy.pool.QueuePool,
576
+ poolclass=QueuePool,
576
577
  )
577
578
 
578
579
  logger = cast(logging.Logger, engine.pool.logger) # type: ignore
@@ -615,7 +616,8 @@ def test_361(engine: Engine) -> None:
615
616
 
616
617
  metadata = MetaData()
617
618
  metadata.reflect(bind=conn)
618
- test = metadata.tables["test"]
619
+ tables = cast(dict[str, Table], metadata.tables)
620
+ test = tables["test"]
619
621
  part = "year"
620
622
  date_part = func.date_part(part, test.c.dt)
621
623
 
@@ -1,4 +1,5 @@
1
- from typing import cast
1
+ from pathlib import Path
2
+ from typing import Any, cast
2
3
  from urllib.parse import parse_qs
3
4
 
4
5
  import duckdb
@@ -7,6 +8,7 @@ from sqlalchemy import Integer, String, pool
7
8
  from sqlalchemy import exc as sa_exc
8
9
  from sqlalchemy.engine import URL as SAURL
9
10
 
11
+ import duckdb_sqlalchemy
10
12
  from duckdb_sqlalchemy import (
11
13
  URL,
12
14
  ConnectionWrapper,
@@ -28,6 +30,7 @@ from duckdb_sqlalchemy import (
28
30
  )
29
31
  from duckdb_sqlalchemy import datatypes as dt
30
32
  from duckdb_sqlalchemy import motherduck as md
33
+ from duckdb_sqlalchemy.bulk import copy_from_csv
31
34
  from duckdb_sqlalchemy.config import TYPES, apply_config, get_core_config
32
35
 
33
36
 
@@ -472,15 +475,93 @@ def test_struct_or_union_requires_fields() -> None:
472
475
  preparer = dialect.identifier_preparer
473
476
 
474
477
  with pytest.raises(sa_exc.CompileError):
475
- dt.struct_or_union(dt.Struct(), compiler, preparer)
478
+ dt.struct_or_union(dt.Struct(), cast(Any, compiler), preparer)
476
479
 
477
480
  struct = dt.Struct({"first name": String, "age": Integer})
478
- rendered = dt.struct_or_union(struct, compiler, preparer)
481
+ rendered = dt.struct_or_union(struct, cast(Any, compiler), preparer)
479
482
  assert rendered.startswith("(")
480
483
  assert rendered.endswith(")")
481
484
  assert '"first name"' in rendered
482
485
 
483
486
 
487
+ def test_apply_config_rejects_invalid_key_no_side_effect() -> None:
488
+ conn = duckdb.connect(":memory:")
489
+ dialect = Dialect()
490
+ with pytest.raises(ValueError, match="invalid config key"):
491
+ apply_config(
492
+ dialect,
493
+ conn,
494
+ {"threads = 1; CREATE TABLE pwned_cfg(i INTEGER); --": "x"},
495
+ )
496
+
497
+ found = conn.execute(
498
+ "SELECT COUNT(*) FROM duckdb_tables() WHERE table_name='pwned_cfg'"
499
+ ).fetchone()
500
+ assert found is not None
501
+ assert found[0] == 0
502
+
503
+
504
+ def test_connect_rejects_invalid_extension_before_execute(
505
+ monkeypatch: pytest.MonkeyPatch,
506
+ ) -> None:
507
+ get_core_config()
508
+
509
+ class DummyConn:
510
+ def __init__(self) -> None:
511
+ self.executed: list[str] = []
512
+
513
+ def execute(self, statement: str) -> None:
514
+ self.executed.append(statement)
515
+
516
+ def register_filesystem(self, filesystem: object) -> None:
517
+ return None
518
+
519
+ dummy = DummyConn()
520
+ monkeypatch.setattr(duckdb_sqlalchemy.duckdb, "connect", lambda *a, **k: dummy)
521
+
522
+ with pytest.raises(ValueError, match="invalid extension name"):
523
+ Dialect().connect(
524
+ database=":memory:",
525
+ preload_extensions=["sqlite; CREATE TABLE pwned_ext(i INTEGER); --"],
526
+ config={},
527
+ )
528
+
529
+ assert dummy.executed == []
530
+
531
+
532
+ def test_copy_from_csv_rejects_invalid_table_and_option_key(
533
+ tmp_path: Path,
534
+ ) -> None:
535
+ conn = duckdb.connect(":memory:")
536
+ conn.execute("CREATE TABLE safe(i INTEGER)")
537
+ csv_path = tmp_path / "rows.csv"
538
+ csv_path.write_text("1\n")
539
+
540
+ with pytest.raises(ValueError, match="invalid table identifier"):
541
+ copy_from_csv(
542
+ conn,
543
+ "safe FROM 'x'; CREATE TABLE pwned_bulk(i INTEGER); --",
544
+ csv_path,
545
+ )
546
+
547
+ with pytest.raises(ValueError, match="invalid COPY option key"):
548
+ bad_options: dict[str, Any] = {
549
+ "header); CREATE TABLE pwned_opt(i INTEGER); --": True
550
+ }
551
+ copy_from_csv(
552
+ conn,
553
+ "safe",
554
+ csv_path,
555
+ **bad_options,
556
+ )
557
+
558
+ found = conn.execute(
559
+ "SELECT COUNT(*) FROM duckdb_tables() WHERE table_name IN ('pwned_bulk', 'pwned_opt')"
560
+ ).fetchone()
561
+ assert found is not None
562
+ assert found[0] == 0
563
+
564
+
484
565
  def test_parse_register_params_dict_and_tuple() -> None:
485
566
  view_name, df = _parse_register_params({"view_name": "v", "df": "data"})
486
567
  assert view_name == "v"
@@ -3,6 +3,7 @@ from typing import Generator
3
3
 
4
4
  import github_action_utils as gha
5
5
  import nox
6
+ from packaging.version import Version
6
7
 
7
8
  nox.options.default_venv_backend = "uv"
8
9
  nox.options.error_on_external_run = True
@@ -61,6 +62,8 @@ def tests_core(session: nox.Session, duckdb: str, sqlalchemy: str) -> None:
61
62
  session.install("-e", ".[dev]")
62
63
  operator = "==" if sqlalchemy.count(".") == 2 else "~="
63
64
  session.install(f"sqlalchemy{operator}{sqlalchemy}")
65
+ if Version(sqlalchemy) < Version("2.0"):
66
+ session.install("pandas<2.2")
64
67
  if duckdb == "master":
65
68
  session.install("duckdb", "--pre", "-U")
66
69
  else:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "duckdb-sqlalchemy"
3
- version = "1.4.4"
3
+ version = "1.4.4.2"
4
4
  description = "DuckDB SQLAlchemy dialect for DuckDB and MotherDuck"
5
5
  authors = [
6
6
  {name = "Leonardo Vida", email = "lleonardovida@gmail.com"},