quackpipe 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. quackpipe-0.6.1/LICENSE +21 -0
  2. quackpipe-0.6.1/PKG-INFO +193 -0
  3. quackpipe-0.6.1/README.md +155 -0
  4. quackpipe-0.6.1/pyproject.toml +83 -0
  5. quackpipe-0.6.1/setup.cfg +4 -0
  6. quackpipe-0.6.1/src/quackpipe/__init__.py +45 -0
  7. quackpipe-0.6.1/src/quackpipe/builder.py +58 -0
  8. quackpipe-0.6.1/src/quackpipe/cli.py +28 -0
  9. quackpipe-0.6.1/src/quackpipe/commands/__init__.py +0 -0
  10. quackpipe-0.6.1/src/quackpipe/commands/common.py +43 -0
  11. quackpipe-0.6.1/src/quackpipe/commands/generate_sqlmesh_config.py +85 -0
  12. quackpipe-0.6.1/src/quackpipe/commands/ui.py +74 -0
  13. quackpipe-0.6.1/src/quackpipe/config.py +35 -0
  14. quackpipe-0.6.1/src/quackpipe/core.py +123 -0
  15. quackpipe-0.6.1/src/quackpipe/etl_utils.py +110 -0
  16. quackpipe-0.6.1/src/quackpipe/exceptions.py +15 -0
  17. quackpipe-0.6.1/src/quackpipe/secrets.py +100 -0
  18. quackpipe-0.6.1/src/quackpipe/sources/__init__.py +3 -0
  19. quackpipe-0.6.1/src/quackpipe/sources/azure_blob.py +76 -0
  20. quackpipe-0.6.1/src/quackpipe/sources/base.py +43 -0
  21. quackpipe-0.6.1/src/quackpipe/sources/ducklake/__init__.py +115 -0
  22. quackpipe-0.6.1/src/quackpipe/sources/ducklake/providers.py +108 -0
  23. quackpipe-0.6.1/src/quackpipe/sources/postgres.py +68 -0
  24. quackpipe-0.6.1/src/quackpipe/sources/s3.py +77 -0
  25. quackpipe-0.6.1/src/quackpipe/sources/sqlite.py +42 -0
  26. quackpipe-0.6.1/src/quackpipe/test_utils/__init__.py +0 -0
  27. quackpipe-0.6.1/src/quackpipe/test_utils/data_fixtures.py +113 -0
  28. quackpipe-0.6.1/src/quackpipe/test_utils/fixtures.py +478 -0
  29. quackpipe-0.6.1/src/quackpipe/utils.py +59 -0
  30. quackpipe-0.6.1/src/quackpipe.egg-info/PKG-INFO +193 -0
  31. quackpipe-0.6.1/src/quackpipe.egg-info/SOURCES.txt +44 -0
  32. quackpipe-0.6.1/src/quackpipe.egg-info/dependency_links.txt +1 -0
  33. quackpipe-0.6.1/src/quackpipe.egg-info/entry_points.txt +2 -0
  34. quackpipe-0.6.1/src/quackpipe.egg-info/requires.txt +35 -0
  35. quackpipe-0.6.1/src/quackpipe.egg-info/top_level.txt +1 -0
  36. quackpipe-0.6.1/tests/test_azure_blob_handler.py +162 -0
  37. quackpipe-0.6.1/tests/test_cli.py +138 -0
  38. quackpipe-0.6.1/tests/test_ducklake_handler.py +156 -0
  39. quackpipe-0.6.1/tests/test_ducklake_integration.py +42 -0
  40. quackpipe-0.6.1/tests/test_e2e_ducklake_integration.py +131 -0
  41. quackpipe-0.6.1/tests/test_etl_utils.py +214 -0
  42. quackpipe-0.6.1/tests/test_postgres_handler.py +202 -0
  43. quackpipe-0.6.1/tests/test_quackpipe.py +347 -0
  44. quackpipe-0.6.1/tests/test_s3_handler.py +175 -0
  45. quackpipe-0.6.1/tests/test_secret_management.py +107 -0
  46. quackpipe-0.6.1/tests/test_sqlite_handler.py +89 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 ekiourk consulting ltd
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the “Software”), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: quackpipe
3
+ Version: 0.6.1
4
+ Summary: A configuration-driven and programmatic ETL helper for DuckDB.
5
+ License: MIT
6
+ Requires-Python: >=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: pyyaml
10
+ Requires-Dist: duckdb>=0.9.0
11
+ Requires-Dist: pandas
12
+ Requires-Dist: python-dotenv
13
+ Requires-Dist: azure-storage-blob
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest; extra == "dev"
16
+ Requires-Dist: pytest-cov; extra == "dev"
17
+ Requires-Dist: quackpipe[fixtures]; extra == "dev"
18
+ Requires-Dist: ipdb; extra == "dev"
19
+ Provides-Extra: fixtures
20
+ Requires-Dist: testcontainers==4.10.0; extra == "fixtures"
21
+ Requires-Dist: sqlalchemy; extra == "fixtures"
22
+ Requires-Dist: testcontainers-postgres; extra == "fixtures"
23
+ Requires-Dist: testcontainers-minio; extra == "fixtures"
24
+ Requires-Dist: testcontainers-azurite; extra == "fixtures"
25
+ Requires-Dist: httpx; extra == "fixtures"
26
+ Provides-Extra: lint
27
+ Requires-Dist: ruff; extra == "lint"
28
+ Provides-Extra: logging
29
+ Requires-Dist: structlog>=23.0.0; extra == "logging"
30
+ Requires-Dist: colorlog>=6.0.0; extra == "logging"
31
+ Provides-Extra: postgres
32
+ Requires-Dist: psycopg; extra == "postgres"
33
+ Provides-Extra: s3
34
+ Requires-Dist: pyarrow; extra == "s3"
35
+ Provides-Extra: kafka
36
+ Requires-Dist: confluent-kafka; extra == "kafka"
37
+ Dynamic: license-file
38
+
39
+ # Quackpipe
40
+
41
+ **The missing link between your Python scripts and your data infrastructure.**
42
+
43
+ Quackpipe is a powerful ETL helper library that uses **DuckDB** to create a unified, high-performance data plane for Python applications. It bridges the gap between writing raw, complex connection code and adopting a full-scale data transformation framework.
44
+
45
+ With a simple YAML configuration, you can instantly connect to multiple data sources like **PostgreSQL**, **S3**, **Azure Blob Storage**, and **SQLite**, and even orchestrate complex **DuckLake** setups, all from a single, clean Python interface.
46
+
47
+ [![codecov](https://codecov.io/github/ekiourk/quackpipe/graph/badge.svg?token=5LF2QD9MEW)](https://codecov.io/github/ekiourk/quackpipe)
48
+
49
+ ## What Gap Does Quackpipe Fill?
50
+
51
+ In the modern data stack, you often face a choice:
52
+
53
+ * **Low-Level:** Write boilerplate code with multiple database drivers (`psycopg2`, `boto3`, etc.) to connect and move data manually. This is flexible but repetitive and error-prone.
54
+ * **High-Level:** Adopt a full DataOps framework like **SQLMesh** or **dbt**. These are powerful for building production-grade data warehouses but can be overkill for ad-hoc analysis, rapid prototyping, or simple scripting.
55
+
56
+ **Quackpipe provides the perfect middle ground.** It gives you the power of a unified query engine and the simplicity of a Python library, allowing you to:
57
+
58
+ * **Prototype Rapidly:** Spin up a multi-source data environment in seconds.
59
+ * **Simplify ETL Scripts:** Replace complex driver code with a single, clean `session` or a one-line `move_data` command.
60
+ * **Explore Data Interactively:** Use the built-in CLI to launch a web UI with all your sources pre-connected for instant ad-hoc querying.
61
+ * **Bridge to Production:** Automatically generate configuration for frameworks like **SQLMesh** when you're ready to graduate from a script to a versioned data model.
62
+
63
+ ## Core Capabilities
64
+
65
+ * **Unified Data Access:** Query across PostgreSQL, S3, Azure, and SQLite as if they were all schemas in a single database.
66
+ * **Declarative Configuration:** Define all your data sources in one human-readable `config.yml` file.
67
+ * **Powerful ETL Utilities:** Move data between any two configured sources with the `move_data()` function.
68
+ * **Programmatic API:** Use the `QuackpipeBuilder` for dynamic, on-the-fly connection setups in your code.
69
+ * **Secure Secret Management:** Load credentials safely from `.env` files, keeping them out of your code and configuration.
70
+ * **Interactive UI:** Launch an interactive DuckDB web UI with all your sources pre-connected using a single CLI command.
71
+ * **Framework Integration:** Automatically generate a `sqlmesh_config.yml` file to seamlessly transition your project to a full DataOps framework.
72
+
73
+ ## Installation
74
+
75
+ ```bash
76
+ pip install quackpipe
77
+ ```
78
+
79
+ Install support for the sources you need:
80
+
81
+ ```bash
82
+ # Example: Install support for Postgres and S3
83
+ pip install "quackpipe[postgres,s3]"
84
+ ```
85
+
86
+ ## Configuration
87
+
88
+ `quackpipe` uses a simple `config.yml` file to define your sources and an `.env` file to manage your secrets.
89
+
90
+ ### `config.yml` Example
91
+
92
+ ```yaml
93
+ # config.yml
94
+ sources:
95
+ # A writeable PostgreSQL database.
96
+ pg_warehouse:
97
+ type: postgres
98
+ secret_name: "pg_prod" # See Secret Management section below
99
+ read_only: false # Allows writing data back to this source
100
+
101
+ # An S3 data lake for Parquet files.
102
+ s3_datalake:
103
+ type: s3
104
+ secret_name: "aws_prod"
105
+ region: "us-east-1"
106
+
107
+ # An Azure Blob Storage container.
108
+ azure_datalake:
109
+ type: azure
110
+ provider: connection_string
111
+ secret_name: "azure_prod"
112
+
113
+ # A composite DuckLake source.
114
+ my_lake:
115
+ type: ducklake
116
+ catalog:
117
+ type: sqlite
118
+ path: "/path/to/lake_catalog.db"
119
+ storage:
120
+ type: local
121
+ path: "/path/to/lake_storage/"
122
+ ```
123
+
124
+ ### Secret Management with `.env`
125
+
126
+ Quackpipe uses a `secret_name` in the config to refer to a bundle of credentials. These are loaded from an `.env` file using a simple prefix convention: `SECRET_NAME_KEY`.
127
+
128
+ Create an `.env` file in your project root:
129
+
130
+ ```dotenv
131
+ # .env
132
+
133
+ # Secrets for secret_name: "pg_prod"
134
+ PG_PROD_HOST=db.example.com
135
+ PG_PROD_USER=myuser
136
+ PG_PROD_PASSWORD=mypassword
137
+ PG_PROD_DATABASE=production
138
+
139
+ # Secrets for secret_name: "aws_prod"
140
+ AWS_PROD_ACCESS_KEY_ID=YOUR_AWS_ACCESS_KEY
141
+ AWS_PROD_SECRET_ACCESS_KEY=YOUR_AWS_SECRET_KEY
142
+
143
+ # Secrets for secret_name: "azure_prod"
144
+ AZURE_PROD_CONNECTION_STRING="DefaultEndpointsProtocol=https..."
145
+ ```
146
+
147
+ ## Usage Highlights
148
+
149
+ ### 1. Interactive Querying with `session`
150
+
151
+ Need to join a CSV in S3 with a table in Postgres? `quackpipe` makes it trivial.
152
+
153
+ ```python
154
+ import quackpipe
155
+
156
+ # quackpipe automatically loads your .env file
157
+ with quackpipe.session(config_path="config.yml", env_file=".env") as con:
158
+ df = con.execute("""
159
+ SELECT u.name, o.order_total
160
+ FROM pg_warehouse.users u
161
+ JOIN read_parquet('s3://my-bucket/orders/*.parquet') o ON u.id = o.user_id
162
+ WHERE u.signup_date > '2024-01-01';
163
+ """).fetchdf()
164
+
165
+ print(df.head())
166
+ ```
167
+
168
+ ### 2. One-Line Data Movement with `move_data`
169
+
170
+ Archive old records from your production database to your data lake with a single command.
171
+
172
+ ```python
173
+ from quackpipe.etl_utils import move_data
174
+
175
+ move_data(
176
+ config_path="config.yml",
177
+ env_file=".env",
178
+ source_query="SELECT * FROM pg_warehouse.logs WHERE timestamp < '2024-01-01'",
179
+ destination_name="s3_datalake",
180
+ table_name="logs_archive_2023"
181
+ )
182
+ ```
183
+
184
+ ### 3. Instant Data Exploration with the CLI
185
+
186
+ Launch a web browser UI with all your sources attached and ready for ad-hoc queries.
187
+
188
+ ```bash
189
+ # This command reads your config.yml and .env file
190
+ quackpipe ui
191
+
192
+ # Or connect to specific sources
193
+ quackpipe ui pg_warehouse s3_datalake
@@ -0,0 +1,155 @@
1
+ # Quackpipe
2
+
3
+ **The missing link between your Python scripts and your data infrastructure.**
4
+
5
+ Quackpipe is a powerful ETL helper library that uses **DuckDB** to create a unified, high-performance data plane for Python applications. It bridges the gap between writing raw, complex connection code and adopting a full-scale data transformation framework.
6
+
7
+ With a simple YAML configuration, you can instantly connect to multiple data sources like **PostgreSQL**, **S3**, **Azure Blob Storage**, and **SQLite**, and even orchestrate complex **DuckLake** setups, all from a single, clean Python interface.
8
+
9
+ [![codecov](https://codecov.io/github/ekiourk/quackpipe/graph/badge.svg?token=5LF2QD9MEW)](https://codecov.io/github/ekiourk/quackpipe)
10
+
11
+ ## What Gap Does Quackpipe Fill?
12
+
13
+ In the modern data stack, you often face a choice:
14
+
15
+ * **Low-Level:** Write boilerplate code with multiple database drivers (`psycopg2`, `boto3`, etc.) to connect and move data manually. This is flexible but repetitive and error-prone.
16
+ * **High-Level:** Adopt a full DataOps framework like **SQLMesh** or **dbt**. These are powerful for building production-grade data warehouses but can be overkill for ad-hoc analysis, rapid prototyping, or simple scripting.
17
+
18
+ **Quackpipe provides the perfect middle ground.** It gives you the power of a unified query engine and the simplicity of a Python library, allowing you to:
19
+
20
+ * **Prototype Rapidly:** Spin up a multi-source data environment in seconds.
21
+ * **Simplify ETL Scripts:** Replace complex driver code with a single, clean `session` or a one-line `move_data` command.
22
+ * **Explore Data Interactively:** Use the built-in CLI to launch a web UI with all your sources pre-connected for instant ad-hoc querying.
23
+ * **Bridge to Production:** Automatically generate configuration for frameworks like **SQLMesh** when you're ready to graduate from a script to a versioned data model.
24
+
25
+ ## Core Capabilities
26
+
27
+ * **Unified Data Access:** Query across PostgreSQL, S3, Azure, and SQLite as if they were all schemas in a single database.
28
+ * **Declarative Configuration:** Define all your data sources in one human-readable `config.yml` file.
29
+ * **Powerful ETL Utilities:** Move data between any two configured sources with the `move_data()` function.
30
+ * **Programmatic API:** Use the `QuackpipeBuilder` for dynamic, on-the-fly connection setups in your code.
31
+ * **Secure Secret Management:** Load credentials safely from `.env` files, keeping them out of your code and configuration.
32
+ * **Interactive UI:** Launch an interactive DuckDB web UI with all your sources pre-connected using a single CLI command.
33
+ * **Framework Integration:** Automatically generate a `sqlmesh_config.yml` file to seamlessly transition your project to a full DataOps framework.
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install quackpipe
39
+ ```
40
+
41
+ Install support for the sources you need:
42
+
43
+ ```bash
44
+ # Example: Install support for Postgres and S3
45
+ pip install "quackpipe[postgres,s3]"
46
+ ```
47
+
48
+ ## Configuration
49
+
50
+ `quackpipe` uses a simple `config.yml` file to define your sources and an `.env` file to manage your secrets.
51
+
52
+ ### `config.yml` Example
53
+
54
+ ```yaml
55
+ # config.yml
56
+ sources:
57
+ # A writeable PostgreSQL database.
58
+ pg_warehouse:
59
+ type: postgres
60
+ secret_name: "pg_prod" # See Secret Management section below
61
+ read_only: false # Allows writing data back to this source
62
+
63
+ # An S3 data lake for Parquet files.
64
+ s3_datalake:
65
+ type: s3
66
+ secret_name: "aws_prod"
67
+ region: "us-east-1"
68
+
69
+ # An Azure Blob Storage container.
70
+ azure_datalake:
71
+ type: azure
72
+ provider: connection_string
73
+ secret_name: "azure_prod"
74
+
75
+ # A composite DuckLake source.
76
+ my_lake:
77
+ type: ducklake
78
+ catalog:
79
+ type: sqlite
80
+ path: "/path/to/lake_catalog.db"
81
+ storage:
82
+ type: local
83
+ path: "/path/to/lake_storage/"
84
+ ```
85
+
86
+ ### Secret Management with `.env`
87
+
88
+ Quackpipe uses a `secret_name` in the config to refer to a bundle of credentials. These are loaded from an `.env` file using a simple prefix convention: `SECRET_NAME_KEY`.
89
+
90
+ Create an `.env` file in your project root:
91
+
92
+ ```dotenv
93
+ # .env
94
+
95
+ # Secrets for secret_name: "pg_prod"
96
+ PG_PROD_HOST=db.example.com
97
+ PG_PROD_USER=myuser
98
+ PG_PROD_PASSWORD=mypassword
99
+ PG_PROD_DATABASE=production
100
+
101
+ # Secrets for secret_name: "aws_prod"
102
+ AWS_PROD_ACCESS_KEY_ID=YOUR_AWS_ACCESS_KEY
103
+ AWS_PROD_SECRET_ACCESS_KEY=YOUR_AWS_SECRET_KEY
104
+
105
+ # Secrets for secret_name: "azure_prod"
106
+ AZURE_PROD_CONNECTION_STRING="DefaultEndpointsProtocol=https..."
107
+ ```
108
+
109
+ ## Usage Highlights
110
+
111
+ ### 1. Interactive Querying with `session`
112
+
113
+ Need to join a CSV in S3 with a table in Postgres? `quackpipe` makes it trivial.
114
+
115
+ ```python
116
+ import quackpipe
117
+
118
+ # quackpipe automatically loads your .env file
119
+ with quackpipe.session(config_path="config.yml", env_file=".env") as con:
120
+ df = con.execute("""
121
+ SELECT u.name, o.order_total
122
+ FROM pg_warehouse.users u
123
+ JOIN read_parquet('s3://my-bucket/orders/*.parquet') o ON u.id = o.user_id
124
+ WHERE u.signup_date > '2024-01-01';
125
+ """).fetchdf()
126
+
127
+ print(df.head())
128
+ ```
129
+
130
+ ### 2. One-Line Data Movement with `move_data`
131
+
132
+ Archive old records from your production database to your data lake with a single command.
133
+
134
+ ```python
135
+ from quackpipe.etl_utils import move_data
136
+
137
+ move_data(
138
+ config_path="config.yml",
139
+ env_file=".env",
140
+ source_query="SELECT * FROM pg_warehouse.logs WHERE timestamp < '2024-01-01'",
141
+ destination_name="s3_datalake",
142
+ table_name="logs_archive_2023"
143
+ )
144
+ ```
145
+
146
+ ### 3. Instant Data Exploration with the CLI
147
+
148
+ Launch a web browser UI with all your sources attached and ready for ad-hoc queries.
149
+
150
+ ```bash
151
+ # This command reads your config.yml and .env file
152
+ quackpipe ui
153
+
154
+ # Or connect to specific sources
155
+ quackpipe ui pg_warehouse s3_datalake
@@ -0,0 +1,83 @@
1
+ [project]
2
+ name = "quackpipe"
3
+ version = "0.6.1"
4
+ requires-python = ">=3.12"
5
+ description = "A configuration-driven and programmatic ETL helper for DuckDB."
6
+ license = {text = "MIT"}
7
+ readme = "README.md"
8
+ dependencies = [
9
+ "pyyaml",
10
+ "duckdb>=0.9.0",
11
+ "pandas",
12
+ "python-dotenv",
13
+ "azure-storage-blob"
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = [
18
+ "pytest",
19
+ "pytest-cov",
20
+ "quackpipe[fixtures]",
21
+ "ipdb"
22
+ ]
23
+ fixtures = [
24
+ "testcontainers==4.10.0",
25
+ "sqlalchemy",
26
+ "testcontainers-postgres",
27
+ "testcontainers-minio",
28
+ "testcontainers-azurite",
29
+ "httpx"
30
+ ]
31
+ lint = [
32
+ "ruff"
33
+ ]
34
+ logging = [
35
+ "structlog>=23.0.0",
36
+ "colorlog>=6.0.0",
37
+ ]
38
+ postgres = ["psycopg"]
39
+ s3 = ["pyarrow"]
40
+ kafka = ["confluent-kafka"]
41
+
42
+ [build-system]
43
+ requires = ["setuptools>=61.0"]
44
+ build-backend = "setuptools.build_meta"
45
+
46
+ [tool.setuptools]
47
+ package-dir = {"" = "src"}
48
+
49
+ [tool.setuptools.packages.find]
50
+ where = ["src"]
51
+
52
+ [tool.ruff]
53
+ src = ["src"]
54
+ line-length = 120
55
+
56
+ [tool.ruff.lint]
57
+ select = [
58
+ "E", # pycodestyle errors
59
+ "W", # pycodestyle warnings
60
+ "F", # pyflakes
61
+ "I", # isort
62
+ "B", # flake8-bugbear
63
+ "C4", # flake8-comprehensions
64
+ "UP", # pyupgrade
65
+ ]
66
+ # Allow logging format strings
67
+ ignore = [
68
+ "G201",
69
+ "G202",
70
+ "E501", # line too long (handled by formatter)
71
+ "B008", # do not perform function calls in argument defaults
72
+ ]
73
+
74
+ [tool.ruff.format]
75
+ # Use double quotes
76
+ quote-style = "double"
77
+
78
+ # Indent with spaces
79
+ indent-style = "space"
80
+
81
+ [project.scripts]
82
+ quackpipe = "quackpipe.cli:main"
83
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,45 @@
1
+ """
2
+ quackpipe - A configuration-driven ETL helper for DuckDB.
3
+
4
+ This library provides simple, high-level functions to connect DuckDB
5
+ to various data sources based on a YAML configuration file or a
6
+ programmatic builder.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+
12
+ # Expose the primary user-facing functions and classes.
13
+ from .builder import QuackpipeBuilder
14
+ from .config import SourceConfig, SourceType
15
+ from .core import session, with_session
16
+ from .exceptions import ConfigError, QuackpipeError, SecretError
17
+ from .secrets import configure_secret_provider
18
+
19
# Initialise the library-wide logger. The level is taken from the
# QUACKPIPE_LOG_LEVEL environment variable (default WARNING), and a
# NullHandler keeps the library silent unless the application opts in
# to logging configuration.
_env_level = os.getenv('QUACKPIPE_LOG_LEVEL', 'WARNING').upper()
_logger = logging.getLogger(__name__)
_logger.setLevel(getattr(logging, _env_level, logging.WARNING))
_logger.addHandler(logging.NullHandler())


__all__ = [
    # Core API
    "session",
    "with_session",

    # Builder API
    "QuackpipeBuilder",

    # Configuration Types
    "SourceConfig",
    "SourceType",

    # Secret Management
    "configure_secret_provider",

    # Exceptions
    "QuackpipeError",
    "ConfigError",
    "SecretError",
]
@@ -0,0 +1,58 @@
1
+ """
2
+ The Builder API for programmatically constructing a quackpipe session.
3
+ """
4
+ from typing import Any, Self
5
+
6
+ from .config import SourceConfig, SourceType
7
+ from .core import session as core_session # Avoid circular import
8
+
9
+
10
class QuackpipeBuilder:
    """A fluent builder for creating a quackpipe session without a YAML file."""

    def __init__(self):
        # Source definitions accumulate in the order they are added.
        self._sources: list[SourceConfig] = []

    def add_source(
        self,
        name: str,
        type: SourceType,
        config: dict[str, Any] | None = None,
        secret_name: str | None = None,
    ) -> Self:
        """
        Adds a data source to the configuration.

        Args:
            name: The name for the data source (e.g., 'pg_main').
            type: The type of the source, using the SourceType enum.
            config: A dictionary of non-secret parameters. Defaults to an
                empty dict when omitted.
            secret_name: The logical name of the secret bundle, if any.

        Returns:
            The builder instance for chaining.
        """
        source = SourceConfig(
            name=name,
            type=type,
            config=config or {},
            secret_name=secret_name,
        )
        self._sources.append(source)
        return self

    def get_configs(self) -> list[SourceConfig]:
        """
        Returns the list of SourceConfig objects that have been added to the builder.
        This is useful for passing to high-level utilities like `move_data`.
        """
        return self._sources

    def session(self, **kwargs):
        """
        Builds and enters the session context manager. Can accept the same arguments
        as the core session function, like `sources=['source_a']`.

        Returns:
            A context manager yielding a configured DuckDB connection.

        Raises:
            ValueError: If no sources have been added to the builder.
        """
        if not self._sources:
            raise ValueError("Cannot build a session with no sources defined.")

        # Pass the built configs and any extra arguments (like `sources`)
        # to the core session manager.
        return core_session(configs=self._sources, **kwargs)
@@ -0,0 +1,28 @@
1
+ """
2
+ cli.py
3
+
4
+ This module provides the main entry point for the quackpipe command-line interface.
5
+ It discovers and registers commands from the 'commands' submodule.
6
+ """
7
+ import argparse
8
+
9
+ # Import the registration functions from each command module
10
+ from .commands import generate_sqlmesh_config, ui
11
+
12
+
13
def main():
    """CLI entry point: build the parser, register sub-commands, dispatch."""
    parser = argparse.ArgumentParser(description="quackpipe: A DuckDB ETL Helper CLI.")
    subparsers = parser.add_subparsers(dest="command", required=True, help="Available commands")

    # Each command module attaches its own subparser and handler function.
    for command_module in (generate_sqlmesh_config, ui):
        command_module.register_command(subparsers)

    # Invoke whichever handler the selected subparser registered as `func`.
    parsed = parser.parse_args()
    parsed.func(parsed)


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,43 @@
1
+ """
2
+ src/quackpipe/commands/common.py
3
+
4
+ This module contains common utilities shared across CLI command modules.
5
+ """
6
+ import logging
7
+ import sys
8
+
9
+
10
def setup_cli_logging(verbose_level: int = 0):
    """
    Configures the 'quackpipe' logger so CLI output is visible on the console.

    Args:
        verbose_level (int): Verbosity: 0 -> WARNING, 1 -> INFO, 2 or more -> DEBUG.

    Returns:
        The configured 'quackpipe' logger.
    """
    # Translate the verbosity count into a logging level; default stays
    # at WARNING to avoid noisy output.
    if verbose_level >= 2:
        level = logging.DEBUG
    elif verbose_level == 1:
        level = logging.INFO
    else:
        level = logging.WARNING

    logger = logging.getLogger("quackpipe")
    logger.setLevel(level)

    # Console handler writing timestamped messages to stdout.
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))

    # Replace any pre-existing handlers so repeated calls (e.g. inside a
    # notebook) never produce duplicate log lines.
    logger.handlers.clear()
    logger.addHandler(console)

    return logger
+ return log