dbt-feldera 0.289.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_feldera-0.289.0/PKG-INFO +286 -0
- dbt_feldera-0.289.0/README.md +257 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/__init__.py +15 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/__version__.py +18 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/column.py +114 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/connections.py +250 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/credentials.py +55 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/cursor.py +294 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/impl.py +766 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/pipeline_manager.py +546 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/relation.py +65 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/sql_parser.py +107 -0
- dbt_feldera-0.289.0/dbt/adapters/feldera/sqlglot_parser.py +264 -0
- dbt_feldera-0.289.0/dbt/include/feldera/__init__.py +3 -0
- dbt_feldera-0.289.0/dbt/include/feldera/dbt_project.yml +8 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/adapters/metadata.sql +29 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/adapters/relation.sql +16 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/adapters/schema.sql +11 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/catalog.sql +27 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/materializations/incremental.sql +27 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/materializations/seed.sql +68 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/materializations/streaming_pipeline.sql +36 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/materializations/table.sql +30 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/materializations/view.sql +52 -0
- dbt_feldera-0.289.0/dbt/include/feldera/macros/utils.sql +49 -0
- dbt_feldera-0.289.0/dbt/include/feldera/profile_template.yml +19 -0
- dbt_feldera-0.289.0/dbt_feldera.egg-info/PKG-INFO +286 -0
- dbt_feldera-0.289.0/dbt_feldera.egg-info/SOURCES.txt +32 -0
- dbt_feldera-0.289.0/dbt_feldera.egg-info/dependency_links.txt +1 -0
- dbt_feldera-0.289.0/dbt_feldera.egg-info/entry_points.txt +2 -0
- dbt_feldera-0.289.0/dbt_feldera.egg-info/requires.txt +15 -0
- dbt_feldera-0.289.0/dbt_feldera.egg-info/top_level.txt +1 -0
- dbt_feldera-0.289.0/pyproject.toml +75 -0
- dbt_feldera-0.289.0/setup.cfg +4 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dbt-feldera
|
|
3
|
+
Version: 0.289.0
|
|
4
|
+
Summary: The dbt adapter for Feldera — DBSP-native incremental view maintenance
|
|
5
|
+
Author-email: Feldera Team <dev@feldera.com>, Raki Rahman <mdrakiburrahman@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.feldera.com
|
|
8
|
+
Project-URL: Documentation, https://docs.feldera.com
|
|
9
|
+
Project-URL: Repository, https://github.com/feldera/feldera
|
|
10
|
+
Project-URL: Issues, https://github.com/feldera/feldera/issues
|
|
11
|
+
Keywords: feldera,dbt,incremental,streaming,dbsp
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: dbt-core~=1.11
|
|
17
|
+
Requires-Dist: dbt-adapters~=1.16
|
|
18
|
+
Requires-Dist: dbt-common~=1.12
|
|
19
|
+
Requires-Dist: feldera>=0.275.0
|
|
20
|
+
Requires-Dist: agate>=1.9.1
|
|
21
|
+
Requires-Dist: sqlglot>=30.1.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=9.0.3; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-timeout>=2.3.1; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff==0.9.10; extra == "dev"
|
|
26
|
+
Provides-Extra: test
|
|
27
|
+
Requires-Dist: pytest>=9.0.3; extra == "test"
|
|
28
|
+
Requires-Dist: pytest-timeout>=2.3.1; extra == "test"
|
|
29
|
+
|
|
30
|
+
# dbt-feldera
|
|
31
|
+
|
|
32
|
+
The [dbt](https://www.getdbt.com/) adapter for
|
|
33
|
+
[Feldera](https://www.feldera.com/).
|
|
34
|
+
|
|
35
|
+
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to
|
|
36
|
+
transform their data using the same practices that software engineers use to
|
|
37
|
+
build applications.
|
|
38
|
+
|
|
39
|
+
**[Feldera](https://www.feldera.com/)** is a streaming SQL engine powered by
|
|
40
|
+
the DBSP incremental computation engine. It automatically incrementalizes
|
|
41
|
+
_every_ SQL query without watermarks, scans, or `MERGE`. When input data
|
|
42
|
+
changes, only affected output rows are recomputed.
|
|
43
|
+
|
|
44
|
+
> [!IMPORTANT]
|
|
45
|
+
> **This adapter deploys
|
|
46
|
+
> [continuous pipelines](https://docs.feldera.com/pipelines), not
|
|
47
|
+
> [ad-hoc queries](https://docs.feldera.com/sql/ad-hoc).**
|
|
48
|
+
>
|
|
49
|
+
> Feldera supports two modes of query execution:
|
|
50
|
+
>
|
|
51
|
+
> - **Continuous pipelines** compile SQL into an incremental dataflow that runs
|
|
52
|
+
> indefinitely, processing every input change as it arrives in near real-time.
|
|
53
|
+
> - **Ad-hoc queries** are one-shot batch queries executed by
|
|
54
|
+
> [DataFusion](https://datafusion.apache.org/) against the state of
|
|
55
|
+
> [materialized tables and views](https://docs.feldera.com/sql/materialized).
|
|
56
|
+
> They exist primarily for development and debugging.
|
|
57
|
+
>
|
|
58
|
+
> When you run `dbt run`, this adapter assembles your models into a Feldera
|
|
59
|
+
> pipeline program, compiles it, and **starts a continuously running pipeline**.
|
|
60
|
+
> The pipeline keeps processing input changes and updating outputs until it is
|
|
61
|
+
> explicitly stopped. This differs from typical batch-oriented dbt adapters where `dbt run` executes
|
|
62
|
+
> a query once, processes a batch of data and exits.
|
|
63
|
+
|
|
64
|
+
## Key features
|
|
65
|
+
|
|
66
|
+
- **Automatic incremental view maintenance (IVM)** — Feldera's DBSP engine
|
|
67
|
+
incrementalizes any SQL query out of the box. No manual merge logic or
|
|
68
|
+
watermark tuning required.
|
|
69
|
+
- **Continuous pipeline deployment** — `dbt run` compiles and starts a
|
|
70
|
+
long-running Feldera pipeline; it does not execute one-shot queries.
|
|
71
|
+
- **Connector integration** — attach Kafka, Delta Lake, S3, and HTTP
|
|
72
|
+
connectors directly to models via configuration.
|
|
73
|
+
- **Easy setup** — pure Python adapter with no ODBC driver needed.
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install dbt-feldera
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
or with [uv](https://docs.astral.sh/uv/):
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
uv add dbt-feldera
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Requires Python 3.10+ and dbt-core ~1.11.
|
|
88
|
+
|
|
89
|
+
## Configuration
|
|
90
|
+
|
|
91
|
+
Add a Feldera target to your `profiles.yml`:
|
|
92
|
+
|
|
93
|
+
```yaml
|
|
94
|
+
my_project:
|
|
95
|
+
target: dev
|
|
96
|
+
outputs:
|
|
97
|
+
dev:
|
|
98
|
+
type: feldera
|
|
99
|
+
host: "http://localhost:8080"
|
|
100
|
+
api_key: "apikey:..." # optional — for authenticated instances
|
|
101
|
+
database: "default"
|
|
102
|
+
schema: "my_pipeline" # maps to the Feldera pipeline name
|
|
103
|
+
compilation_profile: dev # dev | unoptimized | optimized
|
|
104
|
+
workers: 4
|
|
105
|
+
timeout: 300
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Concept mapping
|
|
109
|
+
|
|
110
|
+
Feldera uses different terminology than traditional databases. Here's how dbt
|
|
111
|
+
concepts map to Feldera. Every materialization contributes SQL to a
|
|
112
|
+
**continuously running pipeline** — nothing is executed as a one-shot batch
|
|
113
|
+
query.
|
|
114
|
+
|
|
115
|
+
| dbt concept | Feldera concept | Description |
|
|
116
|
+
| ----------------------------- | ----------------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
|
117
|
+
| `database` | _(unused)_ | Set to any string (e.g. `"default"`) |
|
|
118
|
+
| `schema` | Pipeline name | Each dbt schema maps to one [Feldera pipeline](https://docs.feldera.com/pipelines) (a continuously running SQL program) |
|
|
119
|
+
| `table` materialization | Input table | External data source (Kafka, HTTP, S3) |
|
|
120
|
+
| `view` materialization | View | SQL view inside the continuous pipeline (all views are incrementally maintained) |
|
|
121
|
+
| `view` + `stored: true` | Materialized view | Queryable via [ad-hoc queries](https://docs.feldera.com/sql/ad-hoc) |
|
|
122
|
+
| `seed` | Table + HTTP push | Schema registered, data pushed via HTTP ingress |
|
|
123
|
+
|
|
124
|
+
### Configuration options
|
|
125
|
+
|
|
126
|
+
| Option | Default | Description |
|
|
127
|
+
| --------------------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------- |
|
|
128
|
+
| `host` | `http://localhost:8080` | Feldera API base URL |
|
|
129
|
+
| `api_key` | _(none)_ | API key for authenticated Feldera instances |
|
|
130
|
+
| `schema` | _(required)_ | Pipeline name in Feldera |
|
|
131
|
+
| `compilation_profile` | `dev` | SQL compilation profile: `dev` (fast compile), `unoptimized`, or `optimized` (best runtime performance) |
|
|
132
|
+
| `workers` | `4` | Number of pipeline worker threads |
|
|
133
|
+
| `timeout` | `300` | Max wait (seconds) for pipeline compilation + startup |
|
|
134
|
+
|
|
135
|
+
## Materializations
|
|
136
|
+
|
|
137
|
+
### `view` — Intermediate transform / Materialized view
|
|
138
|
+
|
|
139
|
+
Creates a `CREATE VIEW` in the pipeline. Use for intermediate transformations
|
|
140
|
+
that don't need to be queried directly or connected to an output.
|
|
141
|
+
|
|
142
|
+
```sql
|
|
143
|
+
-- models/orders_enriched.sql
|
|
144
|
+
{{ config(materialized='view') }}
|
|
145
|
+
|
|
146
|
+
SELECT o.id, o.total, c.name AS customer_name
|
|
147
|
+
FROM {{ ref('orders') }} o
|
|
148
|
+
JOIN {{ ref('customers') }} c ON o.customer_id = c.id
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Set `stored: true` to promote to a `CREATE MATERIALIZED VIEW` — a view backed
|
|
152
|
+
by persistent storage, enabling ad-hoc queries:
|
|
153
|
+
|
|
154
|
+
```sql
|
|
155
|
+
-- models/sales_summary.sql
|
|
156
|
+
{{ config(
|
|
157
|
+
materialized='view',
|
|
158
|
+
stored=true,
|
|
159
|
+
connectors=[{'transport': {'name': 'my_delta_connector'}}]
|
|
160
|
+
) }}
|
|
161
|
+
|
|
162
|
+
SELECT
|
|
163
|
+
region,
|
|
164
|
+
product_category,
|
|
165
|
+
SUM(amount) AS total_sales,
|
|
166
|
+
COUNT(*) AS order_count
|
|
167
|
+
FROM {{ ref('orders') }}
|
|
168
|
+
GROUP BY region, product_category
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
> [!NOTE]
|
|
172
|
+
> Every view in Feldera is automatically incrementally maintained by the DBSP
|
|
173
|
+
> engine. When inputs change, only affected output rows are recomputed — no
|
|
174
|
+
> watermarks, merge logic, or special configuration required. The
|
|
175
|
+
> `stored` flag controls only whether the view's state is
|
|
176
|
+
> **queryable** (via ad-hoc queries); it does **not** change how the view is
|
|
177
|
+
> computed.
|
|
178
|
+
|
|
179
|
+
On `--full-refresh`, the pipeline is stopped, all stored state (including
|
|
180
|
+
connector offsets) is cleared, and the pipeline is redeployed from scratch.
|
|
181
|
+
|
|
182
|
+
### `table` — Input source
|
|
183
|
+
|
|
184
|
+
Creates a `CREATE TABLE` — an input source for external data ingress. The model
|
|
185
|
+
SQL defines the **column schema**, not a SELECT query. Attach connectors for
|
|
186
|
+
Kafka, S3, HTTP, or other input sources.
|
|
187
|
+
|
|
188
|
+
```sql
|
|
189
|
+
-- models/raw_events.sql
|
|
190
|
+
{{ config(
|
|
191
|
+
materialized='table',
|
|
192
|
+
connectors=[{
|
|
193
|
+
'transport': {
|
|
194
|
+
'name': 'kafka_in',
|
|
195
|
+
'config': {
|
|
196
|
+
'bootstrap.servers': 'redpanda:29092',
|
|
197
|
+
'topics': ['events']
|
|
198
|
+
}
|
|
199
|
+
},
|
|
200
|
+
'format': {'name': 'json'}
|
|
201
|
+
}]
|
|
202
|
+
) }}
|
|
203
|
+
|
|
204
|
+
event_id BIGINT NOT NULL,
|
|
205
|
+
event_type VARCHAR NOT NULL,
|
|
206
|
+
payload VARCHAR,
|
|
207
|
+
created_at TIMESTAMP NOT NULL
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### `incremental` — Unsupported
|
|
211
|
+
|
|
212
|
+
> [!IMPORTANT]
|
|
213
|
+
> `dbt-feldera` does not support the `incremental` materialization because
|
|
214
|
+
> **all** views in Feldera are natively maintained incrementally by the DBSP
|
|
215
|
+
> engine.
|
|
216
|
+
>
|
|
217
|
+
> Use `materialized='view'` with `stored=true` instead:
|
|
218
|
+
>
|
|
219
|
+
> ```sql
|
|
220
|
+
> {{ config(materialized='view', stored=true) }}
|
|
221
|
+
> ```
|
|
222
|
+
|
|
223
|
+
### `streaming_pipeline` — Full pipeline as a single model
|
|
224
|
+
|
|
225
|
+
Deploys an entire Feldera pipeline as one dbt model. The model SQL **is** the
|
|
226
|
+
complete pipeline program — containing `CREATE TABLE` and `CREATE VIEW`
|
|
227
|
+
statements. Useful for complex multi-table, multi-view pipelines managed as a
|
|
228
|
+
single unit.
|
|
229
|
+
|
|
230
|
+
```sql
|
|
231
|
+
-- models/my_pipeline.sql
|
|
232
|
+
{{ config(materialized='streaming_pipeline') }}
|
|
233
|
+
|
|
234
|
+
CREATE TABLE orders (
|
|
235
|
+
id BIGINT NOT NULL,
|
|
236
|
+
customer_id BIGINT NOT NULL,
|
|
237
|
+
amount DECIMAL(10, 2) NOT NULL
|
|
238
|
+
);
|
|
239
|
+
|
|
240
|
+
CREATE TABLE customers (
|
|
241
|
+
id BIGINT NOT NULL,
|
|
242
|
+
name VARCHAR NOT NULL
|
|
243
|
+
);
|
|
244
|
+
|
|
245
|
+
CREATE MATERIALIZED VIEW enriched_orders AS
|
|
246
|
+
SELECT o.id, o.amount, c.name AS customer_name
|
|
247
|
+
FROM orders o
|
|
248
|
+
JOIN customers c ON o.customer_id = c.id;
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### `seed` — Reference data via HTTP push
|
|
252
|
+
|
|
253
|
+
Seeds register a `CREATE TABLE` and push row data via Feldera's HTTP ingress
|
|
254
|
+
API after the pipeline is deployed. Use for small reference datasets (CSVs).
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
dbt seed # push seed data
|
|
258
|
+
dbt seed --full-refresh # stop, clear storage, redeploy, then push
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Summary
|
|
262
|
+
|
|
263
|
+
| Materialization | Feldera SQL | Best for |
|
|
264
|
+
| ---------------------------------- | -------------------------- | --------------------------------------------------------------------------- |
|
|
265
|
+
| `view` | `CREATE VIEW` | Incrementally maintained intermediate transforms |
|
|
266
|
+
| `view` + `stored: true` | `CREATE MATERIALIZED VIEW` | Queryable outputs |
|
|
267
|
+
| `table` | `CREATE TABLE` | External input sources (Kafka, S3, HTTP) |
|
|
268
|
+
| `streaming_pipeline` | Full program | Multi-table/view pipelines as a single unit |
|
|
269
|
+
| `seed` | `CREATE TABLE` + data push | Small reference datasets (HTTP ingress; any connector can also be attached) |
|
|
270
|
+
|
|
271
|
+
## Documentation
|
|
272
|
+
|
|
273
|
+
- **[Feldera documentation](https://docs.feldera.com/)** — platform docs, SQL reference, connectors
|
|
274
|
+
- [Pipelines (continuous queries)](https://docs.feldera.com/pipelines) — how Feldera compiles SQL into an incremental dataflow
|
|
275
|
+
- [Ad-hoc queries](https://docs.feldera.com/sql/ad-hoc) — one-shot DataFusion queries for debugging materialized state
|
|
276
|
+
- [Materialized tables and views](https://docs.feldera.com/sql/materialized) — prerequisite for ad-hoc query access
|
|
277
|
+
- **[dbt documentation](https://docs.getdbt.com/)** — general dbt usage and concepts
|
|
278
|
+
|
|
279
|
+
## Contributing
|
|
280
|
+
|
|
281
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, testing, and
|
|
282
|
+
project layout.
|
|
283
|
+
|
|
284
|
+
## License
|
|
285
|
+
|
|
286
|
+
MIT — see [LICENSE](../../LICENSE) for details.
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
# dbt-feldera
|
|
2
|
+
|
|
3
|
+
The [dbt](https://www.getdbt.com/) adapter for
|
|
4
|
+
[Feldera](https://www.feldera.com/).
|
|
5
|
+
|
|
6
|
+
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to
|
|
7
|
+
transform their data using the same practices that software engineers use to
|
|
8
|
+
build applications.
|
|
9
|
+
|
|
10
|
+
**[Feldera](https://www.feldera.com/)** is a streaming SQL engine powered by
|
|
11
|
+
the DBSP incremental computation engine. It automatically incrementalizes
|
|
12
|
+
_every_ SQL query without watermarks, scans, or `MERGE`. When input data
|
|
13
|
+
changes, only affected output rows are recomputed.
|
|
14
|
+
|
|
15
|
+
> [!IMPORTANT]
|
|
16
|
+
> **This adapter deploys
|
|
17
|
+
> [continuous pipelines](https://docs.feldera.com/pipelines), not
|
|
18
|
+
> [ad-hoc queries](https://docs.feldera.com/sql/ad-hoc).**
|
|
19
|
+
>
|
|
20
|
+
> Feldera supports two modes of query execution:
|
|
21
|
+
>
|
|
22
|
+
> - **Continuous pipelines** compile SQL into an incremental dataflow that runs
|
|
23
|
+
> indefinitely, processing every input change as it arrives in near real-time.
|
|
24
|
+
> - **Ad-hoc queries** are one-shot batch queries executed by
|
|
25
|
+
> [DataFusion](https://datafusion.apache.org/) against the state of
|
|
26
|
+
> [materialized tables and views](https://docs.feldera.com/sql/materialized).
|
|
27
|
+
> They exist primarily for development and debugging.
|
|
28
|
+
>
|
|
29
|
+
> When you run `dbt run`, this adapter assembles your models into a Feldera
|
|
30
|
+
> pipeline program, compiles it, and **starts a continuously running pipeline**.
|
|
31
|
+
> The pipeline keeps processing input changes and updating outputs until it is
|
|
32
|
+
> explicitly stopped. This differs from typical batch-oriented dbt adapters where `dbt run` executes
|
|
33
|
+
> a query once, processes a batch of data and exits.
|
|
34
|
+
|
|
35
|
+
## Key features
|
|
36
|
+
|
|
37
|
+
- **Automatic incremental view maintenance (IVM)** — Feldera's DBSP engine
|
|
38
|
+
incrementalizes any SQL query out of the box. No manual merge logic or
|
|
39
|
+
watermark tuning required.
|
|
40
|
+
- **Continuous pipeline deployment** — `dbt run` compiles and starts a
|
|
41
|
+
long-running Feldera pipeline; it does not execute one-shot queries.
|
|
42
|
+
- **Connector integration** — attach Kafka, Delta Lake, S3, and HTTP
|
|
43
|
+
connectors directly to models via configuration.
|
|
44
|
+
- **Easy setup** — pure Python adapter with no ODBC driver needed.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install dbt-feldera
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
or with [uv](https://docs.astral.sh/uv/):
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv add dbt-feldera
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Requires Python 3.10+ and dbt-core ~1.11.
|
|
59
|
+
|
|
60
|
+
## Configuration
|
|
61
|
+
|
|
62
|
+
Add a Feldera target to your `profiles.yml`:
|
|
63
|
+
|
|
64
|
+
```yaml
|
|
65
|
+
my_project:
|
|
66
|
+
target: dev
|
|
67
|
+
outputs:
|
|
68
|
+
dev:
|
|
69
|
+
type: feldera
|
|
70
|
+
host: "http://localhost:8080"
|
|
71
|
+
api_key: "apikey:..." # optional — for authenticated instances
|
|
72
|
+
database: "default"
|
|
73
|
+
schema: "my_pipeline" # maps to the Feldera pipeline name
|
|
74
|
+
compilation_profile: dev # dev | unoptimized | optimized
|
|
75
|
+
workers: 4
|
|
76
|
+
timeout: 300
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Concept mapping
|
|
80
|
+
|
|
81
|
+
Feldera uses different terminology than traditional databases. Here's how dbt
|
|
82
|
+
concepts map to Feldera. Every materialization contributes SQL to a
|
|
83
|
+
**continuously running pipeline** — nothing is executed as a one-shot batch
|
|
84
|
+
query.
|
|
85
|
+
|
|
86
|
+
| dbt concept | Feldera concept | Description |
|
|
87
|
+
| ----------------------------- | ----------------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
|
88
|
+
| `database` | _(unused)_ | Set to any string (e.g. `"default"`) |
|
|
89
|
+
| `schema` | Pipeline name | Each dbt schema maps to one [Feldera pipeline](https://docs.feldera.com/pipelines) (a continuously running SQL program) |
|
|
90
|
+
| `table` materialization | Input table | External data source (Kafka, HTTP, S3) |
|
|
91
|
+
| `view` materialization | View | SQL view inside the continuous pipeline (all views are incrementally maintained) |
|
|
92
|
+
| `view` + `stored: true` | Materialized view | Queryable via [ad-hoc queries](https://docs.feldera.com/sql/ad-hoc) |
|
|
93
|
+
| `seed` | Table + HTTP push | Schema registered, data pushed via HTTP ingress |
|
|
94
|
+
|
|
95
|
+
### Configuration options
|
|
96
|
+
|
|
97
|
+
| Option | Default | Description |
|
|
98
|
+
| --------------------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------- |
|
|
99
|
+
| `host` | `http://localhost:8080` | Feldera API base URL |
|
|
100
|
+
| `api_key` | _(none)_ | API key for authenticated Feldera instances |
|
|
101
|
+
| `schema` | _(required)_ | Pipeline name in Feldera |
|
|
102
|
+
| `compilation_profile` | `dev` | SQL compilation profile: `dev` (fast compile), `unoptimized`, or `optimized` (best runtime performance) |
|
|
103
|
+
| `workers` | `4` | Number of pipeline worker threads |
|
|
104
|
+
| `timeout` | `300` | Max wait (seconds) for pipeline compilation + startup |
|
|
105
|
+
|
|
106
|
+
## Materializations
|
|
107
|
+
|
|
108
|
+
### `view` — Intermediate transform / Materialized view
|
|
109
|
+
|
|
110
|
+
Creates a `CREATE VIEW` in the pipeline. Use for intermediate transformations
|
|
111
|
+
that don't need to be queried directly or connected to an output.
|
|
112
|
+
|
|
113
|
+
```sql
|
|
114
|
+
-- models/orders_enriched.sql
|
|
115
|
+
{{ config(materialized='view') }}
|
|
116
|
+
|
|
117
|
+
SELECT o.id, o.total, c.name AS customer_name
|
|
118
|
+
FROM {{ ref('orders') }} o
|
|
119
|
+
JOIN {{ ref('customers') }} c ON o.customer_id = c.id
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Set `stored: true` to promote to a `CREATE MATERIALIZED VIEW` — a view backed
|
|
123
|
+
by persistent storage, enabling ad-hoc queries:
|
|
124
|
+
|
|
125
|
+
```sql
|
|
126
|
+
-- models/sales_summary.sql
|
|
127
|
+
{{ config(
|
|
128
|
+
materialized='view',
|
|
129
|
+
stored=true,
|
|
130
|
+
connectors=[{'transport': {'name': 'my_delta_connector'}}]
|
|
131
|
+
) }}
|
|
132
|
+
|
|
133
|
+
SELECT
|
|
134
|
+
region,
|
|
135
|
+
product_category,
|
|
136
|
+
SUM(amount) AS total_sales,
|
|
137
|
+
COUNT(*) AS order_count
|
|
138
|
+
FROM {{ ref('orders') }}
|
|
139
|
+
GROUP BY region, product_category
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
> [!NOTE]
|
|
143
|
+
> Every view in Feldera is automatically incrementally maintained by the DBSP
|
|
144
|
+
> engine. When inputs change, only affected output rows are recomputed — no
|
|
145
|
+
> watermarks, merge logic, or special configuration required. The
|
|
146
|
+
> `stored` flag controls only whether the view's state is
|
|
147
|
+
> **queryable** (via ad-hoc queries); it does **not** change how the view is
|
|
148
|
+
> computed.
|
|
149
|
+
|
|
150
|
+
On `--full-refresh`, the pipeline is stopped, all stored state (including
|
|
151
|
+
connector offsets) is cleared, and the pipeline is redeployed from scratch.
|
|
152
|
+
|
|
153
|
+
### `table` — Input source
|
|
154
|
+
|
|
155
|
+
Creates a `CREATE TABLE` — an input source for external data ingress. The model
|
|
156
|
+
SQL defines the **column schema**, not a SELECT query. Attach connectors for
|
|
157
|
+
Kafka, S3, HTTP, or other input sources.
|
|
158
|
+
|
|
159
|
+
```sql
|
|
160
|
+
-- models/raw_events.sql
|
|
161
|
+
{{ config(
|
|
162
|
+
materialized='table',
|
|
163
|
+
connectors=[{
|
|
164
|
+
'transport': {
|
|
165
|
+
'name': 'kafka_in',
|
|
166
|
+
'config': {
|
|
167
|
+
'bootstrap.servers': 'redpanda:29092',
|
|
168
|
+
'topics': ['events']
|
|
169
|
+
}
|
|
170
|
+
},
|
|
171
|
+
'format': {'name': 'json'}
|
|
172
|
+
}]
|
|
173
|
+
) }}
|
|
174
|
+
|
|
175
|
+
event_id BIGINT NOT NULL,
|
|
176
|
+
event_type VARCHAR NOT NULL,
|
|
177
|
+
payload VARCHAR,
|
|
178
|
+
created_at TIMESTAMP NOT NULL
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### `incremental` — Unsupported
|
|
182
|
+
|
|
183
|
+
> [!IMPORTANT]
|
|
184
|
+
> `dbt-feldera` does not support the `incremental` materialization because
|
|
185
|
+
> **all** views in Feldera are natively maintained incrementally by the DBSP
|
|
186
|
+
> engine.
|
|
187
|
+
>
|
|
188
|
+
> Use `materialized='view'` with `stored=true` instead:
|
|
189
|
+
>
|
|
190
|
+
> ```sql
|
|
191
|
+
> {{ config(materialized='view', stored=true) }}
|
|
192
|
+
> ```
|
|
193
|
+
|
|
194
|
+
### `streaming_pipeline` — Full pipeline as a single model
|
|
195
|
+
|
|
196
|
+
Deploys an entire Feldera pipeline as one dbt model. The model SQL **is** the
|
|
197
|
+
complete pipeline program — containing `CREATE TABLE` and `CREATE VIEW`
|
|
198
|
+
statements. Useful for complex multi-table, multi-view pipelines managed as a
|
|
199
|
+
single unit.
|
|
200
|
+
|
|
201
|
+
```sql
|
|
202
|
+
-- models/my_pipeline.sql
|
|
203
|
+
{{ config(materialized='streaming_pipeline') }}
|
|
204
|
+
|
|
205
|
+
CREATE TABLE orders (
|
|
206
|
+
id BIGINT NOT NULL,
|
|
207
|
+
customer_id BIGINT NOT NULL,
|
|
208
|
+
amount DECIMAL(10, 2) NOT NULL
|
|
209
|
+
);
|
|
210
|
+
|
|
211
|
+
CREATE TABLE customers (
|
|
212
|
+
id BIGINT NOT NULL,
|
|
213
|
+
name VARCHAR NOT NULL
|
|
214
|
+
);
|
|
215
|
+
|
|
216
|
+
CREATE MATERIALIZED VIEW enriched_orders AS
|
|
217
|
+
SELECT o.id, o.amount, c.name AS customer_name
|
|
218
|
+
FROM orders o
|
|
219
|
+
JOIN customers c ON o.customer_id = c.id;
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### `seed` — Reference data via HTTP push
|
|
223
|
+
|
|
224
|
+
Seeds register a `CREATE TABLE` and push row data via Feldera's HTTP ingress
|
|
225
|
+
API after the pipeline is deployed. Use for small reference datasets (CSVs).
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
dbt seed # push seed data
|
|
229
|
+
dbt seed --full-refresh # stop, clear storage, redeploy, then push
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Summary
|
|
233
|
+
|
|
234
|
+
| Materialization | Feldera SQL | Best for |
|
|
235
|
+
| ---------------------------------- | -------------------------- | --------------------------------------------------------------------------- |
|
|
236
|
+
| `view` | `CREATE VIEW` | Incrementally maintained intermediate transforms |
|
|
237
|
+
| `view` + `stored: true` | `CREATE MATERIALIZED VIEW` | Queryable outputs |
|
|
238
|
+
| `table` | `CREATE TABLE` | External input sources (Kafka, S3, HTTP) |
|
|
239
|
+
| `streaming_pipeline` | Full program | Multi-table/view pipelines as a single unit |
|
|
240
|
+
| `seed` | `CREATE TABLE` + data push | Small reference datasets (HTTP ingress; any connector can also be attached) |
|
|
241
|
+
|
|
242
|
+
## Documentation
|
|
243
|
+
|
|
244
|
+
- **[Feldera documentation](https://docs.feldera.com/)** — platform docs, SQL reference, connectors
|
|
245
|
+
- [Pipelines (continuous queries)](https://docs.feldera.com/pipelines) — how Feldera compiles SQL into an incremental dataflow
|
|
246
|
+
- [Ad-hoc queries](https://docs.feldera.com/sql/ad-hoc) — one-shot DataFusion queries for debugging materialized state
|
|
247
|
+
- [Materialized tables and views](https://docs.feldera.com/sql/materialized) — prerequisite for ad-hoc query access
|
|
248
|
+
- **[dbt documentation](https://docs.getdbt.com/)** — general dbt usage and concepts
|
|
249
|
+
|
|
250
|
+
## Contributing
|
|
251
|
+
|
|
252
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, testing, and
|
|
253
|
+
project layout.
|
|
254
|
+
|
|
255
|
+
## License
|
|
256
|
+
|
|
257
|
+
MIT — see [LICENSE](../../LICENSE) for details.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from dbt.adapters.base import AdapterPlugin
|
|
4
|
+
from dbt.adapters.feldera.__version__ import version as version # noqa: PLC0414 — re-exported as public API
|
|
5
|
+
|
|
6
|
+
# Conventional dunder alias for the package version imported above.
__version__ = version
|
|
7
|
+
from dbt.adapters.feldera.connections import FelderaConnectionManager as FelderaConnectionManager
|
|
8
|
+
from dbt.adapters.feldera.credentials import FelderaCredentials
|
|
9
|
+
from dbt.adapters.feldera.impl import FelderaAdapter
|
|
10
|
+
|
|
11
|
+
# Plugin object for this adapter: pairs the adapter class with its
# credentials class and points at the bundled include directory
# (dbt/include/feldera), located two levels above this package.
_ADAPTER_DIR = os.path.dirname(__file__)

Plugin = AdapterPlugin(
    adapter=FelderaAdapter,
    credentials=FelderaCredentials,
    include_path=os.path.join(_ADAPTER_DIR, "..", "..", "include", "feldera"),
)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError
|
|
2
|
+
from importlib.metadata import version as _pkg_version
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def determine_client_version(distribution_name: str = "dbt-feldera") -> str:
    """Return the installed version of a distribution (dbt-feldera by default).

    The version is looked up through ``importlib.metadata``, so the value
    reflects what was declared in ``pyproject.toml`` at install time rather
    than a hard-coded string; the original docstring notes that the feldera
    Python client takes the same approach.

    Args:
        distribution_name: Name of the installed distribution to query.
            Defaults to ``"dbt-feldera"``, which preserves the behavior of
            calling this function with no arguments.

    Returns:
        The version string recorded in the distribution metadata, or
        ``"unknown"`` when no such distribution is installed (for example
        when running from a source checkout without installing it).
    """
    try:
        return _pkg_version(distribution_name)
    except PackageNotFoundError:
        # Missing metadata is not fatal: fall back to a sentinel string so
        # importing the package never fails on a version lookup.
        return "unknown"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Resolved once at import time; holds "unknown" when the package metadata
# cannot be found.
version = determine_client_version()
|