duckbill 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckbill-0.1.0/.gitignore +11 -0
- duckbill-0.1.0/PKG-INFO +18 -0
- duckbill-0.1.0/README.md +316 -0
- duckbill-0.1.0/duckbill/__init__.py +15 -0
- duckbill-0.1.0/duckbill/backends/__init__.py +48 -0
- duckbill-0.1.0/duckbill/backends/base.py +262 -0
- duckbill-0.1.0/duckbill/backends/duckdb.py +96 -0
- duckbill-0.1.0/duckbill/backends/mysql.py +69 -0
- duckbill-0.1.0/duckbill/backends/postgres.py +61 -0
- duckbill-0.1.0/duckbill/backends/snowflake.py +82 -0
- duckbill-0.1.0/duckbill/backends/sqlite.py +73 -0
- duckbill-0.1.0/duckbill/bundle.py +73 -0
- duckbill-0.1.0/duckbill/cli.py +70 -0
- duckbill-0.1.0/duckbill/core.py +188 -0
- duckbill-0.1.0/duckbill/docs.py +21 -0
- duckbill-0.1.0/duckbill/loader.py +44 -0
- duckbill-0.1.0/duckbill/page.py +1179 -0
- duckbill-0.1.0/duckbill/prune.py +157 -0
- duckbill-0.1.0/duckbill/questions.py +57 -0
- duckbill-0.1.0/duckbill/server.py +162 -0
- duckbill-0.1.0/duckbill/server_bundle.py +581 -0
- duckbill-0.1.0/examples/rds_slowq.py +377 -0
- duckbill-0.1.0/pyproject.toml +22 -0
- duckbill-0.1.0/tests/conftest.py +2 -0
- duckbill-0.1.0/tests/test_backends_factory.py +42 -0
- duckbill-0.1.0/tests/test_backends_mysql.py +62 -0
- duckbill-0.1.0/tests/test_backends_postgres.py +68 -0
- duckbill-0.1.0/tests/test_backends_scan.py +101 -0
- duckbill-0.1.0/tests/test_backends_snowflake.py +70 -0
- duckbill-0.1.0/tests/test_backends_sqlite.py +62 -0
- duckbill-0.1.0/tests/test_bundle_cli.py +56 -0
- duckbill-0.1.0/tests/test_bundle_guard.py +14 -0
- duckbill-0.1.0/tests/test_duckbill.py +311 -0
- duckbill-0.1.0/tests/test_prune.py +142 -0
- duckbill-0.1.0/tests/test_server_bundle.py +230 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
.venv/
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.pyc
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.pytest_cache/
|
|
6
|
+
examples/questions/
|
|
7
|
+
.vendor/
|
|
8
|
+
# standalone bundles are regenerable build artifacts (duckbill bundle -> a single
|
|
9
|
+
# uv-run .py); they embed the warehouse data and are too large to track.
|
|
10
|
+
/dashboard.py
|
|
11
|
+
/slowq.py
|
duckbill-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: duckbill
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Live, query-backed dashboards over a local DuckDB, declared as Python data.
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: duckdb>=0.10
|
|
7
|
+
Requires-Dist: pytz
|
|
8
|
+
Requires-Dist: sqlglot>=20
|
|
9
|
+
Provides-Extra: all
|
|
10
|
+
Requires-Dist: psycopg[binary]>=3; extra == 'all'
|
|
11
|
+
Requires-Dist: pymysql; extra == 'all'
|
|
12
|
+
Requires-Dist: snowflake-connector-python; extra == 'all'
|
|
13
|
+
Provides-Extra: mysql
|
|
14
|
+
Requires-Dist: pymysql; extra == 'mysql'
|
|
15
|
+
Provides-Extra: postgres
|
|
16
|
+
Requires-Dist: psycopg[binary]>=3; extra == 'postgres'
|
|
17
|
+
Provides-Extra: snowflake
|
|
18
|
+
Requires-Dist: snowflake-connector-python; extra == 'snowflake'
|
duckbill-0.1.0/README.md
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
# duckbill
|
|
2
|
+
|
|
3
|
+
Live, query-backed dashboards declared as Python data. A dashboard is a Python
|
|
4
|
+
file that defines charts as dicts; the server runs each chart's SQL on every
|
|
5
|
+
request, so the page is live -- it re-queries on interaction and reflects the
|
|
6
|
+
current warehouse. No build step, three dependencies (`duckdb`, `sqlglot`, `pytz`); network
|
|
7
|
+
backends are opt-in extras. Single process.
|
|
8
|
+
|
|
9
|
+
> Part of **duckpond**, a two-part local-DuckDB toolkit. **duckbill** (this) serves
|
|
10
|
+
> and shares a warehouse as a live dashboard; its sibling **ducktail** pulls
|
|
11
|
+
> scattered sources into one. duckbill works against any DuckDB/SQLite store --
|
|
12
|
+
> ducktail-built or not.
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
pip install -e .
|
|
16
|
+
duckbill serve examples/rds_slowq.py --db /path/to/warehouse.duckdb
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Backends
|
|
20
|
+
|
|
21
|
+
The `--db` flag accepts a DSN or a bare file path:
|
|
22
|
+
|
|
23
|
+
| DSN form | backend |
|
|
24
|
+
|---|---|
|
|
25
|
+
| `/path/to/file.duckdb` or `duckdb:///path/to/file.duckdb` | DuckDB (local file) |
|
|
26
|
+
| `sqlite:///path/to/file.db` | SQLite (local file) |
|
|
27
|
+
| `postgresql://user:pass@host/db` | Postgres |
|
|
28
|
+
| `mysql://user:pass@host/db` | MySQL |
|
|
29
|
+
| `snowflake://user@account/db/schema?warehouse=W&role=R` | Snowflake |
|
|
30
|
+
|
|
31
|
+
Secret values (passwords, tokens) should not be written into command lines or
|
|
32
|
+
dashboard files. Use `${VAR}` in the DSN; duckbill expands it from the
|
|
33
|
+
environment before connecting:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
duckbill serve dash.py --db "postgresql://ro_user:${DB_PASS}@db-host:5432/warehouse"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Network backends (Postgres, MySQL, Snowflake) are opt-in extras; the base
|
|
40
|
+
install only pulls in `duckdb`, `sqlglot`, and `pytz`:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
pip install duckbill[postgres] # psycopg
|
|
44
|
+
pip install duckbill[mysql] # pymysql
|
|
45
|
+
pip install duckbill[snowflake] # snowflake-connector-python
|
|
46
|
+
pip install duckbill[all] # all three
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
All connections are read-only. For DuckDB and SQLite that is enforced at the
|
|
50
|
+
driver level. For network backends, use a read-only role or user -- Snowflake
|
|
51
|
+
has no session-level read-only toggle, so this is especially important there.
|
|
52
|
+
|
|
53
|
+
`--pool` sets the connection pool size for network backends (default 4); it has
|
|
54
|
+
no effect on DuckDB or SQLite.
|
|
55
|
+
|
|
56
|
+
`$name` parameter binding is uniform across all backends -- the server translates
|
|
57
|
+
the dashboard's `$name` placeholders to the native paramstyle before executing.
|
|
58
|
+
SQL dialect (functions, casts, date arithmetic) is the author's responsibility:
|
|
59
|
+
write SQL that matches the backend you deploy against.
|
|
60
|
+
|
|
61
|
+
Bundles (`duckbill bundle`) are DuckDB/SQLite-only. A bundle embeds the warehouse
|
|
62
|
+
tables as Parquet in a self-contained `uv run` server script; network backends are
|
|
63
|
+
serve-only (they can't export to Parquet). A bundle never contains credentials --
|
|
64
|
+
the DSN is used at build time to export data and is not written into the output.
|
|
65
|
+
|
|
66
|
+
## Declaring a dashboard
|
|
67
|
+
|
|
68
|
+
A dashboard module defines `charts`, and optionally `params`, `title`, and a
|
|
69
|
+
`readme` (see [Documentation](#documentation)):
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
title = "my warehouse"
|
|
73
|
+
|
|
74
|
+
params = [
|
|
75
|
+
{"name": "window", "control": "timespan", "default": "31d",
|
|
76
|
+
"presets": ["6h", "24h", "7d", "31d"]}, # binds $start and $end
|
|
77
|
+
{"name": "kind", "control": "select", "default": "all",
|
|
78
|
+
"choices_sql": "SELECT DISTINCT kind FROM warehouse.t ORDER BY 1"},
|
|
79
|
+
{"name": "id", "default": "", "control": "none"}, # set by a drill click
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
charts = [
|
|
83
|
+
{"id": "volume", "section": "Overview", "title": "Volume", "type": "line",
|
|
84
|
+
"brush": "timespan", # drag the x-axis to zoom
|
|
85
|
+
"sql": """SELECT to_timestamp(ts) AS t, n FROM warehouse.t
|
|
86
|
+
WHERE to_timestamp(ts) >= $start::TIMESTAMPTZ
|
|
87
|
+
AND to_timestamp(ts) < $end::TIMESTAMPTZ
|
|
88
|
+
AND ($kind = 'all' OR kind = $kind) ORDER BY t""",
|
|
89
|
+
"encoding": {"x": {"field": "t", "type": "temporal"},
|
|
90
|
+
"y": {"field": "n", "type": "quantitative"}}},
|
|
91
|
+
]
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Charts
|
|
95
|
+
|
|
96
|
+
| key | meaning |
|
|
97
|
+
|------------|---------|
|
|
98
|
+
| `id` | unique identifier (required) |
|
|
99
|
+
| `title` | card heading (required) |
|
|
100
|
+
| `type` | `line` / `bar` / `stacked-bar` / `area` / `point` / `table` / `metric` / `leaderboard` / `spec` (required) |
|
|
101
|
+
| (table col) | a `table` query column named `_*` is data-only: kept for drill values, not displayed |
|
|
102
|
+
| `sql` | the query; may reference `$param` (required) |
|
|
103
|
+
| `section` | groups cards under a heading (default `Overview`) |
|
|
104
|
+
| `encoding` | Vega-Lite encoding for the built-in types |
|
|
105
|
+
| `spec` | raw Vega-Lite spec -- the escape hatch, used when `type` is `spec` |
|
|
106
|
+
| `drill` | bars: `{"param": p, "field": col}` -- click a mark to open param `p`'s detail page. tables: `{column: param}` or `{column: {param, value}}` -- click a cell to drill (a `value` column, or one named `_*`, supplies the param value when it differs from the displayed text) |
|
|
107
|
+
| `brush` | `"timespan"`: drag the x-axis to set the window |
|
|
108
|
+
| `markers` | `true` (all marker sets) or `["id", ...]`: overlay marker rules on this chart |
|
|
109
|
+
| `span` | `"full"` -- the card spans the whole row; an integer `N` -- it spans `N` columns (clamped to the columns that fit, so it degrades to full width on a narrow window) |
|
|
110
|
+
|
|
111
|
+
`sql` is bound, not interpolated -- the server passes `$param` values to the backend driver as
|
|
112
|
+
parameters, so control and drill input is safe. The dashboard module is your own
|
|
113
|
+
trusted code; its SQL runs as written.
|
|
114
|
+
|
|
115
|
+
### Metric cards
|
|
116
|
+
|
|
117
|
+
A `metric` chart is a strip of hero figures rather than a plot. Its SQL returns
|
|
118
|
+
**one row**; each column becomes a figure -- the value shown large and compacted
|
|
119
|
+
(`30.2k`, `1.2M`), the column name as the label. Use a quoted alias to control
|
|
120
|
+
the label, and pair it with `"span": "full"` for a hero row across the top:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
{"id": "summary", "section": "Overview", "title": "Slow-log summary", "type": "metric", "span": "full",
|
|
124
|
+
"sql": f"""SELECT count(*) AS "slow entries", round(sum(query_time_s)) AS "total query time (s)",
|
|
125
|
+
count(DISTINCT fingerprint_hash) AS "fingerprints"
|
|
126
|
+
FROM warehouse.entries WHERE {{window}}"""}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
When the query references the timespan (`$start`/`$end`), each numeric figure
|
|
130
|
+
also shows its change versus the previous equal-length window, as a signed
|
|
131
|
+
percent colored by whether the move is good or bad. `good` declares the good
|
|
132
|
+
direction -- `"up"` (higher is better), `"down"` (lower is better), or
|
|
133
|
+
`"neutral"` (no judgment, gray) -- either for all figures or per figure:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
"good": {"slow entries": "down", "total query time (s)": "down", "fingerprints": "neutral"}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Figures not listed default to `"up"`. The delta is omitted when there's no prior
|
|
140
|
+
window or the previous value is zero/absent.
|
|
141
|
+
|
|
142
|
+
A metric may also carry a `spark` query -- SQL returning a temporal column plus
|
|
143
|
+
one column per figure (aliases matched by name) -- and each figure gets an inline
|
|
144
|
+
sparkline of that trend:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
"spark": f"""SELECT date_trunc('hour', t) AS hour, count(*) AS "slow entries", ...
|
|
148
|
+
FROM warehouse.entries WHERE {{window}} GROUP BY 1 ORDER BY 1"""
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Leaderboards
|
|
152
|
+
|
|
153
|
+
A `leaderboard` is a ranked list for top-N-by-dimension: SQL returns rows whose
|
|
154
|
+
first text column is the label and first numeric column the value, drawn with an
|
|
155
|
+
inline magnitude bar behind each value. It drills like a bar (`{"param", "field"}`,
|
|
156
|
+
clicking a row navigates to that param's detail page; a `_`-prefixed column can
|
|
157
|
+
carry a hidden drill value, e.g. show readable text but drill on a hash). Denser
|
|
158
|
+
and more scannable than a bar chart for a long ranking.
|
|
159
|
+
|
|
160
|
+
### Compare
|
|
161
|
+
|
|
162
|
+
The **Compare** toggle (in the timespan control) overlays the previous
|
|
163
|
+
equal-length window: a faded previous-period series on single-series time charts,
|
|
164
|
+
and a `Δ%` per row on windowed leaderboards. The prev/next arrows step the window
|
|
165
|
+
back/forward by its own length.
|
|
166
|
+
|
|
167
|
+
### Enlarge and explore
|
|
168
|
+
|
|
169
|
+
Every card has an expand icon (top-right, on hover). Clicking it opens the chart
|
|
170
|
+
in a large modal where you can flip between the **Chart** and the raw **Data**
|
|
171
|
+
table, and **Open in Ask** to drop the chart's query (with the current params
|
|
172
|
+
substituted) into the Ask workbench for ad-hoc exploration.
|
|
173
|
+
|
|
174
|
+
### Params
|
|
175
|
+
|
|
176
|
+
A param drives a control and binds into SQL by its `name`. Controls:
|
|
177
|
+
|
|
178
|
+
- `select` -- a dropdown; options come from `choices` (a list) or `choices_sql`.
|
|
179
|
+
- `timespan` -- a time-range picker (presets + custom from/to + brush-to-zoom).
|
|
180
|
+
It binds `$start` and `$end` (ISO timestamps), not a param of its own name.
|
|
181
|
+
- `none` -- no control; the param is set only by a drill click.
|
|
182
|
+
|
|
183
|
+
`type` is `str` (default), `int`, or `float`.
|
|
184
|
+
|
|
185
|
+
### Markers
|
|
186
|
+
|
|
187
|
+
A `markers` list declares overlay queries -- the canonical case is deploy
|
|
188
|
+
markers, a recurring motif. Each marker is `{"id", "sql", "field"}` plus optional
|
|
189
|
+
`label` and `color`; the `sql` returns timestamps (referencing `$param` like any
|
|
190
|
+
query), and any chart with `markers: true` gets those timestamps drawn as rules.
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
markers = [
|
|
194
|
+
{"id": "deploys", "field": "t", "label": "label", "color": "#b9c2cc",
|
|
195
|
+
"sql": "SELECT to_timestamp(build_time) AS t, version AS label FROM warehouse.deploys "
|
|
196
|
+
"WHERE to_timestamp(build_time) >= $start::TIMESTAMPTZ "
|
|
197
|
+
" AND to_timestamp(build_time) < $end::TIMESTAMPTZ"},
|
|
198
|
+
]
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Window the marker query on `$start`/`$end` so rules stay inside the chart's time
|
|
202
|
+
axis. Markers re-run when the window changes.
|
|
203
|
+
|
|
204
|
+
## Documentation
|
|
205
|
+
|
|
206
|
+
A warehouse documents itself from two sources, both surfaced in the header's
|
|
207
|
+
**About** tab and by `duckbill docs`:
|
|
208
|
+
|
|
209
|
+
- the dashboard's `readme` -- a Markdown string for the narrative: what the
|
|
210
|
+
warehouse is, how the pieces fit, how to read the dashboard.
|
|
211
|
+
- DuckDB `COMMENT`s -- per-table and per-column descriptions that live in the
|
|
212
|
+
warehouse catalog, so the schema reference is generated, not hand-maintained.
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
readme = """\
|
|
216
|
+
This warehouse stitches together the slow log, Performance Insights, and ALB
|
|
217
|
+
access logs so a slow query can be traced out to the request that issued it.
|
|
218
|
+
|
|
219
|
+
Times are stored as epoch seconds (`logged_at`); the charts convert with
|
|
220
|
+
`to_timestamp`.
|
|
221
|
+
"""
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Set the `COMMENT`s where the warehouse is built, so they survive a rebuild:
|
|
225
|
+
|
|
226
|
+
```sql
|
|
227
|
+
COMMENT ON TABLE warehouse.entries IS 'One row per slow-query log entry, fingerprinted.';
|
|
228
|
+
COMMENT ON COLUMN warehouse.entries.logged_at IS 'When the statement was logged, epoch seconds (UTC).';
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
The About view renders the `readme` and a schema reference (each table's comment
|
|
232
|
+
and its columns with types and comments); the Ask sidebar hangs the same comments
|
|
233
|
+
off tables and columns as tooltips. To emit a `WAREHOUSE.md` for the repo:
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
duckbill docs examples/rds_slowq.py --db warehouse.duckdb -o WAREHOUSE.md
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Ask (ad-hoc queries)
|
|
240
|
+
|
|
241
|
+
The header's **Ask** tab is a query workbench, like Metabase's native query: a
|
|
242
|
+
schema sidebar (click to insert), a CodeMirror SQL editor with schema-aware
|
|
243
|
+
autocomplete, and a Run button (⌘/Ctrl+Enter). Results show as a table, or pick a
|
|
244
|
+
chart type + x/y/color to visualize through the same chart engine (tooltips,
|
|
245
|
+
hover crosshair, interactive legend included). The query is read-only -- the
|
|
246
|
+
connection is `read_only` (driver-enforced for DuckDB/SQLite; via a read-only role on network backends), so it's SELECT-only -- and results are row-capped.
|
|
247
|
+
|
|
248
|
+
**Save** names a question and writes it to a file -- one JSON per question under
|
|
249
|
+
`questions/` next to the dashboard (override with `--questions <dir>`), so they're
|
|
250
|
+
git-friendly and hand-editable. The **Saved** dropdown reopens or deletes them,
|
|
251
|
+
and each has a stable link (`#q=<slug>`). **Copy link** is the no-save path: it
|
|
252
|
+
encodes the SQL and chart choice into the URL (`#ask=…`).
|
|
253
|
+
|
|
254
|
+
## Standalone bundle
|
|
255
|
+
|
|
256
|
+
Wrap a dashboard and its data into one self-contained file for sharing or
|
|
257
|
+
archiving:
|
|
258
|
+
|
|
259
|
+
```
|
|
260
|
+
duckbill bundle examples/rds_slowq.py --db warehouse.duckdb -o dashboard.py
|
|
261
|
+
# -> dashboard.py (run it with: uv run dashboard.py)
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
`bundle` prunes the warehouse first -- only the tables and columns the charts
|
|
265
|
+
actually reference are included -- exports them to zstd Parquet, and embeds that
|
|
266
|
+
(b85) in a single `uv run`-able Python script. The recipient runs `uv run
|
|
267
|
+
dashboard.py`; uv resolves the deps from the PEP 723 header, the script extracts
|
|
268
|
+
its embedded Parquet to a content-keyed temp dir on first run (later runs reuse
|
|
269
|
+
it), an in-memory DuckDB exposes each table as a view `warehouse.<table>`, and a
|
|
270
|
+
browser renders the dashboard. No duckbill install, no static host, no sibling
|
|
271
|
+
files -- just one script and uv.
|
|
272
|
+
|
|
273
|
+
Queries run server-side (it's a tiny localhost http server), so it works in every
|
|
274
|
+
browser and the whole dashboard stays live: drill-down, the timespan brush, legend
|
|
275
|
+
filters, and the Ask view all work. The only degradation is that saved questions
|
|
276
|
+
are read-only -- the ones embedded at build time are loadable, but new ones can't
|
|
277
|
+
be persisted into the bundle.
|
|
278
|
+
|
|
279
|
+
Bundles are DuckDB/SQLite-only -- a bundle embeds the data as Parquet, which a
|
|
280
|
+
network backend can't export. A bundle never contains credentials; the DSN is used
|
|
281
|
+
only at build time.
|
|
282
|
+
|
|
283
|
+
## How it works
|
|
284
|
+
|
|
285
|
+
- **The dashboard module** is pure data -- charts and params, plus whatever Python
|
|
286
|
+
you want for shared SQL fragments and computed defaults.
|
|
287
|
+
- **The server** holds one read-only DuckDB connection behind a lock and serves
|
|
288
|
+
`/` (the page), `/meta` (params + chart metadata), `/q` (run one chart's SQL),
|
|
289
|
+
and `/docs` (the readme + catalog comments). It binds only the params each
|
|
290
|
+
query references.
|
|
291
|
+
- **The page** builds controls from the params, draws each chart with Vega-Lite,
|
|
292
|
+
and re-queries only the charts that reference a changed param.
|
|
293
|
+
|
|
294
|
+
### Pages and drill-down
|
|
295
|
+
|
|
296
|
+
Every section is a page. Sections not driven by a drill are the home page; each
|
|
297
|
+
drill param has its own detail page -- the section whose charts all reference it.
|
|
298
|
+
Clicking a drill mark navigates to that detail page (the current page lives in
|
|
299
|
+
the URL hash, so the browser back button and shareable links work); the home
|
|
300
|
+
page's other charts stay put. A detail page populates from its drill value, or,
|
|
301
|
+
on a direct link with no value, from a SQL default like
|
|
302
|
+
`COALESCE(NULLIF($route, ''), (SELECT ... LIMIT 1))`.
|
|
303
|
+
|
|
304
|
+
A control appears only when a chart on the current page references its param, so
|
|
305
|
+
a home-only filter hides on a detail page that ignores it. The header is pinned.
|
|
306
|
+
|
|
307
|
+
A chart with a color series gets an interactive legend: click an entry to focus
|
|
308
|
+
that series (the rest dim), click again to clear, shift-click for several. This
|
|
309
|
+
filters within the chart and never drills.
|
|
310
|
+
|
|
311
|
+
## Not in scope
|
|
312
|
+
|
|
313
|
+
Client-side crossfilter (that's [Mosaic](https://github.com/uwdata/mosaic)),
|
|
314
|
+
multi-user/auth/sharing (loopback, single user), and static export (the point is
|
|
315
|
+
to stay live). Charts that need a layered/transformed view use the `spec` escape
|
|
316
|
+
hatch; reference overlays (deploys, incidents) use `markers`.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""duckbill -- live, query-backed dashboards over a local DuckDB.
|
|
2
|
+
|
|
3
|
+
A dashboard is a plain Python module that defines `charts` (and optionally
|
|
4
|
+
`params` and `title`) as data. The server runs each chart's SQL per request, so
|
|
5
|
+
the page is live: it re-queries on every interaction and reflects the current
|
|
6
|
+
warehouse.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .core import Dashboard, Warehouse, params_in
|
|
10
|
+
from .loader import DashboardError, load_dashboard
|
|
11
|
+
from .server import serve
|
|
12
|
+
|
|
13
|
+
__all__ = ["Dashboard", "Warehouse", "params_in", "load_dashboard",
|
|
14
|
+
"DashboardError", "serve"]
|
|
15
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""open_backend: pick a Backend from a DSN, expanding ${VAR} from the environment.
|
|
2
|
+
|
|
3
|
+
A bare path (no scheme) is treated as a DuckDB file (`--db /x.duckdb`).
|
|
4
|
+
Network drivers are imported lazily inside their module, so a missing extra
|
|
5
|
+
errors only when that backend is selected.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
from urllib.parse import urlparse
|
|
11
|
+
|
|
12
|
+
from .base import Backend # re-exported for callers
|
|
13
|
+
|
|
14
|
+
# ${NAME} references inside a DSN; NAME follows shell identifier rules.
_VAR = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}")


def _expand(dsn):
    """Replace every ``${VAR}`` in *dsn* with that environment variable's value.

    A variable that is not set expands to the empty string.
    """
    return _VAR.sub(lambda m: os.environ.get(m.group(1), ""), dsn)


def _strip_scheme(dsn):
    """Return *dsn* without its leading ``scheme://`` (or bare ``scheme:``).

    Only called when urlparse found a scheme, so a ':' is always present.
    """
    _, sep, rest = dsn.partition("://")
    if sep:
        return rest
    # "file:/x" or "duckdb:rel.db" -- a scheme with no "//" authority marker.
    return dsn.split(":", 1)[1]


def open_backend(dsn, *, read_only=True, pool=4):
    """Open the Backend selected by *dsn*'s scheme.

    ``${VAR}`` references are expanded from the environment first. A bare
    path (no scheme, or a Windows drive letter such as ``C:\\w.duckdb``) and
    the ``duckdb``/``file`` schemes select DuckDB; ``sqlite``,
    ``postgres``/``postgresql``, ``mysql`` and ``snowflake`` select their
    backends. Each backend module is imported only when selected.

    *read_only* is forwarded to every backend; *pool* only to the network
    backends (Postgres, MySQL, Snowflake).

    Raises:
        ValueError: the scheme is not one of the above. The message names
            only the scheme -- the expanded DSN may hold a password taken
            from the environment and must not end up in logs or tracebacks.
    """
    dsn = _expand(dsn)
    scheme = urlparse(dsn).scheme

    # urlparse reads the drive letter of "C:\data\w.duckdb" as a one-letter
    # scheme; no backend uses one, so treat it as a bare file path.
    if len(scheme) == 1:
        scheme = ""

    if scheme in ("", "duckdb", "file"):
        from .duckdb import DuckDBBackend
        path = _strip_scheme(dsn) if scheme else dsn
        return DuckDBBackend(path, read_only=read_only)

    if scheme == "sqlite":
        from .sqlite import SQLiteBackend
        return SQLiteBackend(_strip_scheme(dsn), read_only=read_only)

    if scheme in ("postgres", "postgresql"):
        from .postgres import PostgresBackend
        return PostgresBackend(dsn, read_only=read_only, pool=pool)

    if scheme == "mysql":
        from .mysql import MySQLBackend
        return MySQLBackend(dsn, read_only=read_only, pool=pool)

    if scheme == "snowflake":
        from .snowflake import SnowflakeBackend
        return SnowflakeBackend(dsn, read_only=read_only, pool=pool)

    raise ValueError(f"unknown backend scheme {scheme!r}")
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Backend surface plus the dialect-aware parameter scan shared by every backend.
|
|
2
|
+
|
|
3
|
+
`$name` is the one author-facing bind placeholder. Discovery and translation are
|
|
4
|
+
the same scan: sqlglot tokenizes the SQL for the backend's dialect so we know the
|
|
5
|
+
source spans of string literals, quoted identifiers, and dollar-quoted bodies;
|
|
6
|
+
comment spans we add ourselves (guarded by those string spans). A `$name` counts
|
|
7
|
+
only when it falls outside every protected span. We use sqlglot to find non-code
|
|
8
|
+
regions, not to interpret `$name` -- which is a duckbill convention, not native
|
|
9
|
+
to each dialect.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import math
import queue
import re
import threading
from contextlib import contextmanager
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from uuid import UUID

import sqlglot
|
|
20
|
+
|
|
21
|
+
_PARAM = re.compile(r"\$([A-Za-z_][A-Za-z0-9_]*)")
|
|
22
|
+
|
|
23
|
+
# sqlglot token types whose source span is non-code: string literals (incl.
|
|
24
|
+
# Postgres/Snowflake dollar-quoting -> HEREDOC/RAW) and quoted identifiers.
|
|
25
|
+
_PROTECTED_TOKENS = {
|
|
26
|
+
"STRING", "HEREDOC_STRING", "RAW_STRING", "NATIONAL_STRING",
|
|
27
|
+
"BYTE_STRING", "HEX_STRING", "BIT_STRING", "IDENTIFIER",
|
|
28
|
+
}
|
|
29
|
+
# Line-comment markers; only MySQL adds '#'. Block comments /* */ are universal.
|
|
30
|
+
_LINE_COMMENTS = {"mysql": ("--", "#")}
|
|
31
|
+
|
|
32
|
+
_STYLE = {
|
|
33
|
+
"duckdb": lambda n: f"${n}", # native; passthrough
|
|
34
|
+
"sqlite": lambda n: f":{n}", # sqlite3 named paramstyle
|
|
35
|
+
"pyformat": lambda n: f"%({n})s", # psycopg / PyMySQL / snowflake-connector
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _in(spans, i):
|
|
40
|
+
return any(a <= i <= b for a, b in spans)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _comment_spans(sql, dialect, str_spans):
    """Locate SQL comments in *sql* as inclusive ``(start, end)`` offsets.

    Block comments (``/* ... */``) are recognised for every dialect; line
    comments start at one of the dialect's markers from `_LINE_COMMENTS`
    (``--`` when the dialect has no entry) and run up to, but not including,
    the newline. An unterminated comment extends to the last character.
    Offsets covered by *str_spans* are skipped, so a marker inside a string
    or quoted identifier never opens a comment.
    """
    line_markers = tuple(_LINE_COMMENTS.get(dialect, ("--",)))
    total = len(sql)
    found = []
    pos = 0
    while pos < total:
        if _in(str_spans, pos):  # inside a literal: markers here are plain text
            pos += 1
            continue
        if sql.startswith("/*", pos):
            close = sql.find("*/", pos + 2)
            # end on the '/' of the terminator, or at end of input
            last = total - 1 if close < 0 else close + 1
        elif sql.startswith(line_markers, pos):
            newline = sql.find("\n", pos)
            # end on the character before the newline, or at end of input
            last = total - 1 if newline < 0 else newline - 1
        else:
            pos += 1
            continue
        found.append((pos, last))
        pos = last + 1
    return found
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _protected(sql, dialect):
    """Return the inclusive spans of *sql* in which a ``$name`` must be ignored.

    sqlglot tokenizes *sql* for *dialect*; tokens whose type is listed in
    `_PROTECTED_TOKENS` contribute their ``(start, end)`` offsets, and the
    comment spans from `_comment_spans` are appended after them.
    """
    try:
        tokens = sqlglot.tokenize(sql, dialect=dialect)
    except Exception:
        # a tokenize failure must not blank the chart -- protect nothing
        tokens = []
    literal_spans = []
    for tok in tokens:
        if tok.token_type.name in _PROTECTED_TOKENS:
            literal_spans.append((tok.start, tok.end))
    return literal_spans + _comment_spans(sql, dialect, literal_spans)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def referenced_params(sql, dialect="duckdb"):
    """Collect the names of the ``$name`` placeholders that *sql* references.

    A placeholder is counted only when it starts outside every span reported
    by `_protected` for *dialect* -- i.e. not inside a string, quoted
    identifier, comment, or dollar-quoted body. Returns a set of names
    (without the ``$``).
    """
    shielded = _protected(sql, dialect)
    names = set()
    for match in _PARAM.finditer(sql):
        if not _in(shielded, match.start()):
            names.add(match.group(1))
    return names
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def bind(sql, args, dialect, paramstyle):
    """Rewrite ``$name`` placeholders into *paramstyle* and select their values.

    Placeholders that start inside a protected span (see `_protected`) are
    left untouched. Returns ``(translated_sql, params)`` where *params* holds
    only the entries of *args* whose name is actually referenced. With the
    'pyformat' style every literal '%' outside a placeholder is doubled to
    '%%' so LIKE patterns survive the driver's own substitution.

    When no placeholder is referenced the SQL is returned verbatim with an
    empty dict: the caller then executes without parameters, the driver does
    no %-substitution, and so nothing may be escaped.

    Raises:
        KeyError: *paramstyle* is not a key of `_STYLE`.
    """
    shielded = _protected(sql, dialect)
    placeholder = _STYLE[paramstyle]
    double_percent = paramstyle == "pyformat"

    def literal(text):
        return text.replace("%", "%%") if double_percent else text

    pieces = []
    cursor = 0
    referenced = set()
    for match in _PARAM.finditer(sql):
        begin = match.start()
        if _in(shielded, begin):
            continue
        name = match.group(1)
        pieces += [literal(sql[cursor:begin]), placeholder(name)]
        cursor = match.end()
        referenced.add(name)

    if not referenced:
        return sql, {}

    pieces.append(literal(sql[cursor:]))
    bound = {key: value for key, value in args.items() if key in referenced}
    return "".join(pieces), bound
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def jsonable(v):
    """Coerce a driver value to something the JSON encoder and Vega accept.

    - ``Decimal`` becomes ``float``.
    - Non-finite floats (NaN, +/-Infinity, including those produced from a
      ``Decimal``) become ``None``: Python's encoder would emit the bare
      tokens ``NaN``/``Infinity``, which are not valid JSON and make the
      browser's ``JSON.parse`` reject the whole response.
    - ``datetime``/``date``/``time`` become ISO-8601 strings.
    - ``timedelta`` and ``UUID`` become their ``str()`` form; the JSON
      encoder cannot serialise either type natively.
    - ``bytes``/``bytearray``/``memoryview`` become a hex string.

    Every other value is returned unchanged.
    """
    if isinstance(v, Decimal):
        v = float(v)  # may itself be nan/inf; checked just below
    if isinstance(v, float):
        return v if math.isfinite(v) else None
    if isinstance(v, (datetime, date, time)):
        return v.isoformat()
    if isinstance(v, (timedelta, UUID)):
        return str(v)
    if isinstance(v, (bytes, bytearray, memoryview)):
        return bytes(v).hex()
    return v
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def jsonable_row(row):
    """Return a new list holding `jsonable` applied to each value of *row*."""
    return list(map(jsonable, row))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class Backend:
    """Interface the server and the bundler program against.

    Concrete backends override the methods below; the base versions raise
    NotImplementedError (``close`` is a no-op). Class attributes:

        dialect:    sqlglot dialect name for the scan
        paramstyle: key into _STYLE for bind()
        bundleable: can `duckbill bundle` embed this backend's data?
    """

    dialect = "duckdb"
    paramstyle = "duckdb"
    bundleable = False

    def run(self, sql, args):
        """Execute *sql* with the bind values in *args*. Must be overridden."""
        raise NotImplementedError

    def query(self, sql, limit=2000):
        """Execute *sql* without binds, given a row *limit*. Must be overridden."""
        raise NotImplementedError

    def docs(self):
        """Backend documentation hook. Must be overridden."""
        raise NotImplementedError

    def schema(self):
        """Backend schema hook. Must be overridden."""
        raise NotImplementedError

    def table_columns(self):
        """Columns per table, keyed by the same qualified names as `schema()`:
        `{<schema>.<name>: [col, ...]}`. The bundler's column pruner feeds this to
        sqlglot as a schema map. Serve-only backends don't implement it."""
        raise NotImplementedError(f"{type(self).__name__} is serve-only (not bundleable)")

    def export_parquet(self, qualified, columns=None, compression="snappy"):
        """Parquet export hook for bundling; serve-only backends don't implement it."""
        raise NotImplementedError(f"{type(self).__name__} is serve-only (not bundleable)")

    def close(self):
        """Release backend resources; nothing to do in the base class."""
        pass
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Allowlist of Parquet compression codecs. The chosen name is spliced into a
# COPY statement as text (it cannot be passed as a bind parameter), so only
# these known-safe keywords are ever accepted.
_PARQUET_CODECS = frozenset({"snappy", "zstd", "gzip", "uncompressed"})


def parquet_codec(compression):
    """Return *compression* lower-cased if it is an allowed Parquet codec.

    Raises:
        ValueError: the lower-cased name is not in `_PARQUET_CODECS`.
    """
    normalized = compression.lower()
    if normalized in _PARQUET_CODECS:
        return normalized
    raise ValueError(
        f"unsupported Parquet compression {compression!r}; "
        f"expected one of {sorted(_PARQUET_CODECS)}")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class Pool:
    """A tiny bounded connection pool for network backends.

    Connections are created lazily by *factory*, at most ``max(1, size)`` at a
    time; once that many exist, borrowers block until a connection is returned
    or a slot is freed. Idle connections are handed out LIFO so a small
    working set stays warm.

    A connection whose borrower raised is closed and its slot released. That
    release wakes a blocked borrower, which then opens a replacement; without
    the wake-up a waiter would sleep forever once every live connection had
    been discarded.
    """

    def __init__(self, factory, size=4):
        self._factory = factory
        self._free = queue.LifoQueue()
        self._made = 0
        self._size = max(1, size)
        self._lock = threading.Lock()
        # Shares _lock, so "a connection or a slot became available" is
        # signalled under the same mutex that guards _made.
        self._available = threading.Condition(self._lock)

    @contextmanager
    def borrow(self):
        """Yield a connection; return it to the pool on success, discard it on error."""
        con = self._acquire()
        ok = False
        try:
            yield con
            ok = True
        finally:
            if ok:
                with self._available:
                    self._free.put(con)
                    self._available.notify()
            else:
                # The connection may be mid-statement or broken: never reuse it.
                try:
                    con.close()
                except Exception:
                    pass  # best effort; the slot is released regardless
                self._release_slot()

    def _release_slot(self):
        """Give back one unit of capacity and wake a borrower waiting for it."""
        with self._available:
            self._made -= 1
            self._available.notify()

    def _acquire(self):
        """Return an idle connection, or open one if under the limit, else wait."""
        with self._available:
            while True:
                try:
                    return self._free.get_nowait()
                except queue.Empty:
                    pass
                if self._made < self._size:
                    self._made += 1  # reserve the slot before connecting
                    break
                self._available.wait()
        # Connect outside the lock so a slow handshake does not stall others.
        try:
            return self._factory()
        except BaseException:
            self._release_slot()
            raise

    def close(self):
        """Close every idle connection and free its slot.

        Connections currently borrowed are not touched. Errors from an
        individual ``close()`` are ignored so the rest are still closed.
        """
        idle = []
        with self._available:
            while True:
                try:
                    idle.append(self._free.get_nowait())
                except queue.Empty:
                    break
            # Freed slots let a later borrow() reconnect instead of blocking.
            self._made -= len(idle)
            self._available.notify_all()
        for con in idle:
            try:
                con.close()
            except Exception:
                pass
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class DBAPIBackend(Backend):
    """Shared run/query for PEP-249 drivers.

    Translates ``$name`` via bind(), borrows a pooled connection, and coerces
    rows with jsonable_row(). Subclasses set dialect/paramstyle and implement
    `_connect` (returns a new read-only DBAPI connection), `docs`, `schema`.

    Every cursor is closed once its rows are fetched. If closing fails, the
    error propagates and the pool discards the connection.
    """

    def __init__(self, *, pool=4):
        self._pool = Pool(self._connect, size=pool)

    def _connect(self):
        """Open one new DBAPI connection. Must be overridden."""
        raise NotImplementedError

    def run(self, sql, args):
        """Execute *sql* with the referenced entries of *args* bound.

        Returns ``(cols, rows)``: the column names and one dict per row.
        A statement that yields no result set returns ``([], [])``.
        """
        q, p = bind(sql, args, self.dialect, self.paramstyle)
        with self._pool.borrow() as con:
            cur = con.cursor()
            try:
                # With no binds, call execute(q) alone so the driver performs
                # no %-substitution on the verbatim SQL.
                if p:
                    cur.execute(q, p)
                else:
                    cur.execute(q)
                if cur.description is None:  # PEP 249: no result set
                    return [], []
                cols = [d[0] for d in cur.description]
                rows = cur.fetchall()
            finally:
                cur.close()
        return cols, [dict(zip(cols, jsonable_row(r))) for r in rows]

    def query(self, sql, limit=2000):
        """Execute ad-hoc *sql* (no binds), returning at most *limit* rows.

        Returns ``(cols, rows, truncated)``; *truncated* is True when the
        statement produced more than *limit* rows. A statement that yields
        no result set returns ``([], [], False)``.
        """
        with self._pool.borrow() as con:
            cur = con.cursor()
            try:
                cur.execute(sql)
                if cur.description is None:  # PEP 249: no result set
                    return [], [], False
                cols = [d[0] for d in cur.description]
                # One extra row tells us whether the result was cut off.
                rows = cur.fetchmany(limit + 1)
            finally:
                cur.close()
        truncated = len(rows) > limit
        rows = rows[:limit]
        return cols, [dict(zip(cols, jsonable_row(r))) for r in rows], truncated

    def close(self):
        """Close the pool's idle connections."""
        self._pool.close()
|