dirsql 0.3.45__tar.gz → 0.3.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. {dirsql-0.3.45 → dirsql-0.3.46}/Cargo.lock +1 -1
  2. {dirsql-0.3.45 → dirsql-0.3.46}/PKG-INFO +1 -1
  3. {dirsql-0.3.45/packages/python → dirsql-0.3.46}/docs/cli/config.md +67 -2
  4. {dirsql-0.3.45 → dirsql-0.3.46}/docs/cli/http-api.md +9 -0
  5. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/Cargo.toml +1 -1
  6. {dirsql-0.3.45 → dirsql-0.3.46/packages/python}/docs/cli/config.md +67 -2
  7. {dirsql-0.3.45/packages/rust → dirsql-0.3.46/packages/python}/docs/cli/http-api.md +9 -0
  8. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/cli/config.md +67 -2
  9. {dirsql-0.3.45/packages/python → dirsql-0.3.46/packages/rust}/docs/cli/http-api.md +9 -0
  10. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/bin/dirsql.rs +25 -2
  11. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/cli/mod.rs +57 -1
  12. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/cli/router.rs +81 -14
  13. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/cli/server.rs +1 -0
  14. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/config.rs +65 -3
  15. {dirsql-0.3.45 → dirsql-0.3.46}/Cargo.toml +0 -0
  16. {dirsql-0.3.45 → dirsql-0.3.46}/README.md +0 -0
  17. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/__init__.py +0 -0
  18. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/_async.py +0 -0
  19. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/_dirsql.pyi +0 -0
  20. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/cli/__init__.py +0 -0
  21. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/cli/binary_path.py +0 -0
  22. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/cli/interpret/__init__.py +0 -0
  23. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/cli/is_windows.py +0 -0
  24. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/cli/main.py +0 -0
  25. {dirsql-0.3.45 → dirsql-0.3.46}/dirsql/py.typed +0 -0
  26. {dirsql-0.3.45 → dirsql-0.3.46}/docs/.claude/CLAUDE.md +0 -0
  27. {dirsql-0.3.45 → dirsql-0.3.46}/docs/.vitepress/config.ts +0 -0
  28. {dirsql-0.3.45 → dirsql-0.3.46}/docs/.vitepress/theme/index.ts +0 -0
  29. {dirsql-0.3.45 → dirsql-0.3.46}/docs/.vitepress/theme/lang.ts +0 -0
  30. {dirsql-0.3.45 → dirsql-0.3.46}/docs/AGENTS.md +0 -0
  31. {dirsql-0.3.45 → dirsql-0.3.46}/docs/api/index.md +0 -0
  32. {dirsql-0.3.45 → dirsql-0.3.46}/docs/cli/index.md +0 -0
  33. {dirsql-0.3.45 → dirsql-0.3.46}/docs/cli/init.md +0 -0
  34. {dirsql-0.3.45 → dirsql-0.3.46}/docs/cli/server.md +0 -0
  35. {dirsql-0.3.45 → dirsql-0.3.46}/docs/getting-started.md +0 -0
  36. {dirsql-0.3.45 → dirsql-0.3.46}/docs/guide/async.md +0 -0
  37. {dirsql-0.3.45 → dirsql-0.3.46}/docs/guide/crdt.md +0 -0
  38. {dirsql-0.3.45 → dirsql-0.3.46}/docs/guide/persistence.md +0 -0
  39. {dirsql-0.3.45 → dirsql-0.3.46}/docs/guide/querying.md +0 -0
  40. {dirsql-0.3.45 → dirsql-0.3.46}/docs/guide/tables.md +0 -0
  41. {dirsql-0.3.45 → dirsql-0.3.46}/docs/guide/watching.md +0 -0
  42. {dirsql-0.3.45 → dirsql-0.3.46}/docs/index.md +0 -0
  43. {dirsql-0.3.45 → dirsql-0.3.46}/docs/migrations.md +0 -0
  44. {dirsql-0.3.45 → dirsql-0.3.46}/docs/package.json +0 -0
  45. {dirsql-0.3.45 → dirsql-0.3.46}/docs/playwright.config.ts +0 -0
  46. {dirsql-0.3.45 → dirsql-0.3.46}/docs/pnpm-lock.yaml +0 -0
  47. {dirsql-0.3.45 → dirsql-0.3.46}/docs/pnpm-workspace.yaml +0 -0
  48. {dirsql-0.3.45 → dirsql-0.3.46}/docs/tests/integration/home.spec.ts +0 -0
  49. {dirsql-0.3.45 → dirsql-0.3.46}/docs/tests/integration/language-flag.spec.ts +0 -0
  50. {dirsql-0.3.45 → dirsql-0.3.46}/docs/tests/integration/sidebar.spec.ts +0 -0
  51. {dirsql-0.3.45 → dirsql-0.3.46}/docs/tests/unit/config.test.ts +0 -0
  52. {dirsql-0.3.45 → dirsql-0.3.46}/docs/tests/unit/lang.test.ts +0 -0
  53. {dirsql-0.3.45 → dirsql-0.3.46}/docs/vitest.config.ts +0 -0
  54. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/README.md +0 -0
  55. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/conftest.py +0 -0
  56. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/.claude/CLAUDE.md +0 -0
  57. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/.vitepress/config.ts +0 -0
  58. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/.vitepress/theme/index.ts +0 -0
  59. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/.vitepress/theme/lang.ts +0 -0
  60. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/AGENTS.md +0 -0
  61. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/api/index.md +0 -0
  62. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/cli/index.md +0 -0
  63. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/cli/init.md +0 -0
  64. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/cli/server.md +0 -0
  65. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/getting-started.md +0 -0
  66. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/guide/async.md +0 -0
  67. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/guide/crdt.md +0 -0
  68. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/guide/persistence.md +0 -0
  69. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/guide/querying.md +0 -0
  70. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/guide/tables.md +0 -0
  71. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/guide/watching.md +0 -0
  72. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/index.md +0 -0
  73. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/migrations.md +0 -0
  74. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/package.json +0 -0
  75. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/playwright.config.ts +0 -0
  76. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/pnpm-lock.yaml +0 -0
  77. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/pnpm-workspace.yaml +0 -0
  78. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/tests/integration/home.spec.ts +0 -0
  79. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/tests/integration/language-flag.spec.ts +0 -0
  80. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/tests/integration/sidebar.spec.ts +0 -0
  81. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/tests/unit/config.test.ts +0 -0
  82. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/tests/unit/lang.test.ts +0 -0
  83. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/docs/vitest.config.ts +0 -0
  84. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/e2e-attestation.json +0 -0
  85. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/src/lib.rs +0 -0
  86. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/tests/__init__.py +0 -0
  87. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/tests/conftest.py +0 -0
  88. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/tests/e2e/__init__.py +0 -0
  89. {dirsql-0.3.45 → dirsql-0.3.46}/packages/python/tests/integration/__init__.py +0 -0
  90. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/Cargo.toml +0 -0
  91. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/README.md +0 -0
  92. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/benches/db_bench.rs +0 -0
  93. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/benches/differ_bench.rs +0 -0
  94. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/benches/matcher_bench.rs +0 -0
  95. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/benches/scanner_bench.rs +0 -0
  96. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/api/index.md +0 -0
  97. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/cli/index.md +0 -0
  98. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/cli/init.md +0 -0
  99. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/cli/server.md +0 -0
  100. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/getting-started.md +0 -0
  101. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/guide/async.md +0 -0
  102. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/guide/crdt.md +0 -0
  103. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/guide/persistence.md +0 -0
  104. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/guide/querying.md +0 -0
  105. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/guide/tables.md +0 -0
  106. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/guide/watching.md +0 -0
  107. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/index.md +0 -0
  108. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/docs/migrations.md +0 -0
  109. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/cli/init.rs +0 -0
  110. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/cli/serialize.rs +0 -0
  111. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/command.rs +0 -0
  112. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/db.rs +0 -0
  113. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/differ.rs +0 -0
  114. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/lib.rs +0 -0
  115. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/matcher.rs +0 -0
  116. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/persist.rs +0 -0
  117. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/scanner.rs +0 -0
  118. {dirsql-0.3.45 → dirsql-0.3.46}/packages/rust/src/watcher.rs +0 -0
  119. {dirsql-0.3.45 → dirsql-0.3.46}/pyproject.toml +0 -0
@@ -501,7 +501,7 @@ dependencies = [
501
501
 
502
502
  [[package]]
503
503
  name = "dirsql-py-ext"
504
- version = "0.3.45"
504
+ version = "0.3.46"
505
505
  dependencies = [
506
506
  "dirsql",
507
507
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dirsql
3
- Version: 0.3.45
3
+ Version: 0.3.46
4
4
  Requires-Dist: pytest>=8 ; extra == 'dev'
5
5
  Requires-Dist: pytest-describe>=2 ; extra == 'dev'
6
6
  Requires-Dist: pytest-asyncio>=0.23 ; extra == 'dev'
@@ -242,6 +242,71 @@ other files' rows are indexed normally.
242
242
  See [Command execution](#command-execution) for the full contract (argv
243
243
  splitting, injection safety, cwd, environment, timeout, and output framing).
244
244
 
245
+ ### Rewriting queries (`pre-query`)
246
+
247
+ The `pre-query` hook intercepts every incoming `POST /query` request and transforms it into
248
+ the SQL that runs against the index. Because the hook owns SQL construction,
249
+ `POST /query` can accept whatever shape you want — a natural-language question,
250
+ a saved-query name, a templating DSL — and your command translates it to SQL
251
+ before it runs. Unlike `on-file` (a per-`[[table]]` key), `pre-query` is a
252
+ **server-wide** `[dirsql]` key: every query flows through it.
253
+
254
+ ```toml
255
+ [dirsql]
256
+ pre-query = "uv run python to_sql.py {args}"
257
+ ```
258
+
259
+ With `pre-query` set, the **raw `POST /query` request body** is passed to the
260
+ command as the `{args}` placeholder — a single, injection-safe argv token even
261
+ though the body is untrusted. The command prints **plain-text SQL** on stdout
262
+ (the last non-empty line is used); `dirsql` runs that SQL and returns rows
263
+ exactly as it would for a normal query.
264
+
265
+ | Placeholder | Value |
266
+ |-------------|-------|
267
+ | `{args}` | The raw `POST /query` request body, verbatim, as one argv token. |
268
+
269
+ When `pre-query` is **absent**, nothing changes: the request body is parsed as
270
+ `{"sql": "…"}` JSON and executed — the [HTTP API](./http-api.md#post-query)
271
+ default. Enabling the hook is fully reversible: remove the
272
+ key and the `{"sql": …}` contract returns.
273
+
274
+ **On failure** — a non-zero exit, a timeout, or a spawn error — the request
275
+ returns `500 Internal Server Error` with the command's stderr tail in the JSON
276
+ `error` body. The command runs in the config file's directory and is bounded by
277
+ a fixed **30-second** timeout.
278
+
279
+ #### The hook owns SQL safety
280
+
281
+ Because the hook returns **plain SQL** (not a parameterized query), it is the
282
+ **trusted component** that turns the untrusted request body into safe SQL. The
283
+ `{args}` substitution keeps the body inert *as an argv token* — it can never
284
+ break out into extra command arguments — but whatever SQL string the hook
285
+ prints is executed as-is. Validate, escape, or parameterize **inside** the
286
+ hook. This trade-off is intentional for v1: it keeps the contract a simple
287
+ plain-text-SQL pipe and puts translation logic — and its safety — in your hook.
288
+
289
+ Worked example — a hook that maps a saved-query name to SQL:
290
+
291
+ ```python
292
+ # to_sql.py
293
+ import sys
294
+
295
+ QUERIES = {
296
+ "recent-posts": "SELECT title, author FROM posts ORDER BY _mtime DESC LIMIT 10",
297
+ }
298
+ name = sys.argv[1].strip() if len(sys.argv) > 1 else ""
299
+ # Fall back to an empty result rather than trusting arbitrary input.
300
+ print(QUERIES.get(name, "SELECT 1 WHERE 0"))
301
+ ```
302
+
303
+ ```bash
304
+ curl -s http://localhost:7117/query -d 'recent-posts' | jq
305
+ ```
306
+
307
+ See [Command execution](#command-execution) for the full contract (argv
308
+ splitting, injection safety, cwd, environment, timeout, and output framing).
309
+
245
310
  ### Full Example
246
311
 
247
312
  ```toml
@@ -263,8 +328,8 @@ glob = "logs/*.csv"
263
328
 
264
329
  ## Command execution
265
330
 
266
- Config keys that run an external command — today `on-file`, with more events to
267
- follow — share one execution contract:
331
+ Config keys that run an external command — today `on-file` and `pre-query`,
332
+ with more events to follow — share one execution contract:
268
333
 
269
334
  - **argv, not a shell.** The command string is split into an argv with
270
335
  shell-like quoting (spaces separate arguments; quotes group them), but **no
@@ -35,6 +35,15 @@ On error, the server returns a non-2xx status with a JSON body:
35
35
 
36
36
  Malformed SQL returns `400`. An unreadable or malformed config returns `503`; a *missing* config is not an error — the server serves the default `files` table.
37
37
 
38
+ ::: tip `pre-query` changes the body contract
39
+ When [`[dirsql].pre-query`](./config.md#rewriting-queries-pre-query) is
40
+ configured, the request body is **not** parsed as `{"sql": …}`. Instead the raw
41
+ body is passed verbatim to the hook command, which prints the SQL to run. A hook
42
+ that fails (non-zero exit, timeout, or spawn error) returns `500` with the
43
+ command's stderr tail. With no `pre-query` key, the `{"sql": …}` contract above
44
+ applies.
45
+ :::
46
+
38
47
  ```bash
39
48
  curl -s http://localhost:7117/query \
40
49
  -H 'content-type: application/json' \
@@ -4,7 +4,7 @@ name = "dirsql-py-ext"
4
4
  # pypi/maturin handler can rewrite it via `write-version` before
5
5
  # `maturin build`. `pyproject.toml` declares `dynamic = ["version"]`
6
6
  # and maturin reads this field. Mirrors `packages/rust/Cargo.toml`.
7
- version = "0.3.45"
7
+ version = "0.3.46"
8
8
  edition.workspace = true
9
9
  publish = false
10
10
  readme = "README.md"
@@ -242,6 +242,71 @@ other files' rows are indexed normally.
242
242
  See [Command execution](#command-execution) for the full contract (argv
243
243
  splitting, injection safety, cwd, environment, timeout, and output framing).
244
244
 
245
+ ### Rewriting queries (`pre-query`)
246
+
247
+ The `pre-query` hook intercepts every incoming `POST /query` request and transforms it into
248
+ the SQL that runs against the index. Because the hook owns SQL construction,
249
+ `POST /query` can accept whatever shape you want — a natural-language question,
250
+ a saved-query name, a templating DSL — and your command translates it to SQL
251
+ before it runs. Unlike `on-file` (a per-`[[table]]` key), `pre-query` is a
252
+ **server-wide** `[dirsql]` key: every query flows through it.
253
+
254
+ ```toml
255
+ [dirsql]
256
+ pre-query = "uv run python to_sql.py {args}"
257
+ ```
258
+
259
+ With `pre-query` set, the **raw `POST /query` request body** is passed to the
260
+ command as the `{args}` placeholder — a single, injection-safe argv token even
261
+ though the body is untrusted. The command prints **plain-text SQL** on stdout
262
+ (the last non-empty line is used); `dirsql` runs that SQL and returns rows
263
+ exactly as it would for a normal query.
264
+
265
+ | Placeholder | Value |
266
+ |-------------|-------|
267
+ | `{args}` | The raw `POST /query` request body, verbatim, as one argv token. |
268
+
269
+ When `pre-query` is **absent**, nothing changes: the request body is parsed as
270
+ `{"sql": "…"}` JSON and executed — the [HTTP API](./http-api.md#post-query)
271
+ default. Enabling the hook is fully reversible: remove the
272
+ key and the `{"sql": …}` contract returns.
273
+
274
+ **On failure** — a non-zero exit, a timeout, or a spawn error — the request
275
+ returns `500 Internal Server Error` with the command's stderr tail in the JSON
276
+ `error` body. The command runs in the config file's directory and is bounded by
277
+ a fixed **30-second** timeout.
278
+
279
+ #### The hook owns SQL safety
280
+
281
+ Because the hook returns **plain SQL** (not a parameterized query), it is the
282
+ **trusted component** that turns the untrusted request body into safe SQL. The
283
+ `{args}` substitution keeps the body inert *as an argv token* — it can never
284
+ break out into extra command arguments — but whatever SQL string the hook
285
+ prints is executed as-is. Validate, escape, or parameterize **inside** the
286
+ hook. This trade-off is intentional for v1: it keeps the contract a simple
287
+ plain-text-SQL pipe and puts translation logic — and its safety — in your hook.
288
+
289
+ Worked example — a hook that maps a saved-query name to SQL:
290
+
291
+ ```python
292
+ # to_sql.py
293
+ import sys
294
+
295
+ QUERIES = {
296
+ "recent-posts": "SELECT title, author FROM posts ORDER BY _mtime DESC LIMIT 10",
297
+ }
298
+ name = sys.argv[1].strip() if len(sys.argv) > 1 else ""
299
+ # Fall back to an empty result rather than trusting arbitrary input.
300
+ print(QUERIES.get(name, "SELECT 1 WHERE 0"))
301
+ ```
302
+
303
+ ```bash
304
+ curl -s http://localhost:7117/query -d 'recent-posts' | jq
305
+ ```
306
+
307
+ See [Command execution](#command-execution) for the full contract (argv
308
+ splitting, injection safety, cwd, environment, timeout, and output framing).
309
+
245
310
  ### Full Example
246
311
 
247
312
  ```toml
@@ -263,8 +328,8 @@ glob = "logs/*.csv"
263
328
 
264
329
  ## Command execution
265
330
 
266
- Config keys that run an external command — today `on-file`, with more events to
267
- follow — share one execution contract:
331
+ Config keys that run an external command — today `on-file` and `pre-query`,
332
+ with more events to follow — share one execution contract:
268
333
 
269
334
  - **argv, not a shell.** The command string is split into an argv with
270
335
  shell-like quoting (spaces separate arguments; quotes group them), but **no
@@ -35,6 +35,15 @@ On error, the server returns a non-2xx status with a JSON body:
35
35
 
36
36
  Malformed SQL returns `400`. An unreadable or malformed config returns `503`; a *missing* config is not an error — the server serves the default `files` table.
37
37
 
38
+ ::: tip `pre-query` changes the body contract
39
+ When [`[dirsql].pre-query`](./config.md#rewriting-queries-pre-query) is
40
+ configured, the request body is **not** parsed as `{"sql": …}`. Instead the raw
41
+ body is passed verbatim to the hook command, which prints the SQL to run. A hook
42
+ that fails (non-zero exit, timeout, or spawn error) returns `500` with the
43
+ command's stderr tail. With no `pre-query` key, the `{"sql": …}` contract above
44
+ applies.
45
+ :::
46
+
38
47
  ```bash
39
48
  curl -s http://localhost:7117/query \
40
49
  -H 'content-type: application/json' \
@@ -242,6 +242,71 @@ other files' rows are indexed normally.
242
242
  See [Command execution](#command-execution) for the full contract (argv
243
243
  splitting, injection safety, cwd, environment, timeout, and output framing).
244
244
 
245
+ ### Rewriting queries (`pre-query`)
246
+
247
+ The `pre-query` hook intercepts every incoming `POST /query` request and transforms it into
248
+ the SQL that runs against the index. Because the hook owns SQL construction,
249
+ `POST /query` can accept whatever shape you want — a natural-language question,
250
+ a saved-query name, a templating DSL — and your command translates it to SQL
251
+ before it runs. Unlike `on-file` (a per-`[[table]]` key), `pre-query` is a
252
+ **server-wide** `[dirsql]` key: every query flows through it.
253
+
254
+ ```toml
255
+ [dirsql]
256
+ pre-query = "uv run python to_sql.py {args}"
257
+ ```
258
+
259
+ With `pre-query` set, the **raw `POST /query` request body** is passed to the
260
+ command as the `{args}` placeholder — a single, injection-safe argv token even
261
+ though the body is untrusted. The command prints **plain-text SQL** on stdout
262
+ (the last non-empty line is used); `dirsql` runs that SQL and returns rows
263
+ exactly as it would for a normal query.
264
+
265
+ | Placeholder | Value |
266
+ |-------------|-------|
267
+ | `{args}` | The raw `POST /query` request body, verbatim, as one argv token. |
268
+
269
+ When `pre-query` is **absent**, nothing changes: the request body is parsed as
270
+ `{"sql": "…"}` JSON and executed — the [HTTP API](./http-api.md#post-query)
271
+ default. Enabling the hook is fully reversible: remove the
272
+ key and the `{"sql": …}` contract returns.
273
+
274
+ **On failure** — a non-zero exit, a timeout, or a spawn error — the request
275
+ returns `500 Internal Server Error` with the command's stderr tail in the JSON
276
+ `error` body. The command runs in the config file's directory and is bounded by
277
+ a fixed **30-second** timeout.
278
+
279
+ #### The hook owns SQL safety
280
+
281
+ Because the hook returns **plain SQL** (not a parameterized query), it is the
282
+ **trusted component** that turns the untrusted request body into safe SQL. The
283
+ `{args}` substitution keeps the body inert *as an argv token* — it can never
284
+ break out into extra command arguments — but whatever SQL string the hook
285
+ prints is executed as-is. Validate, escape, or parameterize **inside** the
286
+ hook. This trade-off is intentional for v1: it keeps the contract a simple
287
+ plain-text-SQL pipe and puts translation logic — and its safety — in your hook.
288
+
289
+ Worked example — a hook that maps a saved-query name to SQL:
290
+
291
+ ```python
292
+ # to_sql.py
293
+ import sys
294
+
295
+ QUERIES = {
296
+ "recent-posts": "SELECT title, author FROM posts ORDER BY _mtime DESC LIMIT 10",
297
+ }
298
+ name = sys.argv[1].strip() if len(sys.argv) > 1 else ""
299
+ # Fall back to an empty result rather than trusting arbitrary input.
300
+ print(QUERIES.get(name, "SELECT 1 WHERE 0"))
301
+ ```
302
+
303
+ ```bash
304
+ curl -s http://localhost:7117/query -d 'recent-posts' | jq
305
+ ```
306
+
307
+ See [Command execution](#command-execution) for the full contract (argv
308
+ splitting, injection safety, cwd, environment, timeout, and output framing).
309
+
245
310
  ### Full Example
246
311
 
247
312
  ```toml
@@ -263,8 +328,8 @@ glob = "logs/*.csv"
263
328
 
264
329
  ## Command execution
265
330
 
266
- Config keys that run an external command — today `on-file`, with more events to
267
- follow — share one execution contract:
331
+ Config keys that run an external command — today `on-file` and `pre-query`,
332
+ with more events to follow — share one execution contract:
268
333
 
269
334
  - **argv, not a shell.** The command string is split into an argv with
270
335
  shell-like quoting (spaces separate arguments; quotes group them), but **no
@@ -35,6 +35,15 @@ On error, the server returns a non-2xx status with a JSON body:
35
35
 
36
36
  Malformed SQL returns `400`. An unreadable or malformed config returns `503`; a *missing* config is not an error — the server serves the default `files` table.
37
37
 
38
+ ::: tip `pre-query` changes the body contract
39
+ When [`[dirsql].pre-query`](./config.md#rewriting-queries-pre-query) is
40
+ configured, the request body is **not** parsed as `{"sql": …}`. Instead the raw
41
+ body is passed verbatim to the hook command, which prints the SQL to run. A hook
42
+ that fails (non-zero exit, timeout, or spawn error) returns `500` with the
43
+ command's stderr tail. With no `pre-query` key, the `{"sql": …}` contract above
44
+ applies.
45
+ :::
46
+
38
47
  ```bash
39
48
  curl -s http://localhost:7117/query \
40
49
  -H 'content-type: application/json' \
@@ -8,7 +8,7 @@ use std::path::{Path, PathBuf};
8
8
  use std::process::ExitCode;
9
9
 
10
10
  use clap::{Args, Parser, Subcommand};
11
- use dirsql::cli::{AppState, ServerConfig, init::InitOptions, serve_with_state};
11
+ use dirsql::cli::{AppState, PreQuery, ServerConfig, init::InitOptions, serve_with_state};
12
12
  use dirsql::{DirSQL, Row, Table};
13
13
 
14
14
  #[derive(Debug, Parser)]
@@ -105,7 +105,10 @@ fn run_init(args: InitArgs) -> ExitCode {
105
105
 
106
106
  async fn run_server(cli: Cli) -> ExitCode {
107
107
  let state = load_state(&cli);
108
- let server_config = ServerConfig::bind(cli.host.clone(), cli.port);
108
+ let mut server_config = ServerConfig::bind(cli.host.clone(), cli.port);
109
+ if let Some(pre_query) = load_pre_query(&cli) {
110
+ server_config = server_config.with_pre_query(pre_query);
111
+ }
109
112
 
110
113
  let host = cli.host.clone();
111
114
  let handle = match serve_with_state(server_config, state).await {
@@ -158,6 +161,26 @@ fn load_state(cli: &Cli) -> AppState {
158
161
  }
159
162
  }
160
163
 
164
+ /// Extract the server-wide `pre-query` hook from the config, if any.
165
+ ///
166
+ /// Returns `None` when the config is absent, unresolvable, unparsable, or
167
+ /// declares no `pre-query` — the server then parses `POST /query` bodies as
168
+ /// `{"sql": …}` (the degraded / zero-config paths never get a hook). The
169
+ /// command's working directory is the config file's parent, mirroring the
170
+ /// `on-file` contract. Config resolution mirrors [`load_state`]: a config that
171
+ /// fails here also fails there (leaving the server degraded), so the hook is
172
+ /// simply skipped.
173
+ fn load_pre_query(cli: &Cli) -> Option<PreQuery> {
174
+ let config_path = &cli.config;
175
+ if !config_path.exists() {
176
+ return None;
177
+ }
178
+ let resolved = config_path.canonicalize().ok()?;
179
+ let command = dirsql::config::load_config(&resolved).ok()?.pre_query?;
180
+ let config_dir = resolved.parent()?.to_path_buf();
181
+ Some(PreQuery::new(command, config_dir))
182
+ }
183
+
161
184
  /// Zero-config fallback. When no `.dirsql.toml` is found, dirsql indexes the
162
185
  /// directory that would have held the config with a single default `files`
163
186
  /// table — one row per file, columns drawn entirely from filesystem facts —
@@ -19,6 +19,7 @@
19
19
  //! - [`serialize`] — row + event → JSON.
20
20
 
21
21
  use std::net::SocketAddr;
22
+ use std::path::PathBuf;
22
23
  use std::time::Duration;
23
24
 
24
25
  use tokio::sync::{oneshot, watch};
@@ -38,13 +39,40 @@ pub use server::{serve, serve_with_state};
38
39
  // Public types
39
40
  // ---------------------------------------------------------------------------
40
41
 
42
+ /// A server-wide `pre-query` command hook, carrying the command template plus
43
+ /// the directory it runs in (the config file's parent). When set on a
44
+ /// [`ServerConfig`], the server passes each `POST /query` request body to the
45
+ /// command as `{args}` and runs the plain-text SQL it prints. See
46
+ /// [`crate::command`] for the execution contract.
47
+ #[derive(Debug, Clone)]
48
+ pub struct PreQuery {
49
+ /// The command template (argv-split, no shell). Receives the raw request
50
+ /// body as the `{args}` placeholder.
51
+ pub command: String,
52
+ /// The command's working directory — the config file's parent.
53
+ pub config_dir: PathBuf,
54
+ }
55
+
56
+ impl PreQuery {
57
+ /// Build a [`PreQuery`] from a command template and its working directory.
58
+ pub fn new(command: impl Into<String>, config_dir: impl Into<PathBuf>) -> Self {
59
+ Self {
60
+ command: command.into(),
61
+ config_dir: config_dir.into(),
62
+ }
63
+ }
64
+ }
65
+
41
66
  /// Configure how the server binds. Defaults to `localhost:7117` with a
42
- /// 30-second per-query timeout.
67
+ /// 30-second per-query timeout and no `pre-query` hook.
43
68
  #[derive(Debug, Clone)]
44
69
  pub struct ServerConfig {
45
70
  pub host: String,
46
71
  pub port: u16,
47
72
  pub query_timeout: Duration,
73
+ /// Optional server-wide `pre-query` command. When `None` (the default),
74
+ /// `POST /query` parses its body as `{"sql": …}`.
75
+ pub pre_query: Option<PreQuery>,
48
76
  }
49
77
 
50
78
  impl ServerConfig {
@@ -55,6 +83,7 @@ impl ServerConfig {
55
83
  host: "localhost".into(),
56
84
  port: 0,
57
85
  query_timeout: Duration::from_secs(30),
86
+ pre_query: None,
58
87
  }
59
88
  }
60
89
 
@@ -64,6 +93,7 @@ impl ServerConfig {
64
93
  host: host.into(),
65
94
  port,
66
95
  query_timeout: Duration::from_secs(30),
96
+ pre_query: None,
67
97
  }
68
98
  }
69
99
 
@@ -73,6 +103,14 @@ impl ServerConfig {
73
103
  self.query_timeout = timeout;
74
104
  self
75
105
  }
106
+
107
+ /// Attach a server-wide [`PreQuery`] hook. With it set, `POST /query`
108
+ /// passes the raw request body to the command and runs the SQL it prints
109
+ /// instead of parsing the body as `{"sql": …}`.
110
+ pub fn with_pre_query(mut self, pre_query: PreQuery) -> Self {
111
+ self.pre_query = Some(pre_query);
112
+ self
113
+ }
76
114
  }
77
115
 
78
116
  impl Default for ServerConfig {
@@ -175,5 +213,23 @@ mod tests {
175
213
  assert_eq!(cfg.host, "localhost");
176
214
  assert_eq!(cfg.port, 7117);
177
215
  assert_eq!(cfg.query_timeout, Duration::from_secs(30));
216
+ assert!(cfg.pre_query.is_none());
217
+ }
218
+
219
+ #[test]
220
+ fn pre_query_constructor_carries_command_and_dir() {
221
+ // `PreQuery::new` is pure data plumbing: the command template and the
222
+ // working directory it will run in.
223
+ let pq = PreQuery::new("to_sql.py {args}", "/proj");
224
+ assert_eq!(pq.command, "to_sql.py {args}");
225
+ assert_eq!(pq.config_dir, PathBuf::from("/proj"));
226
+ }
227
+
228
+ #[test]
229
+ fn with_pre_query_sets_the_hook() {
230
+ let cfg = ServerConfig::ephemeral().with_pre_query(PreQuery::new("cmd {args}", "/proj"));
231
+ let pq = cfg.pre_query.expect("hook must be set");
232
+ assert_eq!(pq.command, "cmd {args}");
233
+ assert_eq!(pq.config_dir, PathBuf::from("/proj"));
178
234
  }
179
235
  }
@@ -16,15 +16,25 @@ use serde_json::json;
16
16
  use tokio::sync::{broadcast, watch};
17
17
  use tokio_stream::wrappers::BroadcastStream;
18
18
 
19
- use super::AppState;
20
19
  use super::serialize::rows_to_json;
20
+ use super::{AppState, PreQuery};
21
+ use crate::command::{Placeholder, run_command};
21
22
  use crate::{DirSQL, DirSqlError};
22
23
 
24
+ /// Fixed timeout for a server-wide `pre-query` command. There is no override
25
+ /// key yet; this module constant is the documented current default (mirrors
26
+ /// `on-file`'s `ON_FILE_TIMEOUT`).
27
+ const PRE_QUERY_TIMEOUT: Duration = Duration::from_secs(30);
28
+
23
29
  pub(super) struct AppContext {
24
30
  pub state: AppState,
25
31
  pub events: broadcast::Sender<String>,
26
32
  pub cancel: watch::Receiver<bool>,
27
33
  pub query_timeout: Duration,
34
+ /// Optional server-wide `pre-query` hook. When `Some`, `POST /query`
35
+ /// rewrites the request body through the command; when `None`, the body
36
+ /// is parsed as `{"sql": …}`.
37
+ pub pre_query: Option<PreQuery>,
28
38
  }
29
39
 
30
40
  pub(super) type SharedCtx = Arc<AppContext>;
@@ -47,19 +57,18 @@ struct QueryBody {
47
57
  sql: Option<String>,
48
58
  }
49
59
 
50
- async fn handle_query(
51
- State(ctx): State<SharedCtx>,
52
- body: Result<Json<QueryBody>, axum::extract::rejection::JsonRejection>,
53
- ) -> Response {
54
- let Json(body) = match body {
55
- Ok(body) => body,
56
- Err(rej) => return error_response(StatusCode::BAD_REQUEST, rej.body_text()),
57
- };
58
-
59
- let sql = match body.sql.as_deref().map(str::trim) {
60
- Some(s) if !s.is_empty() => s.to_string(),
61
- Some(_) => return error_response(StatusCode::BAD_REQUEST, "`sql` must not be empty"),
62
- None => return error_response(StatusCode::BAD_REQUEST, "missing `sql` field"),
60
+ async fn handle_query(State(ctx): State<SharedCtx>, body: String) -> Response {
61
+ // Resolve the SQL to run. With a `pre-query` hook the raw body is rewritten
62
+ // by the command; without one it is parsed as `{"sql": …}` (today's path).
63
+ let sql = match &ctx.pre_query {
64
+ Some(pq) => match run_pre_query(pq, body).await {
65
+ Ok(sql) => sql,
66
+ Err(resp) => return resp,
67
+ },
68
+ None => match parse_sql_body(&body) {
69
+ Ok(sql) => sql,
70
+ Err(resp) => return resp,
71
+ },
63
72
  };
64
73
 
65
74
  let db = match require_ready(&ctx.state) {
@@ -87,6 +96,64 @@ async fn handle_query(
87
96
  }
88
97
  }
89
98
 
99
+ /// Parse a `POST /query` body as `{"sql": …}` and return the trimmed SQL.
100
+ /// Reproduces the pre-hook behavior: 400 on malformed JSON, 400 on a
101
+ /// missing/empty `sql` field.
102
+ ///
103
+ /// `Response` is large (clippy flags the error variant), but returning it
104
+ /// directly matches the axum handler contract and avoids boxing on the hot
105
+ /// path — same trade-off as [`require_ready`].
106
+ #[allow(clippy::result_large_err)]
107
+ fn parse_sql_body(body: &str) -> Result<String, Response> {
108
+ let parsed: QueryBody = serde_json::from_str(body)
109
+ .map_err(|err| error_response(StatusCode::BAD_REQUEST, err.to_string()))?;
110
+ match parsed.sql.as_deref().map(str::trim) {
111
+ Some(s) if !s.is_empty() => Ok(s.to_string()),
112
+ Some(_) => Err(error_response(
113
+ StatusCode::BAD_REQUEST,
114
+ "`sql` must not be empty",
115
+ )),
116
+ None => Err(error_response(
117
+ StatusCode::BAD_REQUEST,
118
+ "missing `sql` field",
119
+ )),
120
+ }
121
+ }
122
+
123
+ /// Run the server-wide `pre-query` hook over the raw request body and return
124
+ /// the SQL it prints. The body is passed as the injection-safe `{args}`
125
+ /// placeholder (a single argv token); the command's last non-empty stdout line
126
+ /// is the SQL to run. Any failure (non-zero exit, timeout, spawn error) maps to
127
+ /// `500` carrying the command's stderr tail.
128
+ ///
129
+ /// `Response` is large (see [`parse_sql_body`]); returned by value for the same
130
+ /// reason.
131
+ #[allow(clippy::result_large_err)]
132
+ async fn run_pre_query(pq: &PreQuery, raw_body: String) -> Result<String, Response> {
133
+ let command = pq.command.clone();
134
+ let config_dir = pq.config_dir.clone();
135
+ // `run_command` is blocking — it spawns a child and joins drain threads —
136
+ // so run it off the async runtime. It enforces `PRE_QUERY_TIMEOUT`
137
+ // internally, so no outer `tokio::time::timeout` is needed.
138
+ let outcome = tokio::task::spawn_blocking(move || {
139
+ run_command(
140
+ &command,
141
+ &[Placeholder::new("args", &raw_body)],
142
+ &config_dir,
143
+ PRE_QUERY_TIMEOUT,
144
+ None,
145
+ )
146
+ })
147
+ .await
148
+ .map_err(|join_err| error_response(StatusCode::INTERNAL_SERVER_ERROR, join_err.to_string()))?;
149
+
150
+ // `run_command` only returns `Ok` with a non-empty last stdout line
151
+ // (`EmptyOutput` otherwise), so the payload is the SQL as-is.
152
+ outcome
153
+ .map(|out| out.payload)
154
+ .map_err(|err| error_response(StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))
155
+ }
156
+
90
157
  async fn handle_events(State(ctx): State<SharedCtx>) -> Response {
91
158
  if let Err(resp) = require_ready(&ctx.state) {
92
159
  return resp;
@@ -48,6 +48,7 @@ pub async fn serve_with_state(
48
48
  events: event_tx,
49
49
  cancel: cancel_rx,
50
50
  query_timeout: config.query_timeout,
51
+ pre_query: config.pre_query,
51
52
  });
52
53
  let app = router(shared);
53
54
 
@@ -17,7 +17,7 @@ pub enum ConfigError {
17
17
  #[error("Missing required field '{0}' in [[dirsql.extension]] entry")]
18
18
  MissingExtensionField(&'static str),
19
19
 
20
- #[error("Field '{0}' in [[table]] entry must not be empty")]
20
+ #[error("Field '{0}' must not be empty")]
21
21
  EmptyField(&'static str),
22
22
  }
23
23
 
@@ -44,6 +44,13 @@ pub struct Config {
44
44
  /// relative paths are resolved against the config file's parent directory
45
45
  /// by the caller (`DirSQLBuilder::resolve`).
46
46
  pub extensions: Vec<ExtensionSpec>,
47
+ /// Optional server-wide `pre-query` command (`[dirsql].pre-query`). When
48
+ /// set, the HTTP server passes each `POST /query` request body to this
49
+ /// command as `{args}` and runs the plain-text SQL it prints, instead of
50
+ /// parsing the body as `{"sql": …}`. See `dirsql::command` for the
51
+ /// execution contract. Only the CLI server consults this; the SDK ignores
52
+ /// it.
53
+ pub pre_query: Option<String>,
47
54
  }
48
55
 
49
56
  /// A SQLite extension to load at startup.
@@ -100,6 +107,8 @@ struct RawDirsql {
100
107
  persist: Option<bool>,
101
108
  persist_path: Option<PathBuf>,
102
109
  extension: Option<Vec<RawExtension>>,
110
+ #[serde(rename = "pre-query")]
111
+ pre_query: Option<String>,
103
112
  }
104
113
 
105
114
  #[derive(Deserialize)]
@@ -127,15 +136,26 @@ pub fn load_config(path: &Path) -> Result<Config> {
127
136
  pub fn load_config_str(content: &str) -> Result<Config> {
128
137
  let raw: RawConfig = toml::from_str(content)?;
129
138
 
130
- let (root, ignore, persist, persist_path, raw_extensions) = match raw.dirsql {
139
+ let (root, ignore, persist, persist_path, raw_extensions, raw_pre_query) = match raw.dirsql {
131
140
  Some(d) => (
132
141
  d.root,
133
142
  d.ignore.unwrap_or_default(),
134
143
  d.persist.unwrap_or(false),
135
144
  d.persist_path,
136
145
  d.extension.unwrap_or_default(),
146
+ d.pre_query,
137
147
  ),
138
- None => (None, Vec::new(), false, None, Vec::new()),
148
+ None => (None, Vec::new(), false, None, Vec::new(), None),
149
+ };
150
+
151
+ // `pre-query` is optional, so a missing key is fine; a present-but-empty
152
+ // `pre-query = ""` is not. Reject it at parse time rather than spawning an
153
+ // empty command later (mirrors the `on-file` handling below).
154
+ let pre_query = match raw_pre_query {
155
+ Some(cmd) if cmd.trim().is_empty() => {
156
+ return Err(ConfigError::EmptyField("pre-query"));
157
+ }
158
+ other => other,
139
159
  };
140
160
 
141
161
  let mut extensions = Vec::with_capacity(raw_extensions.len());
@@ -183,6 +203,7 @@ pub fn load_config_str(content: &str) -> Result<Config> {
183
203
  persist,
184
204
  persist_path,
185
205
  extensions,
206
+ pre_query,
186
207
  })
187
208
  }
188
209
 
@@ -506,6 +527,47 @@ on-file = " "
506
527
  );
507
528
  }
508
529
 
530
/// A `[dirsql].pre-query` value is carried through to `Config::pre_query`
/// unchanged.
#[test]
fn pre_query_parses_when_present() {
    let source = r#"
[dirsql]
pre-query = "uv run python to_sql.py {args}"

[[table]]
ddl = "CREATE TABLE t (_path TEXT)"
glob = "*.json"
"#;

    let parsed = load_config_str(source).unwrap();
    let hook = parsed.pre_query.as_deref();

    assert_eq!(hook, Some("uv run python to_sql.py {args}"));
}
546
+
547
/// Without a `pre-query` key (here: no `[dirsql]` table at all) the hook is
/// simply absent.
#[test]
fn pre_query_absent_is_none() {
    let source = r#"
[[table]]
ddl = "CREATE TABLE t (_path TEXT)"
glob = "*.json"
"#;

    let parsed = load_config_str(source).unwrap();

    assert_eq!(parsed.pre_query, None);
}
557
+
558
/// A `pre-query` key that is present but blank must be rejected at parse
/// time with `ConfigError::EmptyField("pre-query")`.
///
/// Both flavors of "blank" are covered: the literally empty string and a
/// whitespace-only value (the loader trims before checking).
#[test]
fn pre_query_empty_errors() {
    for value in &["", " "] {
        let toml = format!("\n[dirsql]\npre-query = \"{value}\"\n");
        let err = load_config_str(&toml).unwrap_err();
        assert!(
            matches!(err, ConfigError::EmptyField("pre-query")),
            "pre-query = {value:?} got: {err:?}"
        );
    }
}
570
+
509
571
  #[test]
510
572
  fn extension_empty_path_errors() {
511
573
  // An empty `path = ""` is as unusable as a missing key — it must be
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes