dirsql 0.3.44__tar.gz → 0.3.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dirsql-0.3.44 → dirsql-0.3.45}/Cargo.lock +1 -1
- {dirsql-0.3.44 → dirsql-0.3.45}/PKG-INFO +1 -1
- {dirsql-0.3.44/packages/python → dirsql-0.3.45}/docs/cli/config.md +82 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/Cargo.toml +1 -1
- {dirsql-0.3.44 → dirsql-0.3.45/packages/python}/docs/cli/config.md +82 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/cli/config.md +82 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/config.rs +62 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/lib.rs +213 -19
- {dirsql-0.3.44 → dirsql-0.3.45}/Cargo.toml +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/README.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/__init__.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/_async.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/_dirsql.pyi +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/cli/__init__.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/cli/binary_path.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/cli/interpret/__init__.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/cli/is_windows.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/cli/main.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/dirsql/py.typed +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/.claude/CLAUDE.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/.vitepress/config.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/.vitepress/theme/index.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/.vitepress/theme/lang.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/AGENTS.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/api/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/cli/http-api.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/cli/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/cli/init.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/cli/server.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/getting-started.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/guide/async.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/guide/crdt.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/guide/persistence.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/guide/querying.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/guide/tables.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/guide/watching.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/migrations.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/package.json +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/playwright.config.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/pnpm-lock.yaml +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/pnpm-workspace.yaml +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/tests/integration/home.spec.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/tests/integration/language-flag.spec.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/tests/integration/sidebar.spec.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/tests/unit/config.test.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/tests/unit/lang.test.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/docs/vitest.config.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/README.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/conftest.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/.claude/CLAUDE.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/.vitepress/config.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/.vitepress/theme/index.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/.vitepress/theme/lang.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/AGENTS.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/api/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/cli/http-api.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/cli/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/cli/init.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/cli/server.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/getting-started.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/guide/async.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/guide/crdt.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/guide/persistence.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/guide/querying.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/guide/tables.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/guide/watching.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/migrations.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/package.json +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/playwright.config.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/pnpm-lock.yaml +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/pnpm-workspace.yaml +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/tests/integration/home.spec.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/tests/integration/language-flag.spec.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/tests/integration/sidebar.spec.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/tests/unit/config.test.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/tests/unit/lang.test.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/vitest.config.ts +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/e2e-attestation.json +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/src/lib.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/tests/__init__.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/tests/conftest.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/tests/e2e/__init__.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/python/tests/integration/__init__.py +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/Cargo.toml +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/README.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/benches/db_bench.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/benches/differ_bench.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/benches/matcher_bench.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/benches/scanner_bench.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/api/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/cli/http-api.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/cli/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/cli/init.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/cli/server.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/getting-started.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/guide/async.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/guide/crdt.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/guide/persistence.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/guide/querying.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/guide/tables.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/guide/watching.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/index.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/docs/migrations.md +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/bin/dirsql.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/cli/init.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/cli/mod.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/cli/router.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/cli/serialize.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/cli/server.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/command.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/db.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/differ.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/matcher.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/persist.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/scanner.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/packages/rust/src/watcher.rs +0 -0
- {dirsql-0.3.44 → dirsql-0.3.45}/pyproject.toml +0 -0
|
@@ -191,6 +191,57 @@ always filtered to the DDL's declared columns regardless. Strict mode
|
|
|
191
191
|
applies only to keys produced by an extract callback (relevant for
|
|
192
192
|
programmatic [tables](../guide/tables.md)).
|
|
193
193
|
|
|
194
|
+
### Per-file commands (`on-file`)
|
|
195
|
+
|
|
196
|
+
Reach for `on-file` when a table's rows come from the *contents* of each
|
|
197
|
+
matched file, not just its path and stat metadata. A filesystem-fact table
|
|
198
|
+
gives you one row per file; `on-file` runs a command per file that reads the
|
|
199
|
+
file and emits as many rows as it likes.
|
|
200
|
+
|
|
201
|
+
```toml
|
|
202
|
+
[[table]]
|
|
203
|
+
ddl = "CREATE TABLE papers (paper_id TEXT, title TEXT)"
|
|
204
|
+
glob = "**/meta.json"
|
|
205
|
+
on-file = "uv run python extract_papers.py {path}"
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
For every file matched by `glob`, `dirsql` runs the command. **The command
|
|
209
|
+
reads the file itself and prints a JSON array of row objects on stdout**; each
|
|
210
|
+
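object becomes one row, its fields mapped to columns (pretty-printed here for readability; emit the array on a single line, because only the last non-empty line of stdout is parsed — see [Command execution](#command-execution)):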
|
|
211
|
+
|
|
212
|
+
```json
|
|
213
|
+
[
|
|
214
|
+
{ "paper_id": "arXiv:2401.001", "title": "On Directories" },
|
|
215
|
+
{ "paper_id": "arXiv:2401.002", "title": "SQL All The Way Down" }
|
|
216
|
+
]
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Placeholders substituted into the command:
|
|
220
|
+
|
|
221
|
+
| Placeholder | Value |
|
|
222
|
+
|-------------|-------|
|
|
223
|
+
| `{path}` | The matched file's path **relative to the index root**. Appended automatically when the command omits it, so `extract.py` and `extract.py {path}` behave identically. |
|
|
224
|
+
| `{abspath}` | The matched file's absolute path. |
|
|
225
|
+
| `{root}` | The index root directory. |
|
|
226
|
+
|
|
227
|
+
Filesystem facts (stat virtuals and glob captures) are still merged onto every
|
|
228
|
+
`on-file` row, so you can declare `_path`, `_basename`, `{capture}`, etc. in the
|
|
229
|
+
DDL alongside the command's own columns — a column emitted by the command wins
|
|
230
|
+
over a same-named filesystem fact.
|
|
231
|
+
|
|
232
|
+
JSON values map to SQLite as follows: `null` → NULL; `true`/`false` → `1`/`0`;
|
|
233
|
+
an integer → INTEGER, any other number → REAL; a string → TEXT; a nested array
|
|
234
|
+
or object → its JSON text as TEXT.
|
|
235
|
+
|
|
236
|
+
**Per-file error isolation.** If a file's command fails — a non-zero exit, a
|
|
237
|
+
timeout, a spawn error, or output that isn't a JSON array of objects — that
|
|
238
|
+
file is skipped (it contributes no rows) and a one-line warning naming the file
|
|
239
|
+
and the error is written to stderr. One bad file never aborts the scan; the
|
|
240
|
+
other files' rows are indexed normally.
|
|
241
|
+
|
|
242
|
+
See [Command execution](#command-execution) for the full contract (argv
|
|
243
|
+
splitting, injection safety, cwd, environment, timeout, and output framing).
|
|
244
|
+
|
|
194
245
|
### Full Example
|
|
195
246
|
|
|
196
247
|
```toml
|
|
@@ -209,3 +260,34 @@ glob = "**/index.md"
|
|
|
209
260
|
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
210
261
|
glob = "logs/*.csv"
|
|
211
262
|
```
|
|
263
|
+
|
|
264
|
+
## Command execution
|
|
265
|
+
|
|
266
|
+
Config keys that run an external command — today `on-file`, with more events to
|
|
267
|
+
follow — share one execution contract:
|
|
268
|
+
|
|
269
|
+
- **argv, not a shell.** The command string is split into an argv with
|
|
270
|
+
shell-like quoting (spaces separate arguments; quotes group them), but **no
|
|
271
|
+
shell is invoked** — there is no globbing, piping, `$VAR` expansion, or
|
|
272
|
+
`&&`/`;` chaining. To get those, ask for a shell explicitly:
|
|
273
|
+
`sh -c 'grep foo {path} | sort'` — the quoted script stays a single argument.
|
|
274
|
+
- **Injection-safe placeholders.** Each placeholder (`{path}`, `{abspath}`,
|
|
275
|
+
`{root}`, …) is substituted into whole argv tokens, every occurrence, in a
|
|
276
|
+
single left-to-right pass. A substituted value is always exactly one argv
|
|
277
|
+
element, so a path with spaces — or untrusted content that itself contains
|
|
278
|
+
`{…}` or shell metacharacters — is inert and never re-scanned. An unknown
|
|
279
|
+
`{…}` is left literal.
|
|
280
|
+
- **Working directory.** The command runs in the **config file's directory**,
|
|
281
|
+
so relative paths in the command resolve predictably regardless of where you
|
|
282
|
+
launched `dirsql`.
|
|
283
|
+
- **Environment.** The command inherits `dirsql`'s environment, so tools like
|
|
284
|
+
`uvx --with …` / `npx …` resolve their dependencies as usual.
|
|
285
|
+
- **Output framing.** The command's result is the **last non-empty line of
|
|
286
|
+
stdout**; any log/chatter lines above it are ignored. stderr is never data —
|
|
287
|
+
it is captured only to enrich error messages.
|
|
288
|
+
- **Timeout.** Each command run is bounded by a fixed **30-second** timeout (no
|
|
289
|
+
per-table override yet); a command that exceeds it is killed and treated as a
|
|
290
|
+
failure.
|
|
291
|
+
- **Errors.** A non-zero exit, a timeout, a spawn failure, or output that does
|
|
292
|
+
not parse as expected is a per-file failure: the file is skipped with a
|
|
293
|
+
stderr warning and the scan continues.
|
|
@@ -4,7 +4,7 @@ name = "dirsql-py-ext"
|
|
|
4
4
|
# pypi/maturin handler can rewrite it via `write-version` before
|
|
5
5
|
# `maturin build`. `pyproject.toml` declares `dynamic = ["version"]`
|
|
6
6
|
# and maturin reads this field. Mirrors `packages/rust/Cargo.toml`.
|
|
7
|
-
version = "0.3.44"
|
|
7
|
+
version = "0.3.45"
|
|
8
8
|
edition.workspace = true
|
|
9
9
|
publish = false
|
|
10
10
|
readme = "README.md"
|
|
@@ -191,6 +191,57 @@ always filtered to the DDL's declared columns regardless. Strict mode
|
|
|
191
191
|
applies only to keys produced by an extract callback (relevant for
|
|
192
192
|
programmatic [tables](../guide/tables.md)).
|
|
193
193
|
|
|
194
|
+
### Per-file commands (`on-file`)
|
|
195
|
+
|
|
196
|
+
Reach for `on-file` when a table's rows come from the *contents* of each
|
|
197
|
+
matched file, not just its path and stat metadata. A filesystem-fact table
|
|
198
|
+
gives you one row per file; `on-file` runs a command per file that reads the
|
|
199
|
+
file and emits as many rows as it likes.
|
|
200
|
+
|
|
201
|
+
```toml
|
|
202
|
+
[[table]]
|
|
203
|
+
ddl = "CREATE TABLE papers (paper_id TEXT, title TEXT)"
|
|
204
|
+
glob = "**/meta.json"
|
|
205
|
+
on-file = "uv run python extract_papers.py {path}"
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
For every file matched by `glob`, `dirsql` runs the command. **The command
|
|
209
|
+
reads the file itself and prints a JSON array of row objects on stdout**; each
|
|
210
|
+
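object becomes one row, its fields mapped to columns (pretty-printed here for readability; emit the array on a single line, because only the last non-empty line of stdout is parsed — see [Command execution](#command-execution)):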
|
|
211
|
+
|
|
212
|
+
```json
|
|
213
|
+
[
|
|
214
|
+
{ "paper_id": "arXiv:2401.001", "title": "On Directories" },
|
|
215
|
+
{ "paper_id": "arXiv:2401.002", "title": "SQL All The Way Down" }
|
|
216
|
+
]
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Placeholders substituted into the command:
|
|
220
|
+
|
|
221
|
+
| Placeholder | Value |
|
|
222
|
+
|-------------|-------|
|
|
223
|
+
| `{path}` | The matched file's path **relative to the index root**. Appended automatically when the command omits it, so `extract.py` and `extract.py {path}` behave identically. |
|
|
224
|
+
| `{abspath}` | The matched file's absolute path. |
|
|
225
|
+
| `{root}` | The index root directory. |
|
|
226
|
+
|
|
227
|
+
Filesystem facts (stat virtuals and glob captures) are still merged onto every
|
|
228
|
+
`on-file` row, so you can declare `_path`, `_basename`, `{capture}`, etc. in the
|
|
229
|
+
DDL alongside the command's own columns — a column emitted by the command wins
|
|
230
|
+
over a same-named filesystem fact.
|
|
231
|
+
|
|
232
|
+
JSON values map to SQLite as follows: `null` → NULL; `true`/`false` → `1`/`0`;
|
|
233
|
+
an integer → INTEGER, any other number → REAL; a string → TEXT; a nested array
|
|
234
|
+
or object → its JSON text as TEXT.
|
|
235
|
+
|
|
236
|
+
**Per-file error isolation.** If a file's command fails — a non-zero exit, a
|
|
237
|
+
timeout, a spawn error, or output that isn't a JSON array of objects — that
|
|
238
|
+
file is skipped (it contributes no rows) and a one-line warning naming the file
|
|
239
|
+
and the error is written to stderr. One bad file never aborts the scan; the
|
|
240
|
+
other files' rows are indexed normally.
|
|
241
|
+
|
|
242
|
+
See [Command execution](#command-execution) for the full contract (argv
|
|
243
|
+
splitting, injection safety, cwd, environment, timeout, and output framing).
|
|
244
|
+
|
|
194
245
|
### Full Example
|
|
195
246
|
|
|
196
247
|
```toml
|
|
@@ -209,3 +260,34 @@ glob = "**/index.md"
|
|
|
209
260
|
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
210
261
|
glob = "logs/*.csv"
|
|
211
262
|
```
|
|
263
|
+
|
|
264
|
+
## Command execution
|
|
265
|
+
|
|
266
|
+
Config keys that run an external command — today `on-file`, with more events to
|
|
267
|
+
follow — share one execution contract:
|
|
268
|
+
|
|
269
|
+
- **argv, not a shell.** The command string is split into an argv with
|
|
270
|
+
shell-like quoting (spaces separate arguments; quotes group them), but **no
|
|
271
|
+
shell is invoked** — there is no globbing, piping, `$VAR` expansion, or
|
|
272
|
+
`&&`/`;` chaining. To get those, ask for a shell explicitly:
|
|
273
|
+
`sh -c 'grep foo {path} | sort'` — the quoted script stays a single argument.
|
|
274
|
+
- **Injection-safe placeholders.** Each placeholder (`{path}`, `{abspath}`,
|
|
275
|
+
`{root}`, …) is substituted into whole argv tokens, every occurrence, in a
|
|
276
|
+
single left-to-right pass. A substituted value is always exactly one argv
|
|
277
|
+
element, so a path with spaces — or untrusted content that itself contains
|
|
278
|
+
`{…}` or shell metacharacters — is inert and never re-scanned. An unknown
|
|
279
|
+
`{…}` is left literal.
|
|
280
|
+
- **Working directory.** The command runs in the **config file's directory**,
|
|
281
|
+
so relative paths in the command resolve predictably regardless of where you
|
|
282
|
+
launched `dirsql`.
|
|
283
|
+
- **Environment.** The command inherits `dirsql`'s environment, so tools like
|
|
284
|
+
`uvx --with …` / `npx …` resolve their dependencies as usual.
|
|
285
|
+
- **Output framing.** The command's result is the **last non-empty line of
|
|
286
|
+
stdout**; any log/chatter lines above it are ignored. stderr is never data —
|
|
287
|
+
it is captured only to enrich error messages.
|
|
288
|
+
- **Timeout.** Each command run is bounded by a fixed **30-second** timeout (no
|
|
289
|
+
per-table override yet); a command that exceeds it is killed and treated as a
|
|
290
|
+
failure.
|
|
291
|
+
- **Errors.** A non-zero exit, a timeout, a spawn failure, or output that does
|
|
292
|
+
not parse as expected is a per-file failure: the file is skipped with a
|
|
293
|
+
stderr warning and the scan continues.
|
|
@@ -191,6 +191,57 @@ always filtered to the DDL's declared columns regardless. Strict mode
|
|
|
191
191
|
applies only to keys produced by an extract callback (relevant for
|
|
192
192
|
programmatic [tables](../guide/tables.md)).
|
|
193
193
|
|
|
194
|
+
### Per-file commands (`on-file`)
|
|
195
|
+
|
|
196
|
+
Reach for `on-file` when a table's rows come from the *contents* of each
|
|
197
|
+
matched file, not just its path and stat metadata. A filesystem-fact table
|
|
198
|
+
gives you one row per file; `on-file` runs a command per file that reads the
|
|
199
|
+
file and emits as many rows as it likes.
|
|
200
|
+
|
|
201
|
+
```toml
|
|
202
|
+
[[table]]
|
|
203
|
+
ddl = "CREATE TABLE papers (paper_id TEXT, title TEXT)"
|
|
204
|
+
glob = "**/meta.json"
|
|
205
|
+
on-file = "uv run python extract_papers.py {path}"
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
For every file matched by `glob`, `dirsql` runs the command. **The command
|
|
209
|
+
reads the file itself and prints a JSON array of row objects on stdout**; each
|
|
210
|
+
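object becomes one row, its fields mapped to columns (pretty-printed here for readability; emit the array on a single line, because only the last non-empty line of stdout is parsed — see [Command execution](#command-execution)):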
|
|
211
|
+
|
|
212
|
+
```json
|
|
213
|
+
[
|
|
214
|
+
{ "paper_id": "arXiv:2401.001", "title": "On Directories" },
|
|
215
|
+
{ "paper_id": "arXiv:2401.002", "title": "SQL All The Way Down" }
|
|
216
|
+
]
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Placeholders substituted into the command:
|
|
220
|
+
|
|
221
|
+
| Placeholder | Value |
|
|
222
|
+
|-------------|-------|
|
|
223
|
+
| `{path}` | The matched file's path **relative to the index root**. Appended automatically when the command omits it, so `extract.py` and `extract.py {path}` behave identically. |
|
|
224
|
+
| `{abspath}` | The matched file's absolute path. |
|
|
225
|
+
| `{root}` | The index root directory. |
|
|
226
|
+
|
|
227
|
+
Filesystem facts (stat virtuals and glob captures) are still merged onto every
|
|
228
|
+
`on-file` row, so you can declare `_path`, `_basename`, `{capture}`, etc. in the
|
|
229
|
+
DDL alongside the command's own columns — a column emitted by the command wins
|
|
230
|
+
over a same-named filesystem fact.
|
|
231
|
+
|
|
232
|
+
JSON values map to SQLite as follows: `null` → NULL; `true`/`false` → `1`/`0`;
|
|
233
|
+
an integer → INTEGER, any other number → REAL; a string → TEXT; a nested array
|
|
234
|
+
or object → its JSON text as TEXT.
|
|
235
|
+
|
|
236
|
+
**Per-file error isolation.** If a file's command fails — a non-zero exit, a
|
|
237
|
+
timeout, a spawn error, or output that isn't a JSON array of objects — that
|
|
238
|
+
file is skipped (it contributes no rows) and a one-line warning naming the file
|
|
239
|
+
and the error is written to stderr. One bad file never aborts the scan; the
|
|
240
|
+
other files' rows are indexed normally.
|
|
241
|
+
|
|
242
|
+
See [Command execution](#command-execution) for the full contract (argv
|
|
243
|
+
splitting, injection safety, cwd, environment, timeout, and output framing).
|
|
244
|
+
|
|
194
245
|
### Full Example
|
|
195
246
|
|
|
196
247
|
```toml
|
|
@@ -209,3 +260,34 @@ glob = "**/index.md"
|
|
|
209
260
|
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
210
261
|
glob = "logs/*.csv"
|
|
211
262
|
```
|
|
263
|
+
|
|
264
|
+
## Command execution
|
|
265
|
+
|
|
266
|
+
Config keys that run an external command — today `on-file`, with more events to
|
|
267
|
+
follow — share one execution contract:
|
|
268
|
+
|
|
269
|
+
- **argv, not a shell.** The command string is split into an argv with
|
|
270
|
+
shell-like quoting (spaces separate arguments; quotes group them), but **no
|
|
271
|
+
shell is invoked** — there is no globbing, piping, `$VAR` expansion, or
|
|
272
|
+
`&&`/`;` chaining. To get those, ask for a shell explicitly:
|
|
273
|
+
`sh -c 'grep foo {path} | sort'` — the quoted script stays a single argument.
|
|
274
|
+
- **Injection-safe placeholders.** Each placeholder (`{path}`, `{abspath}`,
|
|
275
|
+
`{root}`, …) is substituted into whole argv tokens, every occurrence, in a
|
|
276
|
+
single left-to-right pass. A substituted value is always exactly one argv
|
|
277
|
+
element, so a path with spaces — or untrusted content that itself contains
|
|
278
|
+
`{…}` or shell metacharacters — is inert and never re-scanned. An unknown
|
|
279
|
+
`{…}` is left literal.
|
|
280
|
+
- **Working directory.** The command runs in the **config file's directory**,
|
|
281
|
+
so relative paths in the command resolve predictably regardless of where you
|
|
282
|
+
launched `dirsql`.
|
|
283
|
+
- **Environment.** The command inherits `dirsql`'s environment, so tools like
|
|
284
|
+
`uvx --with …` / `npx …` resolve their dependencies as usual.
|
|
285
|
+
- **Output framing.** The command's result is the **last non-empty line of
|
|
286
|
+
stdout**; any log/chatter lines above it are ignored. stderr is never data —
|
|
287
|
+
it is captured only to enrich error messages.
|
|
288
|
+
- **Timeout.** Each command run is bounded by a fixed **30-second** timeout (no
|
|
289
|
+
per-table override yet); a command that exceeds it is killed and treated as a
|
|
290
|
+
failure.
|
|
291
|
+
- **Errors.** A non-zero exit, a timeout, a spawn failure, or output that does
|
|
292
|
+
not parse as expected is a per-file failure: the file is skipped with a
|
|
293
|
+
stderr warning and the scan continues.
|
|
@@ -16,6 +16,9 @@ pub enum ConfigError {
|
|
|
16
16
|
|
|
17
17
|
#[error("Missing required field '{0}' in [[dirsql.extension]] entry")]
|
|
18
18
|
MissingExtensionField(&'static str),
|
|
19
|
+
|
|
20
|
+
#[error("Field '{0}' in [[table]] entry must not be empty")]
|
|
21
|
+
EmptyField(&'static str),
|
|
19
22
|
}
|
|
20
23
|
|
|
21
24
|
pub type Result<T> = std::result::Result<T, ConfigError>;
|
|
@@ -75,6 +78,11 @@ pub struct TableConfig {
|
|
|
75
78
|
pub ddl: String,
|
|
76
79
|
pub glob: String,
|
|
77
80
|
pub strict: Option<bool>,
|
|
81
|
+
/// Optional per-file command (`on-file`). When set, each matched file's
|
|
82
|
+
/// rows come from running this command (which reads the file and prints a
|
|
83
|
+
/// JSON array of row objects) instead of the empty filesystem-facts-only
|
|
84
|
+
/// row. See `dirsql::command` for the execution contract.
|
|
85
|
+
pub on_file: Option<String>,
|
|
78
86
|
}
|
|
79
87
|
|
|
80
88
|
// --- Raw deserialization types (serde) ---
|
|
@@ -105,6 +113,8 @@ struct RawTable {
|
|
|
105
113
|
ddl: Option<String>,
|
|
106
114
|
glob: Option<String>,
|
|
107
115
|
strict: Option<bool>,
|
|
116
|
+
#[serde(rename = "on-file")]
|
|
117
|
+
on_file: Option<String>,
|
|
108
118
|
}
|
|
109
119
|
|
|
110
120
|
/// Load and parse a `.dirsql.toml` config file from the given path.
|
|
@@ -149,10 +159,20 @@ pub fn load_config_str(content: &str) -> Result<Config> {
|
|
|
149
159
|
let ddl = raw_table.ddl.ok_or(ConfigError::MissingField("ddl"))?;
|
|
150
160
|
let glob = raw_table.glob.ok_or(ConfigError::MissingField("glob"))?;
|
|
151
161
|
|
|
162
|
+
// A present-but-empty `on-file = ""` is as unusable as a missing key:
|
|
163
|
+
// reject it at parse time rather than spawning an empty command later.
|
|
164
|
+
let on_file = match raw_table.on_file {
|
|
165
|
+
Some(cmd) if cmd.trim().is_empty() => {
|
|
166
|
+
return Err(ConfigError::EmptyField("on-file"));
|
|
167
|
+
}
|
|
168
|
+
other => other,
|
|
169
|
+
};
|
|
170
|
+
|
|
152
171
|
tables.push(TableConfig {
|
|
153
172
|
ddl,
|
|
154
173
|
glob,
|
|
155
174
|
strict: raw_table.strict,
|
|
175
|
+
on_file,
|
|
156
176
|
});
|
|
157
177
|
}
|
|
158
178
|
|
|
@@ -444,6 +464,48 @@ path = "b.so"
|
|
|
444
464
|
assert_eq!(config.extensions[1].path, PathBuf::from("b.so"));
|
|
445
465
|
}
|
|
446
466
|
|
|
467
|
+
#[test]
|
|
468
|
+
fn on_file_parses_when_present() {
|
|
469
|
+
let toml = r#"
|
|
470
|
+
[[table]]
|
|
471
|
+
ddl = "CREATE TABLE papers (paper_id TEXT, title TEXT)"
|
|
472
|
+
glob = "**/meta.json"
|
|
473
|
+
on-file = "uv run python extract_papers.py {path}"
|
|
474
|
+
"#;
|
|
475
|
+
let config = load_config_str(toml).unwrap();
|
|
476
|
+
assert_eq!(config.tables.len(), 1);
|
|
477
|
+
assert_eq!(
|
|
478
|
+
config.tables[0].on_file.as_deref(),
|
|
479
|
+
Some("uv run python extract_papers.py {path}")
|
|
480
|
+
);
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
#[test]
|
|
484
|
+
fn on_file_absent_is_none() {
|
|
485
|
+
let toml = r#"
|
|
486
|
+
[[table]]
|
|
487
|
+
ddl = "CREATE TABLE t (_path TEXT)"
|
|
488
|
+
glob = "*.json"
|
|
489
|
+
"#;
|
|
490
|
+
let config = load_config_str(toml).unwrap();
|
|
491
|
+
assert!(config.tables[0].on_file.is_none());
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
#[test]
|
|
495
|
+
fn on_file_empty_errors() {
|
|
496
|
+
let toml = r#"
|
|
497
|
+
[[table]]
|
|
498
|
+
ddl = "CREATE TABLE t (_path TEXT)"
|
|
499
|
+
glob = "*.json"
|
|
500
|
+
on-file = " "
|
|
501
|
+
"#;
|
|
502
|
+
let err = load_config_str(toml).unwrap_err();
|
|
503
|
+
assert!(
|
|
504
|
+
matches!(err, ConfigError::EmptyField("on-file")),
|
|
505
|
+
"got: {err:?}"
|
|
506
|
+
);
|
|
507
|
+
}
|
|
508
|
+
|
|
447
509
|
#[test]
|
|
448
510
|
fn extension_empty_path_errors() {
|
|
449
511
|
// An empty `path = ""` is as unusable as a missing key — it must be
|
|
@@ -27,6 +27,7 @@ pub mod watcher;
|
|
|
27
27
|
#[cfg(feature = "cli")]
|
|
28
28
|
pub mod cli;
|
|
29
29
|
|
|
30
|
+
use crate::command::Placeholder;
|
|
30
31
|
use crate::db::{Db, parse_table_name};
|
|
31
32
|
use crate::matcher::TableMatcher;
|
|
32
33
|
use crate::persist::{
|
|
@@ -1038,18 +1039,20 @@ impl DirSQLBuilder {
|
|
|
1038
1039
|
.parent()
|
|
1039
1040
|
.map(PathBuf::from)
|
|
1040
1041
|
.unwrap_or_else(|| PathBuf::from("."));
|
|
1041
|
-
if let Some(cfg_root) = cfg.root.clone() {
|
|
1042
|
-
|
|
1042
|
+
let resolved_root = if let Some(cfg_root) = cfg.root.clone() {
|
|
1043
|
+
if cfg_root.is_absolute() {
|
|
1043
1044
|
cfg_root
|
|
1044
1045
|
} else {
|
|
1045
1046
|
cfg_parent.join(cfg_root)
|
|
1046
|
-
}
|
|
1047
|
-
config_root = Some(resolved);
|
|
1047
|
+
}
|
|
1048
1048
|
} else {
|
|
1049
|
-
|
|
1050
|
-
}
|
|
1049
|
+
cfg_parent.clone()
|
|
1050
|
+
};
|
|
1051
|
+
config_root = Some(resolved_root.clone());
|
|
1051
1052
|
|
|
1052
|
-
|
|
1053
|
+
// `on-file` commands run in the config file's directory and compute
|
|
1054
|
+
// `{path}` relative to the resolved index root.
|
|
1055
|
+
let cfg_tables = build_tables_from_config(&cfg, &cfg_parent, &resolved_root)?;
|
|
1053
1056
|
tables.extend(cfg_tables);
|
|
1054
1057
|
ignore.extend(cfg.ignore);
|
|
1055
1058
|
|
|
@@ -1433,25 +1436,53 @@ fn relative_path(root: &Path, path: &Path) -> String {
|
|
|
1433
1436
|
.to_string()
|
|
1434
1437
|
}
|
|
1435
1438
|
|
|
1439
|
+
/// Fixed timeout for an `on-file` command. There is no per-table timeout key
|
|
1440
|
+
/// yet (#327); this module constant is the documented current default.
|
|
1441
|
+
const ON_FILE_TIMEOUT: Duration = Duration::from_secs(30);
|
|
1442
|
+
|
|
1436
1443
|
/// Build [`Table`] objects from a parsed config.
|
|
1437
1444
|
///
|
|
1438
|
-
///
|
|
1445
|
+
/// A plain config-defined table produces one row per matched file built
|
|
1439
1446
|
/// entirely from filesystem facts: glob path captures and stat virtuals
|
|
1440
1447
|
/// (`_path`, `_basename`, `_dir`, `_ext`, `_size`, `_mtime`, `_ctime`) are
|
|
1441
|
-
/// injected by the core pipeline ([`merge_filesystem_facts`]).
|
|
1442
|
-
///
|
|
1443
|
-
///
|
|
1444
|
-
///
|
|
1445
|
-
///
|
|
1446
|
-
|
|
1448
|
+
/// injected by the core pipeline ([`merge_filesystem_facts`]). Its synthesized
|
|
1449
|
+
/// extract emits a single empty row per file; the fact-injection layer fills it
|
|
1450
|
+
/// in.
|
|
1451
|
+
///
|
|
1452
|
+
/// A table with an `on-file` command instead runs that command once per matched
|
|
1453
|
+
/// file (see [`run_on_file`]): the command reads the file and prints a JSON
|
|
1454
|
+
/// array of row objects on stdout, which becomes the file's rows (filesystem
|
|
1455
|
+
/// facts are still merged on top, user values winning). `config_dir` is the
|
|
1456
|
+
/// command's working directory (the config file's parent) and `root` is the
|
|
1457
|
+
/// resolved index root used to compute the `{path}` placeholder.
|
|
1458
|
+
fn build_tables_from_config(
|
|
1459
|
+
cfg: &config::Config,
|
|
1460
|
+
config_dir: &Path,
|
|
1461
|
+
root: &Path,
|
|
1462
|
+
) -> Result<Vec<Table>> {
|
|
1447
1463
|
let mut tables = Vec::with_capacity(cfg.tables.len());
|
|
1448
1464
|
|
|
1449
1465
|
for table_cfg in &cfg.tables {
|
|
1450
|
-
let mut table =
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1466
|
+
let mut table = match &table_cfg.on_file {
|
|
1467
|
+
Some(command) => {
|
|
1468
|
+
let command = command.clone();
|
|
1469
|
+
let config_dir = config_dir.to_path_buf();
|
|
1470
|
+
let root = root.to_path_buf();
|
|
1471
|
+
// `Table::new` (infallible): `run_on_file` isolates its own
|
|
1472
|
+
// errors to an empty row set so one bad file never aborts the
|
|
1473
|
+
// scan (the scan aborts on an extract `Err`).
|
|
1474
|
+
Table::new(
|
|
1475
|
+
table_cfg.ddl.clone(),
|
|
1476
|
+
table_cfg.glob.clone(),
|
|
1477
|
+
move |abs_path: &str| run_on_file(&command, abs_path, &config_dir, &root),
|
|
1478
|
+
)
|
|
1479
|
+
}
|
|
1480
|
+
None => Table::new(
|
|
1481
|
+
table_cfg.ddl.clone(),
|
|
1482
|
+
table_cfg.glob.clone(),
|
|
1483
|
+
|_path: &str| vec![Row::new()],
|
|
1484
|
+
),
|
|
1485
|
+
};
|
|
1455
1486
|
|
|
1456
1487
|
if table_cfg.strict == Some(true) {
|
|
1457
1488
|
table.strict = true;
|
|
@@ -1463,6 +1494,89 @@ fn build_tables_from_config(cfg: &config::Config) -> Result<Vec<Table>> {
|
|
|
1463
1494
|
Ok(tables)
|
|
1464
1495
|
}
|
|
1465
1496
|
|
|
1497
|
+
/// Run a table's `on-file` command for one matched file and parse its output
|
|
1498
|
+
/// into rows.
|
|
1499
|
+
///
|
|
1500
|
+
/// Placeholders: `{path}` (the file relative to `root`, append-if-absent so
|
|
1501
|
+
/// `cmd` and `cmd {path}` behave identically), `{abspath}` (the absolute path),
|
|
1502
|
+
/// and `{root}` (the index root). The relative path is computed with a single
|
|
1503
|
+
/// [`Path::strip_prefix`] (#251/#252), falling back to the absolute path when
|
|
1504
|
+
/// the file is not under `root`.
|
|
1505
|
+
///
|
|
1506
|
+
/// Per-file isolation: any failure — a spawn/exit/timeout error from
|
|
1507
|
+
/// [`command::run_command`], or output that is not a JSON array of objects —
|
|
1508
|
+
/// is logged to stderr and yields no rows (`vec![]`). Returning `Err` here
|
|
1509
|
+
/// would abort the whole scan, so it never does.
|
|
1510
|
+
fn run_on_file(command: &str, abs_path: &str, config_dir: &Path, root: &Path) -> Vec<Row> {
|
|
1511
|
+
let abs = Path::new(abs_path);
|
|
1512
|
+
let rel = abs
|
|
1513
|
+
.strip_prefix(root)
|
|
1514
|
+
.map(|p| p.to_string_lossy().into_owned())
|
|
1515
|
+
.unwrap_or_else(|_| abs_path.to_string());
|
|
1516
|
+
let placeholders = [
|
|
1517
|
+
Placeholder::append("path", rel),
|
|
1518
|
+
Placeholder::new("abspath", abs_path),
|
|
1519
|
+
Placeholder::new("root", root.to_string_lossy().into_owned()),
|
|
1520
|
+
];
|
|
1521
|
+
|
|
1522
|
+
match command::run_command(command, &placeholders, config_dir, ON_FILE_TIMEOUT, None) {
|
|
1523
|
+
Ok(output) => match parse_command_rows(&output.payload) {
|
|
1524
|
+
Ok(rows) => rows,
|
|
1525
|
+
Err(message) => {
|
|
1526
|
+
eprintln!(
|
|
1527
|
+
"dirsql: skipping `{abs_path}`: on-file output was not a JSON array of rows: {message}"
|
|
1528
|
+
);
|
|
1529
|
+
Vec::new()
|
|
1530
|
+
}
|
|
1531
|
+
},
|
|
1532
|
+
Err(error) => {
|
|
1533
|
+
eprintln!("dirsql: skipping `{abs_path}`: on-file command failed: {error}");
|
|
1534
|
+
Vec::new()
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
}
|
|
1538
|
+
|
|
1539
|
+
/// Parse an `on-file` command's stdout payload — a JSON array of row objects —
|
|
1540
|
+
/// into [`Row`]s. Returns `Err(msg)` when the top-level JSON is not an array or
|
|
1541
|
+
/// any element is not an object. Pure (no IO), so it stays colocated-unit-
|
|
1542
|
+
/// testable; the effectful spawn lives in [`run_on_file`].
|
|
1543
|
+
fn parse_command_rows(payload: &str) -> std::result::Result<Vec<Row>, String> {
|
|
1544
|
+
let parsed: serde_json::Value =
|
|
1545
|
+
serde_json::from_str(payload).map_err(|e| format!("invalid JSON: {e}"))?;
|
|
1546
|
+
let array = parsed
|
|
1547
|
+
.as_array()
|
|
1548
|
+
.ok_or_else(|| "expected a JSON array of row objects".to_string())?;
|
|
1549
|
+
|
|
1550
|
+
let mut rows = Vec::with_capacity(array.len());
|
|
1551
|
+
for element in array {
|
|
1552
|
+
let object = element
|
|
1553
|
+
.as_object()
|
|
1554
|
+
.ok_or_else(|| "expected each array element to be a JSON object".to_string())?;
|
|
1555
|
+
let mut row = Row::with_capacity(object.len());
|
|
1556
|
+
for (key, value) in object {
|
|
1557
|
+
row.insert(key.clone(), json_to_value(value));
|
|
1558
|
+
}
|
|
1559
|
+
rows.push(row);
|
|
1560
|
+
}
|
|
1561
|
+
Ok(rows)
|
|
1562
|
+
}
|
|
1563
|
+
|
|
1564
|
+
/// Map a JSON value to a SQLite [`Value`]: `null` → `Null`; `bool` → `Integer`
|
|
1565
|
+
/// (0/1); an integral number → `Integer`, otherwise `Real`; `string` → `Text`;
|
|
1566
|
+
/// an array/object → its JSON text as `Text`. Pure.
|
|
1567
|
+
fn json_to_value(value: &serde_json::Value) -> Value {
|
|
1568
|
+
match value {
|
|
1569
|
+
serde_json::Value::Null => Value::Null,
|
|
1570
|
+
serde_json::Value::Bool(b) => Value::Integer(i64::from(*b)),
|
|
1571
|
+
serde_json::Value::Number(n) => match n.as_i64() {
|
|
1572
|
+
Some(i) => Value::Integer(i),
|
|
1573
|
+
None => Value::Real(n.as_f64().unwrap_or(f64::NAN)),
|
|
1574
|
+
},
|
|
1575
|
+
serde_json::Value::String(s) => Value::Text(s.clone()),
|
|
1576
|
+
other => Value::Text(other.to_string()),
|
|
1577
|
+
}
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1466
1580
|
/// Reserved column names for filesystem-derived virtual columns. These are
|
|
1467
1581
|
/// always available on every row when declared in the table DDL; if not
|
|
1468
1582
|
/// declared, they are silently dropped during normalization.
|
|
@@ -2519,3 +2633,83 @@ mod internal_tests {
|
|
|
2519
2633
|
assert!(rx.try_recv().is_err(), "loop should have ended");
|
|
2520
2634
|
}
|
|
2521
2635
|
}
|
|
2636
|
+
|
|
2637
|
+
#[cfg(test)]
mod command_rows_tests {
    use super::*;

    /// Shorthand for an expected `Text` cell.
    fn text(s: &str) -> Value {
        Value::Text(s.to_string())
    }

    #[test]
    fn parses_an_array_of_row_objects() {
        let rows = parse_command_rows(r#"[{"id":"a","n":1},{"id":"b","n":2}]"#).unwrap();
        assert_eq!(rows.len(), 2);

        // Rows come back in payload order with both columns populated.
        for (row, (id, n)) in rows.iter().zip([("a", 1), ("b", 2)]) {
            assert_eq!(row["id"], text(id));
            assert_eq!(row["n"], Value::Integer(n));
        }
    }

    #[test]
    fn parses_an_empty_array_to_no_rows() {
        let rows = parse_command_rows("[]").unwrap();
        assert!(rows.is_empty());
    }

    #[test]
    fn maps_every_json_value_type_including_nested_to_text_json() {
        let rows = parse_command_rows(
            r#"[{"nul":null,"t":true,"f":false,"i":42,"r":1.5,"s":"hi","arr":[1,2],"obj":{"k":"v"}}]"#,
        )
        .unwrap();
        let row = &rows[0];

        let expected = [
            ("nul", Value::Null),
            ("t", Value::Integer(1)),
            ("f", Value::Integer(0)),
            ("i", Value::Integer(42)),
            ("r", Value::Real(1.5)),
            ("s", text("hi")),
            ("arr", text("[1,2]")),
            ("obj", text(r#"{"k":"v"}"#)),
        ];
        for (column, want) in expected {
            assert_eq!(row[column], want);
        }
    }

    #[test]
    fn a_number_that_does_not_fit_i64_becomes_real() {
        // 10^19 is above i64::MAX (~9.2e18) yet still fits u64, so `as_i64`
        // yields None and the value takes the `Real` branch.
        let rows = parse_command_rows(r#"[{"big":10000000000000000000}]"#).unwrap();
        let big = &rows[0]["big"];
        assert!(matches!(big, Value::Real(_)));
    }

    #[test]
    fn a_non_array_payload_is_an_error() {
        let err = parse_command_rows(r#"{"id":"a"}"#).unwrap_err();
        assert!(err.contains("array"), "got: {err}");
    }

    #[test]
    fn an_element_that_is_not_an_object_is_an_error() {
        let err = parse_command_rows(r#"[{"id":"a"}, 3]"#).unwrap_err();
        assert!(err.contains("object"), "got: {err}");
    }

    #[test]
    fn invalid_json_is_an_error() {
        let err = parse_command_rows("not json at all").unwrap_err();
        assert!(err.contains("invalid JSON"), "got: {err}");
    }

    #[test]
    fn json_to_value_maps_each_variant() {
        let cases = [
            (serde_json::Value::Null, Value::Null),
            (serde_json::json!(true), Value::Integer(1)),
            (serde_json::json!(false), Value::Integer(0)),
            (serde_json::json!(7), Value::Integer(7)),
            (serde_json::json!(2.5), Value::Real(2.5)),
            (serde_json::json!("x"), text("x")),
            (serde_json::json!([1, 2]), text("[1,2]")),
        ];
        for (input, want) in cases {
            assert_eq!(json_to_value(&input), want);
        }
    }
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dirsql-0.3.44 → dirsql-0.3.45}/packages/python/docs/tests/integration/language-flag.spec.ts
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|