dirsql 0.2.9__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dirsql-0.2.9 → dirsql-0.3.1}/Cargo.lock +43 -43
- {dirsql-0.2.9 → dirsql-0.3.1}/PKG-INFO +1 -1
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/.vitepress/config.ts +1 -0
- dirsql-0.3.1/docs/guide/config.md +193 -0
- dirsql-0.3.1/docs/guide/persistence.md +177 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/Cargo.toml +1 -1
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.vitepress/config.ts +1 -0
- dirsql-0.3.1/packages/python/docs/guide/config.md +193 -0
- dirsql-0.3.1/packages/python/docs/guide/persistence.md +177 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/src/lib.rs +10 -1
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_async_dirsql.py +12 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_binding.py +35 -1
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_docs_examples.py +14 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_docs_gaps.py +31 -152
- dirsql-0.3.1/packages/python/tests/integration/test_from_config.py +230 -0
- dirsql-0.3.1/packages/python/tests/integration/test_persist.py +300 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/Cargo.toml +1 -2
- dirsql-0.3.1/packages/rust/docs/guide/config.md +193 -0
- dirsql-0.3.1/packages/rust/docs/guide/persistence.md +177 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/config.rs +84 -179
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/db.rs +15 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/lib.rs +504 -123
- dirsql-0.3.1/packages/rust/src/persist.rs +603 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/scanner.rs +33 -1
- dirsql-0.3.1/packages/rust/tests/docs_gaps.rs +151 -0
- dirsql-0.3.1/packages/rust/tests/from_config.rs +228 -0
- dirsql-0.3.1/packages/rust/tests/persist.rs +393 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/sdk.rs +10 -5
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_async.py +17 -1
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/test_async.py +12 -1
- dirsql-0.2.9/docs/guide/config.md +0 -205
- dirsql-0.2.9/packages/python/docs/guide/config.md +0 -205
- dirsql-0.2.9/packages/python/tests/integration/test_from_config.py +0 -361
- dirsql-0.2.9/packages/rust/docs/guide/config.md +0 -205
- dirsql-0.2.9/packages/rust/src/parser.rs +0 -1507
- dirsql-0.2.9/packages/rust/tests/docs_gaps.rs +0 -327
- dirsql-0.2.9/packages/rust/tests/from_config.rs +0 -256
- {dirsql-0.2.9 → dirsql-0.3.1}/Cargo.toml +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/README.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/.claude/CLAUDE.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/.vitepress/theme/index.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/.vitepress/theme/lang.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/AGENTS.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/api/index.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/getting-started.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/async.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/cli.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/crdt.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/querying.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/tables.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/watching.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/index.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/migrations.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/package.json +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/playwright.config.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/pnpm-lock.yaml +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/pnpm-workspace.yaml +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/integration/home.spec.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/integration/language-flag.spec.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/unit/config.test.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/unit/lang.test.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/docs/vitest.config.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/README.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.claude/CLAUDE.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.vitepress/theme/index.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.vitepress/theme/lang.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/AGENTS.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/api/index.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/getting-started.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/async.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/cli.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/crdt.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/querying.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/tables.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/watching.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/index.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/migrations.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/package.json +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/playwright.config.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/pnpm-lock.yaml +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/pnpm-workspace.yaml +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/integration/home.spec.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/integration/language-flag.spec.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/unit/config.test.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/unit/lang.test.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/vitest.config.ts +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/python/conftest.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/__init__.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/conftest.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/__init__.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_dirsql.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/README.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/db_bench.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/differ_bench.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/matcher_bench.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/scanner_bench.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/api/index.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/getting-started.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/async.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/cli.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/crdt.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/querying.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/tables.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/watching.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/index.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/migrations.md +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/bin/dirsql.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/mod.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/router.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/serialize.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/server.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/differ.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/matcher.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/watcher.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/async_sdk.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/cli_e2e.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/cli_integration.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/docs_examples.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/readonly_query.rs +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/pyproject.toml +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/__init__.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/__init__.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/binary_path.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/binary_path_test.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/is_windows.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/is_windows_test.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/main.py +0 -0
- {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/main_test.py +0 -0
|
@@ -73,6 +73,18 @@ version = "1.0.102"
|
|
|
73
73
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
74
74
|
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
|
75
75
|
|
|
76
|
+
[[package]]
|
|
77
|
+
name = "arrayref"
|
|
78
|
+
version = "0.3.9"
|
|
79
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
80
|
+
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
|
|
81
|
+
|
|
82
|
+
[[package]]
|
|
83
|
+
name = "arrayvec"
|
|
84
|
+
version = "0.7.6"
|
|
85
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
86
|
+
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
|
87
|
+
|
|
76
88
|
[[package]]
|
|
77
89
|
name = "assert_cmd"
|
|
78
90
|
version = "2.2.1"
|
|
@@ -176,6 +188,20 @@ version = "2.11.1"
|
|
|
176
188
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
177
189
|
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
|
|
178
190
|
|
|
191
|
+
[[package]]
|
|
192
|
+
name = "blake3"
|
|
193
|
+
version = "1.8.4"
|
|
194
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
195
|
+
checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e"
|
|
196
|
+
dependencies = [
|
|
197
|
+
"arrayref",
|
|
198
|
+
"arrayvec",
|
|
199
|
+
"cc",
|
|
200
|
+
"cfg-if",
|
|
201
|
+
"constant_time_eq",
|
|
202
|
+
"cpufeatures",
|
|
203
|
+
]
|
|
204
|
+
|
|
179
205
|
[[package]]
|
|
180
206
|
name = "bstr"
|
|
181
207
|
version = "1.12.1"
|
|
@@ -300,6 +326,12 @@ version = "1.0.5"
|
|
|
300
326
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
301
327
|
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
|
302
328
|
|
|
329
|
+
[[package]]
|
|
330
|
+
name = "constant_time_eq"
|
|
331
|
+
version = "0.4.2"
|
|
332
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
333
|
+
checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
|
|
334
|
+
|
|
303
335
|
[[package]]
|
|
304
336
|
name = "convert_case"
|
|
305
337
|
version = "0.11.0"
|
|
@@ -325,6 +357,15 @@ version = "0.8.7"
|
|
|
325
357
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
326
358
|
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
|
327
359
|
|
|
360
|
+
[[package]]
|
|
361
|
+
name = "cpufeatures"
|
|
362
|
+
version = "0.3.0"
|
|
363
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
364
|
+
checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
|
|
365
|
+
dependencies = [
|
|
366
|
+
"libc",
|
|
367
|
+
]
|
|
368
|
+
|
|
328
369
|
[[package]]
|
|
329
370
|
name = "criterion"
|
|
330
371
|
version = "0.5.1"
|
|
@@ -392,27 +433,6 @@ version = "0.2.4"
|
|
|
392
433
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
393
434
|
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
|
|
394
435
|
|
|
395
|
-
[[package]]
|
|
396
|
-
name = "csv"
|
|
397
|
-
version = "1.4.0"
|
|
398
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
399
|
-
checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
|
|
400
|
-
dependencies = [
|
|
401
|
-
"csv-core",
|
|
402
|
-
"itoa",
|
|
403
|
-
"ryu",
|
|
404
|
-
"serde_core",
|
|
405
|
-
]
|
|
406
|
-
|
|
407
|
-
[[package]]
|
|
408
|
-
name = "csv-core"
|
|
409
|
-
version = "0.1.13"
|
|
410
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
411
|
-
checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
|
|
412
|
-
dependencies = [
|
|
413
|
-
"memchr",
|
|
414
|
-
]
|
|
415
|
-
|
|
416
436
|
[[package]]
|
|
417
437
|
name = "ctor"
|
|
418
438
|
version = "0.10.0"
|
|
@@ -441,9 +461,9 @@ version = "0.2.7"
|
|
|
441
461
|
dependencies = [
|
|
442
462
|
"assert_cmd",
|
|
443
463
|
"axum",
|
|
464
|
+
"blake3",
|
|
444
465
|
"clap",
|
|
445
466
|
"criterion",
|
|
446
|
-
"csv",
|
|
447
467
|
"eventsource-client",
|
|
448
468
|
"futures",
|
|
449
469
|
"futures-channel",
|
|
@@ -458,7 +478,6 @@ dependencies = [
|
|
|
458
478
|
"rusqlite",
|
|
459
479
|
"serde",
|
|
460
480
|
"serde_json",
|
|
461
|
-
"serde_yaml",
|
|
462
481
|
"tempfile",
|
|
463
482
|
"thiserror",
|
|
464
483
|
"tokio",
|
|
@@ -480,7 +499,7 @@ dependencies = [
|
|
|
480
499
|
|
|
481
500
|
[[package]]
|
|
482
501
|
name = "dirsql-py-ext"
|
|
483
|
-
version = "0.2.9"
|
|
502
|
+
version = "0.3.1"
|
|
484
503
|
dependencies = [
|
|
485
504
|
"dirsql",
|
|
486
505
|
"pyo3",
|
|
@@ -2188,19 +2207,6 @@ dependencies = [
|
|
|
2188
2207
|
"serde",
|
|
2189
2208
|
]
|
|
2190
2209
|
|
|
2191
|
-
[[package]]
|
|
2192
|
-
name = "serde_yaml"
|
|
2193
|
-
version = "0.9.34+deprecated"
|
|
2194
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2195
|
-
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
|
|
2196
|
-
dependencies = [
|
|
2197
|
-
"indexmap",
|
|
2198
|
-
"itoa",
|
|
2199
|
-
"ryu",
|
|
2200
|
-
"serde",
|
|
2201
|
-
"unsafe-libyaml",
|
|
2202
|
-
]
|
|
2203
|
-
|
|
2204
2210
|
[[package]]
|
|
2205
2211
|
name = "shlex"
|
|
2206
2212
|
version = "1.3.0"
|
|
@@ -2591,12 +2597,6 @@ version = "0.2.6"
|
|
|
2591
2597
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2592
2598
|
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
|
2593
2599
|
|
|
2594
|
-
[[package]]
|
|
2595
|
-
name = "unsafe-libyaml"
|
|
2596
|
-
version = "0.2.11"
|
|
2597
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2598
|
-
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
|
|
2599
|
-
|
|
2600
2600
|
[[package]]
|
|
2601
2601
|
name = "untrusted"
|
|
2602
2602
|
version = "0.9.0"
|
|
@@ -32,6 +32,7 @@ export default defineConfig({
|
|
|
32
32
|
{ text: 'Defining Tables', link: '/guide/tables' },
|
|
33
33
|
{ text: 'Querying', link: '/guide/querying' },
|
|
34
34
|
{ text: 'File Watching', link: '/guide/watching' },
|
|
35
|
+
{ text: 'Persistence', link: '/guide/persistence' },
|
|
35
36
|
{ text: 'Async API', link: '/guide/async' },
|
|
36
37
|
{ text: 'Command-Line Interface', link: '/guide/cli' },
|
|
37
38
|
{ text: 'Collaboration with CRDTs', link: '/guide/crdt' }
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
---
|
|
2
|
+
canonical: https://thekevinscott.github.io/dirsql/guide/config
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Configuration File
|
|
6
|
+
|
|
7
|
+
> Online: <https://thekevinscott.github.io/dirsql/guide/config>
|
|
8
|
+
|
|
9
|
+
`dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
|
|
10
|
+
way produce **one row per matched file**. Each row's columns come from
|
|
11
|
+
filesystem facts:
|
|
12
|
+
|
|
13
|
+
- **Glob path captures** — named `{placeholder}` segments in the glob.
|
|
14
|
+
- **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
|
|
15
|
+
stat-derived metadata.
|
|
16
|
+
|
|
17
|
+
Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
|
|
18
|
+
configured in `.dirsql.toml`. If you need columns derived from file
|
|
19
|
+
contents, register a programmatic [`Table`](./tables.md) whose `extract`
|
|
20
|
+
function does the parsing in your host language.
|
|
21
|
+
|
|
22
|
+
## Basic Example
|
|
23
|
+
|
|
24
|
+
```toml
|
|
25
|
+
[dirsql]
|
|
26
|
+
ignore = ["node_modules/**", ".git/**"]
|
|
27
|
+
|
|
28
|
+
[[table]]
|
|
29
|
+
ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
|
|
30
|
+
glob = "posts/*.md"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Each `posts/*.md` file produces one row. The DDL declares which stat
|
|
34
|
+
virtuals are surfaced as SQL columns.
|
|
35
|
+
|
|
36
|
+
## Loading a Config File
|
|
37
|
+
|
|
38
|
+
Pass the config file path to the `DirSQL` constructor:
|
|
39
|
+
|
|
40
|
+
::: code-group
|
|
41
|
+
|
|
42
|
+
```python [Python]
|
|
43
|
+
from dirsql import DirSQL
|
|
44
|
+
|
|
45
|
+
db = DirSQL(config="./my-project/.dirsql.toml")
|
|
46
|
+
await db.ready()
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
```rust [Rust]
|
|
50
|
+
use dirsql::DirSQL;
|
|
51
|
+
|
|
52
|
+
let db = DirSQL::builder()
|
|
53
|
+
.config("./my-project/.dirsql.toml")
|
|
54
|
+
.build()?;
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
```typescript [TypeScript]
|
|
58
|
+
import { DirSQL } from "dirsql";
|
|
59
|
+
|
|
60
|
+
// String argument is interpreted as a config file path.
|
|
61
|
+
const db = new DirSQL("./my-project/.dirsql.toml");
|
|
62
|
+
await db.ready;
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
:::
|
|
66
|
+
|
|
67
|
+
By default, the root directory scanned is the config file's parent
|
|
68
|
+
directory. Override it by passing `root` explicitly (the explicit value
|
|
69
|
+
wins and a warning is emitted) or by declaring `[dirsql].root` in the
|
|
70
|
+
config file itself.
|
|
71
|
+
|
|
72
|
+
## Root Directory
|
|
73
|
+
|
|
74
|
+
By default, the config file's parent directory is the scan root. To index
|
|
75
|
+
a different location, declare `[dirsql].root` (relative paths are resolved
|
|
76
|
+
relative to the config file's parent):
|
|
77
|
+
|
|
78
|
+
```toml
|
|
79
|
+
[dirsql]
|
|
80
|
+
root = "../data"
|
|
81
|
+
ignore = ["node_modules/**"]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Stat Virtuals
|
|
85
|
+
|
|
86
|
+
Every config-defined table can expose any of these reserved columns. Add
|
|
87
|
+
the ones you want to your DDL; the rest are silently dropped.
|
|
88
|
+
|
|
89
|
+
| Column | Type | Source |
|
|
90
|
+
|--------|---------|--------|
|
|
91
|
+
| `_path` | TEXT | The file's path relative to the scan root. |
|
|
92
|
+
| `_basename` | TEXT | The filename including extension. |
|
|
93
|
+
| `_dir` | TEXT | The parent directory path (relative to root). |
|
|
94
|
+
| `_ext` | TEXT | The file extension, lowercased, no leading dot. |
|
|
95
|
+
| `_size` | INTEGER | Size in bytes. |
|
|
96
|
+
| `_mtime` | INTEGER | Last-modified time, unix seconds. |
|
|
97
|
+
| `_ctime` | INTEGER | Created/changed time, unix seconds. |
|
|
98
|
+
|
|
99
|
+
Example query:
|
|
100
|
+
|
|
101
|
+
```sql
|
|
102
|
+
SELECT _basename, _size
|
|
103
|
+
FROM posts
|
|
104
|
+
WHERE _mtime > strftime('%s', '2024-01-01')
|
|
105
|
+
ORDER BY _mtime DESC;
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Path Captures
|
|
109
|
+
|
|
110
|
+
Use `{name}` in glob patterns to extract path segments as columns. Add a
|
|
111
|
+
matching column name to the DDL and the capture is auto-populated:
|
|
112
|
+
|
|
113
|
+
```toml
|
|
114
|
+
[[table]]
|
|
115
|
+
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
116
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
|
|
120
|
+
`thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
|
|
121
|
+
to the file's modification time.
|
|
122
|
+
|
|
123
|
+
## Ignore Patterns
|
|
124
|
+
|
|
125
|
+
The `ignore` list skips files and directories entirely (not even scanned):
|
|
126
|
+
|
|
127
|
+
```toml
|
|
128
|
+
[dirsql]
|
|
129
|
+
ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
The top-level `.dirsql/` directory is always excluded, whether you list it
|
|
133
|
+
or not — it is a reserved namespace for `dirsql`'s own metadata (see
|
|
134
|
+
[Persistence](./persistence.md)).
|
|
135
|
+
|
|
136
|
+
## Persistence
|
|
137
|
+
|
|
138
|
+
Set `persist = true` to keep the SQLite database on disk between runs
|
|
139
|
+
instead of rebuilding from scratch on every startup:
|
|
140
|
+
|
|
141
|
+
```toml
|
|
142
|
+
[dirsql]
|
|
143
|
+
persist = true
|
|
144
|
+
# persist_path = ".dirsql/cache.db" # optional; this is the default
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
See [Persistence](./persistence.md) for the full reconcile algorithm,
|
|
148
|
+
storage layout, and limitations.
|
|
149
|
+
|
|
150
|
+
## Strict Mode
|
|
151
|
+
|
|
152
|
+
By default, auto-injected virtuals that aren't in the DDL are silently
|
|
153
|
+
dropped, and undeclared user-extract keys are dropped. Enable strict mode
|
|
154
|
+
to error when an extract emits keys not declared in the DDL:
|
|
155
|
+
|
|
156
|
+
```toml
|
|
157
|
+
[[table]]
|
|
158
|
+
ddl = "CREATE TABLE comments (thread_id TEXT)"
|
|
159
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
160
|
+
strict = true
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Strict mode does **not** apply to auto-injected stat virtuals — those are
|
|
164
|
+
always filtered to the DDL's declared columns regardless. Strict mode
|
|
165
|
+
applies only to keys produced by an extract callback (relevant for
|
|
166
|
+
programmatic [tables](./tables.md)).
|
|
167
|
+
|
|
168
|
+
## Full Example
|
|
169
|
+
|
|
170
|
+
```toml
|
|
171
|
+
[dirsql]
|
|
172
|
+
ignore = ["node_modules/**", ".git/**", "dist/**"]
|
|
173
|
+
|
|
174
|
+
[[table]]
|
|
175
|
+
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
176
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
177
|
+
|
|
178
|
+
[[table]]
|
|
179
|
+
ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
|
|
180
|
+
glob = "**/index.md"
|
|
181
|
+
|
|
182
|
+
[[table]]
|
|
183
|
+
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
184
|
+
glob = "logs/*.csv"
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## When you need parsed content
|
|
188
|
+
|
|
189
|
+
`.dirsql.toml` does not parse file contents. For columns derived from the
|
|
190
|
+
*inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
|
|
191
|
+
register a programmatic [`Table`](./tables.md) instead, and parse the
|
|
192
|
+
bytes in your host language. Glob captures and stat virtuals are still
|
|
193
|
+
auto-injected into rows produced by your extract.
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# Persistence
|
|
2
|
+
|
|
3
|
+
By default `dirsql` keeps its SQLite database in memory and rebuilds it from scratch every time the process starts. For large directories this can take seconds to minutes -- nearly all of which is spent re-parsing files that haven't changed since the previous run.
|
|
4
|
+
|
|
5
|
+
Persistence stores the SQLite database on disk so that subsequent startups only re-parse the files that have actually changed.
|
|
6
|
+
|
|
7
|
+
::: tip Same answers, faster startup
|
|
8
|
+
Outside the edge cases listed under [Limitations](#limitations), the rows returned by `query()` after a persistent startup are equivalent to those produced by a from-scratch rebuild. Persistence is a startup-time optimization, not a correctness compromise. The reconcile algorithm is the same one `git status` uses to decide which files have changed since the last index write.
|
|
9
|
+
:::
|
|
10
|
+
|
|
11
|
+
## Quick start
|
|
12
|
+
|
|
13
|
+
::: code-group
|
|
14
|
+
|
|
15
|
+
```toml [.dirsql.toml]
|
|
16
|
+
[dirsql]
|
|
17
|
+
persist = true
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
```python [Python]
|
|
21
|
+
from dirsql import DirSQL
|
|
22
|
+
|
|
23
|
+
db = DirSQL("./my-project", tables=[...], persist=True)
|
|
24
|
+
await db.ready()
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
```rust [Rust]
|
|
28
|
+
use dirsql::DirSQL;
|
|
29
|
+
|
|
30
|
+
let db = DirSQL::builder()
|
|
31
|
+
.root("./my-project")
|
|
32
|
+
.tables(vec![/* ... */])
|
|
33
|
+
.persist(true)
|
|
34
|
+
.build()?;
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```typescript [TypeScript]
|
|
38
|
+
import { DirSQL } from "dirsql";
|
|
39
|
+
|
|
40
|
+
const db = new DirSQL({ root: "./my-project", tables: [/* ... */], persist: true });
|
|
41
|
+
await db.ready;
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
:::
|
|
45
|
+
|
|
46
|
+
That's it. The first run writes the database to `./my-project/.dirsql/cache.db`. Every subsequent startup uses the cache.
|
|
47
|
+
|
|
48
|
+
## Configuration
|
|
49
|
+
|
|
50
|
+
| Option | Type | Default | Meaning |
|
|
51
|
+
|---|---|---|---|
|
|
52
|
+
| `persist` | boolean | `false` | Enable persistent on-disk storage. |
|
|
53
|
+
| `persist_path` (Python, Rust) / `persistPath` (TypeScript) | string | `<root>/.dirsql/cache.db` | Override the database file path. Ignored when `persist` is `false`. |
|
|
54
|
+
|
|
55
|
+
The default location keeps the cache alongside the data it indexes, which means it follows the project around (clone, copy, move) without extra setup. Override `persist_path` if you want the cache somewhere else -- a CI cache directory, a tmpfs mount, an XDG cache dir, etc.
|
|
56
|
+
|
|
57
|
+
::: code-group
|
|
58
|
+
|
|
59
|
+
```toml [.dirsql.toml]
|
|
60
|
+
[dirsql]
|
|
61
|
+
persist = true
|
|
62
|
+
persist_path = "/var/cache/dirsql/myproject.db"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
```python [Python]
|
|
66
|
+
db = DirSQL(
|
|
67
|
+
"./my-project",
|
|
68
|
+
tables=[...],
|
|
69
|
+
persist=True,
|
|
70
|
+
persist_path="/var/cache/dirsql/myproject.db",
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
```rust [Rust]
|
|
75
|
+
let db = DirSQL::builder()
|
|
76
|
+
.root("./my-project")
|
|
77
|
+
.tables(vec![/* ... */])
|
|
78
|
+
.persist(true)
|
|
79
|
+
.persist_path("/var/cache/dirsql/myproject.db")
|
|
80
|
+
.build()?;
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
```typescript [TypeScript]
|
|
84
|
+
const db = new DirSQL({
|
|
85
|
+
root: "./my-project",
|
|
86
|
+
tables: [/* ... */],
|
|
87
|
+
persist: true,
|
|
88
|
+
persistPath: "/var/cache/dirsql/myproject.db",
|
|
89
|
+
});
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
:::
|
|
93
|
+
|
|
94
|
+
## The `.dirsql/` directory
|
|
95
|
+
|
|
96
|
+
`dirsql` reserves the top-level `.dirsql/` directory inside every scanned root. It is **unconditionally excluded from the directory walk**, whether persistence is enabled or not. This means:
|
|
97
|
+
|
|
98
|
+
- The default cache path `<root>/.dirsql/cache.db` cannot accidentally be ingested as a data file.
|
|
99
|
+
- You can place additional `dirsql`-related files in `.dirsql/` (e.g. a project-local config snapshot) without them being parsed.
|
|
100
|
+
- You should not put your own data files in `.dirsql/` -- they will be silently ignored.
|
|
101
|
+
|
|
102
|
+
If you persist into `.dirsql/`, add it to your `.gitignore`:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
.dirsql/
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
The cache file should never be committed -- it is reproducible from the source tree and frequently large.
|
|
109
|
+
|
|
110
|
+
## How the startup reconcile works
|
|
111
|
+
|
|
112
|
+
When a persistent cache exists, `dirsql` does not blindly trust it. On startup it:
|
|
113
|
+
|
|
114
|
+
1. **Checks compatibility metadata.** If the cached `dirsql` version, schema version, glob configuration, parser versions, or canonical root path differs from the current build, the cache is wiped and rebuilt from scratch.
|
|
115
|
+
2. **Walks the tree and stats every matching file.** This is metadata-only -- no file contents are read.
|
|
116
|
+
3. **For each file, compares the live `(size, mtime, ctime, inode, dev)` tuple against the cached row:**
|
|
117
|
+
- **Trust the cache** when every field matches *and* the file's mtime is older than the cache's snapshot time (outside the racy window).
|
|
118
|
+
- **Hash-confirm** when the tuple matches but the file's mtime falls inside the racy window. `dirsql` reads and hashes the file; if the hash matches the cached hash, the cache is trusted.
|
|
119
|
+
- **Re-parse** when any field of the tuple differs.
|
|
120
|
+
4. **Deletes** rows for files that were in the cache but are no longer on disk.
|
|
121
|
+
5. **Inserts** rows for files that are on disk but were not in the cache.
|
|
122
|
+
|
|
123
|
+
This is the same algorithm `git status` uses to decide which files have changed since the last index write. The "racy window" handling is what closes the gap when a file is modified within the same filesystem-timestamp resolution as the cache write.
|
|
124
|
+
|
|
125
|
+
## When `dirsql` does a full rebuild
|
|
126
|
+
|
|
127
|
+
Any of the following will cause the cache to be discarded and rebuilt from scratch on the next startup:
|
|
128
|
+
|
|
129
|
+
- The `dirsql` library was upgraded between runs.
|
|
130
|
+
- The glob configuration changed (a new table, a removed table, a modified glob, a changed `ignore` list).
|
|
131
|
+
- A built-in parser version changed (this generally only happens on `dirsql` upgrades).
|
|
132
|
+
- The cache was written for a different root directory than the one currently configured.
|
|
133
|
+
- The internal schema of the cache changed (i.e. you upgraded `dirsql` across a schema version bump).
|
|
134
|
+
|
|
135
|
+
Full rebuilds take exactly as long as a non-persistent startup -- there is no penalty for them, only a missed optimization.
|
|
136
|
+
|
|
137
|
+
## Limitations
|
|
138
|
+
|
|
139
|
+
### Network filesystems
|
|
140
|
+
|
|
141
|
+
NFS, SMB/CIFS, and similar network filesystems cache file attributes on the client and can return stale `stat` results. Persistent mode is **not supported** on network filesystems and may produce stale rows. Use in-memory mode (the default) if your `root` lives on a network mount.
|
|
142
|
+
|
|
143
|
+
### The mtime-preservation edge case
|
|
144
|
+
|
|
145
|
+
Racy-stat detection misses changes only when **all** of the following are true:
|
|
146
|
+
|
|
147
|
+
- A file's contents are modified.
|
|
148
|
+
- The file's size after modification is identical to its size before.
|
|
149
|
+
- The file's `mtime` is externally reset to a value older than the cache's snapshot time (e.g. via `touch -r` or a backup-restore tool that preserves mtime).
|
|
150
|
+
|
|
151
|
+
If you cannot tolerate this edge case, disable persistence (`persist = false`). This is the same trade-off `git` makes with `core.trustctime` / `core.checkStat`.
|
|
152
|
+
|
|
153
|
+
### Single writer
|
|
154
|
+
|
|
155
|
+
Only one `dirsql` process should write to a given cache file at a time. Multiple read-only processes can query the same file safely once the writer finishes the initial reconcile. Coordinated multi-writer access is not currently supported.
|
|
156
|
+
|
|
157
|
+
## Inspecting the cache
|
|
158
|
+
|
|
159
|
+
The persistent database is a normal SQLite file. You can open it with any SQLite client:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
sqlite3 .dirsql/cache.db
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
```sql
|
|
166
|
+
.tables
|
|
167
|
+
-- comments documents metrics _dirsql_files _dirsql_meta
|
|
168
|
+
|
|
169
|
+
SELECT * FROM _dirsql_meta;
|
|
170
|
+
-- schema_version | 1
|
|
171
|
+
-- dirsql_version | 0.3.0
|
|
172
|
+
-- glob_config_hash | <hex>
|
|
173
|
+
-- parser_versions | {"json":"1","jsonl":"1","csv":"1",...}
|
|
174
|
+
-- root_canonical | /home/alice/my-project
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
The `_dirsql_files` and `_dirsql_meta` tables are managed by `dirsql`. Do not modify them by hand -- on the next startup, `dirsql` will detect the inconsistency and rebuild from scratch.
|
|
@@ -4,7 +4,7 @@ name = "dirsql-py-ext"
|
|
|
4
4
|
# pypi/maturin handler can rewrite it via `write-version` before
|
|
5
5
|
# `maturin build`. `pyproject.toml` declares `dynamic = ["version"]`
|
|
6
6
|
# and maturin reads this field. Mirrors `packages/rust/Cargo.toml`.
|
|
7
|
-
version = "0.2.9"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
edition.workspace = true
|
|
9
9
|
publish = false
|
|
10
10
|
readme = "README.md"
|
|
@@ -32,6 +32,7 @@ export default defineConfig({
|
|
|
32
32
|
{ text: 'Defining Tables', link: '/guide/tables' },
|
|
33
33
|
{ text: 'Querying', link: '/guide/querying' },
|
|
34
34
|
{ text: 'File Watching', link: '/guide/watching' },
|
|
35
|
+
{ text: 'Persistence', link: '/guide/persistence' },
|
|
35
36
|
{ text: 'Async API', link: '/guide/async' },
|
|
36
37
|
{ text: 'Command-Line Interface', link: '/guide/cli' },
|
|
37
38
|
{ text: 'Collaboration with CRDTs', link: '/guide/crdt' }
|