dirsql 0.2.9__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. {dirsql-0.2.9 → dirsql-0.3.1}/Cargo.lock +43 -43
  2. {dirsql-0.2.9 → dirsql-0.3.1}/PKG-INFO +1 -1
  3. {dirsql-0.2.9 → dirsql-0.3.1}/docs/.vitepress/config.ts +1 -0
  4. dirsql-0.3.1/docs/guide/config.md +193 -0
  5. dirsql-0.3.1/docs/guide/persistence.md +177 -0
  6. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/Cargo.toml +1 -1
  7. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.vitepress/config.ts +1 -0
  8. dirsql-0.3.1/packages/python/docs/guide/config.md +193 -0
  9. dirsql-0.3.1/packages/python/docs/guide/persistence.md +177 -0
  10. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/src/lib.rs +10 -1
  11. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_async_dirsql.py +12 -0
  12. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_binding.py +35 -1
  13. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_docs_examples.py +14 -0
  14. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_docs_gaps.py +31 -152
  15. dirsql-0.3.1/packages/python/tests/integration/test_from_config.py +230 -0
  16. dirsql-0.3.1/packages/python/tests/integration/test_persist.py +300 -0
  17. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/Cargo.toml +1 -2
  18. dirsql-0.3.1/packages/rust/docs/guide/config.md +193 -0
  19. dirsql-0.3.1/packages/rust/docs/guide/persistence.md +177 -0
  20. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/config.rs +84 -179
  21. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/db.rs +15 -0
  22. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/lib.rs +504 -123
  23. dirsql-0.3.1/packages/rust/src/persist.rs +603 -0
  24. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/scanner.rs +33 -1
  25. dirsql-0.3.1/packages/rust/tests/docs_gaps.rs +151 -0
  26. dirsql-0.3.1/packages/rust/tests/from_config.rs +228 -0
  27. dirsql-0.3.1/packages/rust/tests/persist.rs +393 -0
  28. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/sdk.rs +10 -5
  29. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_async.py +17 -1
  30. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/test_async.py +12 -1
  31. dirsql-0.2.9/docs/guide/config.md +0 -205
  32. dirsql-0.2.9/packages/python/docs/guide/config.md +0 -205
  33. dirsql-0.2.9/packages/python/tests/integration/test_from_config.py +0 -361
  34. dirsql-0.2.9/packages/rust/docs/guide/config.md +0 -205
  35. dirsql-0.2.9/packages/rust/src/parser.rs +0 -1507
  36. dirsql-0.2.9/packages/rust/tests/docs_gaps.rs +0 -327
  37. dirsql-0.2.9/packages/rust/tests/from_config.rs +0 -256
  38. {dirsql-0.2.9 → dirsql-0.3.1}/Cargo.toml +0 -0
  39. {dirsql-0.2.9 → dirsql-0.3.1}/README.md +0 -0
  40. {dirsql-0.2.9 → dirsql-0.3.1}/docs/.claude/CLAUDE.md +0 -0
  41. {dirsql-0.2.9 → dirsql-0.3.1}/docs/.vitepress/theme/index.ts +0 -0
  42. {dirsql-0.2.9 → dirsql-0.3.1}/docs/.vitepress/theme/lang.ts +0 -0
  43. {dirsql-0.2.9 → dirsql-0.3.1}/docs/AGENTS.md +0 -0
  44. {dirsql-0.2.9 → dirsql-0.3.1}/docs/api/index.md +0 -0
  45. {dirsql-0.2.9 → dirsql-0.3.1}/docs/getting-started.md +0 -0
  46. {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/async.md +0 -0
  47. {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/cli.md +0 -0
  48. {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/crdt.md +0 -0
  49. {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/querying.md +0 -0
  50. {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/tables.md +0 -0
  51. {dirsql-0.2.9 → dirsql-0.3.1}/docs/guide/watching.md +0 -0
  52. {dirsql-0.2.9 → dirsql-0.3.1}/docs/index.md +0 -0
  53. {dirsql-0.2.9 → dirsql-0.3.1}/docs/migrations.md +0 -0
  54. {dirsql-0.2.9 → dirsql-0.3.1}/docs/package.json +0 -0
  55. {dirsql-0.2.9 → dirsql-0.3.1}/docs/playwright.config.ts +0 -0
  56. {dirsql-0.2.9 → dirsql-0.3.1}/docs/pnpm-lock.yaml +0 -0
  57. {dirsql-0.2.9 → dirsql-0.3.1}/docs/pnpm-workspace.yaml +0 -0
  58. {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/integration/home.spec.ts +0 -0
  59. {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/integration/language-flag.spec.ts +0 -0
  60. {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/unit/config.test.ts +0 -0
  61. {dirsql-0.2.9 → dirsql-0.3.1}/docs/tests/unit/lang.test.ts +0 -0
  62. {dirsql-0.2.9 → dirsql-0.3.1}/docs/vitest.config.ts +0 -0
  63. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/README.md +0 -0
  64. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.claude/CLAUDE.md +0 -0
  65. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.vitepress/theme/index.ts +0 -0
  66. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/.vitepress/theme/lang.ts +0 -0
  67. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/AGENTS.md +0 -0
  68. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/api/index.md +0 -0
  69. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/getting-started.md +0 -0
  70. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/async.md +0 -0
  71. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/cli.md +0 -0
  72. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/crdt.md +0 -0
  73. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/querying.md +0 -0
  74. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/tables.md +0 -0
  75. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/guide/watching.md +0 -0
  76. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/index.md +0 -0
  77. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/migrations.md +0 -0
  78. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/package.json +0 -0
  79. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/playwright.config.ts +0 -0
  80. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/pnpm-lock.yaml +0 -0
  81. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/pnpm-workspace.yaml +0 -0
  82. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/integration/home.spec.ts +0 -0
  83. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/integration/language-flag.spec.ts +0 -0
  84. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/unit/config.test.ts +0 -0
  85. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/tests/unit/lang.test.ts +0 -0
  86. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/docs/vitest.config.ts +0 -0
  87. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/python/conftest.py +0 -0
  88. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/__init__.py +0 -0
  89. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/conftest.py +0 -0
  90. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/__init__.py +0 -0
  91. {dirsql-0.2.9 → dirsql-0.3.1}/packages/python/tests/integration/test_dirsql.py +0 -0
  92. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/README.md +0 -0
  93. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/db_bench.rs +0 -0
  94. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/differ_bench.rs +0 -0
  95. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/matcher_bench.rs +0 -0
  96. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/benches/scanner_bench.rs +0 -0
  97. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/api/index.md +0 -0
  98. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/getting-started.md +0 -0
  99. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/async.md +0 -0
  100. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/cli.md +0 -0
  101. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/crdt.md +0 -0
  102. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/querying.md +0 -0
  103. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/tables.md +0 -0
  104. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/guide/watching.md +0 -0
  105. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/index.md +0 -0
  106. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/docs/migrations.md +0 -0
  107. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/bin/dirsql.rs +0 -0
  108. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/mod.rs +0 -0
  109. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/router.rs +0 -0
  110. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/serialize.rs +0 -0
  111. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/cli/server.rs +0 -0
  112. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/differ.rs +0 -0
  113. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/matcher.rs +0 -0
  114. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/src/watcher.rs +0 -0
  115. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/async_sdk.rs +0 -0
  116. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/cli_e2e.rs +0 -0
  117. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/cli_integration.rs +0 -0
  118. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/docs_examples.rs +0 -0
  119. {dirsql-0.2.9 → dirsql-0.3.1}/packages/rust/tests/readonly_query.rs +0 -0
  120. {dirsql-0.2.9 → dirsql-0.3.1}/pyproject.toml +0 -0
  121. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/__init__.py +0 -0
  122. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/__init__.py +0 -0
  123. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/binary_path.py +0 -0
  124. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/binary_path_test.py +0 -0
  125. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/is_windows.py +0 -0
  126. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/is_windows_test.py +0 -0
  127. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/main.py +0 -0
  128. {dirsql-0.2.9 → dirsql-0.3.1}/python/dirsql/_cli/main_test.py +0 -0
@@ -73,6 +73,18 @@ version = "1.0.102"
73
73
  source = "registry+https://github.com/rust-lang/crates.io-index"
74
74
  checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
75
75
 
76
+ [[package]]
77
+ name = "arrayref"
78
+ version = "0.3.9"
79
+ source = "registry+https://github.com/rust-lang/crates.io-index"
80
+ checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
81
+
82
+ [[package]]
83
+ name = "arrayvec"
84
+ version = "0.7.6"
85
+ source = "registry+https://github.com/rust-lang/crates.io-index"
86
+ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
87
+
76
88
  [[package]]
77
89
  name = "assert_cmd"
78
90
  version = "2.2.1"
@@ -176,6 +188,20 @@ version = "2.11.1"
176
188
  source = "registry+https://github.com/rust-lang/crates.io-index"
177
189
  checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
178
190
 
191
+ [[package]]
192
+ name = "blake3"
193
+ version = "1.8.4"
194
+ source = "registry+https://github.com/rust-lang/crates.io-index"
195
+ checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e"
196
+ dependencies = [
197
+ "arrayref",
198
+ "arrayvec",
199
+ "cc",
200
+ "cfg-if",
201
+ "constant_time_eq",
202
+ "cpufeatures",
203
+ ]
204
+
179
205
  [[package]]
180
206
  name = "bstr"
181
207
  version = "1.12.1"
@@ -300,6 +326,12 @@ version = "1.0.5"
300
326
  source = "registry+https://github.com/rust-lang/crates.io-index"
301
327
  checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
302
328
 
329
+ [[package]]
330
+ name = "constant_time_eq"
331
+ version = "0.4.2"
332
+ source = "registry+https://github.com/rust-lang/crates.io-index"
333
+ checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
334
+
303
335
  [[package]]
304
336
  name = "convert_case"
305
337
  version = "0.11.0"
@@ -325,6 +357,15 @@ version = "0.8.7"
325
357
  source = "registry+https://github.com/rust-lang/crates.io-index"
326
358
  checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
327
359
 
360
+ [[package]]
361
+ name = "cpufeatures"
362
+ version = "0.3.0"
363
+ source = "registry+https://github.com/rust-lang/crates.io-index"
364
+ checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
365
+ dependencies = [
366
+ "libc",
367
+ ]
368
+
328
369
  [[package]]
329
370
  name = "criterion"
330
371
  version = "0.5.1"
@@ -392,27 +433,6 @@ version = "0.2.4"
392
433
  source = "registry+https://github.com/rust-lang/crates.io-index"
393
434
  checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
394
435
 
395
- [[package]]
396
- name = "csv"
397
- version = "1.4.0"
398
- source = "registry+https://github.com/rust-lang/crates.io-index"
399
- checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
400
- dependencies = [
401
- "csv-core",
402
- "itoa",
403
- "ryu",
404
- "serde_core",
405
- ]
406
-
407
- [[package]]
408
- name = "csv-core"
409
- version = "0.1.13"
410
- source = "registry+https://github.com/rust-lang/crates.io-index"
411
- checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
412
- dependencies = [
413
- "memchr",
414
- ]
415
-
416
436
  [[package]]
417
437
  name = "ctor"
418
438
  version = "0.10.0"
@@ -441,9 +461,9 @@ version = "0.2.7"
441
461
  dependencies = [
442
462
  "assert_cmd",
443
463
  "axum",
464
+ "blake3",
444
465
  "clap",
445
466
  "criterion",
446
- "csv",
447
467
  "eventsource-client",
448
468
  "futures",
449
469
  "futures-channel",
@@ -458,7 +478,6 @@ dependencies = [
458
478
  "rusqlite",
459
479
  "serde",
460
480
  "serde_json",
461
- "serde_yaml",
462
481
  "tempfile",
463
482
  "thiserror",
464
483
  "tokio",
@@ -480,7 +499,7 @@ dependencies = [
480
499
 
481
500
  [[package]]
482
501
  name = "dirsql-py-ext"
483
- version = "0.2.9"
502
+ version = "0.3.1"
484
503
  dependencies = [
485
504
  "dirsql",
486
505
  "pyo3",
@@ -2188,19 +2207,6 @@ dependencies = [
2188
2207
  "serde",
2189
2208
  ]
2190
2209
 
2191
- [[package]]
2192
- name = "serde_yaml"
2193
- version = "0.9.34+deprecated"
2194
- source = "registry+https://github.com/rust-lang/crates.io-index"
2195
- checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
2196
- dependencies = [
2197
- "indexmap",
2198
- "itoa",
2199
- "ryu",
2200
- "serde",
2201
- "unsafe-libyaml",
2202
- ]
2203
-
2204
2210
  [[package]]
2205
2211
  name = "shlex"
2206
2212
  version = "1.3.0"
@@ -2591,12 +2597,6 @@ version = "0.2.6"
2591
2597
  source = "registry+https://github.com/rust-lang/crates.io-index"
2592
2598
  checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
2593
2599
 
2594
- [[package]]
2595
- name = "unsafe-libyaml"
2596
- version = "0.2.11"
2597
- source = "registry+https://github.com/rust-lang/crates.io-index"
2598
- checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
2599
-
2600
2600
  [[package]]
2601
2601
  name = "untrusted"
2602
2602
  version = "0.9.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dirsql
3
- Version: 0.2.9
3
+ Version: 0.3.1
4
4
  Requires-Dist: pytest>=8 ; extra == 'dev'
5
5
  Requires-Dist: pytest-describe>=2 ; extra == 'dev'
6
6
  Requires-Dist: pytest-asyncio>=0.23 ; extra == 'dev'
@@ -32,6 +32,7 @@ export default defineConfig({
32
32
  { text: 'Defining Tables', link: '/guide/tables' },
33
33
  { text: 'Querying', link: '/guide/querying' },
34
34
  { text: 'File Watching', link: '/guide/watching' },
35
+ { text: 'Persistence', link: '/guide/persistence' },
35
36
  { text: 'Async API', link: '/guide/async' },
36
37
  { text: 'Command-Line Interface', link: '/guide/cli' },
37
38
  { text: 'Collaboration with CRDTs', link: '/guide/crdt' }
@@ -0,0 +1,193 @@
1
+ ---
2
+ canonical: https://thekevinscott.github.io/dirsql/guide/config
3
+ ---
4
+
5
+ # Configuration File
6
+
7
+ > Online: <https://thekevinscott.github.io/dirsql/guide/config>
8
+
9
+ `dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
10
+ way produce **one row per matched file**. Each row's columns come from
11
+ filesystem facts:
12
+
13
+ - **Glob path captures** — named `{placeholder}` segments in the glob.
14
+ - **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
15
+ stat-derived metadata.
16
+
17
+ Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
18
+ configured in `.dirsql.toml`. If you need columns derived from file
19
+ contents, register a programmatic [`Table`](./tables.md) whose `extract`
20
+ function does the parsing in your host language.
21
+
22
+ ## Basic Example
23
+
24
+ ```toml
25
+ [dirsql]
26
+ ignore = ["node_modules/**", ".git/**"]
27
+
28
+ [[table]]
29
+ ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
30
+ glob = "posts/*.md"
31
+ ```
32
+
33
+ Each `posts/*.md` file produces one row. The DDL declares which stat
34
+ virtuals are surfaced as SQL columns.
35
+
36
+ ## Loading a Config File
37
+
38
+ Pass the config file path to the `DirSQL` constructor:
39
+
40
+ ::: code-group
41
+
42
+ ```python [Python]
43
+ from dirsql import DirSQL
44
+
45
+ db = DirSQL(config="./my-project/.dirsql.toml")
46
+ await db.ready()
47
+ ```
48
+
49
+ ```rust [Rust]
50
+ use dirsql::DirSQL;
51
+
52
+ let db = DirSQL::builder()
53
+ .config("./my-project/.dirsql.toml")
54
+ .build()?;
55
+ ```
56
+
57
+ ```typescript [TypeScript]
58
+ import { DirSQL } from "dirsql";
59
+
60
+ // String argument is interpreted as a config file path.
61
+ const db = new DirSQL("./my-project/.dirsql.toml");
62
+ await db.ready;
63
+ ```
64
+
65
+ :::
66
+
67
+ By default, the root directory scanned is the config file's parent
68
+ directory. Override it by passing `root` explicitly (the explicit value
69
+ wins and a warning is emitted) or by declaring `[dirsql].root` in the
70
+ config file itself.
71
+
72
+ ## Root Directory
73
+
74
+ By default, the config file's parent directory is the scan root. To index
75
+ a different location, declare `[dirsql].root` (relative paths are resolved
76
+ relative to the config file's parent):
77
+
78
+ ```toml
79
+ [dirsql]
80
+ root = "../data"
81
+ ignore = ["node_modules/**"]
82
+ ```
83
+
84
+ ## Stat Virtuals
85
+
86
+ Every config-defined table can expose any of these reserved columns. Add
87
+ the ones you want to your DDL; the rest are silently dropped.
88
+
89
+ | Column | Type | Source |
90
+ |--------|---------|--------|
91
+ | `_path` | TEXT | The file's path relative to the scan root. |
92
+ | `_basename` | TEXT | The filename including extension. |
93
+ | `_dir` | TEXT | The parent directory path (relative to root). |
94
+ | `_ext` | TEXT | The file extension, lowercased, no leading dot. |
95
+ | `_size` | INTEGER | Size in bytes. |
96
+ | `_mtime` | INTEGER | Last-modified time, unix seconds. |
97
+ | `_ctime` | INTEGER | Created/changed time, unix seconds. |
98
+
99
+ Example query:
100
+
101
+ ```sql
102
+ SELECT _basename, _size
103
+ FROM posts
104
+ WHERE _mtime > strftime('%s', '2024-01-01')
105
+ ORDER BY _mtime DESC;
106
+ ```
107
+
108
+ ## Path Captures
109
+
110
+ Use `{name}` in glob patterns to extract path segments as columns. Add a
111
+ matching column name to the DDL and the capture is auto-populated:
112
+
113
+ ```toml
114
+ [[table]]
115
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
116
+ glob = "_comments/{thread_id}/*.jsonl"
117
+ ```
118
+
119
+ A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
120
+ `thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
121
+ to the file's modification time.
122
+
123
+ ## Ignore Patterns
124
+
125
+ The `ignore` list skips files and directories entirely (not even scanned):
126
+
127
+ ```toml
128
+ [dirsql]
129
+ ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
130
+ ```
131
+
132
+ The top-level `.dirsql/` directory is always excluded, whether you list it
133
+ or not — it is a reserved namespace for `dirsql`'s own metadata (see
134
+ [Persistence](./persistence.md)).
135
+
136
+ ## Persistence
137
+
138
+ Set `persist = true` to keep the SQLite database on disk between runs
139
+ instead of rebuilding from scratch on every startup:
140
+
141
+ ```toml
142
+ [dirsql]
143
+ persist = true
144
+ # persist_path = ".dirsql/cache.db" # optional; this is the default
145
+ ```
146
+
147
+ See [Persistence](./persistence.md) for the full reconcile algorithm,
148
+ storage layout, and limitations.
149
+
150
+ ## Strict Mode
151
+
152
+ By default, auto-injected virtuals that aren't in the DDL are silently
153
+ dropped, and undeclared user-extract keys are dropped. Enable strict mode
154
+ to error when an extract emits keys not declared in the DDL:
155
+
156
+ ```toml
157
+ [[table]]
158
+ ddl = "CREATE TABLE comments (thread_id TEXT)"
159
+ glob = "_comments/{thread_id}/*.jsonl"
160
+ strict = true
161
+ ```
162
+
163
+ Strict mode does **not** apply to auto-injected stat virtuals — those are
164
+ always filtered to the DDL's declared columns regardless. Strict mode
165
+ applies only to keys produced by an extract callback (relevant for
166
+ programmatic [tables](./tables.md)).
167
+
168
+ ## Full Example
169
+
170
+ ```toml
171
+ [dirsql]
172
+ ignore = ["node_modules/**", ".git/**", "dist/**"]
173
+
174
+ [[table]]
175
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
176
+ glob = "_comments/{thread_id}/*.jsonl"
177
+
178
+ [[table]]
179
+ ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
180
+ glob = "**/index.md"
181
+
182
+ [[table]]
183
+ ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
184
+ glob = "logs/*.csv"
185
+ ```
186
+
187
+ ## When you need parsed content
188
+
189
+ `.dirsql.toml` does not parse file contents. For columns derived from the
190
+ *inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
191
+ register a programmatic [`Table`](./tables.md) instead, and parse the
192
+ bytes in your host language. Glob captures and stat virtuals are still
193
+ auto-injected into rows produced by your extract.
@@ -0,0 +1,177 @@
1
+ # Persistence
2
+
3
+ By default `dirsql` keeps its SQLite database in memory and rebuilds it from scratch every time the process starts. For large directories this can take seconds to minutes -- nearly all of which is spent re-parsing files that haven't changed since the previous run.
4
+
5
+ Persistence stores the SQLite database on disk so that subsequent startups only re-parse the files that have actually changed.
6
+
7
+ ::: tip Same answers, faster startup
8
+ The rows returned by `query()` after a persistent startup are equivalent to those produced by a from-scratch rebuild, apart from the narrow edge cases described under [Limitations](#limitations). Persistence is a startup-time optimization, not a correctness compromise. The reconcile algorithm is the same one `git status` uses to decide which files have changed since the last index write.
9
+ :::
10
+
11
+ ## Quick start
12
+
13
+ ::: code-group
14
+
15
+ ```toml [.dirsql.toml]
16
+ [dirsql]
17
+ persist = true
18
+ ```
19
+
20
+ ```python [Python]
21
+ from dirsql import DirSQL
22
+
23
+ db = DirSQL("./my-project", tables=[...], persist=True)
24
+ await db.ready()
25
+ ```
26
+
27
+ ```rust [Rust]
28
+ use dirsql::DirSQL;
29
+
30
+ let db = DirSQL::builder()
31
+ .root("./my-project")
32
+ .tables(vec![/* ... */])
33
+ .persist(true)
34
+ .build()?;
35
+ ```
36
+
37
+ ```typescript [TypeScript]
38
+ import { DirSQL } from "dirsql";
39
+
40
+ const db = new DirSQL({ root: "./my-project", tables: [/* ... */], persist: true });
41
+ await db.ready;
42
+ ```
43
+
44
+ :::
45
+
46
+ That's it. The first run writes the database to `./my-project/.dirsql/cache.db`. Every subsequent startup uses the cache.
47
+
48
+ ## Configuration
49
+
50
+ | Option | Type | Default | Meaning |
51
+ |---|---|---|---|
52
+ | `persist` | boolean | `false` | Enable persistent on-disk storage. |
53
+ | `persist_path` (Python, Rust) / `persistPath` (TypeScript) | string | `<root>/.dirsql/cache.db` | Override the database file path. Ignored when `persist` is `false`. |
54
+
55
+ The default location keeps the cache alongside the data it indexes, which means it follows the project around (clone, copy, move) without extra setup. Override `persist_path` if you want the cache somewhere else -- a CI cache directory, a tmpfs mount, an XDG cache dir, etc.
56
+
57
+ ::: code-group
58
+
59
+ ```toml [.dirsql.toml]
60
+ [dirsql]
61
+ persist = true
62
+ persist_path = "/var/cache/dirsql/myproject.db"
63
+ ```
64
+
65
+ ```python [Python]
66
+ db = DirSQL(
67
+ "./my-project",
68
+ tables=[...],
69
+ persist=True,
70
+ persist_path="/var/cache/dirsql/myproject.db",
71
+ )
72
+ ```
73
+
74
+ ```rust [Rust]
75
+ let db = DirSQL::builder()
76
+ .root("./my-project")
77
+ .tables(vec![/* ... */])
78
+ .persist(true)
79
+ .persist_path("/var/cache/dirsql/myproject.db")
80
+ .build()?;
81
+ ```
82
+
83
+ ```typescript [TypeScript]
84
+ const db = new DirSQL({
85
+ root: "./my-project",
86
+ tables: [/* ... */],
87
+ persist: true,
88
+ persistPath: "/var/cache/dirsql/myproject.db",
89
+ });
90
+ ```
91
+
92
+ :::
93
+
94
+ ## The `.dirsql/` directory
95
+
96
+ `dirsql` reserves the top-level `.dirsql/` directory inside every scanned root. It is **unconditionally excluded from the directory walk**, whether persistence is enabled or not. This means:
97
+
98
+ - The default cache path `<root>/.dirsql/cache.db` cannot accidentally be ingested as a data file.
99
+ - You can place additional `dirsql`-related files in `.dirsql/` (e.g. a project-local config snapshot) without them being parsed.
100
+ - You should not put your own data files in `.dirsql/` -- they will be silently ignored.
101
+
102
+ If you persist into `.dirsql/`, add it to your `.gitignore`:
103
+
104
+ ```
105
+ .dirsql/
106
+ ```
107
+
108
+ The cache file should never be committed -- it is reproducible from the source tree and frequently large.
109
+
110
+ ## How the startup reconcile works
111
+
112
+ When a persistent cache exists, `dirsql` does not blindly trust it. On startup it:
113
+
114
+ 1. **Checks compatibility metadata.** If the cached `dirsql` version, schema version, glob configuration, parser versions, or canonical root path differs from the current build, the cache is wiped and rebuilt from scratch.
115
+ 2. **Walks the tree and stats every matching file.** This is metadata-only -- no file contents are read.
116
+ 3. **For each file, compares the live `(size, mtime, ctime, inode, dev)` tuple against the cached row:**
117
+ - **Trust the cache** when every field matches *and* the file's mtime is older than the cache's snapshot time (outside the racy window).
118
+ - **Hash-confirm** when the tuple matches but the file's mtime falls inside the racy window. `dirsql` reads and hashes the file; if the hash matches the cached hash, the cache is trusted.
119
+ - **Re-parse** when any field of the tuple differs.
120
+ 4. **Deletes** rows for files that were in the cache but are no longer on disk.
121
+ 5. **Inserts** rows for files that are on disk but were not in the cache.
122
+
123
+ This is the same algorithm `git status` uses to decide which files have changed since the last index write. The "racy window" handling is what closes the gap when a file is modified within the same filesystem-timestamp resolution as the cache write.
124
+
125
+ ## When `dirsql` does a full rebuild
126
+
127
+ Any of the following will cause the cache to be discarded and rebuilt from scratch on the next startup:
128
+
129
+ - The `dirsql` library was upgraded between runs.
130
+ - The glob configuration changed (a new table, a removed table, a modified glob, a changed `ignore` list).
131
+ - A built-in parser version changed (this generally only happens on `dirsql` upgrades).
132
+ - The cache was written for a different root directory than the one currently configured.
133
+ - The internal schema of the cache changed (i.e. you upgraded `dirsql` across a schema version bump).
134
+
135
+ Full rebuilds take exactly as long as a non-persistent startup -- there is no penalty for them, only a missed optimization.
136
+
137
+ ## Limitations
138
+
139
+ ### Network filesystems
140
+
141
+ NFS, SMB/CIFS, and similar network filesystems cache file attributes on the client and can return stale `stat` results. Persistent mode is **not supported** on network filesystems and may produce stale rows. Use in-memory mode (the default) if your `root` lives on a network mount.
142
+
143
+ ### The mtime-preservation edge case
144
+
145
+ Racy-stat detection misses changes only when **all** of the following are true:
146
+
147
+ - A file's contents are modified.
148
+ - The file's size after modification is identical to its size before.
149
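+ - The file's `mtime` is externally reset to exactly the value recorded in the cache, which is older than the cache's snapshot time (e.g. via `touch -r` or a backup-restore tool that preserves mtime). Any other `mtime` value makes the stat tuple differ and triggers a re-parse.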
150
+
151
+ If you cannot tolerate this edge case, disable persistence (`persist = false`). This is the same trade-off `git` makes with `core.trustctime` / `core.checkStat`.
152
+
153
+ ### Single writer
154
+
155
+ Only one `dirsql` process should write to a given cache file at a time. Multiple read-only processes can query the same file safely once the writer finishes the initial reconcile. Coordinated multi-writer access is not supported as of the v0.3 series.
156
+
157
+ ## Inspecting the cache
158
+
159
+ The persistent database is a normal SQLite file. You can open it with any SQLite client:
160
+
161
+ ```bash
162
+ sqlite3 .dirsql/cache.db
163
+ ```
164
+
165
+ ```sql
166
+ .tables
167
+ -- comments documents metrics _dirsql_files _dirsql_meta
168
+
169
+ SELECT * FROM _dirsql_meta;
170
+ -- schema_version | 1
171
+ -- dirsql_version | 0.3.0
172
+ -- glob_config_hash | <hex>
173
+ -- parser_versions | {"json":"1","jsonl":"1","csv":"1",...}
174
+ -- root_canonical | /home/alice/my-project
175
+ ```
176
+
177
+ The `_dirsql_files` and `_dirsql_meta` tables are managed by `dirsql`. Do not modify them by hand -- on the next startup, `dirsql` will detect the inconsistency and rebuild from scratch.
@@ -4,7 +4,7 @@ name = "dirsql-py-ext"
4
4
  # pypi/maturin handler can rewrite it via `write-version` before
5
5
  # `maturin build`. `pyproject.toml` declares `dynamic = ["version"]`
6
6
  # and maturin reads this field. Mirrors `packages/rust/Cargo.toml`.
7
- version = "0.2.9"
7
+ version = "0.3.1"
8
8
  edition.workspace = true
9
9
  publish = false
10
10
  readme = "README.md"
@@ -32,6 +32,7 @@ export default defineConfig({
32
32
  { text: 'Defining Tables', link: '/guide/tables' },
33
33
  { text: 'Querying', link: '/guide/querying' },
34
34
  { text: 'File Watching', link: '/guide/watching' },
35
+ { text: 'Persistence', link: '/guide/persistence' },
35
36
  { text: 'Async API', link: '/guide/async' },
36
37
  { text: 'Command-Line Interface', link: '/guide/cli' },
37
38
  { text: 'Collaboration with CRDTs', link: '/guide/crdt' }