dirsql 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. {dirsql-0.3.0 → dirsql-0.3.2}/Cargo.lock +1 -43
  2. {dirsql-0.3.0 → dirsql-0.3.2}/PKG-INFO +1 -1
  3. dirsql-0.3.2/docs/guide/config.md +193 -0
  4. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/Cargo.toml +1 -1
  5. dirsql-0.3.2/packages/python/docs/guide/config.md +193 -0
  6. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/test_docs_gaps.py +31 -152
  7. dirsql-0.3.2/packages/python/tests/integration/test_from_config.py +230 -0
  8. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/Cargo.toml +0 -2
  9. dirsql-0.3.2/packages/rust/docs/guide/config.md +193 -0
  10. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/config.rs +40 -181
  11. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/lib.rs +129 -58
  12. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/cli_e2e.rs +14 -20
  13. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/cli_integration.rs +31 -23
  14. dirsql-0.3.2/packages/rust/tests/docs_gaps.rs +151 -0
  15. dirsql-0.3.2/packages/rust/tests/from_config.rs +228 -0
  16. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/sdk.rs +4 -2
  17. dirsql-0.3.0/docs/guide/config.md +0 -219
  18. dirsql-0.3.0/packages/python/docs/guide/config.md +0 -219
  19. dirsql-0.3.0/packages/python/tests/integration/test_from_config.py +0 -361
  20. dirsql-0.3.0/packages/rust/docs/guide/config.md +0 -219
  21. dirsql-0.3.0/packages/rust/src/parser.rs +0 -1507
  22. dirsql-0.3.0/packages/rust/tests/docs_gaps.rs +0 -327
  23. dirsql-0.3.0/packages/rust/tests/from_config.rs +0 -256
  24. {dirsql-0.3.0 → dirsql-0.3.2}/Cargo.toml +0 -0
  25. {dirsql-0.3.0 → dirsql-0.3.2}/README.md +0 -0
  26. {dirsql-0.3.0 → dirsql-0.3.2}/docs/.claude/CLAUDE.md +0 -0
  27. {dirsql-0.3.0 → dirsql-0.3.2}/docs/.vitepress/config.ts +0 -0
  28. {dirsql-0.3.0 → dirsql-0.3.2}/docs/.vitepress/theme/index.ts +0 -0
  29. {dirsql-0.3.0 → dirsql-0.3.2}/docs/.vitepress/theme/lang.ts +0 -0
  30. {dirsql-0.3.0 → dirsql-0.3.2}/docs/AGENTS.md +0 -0
  31. {dirsql-0.3.0 → dirsql-0.3.2}/docs/api/index.md +0 -0
  32. {dirsql-0.3.0 → dirsql-0.3.2}/docs/getting-started.md +0 -0
  33. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/async.md +0 -0
  34. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/cli.md +0 -0
  35. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/crdt.md +0 -0
  36. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/persistence.md +0 -0
  37. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/querying.md +0 -0
  38. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/tables.md +0 -0
  39. {dirsql-0.3.0 → dirsql-0.3.2}/docs/guide/watching.md +0 -0
  40. {dirsql-0.3.0 → dirsql-0.3.2}/docs/index.md +0 -0
  41. {dirsql-0.3.0 → dirsql-0.3.2}/docs/migrations.md +0 -0
  42. {dirsql-0.3.0 → dirsql-0.3.2}/docs/package.json +0 -0
  43. {dirsql-0.3.0 → dirsql-0.3.2}/docs/playwright.config.ts +0 -0
  44. {dirsql-0.3.0 → dirsql-0.3.2}/docs/pnpm-lock.yaml +0 -0
  45. {dirsql-0.3.0 → dirsql-0.3.2}/docs/pnpm-workspace.yaml +0 -0
  46. {dirsql-0.3.0 → dirsql-0.3.2}/docs/tests/integration/home.spec.ts +0 -0
  47. {dirsql-0.3.0 → dirsql-0.3.2}/docs/tests/integration/language-flag.spec.ts +0 -0
  48. {dirsql-0.3.0 → dirsql-0.3.2}/docs/tests/unit/config.test.ts +0 -0
  49. {dirsql-0.3.0 → dirsql-0.3.2}/docs/tests/unit/lang.test.ts +0 -0
  50. {dirsql-0.3.0 → dirsql-0.3.2}/docs/vitest.config.ts +0 -0
  51. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/README.md +0 -0
  52. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/.claude/CLAUDE.md +0 -0
  53. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/.vitepress/config.ts +0 -0
  54. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/.vitepress/theme/index.ts +0 -0
  55. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/.vitepress/theme/lang.ts +0 -0
  56. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/AGENTS.md +0 -0
  57. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/api/index.md +0 -0
  58. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/getting-started.md +0 -0
  59. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/async.md +0 -0
  60. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/cli.md +0 -0
  61. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/crdt.md +0 -0
  62. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/persistence.md +0 -0
  63. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/querying.md +0 -0
  64. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/tables.md +0 -0
  65. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/guide/watching.md +0 -0
  66. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/index.md +0 -0
  67. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/migrations.md +0 -0
  68. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/package.json +0 -0
  69. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/playwright.config.ts +0 -0
  70. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/pnpm-lock.yaml +0 -0
  71. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/pnpm-workspace.yaml +0 -0
  72. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/tests/integration/home.spec.ts +0 -0
  73. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/tests/integration/language-flag.spec.ts +0 -0
  74. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/tests/unit/config.test.ts +0 -0
  75. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/tests/unit/lang.test.ts +0 -0
  76. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/docs/vitest.config.ts +0 -0
  77. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/python/conftest.py +0 -0
  78. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/src/lib.rs +0 -0
  79. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/__init__.py +0 -0
  80. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/conftest.py +0 -0
  81. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/__init__.py +0 -0
  82. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/test_async_dirsql.py +0 -0
  83. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/test_binding.py +0 -0
  84. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/test_dirsql.py +0 -0
  85. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/test_docs_examples.py +0 -0
  86. {dirsql-0.3.0 → dirsql-0.3.2}/packages/python/tests/integration/test_persist.py +0 -0
  87. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/README.md +0 -0
  88. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/benches/db_bench.rs +0 -0
  89. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/benches/differ_bench.rs +0 -0
  90. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/benches/matcher_bench.rs +0 -0
  91. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/benches/scanner_bench.rs +0 -0
  92. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/api/index.md +0 -0
  93. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/getting-started.md +0 -0
  94. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/async.md +0 -0
  95. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/cli.md +0 -0
  96. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/crdt.md +0 -0
  97. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/persistence.md +0 -0
  98. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/querying.md +0 -0
  99. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/tables.md +0 -0
  100. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/guide/watching.md +0 -0
  101. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/index.md +0 -0
  102. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/docs/migrations.md +0 -0
  103. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/bin/dirsql.rs +0 -0
  104. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/cli/mod.rs +0 -0
  105. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/cli/router.rs +0 -0
  106. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/cli/serialize.rs +0 -0
  107. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/cli/server.rs +0 -0
  108. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/db.rs +0 -0
  109. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/differ.rs +0 -0
  110. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/matcher.rs +0 -0
  111. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/persist.rs +0 -0
  112. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/scanner.rs +0 -0
  113. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/src/watcher.rs +0 -0
  114. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/async_sdk.rs +0 -0
  115. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/docs_examples.rs +0 -0
  116. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/persist.rs +0 -0
  117. {dirsql-0.3.0 → dirsql-0.3.2}/packages/rust/tests/readonly_query.rs +0 -0
  118. {dirsql-0.3.0 → dirsql-0.3.2}/pyproject.toml +0 -0
  119. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/__init__.py +0 -0
  120. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_async.py +0 -0
  121. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/__init__.py +0 -0
  122. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/binary_path.py +0 -0
  123. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/binary_path_test.py +0 -0
  124. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/is_windows.py +0 -0
  125. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/is_windows_test.py +0 -0
  126. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/main.py +0 -0
  127. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/_cli/main_test.py +0 -0
  128. {dirsql-0.3.0 → dirsql-0.3.2}/python/dirsql/test_async.py +0 -0
@@ -433,27 +433,6 @@ version = "0.2.4"
433
433
  source = "registry+https://github.com/rust-lang/crates.io-index"
434
434
  checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
435
435
 
436
- [[package]]
437
- name = "csv"
438
- version = "1.4.0"
439
- source = "registry+https://github.com/rust-lang/crates.io-index"
440
- checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
441
- dependencies = [
442
- "csv-core",
443
- "itoa",
444
- "ryu",
445
- "serde_core",
446
- ]
447
-
448
- [[package]]
449
- name = "csv-core"
450
- version = "0.1.13"
451
- source = "registry+https://github.com/rust-lang/crates.io-index"
452
- checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
453
- dependencies = [
454
- "memchr",
455
- ]
456
-
457
436
  [[package]]
458
437
  name = "ctor"
459
438
  version = "0.10.0"
@@ -485,7 +464,6 @@ dependencies = [
485
464
  "blake3",
486
465
  "clap",
487
466
  "criterion",
488
- "csv",
489
467
  "eventsource-client",
490
468
  "futures",
491
469
  "futures-channel",
@@ -500,7 +478,6 @@ dependencies = [
500
478
  "rusqlite",
501
479
  "serde",
502
480
  "serde_json",
503
- "serde_yaml",
504
481
  "tempfile",
505
482
  "thiserror",
506
483
  "tokio",
@@ -522,7 +499,7 @@ dependencies = [
522
499
 
523
500
  [[package]]
524
501
  name = "dirsql-py-ext"
525
- version = "0.3.0"
502
+ version = "0.3.2"
526
503
  dependencies = [
527
504
  "dirsql",
528
505
  "pyo3",
@@ -2230,19 +2207,6 @@ dependencies = [
2230
2207
  "serde",
2231
2208
  ]
2232
2209
 
2233
- [[package]]
2234
- name = "serde_yaml"
2235
- version = "0.9.34+deprecated"
2236
- source = "registry+https://github.com/rust-lang/crates.io-index"
2237
- checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
2238
- dependencies = [
2239
- "indexmap",
2240
- "itoa",
2241
- "ryu",
2242
- "serde",
2243
- "unsafe-libyaml",
2244
- ]
2245
-
2246
2210
  [[package]]
2247
2211
  name = "shlex"
2248
2212
  version = "1.3.0"
@@ -2633,12 +2597,6 @@ version = "0.2.6"
2633
2597
  source = "registry+https://github.com/rust-lang/crates.io-index"
2634
2598
  checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
2635
2599
 
2636
- [[package]]
2637
- name = "unsafe-libyaml"
2638
- version = "0.2.11"
2639
- source = "registry+https://github.com/rust-lang/crates.io-index"
2640
- checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
2641
-
2642
2600
  [[package]]
2643
2601
  name = "untrusted"
2644
2602
  version = "0.9.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dirsql
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Requires-Dist: pytest>=8 ; extra == 'dev'
5
5
  Requires-Dist: pytest-describe>=2 ; extra == 'dev'
6
6
  Requires-Dist: pytest-asyncio>=0.23 ; extra == 'dev'
@@ -0,0 +1,193 @@
1
+ ---
2
+ canonical: https://thekevinscott.github.io/dirsql/guide/config
3
+ ---
4
+
5
+ # Configuration File
6
+
7
+ > Online: <https://thekevinscott.github.io/dirsql/guide/config>
8
+
9
+ `dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
10
+ way produce **one row per matched file**. Each row's columns come from
11
+ filesystem facts:
12
+
13
+ - **Glob path captures** — named `{placeholder}` segments in the glob.
14
+ - **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
15
+ stat-derived metadata.
16
+
17
+ Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
18
+ configured in `.dirsql.toml`. If you need columns derived from file
19
+ contents, register a programmatic [`Table`](./tables.md) whose `extract`
20
+ function does the parsing in your host language.
21
+
22
+ ## Basic Example
23
+
24
+ ```toml
25
+ [dirsql]
26
+ ignore = ["node_modules/**", ".git/**"]
27
+
28
+ [[table]]
29
+ ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
30
+ glob = "posts/*.md"
31
+ ```
32
+
33
+ Each `posts/*.md` file produces one row. The DDL declares which stat
34
+ virtuals are surfaced as SQL columns.
35
+
36
+ ## Loading a Config File
37
+
38
+ Pass the config file path to the `DirSQL` constructor:
39
+
40
+ ::: code-group
41
+
42
+ ```python [Python]
43
+ from dirsql import DirSQL
44
+
45
+ db = DirSQL(config="./my-project/.dirsql.toml")
46
+ await db.ready()
47
+ ```
48
+
49
+ ```rust [Rust]
50
+ use dirsql::DirSQL;
51
+
52
+ let db = DirSQL::builder()
53
+ .config("./my-project/.dirsql.toml")
54
+ .build()?;
55
+ ```
56
+
57
+ ```typescript [TypeScript]
58
+ import { DirSQL } from "dirsql";
59
+
60
+ // String argument is interpreted as a config file path.
61
+ const db = new DirSQL("./my-project/.dirsql.toml");
62
+ await db.ready;
63
+ ```
64
+
65
+ :::
66
+
67
+ By default, the root directory scanned is the config file's parent
68
+ directory. Override it by passing `root` explicitly (the explicit value
69
+ wins and a warning is emitted) or by declaring `[dirsql].root` in the
70
+ config file itself.
71
+
72
+ ## Root Directory
73
+
74
+ By default, the config file's parent directory is the scan root. To index
75
+ a different location, declare `[dirsql].root` (relative paths are resolved
76
+ relative to the config file's parent):
77
+
78
+ ```toml
79
+ [dirsql]
80
+ root = "../data"
81
+ ignore = ["node_modules/**"]
82
+ ```
83
+
84
+ ## Stat Virtuals
85
+
86
+ Every config-defined table can expose any of these reserved columns. Add
87
+ the ones you want to your DDL; the rest are silently dropped.
88
+
89
+ | Column | Type | Source |
90
+ |--------|---------|--------|
91
+ | `_path` | TEXT | The file's path relative to the scan root. |
92
+ | `_basename` | TEXT | The filename including extension. |
93
+ | `_dir` | TEXT | The parent directory path (relative to root). |
94
+ | `_ext` | TEXT | The file extension, lowercased, no leading dot. |
95
+ | `_size` | INTEGER | Size in bytes. |
96
+ | `_mtime` | INTEGER | Last-modified time, unix seconds. |
97
+ | `_ctime` | INTEGER | Created/changed time, unix seconds. |
98
+
99
+ Example query:
100
+
101
+ ```sql
102
+ SELECT _basename, _size
103
+ FROM posts
104
+ WHERE _mtime > strftime('%s', '2024-01-01')
105
+ ORDER BY _mtime DESC;
106
+ ```
107
+
108
+ ## Path Captures
109
+
110
+ Use `{name}` in glob patterns to extract path segments as columns. Add a
111
+ matching column name to the DDL and the capture is auto-populated:
112
+
113
+ ```toml
114
+ [[table]]
115
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
116
+ glob = "_comments/{thread_id}/*.jsonl"
117
+ ```
118
+
119
+ A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
120
+ `thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
121
+ to the file's modification time.
122
+
123
+ ## Ignore Patterns
124
+
125
+ The `ignore` list skips files and directories entirely (not even scanned):
126
+
127
+ ```toml
128
+ [dirsql]
129
+ ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
130
+ ```
131
+
132
+ The top-level `.dirsql/` directory is always excluded, whether you list it
133
+ or not — it is a reserved namespace for `dirsql`'s own metadata (see
134
+ [Persistence](./persistence.md)).
135
+
136
+ ## Persistence
137
+
138
+ Set `persist = true` to keep the SQLite database on disk between runs
139
+ instead of rebuilding from scratch on every startup:
140
+
141
+ ```toml
142
+ [dirsql]
143
+ persist = true
144
+ # persist_path = ".dirsql/cache.db" # optional; this is the default
145
+ ```
146
+
147
+ See [Persistence](./persistence.md) for the full reconcile algorithm,
148
+ storage layout, and limitations.
149
+
150
+ ## Strict Mode
151
+
152
+ By default, any key that is not a column in the DDL is silently dropped,
153
+ whether it is an auto-injected virtual or a key emitted by a user extract.
154
+ Enable strict mode to raise an error when an extract emits undeclared keys:
155
+
156
+ ```toml
157
+ [[table]]
158
+ ddl = "CREATE TABLE comments (thread_id TEXT)"
159
+ glob = "_comments/{thread_id}/*.jsonl"
160
+ strict = true
161
+ ```
162
+
163
+ Strict mode does **not** apply to auto-injected stat virtuals — those are
164
+ always filtered to the DDL's declared columns regardless. Strict mode
165
+ applies only to keys produced by an extract callback (relevant for
166
+ programmatic [tables](./tables.md)).
167
+
168
+ ## Full Example
169
+
170
+ ```toml
171
+ [dirsql]
172
+ ignore = ["node_modules/**", ".git/**", "dist/**"]
173
+
174
+ [[table]]
175
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
176
+ glob = "_comments/{thread_id}/*.jsonl"
177
+
178
+ [[table]]
179
+ ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
180
+ glob = "**/index.md"
181
+
182
+ [[table]]
183
+ ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
184
+ glob = "logs/*.csv"
185
+ ```
186
+
187
+ ## When you need parsed content
188
+
189
+ `.dirsql.toml` does not parse file contents. For columns derived from the
190
+ *inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
191
+ register a programmatic [`Table`](./tables.md) instead, and parse the
192
+ bytes in your host language. Glob captures and stat virtuals are still
193
+ auto-injected into rows produced by your extract.
@@ -4,7 +4,7 @@ name = "dirsql-py-ext"
4
4
  # pypi/maturin handler can rewrite it via `write-version` before
5
5
  # `maturin build`. `pyproject.toml` declares `dynamic = ["version"]`
6
6
  # and maturin reads this field. Mirrors `packages/rust/Cargo.toml`.
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  edition.workspace = true
9
9
  publish = false
10
10
  readme = "README.md"
@@ -0,0 +1,193 @@
1
+ ---
2
+ canonical: https://thekevinscott.github.io/dirsql/guide/config
3
+ ---
4
+
5
+ # Configuration File
6
+
7
+ > Online: <https://thekevinscott.github.io/dirsql/guide/config>
8
+
9
+ `dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
10
+ way produce **one row per matched file**. Each row's columns come from
11
+ filesystem facts:
12
+
13
+ - **Glob path captures** — named `{placeholder}` segments in the glob.
14
+ - **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
15
+ stat-derived metadata.
16
+
17
+ Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
18
+ configured in `.dirsql.toml`. If you need columns derived from file
19
+ contents, register a programmatic [`Table`](./tables.md) whose `extract`
20
+ function does the parsing in your host language.
21
+
22
+ ## Basic Example
23
+
24
+ ```toml
25
+ [dirsql]
26
+ ignore = ["node_modules/**", ".git/**"]
27
+
28
+ [[table]]
29
+ ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
30
+ glob = "posts/*.md"
31
+ ```
32
+
33
+ Each `posts/*.md` file produces one row. The DDL declares which stat
34
+ virtuals are surfaced as SQL columns.
35
+
36
+ ## Loading a Config File
37
+
38
+ Pass the config file path to the `DirSQL` constructor:
39
+
40
+ ::: code-group
41
+
42
+ ```python [Python]
43
+ from dirsql import DirSQL
44
+
45
+ db = DirSQL(config="./my-project/.dirsql.toml")
46
+ await db.ready()
47
+ ```
48
+
49
+ ```rust [Rust]
50
+ use dirsql::DirSQL;
51
+
52
+ let db = DirSQL::builder()
53
+ .config("./my-project/.dirsql.toml")
54
+ .build()?;
55
+ ```
56
+
57
+ ```typescript [TypeScript]
58
+ import { DirSQL } from "dirsql";
59
+
60
+ // String argument is interpreted as a config file path.
61
+ const db = new DirSQL("./my-project/.dirsql.toml");
62
+ await db.ready;
63
+ ```
64
+
65
+ :::
66
+
67
+ By default, the root directory scanned is the config file's parent
68
+ directory. Override it by passing `root` explicitly (the explicit value
69
+ wins and a warning is emitted) or by declaring `[dirsql].root` in the
70
+ config file itself.
71
+
72
+ ## Root Directory
73
+
74
+ By default, the config file's parent directory is the scan root. To index
75
+ a different location, declare `[dirsql].root` (relative paths are resolved
76
+ relative to the config file's parent):
77
+
78
+ ```toml
79
+ [dirsql]
80
+ root = "../data"
81
+ ignore = ["node_modules/**"]
82
+ ```
83
+
84
+ ## Stat Virtuals
85
+
86
+ Every config-defined table can expose any of these reserved columns. Add
87
+ the ones you want to your DDL; the rest are silently dropped.
88
+
89
+ | Column | Type | Source |
90
+ |--------|---------|--------|
91
+ | `_path` | TEXT | The file's path relative to the scan root. |
92
+ | `_basename` | TEXT | The filename including extension. |
93
+ | `_dir` | TEXT | The parent directory path (relative to root). |
94
+ | `_ext` | TEXT | The file extension, lowercased, no leading dot. |
95
+ | `_size` | INTEGER | Size in bytes. |
96
+ | `_mtime` | INTEGER | Last-modified time, unix seconds. |
97
+ | `_ctime` | INTEGER | Created/changed time, unix seconds. |
98
+
99
+ Example query:
100
+
101
+ ```sql
102
+ SELECT _basename, _size
103
+ FROM posts
104
+ WHERE _mtime > strftime('%s', '2024-01-01')
105
+ ORDER BY _mtime DESC;
106
+ ```
107
+
108
+ ## Path Captures
109
+
110
+ Use `{name}` in glob patterns to extract path segments as columns. Add a
111
+ matching column name to the DDL and the capture is auto-populated:
112
+
113
+ ```toml
114
+ [[table]]
115
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
116
+ glob = "_comments/{thread_id}/*.jsonl"
117
+ ```
118
+
119
+ A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
120
+ `thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
121
+ to the file's modification time.
122
+
123
+ ## Ignore Patterns
124
+
125
+ The `ignore` list skips files and directories entirely (not even scanned):
126
+
127
+ ```toml
128
+ [dirsql]
129
+ ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
130
+ ```
131
+
132
+ The top-level `.dirsql/` directory is always excluded, whether you list it
133
+ or not — it is a reserved namespace for `dirsql`'s own metadata (see
134
+ [Persistence](./persistence.md)).
135
+
136
+ ## Persistence
137
+
138
+ Set `persist = true` to keep the SQLite database on disk between runs
139
+ instead of rebuilding from scratch on every startup:
140
+
141
+ ```toml
142
+ [dirsql]
143
+ persist = true
144
+ # persist_path = ".dirsql/cache.db" # optional; this is the default
145
+ ```
146
+
147
+ See [Persistence](./persistence.md) for the full reconcile algorithm,
148
+ storage layout, and limitations.
149
+
150
+ ## Strict Mode
151
+
152
+ By default, any key that is not a column in the DDL is silently dropped,
153
+ whether it is an auto-injected virtual or a key emitted by a user extract.
154
+ Enable strict mode to raise an error when an extract emits undeclared keys:
155
+
156
+ ```toml
157
+ [[table]]
158
+ ddl = "CREATE TABLE comments (thread_id TEXT)"
159
+ glob = "_comments/{thread_id}/*.jsonl"
160
+ strict = true
161
+ ```
162
+
163
+ Strict mode does **not** apply to auto-injected stat virtuals — those are
164
+ always filtered to the DDL's declared columns regardless. Strict mode
165
+ applies only to keys produced by an extract callback (relevant for
166
+ programmatic [tables](./tables.md)).
167
+
168
+ ## Full Example
169
+
170
+ ```toml
171
+ [dirsql]
172
+ ignore = ["node_modules/**", ".git/**", "dist/**"]
173
+
174
+ [[table]]
175
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
176
+ glob = "_comments/{thread_id}/*.jsonl"
177
+
178
+ [[table]]
179
+ ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
180
+ glob = "**/index.md"
181
+
182
+ [[table]]
183
+ ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
184
+ glob = "logs/*.csv"
185
+ ```
186
+
187
+ ## When you need parsed content
188
+
189
+ `.dirsql.toml` does not parse file contents. For columns derived from the
190
+ *inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
191
+ register a programmatic [`Table`](./tables.md) instead, and parse the
192
+ bytes in your host language. Glob captures and stat virtuals are still
193
+ auto-injected into rows produced by your extract.
@@ -1,8 +1,6 @@
1
1
  """Gap-filling tests for features documented in docs/ but previously untested.
2
2
 
3
3
  Each test cites the canonical doc location (docs page + section) that it covers.
4
- These were identified by the TESTS_AUDIT.md pass for bead dirsql-9ng
5
- (Tests follow docs: 1:1 mapping between documented features and tests).
6
4
  """
7
5
 
8
6
  import json
@@ -63,167 +61,48 @@ def describe_tables_guide_bytes_to_blob():
63
61
 
64
62
 
65
63
  # ---------------------------------------------------------------------------
66
- # docs/guide/config.md -- "Supported Formats" (.tsv/.ndjson/.toml/.yaml/.yml/.md)
67
- # and "Strict Mode" (strict = true)
64
+ # docs/guide/tables.md -- "Strict Mode" (programmatic Table strict=True)
68
65
  # ---------------------------------------------------------------------------
69
66
 
70
67
 
71
- def describe_from_config_formats_gap():
68
+ def describe_strict_mode_gap():
72
69
  @pytest.mark.asyncio
73
- async def it_loads_tsv_files_via_config(config_dir):
74
- """Docs (guide/config.md "Supported Formats"): .tsv format is tab-separated."""
75
- _write(
76
- os.path.join(config_dir, "data.tsv"),
77
- "name\tcount\napples\t10\noranges\t20\n",
78
- )
79
- _write(
80
- os.path.join(config_dir, ".dirsql.toml"),
81
- """\
82
- [[table]]
83
- ddl = "CREATE TABLE produce (name TEXT, count TEXT)"
84
- glob = "*.tsv"
85
- """,
86
- )
87
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
88
- await db.ready()
89
- results = await db.query("SELECT * FROM produce ORDER BY name")
90
- assert len(results) == 2
91
- assert results[0]["name"] == "apples"
92
- assert results[0]["count"] == "10"
93
- assert results[1]["name"] == "oranges"
94
-
95
- @pytest.mark.asyncio
96
- async def it_loads_ndjson_files_via_config(config_dir):
97
- """Docs (guide/config.md "Supported Formats"): .ndjson aliases JSONL (one row per line)."""
98
- _write(
99
- os.path.join(config_dir, "events.ndjson"),
100
- json.dumps({"type": "click", "count": 5})
101
- + "\n"
102
- + json.dumps({"type": "view", "count": 100})
103
- + "\n",
104
- )
105
- _write(
106
- os.path.join(config_dir, ".dirsql.toml"),
107
- """\
108
- [[table]]
109
- ddl = "CREATE TABLE events (type TEXT, count INTEGER)"
110
- glob = "*.ndjson"
111
- """,
112
- )
113
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
114
- await db.ready()
115
- results = await db.query("SELECT * FROM events ORDER BY type")
116
- assert len(results) == 2
117
- assert results[0]["type"] == "click"
118
- assert results[0]["count"] == 5
119
-
120
- @pytest.mark.asyncio
121
- async def it_loads_toml_files_via_config(config_dir):
122
- """Docs (guide/config.md "Supported Formats"): .toml format is one row per file."""
123
- _write(
124
- os.path.join(config_dir, "config", "app.toml"),
125
- 'name = "myapp"\nversion = "1.2"\n',
126
- )
127
- _write(
128
- os.path.join(config_dir, ".dirsql.toml"),
129
- """\
130
- [[table]]
131
- ddl = "CREATE TABLE app (name TEXT, version TEXT)"
132
- glob = "config/*.toml"
133
- """,
134
- )
135
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
136
- await db.ready()
137
- results = await db.query("SELECT * FROM app")
138
- assert len(results) == 1
139
- assert results[0]["name"] == "myapp"
140
- assert results[0]["version"] == "1.2"
141
-
142
- @pytest.mark.asyncio
143
- @pytest.mark.parametrize("ext", ["yaml", "yml"])
144
- async def it_loads_yaml_files_via_config(config_dir, ext):
145
- """Docs (guide/config.md "Supported Formats"): .yaml/.yml mapping = 1 row."""
146
- _write(
147
- os.path.join(config_dir, f"data.{ext}"),
148
- "name: widget\nprice: 9.99\n",
149
- )
150
- _write(
151
- os.path.join(config_dir, ".dirsql.toml"),
152
- f"""\
153
- [[table]]
154
- ddl = "CREATE TABLE items (name TEXT, price REAL)"
155
- glob = "*.{ext}"
156
- """,
157
- )
158
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
159
- await db.ready()
160
- results = await db.query("SELECT * FROM items")
161
- assert len(results) == 1
162
- assert results[0]["name"] == "widget"
163
- assert results[0]["price"] == pytest.approx(9.99)
164
-
165
- @pytest.mark.asyncio
166
- async def it_loads_markdown_with_frontmatter_via_config(config_dir):
167
- """Docs (guide/config.md "Supported Formats"): .md uses YAML frontmatter + body column."""
168
- _write(
169
- os.path.join(config_dir, "posts", "hello.md"),
170
- "---\ntitle: Hello\nauthor: Alice\n---\nBody text here.\n",
171
- )
172
- _write(
173
- os.path.join(config_dir, ".dirsql.toml"),
174
- """\
175
- [[table]]
176
- ddl = "CREATE TABLE posts (title TEXT, author TEXT, body TEXT)"
177
- glob = "posts/*.md"
178
- """,
179
- )
180
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
181
- await db.ready()
182
- results = await db.query("SELECT * FROM posts")
183
- assert len(results) == 1
184
- assert results[0]["title"] == "Hello"
185
- assert results[0]["author"] == "Alice"
186
- assert "Body text here." in (results[0]["body"] or "")
187
-
70
+ async def it_raises_on_extra_keys_when_strict_true(tmp_dir):
71
+ """Docs (guide/tables.md "Strict Mode"): strict=True errors on extra keys."""
72
+ with open(os.path.join(tmp_dir, "a.json"), "w") as f:
73
+ f.write("{}")
188
74
 
189
- def describe_from_config_strict_mode_gap():
190
- @pytest.mark.asyncio
191
- async def it_raises_on_extra_keys_when_strict_true(config_dir):
192
- """Docs (guide/config.md "Strict Mode"): `strict = true` errors on extra keys."""
193
- _write(
194
- os.path.join(config_dir, "items", "a.json"),
195
- json.dumps({"name": "apple", "color": "red"}),
196
- )
197
- _write(
198
- os.path.join(config_dir, ".dirsql.toml"),
199
- """\
200
- [[table]]
201
- ddl = "CREATE TABLE items (name TEXT)"
202
- glob = "items/*.json"
203
- strict = true
204
- """,
75
+ db = DirSQL(
76
+ tmp_dir,
77
+ tables=[
78
+ Table(
79
+ ddl="CREATE TABLE items (name TEXT)",
80
+ glob="*.json",
81
+ extract=lambda path, content: [{"name": "apple", "color": "red"}],
82
+ strict=True,
83
+ ),
84
+ ],
205
85
  )
206
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
207
86
  with pytest.raises(Exception):
208
87
  await db.ready()
209
88
 
210
89
  @pytest.mark.asyncio
211
- async def it_allows_exact_match_when_strict_true(config_dir):
212
- """Docs (guide/config.md "Strict Mode"): strict mode passes on exact key match."""
213
- _write(
214
- os.path.join(config_dir, "items", "a.json"),
215
- json.dumps({"name": "apple", "color": "red"}),
216
- )
217
- _write(
218
- os.path.join(config_dir, ".dirsql.toml"),
219
- """\
220
- [[table]]
221
- ddl = "CREATE TABLE items (name TEXT, color TEXT)"
222
- glob = "items/*.json"
223
- strict = true
224
- """,
90
+ async def it_allows_exact_match_when_strict_true(tmp_dir):
91
+ """Docs (guide/tables.md "Strict Mode"): strict mode passes on exact key match."""
92
+ with open(os.path.join(tmp_dir, "a.json"), "w") as f:
93
+ f.write("{}")
94
+
95
+ db = DirSQL(
96
+ tmp_dir,
97
+ tables=[
98
+ Table(
99
+ ddl="CREATE TABLE items (name TEXT, color TEXT)",
100
+ glob="*.json",
101
+ extract=lambda path, content: [{"name": "apple", "color": "red"}],
102
+ strict=True,
103
+ ),
104
+ ],
225
105
  )
226
- db = DirSQL(config=os.path.join(config_dir, ".dirsql.toml"))
227
106
  await db.ready()
228
107
  results = await db.query("SELECT * FROM items")
229
108
  assert len(results) == 1