dirsql 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +12 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/docs/guide/config.md +86 -98
- package/docs/guide/persistence.md +177 -0
- package/package.json +11 -11
package/dist/index.d.ts
CHANGED
|
@@ -31,6 +31,17 @@ export interface DirSQLOptions {
|
|
|
31
31
|
* relative to the config file's parent directory.
|
|
32
32
|
*/
|
|
33
33
|
config?: string;
|
|
34
|
+
/**
|
|
35
|
+
* Enable persistent on-disk SQLite cache. When `true`, the database is
|
|
36
|
+
* written to `<root>/.dirsql/cache.db` (override via `persistPath`) so
|
|
37
|
+
* subsequent startups only re-parse files that have actually changed.
|
|
38
|
+
*/
|
|
39
|
+
persist?: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Override the location of the persistent cache file. Ignored when
|
|
42
|
+
* `persist` is not `true`.
|
|
43
|
+
*/
|
|
44
|
+
persistPath?: string;
|
|
34
45
|
}
|
|
35
46
|
/** A row-level event emitted by the file watcher. */
|
|
36
47
|
export interface RowEvent {
|
|
@@ -52,7 +63,7 @@ interface NativeDirSQL {
|
|
|
52
63
|
pollEvents(timeoutMs: number): Promise<RowEvent[]>;
|
|
53
64
|
}
|
|
54
65
|
interface NativeDirSQLConstructor {
|
|
55
|
-
openAsync(root: string | null, tables: TableDef[] | null, ignore: string[] | null, config: string | null): Promise<NativeDirSQL>;
|
|
66
|
+
openAsync(root: string | null, tables: TableDef[] | null, ignore: string[] | null, config: string | null, persist: boolean | null, persistPath: string | null): Promise<NativeDirSQL>;
|
|
56
67
|
}
|
|
57
68
|
interface CoreModule {
|
|
58
69
|
DirSQL: NativeDirSQLConstructor;
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAcA,iEAAiE;AACjE,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,4EAA4E;IAC5E,OAAO,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAC1E,+DAA+D;IAC/D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;IACpB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAcA,iEAAiE;AACjE,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,4EAA4E;IAC5E,OAAO,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAC1E,+DAA+D;IAC/D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;IACpB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qDAAqD;AACrD,MAAM,WAAW,QAAQ;IACvB;;;;OAIG;IACH,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACjD,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AAGD,UAAU,YAAY;IACpB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IACvD,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9B,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACpD;AAED,UAAU,uBAAuB;IAC/B,SAAS,CACP,IAAI,EAAE,MAAM,GAAG,IAAI,EACnB,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EACzB,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EACvB,MAAM,EAAE,MAAM,GAAG,IAAI,EACrB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,WAAW,EAAE,MAAM,GAAG,IAAI,GACzB,OAAO,CAAC,YAAY,CAAC,CAAC;CAC1B;AAID,UAAU,UAAU;IAClB,MAAM,EAAE,uBAAuB,CAAC;CACjC;AAaD;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,GAAG,IAAI,CAEjE;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,qBAAa,MAAM;IACjB;;;;;;OAMG;IACH,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAG9B,OAAO,CAAC,MAAM,CAAgB;IAE9B,wDAAwD;gBAC5C,UAAU,EAAE,MAAM;IAC9B,yCAAyC;gBAC7B,OAAO,EAAE,aAAa;IAkBlC;;;;;;OAMG;IACG,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAK5D;;;;;;;OAOG;IACG,YAA
Y,IAAI,OAAO,CAAC,IAAI,CAAC;IAKnC;;;;;;;OAOG;IACG,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAKxD;;;;;;;;;;OAUG;IACI,KAAK,IAAI,cAAc,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC;CAaxD"}
|
package/dist/index.js
CHANGED
|
@@ -73,7 +73,7 @@ export class DirSQL {
|
|
|
73
73
|
constructor(arg) {
|
|
74
74
|
const options = typeof arg === "string" ? { config: arg } : arg;
|
|
75
75
|
const Ctor = getCore().DirSQL;
|
|
76
|
-
const openPromise = Ctor.openAsync(options.root ?? null, options.tables ?? null, options.ignore ?? null, options.config ?? null);
|
|
76
|
+
const openPromise = Ctor.openAsync(options.root ?? null, options.tables ?? null, options.ignore ?? null, options.config ?? null, options.persist ?? null, options.persistPath ?? null);
|
|
77
77
|
this.ready = openPromise.then((inner) => {
|
|
78
78
|
this._inner = inner;
|
|
79
79
|
});
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAAA,yBAAyB;AACzB,EAAE;AACF,wEAAwE;AACxE,kEAAkE;AAClE,qEAAqE;AACrE,qCAAqC;AACrC,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,mEAAmE;AAEnE,OAAO,EAAE,cAAc,IAAI,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAAA,yBAAyB;AACzB,EAAE;AACF,wEAAwE;AACxE,kEAAkE;AAClE,qEAAqE;AACrE,qCAAqC;AACrC,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,mEAAmE;AAEnE,OAAO,EAAE,cAAc,IAAI,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAwF9E,yEAAyE;AACzE,sEAAsE;AACtE,IAAI,IAAI,GAAsB,IAAI,CAAC;AAEnC,SAAS,OAAO;IACd,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,IAAI,GAAG,qBAAqB,EAAgB,CAAC;IAC/C,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,IAAI,GAAG,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,MAAM,OAAO,MAAM;IACjB;;;;;;OAMG;IACM,KAAK,CAAgB;IAE9B,gEAAgE;IACxD,MAAM,CAAgB;IAM9B,YAAY,GAA2B;QACrC,MAAM,OAAO,GACX,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAClD,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC,MAAM,CAAC;QAC9B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAChC,OAAO,CAAC,IAAI,IAAI,IAAI,EACpB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,OAAO,IAAI,IAAI,EACvB,OAAO,CAAC,WAAW,IAAI,IAAI,CAC5B,CAAC;QACF,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,KAAK,CAAC,GAAW;QACrB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY;QAChB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;IACpC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,SAAiB;QAChC,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,CAAC,KAAK;QACV,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;QACjC,OAAO,IAAI,EAAE,CAAC;YACZ,qEAAqE;YACrE,gEAAgE;YAChE,sEAAsE;YACtE,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACjD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
|
package/docs/guide/config.md
CHANGED
|
@@ -6,7 +6,18 @@ canonical: https://thekevinscott.github.io/dirsql/guide/config
|
|
|
6
6
|
|
|
7
7
|
> Online: <https://thekevinscott.github.io/dirsql/guide/config>
|
|
8
8
|
|
|
9
|
-
`dirsql` can be configured with a `.dirsql.toml` file
|
|
9
|
+
`dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
|
|
10
|
+
way produce **one row per matched file**. Each row's columns come from
|
|
11
|
+
filesystem facts:
|
|
12
|
+
|
|
13
|
+
- **Glob path captures** — named `{placeholder}` segments in the glob.
|
|
14
|
+
- **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
|
|
15
|
+
stat-derived metadata.
|
|
16
|
+
|
|
17
|
+
Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
|
|
18
|
+
configured in `.dirsql.toml`. If you need columns derived from file
|
|
19
|
+
contents, register a programmatic [`Table`](./tables.md) whose `extract`
|
|
20
|
+
function does the parsing in your host language.
|
|
10
21
|
|
|
11
22
|
## Basic Example
|
|
12
23
|
|
|
@@ -15,11 +26,12 @@ canonical: https://thekevinscott.github.io/dirsql/guide/config
|
|
|
15
26
|
ignore = ["node_modules/**", ".git/**"]
|
|
16
27
|
|
|
17
28
|
[[table]]
|
|
18
|
-
ddl
|
|
19
|
-
glob = "posts/*.
|
|
29
|
+
ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
|
|
30
|
+
glob = "posts/*.md"
|
|
20
31
|
```
|
|
21
32
|
|
|
22
|
-
|
|
33
|
+
Each `posts/*.md` file produces one row. The DDL declares which stat
|
|
34
|
+
virtuals are surfaced as SQL columns.
|
|
23
35
|
|
|
24
36
|
## Loading a Config File
|
|
25
37
|
|
|
@@ -52,11 +64,16 @@ await db.ready;
|
|
|
52
64
|
|
|
53
65
|
:::
|
|
54
66
|
|
|
55
|
-
By default, the root directory scanned is the config file's parent
|
|
67
|
+
By default, the root directory scanned is the config file's parent
|
|
68
|
+
directory. Override it by passing `root` explicitly (the explicit value
|
|
69
|
+
wins and a warning is emitted) or by declaring `[dirsql].root` in the
|
|
70
|
+
config file itself.
|
|
56
71
|
|
|
57
72
|
## Root Directory
|
|
58
73
|
|
|
59
|
-
By default, the config file's parent directory is the scan root. To index
|
|
74
|
+
By default, the config file's parent directory is the scan root. To index
|
|
75
|
+
a different location, declare `[dirsql].root` (relative paths are resolved
|
|
76
|
+
relative to the config file's parent):
|
|
60
77
|
|
|
61
78
|
```toml
|
|
62
79
|
[dirsql]
|
|
@@ -64,122 +81,90 @@ root = "../data"
|
|
|
64
81
|
ignore = ["node_modules/**"]
|
|
65
82
|
```
|
|
66
83
|
|
|
67
|
-
##
|
|
84
|
+
## Stat Virtuals
|
|
68
85
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
| `.json` | JSON | Object = 1 row, Array = many rows |
|
|
72
|
-
| `.jsonl`, `.ndjson` | JSONL | One row per line |
|
|
73
|
-
| `.csv` | CSV | One row per data line (header = columns) |
|
|
74
|
-
| `.tsv` | TSV | One row per data line (tab-separated) |
|
|
75
|
-
| `.toml` | TOML | One row per file |
|
|
76
|
-
| `.yaml`, `.yml` | YAML | Mapping = 1 row, Sequence = many rows |
|
|
77
|
-
| `.md` | Frontmatter | YAML frontmatter + body column |
|
|
86
|
+
Every config-defined table can expose any of these reserved columns. Add
|
|
87
|
+
the ones you want to your DDL; the rest are silently dropped.
|
|
78
88
|
|
|
79
|
-
|
|
89
|
+
| Column | Type | Source |
|
|
90
|
+
|--------|---------|--------|
|
|
91
|
+
| `_path` | TEXT | The file's path relative to the scan root. |
|
|
92
|
+
| `_basename` | TEXT | The filename including extension. |
|
|
93
|
+
| `_dir` | TEXT | The parent directory path (relative to root). |
|
|
94
|
+
| `_ext` | TEXT | The file extension, lowercased, no leading dot. |
|
|
95
|
+
| `_size` | INTEGER | Size in bytes. |
|
|
96
|
+
| `_mtime` | INTEGER | Last-modified time, unix seconds. |
|
|
97
|
+
| `_ctime` | INTEGER | Created/changed time, unix seconds. |
|
|
80
98
|
|
|
81
|
-
|
|
99
|
+
Example query:
|
|
82
100
|
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
101
|
+
```sql
|
|
102
|
+
SELECT _basename, _size
|
|
103
|
+
FROM posts
|
|
104
|
+
WHERE _mtime > strftime('%s', '2024-01-01')
|
|
105
|
+
ORDER BY _mtime DESC;
|
|
87
106
|
```
|
|
88
107
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
## Nested Data
|
|
92
|
-
|
|
93
|
-
Use `each` to navigate into nested JSON structures:
|
|
94
|
-
|
|
95
|
-
```toml
|
|
96
|
-
[[table]]
|
|
97
|
-
ddl = "CREATE TABLE items (name TEXT, price REAL)"
|
|
98
|
-
glob = "catalog/*.json"
|
|
99
|
-
each = "data.items"
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
This extracts rows from `{"data": {"items": [...]}}`.
|
|
103
|
-
|
|
104
|
-
## Column Mapping
|
|
108
|
+
## Path Captures
|
|
105
109
|
|
|
106
|
-
Use `
|
|
110
|
+
Use `{name}` in glob patterns to extract path segments as columns. Add a
|
|
111
|
+
matching column name to the DDL and the capture is auto-populated:
|
|
107
112
|
|
|
108
113
|
```toml
|
|
109
114
|
[[table]]
|
|
110
|
-
ddl
|
|
111
|
-
glob = "
|
|
112
|
-
|
|
113
|
-
[table.columns]
|
|
114
|
-
display_name = "metadata.author.name"
|
|
115
|
-
body = "body"
|
|
115
|
+
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
116
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
116
117
|
```
|
|
117
118
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
populated**. Any column in the DDL that is not mentioned in the mapping
|
|
122
|
-
is set to `NULL` for every row — the original key from the file is not
|
|
123
|
-
auto-copied.
|
|
119
|
+
A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
|
|
120
|
+
`thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
|
|
121
|
+
to the file's modification time.
|
|
124
122
|
|
|
125
|
-
|
|
126
|
-
every column comes from", not "rename these specific keys".
|
|
123
|
+
## Ignore Patterns
|
|
127
124
|
|
|
128
|
-
|
|
125
|
+
The `ignore` list skips files and directories entirely (not even scanned):
|
|
129
126
|
|
|
130
127
|
```toml
|
|
131
|
-
[
|
|
132
|
-
|
|
133
|
-
glob = "*.json"
|
|
134
|
-
|
|
135
|
-
[table.columns]
|
|
136
|
-
display_name = "author" # intended: "just rename author -> display_name"
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
against a file `one.json`:
|
|
140
|
-
|
|
141
|
-
```json
|
|
142
|
-
{"id": "a1", "body": "hello", "author": "Alice"}
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
produces:
|
|
146
|
-
|
|
147
|
-
```json
|
|
148
|
-
[{"id": null, "body": null, "display_name": "Alice"}]
|
|
128
|
+
[dirsql]
|
|
129
|
+
ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
|
|
149
130
|
```
|
|
150
131
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
```toml
|
|
156
|
-
[table.columns]
|
|
157
|
-
id = "id"
|
|
158
|
-
body = "body"
|
|
159
|
-
display_name = "author"
|
|
160
|
-
```
|
|
161
|
-
:::
|
|
132
|
+
The top-level `.dirsql/` directory is always excluded, whether you list it
|
|
133
|
+
or not — it is a reserved namespace for `dirsql`'s own metadata (see
|
|
134
|
+
[Persistence](./persistence.md)).
|
|
162
135
|
|
|
163
|
-
##
|
|
136
|
+
## Persistence
|
|
164
137
|
|
|
165
|
-
|
|
138
|
+
Set `persist = true` to keep the SQLite database on disk between runs
|
|
139
|
+
instead of rebuilding from scratch on every startup:
|
|
166
140
|
|
|
167
141
|
```toml
|
|
168
142
|
[dirsql]
|
|
169
|
-
|
|
143
|
+
persist = true
|
|
144
|
+
# persist_path = ".dirsql/cache.db" # optional; this is the default
|
|
170
145
|
```
|
|
171
146
|
|
|
147
|
+
See [Persistence](./persistence.md) for the full reconcile algorithm,
|
|
148
|
+
storage layout, and limitations.
|
|
149
|
+
|
|
172
150
|
## Strict Mode
|
|
173
151
|
|
|
174
|
-
By default,
|
|
152
|
+
By default, auto-injected virtuals that aren't in the DDL are silently
|
|
153
|
+
dropped, and undeclared user-extract keys are dropped. Enable strict mode
|
|
154
|
+
to error when an extract emits keys not declared in the DDL:
|
|
175
155
|
|
|
176
156
|
```toml
|
|
177
157
|
[[table]]
|
|
178
|
-
ddl
|
|
179
|
-
glob = "
|
|
158
|
+
ddl = "CREATE TABLE comments (thread_id TEXT)"
|
|
159
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
180
160
|
strict = true
|
|
181
161
|
```
|
|
182
162
|
|
|
163
|
+
Strict mode does **not** apply to auto-injected stat virtuals — those are
|
|
164
|
+
always filtered to the DDL's declared columns regardless. Strict mode
|
|
165
|
+
applies only to keys produced by an extract callback (relevant for
|
|
166
|
+
programmatic [tables](./tables.md)).
|
|
167
|
+
|
|
183
168
|
## Full Example
|
|
184
169
|
|
|
185
170
|
```toml
|
|
@@ -187,19 +172,22 @@ strict = true
|
|
|
187
172
|
ignore = ["node_modules/**", ".git/**", "dist/**"]
|
|
188
173
|
|
|
189
174
|
[[table]]
|
|
190
|
-
ddl
|
|
191
|
-
glob = "_comments/{thread_id}
|
|
175
|
+
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
176
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
192
177
|
|
|
193
178
|
[[table]]
|
|
194
|
-
ddl
|
|
179
|
+
ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
|
|
195
180
|
glob = "**/index.md"
|
|
196
181
|
|
|
197
182
|
[[table]]
|
|
198
|
-
ddl
|
|
183
|
+
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
199
184
|
glob = "logs/*.csv"
|
|
200
|
-
|
|
201
|
-
[[table]]
|
|
202
|
-
ddl = "CREATE TABLE config (key TEXT, value TEXT)"
|
|
203
|
-
glob = "config/*.toml"
|
|
204
|
-
strict = true
|
|
205
185
|
```
|
|
186
|
+
|
|
187
|
+
## When you need parsed content
|
|
188
|
+
|
|
189
|
+
`.dirsql.toml` does not parse file contents. For columns derived from the
|
|
190
|
+
*inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
|
|
191
|
+
register a programmatic [`Table`](./tables.md) instead, and parse the
|
|
192
|
+
bytes in your host language. Glob captures and stat virtuals are still
|
|
193
|
+
auto-injected into rows produced by your extract.
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# Persistence
|
|
2
|
+
|
|
3
|
+
By default, `dirsql` keeps its SQLite database in memory and rebuilds it from scratch every time the process starts. For large directories, this can take seconds to minutes -- nearly all of which is spent re-parsing files that haven't changed since the previous run.
|
|
4
|
+
|
|
5
|
+
Persistence stores the SQLite database on disk so that subsequent startups only re-parse the files that have actually changed.
|
|
6
|
+
|
|
7
|
+
::: tip Same answers, faster startup
|
|
8
|
+
The rows returned by `query()` after a persistent startup are equivalent to those produced by a from-scratch rebuild. Persistence is a startup-time optimization, not a correctness compromise. The reconcile algorithm is the same one `git status` uses to decide which files have changed since the last index write.
|
|
9
|
+
:::
|
|
10
|
+
|
|
11
|
+
## Quick start
|
|
12
|
+
|
|
13
|
+
::: code-group
|
|
14
|
+
|
|
15
|
+
```toml [.dirsql.toml]
|
|
16
|
+
[dirsql]
|
|
17
|
+
persist = true
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
```python [Python]
|
|
21
|
+
from dirsql import DirSQL
|
|
22
|
+
|
|
23
|
+
db = DirSQL("./my-project", tables=[...], persist=True)
|
|
24
|
+
await db.ready()
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
```rust [Rust]
|
|
28
|
+
use dirsql::DirSQL;
|
|
29
|
+
|
|
30
|
+
let db = DirSQL::builder()
|
|
31
|
+
.root("./my-project")
|
|
32
|
+
.tables(vec![/* ... */])
|
|
33
|
+
.persist(true)
|
|
34
|
+
.build()?;
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```typescript [TypeScript]
|
|
38
|
+
import { DirSQL } from "dirsql";
|
|
39
|
+
|
|
40
|
+
const db = new DirSQL({ root: "./my-project", tables: [/* ... */], persist: true });
|
|
41
|
+
await db.ready;
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
:::
|
|
45
|
+
|
|
46
|
+
That's it. The first run writes the database to `./my-project/.dirsql/cache.db`. Every subsequent startup reuses that cache and re-parses only the files that have actually changed.
|
|
47
|
+
|
|
48
|
+
## Configuration
|
|
49
|
+
|
|
50
|
+
| Option | Type | Default | Meaning |
|
|
51
|
+
|---|---|---|---|
|
|
52
|
+
| `persist` | boolean | `false` | Enable persistent on-disk storage. |
|
|
53
|
+
| `persist_path` (Python, Rust) / `persistPath` (TypeScript) | string | `<root>/.dirsql/cache.db` | Override the database file path. Ignored when `persist` is `false`. |
|
|
54
|
+
|
|
55
|
+
The default location keeps the cache alongside the data it indexes, which means it follows the project around (clone, copy, move) without extra setup. Override `persist_path` (`persistPath` in TypeScript) if you want the cache somewhere else -- a CI cache directory, a tmpfs mount, an XDG cache dir, etc.
|
|
56
|
+
|
|
57
|
+
::: code-group
|
|
58
|
+
|
|
59
|
+
```toml [.dirsql.toml]
|
|
60
|
+
[dirsql]
|
|
61
|
+
persist = true
|
|
62
|
+
persist_path = "/var/cache/dirsql/myproject.db"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
```python [Python]
|
|
66
|
+
db = DirSQL(
|
|
67
|
+
"./my-project",
|
|
68
|
+
tables=[...],
|
|
69
|
+
persist=True,
|
|
70
|
+
persist_path="/var/cache/dirsql/myproject.db",
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
```rust [Rust]
|
|
75
|
+
let db = DirSQL::builder()
|
|
76
|
+
.root("./my-project")
|
|
77
|
+
.tables(vec![/* ... */])
|
|
78
|
+
.persist(true)
|
|
79
|
+
.persist_path("/var/cache/dirsql/myproject.db")
|
|
80
|
+
.build()?;
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
```typescript [TypeScript]
|
|
84
|
+
const db = new DirSQL({
|
|
85
|
+
root: "./my-project",
|
|
86
|
+
tables: [/* ... */],
|
|
87
|
+
persist: true,
|
|
88
|
+
persistPath: "/var/cache/dirsql/myproject.db",
|
|
89
|
+
});
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
:::
|
|
93
|
+
|
|
94
|
+
## The `.dirsql/` directory
|
|
95
|
+
|
|
96
|
+
`dirsql` reserves the top-level `.dirsql/` directory inside every scanned root. It is **unconditionally excluded from the directory walk**, whether persistence is enabled or not. This means:
|
|
97
|
+
|
|
98
|
+
- The default cache path `<root>/.dirsql/cache.db` cannot accidentally be ingested as a data file.
|
|
99
|
+
- You can place additional `dirsql`-related files in `.dirsql/` (e.g. a project-local config snapshot) without them being parsed.
|
|
100
|
+
- You should not put your own data files in `.dirsql/` -- they will be silently ignored.
|
|
101
|
+
|
|
102
|
+
If you persist into `.dirsql/`, add it to your `.gitignore`:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
.dirsql/
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
The cache file should never be committed -- it is reproducible from the source tree and frequently large.
|
|
109
|
+
|
|
110
|
+
## How the startup reconcile works
|
|
111
|
+
|
|
112
|
+
When a persistent cache exists, `dirsql` does not blindly trust it. On startup it:
|
|
113
|
+
|
|
114
|
+
1. **Checks compatibility metadata.** If the cached `dirsql` version, schema version, glob configuration, parser versions, or canonical root path differs from the current build, the cache is wiped and rebuilt from scratch.
|
|
115
|
+
2. **Walks the tree and stats every matching file.** This is metadata-only -- no file contents are read.
|
|
116
|
+
3. **For each file, compares the live `(size, mtime, ctime, inode, dev)` tuple against the cached row:**
|
|
117
|
+
- **Trust the cache** when every field matches *and* the file's mtime is older than the cache's snapshot time (outside the racy window).
|
|
118
|
+
- **Hash-confirm** when the tuple matches but the file's mtime falls inside the racy window. `dirsql` reads and hashes the file; if the hash matches the cached hash, the cache is trusted.
|
|
119
|
+
- **Re-parse** when any field of the tuple differs.
|
|
120
|
+
4. **Deletes** rows for files that were in the cache but are no longer on disk.
|
|
121
|
+
5. **Inserts** rows for files that are on disk but were not in the cache.
|
|
122
|
+
|
|
123
|
+
This is the same algorithm `git status` uses to decide which files have changed since the last index write. The "racy window" handling is what closes the gap when a file is modified within the same filesystem-timestamp resolution as the cache write.
|
|
124
|
+
|
|
125
|
+
## When `dirsql` does a full rebuild
|
|
126
|
+
|
|
127
|
+
Any of the following will cause the cache to be discarded and rebuilt from scratch on the next startup:
|
|
128
|
+
|
|
129
|
+
- The `dirsql` library was upgraded between runs.
|
|
130
|
+
- The glob configuration changed (a new table, a removed table, a modified glob, a changed `ignore` list).
|
|
131
|
+
- A built-in parser version changed (this generally only happens on `dirsql` upgrades).
|
|
132
|
+
- The cache was written for a different root directory than the one currently configured.
|
|
133
|
+
- The internal schema of the cache changed (i.e. you upgraded `dirsql` across a schema version bump).
|
|
134
|
+
|
|
135
|
+
Full rebuilds take exactly as long as a non-persistent startup -- there is no penalty for them, only a missed optimization.
|
|
136
|
+
|
|
137
|
+
## Limitations
|
|
138
|
+
|
|
139
|
+
### Network filesystems
|
|
140
|
+
|
|
141
|
+
NFS, SMB/CIFS, and similar network filesystems cache file attributes on the client and can return stale `stat` results. Persistent mode is **not supported** on network filesystems and may produce stale rows. Use in-memory mode (the default) if your `root` lives on a network mount.
|
|
142
|
+
|
|
143
|
+
### The mtime-preservation edge case
|
|
144
|
+
|
|
145
|
+
Racy-stat detection misses changes only when **all** of the following are true:
|
|
146
|
+
|
|
147
|
+
- A file's contents are modified.
|
|
148
|
+
- The file's size after modification is identical to its size before.
|
|
149
|
+
- The file's `mtime` is externally reset to a value older than the cache's snapshot time (e.g. via `touch -r` or a backup-restore tool that preserves mtime).
|
|
150
|
+
|
|
151
|
+
If you cannot tolerate this edge case, disable persistence (`persist = false`). This is the same trade-off `git` makes with `core.trustctime` / `core.checkStat`.
|
|
152
|
+
|
|
153
|
+
### Single writer
|
|
154
|
+
|
|
155
|
+
Only one `dirsql` process should write to a given cache file at a time. Multiple read-only processes can query the same file safely once the writer finishes the initial reconcile. Coordinated multi-writer access is not supported in v0.3.0.
|
|
156
|
+
|
|
157
|
+
## Inspecting the cache
|
|
158
|
+
|
|
159
|
+
The persistent database is a normal SQLite file. You can open it with any SQLite client:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
sqlite3 .dirsql/cache.db
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
```sql
|
|
166
|
+
.tables
|
|
167
|
+
-- comments documents metrics _dirsql_files _dirsql_meta
|
|
168
|
+
|
|
169
|
+
SELECT * FROM _dirsql_meta;
|
|
170
|
+
-- schema_version | 1
|
|
171
|
+
-- dirsql_version | 0.3.0
|
|
172
|
+
-- glob_config_hash | <hex>
|
|
173
|
+
-- parser_versions | {"json":"1","jsonl":"1","csv":"1",...}
|
|
174
|
+
-- root_canonical | /home/alice/my-project
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
The `_dirsql_files` and `_dirsql_meta` tables are managed by `dirsql`. Do not modify them by hand -- on the next startup, `dirsql` will detect the inconsistency and rebuild from scratch.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dirsql",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Ephemeral SQL index over a local directory",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "https://github.com/thekevinscott/dirsql",
|
|
@@ -175,15 +175,15 @@
|
|
|
175
175
|
]
|
|
176
176
|
},
|
|
177
177
|
"optionalDependencies": {
|
|
178
|
-
"@dirsql/lib-linux-x64-gnu": "0.
|
|
179
|
-
"@dirsql/lib-linux-arm64-gnu": "0.
|
|
180
|
-
"@dirsql/lib-darwin-x64": "0.
|
|
181
|
-
"@dirsql/lib-darwin-arm64": "0.
|
|
182
|
-
"@dirsql/lib-win32-x64-msvc": "0.
|
|
183
|
-
"@dirsql/cli-linux-x64-gnu": "0.
|
|
184
|
-
"@dirsql/cli-linux-arm64-gnu": "0.
|
|
185
|
-
"@dirsql/cli-darwin-x64": "0.
|
|
186
|
-
"@dirsql/cli-darwin-arm64": "0.
|
|
187
|
-
"@dirsql/cli-win32-x64-msvc": "0.
|
|
178
|
+
"@dirsql/lib-linux-x64-gnu": "0.3.1",
|
|
179
|
+
"@dirsql/lib-linux-arm64-gnu": "0.3.1",
|
|
180
|
+
"@dirsql/lib-darwin-x64": "0.3.1",
|
|
181
|
+
"@dirsql/lib-darwin-arm64": "0.3.1",
|
|
182
|
+
"@dirsql/lib-win32-x64-msvc": "0.3.1",
|
|
183
|
+
"@dirsql/cli-linux-x64-gnu": "0.3.1",
|
|
184
|
+
"@dirsql/cli-linux-arm64-gnu": "0.3.1",
|
|
185
|
+
"@dirsql/cli-darwin-x64": "0.3.1",
|
|
186
|
+
"@dirsql/cli-darwin-arm64": "0.3.1",
|
|
187
|
+
"@dirsql/cli-win32-x64-msvc": "0.3.1"
|
|
188
188
|
}
|
|
189
189
|
}
|