dirsql 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/index.d.ts +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js.map +1 -1
- package/docs/api/index.md +4 -4
- package/docs/getting-started.md +11 -10
- package/docs/guide/async.md +4 -3
- package/docs/guide/crdt.md +4 -3
- package/docs/guide/querying.md +4 -0
- package/docs/guide/tables.md +28 -27
- package/docs/guide/watching.md +8 -3
- package/docs/index.md +4 -3
- package/package.json +11 -11
- package/docs/guide/cli.md +0 -131
- package/docs/guide/config.md +0 -193
- package/docs/guide/init.md +0 -84
package/README.md
CHANGED
|
@@ -17,6 +17,7 @@ Prebuilt binaries ship for linux-x64, linux-arm64, darwin-x64, darwin-arm64, and
|
|
|
17
17
|
## Usage
|
|
18
18
|
|
|
19
19
|
```typescript
|
|
20
|
+
import { readFileSync } from "node:fs";
|
|
20
21
|
import { DirSQL } from "dirsql";
|
|
21
22
|
|
|
22
23
|
const db = new DirSQL({
|
|
@@ -25,7 +26,7 @@ const db = new DirSQL({
|
|
|
25
26
|
{
|
|
26
27
|
ddl: "CREATE TABLE users (name TEXT, age INTEGER)",
|
|
27
28
|
glob: "data/*.json",
|
|
28
|
-
extract: (
|
|
29
|
+
extract: (filePath) => JSON.parse(readFileSync(filePath, "utf8")),
|
|
29
30
|
},
|
|
30
31
|
],
|
|
31
32
|
});
|
package/dist/index.d.ts
CHANGED
|
@@ -4,8 +4,13 @@ export interface TableDef {
|
|
|
4
4
|
ddl: string;
|
|
5
5
|
/** Glob pattern (relative to the DirSQL root) for files backing this table. */
|
|
6
6
|
glob: string;
|
|
7
|
-
/**
|
|
8
|
-
|
|
7
|
+
/**
|
|
8
|
+
* Produce the rows a matched file contributes. Receives the absolute
|
|
9
|
+
* filesystem path of the file. dirsql does not read file contents; if the
|
|
10
|
+
* callback needs the file body it reads the path itself (e.g.
|
|
11
|
+
* `fs.readFileSync(filePath, "utf8")`). Returns an array of row objects.
|
|
12
|
+
*/
|
|
13
|
+
extract: (filePath: string) => Record<string, unknown>[];
|
|
9
14
|
/** If true, reject rows with columns not declared in `ddl`. */
|
|
10
15
|
strict?: boolean;
|
|
11
16
|
}
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAcA,iEAAiE;AACjE,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAcA,iEAAiE;AACjE,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,OAAO,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IACzD,+DAA+D;IAC/D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;IACpB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qDAAqD;AACrD,MAAM,WAAW,QAAQ;IACvB;;;;OAIG;IACH,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACjD,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AAGD,UAAU,YAAY;IACpB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IACvD,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9B,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACpD;AAED,UAAU,uBAAuB;IAC/B,SAAS,CACP,IAAI,EAAE,MAAM,GAAG,IAAI,EACnB,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EACzB,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EACvB,MAAM,EAAE,MAAM,GAAG,IAAI,EACrB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,WAAW,EAAE,MAAM,GAAG,IAAI,GACzB,OAAO,CAAC,YAAY,CAAC,CAAC;CAC1B;AAID,UAAU,UAAU;IAClB,MAAM,EAAE,uBAAuB,CAAC;CACjC;AAaD;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,GAAG,IAAI,CAEjE;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,qBAAa,MAAM;IACjB;;;;;;OAMG;IACH,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAG9B,OAAO,CAAC,MAAM,CAAgB;IAE9B,wDAAwD;gBAC5C,UAAU,EAAE,MAAM;IAC9B,yCAAyC;gBAC7B,OAAO,EAAE,aAAa;IAkBlC;;;;;;OAMG;IACG,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAK5D;;;;;;;OAOG;IACG,YAAY,IAAI,OAAO,CAAC,IA
AI,CAAC;IAKnC;;;;;;;OAOG;IACG,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAKxD;;;;;;;;;;OAUG;IACI,KAAK,IAAI,cAAc,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC;CAaxD"}
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAAA,yBAAyB;AACzB,EAAE;AACF,wEAAwE;AACxE,kEAAkE;AAClE,qEAAqE;AACrE,qCAAqC;AACrC,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,mEAAmE;AAEnE,OAAO,EAAE,cAAc,IAAI,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAAA,yBAAyB;AACzB,EAAE;AACF,wEAAwE;AACxE,kEAAkE;AAClE,qEAAqE;AACrE,qCAAqC;AACrC,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,mEAAmE;AAEnE,OAAO,EAAE,cAAc,IAAI,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AA6F9E,yEAAyE;AACzE,sEAAsE;AACtE,IAAI,IAAI,GAAsB,IAAI,CAAC;AAEnC,SAAS,OAAO;IACd,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,IAAI,GAAG,qBAAqB,EAAgB,CAAC;IAC/C,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,IAAI,GAAG,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,MAAM,OAAO,MAAM;IACjB;;;;;;OAMG;IACM,KAAK,CAAgB;IAE9B,gEAAgE;IACxD,MAAM,CAAgB;IAM9B,YAAY,GAA2B;QACrC,MAAM,OAAO,GACX,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAClD,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC,MAAM,CAAC;QAC9B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAChC,OAAO,CAAC,IAAI,IAAI,IAAI,EACpB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,OAAO,IAAI,IAAI,EACvB,OAAO,CAAC,WAAW,IAAI,IAAI,CAC5B,CAAC;QACF,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,KAAK,CAAC,GAAW;QACrB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY;QAChB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;IACpC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,SAAiB;QAChC,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,CAAC,KAAK;QACV,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;QACjC,OAAO,IAAI,EAAE,CAAC;YACZ,qEAAqE;YACrE,gEAAgE;YAChE,sEAAsE;YACtE,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACjD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
|
package/docs/api/index.md
CHANGED
|
@@ -168,15 +168,15 @@ import { Table } from 'dirsql';
|
|
|
168
168
|
::: code-group
|
|
169
169
|
|
|
170
170
|
```python [Python]
|
|
171
|
-
Table(*, ddl: str, glob: str, extract: Callable[[str
|
|
171
|
+
Table(*, ddl: str, glob: str, extract: Callable[[str], list[dict]])
|
|
172
172
|
```
|
|
173
173
|
|
|
174
174
|
```rust [Rust]
|
|
175
|
-
Table::new(ddl: &str, glob: &str, extract: fn(&str
|
|
175
|
+
Table::new(ddl: &str, glob: &str, extract: fn(&str) -> Vec<Value>)
|
|
176
176
|
```
|
|
177
177
|
|
|
178
178
|
```typescript [TypeScript]
|
|
179
|
-
new Table({ ddl: string, glob: string, extract: (path: string
|
|
179
|
+
new Table({ ddl: string, glob: string, extract: (path: string) => Record<string, unknown>[] })
|
|
180
180
|
```
|
|
181
181
|
|
|
182
182
|
:::
|
|
@@ -187,7 +187,7 @@ Defines a mapping from files to SQLite table rows.
|
|
|
187
187
|
|
|
188
188
|
- `ddl` -- A `CREATE TABLE` statement. The table name is parsed from this DDL.
|
|
189
189
|
- `glob` -- A glob pattern matched against file paths relative to the root directory.
|
|
190
|
-
- `extract` -- A callable `(path
|
|
190
|
+
- `extract` -- A callable `(path) -> list[dict]`. Receives the absolute filesystem path of the matched file. `dirsql` does not read file contents; a callback that needs the file body reads `path` itself. Returns a list of dicts (maps) from column names to values. Return an empty list to skip a file.
|
|
191
191
|
|
|
192
192
|
**Attributes:**
|
|
193
193
|
|
package/docs/getting-started.md
CHANGED
|
@@ -31,7 +31,7 @@ cargo install dirsql --features cli
|
|
|
31
31
|
|
|
32
32
|
:::
|
|
33
33
|
|
|
34
|
-
See the [CLI
|
|
34
|
+
See the [CLI section](./cli/) for details on the command-line interface, and the [Rust library README](https://github.com/thekevinscott/dirsql/tree/main/packages/rust) for the library-vs-CLI feature split.
|
|
35
35
|
|
|
36
36
|
## Quick start
|
|
37
37
|
|
|
@@ -63,12 +63,12 @@ async def main():
|
|
|
63
63
|
Table(
|
|
64
64
|
ddl="CREATE TABLE posts (title TEXT, author TEXT)",
|
|
65
65
|
glob="posts/*.json",
|
|
66
|
-
extract=lambda path
|
|
66
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
67
67
|
),
|
|
68
68
|
Table(
|
|
69
69
|
ddl="CREATE TABLE authors (id TEXT, name TEXT)",
|
|
70
70
|
glob="authors/*.json",
|
|
71
|
-
extract=lambda path
|
|
71
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
72
72
|
),
|
|
73
73
|
],
|
|
74
74
|
)
|
|
@@ -120,12 +120,12 @@ let db = DirSQL::new(
|
|
|
120
120
|
Table::new(
|
|
121
121
|
"CREATE TABLE posts (title TEXT, author TEXT)",
|
|
122
122
|
"posts/*.json",
|
|
123
|
-
|
|
|
123
|
+
|path| vec![row_from_json(&std::fs::read_to_string(path).unwrap())],
|
|
124
124
|
),
|
|
125
125
|
Table::new(
|
|
126
126
|
"CREATE TABLE authors (id TEXT, name TEXT)",
|
|
127
127
|
"authors/*.json",
|
|
128
|
-
|
|
|
128
|
+
|path| vec![row_from_json(&std::fs::read_to_string(path).unwrap())],
|
|
129
129
|
),
|
|
130
130
|
],
|
|
131
131
|
)?;
|
|
@@ -139,18 +139,19 @@ let results = db.query(
|
|
|
139
139
|
```
|
|
140
140
|
|
|
141
141
|
```typescript [TypeScript]
|
|
142
|
+
import { readFileSync } from 'node:fs';
|
|
142
143
|
import { DirSQL, type TableDef } from 'dirsql';
|
|
143
144
|
|
|
144
145
|
const tables: TableDef[] = [
|
|
145
146
|
{
|
|
146
147
|
ddl: 'CREATE TABLE posts (title TEXT, author TEXT)',
|
|
147
148
|
glob: 'posts/*.json',
|
|
148
|
-
extract: (
|
|
149
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
149
150
|
},
|
|
150
151
|
{
|
|
151
152
|
ddl: 'CREATE TABLE authors (id TEXT, name TEXT)',
|
|
152
153
|
glob: 'authors/*.json',
|
|
153
|
-
extract: (
|
|
154
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
154
155
|
},
|
|
155
156
|
];
|
|
156
157
|
|
|
@@ -169,8 +170,8 @@ const results = await db.query(`
|
|
|
169
170
|
## What happens at startup
|
|
170
171
|
|
|
171
172
|
1. `dirsql` walks the directory tree
|
|
172
|
-
2. Files matching each table's glob pattern are
|
|
173
|
-
3. The `extract` function
|
|
173
|
+
2. Files matching each table's glob pattern are identified
|
|
174
|
+
3. The `extract` function receives each matched file's absolute path and returns rows
|
|
174
175
|
4. Rows are inserted into an in-memory SQLite database
|
|
175
176
|
5. SQL queries run against that database
|
|
176
177
|
|
|
@@ -182,5 +183,5 @@ The filesystem is always the source of truth. The database is rebuilt from files
|
|
|
182
183
|
- [Querying](./guide/querying.md) -- SQL queries and return format
|
|
183
184
|
- [File Watching](./guide/watching.md) -- real-time change events
|
|
184
185
|
- [Async API](./guide/async.md) -- async ready(), query(), and watch()
|
|
185
|
-
- [
|
|
186
|
+
- [Using `dirsql` from the CLI](./cli/) -- `dirsql` runs an HTTP server (`POST /query`, `GET /events` SSE)
|
|
186
187
|
- [Collaboration with CRDTs](./guide/crdt.md) -- multi-writer document merging alongside `dirsql`
|
package/docs/guide/async.md
CHANGED
|
@@ -24,7 +24,7 @@ async def main():
|
|
|
24
24
|
Table(
|
|
25
25
|
ddl="CREATE TABLE items (name TEXT, value INTEGER)",
|
|
26
26
|
glob="data/*.json",
|
|
27
|
-
extract=lambda path
|
|
27
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
28
28
|
),
|
|
29
29
|
],
|
|
30
30
|
)
|
|
@@ -47,7 +47,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
47
47
|
Table::new(
|
|
48
48
|
"CREATE TABLE items (name TEXT, value INTEGER)",
|
|
49
49
|
"data/*.json",
|
|
50
|
-
|
|
|
50
|
+
|path| vec![serde_json::from_str(&std::fs::read_to_string(path).unwrap()).unwrap()],
|
|
51
51
|
),
|
|
52
52
|
],
|
|
53
53
|
)?;
|
|
@@ -59,6 +59,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
59
59
|
```
|
|
60
60
|
|
|
61
61
|
```typescript [TypeScript]
|
|
62
|
+
import { readFileSync } from 'node:fs';
|
|
62
63
|
import { DirSQL, Table } from 'dirsql';
|
|
63
64
|
|
|
64
65
|
const db = new DirSQL({
|
|
@@ -67,7 +68,7 @@ const db = new DirSQL({
|
|
|
67
68
|
new Table({
|
|
68
69
|
ddl: 'CREATE TABLE items (name TEXT, value INTEGER)',
|
|
69
70
|
glob: 'data/*.json',
|
|
70
|
-
extract: (
|
|
71
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
71
72
|
}),
|
|
72
73
|
],
|
|
73
74
|
});
|
package/docs/guide/crdt.md
CHANGED
|
@@ -60,7 +60,7 @@ db = DirSQL(
|
|
|
60
60
|
ddl="CREATE TABLE posts (id TEXT, title TEXT, body TEXT, updated INTEGER)",
|
|
61
61
|
# Match the JSON view, not the raw CRDT binary.
|
|
62
62
|
glob="posts/*/view.json",
|
|
63
|
-
extract=lambda path
|
|
63
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
64
64
|
),
|
|
65
65
|
],
|
|
66
66
|
)
|
|
@@ -76,20 +76,21 @@ let db = DirSQL::new(
|
|
|
76
76
|
Table::new(
|
|
77
77
|
"CREATE TABLE posts (id TEXT, title TEXT, body TEXT, updated INTEGER)",
|
|
78
78
|
"posts/*/view.json",
|
|
79
|
-
|
|
|
79
|
+
|path| vec![row_from_json(&std::fs::read_to_string(path).unwrap())],
|
|
80
80
|
),
|
|
81
81
|
],
|
|
82
82
|
)?;
|
|
83
83
|
```
|
|
84
84
|
|
|
85
85
|
```typescript [TypeScript]
|
|
86
|
+
import { readFileSync } from 'node:fs';
|
|
86
87
|
import { DirSQL, type TableDef } from 'dirsql';
|
|
87
88
|
|
|
88
89
|
const tables: TableDef[] = [
|
|
89
90
|
{
|
|
90
91
|
ddl: 'CREATE TABLE posts (id TEXT, title TEXT, body TEXT, updated INTEGER)',
|
|
91
92
|
glob: 'posts/*/view.json',
|
|
92
|
-
extract: (
|
|
93
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
93
94
|
},
|
|
94
95
|
];
|
|
95
96
|
|
package/docs/guide/querying.md
CHANGED
|
@@ -8,6 +8,10 @@ canonical: https://thekevinscott.github.io/dirsql/guide/querying
|
|
|
8
8
|
|
|
9
9
|
Once a `DirSQL` instance is created, the initial directory scan is complete and you can run SQL queries against the indexed data.
|
|
10
10
|
|
|
11
|
+
::: tip From the CLI
|
|
12
|
+
The `dirsql` HTTP server exposes the same query interface over [`POST /query`](../cli/http-api.md#post-query). Send `{"sql": "..."}` and get back the same JSON array of row objects. See the [CLI section](../cli/) for the full server setup.
|
|
13
|
+
:::
|
|
14
|
+
|
|
11
15
|
## Basic queries
|
|
12
16
|
|
|
13
17
|
::: code-group
|
package/docs/guide/tables.md
CHANGED
|
@@ -18,7 +18,7 @@ from dirsql import Table
|
|
|
18
18
|
table = Table(
|
|
19
19
|
ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
20
20
|
glob="comments/**/index.jsonl",
|
|
21
|
-
extract=lambda path
|
|
21
|
+
extract=lambda path: [
|
|
22
22
|
{"id": "...", "body": "...", "author": "..."}
|
|
23
23
|
],
|
|
24
24
|
)
|
|
@@ -31,7 +31,7 @@ use std::collections::HashMap;
|
|
|
31
31
|
let table = Table::new(
|
|
32
32
|
"CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
33
33
|
"comments/**/index.jsonl",
|
|
34
|
-
|_path
|
|
34
|
+
|_path| {
|
|
35
35
|
let mut row: HashMap<String, Value> = HashMap::new();
|
|
36
36
|
row.insert("id".into(), Value::Text("...".into()));
|
|
37
37
|
row.insert("body".into(), Value::Text("...".into()));
|
|
@@ -47,7 +47,7 @@ import type { TableDef } from 'dirsql';
|
|
|
47
47
|
const table: TableDef = {
|
|
48
48
|
ddl: 'CREATE TABLE comments (id TEXT, body TEXT, author TEXT)',
|
|
49
49
|
glob: 'comments/**/index.jsonl',
|
|
50
|
-
extract: (_path
|
|
50
|
+
extract: (_path) => [
|
|
51
51
|
{ id: '...', body: '...', author: '...' },
|
|
52
52
|
],
|
|
53
53
|
};
|
|
@@ -89,38 +89,38 @@ Glob syntax follows standard Unix globbing rules. `**` matches any number of dir
|
|
|
89
89
|
|
|
90
90
|
### `extract`
|
|
91
91
|
|
|
92
|
-
A callable `(path: str
|
|
92
|
+
A callable `(path: str) -> list[dict]` that converts a file into rows.
|
|
93
93
|
|
|
94
|
-
- `path` is the
|
|
95
|
-
- `content` is the file content as a string
|
|
94
|
+
- `path` is the **absolute filesystem path** of the matched file
|
|
96
95
|
- Return a list of dicts, where each dict maps column names to values
|
|
97
96
|
- Return an empty list to skip a file
|
|
98
97
|
|
|
98
|
+
`dirsql` does not read file contents for you. If your extract needs the file
|
|
99
|
+
body, read it inside the callback using `path`. Callbacks that derive columns
|
|
100
|
+
only from the path (or that rely solely on the auto-injected filesystem-fact
|
|
101
|
+
columns) never touch the file at all.
|
|
102
|
+
|
|
99
103
|
```python
|
|
100
104
|
import json
|
|
101
105
|
|
|
102
106
|
# Single-object JSON files: one row per file
|
|
103
|
-
extract
|
|
107
|
+
def extract(path):
|
|
108
|
+
with open(path, encoding="utf-8") as f:
|
|
109
|
+
return [json.loads(f.read())]
|
|
104
110
|
|
|
105
111
|
# JSONL files: one row per line
|
|
106
|
-
extract
|
|
107
|
-
|
|
108
|
-
]
|
|
112
|
+
def extract(path):
|
|
113
|
+
with open(path, encoding="utf-8") as f:
|
|
114
|
+
return [json.loads(line) for line in f]
|
|
109
115
|
|
|
110
|
-
# Derive
|
|
116
|
+
# Derive a value from the file path alone -- no file read
|
|
111
117
|
import os
|
|
112
|
-
extract=lambda path
|
|
113
|
-
{
|
|
114
|
-
"id": os.path.basename(os.path.dirname(path)),
|
|
115
|
-
"body": json.loads(line)["body"],
|
|
116
|
-
}
|
|
117
|
-
for line in content.splitlines()
|
|
118
|
-
for _ in [json.loads(line)]
|
|
119
|
-
]
|
|
118
|
+
extract = lambda path: [{"id": os.path.basename(os.path.dirname(path))}]
|
|
120
119
|
|
|
121
120
|
# Conditionally skip files
|
|
122
|
-
def extract(path
|
|
123
|
-
|
|
121
|
+
def extract(path):
|
|
122
|
+
with open(path, encoding="utf-8") as f:
|
|
123
|
+
data = json.loads(f.read())
|
|
124
124
|
if data.get("draft"):
|
|
125
125
|
return []
|
|
126
126
|
return [data]
|
|
@@ -142,12 +142,12 @@ db = DirSQL(
|
|
|
142
142
|
Table(
|
|
143
143
|
ddl="CREATE TABLE posts (title TEXT, author_id TEXT)",
|
|
144
144
|
glob="posts/*.json",
|
|
145
|
-
extract=lambda path
|
|
145
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
146
146
|
),
|
|
147
147
|
Table(
|
|
148
148
|
ddl="CREATE TABLE authors (id TEXT, name TEXT)",
|
|
149
149
|
glob="authors/*.json",
|
|
150
|
-
extract=lambda path
|
|
150
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
151
151
|
),
|
|
152
152
|
],
|
|
153
153
|
)
|
|
@@ -184,12 +184,12 @@ let db = DirSQL::new(
|
|
|
184
184
|
Table::new(
|
|
185
185
|
"CREATE TABLE posts (title TEXT, author_id TEXT)",
|
|
186
186
|
"posts/*.json",
|
|
187
|
-
|
|
|
187
|
+
|path| vec![row_from_json(&std::fs::read_to_string(path).unwrap())],
|
|
188
188
|
),
|
|
189
189
|
Table::new(
|
|
190
190
|
"CREATE TABLE authors (id TEXT, name TEXT)",
|
|
191
191
|
"authors/*.json",
|
|
192
|
-
|
|
|
192
|
+
|path| vec![row_from_json(&std::fs::read_to_string(path).unwrap())],
|
|
193
193
|
),
|
|
194
194
|
],
|
|
195
195
|
)?;
|
|
@@ -197,17 +197,18 @@ let db = DirSQL::new(
|
|
|
197
197
|
|
|
198
198
|
```typescript [TypeScript]
|
|
199
199
|
import { DirSQL, type TableDef } from 'dirsql';
|
|
200
|
+
import { readFileSync } from 'node:fs';
|
|
200
201
|
|
|
201
202
|
const tables: TableDef[] = [
|
|
202
203
|
{
|
|
203
204
|
ddl: 'CREATE TABLE posts (title TEXT, author_id TEXT)',
|
|
204
205
|
glob: 'posts/*.json',
|
|
205
|
-
extract: (
|
|
206
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
206
207
|
},
|
|
207
208
|
{
|
|
208
209
|
ddl: 'CREATE TABLE authors (id TEXT, name TEXT)',
|
|
209
210
|
glob: 'authors/*.json',
|
|
210
|
-
extract: (
|
|
211
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
211
212
|
},
|
|
212
213
|
];
|
|
213
214
|
|
package/docs/guide/watching.md
CHANGED
|
@@ -8,6 +8,10 @@ canonical: https://thekevinscott.github.io/dirsql/guide/watching
|
|
|
8
8
|
|
|
9
9
|
`dirsql` can monitor the filesystem for changes and emit events when rows are inserted, updated, or deleted. This is useful for building reactive applications that respond to file changes in real time.
|
|
10
10
|
|
|
11
|
+
::: tip From the CLI
|
|
12
|
+
The `dirsql` HTTP server streams the same events over [`GET /events`](../cli/http-api.md#get-events) as a [Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events) stream. Each `data:` payload uses the same JSON schema described in [Event types](#event-types) below. See the [CLI section](../cli/) for the full server setup.
|
|
13
|
+
:::
|
|
14
|
+
|
|
11
15
|
## Starting a watch stream
|
|
12
16
|
|
|
13
17
|
::: code-group
|
|
@@ -22,7 +26,7 @@ db = DirSQL(
|
|
|
22
26
|
Table(
|
|
23
27
|
ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
24
28
|
glob="comments/**/*.json",
|
|
25
|
-
extract=lambda path
|
|
29
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
26
30
|
),
|
|
27
31
|
],
|
|
28
32
|
)
|
|
@@ -63,7 +67,7 @@ let db = DirSQL::new(
|
|
|
63
67
|
Table::new(
|
|
64
68
|
"CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
65
69
|
"comments/**/*.json",
|
|
66
|
-
|
|
|
70
|
+
|path| vec![row_from_json(&std::fs::read_to_string(path).unwrap())],
|
|
67
71
|
),
|
|
68
72
|
],
|
|
69
73
|
)?;
|
|
@@ -88,13 +92,14 @@ while let Some(event) = stream.next().await {
|
|
|
88
92
|
```
|
|
89
93
|
|
|
90
94
|
```typescript [TypeScript]
|
|
95
|
+
import { readFileSync } from 'node:fs';
|
|
91
96
|
import { DirSQL, type TableDef } from 'dirsql';
|
|
92
97
|
|
|
93
98
|
const tables: TableDef[] = [
|
|
94
99
|
{
|
|
95
100
|
ddl: 'CREATE TABLE comments (id TEXT, body TEXT, author TEXT)',
|
|
96
101
|
glob: 'comments/**/*.json',
|
|
97
|
-
extract: (
|
|
102
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
98
103
|
},
|
|
99
104
|
];
|
|
100
105
|
|
package/docs/index.md
CHANGED
|
@@ -35,7 +35,7 @@ db = DirSQL(
|
|
|
35
35
|
Table(
|
|
36
36
|
ddl="CREATE TABLE files (name TEXT, size INTEGER, type TEXT)",
|
|
37
37
|
glob="data/*.json",
|
|
38
|
-
extract=lambda path
|
|
38
|
+
extract=lambda path: [json.loads(open(path, encoding="utf-8").read())],
|
|
39
39
|
),
|
|
40
40
|
],
|
|
41
41
|
)
|
|
@@ -53,7 +53,7 @@ let db = DirSQL::new(
|
|
|
53
53
|
Table::new(
|
|
54
54
|
"CREATE TABLE files (name TEXT, size INTEGER, type TEXT)",
|
|
55
55
|
"data/*.json",
|
|
56
|
-
|
|
|
56
|
+
|path| vec![serde_json::from_str(&std::fs::read_to_string(path).unwrap()).unwrap()],
|
|
57
57
|
),
|
|
58
58
|
],
|
|
59
59
|
)?;
|
|
@@ -62,6 +62,7 @@ let large = db.query("SELECT * FROM files WHERE size > 1000")?;
|
|
|
62
62
|
```
|
|
63
63
|
|
|
64
64
|
```typescript [TypeScript]
|
|
65
|
+
import { readFileSync } from 'node:fs';
|
|
65
66
|
import { DirSQL, Table } from 'dirsql';
|
|
66
67
|
|
|
67
68
|
const db = new DirSQL({
|
|
@@ -70,7 +71,7 @@ const db = new DirSQL({
|
|
|
70
71
|
new Table({
|
|
71
72
|
ddl: 'CREATE TABLE files (name TEXT, size INTEGER, type TEXT)',
|
|
72
73
|
glob: 'data/*.json',
|
|
73
|
-
extract: (
|
|
74
|
+
extract: (path) => [JSON.parse(readFileSync(path, 'utf8'))],
|
|
74
75
|
}),
|
|
75
76
|
],
|
|
76
77
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dirsql",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.7",
|
|
4
4
|
"description": "Ephemeral SQL index over a local directory",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "https://github.com/thekevinscott/dirsql",
|
|
@@ -191,15 +191,15 @@
|
|
|
191
191
|
]
|
|
192
192
|
},
|
|
193
193
|
"optionalDependencies": {
|
|
194
|
-
"@dirsql/lib-linux-x64-gnu": "0.3.
|
|
195
|
-
"@dirsql/lib-linux-arm64-gnu": "0.3.
|
|
196
|
-
"@dirsql/lib-darwin-x64": "0.3.
|
|
197
|
-
"@dirsql/lib-darwin-arm64": "0.3.
|
|
198
|
-
"@dirsql/lib-win32-x64-msvc": "0.3.
|
|
199
|
-
"@dirsql/cli-linux-x64-gnu": "0.3.
|
|
200
|
-
"@dirsql/cli-linux-arm64-gnu": "0.3.
|
|
201
|
-
"@dirsql/cli-darwin-x64": "0.3.
|
|
202
|
-
"@dirsql/cli-darwin-arm64": "0.3.
|
|
203
|
-
"@dirsql/cli-win32-x64-msvc": "0.3.
|
|
194
|
+
"@dirsql/lib-linux-x64-gnu": "0.3.7",
|
|
195
|
+
"@dirsql/lib-linux-arm64-gnu": "0.3.7",
|
|
196
|
+
"@dirsql/lib-darwin-x64": "0.3.7",
|
|
197
|
+
"@dirsql/lib-darwin-arm64": "0.3.7",
|
|
198
|
+
"@dirsql/lib-win32-x64-msvc": "0.3.7",
|
|
199
|
+
"@dirsql/cli-linux-x64-gnu": "0.3.7",
|
|
200
|
+
"@dirsql/cli-linux-arm64-gnu": "0.3.7",
|
|
201
|
+
"@dirsql/cli-darwin-x64": "0.3.7",
|
|
202
|
+
"@dirsql/cli-darwin-arm64": "0.3.7",
|
|
203
|
+
"@dirsql/cli-win32-x64-msvc": "0.3.7"
|
|
204
204
|
}
|
|
205
205
|
}
|
package/docs/guide/cli.md
DELETED
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
canonical: https://thekevinscott.github.io/dirsql/guide/cli
|
|
3
|
-
---
|
|
4
|
-
|
|
5
|
-
# Command-Line Interface
|
|
6
|
-
|
|
7
|
-
> Online: <https://thekevinscott.github.io/dirsql/guide/cli>
|
|
8
|
-
|
|
9
|
-
Running `dirsql` starts an HTTP server that exposes the same functionality as the SDK.
|
|
10
|
-
|
|
11
|
-
## Installation
|
|
12
|
-
|
|
13
|
-
::: code-group
|
|
14
|
-
|
|
15
|
-
```bash [npm]
|
|
16
|
-
npx dirsql
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
```bash [PyPI]
|
|
20
|
-
uvx dirsql
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
```bash [Cargo]
|
|
24
|
-
# Installs the binary only (non-default feature)
|
|
25
|
-
cargo install dirsql --features cli
|
|
26
|
-
dirsql
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
:::
|
|
30
|
-
|
|
31
|
-
::: tip For Rust library consumers
|
|
32
|
-
The `cli` feature is **opt-in**. Adding `dirsql` as a library dependency (`cargo add dirsql`) pulls no CLI dependencies — only the core library. See the [Rust library README](https://github.com/thekevinscott/dirsql/tree/main/packages/rust) for details.
|
|
33
|
-
:::
|
|
34
|
-
|
|
35
|
-
## Subcommands
|
|
36
|
-
|
|
37
|
-
| Command | Purpose |
|
|
38
|
-
|---|---|
|
|
39
|
-
| `dirsql` (no subcommand) | Start the long-lived HTTP server (default behavior, see below). |
|
|
40
|
-
| `dirsql init` | Generate a starter `.dirsql.toml` from the contents of a directory. See [Generating a Config](./init.md). |
|
|
41
|
-
|
|
42
|
-
## Running the server
|
|
43
|
-
|
|
44
|
-
Run `dirsql` from the directory containing your files:
|
|
45
|
-
|
|
46
|
-
```bash
|
|
47
|
-
dirsql
|
|
48
|
-
|
|
49
|
-
$ Running at localhost:7117
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
### Flags
|
|
53
|
-
|
|
54
|
-
| Flag | Default | Description |
|
|
55
|
-
|---|---|---|
|
|
56
|
-
| `--config <path>` | `./.dirsql.toml` | Path to the config file. The index is rooted at the directory containing this file. |
|
|
57
|
-
| `--host <addr>` | `localhost` | Bind address |
|
|
58
|
-
| `--port <n>` | `7117` | TCP port to bind |
|
|
59
|
-
|
|
60
|
-
## HTTP API
|
|
61
|
-
|
|
62
|
-
### `POST /query`
|
|
63
|
-
|
|
64
|
-
Run a SQL query. Request body is JSON:
|
|
65
|
-
|
|
66
|
-
```json
|
|
67
|
-
{"sql": "SELECT title, author FROM posts WHERE draft = 0"}
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
Response is a JSON array of row objects:
|
|
71
|
-
|
|
72
|
-
```json
|
|
73
|
-
[
|
|
74
|
-
{"title": "Hello World", "author": "alice"},
|
|
75
|
-
{"title": "Second Post", "author": "bob"}
|
|
76
|
-
]
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
On error, the server returns a non-2xx status with a JSON body:
|
|
80
|
-
|
|
81
|
-
```json
|
|
82
|
-
{"error": "syntax error near \"SLECT\""}
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
Malformed SQL returns `400`, not `500`, because the client sent bad input. A missing or unreadable config returns `503`.
|
|
86
|
-
|
|
87
|
-
```bash
|
|
88
|
-
curl -s http://localhost:7117/query \
|
|
89
|
-
-H 'content-type: application/json' \
|
|
90
|
-
-d '{"sql":"SELECT COUNT(*) AS n FROM posts"}' \
|
|
91
|
-
| jq
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
### `GET /events`
|
|
95
|
-
|
|
96
|
-
Opens a [Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events) stream of change events. Each `data:` payload is the same JSON schema the SDK emits from [`db.watch()`](./watching.md#event-types):
|
|
97
|
-
|
|
98
|
-
```
|
|
99
|
-
event: row
|
|
100
|
-
data: {"action":"insert","table":"posts","file_path":"posts/hello.json","row":{"title":"Hello World","author":"alice"},"old_row":null}
|
|
101
|
-
|
|
102
|
-
event: row
|
|
103
|
-
data: {"action":"update","table":"posts","file_path":"posts/hello.json","row":{"title":"Hello, world","author":"alice"},"old_row":{"title":"Hello World","author":"alice"}}
|
|
104
|
-
|
|
105
|
-
event: row
|
|
106
|
-
data: {"action":"delete","table":"posts","file_path":"posts/second.json","row":{"title":"Second Post","author":"bob"},"old_row":null}
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
Errors during extraction appear as `{"action":"error",...}` events on the same stream. They do **not** terminate the stream — a malformed file is a per-event problem, not a server-wide one.
|
|
110
|
-
|
|
111
|
-
```bash
|
|
112
|
-
curl -N http://localhost:7117/events
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
## Piping event streams
|
|
116
|
-
|
|
117
|
-
The SSE stream is easy to tee into shell tools with `curl -N` plus `jq`:
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
# Log every delete to a file
|
|
121
|
-
curl -N http://localhost:7117/events \
|
|
122
|
-
| jq -cR 'fromjson? | select(.action=="delete")' \
|
|
123
|
-
>> deletes.log
|
|
124
|
-
|
|
125
|
-
# Alert on errors
|
|
126
|
-
curl -N http://localhost:7117/events \
|
|
127
|
-
| jq -c 'fromjson? | select(.action=="error")' \
|
|
128
|
-
| while read -r line; do notify-send "dirsql error" "$line"; done
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
(The `fromjson?` wrapping strips the `data:` framing; drop it if your SSE client is already parsing frames.)
|
package/docs/guide/config.md
DELETED
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
canonical: https://thekevinscott.github.io/dirsql/guide/config
|
|
3
|
-
---
|
|
4
|
-
|
|
5
|
-
# Configuration File
|
|
6
|
-
|
|
7
|
-
> Online: <https://thekevinscott.github.io/dirsql/guide/config>
|
|
8
|
-
|
|
9
|
-
`dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
|
|
10
|
-
way produce **one row per matched file**. Each row's columns come from
|
|
11
|
-
filesystem facts:
|
|
12
|
-
|
|
13
|
-
- **Glob path captures** — named `{placeholder}` segments in the glob.
|
|
14
|
-
- **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
|
|
15
|
-
stat-derived metadata.
|
|
16
|
-
|
|
17
|
-
Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
|
|
18
|
-
configured in `.dirsql.toml`. If you need columns derived from file
|
|
19
|
-
contents, register a programmatic [`Table`](./tables.md) whose `extract`
|
|
20
|
-
function does the parsing in your host language.
|
|
21
|
-
|
|
22
|
-
## Basic Example
|
|
23
|
-
|
|
24
|
-
```toml
|
|
25
|
-
[dirsql]
|
|
26
|
-
ignore = ["node_modules/**", ".git/**"]
|
|
27
|
-
|
|
28
|
-
[[table]]
|
|
29
|
-
ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
|
|
30
|
-
glob = "posts/*.md"
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
Each `posts/*.md` file produces one row. The DDL declares which stat
|
|
34
|
-
virtuals are surfaced as SQL columns.
|
|
35
|
-
|
|
36
|
-
## Loading a Config File
|
|
37
|
-
|
|
38
|
-
Pass the config file path to the `DirSQL` constructor:
|
|
39
|
-
|
|
40
|
-
::: code-group
|
|
41
|
-
|
|
42
|
-
```python [Python]
|
|
43
|
-
from dirsql import DirSQL
|
|
44
|
-
|
|
45
|
-
db = DirSQL(config="./my-project/.dirsql.toml")
|
|
46
|
-
await db.ready()
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
```rust [Rust]
|
|
50
|
-
use dirsql::DirSQL;
|
|
51
|
-
|
|
52
|
-
let db = DirSQL::builder()
|
|
53
|
-
.config("./my-project/.dirsql.toml")
|
|
54
|
-
.build()?;
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
```typescript [TypeScript]
|
|
58
|
-
import { DirSQL } from "dirsql";
|
|
59
|
-
|
|
60
|
-
// String argument is interpreted as a config file path.
|
|
61
|
-
const db = new DirSQL("./my-project/.dirsql.toml");
|
|
62
|
-
await db.ready;
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
:::
|
|
66
|
-
|
|
67
|
-
By default, the root directory scanned is the config file's parent
|
|
68
|
-
directory. Override it by passing `root` explicitly (the explicit value
|
|
69
|
-
wins and a warning is emitted) or by declaring `[dirsql].root` in the
|
|
70
|
-
config file itself.
|
|
71
|
-
|
|
72
|
-
## Root Directory
|
|
73
|
-
|
|
74
|
-
By default, the config file's parent directory is the scan root. To index
|
|
75
|
-
a different location, declare `[dirsql].root` (relative paths are resolved
|
|
76
|
-
relative to the config file's parent):
|
|
77
|
-
|
|
78
|
-
```toml
|
|
79
|
-
[dirsql]
|
|
80
|
-
root = "../data"
|
|
81
|
-
ignore = ["node_modules/**"]
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
## Stat Virtuals
|
|
85
|
-
|
|
86
|
-
Every config-defined table can expose any of these reserved columns. Add
|
|
87
|
-
the ones you want to your DDL; the rest are silently dropped.
|
|
88
|
-
|
|
89
|
-
| Column | Type | Source |
|
|
90
|
-
|--------|---------|--------|
|
|
91
|
-
| `_path` | TEXT | The file's path relative to the scan root. |
|
|
92
|
-
| `_basename` | TEXT | The filename including extension. |
|
|
93
|
-
| `_dir` | TEXT | The parent directory path (relative to root). |
|
|
94
|
-
| `_ext` | TEXT | The file extension, lowercased, no leading dot. |
|
|
95
|
-
| `_size` | INTEGER | Size in bytes. |
|
|
96
|
-
| `_mtime` | INTEGER | Last-modified time, unix seconds. |
|
|
97
|
-
| `_ctime` | INTEGER | Created/changed time, unix seconds. |
|
|
98
|
-
|
|
99
|
-
Example query:
|
|
100
|
-
|
|
101
|
-
```sql
|
|
102
|
-
SELECT _basename, _size
|
|
103
|
-
FROM posts
|
|
104
|
-
WHERE _mtime > strftime('%s', '2024-01-01')
|
|
105
|
-
ORDER BY _mtime DESC;
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
## Path Captures
|
|
109
|
-
|
|
110
|
-
Use `{name}` in glob patterns to extract path segments as columns. Add a
|
|
111
|
-
matching column name to the DDL and the capture is auto-populated:
|
|
112
|
-
|
|
113
|
-
```toml
|
|
114
|
-
[[table]]
|
|
115
|
-
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
116
|
-
glob = "_comments/{thread_id}/*.jsonl"
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
|
|
120
|
-
`thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
|
|
121
|
-
to the file's modification time.
|
|
122
|
-
|
|
123
|
-
## Ignore Patterns
|
|
124
|
-
|
|
125
|
-
Files and directories matching the `ignore` list are skipped entirely (they are not even scanned):
|
|
126
|
-
|
|
127
|
-
```toml
|
|
128
|
-
[dirsql]
|
|
129
|
-
ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
The top-level `.dirsql/` directory is always excluded, whether you list it
|
|
133
|
-
or not — it is a reserved namespace for `dirsql`'s own metadata (see
|
|
134
|
-
[Persistence](./persistence.md)).
|
|
135
|
-
|
|
136
|
-
## Persistence
|
|
137
|
-
|
|
138
|
-
Set `persist = true` to keep the SQLite database on disk between runs
|
|
139
|
-
instead of rebuilding from scratch on every startup:
|
|
140
|
-
|
|
141
|
-
```toml
|
|
142
|
-
[dirsql]
|
|
143
|
-
persist = true
|
|
144
|
-
# persist_path = ".dirsql/cache.db" # optional; this is the default
|
|
145
|
-
```
|
|
146
|
-
|
|
147
|
-
See [Persistence](./persistence.md) for the full reconcile algorithm,
|
|
148
|
-
storage layout, and limitations.
|
|
149
|
-
|
|
150
|
-
## Strict Mode
|
|
151
|
-
|
|
152
|
-
By default, auto-injected virtuals that aren't in the DDL are silently
|
|
153
|
-
dropped, and so are undeclared keys returned by an extract callback. Enable strict mode
|
|
154
|
-
to error when an extract emits keys not declared in the DDL:
|
|
155
|
-
|
|
156
|
-
```toml
|
|
157
|
-
[[table]]
|
|
158
|
-
ddl = "CREATE TABLE comments (thread_id TEXT)"
|
|
159
|
-
glob = "_comments/{thread_id}/*.jsonl"
|
|
160
|
-
strict = true
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
Strict mode does **not** apply to auto-injected stat virtuals — those are
|
|
164
|
-
always filtered to the DDL's declared columns regardless. Strict mode
|
|
165
|
-
applies only to keys produced by an extract callback (relevant for
|
|
166
|
-
programmatic [tables](./tables.md)).
|
|
167
|
-
|
|
168
|
-
## Full Example
|
|
169
|
-
|
|
170
|
-
```toml
|
|
171
|
-
[dirsql]
|
|
172
|
-
ignore = ["node_modules/**", ".git/**", "dist/**"]
|
|
173
|
-
|
|
174
|
-
[[table]]
|
|
175
|
-
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
176
|
-
glob = "_comments/{thread_id}/*.jsonl"
|
|
177
|
-
|
|
178
|
-
[[table]]
|
|
179
|
-
ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
|
|
180
|
-
glob = "**/index.md"
|
|
181
|
-
|
|
182
|
-
[[table]]
|
|
183
|
-
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
184
|
-
glob = "logs/*.csv"
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
## When you need parsed content
|
|
188
|
-
|
|
189
|
-
`.dirsql.toml` does not parse file contents. For columns derived from the
|
|
190
|
-
*inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
|
|
191
|
-
register a programmatic [`Table`](./tables.md) instead, and parse the
|
|
192
|
-
bytes in your host language. Glob captures and stat virtuals are still
|
|
193
|
-
auto-injected into rows produced by your extract.
|
package/docs/guide/init.md
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
canonical: https://thekevinscott.github.io/dirsql/guide/init
|
|
3
|
-
---
|
|
4
|
-
|
|
5
|
-
# Generating a config with `dirsql init`
|
|
6
|
-
|
|
7
|
-
> Online: <https://thekevinscott.github.io/dirsql/guide/init>
|
|
8
|
-
|
|
9
|
-
`dirsql init` generates a `.dirsql.toml` by running `claude` over the target directory.
|
|
10
|
-
|
|
11
|
-
The output is limited to filesystem-fact tables. For content-aware schemas, see [Defining Tables](./tables.md).
|
|
12
|
-
|
|
13
|
-
## Examples
|
|
14
|
-
|
|
15
|
-
### Mixed files
|
|
16
|
-
|
|
17
|
-
```
|
|
18
|
-
my-downloads/
|
|
19
|
-
├── archive.zip
|
|
20
|
-
├── invoice.pdf
|
|
21
|
-
├── notes.txt
|
|
22
|
-
└── photo.jpg
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
```toml
|
|
26
|
-
[[table]]
|
|
27
|
-
ddl = "CREATE TABLE files (_path TEXT, _ext TEXT, _size INTEGER)"
|
|
28
|
-
glob = "*"
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
### Path captures
|
|
32
|
-
|
|
33
|
-
```
|
|
34
|
-
photos/
|
|
35
|
-
├── 2024-01/
|
|
36
|
-
│ ├── beach.jpg
|
|
37
|
-
│ └── sunset.jpg
|
|
38
|
-
└── 2024-02/
|
|
39
|
-
├── snow.jpg
|
|
40
|
-
└── mountain.jpg
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
```toml
|
|
44
|
-
[[table]]
|
|
45
|
-
ddl = "CREATE TABLE photos (month TEXT, _basename TEXT, _mtime INTEGER)"
|
|
46
|
-
glob = "{month}/*.jpg"
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
### Multiple tables
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
my-blog/
|
|
53
|
-
├── posts/
|
|
54
|
-
│ ├── hello-world.md
|
|
55
|
-
│ └── second.md
|
|
56
|
-
└── _comments/
|
|
57
|
-
└── hello-world/
|
|
58
|
-
├── 2024-01-15.jsonl
|
|
59
|
-
└── 2024-02-03.jsonl
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
```toml
|
|
63
|
-
[[table]]
|
|
64
|
-
ddl = "CREATE TABLE posts (_basename TEXT, _mtime INTEGER, _size INTEGER)"
|
|
65
|
-
glob = "posts/*.md"
|
|
66
|
-
|
|
67
|
-
[[table]]
|
|
68
|
-
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
69
|
-
glob = "_comments/{thread_id}/*.jsonl"
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
`init` will not overwrite an existing config without `--force`.
|
|
73
|
-
|
|
74
|
-
## Flags
|
|
75
|
-
|
|
76
|
-
| Flag | Default | Description |
|
|
77
|
-
|---|---|---|
|
|
78
|
-
| `--root <path>` | cwd | Directory to scan |
|
|
79
|
-
| `--output <path>` | `<root>/.dirsql.toml` | Output path |
|
|
80
|
-
| `--force` | off | Overwrite if the output exists |
|
|
81
|
-
|
|
82
|
-
## Authentication
|
|
83
|
-
|
|
84
|
-
Requires the `claude` CLI to be on your `PATH` and signed in. There is no separate API key. If `claude` is missing, `dirsql init` raises an exception.
|