dirsql 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -31,6 +31,17 @@ export interface DirSQLOptions {
31
31
  * relative to the config file's parent directory.
32
32
  */
33
33
  config?: string;
34
+ /**
35
+ * Enable persistent on-disk SQLite cache. When `true`, the database is
36
+ * written to `<root>/.dirsql/cache.db` (override via `persistPath`) so
37
+ * subsequent startups only re-parse files that have actually changed.
38
+ */
39
+ persist?: boolean;
40
+ /**
41
+ * Override the location of the persistent cache file. Ignored when
42
+ * `persist` is not `true`.
43
+ */
44
+ persistPath?: string;
34
45
  }
35
46
  /** A row-level event emitted by the file watcher. */
36
47
  export interface RowEvent {
@@ -52,7 +63,7 @@ interface NativeDirSQL {
52
63
  pollEvents(timeoutMs: number): Promise<RowEvent[]>;
53
64
  }
54
65
  interface NativeDirSQLConstructor {
55
- openAsync(root: string | null, tables: TableDef[] | null, ignore: string[] | null, config: string | null): Promise<NativeDirSQL>;
66
+ openAsync(root: string | null, tables: TableDef[] | null, ignore: string[] | null, config: string | null, persist: boolean | null, persistPath: string | null): Promise<NativeDirSQL>;
56
67
  }
57
68
  interface CoreModule {
58
69
  DirSQL: NativeDirSQLConstructor;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAcA,iEAAiE;AACjE,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,4EAA4E;IAC5E,OAAO,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAC1E,+DAA+D;IAC/D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;IACpB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qDAAqD;AACrD,MAAM,WAAW,QAAQ;IACvB;;;;OAIG;IACH,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACjD,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AAGD,UAAU,YAAY;IACpB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IACvD,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9B,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACpD;AAED,UAAU,uBAAuB;IAC/B,SAAS,CACP,IAAI,EAAE,MAAM,GAAG,IAAI,EACnB,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EACzB,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EACvB,MAAM,EAAE,MAAM,GAAG,IAAI,GACpB,OAAO,CAAC,YAAY,CAAC,CAAC;CAC1B;AAID,UAAU,UAAU;IAClB,MAAM,EAAE,uBAAuB,CAAC;CACjC;AAaD;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,GAAG,IAAI,CAEjE;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,qBAAa,MAAM;IACjB;;;;;;OAMG;IACH,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAG9B,OAAO,CAAC,MAAM,CAAgB;IAE9B,wDAAwD;gBAC5C,UAAU,EAAE,MAAM;IAC9B,yCAAyC;gBAC7B,OAAO,EAAE,aAAa;IAgBlC;;;;;;OAMG;IACG,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAK5D;;;;;;;OAOG;IACG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAKnC;;;;;;;OAOG;IACG,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAKxD;;;;;;;;;;OAUG;IACI,KAAK,IAAI,cAAc,CAA
C,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC;CAaxD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAcA,iEAAiE;AACjE,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,4EAA4E;IAC5E,OAAO,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAC1E,+DAA+D;IAC/D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;IACpB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qDAAqD;AACrD,MAAM,WAAW,QAAQ;IACvB;;;;OAIG;IACH,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACjD,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AAGD,UAAU,YAAY;IACpB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;IACvD,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9B,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACpD;AAED,UAAU,uBAAuB;IAC/B,SAAS,CACP,IAAI,EAAE,MAAM,GAAG,IAAI,EACnB,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EACzB,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,EACvB,MAAM,EAAE,MAAM,GAAG,IAAI,EACrB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,WAAW,EAAE,MAAM,GAAG,IAAI,GACzB,OAAO,CAAC,YAAY,CAAC,CAAC;CAC1B;AAID,UAAU,UAAU;IAClB,MAAM,EAAE,uBAAuB,CAAC;CACjC;AAaD;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,GAAG,IAAI,CAEjE;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,qBAAa,MAAM;IACjB;;;;;;OAMG;IACH,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAG9B,OAAO,CAAC,MAAM,CAAgB;IAE9B,wDAAwD;gBAC5C,UAAU,EAAE,MAAM;IAC9B,yCAAyC;gBAC7B,OAAO,EAAE,aAAa;IAkBlC;;;;;;OAMG;IACG,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAK5D;;;;;;;OAOG;IACG,Y
AAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAKnC;;;;;;;OAOG;IACG,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAKxD;;;;;;;;;;OAUG;IACI,KAAK,IAAI,cAAc,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC;CAaxD"}
package/dist/index.js CHANGED
@@ -73,7 +73,7 @@ export class DirSQL {
73
73
  constructor(arg) {
74
74
  const options = typeof arg === "string" ? { config: arg } : arg;
75
75
  const Ctor = getCore().DirSQL;
76
- const openPromise = Ctor.openAsync(options.root ?? null, options.tables ?? null, options.ignore ?? null, options.config ?? null);
76
+ const openPromise = Ctor.openAsync(options.root ?? null, options.tables ?? null, options.ignore ?? null, options.config ?? null, options.persist ?? null, options.persistPath ?? null);
77
77
  this.ready = openPromise.then((inner) => {
78
78
  this._inner = inner;
79
79
  });
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAAA,yBAAyB;AACzB,EAAE;AACF,wEAAwE;AACxE,kEAAkE;AAClE,qEAAqE;AACrE,qCAAqC;AACrC,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,mEAAmE;AAEnE,OAAO,EAAE,cAAc,IAAI,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AA2E9E,yEAAyE;AACzE,sEAAsE;AACtE,IAAI,IAAI,GAAsB,IAAI,CAAC;AAEnC,SAAS,OAAO;IACd,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,IAAI,GAAG,qBAAqB,EAAgB,CAAC;IAC/C,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,IAAI,GAAG,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,MAAM,OAAO,MAAM;IACjB;;;;;;OAMG;IACM,KAAK,CAAgB;IAE9B,gEAAgE;IACxD,MAAM,CAAgB;IAM9B,YAAY,GAA2B;QACrC,MAAM,OAAO,GACX,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAClD,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC,MAAM,CAAC;QAC9B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAChC,OAAO,CAAC,IAAI,IAAI,IAAI,EACpB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,CACvB,CAAC;QACF,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,KAAK,CAAC,GAAW;QACrB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY;QAChB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;IACpC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,SAAiB;QAChC,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,CAAC,KAAK;QACV,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;QACjC,OAAO,IAAI,EAAE,CAAC;YACZ,qEAAqE;YACrE,gEAAgE;YAChE,sEAAsE;YACtE,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACjD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../ts/index.ts"],"names":[],"mappings":"AAAA,yBAAyB;AACzB,EAAE;AACF,wEAAwE;AACxE,kEAAkE;AAClE,qEAAqE;AACrE,qCAAqC;AACrC,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,mEAAmE;AAEnE,OAAO,EAAE,cAAc,IAAI,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAwF9E,yEAAyE;AACzE,sEAAsE;AACtE,IAAI,IAAI,GAAsB,IAAI,CAAC;AAEnC,SAAS,OAAO;IACd,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,IAAI,GAAG,qBAAqB,EAAgB,CAAC;IAC/C,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,IAAI,GAAG,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,MAAM,OAAO,MAAM;IACjB;;;;;;OAMG;IACM,KAAK,CAAgB;IAE9B,gEAAgE;IACxD,MAAM,CAAgB;IAM9B,YAAY,GAA2B;QACrC,MAAM,OAAO,GACX,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAClD,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC,MAAM,CAAC;QAC9B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAChC,OAAO,CAAC,IAAI,IAAI,IAAI,EACpB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,MAAM,IAAI,IAAI,EACtB,OAAO,CAAC,OAAO,IAAI,IAAI,EACvB,OAAO,CAAC,WAAW,IAAI,IAAI,CAC5B,CAAC;QACF,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,KAAK,CAAC,GAAW;QACrB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY;QAChB,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;IACpC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,SAAiB;QAChC,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,CAAC,KAAK;QACV,MAAM,IAAI,CAAC,KAAK,CAAC;QACjB,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;QACjC,OAAO,IAAI,EAAE,CAAC;YACZ,qEAAqE;YACrE,gEAAgE;YAChE,sEAAsE;YACtE,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACjD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
@@ -6,7 +6,18 @@ canonical: https://thekevinscott.github.io/dirsql/guide/config
6
6
 
7
7
  > Online: <https://thekevinscott.github.io/dirsql/guide/config>
8
8
 
9
- `dirsql` can be configured with a `.dirsql.toml` file, allowing you to define tables declaratively without writing code.
9
+ `dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
10
+ way produce **one row per matched file**. Each row's columns come from
11
+ filesystem facts:
12
+
13
+ - **Glob path captures** — named `{placeholder}` segments in the glob.
14
+ - **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
15
+ stat-derived metadata.
16
+
17
+ Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
18
+ configured in `.dirsql.toml`. If you need columns derived from file
19
+ contents, register a programmatic [`Table`](./tables.md) whose `extract`
20
+ function does the parsing in your host language.
10
21
 
11
22
  ## Basic Example
12
23
 
@@ -15,11 +26,12 @@ canonical: https://thekevinscott.github.io/dirsql/guide/config
15
26
  ignore = ["node_modules/**", ".git/**"]
16
27
 
17
28
  [[table]]
18
- ddl = "CREATE TABLE posts (title TEXT, author TEXT)"
19
- glob = "posts/*.json"
29
+ ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
30
+ glob = "posts/*.md"
20
31
  ```
21
32
 
22
- The `format` is inferred from the glob extension (`.json` -> JSON, `.jsonl` -> JSONL, `.csv` -> CSV, etc.). Each JSON key maps to a column with the same name.
33
+ Each `posts/*.md` file produces one row. The DDL declares which stat
34
+ virtuals are surfaced as SQL columns.
23
35
 
24
36
  ## Loading a Config File
25
37
 
@@ -52,11 +64,16 @@ await db.ready;
52
64
 
53
65
  :::
54
66
 
55
- By default, the root directory scanned is the config file's parent directory. Override it by passing `root` explicitly (the explicit value wins and a warning is emitted) or by declaring `[dirsql].root` in the config file itself.
67
+ By default, the root directory scanned is the config file's parent
68
+ directory. Override it by passing `root` explicitly (the explicit value
69
+ wins and a warning is emitted) or by declaring `[dirsql].root` in the
70
+ config file itself.
56
71
 
57
72
  ## Root Directory
58
73
 
59
- By default, the config file's parent directory is the scan root. To index a different location, declare `[dirsql].root` (relative paths are resolved relative to the config file's parent):
74
+ By default, the config file's parent directory is the scan root. To index
75
+ a different location, declare `[dirsql].root` (relative paths are resolved
76
+ relative to the config file's parent):
60
77
 
61
78
  ```toml
62
79
  [dirsql]
@@ -64,122 +81,90 @@ root = "../data"
64
81
  ignore = ["node_modules/**"]
65
82
  ```
66
83
 
67
- ## Supported Formats
84
+ ## Stat Virtuals
68
85
 
69
- | Extension | Format | Rows |
70
- |---|---|---|
71
- | `.json` | JSON | Object = 1 row, Array = many rows |
72
- | `.jsonl`, `.ndjson` | JSONL | One row per line |
73
- | `.csv` | CSV | One row per data line (header = columns) |
74
- | `.tsv` | TSV | One row per data line (tab-separated) |
75
- | `.toml` | TOML | One row per file |
76
- | `.yaml`, `.yml` | YAML | Mapping = 1 row, Sequence = many rows |
77
- | `.md` | Frontmatter | YAML frontmatter + body column |
86
+ Every config-defined table can expose any of these reserved columns. Add
87
+ the ones you want to your DDL; the rest are silently dropped.
78
88
 
79
- ## Path Captures
89
+ | Column | Type | Source |
90
+ |--------|---------|--------|
91
+ | `_path` | TEXT | The file's path relative to the scan root. |
92
+ | `_basename` | TEXT | The filename including extension. |
93
+ | `_dir` | TEXT | The parent directory path (relative to root). |
94
+ | `_ext` | TEXT | The file extension, lowercased, no leading dot. |
95
+ | `_size` | INTEGER | Size in bytes. |
96
+ | `_mtime` | INTEGER | Last-modified time, unix seconds. |
97
+ | `_ctime` | INTEGER | Created/changed time, unix seconds. |
80
98
 
81
- Use `{name}` in glob patterns to extract path segments as columns:
99
+ Example query:
82
100
 
83
- ```toml
84
- [[table]]
85
- ddl = "CREATE TABLE comments (thread_id TEXT, body TEXT, author TEXT)"
86
- glob = "_comments/{thread_id}/index.jsonl"
101
+ ```sql
102
+ SELECT _basename, _size
103
+ FROM posts
104
+ WHERE _mtime > strftime('%s', '2024-01-01')
105
+ ORDER BY _mtime DESC;
87
106
  ```
88
107
 
89
- The directory name (e.g., `abc123`) becomes the `thread_id` column value for every row in that file.
90
-
91
- ## Nested Data
92
-
93
- Use `each` to navigate into nested JSON structures:
94
-
95
- ```toml
96
- [[table]]
97
- ddl = "CREATE TABLE items (name TEXT, price REAL)"
98
- glob = "catalog/*.json"
99
- each = "data.items"
100
- ```
101
-
102
- This extracts rows from `{"data": {"items": [...]}}`.
103
-
104
- ## Column Mapping
108
+ ## Path Captures
105
109
 
106
- Use `columns` to map SQL column names to nested fields or path captures:
110
+ Use `{name}` in glob patterns to extract path segments as columns. Add a
111
+ matching column name to the DDL and the capture is auto-populated:
107
112
 
108
113
  ```toml
109
114
  [[table]]
110
- ddl = "CREATE TABLE posts (display_name TEXT, body TEXT)"
111
- glob = "posts/*.json"
112
-
113
- [table.columns]
114
- display_name = "metadata.author.name"
115
- body = "body"
115
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
116
+ glob = "_comments/{thread_id}/*.jsonl"
116
117
  ```
117
118
 
118
- ::: warning `[table.columns]` is a complete projection, not a partial rename
119
- When a `[table.columns]` section is present, `dirsql` switches to fully
120
- declarative projection: **only the columns listed in the mapping are
121
- populated**. Any column in the DDL that is not mentioned in the mapping
122
- is set to `NULL` for every row — the original key from the file is not
123
- auto-copied.
119
+ A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
120
+ `thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
121
+ to the file's modification time.
124
122
 
125
- This is intentional: `[table.columns]` means "here is exactly where
126
- every column comes from", not "rename these specific keys".
123
+ ## Ignore Patterns
127
124
 
128
- **Trap to avoid.** A config like this:
125
+ The `ignore` list skips files and directories entirely (not even scanned):
129
126
 
130
127
  ```toml
131
- [[table]]
132
- ddl = "CREATE TABLE comments (id TEXT, body TEXT, display_name TEXT)"
133
- glob = "*.json"
134
-
135
- [table.columns]
136
- display_name = "author" # intended: "just rename author -> display_name"
137
- ```
138
-
139
- against a file `one.json`:
140
-
141
- ```json
142
- {"id": "a1", "body": "hello", "author": "Alice"}
143
- ```
144
-
145
- produces:
146
-
147
- ```json
148
- [{"id": null, "body": null, "display_name": "Alice"}]
128
+ [dirsql]
129
+ ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
149
130
  ```
150
131
 
151
- `id` and `body` are `NULL` because they are not listed in
152
- `[table.columns]`. To keep them populated, add them to the mapping
153
- explicitly:
154
-
155
- ```toml
156
- [table.columns]
157
- id = "id"
158
- body = "body"
159
- display_name = "author"
160
- ```
161
- :::
132
+ The top-level `.dirsql/` directory is always excluded, whether you list it
133
+ or not — it is a reserved namespace for `dirsql`'s own metadata (see
134
+ [Persistence](./persistence.md)).
162
135
 
163
- ## Ignore Patterns
136
+ ## Persistence
164
137
 
165
- The `ignore` list skips files and directories entirely (not even scanned):
138
+ Set `persist = true` to keep the SQLite database on disk between runs
139
+ instead of rebuilding from scratch on every startup:
166
140
 
167
141
  ```toml
168
142
  [dirsql]
169
- ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
143
+ persist = true
144
+ # persist_path = ".dirsql/cache.db" # optional; this is the default
170
145
  ```
171
146
 
147
+ See [Persistence](./persistence.md) for the full reconcile algorithm,
148
+ storage layout, and limitations.
149
+
172
150
  ## Strict Mode
173
151
 
174
- By default, extra keys in file content are ignored and missing keys become NULL. Enable strict mode to error on mismatches:
152
+ By default, auto-injected virtuals that aren't in the DDL are silently
153
+ dropped, and undeclared user-extract keys are dropped. Enable strict mode
154
+ to error when an extract emits keys not declared in the DDL:
175
155
 
176
156
  ```toml
177
157
  [[table]]
178
- ddl = "CREATE TABLE posts (title TEXT, author TEXT)"
179
- glob = "posts/*.json"
158
+ ddl = "CREATE TABLE comments (thread_id TEXT)"
159
+ glob = "_comments/{thread_id}/*.jsonl"
180
160
  strict = true
181
161
  ```
182
162
 
163
+ Strict mode does **not** apply to auto-injected stat virtuals — those are
164
+ always filtered to the DDL's declared columns regardless. Strict mode
165
+ applies only to keys produced by an extract callback (relevant for
166
+ programmatic [tables](./tables.md)).
167
+
183
168
  ## Full Example
184
169
 
185
170
  ```toml
@@ -187,19 +172,22 @@ strict = true
187
172
  ignore = ["node_modules/**", ".git/**", "dist/**"]
188
173
 
189
174
  [[table]]
190
- ddl = "CREATE TABLE comments (thread_id TEXT, body TEXT, author TEXT, resolved INTEGER)"
191
- glob = "_comments/{thread_id}/index.jsonl"
175
+ ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
176
+ glob = "_comments/{thread_id}/*.jsonl"
192
177
 
193
178
  [[table]]
194
- ddl = "CREATE TABLE documents (title TEXT, draft INTEGER, body TEXT)"
179
+ ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
195
180
  glob = "**/index.md"
196
181
 
197
182
  [[table]]
198
- ddl = "CREATE TABLE metrics (date TEXT, requests INTEGER, errors INTEGER)"
183
+ ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
199
184
  glob = "logs/*.csv"
200
-
201
- [[table]]
202
- ddl = "CREATE TABLE config (key TEXT, value TEXT)"
203
- glob = "config/*.toml"
204
- strict = true
205
185
  ```
186
+
187
+ ## When you need parsed content
188
+
189
+ `.dirsql.toml` does not parse file contents. For columns derived from the
190
+ *inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
191
+ register a programmatic [`Table`](./tables.md) instead, and parse the
192
+ bytes in your host language. Glob captures and stat virtuals are still
193
+ auto-injected into rows produced by your extract.
@@ -0,0 +1,177 @@
1
+ # Persistence
2
+
3
+ By default `dirsql` keeps its SQLite database in memory and rebuilds it from scratch every time the process starts. For large directories this can take seconds to minutes -- nearly all of which is spent re-parsing files that haven't changed since the previous run.
4
+
5
+ Persistence stores the SQLite database on disk so that subsequent startups only re-parse the files that have actually changed.
6
+
7
+ ::: tip Same answers, faster startup
8
+ The rows returned by `query()` after a persistent startup are equivalent to those produced by a from-scratch rebuild. Persistence is a startup-time optimization, not a correctness compromise. The reconcile algorithm is the same one `git status` uses to decide which files have changed since the last index write.
9
+ :::
10
+
11
+ ## Quick start
12
+
13
+ ::: code-group
14
+
15
+ ```toml [.dirsql.toml]
16
+ [dirsql]
17
+ persist = true
18
+ ```
19
+
20
+ ```python [Python]
21
+ from dirsql import DirSQL
22
+
23
+ db = DirSQL("./my-project", tables=[...], persist=True)
24
+ await db.ready()
25
+ ```
26
+
27
+ ```rust [Rust]
28
+ use dirsql::DirSQL;
29
+
30
+ let db = DirSQL::builder()
31
+ .root("./my-project")
32
+ .tables(vec![/* ... */])
33
+ .persist(true)
34
+ .build()?;
35
+ ```
36
+
37
+ ```typescript [TypeScript]
38
+ import { DirSQL } from "dirsql";
39
+
40
+ const db = new DirSQL({ root: "./my-project", tables: [/* ... */], persist: true });
41
+ await db.ready;
42
+ ```
43
+
44
+ :::
45
+
46
+ That's it. The first run writes the database to `./my-project/.dirsql/cache.db`. Every subsequent startup uses the cache.
47
+
48
+ ## Configuration
49
+
50
+ | Option | Type | Default | Meaning |
51
+ |---|---|---|---|
52
+ | `persist` | boolean | `false` | Enable persistent on-disk storage. |
53
+ | `persist_path` (Python, Rust) / `persistPath` (TypeScript) | string | `<root>/.dirsql/cache.db` | Override the database file path. Ignored when `persist` is `false`. |
54
+
55
+ The default location keeps the cache alongside the data it indexes, which means it follows the project around (clone, copy, move) without extra setup. Override `persist_path` if you want the cache somewhere else -- a CI cache directory, a tmpfs mount, an XDG cache dir, etc.
56
+
57
+ ::: code-group
58
+
59
+ ```toml [.dirsql.toml]
60
+ [dirsql]
61
+ persist = true
62
+ persist_path = "/var/cache/dirsql/myproject.db"
63
+ ```
64
+
65
+ ```python [Python]
66
+ db = DirSQL(
67
+ "./my-project",
68
+ tables=[...],
69
+ persist=True,
70
+ persist_path="/var/cache/dirsql/myproject.db",
71
+ )
72
+ ```
73
+
74
+ ```rust [Rust]
75
+ let db = DirSQL::builder()
76
+ .root("./my-project")
77
+ .tables(vec![/* ... */])
78
+ .persist(true)
79
+ .persist_path("/var/cache/dirsql/myproject.db")
80
+ .build()?;
81
+ ```
82
+
83
+ ```typescript [TypeScript]
84
+ const db = new DirSQL({
85
+ root: "./my-project",
86
+ tables: [/* ... */],
87
+ persist: true,
88
+ persistPath: "/var/cache/dirsql/myproject.db",
89
+ });
90
+ ```
91
+
92
+ :::
93
+
94
+ ## The `.dirsql/` directory
95
+
96
+ `dirsql` reserves the top-level `.dirsql/` directory inside every scanned root. It is **unconditionally excluded from the directory walk**, whether persistence is enabled or not. This means:
97
+
98
+ - The default cache path `<root>/.dirsql/cache.db` cannot accidentally be ingested as a data file.
99
+ - You can place additional `dirsql`-related files in `.dirsql/` (e.g. a project-local config snapshot) without them being parsed.
100
+ - You should not put your own data files in `.dirsql/` -- they will be silently ignored.
101
+
102
+ If you persist into `.dirsql/`, add it to your `.gitignore`:
103
+
104
+ ```
105
+ .dirsql/
106
+ ```
107
+
108
+ The cache file should never be committed -- it is reproducible from the source tree and frequently large.
109
+
110
+ ## How the startup reconcile works
111
+
112
+ When a persistent cache exists, `dirsql` does not blindly trust it. On startup it:
113
+
114
+ 1. **Checks compatibility metadata.** If the cached `dirsql` version, schema version, glob configuration, parser versions, or canonical root path differs from the current build, the cache is wiped and rebuilt from scratch.
115
+ 2. **Walks the tree and stats every matching file.** This is metadata-only -- no file contents are read.
116
+ 3. **For each file, compares the live `(size, mtime, ctime, inode, dev)` tuple against the cached row:**
117
+ - **Trust the cache** when every field matches *and* the file's mtime is older than the cache's snapshot time (outside the racy window).
118
+ - **Hash-confirm** when the tuple matches but the file's mtime falls inside the racy window. `dirsql` reads and hashes the file; if the hash matches the cached hash, the cache is trusted.
119
+ - **Re-parse** when any field of the tuple differs.
120
+ 4. **Deletes** rows for files that were in the cache but are no longer on disk.
121
+ 5. **Inserts** rows for files that are on disk but were not in the cache.
122
+
123
+ This is the same algorithm `git status` uses to decide which files have changed since the last index write. The "racy window" handling is what closes the gap when a file is modified within the same filesystem-timestamp resolution as the cache write.
124
+
125
+ ## When `dirsql` does a full rebuild
126
+
127
+ Any of the following will cause the cache to be discarded and rebuilt from scratch on the next startup:
128
+
129
+ - The `dirsql` library was upgraded between runs.
130
+ - The glob configuration changed (a new table, a removed table, a modified glob, a changed `ignore` list).
131
+ - A built-in parser version changed (this generally only happens on `dirsql` upgrades).
132
+ - The cache was written for a different root directory than the one currently configured.
133
+ - The internal schema of the cache changed (i.e. you upgraded `dirsql` across a schema version bump).
134
+
135
+ Full rebuilds take exactly as long as a non-persistent startup -- there is no penalty for them, only a missed optimization.
136
+
137
+ ## Limitations
138
+
139
+ ### Network filesystems
140
+
141
+ NFS, SMB/CIFS, and similar network filesystems cache file attributes on the client and can return stale `stat` results. Persistent mode is **not supported** on network filesystems and may produce stale rows. Use in-memory mode (the default) if your `root` lives on a network mount.
142
+
143
+ ### The mtime-preservation edge case
144
+
145
+ Racy-stat detection misses changes only when **all** of the following are true:
146
+
147
+ - A file's contents are modified.
148
+ - The file's size after modification is identical to its size before.
149
+ - The file's `mtime` is externally reset to a value older than the cache's snapshot time (e.g. via `touch -r` or a backup-restore tool that preserves mtime).
150
+
151
+ If you cannot tolerate this edge case, disable persistence (`persist = false`). This is the same trade-off `git` makes with `core.trustctime` / `core.checkStat`.
152
+
153
+ ### Single writer
154
+
155
+ Only one `dirsql` process should write to a given cache file at a time. Multiple read-only processes can query the same file safely once the writer finishes the initial reconcile. Coordinated multi-writer access is not supported in v0.3.0.
156
+
157
+ ## Inspecting the cache
158
+
159
+ The persistent database is a normal SQLite file. You can open it with any SQLite client:
160
+
161
+ ```bash
162
+ sqlite3 .dirsql/cache.db
163
+ ```
164
+
165
+ ```sql
166
+ .tables
167
+ -- comments documents metrics _dirsql_files _dirsql_meta
168
+
169
+ SELECT * FROM _dirsql_meta;
170
+ -- schema_version | 1
171
+ -- dirsql_version | 0.3.0
172
+ -- glob_config_hash | <hex>
173
+ -- parser_versions | {"json":"1","jsonl":"1","csv":"1",...}
174
+ -- root_canonical | /home/alice/my-project
175
+ ```
176
+
177
+ The `_dirsql_files` and `_dirsql_meta` tables are managed by `dirsql`. Do not modify them by hand -- on the next startup, `dirsql` will detect the inconsistency and rebuild from scratch.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dirsql",
3
- "version": "0.2.9",
3
+ "version": "0.3.1",
4
4
  "description": "Ephemeral SQL index over a local directory",
5
5
  "license": "MIT",
6
6
  "repository": "https://github.com/thekevinscott/dirsql",
@@ -175,15 +175,15 @@
175
175
  ]
176
176
  },
177
177
  "optionalDependencies": {
178
- "@dirsql/lib-linux-x64-gnu": "0.2.9",
179
- "@dirsql/lib-linux-arm64-gnu": "0.2.9",
180
- "@dirsql/lib-darwin-x64": "0.2.9",
181
- "@dirsql/lib-darwin-arm64": "0.2.9",
182
- "@dirsql/lib-win32-x64-msvc": "0.2.9",
183
- "@dirsql/cli-linux-x64-gnu": "0.2.9",
184
- "@dirsql/cli-linux-arm64-gnu": "0.2.9",
185
- "@dirsql/cli-darwin-x64": "0.2.9",
186
- "@dirsql/cli-darwin-arm64": "0.2.9",
187
- "@dirsql/cli-win32-x64-msvc": "0.2.9"
178
+ "@dirsql/lib-linux-x64-gnu": "0.3.1",
179
+ "@dirsql/lib-linux-arm64-gnu": "0.3.1",
180
+ "@dirsql/lib-darwin-x64": "0.3.1",
181
+ "@dirsql/lib-darwin-arm64": "0.3.1",
182
+ "@dirsql/lib-win32-x64-msvc": "0.3.1",
183
+ "@dirsql/cli-linux-x64-gnu": "0.3.1",
184
+ "@dirsql/cli-linux-arm64-gnu": "0.3.1",
185
+ "@dirsql/cli-darwin-x64": "0.3.1",
186
+ "@dirsql/cli-darwin-arm64": "0.3.1",
187
+ "@dirsql/cli-win32-x64-msvc": "0.3.1"
188
188
  }
189
189
  }