dirsql 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/guide/config.md +78 -104
- package/package.json +11 -11
package/docs/guide/config.md
CHANGED
|
@@ -6,7 +6,18 @@ canonical: https://thekevinscott.github.io/dirsql/guide/config
|
|
|
6
6
|
|
|
7
7
|
> Online: <https://thekevinscott.github.io/dirsql/guide/config>
|
|
8
8
|
|
|
9
|
-
`dirsql` can be configured with a `.dirsql.toml` file
|
|
9
|
+
`dirsql` can be configured with a `.dirsql.toml` file. Tables defined this
|
|
10
|
+
way produce **one row per matched file**. Each row's columns come from
|
|
11
|
+
filesystem facts:
|
|
12
|
+
|
|
13
|
+
- **Glob path captures** — named `{placeholder}` segments in the glob.
|
|
14
|
+
- **Stat virtuals** — reserved `_`-prefixed columns for path-derived and
|
|
15
|
+
stat-derived metadata.
|
|
16
|
+
|
|
17
|
+
Content interpretation (parsing JSON, CSV, frontmatter, etc.) is **not**
|
|
18
|
+
configured in `.dirsql.toml`. If you need columns derived from file
|
|
19
|
+
contents, register a programmatic [`Table`](./tables.md) whose `extract`
|
|
20
|
+
function does the parsing in your host language.
|
|
10
21
|
|
|
11
22
|
## Basic Example
|
|
12
23
|
|
|
@@ -15,11 +26,12 @@ canonical: https://thekevinscott.github.io/dirsql/guide/config
|
|
|
15
26
|
ignore = ["node_modules/**", ".git/**"]
|
|
16
27
|
|
|
17
28
|
[[table]]
|
|
18
|
-
ddl
|
|
19
|
-
glob = "posts/*.
|
|
29
|
+
ddl = "CREATE TABLE posts (_path TEXT, _basename TEXT, _size INTEGER, _mtime INTEGER)"
|
|
30
|
+
glob = "posts/*.md"
|
|
20
31
|
```
|
|
21
32
|
|
|
22
|
-
|
|
33
|
+
Each `posts/*.md` file produces one row. The DDL declares which stat
|
|
34
|
+
virtuals are surfaced as SQL columns.
|
|
23
35
|
|
|
24
36
|
## Loading a Config File
|
|
25
37
|
|
|
@@ -52,11 +64,16 @@ await db.ready;
|
|
|
52
64
|
|
|
53
65
|
:::
|
|
54
66
|
|
|
55
|
-
By default, the root directory scanned is the config file's parent
|
|
67
|
+
By default, the root directory scanned is the config file's parent
|
|
68
|
+
directory. Override it by passing `root` explicitly (the explicit value
|
|
69
|
+
wins and a warning is emitted) or by declaring `[dirsql].root` in the
|
|
70
|
+
config file itself.
|
|
56
71
|
|
|
57
72
|
## Root Directory
|
|
58
73
|
|
|
59
|
-
By default, the config file's parent directory is the scan root. To index
|
|
74
|
+
By default, the config file's parent directory is the scan root. To index
|
|
75
|
+
a different location, declare `[dirsql].root` (relative paths are resolved
|
|
76
|
+
relative to the config file's parent):
|
|
60
77
|
|
|
61
78
|
```toml
|
|
62
79
|
[dirsql]
|
|
@@ -64,101 +81,44 @@ root = "../data"
|
|
|
64
81
|
ignore = ["node_modules/**"]
|
|
65
82
|
```
|
|
66
83
|
|
|
67
|
-
##
|
|
68
|
-
|
|
69
|
-
| Extension | Format | Rows |
|
|
70
|
-
|---|---|---|
|
|
71
|
-
| `.json` | JSON | Object = 1 row, Array = many rows |
|
|
72
|
-
| `.jsonl`, `.ndjson` | JSONL | One row per line |
|
|
73
|
-
| `.csv` | CSV | One row per data line (header = columns) |
|
|
74
|
-
| `.tsv` | TSV | One row per data line (tab-separated) |
|
|
75
|
-
| `.toml` | TOML | One row per file |
|
|
76
|
-
| `.yaml`, `.yml` | YAML | Mapping = 1 row, Sequence = many rows |
|
|
77
|
-
| `.md` | Frontmatter | YAML frontmatter + body column |
|
|
78
|
-
|
|
79
|
-
## Path Captures
|
|
80
|
-
|
|
81
|
-
Use `{name}` in glob patterns to extract path segments as columns:
|
|
82
|
-
|
|
83
|
-
```toml
|
|
84
|
-
[[table]]
|
|
85
|
-
ddl = "CREATE TABLE comments (thread_id TEXT, body TEXT, author TEXT)"
|
|
86
|
-
glob = "_comments/{thread_id}/index.jsonl"
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
The directory name (e.g., `abc123`) becomes the `thread_id` column value for every row in that file.
|
|
90
|
-
|
|
91
|
-
## Nested Data
|
|
92
|
-
|
|
93
|
-
Use `each` to navigate into nested JSON structures:
|
|
94
|
-
|
|
95
|
-
```toml
|
|
96
|
-
[[table]]
|
|
97
|
-
ddl = "CREATE TABLE items (name TEXT, price REAL)"
|
|
98
|
-
glob = "catalog/*.json"
|
|
99
|
-
each = "data.items"
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
This extracts rows from `{"data": {"items": [...]}}`.
|
|
84
|
+
## Stat Virtuals
|
|
103
85
|
|
|
104
|
-
|
|
86
|
+
Every config-defined table can expose any of these reserved columns. Add
|
|
87
|
+
the ones you want to your DDL; the rest are silently dropped.
|
|
105
88
|
|
|
106
|
-
|
|
89
|
+
| Column | Type | Source |
|
|
90
|
+
|--------|---------|--------|
|
|
91
|
+
| `_path` | TEXT | The file's path relative to the scan root. |
|
|
92
|
+
| `_basename` | TEXT | The filename including extension. |
|
|
93
|
+
| `_dir` | TEXT | The parent directory path (relative to root). |
|
|
94
|
+
| `_ext` | TEXT | The file extension, lowercased, no leading dot. |
|
|
95
|
+
| `_size` | INTEGER | Size in bytes. |
|
|
96
|
+
| `_mtime` | INTEGER | Last-modified time, unix seconds. |
|
|
97
|
+
| `_ctime` | INTEGER | Created/changed time, unix seconds. |
|
|
107
98
|
|
|
108
|
-
|
|
109
|
-
[[table]]
|
|
110
|
-
ddl = "CREATE TABLE posts (display_name TEXT, body TEXT)"
|
|
111
|
-
glob = "posts/*.json"
|
|
99
|
+
Example query:
|
|
112
100
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
101
|
+
```sql
|
|
102
|
+
SELECT _basename, _size
|
|
103
|
+
FROM posts
|
|
104
|
+
WHERE _mtime > strftime('%s', '2024-01-01')
|
|
105
|
+
ORDER BY _mtime DESC;
|
|
116
106
|
```
|
|
117
107
|
|
|
118
|
-
|
|
119
|
-
When a `[table.columns]` section is present, `dirsql` switches to fully
|
|
120
|
-
declarative projection: **only the columns listed in the mapping are
|
|
121
|
-
populated**. Any column in the DDL that is not mentioned in the mapping
|
|
122
|
-
is set to `NULL` for every row — the original key from the file is not
|
|
123
|
-
auto-copied.
|
|
124
|
-
|
|
125
|
-
This is intentional: `[table.columns]` means "here is exactly where
|
|
126
|
-
every column comes from", not "rename these specific keys".
|
|
108
|
+
## Path Captures
|
|
127
109
|
|
|
128
|
-
|
|
110
|
+
Use `{name}` in glob patterns to extract path segments as columns. Add a
|
|
111
|
+
matching column name to the DDL and the capture is auto-populated:
|
|
129
112
|
|
|
130
113
|
```toml
|
|
131
114
|
[[table]]
|
|
132
|
-
ddl
|
|
133
|
-
glob = "
|
|
134
|
-
|
|
135
|
-
[table.columns]
|
|
136
|
-
display_name = "author" # intended: "just rename author -> display_name"
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
against a file `one.json`:
|
|
140
|
-
|
|
141
|
-
```json
|
|
142
|
-
{"id": "a1", "body": "hello", "author": "Alice"}
|
|
115
|
+
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
116
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
143
117
|
```
|
|
144
118
|
|
|
145
|
-
produces
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
[{"id": null, "body": null, "display_name": "Alice"}]
|
|
149
|
-
```
|
|
150
|
-
|
|
151
|
-
`id` and `body` are `NULL` because they are not listed in
|
|
152
|
-
`[table.columns]`. To keep them populated, add them to the mapping
|
|
153
|
-
explicitly:
|
|
154
|
-
|
|
155
|
-
```toml
|
|
156
|
-
[table.columns]
|
|
157
|
-
id = "id"
|
|
158
|
-
body = "body"
|
|
159
|
-
display_name = "author"
|
|
160
|
-
```
|
|
161
|
-
:::
|
|
119
|
+
A file at `_comments/abc123/2024-05-05.jsonl` produces a row with
|
|
120
|
+
`thread_id = "abc123"`, `_basename = "2024-05-05.jsonl"`, and `_mtime` set
|
|
121
|
+
to the file's modification time.
|
|
162
122
|
|
|
163
123
|
## Ignore Patterns
|
|
164
124
|
|
|
@@ -169,11 +129,14 @@ The `ignore` list skips files and directories entirely (not even scanned):
|
|
|
169
129
|
ignore = ["node_modules/**", ".git/**", "*.pyc", "__pycache__/**"]
|
|
170
130
|
```
|
|
171
131
|
|
|
172
|
-
The top-level `.dirsql/` directory is always excluded, whether you list it
|
|
132
|
+
The top-level `.dirsql/` directory is always excluded, whether you list it
|
|
133
|
+
or not — it is a reserved namespace for `dirsql`'s own metadata (see
|
|
134
|
+
[Persistence](./persistence.md)).
|
|
173
135
|
|
|
174
136
|
## Persistence
|
|
175
137
|
|
|
176
|
-
Set `persist = true` to keep the SQLite database on disk between runs
|
|
138
|
+
Set `persist = true` to keep the SQLite database on disk between runs
|
|
139
|
+
instead of rebuilding from scratch on every startup:
|
|
177
140
|
|
|
178
141
|
```toml
|
|
179
142
|
[dirsql]
|
|
@@ -181,19 +144,27 @@ persist = true
|
|
|
181
144
|
# persist_path = ".dirsql/cache.db" # optional; this is the default
|
|
182
145
|
```
|
|
183
146
|
|
|
184
|
-
See [Persistence](./persistence.md) for the full reconcile algorithm,
|
|
147
|
+
See [Persistence](./persistence.md) for the full reconcile algorithm,
|
|
148
|
+
storage layout, and limitations.
|
|
185
149
|
|
|
186
150
|
## Strict Mode
|
|
187
151
|
|
|
188
|
-
By default,
|
|
152
|
+
By default, auto-injected virtuals that aren't in the DDL are silently
|
|
153
|
+
dropped, and so are undeclared user-extract keys. Enable strict mode
|
|
154
|
+
to error when an extract emits keys not declared in the DDL:
|
|
189
155
|
|
|
190
156
|
```toml
|
|
191
157
|
[[table]]
|
|
192
|
-
ddl
|
|
193
|
-
glob = "
|
|
158
|
+
ddl = "CREATE TABLE comments (thread_id TEXT)"
|
|
159
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
194
160
|
strict = true
|
|
195
161
|
```
|
|
196
162
|
|
|
163
|
+
Strict mode does **not** apply to auto-injected stat virtuals — those are
|
|
164
|
+
always filtered to the DDL's declared columns regardless. Strict mode
|
|
165
|
+
applies only to keys produced by an extract callback (relevant for
|
|
166
|
+
programmatic [tables](./tables.md)).
|
|
167
|
+
|
|
197
168
|
## Full Example
|
|
198
169
|
|
|
199
170
|
```toml
|
|
@@ -201,19 +172,22 @@ strict = true
|
|
|
201
172
|
ignore = ["node_modules/**", ".git/**", "dist/**"]
|
|
202
173
|
|
|
203
174
|
[[table]]
|
|
204
|
-
ddl
|
|
205
|
-
glob = "_comments/{thread_id}
|
|
175
|
+
ddl = "CREATE TABLE comments (thread_id TEXT, _basename TEXT, _mtime INTEGER)"
|
|
176
|
+
glob = "_comments/{thread_id}/*.jsonl"
|
|
206
177
|
|
|
207
178
|
[[table]]
|
|
208
|
-
ddl
|
|
179
|
+
ddl = "CREATE TABLE documents (_path TEXT, _basename TEXT, _size INTEGER)"
|
|
209
180
|
glob = "**/index.md"
|
|
210
181
|
|
|
211
182
|
[[table]]
|
|
212
|
-
ddl
|
|
183
|
+
ddl = "CREATE TABLE logs (_path TEXT, _size INTEGER, _mtime INTEGER)"
|
|
213
184
|
glob = "logs/*.csv"
|
|
214
|
-
|
|
215
|
-
[[table]]
|
|
216
|
-
ddl = "CREATE TABLE config (key TEXT, value TEXT)"
|
|
217
|
-
glob = "config/*.toml"
|
|
218
|
-
strict = true
|
|
219
185
|
```
|
|
186
|
+
|
|
187
|
+
## When you need parsed content
|
|
188
|
+
|
|
189
|
+
`.dirsql.toml` does not parse file contents. For columns derived from the
|
|
190
|
+
*inside* of files (frontmatter keys, JSON values, CSV cells, etc.),
|
|
191
|
+
register a programmatic [`Table`](./tables.md) instead, and parse the
|
|
192
|
+
bytes in your host language. Glob captures and stat virtuals are still
|
|
193
|
+
auto-injected into rows produced by your extract.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dirsql",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Ephemeral SQL index over a local directory",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "https://github.com/thekevinscott/dirsql",
|
|
@@ -175,15 +175,15 @@
|
|
|
175
175
|
]
|
|
176
176
|
},
|
|
177
177
|
"optionalDependencies": {
|
|
178
|
-
"@dirsql/lib-linux-x64-gnu": "0.3.0",
|
|
179
|
-
"@dirsql/lib-linux-arm64-gnu": "0.3.0",
|
|
180
|
-
"@dirsql/lib-darwin-x64": "0.3.0",
|
|
181
|
-
"@dirsql/lib-darwin-arm64": "0.3.0",
|
|
182
|
-
"@dirsql/lib-win32-x64-msvc": "0.3.0",
|
|
183
|
-
"@dirsql/cli-linux-x64-gnu": "0.3.0",
|
|
184
|
-
"@dirsql/cli-linux-arm64-gnu": "0.3.0",
|
|
185
|
-
"@dirsql/cli-darwin-x64": "0.3.0",
|
|
186
|
-
"@dirsql/cli-darwin-arm64": "0.3.0",
|
|
187
|
-
"@dirsql/cli-win32-x64-msvc": "0.3.0"
|
|
178
|
+
"@dirsql/lib-linux-x64-gnu": "0.3.1",
|
|
179
|
+
"@dirsql/lib-linux-arm64-gnu": "0.3.1",
|
|
180
|
+
"@dirsql/lib-darwin-x64": "0.3.1",
|
|
181
|
+
"@dirsql/lib-darwin-arm64": "0.3.1",
|
|
182
|
+
"@dirsql/lib-win32-x64-msvc": "0.3.1",
|
|
183
|
+
"@dirsql/cli-linux-x64-gnu": "0.3.1",
|
|
184
|
+
"@dirsql/cli-linux-arm64-gnu": "0.3.1",
|
|
185
|
+
"@dirsql/cli-darwin-x64": "0.3.1",
|
|
186
|
+
"@dirsql/cli-darwin-arm64": "0.3.1",
|
|
187
|
+
"@dirsql/cli-win32-x64-msvc": "0.3.1"
|
|
188
188
|
}
|
|
189
189
|
}
|