dirsql 0.2.3 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -27
- package/dist/index.js.map +1 -1
- package/dist/loadNativeCore.d.ts +12 -0
- package/dist/loadNativeCore.d.ts.map +1 -0
- package/dist/loadNativeCore.js +26 -0
- package/dist/loadNativeCore.js.map +1 -0
- package/dist/loadNativeCore.test.d.ts +2 -0
- package/dist/loadNativeCore.test.d.ts.map +1 -0
- package/dist/loadNativeCore.test.js +48 -0
- package/dist/loadNativeCore.test.js.map +1 -0
- package/dist/platforms.d.ts +12 -1
- package/dist/platforms.d.ts.map +1 -1
- package/dist/platforms.js +39 -9
- package/dist/platforms.js.map +1 -1
- package/dist/platforms.test.js +28 -1
- package/dist/platforms.test.js.map +1 -1
- package/docs/api/index.md +232 -0
- package/docs/getting-started.md +186 -0
- package/docs/guide/async.md +224 -0
- package/docs/guide/cli.md +124 -0
- package/docs/guide/config.md +205 -0
- package/docs/guide/crdt.md +160 -0
- package/docs/guide/querying.md +216 -0
- package/docs/guide/tables.md +268 -0
- package/docs/guide/watching.md +264 -0
- package/docs/index.md +81 -0
- package/docs/migrations.md +8 -0
- package/package.json +38 -7
- package/dirsql.node +0 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
---
|
|
2
|
+
canonical: https://thekevinscott.github.io/dirsql/guide/tables
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Defining Tables
|
|
6
|
+
|
|
7
|
+
> Online: <https://thekevinscott.github.io/dirsql/guide/tables>
|
|
8
|
+
|
|
9
|
+
Each table in `dirsql` maps a set of files to rows in an in-memory SQLite table. A table definition has three parts: DDL, a glob pattern, and an extract function.
|
|
10
|
+
|
|
11
|
+
## Table constructor
|
|
12
|
+
|
|
13
|
+
::: code-group
|
|
14
|
+
|
|
15
|
+
```python [Python]
|
|
16
|
+
from dirsql import Table
|
|
17
|
+
|
|
18
|
+
table = Table(
|
|
19
|
+
ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
20
|
+
glob="comments/**/index.jsonl",
|
|
21
|
+
extract=lambda path, content: [
|
|
22
|
+
{"id": "...", "body": "...", "author": "..."}
|
|
23
|
+
],
|
|
24
|
+
)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
```rust [Rust]
|
|
28
|
+
use dirsql::{Table, Value};
|
|
29
|
+
use std::collections::HashMap;
|
|
30
|
+
|
|
31
|
+
let table = Table::new(
|
|
32
|
+
"CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
33
|
+
"comments/**/index.jsonl",
|
|
34
|
+
|_path, _content| {
|
|
35
|
+
let mut row: HashMap<String, Value> = HashMap::new();
|
|
36
|
+
row.insert("id".into(), Value::Text("...".into()));
|
|
37
|
+
row.insert("body".into(), Value::Text("...".into()));
|
|
38
|
+
row.insert("author".into(), Value::Text("...".into()));
|
|
39
|
+
vec![row]
|
|
40
|
+
},
|
|
41
|
+
);
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
```typescript [TypeScript]
|
|
45
|
+
import type { TableDef } from 'dirsql';
|
|
46
|
+
|
|
47
|
+
const table: TableDef = {
|
|
48
|
+
ddl: 'CREATE TABLE comments (id TEXT, body TEXT, author TEXT)',
|
|
49
|
+
glob: 'comments/**/index.jsonl',
|
|
50
|
+
extract: (_path, content) => [
|
|
51
|
+
{ id: '...', body: '...', author: '...' },
|
|
52
|
+
],
|
|
53
|
+
};
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
:::
|
|
57
|
+
|
|
58
|
+
All three arguments are keyword-only (in Python). In Rust they are positional to `Table::new`. In TypeScript a table is a plain `TableDef` object literal — the TS SDK exports the `TableDef` type (not a class).
|
|
59
|
+
|
|
60
|
+
### `ddl`
|
|
61
|
+
|
|
62
|
+
A SQLite `CREATE TABLE` statement. This defines the schema of the table. `dirsql` executes this DDL directly against the in-memory database, so any valid SQLite column types and constraints work.
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
# Simple text columns
|
|
66
|
+
ddl="CREATE TABLE notes (title TEXT, body TEXT)"
|
|
67
|
+
|
|
68
|
+
# Typed columns
|
|
69
|
+
ddl="CREATE TABLE metrics (name TEXT, value REAL, count INTEGER)"
|
|
70
|
+
|
|
71
|
+
# With constraints
|
|
72
|
+
ddl="CREATE TABLE items (id TEXT PRIMARY KEY, name TEXT NOT NULL)"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The table name is parsed from the DDL. It must be a valid SQLite identifier.
|
|
76
|
+
|
|
77
|
+
### `glob`
|
|
78
|
+
|
|
79
|
+
A glob pattern that determines which files feed into this table. Patterns are matched relative to the root directory passed to `DirSQL`.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
glob="*.json" # JSON files in root only
|
|
83
|
+
glob="**/*.json" # JSON files at any depth
|
|
84
|
+
glob="comments/**/index.jsonl" # JSONL files in comment subdirectories
|
|
85
|
+
glob="data/*.csv" # CSV files in data/
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Glob syntax follows standard Unix globbing rules. `**` matches any number of directory levels.
|
|
89
|
+
|
|
90
|
+
### `extract`
|
|
91
|
+
|
|
92
|
+
A callable `(path: str, content: str) -> list[dict]` that converts a file into rows.
|
|
93
|
+
|
|
94
|
+
- `path` is the file path relative to the root directory
|
|
95
|
+
- `content` is the file content as a string
|
|
96
|
+
- Return a list of dicts, where each dict maps column names to values
|
|
97
|
+
- Return an empty list to skip a file
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import json
|
|
101
|
+
|
|
102
|
+
# Single-object JSON files: one row per file
|
|
103
|
+
extract=lambda path, content: [json.loads(content)]
|
|
104
|
+
|
|
105
|
+
# JSONL files: one row per line
|
|
106
|
+
extract=lambda path, content: [
|
|
107
|
+
json.loads(line) for line in content.splitlines()
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
# Derive values from the file path
|
|
111
|
+
import os
|
|
112
|
+
extract=lambda path, content: [
|
|
113
|
+
{
|
|
114
|
+
"id": os.path.basename(os.path.dirname(path)),
|
|
115
|
+
"body": row["body"],
|
|
116
|
+
}
|
|
117
|
+
for line in content.splitlines()
|
|
118
|
+
for row in [json.loads(line)]
|
|
119
|
+
]
|
|
120
|
+
|
|
121
|
+
# Conditionally skip files
|
|
122
|
+
def extract(path, content):
|
|
123
|
+
data = json.loads(content)
|
|
124
|
+
if data.get("draft"):
|
|
125
|
+
return []
|
|
126
|
+
return [data]
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Multiple tables
|
|
130
|
+
|
|
131
|
+
Pass multiple `Table` definitions to index different file types into separate tables:
|
|
132
|
+
|
|
133
|
+
::: code-group
|
|
134
|
+
|
|
135
|
+
```python [Python]
|
|
136
|
+
from dirsql import DirSQL, Table
|
|
137
|
+
import json
|
|
138
|
+
|
|
139
|
+
db = DirSQL(
|
|
140
|
+
"./workspace",
|
|
141
|
+
tables=[
|
|
142
|
+
Table(
|
|
143
|
+
ddl="CREATE TABLE posts (title TEXT, author_id TEXT)",
|
|
144
|
+
glob="posts/*.json",
|
|
145
|
+
extract=lambda path, content: [json.loads(content)],
|
|
146
|
+
),
|
|
147
|
+
Table(
|
|
148
|
+
ddl="CREATE TABLE authors (id TEXT, name TEXT)",
|
|
149
|
+
glob="authors/*.json",
|
|
150
|
+
extract=lambda path, content: [json.loads(content)],
|
|
151
|
+
),
|
|
152
|
+
],
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
```rust [Rust]
|
|
157
|
+
use dirsql::{DirSQL, Table, Value};
|
|
158
|
+
use std::collections::HashMap;
|
|
159
|
+
|
|
160
|
+
// See `row_from_json` in getting-started.md for a reusable helper.
|
|
161
|
+
fn row_from_json(raw: &str) -> HashMap<String, Value> {
|
|
162
|
+
let v: serde_json::Value = serde_json::from_str(raw).unwrap();
|
|
163
|
+
let serde_json::Value::Object(obj) = v else { return HashMap::new() };
|
|
164
|
+
obj.into_iter()
|
|
165
|
+
.map(|(k, val)| {
|
|
166
|
+
let v = match val {
|
|
167
|
+
serde_json::Value::String(s) => Value::Text(s),
|
|
168
|
+
serde_json::Value::Number(n) => n
|
|
169
|
+
.as_i64()
|
|
170
|
+
.map(Value::Integer)
|
|
171
|
+
.unwrap_or_else(|| Value::Real(n.as_f64().unwrap_or(0.0))),
|
|
172
|
+
serde_json::Value::Bool(b) => Value::Integer(b as i64),
|
|
173
|
+
serde_json::Value::Null => Value::Null,
|
|
174
|
+
other => Value::Text(other.to_string()),
|
|
175
|
+
};
|
|
176
|
+
(k, v)
|
|
177
|
+
})
|
|
178
|
+
.collect()
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
let db = DirSQL::new(
|
|
182
|
+
"./workspace",
|
|
183
|
+
vec![
|
|
184
|
+
Table::new(
|
|
185
|
+
"CREATE TABLE posts (title TEXT, author_id TEXT)",
|
|
186
|
+
"posts/*.json",
|
|
187
|
+
|_path, content| vec![row_from_json(content)],
|
|
188
|
+
),
|
|
189
|
+
Table::new(
|
|
190
|
+
"CREATE TABLE authors (id TEXT, name TEXT)",
|
|
191
|
+
"authors/*.json",
|
|
192
|
+
|_path, content| vec![row_from_json(content)],
|
|
193
|
+
),
|
|
194
|
+
],
|
|
195
|
+
)?;
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
```typescript [TypeScript]
|
|
199
|
+
import { DirSQL, type TableDef } from 'dirsql';
|
|
200
|
+
|
|
201
|
+
const tables: TableDef[] = [
|
|
202
|
+
{
|
|
203
|
+
ddl: 'CREATE TABLE posts (title TEXT, author_id TEXT)',
|
|
204
|
+
glob: 'posts/*.json',
|
|
205
|
+
extract: (_path, content) => [JSON.parse(content)],
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
ddl: 'CREATE TABLE authors (id TEXT, name TEXT)',
|
|
209
|
+
glob: 'authors/*.json',
|
|
210
|
+
extract: (_path, content) => [JSON.parse(content)],
|
|
211
|
+
},
|
|
212
|
+
];
|
|
213
|
+
|
|
214
|
+
const db = new DirSQL({ root: './workspace', tables });
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
:::
|
|
218
|
+
|
|
219
|
+
Each table has its own glob and extract function. A file can only match one table (the first matching glob wins).
|
|
220
|
+
|
|
221
|
+
## Ignore patterns
|
|
222
|
+
|
|
223
|
+
Use the `ignore` parameter to exclude paths from all tables:
|
|
224
|
+
|
|
225
|
+
::: code-group
|
|
226
|
+
|
|
227
|
+
```python [Python]
|
|
228
|
+
db = DirSQL(
|
|
229
|
+
"./workspace",
|
|
230
|
+
ignore=["**/node_modules/**", "**/.git/**"],
|
|
231
|
+
tables=[...],
|
|
232
|
+
)
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
```rust [Rust]
|
|
236
|
+
let db = DirSQL::with_ignore(
|
|
237
|
+
"./workspace",
|
|
238
|
+
vec![/* tables */],
|
|
239
|
+
vec!["**/node_modules/**", "**/.git/**"],
|
|
240
|
+
)?;
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
```typescript [TypeScript]
|
|
244
|
+
const db = new DirSQL({
|
|
245
|
+
root: './workspace',
|
|
246
|
+
tables: [/* tables */],
|
|
247
|
+
ignore: ['**/node_modules/**', '**/.git/**'],
|
|
248
|
+
});
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
:::
|
|
252
|
+
|
|
253
|
+
Ignore patterns are applied before glob matching. Any file matching an ignore pattern is skipped regardless of table globs.
|
|
254
|
+
|
|
255
|
+
## Supported value types
|
|
256
|
+
|
|
257
|
+
The extract function can return these Python types, which map to SQLite types:
|
|
258
|
+
|
|
259
|
+
| Python type | SQLite type |
|
|
260
|
+
|-------------|-------------|
|
|
261
|
+
| `str` | TEXT |
|
|
262
|
+
| `int` | INTEGER |
|
|
263
|
+
| `float` | REAL |
|
|
264
|
+
| `bool` | INTEGER (0/1) |
|
|
265
|
+
| `bytes` | BLOB |
|
|
266
|
+
| `None` | NULL |
|
|
267
|
+
|
|
268
|
+
Any other type is converted to its string representation via `str()`.
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
---
|
|
2
|
+
canonical: https://thekevinscott.github.io/dirsql/guide/watching
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# File Watching
|
|
6
|
+
|
|
7
|
+
> Online: <https://thekevinscott.github.io/dirsql/guide/watching>
|
|
8
|
+
|
|
9
|
+
`dirsql` can monitor the filesystem for changes and emit events when rows are inserted, updated, or deleted. This is useful for building reactive applications that respond to file changes in real time.
|
|
10
|
+
|
|
11
|
+
## Starting a watch stream
|
|
12
|
+
|
|
13
|
+
::: code-group
|
|
14
|
+
|
|
15
|
+
```python [Python]
|
|
16
|
+
from dirsql import DirSQL, Table
|
|
17
|
+
import json
|
|
18
|
+
|
|
19
|
+
db = DirSQL(
|
|
20
|
+
"./my-project",
|
|
21
|
+
tables=[
|
|
22
|
+
Table(
|
|
23
|
+
ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
24
|
+
glob="comments/**/*.json",
|
|
25
|
+
extract=lambda path, content: [json.loads(content)],
|
|
26
|
+
),
|
|
27
|
+
],
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
async for event in db.watch():
|
|
31
|
+
print(f"{event.action} on {event.table}: {event.row}")
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
```rust [Rust]
|
|
35
|
+
use dirsql::{DirSQL, RowEvent, Table, Value};
|
|
36
|
+
use futures::StreamExt;
|
|
37
|
+
use std::collections::HashMap;
|
|
38
|
+
|
|
39
|
+
// See `row_from_json` in getting-started.md for a reusable helper.
|
|
40
|
+
fn row_from_json(raw: &str) -> HashMap<String, Value> {
|
|
41
|
+
let v: serde_json::Value = serde_json::from_str(raw).unwrap();
|
|
42
|
+
let serde_json::Value::Object(obj) = v else { return HashMap::new() };
|
|
43
|
+
obj.into_iter()
|
|
44
|
+
.map(|(k, val)| {
|
|
45
|
+
let v = match val {
|
|
46
|
+
serde_json::Value::String(s) => Value::Text(s),
|
|
47
|
+
serde_json::Value::Number(n) => n
|
|
48
|
+
.as_i64()
|
|
49
|
+
.map(Value::Integer)
|
|
50
|
+
.unwrap_or_else(|| Value::Real(n.as_f64().unwrap_or(0.0))),
|
|
51
|
+
serde_json::Value::Bool(b) => Value::Integer(b as i64),
|
|
52
|
+
serde_json::Value::Null => Value::Null,
|
|
53
|
+
other => Value::Text(other.to_string()),
|
|
54
|
+
};
|
|
55
|
+
(k, v)
|
|
56
|
+
})
|
|
57
|
+
.collect()
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
let db = DirSQL::new(
|
|
61
|
+
"./my-project",
|
|
62
|
+
vec![
|
|
63
|
+
Table::new(
|
|
64
|
+
"CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
|
|
65
|
+
"comments/**/*.json",
|
|
66
|
+
|_path, content| vec![row_from_json(content)],
|
|
67
|
+
),
|
|
68
|
+
],
|
|
69
|
+
)?;
|
|
70
|
+
|
|
71
|
+
let mut stream = db.watch()?;
|
|
72
|
+
while let Some(event) = stream.next().await {
|
|
73
|
+
match event {
|
|
74
|
+
RowEvent::Insert { table, row, file_path } => {
|
|
75
|
+
println!("insert on {table} ({file_path}): {row:?}")
|
|
76
|
+
}
|
|
77
|
+
RowEvent::Update { table, old_row, new_row, file_path } => {
|
|
78
|
+
println!("update on {table} ({file_path}): {old_row:?} -> {new_row:?}")
|
|
79
|
+
}
|
|
80
|
+
RowEvent::Delete { table, row, file_path } => {
|
|
81
|
+
println!("delete on {table} ({file_path}): {row:?}")
|
|
82
|
+
}
|
|
83
|
+
RowEvent::Error { file_path, error, .. } => {
|
|
84
|
+
println!("error on {file_path:?}: {error}")
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
```typescript [TypeScript]
|
|
91
|
+
import { DirSQL, type TableDef } from 'dirsql';
|
|
92
|
+
|
|
93
|
+
const tables: TableDef[] = [
|
|
94
|
+
{
|
|
95
|
+
ddl: 'CREATE TABLE comments (id TEXT, body TEXT, author TEXT)',
|
|
96
|
+
glob: 'comments/**/*.json',
|
|
97
|
+
extract: (_path, content) => [JSON.parse(content)],
|
|
98
|
+
},
|
|
99
|
+
];
|
|
100
|
+
|
|
101
|
+
const db = new DirSQL({ root: './my-project', tables });
|
|
102
|
+
|
|
103
|
+
for await (const event of db.watch()) {
|
|
104
|
+
console.log(`${event.action} on ${event.table}:`, event.row);
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
:::
|
|
109
|
+
|
|
110
|
+
See [Async API](./async.md) for full details on the async `DirSQL` API (Python).
|
|
111
|
+
|
|
112
|
+
## Event types
|
|
113
|
+
|
|
114
|
+
Each event is a `RowEvent`. The event types, and the fields each one carries, are described below.
|
|
115
|
+
|
|
116
|
+
### `insert`
|
|
117
|
+
|
|
118
|
+
A new row was added. This happens when a new file is created or an existing file gains additional rows.
|
|
119
|
+
|
|
120
|
+
::: code-group
|
|
121
|
+
|
|
122
|
+
```python [Python]
|
|
123
|
+
event.action # "insert"
|
|
124
|
+
event.table # "comments"
|
|
125
|
+
event.row # {"id": "abc", "body": "new comment", "author": "alice"}
|
|
126
|
+
event.old_row # None
|
|
127
|
+
event.file_path # "comments/abc/index.json"
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
```rust [Rust]
|
|
131
|
+
// RowEvent is an enum; match on the variant to destructure its fields.
|
|
132
|
+
RowEvent::Insert {
|
|
133
|
+
table, // "comments"
|
|
134
|
+
row, // {"id": "abc", "body": "new comment", "author": "alice"}
|
|
135
|
+
file_path, // "comments/abc/index.json"
|
|
136
|
+
} => { /* ... */ }
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
```typescript [TypeScript]
|
|
140
|
+
event.action // 'insert'
|
|
141
|
+
event.table // 'comments'
|
|
142
|
+
event.row // { id: 'abc', body: 'new comment', author: 'alice' }
|
|
143
|
+
event.oldRow // undefined
|
|
144
|
+
event.filePath // 'comments/abc/index.json'
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
:::
|
|
148
|
+
|
|
149
|
+
### `update`
|
|
150
|
+
|
|
151
|
+
An existing row was modified. `dirsql` diffs the old and new rows extracted from the file to detect changes.
|
|
152
|
+
|
|
153
|
+
::: code-group
|
|
154
|
+
|
|
155
|
+
```python [Python]
|
|
156
|
+
event.action # "update"
|
|
157
|
+
event.table # "comments"
|
|
158
|
+
event.row # {"id": "abc", "body": "edited comment", "author": "alice"}
|
|
159
|
+
event.old_row # {"id": "abc", "body": "original comment", "author": "alice"}
|
|
160
|
+
event.file_path # "comments/abc/index.json"
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
```rust [Rust]
|
|
164
|
+
RowEvent::Update {
|
|
165
|
+
table, // "comments"
|
|
166
|
+
old_row, // {"id": "abc", "body": "original comment", "author": "alice"}
|
|
167
|
+
new_row, // {"id": "abc", "body": "edited comment", "author": "alice"}
|
|
168
|
+
file_path, // "comments/abc/index.json"
|
|
169
|
+
} => { /* ... */ }
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
```typescript [TypeScript]
|
|
173
|
+
event.action // 'update'
|
|
174
|
+
event.table // 'comments'
|
|
175
|
+
event.row // { id: 'abc', body: 'edited comment', author: 'alice' }
|
|
176
|
+
event.oldRow // { id: 'abc', body: 'original comment', author: 'alice' }
|
|
177
|
+
event.filePath // 'comments/abc/index.json'
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
:::
|
|
181
|
+
|
|
182
|
+
### `delete`
|
|
183
|
+
|
|
184
|
+
A row was removed. This happens when a file is deleted or a file is modified to contain fewer rows.
|
|
185
|
+
|
|
186
|
+
::: code-group
|
|
187
|
+
|
|
188
|
+
```python [Python]
|
|
189
|
+
event.action # "delete"
|
|
190
|
+
event.table # "comments"
|
|
191
|
+
event.row # {"id": "abc", "body": "deleted comment", "author": "alice"}
|
|
192
|
+
event.old_row # None
|
|
193
|
+
event.file_path # "comments/abc/index.json"
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
```rust [Rust]
|
|
197
|
+
RowEvent::Delete {
|
|
198
|
+
table, // "comments"
|
|
199
|
+
row, // {"id": "abc", "body": "deleted comment", "author": "alice"}
|
|
200
|
+
file_path, // "comments/abc/index.json"
|
|
201
|
+
} => { /* ... */ }
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
```typescript [TypeScript]
|
|
205
|
+
event.action // 'delete'
|
|
206
|
+
event.table // 'comments'
|
|
207
|
+
event.row // { id: 'abc', body: 'deleted comment', author: 'alice' }
|
|
208
|
+
event.oldRow // undefined
|
|
209
|
+
event.filePath // 'comments/abc/index.json'
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
:::
|
|
213
|
+
|
|
214
|
+
### `error`
|
|
215
|
+
|
|
216
|
+
An error occurred while processing a file change. The file was modified but the extract function failed, or the file could not be read.
|
|
217
|
+
|
|
218
|
+
::: code-group
|
|
219
|
+
|
|
220
|
+
```python [Python]
|
|
221
|
+
event.action # "error"
|
|
222
|
+
event.table # "comments" (or None if the error isn't tied to a table)
|
|
223
|
+
event.error # "Extract error: ..."
|
|
224
|
+
event.file_path # "comments/abc/index.json"
|
|
225
|
+
event.row # None
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
```rust [Rust]
|
|
229
|
+
// `table` is `Option<String>`: `Some("comments")` when the failing
|
|
230
|
+
// file matched a table's glob; `None` for errors that aren't tied
|
|
231
|
+
// to a specific table (e.g. a watch-channel failure).
|
|
232
|
+
RowEvent::Error {
|
|
233
|
+
table, // Some("comments")
|
|
234
|
+
file_path, // PathBuf, e.g. "comments/abc/index.json"
|
|
235
|
+
error, // "Extract error: ..."
|
|
236
|
+
} => { /* ... */ }
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
```typescript [TypeScript]
|
|
240
|
+
event.action // 'error'
|
|
241
|
+
event.table // 'comments' (or null if the error isn't tied to a table)
|
|
242
|
+
event.error // 'Extract error: ...'
|
|
243
|
+
event.filePath // 'comments/abc/index.json'
|
|
244
|
+
event.row // undefined
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
:::
|
|
248
|
+
|
|
249
|
+
## How diffing works
|
|
250
|
+
|
|
251
|
+
When a file changes, `dirsql`:
|
|
252
|
+
|
|
253
|
+
1. Re-reads the file and calls the extract function to get new rows
|
|
254
|
+
2. Compares new rows against the previously extracted rows for that file
|
|
255
|
+
3. Emits insert, update, and delete events based on the diff
|
|
256
|
+
4. Updates the in-memory database to reflect the new state
|
|
257
|
+
|
|
258
|
+
Row identity is determined by position (row index within the file). If a file previously produced 3 rows and now produces 2, the first two rows are compared for updates and the third is emitted as a delete.
|
|
259
|
+
|
|
260
|
+
## Filesystem events
|
|
261
|
+
|
|
262
|
+
Under the hood, `dirsql` uses the `notify` crate (inotify on Linux, FSEvents on macOS, ReadDirectoryChangesW on Windows) to receive filesystem events. Events are coalesced and filtered through the table matcher before being processed.
|
|
263
|
+
|
|
264
|
+
Files that do not match any table glob or that match an ignore pattern are silently skipped.
|
package/docs/index.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
---
|
|
2
|
+
canonical: https://thekevinscott.github.io/dirsql/
|
|
3
|
+
layout: home
|
|
4
|
+
hero:
|
|
5
|
+
name: dirsql
|
|
6
|
+
tagline: Ephemeral SQL index over a local directory. `dirsql` watches a filesystem, ingests structured files into an in-memory SQLite database, and exposes a SQL query interface. The filesystem is always the source of truth.
|
|
7
|
+
actions:
|
|
8
|
+
- theme: brand
|
|
9
|
+
text: Get Started
|
|
10
|
+
link: /getting-started
|
|
11
|
+
- theme: alt
|
|
12
|
+
text: GitHub
|
|
13
|
+
link: https://github.com/thekevinscott/dirsql
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
Structured data stored as flat files (JSON, CSV, markdown) is easy to read, write, diff, and version-control.
|
|
17
|
+
|
|
18
|
+
But querying across many files is slow.
|
|
19
|
+
|
|
20
|
+
"Show me all records matching X across 50 files" requires opening and parsing every file.
|
|
21
|
+
|
|
22
|
+
## Solution
|
|
23
|
+
|
|
24
|
+
`dirsql` bridges this gap. The filesystem remains the source of truth, but you get SQL queries and real-time change events for free. Define tables with glob patterns and extract functions, and `dirsql` handles the rest.
|
|
25
|
+
|
|
26
|
+
::: code-group
|
|
27
|
+
|
|
28
|
+
```python [Python]
|
|
29
|
+
from dirsql import DirSQL, Table
|
|
30
|
+
import json
|
|
31
|
+
|
|
32
|
+
db = DirSQL(
|
|
33
|
+
"./my-project",
|
|
34
|
+
tables=[
|
|
35
|
+
Table(
|
|
36
|
+
ddl="CREATE TABLE files (name TEXT, size INTEGER, type TEXT)",
|
|
37
|
+
glob="data/*.json",
|
|
38
|
+
extract=lambda path, content: [json.loads(content)],
|
|
39
|
+
),
|
|
40
|
+
],
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# SQL queries over your filesystem
|
|
44
|
+
large = db.query("SELECT * FROM files WHERE size > 1000")
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
```rust [Rust]
|
|
48
|
+
use dirsql::{DirSQL, Table};
|
|
49
|
+
|
|
50
|
+
let db = DirSQL::new(
|
|
51
|
+
"./my-project",
|
|
52
|
+
vec![
|
|
53
|
+
Table::new(
|
|
54
|
+
"CREATE TABLE files (name TEXT, size INTEGER, type TEXT)",
|
|
55
|
+
"data/*.json",
|
|
56
|
+
|_path, content| vec![serde_json::from_str(content).unwrap()],
|
|
57
|
+
),
|
|
58
|
+
],
|
|
59
|
+
)?;
|
|
60
|
+
|
|
61
|
+
let large = db.query("SELECT * FROM files WHERE size > 1000")?;
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
```typescript [TypeScript]
|
|
65
|
+
import { DirSQL } from 'dirsql';
|
|
66
|
+
|
|
67
|
+
const db = new DirSQL({
|
|
68
|
+
root: './my-project',
|
|
69
|
+
tables: [
|
|
70
|
+
{
|
|
71
|
+
ddl: 'CREATE TABLE files (name TEXT, size INTEGER, type TEXT)',
|
|
72
|
+
glob: 'data/*.json',
|
|
73
|
+
|
|
+
extract: (_path, content) => [JSON.parse(content)],
|
|
74
|
+
},
|
|
75
|
+
],
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
const large = await db.query('SELECT * FROM files WHERE size > 1000');
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
:::
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dirsql",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.2.8",
|
|
4
4
|
"description": "Ephemeral SQL index over a local directory",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "https://github.com/thekevinscott/dirsql",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
},
|
|
19
19
|
"files": [
|
|
20
20
|
"dist/",
|
|
21
|
-
"
|
|
21
|
+
"docs/",
|
|
22
22
|
"README.md",
|
|
23
23
|
"LICENSE"
|
|
24
24
|
],
|
|
@@ -33,8 +33,9 @@
|
|
|
33
33
|
"artifacts": "napi artifacts",
|
|
34
34
|
"build": "wireit",
|
|
35
35
|
"build:debug": "wireit",
|
|
36
|
-
"
|
|
37
|
-
"
|
|
36
|
+
"stage:platform": "wireit",
|
|
37
|
+
"prepack": "tsx tools/stageDocs.ts pre",
|
|
38
|
+
"postpack": "tsx tools/stageDocs.ts post",
|
|
38
39
|
"test": "wireit",
|
|
39
40
|
"test:unit": "wireit",
|
|
40
41
|
"test:integration": "wireit",
|
|
@@ -62,10 +63,29 @@
|
|
|
62
63
|
"dist/**"
|
|
63
64
|
]
|
|
64
65
|
},
|
|
66
|
+
"stage:platform": {
|
|
67
|
+
"command": "tsx tools/stagePlatform.ts",
|
|
68
|
+
"dependencies": [
|
|
69
|
+
"napi:build"
|
|
70
|
+
],
|
|
71
|
+
"files": [
|
|
72
|
+
"tools/stagePlatform.ts",
|
|
73
|
+
"ts/platforms.ts",
|
|
74
|
+
"src/**/*.rs",
|
|
75
|
+
"build.rs",
|
|
76
|
+
"Cargo.toml",
|
|
77
|
+
"../rust/src/**/*.rs",
|
|
78
|
+
"../rust/Cargo.toml"
|
|
79
|
+
],
|
|
80
|
+
"output": [
|
|
81
|
+
"build/**"
|
|
82
|
+
]
|
|
83
|
+
},
|
|
65
84
|
"build": {
|
|
66
85
|
"dependencies": [
|
|
67
86
|
"napi:build",
|
|
68
|
-
"tsc"
|
|
87
|
+
"tsc",
|
|
88
|
+
"stage:platform"
|
|
69
89
|
]
|
|
70
90
|
},
|
|
71
91
|
"build:debug": {
|
|
@@ -154,5 +174,16 @@
|
|
|
154
174
|
"esbuild"
|
|
155
175
|
]
|
|
156
176
|
},
|
|
157
|
-
"optionalDependencies": {
|
|
158
|
-
|
|
177
|
+
"optionalDependencies": {
|
|
178
|
+
"@dirsql/lib-linux-x64-gnu": "0.2.8",
|
|
179
|
+
"@dirsql/lib-linux-arm64-gnu": "0.2.8",
|
|
180
|
+
"@dirsql/lib-darwin-x64": "0.2.8",
|
|
181
|
+
"@dirsql/lib-darwin-arm64": "0.2.8",
|
|
182
|
+
"@dirsql/lib-win32-x64-msvc": "0.2.8",
|
|
183
|
+
"@dirsql/cli-linux-x64-gnu": "0.2.8",
|
|
184
|
+
"@dirsql/cli-linux-arm64-gnu": "0.2.8",
|
|
185
|
+
"@dirsql/cli-darwin-x64": "0.2.8",
|
|
186
|
+
"@dirsql/cli-darwin-arm64": "0.2.8",
|
|
187
|
+
"@dirsql/cli-win32-x64-msvc": "0.2.8"
|
|
188
|
+
}
|
|
189
|
+
}
|
package/dirsql.node
DELETED
|
Binary file
|