dirsql 0.2.4 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,232 @@
1
+ ---
2
+ canonical: https://thekevinscott.github.io/dirsql/api/
3
+ ---
4
+
5
+ # API Reference
6
+
7
+ > Online: <https://thekevinscott.github.io/dirsql/api/>
8
+
9
+ ## DirSQL
10
+
11
+ ### Import
12
+
13
+ ::: code-group
14
+
15
+ ```python [Python]
16
+ from dirsql import DirSQL
17
+ ```
18
+
19
+ ```rust [Rust]
20
+ use dirsql::DirSQL;
21
+ ```
22
+
23
+ ```typescript [TypeScript]
24
+ import { DirSQL } from 'dirsql';
25
+ ```
26
+
27
+ :::
28
+
29
+ ### Constructor
30
+
31
+ ::: code-group
32
+
33
+ ```python [Python]
34
+ DirSQL(
35
+ root: str | None = None,
36
+ *,
37
+ tables: list[Table] | None = None,
38
+ ignore: list[str] | None = None,
39
+ config: str | None = None,
40
+ )
41
+ ```
42
+
43
+ ```rust [Rust]
44
+ DirSQL::builder()
45
+ .root(root) // optional
46
+ .tables(tables) // optional; append with .table(t)
47
+ .ignore(patterns) // optional
48
+ .config(config_toml_path) // optional
49
+ .build() // -> Result<DirSQL>
50
+ ```
51
+
52
+ ```typescript [TypeScript]
53
+ new DirSQL(configPath: string)
54
+ // or
55
+ new DirSQL({
56
+ root?: string,
57
+ tables?: TableDef[],
58
+ ignore?: string[],
59
+ config?: string,
60
+ })
61
+ ```
62
+
63
+ :::
64
+
65
+ Creates an in-memory SQLite index over the given directory. At least one of `root` or `config` must be supplied.
66
+
67
+ When an explicit `root` is passed and the `config` file also declares `[dirsql].root`, the explicit `root` wins and a warning is emitted on stderr. A `[dirsql].root` declared in the config file is resolved relative to the config file's parent directory.
68
+
69
+ In Python, the constructor starts scanning in a background thread and returns immediately. Call `await db.ready()` before querying. In Rust, `.build()` scans synchronously; use `.build_async()` (via `AsyncDirSQL`) for the tokio-driven equivalent. In TypeScript, scanning starts immediately and `db.ready` resolves when the scan finishes.
70
+
71
+ **Parameters:**
72
+
73
+ - `root` -- Path to the directory to index. Optional if `config` is supplied.
74
+ - `tables` -- List of `Table` definitions. Each defines a SQLite table, a glob pattern, and an extract function.
75
+ - `ignore` -- Optional list of glob patterns. Files matching any ignore pattern are skipped regardless of table globs.
76
+ - `config` -- Optional path to a `.dirsql.toml` config file. Its `[[table]]` entries are appended to any programmatic `tables`; its `[dirsql].ignore` patterns are appended to any explicit `ignore`; its optional `[dirsql].root` supplies the root directory when `root` is not passed explicitly.
77
+
78
+ ### Methods
79
+
80
+ #### `ready`
81
+
82
+ ::: code-group
83
+
84
+ ```python [Python]
85
+ await db.ready() -> None
86
+ ```
87
+
88
+ ```rust [Rust]
89
+ db.ready().await -> Result<()>
90
+ ```
91
+
92
+ ```typescript [TypeScript]
93
+ await db.ready // awaitable property
94
+ ```
95
+
96
+ :::
97
+
98
+ Waits for the initial scan to complete. Re-raises any exception from the scan. Safe to call multiple times.
99
+
100
+ #### `query`
101
+
102
+ ::: code-group
103
+
104
+ ```python [Python]
105
+ await db.query(sql: str) -> list[dict]
106
+ ```
107
+
108
+ ```rust [Rust]
109
+ db.query(sql: &str) -> Result<Vec<HashMap<String, Value>>>
110
+ ```
111
+
112
+ ```typescript [TypeScript]
113
+ await db.query(sql: string): Promise<Record<string, unknown>[]>
114
+ ```
115
+
116
+ :::
117
+
118
+ Executes a SQL query against the in-memory database. Returns results keyed by column name. Internal tracking columns (`_dirsql_file_path`, `_dirsql_row_index`) are excluded from results.
119
+
120
+ #### `watch`
121
+
122
+ ::: code-group
123
+
124
+ ```python [Python]
125
+ async for event in db.watch(): # AsyncIterator[RowEvent]
126
+ ...
127
+ ```
128
+
129
+ ```rust [Rust]
130
+ let mut stream = db.watch(); // impl Stream<Item = RowEvent>
131
+ while let Some(event) = stream.next().await { ... }
132
+ ```
133
+
134
+ ```typescript [TypeScript]
135
+ for await (const event of db.watch()) { // AsyncIterable<RowEvent>
136
+ ...
137
+ }
138
+ ```
139
+
140
+ :::
141
+
142
+ Returns an async iterable of `RowEvent` objects. The file watcher starts automatically on first iteration. The iterator never terminates on its own.
143
+
144
+ ---
145
+
146
+ ## Table
147
+
148
+ ### Import
149
+
150
+ ::: code-group
151
+
152
+ ```python [Python]
153
+ from dirsql import Table
154
+ ```
155
+
156
+ ```rust [Rust]
157
+ use dirsql::Table;
158
+ ```
159
+
160
+ ```typescript [TypeScript]
161
+ import { Table } from 'dirsql';
162
+ ```
163
+
164
+ :::
165
+
166
+ ### Constructor
167
+
168
+ ::: code-group
169
+
170
+ ```python [Python]
171
+ Table(*, ddl: str, glob: str, extract: Callable[[str, str], list[dict]])
172
+ ```
173
+
174
+ ```rust [Rust]
175
+ Table::new(ddl: &str, glob: &str, extract: fn(&str, &str) -> Vec<HashMap<String, Value>>)
176
+ ```
177
+
178
+ ```typescript [TypeScript]
179
+ new Table({ ddl: string, glob: string, extract: (path: string, content: string) => Record<string, unknown>[] })
180
+ ```
181
+
182
+ :::
183
+
184
+ Defines a mapping from files to SQLite table rows.
185
+
186
+ **Parameters:**
187
+
188
+ - `ddl` -- A `CREATE TABLE` statement. The table name is parsed from this DDL.
189
+ - `glob` -- A glob pattern matched against file paths relative to the root directory.
190
+ - `extract` -- A callable `(path, content) -> list[dict]`. Receives the relative file path and the file content as strings. Returns a list of rows, each a dict/map from column names to values. Return an empty list to skip a file.
191
+
192
+ **Attributes:**
193
+
194
+ - `ddl` -- The DDL string (read-only).
195
+ - `glob` -- The glob pattern (read-only).
196
+
197
+ ---
198
+
199
+ ## RowEvent
200
+
201
+ ### Import
202
+
203
+ ::: code-group
204
+
205
+ ```python [Python]
206
+ from dirsql import RowEvent
207
+ ```
208
+
209
+ ```rust [Rust]
210
+ use dirsql::RowEvent;
211
+ ```
212
+
213
+ ```typescript [TypeScript]
214
+ import { RowEvent } from 'dirsql';
215
+ ```
216
+
217
+ :::
218
+
219
+ Emitted by the watch stream. Represents a change to a row in the database caused by a filesystem event.
220
+
221
+ **Attributes:**
222
+
223
+ | Attribute | Python | Rust | TypeScript |
224
+ |-----------|--------|------|------------|
225
+ | Table name | `table: str` | `table: String` | `table: string` |
226
+ | Action | `action: str` | `action: Action` | `action: string` |
227
+ | Current/new row | `row: dict \| None` | `row: Option<HashMap>` | `row?: Record` |
228
+ | Previous row | `old_row: dict \| None` | `old_row: Option<HashMap>` | `oldRow?: Record` |
229
+ | Error message | `error: str \| None` | `error: Option<String>` | `error?: string` |
230
+ | File path | `file_path: str \| None` | `file_path: Option<String>` | `filePath?: string` |
231
+
232
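+ Action values: `"insert"`, `"update"`, `"delete"`, `"error"`. In Rust, `action` is the `Action` enum: `Action::Insert`, `Action::Update`, `Action::Delete`, `Action::Error`.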
@@ -0,0 +1,186 @@
1
+ ---
2
+ canonical: https://thekevinscott.github.io/dirsql/getting-started
3
+ ---
4
+
5
+ # Getting Started
6
+
7
+ > Online: <https://thekevinscott.github.io/dirsql/getting-started>
8
+
9
+ ## Installation
10
+
11
+ ::: code-group
12
+
13
+ ```bash [Python]
14
+ pip install dirsql
15
+ ```
16
+
17
+ ```bash [Rust]
18
+ cargo add dirsql
19
+ ```
20
+
21
+ ```bash [TypeScript]
22
+ pnpm add dirsql
23
+ ```
24
+
25
+ ```bash [CLI]
26
+ # Pick whichever install path you already have handy
27
+ npx dirsql --version
28
+ uvx dirsql --version
29
+ cargo install dirsql --features cli
30
+ ```
31
+
32
+ :::
33
+
34
+ See the [CLI guide](./guide/cli.md) for details on the command-line interface, and the [Rust library README](https://github.com/thekevinscott/dirsql/tree/main/packages/rust) for the library-vs-CLI feature split.
35
+
36
+ ## Quick start
37
+
38
+ Suppose you have a directory of JSON files representing blog posts:
39
+
40
+ ```
41
+ my-blog/
42
+ posts/
43
+ hello.json # {"title": "Hello World", "author": "alice"}
44
+ second.json # {"title": "Second Post", "author": "bob"}
45
+ authors/
46
+ alice.json # {"id": "alice", "name": "Alice"}
47
+ bob.json # {"id": "bob", "name": "Bob"}
48
+ ```
49
+
50
+ Index and query them with `dirsql`:
51
+
52
+ ::: code-group
53
+
54
+ ```python [Python]
55
+ import asyncio
56
+ import json
57
+ from dirsql import DirSQL, Table
58
+
59
+ async def main():
60
+ db = DirSQL(
61
+ "./my-blog",
62
+ tables=[
63
+ Table(
64
+ ddl="CREATE TABLE posts (title TEXT, author TEXT)",
65
+ glob="posts/*.json",
66
+ extract=lambda path, content: [json.loads(content)],
67
+ ),
68
+ Table(
69
+ ddl="CREATE TABLE authors (id TEXT, name TEXT)",
70
+ glob="authors/*.json",
71
+ extract=lambda path, content: [json.loads(content)],
72
+ ),
73
+ ],
74
+ )
75
+ await db.ready()
76
+
77
+ # Query all posts
78
+ posts = await db.query("SELECT * FROM posts")
79
+ # [{"title": "Hello World", "author": "alice"}, {"title": "Second Post", "author": "bob"}]
80
+
81
+ # Join across tables
82
+ results = await db.query("""
83
+ SELECT posts.title, authors.name
84
+ FROM posts
85
+ JOIN authors ON posts.author = authors.id
86
+ """)
87
+ # [{"title": "Hello World", "name": "Alice"}, {"title": "Second Post", "name": "Bob"}]
88
+
89
+ asyncio.run(main())
90
+ ```
91
+
92
+ ```rust [Rust]
93
+ use dirsql::{DirSQL, Table, Value};
94
+ use std::collections::HashMap;
95
+
96
+ // Convert a JSON object string into a dirsql row.
97
+ fn row_from_json(raw: &str) -> HashMap<String, Value> {
98
+ let v: serde_json::Value = serde_json::from_str(raw).unwrap();
99
+ let serde_json::Value::Object(obj) = v else { return HashMap::new() };
100
+ obj.into_iter()
101
+ .map(|(k, val)| {
102
+ let v = match val {
103
+ serde_json::Value::String(s) => Value::Text(s),
104
+ serde_json::Value::Number(n) => n
105
+ .as_i64()
106
+ .map(Value::Integer)
107
+ .unwrap_or_else(|| Value::Real(n.as_f64().unwrap_or(0.0))),
108
+ serde_json::Value::Bool(b) => Value::Integer(b as i64),
109
+ serde_json::Value::Null => Value::Null,
110
+ other => Value::Text(other.to_string()),
111
+ };
112
+ (k, v)
113
+ })
114
+ .collect()
115
+ }
116
+
117
+ let db = DirSQL::new(
118
+ "./my-blog",
119
+ vec![
120
+ Table::new(
121
+ "CREATE TABLE posts (title TEXT, author TEXT)",
122
+ "posts/*.json",
123
+ |_path, content| vec![row_from_json(content)],
124
+ ),
125
+ Table::new(
126
+ "CREATE TABLE authors (id TEXT, name TEXT)",
127
+ "authors/*.json",
128
+ |_path, content| vec![row_from_json(content)],
129
+ ),
130
+ ],
131
+ )?;
132
+
133
+ let posts = db.query("SELECT * FROM posts")?;
134
+
135
+ let results = db.query(
136
+ "SELECT posts.title, authors.name \
137
+ FROM posts JOIN authors ON posts.author = authors.id"
138
+ )?;
139
+ ```
140
+
141
+ ```typescript [TypeScript]
142
+ import { DirSQL, type TableDef } from 'dirsql';
143
+
144
+ const tables: TableDef[] = [
145
+ {
146
+ ddl: 'CREATE TABLE posts (title TEXT, author TEXT)',
147
+ glob: 'posts/*.json',
148
+ extract: (_path, content) => [JSON.parse(content)],
149
+ },
150
+ {
151
+ ddl: 'CREATE TABLE authors (id TEXT, name TEXT)',
152
+ glob: 'authors/*.json',
153
+ extract: (_path, content) => [JSON.parse(content)],
154
+ },
155
+ ];
156
+
157
+ const db = new DirSQL({ root: './my-blog', tables });
158
+
159
+ const posts = await db.query('SELECT * FROM posts');
160
+
161
+ const results = await db.query(`
162
+ SELECT posts.title, authors.name
163
+ FROM posts JOIN authors ON posts.author = authors.id
164
+ `);
165
+ ```
166
+
167
+ :::
168
+
169
+ ## What happens at startup
170
+
171
+ 1. `dirsql` walks the directory tree
172
+ 2. Files matching each table's glob pattern are read
173
+ 3. The `extract` function converts file content into rows
174
+ 4. Rows are inserted into an in-memory SQLite database
175
+ 5. SQL queries run against that database
176
+
177
+ The filesystem is always the source of truth. The database is rebuilt from files at startup.
178
+
179
+ ## Next steps
180
+
181
+ - [Defining Tables](./guide/tables.md) -- DDL, globs, and extract functions in detail
182
+ - [Querying](./guide/querying.md) -- SQL queries and return format
183
+ - [File Watching](./guide/watching.md) -- real-time change events
184
+ - [Async API](./guide/async.md) -- async ready(), query(), and watch()
185
+ - [Command-Line Interface](./guide/cli.md) -- `dirsql` runs an HTTP server (`POST /query`, `GET /events` SSE)
186
+ - [Collaboration with CRDTs](./guide/crdt.md) -- multi-writer document merging alongside `dirsql`
@@ -0,0 +1,224 @@
1
+ ---
2
+ canonical: https://thekevinscott.github.io/dirsql/guide/async
3
+ ---
4
+
5
+ # Async API
6
+
7
+ > Online: <https://thekevinscott.github.io/dirsql/guide/async>
8
+
9
+ `DirSQL` is async by default in Python. The initial directory scan runs in a background thread so it does not block the event loop.
10
+
11
+ ## Basic usage
12
+
13
+ ::: code-group
14
+
15
+ ```python [Python]
16
+ import asyncio
17
+ import json
18
+ from dirsql import DirSQL, Table
19
+
20
+ async def main():
21
+ db = DirSQL(
22
+ "./my-project",
23
+ tables=[
24
+ Table(
25
+ ddl="CREATE TABLE items (name TEXT, value INTEGER)",
26
+ glob="data/*.json",
27
+ extract=lambda path, content: [json.loads(content)],
28
+ ),
29
+ ],
30
+ )
31
+ await db.ready()
32
+ # Query (runs in a thread, does not block the event loop)
33
+ results = await db.query("SELECT * FROM items WHERE value > 10")
34
+ print(results)
35
+
36
+ asyncio.run(main())
37
+ ```
38
+
39
+ ```rust [Rust]
40
+ use dirsql::{DirSQL, Table};
41
+
42
+ #[tokio::main]
43
+ async fn main() -> Result<(), Box<dyn std::error::Error>> {
44
+ let db = DirSQL::new(
45
+ "./my-project",
46
+ vec![
47
+ Table::new(
48
+ "CREATE TABLE items (name TEXT, value INTEGER)",
49
+ "data/*.json",
50
+ |_path, content| vec![serde_json::from_str(content).unwrap()],
51
+ ),
52
+ ],
53
+ )?;
54
+
55
+ let results = db.query("SELECT * FROM items WHERE value > 10")?;
56
+ println!("{:?}", results);
57
+ Ok(())
58
+ }
59
+ ```
60
+
61
+ ```typescript [TypeScript]
62
+ import { DirSQL, Table } from 'dirsql';
63
+
64
+ const db = new DirSQL({
65
+ root: './my-project',
66
+ tables: [
67
+ new Table({
68
+ ddl: 'CREATE TABLE items (name TEXT, value INTEGER)',
69
+ glob: 'data/*.json',
70
+ extract: (_path, content) => [JSON.parse(content)],
71
+ }),
72
+ ],
73
+ });
74
+
75
+ // Query
76
+ const results = await db.query('SELECT * FROM items WHERE value > 10');
77
+ console.log(results);
78
+ ```
79
+
80
+ :::
81
+
82
+ ## Constructor
83
+
84
+ ```python
85
+ DirSQL(root=None, *, tables=None, ignore=None, config=None)
86
+ ```
87
+
88
+ The constructor starts the initial scan in a background thread (via `asyncio.ensure_future`) and returns immediately without blocking.
89
+
90
+ ## `await db.ready()`
91
+
92
+ Waits until the initial directory scan is complete. If the scan raised an exception (invalid DDL, unreadable files, etc.), `ready()` re-raises it.
93
+
94
+ `ready()` can be called multiple times safely. After the first completion, subsequent calls return immediately.
95
+
96
+ ```python
97
+ db = DirSQL("./data", tables=[...])
98
+
99
+ # Do other setup work here while the scan runs in the background
100
+ setup_logging()
101
+ connect_websocket()
102
+
103
+ # Now wait for the scan to finish before querying
104
+ await db.ready()
105
+ ```
106
+
107
+ ## `await db.query(sql)`
108
+
109
+ Runs a SQL query in a background thread. Returns a list of dicts keyed by column name.
110
+
111
+ ```python
112
+ results = await db.query("SELECT COUNT(*) as n FROM items")
113
+ ```
114
+
115
+ ## `async for event in db.watch()`
116
+
117
+ Returns an async iterable of `RowEvent` objects. The watcher is started automatically on the first iteration.
118
+
119
+ ::: code-group
120
+
121
+ ```python [Python]
122
+ async for event in db.watch():
123
+ if event.action == "insert":
124
+ print(f"New row in {event.table}: {event.row}")
125
+ elif event.action == "update":
126
+ print(f"Updated row in {event.table}: {event.row}")
127
+ elif event.action == "delete":
128
+ print(f"Deleted row from {event.table}: {event.row}")
129
+ elif event.action == "error":
130
+ print(f"Error: {event.error}")
131
+ ```
132
+
133
+ ```rust [Rust]
134
+ use futures::StreamExt;
135
+
136
+ let mut stream = db.watch();
137
+ while let Some(event) = stream.next().await {
138
+ match event.action {
139
+ Action::Insert => println!("New row in {}: {:?}", event.table, event.row),
140
+ Action::Update => println!("Updated row in {}: {:?}", event.table, event.row),
141
+ Action::Delete => println!("Deleted row from {}: {:?}", event.table, event.row),
142
+ Action::Error => eprintln!("Error: {:?}", event.error),
143
+ }
144
+ }
145
+ ```
146
+
147
+ ```typescript [TypeScript]
148
+ for await (const event of db.watch()) {
149
+ switch (event.action) {
150
+ case 'insert':
151
+ console.log(`New row in ${event.table}:`, event.row);
152
+ break;
153
+ case 'update':
154
+ console.log(`Updated row in ${event.table}:`, event.row);
155
+ break;
156
+ case 'delete':
157
+ console.log(`Deleted row from ${event.table}:`, event.row);
158
+ break;
159
+ case 'error':
160
+ console.error(`Error: ${event.error}`);
161
+ break;
162
+ }
163
+ }
164
+ ```
165
+
166
+ :::
167
+
168
+ The async iterator polls for filesystem events with a 200ms timeout internally. It yields events as they arrive and never terminates on its own -- use `break` or cancellation to stop.
169
+
170
+ ## Combining with other async code
171
+
172
+ The async API works alongside other concurrent code without blocking:
173
+
174
+ ::: code-group
175
+
176
+ ```python [Python]
177
+ async def watch_and_serve(db):
178
+ async for event in db.watch():
179
+ await notify_clients(event)
180
+
181
+ async def main():
182
+ db = DirSQL("./data", tables=[...])
183
+ await asyncio.gather(
184
+ watch_and_serve(db),
185
+ run_web_server(),
186
+ )
187
+ ```
188
+
189
+ ```rust [Rust]
190
+ async fn watch_and_serve(db: &DirSQL) {
191
+ let mut stream = db.watch();
192
+ while let Some(event) = stream.next().await {
193
+ notify_clients(&event).await;
194
+ }
195
+ }
196
+
197
+ #[tokio::main]
198
+ async fn main() -> Result<(), Box<dyn std::error::Error>> {
199
+ let db = DirSQL::new("./data", vec![...])?;
200
+
201
+ tokio::join!(
202
+ watch_and_serve(&db),
203
+ run_web_server(),
204
+ );
205
+ Ok(())
206
+ }
207
+ ```
208
+
209
+ ```typescript [TypeScript]
210
+ async function watchAndServe(db: DirSQL) {
211
+ for await (const event of db.watch()) {
212
+ await notifyClients(event);
213
+ }
214
+ }
215
+
216
+ const db = new DirSQL({ root: './data', tables: [/* tables */] });
217
+
218
+ await Promise.all([
219
+ watchAndServe(db),
220
+ runWebServer(),
221
+ ]);
222
+ ```
223
+
224
+ :::