dirsql 0.0.1 → 0.0.99-test.1775729890

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml DELETED
@@ -1,27 +0,0 @@
1
- [package]
2
- name = "dirsql"
3
- version = "0.1.0"
4
- edition = "2024"
5
- description = "Ephemeral SQL index over a local directory"
6
- license = "MIT"
7
- repository = "https://github.com/thekevinscott/dirsql"
8
- keywords = ["sql", "filesystem", "directory", "sqlite", "index"]
9
- categories = ["filesystem", "database"]
10
-
11
- [lib]
12
- crate-type = ["cdylib", "rlib"]
13
-
14
- [dependencies]
15
- globset = "0.4"
16
- notify = "7"
17
- pyo3 = { version = "0.28", features = ["extension-module"], optional = true }
18
- rusqlite = { version = "0.34", features = ["bundled"] }
19
- thiserror = "2"
20
- walkdir = "2"
21
-
22
- [features]
23
- default = []
24
- extension-module = ["pyo3/extension-module"]
25
-
26
- [dev-dependencies]
27
- tempfile = "3"
package/SUMMARY.md DELETED
@@ -1,62 +0,0 @@
1
- # dirsql
2
-
3
- Ephemeral SQL index over a local directory. Watches a filesystem, ingests structured files (JSONL, JSON, markdown with frontmatter, CSV), builds an in-memory SQLite database, and exposes a SQL query interface. On shutdown, the database is discarded -- the filesystem is always the source of truth.
4
-
5
- ## Language
6
-
7
- Rust.
8
-
9
- ## What it does
10
-
11
- 1. **Startup scan**: Walk a directory tree, parse structured files, populate SQLite tables
12
- 2. **File watching**: Monitor for changes (inotify/FSEvents via the `notify` crate) and update the index incrementally
13
- 3. **Query interface**: Expose SQL queries over the indexed data (Unix socket, HTTP, or embedded library)
14
- 4. **Event emission**: Notify subscribers when files change (websocket or event stream)
15
- 5. **Ephemeral**: The SQLite database is in-memory or tmpfile. Discarded on shutdown. Rebuilt on next start.
16
-
17
- ## Why
18
-
19
- The problem: structured data stored as flat files on disk (JSONL, JSON, markdown) is easy for agents and humans to read/write, git-friendly, and portable. But querying across many files is slow -- "show me all unresolved comments across 50 documents" requires opening and parsing every file.
20
-
21
- dirsql bridges this: files remain the source of truth (readable, appendable, diffable), but you get SQL queries and change events for free.
22
-
23
- ## Motivating use case
24
-
25
- A writing assistant app stores documents as markdown on disk with comment threads as JSONL files in a recursive workspace structure:
26
-
27
- ```
28
- my-article/
29
- index.md
30
- _resources/
31
- source-1.md
32
- _comments/
33
- a1b1/
34
- index.jsonl # comment thread (append-only events)
35
- _resources/
36
- deep-dive.md
37
- _comments/ # comments on comments
38
- c3d4/
39
- index.jsonl
40
- ```
41
-
42
- The editor needs to:
43
- - Query "all unresolved comments in this workspace" without scanning every file
44
- - Get notified when an external agent appends to a thread or creates a new resource
45
- - Remain decoupled from any specific database -- dirsql is a dev dependency, not a data store
46
-
47
- ## Analogues
48
-
49
- - **Steampipe**: SQL over cloud APIs
50
- - **Osquery**: SQL over OS state
51
- - **DataFusion/DuckDB**: SQL over data files (Parquet, CSV)
52
-
53
- dirsql is this pattern applied to a local project directory with real-time file watching.
54
-
55
- ## Open questions (for scoping conversation)
56
-
57
- - Table schema inference: auto-detect from file structure, or require a config/schema file?
58
- - Query interface: HTTP API, Unix socket, embedded Rust library, all three?
59
- - Event protocol: websockets, SSE, or something simpler?
60
- - Scope of file format support: start with JSONL only, or JSON/CSV/markdown frontmatter from day one?
61
- - How to handle nested/recursive structures (the workspace pattern above)?
62
- - Performance targets: how large a directory tree should startup scan handle?
package/justfile DELETED
@@ -1,53 +0,0 @@
1
- # Run all lints
2
- lint:
3
- ruff check .
4
-
5
- # Check formatting
6
- format-check:
7
- ruff format --check .
8
-
9
- # Auto-format
10
- format:
11
- ruff format .
12
-
13
- # Fix lint issues
14
- fix:
15
- ruff check --fix .
16
- ruff format .
17
-
18
- # Run Python unit tests (colocated)
19
- test-unit:
20
- pytest python/ -x -q
21
-
22
- # Run integration tests
23
- test-integration:
24
- pytest tests/integration/ -x -q
25
-
26
- # Run e2e tests (local only, not CI)
27
- test-e2e:
28
- pytest tests/e2e/ -x -q
29
-
30
- # CI test target (unit + integration, no e2e)
31
- test-ci:
32
- pytest python/ tests/integration/ -x -q --tb=short 2>/dev/null || echo "No tests found yet"
33
-
34
- # Run Rust tests
35
- test-rust:
36
- cargo test
37
-
38
- # Run Rust clippy
39
- clippy:
40
- cargo clippy -- -D warnings
41
-
42
- # Run Rust format check
43
- fmt-check:
44
- cargo fmt -- --check
45
-
46
- # Full local CI
47
- ci:
48
- just lint
49
- just format-check
50
- just clippy
51
- just fmt-check
52
- just test-rust
53
- just test-ci
package/pyproject.toml DELETED
@@ -1,27 +0,0 @@
1
- [build-system]
2
- requires = ["maturin>=1.0,<2.0"]
3
- build-backend = "maturin"
4
-
5
- [project]
6
- name = "dirsql"
7
- version = "0.1.0"
8
- description = "Ephemeral SQL index over a local directory"
9
- license = "MIT"
10
- requires-python = ">=3.12"
11
- authors = [
12
- { name = "Kevin Scott" },
13
- ]
14
- repository = "https://github.com/thekevinscott/dirsql"
15
- keywords = ["sql", "filesystem", "directory", "sqlite", "index"]
16
-
17
- [project.optional-dependencies]
18
- dev = [
19
- "pytest>=8",
20
- "pytest-describe>=2",
21
- "pytest-asyncio>=0.23",
22
- "ruff>=0.4",
23
- "maturin>=1.0",
24
- ]
25
-
26
- [tool.maturin]
27
- features = ["pyo3/extension-module"]
package/src/db.rs DELETED
@@ -1,312 +0,0 @@
1
- use rusqlite::Connection;
2
- use std::collections::HashMap;
3
- use thiserror::Error;
4
-
5
- #[derive(Error, Debug)]
6
- pub enum DbError {
7
- #[error("SQLite error: {0}")]
8
- Sqlite(#[from] rusqlite::Error),
9
-
10
- #[error("Schema mismatch: {0}")]
11
- SchemaMismatch(String),
12
-
13
- #[error("DDL parse error: {0}")]
14
- DdlParse(String),
15
- }
16
-
17
- pub type Result<T> = std::result::Result<T, DbError>;
18
-
19
- pub struct Db {
20
- conn: Connection,
21
- }
22
-
23
- impl Db {
24
- pub fn new() -> Result<Self> {
25
- let conn = Connection::open_in_memory()?;
26
- Ok(Self { conn })
27
- }
28
-
29
- /// Create a table from a user-provided DDL statement.
30
- /// Automatically injects internal tracking columns (_dirsql_file_path, _dirsql_row_index).
31
- pub fn create_table(&self, ddl: &str) -> Result<()> {
32
- let augmented = inject_tracking_columns(ddl)?;
33
- self.conn.execute(&augmented, [])?;
34
- Ok(())
35
- }
36
-
37
- /// Insert a row into a table.
38
- /// `row` contains user-defined columns only. `file_path` and `row_index` are tracked internally.
39
- pub fn insert_row(
40
- &self,
41
- table: &str,
42
- row: &HashMap<String, Value>,
43
- file_path: &str,
44
- row_index: usize,
45
- ) -> Result<()> {
46
- let mut columns: Vec<String> = row.keys().cloned().collect();
47
- columns.push("_dirsql_file_path".to_string());
48
- columns.push("_dirsql_row_index".to_string());
49
-
50
- let placeholders: Vec<String> = (1..=columns.len()).map(|i| format!("?{}", i)).collect();
51
-
52
- let sql = format!(
53
- "INSERT INTO {} ({}) VALUES ({})",
54
- table,
55
- columns.join(", "),
56
- placeholders.join(", "),
57
- );
58
-
59
- let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = row
60
- .values()
61
- .map(|v| Box::new(v.clone()) as Box<dyn rusqlite::types::ToSql>)
62
- .collect();
63
- params.push(Box::new(file_path.to_string()));
64
- params.push(Box::new(row_index as i64));
65
-
66
- let param_refs: Vec<&dyn rusqlite::types::ToSql> =
67
- params.iter().map(|p| p.as_ref()).collect();
68
- self.conn.execute(&sql, param_refs.as_slice())?;
69
- Ok(())
70
- }
71
-
72
- /// Delete all rows that were produced by a given file path.
73
- pub fn delete_rows_by_file(&self, table: &str, file_path: &str) -> Result<usize> {
74
- let sql = format!("DELETE FROM {} WHERE _dirsql_file_path = ?1", table);
75
- let count = self.conn.execute(&sql, [file_path])?;
76
- Ok(count)
77
- }
78
-
79
- /// Query the database, returning rows as a list of column-name -> value maps.
80
- /// Internal tracking columns (_dirsql_*) are excluded from results.
81
- pub fn query(&self, sql: &str) -> Result<Vec<HashMap<String, Value>>> {
82
- let mut stmt = self.conn.prepare(sql)?;
83
- let column_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
84
-
85
- let rows = stmt.query_map([], |row| {
86
- let mut map = HashMap::new();
87
- for (i, name) in column_names.iter().enumerate() {
88
- if name.starts_with("_dirsql_") {
89
- continue;
90
- }
91
- let val: rusqlite::types::Value = row.get(i)?;
92
- map.insert(name.clone(), Value::from(val));
93
- }
94
- Ok(map)
95
- })?;
96
-
97
- let mut results = Vec::new();
98
- for row in rows {
99
- results.push(row?);
100
- }
101
- Ok(results)
102
- }
103
- }
104
-
105
- /// Inject _dirsql_file_path and _dirsql_row_index columns into a CREATE TABLE DDL statement.
106
- fn inject_tracking_columns(ddl: &str) -> Result<String> {
107
- // Find the last closing paren in the DDL and insert our columns before it
108
- let close_paren = ddl
109
- .rfind(')')
110
- .ok_or_else(|| DbError::DdlParse("DDL must contain a closing parenthesis".to_string()))?;
111
-
112
- let before = &ddl[..close_paren];
113
- let after = &ddl[close_paren..];
114
-
115
- Ok(format!(
116
- "{}, _dirsql_file_path TEXT NOT NULL, _dirsql_row_index INTEGER NOT NULL{}",
117
- before, after
118
- ))
119
- }
120
-
121
- /// A value that can be stored in SQLite.
122
- #[derive(Debug, Clone, PartialEq)]
123
- pub enum Value {
124
- Null,
125
- Integer(i64),
126
- Real(f64),
127
- Text(String),
128
- Blob(Vec<u8>),
129
- }
130
-
131
- impl rusqlite::types::ToSql for Value {
132
- fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
133
- match self {
134
- Value::Null => Ok(rusqlite::types::ToSqlOutput::Owned(
135
- rusqlite::types::Value::Null,
136
- )),
137
- Value::Integer(i) => Ok(rusqlite::types::ToSqlOutput::Owned(
138
- rusqlite::types::Value::Integer(*i),
139
- )),
140
- Value::Real(f) => Ok(rusqlite::types::ToSqlOutput::Owned(
141
- rusqlite::types::Value::Real(*f),
142
- )),
143
- Value::Text(s) => Ok(rusqlite::types::ToSqlOutput::Owned(
144
- rusqlite::types::Value::Text(s.clone()),
145
- )),
146
- Value::Blob(b) => Ok(rusqlite::types::ToSqlOutput::Owned(
147
- rusqlite::types::Value::Blob(b.clone()),
148
- )),
149
- }
150
- }
151
- }
152
-
153
- impl From<rusqlite::types::Value> for Value {
154
- fn from(v: rusqlite::types::Value) -> Self {
155
- match v {
156
- rusqlite::types::Value::Null => Value::Null,
157
- rusqlite::types::Value::Integer(i) => Value::Integer(i),
158
- rusqlite::types::Value::Real(f) => Value::Real(f),
159
- rusqlite::types::Value::Text(s) => Value::Text(s),
160
- rusqlite::types::Value::Blob(b) => Value::Blob(b),
161
- }
162
- }
163
- }
164
-
165
- #[cfg(test)]
166
- mod tests {
167
- use super::*;
168
-
169
- #[test]
170
- fn create_table_from_ddl() {
171
- let db = Db::new().unwrap();
172
- db.create_table("CREATE TABLE comments (id TEXT PRIMARY KEY, body TEXT, resolved INTEGER)")
173
- .unwrap();
174
-
175
- // Table should exist -- querying it should return empty results
176
- let rows = db.query("SELECT * FROM comments").unwrap();
177
- assert_eq!(rows.len(), 0);
178
- }
179
-
180
- #[test]
181
- fn create_table_invalid_ddl_returns_error() {
182
- let db = Db::new().unwrap();
183
- let result = db.create_table("NOT VALID SQL");
184
- assert!(result.is_err());
185
- }
186
-
187
- #[test]
188
- fn create_table_injects_tracking_columns() {
189
- let db = Db::new().unwrap();
190
- db.create_table("CREATE TABLE t (id TEXT)").unwrap();
191
-
192
- // The tracking columns should exist even though the user didn't declare them
193
- db.insert_row(
194
- "t",
195
- &HashMap::from([("id".into(), Value::Text("1".into()))]),
196
- "test.json",
197
- 0,
198
- )
199
- .unwrap();
200
-
201
- // SELECT * should NOT return tracking columns
202
- let rows = db.query("SELECT * FROM t").unwrap();
203
- assert_eq!(rows.len(), 1);
204
- assert!(rows[0].contains_key("id"));
205
- assert!(!rows[0].contains_key("_dirsql_file_path"));
206
- assert!(!rows[0].contains_key("_dirsql_row_index"));
207
- }
208
-
209
- #[test]
210
- fn insert_and_query_rows() {
211
- let db = Db::new().unwrap();
212
- db.create_table("CREATE TABLE docs (title TEXT, draft INTEGER)")
213
- .unwrap();
214
-
215
- let row = HashMap::from([
216
- ("title".into(), Value::Text("Hello".into())),
217
- ("draft".into(), Value::Integer(0)),
218
- ]);
219
- db.insert_row("docs", &row, "docs/hello.md", 0).unwrap();
220
-
221
- let results = db.query("SELECT title, draft FROM docs").unwrap();
222
- assert_eq!(results.len(), 1);
223
- assert_eq!(results[0]["title"], Value::Text("Hello".into()));
224
- assert_eq!(results[0]["draft"], Value::Integer(0));
225
- }
226
-
227
- #[test]
228
- fn insert_multiple_rows_from_same_file() {
229
- let db = Db::new().unwrap();
230
- db.create_table("CREATE TABLE events (action TEXT, ts INTEGER)")
231
- .unwrap();
232
-
233
- for (i, action) in ["created", "resolved", "reopened"].iter().enumerate() {
234
- let row = HashMap::from([
235
- ("action".into(), Value::Text(action.to_string())),
236
- ("ts".into(), Value::Integer(i as i64)),
237
- ]);
238
- db.insert_row("events", &row, "thread.jsonl", i).unwrap();
239
- }
240
-
241
- let results = db.query("SELECT action FROM events ORDER BY ts").unwrap();
242
- assert_eq!(results.len(), 3);
243
- assert_eq!(results[0]["action"], Value::Text("created".into()));
244
- assert_eq!(results[2]["action"], Value::Text("reopened".into()));
245
- }
246
-
247
- #[test]
248
- fn delete_rows_by_file_path() {
249
- let db = Db::new().unwrap();
250
- db.create_table("CREATE TABLE comments (id TEXT, body TEXT)")
251
- .unwrap();
252
-
253
- // Insert rows from two different files
254
- for (i, (id, file)) in [("1", "a.jsonl"), ("2", "a.jsonl"), ("3", "b.jsonl")]
255
- .iter()
256
- .enumerate()
257
- {
258
- let row = HashMap::from([
259
- ("id".into(), Value::Text(id.to_string())),
260
- ("body".into(), Value::Text("text".into())),
261
- ]);
262
- db.insert_row("comments", &row, file, i).unwrap();
263
- }
264
-
265
- // Delete rows from file "a.jsonl"
266
- let deleted = db.delete_rows_by_file("comments", "a.jsonl").unwrap();
267
- assert_eq!(deleted, 2);
268
-
269
- // Only file b's row remains
270
- let results = db.query("SELECT id FROM comments").unwrap();
271
- assert_eq!(results.len(), 1);
272
- assert_eq!(results[0]["id"], Value::Text("3".into()));
273
- }
274
-
275
- #[test]
276
- fn query_with_where_clause() {
277
- let db = Db::new().unwrap();
278
- db.create_table("CREATE TABLE items (name TEXT, count INTEGER)")
279
- .unwrap();
280
-
281
- for (i, (name, count)) in [("apple", 5), ("banana", 0), ("cherry", 3)]
282
- .iter()
283
- .enumerate()
284
- {
285
- let row = HashMap::from([
286
- ("name".into(), Value::Text(name.to_string())),
287
- ("count".into(), Value::Integer(*count)),
288
- ]);
289
- db.insert_row("items", &row, "items.json", i).unwrap();
290
- }
291
-
292
- let results = db
293
- .query("SELECT name FROM items WHERE count > 0 ORDER BY name")
294
- .unwrap();
295
- assert_eq!(results.len(), 2);
296
- assert_eq!(results[0]["name"], Value::Text("apple".into()));
297
- assert_eq!(results[1]["name"], Value::Text("cherry".into()));
298
- }
299
-
300
- #[test]
301
- fn inject_tracking_columns_modifies_ddl() {
302
- let result = inject_tracking_columns("CREATE TABLE t (id TEXT)").unwrap();
303
- assert!(result.contains("_dirsql_file_path TEXT NOT NULL"));
304
- assert!(result.contains("_dirsql_row_index INTEGER NOT NULL"));
305
- }
306
-
307
- #[test]
308
- fn inject_tracking_columns_rejects_missing_paren() {
309
- let result = inject_tracking_columns("NOT A CREATE TABLE");
310
- assert!(result.is_err());
311
- }
312
- }