dirsql 0.0.11.tar.gz → 0.0.13.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/python-test.yml +6 -0
  2. dirsql-0.0.13/.npmignore +23 -0
  3. {dirsql-0.0.11 → dirsql-0.0.13}/PKG-INFO +1 -1
  4. {dirsql-0.0.11 → dirsql-0.0.13}/pyproject.toml +12 -1
  5. dirsql-0.0.13/src/lib.rs +291 -0
  6. dirsql-0.0.13/tests/__init__.py +0 -0
  7. dirsql-0.0.13/tests/conftest.py +38 -0
  8. dirsql-0.0.13/tests/integration/__init__.py +0 -0
  9. dirsql-0.0.13/tests/integration/test_dirsql.py +346 -0
  10. dirsql-0.0.11/index.js +0 -5
  11. dirsql-0.0.11/justfile +0 -53
  12. dirsql-0.0.11/package.json +0 -8
  13. dirsql-0.0.11/src/lib.rs +0 -15
  14. {dirsql-0.0.11 → dirsql-0.0.13}/.claude/CLAUDE.md +0 -0
  15. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/minor-release.yml +0 -0
  16. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/patch-release.yml +0 -0
  17. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/pr-monitor.yml +0 -0
  18. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/publish.yml +0 -0
  19. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/python-lint.yml +0 -0
  20. {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/rust-test.yml +0 -0
  21. {dirsql-0.0.11 → dirsql-0.0.13}/.gitignore +0 -0
  22. {dirsql-0.0.11 → dirsql-0.0.13}/Cargo.lock +0 -0
  23. {dirsql-0.0.11 → dirsql-0.0.13}/Cargo.toml +0 -0
  24. {dirsql-0.0.11 → dirsql-0.0.13}/LICENSE +0 -0
  25. {dirsql-0.0.11 → dirsql-0.0.13}/SUMMARY.md +0 -0
  26. {dirsql-0.0.11 → dirsql-0.0.13}/src/db.rs +0 -0
  27. {dirsql-0.0.11 → dirsql-0.0.13}/src/differ.rs +0 -0
  28. {dirsql-0.0.11 → dirsql-0.0.13}/src/matcher.rs +0 -0
  29. {dirsql-0.0.11 → dirsql-0.0.13}/src/scanner.rs +0 -0
  30. {dirsql-0.0.11 → dirsql-0.0.13}/src/watcher.rs +0 -0
@@ -5,13 +5,19 @@ on:
5
5
  branches: [main]
6
6
  paths:
7
7
  - '**.py'
8
+ - '**.rs'
8
9
  - 'pyproject.toml'
10
+ - 'Cargo.toml'
11
+ - 'Cargo.lock'
9
12
  - 'uv.lock'
10
13
  - 'tests/**'
11
14
  pull_request:
12
15
  paths:
13
16
  - '**.py'
17
+ - '**.rs'
14
18
  - 'pyproject.toml'
19
+ - 'Cargo.toml'
20
+ - 'Cargo.lock'
15
21
  - 'uv.lock'
16
22
  - 'tests/**'
17
23
 
@@ -0,0 +1,23 @@
1
+ # Source and build artifacts
2
+ src/
3
+ target/
4
+ Cargo.*
5
+ *.rs
6
+ *.toml
7
+ *.lock
8
+
9
+ # Dev/CI infrastructure
10
+ .claude/
11
+ .github/
12
+ tests/
13
+ python/
14
+ .beads
15
+ .dolt/
16
+
17
+ # Docs and notes
18
+ AGENTS.md
19
+ SUMMARY.md
20
+ notes/
21
+
22
+ # Build tools
23
+ justfile
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dirsql
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Requires-Dist: pytest>=8 ; extra == 'dev'
5
5
  Requires-Dist: pytest-describe>=2 ; extra == 'dev'
6
6
  Requires-Dist: pytest-asyncio>=0.23 ; extra == 'dev'
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "dirsql"
7
- version = "0.0.11"
7
+ version = "0.0.13"
8
8
  description = "Ephemeral SQL index over a local directory"
9
9
  license = "MIT"
10
10
  requires-python = ">=3.12"
@@ -25,3 +25,14 @@ dev = [
25
25
 
26
26
  [tool.maturin]
27
27
  features = ["pyo3/extension-module"]
28
+ exclude = [
29
+ ".github/",
30
+ ".claude/",
31
+ "tests/",
32
+ "notes/",
33
+ "index.js",
34
+ "package.json",
35
+ ".beads",
36
+ "AGENTS.md",
37
+ "justfile",
38
+ ]
@@ -0,0 +1,291 @@
1
+ pub mod db;
2
+ pub mod differ;
3
+ pub mod matcher;
4
+ pub mod scanner;
5
+ pub mod watcher;
6
+
7
+ /// Extract the table name from a CREATE TABLE DDL statement.
8
+ /// Handles: CREATE TABLE name (...), CREATE TABLE IF NOT EXISTS name (...)
9
+ pub fn parse_table_name(ddl: &str) -> Option<String> {
10
+ let upper = ddl.to_uppercase();
11
+ let idx = upper.find("CREATE TABLE")?;
12
+ let rest = &ddl[idx + "CREATE TABLE".len()..].trim_start();
13
+
14
+ // Skip optional "IF NOT EXISTS"
15
+ let rest = if rest.to_uppercase().starts_with("IF NOT EXISTS") {
16
+ rest["IF NOT EXISTS".len()..].trim_start()
17
+ } else {
18
+ rest
19
+ };
20
+
21
+ // Table name is everything up to the first whitespace or '('
22
+ let name: String = rest
23
+ .chars()
24
+ .take_while(|c| !c.is_whitespace() && *c != '(')
25
+ .collect();
26
+
27
+ if name.is_empty() { None } else { Some(name) }
28
+ }
29
+
30
+ #[cfg(feature = "extension-module")]
31
+ mod python {
32
+ use crate::db::{Db, Value};
33
+ use crate::matcher::TableMatcher;
34
+ use crate::parse_table_name;
35
+ use crate::scanner::scan_directory;
36
+ use pyo3::exceptions::PyRuntimeError;
37
+ use pyo3::prelude::*;
38
+ use pyo3::types::{PyDict, PyList};
39
+ use std::collections::HashMap;
40
+ use std::path::Path;
41
+ use std::sync::Mutex;
42
+
43
+ /// A table definition for DirSQL.
44
+ #[pyclass(name = "Table", frozen)]
45
+ struct PyTable {
46
+ #[pyo3(get)]
47
+ ddl: String,
48
+ #[pyo3(get)]
49
+ glob: String,
50
+ extract: Py<PyAny>,
51
+ }
52
+
53
+ #[pymethods]
54
+ impl PyTable {
55
+ #[new]
56
+ #[pyo3(signature = (*, ddl, glob, extract))]
57
+ fn new(ddl: String, glob: String, extract: Py<PyAny>) -> Self {
58
+ PyTable { ddl, glob, extract }
59
+ }
60
+ }
61
+
62
+ /// The main DirSQL class. Creates an in-memory SQLite index over a directory.
63
+ #[pyclass(name = "DirSQL")]
64
+ struct PyDirSQL {
65
+ db: Mutex<Db>,
66
+ }
67
+
68
+ #[pymethods]
69
+ impl PyDirSQL {
70
+ #[new]
71
+ #[pyo3(signature = (root, *, tables, ignore=None))]
72
+ fn new(
73
+ py: Python<'_>,
74
+ root: String,
75
+ tables: Vec<PyRef<'_, PyTable>>,
76
+ ignore: Option<Vec<String>>,
77
+ ) -> PyResult<Self> {
78
+ let db =
79
+ Db::new().map_err(|e| PyRuntimeError::new_err(format!("DB init error: {}", e)))?;
80
+
81
+ // Parse table names from DDLs and create tables
82
+ let mut table_configs: Vec<(String, String, Py<PyAny>)> = Vec::new();
83
+ for t in &tables {
84
+ let table_name = parse_table_name(&t.ddl).ok_or_else(|| {
85
+ PyRuntimeError::new_err(format!(
86
+ "Could not parse table name from DDL: {}",
87
+ t.ddl
88
+ ))
89
+ })?;
90
+ db.create_table(&t.ddl)
91
+ .map_err(|e| PyRuntimeError::new_err(format!("DDL error: {}", e)))?;
92
+ table_configs.push((table_name, t.glob.clone(), t.extract.clone_ref(py)));
93
+ }
94
+
95
+ // Build glob -> table_name mappings for the scanner
96
+ let mappings: Vec<(&str, &str)> = table_configs
97
+ .iter()
98
+ .map(|(name, glob, _extract): &(String, String, Py<PyAny>)| {
99
+ (glob.as_str(), name.as_str())
100
+ })
101
+ .collect();
102
+ let ignore_patterns: Vec<&str> = ignore
103
+ .as_ref()
104
+ .map(|v| v.iter().map(|s| s.as_str()).collect())
105
+ .unwrap_or_default();
106
+
107
+ let matcher = TableMatcher::new(&mappings, &ignore_patterns)
108
+ .map_err(|e| PyRuntimeError::new_err(format!("Glob error: {}", e)))?;
109
+
110
+ // Scan directory
111
+ let root_path = Path::new(&root);
112
+ let files = scan_directory(root_path, &matcher);
113
+
114
+ // Build a lookup from table_name -> extract callable
115
+ let extract_map: HashMap<String, Py<PyAny>> = table_configs
116
+ .iter()
117
+ .map(|(name, _glob, extract): &(String, String, Py<PyAny>)| {
118
+ (name.clone(), extract.clone_ref(py))
119
+ })
120
+ .collect();
121
+
122
+ // Process each file
123
+ for (file_path, table_name) in &files {
124
+ // Read file content
125
+ let content = std::fs::read_to_string(file_path).map_err(|e| {
126
+ PyRuntimeError::new_err(format!(
127
+ "Failed to read {}: {}",
128
+ file_path.display(),
129
+ e
130
+ ))
131
+ })?;
132
+
133
+ // Compute relative path
134
+ let rel_path = file_path
135
+ .strip_prefix(root_path)
136
+ .unwrap_or(file_path)
137
+ .to_string_lossy()
138
+ .to_string();
139
+
140
+ // Call extract
141
+ let extract_fn = extract_map.get(table_name).ok_or_else(|| {
142
+ PyRuntimeError::new_err(format!("No extract function for table {}", table_name))
143
+ })?;
144
+
145
+ let result = extract_fn.call1(py, (rel_path.clone(), content))?;
146
+ let rows: Vec<HashMap<String, Py<PyAny>>> = result.extract(py)?;
147
+
148
+ // Insert rows
149
+ for (row_index, py_row) in rows.iter().enumerate() {
150
+ let row = convert_py_row(py, py_row)?;
151
+ db.insert_row(table_name, &row, &rel_path, row_index)
152
+ .map_err(|e| PyRuntimeError::new_err(format!("Insert error: {}", e)))?;
153
+ }
154
+ }
155
+
156
+ Ok(PyDirSQL { db: Mutex::new(db) })
157
+ }
158
+
159
+ /// Execute a SQL query and return results as a list of dicts.
160
+ fn query(&self, py: Python<'_>, sql: &str) -> PyResult<Py<PyList>> {
161
+ let db = self
162
+ .db
163
+ .lock()
164
+ .map_err(|e| PyRuntimeError::new_err(format!("Lock error: {}", e)))?;
165
+ let rows = db
166
+ .query(sql)
167
+ .map_err(|e| PyRuntimeError::new_err(format!("Query error: {}", e)))?;
168
+
169
+ let result = PyList::empty(py);
170
+ for row in &rows {
171
+ let dict = PyDict::new(py);
172
+ for (key, value) in row {
173
+ let py_val = value_to_py(py, value);
174
+ dict.set_item(key, py_val)?;
175
+ }
176
+ result.append(dict)?;
177
+ }
178
+ Ok(result.unbind())
179
+ }
180
+ }
181
+
182
+ /// Convert a Python dict row to a Rust HashMap<String, Value>.
183
+ fn convert_py_row(
184
+ py: Python<'_>,
185
+ py_row: &HashMap<String, Py<PyAny>>,
186
+ ) -> PyResult<HashMap<String, Value>> {
187
+ let mut row: HashMap<String, Value> = HashMap::new();
188
+ for (key, val) in py_row {
189
+ let value = py_to_value(py, val)?;
190
+ row.insert(key.clone(), value);
191
+ }
192
+ Ok(row)
193
+ }
194
+
195
+ /// Convert a Python object to a db::Value.
196
+ fn py_to_value(py: Python<'_>, obj: &Py<PyAny>) -> PyResult<Value> {
197
+ let bound = obj.bind(py);
198
+
199
+ if bound.is_none() {
200
+ return Ok(Value::Null);
201
+ }
202
+
203
+ // Try bool first (before int, since bool is subclass of int in Python)
204
+ if bound.is_instance_of::<pyo3::types::PyBool>() {
205
+ let b: bool = bound.extract()?;
206
+ return Ok(Value::Integer(if b { 1 } else { 0 }));
207
+ }
208
+
209
+ // Try integer
210
+ if let Ok(i) = bound.extract::<i64>() {
211
+ return Ok(Value::Integer(i));
212
+ }
213
+
214
+ // Try float
215
+ if let Ok(f) = bound.extract::<f64>() {
216
+ return Ok(Value::Real(f));
217
+ }
218
+
219
+ // Try string
220
+ if let Ok(s) = bound.extract::<String>() {
221
+ return Ok(Value::Text(s));
222
+ }
223
+
224
+ // Try bytes
225
+ if let Ok(b) = bound.extract::<Vec<u8>>() {
226
+ return Ok(Value::Blob(b));
227
+ }
228
+
229
+ // Fall back to string representation
230
+ let s = bound.str()?.to_string();
231
+ Ok(Value::Text(s))
232
+ }
233
+
234
+ /// Convert a db::Value to a Python object.
235
+ fn value_to_py(py: Python<'_>, value: &Value) -> Py<PyAny> {
236
+ match value {
237
+ Value::Null => py.None(),
238
+ Value::Integer(i) => i.into_pyobject(py).unwrap().into_any().unbind(),
239
+ Value::Real(f) => f.into_pyobject(py).unwrap().into_any().unbind(),
240
+ Value::Text(s) => s.into_pyobject(py).unwrap().into_any().unbind(),
241
+ Value::Blob(b) => b.into_pyobject(py).unwrap().unbind(),
242
+ }
243
+ }
244
+
245
+ #[pymodule]
246
+ fn dirsql(m: &Bound<'_, PyModule>) -> PyResult<()> {
247
+ m.add("__version__", env!("CARGO_PKG_VERSION"))?;
248
+ m.add_class::<PyTable>()?;
249
+ m.add_class::<PyDirSQL>()?;
250
+ Ok(())
251
+ }
252
+ }
253
+
254
+ #[cfg(test)]
255
+ mod tests {
256
+ use super::*;
257
+
258
+ #[test]
259
+ fn parse_table_name_simple() {
260
+ assert_eq!(
261
+ parse_table_name("CREATE TABLE comments (id TEXT)"),
262
+ Some("comments".to_string())
263
+ );
264
+ }
265
+
266
+ #[test]
267
+ fn parse_table_name_if_not_exists() {
268
+ assert_eq!(
269
+ parse_table_name("CREATE TABLE IF NOT EXISTS comments (id TEXT)"),
270
+ Some("comments".to_string())
271
+ );
272
+ }
273
+
274
+ #[test]
275
+ fn parse_table_name_no_space_before_paren() {
276
+ assert_eq!(
277
+ parse_table_name("CREATE TABLE t(id TEXT)"),
278
+ Some("t".to_string())
279
+ );
280
+ }
281
+
282
+ #[test]
283
+ fn parse_table_name_invalid() {
284
+ assert_eq!(parse_table_name("NOT A DDL"), None);
285
+ }
286
+
287
+ #[test]
288
+ fn parse_table_name_empty_after_create_table() {
289
+ assert_eq!(parse_table_name("CREATE TABLE "), None);
290
+ }
291
+ }
File without changes
@@ -0,0 +1,38 @@
1
+ import json
2
+ import os
3
+ import tempfile
4
+
5
+ import pytest
6
+
7
+
8
+ @pytest.fixture
9
+ def tmp_dir():
10
+ """Create a temporary directory for test files."""
11
+ with tempfile.TemporaryDirectory() as d:
12
+ yield d
13
+
14
+
15
+ @pytest.fixture
16
+ def jsonl_dir(tmp_dir):
17
+ """Create a temp dir with JSONL files for testing."""
18
+ # Create a simple JSONL file
19
+ os.makedirs(os.path.join(tmp_dir, "comments", "abc"), exist_ok=True)
20
+ os.makedirs(os.path.join(tmp_dir, "comments", "def"), exist_ok=True)
21
+
22
+ with open(os.path.join(tmp_dir, "comments", "abc", "index.jsonl"), "w") as f:
23
+ f.write(json.dumps({"body": "first comment", "author": "alice"}) + "\n")
24
+ f.write(json.dumps({"body": "second comment", "author": "bob"}) + "\n")
25
+
26
+ with open(os.path.join(tmp_dir, "comments", "def", "index.jsonl"), "w") as f:
27
+ f.write(json.dumps({"body": "another comment", "author": "carol"}) + "\n")
28
+
29
+ return tmp_dir
30
+
31
+
32
+ @pytest.fixture
33
+ def csv_dir(tmp_dir):
34
+ """Create a temp dir with a CSV-like file for testing single-row extraction."""
35
+ with open(os.path.join(tmp_dir, "metadata.json"), "w") as f:
36
+ json.dump({"title": "My Project", "version": "1.0"}, f)
37
+
38
+ return tmp_dir
File without changes
@@ -0,0 +1,346 @@
1
+ """Integration tests for the DirSQL Python SDK."""
2
+
3
+ import json
4
+ import os
5
+
6
+ import pytest
7
+
8
+ from dirsql import DirSQL, Table
9
+
10
+
11
+ def describe_DirSQL():
12
+ def describe_init():
13
+ def it_creates_instance_with_tables(jsonl_dir):
14
+ """DirSQL can be initialized with a root path and table definitions."""
15
+ db = DirSQL(
16
+ jsonl_dir,
17
+ tables=[
18
+ Table(
19
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
20
+ glob="comments/**/index.jsonl",
21
+ extract=lambda path, content: [
22
+ {
23
+ "id": os.path.basename(os.path.dirname(path)),
24
+ "body": row["body"],
25
+ "author": row["author"],
26
+ }
27
+ for line in content.splitlines()
28
+ for row in [json.loads(line)]
29
+ ],
30
+ ),
31
+ ],
32
+ )
33
+ assert db is not None
34
+
35
+ def it_accepts_ignore_patterns(jsonl_dir):
36
+ """DirSQL accepts an ignore list to skip matching paths."""
37
+ db = DirSQL(
38
+ jsonl_dir,
39
+ ignore=["**/def/**"],
40
+ tables=[
41
+ Table(
42
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
43
+ glob="comments/**/index.jsonl",
44
+ extract=lambda path, content: [
45
+ {
46
+ "id": os.path.basename(os.path.dirname(path)),
47
+ "body": row["body"],
48
+ "author": row["author"],
49
+ }
50
+ for line in content.splitlines()
51
+ for row in [json.loads(line)]
52
+ ],
53
+ ),
54
+ ],
55
+ )
56
+ # Only the "abc" directory should be indexed, not "def"
57
+ results = db.query("SELECT DISTINCT id FROM comments")
58
+ ids = {r["id"] for r in results}
59
+ assert ids == {"abc"}
60
+
61
+ def describe_query():
62
+ def it_returns_all_rows(jsonl_dir):
63
+ """query returns all indexed rows when no WHERE clause."""
64
+ db = DirSQL(
65
+ jsonl_dir,
66
+ tables=[
67
+ Table(
68
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
69
+ glob="comments/**/index.jsonl",
70
+ extract=lambda path, content: [
71
+ {
72
+ "id": os.path.basename(os.path.dirname(path)),
73
+ "body": row["body"],
74
+ "author": row["author"],
75
+ }
76
+ for line in content.splitlines()
77
+ for row in [json.loads(line)]
78
+ ],
79
+ ),
80
+ ],
81
+ )
82
+ results = db.query("SELECT * FROM comments")
83
+ assert len(results) == 3
84
+
85
+ def it_returns_dicts_with_column_names(jsonl_dir):
86
+ """Each result row is a dict keyed by column name."""
87
+ db = DirSQL(
88
+ jsonl_dir,
89
+ tables=[
90
+ Table(
91
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
92
+ glob="comments/**/index.jsonl",
93
+ extract=lambda path, content: [
94
+ {
95
+ "id": os.path.basename(os.path.dirname(path)),
96
+ "body": row["body"],
97
+ "author": row["author"],
98
+ }
99
+ for line in content.splitlines()
100
+ for row in [json.loads(line)]
101
+ ],
102
+ ),
103
+ ],
104
+ )
105
+ results = db.query(
106
+ "SELECT author FROM comments WHERE body = 'first comment'"
107
+ )
108
+ assert len(results) == 1
109
+ assert results[0]["author"] == "alice"
110
+
111
+ def it_filters_with_where_clause(jsonl_dir):
112
+ """SQL WHERE clauses work correctly on indexed data."""
113
+ db = DirSQL(
114
+ jsonl_dir,
115
+ tables=[
116
+ Table(
117
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
118
+ glob="comments/**/index.jsonl",
119
+ extract=lambda path, content: [
120
+ {
121
+ "id": os.path.basename(os.path.dirname(path)),
122
+ "body": row["body"],
123
+ "author": row["author"],
124
+ }
125
+ for line in content.splitlines()
126
+ for row in [json.loads(line)]
127
+ ],
128
+ ),
129
+ ],
130
+ )
131
+ results = db.query("SELECT * FROM comments WHERE id = 'abc'")
132
+ assert len(results) == 2
133
+ assert all(r["id"] == "abc" for r in results)
134
+
135
+ def it_excludes_internal_tracking_columns(jsonl_dir):
136
+ """Internal _dirsql_* columns are not exposed in query results."""
137
+ db = DirSQL(
138
+ jsonl_dir,
139
+ tables=[
140
+ Table(
141
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
142
+ glob="comments/**/index.jsonl",
143
+ extract=lambda path, content: [
144
+ {
145
+ "id": os.path.basename(os.path.dirname(path)),
146
+ "body": row["body"],
147
+ "author": row["author"],
148
+ }
149
+ for line in content.splitlines()
150
+ for row in [json.loads(line)]
151
+ ],
152
+ ),
153
+ ],
154
+ )
155
+ results = db.query("SELECT * FROM comments LIMIT 1")
156
+ assert len(results) == 1
157
+ row = results[0]
158
+ assert "_dirsql_file_path" not in row
159
+ assert "_dirsql_row_index" not in row
160
+
161
+ def it_handles_integer_values(tmp_dir):
162
+ """Integer values in extracted data are preserved correctly."""
163
+ os.makedirs(os.path.join(tmp_dir, "data"), exist_ok=True)
164
+ with open(os.path.join(tmp_dir, "data", "counts.json"), "w") as f:
165
+ json.dump({"name": "apples", "count": 42}, f)
166
+
167
+ db = DirSQL(
168
+ tmp_dir,
169
+ tables=[
170
+ Table(
171
+ ddl="CREATE TABLE items (name TEXT, count INTEGER)",
172
+ glob="data/*.json",
173
+ extract=lambda path, content: [json.loads(content)],
174
+ ),
175
+ ],
176
+ )
177
+ results = db.query("SELECT * FROM items")
178
+ assert len(results) == 1
179
+ assert results[0]["name"] == "apples"
180
+ assert results[0]["count"] == 42
181
+
182
+ def describe_multiple_tables():
183
+ def it_supports_multiple_table_definitions(tmp_dir):
184
+ """Multiple tables can be defined with different globs and extractors."""
185
+ os.makedirs(os.path.join(tmp_dir, "posts"), exist_ok=True)
186
+ os.makedirs(os.path.join(tmp_dir, "authors"), exist_ok=True)
187
+
188
+ with open(os.path.join(tmp_dir, "posts", "hello.json"), "w") as f:
189
+ json.dump({"title": "Hello World", "author_id": "1"}, f)
190
+
191
+ with open(os.path.join(tmp_dir, "authors", "alice.json"), "w") as f:
192
+ json.dump({"id": "1", "name": "Alice"}, f)
193
+
194
+ db = DirSQL(
195
+ tmp_dir,
196
+ tables=[
197
+ Table(
198
+ ddl="CREATE TABLE posts (title TEXT, author_id TEXT)",
199
+ glob="posts/*.json",
200
+ extract=lambda path, content: [json.loads(content)],
201
+ ),
202
+ Table(
203
+ ddl="CREATE TABLE authors (id TEXT, name TEXT)",
204
+ glob="authors/*.json",
205
+ extract=lambda path, content: [json.loads(content)],
206
+ ),
207
+ ],
208
+ )
209
+ posts = db.query("SELECT * FROM posts")
210
+ authors = db.query("SELECT * FROM authors")
211
+ assert len(posts) == 1
212
+ assert len(authors) == 1
213
+ assert posts[0]["title"] == "Hello World"
214
+ assert authors[0]["name"] == "Alice"
215
+
216
+ def it_supports_joins_across_tables(tmp_dir):
217
+ """SQL JOINs work across different tables."""
218
+ os.makedirs(os.path.join(tmp_dir, "posts"), exist_ok=True)
219
+ os.makedirs(os.path.join(tmp_dir, "authors"), exist_ok=True)
220
+
221
+ with open(os.path.join(tmp_dir, "posts", "hello.json"), "w") as f:
222
+ json.dump({"title": "Hello World", "author_id": "1"}, f)
223
+
224
+ with open(os.path.join(tmp_dir, "authors", "alice.json"), "w") as f:
225
+ json.dump({"id": "1", "name": "Alice"}, f)
226
+
227
+ db = DirSQL(
228
+ tmp_dir,
229
+ tables=[
230
+ Table(
231
+ ddl="CREATE TABLE posts (title TEXT, author_id TEXT)",
232
+ glob="posts/*.json",
233
+ extract=lambda path, content: [json.loads(content)],
234
+ ),
235
+ Table(
236
+ ddl="CREATE TABLE authors (id TEXT, name TEXT)",
237
+ glob="authors/*.json",
238
+ extract=lambda path, content: [json.loads(content)],
239
+ ),
240
+ ],
241
+ )
242
+ results = db.query(
243
+ "SELECT posts.title, authors.name "
244
+ "FROM posts JOIN authors ON posts.author_id = authors.id"
245
+ )
246
+ assert len(results) == 1
247
+ assert results[0]["title"] == "Hello World"
248
+ assert results[0]["name"] == "Alice"
249
+
250
+ def describe_error_handling():
251
+ def it_raises_on_invalid_sql(jsonl_dir):
252
+ """Invalid SQL raises an exception."""
253
+ db = DirSQL(
254
+ jsonl_dir,
255
+ tables=[
256
+ Table(
257
+ ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
258
+ glob="comments/**/index.jsonl",
259
+ extract=lambda path, content: [
260
+ {
261
+ "id": os.path.basename(os.path.dirname(path)),
262
+ "body": row["body"],
263
+ "author": row["author"],
264
+ }
265
+ for line in content.splitlines()
266
+ for row in [json.loads(line)]
267
+ ],
268
+ ),
269
+ ],
270
+ )
271
+ with pytest.raises(Exception):
272
+ db.query("NOT VALID SQL")
273
+
274
+ def it_raises_on_invalid_ddl(tmp_dir):
275
+ """Invalid DDL raises an exception during init."""
276
+ with pytest.raises(Exception):
277
+ DirSQL(
278
+ tmp_dir,
279
+ tables=[
280
+ Table(
281
+ ddl="NOT A CREATE TABLE",
282
+ glob="*.json",
283
+ extract=lambda path, content: [],
284
+ ),
285
+ ],
286
+ )
287
+
288
+ def it_handles_empty_directory(tmp_dir):
289
+ """An empty directory produces zero rows."""
290
+ db = DirSQL(
291
+ tmp_dir,
292
+ tables=[
293
+ Table(
294
+ ddl="CREATE TABLE items (name TEXT)",
295
+ glob="**/*.json",
296
+ extract=lambda path, content: [json.loads(content)],
297
+ ),
298
+ ],
299
+ )
300
+ results = db.query("SELECT * FROM items")
301
+ assert len(results) == 0
302
+
303
+ def it_handles_extract_returning_empty_list(tmp_dir):
304
+ """Extract function returning [] produces no rows for that file."""
305
+ with open(os.path.join(tmp_dir, "skip.json"), "w") as f:
306
+ json.dump({"ignore": True}, f)
307
+
308
+ db = DirSQL(
309
+ tmp_dir,
310
+ tables=[
311
+ Table(
312
+ ddl="CREATE TABLE items (name TEXT)",
313
+ glob="**/*.json",
314
+ extract=lambda path, content: [],
315
+ ),
316
+ ],
317
+ )
318
+ results = db.query("SELECT * FROM items")
319
+ assert len(results) == 0
320
+
321
+ def describe_extract_receives_path_and_content():
322
+ def it_passes_relative_path_and_string_content(tmp_dir):
323
+ """Extract receives the file path (relative to root) and content as string."""
324
+ with open(os.path.join(tmp_dir, "test.json"), "w") as f:
325
+ json.dump({"val": 1}, f)
326
+
327
+ captured = {}
328
+
329
+ def extract(path, content):
330
+ captured["path"] = path
331
+ captured["content"] = content
332
+ return [{"val": 1}]
333
+
334
+ db = DirSQL(
335
+ tmp_dir,
336
+ tables=[
337
+ Table(
338
+ ddl="CREATE TABLE t (val INTEGER)",
339
+ glob="*.json",
340
+ extract=extract,
341
+ ),
342
+ ],
343
+ )
344
+ db.query("SELECT * FROM t")
345
+ assert captured["path"] == "test.json"
346
+ assert '"val"' in captured["content"]
dirsql-0.0.11/index.js DELETED
@@ -1,5 +0,0 @@
1
- "use strict";
2
-
3
- module.exports = {
4
- message: "dirsql TypeScript SDK is not yet implemented. See https://github.com/thekevinscott/dirsql for status.",
5
- };
dirsql-0.0.11/justfile DELETED
@@ -1,53 +0,0 @@
1
- # Run all lints
2
- lint:
3
- ruff check .
4
-
5
- # Check formatting
6
- format-check:
7
- ruff format --check .
8
-
9
- # Auto-format
10
- format:
11
- ruff format .
12
-
13
- # Fix lint issues
14
- fix:
15
- ruff check --fix .
16
- ruff format .
17
-
18
- # Run Python unit tests (colocated)
19
- test-unit:
20
- pytest python/ -x -q
21
-
22
- # Run integration tests
23
- test-integration:
24
- pytest tests/integration/ -x -q
25
-
26
- # Run e2e tests (local only, not CI)
27
- test-e2e:
28
- pytest tests/e2e/ -x -q
29
-
30
- # CI test target (unit + integration, no e2e)
31
- test-ci:
32
- pytest python/ tests/integration/ -x -q --tb=short 2>/dev/null || echo "No tests found yet"
33
-
34
- # Run Rust tests
35
- test-rust:
36
- cargo test
37
-
38
- # Run Rust clippy
39
- clippy:
40
- cargo clippy -- -D warnings
41
-
42
- # Run Rust format check
43
- fmt-check:
44
- cargo fmt -- --check
45
-
46
- # Full local CI
47
- ci:
48
- just lint
49
- just format-check
50
- just clippy
51
- just fmt-check
52
- just test-rust
53
- just test-ci
@@ -1,8 +0,0 @@
1
- {
2
- "name": "dirsql",
3
- "version": "0.0.1",
4
- "description": "Ephemeral SQL index over a local directory",
5
- "license": "MIT",
6
- "repository": "https://github.com/thekevinscott/dirsql",
7
- "main": "index.js"
8
- }
dirsql-0.0.11/src/lib.rs DELETED
@@ -1,15 +0,0 @@
1
- pub mod db;
2
- pub mod differ;
3
- pub mod matcher;
4
- pub mod scanner;
5
- pub mod watcher;
6
-
7
- #[cfg(feature = "extension-module")]
8
- use pyo3::prelude::*;
9
-
10
- #[cfg(feature = "extension-module")]
11
- #[pymodule]
12
- fn dirsql(m: &Bound<'_, PyModule>) -> PyResult<()> {
13
- m.add("__version__", env!("CARGO_PKG_VERSION"))?;
14
- Ok(())
15
- }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes