dirsql 0.0.11__tar.gz → 0.0.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/python-test.yml +6 -0
- dirsql-0.0.13/.npmignore +23 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/PKG-INFO +1 -1
- {dirsql-0.0.11 → dirsql-0.0.13}/pyproject.toml +12 -1
- dirsql-0.0.13/src/lib.rs +291 -0
- dirsql-0.0.13/tests/__init__.py +0 -0
- dirsql-0.0.13/tests/conftest.py +38 -0
- dirsql-0.0.13/tests/integration/__init__.py +0 -0
- dirsql-0.0.13/tests/integration/test_dirsql.py +346 -0
- dirsql-0.0.11/index.js +0 -5
- dirsql-0.0.11/justfile +0 -53
- dirsql-0.0.11/package.json +0 -8
- dirsql-0.0.11/src/lib.rs +0 -15
- {dirsql-0.0.11 → dirsql-0.0.13}/.claude/CLAUDE.md +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/minor-release.yml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/patch-release.yml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/pr-monitor.yml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/publish.yml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/python-lint.yml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.github/workflows/rust-test.yml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/.gitignore +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/Cargo.lock +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/Cargo.toml +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/LICENSE +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/SUMMARY.md +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/src/db.rs +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/src/differ.rs +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/src/matcher.rs +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/src/scanner.rs +0 -0
- {dirsql-0.0.11 → dirsql-0.0.13}/src/watcher.rs +0 -0
|
@@ -5,13 +5,19 @@ on:
|
|
|
5
5
|
branches: [main]
|
|
6
6
|
paths:
|
|
7
7
|
- '**.py'
|
|
8
|
+
- '**.rs'
|
|
8
9
|
- 'pyproject.toml'
|
|
10
|
+
- 'Cargo.toml'
|
|
11
|
+
- 'Cargo.lock'
|
|
9
12
|
- 'uv.lock'
|
|
10
13
|
- 'tests/**'
|
|
11
14
|
pull_request:
|
|
12
15
|
paths:
|
|
13
16
|
- '**.py'
|
|
17
|
+
- '**.rs'
|
|
14
18
|
- 'pyproject.toml'
|
|
19
|
+
- 'Cargo.toml'
|
|
20
|
+
- 'Cargo.lock'
|
|
15
21
|
- 'uv.lock'
|
|
16
22
|
- 'tests/**'
|
|
17
23
|
|
dirsql-0.0.13/.npmignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Source and build artifacts
|
|
2
|
+
src/
|
|
3
|
+
target/
|
|
4
|
+
Cargo.*
|
|
5
|
+
*.rs
|
|
6
|
+
*.toml
|
|
7
|
+
*.lock
|
|
8
|
+
|
|
9
|
+
# Dev/CI infrastructure
|
|
10
|
+
.claude/
|
|
11
|
+
.github/
|
|
12
|
+
tests/
|
|
13
|
+
python/
|
|
14
|
+
.beads
|
|
15
|
+
.dolt/
|
|
16
|
+
|
|
17
|
+
# Docs and notes
|
|
18
|
+
AGENTS.md
|
|
19
|
+
SUMMARY.md
|
|
20
|
+
notes/
|
|
21
|
+
|
|
22
|
+
# Build tools
|
|
23
|
+
justfile
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "dirsql"
|
|
7
|
-
version = "0.0.11"
|
|
7
|
+
version = "0.0.13"
|
|
8
8
|
description = "Ephemeral SQL index over a local directory"
|
|
9
9
|
license = "MIT"
|
|
10
10
|
requires-python = ">=3.12"
|
|
@@ -25,3 +25,14 @@ dev = [
|
|
|
25
25
|
|
|
26
26
|
[tool.maturin]
|
|
27
27
|
features = ["pyo3/extension-module"]
|
|
28
|
+
exclude = [
|
|
29
|
+
".github/",
|
|
30
|
+
".claude/",
|
|
31
|
+
"tests/",
|
|
32
|
+
"notes/",
|
|
33
|
+
"index.js",
|
|
34
|
+
"package.json",
|
|
35
|
+
".beads",
|
|
36
|
+
"AGENTS.md",
|
|
37
|
+
"justfile",
|
|
38
|
+
]
|
dirsql-0.0.13/src/lib.rs
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
pub mod db;
|
|
2
|
+
pub mod differ;
|
|
3
|
+
pub mod matcher;
|
|
4
|
+
pub mod scanner;
|
|
5
|
+
pub mod watcher;
|
|
6
|
+
|
|
7
|
+
/// Extract the table name from a `CREATE TABLE` DDL statement.
///
/// Handles `CREATE TABLE name (...)` and `CREATE TABLE IF NOT EXISTS name (...)`.
/// The keywords are matched ASCII-case-insensitively. The name is everything
/// after the keywords up to the first whitespace character or `(`; quoting is
/// not interpreted.
///
/// Returns `None` when `ddl` contains no `CREATE TABLE`, or when nothing
/// follows the keywords.
pub fn parse_table_name(ddl: &str) -> Option<String> {
    const CREATE_TABLE: &str = "CREATE TABLE";
    const IF_NOT_EXISTS: &str = "IF NOT EXISTS";

    // ASCII-only uppercasing keeps every byte offset identical to `ddl`.
    // Full Unicode uppercasing can change byte lengths (e.g. 'ı' -> 'I'), so an
    // offset found in such a copy could slice `ddl` at the wrong position or
    // in the middle of a character.
    let upper = ddl.to_ascii_uppercase();
    let idx = upper.find(CREATE_TABLE)?;
    let rest = ddl[idx + CREATE_TABLE.len()..].trim_start();

    // Skip optional "IF NOT EXISTS". `get` yields `None` when the remainder is
    // too short or the cut would not fall on a character boundary.
    let rest = match rest.get(..IF_NOT_EXISTS.len()) {
        Some(prefix) if prefix.eq_ignore_ascii_case(IF_NOT_EXISTS) => {
            rest[IF_NOT_EXISTS.len()..].trim_start()
        }
        _ => rest,
    };

    // Table name is everything up to the first whitespace or '('
    let name: String = rest
        .chars()
        .take_while(|c| !c.is_whitespace() && *c != '(')
        .collect();

    if name.is_empty() { None } else { Some(name) }
}
|
|
29
|
+
|
|
30
|
+
#[cfg(feature = "extension-module")]
|
|
31
|
+
mod python {
|
|
32
|
+
use crate::db::{Db, Value};
|
|
33
|
+
use crate::matcher::TableMatcher;
|
|
34
|
+
use crate::parse_table_name;
|
|
35
|
+
use crate::scanner::scan_directory;
|
|
36
|
+
use pyo3::exceptions::PyRuntimeError;
|
|
37
|
+
use pyo3::prelude::*;
|
|
38
|
+
use pyo3::types::{PyDict, PyList};
|
|
39
|
+
use std::collections::HashMap;
|
|
40
|
+
use std::path::Path;
|
|
41
|
+
use std::sync::Mutex;
|
|
42
|
+
|
|
43
|
+
/// A table definition for DirSQL.
|
|
44
|
+
#[pyclass(name = "Table", frozen)]
|
|
45
|
+
struct PyTable {
|
|
46
|
+
#[pyo3(get)]
|
|
47
|
+
ddl: String,
|
|
48
|
+
#[pyo3(get)]
|
|
49
|
+
glob: String,
|
|
50
|
+
extract: Py<PyAny>,
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
#[pymethods]
|
|
54
|
+
impl PyTable {
|
|
55
|
+
#[new]
|
|
56
|
+
#[pyo3(signature = (*, ddl, glob, extract))]
|
|
57
|
+
fn new(ddl: String, glob: String, extract: Py<PyAny>) -> Self {
|
|
58
|
+
PyTable { ddl, glob, extract }
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/// The main DirSQL class. Creates an in-memory SQLite index over a directory.
|
|
63
|
+
#[pyclass(name = "DirSQL")]
|
|
64
|
+
struct PyDirSQL {
|
|
65
|
+
db: Mutex<Db>,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
#[pymethods]
|
|
69
|
+
impl PyDirSQL {
|
|
70
|
+
#[new]
|
|
71
|
+
#[pyo3(signature = (root, *, tables, ignore=None))]
|
|
72
|
+
fn new(
|
|
73
|
+
py: Python<'_>,
|
|
74
|
+
root: String,
|
|
75
|
+
tables: Vec<PyRef<'_, PyTable>>,
|
|
76
|
+
ignore: Option<Vec<String>>,
|
|
77
|
+
) -> PyResult<Self> {
|
|
78
|
+
let db =
|
|
79
|
+
Db::new().map_err(|e| PyRuntimeError::new_err(format!("DB init error: {}", e)))?;
|
|
80
|
+
|
|
81
|
+
// Parse table names from DDLs and create tables
|
|
82
|
+
let mut table_configs: Vec<(String, String, Py<PyAny>)> = Vec::new();
|
|
83
|
+
for t in &tables {
|
|
84
|
+
let table_name = parse_table_name(&t.ddl).ok_or_else(|| {
|
|
85
|
+
PyRuntimeError::new_err(format!(
|
|
86
|
+
"Could not parse table name from DDL: {}",
|
|
87
|
+
t.ddl
|
|
88
|
+
))
|
|
89
|
+
})?;
|
|
90
|
+
db.create_table(&t.ddl)
|
|
91
|
+
.map_err(|e| PyRuntimeError::new_err(format!("DDL error: {}", e)))?;
|
|
92
|
+
table_configs.push((table_name, t.glob.clone(), t.extract.clone_ref(py)));
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Build glob -> table_name mappings for the scanner
|
|
96
|
+
let mappings: Vec<(&str, &str)> = table_configs
|
|
97
|
+
.iter()
|
|
98
|
+
.map(|(name, glob, _extract): &(String, String, Py<PyAny>)| {
|
|
99
|
+
(glob.as_str(), name.as_str())
|
|
100
|
+
})
|
|
101
|
+
.collect();
|
|
102
|
+
let ignore_patterns: Vec<&str> = ignore
|
|
103
|
+
.as_ref()
|
|
104
|
+
.map(|v| v.iter().map(|s| s.as_str()).collect())
|
|
105
|
+
.unwrap_or_default();
|
|
106
|
+
|
|
107
|
+
let matcher = TableMatcher::new(&mappings, &ignore_patterns)
|
|
108
|
+
.map_err(|e| PyRuntimeError::new_err(format!("Glob error: {}", e)))?;
|
|
109
|
+
|
|
110
|
+
// Scan directory
|
|
111
|
+
let root_path = Path::new(&root);
|
|
112
|
+
let files = scan_directory(root_path, &matcher);
|
|
113
|
+
|
|
114
|
+
// Build a lookup from table_name -> extract callable
|
|
115
|
+
let extract_map: HashMap<String, Py<PyAny>> = table_configs
|
|
116
|
+
.iter()
|
|
117
|
+
.map(|(name, _glob, extract): &(String, String, Py<PyAny>)| {
|
|
118
|
+
(name.clone(), extract.clone_ref(py))
|
|
119
|
+
})
|
|
120
|
+
.collect();
|
|
121
|
+
|
|
122
|
+
// Process each file
|
|
123
|
+
for (file_path, table_name) in &files {
|
|
124
|
+
// Read file content
|
|
125
|
+
let content = std::fs::read_to_string(file_path).map_err(|e| {
|
|
126
|
+
PyRuntimeError::new_err(format!(
|
|
127
|
+
"Failed to read {}: {}",
|
|
128
|
+
file_path.display(),
|
|
129
|
+
e
|
|
130
|
+
))
|
|
131
|
+
})?;
|
|
132
|
+
|
|
133
|
+
// Compute relative path
|
|
134
|
+
let rel_path = file_path
|
|
135
|
+
.strip_prefix(root_path)
|
|
136
|
+
.unwrap_or(file_path)
|
|
137
|
+
.to_string_lossy()
|
|
138
|
+
.to_string();
|
|
139
|
+
|
|
140
|
+
// Call extract
|
|
141
|
+
let extract_fn = extract_map.get(table_name).ok_or_else(|| {
|
|
142
|
+
PyRuntimeError::new_err(format!("No extract function for table {}", table_name))
|
|
143
|
+
})?;
|
|
144
|
+
|
|
145
|
+
let result = extract_fn.call1(py, (rel_path.clone(), content))?;
|
|
146
|
+
let rows: Vec<HashMap<String, Py<PyAny>>> = result.extract(py)?;
|
|
147
|
+
|
|
148
|
+
// Insert rows
|
|
149
|
+
for (row_index, py_row) in rows.iter().enumerate() {
|
|
150
|
+
let row = convert_py_row(py, py_row)?;
|
|
151
|
+
db.insert_row(table_name, &row, &rel_path, row_index)
|
|
152
|
+
.map_err(|e| PyRuntimeError::new_err(format!("Insert error: {}", e)))?;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
Ok(PyDirSQL { db: Mutex::new(db) })
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/// Execute a SQL query and return results as a list of dicts.
|
|
160
|
+
fn query(&self, py: Python<'_>, sql: &str) -> PyResult<Py<PyList>> {
|
|
161
|
+
let db = self
|
|
162
|
+
.db
|
|
163
|
+
.lock()
|
|
164
|
+
.map_err(|e| PyRuntimeError::new_err(format!("Lock error: {}", e)))?;
|
|
165
|
+
let rows = db
|
|
166
|
+
.query(sql)
|
|
167
|
+
.map_err(|e| PyRuntimeError::new_err(format!("Query error: {}", e)))?;
|
|
168
|
+
|
|
169
|
+
let result = PyList::empty(py);
|
|
170
|
+
for row in &rows {
|
|
171
|
+
let dict = PyDict::new(py);
|
|
172
|
+
for (key, value) in row {
|
|
173
|
+
let py_val = value_to_py(py, value);
|
|
174
|
+
dict.set_item(key, py_val)?;
|
|
175
|
+
}
|
|
176
|
+
result.append(dict)?;
|
|
177
|
+
}
|
|
178
|
+
Ok(result.unbind())
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/// Convert a Python dict row to a Rust HashMap<String, Value>.
|
|
183
|
+
fn convert_py_row(
|
|
184
|
+
py: Python<'_>,
|
|
185
|
+
py_row: &HashMap<String, Py<PyAny>>,
|
|
186
|
+
) -> PyResult<HashMap<String, Value>> {
|
|
187
|
+
let mut row: HashMap<String, Value> = HashMap::new();
|
|
188
|
+
for (key, val) in py_row {
|
|
189
|
+
let value = py_to_value(py, val)?;
|
|
190
|
+
row.insert(key.clone(), value);
|
|
191
|
+
}
|
|
192
|
+
Ok(row)
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
/// Convert a Python object to a db::Value.
|
|
196
|
+
fn py_to_value(py: Python<'_>, obj: &Py<PyAny>) -> PyResult<Value> {
|
|
197
|
+
let bound = obj.bind(py);
|
|
198
|
+
|
|
199
|
+
if bound.is_none() {
|
|
200
|
+
return Ok(Value::Null);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Try bool first (before int, since bool is subclass of int in Python)
|
|
204
|
+
if bound.is_instance_of::<pyo3::types::PyBool>() {
|
|
205
|
+
let b: bool = bound.extract()?;
|
|
206
|
+
return Ok(Value::Integer(if b { 1 } else { 0 }));
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Try integer
|
|
210
|
+
if let Ok(i) = bound.extract::<i64>() {
|
|
211
|
+
return Ok(Value::Integer(i));
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Try float
|
|
215
|
+
if let Ok(f) = bound.extract::<f64>() {
|
|
216
|
+
return Ok(Value::Real(f));
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Try string
|
|
220
|
+
if let Ok(s) = bound.extract::<String>() {
|
|
221
|
+
return Ok(Value::Text(s));
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Try bytes
|
|
225
|
+
if let Ok(b) = bound.extract::<Vec<u8>>() {
|
|
226
|
+
return Ok(Value::Blob(b));
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// Fall back to string representation
|
|
230
|
+
let s = bound.str()?.to_string();
|
|
231
|
+
Ok(Value::Text(s))
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/// Convert a db::Value to a Python object.
|
|
235
|
+
fn value_to_py(py: Python<'_>, value: &Value) -> Py<PyAny> {
|
|
236
|
+
match value {
|
|
237
|
+
Value::Null => py.None(),
|
|
238
|
+
Value::Integer(i) => i.into_pyobject(py).unwrap().into_any().unbind(),
|
|
239
|
+
Value::Real(f) => f.into_pyobject(py).unwrap().into_any().unbind(),
|
|
240
|
+
Value::Text(s) => s.into_pyobject(py).unwrap().into_any().unbind(),
|
|
241
|
+
Value::Blob(b) => b.into_pyobject(py).unwrap().unbind(),
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
#[pymodule]
|
|
246
|
+
fn dirsql(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
247
|
+
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
|
248
|
+
m.add_class::<PyTable>()?;
|
|
249
|
+
m.add_class::<PyDirSQL>()?;
|
|
250
|
+
Ok(())
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `ddl` parses to `expected` (`None` = no table name found).
    fn check(ddl: &str, expected: Option<&str>) {
        assert_eq!(parse_table_name(ddl), expected.map(String::from));
    }

    #[test]
    fn parse_table_name_simple() {
        check("CREATE TABLE comments (id TEXT)", Some("comments"));
    }

    #[test]
    fn parse_table_name_if_not_exists() {
        check(
            "CREATE TABLE IF NOT EXISTS comments (id TEXT)",
            Some("comments"),
        );
    }

    #[test]
    fn parse_table_name_no_space_before_paren() {
        check("CREATE TABLE t(id TEXT)", Some("t"));
    }

    #[test]
    fn parse_table_name_invalid() {
        check("NOT A DDL", None);
    }

    #[test]
    fn parse_table_name_empty_after_create_table() {
        check("CREATE TABLE ", None);
    }
}
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import tempfile
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture
def tmp_dir():
    """Yield the path of a fresh temporary directory, removed after the test."""
    with tempfile.TemporaryDirectory() as scratch_path:
        yield scratch_path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.fixture
def jsonl_dir(tmp_dir):
    """Populate the temp dir with comments/<thread>/index.jsonl files.

    Thread "abc" holds two comments and thread "def" holds one; every line of
    an index file is a JSON object with "body" and "author" keys.
    """
    threads = {
        "abc": [
            {"body": "first comment", "author": "alice"},
            {"body": "second comment", "author": "bob"},
        ],
        "def": [
            {"body": "another comment", "author": "carol"},
        ],
    }

    for thread, comments in threads.items():
        thread_dir = os.path.join(tmp_dir, "comments", thread)
        os.makedirs(thread_dir, exist_ok=True)
        with open(os.path.join(thread_dir, "index.jsonl"), "w") as f:
            for comment in comments:
                f.write(json.dumps(comment) + "\n")

    return tmp_dir
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.fixture
def csv_dir(tmp_dir):
    """Create a temp dir holding one JSON file, for single-row extraction tests.

    Despite the fixture's name, the file written is ``metadata.json`` (a single
    JSON object with "title" and "version" keys), not a CSV file.
    """
    with open(os.path.join(tmp_dir, "metadata.json"), "w") as f:
        json.dump({"title": "My Project", "version": "1.0"}, f)

    return tmp_dir
|
|
File without changes
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""Integration tests for the DirSQL Python SDK."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from dirsql import DirSQL, Table
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _comment_rows(path, content):
    """Turn one JSONL file into comment rows; ``id`` is the parent directory name."""
    thread_id = os.path.basename(os.path.dirname(path))
    return [
        {"id": thread_id, "body": row["body"], "author": row["author"]}
        for row in map(json.loads, content.splitlines())
    ]


def _single_json_row(path, content):
    """Treat the whole file as one JSON object and return it as the only row."""
    return [json.loads(content)]


def _comments_table():
    """Table definition shared by every test that indexes the ``jsonl_dir`` fixture."""
    return Table(
        ddl="CREATE TABLE comments (id TEXT, body TEXT, author TEXT)",
        glob="comments/**/index.jsonl",
        extract=_comment_rows,
    )


def _comments_db(root, **kwargs):
    """Index ``root`` with only the comments table; extra kwargs go to DirSQL."""
    return DirSQL(root, tables=[_comments_table()], **kwargs)


def _write_json(root, relative_path, payload):
    """Write ``payload`` as JSON to ``root/relative_path``, creating parent dirs."""
    target = os.path.join(root, relative_path)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w") as f:
        json.dump(payload, f)


def _blog_db(root):
    """Create one post and one author file under ``root`` and index both tables."""
    _write_json(
        root,
        os.path.join("posts", "hello.json"),
        {"title": "Hello World", "author_id": "1"},
    )
    _write_json(
        root,
        os.path.join("authors", "alice.json"),
        {"id": "1", "name": "Alice"},
    )
    return DirSQL(
        root,
        tables=[
            Table(
                ddl="CREATE TABLE posts (title TEXT, author_id TEXT)",
                glob="posts/*.json",
                extract=_single_json_row,
            ),
            Table(
                ddl="CREATE TABLE authors (id TEXT, name TEXT)",
                glob="authors/*.json",
                extract=_single_json_row,
            ),
        ],
    )


def describe_DirSQL():
    def describe_init():
        def it_creates_instance_with_tables(jsonl_dir):
            """DirSQL can be initialized with a root path and table definitions."""
            db = _comments_db(jsonl_dir)
            assert db is not None

        def it_accepts_ignore_patterns(jsonl_dir):
            """DirSQL accepts an ignore list to skip matching paths."""
            db = _comments_db(jsonl_dir, ignore=["**/def/**"])
            # Only the "abc" directory should be indexed, not "def"
            results = db.query("SELECT DISTINCT id FROM comments")
            ids = {r["id"] for r in results}
            assert ids == {"abc"}

    def describe_query():
        def it_returns_all_rows(jsonl_dir):
            """query returns all indexed rows when no WHERE clause."""
            db = _comments_db(jsonl_dir)
            results = db.query("SELECT * FROM comments")
            assert len(results) == 3

        def it_returns_dicts_with_column_names(jsonl_dir):
            """Each result row is a dict keyed by column name."""
            db = _comments_db(jsonl_dir)
            results = db.query(
                "SELECT author FROM comments WHERE body = 'first comment'"
            )
            assert len(results) == 1
            assert results[0]["author"] == "alice"

        def it_filters_with_where_clause(jsonl_dir):
            """SQL WHERE clauses work correctly on indexed data."""
            db = _comments_db(jsonl_dir)
            results = db.query("SELECT * FROM comments WHERE id = 'abc'")
            assert len(results) == 2
            assert all(r["id"] == "abc" for r in results)

        def it_excludes_internal_tracking_columns(jsonl_dir):
            """Internal _dirsql_* columns are not exposed in query results."""
            db = _comments_db(jsonl_dir)
            results = db.query("SELECT * FROM comments LIMIT 1")
            assert len(results) == 1
            row = results[0]
            assert "_dirsql_file_path" not in row
            assert "_dirsql_row_index" not in row

        def it_handles_integer_values(tmp_dir):
            """Integer values in extracted data are preserved correctly."""
            _write_json(
                tmp_dir,
                os.path.join("data", "counts.json"),
                {"name": "apples", "count": 42},
            )

            db = DirSQL(
                tmp_dir,
                tables=[
                    Table(
                        ddl="CREATE TABLE items (name TEXT, count INTEGER)",
                        glob="data/*.json",
                        extract=_single_json_row,
                    ),
                ],
            )
            results = db.query("SELECT * FROM items")
            assert len(results) == 1
            assert results[0]["name"] == "apples"
            assert results[0]["count"] == 42

    def describe_multiple_tables():
        def it_supports_multiple_table_definitions(tmp_dir):
            """Multiple tables can be defined with different globs and extractors."""
            db = _blog_db(tmp_dir)
            posts = db.query("SELECT * FROM posts")
            authors = db.query("SELECT * FROM authors")
            assert len(posts) == 1
            assert len(authors) == 1
            assert posts[0]["title"] == "Hello World"
            assert authors[0]["name"] == "Alice"

        def it_supports_joins_across_tables(tmp_dir):
            """SQL JOINs work across different tables."""
            db = _blog_db(tmp_dir)
            results = db.query(
                "SELECT posts.title, authors.name "
                "FROM posts JOIN authors ON posts.author_id = authors.id"
            )
            assert len(results) == 1
            assert results[0]["title"] == "Hello World"
            assert results[0]["name"] == "Alice"

    def describe_error_handling():
        def it_raises_on_invalid_sql(jsonl_dir):
            """Invalid SQL raises an exception."""
            db = _comments_db(jsonl_dir)
            # The extension reports query failures as RuntimeError; asserting the
            # concrete type keeps unrelated errors from satisfying the test.
            with pytest.raises(RuntimeError):
                db.query("NOT VALID SQL")

        def it_raises_on_invalid_ddl(tmp_dir):
            """Invalid DDL raises an exception during init."""
            # The extension reports DDL failures as RuntimeError as well.
            with pytest.raises(RuntimeError):
                DirSQL(
                    tmp_dir,
                    tables=[
                        Table(
                            ddl="NOT A CREATE TABLE",
                            glob="*.json",
                            extract=lambda path, content: [],
                        ),
                    ],
                )

        def it_handles_empty_directory(tmp_dir):
            """An empty directory produces zero rows."""
            db = DirSQL(
                tmp_dir,
                tables=[
                    Table(
                        ddl="CREATE TABLE items (name TEXT)",
                        glob="**/*.json",
                        extract=_single_json_row,
                    ),
                ],
            )
            results = db.query("SELECT * FROM items")
            assert len(results) == 0

        def it_handles_extract_returning_empty_list(tmp_dir):
            """Extract function returning [] produces no rows for that file."""
            _write_json(tmp_dir, "skip.json", {"ignore": True})

            db = DirSQL(
                tmp_dir,
                tables=[
                    Table(
                        ddl="CREATE TABLE items (name TEXT)",
                        glob="**/*.json",
                        extract=lambda path, content: [],
                    ),
                ],
            )
            results = db.query("SELECT * FROM items")
            assert len(results) == 0

    def describe_extract_receives_path_and_content():
        def it_passes_relative_path_and_string_content(tmp_dir):
            """Extract receives the file path (relative to root) and content as string."""
            _write_json(tmp_dir, "test.json", {"val": 1})

            captured = {}

            def extract(path, content):
                captured["path"] = path
                captured["content"] = content
                return [{"val": 1}]

            db = DirSQL(
                tmp_dir,
                tables=[
                    Table(
                        ddl="CREATE TABLE t (val INTEGER)",
                        glob="*.json",
                        extract=extract,
                    ),
                ],
            )
            db.query("SELECT * FROM t")
            assert captured["path"] == "test.json"
            assert '"val"' in captured["content"]
|
dirsql-0.0.11/index.js
DELETED
dirsql-0.0.11/justfile
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
# Run all lints
|
|
2
|
-
lint:
|
|
3
|
-
ruff check .
|
|
4
|
-
|
|
5
|
-
# Check formatting
|
|
6
|
-
format-check:
|
|
7
|
-
ruff format --check .
|
|
8
|
-
|
|
9
|
-
# Auto-format
|
|
10
|
-
format:
|
|
11
|
-
ruff format .
|
|
12
|
-
|
|
13
|
-
# Fix lint issues
|
|
14
|
-
fix:
|
|
15
|
-
ruff check --fix .
|
|
16
|
-
ruff format .
|
|
17
|
-
|
|
18
|
-
# Run Python unit tests (colocated)
|
|
19
|
-
test-unit:
|
|
20
|
-
pytest python/ -x -q
|
|
21
|
-
|
|
22
|
-
# Run integration tests
|
|
23
|
-
test-integration:
|
|
24
|
-
pytest tests/integration/ -x -q
|
|
25
|
-
|
|
26
|
-
# Run e2e tests (local only, not CI)
|
|
27
|
-
test-e2e:
|
|
28
|
-
pytest tests/e2e/ -x -q
|
|
29
|
-
|
|
30
|
-
# CI test target (unit + integration, no e2e)
|
|
31
|
-
test-ci:
|
|
32
|
-
pytest python/ tests/integration/ -x -q --tb=short 2>/dev/null || echo "No tests found yet"
|
|
33
|
-
|
|
34
|
-
# Run Rust tests
|
|
35
|
-
test-rust:
|
|
36
|
-
cargo test
|
|
37
|
-
|
|
38
|
-
# Run Rust clippy
|
|
39
|
-
clippy:
|
|
40
|
-
cargo clippy -- -D warnings
|
|
41
|
-
|
|
42
|
-
# Run Rust format check
|
|
43
|
-
fmt-check:
|
|
44
|
-
cargo fmt -- --check
|
|
45
|
-
|
|
46
|
-
# Full local CI
|
|
47
|
-
ci:
|
|
48
|
-
just lint
|
|
49
|
-
just format-check
|
|
50
|
-
just clippy
|
|
51
|
-
just fmt-check
|
|
52
|
-
just test-rust
|
|
53
|
-
just test-ci
|
dirsql-0.0.11/package.json
DELETED
dirsql-0.0.11/src/lib.rs
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
pub mod db;
|
|
2
|
-
pub mod differ;
|
|
3
|
-
pub mod matcher;
|
|
4
|
-
pub mod scanner;
|
|
5
|
-
pub mod watcher;
|
|
6
|
-
|
|
7
|
-
#[cfg(feature = "extension-module")]
|
|
8
|
-
use pyo3::prelude::*;
|
|
9
|
-
|
|
10
|
-
#[cfg(feature = "extension-module")]
|
|
11
|
-
#[pymodule]
|
|
12
|
-
fn dirsql(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
13
|
-
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
|
14
|
-
Ok(())
|
|
15
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|