npm - dirsql - Versions diffs - 0.0.1 → 0.0.99-test.1775729890 - Mend

dirsql 0.0.1 → 0.0.99-test.1775729890

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/package.json +5 -2
package/.claude/CLAUDE.md +0 -120
package/.github/workflows/minor-release.yml +0 -14
package/.github/workflows/patch-release.yml +0 -45
package/.github/workflows/pr-monitor.yml +0 -16
package/.github/workflows/publish.yml +0 -306
package/.github/workflows/python-lint.yml +0 -35
package/.github/workflows/python-test.yml +0 -45
package/.github/workflows/rust-test.yml +0 -41
package/Cargo.lock +0 -851
package/Cargo.toml +0 -27
package/SUMMARY.md +0 -62
package/justfile +0 -53
package/pyproject.toml +0 -27
package/src/db.rs +0 -312
package/src/differ.rs +0 -372
package/src/lib.rs +0 -15
package/src/matcher.rs +0 -116
package/src/scanner.rs +0 -100
package/src/watcher.rs +0 -227

package/Cargo.toml DELETED Viewed

@@ -1,27 +0,0 @@
-# Crate metadata for the `dirsql` package.
-[package]
-name = "dirsql"
-version = "0.1.0"
-edition = "2024"
-description = "Ephemeral SQL index over a local directory"
-license = "MIT"
-repository = "https://github.com/thekevinscott/dirsql"
-keywords = ["sql", "filesystem", "directory", "sqlite", "index"]
-categories = ["filesystem", "database"]
-# Build both a C-compatible dynamic library (`cdylib`) and a regular Rust library (`rlib`).
-[lib]
-crate-type = ["cdylib", "rlib"]
-[dependencies]
-globset = "0.4"
-notify = "7"
-# Optional: only compiled when a feature that enables `pyo3` is turned on.
-pyo3 = { version = "0.28", features = ["extension-module"], optional = true }
-# `bundled` makes rusqlite compile its own copy of SQLite instead of linking a system one.
-rusqlite = { version = "0.34", features = ["bundled"] }
-thiserror = "2"
-walkdir = "2"
-[features]
-# Nothing is enabled by default, so a plain `cargo build` does not pull in `pyo3`.
-default = []
-# Enables the optional `pyo3` dependency together with its `extension-module` feature.
-extension-module = ["pyo3/extension-module"]
-[dev-dependencies]
-tempfile = "3"

package/SUMMARY.md DELETED Viewed

@@ -1,62 +0,0 @@
-# dirsql
-Ephemeral SQL index over a local directory. Watches a filesystem, ingests structured files (JSONL, JSON, markdown with frontmatter, CSV), builds an in-memory SQLite database, and exposes a SQL query interface. On shutdown, the database is discarded -- the filesystem is always the source of truth.
-## Language
-Rust.
-## What it does
-1. **Startup scan**: Walk a directory tree, parse structured files, populate SQLite tables
-2. **File watching**: Monitor for changes (inotify/FSEvents via the `notify` crate) and update the index incrementally
-3. **Query interface**: Expose SQL queries over the indexed data (Unix socket, HTTP, or embedded library)
-4. **Event emission**: Notify subscribers when files change (websocket or event stream)
-5. **Ephemeral**: The SQLite database is in-memory or tmpfile. Discarded on shutdown. Rebuilt on next start.
-## Why
-The problem: structured data stored as flat files on disk (JSONL, JSON, markdown) is easy for agents and humans to read/write, git-friendly, and portable. But querying across many files is slow -- "show me all unresolved comments across 50 documents" requires opening and parsing every file.
-dirsql bridges this: files remain the source of truth (readable, appendable, diffable), but you get SQL queries and change events for free.
-## Motivating use case
-A writing assistant app stores documents as markdown on disk with comment threads as JSONL files in a recursive workspace structure:
-```
-my-article/
-  index.md
-  _resources/
-    source-1.md
-  _comments/
-    a1b1/
-      index.jsonl        # comment thread (append-only events)
-      _resources/
-        deep-dive.md
-      _comments/          # comments on comments
-        c3d4/
-          index.jsonl
-```
-The editor needs to:
-- Query "all unresolved comments in this workspace" without scanning every file
-- Get notified when an external agent appends to a thread or creates a new resource
-- Remain decoupled from any specific database -- dirsql is a dev dependency, not a data store
-## Analogues
-- **Steampipe**: SQL over cloud APIs
-- **Osquery**: SQL over OS state
-- **DataFusion/DuckDB**: SQL over data files (Parquet, CSV)
-dirsql is this pattern applied to a local project directory with real-time file watching.
-## Open questions (for scoping conversation)
-- Table schema inference: auto-detect from file structure, or require a config/schema file?
-- Query interface: HTTP API, Unix socket, embedded Rust library, all three?
-- Event protocol: websockets, SSE, or something simpler?
-- Scope of file format support: start with JSONL only, or JSON/CSV/markdown frontmatter from day one?
-- How to handle nested/recursive structures (the workspace pattern above)?
-- Performance targets: how large a directory tree should startup scan handle?

package/justfile DELETED Viewed

@@ -1,53 +0,0 @@
-# Run all lints
-lint:
-    ruff check .
-# Check formatting
-format-check:
-    ruff format --check .
-# Auto-format
-format:
-    ruff format .
-# Fix lint issues
-fix:
-    ruff check --fix .
-    ruff format .
-# Run Python unit tests (colocated)
-test-unit:
-    pytest python/ -x -q
-# Run integration tests
-test-integration:
-    pytest tests/integration/ -x -q
-# Run e2e tests (local only, not CI)
-test-e2e:
-    pytest tests/e2e/ -x -q
-# CI test target (unit + integration, no e2e)
-# pytest exits with code 5 when it collects no tests; only that case is tolerated.
-# Any other non-zero exit (failing tests, usage errors) is propagated so CI fails,
-# and stderr is left visible so the failure can be diagnosed.
-test-ci:
-    pytest python/ tests/integration/ -x -q --tb=short || { rc=$?; if [ "$rc" -eq 5 ]; then echo "No tests found yet"; else exit "$rc"; fi; }
-# Run Rust tests
-test-rust:
-    cargo test
-# Run Rust clippy
-clippy:
-    cargo clippy -- -D warnings
-# Run Rust format check
-fmt-check:
-    cargo fmt -- --check
-# Full local CI
-ci:
-    just lint
-    just format-check
-    just clippy
-    just fmt-check
-    just test-rust
-    just test-ci

package/pyproject.toml DELETED Viewed

@@ -1,27 +0,0 @@
-# Build the package with maturin (Rust extension build backend).
-[build-system]
-requires = ["maturin>=1.0,<2.0"]
-build-backend = "maturin"
-[project]
-name = "dirsql"
-version = "0.1.0"
-description = "Ephemeral SQL index over a local directory"
-license = "MIT"
-requires-python = ">=3.12"
-authors = [
-    { name = "Kevin Scott" },
-]
-keywords = ["sql", "filesystem", "directory", "sqlite", "index"]
-# `repository` is not a valid key of the `[project]` table; project links belong
-# in `[project.urls]`. This table is placed after every plain `[project]` key so
-# that none of them is accidentally absorbed into it.
-[project.urls]
-Repository = "https://github.com/thekevinscott/dirsql"
-# Extra packages for local development: `pip install -e ".[dev]"`.
-[project.optional-dependencies]
-dev = [
-    "pytest>=8",
-    "pytest-describe>=2",
-    "pytest-asyncio>=0.23",
-    "ruff>=0.4",
-    "maturin>=1.0",
-]
-# Cargo features that maturin passes to the Rust build.
-[tool.maturin]
-features = ["pyo3/extension-module"]

package/src/db.rs DELETED Viewed

@@ -1,312 +0,0 @@
-use rusqlite::Connection;
-use std::collections::HashMap;
-use thiserror::Error;
-/// Errors returned by the database layer.
-#[derive(Error, Debug)]
-pub enum DbError {
-    /// An error reported by rusqlite; `#[from]` lets `?` convert it automatically.
-    #[error("SQLite error: {0}")]
-    Sqlite(#[from] rusqlite::Error),
-    /// A schema mismatch, described by the contained message.
-    #[error("Schema mismatch: {0}")]
-    SchemaMismatch(String),
-    /// A DDL statement could not be processed, described by the contained message.
-    #[error("DDL parse error: {0}")]
-    DdlParse(String),
-}
-/// Result alias whose error type is [`DbError`].
-pub type Result<T> = std::result::Result<T, DbError>;
-/// Owns the SQLite connection that all table, insert, delete and query operations go through.
-pub struct Db {
-    /// The underlying rusqlite connection.
-    conn: Connection,
-}
-impl Db {
-    /// Open a fresh in-memory SQLite database.
-    ///
-    /// # Errors
-    /// Returns [`DbError::Sqlite`] if the connection cannot be opened.
-    pub fn new() -> Result<Self> {
-        let conn = Connection::open_in_memory()?;
-        Ok(Self { conn })
-    }
-    /// Quote `ident` as a SQLite identifier: wrap it in double quotes and double any
-    /// embedded double quote, so it can never terminate the identifier early.
-    fn quote_ident(ident: &str) -> String {
-        format!("\"{}\"", ident.replace('"', "\"\""))
-    }
-    /// Create a table from a user-provided DDL statement.
-    /// Automatically injects internal tracking columns (_dirsql_file_path, _dirsql_row_index).
-    ///
-    /// # Errors
-    /// Returns [`DbError::DdlParse`] if the DDL cannot be augmented, or
-    /// [`DbError::Sqlite`] if SQLite rejects the resulting statement.
-    pub fn create_table(&self, ddl: &str) -> Result<()> {
-        let augmented = inject_tracking_columns(ddl)?;
-        self.conn.execute(&augmented, [])?;
-        Ok(())
-    }
-    /// Insert a row into a table.
-    /// `row` contains user-defined columns only. `file_path` and `row_index` are tracked internally.
-    ///
-    /// `table` and the keys of `row` are quoted as SQL identifiers before being placed
-    /// in the statement, so reserved words and names with unusual characters are safe.
-    /// Pass bare names: a pre-quoted or schema-qualified name is treated as one literal
-    /// identifier. All values are bound as parameters.
-    ///
-    /// # Errors
-    /// Returns [`DbError::Sqlite`] if the statement fails (unknown table or column,
-    /// constraint violation, ...).
-    pub fn insert_row(
-        &self,
-        table: &str,
-        row: &HashMap<String, Value>,
-        file_path: &str,
-        row_index: usize,
-    ) -> Result<()> {
-        // SQLite integers are signed 64-bit values.
-        let row_index = row_index as i64;
-        let total = row.len() + 2;
-        let mut columns: Vec<String> = Vec::with_capacity(total);
-        let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(total);
-        // Walk the map once so every column name stays paired with its own value,
-        // and borrow the values instead of cloning them.
-        for (name, value) in row {
-            columns.push(Self::quote_ident(name));
-            params.push(value);
-        }
-        columns.push("_dirsql_file_path".to_string());
-        params.push(&file_path);
-        columns.push("_dirsql_row_index".to_string());
-        params.push(&row_index);
-        let placeholders: Vec<String> = (1..=columns.len()).map(|i| format!("?{}", i)).collect();
-        let sql = format!(
-            "INSERT INTO {} ({}) VALUES ({})",
-            Self::quote_ident(table),
-            columns.join(", "),
-            placeholders.join(", "),
-        );
-        self.conn.execute(&sql, params.as_slice())?;
-        Ok(())
-    }
-    /// Delete all rows that were produced by a given file path.
-    ///
-    /// `table` is quoted as a SQL identifier; `file_path` is bound as a parameter.
-    /// Returns the number of rows removed.
-    ///
-    /// # Errors
-    /// Returns [`DbError::Sqlite`] if the statement fails.
-    pub fn delete_rows_by_file(&self, table: &str, file_path: &str) -> Result<usize> {
-        let sql = format!(
-            "DELETE FROM {} WHERE _dirsql_file_path = ?1",
-            Self::quote_ident(table)
-        );
-        let count = self.conn.execute(&sql, [file_path])?;
-        Ok(count)
-    }
-    /// Query the database, returning rows as a list of column-name -> value maps.
-    /// Internal tracking columns (_dirsql_*) are excluded from results.
-    ///
-    /// # Errors
-    /// Returns [`DbError::Sqlite`] if the statement cannot be prepared or a row cannot be read.
-    pub fn query(&self, sql: &str) -> Result<Vec<HashMap<String, Value>>> {
-        let mut stmt = self.conn.prepare(sql)?;
-        // Copy the names out first: `query_map` below needs `stmt` mutably.
-        let column_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
-        let rows = stmt.query_map([], |row| {
-            let mut map = HashMap::new();
-            for (i, name) in column_names.iter().enumerate() {
-                if name.starts_with("_dirsql_") {
-                    continue;
-                }
-                let val: rusqlite::types::Value = row.get(i)?;
-                map.insert(name.clone(), Value::from(val));
-            }
-            Ok(map)
-        })?;
-        // Stop at the first row that fails to decode.
-        let results = rows.collect::<rusqlite::Result<Vec<_>>>()?;
-        Ok(results)
-    }
-}
-/// Append the internal `_dirsql_file_path` and `_dirsql_row_index` column definitions
-/// to a CREATE TABLE statement.
-///
-/// The definitions are spliced in immediately before the last `)` found in `ddl`;
-/// everything from that parenthesis onwards is kept unchanged.
-///
-/// # Errors
-/// Returns [`DbError::DdlParse`] when `ddl` contains no `)` at all.
-fn inject_tracking_columns(ddl: &str) -> Result<String> {
-    const TRACKING_COLUMNS: &str =
-        ", _dirsql_file_path TEXT NOT NULL, _dirsql_row_index INTEGER NOT NULL";
-    let Some(last_paren) = ddl.rfind(')') else {
-        return Err(DbError::DdlParse(
-            "DDL must contain a closing parenthesis".to_string(),
-        ));
-    };
-    // `head` ends just before the final ')', `tail` starts with it.
-    let (head, tail) = ddl.split_at(last_paren);
-    let mut augmented = String::with_capacity(ddl.len() + TRACKING_COLUMNS.len());
-    augmented.push_str(head);
-    augmented.push_str(TRACKING_COLUMNS);
-    augmented.push_str(tail);
-    Ok(augmented)
-}
-/// A value that can be stored in SQLite.
-///
-/// The variants mirror those of `rusqlite::types::Value` one-to-one.
-#[derive(Debug, Clone, PartialEq)]
-pub enum Value {
-    /// SQL NULL.
-    Null,
-    /// A signed 64-bit integer.
-    Integer(i64),
-    /// A 64-bit floating point number.
-    Real(f64),
-    /// A text string.
-    Text(String),
-    /// Raw bytes.
-    Blob(Vec<u8>),
-}
-impl rusqlite::types::ToSql for Value {
-    /// Lend this value to rusqlite for parameter binding.
-    ///
-    /// The output borrows from `self`, so text and blob payloads are not copied
-    /// each time a value is bound. This conversion never fails.
-    fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
-        use rusqlite::types::{ToSqlOutput, ValueRef};
-        let borrowed = match self {
-            Value::Null => ValueRef::Null,
-            Value::Integer(i) => ValueRef::Integer(*i),
-            Value::Real(f) => ValueRef::Real(*f),
-            // `ValueRef::Text` carries the UTF-8 bytes of the string.
-            Value::Text(s) => ValueRef::Text(s.as_bytes()),
-            Value::Blob(b) => ValueRef::Blob(b.as_slice()),
-        };
-        Ok(ToSqlOutput::Borrowed(borrowed))
-    }
-}
-/// Converts an owned rusqlite value into the crate's own [`Value`], variant for variant.
-impl From<rusqlite::types::Value> for Value {
-    fn from(v: rusqlite::types::Value) -> Self {
-        use rusqlite::types::Value as Sql;
-        match v {
-            Sql::Null => Self::Null,
-            Sql::Integer(int) => Self::Integer(int),
-            Sql::Real(real) => Self::Real(real),
-            Sql::Text(text) => Self::Text(text),
-            Sql::Blob(bytes) => Self::Blob(bytes),
-        }
-    }
-}
-// Unit tests for `Db` and `inject_tracking_columns`; each test opens its own in-memory database.
-#[cfg(test)]
-mod tests {
-    use super::*;
-    // A valid CREATE TABLE statement yields an empty, queryable table.
-    #[test]
-    fn create_table_from_ddl() {
-        let db = Db::new().unwrap();
-        db.create_table("CREATE TABLE comments (id TEXT PRIMARY KEY, body TEXT, resolved INTEGER)")
-            .unwrap();
-        // Table should exist -- querying it should return empty results
-        let rows = db.query("SELECT * FROM comments").unwrap();
-        assert_eq!(rows.len(), 0);
-    }
-    // Input that is not a CREATE TABLE statement is reported as an error, not a panic.
-    #[test]
-    fn create_table_invalid_ddl_returns_error() {
-        let db = Db::new().unwrap();
-        let result = db.create_table("NOT VALID SQL");
-        assert!(result.is_err());
-    }
-    // Tracking columns are added to the table but hidden from query results.
-    #[test]
-    fn create_table_injects_tracking_columns() {
-        let db = Db::new().unwrap();
-        db.create_table("CREATE TABLE t (id TEXT)").unwrap();
-        // The tracking columns should exist even though the user didn't declare them
-        db.insert_row(
-            "t",
-            &HashMap::from([("id".into(), Value::Text("1".into()))]),
-            "test.json",
-            0,
-        )
-        .unwrap();
-        // SELECT * should NOT return tracking columns
-        let rows = db.query("SELECT * FROM t").unwrap();
-        assert_eq!(rows.len(), 1);
-        assert!(rows[0].contains_key("id"));
-        assert!(!rows[0].contains_key("_dirsql_file_path"));
-        assert!(!rows[0].contains_key("_dirsql_row_index"));
-    }
-    // A single inserted row round-trips through `query` with its values intact.
-    #[test]
-    fn insert_and_query_rows() {
-        let db = Db::new().unwrap();
-        db.create_table("CREATE TABLE docs (title TEXT, draft INTEGER)")
-            .unwrap();
-        let row = HashMap::from([
-            ("title".into(), Value::Text("Hello".into())),
-            ("draft".into(), Value::Integer(0)),
-        ]);
-        db.insert_row("docs", &row, "docs/hello.md", 0).unwrap();
-        let results = db.query("SELECT title, draft FROM docs").unwrap();
-        assert_eq!(results.len(), 1);
-        assert_eq!(results[0]["title"], Value::Text("Hello".into()));
-        assert_eq!(results[0]["draft"], Value::Integer(0));
-    }
-    // Several rows may share one file path, distinguished by their row index.
-    #[test]
-    fn insert_multiple_rows_from_same_file() {
-        let db = Db::new().unwrap();
-        db.create_table("CREATE TABLE events (action TEXT, ts INTEGER)")
-            .unwrap();
-        for (i, action) in ["created", "resolved", "reopened"].iter().enumerate() {
-            let row = HashMap::from([
-                ("action".into(), Value::Text(action.to_string())),
-                ("ts".into(), Value::Integer(i as i64)),
-            ]);
-            db.insert_row("events", &row, "thread.jsonl", i).unwrap();
-        }
-        let results = db.query("SELECT action FROM events ORDER BY ts").unwrap();
-        assert_eq!(results.len(), 3);
-        assert_eq!(results[0]["action"], Value::Text("created".into()));
-        assert_eq!(results[2]["action"], Value::Text("reopened".into()));
-    }
-    // Deleting by file path removes only that file's rows and reports how many were removed.
-    #[test]
-    fn delete_rows_by_file_path() {
-        let db = Db::new().unwrap();
-        db.create_table("CREATE TABLE comments (id TEXT, body TEXT)")
-            .unwrap();
-        // Insert rows from two different files
-        for (i, (id, file)) in [("1", "a.jsonl"), ("2", "a.jsonl"), ("3", "b.jsonl")]
-            .iter()
-            .enumerate()
-        {
-            let row = HashMap::from([
-                ("id".into(), Value::Text(id.to_string())),
-                ("body".into(), Value::Text("text".into())),
-            ]);
-            db.insert_row("comments", &row, file, i).unwrap();
-        }
-        // Delete rows from file "a.jsonl"
-        let deleted = db.delete_rows_by_file("comments", "a.jsonl").unwrap();
-        assert_eq!(deleted, 2);
-        // Only file b's row remains
-        let results = db.query("SELECT id FROM comments").unwrap();
-        assert_eq!(results.len(), 1);
-        assert_eq!(results[0]["id"], Value::Text("3".into()));
-    }
-    // WHERE and ORDER BY clauses are passed through to SQLite unchanged.
-    #[test]
-    fn query_with_where_clause() {
-        let db = Db::new().unwrap();
-        db.create_table("CREATE TABLE items (name TEXT, count INTEGER)")
-            .unwrap();
-        for (i, (name, count)) in [("apple", 5), ("banana", 0), ("cherry", 3)]
-            .iter()
-            .enumerate()
-        {
-            let row = HashMap::from([
-                ("name".into(), Value::Text(name.to_string())),
-                ("count".into(), Value::Integer(*count)),
-            ]);
-            db.insert_row("items", &row, "items.json", i).unwrap();
-        }
-        let results = db
-            .query("SELECT name FROM items WHERE count > 0 ORDER BY name")
-            .unwrap();
-        assert_eq!(results.len(), 2);
-        assert_eq!(results[0]["name"], Value::Text("apple".into()));
-        assert_eq!(results[1]["name"], Value::Text("cherry".into()));
-    }
-    // The rewritten DDL contains both tracking column definitions.
-    #[test]
-    fn inject_tracking_columns_modifies_ddl() {
-        let result = inject_tracking_columns("CREATE TABLE t (id TEXT)").unwrap();
-        assert!(result.contains("_dirsql_file_path TEXT NOT NULL"));
-        assert!(result.contains("_dirsql_row_index INTEGER NOT NULL"));
-    }
-    // DDL without any closing parenthesis is rejected.
-    #[test]
-    fn inject_tracking_columns_rejects_missing_paren() {
-        let result = inject_tracking_columns("NOT A CREATE TABLE");
-        assert!(result.is_err());
-    }
-}