dirsql 0.0.1 → 0.0.99-test.1775729890

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/differ.rs DELETED
@@ -1,372 +0,0 @@
1
- use std::collections::HashMap;
2
- use std::path::PathBuf;
3
-
4
- use crate::db::Value;
5
-
6
- /// Events produced by comparing old and new file content.
7
- #[derive(Debug, Clone, PartialEq)]
8
- pub enum RowEvent {
9
- Insert {
10
- table: String,
11
- row: HashMap<String, Value>,
12
- },
13
- Update {
14
- table: String,
15
- old_row: HashMap<String, Value>,
16
- new_row: HashMap<String, Value>,
17
- },
18
- Delete {
19
- table: String,
20
- row: HashMap<String, Value>,
21
- },
22
- Error {
23
- file_path: PathBuf,
24
- error: String,
25
- },
26
- }
27
-
28
- /// Diff old and new file content to produce minimal row events.
29
- ///
30
- /// - `table`: the target table name
31
- /// - `old`: previous row content (None if file is new)
32
- /// - `new`: current row content (None if file was deleted)
33
- /// - `file_path`: the file path (used in Error events)
34
- ///
35
- /// For multi-row files (JSONL), uses line-index-based identity:
36
- /// - Unchanged lines produce no events
37
- /// - Changed lines produce Update events
38
- /// - Additional lines at the end produce Insert events
39
- /// - If the file shrunk or more than half the rows changed, does a full replace
40
- ///
41
- /// For single-row files, compares the single row directly.
42
- pub fn diff(
43
- table: &str,
44
- old: Option<&[HashMap<String, Value>]>,
45
- new: Option<&[HashMap<String, Value>]>,
46
- _file_path: &str,
47
- ) -> Vec<RowEvent> {
48
- match (old, new) {
49
- (None, None) => Vec::new(),
50
- (None, Some(new_rows)) => new_rows
51
- .iter()
52
- .map(|r| RowEvent::Insert {
53
- table: table.to_string(),
54
- row: r.clone(),
55
- })
56
- .collect(),
57
- (Some(old_rows), None) => old_rows
58
- .iter()
59
- .map(|r| RowEvent::Delete {
60
- table: table.to_string(),
61
- row: r.clone(),
62
- })
63
- .collect(),
64
- (Some(old_rows), Some(new_rows)) => diff_rows(table, old_rows, new_rows),
65
- }
66
- }
67
-
68
- /// Compare old and new row slices and produce minimal events.
69
- fn diff_rows(
70
- table: &str,
71
- old_rows: &[HashMap<String, Value>],
72
- new_rows: &[HashMap<String, Value>],
73
- ) -> Vec<RowEvent> {
74
- // If file shrunk, do full replace
75
- if new_rows.len() < old_rows.len() {
76
- return full_replace(table, old_rows, new_rows);
77
- }
78
-
79
- // Compare overlapping rows line by line
80
- let overlap = old_rows.len();
81
- let mut changed = 0;
82
- let mut events = Vec::new();
83
-
84
- for i in 0..overlap {
85
- if old_rows[i] != new_rows[i] {
86
- changed += 1;
87
- }
88
- }
89
-
90
- // For multi-row files, if more than half of overlapping rows changed, full replace.
91
- // Single-row files (overlap == 1) never trigger full replace -- they use Update.
92
- if overlap > 1 && changed * 2 > overlap {
93
- return full_replace(table, old_rows, new_rows);
94
- }
95
-
96
- // Emit Update events for changed lines
97
- for i in 0..overlap {
98
- if old_rows[i] != new_rows[i] {
99
- events.push(RowEvent::Update {
100
- table: table.to_string(),
101
- old_row: old_rows[i].clone(),
102
- new_row: new_rows[i].clone(),
103
- });
104
- }
105
- }
106
-
107
- // Emit Insert events for appended lines
108
- for row in &new_rows[overlap..] {
109
- events.push(RowEvent::Insert {
110
- table: table.to_string(),
111
- row: row.clone(),
112
- });
113
- }
114
-
115
- events
116
- }
117
-
118
- /// Full replace: delete all old rows, then insert all new rows.
119
- fn full_replace(
120
- table: &str,
121
- old_rows: &[HashMap<String, Value>],
122
- new_rows: &[HashMap<String, Value>],
123
- ) -> Vec<RowEvent> {
124
- let mut events = Vec::with_capacity(old_rows.len() + new_rows.len());
125
- for row in old_rows {
126
- events.push(RowEvent::Delete {
127
- table: table.to_string(),
128
- row: row.clone(),
129
- });
130
- }
131
- for row in new_rows {
132
- events.push(RowEvent::Insert {
133
- table: table.to_string(),
134
- row: row.clone(),
135
- });
136
- }
137
- events
138
- }
139
-
140
#[cfg(test)]
mod tests {
    use super::*;

    type Row = HashMap<String, Value>;

    /// Assemble a row map from `(field, value)` pairs.
    fn row(pairs: &[(&str, Value)]) -> Row {
        let mut fields = HashMap::with_capacity(pairs.len());
        for (name, value) in pairs {
            fields.insert(name.to_string(), value.clone());
        }
        fields
    }

    fn text(s: &str) -> Value {
        Value::Text(s.to_owned())
    }

    fn int(i: i64) -> Value {
        Value::Integer(i)
    }

    /// One single-field row per value, all using the same field name.
    fn single_field_rows(field: &str, values: Vec<Value>) -> Vec<Row> {
        values.into_iter().map(|v| row(&[(field, v)])).collect()
    }

    fn inserted(table: &str, row: &Row) -> RowEvent {
        RowEvent::Insert {
            table: table.to_string(),
            row: row.clone(),
        }
    }

    fn deleted(table: &str, row: &Row) -> RowEvent {
        RowEvent::Delete {
            table: table.to_string(),
            row: row.clone(),
        }
    }

    fn updated(table: &str, old_row: &Row, new_row: &Row) -> RowEvent {
        RowEvent::Update {
            table: table.to_string(),
            old_row: old_row.clone(),
            new_row: new_row.clone(),
        }
    }

    /// Count `(deletes, inserts)` among the events.
    fn tally(events: &[RowEvent]) -> (usize, usize) {
        let deletes = events
            .iter()
            .filter(|e| matches!(e, RowEvent::Delete { .. }))
            .count();
        let inserts = events
            .iter()
            .filter(|e| matches!(e, RowEvent::Insert { .. }))
            .count();
        (deletes, inserts)
    }

    // --- File created: every row is an insert ---

    #[test]
    fn all_inserts_when_old_is_none() {
        let rows = vec![
            row(&[("name", text("alice")), ("age", int(30))]),
            row(&[("name", text("bob")), ("age", int(25))]),
        ];
        let events = diff("users", None, Some(&rows), "users.jsonl");
        assert_eq!(
            events,
            vec![inserted("users", &rows[0]), inserted("users", &rows[1])]
        );
    }

    // --- File deleted: every row is a delete ---

    #[test]
    fn all_deletes_when_new_is_none() {
        let rows = single_field_rows("id", vec![text("1"), text("2")]);
        let events = diff("items", Some(&rows), None, "items.jsonl");
        assert_eq!(
            events,
            vec![deleted("items", &rows[0]), deleted("items", &rows[1])]
        );
    }

    // --- Unchanged content ---

    #[test]
    fn no_events_when_content_identical() {
        let rows = single_field_rows("x", vec![int(1), int(2)]);
        let events = diff("t", Some(&rows), Some(&rows), "t.jsonl");
        assert_eq!(events.len(), 0);
    }

    // --- One line changed out of three ---

    #[test]
    fn update_event_for_changed_line() {
        let old = single_field_rows("val", vec![text("a"), text("b"), text("c")]);
        let new = single_field_rows("val", vec![text("a"), text("B"), text("c")]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");
        assert_eq!(events, vec![updated("t", &old[1], &new[1])]);
    }

    // --- Lines appended at the end ---

    #[test]
    fn insert_events_for_appended_lines() {
        let old = single_field_rows("id", vec![int(1)]);
        let new = single_field_rows("id", vec![int(1), int(2), int(3)]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");
        assert_eq!(events, vec![inserted("t", &new[1]), inserted("t", &new[2])]);
    }

    // --- Shrinking file forces a full replace ---

    #[test]
    fn full_replace_when_file_shrinks() {
        let old = single_field_rows("id", vec![int(1), int(2), int(3)]);
        let new = single_field_rows("id", vec![int(1)]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");
        // 3 deletes + 1 insert
        assert_eq!(events.len(), 4);
        assert_eq!(tally(&events), (3, 1));
    }

    // --- Heavy modification forces a full replace ---

    #[test]
    fn full_replace_when_more_than_half_changed() {
        let old = single_field_rows("v", vec![text("a"), text("b"), text("c"), text("d")]);
        // 3 of 4 rows differ: 75% > 50%
        let new = single_field_rows("v", vec![text("A"), text("B"), text("C"), text("d")]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");
        // 4 deletes + 4 inserts
        assert_eq!(tally(&events), (4, 4));
    }

    // --- Single-row file: changed ---

    #[test]
    fn single_row_update() {
        let old = single_field_rows("title", vec![text("Draft")]);
        let new = single_field_rows("title", vec![text("Final")]);
        let events = diff("docs", Some(&old), Some(&new), "doc.json");
        assert_eq!(events, vec![updated("docs", &old[0], &new[0])]);
    }

    // --- Single-row file: unchanged ---

    #[test]
    fn single_row_no_change() {
        let rows = single_field_rows("title", vec![text("Same")]);
        let events = diff("docs", Some(&rows), Some(&rows), "doc.json");
        assert_eq!(events.len(), 0);
    }

    // --- Neither side present ---

    #[test]
    fn no_events_when_both_none() {
        let events = diff("t", None, None, "gone.json");
        assert_eq!(events.len(), 0);
    }

    // --- Exactly half changed stays below the full-replace threshold ---

    #[test]
    fn no_full_replace_when_exactly_half_changed() {
        let old = single_field_rows("v", vec![text("a"), text("b"), text("c"), text("d")]);
        // 2 of 4 rows differ: 50% is not "more than half"
        let new = single_field_rows("v", vec![text("A"), text("B"), text("c"), text("d")]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");
        assert_eq!(events.len(), 2);
        for event in &events {
            assert!(matches!(event, RowEvent::Update { .. }));
        }
    }

    // --- Full replace ordering: deletes first, inserts after ---

    #[test]
    fn full_replace_deletes_before_inserts() {
        let old = single_field_rows("id", vec![int(1), int(2)]);
        let new = single_field_rows("id", vec![int(3)]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");
        let last_delete = events
            .iter()
            .rposition(|e| matches!(e, RowEvent::Delete { .. }))
            .unwrap();
        let first_insert = events
            .iter()
            .position(|e| matches!(e, RowEvent::Insert { .. }))
            .unwrap();
        assert!(
            last_delete < first_insert,
            "Deletes should come before inserts in full replace"
        );
    }
}
package/src/lib.rs DELETED
@@ -1,15 +0,0 @@
1
- pub mod db;
2
- pub mod differ;
3
- pub mod matcher;
4
- pub mod scanner;
5
- pub mod watcher;
6
-
7
- #[cfg(feature = "extension-module")]
8
- use pyo3::prelude::*;
9
-
10
// Python extension module entry point; compiled only when the
// `extension-module` feature is enabled.
//
// Plain `//` comments are used on purpose: a `///` doc comment on a
// `#[pymodule]` function would be exported as the Python module's docstring.
#[cfg(feature = "extension-module")]
#[pymodule]
fn dirsql(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Expose the crate version Cargo recorded at build time as `__version__`.
    m.add("__version__", env!("CARGO_PKG_VERSION"))
}
package/src/matcher.rs DELETED
@@ -1,116 +0,0 @@
1
- use globset::{Glob, GlobSet, GlobSetBuilder};
2
- use std::path::Path;
3
-
4
- /// Maps file paths to table names based on glob patterns.
5
- /// First matching pattern wins. An ignore list filters paths entirely.
6
- pub struct TableMatcher {
7
- table_globs: Vec<(GlobSet, String)>,
8
- ignore_set: GlobSet,
9
- }
10
-
11
- impl TableMatcher {
12
- /// Build a new matcher from (glob_pattern, table_name) pairs and ignore patterns.
13
- pub fn new(
14
- mappings: &[(&str, &str)],
15
- ignore_patterns: &[&str],
16
- ) -> Result<Self, globset::Error> {
17
- let mut table_globs = Vec::new();
18
- for (pattern, table_name) in mappings {
19
- let mut builder = GlobSetBuilder::new();
20
- builder.add(Glob::new(pattern)?);
21
- table_globs.push((builder.build()?, table_name.to_string()));
22
- }
23
-
24
- let mut ignore_builder = GlobSetBuilder::new();
25
- for pattern in ignore_patterns {
26
- ignore_builder.add(Glob::new(pattern)?);
27
- }
28
- let ignore_set = ignore_builder.build()?;
29
-
30
- Ok(Self {
31
- table_globs,
32
- ignore_set,
33
- })
34
- }
35
-
36
- /// Returns the table name for a file path, or None if no pattern matches.
37
- pub fn match_file(&self, path: &Path) -> Option<&str> {
38
- for (glob_set, table_name) in &self.table_globs {
39
- if glob_set.is_match(path) {
40
- return Some(table_name.as_str());
41
- }
42
- }
43
- None
44
- }
45
-
46
- /// Returns true if the path matches any ignore pattern.
47
- pub fn is_ignored(&self, path: &Path) -> bool {
48
- self.ignore_set.is_match(path)
49
- }
50
- }
51
-
52
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a matcher, panicking if any pattern fails to compile.
    fn matcher(mappings: &[(&str, &str)], ignores: &[&str]) -> TableMatcher {
        TableMatcher::new(mappings, ignores).unwrap()
    }

    #[test]
    fn match_file_returns_table_for_matching_glob() {
        let m = matcher(&[("*.csv", "data")], &[]);
        let table = m.match_file(Path::new("report.csv"));
        assert_eq!(table, Some("data"));
    }

    #[test]
    fn match_file_returns_none_for_no_match() {
        let m = matcher(&[("*.csv", "data")], &[]);
        let table = m.match_file(Path::new("readme.md"));
        assert_eq!(table, None);
    }

    #[test]
    fn first_matching_pattern_wins() {
        let mappings = [("*.json", "json_table"), ("data/*.json", "data_table")];
        let m = matcher(&mappings, &[]);
        // Both patterns match "data/foo.json"; the earlier mapping is returned.
        let table = m.match_file(Path::new("data/foo.json"));
        assert_eq!(table, Some("json_table"));
    }

    #[test]
    fn match_file_with_nested_path() {
        let m = matcher(&[("**/*.jsonl", "events")], &[]);
        let table = m.match_file(Path::new("logs/2024/events.jsonl"));
        assert_eq!(table, Some("events"));
    }

    #[test]
    fn is_ignored_returns_true_for_matching_pattern() {
        let m = matcher(&[], &["*.tmp", ".git/**"]);
        for ignored in ["scratch.tmp", ".git/config"] {
            assert!(m.is_ignored(Path::new(ignored)));
        }
    }

    #[test]
    fn is_ignored_returns_false_for_non_matching_path() {
        let m = matcher(&[], &["*.tmp"]);
        assert!(!m.is_ignored(Path::new("data.csv")));
    }

    #[test]
    fn empty_matcher_matches_nothing() {
        let m = matcher(&[], &[]);
        let path = Path::new("anything.txt");
        assert_eq!(m.match_file(path), None);
        assert!(!m.is_ignored(path));
    }

    #[test]
    fn invalid_glob_returns_error() {
        assert!(TableMatcher::new(&[("[invalid", "t")], &[]).is_err());
    }
}
package/src/scanner.rs DELETED
@@ -1,100 +0,0 @@
1
- use crate::matcher::TableMatcher;
2
- use std::path::{Path, PathBuf};
3
- use walkdir::WalkDir;
4
-
5
- /// Walk a directory tree and return all file paths paired with their matching table name.
6
- /// Ignored paths and directories are skipped. Only files (not directories) are returned.
7
- pub fn scan_directory(root: &Path, matcher: &TableMatcher) -> Vec<(PathBuf, String)> {
8
- let mut results = Vec::new();
9
-
10
- for entry in WalkDir::new(root).into_iter().filter_map(|e| e.ok()) {
11
- let path = entry.path();
12
-
13
- if matcher.is_ignored(path) {
14
- continue;
15
- }
16
-
17
- if !entry.file_type().is_file() {
18
- continue;
19
- }
20
-
21
- if let Some(table_name) = matcher.match_file(path) {
22
- results.push((path.to_path_buf(), table_name.to_string()));
23
- }
24
- }
25
-
26
- results
27
- }
28
-
29
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a fresh temporary directory for one test.
    fn workspace() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Write `body` to `name` directly inside `dir`.
    fn put(dir: &TempDir, name: &str, body: &str) {
        fs::write(dir.path().join(name), body).unwrap();
    }

    #[test]
    fn scan_finds_matching_files() {
        let dir = workspace();
        put(&dir, "data.csv", "a,b\n1,2");
        put(&dir, "readme.md", "# hi");

        let matcher = TableMatcher::new(&[("**/*.csv", "csv_table")], &[]).unwrap();
        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 1);
        let (path, table) = &found[0];
        assert!(path.ends_with("data.csv"));
        assert_eq!(table, "csv_table");
    }

    #[test]
    fn scan_skips_ignored_files() {
        let dir = workspace();
        put(&dir, "data.csv", "a,b");
        put(&dir, "data.tmp", "junk");

        // "*.tmp" is mapped to a table but also ignored; ignoring wins.
        let mappings = [("**/*.csv", "t"), ("**/*.tmp", "t2")];
        let matcher = TableMatcher::new(&mappings, &["**/*.tmp"]).unwrap();
        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 1);
        let (path, _) = &found[0];
        assert!(path.ends_with("data.csv"));
    }

    #[test]
    fn scan_recurses_into_subdirectories() {
        let dir = workspace();
        let nested = dir.path().join("nested").join("deep");
        fs::create_dir_all(&nested).unwrap();
        fs::write(nested.join("events.jsonl"), "{}").unwrap();

        let matcher = TableMatcher::new(&[("**/*.jsonl", "events")], &[]).unwrap();
        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 1);
        let (path, table) = &found[0];
        assert!(path.ends_with("events.jsonl"));
        assert_eq!(table, "events");
    }

    #[test]
    fn scan_returns_empty_for_no_matches() {
        let dir = workspace();
        put(&dir, "readme.md", "# hi");

        let matcher = TableMatcher::new(&[("**/*.csv", "t")], &[]).unwrap();
        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn scan_skips_directories() {
        let dir = workspace();
        // A directory whose name matches the glob must not be reported.
        fs::create_dir(dir.path().join("data.csv")).unwrap();

        let matcher = TableMatcher::new(&[("**/*.csv", "t")], &[]).unwrap();
        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 0);
    }
}