dirsql 0.0.1 → 0.0.99-test.1775729890
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/.claude/CLAUDE.md +0 -120
- package/.github/workflows/minor-release.yml +0 -14
- package/.github/workflows/patch-release.yml +0 -45
- package/.github/workflows/pr-monitor.yml +0 -16
- package/.github/workflows/publish.yml +0 -306
- package/.github/workflows/python-lint.yml +0 -35
- package/.github/workflows/python-test.yml +0 -45
- package/.github/workflows/rust-test.yml +0 -41
- package/Cargo.lock +0 -851
- package/Cargo.toml +0 -27
- package/SUMMARY.md +0 -62
- package/justfile +0 -53
- package/pyproject.toml +0 -27
- package/src/db.rs +0 -312
- package/src/differ.rs +0 -372
- package/src/lib.rs +0 -15
- package/src/matcher.rs +0 -116
- package/src/scanner.rs +0 -100
- package/src/watcher.rs +0 -227
package/src/differ.rs
DELETED
|
@@ -1,372 +0,0 @@
|
|
|
1
|
-
use std::collections::HashMap;
|
|
2
|
-
use std::path::PathBuf;
|
|
3
|
-
|
|
4
|
-
use crate::db::Value;
|
|
5
|
-
|
|
6
|
-
/// Events produced by comparing old and new file content.
|
|
7
|
-
#[derive(Debug, Clone, PartialEq)]
|
|
8
|
-
pub enum RowEvent {
|
|
9
|
-
Insert {
|
|
10
|
-
table: String,
|
|
11
|
-
row: HashMap<String, Value>,
|
|
12
|
-
},
|
|
13
|
-
Update {
|
|
14
|
-
table: String,
|
|
15
|
-
old_row: HashMap<String, Value>,
|
|
16
|
-
new_row: HashMap<String, Value>,
|
|
17
|
-
},
|
|
18
|
-
Delete {
|
|
19
|
-
table: String,
|
|
20
|
-
row: HashMap<String, Value>,
|
|
21
|
-
},
|
|
22
|
-
Error {
|
|
23
|
-
file_path: PathBuf,
|
|
24
|
-
error: String,
|
|
25
|
-
},
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
/// Diff old and new file content to produce minimal row events.
|
|
29
|
-
///
|
|
30
|
-
/// - `table`: the target table name
|
|
31
|
-
/// - `old`: previous row content (None if file is new)
|
|
32
|
-
/// - `new`: current row content (None if file was deleted)
|
|
33
|
-
/// - `file_path`: the file path (used in Error events)
|
|
34
|
-
///
|
|
35
|
-
/// For multi-row files (JSONL), uses line-index-based identity:
|
|
36
|
-
/// - Unchanged lines produce no events
|
|
37
|
-
/// - Changed lines produce Update events
|
|
38
|
-
/// - Additional lines at the end produce Insert events
|
|
39
|
-
/// - If the file shrunk or more than half the rows changed, does a full replace
|
|
40
|
-
///
|
|
41
|
-
/// For single-row files, compares the single row directly.
|
|
42
|
-
pub fn diff(
|
|
43
|
-
table: &str,
|
|
44
|
-
old: Option<&[HashMap<String, Value>]>,
|
|
45
|
-
new: Option<&[HashMap<String, Value>]>,
|
|
46
|
-
_file_path: &str,
|
|
47
|
-
) -> Vec<RowEvent> {
|
|
48
|
-
match (old, new) {
|
|
49
|
-
(None, None) => Vec::new(),
|
|
50
|
-
(None, Some(new_rows)) => new_rows
|
|
51
|
-
.iter()
|
|
52
|
-
.map(|r| RowEvent::Insert {
|
|
53
|
-
table: table.to_string(),
|
|
54
|
-
row: r.clone(),
|
|
55
|
-
})
|
|
56
|
-
.collect(),
|
|
57
|
-
(Some(old_rows), None) => old_rows
|
|
58
|
-
.iter()
|
|
59
|
-
.map(|r| RowEvent::Delete {
|
|
60
|
-
table: table.to_string(),
|
|
61
|
-
row: r.clone(),
|
|
62
|
-
})
|
|
63
|
-
.collect(),
|
|
64
|
-
(Some(old_rows), Some(new_rows)) => diff_rows(table, old_rows, new_rows),
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
/// Compare old and new row slices and produce minimal events.
|
|
69
|
-
fn diff_rows(
|
|
70
|
-
table: &str,
|
|
71
|
-
old_rows: &[HashMap<String, Value>],
|
|
72
|
-
new_rows: &[HashMap<String, Value>],
|
|
73
|
-
) -> Vec<RowEvent> {
|
|
74
|
-
// If file shrunk, do full replace
|
|
75
|
-
if new_rows.len() < old_rows.len() {
|
|
76
|
-
return full_replace(table, old_rows, new_rows);
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
// Compare overlapping rows line by line
|
|
80
|
-
let overlap = old_rows.len();
|
|
81
|
-
let mut changed = 0;
|
|
82
|
-
let mut events = Vec::new();
|
|
83
|
-
|
|
84
|
-
for i in 0..overlap {
|
|
85
|
-
if old_rows[i] != new_rows[i] {
|
|
86
|
-
changed += 1;
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
// For multi-row files, if more than half of overlapping rows changed, full replace.
|
|
91
|
-
// Single-row files (overlap == 1) never trigger full replace -- they use Update.
|
|
92
|
-
if overlap > 1 && changed * 2 > overlap {
|
|
93
|
-
return full_replace(table, old_rows, new_rows);
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
// Emit Update events for changed lines
|
|
97
|
-
for i in 0..overlap {
|
|
98
|
-
if old_rows[i] != new_rows[i] {
|
|
99
|
-
events.push(RowEvent::Update {
|
|
100
|
-
table: table.to_string(),
|
|
101
|
-
old_row: old_rows[i].clone(),
|
|
102
|
-
new_row: new_rows[i].clone(),
|
|
103
|
-
});
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// Emit Insert events for appended lines
|
|
108
|
-
for row in &new_rows[overlap..] {
|
|
109
|
-
events.push(RowEvent::Insert {
|
|
110
|
-
table: table.to_string(),
|
|
111
|
-
row: row.clone(),
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
events
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
/// Full replace: delete all old rows, then insert all new rows.
|
|
119
|
-
fn full_replace(
|
|
120
|
-
table: &str,
|
|
121
|
-
old_rows: &[HashMap<String, Value>],
|
|
122
|
-
new_rows: &[HashMap<String, Value>],
|
|
123
|
-
) -> Vec<RowEvent> {
|
|
124
|
-
let mut events = Vec::with_capacity(old_rows.len() + new_rows.len());
|
|
125
|
-
for row in old_rows {
|
|
126
|
-
events.push(RowEvent::Delete {
|
|
127
|
-
table: table.to_string(),
|
|
128
|
-
row: row.clone(),
|
|
129
|
-
});
|
|
130
|
-
}
|
|
131
|
-
for row in new_rows {
|
|
132
|
-
events.push(RowEvent::Insert {
|
|
133
|
-
table: table.to_string(),
|
|
134
|
-
row: row.clone(),
|
|
135
|
-
});
|
|
136
|
-
}
|
|
137
|
-
events
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a row from `(column, value)` pairs; a repeated column keeps its last value.
    fn row(pairs: &[(&str, Value)]) -> HashMap<String, Value> {
        let mut built = HashMap::new();
        for (column, value) in pairs {
            built.insert(column.to_string(), value.clone());
        }
        built
    }

    /// Shorthand for a text value.
    fn text(s: &str) -> Value {
        Value::Text(s.to_string())
    }

    /// Shorthand for an integer value.
    fn int(i: i64) -> Value {
        Value::Integer(i)
    }

    /// One single-column row per value, in the given order.
    fn column_rows(column: &str, values: Vec<Value>) -> Vec<HashMap<String, Value>> {
        values.into_iter().map(|v| row(&[(column, v)])).collect()
    }

    /// Assert that `event` is an `Insert` into `expected_table` with `column == expected`.
    fn expect_insert(event: &RowEvent, expected_table: &str, column: &str, expected: Value) {
        match event {
            RowEvent::Insert { table, row } => {
                assert_eq!(table.as_str(), expected_table);
                assert_eq!(row[column], expected);
            }
            other => panic!("expected Insert, got {:?}", other),
        }
    }

    /// Assert that `event` is a `Delete` from `expected_table` with `column == expected`.
    fn expect_delete(event: &RowEvent, expected_table: &str, column: &str, expected: Value) {
        match event {
            RowEvent::Delete { table, row } => {
                assert_eq!(table.as_str(), expected_table);
                assert_eq!(row[column], expected);
            }
            other => panic!("expected Delete, got {:?}", other),
        }
    }

    /// Assert that `event` is an `Update` in `expected_table` taking `column` from `before` to `after`.
    fn expect_update(
        event: &RowEvent,
        expected_table: &str,
        column: &str,
        before: Value,
        after: Value,
    ) {
        match event {
            RowEvent::Update {
                table,
                old_row,
                new_row,
            } => {
                assert_eq!(table.as_str(), expected_table);
                assert_eq!(old_row[column], before);
                assert_eq!(new_row[column], after);
            }
            other => panic!("expected Update, got {:?}", other),
        }
    }

    /// Count `(deletes, inserts)` among `events`.
    fn tally(events: &[RowEvent]) -> (usize, usize) {
        let mut deletes = 0;
        let mut inserts = 0;
        for event in events {
            match event {
                RowEvent::Delete { .. } => deletes += 1,
                RowEvent::Insert { .. } => inserts += 1,
                RowEvent::Update { .. } | RowEvent::Error { .. } => {}
            }
        }
        (deletes, inserts)
    }

    // --- File created: every row is an insert ---

    #[test]
    fn all_inserts_when_old_is_none() {
        let rows = vec![
            row(&[("name", text("alice")), ("age", int(30))]),
            row(&[("name", text("bob")), ("age", int(25))]),
        ];
        let events = diff("users", None, Some(&rows), "users.jsonl");

        assert_eq!(events.len(), 2);
        expect_insert(&events[0], "users", "name", text("alice"));
        expect_insert(&events[1], "users", "name", text("bob"));
    }

    // --- File deleted: every row is a delete ---

    #[test]
    fn all_deletes_when_new_is_none() {
        let rows = column_rows("id", vec![text("1"), text("2")]);
        let events = diff("items", Some(&rows), None, "items.jsonl");

        assert_eq!(events.len(), 2);
        expect_delete(&events[0], "items", "id", text("1"));
        expect_delete(&events[1], "items", "id", text("2"));
    }

    // --- Identical content: nothing to report ---

    #[test]
    fn no_events_when_content_identical() {
        let rows = column_rows("x", vec![int(1), int(2)]);
        let events = diff("t", Some(&rows), Some(&rows), "t.jsonl");

        assert!(events.is_empty());
    }

    // --- One line out of three changed ---

    #[test]
    fn update_event_for_changed_line() {
        let old = column_rows("val", vec![text("a"), text("b"), text("c")]);
        let new = column_rows("val", vec![text("a"), text("B"), text("c")]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");

        assert_eq!(events.len(), 1);
        expect_update(&events[0], "t", "val", text("b"), text("B"));
    }

    // --- Lines appended at the end ---

    #[test]
    fn insert_events_for_appended_lines() {
        let old = column_rows("id", vec![int(1)]);
        let new = column_rows("id", vec![int(1), int(2), int(3)]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");

        assert_eq!(events.len(), 2);
        expect_insert(&events[0], "t", "id", int(2));
        expect_insert(&events[1], "t", "id", int(3));
    }

    // --- Shrinking file forces a full replace ---

    #[test]
    fn full_replace_when_file_shrinks() {
        let old = column_rows("id", vec![int(1), int(2), int(3)]);
        let new = column_rows("id", vec![int(1)]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");

        // 3 deletes + 1 insert = 4 events
        assert_eq!(events.len(), 4);
        let (deletes, inserts) = tally(&events);
        assert_eq!(deletes, 3);
        assert_eq!(inserts, 1);
    }

    // --- Heavy modification forces a full replace ---

    #[test]
    fn full_replace_when_more_than_half_changed() {
        let old = column_rows("v", vec![text("a"), text("b"), text("c"), text("d")]);
        // 3 of 4 rows differ (75% > 50%), which triggers a full replace
        let new = column_rows("v", vec![text("A"), text("B"), text("C"), text("d")]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");

        // Full replace: 4 deletes + 4 inserts
        let (deletes, inserts) = tally(&events);
        assert_eq!(deletes, 4);
        assert_eq!(inserts, 4);
    }

    // --- Single-row file: changed ---

    #[test]
    fn single_row_update() {
        let old = column_rows("title", vec![text("Draft")]);
        let new = column_rows("title", vec![text("Final")]);
        let events = diff("docs", Some(&old), Some(&new), "doc.json");

        assert_eq!(events.len(), 1);
        expect_update(&events[0], "docs", "title", text("Draft"), text("Final"));
    }

    // --- Single-row file: unchanged ---

    #[test]
    fn single_row_no_change() {
        let rows = column_rows("title", vec![text("Same")]);
        let events = diff("docs", Some(&rows), Some(&rows), "doc.json");

        assert!(events.is_empty());
    }

    // --- Neither side has content ---

    #[test]
    fn no_events_when_both_none() {
        let events = diff("t", None, None, "gone.json");

        assert!(events.is_empty());
    }

    // --- Exactly half changed stays below the full-replace threshold ---

    #[test]
    fn no_full_replace_when_exactly_half_changed() {
        let old = column_rows("v", vec![text("a"), text("b"), text("c"), text("d")]);
        // 2 of 4 rows differ (50%), which must NOT trigger a full replace
        let new = column_rows("v", vec![text("A"), text("B"), text("c"), text("d")]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");

        // Two Update events rather than a full replace
        assert_eq!(events.len(), 2);
        for event in &events {
            assert!(matches!(event, RowEvent::Update { .. }));
        }
    }

    // --- Full replace ordering: deletes first, inserts second ---

    #[test]
    fn full_replace_deletes_before_inserts() {
        let old = column_rows("id", vec![int(1), int(2)]);
        let new = column_rows("id", vec![int(3)]);
        let events = diff("t", Some(&old), Some(&new), "t.jsonl");

        // Index of the final delete and of the earliest insert
        let last_delete = events
            .iter()
            .rposition(|e| matches!(e, RowEvent::Delete { .. }))
            .unwrap();
        let first_insert = events
            .iter()
            .position(|e| matches!(e, RowEvent::Insert { .. }))
            .unwrap();

        assert!(
            last_delete < first_insert,
            "Deletes should come before inserts in full replace"
        );
    }
}
|
package/src/lib.rs
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
pub mod db;
|
|
2
|
-
pub mod differ;
|
|
3
|
-
pub mod matcher;
|
|
4
|
-
pub mod scanner;
|
|
5
|
-
pub mod watcher;
|
|
6
|
-
|
|
7
|
-
#[cfg(feature = "extension-module")]
|
|
8
|
-
use pyo3::prelude::*;
|
|
9
|
-
|
|
10
|
-
// Python extension-module entry point; only compiled when the
// `extension-module` feature is enabled.
//
// Plain `//` comments are used on purpose: a `///` doc comment on a
// `#[pymodule]` function would be picked up as the Python module docstring.
#[cfg(feature = "extension-module")]
#[pymodule]
fn dirsql(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Expose the crate version as `__version__`. This is the only
    // registration, so its result is the module initialisation result.
    m.add("__version__", env!("CARGO_PKG_VERSION"))
}
|
package/src/matcher.rs
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
use globset::{Glob, GlobSet, GlobSetBuilder};
|
|
2
|
-
use std::path::Path;
|
|
3
|
-
|
|
4
|
-
/// Maps file paths to table names based on glob patterns.
|
|
5
|
-
/// First matching pattern wins. An ignore list filters paths entirely.
|
|
6
|
-
pub struct TableMatcher {
|
|
7
|
-
table_globs: Vec<(GlobSet, String)>,
|
|
8
|
-
ignore_set: GlobSet,
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
impl TableMatcher {
|
|
12
|
-
/// Build a new matcher from (glob_pattern, table_name) pairs and ignore patterns.
|
|
13
|
-
pub fn new(
|
|
14
|
-
mappings: &[(&str, &str)],
|
|
15
|
-
ignore_patterns: &[&str],
|
|
16
|
-
) -> Result<Self, globset::Error> {
|
|
17
|
-
let mut table_globs = Vec::new();
|
|
18
|
-
for (pattern, table_name) in mappings {
|
|
19
|
-
let mut builder = GlobSetBuilder::new();
|
|
20
|
-
builder.add(Glob::new(pattern)?);
|
|
21
|
-
table_globs.push((builder.build()?, table_name.to_string()));
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
let mut ignore_builder = GlobSetBuilder::new();
|
|
25
|
-
for pattern in ignore_patterns {
|
|
26
|
-
ignore_builder.add(Glob::new(pattern)?);
|
|
27
|
-
}
|
|
28
|
-
let ignore_set = ignore_builder.build()?;
|
|
29
|
-
|
|
30
|
-
Ok(Self {
|
|
31
|
-
table_globs,
|
|
32
|
-
ignore_set,
|
|
33
|
-
})
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
/// Returns the table name for a file path, or None if no pattern matches.
|
|
37
|
-
pub fn match_file(&self, path: &Path) -> Option<&str> {
|
|
38
|
-
for (glob_set, table_name) in &self.table_globs {
|
|
39
|
-
if glob_set.is_match(path) {
|
|
40
|
-
return Some(table_name.as_str());
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
None
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
/// Returns true if the path matches any ignore pattern.
|
|
47
|
-
pub fn is_ignored(&self, path: &Path) -> bool {
|
|
48
|
-
self.ignore_set.is_match(path)
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a matcher, panicking if any pattern fails to compile.
    fn build(mappings: &[(&str, &str)], ignores: &[&str]) -> TableMatcher {
        TableMatcher::new(mappings, ignores).unwrap()
    }

    #[test]
    fn match_file_returns_table_for_matching_glob() {
        let matcher = build(&[("*.csv", "data")], &[]);
        let found = matcher.match_file(Path::new("report.csv"));
        assert_eq!(found, Some("data"));
    }

    #[test]
    fn match_file_returns_none_for_no_match() {
        let matcher = build(&[("*.csv", "data")], &[]);
        let found = matcher.match_file(Path::new("readme.md"));
        assert_eq!(found, None);
    }

    #[test]
    fn first_matching_pattern_wins() {
        let mappings = [("*.json", "json_table"), ("data/*.json", "data_table")];
        let matcher = build(&mappings, &[]);
        // Both patterns match "data/foo.json"; the earlier one decides.
        let found = matcher.match_file(Path::new("data/foo.json"));
        assert_eq!(found, Some("json_table"));
    }

    #[test]
    fn match_file_with_nested_path() {
        let matcher = build(&[("**/*.jsonl", "events")], &[]);
        let found = matcher.match_file(Path::new("logs/2024/events.jsonl"));
        assert_eq!(found, Some("events"));
    }

    #[test]
    fn is_ignored_returns_true_for_matching_pattern() {
        let matcher = build(&[], &["*.tmp", ".git/**"]);
        for ignored in ["scratch.tmp", ".git/config"].iter() {
            assert!(matcher.is_ignored(Path::new(ignored)));
        }
    }

    #[test]
    fn is_ignored_returns_false_for_non_matching_path() {
        let matcher = build(&[], &["*.tmp"]);
        assert!(!matcher.is_ignored(Path::new("data.csv")));
    }

    #[test]
    fn empty_matcher_matches_nothing() {
        let matcher = build(&[], &[]);
        let probe = Path::new("anything.txt");
        assert_eq!(matcher.match_file(probe), None);
        assert!(!matcher.is_ignored(probe));
    }

    #[test]
    fn invalid_glob_returns_error() {
        let outcome = TableMatcher::new(&[("[invalid", "t")], &[]);
        assert!(outcome.is_err());
    }
}
|
package/src/scanner.rs
DELETED
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
use crate::matcher::TableMatcher;
|
|
2
|
-
use std::path::{Path, PathBuf};
|
|
3
|
-
use walkdir::WalkDir;
|
|
4
|
-
|
|
5
|
-
/// Walk a directory tree and return all file paths paired with their matching table name.
|
|
6
|
-
/// Ignored paths and directories are skipped. Only files (not directories) are returned.
|
|
7
|
-
pub fn scan_directory(root: &Path, matcher: &TableMatcher) -> Vec<(PathBuf, String)> {
|
|
8
|
-
let mut results = Vec::new();
|
|
9
|
-
|
|
10
|
-
for entry in WalkDir::new(root).into_iter().filter_map(|e| e.ok()) {
|
|
11
|
-
let path = entry.path();
|
|
12
|
-
|
|
13
|
-
if matcher.is_ignored(path) {
|
|
14
|
-
continue;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
if !entry.file_type().is_file() {
|
|
18
|
-
continue;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
if let Some(table_name) = matcher.match_file(path) {
|
|
22
|
-
results.push((path.to_path_buf(), table_name.to_string()));
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
results
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a temporary directory containing the given `(file name, contents)` pairs.
    fn dir_with_files(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for (name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    #[test]
    fn scan_finds_matching_files() {
        let dir = dir_with_files(&[("data.csv", "a,b\n1,2"), ("readme.md", "# hi")]);
        let matcher = TableMatcher::new(&[("**/*.csv", "csv_table")], &[]).unwrap();

        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 1);
        let (path, table) = &found[0];
        assert!(path.ends_with("data.csv"));
        assert_eq!(table, "csv_table");
    }

    #[test]
    fn scan_skips_ignored_files() {
        let dir = dir_with_files(&[("data.csv", "a,b"), ("data.tmp", "junk")]);
        let mappings = [("**/*.csv", "t"), ("**/*.tmp", "t2")];
        let matcher = TableMatcher::new(&mappings, &["**/*.tmp"]).unwrap();

        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 1);
        assert!(found[0].0.ends_with("data.csv"));
    }

    #[test]
    fn scan_recurses_into_subdirectories() {
        let dir = TempDir::new().unwrap();
        let nested = dir.path().join("nested").join("deep");
        fs::create_dir_all(&nested).unwrap();
        fs::write(nested.join("events.jsonl"), "{}").unwrap();
        let matcher = TableMatcher::new(&[("**/*.jsonl", "events")], &[]).unwrap();

        let found = scan_directory(dir.path(), &matcher);

        assert_eq!(found.len(), 1);
        let (path, table) = &found[0];
        assert!(path.ends_with("events.jsonl"));
        assert_eq!(table, "events");
    }

    #[test]
    fn scan_returns_empty_for_no_matches() {
        let dir = dir_with_files(&[("readme.md", "# hi")]);
        let matcher = TableMatcher::new(&[("**/*.csv", "t")], &[]).unwrap();

        let found = scan_directory(dir.path(), &matcher);

        assert!(found.is_empty());
    }

    #[test]
    fn scan_skips_directories() {
        let dir = TempDir::new().unwrap();
        // A directory whose name matches the glob must not be reported.
        fs::create_dir(dir.path().join("data.csv")).unwrap();
        let matcher = TableMatcher::new(&[("**/*.csv", "t")], &[]).unwrap();

        let found = scan_directory(dir.path(), &matcher);

        assert!(found.is_empty());
    }
}
|