@anysphere/file-service 0.0.0-a4fe55c1 → 0.0.0-afb75aa9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +5 -0
- package/index.d.ts +1 -1
- package/package.json +10 -8
- package/src/file_utils.rs +12 -14
- package/src/git_utils.rs +158 -19
- package/src/lib.rs +85 -24
- package/src/merkle_tree/local_construction.rs +34 -7
- package/src/merkle_tree/mod.rs +230 -112
- package/src/test.rs +5 -0
package/Cargo.toml
CHANGED
|
@@ -17,6 +17,11 @@ tempfile = "3.8.0"
|
|
|
17
17
|
anyhow = "1.0.75"
|
|
18
18
|
tonic = "0.9.2"
|
|
19
19
|
prost = "0.11.9"
|
|
20
|
+
tracing = "0.1.37"
|
|
21
|
+
tracing-subscriber = "0.3.17"
|
|
22
|
+
tracing-appender = "0.2.2"
|
|
23
|
+
binaryornot = "1.0.0"
|
|
24
|
+
dunce = "1.0.1"
|
|
20
25
|
|
|
21
26
|
[build-dependencies]
|
|
22
27
|
napi-build = "2.0.1"
|
package/index.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@anysphere/file-service",
|
|
3
|
-
"version": "0.0.0-a4fe55c1",
|
|
3
|
+
"version": "0.0.0-afb75aa9",
|
|
4
4
|
"main": "index.js",
|
|
5
5
|
"types": "index.d.ts",
|
|
6
6
|
"napi": {
|
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
"additional": [
|
|
10
10
|
"aarch64-apple-darwin",
|
|
11
11
|
"aarch64-pc-windows-msvc",
|
|
12
|
-
"universal-apple-darwin"
|
|
12
|
+
"universal-apple-darwin",
|
|
13
|
+
"aarch64-unknown-linux-gnu"
|
|
13
14
|
]
|
|
14
15
|
}
|
|
15
16
|
},
|
|
@@ -35,11 +36,12 @@
|
|
|
35
36
|
"version": "napi version"
|
|
36
37
|
},
|
|
37
38
|
"optionalDependencies": {
|
|
38
|
-
"@anysphere/file-service-win32-x64-msvc": "0.0.0-a4fe55c1",
|
|
39
|
-
"@anysphere/file-service-darwin-x64": "0.0.0-a4fe55c1",
|
|
40
|
-
"@anysphere/file-service-linux-x64-gnu": "0.0.0-a4fe55c1",
|
|
41
|
-
"@anysphere/file-service-darwin-arm64": "0.0.0-a4fe55c1",
|
|
42
|
-
"@anysphere/file-service-win32-arm64-msvc": "0.0.0-a4fe55c1",
|
|
43
|
-
"@anysphere/file-service-darwin-universal": "0.0.0-a4fe55c1"
|
|
39
|
+
"@anysphere/file-service-win32-x64-msvc": "0.0.0-afb75aa9",
|
|
40
|
+
"@anysphere/file-service-darwin-x64": "0.0.0-afb75aa9",
|
|
41
|
+
"@anysphere/file-service-linux-x64-gnu": "0.0.0-afb75aa9",
|
|
42
|
+
"@anysphere/file-service-darwin-arm64": "0.0.0-afb75aa9",
|
|
43
|
+
"@anysphere/file-service-win32-arm64-msvc": "0.0.0-afb75aa9",
|
|
44
|
+
"@anysphere/file-service-darwin-universal": "0.0.0-afb75aa9",
|
|
45
|
+
"@anysphere/file-service-linux-arm64-gnu": "0.0.0-afb75aa9"
|
|
44
46
|
}
|
|
45
47
|
}
|
package/src/file_utils.rs
CHANGED
|
@@ -12,9 +12,8 @@ pub fn is_in_bad_dir(file_path: &Path) -> Result<bool, Error> {
|
|
|
12
12
|
let item_path = file_path
|
|
13
13
|
.to_str()
|
|
14
14
|
.ok_or(anyhow::anyhow!("Failed to convert path to string"))?;
|
|
15
|
-
let is_bad_dir =
|
|
16
|
-
|| item_path.contains(".git")
|
|
17
|
-
&& !(item_path.ends_with(".git") || item_path.ends_with("node_modules"));
|
|
15
|
+
let is_bad_dir =
|
|
16
|
+
item_path.contains("node_modules") || item_path.contains(".git");
|
|
18
17
|
Ok(is_bad_dir)
|
|
19
18
|
}
|
|
20
19
|
|
|
@@ -38,14 +37,14 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
38
37
|
|
|
39
38
|
match file_name {
|
|
40
39
|
"package-lock.json" | "pnpm-lock.yaml" | "yarn.lock" | "composer.lock"
|
|
41
|
-
| "Gemfile.lock" => {
|
|
40
|
+
| "Gemfile.lock" | "bun.lockb" => {
|
|
42
41
|
return Err(anyhow::anyhow!("File is just a lock file"));
|
|
43
42
|
}
|
|
44
43
|
_ => {}
|
|
45
44
|
}
|
|
46
45
|
|
|
47
46
|
match extension {
|
|
48
|
-
"lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" => {
|
|
47
|
+
"lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
|
|
49
48
|
return Err(anyhow::anyhow!("File is just a lock file"));
|
|
50
49
|
}
|
|
51
50
|
_ => {}
|
|
@@ -63,7 +62,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
63
62
|
Some(extension) => match extension.to_str() {
|
|
64
63
|
Some(ext_str) => {
|
|
65
64
|
if bad_extensions.contains(&ext_str) {
|
|
66
|
-
return Err(anyhow::anyhow!("
|
|
65
|
+
return Err(anyhow::anyhow!("Binary file excluded from indexing."));
|
|
67
66
|
}
|
|
68
67
|
}
|
|
69
68
|
None => {
|
|
@@ -89,10 +88,12 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
89
88
|
Ok(())
|
|
90
89
|
}
|
|
91
90
|
|
|
91
|
+
// use binaryornot::is_binary;
|
|
92
|
+
// use anyhow::Context;
|
|
92
93
|
// implement the buffer above:
|
|
93
94
|
pub async fn is_good_file_runtime_check(
|
|
94
95
|
file_path: &Path,
|
|
95
|
-
|
|
96
|
+
_buffer: &[u8],
|
|
96
97
|
) -> Result<(), Error> {
|
|
97
98
|
match get_file_size(file_path).await {
|
|
98
99
|
Ok(size) if size > 2 * 1024 * 1024 => {
|
|
@@ -102,13 +103,10 @@ pub async fn is_good_file_runtime_check(
|
|
|
102
103
|
_ => {}
|
|
103
104
|
}
|
|
104
105
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
|
|
110
|
-
}
|
|
111
|
-
}
|
|
106
|
+
// if is_binary(file_path).context("Failed to check if file is binary")? {
|
|
107
|
+
// return Err(anyhow::anyhow!("File is binary"));
|
|
108
|
+
// }
|
|
109
|
+
|
|
112
110
|
Ok(())
|
|
113
111
|
}
|
|
114
112
|
|
package/src/git_utils.rs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
|
-
use std::path::PathBuf;
|
|
3
2
|
use std::process::Command;
|
|
4
3
|
|
|
5
4
|
pub fn list_ignored_files(
|
|
6
5
|
workspace_root_path: &str,
|
|
7
|
-
|
|
6
|
+
should_return_absolute_paths: bool,
|
|
7
|
+
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
|
|
8
8
|
let mut gitignored_files = HashSet::new();
|
|
9
9
|
|
|
10
10
|
let commands = vec![
|
|
@@ -15,12 +15,66 @@ pub fn list_ignored_files(
|
|
|
15
15
|
"--ignored",
|
|
16
16
|
"--exclude-standard",
|
|
17
17
|
],
|
|
18
|
+
// FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
|
|
19
|
+
vec![
|
|
20
|
+
"sh",
|
|
21
|
+
"-c",
|
|
22
|
+
"git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
|
|
23
|
+
],
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
for command in commands {
|
|
27
|
+
let output = Command::new(&command[0])
|
|
28
|
+
.args(&command[1..])
|
|
29
|
+
.current_dir(workspace_root_path)
|
|
30
|
+
.output()?;
|
|
31
|
+
|
|
32
|
+
if output.status.success() {
|
|
33
|
+
let files = String::from_utf8(output.stdout)?
|
|
34
|
+
.lines()
|
|
35
|
+
.filter(|line| !line.is_empty())
|
|
36
|
+
.map(|line| {
|
|
37
|
+
if should_return_absolute_paths {
|
|
38
|
+
let mut path = std::path::PathBuf::from(workspace_root_path);
|
|
39
|
+
path.push(line);
|
|
40
|
+
|
|
41
|
+
match path.canonicalize() {
|
|
42
|
+
Ok(canonical_path) => {
|
|
43
|
+
canonical_path.to_string_lossy().into_owned()
|
|
44
|
+
}
|
|
45
|
+
Err(_) => String::from(line),
|
|
46
|
+
}
|
|
47
|
+
} else {
|
|
48
|
+
String::from(line)
|
|
49
|
+
}
|
|
50
|
+
})
|
|
51
|
+
.collect::<HashSet<_>>();
|
|
52
|
+
|
|
53
|
+
gitignored_files.extend(files);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
Ok(gitignored_files)
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
pub fn list_ignored_files_with_absolute_paths(
|
|
61
|
+
workspace_root_path: &str,
|
|
62
|
+
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
|
|
63
|
+
let mut gitignored_files = HashSet::new();
|
|
64
|
+
|
|
65
|
+
let commands = vec![
|
|
18
66
|
vec![
|
|
19
67
|
"git",
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"--
|
|
23
|
-
"
|
|
68
|
+
"ls-files",
|
|
69
|
+
"--others",
|
|
70
|
+
"--ignored",
|
|
71
|
+
"--exclude-standard",
|
|
72
|
+
],
|
|
73
|
+
// FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
|
|
74
|
+
vec![
|
|
75
|
+
"sh",
|
|
76
|
+
"-c",
|
|
77
|
+
"git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
|
|
24
78
|
],
|
|
25
79
|
];
|
|
26
80
|
|
|
@@ -34,7 +88,7 @@ pub fn list_ignored_files(
|
|
|
34
88
|
let files = String::from_utf8(output.stdout)?
|
|
35
89
|
.lines()
|
|
36
90
|
.filter(|line| !line.is_empty())
|
|
37
|
-
.map(|line|
|
|
91
|
+
.map(|line| String::from(line))
|
|
38
92
|
.collect::<HashSet<_>>();
|
|
39
93
|
|
|
40
94
|
gitignored_files.extend(files);
|
|
@@ -57,6 +111,18 @@ pub async fn is_git_ignored(
|
|
|
57
111
|
Ok(output.status.success())
|
|
58
112
|
}
|
|
59
113
|
|
|
114
|
+
pub async fn is_git_directory(
|
|
115
|
+
workspace_root_path: &str,
|
|
116
|
+
) -> Result<bool, anyhow::Error> {
|
|
117
|
+
let output = tokio::process::Command::new("git")
|
|
118
|
+
.args(&["rev-parse", "--is-inside-work-tree"])
|
|
119
|
+
.current_dir(workspace_root_path)
|
|
120
|
+
.output()
|
|
121
|
+
.await?;
|
|
122
|
+
|
|
123
|
+
Ok(output.status.success())
|
|
124
|
+
}
|
|
125
|
+
|
|
60
126
|
#[cfg(test)]
|
|
61
127
|
mod tests {
|
|
62
128
|
use super::*;
|
|
@@ -66,7 +132,8 @@ mod tests {
|
|
|
66
132
|
#[test]
|
|
67
133
|
fn test_no_ignored_files() {
|
|
68
134
|
let dir = tempfile::tempdir().unwrap();
|
|
69
|
-
let gitignored_files =
|
|
135
|
+
let gitignored_files =
|
|
136
|
+
list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
|
|
70
137
|
Command::new("git")
|
|
71
138
|
.args(&["init"])
|
|
72
139
|
.current_dir(dir.path())
|
|
@@ -92,13 +159,14 @@ mod tests {
|
|
|
92
159
|
.current_dir(dir.path())
|
|
93
160
|
.output()
|
|
94
161
|
.unwrap();
|
|
95
|
-
let gitignored_files =
|
|
162
|
+
let gitignored_files =
|
|
163
|
+
list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
|
|
96
164
|
println!(
|
|
97
165
|
"ignored files for test_one_ignored_file: {:?}",
|
|
98
166
|
gitignored_files
|
|
99
167
|
);
|
|
100
168
|
// assert_eq!(gitignored_files.len(), 1);
|
|
101
|
-
assert!(gitignored_files.contains(&
|
|
169
|
+
assert!(gitignored_files.contains(&String::from("ignored.txt")));
|
|
102
170
|
}
|
|
103
171
|
|
|
104
172
|
#[test]
|
|
@@ -121,19 +189,83 @@ mod tests {
|
|
|
121
189
|
.current_dir(dir.path())
|
|
122
190
|
.output()
|
|
123
191
|
.unwrap();
|
|
124
|
-
let gitignored_files =
|
|
192
|
+
let gitignored_files =
|
|
193
|
+
list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
|
|
125
194
|
println!(
|
|
126
195
|
"ignored files for test_multiple_ignored_files: {:?}",
|
|
127
196
|
gitignored_files
|
|
128
197
|
);
|
|
129
198
|
// assert_eq!(gitignored_files.len(), 2);
|
|
130
|
-
assert!(gitignored_files.contains(&
|
|
131
|
-
assert!(gitignored_files.contains(&
|
|
199
|
+
assert!(gitignored_files.contains(&String::from("ignored1.txt")));
|
|
200
|
+
assert!(gitignored_files.contains(&String::from("ignored2.txt")));
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
#[test]
|
|
204
|
+
fn test_git_submodule_ignored_files() {
|
|
205
|
+
let dir = tempfile::tempdir().unwrap();
|
|
206
|
+
let submodule_path = dir.path().join("submodule");
|
|
207
|
+
std::fs::create_dir(&submodule_path).unwrap();
|
|
208
|
+
|
|
209
|
+
let o = Command::new("git")
|
|
210
|
+
.args(&["init"])
|
|
211
|
+
.current_dir(&submodule_path)
|
|
212
|
+
.output()
|
|
213
|
+
.unwrap();
|
|
214
|
+
println!("git init output: {:?}", o);
|
|
215
|
+
|
|
216
|
+
let file_path = submodule_path.join("ignored.txt");
|
|
217
|
+
let mut file = File::create(&file_path).unwrap();
|
|
218
|
+
writeln!(file, "This is an ignored file.").unwrap();
|
|
219
|
+
|
|
220
|
+
let file2 = submodule_path.join("ignored2.txt");
|
|
221
|
+
let mut file = File::create(&file2).unwrap();
|
|
222
|
+
writeln!(file, "This is another ignored file.").unwrap();
|
|
223
|
+
|
|
224
|
+
let gitignore_path = submodule_path.join(".gitignore");
|
|
225
|
+
let mut gitignore = File::create(&gitignore_path).unwrap();
|
|
226
|
+
writeln!(gitignore, "*.txt").unwrap();
|
|
227
|
+
|
|
228
|
+
let o = Command::new("git")
|
|
229
|
+
.args(&["init"])
|
|
230
|
+
.current_dir(dir.path())
|
|
231
|
+
.output()
|
|
232
|
+
.unwrap();
|
|
233
|
+
println!("git init output: {:?}", o);
|
|
234
|
+
|
|
235
|
+
// make a commit in the submodule
|
|
236
|
+
let o = Command::new("git")
|
|
237
|
+
.args(&["add", "."])
|
|
238
|
+
.current_dir(&submodule_path)
|
|
239
|
+
.output()
|
|
240
|
+
.unwrap();
|
|
241
|
+
println!("git add output: {:?}", o);
|
|
242
|
+
let o = Command::new("git")
|
|
243
|
+
.args(&["commit", "-m", "initial commit"])
|
|
244
|
+
.current_dir(&submodule_path)
|
|
245
|
+
.output()
|
|
246
|
+
.unwrap();
|
|
247
|
+
println!("git commit output: {:?}", o);
|
|
248
|
+
|
|
249
|
+
let o = Command::new("git")
|
|
250
|
+
.args(&["submodule", "add", "./submodule"])
|
|
251
|
+
.current_dir(dir.path())
|
|
252
|
+
.output()
|
|
253
|
+
.unwrap();
|
|
254
|
+
println!("git submodule add output: {:?}", o);
|
|
255
|
+
|
|
256
|
+
let gitignored_files =
|
|
257
|
+
list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
|
|
258
|
+
println!(
|
|
259
|
+
"ignored files for test_git_submodule_ignored_files: {:?}",
|
|
260
|
+
gitignored_files
|
|
261
|
+
);
|
|
262
|
+
assert!(gitignored_files.contains(&String::from("submodule/ignored.txt")));
|
|
263
|
+
assert!(gitignored_files.contains(&String::from("submodule/ignored2.txt")));
|
|
132
264
|
}
|
|
133
265
|
|
|
134
266
|
#[test]
|
|
135
267
|
fn test_multiple_ignored_files_in_current_dir() {
|
|
136
|
-
let gitignored_files = list_ignored_files(".").unwrap();
|
|
268
|
+
let gitignored_files = list_ignored_files(".", false).unwrap();
|
|
137
269
|
assert!(gitignored_files.len() > 1);
|
|
138
270
|
|
|
139
271
|
// print a sample of the ignored files
|
|
@@ -147,7 +279,6 @@ mod tests {
|
|
|
147
279
|
}
|
|
148
280
|
}
|
|
149
281
|
|
|
150
|
-
|
|
151
282
|
#[tokio::test]
|
|
152
283
|
async fn test_file_not_ignored() {
|
|
153
284
|
let dir = tempfile::tempdir().unwrap();
|
|
@@ -160,7 +291,10 @@ mod tests {
|
|
|
160
291
|
.current_dir(dir.path())
|
|
161
292
|
.output()
|
|
162
293
|
.unwrap();
|
|
163
|
-
let is_ignored =
|
|
294
|
+
let is_ignored =
|
|
295
|
+
is_git_ignored(dir.path().to_str().unwrap(), "not_ignored.txt")
|
|
296
|
+
.await
|
|
297
|
+
.unwrap();
|
|
164
298
|
assert_eq!(is_ignored, false);
|
|
165
299
|
}
|
|
166
300
|
|
|
@@ -180,7 +314,10 @@ mod tests {
|
|
|
180
314
|
.current_dir(dir.path())
|
|
181
315
|
.output()
|
|
182
316
|
.unwrap();
|
|
183
|
-
let is_ignored =
|
|
317
|
+
let is_ignored =
|
|
318
|
+
is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt")
|
|
319
|
+
.await
|
|
320
|
+
.unwrap();
|
|
184
321
|
assert_eq!(is_ignored, true);
|
|
185
322
|
}
|
|
186
323
|
|
|
@@ -200,8 +337,10 @@ mod tests {
|
|
|
200
337
|
.current_dir(dir.path())
|
|
201
338
|
.output()
|
|
202
339
|
.unwrap();
|
|
203
|
-
let is_ignored =
|
|
340
|
+
let is_ignored =
|
|
341
|
+
is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt")
|
|
342
|
+
.await
|
|
343
|
+
.unwrap();
|
|
204
344
|
assert_eq!(is_ignored, true);
|
|
205
345
|
}
|
|
206
|
-
|
|
207
346
|
}
|
package/src/lib.rs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#![deny(clippy::all)]
|
|
2
|
+
#![deny(unsafe_op_in_unsafe_fn)]
|
|
2
3
|
pub mod file_utils;
|
|
3
4
|
pub mod git_utils;
|
|
4
5
|
pub mod merkle_tree;
|
|
@@ -6,6 +7,9 @@ pub mod merkle_tree;
|
|
|
6
7
|
use std::vec;
|
|
7
8
|
|
|
8
9
|
use merkle_tree::{LocalConstruction, MerkleTree};
|
|
10
|
+
use tracing::{info, Level};
|
|
11
|
+
use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
|
12
|
+
use tracing_subscriber::fmt;
|
|
9
13
|
|
|
10
14
|
#[macro_use]
|
|
11
15
|
extern crate napi_derive;
|
|
@@ -13,16 +17,46 @@ extern crate napi_derive;
|
|
|
13
17
|
#[napi]
|
|
14
18
|
pub struct MerkleClient {
|
|
15
19
|
tree: MerkleTree,
|
|
16
|
-
|
|
20
|
+
absolute_root_directory: String,
|
|
21
|
+
_guard: tracing_appender::non_blocking::WorkerGuard,
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
|
|
25
|
+
let file_appender =
|
|
26
|
+
RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
|
|
27
|
+
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
|
|
28
|
+
let subscriber = fmt::Subscriber::builder()
|
|
29
|
+
.with_max_level(Level::TRACE)
|
|
30
|
+
.with_writer(non_blocking)
|
|
31
|
+
.with_ansi(false)
|
|
32
|
+
.with_line_number(true)
|
|
33
|
+
.finish();
|
|
34
|
+
|
|
35
|
+
let _ = tracing::subscriber::set_global_default(subscriber);
|
|
36
|
+
|
|
37
|
+
_guard
|
|
17
38
|
}
|
|
18
39
|
|
|
19
40
|
#[napi]
|
|
20
41
|
impl MerkleClient {
|
|
21
42
|
#[napi(constructor)]
|
|
22
|
-
pub fn new(
|
|
43
|
+
pub fn new(absolute_root_directory: String) -> MerkleClient {
|
|
44
|
+
let _guard = init_logger();
|
|
45
|
+
|
|
46
|
+
let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
|
|
47
|
+
// use dunce::canonicalize;
|
|
48
|
+
let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
|
|
49
|
+
Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
|
|
50
|
+
Err(e) => {
|
|
51
|
+
info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
|
|
52
|
+
absolute_root_directory
|
|
53
|
+
}
|
|
54
|
+
};
|
|
55
|
+
|
|
23
56
|
MerkleClient {
|
|
24
57
|
tree: MerkleTree::empty_tree(),
|
|
25
|
-
|
|
58
|
+
absolute_root_directory: canonical_root_directory,
|
|
59
|
+
_guard,
|
|
26
60
|
}
|
|
27
61
|
}
|
|
28
62
|
|
|
@@ -31,7 +65,10 @@ impl MerkleClient {
|
|
|
31
65
|
// 1. compute the merkle tree
|
|
32
66
|
// 2. update the backend
|
|
33
67
|
// 3. sync with the remote
|
|
34
|
-
|
|
68
|
+
info!("Merkle tree compute started!");
|
|
69
|
+
unsafe {
|
|
70
|
+
self.compute_merkle_tree().await?;
|
|
71
|
+
}
|
|
35
72
|
|
|
36
73
|
Ok(())
|
|
37
74
|
}
|
|
@@ -45,7 +82,7 @@ impl MerkleClient {
|
|
|
45
82
|
&mut self,
|
|
46
83
|
) -> Result<(), napi::Error> {
|
|
47
84
|
let t =
|
|
48
|
-
MerkleTree::construct_merkle_tree(self.
|
|
85
|
+
MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone()).await;
|
|
49
86
|
|
|
50
87
|
match t {
|
|
51
88
|
Ok(tree) => {
|
|
@@ -75,14 +112,35 @@ impl MerkleClient {
|
|
|
75
112
|
relative_path: String,
|
|
76
113
|
) -> Result<String, napi::Error> {
|
|
77
114
|
let absolute_path =
|
|
78
|
-
std::path::Path::new(&self.
|
|
79
|
-
|
|
115
|
+
std::path::Path::new(&self.absolute_root_directory).join(&relative_path);
|
|
116
|
+
|
|
117
|
+
let canonical_path = match dunce::canonicalize(&absolute_path) {
|
|
118
|
+
Ok(path) => path,
|
|
119
|
+
Err(e) => {
|
|
120
|
+
return Err(napi::Error::new(
|
|
121
|
+
napi::Status::Unknown,
|
|
122
|
+
format!("Error in canonicalizing path: {:?}", e),
|
|
123
|
+
))
|
|
124
|
+
}
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
let cononical_string = match canonical_path.to_str() {
|
|
128
|
+
Some(s) => s.to_lowercase(),
|
|
129
|
+
None => {
|
|
130
|
+
return Err(napi::Error::new(
|
|
131
|
+
napi::Status::Unknown,
|
|
132
|
+
format!("Error in converting canonical path to string"),
|
|
133
|
+
))
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
let hash = self.tree.get_subtree_hash(cononical_string.as_str()).await;
|
|
80
138
|
|
|
81
139
|
match hash {
|
|
82
140
|
Ok(hash) => Ok(hash),
|
|
83
141
|
Err(e) => Err(napi::Error::new(
|
|
84
142
|
napi::Status::Unknown,
|
|
85
|
-
format!("Error in get_subtree_hash: {:?}", e),
|
|
143
|
+
format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nCanonical path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, canonical_path, self.absolute_root_directory, e),
|
|
86
144
|
)),
|
|
87
145
|
}
|
|
88
146
|
}
|
|
@@ -104,8 +162,10 @@ impl MerkleClient {
|
|
|
104
162
|
&self,
|
|
105
163
|
relative_path: String,
|
|
106
164
|
) -> Result<i32, napi::Error> {
|
|
107
|
-
let absolute_path =
|
|
108
|
-
|
|
165
|
+
let absolute_path = std::path::Path::new(&self.absolute_root_directory)
|
|
166
|
+
.join(relative_path)
|
|
167
|
+
.canonicalize()?;
|
|
168
|
+
|
|
109
169
|
let num = self
|
|
110
170
|
.tree
|
|
111
171
|
.get_num_embeddable_files_in_subtree(absolute_path)
|
|
@@ -138,10 +198,12 @@ impl MerkleClient {
|
|
|
138
198
|
&self,
|
|
139
199
|
absolute_file_path: String,
|
|
140
200
|
) -> Result<Vec<String>, napi::Error> {
|
|
141
|
-
let
|
|
201
|
+
// let absolute_path = absolute_file_path.to_lowercase();
|
|
202
|
+
// let absolute_path_str = absolute_path.as_str();
|
|
203
|
+
|
|
142
204
|
let files = self
|
|
143
205
|
.tree
|
|
144
|
-
.get_all_dir_files_to_embed(
|
|
206
|
+
.get_all_dir_files_to_embed(absolute_file_path.as_str())
|
|
145
207
|
.await;
|
|
146
208
|
|
|
147
209
|
match files {
|
|
@@ -167,7 +229,6 @@ impl MerkleClient {
|
|
|
167
229
|
|
|
168
230
|
let ret = vec![file];
|
|
169
231
|
let ret = ret.into_iter().chain(path.into_iter()).collect::<Vec<_>>();
|
|
170
|
-
|
|
171
232
|
Ok(ret)
|
|
172
233
|
}
|
|
173
234
|
Err(e) => Err(napi::Error::new(
|
|
@@ -183,17 +244,17 @@ impl MerkleClient {
|
|
|
183
244
|
&self,
|
|
184
245
|
absolute_file_path: String,
|
|
185
246
|
) -> Result<Vec<String>, napi::Error> {
|
|
186
|
-
// let
|
|
187
|
-
|
|
188
|
-
|
|
247
|
+
// let absolute_path = absolute_file_path.to_lowercase();
|
|
248
|
+
// let absolute_path_str = absolute_path.as_str();
|
|
249
|
+
let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
|
|
189
250
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
251
|
+
match spline {
|
|
252
|
+
Ok(spline) => Ok(spline),
|
|
253
|
+
Err(e) => Err(napi::Error::new(
|
|
254
|
+
napi::Status::Unknown,
|
|
255
|
+
format!("Error in get_spline: {:?}", e),
|
|
256
|
+
)),
|
|
257
|
+
}
|
|
197
258
|
}
|
|
198
259
|
|
|
199
260
|
#[napi]
|
|
@@ -214,6 +275,6 @@ impl MerkleClient {
|
|
|
214
275
|
|
|
215
276
|
#[napi]
|
|
216
277
|
pub fn update_root_directory(&mut self, root_directory: String) {
|
|
217
|
-
self.
|
|
278
|
+
self.absolute_root_directory = root_directory;
|
|
218
279
|
}
|
|
219
280
|
}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
+
use crate::git_utils;
|
|
1
2
|
use crate::merkle_tree::{
|
|
2
3
|
File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
|
|
3
4
|
};
|
|
4
5
|
|
|
5
6
|
use super::{LocalConstruction, MerkleTree};
|
|
6
|
-
use std::collections::BTreeMap;
|
|
7
|
-
use std::path::PathBuf;
|
|
8
|
-
use std::{collections::HashMap, path::Path, sync::Arc};
|
|
7
|
+
use std::collections::{BTreeMap, HashSet};
|
|
8
|
+
use std::path::{Path, PathBuf};
|
|
9
9
|
use tonic::async_trait;
|
|
10
10
|
|
|
11
11
|
#[async_trait]
|
|
@@ -29,20 +29,36 @@ impl LocalConstruction for MerkleTree {
|
|
|
29
29
|
/// 3. construct merkle tree
|
|
30
30
|
/// 4. return merkle tree
|
|
31
31
|
async fn construct_merkle_tree(
|
|
32
|
-
|
|
32
|
+
absolute_path_to_root_directory: String,
|
|
33
33
|
) -> Result<MerkleTree, anyhow::Error> {
|
|
34
|
-
let path = PathBuf::from(
|
|
34
|
+
let path = PathBuf::from(absolute_path_to_root_directory.clone());
|
|
35
35
|
if !path.exists() {
|
|
36
36
|
// FIXME: we should report this via a good logger.
|
|
37
37
|
panic!("Root directory does not exist!");
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
// 1. get all the gitignored files
|
|
41
|
+
let git_ignored_files = match git_utils::list_ignored_files(
|
|
42
|
+
absolute_path_to_root_directory.as_str(),
|
|
43
|
+
true,
|
|
44
|
+
) {
|
|
45
|
+
Ok(git_ignored) => git_ignored,
|
|
46
|
+
Err(_e) => HashSet::new(),
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
let root_node = MerkleNode::new(
|
|
50
|
+
path,
|
|
51
|
+
None,
|
|
52
|
+
&git_ignored_files,
|
|
53
|
+
absolute_path_to_root_directory.as_str(),
|
|
54
|
+
)
|
|
55
|
+
.await;
|
|
41
56
|
let mut mt = MerkleTree {
|
|
42
57
|
root: root_node,
|
|
43
58
|
files: BTreeMap::new(),
|
|
44
|
-
root_path:
|
|
59
|
+
root_path: absolute_path_to_root_directory,
|
|
45
60
|
cursor: None,
|
|
61
|
+
git_ignored_files,
|
|
46
62
|
};
|
|
47
63
|
|
|
48
64
|
// we now iterate over all the nodes and add them to the hashmap
|
|
@@ -55,6 +71,7 @@ impl LocalConstruction for MerkleTree {
|
|
|
55
71
|
let node_reader = node.read().await;
|
|
56
72
|
match &node_reader.node_type {
|
|
57
73
|
NodeType::Branch(n) => {
|
|
74
|
+
tracing::info!("Branch: {:?}", n.0);
|
|
58
75
|
let children = &n.1;
|
|
59
76
|
files.insert(n.0.clone(), File { node: node.clone() });
|
|
60
77
|
for child in children {
|
|
@@ -63,6 +80,13 @@ impl LocalConstruction for MerkleTree {
|
|
|
63
80
|
}
|
|
64
81
|
NodeType::File(file_name) => {
|
|
65
82
|
let f = File { node: node.clone() };
|
|
83
|
+
|
|
84
|
+
// i dont reallly like this :(((
|
|
85
|
+
// let canonical_file_name = match dunce::canonicalize(file_name) {
|
|
86
|
+
// Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
|
|
87
|
+
// Err(_) => file_name.clone(),
|
|
88
|
+
// };
|
|
89
|
+
|
|
66
90
|
files.insert(file_name.clone(), f);
|
|
67
91
|
}
|
|
68
92
|
NodeType::ErrorNode(_) => {
|
|
@@ -74,6 +98,9 @@ impl LocalConstruction for MerkleTree {
|
|
|
74
98
|
|
|
75
99
|
add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
|
|
76
100
|
|
|
101
|
+
tracing::info!("Merkle tree compute finished!");
|
|
102
|
+
tracing::info!("Merkle tree: {}", mt);
|
|
103
|
+
|
|
77
104
|
Ok(mt)
|
|
78
105
|
}
|
|
79
106
|
|
package/src/merkle_tree/mod.rs
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
use crate::git_utils;
|
|
2
|
+
|
|
1
3
|
use super::file_utils;
|
|
2
4
|
use sha2::Digest;
|
|
3
|
-
use std::collections::BTreeMap;
|
|
5
|
+
use std::collections::{BTreeMap, HashSet};
|
|
4
6
|
use std::path::PathBuf;
|
|
5
7
|
use std::{fs, path::Path, sync::Arc};
|
|
6
8
|
use tokio::sync::RwLock;
|
|
7
9
|
use tonic::async_trait;
|
|
10
|
+
use tracing::info;
|
|
8
11
|
pub mod local_construction;
|
|
9
12
|
pub mod test;
|
|
10
13
|
|
|
@@ -14,7 +17,8 @@ pub struct MerkleTree {
|
|
|
14
17
|
root_path: String,
|
|
15
18
|
root: MerkleNodePtr,
|
|
16
19
|
files: BTreeMap<String, File>,
|
|
17
|
-
cursor: Option<
|
|
20
|
+
cursor: Option<usize>,
|
|
21
|
+
git_ignored_files: HashSet<String>,
|
|
18
22
|
}
|
|
19
23
|
|
|
20
24
|
#[derive(Debug)]
|
|
@@ -91,26 +95,20 @@ impl MerkleTree {
|
|
|
91
95
|
files: BTreeMap::new(),
|
|
92
96
|
root_path: "".to_string(),
|
|
93
97
|
cursor: None,
|
|
98
|
+
git_ignored_files: HashSet::new(),
|
|
94
99
|
}
|
|
95
100
|
}
|
|
96
101
|
|
|
97
102
|
pub async fn get_subtree_hash(
|
|
98
103
|
&self,
|
|
99
|
-
absolute_path:
|
|
104
|
+
absolute_path: &str,
|
|
100
105
|
) -> Result<String, anyhow::Error> {
|
|
101
|
-
let abs_string = match absolute_path.to_str() {
|
|
102
|
-
Some(s) => s.to_string(),
|
|
103
|
-
None => {
|
|
104
|
-
return Err(anyhow::anyhow!(
|
|
105
|
-
"get_subtree_hash: Failed to convert path to string"
|
|
106
|
-
))
|
|
107
|
-
}
|
|
108
|
-
};
|
|
109
106
|
|
|
110
|
-
let node = match self.files.get(
|
|
107
|
+
let node = match self.files.get(absolute_path) {
|
|
111
108
|
Some(file) => file.node.clone(),
|
|
112
109
|
None => {
|
|
113
|
-
|
|
110
|
+
let all_files: Vec<String> = self.files.keys().cloned().collect();
|
|
111
|
+
return Err(anyhow::anyhow!("Could not find file in tree! Looking for: {}. All files: {:?}", absolute_path, all_files));
|
|
114
112
|
}
|
|
115
113
|
};
|
|
116
114
|
|
|
@@ -156,7 +154,7 @@ impl MerkleTree {
|
|
|
156
154
|
}
|
|
157
155
|
};
|
|
158
156
|
|
|
159
|
-
// TODO(sualeh): worth keeping this list sorted.
|
|
157
|
+
// TODO(sualeh): worth keeping this list sorted. its now a btree
|
|
160
158
|
|
|
161
159
|
for (_, file) in &self.files {
|
|
162
160
|
let file_reader = file.node.read().await;
|
|
@@ -234,83 +232,44 @@ impl MerkleTree {
|
|
|
234
232
|
pub async fn get_next_file_to_embed(
|
|
235
233
|
&mut self,
|
|
236
234
|
) -> Result<(String, Vec<String>), anyhow::Error> {
|
|
237
|
-
// the
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
if let NodeType::File(file_path) = &self.root.read().await.node_type {
|
|
244
|
-
return Ok((file_path.clone(), vec![]));
|
|
235
|
+
// if the cursor is none, set it to 0
|
|
236
|
+
let cursor = match self.cursor {
|
|
237
|
+
Some(cursor) => cursor,
|
|
238
|
+
None => {
|
|
239
|
+
self.cursor = Some(0);
|
|
240
|
+
0
|
|
245
241
|
}
|
|
242
|
+
};
|
|
246
243
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
NodeType::File(_) => {
|
|
258
|
-
return Err(anyhow::anyhow!(
|
|
259
|
-
"get_next_file_to_embed: This should not happen! the branch happened to be file."
|
|
260
|
-
));
|
|
261
|
-
}
|
|
262
|
-
NodeType::ErrorNode(_) => {
|
|
263
|
-
return Err(anyhow::anyhow!("Cursor is an error node!"));
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
};
|
|
267
|
-
|
|
268
|
-
let current_node_name = &node.0;
|
|
269
|
-
let child_list = &node.1;
|
|
270
|
-
|
|
271
|
-
if let Some(c) = child_list.first() {
|
|
272
|
-
let c_reader = c.read().await;
|
|
273
|
-
|
|
274
|
-
match &c_reader.node_type {
|
|
275
|
-
NodeType::File(file_path) => {
|
|
276
|
-
// must set the cursor!
|
|
277
|
-
self.cursor = Some(c.clone());
|
|
278
|
-
|
|
279
|
-
return Ok((file_path.clone(), path));
|
|
280
|
-
}
|
|
281
|
-
NodeType::Branch(_) => {
|
|
282
|
-
potential_first_child = c.clone();
|
|
283
|
-
is_branch = true;
|
|
244
|
+
// get the thing at the cursor. while we dont find a file, we keep incrementing the cursor.
|
|
245
|
+
let mut cursor = cursor;
|
|
246
|
+
loop {
|
|
247
|
+
// O(log n)
|
|
248
|
+
let file = match self.files.values().nth(cursor) {
|
|
249
|
+
Some(file) => file,
|
|
250
|
+
None => {
|
|
251
|
+
return Err(anyhow::anyhow!("Could not find file to embed!"));
|
|
252
|
+
}
|
|
253
|
+
};
|
|
284
254
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
|
|
255
|
+
let file_reader = file.node.read().await;
|
|
256
|
+
match &file_reader.node_type {
|
|
257
|
+
NodeType::File(f) => {
|
|
258
|
+
// update the cursor.
|
|
259
|
+
self.cursor = Some(cursor + 1);
|
|
260
|
+
let spline = self.get_spline(f).await?;
|
|
261
|
+
return Ok((f.clone(), spline));
|
|
262
|
+
}
|
|
263
|
+
NodeType::Branch(_) => {
|
|
264
|
+
cursor += 1;
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
NodeType::ErrorNode(_) => {
|
|
268
|
+
cursor += 1;
|
|
269
|
+
continue;
|
|
295
270
|
}
|
|
296
271
|
}
|
|
297
272
|
}
|
|
298
|
-
|
|
299
|
-
// THE DEFAULT CASE:
|
|
300
|
-
// we already have a cursor at a file.
|
|
301
|
-
|
|
302
|
-
// UNWRAP checked and fine. see the none case above.
|
|
303
|
-
let cursor_name = self.cursor.as_ref().unwrap();
|
|
304
|
-
let cursor_reader = cursor_name.read().await;
|
|
305
|
-
|
|
306
|
-
// invariant: you must be a file!!
|
|
307
|
-
|
|
308
|
-
// everytime we get to a child list, we will add all the children to a fifo, and then pull from it as long as we need it.
|
|
309
|
-
|
|
310
|
-
// algorithm:
|
|
311
|
-
// 1.
|
|
312
|
-
|
|
313
|
-
Err(anyhow::anyhow!("Could not find file to embed!"))
|
|
314
273
|
}
|
|
315
274
|
|
|
316
275
|
pub async fn get_all_dir_files_to_embed(
|
|
@@ -340,6 +299,54 @@ impl MerkleTree {
|
|
|
340
299
|
Ok(files)
|
|
341
300
|
}
|
|
342
301
|
|
|
302
|
+
// TODO(sualeh): i need tests for this!!
|
|
303
|
+
pub async fn get_spline(
|
|
304
|
+
&self,
|
|
305
|
+
absolute_path: &str,
|
|
306
|
+
) -> Result<Vec<String>, anyhow::Error> {
|
|
307
|
+
info!("get_spline called with absolute_path: {}", absolute_path);
|
|
308
|
+
let mut files = Vec::new();
|
|
309
|
+
|
|
310
|
+
let current_node = match self.files.get(absolute_path) {
|
|
311
|
+
Some(node) => {
|
|
312
|
+
info!("Found node for absolute_path: {}", absolute_path);
|
|
313
|
+
node.node.clone()
|
|
314
|
+
}
|
|
315
|
+
None => {
|
|
316
|
+
info!("File not found for absolute_path: {}", absolute_path);
|
|
317
|
+
return Err(anyhow::anyhow!("File not found: {}", absolute_path));
|
|
318
|
+
}
|
|
319
|
+
};
|
|
320
|
+
|
|
321
|
+
let mut stack = Vec::new();
|
|
322
|
+
stack.push(current_node);
|
|
323
|
+
|
|
324
|
+
while let Some(node) = stack.pop() {
|
|
325
|
+
let parent = node.read().await.parent.clone();
|
|
326
|
+
if let Some(parent) = parent {
|
|
327
|
+
info!("Adding parent hash to files vector");
|
|
328
|
+
{
|
|
329
|
+
let parent_node = parent.read().await;
|
|
330
|
+
match &parent_node.node_type {
|
|
331
|
+
NodeType::File(file_name) => {
|
|
332
|
+
files.push(file_name.clone());
|
|
333
|
+
}
|
|
334
|
+
NodeType::Branch((branch_name, _)) => {
|
|
335
|
+
files.push(branch_name.clone());
|
|
336
|
+
}
|
|
337
|
+
_ => {
|
|
338
|
+
continue;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
stack.push(parent);
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
info!("Returning files vector with {} elements", files.len());
|
|
347
|
+
Ok(files)
|
|
348
|
+
}
|
|
349
|
+
|
|
343
350
|
/// creates a new node and attaches it to the current tree.
|
|
344
351
|
/// SPEC:
|
|
345
352
|
/// - you are allowed to create a file with a node such that the
|
|
@@ -375,12 +382,18 @@ impl MerkleTree {
|
|
|
375
382
|
// 1. the path is empty. this means that the ancestor is the root.
|
|
376
383
|
// 2. the path is non-empty. that means there exist a non-empty element btwn till the root.
|
|
377
384
|
|
|
385
|
+
let absolute_root_path = self.root_path.clone();
|
|
378
386
|
let new_node = match path.len() {
|
|
379
387
|
0 => {
|
|
380
388
|
// this means that the ancestor is the root.
|
|
381
389
|
// we need to create a new node and attach it to the ancestor.
|
|
382
|
-
let new_node =
|
|
383
|
-
|
|
390
|
+
let new_node = MerkleNode::new(
|
|
391
|
+
file_path.clone(),
|
|
392
|
+
Some(ancestor.clone()),
|
|
393
|
+
&self.git_ignored_files,
|
|
394
|
+
&absolute_root_path.as_str(),
|
|
395
|
+
)
|
|
396
|
+
.await;
|
|
384
397
|
ancestor.write().await.attach_child(new_node.clone()).await;
|
|
385
398
|
new_node
|
|
386
399
|
}
|
|
@@ -391,9 +404,13 @@ impl MerkleTree {
|
|
|
391
404
|
// UNSURE: not sure this is the correct thing to do but it is the fastest.
|
|
392
405
|
// get the last thing that is not in the tree.
|
|
393
406
|
let first_child_path = path.last().unwrap();
|
|
394
|
-
let first_child =
|
|
395
|
-
|
|
396
|
-
|
|
407
|
+
let first_child = MerkleNode::new(
|
|
408
|
+
first_child_path.clone(),
|
|
409
|
+
Some(ancestor.clone()),
|
|
410
|
+
&self.git_ignored_files,
|
|
411
|
+
&absolute_root_path.as_str(),
|
|
412
|
+
)
|
|
413
|
+
.await;
|
|
397
414
|
|
|
398
415
|
// TODO(sualeh): we should do an assertion check that the entire vec is contained here.
|
|
399
416
|
|
|
@@ -670,18 +687,62 @@ use std::future::Future;
|
|
|
670
687
|
use std::pin::Pin;
|
|
671
688
|
|
|
672
689
|
type PinnedFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
|
|
690
|
+
type IgnoredFiles = HashSet<String>;
|
|
673
691
|
|
|
674
692
|
impl MerkleNode {
|
|
675
693
|
/// please be careful using this.
|
|
676
694
|
async fn __new_unchecked(
|
|
677
695
|
file_or_directory: String,
|
|
678
696
|
parent: ParentPtr,
|
|
697
|
+
ignored_files: &IgnoredFiles,
|
|
698
|
+
absolute_root_path: &str,
|
|
679
699
|
) -> MerkleNodePtr {
|
|
680
|
-
|
|
700
|
+
// check if the root is a git directory.
|
|
701
|
+
let is_git_repo =
|
|
702
|
+
match git_utils::is_git_directory(absolute_root_path).await {
|
|
703
|
+
Ok(is_git_repo) => is_git_repo,
|
|
704
|
+
Err(e) => false,
|
|
705
|
+
};
|
|
706
|
+
let bypass_git = !is_git_repo;
|
|
707
|
+
|
|
708
|
+
MerkleNode::construct_node(
|
|
709
|
+
Path::new(&file_or_directory),
|
|
710
|
+
parent,
|
|
711
|
+
ignored_files,
|
|
712
|
+
absolute_root_path,
|
|
713
|
+
bypass_git,
|
|
714
|
+
)
|
|
715
|
+
.await
|
|
681
716
|
}
|
|
682
717
|
|
|
683
|
-
async fn new(
|
|
684
|
-
|
|
718
|
+
async fn new(
|
|
719
|
+
absolute_file_or_directory: PathBuf,
|
|
720
|
+
parent: ParentPtr,
|
|
721
|
+
ignored_files: &IgnoredFiles,
|
|
722
|
+
absolute_root_path: &str,
|
|
723
|
+
) -> MerkleNodePtr {
|
|
724
|
+
// check if the root is a git directory.
|
|
725
|
+
let is_git_repo =
|
|
726
|
+
match git_utils::is_git_directory(absolute_root_path).await {
|
|
727
|
+
Ok(is_git_repo) => is_git_repo,
|
|
728
|
+
Err(_e) => false,
|
|
729
|
+
};
|
|
730
|
+
let bypass_git = !is_git_repo;
|
|
731
|
+
|
|
732
|
+
info!(
|
|
733
|
+
"constructing node for absolute_file_or_directory: {:?}",
|
|
734
|
+
absolute_file_or_directory
|
|
735
|
+
);
|
|
736
|
+
info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
|
|
737
|
+
|
|
738
|
+
MerkleNode::construct_node(
|
|
739
|
+
Path::new(&absolute_file_or_directory),
|
|
740
|
+
parent,
|
|
741
|
+
ignored_files,
|
|
742
|
+
absolute_root_path,
|
|
743
|
+
bypass_git,
|
|
744
|
+
)
|
|
745
|
+
.await
|
|
685
746
|
}
|
|
686
747
|
|
|
687
748
|
/// NOT added to the tree by default.
|
|
@@ -692,38 +753,68 @@ impl MerkleNode {
|
|
|
692
753
|
// let file_hash = self.files.get_mut(&file_path).unwrap();
|
|
693
754
|
|
|
694
755
|
fn construct_node<'a>(
|
|
695
|
-
|
|
756
|
+
absolute_file_or_directory: &'a Path,
|
|
696
757
|
parent: ParentPtr,
|
|
758
|
+
ignored_files: &'a IgnoredFiles,
|
|
759
|
+
absolute_root_path: &'a str,
|
|
760
|
+
bypass_git: bool,
|
|
697
761
|
) -> PinnedFuture<'a, MerkleNodePtr> {
|
|
698
762
|
Box::pin(async move {
|
|
699
763
|
// check if it is a file
|
|
700
|
-
let path_str =
|
|
701
|
-
if
|
|
764
|
+
let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
|
|
765
|
+
if absolute_file_or_directory.is_file() {
|
|
702
766
|
return Arc::new(RwLock::new(
|
|
703
767
|
MerkleNode::construct_file_node_or_error_node(
|
|
704
|
-
|
|
768
|
+
absolute_file_or_directory,
|
|
705
769
|
parent,
|
|
770
|
+
ignored_files,
|
|
706
771
|
)
|
|
707
772
|
.await,
|
|
708
773
|
));
|
|
709
774
|
}
|
|
710
775
|
|
|
711
776
|
// check if the directory fails the bad dir test.
|
|
712
|
-
let is_bad_dir = file_utils::is_in_bad_dir(
|
|
777
|
+
let is_bad_dir = file_utils::is_in_bad_dir(absolute_file_or_directory);
|
|
713
778
|
if is_bad_dir.is_err() || is_bad_dir.unwrap_or(false) {
|
|
714
779
|
// println!("skipping directory: {}", path_str);
|
|
715
780
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
716
|
-
Some(
|
|
781
|
+
Some(absolute_file_or_directory),
|
|
717
782
|
Some("Directory is in bad dir!".to_string()),
|
|
718
783
|
)));
|
|
719
784
|
}
|
|
720
785
|
|
|
721
|
-
|
|
786
|
+
// check if the directory is git ignored
|
|
787
|
+
let is_git_ignored =
|
|
788
|
+
match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
|
|
789
|
+
.await
|
|
790
|
+
{
|
|
791
|
+
Ok(is_git_ignored) => is_git_ignored,
|
|
792
|
+
Err(e) => {
|
|
793
|
+
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
794
|
+
Some(absolute_file_or_directory),
|
|
795
|
+
Some(e.to_string()),
|
|
796
|
+
)));
|
|
797
|
+
}
|
|
798
|
+
};
|
|
799
|
+
|
|
800
|
+
if is_git_ignored && !bypass_git {
|
|
801
|
+
// println!("skipping directory: {}", path_str);
|
|
802
|
+
tracing::info!(
|
|
803
|
+
"skipping directory because its git ignored: {}",
|
|
804
|
+
path_str
|
|
805
|
+
);
|
|
806
|
+
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
807
|
+
Some(absolute_file_or_directory),
|
|
808
|
+
Some("Directory is git ignored!".to_string()),
|
|
809
|
+
)));
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
let entries = fs::read_dir(absolute_file_or_directory);
|
|
722
813
|
match entries {
|
|
723
814
|
Ok(_) => (),
|
|
724
815
|
Err(e) => {
|
|
725
816
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
726
|
-
Some(
|
|
817
|
+
Some(absolute_file_or_directory),
|
|
727
818
|
Some(e.to_string()),
|
|
728
819
|
)));
|
|
729
820
|
}
|
|
@@ -743,13 +834,19 @@ impl MerkleNode {
|
|
|
743
834
|
match entry {
|
|
744
835
|
Ok(entry) => {
|
|
745
836
|
children.push(
|
|
746
|
-
MerkleNode::construct_node(
|
|
747
|
-
.
|
|
837
|
+
MerkleNode::construct_node(
|
|
838
|
+
&entry.path(),
|
|
839
|
+
Some(node.clone()),
|
|
840
|
+
ignored_files,
|
|
841
|
+
absolute_root_path,
|
|
842
|
+
bypass_git,
|
|
843
|
+
)
|
|
844
|
+
.await,
|
|
748
845
|
);
|
|
749
846
|
}
|
|
750
847
|
Err(e) => {
|
|
751
848
|
children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
752
|
-
Some(
|
|
849
|
+
Some(absolute_file_or_directory),
|
|
753
850
|
Some(e.to_string()),
|
|
754
851
|
))));
|
|
755
852
|
}
|
|
@@ -769,23 +866,33 @@ impl MerkleNode {
|
|
|
769
866
|
}
|
|
770
867
|
|
|
771
868
|
async fn construct_file_node(
|
|
772
|
-
|
|
869
|
+
absolute_file_path: &Path,
|
|
773
870
|
parent: ParentPtr,
|
|
871
|
+
ignored_files: &IgnoredFiles,
|
|
774
872
|
) -> Result<MerkleNode, String> {
|
|
775
|
-
let file_str =
|
|
873
|
+
let file_str = absolute_file_path
|
|
776
874
|
.to_str()
|
|
777
875
|
.ok_or("Could not convert file path to string!")?
|
|
778
876
|
.to_string();
|
|
779
877
|
// first see if it passes the
|
|
780
|
-
match file_utils::is_good_file(
|
|
878
|
+
match file_utils::is_good_file(absolute_file_path) {
|
|
781
879
|
Ok(_) => {}
|
|
782
880
|
Err(e) => {
|
|
783
881
|
return Err(format!("File failed runtime checks! {}", e.to_string()));
|
|
784
882
|
}
|
|
785
883
|
}
|
|
786
884
|
|
|
885
|
+
// check if the file is in the git ignore buffer.
|
|
886
|
+
// this is a bug right because we are not checking absoluteness here.
|
|
887
|
+
match ignored_files.contains(&file_str) {
|
|
888
|
+
true => {
|
|
889
|
+
return Err(format!("File is in git ignore buffer!"));
|
|
890
|
+
}
|
|
891
|
+
false => {}
|
|
892
|
+
}
|
|
893
|
+
|
|
787
894
|
// read the file_content to a buffer
|
|
788
|
-
let file_content = match tokio::fs::read(
|
|
895
|
+
let file_content = match tokio::fs::read(absolute_file_path).await {
|
|
789
896
|
Ok(content) => content,
|
|
790
897
|
Err(e) => {
|
|
791
898
|
return Err(format!("Could not read file! {}", e.to_string()));
|
|
@@ -793,7 +900,11 @@ impl MerkleNode {
|
|
|
793
900
|
};
|
|
794
901
|
|
|
795
902
|
// check if the file passes runtime checks.
|
|
796
|
-
match file_utils::is_good_file_runtime_check(
|
|
903
|
+
match file_utils::is_good_file_runtime_check(
|
|
904
|
+
absolute_file_path,
|
|
905
|
+
&file_content,
|
|
906
|
+
)
|
|
907
|
+
.await
|
|
797
908
|
{
|
|
798
909
|
Ok(_) => {}
|
|
799
910
|
Err(e) => {
|
|
@@ -824,15 +935,22 @@ impl MerkleNode {
|
|
|
824
935
|
}
|
|
825
936
|
|
|
826
937
|
async fn construct_file_node_or_error_node(
|
|
827
|
-
|
|
938
|
+
absolute_file_path: &Path,
|
|
828
939
|
parent: ParentPtr,
|
|
940
|
+
ignored_files: &IgnoredFiles,
|
|
829
941
|
) -> MerkleNode {
|
|
830
|
-
let node = match MerkleNode::construct_file_node(
|
|
942
|
+
let node = match MerkleNode::construct_file_node(
|
|
943
|
+
absolute_file_path,
|
|
944
|
+
parent,
|
|
945
|
+
ignored_files,
|
|
946
|
+
)
|
|
947
|
+
.await
|
|
948
|
+
{
|
|
831
949
|
Ok(node) => node,
|
|
832
950
|
Err(e) => {
|
|
833
951
|
// println!("constructing error node. error: {}", e);
|
|
834
952
|
// println!("file_path: {:?}", file_path);
|
|
835
|
-
MerkleNode::empty_node(Some(
|
|
953
|
+
MerkleNode::empty_node(Some(absolute_file_path), Some(e))
|
|
836
954
|
}
|
|
837
955
|
};
|
|
838
956
|
|