@anysphere/file-service 0.0.0-c0e75c6f → 0.0.0-c2b75b32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +5 -0
- package/index.d.ts +6 -2
- package/package.json +10 -8
- package/src/file_utils.rs +12 -14
- package/src/git_utils.rs +166 -20
- package/src/lib.rs +136 -14
- package/src/merkle_tree/local_construction.rs +37 -8
- package/src/merkle_tree/mod.rs +320 -101
- package/src/test.rs +5 -0
package/Cargo.toml
CHANGED
|
@@ -17,6 +17,11 @@ tempfile = "3.8.0"
|
|
|
17
17
|
anyhow = "1.0.75"
|
|
18
18
|
tonic = "0.9.2"
|
|
19
19
|
prost = "0.11.9"
|
|
20
|
+
tracing = "0.1.37"
|
|
21
|
+
tracing-subscriber = "0.3.17"
|
|
22
|
+
tracing-appender = "0.2.2"
|
|
23
|
+
binaryornot = "1.0.0"
|
|
24
|
+
dunce = "1.0.1"
|
|
20
25
|
|
|
21
26
|
[build-dependencies]
|
|
22
27
|
napi-build = "2.0.1"
|
package/index.d.ts
CHANGED
|
@@ -4,13 +4,17 @@
|
|
|
4
4
|
/* auto-generated by NAPI-RS */
|
|
5
5
|
|
|
6
6
|
/**
 * Node.js handle to the native Merkle-tree file index.
 *
 * This declaration is generated by NAPI-RS; change the Rust `MerkleClient`
 * and regenerate rather than editing the signatures here.
 */
export class MerkleClient {
  /** Creates a client for the workspace rooted at `absoluteRootDirectory`. */
  constructor(absoluteRootDirectory: string)
  init(): Promise<void>
  /** Builds the Merkle tree from the client's current root directory. */
  computeMerkleTree(): Promise<void>
  updateFile(filePath: string): Promise<void>
  deleteFile(filePath: string): Promise<void>
  /** Resolves to the hash recorded for `relativePath`, which is joined onto the root directory. */
  getSubtreeHash(relativePath: string): Promise<string>
  getNumEmbeddableFiles(): Promise<number>
  getAllFiles(): Promise<Array<string>>
  getAllDirFilesToEmbed(absoluteFilePath: string): Promise<Array<string>>
  getNextFileToEmbed(): Promise<Array<string>>
  getSpline(absoluteFilePath: string): Promise<Array<string>>
  getHashesForFiles(files: Array<string>): Promise<Array<string>>
  /** Replaces the root directory stored on the client. */
  updateRootDirectory(rootDirectory: string): void
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@anysphere/file-service",
|
|
3
|
-
"version": "0.0.0-
|
|
3
|
+
"version": "0.0.0-c2b75b32",
|
|
4
4
|
"main": "index.js",
|
|
5
5
|
"types": "index.d.ts",
|
|
6
6
|
"napi": {
|
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
"additional": [
|
|
10
10
|
"aarch64-apple-darwin",
|
|
11
11
|
"aarch64-pc-windows-msvc",
|
|
12
|
-
"universal-apple-darwin"
|
|
12
|
+
"universal-apple-darwin",
|
|
13
|
+
"aarch64-unknown-linux-gnu"
|
|
13
14
|
]
|
|
14
15
|
}
|
|
15
16
|
},
|
|
@@ -35,11 +36,12 @@
|
|
|
35
36
|
"version": "napi version"
|
|
36
37
|
},
|
|
37
38
|
"optionalDependencies": {
|
|
38
|
-
"@anysphere/file-service-win32-x64-msvc": "0.0.0-
|
|
39
|
-
"@anysphere/file-service-darwin-x64": "0.0.0-
|
|
40
|
-
"@anysphere/file-service-linux-x64-gnu": "0.0.0-
|
|
41
|
-
"@anysphere/file-service-darwin-arm64": "0.0.0-
|
|
42
|
-
"@anysphere/file-service-win32-arm64-msvc": "0.0.0-
|
|
43
|
-
"@anysphere/file-service-darwin-universal": "0.0.0-
|
|
39
|
+
"@anysphere/file-service-win32-x64-msvc": "0.0.0-c2b75b32",
|
|
40
|
+
"@anysphere/file-service-darwin-x64": "0.0.0-c2b75b32",
|
|
41
|
+
"@anysphere/file-service-linux-x64-gnu": "0.0.0-c2b75b32",
|
|
42
|
+
"@anysphere/file-service-darwin-arm64": "0.0.0-c2b75b32",
|
|
43
|
+
"@anysphere/file-service-win32-arm64-msvc": "0.0.0-c2b75b32",
|
|
44
|
+
"@anysphere/file-service-darwin-universal": "0.0.0-c2b75b32",
|
|
45
|
+
"@anysphere/file-service-linux-arm64-gnu": "0.0.0-c2b75b32"
|
|
44
46
|
}
|
|
45
47
|
}
|
package/src/file_utils.rs
CHANGED
|
@@ -12,9 +12,8 @@ pub fn is_in_bad_dir(file_path: &Path) -> Result<bool, Error> {
|
|
|
12
12
|
let item_path = file_path
|
|
13
13
|
.to_str()
|
|
14
14
|
.ok_or(anyhow::anyhow!("Failed to convert path to string"))?;
|
|
15
|
-
let is_bad_dir =
|
|
16
|
-
|| item_path.contains(".git")
|
|
17
|
-
&& !(item_path.ends_with(".git") || item_path.ends_with("node_modules"));
|
|
15
|
+
let is_bad_dir =
|
|
16
|
+
item_path.contains("node_modules") || item_path.contains(".git");
|
|
18
17
|
Ok(is_bad_dir)
|
|
19
18
|
}
|
|
20
19
|
|
|
@@ -38,14 +37,14 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
38
37
|
|
|
39
38
|
match file_name {
|
|
40
39
|
"package-lock.json" | "pnpm-lock.yaml" | "yarn.lock" | "composer.lock"
|
|
41
|
-
| "Gemfile.lock" => {
|
|
40
|
+
| "Gemfile.lock" | "bun.lockb" => {
|
|
42
41
|
return Err(anyhow::anyhow!("File is just a lock file"));
|
|
43
42
|
}
|
|
44
43
|
_ => {}
|
|
45
44
|
}
|
|
46
45
|
|
|
47
46
|
match extension {
|
|
48
|
-
"lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" => {
|
|
47
|
+
"lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
|
|
49
48
|
return Err(anyhow::anyhow!("File is just a lock file"));
|
|
50
49
|
}
|
|
51
50
|
_ => {}
|
|
@@ -63,7 +62,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
63
62
|
Some(extension) => match extension.to_str() {
|
|
64
63
|
Some(ext_str) => {
|
|
65
64
|
if bad_extensions.contains(&ext_str) {
|
|
66
|
-
return Err(anyhow::anyhow!("
|
|
65
|
+
return Err(anyhow::anyhow!("Binary file excluded from indexing."));
|
|
67
66
|
}
|
|
68
67
|
}
|
|
69
68
|
None => {
|
|
@@ -89,10 +88,12 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
89
88
|
Ok(())
|
|
90
89
|
}
|
|
91
90
|
|
|
91
|
+
// use binaryornot::is_binary;
|
|
92
|
+
// use anyhow::Context;
|
|
92
93
|
// implement the buffer above:
|
|
93
94
|
pub async fn is_good_file_runtime_check(
|
|
94
95
|
file_path: &Path,
|
|
95
|
-
|
|
96
|
+
_buffer: &[u8],
|
|
96
97
|
) -> Result<(), Error> {
|
|
97
98
|
match get_file_size(file_path).await {
|
|
98
99
|
Ok(size) if size > 2 * 1024 * 1024 => {
|
|
@@ -102,13 +103,10 @@ pub async fn is_good_file_runtime_check(
|
|
|
102
103
|
_ => {}
|
|
103
104
|
}
|
|
104
105
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
|
|
110
|
-
}
|
|
111
|
-
}
|
|
106
|
+
// if is_binary(file_path).context("Failed to check if file is binary")? {
|
|
107
|
+
// return Err(anyhow::anyhow!("File is binary"));
|
|
108
|
+
// }
|
|
109
|
+
|
|
112
110
|
Ok(())
|
|
113
111
|
}
|
|
114
112
|
|
package/src/git_utils.rs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
|
-
use std::path::PathBuf;
|
|
3
2
|
use std::process::Command;
|
|
4
3
|
|
|
5
|
-
pub fn
|
|
4
|
+
pub fn list_ignored_files_and_directories(
|
|
6
5
|
workspace_root_path: &str,
|
|
7
|
-
|
|
6
|
+
should_return_absolute_paths: bool,
|
|
7
|
+
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
|
|
8
8
|
let mut gitignored_files = HashSet::new();
|
|
9
9
|
|
|
10
10
|
let commands = vec![
|
|
@@ -14,13 +14,69 @@ pub fn list_ignored_files(
|
|
|
14
14
|
"--others",
|
|
15
15
|
"--ignored",
|
|
16
16
|
"--exclude-standard",
|
|
17
|
+
"--directory",
|
|
18
|
+
"--no-empty-directory"
|
|
17
19
|
],
|
|
20
|
+
// FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
|
|
21
|
+
vec![
|
|
22
|
+
"sh",
|
|
23
|
+
"-c",
|
|
24
|
+
"git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard --directory --no-empty-directory | sed \"s|^|$path/|\"'",
|
|
25
|
+
],
|
|
26
|
+
];
|
|
27
|
+
|
|
28
|
+
for command in commands {
|
|
29
|
+
let output = Command::new(&command[0])
|
|
30
|
+
.args(&command[1..])
|
|
31
|
+
.current_dir(workspace_root_path)
|
|
32
|
+
.output()?;
|
|
33
|
+
|
|
34
|
+
if output.status.success() {
|
|
35
|
+
let files = String::from_utf8(output.stdout)?
|
|
36
|
+
.lines()
|
|
37
|
+
.filter(|line| !line.is_empty())
|
|
38
|
+
.map(|line| {
|
|
39
|
+
if should_return_absolute_paths {
|
|
40
|
+
let mut path = std::path::PathBuf::from(workspace_root_path);
|
|
41
|
+
path.push(line);
|
|
42
|
+
|
|
43
|
+
match path.canonicalize() {
|
|
44
|
+
Ok(canonical_path) => {
|
|
45
|
+
canonical_path.to_string_lossy().into_owned()
|
|
46
|
+
}
|
|
47
|
+
Err(_) => String::from(line),
|
|
48
|
+
}
|
|
49
|
+
} else {
|
|
50
|
+
String::from(line)
|
|
51
|
+
}
|
|
52
|
+
})
|
|
53
|
+
.collect::<HashSet<_>>();
|
|
54
|
+
|
|
55
|
+
gitignored_files.extend(files);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
Ok(gitignored_files)
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
pub fn list_ignored_files_with_absolute_paths(
|
|
63
|
+
workspace_root_path: &str,
|
|
64
|
+
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
|
|
65
|
+
let mut gitignored_files = HashSet::new();
|
|
66
|
+
|
|
67
|
+
let commands = vec![
|
|
18
68
|
vec![
|
|
19
69
|
"git",
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"--
|
|
23
|
-
"
|
|
70
|
+
"ls-files",
|
|
71
|
+
"--others",
|
|
72
|
+
"--ignored",
|
|
73
|
+
"--exclude-standard",
|
|
74
|
+
],
|
|
75
|
+
// FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
|
|
76
|
+
vec![
|
|
77
|
+
"sh",
|
|
78
|
+
"-c",
|
|
79
|
+
"git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
|
|
24
80
|
],
|
|
25
81
|
];
|
|
26
82
|
|
|
@@ -34,7 +90,7 @@ pub fn list_ignored_files(
|
|
|
34
90
|
let files = String::from_utf8(output.stdout)?
|
|
35
91
|
.lines()
|
|
36
92
|
.filter(|line| !line.is_empty())
|
|
37
|
-
.map(|line|
|
|
93
|
+
.map(|line| String::from(line))
|
|
38
94
|
.collect::<HashSet<_>>();
|
|
39
95
|
|
|
40
96
|
gitignored_files.extend(files);
|
|
@@ -57,6 +113,18 @@ pub async fn is_git_ignored(
|
|
|
57
113
|
Ok(output.status.success())
|
|
58
114
|
}
|
|
59
115
|
|
|
116
|
+
pub async fn is_git_directory(
|
|
117
|
+
workspace_root_path: &str,
|
|
118
|
+
) -> Result<bool, anyhow::Error> {
|
|
119
|
+
let output = tokio::process::Command::new("git")
|
|
120
|
+
.args(&["rev-parse", "--is-inside-work-tree"])
|
|
121
|
+
.current_dir(workspace_root_path)
|
|
122
|
+
.output()
|
|
123
|
+
.await?;
|
|
124
|
+
|
|
125
|
+
Ok(output.status.success())
|
|
126
|
+
}
|
|
127
|
+
|
|
60
128
|
#[cfg(test)]
|
|
61
129
|
mod tests {
|
|
62
130
|
use super::*;
|
|
@@ -66,7 +134,9 @@ mod tests {
|
|
|
66
134
|
#[test]
|
|
67
135
|
fn test_no_ignored_files() {
|
|
68
136
|
let dir = tempfile::tempdir().unwrap();
|
|
69
|
-
let gitignored_files =
|
|
137
|
+
let gitignored_files =
|
|
138
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
139
|
+
.unwrap();
|
|
70
140
|
Command::new("git")
|
|
71
141
|
.args(&["init"])
|
|
72
142
|
.current_dir(dir.path())
|
|
@@ -92,13 +162,15 @@ mod tests {
|
|
|
92
162
|
.current_dir(dir.path())
|
|
93
163
|
.output()
|
|
94
164
|
.unwrap();
|
|
95
|
-
let gitignored_files =
|
|
165
|
+
let gitignored_files =
|
|
166
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
167
|
+
.unwrap();
|
|
96
168
|
println!(
|
|
97
169
|
"ignored files for test_one_ignored_file: {:?}",
|
|
98
170
|
gitignored_files
|
|
99
171
|
);
|
|
100
172
|
// assert_eq!(gitignored_files.len(), 1);
|
|
101
|
-
assert!(gitignored_files.contains(&
|
|
173
|
+
assert!(gitignored_files.contains(&String::from("ignored.txt")));
|
|
102
174
|
}
|
|
103
175
|
|
|
104
176
|
#[test]
|
|
@@ -121,19 +193,86 @@ mod tests {
|
|
|
121
193
|
.current_dir(dir.path())
|
|
122
194
|
.output()
|
|
123
195
|
.unwrap();
|
|
124
|
-
let gitignored_files =
|
|
196
|
+
let gitignored_files =
|
|
197
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
198
|
+
.unwrap();
|
|
125
199
|
println!(
|
|
126
200
|
"ignored files for test_multiple_ignored_files: {:?}",
|
|
127
201
|
gitignored_files
|
|
128
202
|
);
|
|
129
203
|
// assert_eq!(gitignored_files.len(), 2);
|
|
130
|
-
assert!(gitignored_files.contains(&
|
|
131
|
-
assert!(gitignored_files.contains(&
|
|
204
|
+
assert!(gitignored_files.contains(&String::from("ignored1.txt")));
|
|
205
|
+
assert!(gitignored_files.contains(&String::from("ignored2.txt")));
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/// Ignored files inside a git submodule must be reported relative to the
/// outer repository, i.e. prefixed with the submodule's directory name.
#[test]
fn test_git_submodule_ignored_files() {
    let dir = tempfile::tempdir().unwrap();
    let submodule_path = dir.path().join("submodule");
    std::fs::create_dir(&submodule_path).unwrap();

    // Runs `git <args>` inside `cwd` and hands back the captured output.
    // NOTE(review): exit statuses are only printed, never asserted, so a failing
    // git step shows up later as a failed `contains` assertion.
    let git = |args: &[&str], cwd: &std::path::Path| {
        Command::new("git")
            .args(args)
            .current_dir(cwd)
            .output()
            .unwrap()
    };

    println!("git init output: {:?}", git(&["init"], &submodule_path));

    // Two files matched by the submodule's own .gitignore, then the .gitignore itself.
    for (name, contents) in [
        ("ignored.txt", "This is an ignored file."),
        ("ignored2.txt", "This is another ignored file."),
        (".gitignore", "*.txt"),
    ] {
        let mut file = File::create(submodule_path.join(name)).unwrap();
        writeln!(file, "{}", contents).unwrap();
    }

    println!("git init output: {:?}", git(&["init"], dir.path()));

    // make a commit in the submodule
    println!("git add output: {:?}", git(&["add", "."], &submodule_path));
    println!(
        "git commit output: {:?}",
        git(&["commit", "-m", "initial commit"], &submodule_path)
    );

    println!(
        "git submodule add output: {:?}",
        git(&["submodule", "add", "./submodule"], dir.path())
    );

    let gitignored_files =
        list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
            .unwrap();
    println!(
        "ignored files for test_git_submodule_ignored_files: {:?}",
        gitignored_files
    );
    for expected in ["submodule/ignored.txt", "submodule/ignored2.txt"] {
        assert!(gitignored_files.contains(&String::from(expected)));
    }
}
|
|
133
271
|
|
|
134
272
|
#[test]
|
|
135
273
|
fn test_multiple_ignored_files_in_current_dir() {
|
|
136
|
-
let gitignored_files =
|
|
274
|
+
let gitignored_files =
|
|
275
|
+
list_ignored_files_and_directories(".", false).unwrap();
|
|
137
276
|
assert!(gitignored_files.len() > 1);
|
|
138
277
|
|
|
139
278
|
// print a sample of the ignored files
|
|
@@ -147,7 +286,6 @@ mod tests {
|
|
|
147
286
|
}
|
|
148
287
|
}
|
|
149
288
|
|
|
150
|
-
|
|
151
289
|
#[tokio::test]
|
|
152
290
|
async fn test_file_not_ignored() {
|
|
153
291
|
let dir = tempfile::tempdir().unwrap();
|
|
@@ -160,7 +298,10 @@ mod tests {
|
|
|
160
298
|
.current_dir(dir.path())
|
|
161
299
|
.output()
|
|
162
300
|
.unwrap();
|
|
163
|
-
let is_ignored =
|
|
301
|
+
let is_ignored =
|
|
302
|
+
is_git_ignored(dir.path().to_str().unwrap(), "not_ignored.txt")
|
|
303
|
+
.await
|
|
304
|
+
.unwrap();
|
|
164
305
|
assert_eq!(is_ignored, false);
|
|
165
306
|
}
|
|
166
307
|
|
|
@@ -180,7 +321,10 @@ mod tests {
|
|
|
180
321
|
.current_dir(dir.path())
|
|
181
322
|
.output()
|
|
182
323
|
.unwrap();
|
|
183
|
-
let is_ignored =
|
|
324
|
+
let is_ignored =
|
|
325
|
+
is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt")
|
|
326
|
+
.await
|
|
327
|
+
.unwrap();
|
|
184
328
|
assert_eq!(is_ignored, true);
|
|
185
329
|
}
|
|
186
330
|
|
|
@@ -200,8 +344,10 @@ mod tests {
|
|
|
200
344
|
.current_dir(dir.path())
|
|
201
345
|
.output()
|
|
202
346
|
.unwrap();
|
|
203
|
-
let is_ignored =
|
|
347
|
+
let is_ignored =
|
|
348
|
+
is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt")
|
|
349
|
+
.await
|
|
350
|
+
.unwrap();
|
|
204
351
|
assert_eq!(is_ignored, true);
|
|
205
352
|
}
|
|
206
|
-
|
|
207
353
|
}
|
package/src/lib.rs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#![deny(clippy::all)]
|
|
2
|
+
#![deny(unsafe_op_in_unsafe_fn)]
|
|
2
3
|
pub mod file_utils;
|
|
3
4
|
pub mod git_utils;
|
|
4
5
|
pub mod merkle_tree;
|
|
@@ -6,6 +7,10 @@ pub mod merkle_tree;
|
|
|
6
7
|
use std::vec;
|
|
7
8
|
|
|
8
9
|
use merkle_tree::{LocalConstruction, MerkleTree};
|
|
10
|
+
use tracing::{info, Level};
|
|
11
|
+
use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
|
12
|
+
use tracing_subscriber::fmt;
|
|
13
|
+
use anyhow::Context;
|
|
9
14
|
|
|
10
15
|
#[macro_use]
|
|
11
16
|
extern crate napi_derive;
|
|
@@ -13,16 +18,46 @@ extern crate napi_derive;
|
|
|
13
18
|
#[napi]
|
|
14
19
|
pub struct MerkleClient {
|
|
15
20
|
tree: MerkleTree,
|
|
16
|
-
|
|
21
|
+
absolute_root_directory: String,
|
|
22
|
+
_guard: tracing_appender::non_blocking::WorkerGuard,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
|
|
26
|
+
let file_appender =
|
|
27
|
+
RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
|
|
28
|
+
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
|
|
29
|
+
let subscriber = fmt::Subscriber::builder()
|
|
30
|
+
.with_max_level(Level::TRACE)
|
|
31
|
+
.with_writer(non_blocking)
|
|
32
|
+
.with_ansi(false)
|
|
33
|
+
.with_line_number(true)
|
|
34
|
+
.finish();
|
|
35
|
+
|
|
36
|
+
let _ = tracing::subscriber::set_global_default(subscriber);
|
|
37
|
+
|
|
38
|
+
_guard
|
|
17
39
|
}
|
|
18
40
|
|
|
19
41
|
#[napi]
|
|
20
42
|
impl MerkleClient {
|
|
21
43
|
#[napi(constructor)]
|
|
22
|
-
pub fn new(
|
|
44
|
+
pub fn new(absolute_root_directory: String) -> MerkleClient {
|
|
45
|
+
let _guard = init_logger();
|
|
46
|
+
|
|
47
|
+
// let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
|
|
48
|
+
// use dunce::canonicalize;
|
|
49
|
+
// let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
|
|
50
|
+
// Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
|
|
51
|
+
// Err(e) => {
|
|
52
|
+
// info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
|
|
53
|
+
// absolute_root_directory
|
|
54
|
+
// }
|
|
55
|
+
// };
|
|
56
|
+
|
|
23
57
|
MerkleClient {
|
|
24
58
|
tree: MerkleTree::empty_tree(),
|
|
25
|
-
|
|
59
|
+
absolute_root_directory,
|
|
60
|
+
_guard,
|
|
26
61
|
}
|
|
27
62
|
}
|
|
28
63
|
|
|
@@ -31,7 +66,10 @@ impl MerkleClient {
|
|
|
31
66
|
// 1. compute the merkle tree
|
|
32
67
|
// 2. update the backend
|
|
33
68
|
// 3. sync with the remote
|
|
34
|
-
|
|
69
|
+
info!("Merkle tree compute started!");
|
|
70
|
+
unsafe {
|
|
71
|
+
self.compute_merkle_tree().await?;
|
|
72
|
+
}
|
|
35
73
|
|
|
36
74
|
Ok(())
|
|
37
75
|
}
|
|
@@ -40,12 +78,12 @@ impl MerkleClient {
|
|
|
40
78
|
unimplemented!("Interrupt is not implemented yet");
|
|
41
79
|
}
|
|
42
80
|
|
|
43
|
-
|
|
81
|
+
#[napi]
|
|
44
82
|
pub async unsafe fn compute_merkle_tree(
|
|
45
83
|
&mut self,
|
|
46
84
|
) -> Result<(), napi::Error> {
|
|
47
85
|
let t =
|
|
48
|
-
MerkleTree::construct_merkle_tree(self.
|
|
86
|
+
MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone()).await;
|
|
49
87
|
|
|
50
88
|
match t {
|
|
51
89
|
Ok(tree) => {
|
|
@@ -72,15 +110,37 @@ impl MerkleClient {
|
|
|
72
110
|
#[napi]
|
|
73
111
|
pub async fn get_subtree_hash(
|
|
74
112
|
&self,
|
|
75
|
-
|
|
113
|
+
relative_path: String,
|
|
76
114
|
) -> Result<String, napi::Error> {
|
|
77
|
-
|
|
115
|
+
|
|
116
|
+
let relative_path_without_leading_slash = match relative_path.strip_prefix('.') {
|
|
117
|
+
Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
|
|
118
|
+
None => relative_path.as_str(),
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
let absolute_path = if !relative_path_without_leading_slash.is_empty() {
|
|
122
|
+
std::path::Path::new(&self.absolute_root_directory).join(relative_path_without_leading_slash)
|
|
123
|
+
} else {
|
|
124
|
+
std::path::Path::new(&self.absolute_root_directory).to_path_buf()
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
let absolute_path_string = match absolute_path.to_str() {
|
|
128
|
+
Some(path) => path.to_string(),
|
|
129
|
+
None => {
|
|
130
|
+
return Err(napi::Error::new(
|
|
131
|
+
napi::Status::Unknown,
|
|
132
|
+
format!("some string error")
|
|
133
|
+
))
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
let hash = self.tree.get_subtree_hash(absolute_path_string.as_str()).await;
|
|
78
138
|
|
|
79
139
|
match hash {
|
|
80
140
|
Ok(hash) => Ok(hash),
|
|
81
141
|
Err(e) => Err(napi::Error::new(
|
|
82
142
|
napi::Status::Unknown,
|
|
83
|
-
format!("Error in get_subtree_hash: {:?}", e)
|
|
143
|
+
format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
|
|
84
144
|
)),
|
|
85
145
|
}
|
|
86
146
|
}
|
|
@@ -93,7 +153,29 @@ impl MerkleClient {
|
|
|
93
153
|
Ok(num) => Ok(num),
|
|
94
154
|
Err(e) => Err(napi::Error::new(
|
|
95
155
|
napi::Status::Unknown,
|
|
96
|
-
format!("Error in get_num_embeddable_files: {:?}", e)
|
|
156
|
+
format!("Error in get_num_embeddable_files: {:?}", e)
|
|
157
|
+
)),
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
pub async fn get_num_embeddable_files_in_subtree(
|
|
162
|
+
&self,
|
|
163
|
+
relative_path: String,
|
|
164
|
+
) -> Result<i32, napi::Error> {
|
|
165
|
+
let absolute_path = std::path::Path::new(&self.absolute_root_directory)
|
|
166
|
+
.join(relative_path)
|
|
167
|
+
.canonicalize()?;
|
|
168
|
+
|
|
169
|
+
let num = self
|
|
170
|
+
.tree
|
|
171
|
+
.get_num_embeddable_files_in_subtree(absolute_path)
|
|
172
|
+
.await;
|
|
173
|
+
|
|
174
|
+
match num {
|
|
175
|
+
Ok(num) => Ok(num),
|
|
176
|
+
Err(e) => Err(napi::Error::new(
|
|
177
|
+
napi::Status::Unknown,
|
|
178
|
+
format!("Error in get_num_embeddable_files_in_subtree: {:?}", e)
|
|
97
179
|
)),
|
|
98
180
|
}
|
|
99
181
|
}
|
|
@@ -106,7 +188,29 @@ impl MerkleClient {
|
|
|
106
188
|
Ok(files) => Ok(files),
|
|
107
189
|
Err(e) => Err(napi::Error::new(
|
|
108
190
|
napi::Status::Unknown,
|
|
109
|
-
format!("Error in get_all_files: {:?}", e)
|
|
191
|
+
format!("Error in get_all_files: {:?}", e)
|
|
192
|
+
)),
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
#[napi]
|
|
197
|
+
pub async fn get_all_dir_files_to_embed(
|
|
198
|
+
&self,
|
|
199
|
+
absolute_file_path: String,
|
|
200
|
+
) -> Result<Vec<String>, napi::Error> {
|
|
201
|
+
// let absolute_path = absolute_file_path.to_lowercase();
|
|
202
|
+
// let absolute_path_str = absolute_path.as_str();
|
|
203
|
+
|
|
204
|
+
let files = self
|
|
205
|
+
.tree
|
|
206
|
+
.get_all_dir_files_to_embed(absolute_file_path.as_str())
|
|
207
|
+
.await;
|
|
208
|
+
|
|
209
|
+
match files {
|
|
210
|
+
Ok(files) => Ok(files),
|
|
211
|
+
Err(e) => Err(napi::Error::new(
|
|
212
|
+
napi::Status::Unknown,
|
|
213
|
+
format!("Error in get_all_dir_files_to_embed: {:?}", e),
|
|
110
214
|
)),
|
|
111
215
|
}
|
|
112
216
|
}
|
|
@@ -125,7 +229,6 @@ impl MerkleClient {
|
|
|
125
229
|
|
|
126
230
|
let ret = vec![file];
|
|
127
231
|
let ret = ret.into_iter().chain(path.into_iter()).collect::<Vec<_>>();
|
|
128
|
-
|
|
129
232
|
Ok(ret)
|
|
130
233
|
}
|
|
131
234
|
Err(e) => Err(napi::Error::new(
|
|
@@ -135,6 +238,25 @@ impl MerkleClient {
|
|
|
135
238
|
}
|
|
136
239
|
}
|
|
137
240
|
|
|
241
|
+
// FIXME(sualeh): get_spline
|
|
242
|
+
#[napi]
|
|
243
|
+
pub async fn get_spline(
|
|
244
|
+
&self,
|
|
245
|
+
absolute_file_path: String,
|
|
246
|
+
) -> Result<Vec<String>, napi::Error> {
|
|
247
|
+
// let absolute_path = absolute_file_path.to_lowercase();
|
|
248
|
+
// let absolute_path_str = absolute_path.as_str();
|
|
249
|
+
let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
|
|
250
|
+
|
|
251
|
+
match spline {
|
|
252
|
+
Ok(spline) => Ok(spline),
|
|
253
|
+
Err(e) => Err(napi::Error::new(
|
|
254
|
+
napi::Status::Unknown,
|
|
255
|
+
format!("Error in get_spline: {:?}", e),
|
|
256
|
+
)),
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
138
260
|
#[napi]
|
|
139
261
|
pub async fn get_hashes_for_files(
|
|
140
262
|
&self,
|
|
@@ -151,8 +273,8 @@ impl MerkleClient {
|
|
|
151
273
|
}
|
|
152
274
|
}
|
|
153
275
|
|
|
154
|
-
|
|
276
|
+
#[napi]
|
|
155
277
|
pub fn update_root_directory(&mut self, root_directory: String) {
|
|
156
|
-
self.
|
|
278
|
+
self.absolute_root_directory = root_directory;
|
|
157
279
|
}
|
|
158
280
|
}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
use crate::git_utils;
|
|
1
2
|
use crate::merkle_tree::{
|
|
2
3
|
File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
|
|
3
4
|
};
|
|
4
5
|
|
|
5
6
|
use super::{LocalConstruction, MerkleTree};
|
|
6
|
-
use std::
|
|
7
|
-
use std::
|
|
7
|
+
use std::collections::{BTreeMap, HashSet};
|
|
8
|
+
use std::path::{Path, PathBuf};
|
|
8
9
|
use tonic::async_trait;
|
|
9
10
|
|
|
10
11
|
#[async_trait]
|
|
@@ -28,32 +29,50 @@ impl LocalConstruction for MerkleTree {
|
|
|
28
29
|
/// 3. construct merkle tree
|
|
29
30
|
/// 4. return merkle tree
|
|
30
31
|
async fn construct_merkle_tree(
|
|
31
|
-
|
|
32
|
+
absolute_path_to_root_directory: String,
|
|
32
33
|
) -> Result<MerkleTree, anyhow::Error> {
|
|
33
|
-
let path = PathBuf::from(
|
|
34
|
+
let path = PathBuf::from(absolute_path_to_root_directory.clone());
|
|
34
35
|
if !path.exists() {
|
|
35
36
|
// FIXME: we should report this via a good logger.
|
|
36
37
|
panic!("Root directory does not exist!");
|
|
37
38
|
}
|
|
38
39
|
|
|
39
|
-
|
|
40
|
+
// 1. get all the gitignored files
|
|
41
|
+
let git_ignored_files_and_dirs =
|
|
42
|
+
match git_utils::list_ignored_files_and_directories(
|
|
43
|
+
absolute_path_to_root_directory.as_str(),
|
|
44
|
+
true,
|
|
45
|
+
) {
|
|
46
|
+
Ok(git_ignored) => git_ignored,
|
|
47
|
+
Err(_e) => HashSet::new(),
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
let root_node = MerkleNode::new(
|
|
51
|
+
path,
|
|
52
|
+
None,
|
|
53
|
+
&git_ignored_files_and_dirs,
|
|
54
|
+
absolute_path_to_root_directory.as_str(),
|
|
55
|
+
)
|
|
56
|
+
.await;
|
|
40
57
|
let mut mt = MerkleTree {
|
|
41
58
|
root: root_node,
|
|
42
|
-
files:
|
|
43
|
-
root_path:
|
|
59
|
+
files: BTreeMap::new(),
|
|
60
|
+
root_path: absolute_path_to_root_directory,
|
|
44
61
|
cursor: None,
|
|
62
|
+
git_ignored_files_and_dirs: git_ignored_files_and_dirs,
|
|
45
63
|
};
|
|
46
64
|
|
|
47
65
|
// we now iterate over all the nodes and add them to the hashmap
|
|
48
66
|
// TODO(later): i can make this parallel.
|
|
49
67
|
fn add_nodes_to_hashmap<'a>(
|
|
50
68
|
node: &'a MerkleNodePtr,
|
|
51
|
-
files: &'a mut
|
|
69
|
+
files: &'a mut BTreeMap<String, File>,
|
|
52
70
|
) -> PinnedFuture<'a, ()> {
|
|
53
71
|
Box::pin(async move {
|
|
54
72
|
let node_reader = node.read().await;
|
|
55
73
|
match &node_reader.node_type {
|
|
56
74
|
NodeType::Branch(n) => {
|
|
75
|
+
tracing::info!("Branch: {:?}", n.0);
|
|
57
76
|
let children = &n.1;
|
|
58
77
|
files.insert(n.0.clone(), File { node: node.clone() });
|
|
59
78
|
for child in children {
|
|
@@ -62,6 +81,13 @@ impl LocalConstruction for MerkleTree {
|
|
|
62
81
|
}
|
|
63
82
|
NodeType::File(file_name) => {
|
|
64
83
|
let f = File { node: node.clone() };
|
|
84
|
+
|
|
85
|
+
// i dont reallly like this :(((
|
|
86
|
+
// let canonical_file_name = match dunce::canonicalize(file_name) {
|
|
87
|
+
// Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
|
|
88
|
+
// Err(_) => file_name.clone(),
|
|
89
|
+
// };
|
|
90
|
+
|
|
65
91
|
files.insert(file_name.clone(), f);
|
|
66
92
|
}
|
|
67
93
|
NodeType::ErrorNode(_) => {
|
|
@@ -73,6 +99,9 @@ impl LocalConstruction for MerkleTree {
|
|
|
73
99
|
|
|
74
100
|
add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
|
|
75
101
|
|
|
102
|
+
tracing::info!("Merkle tree compute finished!");
|
|
103
|
+
tracing::info!("Merkle tree: {}", mt);
|
|
104
|
+
|
|
76
105
|
Ok(mt)
|
|
77
106
|
}
|
|
78
107
|
|
package/src/merkle_tree/mod.rs
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
|
+
use crate::git_utils;
|
|
2
|
+
|
|
1
3
|
use super::file_utils;
|
|
2
4
|
use sha2::Digest;
|
|
5
|
+
use std::collections::{BTreeMap, HashSet};
|
|
3
6
|
use std::path::PathBuf;
|
|
4
|
-
use std::
|
|
7
|
+
use std::vec;
|
|
8
|
+
use std::{fs, path::Path, sync::Arc};
|
|
5
9
|
use tokio::sync::RwLock;
|
|
6
10
|
use tonic::async_trait;
|
|
11
|
+
use tracing::info;
|
|
7
12
|
pub mod local_construction;
|
|
8
13
|
pub mod test;
|
|
9
14
|
|
|
@@ -12,8 +17,9 @@ pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
|
|
|
12
17
|
pub struct MerkleTree {
|
|
13
18
|
root_path: String,
|
|
14
19
|
root: MerkleNodePtr,
|
|
15
|
-
files:
|
|
16
|
-
cursor: Option<
|
|
20
|
+
files: BTreeMap<String, File>,
|
|
21
|
+
cursor: Option<usize>,
|
|
22
|
+
git_ignored_files_and_dirs: HashSet<String>,
|
|
17
23
|
}
|
|
18
24
|
|
|
19
25
|
#[derive(Debug)]
|
|
@@ -87,27 +93,37 @@ impl MerkleTree {
|
|
|
87
93
|
pub fn empty_tree() -> MerkleTree {
|
|
88
94
|
MerkleTree {
|
|
89
95
|
root: Arc::new(RwLock::new(MerkleNode::empty_node(None, None))),
|
|
90
|
-
files:
|
|
96
|
+
files: BTreeMap::new(),
|
|
91
97
|
root_path: "".to_string(),
|
|
92
98
|
cursor: None,
|
|
99
|
+
git_ignored_files_and_dirs: HashSet::new(),
|
|
93
100
|
}
|
|
94
101
|
}
|
|
95
102
|
|
|
96
103
|
pub async fn get_subtree_hash(
|
|
97
104
|
&self,
|
|
98
|
-
|
|
105
|
+
absolute_path: &str,
|
|
99
106
|
) -> Result<String, anyhow::Error> {
|
|
100
|
-
let
|
|
101
|
-
let node = match self.files.get(path.to_str().unwrap()) {
|
|
107
|
+
let node = match self.files.get(absolute_path) {
|
|
102
108
|
Some(file) => file.node.clone(),
|
|
103
109
|
None => {
|
|
104
|
-
|
|
110
|
+
let all_files: Vec<String> = self.files.keys().cloned().collect();
|
|
111
|
+
return Err(anyhow::anyhow!(
|
|
112
|
+
"Could not find file in tree! Looking for: {}. All files: {:?}",
|
|
113
|
+
absolute_path,
|
|
114
|
+
all_files
|
|
115
|
+
));
|
|
105
116
|
}
|
|
106
117
|
};
|
|
107
118
|
|
|
108
119
|
let node_reader = node.read().await;
|
|
109
120
|
let node_hash = node_reader.hash.clone();
|
|
110
121
|
|
|
122
|
+
info!(
|
|
123
|
+
"get_subtree_hash for path: {}, node_hash: {}",
|
|
124
|
+
absolute_path, node_hash
|
|
125
|
+
);
|
|
126
|
+
|
|
111
127
|
Ok(node_hash)
|
|
112
128
|
}
|
|
113
129
|
|
|
@@ -132,6 +148,43 @@ impl MerkleTree {
|
|
|
132
148
|
Ok(count)
|
|
133
149
|
}
|
|
134
150
|
|
|
151
|
+
pub async fn get_num_embeddable_files_in_subtree(
|
|
152
|
+
&self,
|
|
153
|
+
absolute_path: PathBuf,
|
|
154
|
+
) -> Result<i32, anyhow::Error> {
|
|
155
|
+
let mut count = 0;
|
|
156
|
+
|
|
157
|
+
let absolute_path = match absolute_path.to_str() {
|
|
158
|
+
Some(s) => s.to_string(),
|
|
159
|
+
None => {
|
|
160
|
+
return Err(anyhow::anyhow!(
|
|
161
|
+
"get_num_embeddable_files_in_subtree: Failed to convert path to string"
|
|
162
|
+
))
|
|
163
|
+
}
|
|
164
|
+
};
|
|
165
|
+
|
|
166
|
+
// TODO(sualeh): worth keeping this list sorted. its now a btree
|
|
167
|
+
|
|
168
|
+
for (_, file) in &self.files {
|
|
169
|
+
let file_reader = file.node.read().await;
|
|
170
|
+
match &file_reader.node_type {
|
|
171
|
+
NodeType::File(file_name) => {
|
|
172
|
+
if file_name.contains(&absolute_path) {
|
|
173
|
+
count += 1;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
NodeType::Branch(_) => {
|
|
177
|
+
continue;
|
|
178
|
+
}
|
|
179
|
+
NodeType::ErrorNode(_) => {
|
|
180
|
+
continue;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
Ok(count)
|
|
186
|
+
}
|
|
187
|
+
|
|
135
188
|
pub async fn get_all_files(&self) -> Result<Vec<String>, anyhow::Error> {
|
|
136
189
|
let mut files = Vec::new();
|
|
137
190
|
|
|
@@ -188,83 +241,119 @@ impl MerkleTree {
|
|
|
188
241
|
pub async fn get_next_file_to_embed(
|
|
189
242
|
&mut self,
|
|
190
243
|
) -> Result<(String, Vec<String>), anyhow::Error> {
|
|
191
|
-
// the
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
// if the path is not empty, we can iterate till we find the first child.
|
|
202
|
-
let mut potential_first_child = self.root.clone();
|
|
203
|
-
let mut is_branch = true;
|
|
204
|
-
let mut path = Vec::new();
|
|
205
|
-
|
|
206
|
-
while is_branch {
|
|
207
|
-
let node = {
|
|
208
|
-
let potential_first_child_reader = potential_first_child.read().await;
|
|
209
|
-
match &potential_first_child_reader.node_type {
|
|
210
|
-
NodeType::Branch(branch) => branch.clone(),
|
|
211
|
-
NodeType::File(_) => {
|
|
212
|
-
return Err(anyhow::anyhow!(
|
|
213
|
-
"get_next_file_to_embed: This should not happen! the branch happened to be file."
|
|
214
|
-
));
|
|
215
|
-
}
|
|
216
|
-
NodeType::ErrorNode(_) => {
|
|
217
|
-
return Err(anyhow::anyhow!("Cursor is an error node!"));
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
};
|
|
244
|
+
// if the cursor is none, set it to 0
|
|
245
|
+
let cursor = match self.cursor {
|
|
246
|
+
Some(cursor) => cursor,
|
|
247
|
+
None => {
|
|
248
|
+
self.cursor = Some(0);
|
|
249
|
+
0
|
|
250
|
+
}
|
|
251
|
+
};
|
|
221
252
|
|
|
222
|
-
|
|
223
|
-
|
|
253
|
+
// get the thing at the cursor. while we dont find a file, we keep incrementing the cursor.
|
|
254
|
+
let mut cursor = cursor;
|
|
255
|
+
loop {
|
|
256
|
+
// O(log n)
|
|
257
|
+
let file = match self.files.values().nth(cursor) {
|
|
258
|
+
Some(file) => file,
|
|
259
|
+
None => {
|
|
260
|
+
return Err(anyhow::anyhow!("Could not find file to embed!"));
|
|
261
|
+
}
|
|
262
|
+
};
|
|
224
263
|
|
|
225
|
-
|
|
226
|
-
|
|
264
|
+
let file_reader = file.node.read().await;
|
|
265
|
+
match &file_reader.node_type {
|
|
266
|
+
NodeType::File(f) => {
|
|
267
|
+
// update the cursor.
|
|
268
|
+
self.cursor = Some(cursor + 1);
|
|
269
|
+
let spline = self.get_spline(f).await?;
|
|
270
|
+
return Ok((f.clone(), spline));
|
|
271
|
+
}
|
|
272
|
+
NodeType::Branch(_) => {
|
|
273
|
+
cursor += 1;
|
|
274
|
+
continue;
|
|
275
|
+
}
|
|
276
|
+
NodeType::ErrorNode(_) => {
|
|
277
|
+
cursor += 1;
|
|
278
|
+
continue;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
227
283
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
284
|
+
pub async fn get_all_dir_files_to_embed(
|
|
285
|
+
&self,
|
|
286
|
+
absolute_path: &str,
|
|
287
|
+
) -> Result<Vec<String>, anyhow::Error> {
|
|
288
|
+
let mut files = Vec::new();
|
|
232
289
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
is_branch = true;
|
|
290
|
+
for (file_path, f) in &self.files {
|
|
291
|
+
if !file_path.contains(absolute_path) {
|
|
292
|
+
continue;
|
|
293
|
+
}
|
|
238
294
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
return Err(anyhow::anyhow!("Root has no children!"));
|
|
295
|
+
match f.node.read().await.node_type {
|
|
296
|
+
NodeType::File(_) => {
|
|
297
|
+
files.push(file_path.clone());
|
|
298
|
+
}
|
|
299
|
+
NodeType::Branch(_) => {
|
|
300
|
+
continue;
|
|
301
|
+
}
|
|
302
|
+
NodeType::ErrorNode(_) => {
|
|
303
|
+
continue;
|
|
249
304
|
}
|
|
250
305
|
}
|
|
251
306
|
}
|
|
252
307
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
// UNWRAP checked and fine. see the none case above.
|
|
257
|
-
let cursor_name = self.cursor.as_ref().unwrap();
|
|
258
|
-
let cursor_reader = cursor_name.read().await;
|
|
308
|
+
Ok(files)
|
|
309
|
+
}
|
|
259
310
|
|
|
260
|
-
|
|
311
|
+
// TODO(sualeh): i need tests for this!!
|
|
312
|
+
pub async fn get_spline(
|
|
313
|
+
&self,
|
|
314
|
+
absolute_path: &str,
|
|
315
|
+
) -> Result<Vec<String>, anyhow::Error> {
|
|
316
|
+
info!("get_spline called with absolute_path: {}", absolute_path);
|
|
317
|
+
let mut files = Vec::new();
|
|
261
318
|
|
|
262
|
-
|
|
319
|
+
let current_node = match self.files.get(absolute_path) {
|
|
320
|
+
Some(node) => {
|
|
321
|
+
info!("Found node for absolute_path: {}", absolute_path);
|
|
322
|
+
node.node.clone()
|
|
323
|
+
}
|
|
324
|
+
None => {
|
|
325
|
+
info!("File not found for absolute_path: {}", absolute_path);
|
|
326
|
+
return Err(anyhow::anyhow!("File not found: {}", absolute_path));
|
|
327
|
+
}
|
|
328
|
+
};
|
|
263
329
|
|
|
264
|
-
|
|
265
|
-
|
|
330
|
+
let mut stack = Vec::new();
|
|
331
|
+
stack.push(current_node);
|
|
332
|
+
|
|
333
|
+
while let Some(node) = stack.pop() {
|
|
334
|
+
let parent = node.read().await.parent.clone();
|
|
335
|
+
if let Some(parent) = parent {
|
|
336
|
+
info!("Adding parent hash to files vector");
|
|
337
|
+
{
|
|
338
|
+
let parent_node = parent.read().await;
|
|
339
|
+
match &parent_node.node_type {
|
|
340
|
+
NodeType::File(file_name) => {
|
|
341
|
+
files.push(file_name.clone());
|
|
342
|
+
}
|
|
343
|
+
NodeType::Branch((branch_name, _)) => {
|
|
344
|
+
files.push(branch_name.clone());
|
|
345
|
+
}
|
|
346
|
+
_ => {
|
|
347
|
+
continue;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
}
|
|
266
351
|
|
|
267
|
-
|
|
352
|
+
stack.push(parent);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
info!("Returning files vector with {} elements", files.len());
|
|
356
|
+
Ok(files)
|
|
268
357
|
}
|
|
269
358
|
|
|
270
359
|
/// creates a new node and attaches it to the current tree.
|
|
@@ -302,12 +391,18 @@ impl MerkleTree {
|
|
|
302
391
|
// 1. the path is empty. this means that the ancestor is the root.
|
|
303
392
|
// 2. the path is non-empty. that means there exist a non-empty element btwn till the root.
|
|
304
393
|
|
|
394
|
+
let absolute_root_path = self.root_path.clone();
|
|
305
395
|
let new_node = match path.len() {
|
|
306
396
|
0 => {
|
|
307
397
|
// this means that the ancestor is the root.
|
|
308
398
|
// we need to create a new node and attach it to the ancestor.
|
|
309
|
-
let new_node =
|
|
310
|
-
|
|
399
|
+
let new_node = MerkleNode::new(
|
|
400
|
+
file_path.clone(),
|
|
401
|
+
Some(ancestor.clone()),
|
|
402
|
+
&self.git_ignored_files_and_dirs,
|
|
403
|
+
&absolute_root_path.as_str(),
|
|
404
|
+
)
|
|
405
|
+
.await;
|
|
311
406
|
ancestor.write().await.attach_child(new_node.clone()).await;
|
|
312
407
|
new_node
|
|
313
408
|
}
|
|
@@ -318,9 +413,13 @@ impl MerkleTree {
|
|
|
318
413
|
// UNSURE: not sure this is the correct thing to do but it is the fastest.
|
|
319
414
|
// get the last thing that is not in the tree.
|
|
320
415
|
let first_child_path = path.last().unwrap();
|
|
321
|
-
let first_child =
|
|
322
|
-
|
|
323
|
-
|
|
416
|
+
let first_child = MerkleNode::new(
|
|
417
|
+
first_child_path.clone(),
|
|
418
|
+
Some(ancestor.clone()),
|
|
419
|
+
&self.git_ignored_files_and_dirs,
|
|
420
|
+
&absolute_root_path.as_str(),
|
|
421
|
+
)
|
|
422
|
+
.await;
|
|
324
423
|
|
|
325
424
|
// TODO(sualeh): we should do an assertion check that the entire vec is contained here.
|
|
326
425
|
|
|
@@ -597,18 +696,62 @@ use std::future::Future;
|
|
|
597
696
|
use std::pin::Pin;
|
|
598
697
|
|
|
599
698
|
type PinnedFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
|
|
699
|
+
type IgnoredFiles = HashSet<String>;
|
|
600
700
|
|
|
601
701
|
impl MerkleNode {
|
|
602
702
|
/// please be careful using this.
|
|
603
703
|
async fn __new_unchecked(
|
|
604
704
|
file_or_directory: String,
|
|
605
705
|
parent: ParentPtr,
|
|
706
|
+
ignored_files: &IgnoredFiles,
|
|
707
|
+
absolute_root_path: &str,
|
|
606
708
|
) -> MerkleNodePtr {
|
|
607
|
-
|
|
709
|
+
// check if the root is a git directory.
|
|
710
|
+
let is_git_repo =
|
|
711
|
+
match git_utils::is_git_directory(absolute_root_path).await {
|
|
712
|
+
Ok(is_git_repo) => is_git_repo,
|
|
713
|
+
Err(e) => false,
|
|
714
|
+
};
|
|
715
|
+
let bypass_git = !is_git_repo;
|
|
716
|
+
|
|
717
|
+
MerkleNode::construct_node(
|
|
718
|
+
Path::new(&file_or_directory),
|
|
719
|
+
parent,
|
|
720
|
+
ignored_files,
|
|
721
|
+
absolute_root_path,
|
|
722
|
+
bypass_git,
|
|
723
|
+
)
|
|
724
|
+
.await
|
|
608
725
|
}
|
|
609
726
|
|
|
610
|
-
async fn new(
|
|
611
|
-
|
|
727
|
+
async fn new(
|
|
728
|
+
absolute_file_or_directory: PathBuf,
|
|
729
|
+
parent: ParentPtr,
|
|
730
|
+
ignored_files: &IgnoredFiles,
|
|
731
|
+
absolute_root_path: &str,
|
|
732
|
+
) -> MerkleNodePtr {
|
|
733
|
+
// check if the root is a git directory.
|
|
734
|
+
let is_git_repo =
|
|
735
|
+
match git_utils::is_git_directory(absolute_root_path).await {
|
|
736
|
+
Ok(is_git_repo) => is_git_repo,
|
|
737
|
+
Err(_e) => false,
|
|
738
|
+
};
|
|
739
|
+
let bypass_git = !is_git_repo;
|
|
740
|
+
|
|
741
|
+
info!(
|
|
742
|
+
"constructing node for absolute_file_or_directory: {:?}",
|
|
743
|
+
absolute_file_or_directory
|
|
744
|
+
);
|
|
745
|
+
info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
|
|
746
|
+
|
|
747
|
+
MerkleNode::construct_node(
|
|
748
|
+
Path::new(&absolute_file_or_directory),
|
|
749
|
+
parent,
|
|
750
|
+
ignored_files,
|
|
751
|
+
absolute_root_path,
|
|
752
|
+
bypass_git,
|
|
753
|
+
)
|
|
754
|
+
.await
|
|
612
755
|
}
|
|
613
756
|
|
|
614
757
|
/// NOT added to the tree by default.
|
|
@@ -619,38 +762,51 @@ impl MerkleNode {
|
|
|
619
762
|
// let file_hash = self.files.get_mut(&file_path).unwrap();
|
|
620
763
|
|
|
621
764
|
fn construct_node<'a>(
|
|
622
|
-
|
|
765
|
+
absolute_file_or_directory: &'a Path,
|
|
623
766
|
parent: ParentPtr,
|
|
767
|
+
ignored_files: &'a IgnoredFiles,
|
|
768
|
+
absolute_root_path: &'a str,
|
|
769
|
+
bypass_git: bool,
|
|
624
770
|
) -> PinnedFuture<'a, MerkleNodePtr> {
|
|
625
771
|
Box::pin(async move {
|
|
626
772
|
// check if it is a file
|
|
627
|
-
let path_str =
|
|
628
|
-
if
|
|
773
|
+
let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
|
|
774
|
+
if absolute_file_or_directory.is_file() {
|
|
629
775
|
return Arc::new(RwLock::new(
|
|
630
776
|
MerkleNode::construct_file_node_or_error_node(
|
|
631
|
-
|
|
777
|
+
absolute_file_or_directory,
|
|
632
778
|
parent,
|
|
779
|
+
ignored_files,
|
|
633
780
|
)
|
|
634
781
|
.await,
|
|
635
782
|
));
|
|
636
783
|
}
|
|
637
784
|
|
|
638
785
|
// check if the directory fails the bad dir test.
|
|
639
|
-
let is_bad_dir = file_utils::is_in_bad_dir(
|
|
786
|
+
let is_bad_dir = file_utils::is_in_bad_dir(absolute_file_or_directory);
|
|
640
787
|
if is_bad_dir.is_err() || is_bad_dir.unwrap_or(false) {
|
|
641
788
|
// println!("skipping directory: {}", path_str);
|
|
642
789
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
643
|
-
Some(
|
|
790
|
+
Some(absolute_file_or_directory),
|
|
644
791
|
Some("Directory is in bad dir!".to_string()),
|
|
645
792
|
)));
|
|
646
793
|
}
|
|
647
794
|
|
|
648
|
-
let
|
|
795
|
+
let is_git_ignored_dir = ignored_files.contains(&path_str);
|
|
796
|
+
|
|
797
|
+
if is_git_ignored_dir && !bypass_git {
|
|
798
|
+
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
799
|
+
Some(absolute_file_or_directory),
|
|
800
|
+
Some("Directory is git ignored!".to_string()),
|
|
801
|
+
)));
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
let entries = fs::read_dir(absolute_file_or_directory);
|
|
649
805
|
match entries {
|
|
650
806
|
Ok(_) => (),
|
|
651
807
|
Err(e) => {
|
|
652
808
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
653
|
-
Some(
|
|
809
|
+
Some(absolute_file_or_directory),
|
|
654
810
|
Some(e.to_string()),
|
|
655
811
|
)));
|
|
656
812
|
}
|
|
@@ -670,13 +826,19 @@ impl MerkleNode {
|
|
|
670
826
|
match entry {
|
|
671
827
|
Ok(entry) => {
|
|
672
828
|
children.push(
|
|
673
|
-
MerkleNode::construct_node(
|
|
674
|
-
.
|
|
829
|
+
MerkleNode::construct_node(
|
|
830
|
+
&entry.path(),
|
|
831
|
+
Some(node.clone()),
|
|
832
|
+
ignored_files,
|
|
833
|
+
absolute_root_path,
|
|
834
|
+
bypass_git,
|
|
835
|
+
)
|
|
836
|
+
.await,
|
|
675
837
|
);
|
|
676
838
|
}
|
|
677
839
|
Err(e) => {
|
|
678
840
|
children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
679
|
-
Some(
|
|
841
|
+
Some(absolute_file_or_directory),
|
|
680
842
|
Some(e.to_string()),
|
|
681
843
|
))));
|
|
682
844
|
}
|
|
@@ -696,23 +858,33 @@ impl MerkleNode {
|
|
|
696
858
|
}
|
|
697
859
|
|
|
698
860
|
async fn construct_file_node(
|
|
699
|
-
|
|
861
|
+
absolute_file_path: &Path,
|
|
700
862
|
parent: ParentPtr,
|
|
863
|
+
ignored_files: &IgnoredFiles,
|
|
701
864
|
) -> Result<MerkleNode, String> {
|
|
702
|
-
let file_str =
|
|
865
|
+
let file_str = absolute_file_path
|
|
703
866
|
.to_str()
|
|
704
867
|
.ok_or("Could not convert file path to string!")?
|
|
705
868
|
.to_string();
|
|
706
869
|
// first see if it passes the
|
|
707
|
-
match file_utils::is_good_file(
|
|
870
|
+
match file_utils::is_good_file(absolute_file_path) {
|
|
708
871
|
Ok(_) => {}
|
|
709
872
|
Err(e) => {
|
|
710
873
|
return Err(format!("File failed runtime checks! {}", e.to_string()));
|
|
711
874
|
}
|
|
712
875
|
}
|
|
713
876
|
|
|
877
|
+
// check if the file is in the git ignore buffer.
|
|
878
|
+
// this is a bug right because we are not checking absoluteness here.
|
|
879
|
+
match ignored_files.contains(&file_str) {
|
|
880
|
+
true => {
|
|
881
|
+
return Err(format!("File is in git ignore buffer!"));
|
|
882
|
+
}
|
|
883
|
+
false => {}
|
|
884
|
+
}
|
|
885
|
+
|
|
714
886
|
// read the file_content to a buffer
|
|
715
|
-
let file_content = match tokio::fs::read(
|
|
887
|
+
let file_content = match tokio::fs::read(absolute_file_path).await {
|
|
716
888
|
Ok(content) => content,
|
|
717
889
|
Err(e) => {
|
|
718
890
|
return Err(format!("Could not read file! {}", e.to_string()));
|
|
@@ -720,7 +892,11 @@ impl MerkleNode {
|
|
|
720
892
|
};
|
|
721
893
|
|
|
722
894
|
// check if the file passes runtime checks.
|
|
723
|
-
match file_utils::is_good_file_runtime_check(
|
|
895
|
+
match file_utils::is_good_file_runtime_check(
|
|
896
|
+
absolute_file_path,
|
|
897
|
+
&file_content,
|
|
898
|
+
)
|
|
899
|
+
.await
|
|
724
900
|
{
|
|
725
901
|
Ok(_) => {}
|
|
726
902
|
Err(e) => {
|
|
@@ -751,15 +927,22 @@ impl MerkleNode {
|
|
|
751
927
|
}
|
|
752
928
|
|
|
753
929
|
async fn construct_file_node_or_error_node(
|
|
754
|
-
|
|
930
|
+
absolute_file_path: &Path,
|
|
755
931
|
parent: ParentPtr,
|
|
932
|
+
ignored_files: &IgnoredFiles,
|
|
756
933
|
) -> MerkleNode {
|
|
757
|
-
let node = match MerkleNode::construct_file_node(
|
|
934
|
+
let node = match MerkleNode::construct_file_node(
|
|
935
|
+
absolute_file_path,
|
|
936
|
+
parent,
|
|
937
|
+
ignored_files,
|
|
938
|
+
)
|
|
939
|
+
.await
|
|
940
|
+
{
|
|
758
941
|
Ok(node) => node,
|
|
759
942
|
Err(e) => {
|
|
760
943
|
// println!("constructing error node. error: {}", e);
|
|
761
944
|
// println!("file_path: {:?}", file_path);
|
|
762
|
-
MerkleNode::empty_node(Some(
|
|
945
|
+
MerkleNode::empty_node(Some(absolute_file_path), Some(e))
|
|
763
946
|
}
|
|
764
947
|
};
|
|
765
948
|
|
|
@@ -785,15 +968,51 @@ impl MerkleNode {
|
|
|
785
968
|
|
|
786
969
|
async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
|
|
787
970
|
let mut hasher = sha2::Sha256::new();
|
|
971
|
+
let mut names_and_hashes = vec![];
|
|
972
|
+
let mut non_zero_children = 0;
|
|
973
|
+
|
|
788
974
|
for child in children {
|
|
789
975
|
// check if it is an error node
|
|
790
976
|
let child_reader = child.read().await;
|
|
791
|
-
|
|
977
|
+
|
|
978
|
+
match &child_reader.node_type {
|
|
979
|
+
NodeType::File(file_name) => {
|
|
980
|
+
non_zero_children += 1;
|
|
981
|
+
names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
|
|
982
|
+
}
|
|
983
|
+
NodeType::Branch((file_name, _)) => {
|
|
984
|
+
let hash = child_reader.hash.clone();
|
|
985
|
+
if hash == "" {
|
|
986
|
+
continue;
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
non_zero_children += 1;
|
|
990
|
+
names_and_hashes.push((file_name.clone(), hash));
|
|
991
|
+
}
|
|
992
|
+
NodeType::ErrorNode(_) => {
|
|
993
|
+
continue;
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
// sort the list of names and hashes by the hashes!!
|
|
999
|
+
names_and_hashes
|
|
1000
|
+
.sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
|
|
1001
|
+
|
|
1002
|
+
for (name, hash) in names_and_hashes {
|
|
1003
|
+
if hash == "" {
|
|
792
1004
|
continue;
|
|
793
1005
|
}
|
|
1006
|
+
info!("name: {}, hash: {}", name, hash);
|
|
1007
|
+
hasher.update(hash);
|
|
1008
|
+
}
|
|
794
1009
|
|
|
795
|
-
|
|
1010
|
+
if non_zero_children == 0 {
|
|
1011
|
+
// this means that the branch is empty.
|
|
1012
|
+
// we should return an empty string.
|
|
1013
|
+
return "".to_string();
|
|
796
1014
|
}
|
|
1015
|
+
|
|
797
1016
|
let result = hasher.finalize();
|
|
798
1017
|
format!("{:x}", result)
|
|
799
1018
|
}
|