@anysphere/file-service 0.0.0-dbd43428 → 0.0.0-e15bb6ec
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +12 -0
- package/build.rs +2 -0
- package/index.d.ts +4 -3
- package/package.json +10 -8
- package/src/file_utils.rs +69 -23
- package/src/git_utils.rs +19 -10
- package/src/lib.rs +145 -56
- package/src/logger.rs +55 -0
- package/src/merkle_tree/local_construction.rs +33 -13
- package/src/merkle_tree/mod.rs +94 -79
- package/src/merkle_tree/test.rs +2 -1
package/Cargo.toml
CHANGED
|
@@ -6,6 +6,11 @@ version = "0.0.0"
|
|
|
6
6
|
[lib]
|
|
7
7
|
crate-type = ["cdylib"]
|
|
8
8
|
|
|
9
|
+
[features]
|
|
10
|
+
default = ["windows-subsystem"]
|
|
11
|
+
windows-subsystem = []
|
|
12
|
+
debugfile = []
|
|
13
|
+
|
|
9
14
|
[dependencies]
|
|
10
15
|
# Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
|
|
11
16
|
napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
|
|
@@ -20,6 +25,12 @@ prost = "0.11.9"
|
|
|
20
25
|
tracing = "0.1.37"
|
|
21
26
|
tracing-subscriber = "0.3.17"
|
|
22
27
|
tracing-appender = "0.2.2"
|
|
28
|
+
binaryornot = "1.0.0"
|
|
29
|
+
dunce = "1.0.1"
|
|
30
|
+
encoding_rs = "0.8.33"
|
|
31
|
+
|
|
32
|
+
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
|
33
|
+
tracing-axiom = "0.4"
|
|
23
34
|
|
|
24
35
|
[build-dependencies]
|
|
25
36
|
napi-build = "2.0.1"
|
|
@@ -27,5 +38,6 @@ tonic-build = "0.9.2"
|
|
|
27
38
|
anyhow = "1.0.75"
|
|
28
39
|
glob = "0.3.0"
|
|
29
40
|
|
|
41
|
+
|
|
30
42
|
[profile.release]
|
|
31
43
|
lto = true
|
package/build.rs
CHANGED
package/index.d.ts
CHANGED
|
@@ -4,9 +4,10 @@
|
|
|
4
4
|
/* auto-generated by NAPI-RS */
|
|
5
5
|
|
|
6
6
|
export class MerkleClient {
|
|
7
|
-
constructor(
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
constructor(absoluteRootDirectory: string)
|
|
8
|
+
isTooBig(maxFiles: number, gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<boolean>
|
|
9
|
+
init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
|
|
10
|
+
computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
|
|
10
11
|
updateFile(filePath: string): Promise<void>
|
|
11
12
|
deleteFile(filePath: string): Promise<void>
|
|
12
13
|
getSubtreeHash(relativePath: string): Promise<string>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@anysphere/file-service",
|
|
3
|
-
"version": "0.0.0-
|
|
3
|
+
"version": "0.0.0-e15bb6ec",
|
|
4
4
|
"main": "index.js",
|
|
5
5
|
"types": "index.d.ts",
|
|
6
6
|
"napi": {
|
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
"additional": [
|
|
10
10
|
"aarch64-apple-darwin",
|
|
11
11
|
"aarch64-pc-windows-msvc",
|
|
12
|
-
"universal-apple-darwin"
|
|
12
|
+
"universal-apple-darwin",
|
|
13
|
+
"aarch64-unknown-linux-gnu"
|
|
13
14
|
]
|
|
14
15
|
}
|
|
15
16
|
},
|
|
@@ -35,11 +36,12 @@
|
|
|
35
36
|
"version": "napi version"
|
|
36
37
|
},
|
|
37
38
|
"optionalDependencies": {
|
|
38
|
-
"@anysphere/file-service-win32-x64-msvc": "0.0.0-
|
|
39
|
-
"@anysphere/file-service-darwin-x64": "0.0.0-
|
|
40
|
-
"@anysphere/file-service-linux-x64-gnu": "0.0.0-
|
|
41
|
-
"@anysphere/file-service-darwin-arm64": "0.0.0-
|
|
42
|
-
"@anysphere/file-service-win32-arm64-msvc": "0.0.0-
|
|
43
|
-
"@anysphere/file-service-darwin-universal": "0.0.0-
|
|
39
|
+
"@anysphere/file-service-win32-x64-msvc": "0.0.0-e15bb6ec",
|
|
40
|
+
"@anysphere/file-service-darwin-x64": "0.0.0-e15bb6ec",
|
|
41
|
+
"@anysphere/file-service-linux-x64-gnu": "0.0.0-e15bb6ec",
|
|
42
|
+
"@anysphere/file-service-darwin-arm64": "0.0.0-e15bb6ec",
|
|
43
|
+
"@anysphere/file-service-win32-arm64-msvc": "0.0.0-e15bb6ec",
|
|
44
|
+
"@anysphere/file-service-darwin-universal": "0.0.0-e15bb6ec",
|
|
45
|
+
"@anysphere/file-service-linux-arm64-gnu": "0.0.0-e15bb6ec"
|
|
44
46
|
}
|
|
45
47
|
}
|
package/src/file_utils.rs
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
// 4. vscode.fs.stat
|
|
6
6
|
|
|
7
7
|
use anyhow::Error;
|
|
8
|
+
use encoding_rs::UTF_8;
|
|
8
9
|
use std::path::Path;
|
|
9
10
|
use tokio::fs;
|
|
10
11
|
|
|
@@ -43,8 +44,21 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
43
44
|
_ => {}
|
|
44
45
|
}
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
"lock"
|
|
47
|
+
let bad_extensions = vec![
|
|
48
|
+
"lock", "bak", "tmp", "bin", "exe", "dll", "so", "lockb", "qwoff", "isl",
|
|
49
|
+
"csv", "pdf", // add ms word, excel, powerpoint, etc.
|
|
50
|
+
"doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "odg",
|
|
51
|
+
"odf", "sxw", "sxc", "sxi", "sxd", "sdc", // add images
|
|
52
|
+
"jpg", "jpeg", "png", "gif", "bmp", "tif", // add audio
|
|
53
|
+
"mp3", "wav", "wma", "ogg", "flac", "aac", // add video
|
|
54
|
+
"mp4", "mov", "wmv", "flv", "avi", // add archives
|
|
55
|
+
"zip", "tar", "gz", "7z", "rar", "tgz", "dmg", "iso", "cue", "mdf", "mds",
|
|
56
|
+
"vcd", "toast", "img", "apk", "msi", "cab", "tar.gz", "tar.xz", "tar.bz2",
|
|
57
|
+
"tar.lzma", "tar.Z", "tar.sz", "lzma", // add fonts
|
|
58
|
+
"ttf", "otf", "woff", "woff2", "eot", "webp", "vsix",
|
|
59
|
+
];
|
|
60
|
+
match bad_extensions.contains(&extension) {
|
|
61
|
+
true => {
|
|
48
62
|
return Err(anyhow::anyhow!("File is just a lock file"));
|
|
49
63
|
}
|
|
50
64
|
_ => {}
|
|
@@ -62,7 +76,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
62
76
|
Some(extension) => match extension.to_str() {
|
|
63
77
|
Some(ext_str) => {
|
|
64
78
|
if bad_extensions.contains(&ext_str) {
|
|
65
|
-
return Err(anyhow::anyhow!("
|
|
79
|
+
return Err(anyhow::anyhow!("Binary file excluded from indexing."));
|
|
66
80
|
}
|
|
67
81
|
}
|
|
68
82
|
None => {
|
|
@@ -88,10 +102,12 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
|
|
|
88
102
|
Ok(())
|
|
89
103
|
}
|
|
90
104
|
|
|
105
|
+
// use binaryornot::is_binary;
|
|
106
|
+
// use anyhow::Context;
|
|
91
107
|
// implement the buffer above:
|
|
92
108
|
pub async fn is_good_file_runtime_check(
|
|
93
109
|
file_path: &Path,
|
|
94
|
-
|
|
110
|
+
// _buffer: &[u8],
|
|
95
111
|
) -> Result<(), Error> {
|
|
96
112
|
match get_file_size(file_path).await {
|
|
97
113
|
Ok(size) if size > 2 * 1024 * 1024 => {
|
|
@@ -101,16 +117,31 @@ pub async fn is_good_file_runtime_check(
|
|
|
101
117
|
_ => {}
|
|
102
118
|
}
|
|
103
119
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
|
|
109
|
-
}
|
|
110
|
-
}
|
|
120
|
+
// if is_binary(file_path).context("Failed to check if file is binary")? {
|
|
121
|
+
// return Err(anyhow::anyhow!("File is binary"));
|
|
122
|
+
// }
|
|
123
|
+
|
|
111
124
|
Ok(())
|
|
112
125
|
}
|
|
113
126
|
|
|
127
|
+
pub async fn read_string_without_bom(
|
|
128
|
+
file_path: &Path,
|
|
129
|
+
) -> Result<String, Error> {
|
|
130
|
+
let file_buffer = match fs::read(file_path).await {
|
|
131
|
+
Ok(buffer) => buffer,
|
|
132
|
+
Err(e) => {
|
|
133
|
+
return Err(anyhow::anyhow!(
|
|
134
|
+
"Failed to read file buffer: {}",
|
|
135
|
+
e.to_string()
|
|
136
|
+
))
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
|
|
141
|
+
|
|
142
|
+
Ok(cow.to_string())
|
|
143
|
+
}
|
|
144
|
+
|
|
114
145
|
pub fn as_relative_path(
|
|
115
146
|
base_path: &Path,
|
|
116
147
|
file_path: &Path,
|
|
@@ -168,25 +199,40 @@ mod tests {
|
|
|
168
199
|
temp_file.write_all(b"Hello, world!").await.unwrap();
|
|
169
200
|
let buffer = fs::read(&temp_file_path).await.unwrap();
|
|
170
201
|
assert_eq!(
|
|
171
|
-
is_good_file_runtime_check(&temp_file_path,
|
|
172
|
-
.await
|
|
173
|
-
.is_ok(),
|
|
202
|
+
is_good_file_runtime_check(&temp_file_path).await.is_ok(),
|
|
174
203
|
true
|
|
175
204
|
);
|
|
176
205
|
temp_dir.close().unwrap();
|
|
177
206
|
|
|
207
|
+
// let temp_dir = tempfile::tempdir().unwrap();
|
|
208
|
+
// let temp_file_path = temp_dir.path().join("test_file");
|
|
209
|
+
// let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
|
|
210
|
+
// temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
|
|
211
|
+
// let buffer = fs::read(&temp_file_path).await.unwrap();
|
|
212
|
+
// assert_eq!(
|
|
213
|
+
// is_good_file_runtime_check(&temp_file_path).await.is_err(),
|
|
214
|
+
// true
|
|
215
|
+
// );
|
|
216
|
+
// temp_dir.close().unwrap();
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
#[tokio::test]
|
|
220
|
+
async fn test_bom_file() {
|
|
221
|
+
const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
|
|
222
|
+
const CONTENT: &str = "Hello, world!";
|
|
223
|
+
|
|
224
|
+
// Write this to a temp file
|
|
178
225
|
let temp_dir = tempfile::tempdir().unwrap();
|
|
179
226
|
let temp_file_path = temp_dir.path().join("test_file");
|
|
180
227
|
let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
|
|
181
|
-
temp_file.write_all(&
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
);
|
|
189
|
-
temp_dir.close().unwrap();
|
|
228
|
+
temp_file.write_all(&BOM).await.unwrap();
|
|
229
|
+
temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
|
|
230
|
+
|
|
231
|
+
// expect that we read the file with tokio as the CONTENT
|
|
232
|
+
let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
|
|
233
|
+
|
|
234
|
+
// Check string equality of CONTENT (&str) to file_contents (String)
|
|
235
|
+
assert_eq!(CONTENT, file_contents);
|
|
190
236
|
}
|
|
191
237
|
|
|
192
238
|
#[test]
|
package/src/git_utils.rs
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
|
+
use std::path::MAIN_SEPARATOR_STR;
|
|
2
3
|
use std::process::Command;
|
|
3
4
|
|
|
4
|
-
pub fn
|
|
5
|
+
pub fn list_ignored_files_and_directories(
|
|
5
6
|
workspace_root_path: &str,
|
|
6
7
|
should_return_absolute_paths: bool,
|
|
7
8
|
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
|
|
@@ -14,12 +15,12 @@ pub fn list_ignored_files(
|
|
|
14
15
|
"--others",
|
|
15
16
|
"--ignored",
|
|
16
17
|
"--exclude-standard",
|
|
18
|
+
"--directory",
|
|
19
|
+
"--no-empty-directory"
|
|
17
20
|
],
|
|
18
21
|
// FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
|
|
19
22
|
vec![
|
|
20
|
-
"
|
|
21
|
-
"-c",
|
|
22
|
-
"git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
|
|
23
|
+
"git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
|
|
23
24
|
],
|
|
24
25
|
];
|
|
25
26
|
|
|
@@ -34,9 +35,12 @@ pub fn list_ignored_files(
|
|
|
34
35
|
.lines()
|
|
35
36
|
.filter(|line| !line.is_empty())
|
|
36
37
|
.map(|line| {
|
|
38
|
+
let line = line.replace("/", MAIN_SEPARATOR_STR);
|
|
39
|
+
|
|
37
40
|
if should_return_absolute_paths {
|
|
38
41
|
let mut path = std::path::PathBuf::from(workspace_root_path);
|
|
39
|
-
|
|
42
|
+
|
|
43
|
+
path.push(line.clone());
|
|
40
44
|
|
|
41
45
|
match path.canonicalize() {
|
|
42
46
|
Ok(canonical_path) => {
|
|
@@ -133,7 +137,8 @@ mod tests {
|
|
|
133
137
|
fn test_no_ignored_files() {
|
|
134
138
|
let dir = tempfile::tempdir().unwrap();
|
|
135
139
|
let gitignored_files =
|
|
136
|
-
|
|
140
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
141
|
+
.unwrap();
|
|
137
142
|
Command::new("git")
|
|
138
143
|
.args(&["init"])
|
|
139
144
|
.current_dir(dir.path())
|
|
@@ -160,7 +165,8 @@ mod tests {
|
|
|
160
165
|
.output()
|
|
161
166
|
.unwrap();
|
|
162
167
|
let gitignored_files =
|
|
163
|
-
|
|
168
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
169
|
+
.unwrap();
|
|
164
170
|
println!(
|
|
165
171
|
"ignored files for test_one_ignored_file: {:?}",
|
|
166
172
|
gitignored_files
|
|
@@ -190,7 +196,8 @@ mod tests {
|
|
|
190
196
|
.output()
|
|
191
197
|
.unwrap();
|
|
192
198
|
let gitignored_files =
|
|
193
|
-
|
|
199
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
200
|
+
.unwrap();
|
|
194
201
|
println!(
|
|
195
202
|
"ignored files for test_multiple_ignored_files: {:?}",
|
|
196
203
|
gitignored_files
|
|
@@ -254,7 +261,8 @@ mod tests {
|
|
|
254
261
|
println!("git submodule add output: {:?}", o);
|
|
255
262
|
|
|
256
263
|
let gitignored_files =
|
|
257
|
-
|
|
264
|
+
list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
|
|
265
|
+
.unwrap();
|
|
258
266
|
println!(
|
|
259
267
|
"ignored files for test_git_submodule_ignored_files: {:?}",
|
|
260
268
|
gitignored_files
|
|
@@ -265,7 +273,8 @@ mod tests {
|
|
|
265
273
|
|
|
266
274
|
#[test]
|
|
267
275
|
fn test_multiple_ignored_files_in_current_dir() {
|
|
268
|
-
let gitignored_files =
|
|
276
|
+
let gitignored_files =
|
|
277
|
+
list_ignored_files_and_directories(".", false).unwrap();
|
|
269
278
|
assert!(gitignored_files.len() > 1);
|
|
270
279
|
|
|
271
280
|
// print a sample of the ignored files
|
package/src/lib.rs
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
+
#![windows_subsystem = "windows"]
|
|
1
2
|
#![deny(clippy::all)]
|
|
2
3
|
#![deny(unsafe_op_in_unsafe_fn)]
|
|
3
4
|
pub mod file_utils;
|
|
4
|
-
pub mod
|
|
5
|
+
pub mod logger;
|
|
5
6
|
pub mod merkle_tree;
|
|
6
7
|
|
|
7
|
-
use std::vec;
|
|
8
|
+
use std::{collections::HashSet, vec};
|
|
8
9
|
|
|
10
|
+
use anyhow::Context;
|
|
9
11
|
use merkle_tree::{LocalConstruction, MerkleTree};
|
|
10
|
-
use tracing::{
|
|
11
|
-
use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
|
12
|
-
use tracing_subscriber::fmt;
|
|
12
|
+
use tracing::{debug, info};
|
|
13
13
|
|
|
14
14
|
#[macro_use]
|
|
15
15
|
extern crate napi_derive;
|
|
@@ -17,46 +17,101 @@ extern crate napi_derive;
|
|
|
17
17
|
#[napi]
|
|
18
18
|
pub struct MerkleClient {
|
|
19
19
|
tree: MerkleTree,
|
|
20
|
-
|
|
21
|
-
_guard:
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
|
|
25
|
-
let file_appender =
|
|
26
|
-
RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
|
|
27
|
-
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
|
|
28
|
-
let subscriber = fmt::Subscriber::builder()
|
|
29
|
-
.with_max_level(Level::TRACE)
|
|
30
|
-
.with_writer(non_blocking)
|
|
31
|
-
.with_ansi(false)
|
|
32
|
-
.with_line_number(true)
|
|
33
|
-
.finish();
|
|
34
|
-
|
|
35
|
-
let _ = tracing::subscriber::set_global_default(subscriber);
|
|
36
|
-
|
|
37
|
-
_guard
|
|
20
|
+
absolute_root_directory: String,
|
|
21
|
+
_guard: Option<logger::GuardType>,
|
|
38
22
|
}
|
|
39
23
|
|
|
40
24
|
#[napi]
|
|
41
25
|
impl MerkleClient {
|
|
42
26
|
#[napi(constructor)]
|
|
43
|
-
pub fn new(
|
|
44
|
-
let _guard = init_logger();
|
|
27
|
+
pub fn new(absolute_root_directory: String) -> MerkleClient {
|
|
28
|
+
let _guard = logger::init_logger();
|
|
29
|
+
|
|
30
|
+
// let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
|
|
31
|
+
// use dunce::canonicalize;
|
|
32
|
+
// let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
|
|
33
|
+
// Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
|
|
34
|
+
// Err(e) => {
|
|
35
|
+
// info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
|
|
36
|
+
// absolute_root_directory
|
|
37
|
+
// }
|
|
38
|
+
// };
|
|
45
39
|
|
|
46
40
|
MerkleClient {
|
|
47
41
|
tree: MerkleTree::empty_tree(),
|
|
48
|
-
|
|
42
|
+
absolute_root_directory,
|
|
49
43
|
_guard,
|
|
50
44
|
}
|
|
51
45
|
}
|
|
52
46
|
|
|
47
|
+
#[napi]
|
|
48
|
+
pub async fn is_too_big(
|
|
49
|
+
&self,
|
|
50
|
+
max_files: i32,
|
|
51
|
+
git_ignored_files: Vec<String>,
|
|
52
|
+
is_git_repo: bool,
|
|
53
|
+
) -> bool {
|
|
54
|
+
let git_ignored_set =
|
|
55
|
+
HashSet::<String>::from_iter(git_ignored_files.into_iter());
|
|
56
|
+
let mut num_files = 0;
|
|
57
|
+
let mut dirs_to_check = vec![self.absolute_root_directory.clone()];
|
|
58
|
+
|
|
59
|
+
while let Some(dir) = dirs_to_check.pop() {
|
|
60
|
+
info!("dir: {:?}", dir);
|
|
61
|
+
let mut entries = match tokio::fs::read_dir(&dir).await {
|
|
62
|
+
Ok(entries) => entries,
|
|
63
|
+
Err(_) => continue,
|
|
64
|
+
};
|
|
65
|
+
if num_files > max_files {
|
|
66
|
+
return true;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
while let Some(entry) = entries.next_entry().await.unwrap_or(None) {
|
|
71
|
+
let path = entry.path();
|
|
72
|
+
info!("entry: {:?}", path);
|
|
73
|
+
let path_str = match path.to_str() {
|
|
74
|
+
Some(path_str) => path_str.to_string(),
|
|
75
|
+
None => continue,
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
if git_ignored_set.contains(&path_str) {
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
match entry.file_type().await {
|
|
83
|
+
Ok(file_type) => {
|
|
84
|
+
if file_type.is_dir() {
|
|
85
|
+
dirs_to_check.push(path_str);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
if file_type.is_file() {
|
|
89
|
+
num_files += 1;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
Err(_) => continue,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
num_files > max_files
|
|
98
|
+
}
|
|
99
|
+
|
|
53
100
|
#[napi]
|
|
54
|
-
pub async unsafe fn init(
|
|
101
|
+
pub async unsafe fn init(
|
|
102
|
+
&mut self,
|
|
103
|
+
git_ignored_files: Vec<String>,
|
|
104
|
+
is_git_repo: bool,
|
|
105
|
+
) -> Result<(), napi::Error> {
|
|
55
106
|
// 1. compute the merkle tree
|
|
56
107
|
// 2. update the backend
|
|
57
108
|
// 3. sync with the remote
|
|
109
|
+
info!("Merkle tree compute started!");
|
|
110
|
+
info!("Root directory: {:?}", self.absolute_root_directory);
|
|
58
111
|
unsafe {
|
|
59
|
-
self
|
|
112
|
+
self
|
|
113
|
+
.compute_merkle_tree(git_ignored_files, is_git_repo)
|
|
114
|
+
.await?;
|
|
60
115
|
}
|
|
61
116
|
|
|
62
117
|
Ok(())
|
|
@@ -69,21 +124,25 @@ impl MerkleClient {
|
|
|
69
124
|
#[napi]
|
|
70
125
|
pub async unsafe fn compute_merkle_tree(
|
|
71
126
|
&mut self,
|
|
127
|
+
git_ignored_files: Vec<String>,
|
|
128
|
+
is_git_repo: bool,
|
|
72
129
|
) -> Result<(), napi::Error> {
|
|
73
|
-
|
|
74
|
-
|
|
130
|
+
// make the git ignored files into a hash set
|
|
131
|
+
let mut git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
|
|
75
132
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
info!("files: {:?}", files);
|
|
81
|
-
}
|
|
82
|
-
Err(e) => {
|
|
83
|
-
info!("Error in get_all_files: {:?}", e);
|
|
84
|
-
}
|
|
133
|
+
// if the hashset itself contains the root directory, then we should remove it.
|
|
134
|
+
// this is because the root directory is not a file, and we don't want to ignore it.
|
|
135
|
+
if git_ignored_set.contains(&self.absolute_root_directory) {
|
|
136
|
+
git_ignored_set.remove(&self.absolute_root_directory);
|
|
85
137
|
}
|
|
86
138
|
|
|
139
|
+
let t = MerkleTree::construct_merkle_tree(
|
|
140
|
+
self.absolute_root_directory.clone(),
|
|
141
|
+
git_ignored_set,
|
|
142
|
+
is_git_repo,
|
|
143
|
+
)
|
|
144
|
+
.await;
|
|
145
|
+
|
|
87
146
|
match t {
|
|
88
147
|
Ok(tree) => {
|
|
89
148
|
self.tree = tree;
|
|
@@ -111,19 +170,50 @@ impl MerkleClient {
|
|
|
111
170
|
&self,
|
|
112
171
|
relative_path: String,
|
|
113
172
|
) -> Result<String, napi::Error> {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
173
|
+
debug!("get_subtree_hash: relative_path: {:?}", relative_path);
|
|
174
|
+
|
|
175
|
+
let relative_path_without_leading_slash = match relative_path
|
|
176
|
+
.strip_prefix('.')
|
|
177
|
+
{
|
|
178
|
+
Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
|
|
179
|
+
None => relative_path.as_str(),
|
|
180
|
+
};
|
|
181
|
+
debug!(
|
|
182
|
+
"relative_path_without_leading_slash: {:?}",
|
|
183
|
+
relative_path_without_leading_slash
|
|
184
|
+
);
|
|
185
|
+
|
|
186
|
+
let absolute_path = if !relative_path_without_leading_slash.is_empty() {
|
|
187
|
+
std::path::Path::new(&self.absolute_root_directory)
|
|
188
|
+
.join(relative_path_without_leading_slash)
|
|
189
|
+
} else {
|
|
190
|
+
std::path::Path::new(&self.absolute_root_directory).to_path_buf()
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
debug!("absolute_path: {:?}", absolute_path);
|
|
194
|
+
|
|
195
|
+
let absolute_path_string = match absolute_path.to_str() {
|
|
196
|
+
Some(path) => path.to_string(),
|
|
197
|
+
None => {
|
|
198
|
+
return Err(napi::Error::new(
|
|
199
|
+
napi::Status::Unknown,
|
|
200
|
+
format!("some string error"),
|
|
201
|
+
))
|
|
202
|
+
}
|
|
203
|
+
};
|
|
118
204
|
|
|
119
|
-
|
|
120
|
-
|
|
205
|
+
debug!("absolute_path_string: {:?}", absolute_path_string);
|
|
206
|
+
|
|
207
|
+
let hash = self
|
|
208
|
+
.tree
|
|
209
|
+
.get_subtree_hash(absolute_path_string.as_str())
|
|
210
|
+
.await;
|
|
121
211
|
|
|
122
212
|
match hash {
|
|
123
213
|
Ok(hash) => Ok(hash),
|
|
124
214
|
Err(e) => Err(napi::Error::new(
|
|
125
215
|
napi::Status::Unknown,
|
|
126
|
-
format!("Error in get_subtree_hash: {:?}", e)
|
|
216
|
+
format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
|
|
127
217
|
)),
|
|
128
218
|
}
|
|
129
219
|
}
|
|
@@ -145,7 +235,7 @@ impl MerkleClient {
|
|
|
145
235
|
&self,
|
|
146
236
|
relative_path: String,
|
|
147
237
|
) -> Result<i32, napi::Error> {
|
|
148
|
-
let absolute_path = std::path::Path::new(&self.
|
|
238
|
+
let absolute_path = std::path::Path::new(&self.absolute_root_directory)
|
|
149
239
|
.join(relative_path)
|
|
150
240
|
.canonicalize()?;
|
|
151
241
|
|
|
@@ -181,10 +271,12 @@ impl MerkleClient {
|
|
|
181
271
|
&self,
|
|
182
272
|
absolute_file_path: String,
|
|
183
273
|
) -> Result<Vec<String>, napi::Error> {
|
|
184
|
-
let
|
|
274
|
+
// let absolute_path = absolute_file_path.to_lowercase();
|
|
275
|
+
// let absolute_path_str = absolute_path.as_str();
|
|
276
|
+
|
|
185
277
|
let files = self
|
|
186
278
|
.tree
|
|
187
|
-
.get_all_dir_files_to_embed(
|
|
279
|
+
.get_all_dir_files_to_embed(absolute_file_path.as_str())
|
|
188
280
|
.await;
|
|
189
281
|
|
|
190
282
|
match files {
|
|
@@ -209,11 +301,7 @@ impl MerkleClient {
|
|
|
209
301
|
// TODO(sualeh): we should assert that the path is ascending up to the path.
|
|
210
302
|
|
|
211
303
|
let ret = vec![file];
|
|
212
|
-
info!("file: {:?}", ret);
|
|
213
|
-
|
|
214
304
|
let ret = ret.into_iter().chain(path.into_iter()).collect::<Vec<_>>();
|
|
215
|
-
info!("ret to js: {:?}", ret);
|
|
216
|
-
|
|
217
305
|
Ok(ret)
|
|
218
306
|
}
|
|
219
307
|
Err(e) => Err(napi::Error::new(
|
|
@@ -229,8 +317,9 @@ impl MerkleClient {
|
|
|
229
317
|
&self,
|
|
230
318
|
absolute_file_path: String,
|
|
231
319
|
) -> Result<Vec<String>, napi::Error> {
|
|
232
|
-
let
|
|
233
|
-
let
|
|
320
|
+
// let absolute_path = absolute_file_path.to_lowercase();
|
|
321
|
+
// let absolute_path_str = absolute_path.as_str();
|
|
322
|
+
let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
|
|
234
323
|
|
|
235
324
|
match spline {
|
|
236
325
|
Ok(spline) => Ok(spline),
|
|
@@ -259,6 +348,6 @@ impl MerkleClient {
|
|
|
259
348
|
|
|
260
349
|
#[napi]
|
|
261
350
|
pub fn update_root_directory(&mut self, root_directory: String) {
|
|
262
|
-
self.
|
|
351
|
+
self.absolute_root_directory = root_directory;
|
|
263
352
|
}
|
|
264
353
|
}
|
package/src/logger.rs
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
use tracing::{info, subscriber, Level};
|
|
2
|
+
use tracing_appender::non_blocking::WorkerGuard;
|
|
3
|
+
use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
|
4
|
+
use tracing_subscriber::fmt;
|
|
5
|
+
use tracing_subscriber::prelude::*;
|
|
6
|
+
|
|
7
|
+
pub enum GuardType {
|
|
8
|
+
#[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
|
|
9
|
+
Guard(tracing_axiom::Guard),
|
|
10
|
+
WorkerGuard(tracing_appender::non_blocking::WorkerGuard),
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
pub fn init_logger() -> Option<GuardType> {
|
|
14
|
+
#[cfg(feature = "debugfile")]
|
|
15
|
+
let _guard = {
|
|
16
|
+
let file_appender =
|
|
17
|
+
RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
|
|
18
|
+
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
|
|
19
|
+
let subscriber = fmt::Subscriber::builder()
|
|
20
|
+
.with_max_level(Level::TRACE)
|
|
21
|
+
.with_writer(non_blocking)
|
|
22
|
+
.with_ansi(false)
|
|
23
|
+
.with_line_number(true)
|
|
24
|
+
.finish();
|
|
25
|
+
|
|
26
|
+
let _ = tracing::subscriber::set_global_default(subscriber);
|
|
27
|
+
|
|
28
|
+
Some(GuardType::WorkerGuard(_guard))
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
#[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
|
|
32
|
+
let _guard = {
|
|
33
|
+
let (axiom_layer, _guard) = tracing_axiom::builder()
|
|
34
|
+
.with_token("xaat-a51088e6-7889-41c0-b440-cfd4601acdd7")
|
|
35
|
+
.with_dataset("local-indexing")
|
|
36
|
+
.layer()
|
|
37
|
+
.ok()?;
|
|
38
|
+
// let fmt_layer = fmt::layer().with_level(true).with_ansi(false).with_line_number(true);
|
|
39
|
+
|
|
40
|
+
let _ = tracing_subscriber::registry()
|
|
41
|
+
.with(axiom_layer)
|
|
42
|
+
.try_init()
|
|
43
|
+
.ok()?;
|
|
44
|
+
// let _ = tracing::subscriber::set_global_default(subscriber);
|
|
45
|
+
|
|
46
|
+
info!("Tracing initialized! in rust");
|
|
47
|
+
|
|
48
|
+
Some(GuardType::Guard(_guard))
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
#[cfg(all(not(feature = "debugfile"), target_os = "linux"))]
|
|
52
|
+
let _guard = { None };
|
|
53
|
+
|
|
54
|
+
_guard
|
|
55
|
+
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
use crate::git_utils;
|
|
2
1
|
use crate::merkle_tree::{
|
|
3
2
|
File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
|
|
4
3
|
};
|
|
@@ -10,11 +9,18 @@ use tonic::async_trait;
|
|
|
10
9
|
|
|
11
10
|
#[async_trait]
|
|
12
11
|
impl LocalConstruction for MerkleTree {
|
|
12
|
+
#[tracing::instrument]
|
|
13
13
|
async fn new(
|
|
14
14
|
root_directory: Option<String>,
|
|
15
15
|
) -> Result<MerkleTree, anyhow::Error> {
|
|
16
|
+
let git_ignored_files = HashSet::<String>::new();
|
|
16
17
|
if let Some(root_directory) = root_directory {
|
|
17
|
-
let n = MerkleTree::construct_merkle_tree(
|
|
18
|
+
let n = MerkleTree::construct_merkle_tree(
|
|
19
|
+
root_directory,
|
|
20
|
+
git_ignored_files,
|
|
21
|
+
false,
|
|
22
|
+
)
|
|
23
|
+
.await;
|
|
18
24
|
return n;
|
|
19
25
|
}
|
|
20
26
|
|
|
@@ -30,6 +36,8 @@ impl LocalConstruction for MerkleTree {
|
|
|
30
36
|
/// 4. return merkle tree
|
|
31
37
|
async fn construct_merkle_tree(
|
|
32
38
|
absolute_path_to_root_directory: String,
|
|
39
|
+
git_ignored_files_and_dirs: HashSet<String>,
|
|
40
|
+
is_git_repo: bool,
|
|
33
41
|
) -> Result<MerkleTree, anyhow::Error> {
|
|
34
42
|
let path = PathBuf::from(absolute_path_to_root_directory.clone());
|
|
35
43
|
if !path.exists() {
|
|
@@ -38,29 +46,31 @@ impl LocalConstruction for MerkleTree {
|
|
|
38
46
|
}
|
|
39
47
|
|
|
40
48
|
// 1. get all the gitignored files
|
|
41
|
-
let
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
tracing::info!("git_ignored_files: {:?}", git_ignored_files);
|
|
49
|
+
// let git_ignored_files_and_dirs =
|
|
50
|
+
// match git_utils::list_ignored_files_and_directories(
|
|
51
|
+
// absolute_path_to_root_directory.as_str(),
|
|
52
|
+
// true,
|
|
53
|
+
// ) {
|
|
54
|
+
// Ok(git_ignored) => git_ignored,
|
|
55
|
+
// Err(_e) => HashSet::new(),
|
|
56
|
+
// };
|
|
50
57
|
|
|
51
58
|
let root_node = MerkleNode::new(
|
|
52
59
|
path,
|
|
53
60
|
None,
|
|
54
|
-
&
|
|
61
|
+
&git_ignored_files_and_dirs,
|
|
55
62
|
absolute_path_to_root_directory.as_str(),
|
|
63
|
+
is_git_repo,
|
|
56
64
|
)
|
|
57
65
|
.await;
|
|
66
|
+
|
|
58
67
|
let mut mt = MerkleTree {
|
|
59
68
|
root: root_node,
|
|
60
69
|
files: BTreeMap::new(),
|
|
61
70
|
root_path: absolute_path_to_root_directory,
|
|
62
71
|
cursor: None,
|
|
63
|
-
|
|
72
|
+
git_ignored_files_and_dirs,
|
|
73
|
+
is_git_repo,
|
|
64
74
|
};
|
|
65
75
|
|
|
66
76
|
// we now iterate over all the nodes and add them to the hashmap
|
|
@@ -81,6 +91,13 @@ impl LocalConstruction for MerkleTree {
|
|
|
81
91
|
}
|
|
82
92
|
NodeType::File(file_name) => {
|
|
83
93
|
let f = File { node: node.clone() };
|
|
94
|
+
|
|
95
|
+
// i dont reallly like this :(((
|
|
96
|
+
// let canonical_file_name = match dunce::canonicalize(file_name) {
|
|
97
|
+
// Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
|
|
98
|
+
// Err(_) => file_name.clone(),
|
|
99
|
+
// };
|
|
100
|
+
|
|
84
101
|
files.insert(file_name.clone(), f);
|
|
85
102
|
}
|
|
86
103
|
NodeType::ErrorNode(_) => {
|
|
@@ -92,6 +109,8 @@ impl LocalConstruction for MerkleTree {
|
|
|
92
109
|
|
|
93
110
|
add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
|
|
94
111
|
|
|
112
|
+
tracing::info!("number of files in the tree: {}", mt.files.len());
|
|
113
|
+
|
|
95
114
|
Ok(mt)
|
|
96
115
|
}
|
|
97
116
|
|
|
@@ -134,6 +153,7 @@ impl LocalConstruction for MerkleTree {
|
|
|
134
153
|
Ok(())
|
|
135
154
|
}
|
|
136
155
|
|
|
156
|
+
#[tracing::instrument]
|
|
137
157
|
async fn delete_file(
|
|
138
158
|
&mut self,
|
|
139
159
|
file_path: String,
|
package/src/merkle_tree/mod.rs
CHANGED
|
@@ -1,24 +1,26 @@
|
|
|
1
|
-
use crate::git_utils;
|
|
2
|
-
|
|
3
1
|
use super::file_utils;
|
|
4
2
|
use sha2::Digest;
|
|
5
3
|
use std::collections::{BTreeMap, HashSet};
|
|
6
4
|
use std::path::PathBuf;
|
|
5
|
+
use std::vec;
|
|
7
6
|
use std::{fs, path::Path, sync::Arc};
|
|
8
7
|
use tokio::sync::RwLock;
|
|
9
8
|
use tonic::async_trait;
|
|
10
|
-
use tracing::info;
|
|
9
|
+
use tracing::{debug, info};
|
|
10
|
+
|
|
11
11
|
pub mod local_construction;
|
|
12
12
|
pub mod test;
|
|
13
13
|
|
|
14
14
|
pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
|
|
15
15
|
|
|
16
|
+
#[derive(Debug)]
|
|
16
17
|
pub struct MerkleTree {
|
|
17
18
|
root_path: String,
|
|
18
19
|
root: MerkleNodePtr,
|
|
19
20
|
files: BTreeMap<String, File>,
|
|
20
21
|
cursor: Option<usize>,
|
|
21
|
-
|
|
22
|
+
git_ignored_files_and_dirs: HashSet<String>,
|
|
23
|
+
is_git_repo: bool,
|
|
22
24
|
}
|
|
23
25
|
|
|
24
26
|
#[derive(Debug)]
|
|
@@ -62,6 +64,8 @@ pub trait LocalConstruction {
|
|
|
62
64
|
|
|
63
65
|
async fn construct_merkle_tree(
|
|
64
66
|
root_directory: String,
|
|
67
|
+
git_ignored_files_and_dirs: HashSet<String>,
|
|
68
|
+
is_git_repo: bool,
|
|
65
69
|
) -> Result<MerkleTree, anyhow::Error>;
|
|
66
70
|
|
|
67
71
|
async fn update_file(
|
|
@@ -95,33 +99,34 @@ impl MerkleTree {
|
|
|
95
99
|
files: BTreeMap::new(),
|
|
96
100
|
root_path: "".to_string(),
|
|
97
101
|
cursor: None,
|
|
98
|
-
|
|
102
|
+
git_ignored_files_and_dirs: HashSet::new(),
|
|
103
|
+
is_git_repo: false,
|
|
99
104
|
}
|
|
100
105
|
}
|
|
101
106
|
|
|
102
107
|
pub async fn get_subtree_hash(
|
|
103
108
|
&self,
|
|
104
|
-
absolute_path:
|
|
109
|
+
absolute_path: &str,
|
|
105
110
|
) -> Result<String, anyhow::Error> {
|
|
106
|
-
|
|
107
|
-
Some(s) => s.to_string(),
|
|
108
|
-
None => {
|
|
109
|
-
return Err(anyhow::anyhow!(
|
|
110
|
-
"get_subtree_hash: Failed to convert path to string"
|
|
111
|
-
))
|
|
112
|
-
}
|
|
113
|
-
};
|
|
111
|
+
debug!("get_subtree_hash: absolute_path: {:?}", absolute_path);
|
|
114
112
|
|
|
115
|
-
let node = match self.files.get(
|
|
113
|
+
let node = match self.files.get(absolute_path) {
|
|
116
114
|
Some(file) => file.node.clone(),
|
|
117
115
|
None => {
|
|
118
|
-
|
|
116
|
+
let all_files: Vec<String> = self.files.keys().cloned().collect();
|
|
117
|
+
return Err(anyhow::anyhow!(
|
|
118
|
+
"Could not find file in tree! Looking for: {}. All files: {:?}",
|
|
119
|
+
absolute_path,
|
|
120
|
+
all_files
|
|
121
|
+
));
|
|
119
122
|
}
|
|
120
123
|
};
|
|
121
124
|
|
|
122
125
|
let node_reader = node.read().await;
|
|
123
126
|
let node_hash = node_reader.hash.clone();
|
|
124
127
|
|
|
128
|
+
debug!("node_hash: {:?}", node_hash);
|
|
129
|
+
|
|
125
130
|
Ok(node_hash)
|
|
126
131
|
}
|
|
127
132
|
|
|
@@ -285,6 +290,12 @@ impl MerkleTree {
|
|
|
285
290
|
) -> Result<Vec<String>, anyhow::Error> {
|
|
286
291
|
let mut files = Vec::new();
|
|
287
292
|
|
|
293
|
+
// 1. should check that this absolute path is actually a directory.
|
|
294
|
+
let file_node = self.files.get(absolute_path);
|
|
295
|
+
if file_node.is_none() {
|
|
296
|
+
return Err(anyhow::anyhow!("Could not find directory the in tree!"));
|
|
297
|
+
}
|
|
298
|
+
|
|
288
299
|
for (file_path, f) in &self.files {
|
|
289
300
|
if !file_path.contains(absolute_path) {
|
|
290
301
|
continue;
|
|
@@ -311,16 +322,11 @@ impl MerkleTree {
|
|
|
311
322
|
&self,
|
|
312
323
|
absolute_path: &str,
|
|
313
324
|
) -> Result<Vec<String>, anyhow::Error> {
|
|
314
|
-
info!("get_spline called with absolute_path: {}", absolute_path);
|
|
315
325
|
let mut files = Vec::new();
|
|
316
326
|
|
|
317
327
|
let current_node = match self.files.get(absolute_path) {
|
|
318
|
-
Some(node) =>
|
|
319
|
-
info!("Found node for absolute_path: {}", absolute_path);
|
|
320
|
-
node.node.clone()
|
|
321
|
-
}
|
|
328
|
+
Some(node) => node.node.clone(),
|
|
322
329
|
None => {
|
|
323
|
-
info!("File not found for absolute_path: {}", absolute_path);
|
|
324
330
|
return Err(anyhow::anyhow!("File not found: {}", absolute_path));
|
|
325
331
|
}
|
|
326
332
|
};
|
|
@@ -331,7 +337,6 @@ impl MerkleTree {
|
|
|
331
337
|
while let Some(node) = stack.pop() {
|
|
332
338
|
let parent = node.read().await.parent.clone();
|
|
333
339
|
if let Some(parent) = parent {
|
|
334
|
-
info!("Adding parent hash to files vector");
|
|
335
340
|
{
|
|
336
341
|
let parent_node = parent.read().await;
|
|
337
342
|
match &parent_node.node_type {
|
|
@@ -350,7 +355,6 @@ impl MerkleTree {
|
|
|
350
355
|
stack.push(parent);
|
|
351
356
|
}
|
|
352
357
|
}
|
|
353
|
-
info!("Returning files vector with {} elements", files.len());
|
|
354
358
|
Ok(files)
|
|
355
359
|
}
|
|
356
360
|
|
|
@@ -397,8 +401,9 @@ impl MerkleTree {
|
|
|
397
401
|
let new_node = MerkleNode::new(
|
|
398
402
|
file_path.clone(),
|
|
399
403
|
Some(ancestor.clone()),
|
|
400
|
-
&self.
|
|
404
|
+
&self.git_ignored_files_and_dirs,
|
|
401
405
|
&absolute_root_path.as_str(),
|
|
406
|
+
self.is_git_repo,
|
|
402
407
|
)
|
|
403
408
|
.await;
|
|
404
409
|
ancestor.write().await.attach_child(new_node.clone()).await;
|
|
@@ -414,8 +419,9 @@ impl MerkleTree {
|
|
|
414
419
|
let first_child = MerkleNode::new(
|
|
415
420
|
first_child_path.clone(),
|
|
416
421
|
Some(ancestor.clone()),
|
|
417
|
-
&self.
|
|
422
|
+
&self.git_ignored_files_and_dirs,
|
|
418
423
|
&absolute_root_path.as_str(),
|
|
424
|
+
self.is_git_repo,
|
|
419
425
|
)
|
|
420
426
|
.await;
|
|
421
427
|
|
|
@@ -703,13 +709,14 @@ impl MerkleNode {
|
|
|
703
709
|
parent: ParentPtr,
|
|
704
710
|
ignored_files: &IgnoredFiles,
|
|
705
711
|
absolute_root_path: &str,
|
|
712
|
+
is_git_repo: bool,
|
|
706
713
|
) -> MerkleNodePtr {
|
|
707
|
-
// check if the root is a git directory.
|
|
708
|
-
let is_git_repo =
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
714
|
+
// // check if the root is a git directory.
|
|
715
|
+
// let is_git_repo =
|
|
716
|
+
// match git_utils::is_git_directory(absolute_root_path).await {
|
|
717
|
+
// Ok(is_git_repo) => is_git_repo,
|
|
718
|
+
// Err(_e) => false,
|
|
719
|
+
// };
|
|
713
720
|
let bypass_git = !is_git_repo;
|
|
714
721
|
|
|
715
722
|
MerkleNode::construct_node(
|
|
@@ -722,25 +729,20 @@ impl MerkleNode {
|
|
|
722
729
|
.await
|
|
723
730
|
}
|
|
724
731
|
|
|
732
|
+
// #[tracing::instrument]
|
|
725
733
|
async fn new(
|
|
726
734
|
absolute_file_or_directory: PathBuf,
|
|
727
735
|
parent: ParentPtr,
|
|
728
736
|
ignored_files: &IgnoredFiles,
|
|
729
737
|
absolute_root_path: &str,
|
|
738
|
+
is_git_repo: bool,
|
|
730
739
|
) -> MerkleNodePtr {
|
|
731
|
-
// check if the root is a git directory.
|
|
732
|
-
let is_git_repo =
|
|
733
|
-
match git_utils::is_git_directory(absolute_root_path).await {
|
|
734
|
-
Ok(is_git_repo) => is_git_repo,
|
|
735
|
-
Err(_e) => false,
|
|
736
|
-
};
|
|
737
740
|
let bypass_git = !is_git_repo;
|
|
738
741
|
|
|
739
742
|
info!(
|
|
740
743
|
"constructing node for absolute_file_or_directory: {:?}",
|
|
741
744
|
absolute_file_or_directory
|
|
742
745
|
);
|
|
743
|
-
info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
|
|
744
746
|
|
|
745
747
|
MerkleNode::construct_node(
|
|
746
748
|
Path::new(&absolute_file_or_directory),
|
|
@@ -769,6 +771,7 @@ impl MerkleNode {
|
|
|
769
771
|
Box::pin(async move {
|
|
770
772
|
// check if it is a file
|
|
771
773
|
let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
|
|
774
|
+
|
|
772
775
|
if absolute_file_or_directory.is_file() {
|
|
773
776
|
return Arc::new(RwLock::new(
|
|
774
777
|
MerkleNode::construct_file_node_or_error_node(
|
|
@@ -790,22 +793,10 @@ impl MerkleNode {
|
|
|
790
793
|
)));
|
|
791
794
|
}
|
|
792
795
|
|
|
793
|
-
|
|
794
|
-
let is_git_ignored =
|
|
795
|
-
match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
|
|
796
|
-
.await
|
|
797
|
-
{
|
|
798
|
-
Ok(is_git_ignored) => is_git_ignored,
|
|
799
|
-
Err(e) => {
|
|
800
|
-
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
801
|
-
Some(absolute_file_or_directory),
|
|
802
|
-
Some(e.to_string()),
|
|
803
|
-
)));
|
|
804
|
-
}
|
|
805
|
-
};
|
|
796
|
+
let is_git_ignored_dir = ignored_files.contains(&path_str);
|
|
806
797
|
|
|
807
|
-
if
|
|
808
|
-
|
|
798
|
+
if is_git_ignored_dir && !bypass_git {
|
|
799
|
+
tracing::info!("skipping directory: {}", path_str);
|
|
809
800
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
810
801
|
Some(absolute_file_or_directory),
|
|
811
802
|
Some("Directory is git ignored!".to_string()),
|
|
@@ -816,6 +807,7 @@ impl MerkleNode {
|
|
|
816
807
|
match entries {
|
|
817
808
|
Ok(_) => (),
|
|
818
809
|
Err(e) => {
|
|
810
|
+
tracing::error!("error reading directory: {}", e);
|
|
819
811
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
820
812
|
Some(absolute_file_or_directory),
|
|
821
813
|
Some(e.to_string()),
|
|
@@ -848,6 +840,7 @@ impl MerkleNode {
|
|
|
848
840
|
);
|
|
849
841
|
}
|
|
850
842
|
Err(e) => {
|
|
843
|
+
tracing::error!("error reading directory: {}", e);
|
|
851
844
|
children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
852
845
|
Some(absolute_file_or_directory),
|
|
853
846
|
Some(e.to_string()),
|
|
@@ -894,18 +887,10 @@ impl MerkleNode {
|
|
|
894
887
|
false => {}
|
|
895
888
|
}
|
|
896
889
|
|
|
897
|
-
// read the file_content to a buffer
|
|
898
|
-
let file_content = match tokio::fs::read(absolute_file_path).await {
|
|
899
|
-
Ok(content) => content,
|
|
900
|
-
Err(e) => {
|
|
901
|
-
return Err(format!("Could not read file! {}", e.to_string()));
|
|
902
|
-
}
|
|
903
|
-
};
|
|
904
|
-
|
|
905
890
|
// check if the file passes runtime checks.
|
|
906
891
|
match file_utils::is_good_file_runtime_check(
|
|
907
892
|
absolute_file_path,
|
|
908
|
-
&file_content,
|
|
893
|
+
// &file_content,
|
|
909
894
|
)
|
|
910
895
|
.await
|
|
911
896
|
{
|
|
@@ -915,15 +900,14 @@ impl MerkleNode {
|
|
|
915
900
|
}
|
|
916
901
|
}
|
|
917
902
|
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
e.to_string()
|
|
924
|
-
|
|
925
|
-
}
|
|
926
|
-
};
|
|
903
|
+
// read the file_content to a buffer
|
|
904
|
+
let file_content =
|
|
905
|
+
match file_utils::read_string_without_bom(absolute_file_path).await {
|
|
906
|
+
Ok(content) => content,
|
|
907
|
+
Err(e) => {
|
|
908
|
+
return Err(format!("Could not read file! {}", e.to_string()));
|
|
909
|
+
}
|
|
910
|
+
};
|
|
927
911
|
|
|
928
912
|
let file_hash = compute_hash(&file_content);
|
|
929
913
|
let node = MerkleNode {
|
|
@@ -950,11 +934,7 @@ impl MerkleNode {
|
|
|
950
934
|
.await
|
|
951
935
|
{
|
|
952
936
|
Ok(node) => node,
|
|
953
|
-
Err(e) =>
|
|
954
|
-
// println!("constructing error node. error: {}", e);
|
|
955
|
-
// println!("file_path: {:?}", file_path);
|
|
956
|
-
MerkleNode::empty_node(Some(absolute_file_path), Some(e))
|
|
957
|
-
}
|
|
937
|
+
Err(e) => MerkleNode::empty_node(Some(absolute_file_path), Some(e)),
|
|
958
938
|
};
|
|
959
939
|
|
|
960
940
|
node
|
|
@@ -979,15 +959,50 @@ impl MerkleNode {
|
|
|
979
959
|
|
|
980
960
|
async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
|
|
981
961
|
let mut hasher = sha2::Sha256::new();
|
|
962
|
+
let mut names_and_hashes = vec![];
|
|
963
|
+
let mut non_zero_children = 0;
|
|
964
|
+
|
|
982
965
|
for child in children {
|
|
983
966
|
// check if it is an error node
|
|
984
967
|
let child_reader = child.read().await;
|
|
985
|
-
|
|
968
|
+
|
|
969
|
+
match &child_reader.node_type {
|
|
970
|
+
NodeType::File(file_name) => {
|
|
971
|
+
non_zero_children += 1;
|
|
972
|
+
names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
|
|
973
|
+
}
|
|
974
|
+
NodeType::Branch((file_name, _)) => {
|
|
975
|
+
let hash = child_reader.hash.clone();
|
|
976
|
+
if hash == "" {
|
|
977
|
+
continue;
|
|
978
|
+
}
|
|
979
|
+
|
|
980
|
+
non_zero_children += 1;
|
|
981
|
+
names_and_hashes.push((file_name.clone(), hash));
|
|
982
|
+
}
|
|
983
|
+
NodeType::ErrorNode(_) => {
|
|
984
|
+
continue;
|
|
985
|
+
}
|
|
986
|
+
}
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
// sort the list of names and hashes by the hashes!!
|
|
990
|
+
names_and_hashes
|
|
991
|
+
.sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
|
|
992
|
+
|
|
993
|
+
for (name, hash) in names_and_hashes {
|
|
994
|
+
if hash == "" {
|
|
986
995
|
continue;
|
|
987
996
|
}
|
|
997
|
+
hasher.update(hash);
|
|
998
|
+
}
|
|
988
999
|
|
|
989
|
-
|
|
1000
|
+
if non_zero_children == 0 {
|
|
1001
|
+
// this means that the branch is empty.
|
|
1002
|
+
// we should return an empty string.
|
|
1003
|
+
return "".to_string();
|
|
990
1004
|
}
|
|
1005
|
+
|
|
991
1006
|
let result = hasher.finalize();
|
|
992
1007
|
format!("{:x}", result)
|
|
993
1008
|
}
|
package/src/merkle_tree/test.rs
CHANGED
|
@@ -43,8 +43,9 @@ mod tests {
|
|
|
43
43
|
// let path = Path::new(&temp_dir_path);
|
|
44
44
|
|
|
45
45
|
// Test construct_merkle_tree() function
|
|
46
|
+
let new_set = std::collections::HashSet::<String>::new();
|
|
46
47
|
let tree =
|
|
47
|
-
MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
|
|
48
|
+
MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
|
|
48
49
|
let mut tree = match tree {
|
|
49
50
|
Ok(tree) => {
|
|
50
51
|
assert_eq!(tree.files.len(), 2);
|