@anysphere/file-service 0.0.0-fcf7dd4f → 0.0.0-ffd549b3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,11 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+ debugfile = []
13
+
9
14
  [dependencies]
10
15
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
16
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -22,6 +27,10 @@ tracing-subscriber = "0.3.17"
22
27
  tracing-appender = "0.2.2"
23
28
  binaryornot = "1.0.0"
24
29
  dunce = "1.0.1"
30
+ encoding_rs = "0.8.33"
31
+
32
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
33
+ tracing-axiom = "0.4"
25
34
 
26
35
  [build-dependencies]
27
36
  napi-build = "2.0.1"
@@ -29,5 +38,6 @@ tonic-build = "0.9.2"
29
38
  anyhow = "1.0.75"
30
39
  glob = "0.3.0"
31
40
 
41
+
32
42
  [profile.release]
33
43
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -5,8 +5,9 @@
5
5
 
6
6
  export class MerkleClient {
7
7
  constructor(absoluteRootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
8
+ isTooBig(maxFiles: number, gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<boolean>
9
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
11
  updateFile(filePath: string): Promise<void>
11
12
  deleteFile(filePath: string): Promise<void>
12
13
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-fcf7dd4f",
3
+ "version": "0.0.0-ffd549b3",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -36,12 +36,12 @@
36
36
  "version": "napi version"
37
37
  },
38
38
  "optionalDependencies": {
39
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-fcf7dd4f",
40
- "@anysphere/file-service-darwin-x64": "0.0.0-fcf7dd4f",
41
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-fcf7dd4f",
42
- "@anysphere/file-service-darwin-arm64": "0.0.0-fcf7dd4f",
43
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-fcf7dd4f",
44
- "@anysphere/file-service-darwin-universal": "0.0.0-fcf7dd4f",
45
- "@anysphere/file-service-linux-arm64-gnu": "0.0.0-fcf7dd4f"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-ffd549b3",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-ffd549b3",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-ffd549b3",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-ffd549b3",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-ffd549b3",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-ffd549b3",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-ffd549b3"
46
46
  }
47
47
  }
package/src/file_utils.rs CHANGED
@@ -5,6 +5,7 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
+ use encoding_rs::UTF_8;
8
9
  use std::path::Path;
9
10
  use tokio::fs;
10
11
 
@@ -43,8 +44,90 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock",
49
+ "bak",
50
+ "tmp",
51
+ "bin",
52
+ "exe",
53
+ "dll",
54
+ "so",
55
+ "lockb",
56
+ "qwoff",
57
+ "isl",
58
+ "csv",
59
+ "pdf",
60
+ // add ms word, excel, powerpoint, etc.
61
+ "doc",
62
+ "docx",
63
+ "xls",
64
+ "xlsx",
65
+ "ppt",
66
+ "pptx",
67
+ "odt",
68
+ "ods",
69
+ "odp",
70
+ "odg",
71
+ "odf",
72
+ "sxw",
73
+ "sxc",
74
+ "sxi",
75
+ "sxd",
76
+ "sdc",
77
+ // add images
78
+ "jpg",
79
+ "jpeg",
80
+ "png",
81
+ "gif",
82
+ "bmp",
83
+ "tif",
84
+ // add audio
85
+ "mp3",
86
+ "wav",
87
+ "wma",
88
+ "ogg",
89
+ "flac",
90
+ "aac",
91
+ // add video
92
+ "mp4",
93
+ "mov",
94
+ "wmv",
95
+ "flv",
96
+ "avi",
97
+ // add archives
98
+ "zip",
99
+ "tar",
100
+ "gz",
101
+ "7z",
102
+ "rar",
103
+ "tgz",
104
+ "dmg",
105
+ "iso",
106
+ "cue",
107
+ "mdf",
108
+ "mds",
109
+ "vcd",
110
+ "toast",
111
+ "img",
112
+ "apk",
113
+ "msi",
114
+ "cab",
115
+ "tar.gz",
116
+ "tar.xz",
117
+ "tar.bz2",
118
+ "tar.lzma",
119
+ "tar.Z",
120
+ "tar.sz",
121
+ "lzma",
122
+ // add fonts
123
+ "ttf",
124
+ "otf",
125
+ "woff",
126
+ "woff2",
127
+ "eot",
128
+ ];
129
+ match bad_extensions.contains(&extension) {
130
+ true => {
48
131
  return Err(anyhow::anyhow!("File is just a lock file"));
49
132
  }
50
133
  _ => {}
@@ -93,7 +176,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
93
176
  // implement the buffer above:
94
177
  pub async fn is_good_file_runtime_check(
95
178
  file_path: &Path,
96
- _buffer: &[u8],
179
+ // _buffer: &[u8],
97
180
  ) -> Result<(), Error> {
98
181
  match get_file_size(file_path).await {
99
182
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -103,13 +186,31 @@ pub async fn is_good_file_runtime_check(
103
186
  _ => {}
104
187
  }
105
188
 
106
- // if is_binary(file_path).context("Failed to check if file is binary")? {
107
- // return Err(anyhow::anyhow!("File is binary"));
108
- // }
189
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
190
+ // return Err(anyhow::anyhow!("File is binary"));
191
+ // }
109
192
 
110
193
  Ok(())
111
194
  }
112
195
 
196
+ pub async fn read_string_without_bom(
197
+ file_path: &Path,
198
+ ) -> Result<String, Error> {
199
+ let file_buffer = match fs::read(file_path).await {
200
+ Ok(buffer) => buffer,
201
+ Err(e) => {
202
+ return Err(anyhow::anyhow!(
203
+ "Failed to read file buffer: {}",
204
+ e.to_string()
205
+ ))
206
+ }
207
+ };
208
+
209
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
210
+
211
+ Ok(cow.to_string())
212
+ }
213
+
113
214
  pub fn as_relative_path(
114
215
  base_path: &Path,
115
216
  file_path: &Path,
@@ -167,25 +268,40 @@ mod tests {
167
268
  temp_file.write_all(b"Hello, world!").await.unwrap();
168
269
  let buffer = fs::read(&temp_file_path).await.unwrap();
169
270
  assert_eq!(
170
- is_good_file_runtime_check(&temp_file_path, &buffer)
171
- .await
172
- .is_ok(),
271
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
173
272
  true
174
273
  );
175
274
  temp_dir.close().unwrap();
176
275
 
276
+ // let temp_dir = tempfile::tempdir().unwrap();
277
+ // let temp_file_path = temp_dir.path().join("test_file");
278
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
279
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
280
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
281
+ // assert_eq!(
282
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
283
+ // true
284
+ // );
285
+ // temp_dir.close().unwrap();
286
+ }
287
+
288
+ #[tokio::test]
289
+ async fn test_bom_file() {
290
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
291
+ const CONTENT: &str = "Hello, world!";
292
+
293
+ // Write this to a temp file
177
294
  let temp_dir = tempfile::tempdir().unwrap();
178
295
  let temp_file_path = temp_dir.path().join("test_file");
179
296
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
180
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
181
- let buffer = fs::read(&temp_file_path).await.unwrap();
182
- assert_eq!(
183
- is_good_file_runtime_check(&temp_file_path, &buffer)
184
- .await
185
- .is_err(),
186
- true
187
- );
188
- temp_dir.close().unwrap();
297
+ temp_file.write_all(&BOM).await.unwrap();
298
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
299
+
300
+ // expect that we read the file with tokio as the CONTENT
301
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
302
+
303
+ // Check string equality of CONTENT (&str) to file_contents (String)
304
+ assert_eq!(CONTENT, file_contents);
189
305
  }
190
306
 
191
307
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,4 +1,5 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
5
  pub fn list_ignored_files_and_directories(
@@ -19,9 +20,7 @@ pub fn list_ignored_files_and_directories(
19
20
  ],
20
21
  // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
21
22
  vec![
22
- "sh",
23
- "-c",
24
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard --directory --no-empty-directory | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
25
24
  ],
26
25
  ];
27
26
 
@@ -36,9 +35,12 @@ pub fn list_ignored_files_and_directories(
36
35
  .lines()
37
36
  .filter(|line| !line.is_empty())
38
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
39
40
  if should_return_absolute_paths {
40
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
41
- path.push(line);
42
+
43
+ path.push(line.clone());
42
44
 
43
45
  match path.canonicalize() {
44
46
  Ok(canonical_path) => {
package/src/lib.rs CHANGED
@@ -1,16 +1,15 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
+ pub mod logger;
5
6
  pub mod merkle_tree;
6
7
 
7
- use std::vec;
8
+ use std::{collections::HashSet, vec};
8
9
 
9
- use merkle_tree::{LocalConstruction, MerkleTree};
10
- use tracing::{info, Level};
11
- use tracing_appender::rolling::{RollingFileAppender, Rotation};
12
- use tracing_subscriber::fmt;
13
10
  use anyhow::Context;
11
+ use merkle_tree::{LocalConstruction, MerkleTree};
12
+ use tracing::{debug, info};
14
13
 
15
14
  #[macro_use]
16
15
  extern crate napi_derive;
@@ -19,30 +18,14 @@ extern crate napi_derive;
19
18
  pub struct MerkleClient {
20
19
  tree: MerkleTree,
21
20
  absolute_root_directory: String,
22
- _guard: tracing_appender::non_blocking::WorkerGuard,
23
- }
24
-
25
- pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
26
- let file_appender =
27
- RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
28
- let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
29
- let subscriber = fmt::Subscriber::builder()
30
- .with_max_level(Level::TRACE)
31
- .with_writer(non_blocking)
32
- .with_ansi(false)
33
- .with_line_number(true)
34
- .finish();
35
-
36
- let _ = tracing::subscriber::set_global_default(subscriber);
37
-
38
- _guard
21
+ _guard: Option<logger::GuardType>,
39
22
  }
40
23
 
41
24
  #[napi]
42
25
  impl MerkleClient {
43
26
  #[napi(constructor)]
44
27
  pub fn new(absolute_root_directory: String) -> MerkleClient {
45
- let _guard = init_logger();
28
+ let _guard = logger::init_logger();
46
29
 
47
30
  // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
48
31
  // use dunce::canonicalize;
@@ -61,14 +44,74 @@ impl MerkleClient {
61
44
  }
62
45
  }
63
46
 
47
+ #[napi]
48
+ pub async fn is_too_big(
49
+ &self,
50
+ max_files: i32,
51
+ git_ignored_files: Vec<String>,
52
+ is_git_repo: bool,
53
+ ) -> bool {
54
+ let git_ignored_set =
55
+ HashSet::<String>::from_iter(git_ignored_files.into_iter());
56
+ let mut num_files = 0;
57
+ let mut dirs_to_check = vec![self.absolute_root_directory.clone()];
58
+
59
+ while let Some(dir) = dirs_to_check.pop() {
60
+ info!("dir: {:?}", dir);
61
+ let mut entries = match tokio::fs::read_dir(&dir).await {
62
+ Ok(entries) => entries,
63
+ Err(_) => continue,
64
+ };
65
+ if num_files > max_files {
66
+ return true;
67
+ }
68
+
69
+
70
+ while let Some(entry) = entries.next_entry().await.unwrap_or(None) {
71
+ let path = entry.path();
72
+ info!("entry: {:?}", path);
73
+ let path_str = match path.to_str() {
74
+ Some(path_str) => path_str.to_string(),
75
+ None => continue,
76
+ };
77
+
78
+ if git_ignored_set.contains(&path_str) {
79
+ continue;
80
+ }
81
+
82
+ match entry.file_type().await {
83
+ Ok(file_type) => {
84
+ if file_type.is_dir() {
85
+ dirs_to_check.push(path_str);
86
+ }
87
+
88
+ if file_type.is_file() {
89
+ num_files += 1;
90
+ }
91
+ }
92
+ Err(_) => continue,
93
+ }
94
+
95
+ }
96
+ }
97
+ num_files > max_files
98
+ }
99
+
64
100
  #[napi]
65
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
101
+ pub async unsafe fn init(
102
+ &mut self,
103
+ git_ignored_files: Vec<String>,
104
+ is_git_repo: bool,
105
+ ) -> Result<(), napi::Error> {
66
106
  // 1. compute the merkle tree
67
107
  // 2. update the backend
68
108
  // 3. sync with the remote
69
109
  info!("Merkle tree compute started!");
110
+ info!("Root directory: {:?}", self.absolute_root_directory);
70
111
  unsafe {
71
- self.compute_merkle_tree().await?;
112
+ self
113
+ .compute_merkle_tree(git_ignored_files, is_git_repo)
114
+ .await?;
72
115
  }
73
116
 
74
117
  Ok(())
@@ -81,9 +124,24 @@ impl MerkleClient {
81
124
  #[napi]
82
125
  pub async unsafe fn compute_merkle_tree(
83
126
  &mut self,
127
+ git_ignored_files: Vec<String>,
128
+ is_git_repo: bool,
84
129
  ) -> Result<(), napi::Error> {
85
- let t =
86
- MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone()).await;
130
+ // make the git ignored files into a hash set
131
+ let mut git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
132
+
133
+ // if the hashset itself contains the root directory, then we should remove it.
134
+ // this is because the root directory is not a file, and we don't want to ignore it.
135
+ if git_ignored_set.contains(&self.absolute_root_directory) {
136
+ git_ignored_set.remove(&self.absolute_root_directory);
137
+ }
138
+
139
+ let t = MerkleTree::construct_merkle_tree(
140
+ self.absolute_root_directory.clone(),
141
+ git_ignored_set,
142
+ is_git_repo,
143
+ )
144
+ .await;
87
145
 
88
146
  match t {
89
147
  Ok(tree) => {
@@ -112,29 +170,44 @@ impl MerkleClient {
112
170
  &self,
113
171
  relative_path: String,
114
172
  ) -> Result<String, napi::Error> {
173
+ debug!("get_subtree_hash: relative_path: {:?}", relative_path);
115
174
 
116
- let relative_path_without_leading_slash = match relative_path.strip_prefix('.') {
175
+ let relative_path_without_leading_slash = match relative_path
176
+ .strip_prefix('.')
177
+ {
117
178
  Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
118
179
  None => relative_path.as_str(),
119
180
  };
181
+ debug!(
182
+ "relative_path_without_leading_slash: {:?}",
183
+ relative_path_without_leading_slash
184
+ );
120
185
 
121
186
  let absolute_path = if !relative_path_without_leading_slash.is_empty() {
122
- std::path::Path::new(&self.absolute_root_directory).join(relative_path_without_leading_slash)
187
+ std::path::Path::new(&self.absolute_root_directory)
188
+ .join(relative_path_without_leading_slash)
123
189
  } else {
124
190
  std::path::Path::new(&self.absolute_root_directory).to_path_buf()
125
191
  };
126
192
 
193
+ debug!("absolute_path: {:?}", absolute_path);
194
+
127
195
  let absolute_path_string = match absolute_path.to_str() {
128
196
  Some(path) => path.to_string(),
129
197
  None => {
130
198
  return Err(napi::Error::new(
131
199
  napi::Status::Unknown,
132
- format!("some string error")
200
+ format!("some string error"),
133
201
  ))
134
202
  }
135
203
  };
136
204
 
137
- let hash = self.tree.get_subtree_hash(absolute_path_string.as_str()).await;
205
+ debug!("absolute_path_string: {:?}", absolute_path_string);
206
+
207
+ let hash = self
208
+ .tree
209
+ .get_subtree_hash(absolute_path_string.as_str())
210
+ .await;
138
211
 
139
212
  match hash {
140
213
  Ok(hash) => Ok(hash),
@@ -153,7 +226,7 @@ impl MerkleClient {
153
226
  Ok(num) => Ok(num),
154
227
  Err(e) => Err(napi::Error::new(
155
228
  napi::Status::Unknown,
156
- format!("Error in get_num_embeddable_files: {:?}", e)
229
+ format!("Error in get_num_embeddable_files: {:?}", e),
157
230
  )),
158
231
  }
159
232
  }
@@ -175,7 +248,7 @@ impl MerkleClient {
175
248
  Ok(num) => Ok(num),
176
249
  Err(e) => Err(napi::Error::new(
177
250
  napi::Status::Unknown,
178
- format!("Error in get_num_embeddable_files_in_subtree: {:?}", e)
251
+ format!("Error in get_num_embeddable_files_in_subtree: {:?}", e),
179
252
  )),
180
253
  }
181
254
  }
@@ -188,7 +261,7 @@ impl MerkleClient {
188
261
  Ok(files) => Ok(files),
189
262
  Err(e) => Err(napi::Error::new(
190
263
  napi::Status::Unknown,
191
- format!("Error in get_all_files: {:?}", e)
264
+ format!("Error in get_all_files: {:?}", e),
192
265
  )),
193
266
  }
194
267
  }
package/src/logger.rs ADDED
@@ -0,0 +1,55 @@
1
+ use tracing::{info, subscriber, Level};
2
+ use tracing_appender::non_blocking::WorkerGuard;
3
+ use tracing_appender::rolling::{RollingFileAppender, Rotation};
4
+ use tracing_subscriber::fmt;
5
+ use tracing_subscriber::prelude::*;
6
+
7
+ pub enum GuardType {
8
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
9
+ Guard(tracing_axiom::Guard),
10
+ WorkerGuard(tracing_appender::non_blocking::WorkerGuard),
11
+ }
12
+
13
+ pub fn init_logger() -> Option<GuardType> {
14
+ #[cfg(feature = "debugfile")]
15
+ let _guard = {
16
+ let file_appender =
17
+ RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
18
+ let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
19
+ let subscriber = fmt::Subscriber::builder()
20
+ .with_max_level(Level::TRACE)
21
+ .with_writer(non_blocking)
22
+ .with_ansi(false)
23
+ .with_line_number(true)
24
+ .finish();
25
+
26
+ let _ = tracing::subscriber::set_global_default(subscriber);
27
+
28
+ Some(GuardType::WorkerGuard(_guard))
29
+ };
30
+
31
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
32
+ let _guard = {
33
+ let (axiom_layer, _guard) = tracing_axiom::builder()
34
+ .with_token("xaat-a51088e6-7889-41c0-b440-cfd4601acdd7")
35
+ .with_dataset("local-indexing")
36
+ .layer()
37
+ .ok()?;
38
+ // let fmt_layer = fmt::layer().with_level(true).with_ansi(false).with_line_number(true);
39
+
40
+ let _ = tracing_subscriber::registry()
41
+ .with(axiom_layer)
42
+ .try_init()
43
+ .ok()?;
44
+ // let _ = tracing::subscriber::set_global_default(subscriber);
45
+
46
+ info!("Tracing initialized! in rust");
47
+
48
+ Some(GuardType::Guard(_guard))
49
+ };
50
+
51
+ #[cfg(all(not(feature = "debugfile"), target_os = "linux"))]
52
+ let _guard = { None };
53
+
54
+ _guard
55
+ }
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -10,11 +9,18 @@ use tonic::async_trait;
10
9
 
11
10
  #[async_trait]
12
11
  impl LocalConstruction for MerkleTree {
12
+ #[tracing::instrument]
13
13
  async fn new(
14
14
  root_directory: Option<String>,
15
15
  ) -> Result<MerkleTree, anyhow::Error> {
16
+ let git_ignored_files = HashSet::<String>::new();
16
17
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
18
+ let n = MerkleTree::construct_merkle_tree(
19
+ root_directory,
20
+ git_ignored_files,
21
+ false,
22
+ )
23
+ .await;
18
24
  return n;
19
25
  }
20
26
 
@@ -30,6 +36,8 @@ impl LocalConstruction for MerkleTree {
30
36
  /// 4. return merkle tree
31
37
  async fn construct_merkle_tree(
32
38
  absolute_path_to_root_directory: String,
39
+ git_ignored_files_and_dirs: HashSet<String>,
40
+ is_git_repo: bool,
33
41
  ) -> Result<MerkleTree, anyhow::Error> {
34
42
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
43
  if !path.exists() {
@@ -38,28 +46,31 @@ impl LocalConstruction for MerkleTree {
38
46
  }
39
47
 
40
48
  // 1. get all the gitignored files
41
- let git_ignored_files_and_dirs =
42
- match git_utils::list_ignored_files_and_directories(
43
- absolute_path_to_root_directory.as_str(),
44
- true,
45
- ) {
46
- Ok(git_ignored) => git_ignored,
47
- Err(_e) => HashSet::new(),
48
- };
49
+ // let git_ignored_files_and_dirs =
50
+ // match git_utils::list_ignored_files_and_directories(
51
+ // absolute_path_to_root_directory.as_str(),
52
+ // true,
53
+ // ) {
54
+ // Ok(git_ignored) => git_ignored,
55
+ // Err(_e) => HashSet::new(),
56
+ // };
49
57
 
50
58
  let root_node = MerkleNode::new(
51
59
  path,
52
60
  None,
53
61
  &git_ignored_files_and_dirs,
54
62
  absolute_path_to_root_directory.as_str(),
63
+ is_git_repo,
55
64
  )
56
65
  .await;
66
+
57
67
  let mut mt = MerkleTree {
58
68
  root: root_node,
59
69
  files: BTreeMap::new(),
60
70
  root_path: absolute_path_to_root_directory,
61
71
  cursor: None,
62
- git_ignored_files_and_dirs: git_ignored_files_and_dirs,
72
+ git_ignored_files_and_dirs,
73
+ is_git_repo,
63
74
  };
64
75
 
65
76
  // we now iterate over all the nodes and add them to the hashmap
@@ -72,7 +83,6 @@ impl LocalConstruction for MerkleTree {
72
83
  let node_reader = node.read().await;
73
84
  match &node_reader.node_type {
74
85
  NodeType::Branch(n) => {
75
- tracing::info!("Branch: {:?}", n.0);
76
86
  let children = &n.1;
77
87
  files.insert(n.0.clone(), File { node: node.clone() });
78
88
  for child in children {
@@ -99,8 +109,7 @@ impl LocalConstruction for MerkleTree {
99
109
 
100
110
  add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
101
111
 
102
- tracing::info!("Merkle tree compute finished!");
103
- tracing::info!("Merkle tree: {}", mt);
112
+ tracing::info!("number of files in the tree: {}", mt.files.len());
104
113
 
105
114
  Ok(mt)
106
115
  }
@@ -144,6 +153,7 @@ impl LocalConstruction for MerkleTree {
144
153
  Ok(())
145
154
  }
146
155
 
156
+ #[tracing::instrument]
147
157
  async fn delete_file(
148
158
  &mut self,
149
159
  file_path: String,
@@ -1,5 +1,3 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
@@ -8,18 +6,21 @@ use std::vec;
8
6
  use std::{fs, path::Path, sync::Arc};
9
7
  use tokio::sync::RwLock;
10
8
  use tonic::async_trait;
11
- use tracing::info;
9
+ use tracing::{debug, info};
10
+
12
11
  pub mod local_construction;
13
12
  pub mod test;
14
13
 
15
14
  pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
16
15
 
16
+ #[derive(Debug)]
17
17
  pub struct MerkleTree {
18
18
  root_path: String,
19
19
  root: MerkleNodePtr,
20
20
  files: BTreeMap<String, File>,
21
21
  cursor: Option<usize>,
22
22
  git_ignored_files_and_dirs: HashSet<String>,
23
+ is_git_repo: bool,
23
24
  }
24
25
 
25
26
  #[derive(Debug)]
@@ -63,6 +64,8 @@ pub trait LocalConstruction {
63
64
 
64
65
  async fn construct_merkle_tree(
65
66
  root_directory: String,
67
+ git_ignored_files_and_dirs: HashSet<String>,
68
+ is_git_repo: bool,
66
69
  ) -> Result<MerkleTree, anyhow::Error>;
67
70
 
68
71
  async fn update_file(
@@ -97,6 +100,7 @@ impl MerkleTree {
97
100
  root_path: "".to_string(),
98
101
  cursor: None,
99
102
  git_ignored_files_and_dirs: HashSet::new(),
103
+ is_git_repo: false,
100
104
  }
101
105
  }
102
106
 
@@ -104,6 +108,8 @@ impl MerkleTree {
104
108
  &self,
105
109
  absolute_path: &str,
106
110
  ) -> Result<String, anyhow::Error> {
111
+ debug!("get_subtree_hash: absolute_path: {:?}", absolute_path);
112
+
107
113
  let node = match self.files.get(absolute_path) {
108
114
  Some(file) => file.node.clone(),
109
115
  None => {
@@ -119,10 +125,7 @@ impl MerkleTree {
119
125
  let node_reader = node.read().await;
120
126
  let node_hash = node_reader.hash.clone();
121
127
 
122
- info!(
123
- "get_subtree_hash for path: {}, node_hash: {}",
124
- absolute_path, node_hash
125
- );
128
+ debug!("node_hash: {:?}", node_hash);
126
129
 
127
130
  Ok(node_hash)
128
131
  }
@@ -287,6 +290,12 @@ impl MerkleTree {
287
290
  ) -> Result<Vec<String>, anyhow::Error> {
288
291
  let mut files = Vec::new();
289
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
290
299
  for (file_path, f) in &self.files {
291
300
  if !file_path.contains(absolute_path) {
292
301
  continue;
@@ -313,16 +322,11 @@ impl MerkleTree {
313
322
  &self,
314
323
  absolute_path: &str,
315
324
  ) -> Result<Vec<String>, anyhow::Error> {
316
- info!("get_spline called with absolute_path: {}", absolute_path);
317
325
  let mut files = Vec::new();
318
326
 
319
327
  let current_node = match self.files.get(absolute_path) {
320
- Some(node) => {
321
- info!("Found node for absolute_path: {}", absolute_path);
322
- node.node.clone()
323
- }
328
+ Some(node) => node.node.clone(),
324
329
  None => {
325
- info!("File not found for absolute_path: {}", absolute_path);
326
330
  return Err(anyhow::anyhow!("File not found: {}", absolute_path));
327
331
  }
328
332
  };
@@ -333,7 +337,6 @@ impl MerkleTree {
333
337
  while let Some(node) = stack.pop() {
334
338
  let parent = node.read().await.parent.clone();
335
339
  if let Some(parent) = parent {
336
- info!("Adding parent hash to files vector");
337
340
  {
338
341
  let parent_node = parent.read().await;
339
342
  match &parent_node.node_type {
@@ -352,7 +355,6 @@ impl MerkleTree {
352
355
  stack.push(parent);
353
356
  }
354
357
  }
355
- info!("Returning files vector with {} elements", files.len());
356
358
  Ok(files)
357
359
  }
358
360
 
@@ -401,6 +403,7 @@ impl MerkleTree {
401
403
  Some(ancestor.clone()),
402
404
  &self.git_ignored_files_and_dirs,
403
405
  &absolute_root_path.as_str(),
406
+ self.is_git_repo,
404
407
  )
405
408
  .await;
406
409
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -418,6 +421,7 @@ impl MerkleTree {
418
421
  Some(ancestor.clone()),
419
422
  &self.git_ignored_files_and_dirs,
420
423
  &absolute_root_path.as_str(),
424
+ self.is_git_repo,
421
425
  )
422
426
  .await;
423
427
 
@@ -705,13 +709,14 @@ impl MerkleNode {
705
709
  parent: ParentPtr,
706
710
  ignored_files: &IgnoredFiles,
707
711
  absolute_root_path: &str,
712
+ is_git_repo: bool,
708
713
  ) -> MerkleNodePtr {
709
- // check if the root is a git directory.
710
- let is_git_repo =
711
- match git_utils::is_git_directory(absolute_root_path).await {
712
- Ok(is_git_repo) => is_git_repo,
713
- Err(e) => false,
714
- };
714
+ // // check if the root is a git directory.
715
+ // let is_git_repo =
716
+ // match git_utils::is_git_directory(absolute_root_path).await {
717
+ // Ok(is_git_repo) => is_git_repo,
718
+ // Err(_e) => false,
719
+ // };
715
720
  let bypass_git = !is_git_repo;
716
721
 
717
722
  MerkleNode::construct_node(
@@ -724,25 +729,20 @@ impl MerkleNode {
724
729
  .await
725
730
  }
726
731
 
732
+ // #[tracing::instrument]
727
733
  async fn new(
728
734
  absolute_file_or_directory: PathBuf,
729
735
  parent: ParentPtr,
730
736
  ignored_files: &IgnoredFiles,
731
737
  absolute_root_path: &str,
738
+ is_git_repo: bool,
732
739
  ) -> MerkleNodePtr {
733
- // check if the root is a git directory.
734
- let is_git_repo =
735
- match git_utils::is_git_directory(absolute_root_path).await {
736
- Ok(is_git_repo) => is_git_repo,
737
- Err(_e) => false,
738
- };
739
740
  let bypass_git = !is_git_repo;
740
741
 
741
742
  info!(
742
743
  "constructing node for absolute_file_or_directory: {:?}",
743
744
  absolute_file_or_directory
744
745
  );
745
- info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
746
746
 
747
747
  MerkleNode::construct_node(
748
748
  Path::new(&absolute_file_or_directory),
@@ -771,6 +771,7 @@ impl MerkleNode {
771
771
  Box::pin(async move {
772
772
  // check if it is a file
773
773
  let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
774
+
774
775
  if absolute_file_or_directory.is_file() {
775
776
  return Arc::new(RwLock::new(
776
777
  MerkleNode::construct_file_node_or_error_node(
@@ -795,6 +796,7 @@ impl MerkleNode {
795
796
  let is_git_ignored_dir = ignored_files.contains(&path_str);
796
797
 
797
798
  if is_git_ignored_dir && !bypass_git {
799
+ tracing::info!("skipping directory: {}", path_str);
798
800
  return Arc::new(RwLock::new(MerkleNode::empty_node(
799
801
  Some(absolute_file_or_directory),
800
802
  Some("Directory is git ignored!".to_string()),
@@ -805,6 +807,7 @@ impl MerkleNode {
805
807
  match entries {
806
808
  Ok(_) => (),
807
809
  Err(e) => {
810
+ tracing::error!("error reading directory: {}", e);
808
811
  return Arc::new(RwLock::new(MerkleNode::empty_node(
809
812
  Some(absolute_file_or_directory),
810
813
  Some(e.to_string()),
@@ -837,6 +840,7 @@ impl MerkleNode {
837
840
  );
838
841
  }
839
842
  Err(e) => {
843
+ tracing::error!("error reading directory: {}", e);
840
844
  children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
841
845
  Some(absolute_file_or_directory),
842
846
  Some(e.to_string()),
@@ -883,18 +887,10 @@ impl MerkleNode {
883
887
  false => {}
884
888
  }
885
889
 
886
- // read the file_content to a buffer
887
- let file_content = match tokio::fs::read(absolute_file_path).await {
888
- Ok(content) => content,
889
- Err(e) => {
890
- return Err(format!("Could not read file! {}", e.to_string()));
891
- }
892
- };
893
-
894
890
  // check if the file passes runtime checks.
895
891
  match file_utils::is_good_file_runtime_check(
896
892
  absolute_file_path,
897
- &file_content,
893
+ // &file_content,
898
894
  )
899
895
  .await
900
896
  {
@@ -904,15 +900,14 @@ impl MerkleNode {
904
900
  }
905
901
  }
906
902
 
907
- let file_content = match std::str::from_utf8(&file_content) {
908
- Ok(content) => content,
909
- Err(e) => {
910
- return Err(format!(
911
- "UTF8 Failure. Could not convert file content to string! {}",
912
- e.to_string()
913
- ));
914
- }
915
- };
903
+ // read the file_content to a buffer
904
+ let file_content =
905
+ match file_utils::read_string_without_bom(absolute_file_path).await {
906
+ Ok(content) => content,
907
+ Err(e) => {
908
+ return Err(format!("Could not read file! {}", e.to_string()));
909
+ }
910
+ };
916
911
 
917
912
  let file_hash = compute_hash(&file_content);
918
913
  let node = MerkleNode {
@@ -939,11 +934,7 @@ impl MerkleNode {
939
934
  .await
940
935
  {
941
936
  Ok(node) => node,
942
- Err(e) => {
943
- // println!("constructing error node. error: {}", e);
944
- // println!("file_path: {:?}", file_path);
945
- MerkleNode::empty_node(Some(absolute_file_path), Some(e))
946
- }
937
+ Err(e) => MerkleNode::empty_node(Some(absolute_file_path), Some(e)),
947
938
  };
948
939
 
949
940
  node
@@ -1003,7 +994,6 @@ impl MerkleNode {
1003
994
  if hash == "" {
1004
995
  continue;
1005
996
  }
1006
- info!("name: {}, hash: {}", name, hash);
1007
997
  hasher.update(hash);
1008
998
  }
1009
999
 
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);