@anysphere/file-service 0.0.0-b6f39ec9 → 0.0.0-b8ef5486

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,11 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+ debugfile = []
13
+
9
14
  [dependencies]
10
15
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
16
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -22,6 +27,10 @@ tracing-subscriber = "0.3.17"
22
27
  tracing-appender = "0.2.2"
23
28
  binaryornot = "1.0.0"
24
29
  dunce = "1.0.1"
30
+ encoding_rs = "0.8.33"
31
+
32
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
33
+ tracing-axiom = "0.4"
25
34
 
26
35
  [build-dependencies]
27
36
  napi-build = "2.0.1"
@@ -29,5 +38,6 @@ tonic-build = "0.9.2"
29
38
  anyhow = "1.0.75"
30
39
  glob = "0.3.0"
31
40
 
41
+
32
42
  [profile.release]
33
43
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -5,8 +5,8 @@
5
5
 
6
6
  export class MerkleClient {
7
7
  constructor(absoluteRootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
8
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
9
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
10
  updateFile(filePath: string): Promise<void>
11
11
  deleteFile(filePath: string): Promise<void>
12
12
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-b6f39ec9",
3
+ "version": "0.0.0-b8ef5486",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -36,12 +36,12 @@
36
36
  "version": "napi version"
37
37
  },
38
38
  "optionalDependencies": {
39
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-b6f39ec9",
40
- "@anysphere/file-service-darwin-x64": "0.0.0-b6f39ec9",
41
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-b6f39ec9",
42
- "@anysphere/file-service-darwin-arm64": "0.0.0-b6f39ec9",
43
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-b6f39ec9",
44
- "@anysphere/file-service-darwin-universal": "0.0.0-b6f39ec9",
45
- "@anysphere/file-service-linux-arm64-gnu": "0.0.0-b6f39ec9"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-b8ef5486",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-b8ef5486",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-b8ef5486",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-b8ef5486",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-b8ef5486",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-b8ef5486",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-b8ef5486"
46
46
  }
47
47
  }
package/src/file_utils.rs CHANGED
@@ -5,6 +5,7 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
+ use encoding_rs::UTF_8;
8
9
  use std::path::Path;
9
10
  use tokio::fs;
10
11
 
@@ -43,8 +44,90 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock",
49
+ "bak",
50
+ "tmp",
51
+ "bin",
52
+ "exe",
53
+ "dll",
54
+ "so",
55
+ "lockb",
56
+ "qwoff",
57
+ "isl",
58
+ "csv",
59
+ "pdf",
60
+ // add ms word, excel, powerpoint, etc.
61
+ "doc",
62
+ "docx",
63
+ "xls",
64
+ "xlsx",
65
+ "ppt",
66
+ "pptx",
67
+ "odt",
68
+ "ods",
69
+ "odp",
70
+ "odg",
71
+ "odf",
72
+ "sxw",
73
+ "sxc",
74
+ "sxi",
75
+ "sxd",
76
+ "sdc",
77
+ // add images
78
+ "jpg",
79
+ "jpeg",
80
+ "png",
81
+ "gif",
82
+ "bmp",
83
+ "tif",
84
+ // add audio
85
+ "mp3",
86
+ "wav",
87
+ "wma",
88
+ "ogg",
89
+ "flac",
90
+ "aac",
91
+ // add video
92
+ "mp4",
93
+ "mov",
94
+ "wmv",
95
+ "flv",
96
+ "avi",
97
+ // add archives
98
+ "zip",
99
+ "tar",
100
+ "gz",
101
+ "7z",
102
+ "rar",
103
+ "tgz",
104
+ "dmg",
105
+ "iso",
106
+ "cue",
107
+ "mdf",
108
+ "mds",
109
+ "vcd",
110
+ "toast",
111
+ "img",
112
+ "apk",
113
+ "msi",
114
+ "cab",
115
+ "tar.gz",
116
+ "tar.xz",
117
+ "tar.bz2",
118
+ "tar.lzma",
119
+ "tar.Z",
120
+ "tar.sz",
121
+ "lzma",
122
+ // add fonts
123
+ "ttf",
124
+ "otf",
125
+ "woff",
126
+ "woff2",
127
+ "eot",
128
+ ];
129
+ match bad_extensions.contains(&extension) {
130
+ true => {
48
131
  return Err(anyhow::anyhow!("File is just a lock file"));
49
132
  }
50
133
  _ => {}
@@ -93,7 +176,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
93
176
  // implement the buffer above:
94
177
  pub async fn is_good_file_runtime_check(
95
178
  file_path: &Path,
96
- _buffer: &[u8],
179
+ // _buffer: &[u8],
97
180
  ) -> Result<(), Error> {
98
181
  match get_file_size(file_path).await {
99
182
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -103,13 +186,31 @@ pub async fn is_good_file_runtime_check(
103
186
  _ => {}
104
187
  }
105
188
 
106
- // if is_binary(file_path).context("Failed to check if file is binary")? {
107
- // return Err(anyhow::anyhow!("File is binary"));
108
- // }
189
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
190
+ // return Err(anyhow::anyhow!("File is binary"));
191
+ // }
109
192
 
110
193
  Ok(())
111
194
  }
112
195
 
196
+ pub async fn read_string_without_bom(
197
+ file_path: &Path,
198
+ ) -> Result<String, Error> {
199
+ let file_buffer = match fs::read(file_path).await {
200
+ Ok(buffer) => buffer,
201
+ Err(e) => {
202
+ return Err(anyhow::anyhow!(
203
+ "Failed to read file buffer: {}",
204
+ e.to_string()
205
+ ))
206
+ }
207
+ };
208
+
209
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
210
+
211
+ Ok(cow.to_string())
212
+ }
213
+
113
214
  pub fn as_relative_path(
114
215
  base_path: &Path,
115
216
  file_path: &Path,
@@ -167,25 +268,40 @@ mod tests {
167
268
  temp_file.write_all(b"Hello, world!").await.unwrap();
168
269
  let buffer = fs::read(&temp_file_path).await.unwrap();
169
270
  assert_eq!(
170
- is_good_file_runtime_check(&temp_file_path, &buffer)
171
- .await
172
- .is_ok(),
271
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
173
272
  true
174
273
  );
175
274
  temp_dir.close().unwrap();
176
275
 
276
+ // let temp_dir = tempfile::tempdir().unwrap();
277
+ // let temp_file_path = temp_dir.path().join("test_file");
278
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
279
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
280
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
281
+ // assert_eq!(
282
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
283
+ // true
284
+ // );
285
+ // temp_dir.close().unwrap();
286
+ }
287
+
288
+ #[tokio::test]
289
+ async fn test_bom_file() {
290
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
291
+ const CONTENT: &str = "Hello, world!";
292
+
293
+ // Write this to a temp file
177
294
  let temp_dir = tempfile::tempdir().unwrap();
178
295
  let temp_file_path = temp_dir.path().join("test_file");
179
296
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
180
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
181
- let buffer = fs::read(&temp_file_path).await.unwrap();
182
- assert_eq!(
183
- is_good_file_runtime_check(&temp_file_path, &buffer)
184
- .await
185
- .is_err(),
186
- true
187
- );
188
- temp_dir.close().unwrap();
297
+ temp_file.write_all(&BOM).await.unwrap();
298
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
299
+
300
+ // expect that we read the file with tokio as the CONTENT
301
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
302
+
303
+ // Check string equality of CONTENT (&str) to file_contents (String)
304
+ assert_eq!(CONTENT, file_contents);
189
305
  }
190
306
 
191
307
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,4 +1,5 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
5
  pub fn list_ignored_files_and_directories(
@@ -19,9 +20,7 @@ pub fn list_ignored_files_and_directories(
19
20
  ],
20
21
  // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
21
22
  vec![
22
- "sh",
23
- "-c",
24
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard --directory --no-empty-directory | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
25
24
  ],
26
25
  ];
27
26
 
@@ -36,9 +35,12 @@ pub fn list_ignored_files_and_directories(
36
35
  .lines()
37
36
  .filter(|line| !line.is_empty())
38
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
39
40
  if should_return_absolute_paths {
40
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
41
- path.push(line);
42
+
43
+ path.push(line.clone());
42
44
 
43
45
  match path.canonicalize() {
44
46
  Ok(canonical_path) => {
package/src/lib.rs CHANGED
@@ -1,16 +1,15 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
+ pub mod logger;
5
6
  pub mod merkle_tree;
6
7
 
7
- use std::vec;
8
+ use std::{collections::HashSet, vec};
8
9
 
9
- use merkle_tree::{LocalConstruction, MerkleTree};
10
- use tracing::{info, Level};
11
- use tracing_appender::rolling::{RollingFileAppender, Rotation};
12
- use tracing_subscriber::fmt;
13
10
  use anyhow::Context;
11
+ use merkle_tree::{LocalConstruction, MerkleTree};
12
+ use tracing::{debug, info};
14
13
 
15
14
  #[macro_use]
16
15
  extern crate napi_derive;
@@ -19,30 +18,14 @@ extern crate napi_derive;
19
18
  pub struct MerkleClient {
20
19
  tree: MerkleTree,
21
20
  absolute_root_directory: String,
22
- _guard: tracing_appender::non_blocking::WorkerGuard,
23
- }
24
-
25
- pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
26
- let file_appender =
27
- RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
28
- let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
29
- let subscriber = fmt::Subscriber::builder()
30
- .with_max_level(Level::TRACE)
31
- .with_writer(non_blocking)
32
- .with_ansi(false)
33
- .with_line_number(true)
34
- .finish();
35
-
36
- let _ = tracing::subscriber::set_global_default(subscriber);
37
-
38
- _guard
21
+ _guard: Option<logger::GuardType>,
39
22
  }
40
23
 
41
24
  #[napi]
42
25
  impl MerkleClient {
43
26
  #[napi(constructor)]
44
27
  pub fn new(absolute_root_directory: String) -> MerkleClient {
45
- let _guard = init_logger();
28
+ let _guard = logger::init_logger();
46
29
 
47
30
  // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
48
31
  // use dunce::canonicalize;
@@ -62,13 +45,20 @@ impl MerkleClient {
62
45
  }
63
46
 
64
47
  #[napi]
65
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
48
+ pub async unsafe fn init(
49
+ &mut self,
50
+ git_ignored_files: Vec<String>,
51
+ is_git_repo: bool,
52
+ ) -> Result<(), napi::Error> {
66
53
  // 1. compute the merkle tree
67
54
  // 2. update the backend
68
55
  // 3. sync with the remote
69
56
  info!("Merkle tree compute started!");
57
+ info!("Root directory: {:?}", self.absolute_root_directory);
70
58
  unsafe {
71
- self.compute_merkle_tree().await?;
59
+ self
60
+ .compute_merkle_tree(git_ignored_files, is_git_repo)
61
+ .await?;
72
62
  }
73
63
 
74
64
  Ok(())
@@ -81,9 +71,24 @@ impl MerkleClient {
81
71
  #[napi]
82
72
  pub async unsafe fn compute_merkle_tree(
83
73
  &mut self,
74
+ git_ignored_files: Vec<String>,
75
+ is_git_repo: bool,
84
76
  ) -> Result<(), napi::Error> {
85
- let t =
86
- MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone()).await;
77
+ // make the git ignored files into a hash set
78
+ let mut git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
79
+
80
+ // if the hashset itself contains the root directory, then we should remove it.
81
+ // this is because the root directory is not a file, and we don't want to ignore it.
82
+ if git_ignored_set.contains(&self.absolute_root_directory) {
83
+ git_ignored_set.remove(&self.absolute_root_directory);
84
+ }
85
+
86
+ let t = MerkleTree::construct_merkle_tree(
87
+ self.absolute_root_directory.clone(),
88
+ git_ignored_set,
89
+ is_git_repo,
90
+ )
91
+ .await;
87
92
 
88
93
  match t {
89
94
  Ok(tree) => {
@@ -112,29 +117,44 @@ impl MerkleClient {
112
117
  &self,
113
118
  relative_path: String,
114
119
  ) -> Result<String, napi::Error> {
120
+ debug!("get_subtree_hash: relative_path: {:?}", relative_path);
115
121
 
116
- let relative_path_without_leading_slash = match relative_path.strip_prefix('.') {
122
+ let relative_path_without_leading_slash = match relative_path
123
+ .strip_prefix('.')
124
+ {
117
125
  Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
118
126
  None => relative_path.as_str(),
119
127
  };
128
+ debug!(
129
+ "relative_path_without_leading_slash: {:?}",
130
+ relative_path_without_leading_slash
131
+ );
120
132
 
121
133
  let absolute_path = if !relative_path_without_leading_slash.is_empty() {
122
- std::path::Path::new(&self.absolute_root_directory).join(relative_path_without_leading_slash)
134
+ std::path::Path::new(&self.absolute_root_directory)
135
+ .join(relative_path_without_leading_slash)
123
136
  } else {
124
137
  std::path::Path::new(&self.absolute_root_directory).to_path_buf()
125
138
  };
126
139
 
140
+ debug!("absolute_path: {:?}", absolute_path);
141
+
127
142
  let absolute_path_string = match absolute_path.to_str() {
128
143
  Some(path) => path.to_string(),
129
144
  None => {
130
145
  return Err(napi::Error::new(
131
146
  napi::Status::Unknown,
132
- format!("some string error")
147
+ format!("some string error"),
133
148
  ))
134
149
  }
135
150
  };
136
151
 
137
- let hash = self.tree.get_subtree_hash(absolute_path_string.as_str()).await;
152
+ debug!("absolute_path_string: {:?}", absolute_path_string);
153
+
154
+ let hash = self
155
+ .tree
156
+ .get_subtree_hash(absolute_path_string.as_str())
157
+ .await;
138
158
 
139
159
  match hash {
140
160
  Ok(hash) => Ok(hash),
@@ -153,7 +173,7 @@ impl MerkleClient {
153
173
  Ok(num) => Ok(num),
154
174
  Err(e) => Err(napi::Error::new(
155
175
  napi::Status::Unknown,
156
- format!("Error in get_num_embeddable_files: {:?}", e)
176
+ format!("Error in get_num_embeddable_files: {:?}", e),
157
177
  )),
158
178
  }
159
179
  }
@@ -175,7 +195,7 @@ impl MerkleClient {
175
195
  Ok(num) => Ok(num),
176
196
  Err(e) => Err(napi::Error::new(
177
197
  napi::Status::Unknown,
178
- format!("Error in get_num_embeddable_files_in_subtree: {:?}", e)
198
+ format!("Error in get_num_embeddable_files_in_subtree: {:?}", e),
179
199
  )),
180
200
  }
181
201
  }
@@ -188,7 +208,7 @@ impl MerkleClient {
188
208
  Ok(files) => Ok(files),
189
209
  Err(e) => Err(napi::Error::new(
190
210
  napi::Status::Unknown,
191
- format!("Error in get_all_files: {:?}", e)
211
+ format!("Error in get_all_files: {:?}", e),
192
212
  )),
193
213
  }
194
214
  }
package/src/logger.rs ADDED
@@ -0,0 +1,55 @@
1
+ use tracing::{info, subscriber, Level};
2
+ use tracing_appender::non_blocking::WorkerGuard;
3
+ use tracing_appender::rolling::{RollingFileAppender, Rotation};
4
+ use tracing_subscriber::fmt;
5
+ use tracing_subscriber::prelude::*;
6
+
7
+ pub enum GuardType {
8
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
9
+ Guard(tracing_axiom::Guard),
10
+ WorkerGuard(tracing_appender::non_blocking::WorkerGuard),
11
+ }
12
+
13
+ pub fn init_logger() -> Option<GuardType> {
14
+ #[cfg(feature = "debugfile")]
15
+ let _guard = {
16
+ let file_appender =
17
+ RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
18
+ let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
19
+ let subscriber = fmt::Subscriber::builder()
20
+ .with_max_level(Level::TRACE)
21
+ .with_writer(non_blocking)
22
+ .with_ansi(false)
23
+ .with_line_number(true)
24
+ .finish();
25
+
26
+ let _ = tracing::subscriber::set_global_default(subscriber);
27
+
28
+ Some(GuardType::WorkerGuard(_guard))
29
+ };
30
+
31
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
32
+ let _guard = {
33
+ let (axiom_layer, _guard) = tracing_axiom::builder()
34
+ .with_token("xaat-a51088e6-7889-41c0-b440-cfd4601acdd7")
35
+ .with_dataset("local-indexing")
36
+ .layer()
37
+ .ok()?;
38
+ // let fmt_layer = fmt::layer().with_level(true).with_ansi(false).with_line_number(true);
39
+
40
+ let _ = tracing_subscriber::registry()
41
+ .with(axiom_layer)
42
+ .try_init()
43
+ .ok()?;
44
+ // let _ = tracing::subscriber::set_global_default(subscriber);
45
+
46
+ info!("Tracing initialized! in rust");
47
+
48
+ Some(GuardType::Guard(_guard))
49
+ };
50
+
51
+ #[cfg(all(not(feature = "debugfile"), target_os = "linux"))]
52
+ let _guard = { None };
53
+
54
+ _guard
55
+ }
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -10,11 +9,18 @@ use tonic::async_trait;
10
9
 
11
10
  #[async_trait]
12
11
  impl LocalConstruction for MerkleTree {
12
+ #[tracing::instrument]
13
13
  async fn new(
14
14
  root_directory: Option<String>,
15
15
  ) -> Result<MerkleTree, anyhow::Error> {
16
+ let git_ignored_files = HashSet::<String>::new();
16
17
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
18
+ let n = MerkleTree::construct_merkle_tree(
19
+ root_directory,
20
+ git_ignored_files,
21
+ false,
22
+ )
23
+ .await;
18
24
  return n;
19
25
  }
20
26
 
@@ -30,6 +36,8 @@ impl LocalConstruction for MerkleTree {
30
36
  /// 4. return merkle tree
31
37
  async fn construct_merkle_tree(
32
38
  absolute_path_to_root_directory: String,
39
+ git_ignored_files_and_dirs: HashSet<String>,
40
+ is_git_repo: bool,
33
41
  ) -> Result<MerkleTree, anyhow::Error> {
34
42
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
43
  if !path.exists() {
@@ -38,28 +46,31 @@ impl LocalConstruction for MerkleTree {
38
46
  }
39
47
 
40
48
  // 1. get all the gitignored files
41
- let git_ignored_files_and_dirs =
42
- match git_utils::list_ignored_files_and_directories(
43
- absolute_path_to_root_directory.as_str(),
44
- true,
45
- ) {
46
- Ok(git_ignored) => git_ignored,
47
- Err(_e) => HashSet::new(),
48
- };
49
+ // let git_ignored_files_and_dirs =
50
+ // match git_utils::list_ignored_files_and_directories(
51
+ // absolute_path_to_root_directory.as_str(),
52
+ // true,
53
+ // ) {
54
+ // Ok(git_ignored) => git_ignored,
55
+ // Err(_e) => HashSet::new(),
56
+ // };
49
57
 
50
58
  let root_node = MerkleNode::new(
51
59
  path,
52
60
  None,
53
61
  &git_ignored_files_and_dirs,
54
62
  absolute_path_to_root_directory.as_str(),
63
+ is_git_repo,
55
64
  )
56
65
  .await;
66
+
57
67
  let mut mt = MerkleTree {
58
68
  root: root_node,
59
69
  files: BTreeMap::new(),
60
70
  root_path: absolute_path_to_root_directory,
61
71
  cursor: None,
62
- git_ignored_files_and_dirs: git_ignored_files_and_dirs,
72
+ git_ignored_files_and_dirs,
73
+ is_git_repo,
63
74
  };
64
75
 
65
76
  // we now iterate over all the nodes and add them to the hashmap
@@ -72,7 +83,6 @@ impl LocalConstruction for MerkleTree {
72
83
  let node_reader = node.read().await;
73
84
  match &node_reader.node_type {
74
85
  NodeType::Branch(n) => {
75
- tracing::info!("Branch: {:?}", n.0);
76
86
  let children = &n.1;
77
87
  files.insert(n.0.clone(), File { node: node.clone() });
78
88
  for child in children {
@@ -99,8 +109,7 @@ impl LocalConstruction for MerkleTree {
99
109
 
100
110
  add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
101
111
 
102
- tracing::info!("Merkle tree compute finished!");
103
- tracing::info!("Merkle tree: {}", mt);
112
+ tracing::info!("number of files in the tree: {}", mt.files.len());
104
113
 
105
114
  Ok(mt)
106
115
  }
@@ -144,6 +153,7 @@ impl LocalConstruction for MerkleTree {
144
153
  Ok(())
145
154
  }
146
155
 
156
+ #[tracing::instrument]
147
157
  async fn delete_file(
148
158
  &mut self,
149
159
  file_path: String,
@@ -1,5 +1,3 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
@@ -8,18 +6,21 @@ use std::vec;
8
6
  use std::{fs, path::Path, sync::Arc};
9
7
  use tokio::sync::RwLock;
10
8
  use tonic::async_trait;
11
- use tracing::info;
9
+ use tracing::{debug, info};
10
+
12
11
  pub mod local_construction;
13
12
  pub mod test;
14
13
 
15
14
  pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
16
15
 
16
+ #[derive(Debug)]
17
17
  pub struct MerkleTree {
18
18
  root_path: String,
19
19
  root: MerkleNodePtr,
20
20
  files: BTreeMap<String, File>,
21
21
  cursor: Option<usize>,
22
22
  git_ignored_files_and_dirs: HashSet<String>,
23
+ is_git_repo: bool,
23
24
  }
24
25
 
25
26
  #[derive(Debug)]
@@ -63,6 +64,8 @@ pub trait LocalConstruction {
63
64
 
64
65
  async fn construct_merkle_tree(
65
66
  root_directory: String,
67
+ git_ignored_files_and_dirs: HashSet<String>,
68
+ is_git_repo: bool,
66
69
  ) -> Result<MerkleTree, anyhow::Error>;
67
70
 
68
71
  async fn update_file(
@@ -97,6 +100,7 @@ impl MerkleTree {
97
100
  root_path: "".to_string(),
98
101
  cursor: None,
99
102
  git_ignored_files_and_dirs: HashSet::new(),
103
+ is_git_repo: false,
100
104
  }
101
105
  }
102
106
 
@@ -104,6 +108,8 @@ impl MerkleTree {
104
108
  &self,
105
109
  absolute_path: &str,
106
110
  ) -> Result<String, anyhow::Error> {
111
+ debug!("get_subtree_hash: absolute_path: {:?}", absolute_path);
112
+
107
113
  let node = match self.files.get(absolute_path) {
108
114
  Some(file) => file.node.clone(),
109
115
  None => {
@@ -119,6 +125,8 @@ impl MerkleTree {
119
125
  let node_reader = node.read().await;
120
126
  let node_hash = node_reader.hash.clone();
121
127
 
128
+ debug!("node_hash: {:?}", node_hash);
129
+
122
130
  Ok(node_hash)
123
131
  }
124
132
 
@@ -282,6 +290,12 @@ impl MerkleTree {
282
290
  ) -> Result<Vec<String>, anyhow::Error> {
283
291
  let mut files = Vec::new();
284
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
285
299
  for (file_path, f) in &self.files {
286
300
  if !file_path.contains(absolute_path) {
287
301
  continue;
@@ -308,16 +322,11 @@ impl MerkleTree {
308
322
  &self,
309
323
  absolute_path: &str,
310
324
  ) -> Result<Vec<String>, anyhow::Error> {
311
- info!("get_spline called with absolute_path: {}", absolute_path);
312
325
  let mut files = Vec::new();
313
326
 
314
327
  let current_node = match self.files.get(absolute_path) {
315
- Some(node) => {
316
- info!("Found node for absolute_path: {}", absolute_path);
317
- node.node.clone()
318
- }
328
+ Some(node) => node.node.clone(),
319
329
  None => {
320
- info!("File not found for absolute_path: {}", absolute_path);
321
330
  return Err(anyhow::anyhow!("File not found: {}", absolute_path));
322
331
  }
323
332
  };
@@ -328,7 +337,6 @@ impl MerkleTree {
328
337
  while let Some(node) = stack.pop() {
329
338
  let parent = node.read().await.parent.clone();
330
339
  if let Some(parent) = parent {
331
- info!("Adding parent hash to files vector");
332
340
  {
333
341
  let parent_node = parent.read().await;
334
342
  match &parent_node.node_type {
@@ -347,7 +355,6 @@ impl MerkleTree {
347
355
  stack.push(parent);
348
356
  }
349
357
  }
350
- info!("Returning files vector with {} elements", files.len());
351
358
  Ok(files)
352
359
  }
353
360
 
@@ -396,6 +403,7 @@ impl MerkleTree {
396
403
  Some(ancestor.clone()),
397
404
  &self.git_ignored_files_and_dirs,
398
405
  &absolute_root_path.as_str(),
406
+ self.is_git_repo,
399
407
  )
400
408
  .await;
401
409
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -413,6 +421,7 @@ impl MerkleTree {
413
421
  Some(ancestor.clone()),
414
422
  &self.git_ignored_files_and_dirs,
415
423
  &absolute_root_path.as_str(),
424
+ self.is_git_repo,
416
425
  )
417
426
  .await;
418
427
 
@@ -700,13 +709,14 @@ impl MerkleNode {
700
709
  parent: ParentPtr,
701
710
  ignored_files: &IgnoredFiles,
702
711
  absolute_root_path: &str,
712
+ is_git_repo: bool,
703
713
  ) -> MerkleNodePtr {
704
- // check if the root is a git directory.
705
- let is_git_repo =
706
- match git_utils::is_git_directory(absolute_root_path).await {
707
- Ok(is_git_repo) => is_git_repo,
708
- Err(e) => false,
709
- };
714
+ // // check if the root is a git directory.
715
+ // let is_git_repo =
716
+ // match git_utils::is_git_directory(absolute_root_path).await {
717
+ // Ok(is_git_repo) => is_git_repo,
718
+ // Err(_e) => false,
719
+ // };
710
720
  let bypass_git = !is_git_repo;
711
721
 
712
722
  MerkleNode::construct_node(
@@ -719,25 +729,20 @@ impl MerkleNode {
719
729
  .await
720
730
  }
721
731
 
732
+ // #[tracing::instrument]
722
733
  async fn new(
723
734
  absolute_file_or_directory: PathBuf,
724
735
  parent: ParentPtr,
725
736
  ignored_files: &IgnoredFiles,
726
737
  absolute_root_path: &str,
738
+ is_git_repo: bool,
727
739
  ) -> MerkleNodePtr {
728
- // check if the root is a git directory.
729
- let is_git_repo =
730
- match git_utils::is_git_directory(absolute_root_path).await {
731
- Ok(is_git_repo) => is_git_repo,
732
- Err(_e) => false,
733
- };
734
740
  let bypass_git = !is_git_repo;
735
741
 
736
742
  info!(
737
743
  "constructing node for absolute_file_or_directory: {:?}",
738
744
  absolute_file_or_directory
739
745
  );
740
- info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
741
746
 
742
747
  MerkleNode::construct_node(
743
748
  Path::new(&absolute_file_or_directory),
@@ -766,6 +771,7 @@ impl MerkleNode {
766
771
  Box::pin(async move {
767
772
  // check if it is a file
768
773
  let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
774
+
769
775
  if absolute_file_or_directory.is_file() {
770
776
  return Arc::new(RwLock::new(
771
777
  MerkleNode::construct_file_node_or_error_node(
@@ -790,6 +796,7 @@ impl MerkleNode {
790
796
  let is_git_ignored_dir = ignored_files.contains(&path_str);
791
797
 
792
798
  if is_git_ignored_dir && !bypass_git {
799
+ tracing::info!("skipping directory: {}", path_str);
793
800
  return Arc::new(RwLock::new(MerkleNode::empty_node(
794
801
  Some(absolute_file_or_directory),
795
802
  Some("Directory is git ignored!".to_string()),
@@ -800,6 +807,7 @@ impl MerkleNode {
800
807
  match entries {
801
808
  Ok(_) => (),
802
809
  Err(e) => {
810
+ tracing::error!("error reading directory: {}", e);
803
811
  return Arc::new(RwLock::new(MerkleNode::empty_node(
804
812
  Some(absolute_file_or_directory),
805
813
  Some(e.to_string()),
@@ -832,6 +840,7 @@ impl MerkleNode {
832
840
  );
833
841
  }
834
842
  Err(e) => {
843
+ tracing::error!("error reading directory: {}", e);
835
844
  children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
836
845
  Some(absolute_file_or_directory),
837
846
  Some(e.to_string()),
@@ -878,18 +887,10 @@ impl MerkleNode {
878
887
  false => {}
879
888
  }
880
889
 
881
- // read the file_content to a buffer
882
- let file_content = match tokio::fs::read(absolute_file_path).await {
883
- Ok(content) => content,
884
- Err(e) => {
885
- return Err(format!("Could not read file! {}", e.to_string()));
886
- }
887
- };
888
-
889
890
  // check if the file passes runtime checks.
890
891
  match file_utils::is_good_file_runtime_check(
891
892
  absolute_file_path,
892
- &file_content,
893
+ // &file_content,
893
894
  )
894
895
  .await
895
896
  {
@@ -899,15 +900,14 @@ impl MerkleNode {
899
900
  }
900
901
  }
901
902
 
902
- let file_content = match std::str::from_utf8(&file_content) {
903
- Ok(content) => content,
904
- Err(e) => {
905
- return Err(format!(
906
- "UTF8 Failure. Could not convert file content to string! {}",
907
- e.to_string()
908
- ));
909
- }
910
- };
903
+ // read the file_content to a buffer
904
+ let file_content =
905
+ match file_utils::read_string_without_bom(absolute_file_path).await {
906
+ Ok(content) => content,
907
+ Err(e) => {
908
+ return Err(format!("Could not read file! {}", e.to_string()));
909
+ }
910
+ };
911
911
 
912
912
  let file_hash = compute_hash(&file_content);
913
913
  let node = MerkleNode {
@@ -934,11 +934,7 @@ impl MerkleNode {
934
934
  .await
935
935
  {
936
936
  Ok(node) => node,
937
- Err(e) => {
938
- // println!("constructing error node. error: {}", e);
939
- // println!("file_path: {:?}", file_path);
940
- MerkleNode::empty_node(Some(absolute_file_path), Some(e))
941
- }
937
+ Err(e) => MerkleNode::empty_node(Some(absolute_file_path), Some(e)),
942
938
  };
943
939
 
944
940
  node
@@ -991,9 +987,10 @@ impl MerkleNode {
991
987
  }
992
988
 
993
989
  // sort the list of names and hashes by the hashes!!
994
- names_and_hashes.sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
990
+ names_and_hashes
991
+ .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
995
992
 
996
- for (_, hash) in names_and_hashes {
993
+ for (name, hash) in names_and_hashes {
997
994
  if hash == "" {
998
995
  continue;
999
996
  }
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);