@anysphere/file-service 0.0.0-e0c70bcd → 0.0.0-e15bb6ec

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,11 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+ debugfile = []
13
+
9
14
  [dependencies]
10
15
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
16
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -21,6 +26,11 @@ tracing = "0.1.37"
21
26
  tracing-subscriber = "0.3.17"
22
27
  tracing-appender = "0.2.2"
23
28
  binaryornot = "1.0.0"
29
+ dunce = "1.0.1"
30
+ encoding_rs = "0.8.33"
31
+
32
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
33
+ tracing-axiom = "0.4"
24
34
 
25
35
  [build-dependencies]
26
36
  napi-build = "2.0.1"
@@ -28,5 +38,6 @@ tonic-build = "0.9.2"
28
38
  anyhow = "1.0.75"
29
39
  glob = "0.3.0"
30
40
 
41
+
31
42
  [profile.release]
32
43
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -4,9 +4,10 @@
4
4
  /* auto-generated by NAPI-RS */
5
5
 
6
6
  export class MerkleClient {
7
- constructor(rootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
7
+ constructor(absoluteRootDirectory: string)
8
+ isTooBig(maxFiles: number, gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<boolean>
9
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
11
  updateFile(filePath: string): Promise<void>
11
12
  deleteFile(filePath: string): Promise<void>
12
13
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-e0c70bcd",
3
+ "version": "0.0.0-e15bb6ec",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -36,12 +36,12 @@
36
36
  "version": "napi version"
37
37
  },
38
38
  "optionalDependencies": {
39
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-e0c70bcd",
40
- "@anysphere/file-service-darwin-x64": "0.0.0-e0c70bcd",
41
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-e0c70bcd",
42
- "@anysphere/file-service-darwin-arm64": "0.0.0-e0c70bcd",
43
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-e0c70bcd",
44
- "@anysphere/file-service-darwin-universal": "0.0.0-e0c70bcd",
45
- "@anysphere/file-service-linux-arm64-gnu": "0.0.0-e0c70bcd"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-e15bb6ec",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-e15bb6ec",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-e15bb6ec",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-e15bb6ec",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-e15bb6ec",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-e15bb6ec",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-e15bb6ec"
46
46
  }
47
47
  }
package/src/file_utils.rs CHANGED
@@ -5,7 +5,8 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
- use std::{path::Path, any};
8
+ use encoding_rs::UTF_8;
9
+ use std::path::Path;
9
10
  use tokio::fs;
10
11
 
11
12
  pub fn is_in_bad_dir(file_path: &Path) -> Result<bool, Error> {
@@ -43,8 +44,21 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock", "bak", "tmp", "bin", "exe", "dll", "so", "lockb", "qwoff", "isl",
49
+ "csv", "pdf", // add ms word, excel, powerpoint, etc.
50
+ "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "odg",
51
+ "odf", "sxw", "sxc", "sxi", "sxd", "sdc", // add images
52
+ "jpg", "jpeg", "png", "gif", "bmp", "tif", // add audio
53
+ "mp3", "wav", "wma", "ogg", "flac", "aac", // add video
54
+ "mp4", "mov", "wmv", "flv", "avi", // add archives
55
+ "zip", "tar", "gz", "7z", "rar", "tgz", "dmg", "iso", "cue", "mdf", "mds",
56
+ "vcd", "toast", "img", "apk", "msi", "cab", "tar.gz", "tar.xz", "tar.bz2",
57
+ "tar.lzma", "tar.Z", "tar.sz", "lzma", // add fonts
58
+ "ttf", "otf", "woff", "woff2", "eot", "webp", "vsix",
59
+ ];
60
+ match bad_extensions.contains(&extension) {
61
+ true => {
48
62
  return Err(anyhow::anyhow!("File is just a lock file"));
49
63
  }
50
64
  _ => {}
@@ -93,7 +107,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
93
107
  // implement the buffer above:
94
108
  pub async fn is_good_file_runtime_check(
95
109
  file_path: &Path,
96
- _buffer: &[u8],
110
+ // _buffer: &[u8],
97
111
  ) -> Result<(), Error> {
98
112
  match get_file_size(file_path).await {
99
113
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -103,13 +117,31 @@ pub async fn is_good_file_runtime_check(
103
117
  _ => {}
104
118
  }
105
119
 
106
- // if is_binary(file_path).context("Failed to check if file is binary")? {
107
- // return Err(anyhow::anyhow!("File is binary"));
108
- // }
120
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
121
+ // return Err(anyhow::anyhow!("File is binary"));
122
+ // }
109
123
 
110
124
  Ok(())
111
125
  }
112
126
 
127
+ pub async fn read_string_without_bom(
128
+ file_path: &Path,
129
+ ) -> Result<String, Error> {
130
+ let file_buffer = match fs::read(file_path).await {
131
+ Ok(buffer) => buffer,
132
+ Err(e) => {
133
+ return Err(anyhow::anyhow!(
134
+ "Failed to read file buffer: {}",
135
+ e.to_string()
136
+ ))
137
+ }
138
+ };
139
+
140
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
141
+
142
+ Ok(cow.to_string())
143
+ }
144
+
113
145
  pub fn as_relative_path(
114
146
  base_path: &Path,
115
147
  file_path: &Path,
@@ -167,25 +199,40 @@ mod tests {
167
199
  temp_file.write_all(b"Hello, world!").await.unwrap();
168
200
  let buffer = fs::read(&temp_file_path).await.unwrap();
169
201
  assert_eq!(
170
- is_good_file_runtime_check(&temp_file_path, &buffer)
171
- .await
172
- .is_ok(),
202
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
173
203
  true
174
204
  );
175
205
  temp_dir.close().unwrap();
176
206
 
207
+ // let temp_dir = tempfile::tempdir().unwrap();
208
+ // let temp_file_path = temp_dir.path().join("test_file");
209
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
210
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
211
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
212
+ // assert_eq!(
213
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
214
+ // true
215
+ // );
216
+ // temp_dir.close().unwrap();
217
+ }
218
+
219
+ #[tokio::test]
220
+ async fn test_bom_file() {
221
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
222
+ const CONTENT: &str = "Hello, world!";
223
+
224
+ // Write this to a temp file
177
225
  let temp_dir = tempfile::tempdir().unwrap();
178
226
  let temp_file_path = temp_dir.path().join("test_file");
179
227
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
180
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
181
- let buffer = fs::read(&temp_file_path).await.unwrap();
182
- assert_eq!(
183
- is_good_file_runtime_check(&temp_file_path, &buffer)
184
- .await
185
- .is_err(),
186
- true
187
- );
188
- temp_dir.close().unwrap();
228
+ temp_file.write_all(&BOM).await.unwrap();
229
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
230
+
231
+ // expect that we read the file with tokio as the CONTENT
232
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
233
+
234
+ // Check string equality of CONTENT (&str) to file_contents (String)
235
+ assert_eq!(CONTENT, file_contents);
189
236
  }
190
237
 
191
238
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,7 +1,8 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
- pub fn list_ignored_files(
5
+ pub fn list_ignored_files_and_directories(
5
6
  workspace_root_path: &str,
6
7
  should_return_absolute_paths: bool,
7
8
  ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
@@ -14,12 +15,12 @@ pub fn list_ignored_files(
14
15
  "--others",
15
16
  "--ignored",
16
17
  "--exclude-standard",
18
+ "--directory",
19
+ "--no-empty-directory"
17
20
  ],
18
21
  // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
19
22
  vec![
20
- "sh",
21
- "-c",
22
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
23
24
  ],
24
25
  ];
25
26
 
@@ -34,9 +35,12 @@ pub fn list_ignored_files(
34
35
  .lines()
35
36
  .filter(|line| !line.is_empty())
36
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
37
40
  if should_return_absolute_paths {
38
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
39
- path.push(line);
42
+
43
+ path.push(line.clone());
40
44
 
41
45
  match path.canonicalize() {
42
46
  Ok(canonical_path) => {
@@ -133,7 +137,8 @@ mod tests {
133
137
  fn test_no_ignored_files() {
134
138
  let dir = tempfile::tempdir().unwrap();
135
139
  let gitignored_files =
136
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
140
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
141
+ .unwrap();
137
142
  Command::new("git")
138
143
  .args(&["init"])
139
144
  .current_dir(dir.path())
@@ -160,7 +165,8 @@ mod tests {
160
165
  .output()
161
166
  .unwrap();
162
167
  let gitignored_files =
163
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
168
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
169
+ .unwrap();
164
170
  println!(
165
171
  "ignored files for test_one_ignored_file: {:?}",
166
172
  gitignored_files
@@ -190,7 +196,8 @@ mod tests {
190
196
  .output()
191
197
  .unwrap();
192
198
  let gitignored_files =
193
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
199
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
200
+ .unwrap();
194
201
  println!(
195
202
  "ignored files for test_multiple_ignored_files: {:?}",
196
203
  gitignored_files
@@ -254,7 +261,8 @@ mod tests {
254
261
  println!("git submodule add output: {:?}", o);
255
262
 
256
263
  let gitignored_files =
257
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
264
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
265
+ .unwrap();
258
266
  println!(
259
267
  "ignored files for test_git_submodule_ignored_files: {:?}",
260
268
  gitignored_files
@@ -265,7 +273,8 @@ mod tests {
265
273
 
266
274
  #[test]
267
275
  fn test_multiple_ignored_files_in_current_dir() {
268
- let gitignored_files = list_ignored_files(".", false).unwrap();
276
+ let gitignored_files =
277
+ list_ignored_files_and_directories(".", false).unwrap();
269
278
  assert!(gitignored_files.len() > 1);
270
279
 
271
280
  // print a sample of the ignored files
package/src/lib.rs CHANGED
@@ -1,15 +1,15 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
+ pub mod logger;
5
6
  pub mod merkle_tree;
6
7
 
7
- use std::vec;
8
+ use std::{collections::HashSet, vec};
8
9
 
10
+ use anyhow::Context;
9
11
  use merkle_tree::{LocalConstruction, MerkleTree};
10
- use tracing::{info, Level};
11
- use tracing_appender::rolling::{RollingFileAppender, Rotation};
12
- use tracing_subscriber::fmt;
12
+ use tracing::{debug, info};
13
13
 
14
14
  #[macro_use]
15
15
  extern crate napi_derive;
@@ -17,47 +17,101 @@ extern crate napi_derive;
17
17
  #[napi]
18
18
  pub struct MerkleClient {
19
19
  tree: MerkleTree,
20
- root_directory: String,
21
- _guard: tracing_appender::non_blocking::WorkerGuard,
22
- }
23
-
24
- pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
25
- let file_appender =
26
- RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
27
- let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
28
- let subscriber = fmt::Subscriber::builder()
29
- .with_max_level(Level::TRACE)
30
- .with_writer(non_blocking)
31
- .with_ansi(false)
32
- .with_line_number(true)
33
- .finish();
34
-
35
- let _ = tracing::subscriber::set_global_default(subscriber);
36
-
37
- _guard
20
+ absolute_root_directory: String,
21
+ _guard: Option<logger::GuardType>,
38
22
  }
39
23
 
40
24
  #[napi]
41
25
  impl MerkleClient {
42
26
  #[napi(constructor)]
43
- pub fn new(root_directory: String) -> MerkleClient {
44
- let _guard = init_logger();
27
+ pub fn new(absolute_root_directory: String) -> MerkleClient {
28
+ let _guard = logger::init_logger();
29
+
30
+ // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
31
+ // use dunce::canonicalize;
32
+ // let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
33
+ // Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
34
+ // Err(e) => {
35
+ // info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
36
+ // absolute_root_directory
37
+ // }
38
+ // };
45
39
 
46
40
  MerkleClient {
47
41
  tree: MerkleTree::empty_tree(),
48
- root_directory,
42
+ absolute_root_directory,
49
43
  _guard,
50
44
  }
51
45
  }
52
46
 
47
+ #[napi]
48
+ pub async fn is_too_big(
49
+ &self,
50
+ max_files: i32,
51
+ git_ignored_files: Vec<String>,
52
+ is_git_repo: bool,
53
+ ) -> bool {
54
+ let git_ignored_set =
55
+ HashSet::<String>::from_iter(git_ignored_files.into_iter());
56
+ let mut num_files = 0;
57
+ let mut dirs_to_check = vec![self.absolute_root_directory.clone()];
58
+
59
+ while let Some(dir) = dirs_to_check.pop() {
60
+ info!("dir: {:?}", dir);
61
+ let mut entries = match tokio::fs::read_dir(&dir).await {
62
+ Ok(entries) => entries,
63
+ Err(_) => continue,
64
+ };
65
+ if num_files > max_files {
66
+ return true;
67
+ }
68
+
69
+
70
+ while let Some(entry) = entries.next_entry().await.unwrap_or(None) {
71
+ let path = entry.path();
72
+ info!("entry: {:?}", path);
73
+ let path_str = match path.to_str() {
74
+ Some(path_str) => path_str.to_string(),
75
+ None => continue,
76
+ };
77
+
78
+ if git_ignored_set.contains(&path_str) {
79
+ continue;
80
+ }
81
+
82
+ match entry.file_type().await {
83
+ Ok(file_type) => {
84
+ if file_type.is_dir() {
85
+ dirs_to_check.push(path_str);
86
+ }
87
+
88
+ if file_type.is_file() {
89
+ num_files += 1;
90
+ }
91
+ }
92
+ Err(_) => continue,
93
+ }
94
+
95
+ }
96
+ }
97
+ num_files > max_files
98
+ }
99
+
53
100
  #[napi]
54
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
101
+ pub async unsafe fn init(
102
+ &mut self,
103
+ git_ignored_files: Vec<String>,
104
+ is_git_repo: bool,
105
+ ) -> Result<(), napi::Error> {
55
106
  // 1. compute the merkle tree
56
107
  // 2. update the backend
57
108
  // 3. sync with the remote
58
109
  info!("Merkle tree compute started!");
110
+ info!("Root directory: {:?}", self.absolute_root_directory);
59
111
  unsafe {
60
- self.compute_merkle_tree().await?;
112
+ self
113
+ .compute_merkle_tree(git_ignored_files, is_git_repo)
114
+ .await?;
61
115
  }
62
116
 
63
117
  Ok(())
@@ -70,9 +124,24 @@ impl MerkleClient {
70
124
  #[napi]
71
125
  pub async unsafe fn compute_merkle_tree(
72
126
  &mut self,
127
+ git_ignored_files: Vec<String>,
128
+ is_git_repo: bool,
73
129
  ) -> Result<(), napi::Error> {
74
- let t =
75
- MerkleTree::construct_merkle_tree(self.root_directory.clone()).await;
130
+ // make the git ignored files into a hash set
131
+ let mut git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
132
+
133
+ // if the hashset itself contains the root directory, then we should remove it.
134
+ // this is because the root directory is not a file, and we don't want to ignore it.
135
+ if git_ignored_set.contains(&self.absolute_root_directory) {
136
+ git_ignored_set.remove(&self.absolute_root_directory);
137
+ }
138
+
139
+ let t = MerkleTree::construct_merkle_tree(
140
+ self.absolute_root_directory.clone(),
141
+ git_ignored_set,
142
+ is_git_repo,
143
+ )
144
+ .await;
76
145
 
77
146
  match t {
78
147
  Ok(tree) => {
@@ -101,24 +170,50 @@ impl MerkleClient {
101
170
  &self,
102
171
  relative_path: String,
103
172
  ) -> Result<String, napi::Error> {
104
- let absolute_path =
105
- std::path::Path::new(&self.root_directory).join(&relative_path);
173
+ debug!("get_subtree_hash: relative_path: {:?}", relative_path);
106
174
 
107
- let canonical_path = match absolute_path.canonicalize() {
108
- Ok(path) => path,
109
- Err(e) => return Err(napi::Error::new(
110
- napi::Status::Unknown,
111
- format!("Error in canonicalizing path: {:?}", e),
112
- )),
175
+ let relative_path_without_leading_slash = match relative_path
176
+ .strip_prefix('.')
177
+ {
178
+ Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
179
+ None => relative_path.as_str(),
180
+ };
181
+ debug!(
182
+ "relative_path_without_leading_slash: {:?}",
183
+ relative_path_without_leading_slash
184
+ );
185
+
186
+ let absolute_path = if !relative_path_without_leading_slash.is_empty() {
187
+ std::path::Path::new(&self.absolute_root_directory)
188
+ .join(relative_path_without_leading_slash)
189
+ } else {
190
+ std::path::Path::new(&self.absolute_root_directory).to_path_buf()
113
191
  };
114
192
 
115
- let hash = self.tree.get_subtree_hash(canonical_path.clone()).await;
193
+ debug!("absolute_path: {:?}", absolute_path);
194
+
195
+ let absolute_path_string = match absolute_path.to_str() {
196
+ Some(path) => path.to_string(),
197
+ None => {
198
+ return Err(napi::Error::new(
199
+ napi::Status::Unknown,
200
+ format!("some string error"),
201
+ ))
202
+ }
203
+ };
204
+
205
+ debug!("absolute_path_string: {:?}", absolute_path_string);
206
+
207
+ let hash = self
208
+ .tree
209
+ .get_subtree_hash(absolute_path_string.as_str())
210
+ .await;
116
211
 
117
212
  match hash {
118
213
  Ok(hash) => Ok(hash),
119
214
  Err(e) => Err(napi::Error::new(
120
215
  napi::Status::Unknown,
121
- format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nCanonical path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, canonical_path, self.root_directory, e),
216
+ format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
122
217
  )),
123
218
  }
124
219
  }
@@ -140,7 +235,7 @@ impl MerkleClient {
140
235
  &self,
141
236
  relative_path: String,
142
237
  ) -> Result<i32, napi::Error> {
143
- let absolute_path = std::path::Path::new(&self.root_directory)
238
+ let absolute_path = std::path::Path::new(&self.absolute_root_directory)
144
239
  .join(relative_path)
145
240
  .canonicalize()?;
146
241
 
@@ -176,10 +271,12 @@ impl MerkleClient {
176
271
  &self,
177
272
  absolute_file_path: String,
178
273
  ) -> Result<Vec<String>, napi::Error> {
179
- let absolute_path_str = absolute_file_path.as_str();
274
+ // let absolute_path = absolute_file_path.to_lowercase();
275
+ // let absolute_path_str = absolute_path.as_str();
276
+
180
277
  let files = self
181
278
  .tree
182
- .get_all_dir_files_to_embed(absolute_path_str)
279
+ .get_all_dir_files_to_embed(absolute_file_path.as_str())
183
280
  .await;
184
281
 
185
282
  match files {
@@ -220,8 +317,9 @@ impl MerkleClient {
220
317
  &self,
221
318
  absolute_file_path: String,
222
319
  ) -> Result<Vec<String>, napi::Error> {
223
- let absolute_path_str = absolute_file_path.as_str();
224
- let spline = self.tree.get_spline(absolute_path_str).await;
320
+ // let absolute_path = absolute_file_path.to_lowercase();
321
+ // let absolute_path_str = absolute_path.as_str();
322
+ let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
225
323
 
226
324
  match spline {
227
325
  Ok(spline) => Ok(spline),
@@ -250,6 +348,6 @@ impl MerkleClient {
250
348
 
251
349
  #[napi]
252
350
  pub fn update_root_directory(&mut self, root_directory: String) {
253
- self.root_directory = root_directory;
351
+ self.absolute_root_directory = root_directory;
254
352
  }
255
353
  }
package/src/logger.rs ADDED
@@ -0,0 +1,55 @@
1
+ use tracing::{info, subscriber, Level};
2
+ use tracing_appender::non_blocking::WorkerGuard;
3
+ use tracing_appender::rolling::{RollingFileAppender, Rotation};
4
+ use tracing_subscriber::fmt;
5
+ use tracing_subscriber::prelude::*;
6
+
7
+ pub enum GuardType {
8
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
9
+ Guard(tracing_axiom::Guard),
10
+ WorkerGuard(tracing_appender::non_blocking::WorkerGuard),
11
+ }
12
+
13
+ pub fn init_logger() -> Option<GuardType> {
14
+ #[cfg(feature = "debugfile")]
15
+ let _guard = {
16
+ let file_appender =
17
+ RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
18
+ let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
19
+ let subscriber = fmt::Subscriber::builder()
20
+ .with_max_level(Level::TRACE)
21
+ .with_writer(non_blocking)
22
+ .with_ansi(false)
23
+ .with_line_number(true)
24
+ .finish();
25
+
26
+ let _ = tracing::subscriber::set_global_default(subscriber);
27
+
28
+ Some(GuardType::WorkerGuard(_guard))
29
+ };
30
+
31
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
32
+ let _guard = {
33
+ let (axiom_layer, _guard) = tracing_axiom::builder()
34
+ .with_token("xaat-a51088e6-7889-41c0-b440-cfd4601acdd7")
35
+ .with_dataset("local-indexing")
36
+ .layer()
37
+ .ok()?;
38
+ // let fmt_layer = fmt::layer().with_level(true).with_ansi(false).with_line_number(true);
39
+
40
+ let _ = tracing_subscriber::registry()
41
+ .with(axiom_layer)
42
+ .try_init()
43
+ .ok()?;
44
+ // let _ = tracing::subscriber::set_global_default(subscriber);
45
+
46
+ info!("Tracing initialized! in rust");
47
+
48
+ Some(GuardType::Guard(_guard))
49
+ };
50
+
51
+ #[cfg(all(not(feature = "debugfile"), target_os = "linux"))]
52
+ let _guard = { None };
53
+
54
+ _guard
55
+ }
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -10,11 +9,18 @@ use tonic::async_trait;
10
9
 
11
10
  #[async_trait]
12
11
  impl LocalConstruction for MerkleTree {
12
+ #[tracing::instrument]
13
13
  async fn new(
14
14
  root_directory: Option<String>,
15
15
  ) -> Result<MerkleTree, anyhow::Error> {
16
+ let git_ignored_files = HashSet::<String>::new();
16
17
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
18
+ let n = MerkleTree::construct_merkle_tree(
19
+ root_directory,
20
+ git_ignored_files,
21
+ false,
22
+ )
23
+ .await;
18
24
  return n;
19
25
  }
20
26
 
@@ -30,6 +36,8 @@ impl LocalConstruction for MerkleTree {
30
36
  /// 4. return merkle tree
31
37
  async fn construct_merkle_tree(
32
38
  absolute_path_to_root_directory: String,
39
+ git_ignored_files_and_dirs: HashSet<String>,
40
+ is_git_repo: bool,
33
41
  ) -> Result<MerkleTree, anyhow::Error> {
34
42
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
43
  if !path.exists() {
@@ -38,27 +46,31 @@ impl LocalConstruction for MerkleTree {
38
46
  }
39
47
 
40
48
  // 1. get all the gitignored files
41
- let git_ignored_files = match git_utils::list_ignored_files(
42
- absolute_path_to_root_directory.as_str(),
43
- true,
44
- ) {
45
- Ok(git_ignored) => git_ignored,
46
- Err(_e) => HashSet::new(),
47
- };
49
+ // let git_ignored_files_and_dirs =
50
+ // match git_utils::list_ignored_files_and_directories(
51
+ // absolute_path_to_root_directory.as_str(),
52
+ // true,
53
+ // ) {
54
+ // Ok(git_ignored) => git_ignored,
55
+ // Err(_e) => HashSet::new(),
56
+ // };
48
57
 
49
58
  let root_node = MerkleNode::new(
50
59
  path,
51
60
  None,
52
- &git_ignored_files,
61
+ &git_ignored_files_and_dirs,
53
62
  absolute_path_to_root_directory.as_str(),
63
+ is_git_repo,
54
64
  )
55
65
  .await;
66
+
56
67
  let mut mt = MerkleTree {
57
68
  root: root_node,
58
69
  files: BTreeMap::new(),
59
70
  root_path: absolute_path_to_root_directory,
60
71
  cursor: None,
61
- git_ignored_files,
72
+ git_ignored_files_and_dirs,
73
+ is_git_repo,
62
74
  };
63
75
 
64
76
  // we now iterate over all the nodes and add them to the hashmap
@@ -71,7 +83,6 @@ impl LocalConstruction for MerkleTree {
71
83
  let node_reader = node.read().await;
72
84
  match &node_reader.node_type {
73
85
  NodeType::Branch(n) => {
74
- tracing::info!("Branch: {:?}", n.0);
75
86
  let children = &n.1;
76
87
  files.insert(n.0.clone(), File { node: node.clone() });
77
88
  for child in children {
@@ -80,7 +91,13 @@ impl LocalConstruction for MerkleTree {
80
91
  }
81
92
  NodeType::File(file_name) => {
82
93
  let f = File { node: node.clone() };
83
- tracing::info!("File: {:?}", file_name);
94
+
95
+ // i dont reallly like this :(((
96
+ // let canonical_file_name = match dunce::canonicalize(file_name) {
97
+ // Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
98
+ // Err(_) => file_name.clone(),
99
+ // };
100
+
84
101
  files.insert(file_name.clone(), f);
85
102
  }
86
103
  NodeType::ErrorNode(_) => {
@@ -92,8 +109,7 @@ impl LocalConstruction for MerkleTree {
92
109
 
93
110
  add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
94
111
 
95
- tracing::info!("Merkle tree compute finished!");
96
- tracing::info!("Merkle tree: {}", mt);
112
+ tracing::info!("number of files in the tree: {}", mt.files.len());
97
113
 
98
114
  Ok(mt)
99
115
  }
@@ -137,6 +153,7 @@ impl LocalConstruction for MerkleTree {
137
153
  Ok(())
138
154
  }
139
155
 
156
+ #[tracing::instrument]
140
157
  async fn delete_file(
141
158
  &mut self,
142
159
  file_path: String,
@@ -1,24 +1,26 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
6
4
  use std::path::PathBuf;
5
+ use std::vec;
7
6
  use std::{fs, path::Path, sync::Arc};
8
7
  use tokio::sync::RwLock;
9
8
  use tonic::async_trait;
10
- use tracing::info;
9
+ use tracing::{debug, info};
10
+
11
11
  pub mod local_construction;
12
12
  pub mod test;
13
13
 
14
14
  pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
15
15
 
16
+ #[derive(Debug)]
16
17
  pub struct MerkleTree {
17
18
  root_path: String,
18
19
  root: MerkleNodePtr,
19
20
  files: BTreeMap<String, File>,
20
21
  cursor: Option<usize>,
21
- git_ignored_files: HashSet<String>,
22
+ git_ignored_files_and_dirs: HashSet<String>,
23
+ is_git_repo: bool,
22
24
  }
23
25
 
24
26
  #[derive(Debug)]
@@ -62,6 +64,8 @@ pub trait LocalConstruction {
62
64
 
63
65
  async fn construct_merkle_tree(
64
66
  root_directory: String,
67
+ git_ignored_files_and_dirs: HashSet<String>,
68
+ is_git_repo: bool,
65
69
  ) -> Result<MerkleTree, anyhow::Error>;
66
70
 
67
71
  async fn update_file(
@@ -95,34 +99,34 @@ impl MerkleTree {
95
99
  files: BTreeMap::new(),
96
100
  root_path: "".to_string(),
97
101
  cursor: None,
98
- git_ignored_files: HashSet::new(),
102
+ git_ignored_files_and_dirs: HashSet::new(),
103
+ is_git_repo: false,
99
104
  }
100
105
  }
101
106
 
102
107
  pub async fn get_subtree_hash(
103
108
  &self,
104
- absolute_path: PathBuf,
109
+ absolute_path: &str,
105
110
  ) -> Result<String, anyhow::Error> {
106
- let abs_string = match absolute_path.to_str() {
107
- Some(s) => s.to_string(),
108
- None => {
109
- return Err(anyhow::anyhow!(
110
- "get_subtree_hash: Failed to convert path to string"
111
- ))
112
- }
113
- };
111
+ debug!("get_subtree_hash: absolute_path: {:?}", absolute_path);
114
112
 
115
- let node = match self.files.get(&abs_string) {
113
+ let node = match self.files.get(absolute_path) {
116
114
  Some(file) => file.node.clone(),
117
115
  None => {
118
116
  let all_files: Vec<String> = self.files.keys().cloned().collect();
119
- return Err(anyhow::anyhow!("Could not find file in tree! Looking for: {}. All files: {:?}", abs_string, all_files));
117
+ return Err(anyhow::anyhow!(
118
+ "Could not find file in tree! Looking for: {}. All files: {:?}",
119
+ absolute_path,
120
+ all_files
121
+ ));
120
122
  }
121
123
  };
122
124
 
123
125
  let node_reader = node.read().await;
124
126
  let node_hash = node_reader.hash.clone();
125
127
 
128
+ debug!("node_hash: {:?}", node_hash);
129
+
126
130
  Ok(node_hash)
127
131
  }
128
132
 
@@ -286,6 +290,12 @@ impl MerkleTree {
286
290
  ) -> Result<Vec<String>, anyhow::Error> {
287
291
  let mut files = Vec::new();
288
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
289
299
  for (file_path, f) in &self.files {
290
300
  if !file_path.contains(absolute_path) {
291
301
  continue;
@@ -312,16 +322,11 @@ impl MerkleTree {
312
322
  &self,
313
323
  absolute_path: &str,
314
324
  ) -> Result<Vec<String>, anyhow::Error> {
315
- info!("get_spline called with absolute_path: {}", absolute_path);
316
325
  let mut files = Vec::new();
317
326
 
318
327
  let current_node = match self.files.get(absolute_path) {
319
- Some(node) => {
320
- info!("Found node for absolute_path: {}", absolute_path);
321
- node.node.clone()
322
- }
328
+ Some(node) => node.node.clone(),
323
329
  None => {
324
- info!("File not found for absolute_path: {}", absolute_path);
325
330
  return Err(anyhow::anyhow!("File not found: {}", absolute_path));
326
331
  }
327
332
  };
@@ -332,7 +337,6 @@ impl MerkleTree {
332
337
  while let Some(node) = stack.pop() {
333
338
  let parent = node.read().await.parent.clone();
334
339
  if let Some(parent) = parent {
335
- info!("Adding parent hash to files vector");
336
340
  {
337
341
  let parent_node = parent.read().await;
338
342
  match &parent_node.node_type {
@@ -351,7 +355,6 @@ impl MerkleTree {
351
355
  stack.push(parent);
352
356
  }
353
357
  }
354
- info!("Returning files vector with {} elements", files.len());
355
358
  Ok(files)
356
359
  }
357
360
 
@@ -398,8 +401,9 @@ impl MerkleTree {
398
401
  let new_node = MerkleNode::new(
399
402
  file_path.clone(),
400
403
  Some(ancestor.clone()),
401
- &self.git_ignored_files,
404
+ &self.git_ignored_files_and_dirs,
402
405
  &absolute_root_path.as_str(),
406
+ self.is_git_repo,
403
407
  )
404
408
  .await;
405
409
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -415,8 +419,9 @@ impl MerkleTree {
415
419
  let first_child = MerkleNode::new(
416
420
  first_child_path.clone(),
417
421
  Some(ancestor.clone()),
418
- &self.git_ignored_files,
422
+ &self.git_ignored_files_and_dirs,
419
423
  &absolute_root_path.as_str(),
424
+ self.is_git_repo,
420
425
  )
421
426
  .await;
422
427
 
@@ -704,13 +709,14 @@ impl MerkleNode {
704
709
  parent: ParentPtr,
705
710
  ignored_files: &IgnoredFiles,
706
711
  absolute_root_path: &str,
712
+ is_git_repo: bool,
707
713
  ) -> MerkleNodePtr {
708
- // check if the root is a git directory.
709
- let is_git_repo =
710
- match git_utils::is_git_directory(absolute_root_path).await {
711
- Ok(is_git_repo) => is_git_repo,
712
- Err(e) => false,
713
- };
714
+ // // check if the root is a git directory.
715
+ // let is_git_repo =
716
+ // match git_utils::is_git_directory(absolute_root_path).await {
717
+ // Ok(is_git_repo) => is_git_repo,
718
+ // Err(_e) => false,
719
+ // };
714
720
  let bypass_git = !is_git_repo;
715
721
 
716
722
  MerkleNode::construct_node(
@@ -723,25 +729,20 @@ impl MerkleNode {
723
729
  .await
724
730
  }
725
731
 
732
+ // #[tracing::instrument]
726
733
  async fn new(
727
734
  absolute_file_or_directory: PathBuf,
728
735
  parent: ParentPtr,
729
736
  ignored_files: &IgnoredFiles,
730
737
  absolute_root_path: &str,
738
+ is_git_repo: bool,
731
739
  ) -> MerkleNodePtr {
732
- // check if the root is a git directory.
733
- let is_git_repo =
734
- match git_utils::is_git_directory(absolute_root_path).await {
735
- Ok(is_git_repo) => is_git_repo,
736
- Err(_e) => false,
737
- };
738
740
  let bypass_git = !is_git_repo;
739
741
 
740
742
  info!(
741
743
  "constructing node for absolute_file_or_directory: {:?}",
742
744
  absolute_file_or_directory
743
745
  );
744
- info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
745
746
 
746
747
  MerkleNode::construct_node(
747
748
  Path::new(&absolute_file_or_directory),
@@ -770,6 +771,7 @@ impl MerkleNode {
770
771
  Box::pin(async move {
771
772
  // check if it is a file
772
773
  let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
774
+
773
775
  if absolute_file_or_directory.is_file() {
774
776
  return Arc::new(RwLock::new(
775
777
  MerkleNode::construct_file_node_or_error_node(
@@ -791,26 +793,10 @@ impl MerkleNode {
791
793
  )));
792
794
  }
793
795
 
794
- // check if the directory is git ignored
795
- let is_git_ignored =
796
- match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
797
- .await
798
- {
799
- Ok(is_git_ignored) => is_git_ignored,
800
- Err(e) => {
801
- return Arc::new(RwLock::new(MerkleNode::empty_node(
802
- Some(absolute_file_or_directory),
803
- Some(e.to_string()),
804
- )));
805
- }
806
- };
796
+ let is_git_ignored_dir = ignored_files.contains(&path_str);
807
797
 
808
- if is_git_ignored && !bypass_git {
809
- // println!("skipping directory: {}", path_str);
810
- tracing::info!(
811
- "skipping directory because its git ignored: {}",
812
- path_str
813
- );
798
+ if is_git_ignored_dir && !bypass_git {
799
+ tracing::info!("skipping directory: {}", path_str);
814
800
  return Arc::new(RwLock::new(MerkleNode::empty_node(
815
801
  Some(absolute_file_or_directory),
816
802
  Some("Directory is git ignored!".to_string()),
@@ -821,6 +807,7 @@ impl MerkleNode {
821
807
  match entries {
822
808
  Ok(_) => (),
823
809
  Err(e) => {
810
+ tracing::error!("error reading directory: {}", e);
824
811
  return Arc::new(RwLock::new(MerkleNode::empty_node(
825
812
  Some(absolute_file_or_directory),
826
813
  Some(e.to_string()),
@@ -853,6 +840,7 @@ impl MerkleNode {
853
840
  );
854
841
  }
855
842
  Err(e) => {
843
+ tracing::error!("error reading directory: {}", e);
856
844
  children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
857
845
  Some(absolute_file_or_directory),
858
846
  Some(e.to_string()),
@@ -899,18 +887,10 @@ impl MerkleNode {
899
887
  false => {}
900
888
  }
901
889
 
902
- // read the file_content to a buffer
903
- let file_content = match tokio::fs::read(absolute_file_path).await {
904
- Ok(content) => content,
905
- Err(e) => {
906
- return Err(format!("Could not read file! {}", e.to_string()));
907
- }
908
- };
909
-
910
890
  // check if the file passes runtime checks.
911
891
  match file_utils::is_good_file_runtime_check(
912
892
  absolute_file_path,
913
- &file_content,
893
+ // &file_content,
914
894
  )
915
895
  .await
916
896
  {
@@ -920,15 +900,14 @@ impl MerkleNode {
920
900
  }
921
901
  }
922
902
 
923
- let file_content = match std::str::from_utf8(&file_content) {
924
- Ok(content) => content,
925
- Err(e) => {
926
- return Err(format!(
927
- "UTF8 Failure. Could not convert file content to string! {}",
928
- e.to_string()
929
- ));
930
- }
931
- };
903
+ // read the file_content to a buffer
904
+ let file_content =
905
+ match file_utils::read_string_without_bom(absolute_file_path).await {
906
+ Ok(content) => content,
907
+ Err(e) => {
908
+ return Err(format!("Could not read file! {}", e.to_string()));
909
+ }
910
+ };
932
911
 
933
912
  let file_hash = compute_hash(&file_content);
934
913
  let node = MerkleNode {
@@ -955,11 +934,7 @@ impl MerkleNode {
955
934
  .await
956
935
  {
957
936
  Ok(node) => node,
958
- Err(e) => {
959
- // println!("constructing error node. error: {}", e);
960
- // println!("file_path: {:?}", file_path);
961
- MerkleNode::empty_node(Some(absolute_file_path), Some(e))
962
- }
937
+ Err(e) => MerkleNode::empty_node(Some(absolute_file_path), Some(e)),
963
938
  };
964
939
 
965
940
  node
@@ -984,15 +959,50 @@ impl MerkleNode {
984
959
 
985
960
  async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
986
961
  let mut hasher = sha2::Sha256::new();
962
+ let mut names_and_hashes = vec![];
963
+ let mut non_zero_children = 0;
964
+
987
965
  for child in children {
988
966
  // check if it is an error node
989
967
  let child_reader = child.read().await;
990
- if let NodeType::ErrorNode(_) = &child_reader.node_type {
968
+
969
+ match &child_reader.node_type {
970
+ NodeType::File(file_name) => {
971
+ non_zero_children += 1;
972
+ names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
973
+ }
974
+ NodeType::Branch((file_name, _)) => {
975
+ let hash = child_reader.hash.clone();
976
+ if hash == "" {
977
+ continue;
978
+ }
979
+
980
+ non_zero_children += 1;
981
+ names_and_hashes.push((file_name.clone(), hash));
982
+ }
983
+ NodeType::ErrorNode(_) => {
984
+ continue;
985
+ }
986
+ }
987
+ }
988
+
989
+ // sort the list of names and hashes by the hashes!!
990
+ names_and_hashes
991
+ .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
992
+
993
+ for (name, hash) in names_and_hashes {
994
+ if hash == "" {
991
995
  continue;
992
996
  }
997
+ hasher.update(hash);
998
+ }
993
999
 
994
- hasher.update(child_reader.hash.as_bytes());
1000
+ if non_zero_children == 0 {
1001
+ // this means that the branch is empty.
1002
+ // we should return an empty string.
1003
+ return "".to_string();
995
1004
  }
1005
+
996
1006
  let result = hasher.finalize();
997
1007
  format!("{:x}", result)
998
1008
  }
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);