@anysphere/file-service 0.0.0-dbd43428 → 0.0.0-e15bb6ec

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,11 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+ debugfile = []
13
+
9
14
  [dependencies]
10
15
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
16
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -20,6 +25,12 @@ prost = "0.11.9"
20
25
  tracing = "0.1.37"
21
26
  tracing-subscriber = "0.3.17"
22
27
  tracing-appender = "0.2.2"
28
+ binaryornot = "1.0.0"
29
+ dunce = "1.0.1"
30
+ encoding_rs = "0.8.33"
31
+
32
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
33
+ tracing-axiom = "0.4"
23
34
 
24
35
  [build-dependencies]
25
36
  napi-build = "2.0.1"
@@ -27,5 +38,6 @@ tonic-build = "0.9.2"
27
38
  anyhow = "1.0.75"
28
39
  glob = "0.3.0"
29
40
 
41
+
30
42
  [profile.release]
31
43
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -4,9 +4,10 @@
4
4
  /* auto-generated by NAPI-RS */
5
5
 
6
6
  export class MerkleClient {
7
- constructor(rootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
7
+ constructor(absoluteRootDirectory: string)
8
+ isTooBig(maxFiles: number, gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<boolean>
9
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
11
  updateFile(filePath: string): Promise<void>
11
12
  deleteFile(filePath: string): Promise<void>
12
13
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-dbd43428",
3
+ "version": "0.0.0-e15bb6ec",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -9,7 +9,8 @@
9
9
  "additional": [
10
10
  "aarch64-apple-darwin",
11
11
  "aarch64-pc-windows-msvc",
12
- "universal-apple-darwin"
12
+ "universal-apple-darwin",
13
+ "aarch64-unknown-linux-gnu"
13
14
  ]
14
15
  }
15
16
  },
@@ -35,11 +36,12 @@
35
36
  "version": "napi version"
36
37
  },
37
38
  "optionalDependencies": {
38
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-dbd43428",
39
- "@anysphere/file-service-darwin-x64": "0.0.0-dbd43428",
40
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-dbd43428",
41
- "@anysphere/file-service-darwin-arm64": "0.0.0-dbd43428",
42
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-dbd43428",
43
- "@anysphere/file-service-darwin-universal": "0.0.0-dbd43428"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-e15bb6ec",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-e15bb6ec",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-e15bb6ec",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-e15bb6ec",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-e15bb6ec",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-e15bb6ec",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-e15bb6ec"
44
46
  }
45
47
  }
package/src/file_utils.rs CHANGED
@@ -5,6 +5,7 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
+ use encoding_rs::UTF_8;
8
9
  use std::path::Path;
9
10
  use tokio::fs;
10
11
 
@@ -43,8 +44,21 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock", "bak", "tmp", "bin", "exe", "dll", "so", "lockb", "qwoff", "isl",
49
+ "csv", "pdf", // add ms word, excel, powerpoint, etc.
50
+ "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "odg",
51
+ "odf", "sxw", "sxc", "sxi", "sxd", "sdc", // add images
52
+ "jpg", "jpeg", "png", "gif", "bmp", "tif", // add audio
53
+ "mp3", "wav", "wma", "ogg", "flac", "aac", // add video
54
+ "mp4", "mov", "wmv", "flv", "avi", // add archives
55
+ "zip", "tar", "gz", "7z", "rar", "tgz", "dmg", "iso", "cue", "mdf", "mds",
56
+ "vcd", "toast", "img", "apk", "msi", "cab", "tar.gz", "tar.xz", "tar.bz2",
57
+ "tar.lzma", "tar.Z", "tar.sz", "lzma", // add fonts
58
+ "ttf", "otf", "woff", "woff2", "eot", "webp", "vsix",
59
+ ];
60
+ match bad_extensions.contains(&extension) {
61
+ true => {
48
62
  return Err(anyhow::anyhow!("File is just a lock file"));
49
63
  }
50
64
  _ => {}
@@ -62,7 +76,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
62
76
  Some(extension) => match extension.to_str() {
63
77
  Some(ext_str) => {
64
78
  if bad_extensions.contains(&ext_str) {
65
- return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
79
+ return Err(anyhow::anyhow!("Binary file excluded from indexing."));
66
80
  }
67
81
  }
68
82
  None => {
@@ -88,10 +102,12 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
88
102
  Ok(())
89
103
  }
90
104
 
105
+ // use binaryornot::is_binary;
106
+ // use anyhow::Context;
91
107
  // implement the buffer above:
92
108
  pub async fn is_good_file_runtime_check(
93
109
  file_path: &Path,
94
- buffer: &[u8],
110
+ // _buffer: &[u8],
95
111
  ) -> Result<(), Error> {
96
112
  match get_file_size(file_path).await {
97
113
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -101,16 +117,31 @@ pub async fn is_good_file_runtime_check(
101
117
  _ => {}
102
118
  }
103
119
 
104
- for &byte in buffer.iter().take(2048) {
105
- if byte.is_ascii() {
106
- continue;
107
- } else {
108
- return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
109
- }
110
- }
120
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
121
+ // return Err(anyhow::anyhow!("File is binary"));
122
+ // }
123
+
111
124
  Ok(())
112
125
  }
113
126
 
127
+ pub async fn read_string_without_bom(
128
+ file_path: &Path,
129
+ ) -> Result<String, Error> {
130
+ let file_buffer = match fs::read(file_path).await {
131
+ Ok(buffer) => buffer,
132
+ Err(e) => {
133
+ return Err(anyhow::anyhow!(
134
+ "Failed to read file buffer: {}",
135
+ e.to_string()
136
+ ))
137
+ }
138
+ };
139
+
140
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
141
+
142
+ Ok(cow.to_string())
143
+ }
144
+
114
145
  pub fn as_relative_path(
115
146
  base_path: &Path,
116
147
  file_path: &Path,
@@ -168,25 +199,40 @@ mod tests {
168
199
  temp_file.write_all(b"Hello, world!").await.unwrap();
169
200
  let buffer = fs::read(&temp_file_path).await.unwrap();
170
201
  assert_eq!(
171
- is_good_file_runtime_check(&temp_file_path, &buffer)
172
- .await
173
- .is_ok(),
202
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
174
203
  true
175
204
  );
176
205
  temp_dir.close().unwrap();
177
206
 
207
+ // let temp_dir = tempfile::tempdir().unwrap();
208
+ // let temp_file_path = temp_dir.path().join("test_file");
209
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
210
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
211
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
212
+ // assert_eq!(
213
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
214
+ // true
215
+ // );
216
+ // temp_dir.close().unwrap();
217
+ }
218
+
219
+ #[tokio::test]
220
+ async fn test_bom_file() {
221
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
222
+ const CONTENT: &str = "Hello, world!";
223
+
224
+ // Write this to a temp file
178
225
  let temp_dir = tempfile::tempdir().unwrap();
179
226
  let temp_file_path = temp_dir.path().join("test_file");
180
227
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
181
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
182
- let buffer = fs::read(&temp_file_path).await.unwrap();
183
- assert_eq!(
184
- is_good_file_runtime_check(&temp_file_path, &buffer)
185
- .await
186
- .is_err(),
187
- true
188
- );
189
- temp_dir.close().unwrap();
228
+ temp_file.write_all(&BOM).await.unwrap();
229
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
230
+
231
+ // expect that we read the file with tokio as the CONTENT
232
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
233
+
234
+ // Check string equality of CONTENT (&str) to file_contents (String)
235
+ assert_eq!(CONTENT, file_contents);
190
236
  }
191
237
 
192
238
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,7 +1,8 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
- pub fn list_ignored_files(
5
+ pub fn list_ignored_files_and_directories(
5
6
  workspace_root_path: &str,
6
7
  should_return_absolute_paths: bool,
7
8
  ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
@@ -14,12 +15,12 @@ pub fn list_ignored_files(
14
15
  "--others",
15
16
  "--ignored",
16
17
  "--exclude-standard",
18
+ "--directory",
19
+ "--no-empty-directory"
17
20
  ],
18
21
  // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
19
22
  vec![
20
- "sh",
21
- "-c",
22
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
23
24
  ],
24
25
  ];
25
26
 
@@ -34,9 +35,12 @@ pub fn list_ignored_files(
34
35
  .lines()
35
36
  .filter(|line| !line.is_empty())
36
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
37
40
  if should_return_absolute_paths {
38
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
39
- path.push(line);
42
+
43
+ path.push(line.clone());
40
44
 
41
45
  match path.canonicalize() {
42
46
  Ok(canonical_path) => {
@@ -133,7 +137,8 @@ mod tests {
133
137
  fn test_no_ignored_files() {
134
138
  let dir = tempfile::tempdir().unwrap();
135
139
  let gitignored_files =
136
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
140
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
141
+ .unwrap();
137
142
  Command::new("git")
138
143
  .args(&["init"])
139
144
  .current_dir(dir.path())
@@ -160,7 +165,8 @@ mod tests {
160
165
  .output()
161
166
  .unwrap();
162
167
  let gitignored_files =
163
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
168
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
169
+ .unwrap();
164
170
  println!(
165
171
  "ignored files for test_one_ignored_file: {:?}",
166
172
  gitignored_files
@@ -190,7 +196,8 @@ mod tests {
190
196
  .output()
191
197
  .unwrap();
192
198
  let gitignored_files =
193
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
199
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
200
+ .unwrap();
194
201
  println!(
195
202
  "ignored files for test_multiple_ignored_files: {:?}",
196
203
  gitignored_files
@@ -254,7 +261,8 @@ mod tests {
254
261
  println!("git submodule add output: {:?}", o);
255
262
 
256
263
  let gitignored_files =
257
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
264
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
265
+ .unwrap();
258
266
  println!(
259
267
  "ignored files for test_git_submodule_ignored_files: {:?}",
260
268
  gitignored_files
@@ -265,7 +273,8 @@ mod tests {
265
273
 
266
274
  #[test]
267
275
  fn test_multiple_ignored_files_in_current_dir() {
268
- let gitignored_files = list_ignored_files(".", false).unwrap();
276
+ let gitignored_files =
277
+ list_ignored_files_and_directories(".", false).unwrap();
269
278
  assert!(gitignored_files.len() > 1);
270
279
 
271
280
  // print a sample of the ignored files
package/src/lib.rs CHANGED
@@ -1,15 +1,15 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
+ pub mod logger;
5
6
  pub mod merkle_tree;
6
7
 
7
- use std::vec;
8
+ use std::{collections::HashSet, vec};
8
9
 
10
+ use anyhow::Context;
9
11
  use merkle_tree::{LocalConstruction, MerkleTree};
10
- use tracing::{info, Level};
11
- use tracing_appender::rolling::{RollingFileAppender, Rotation};
12
- use tracing_subscriber::fmt;
12
+ use tracing::{debug, info};
13
13
 
14
14
  #[macro_use]
15
15
  extern crate napi_derive;
@@ -17,46 +17,101 @@ extern crate napi_derive;
17
17
  #[napi]
18
18
  pub struct MerkleClient {
19
19
  tree: MerkleTree,
20
- root_directory: String,
21
- _guard: tracing_appender::non_blocking::WorkerGuard,
22
- }
23
-
24
- pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
25
- let file_appender =
26
- RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
27
- let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
28
- let subscriber = fmt::Subscriber::builder()
29
- .with_max_level(Level::TRACE)
30
- .with_writer(non_blocking)
31
- .with_ansi(false)
32
- .with_line_number(true)
33
- .finish();
34
-
35
- let _ = tracing::subscriber::set_global_default(subscriber);
36
-
37
- _guard
20
+ absolute_root_directory: String,
21
+ _guard: Option<logger::GuardType>,
38
22
  }
39
23
 
40
24
  #[napi]
41
25
  impl MerkleClient {
42
26
  #[napi(constructor)]
43
- pub fn new(root_directory: String) -> MerkleClient {
44
- let _guard = init_logger();
27
+ pub fn new(absolute_root_directory: String) -> MerkleClient {
28
+ let _guard = logger::init_logger();
29
+
30
+ // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
31
+ // use dunce::canonicalize;
32
+ // let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
33
+ // Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
34
+ // Err(e) => {
35
+ // info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
36
+ // absolute_root_directory
37
+ // }
38
+ // };
45
39
 
46
40
  MerkleClient {
47
41
  tree: MerkleTree::empty_tree(),
48
- root_directory,
42
+ absolute_root_directory,
49
43
  _guard,
50
44
  }
51
45
  }
52
46
 
47
+ #[napi]
48
+ pub async fn is_too_big(
49
+ &self,
50
+ max_files: i32,
51
+ git_ignored_files: Vec<String>,
52
+ is_git_repo: bool,
53
+ ) -> bool {
54
+ let git_ignored_set =
55
+ HashSet::<String>::from_iter(git_ignored_files.into_iter());
56
+ let mut num_files = 0;
57
+ let mut dirs_to_check = vec![self.absolute_root_directory.clone()];
58
+
59
+ while let Some(dir) = dirs_to_check.pop() {
60
+ info!("dir: {:?}", dir);
61
+ let mut entries = match tokio::fs::read_dir(&dir).await {
62
+ Ok(entries) => entries,
63
+ Err(_) => continue,
64
+ };
65
+ if num_files > max_files {
66
+ return true;
67
+ }
68
+
69
+
70
+ while let Some(entry) = entries.next_entry().await.unwrap_or(None) {
71
+ let path = entry.path();
72
+ info!("entry: {:?}", path);
73
+ let path_str = match path.to_str() {
74
+ Some(path_str) => path_str.to_string(),
75
+ None => continue,
76
+ };
77
+
78
+ if git_ignored_set.contains(&path_str) {
79
+ continue;
80
+ }
81
+
82
+ match entry.file_type().await {
83
+ Ok(file_type) => {
84
+ if file_type.is_dir() {
85
+ dirs_to_check.push(path_str);
86
+ }
87
+
88
+ if file_type.is_file() {
89
+ num_files += 1;
90
+ }
91
+ }
92
+ Err(_) => continue,
93
+ }
94
+
95
+ }
96
+ }
97
+ num_files > max_files
98
+ }
99
+
53
100
  #[napi]
54
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
101
+ pub async unsafe fn init(
102
+ &mut self,
103
+ git_ignored_files: Vec<String>,
104
+ is_git_repo: bool,
105
+ ) -> Result<(), napi::Error> {
55
106
  // 1. compute the merkle tree
56
107
  // 2. update the backend
57
108
  // 3. sync with the remote
109
+ info!("Merkle tree compute started!");
110
+ info!("Root directory: {:?}", self.absolute_root_directory);
58
111
  unsafe {
59
- self.compute_merkle_tree().await?;
112
+ self
113
+ .compute_merkle_tree(git_ignored_files, is_git_repo)
114
+ .await?;
60
115
  }
61
116
 
62
117
  Ok(())
@@ -69,21 +124,25 @@ impl MerkleClient {
69
124
  #[napi]
70
125
  pub async unsafe fn compute_merkle_tree(
71
126
  &mut self,
127
+ git_ignored_files: Vec<String>,
128
+ is_git_repo: bool,
72
129
  ) -> Result<(), napi::Error> {
73
- let t =
74
- MerkleTree::construct_merkle_tree(self.root_directory.clone()).await;
130
+ // make the git ignored files into a hash set
131
+ let mut git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
75
132
 
76
- let files = self.tree.get_all_files().await;
77
-
78
- match files {
79
- Ok(files) => {
80
- info!("files: {:?}", files);
81
- }
82
- Err(e) => {
83
- info!("Error in get_all_files: {:?}", e);
84
- }
133
+ // if the hashset itself contains the root directory, then we should remove it.
134
+ // this is because the root directory is not a file, and we don't want to ignore it.
135
+ if git_ignored_set.contains(&self.absolute_root_directory) {
136
+ git_ignored_set.remove(&self.absolute_root_directory);
85
137
  }
86
138
 
139
+ let t = MerkleTree::construct_merkle_tree(
140
+ self.absolute_root_directory.clone(),
141
+ git_ignored_set,
142
+ is_git_repo,
143
+ )
144
+ .await;
145
+
87
146
  match t {
88
147
  Ok(tree) => {
89
148
  self.tree = tree;
@@ -111,19 +170,50 @@ impl MerkleClient {
111
170
  &self,
112
171
  relative_path: String,
113
172
  ) -> Result<String, napi::Error> {
114
- info!("relative_path: {:?}", relative_path);
115
- let absolute_path =
116
- std::path::Path::new(&self.root_directory).join(relative_path);
117
- let canonical_path = absolute_path.canonicalize().unwrap();
173
+ debug!("get_subtree_hash: relative_path: {:?}", relative_path);
174
+
175
+ let relative_path_without_leading_slash = match relative_path
176
+ .strip_prefix('.')
177
+ {
178
+ Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
179
+ None => relative_path.as_str(),
180
+ };
181
+ debug!(
182
+ "relative_path_without_leading_slash: {:?}",
183
+ relative_path_without_leading_slash
184
+ );
185
+
186
+ let absolute_path = if !relative_path_without_leading_slash.is_empty() {
187
+ std::path::Path::new(&self.absolute_root_directory)
188
+ .join(relative_path_without_leading_slash)
189
+ } else {
190
+ std::path::Path::new(&self.absolute_root_directory).to_path_buf()
191
+ };
192
+
193
+ debug!("absolute_path: {:?}", absolute_path);
194
+
195
+ let absolute_path_string = match absolute_path.to_str() {
196
+ Some(path) => path.to_string(),
197
+ None => {
198
+ return Err(napi::Error::new(
199
+ napi::Status::Unknown,
200
+ format!("some string error"),
201
+ ))
202
+ }
203
+ };
118
204
 
119
- info!("canonical_path: {:?}", canonical_path);
120
- let hash = self.tree.get_subtree_hash(canonical_path).await;
205
+ debug!("absolute_path_string: {:?}", absolute_path_string);
206
+
207
+ let hash = self
208
+ .tree
209
+ .get_subtree_hash(absolute_path_string.as_str())
210
+ .await;
121
211
 
122
212
  match hash {
123
213
  Ok(hash) => Ok(hash),
124
214
  Err(e) => Err(napi::Error::new(
125
215
  napi::Status::Unknown,
126
- format!("Error in get_subtree_hash: {:?}", e),
216
+ format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
127
217
  )),
128
218
  }
129
219
  }
@@ -145,7 +235,7 @@ impl MerkleClient {
145
235
  &self,
146
236
  relative_path: String,
147
237
  ) -> Result<i32, napi::Error> {
148
- let absolute_path = std::path::Path::new(&self.root_directory)
238
+ let absolute_path = std::path::Path::new(&self.absolute_root_directory)
149
239
  .join(relative_path)
150
240
  .canonicalize()?;
151
241
 
@@ -181,10 +271,12 @@ impl MerkleClient {
181
271
  &self,
182
272
  absolute_file_path: String,
183
273
  ) -> Result<Vec<String>, napi::Error> {
184
- let absolute_path_str = absolute_file_path.as_str();
274
+ // let absolute_path = absolute_file_path.to_lowercase();
275
+ // let absolute_path_str = absolute_path.as_str();
276
+
185
277
  let files = self
186
278
  .tree
187
- .get_all_dir_files_to_embed(absolute_path_str)
279
+ .get_all_dir_files_to_embed(absolute_file_path.as_str())
188
280
  .await;
189
281
 
190
282
  match files {
@@ -209,11 +301,7 @@ impl MerkleClient {
209
301
  // TODO(sualeh): we should assert that the path is ascending up to the path.
210
302
 
211
303
  let ret = vec![file];
212
- info!("file: {:?}", ret);
213
-
214
304
  let ret = ret.into_iter().chain(path.into_iter()).collect::<Vec<_>>();
215
- info!("ret to js: {:?}", ret);
216
-
217
305
  Ok(ret)
218
306
  }
219
307
  Err(e) => Err(napi::Error::new(
@@ -229,8 +317,9 @@ impl MerkleClient {
229
317
  &self,
230
318
  absolute_file_path: String,
231
319
  ) -> Result<Vec<String>, napi::Error> {
232
- let absolute_path_str = absolute_file_path.as_str();
233
- let spline = self.tree.get_spline(absolute_path_str).await;
320
+ // let absolute_path = absolute_file_path.to_lowercase();
321
+ // let absolute_path_str = absolute_path.as_str();
322
+ let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
234
323
 
235
324
  match spline {
236
325
  Ok(spline) => Ok(spline),
@@ -259,6 +348,6 @@ impl MerkleClient {
259
348
 
260
349
  #[napi]
261
350
  pub fn update_root_directory(&mut self, root_directory: String) {
262
- self.root_directory = root_directory;
351
+ self.absolute_root_directory = root_directory;
263
352
  }
264
353
  }
package/src/logger.rs ADDED
@@ -0,0 +1,55 @@
1
+ use tracing::{info, subscriber, Level};
2
+ use tracing_appender::non_blocking::WorkerGuard;
3
+ use tracing_appender::rolling::{RollingFileAppender, Rotation};
4
+ use tracing_subscriber::fmt;
5
+ use tracing_subscriber::prelude::*;
6
+
7
+ pub enum GuardType {
8
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
9
+ Guard(tracing_axiom::Guard),
10
+ WorkerGuard(tracing_appender::non_blocking::WorkerGuard),
11
+ }
12
+
13
+ pub fn init_logger() -> Option<GuardType> {
14
+ #[cfg(feature = "debugfile")]
15
+ let _guard = {
16
+ let file_appender =
17
+ RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
18
+ let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
19
+ let subscriber = fmt::Subscriber::builder()
20
+ .with_max_level(Level::TRACE)
21
+ .with_writer(non_blocking)
22
+ .with_ansi(false)
23
+ .with_line_number(true)
24
+ .finish();
25
+
26
+ let _ = tracing::subscriber::set_global_default(subscriber);
27
+
28
+ Some(GuardType::WorkerGuard(_guard))
29
+ };
30
+
31
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
32
+ let _guard = {
33
+ let (axiom_layer, _guard) = tracing_axiom::builder()
34
+ .with_token("xaat-a51088e6-7889-41c0-b440-cfd4601acdd7")
35
+ .with_dataset("local-indexing")
36
+ .layer()
37
+ .ok()?;
38
+ // let fmt_layer = fmt::layer().with_level(true).with_ansi(false).with_line_number(true);
39
+
40
+ let _ = tracing_subscriber::registry()
41
+ .with(axiom_layer)
42
+ .try_init()
43
+ .ok()?;
44
+ // let _ = tracing::subscriber::set_global_default(subscriber);
45
+
46
+ info!("Tracing initialized! in rust");
47
+
48
+ Some(GuardType::Guard(_guard))
49
+ };
50
+
51
+ #[cfg(all(not(feature = "debugfile"), target_os = "linux"))]
52
+ let _guard = { None };
53
+
54
+ _guard
55
+ }
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -10,11 +9,18 @@ use tonic::async_trait;
10
9
 
11
10
  #[async_trait]
12
11
  impl LocalConstruction for MerkleTree {
12
+ #[tracing::instrument]
13
13
  async fn new(
14
14
  root_directory: Option<String>,
15
15
  ) -> Result<MerkleTree, anyhow::Error> {
16
+ let git_ignored_files = HashSet::<String>::new();
16
17
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
18
+ let n = MerkleTree::construct_merkle_tree(
19
+ root_directory,
20
+ git_ignored_files,
21
+ false,
22
+ )
23
+ .await;
18
24
  return n;
19
25
  }
20
26
 
@@ -30,6 +36,8 @@ impl LocalConstruction for MerkleTree {
30
36
  /// 4. return merkle tree
31
37
  async fn construct_merkle_tree(
32
38
  absolute_path_to_root_directory: String,
39
+ git_ignored_files_and_dirs: HashSet<String>,
40
+ is_git_repo: bool,
33
41
  ) -> Result<MerkleTree, anyhow::Error> {
34
42
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
43
  if !path.exists() {
@@ -38,29 +46,31 @@ impl LocalConstruction for MerkleTree {
38
46
  }
39
47
 
40
48
  // 1. get all the gitignored files
41
- let git_ignored_files = match git_utils::list_ignored_files(
42
- absolute_path_to_root_directory.as_str(),
43
- true,
44
- ) {
45
- Ok(git_ignored) => git_ignored,
46
- Err(_e) => HashSet::new(),
47
- };
48
-
49
- tracing::info!("git_ignored_files: {:?}", git_ignored_files);
49
+ // let git_ignored_files_and_dirs =
50
+ // match git_utils::list_ignored_files_and_directories(
51
+ // absolute_path_to_root_directory.as_str(),
52
+ // true,
53
+ // ) {
54
+ // Ok(git_ignored) => git_ignored,
55
+ // Err(_e) => HashSet::new(),
56
+ // };
50
57
 
51
58
  let root_node = MerkleNode::new(
52
59
  path,
53
60
  None,
54
- &git_ignored_files,
61
+ &git_ignored_files_and_dirs,
55
62
  absolute_path_to_root_directory.as_str(),
63
+ is_git_repo,
56
64
  )
57
65
  .await;
66
+
58
67
  let mut mt = MerkleTree {
59
68
  root: root_node,
60
69
  files: BTreeMap::new(),
61
70
  root_path: absolute_path_to_root_directory,
62
71
  cursor: None,
63
- git_ignored_files,
72
+ git_ignored_files_and_dirs,
73
+ is_git_repo,
64
74
  };
65
75
 
66
76
  // we now iterate over all the nodes and add them to the hashmap
@@ -81,6 +91,13 @@ impl LocalConstruction for MerkleTree {
81
91
  }
82
92
  NodeType::File(file_name) => {
83
93
  let f = File { node: node.clone() };
94
+
95
+ // i dont reallly like this :(((
96
+ // let canonical_file_name = match dunce::canonicalize(file_name) {
97
+ // Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
98
+ // Err(_) => file_name.clone(),
99
+ // };
100
+
84
101
  files.insert(file_name.clone(), f);
85
102
  }
86
103
  NodeType::ErrorNode(_) => {
@@ -92,6 +109,8 @@ impl LocalConstruction for MerkleTree {
92
109
 
93
110
  add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
94
111
 
112
+ tracing::info!("number of files in the tree: {}", mt.files.len());
113
+
95
114
  Ok(mt)
96
115
  }
97
116
 
@@ -134,6 +153,7 @@ impl LocalConstruction for MerkleTree {
134
153
  Ok(())
135
154
  }
136
155
 
156
+ #[tracing::instrument]
137
157
  async fn delete_file(
138
158
  &mut self,
139
159
  file_path: String,
@@ -1,24 +1,26 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
6
4
  use std::path::PathBuf;
5
+ use std::vec;
7
6
  use std::{fs, path::Path, sync::Arc};
8
7
  use tokio::sync::RwLock;
9
8
  use tonic::async_trait;
10
- use tracing::info;
9
+ use tracing::{debug, info};
10
+
11
11
  pub mod local_construction;
12
12
  pub mod test;
13
13
 
14
14
  pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
15
15
 
16
+ #[derive(Debug)]
16
17
  pub struct MerkleTree {
17
18
  root_path: String,
18
19
  root: MerkleNodePtr,
19
20
  files: BTreeMap<String, File>,
20
21
  cursor: Option<usize>,
21
- git_ignored_files: HashSet<String>,
22
+ git_ignored_files_and_dirs: HashSet<String>,
23
+ is_git_repo: bool,
22
24
  }
23
25
 
24
26
  #[derive(Debug)]
@@ -62,6 +64,8 @@ pub trait LocalConstruction {
62
64
 
63
65
  async fn construct_merkle_tree(
64
66
  root_directory: String,
67
+ git_ignored_files_and_dirs: HashSet<String>,
68
+ is_git_repo: bool,
65
69
  ) -> Result<MerkleTree, anyhow::Error>;
66
70
 
67
71
  async fn update_file(
@@ -95,33 +99,34 @@ impl MerkleTree {
95
99
  files: BTreeMap::new(),
96
100
  root_path: "".to_string(),
97
101
  cursor: None,
98
- git_ignored_files: HashSet::new(),
102
+ git_ignored_files_and_dirs: HashSet::new(),
103
+ is_git_repo: false,
99
104
  }
100
105
  }
101
106
 
102
107
  pub async fn get_subtree_hash(
103
108
  &self,
104
- absolute_path: PathBuf,
109
+ absolute_path: &str,
105
110
  ) -> Result<String, anyhow::Error> {
106
- let abs_string = match absolute_path.to_str() {
107
- Some(s) => s.to_string(),
108
- None => {
109
- return Err(anyhow::anyhow!(
110
- "get_subtree_hash: Failed to convert path to string"
111
- ))
112
- }
113
- };
111
+ debug!("get_subtree_hash: absolute_path: {:?}", absolute_path);
114
112
 
115
- let node = match self.files.get(&abs_string) {
113
+ let node = match self.files.get(absolute_path) {
116
114
  Some(file) => file.node.clone(),
117
115
  None => {
118
- return Err(anyhow::anyhow!("Could not find file in tree!"));
116
+ let all_files: Vec<String> = self.files.keys().cloned().collect();
117
+ return Err(anyhow::anyhow!(
118
+ "Could not find file in tree! Looking for: {}. All files: {:?}",
119
+ absolute_path,
120
+ all_files
121
+ ));
119
122
  }
120
123
  };
121
124
 
122
125
  let node_reader = node.read().await;
123
126
  let node_hash = node_reader.hash.clone();
124
127
 
128
+ debug!("node_hash: {:?}", node_hash);
129
+
125
130
  Ok(node_hash)
126
131
  }
127
132
 
@@ -285,6 +290,12 @@ impl MerkleTree {
285
290
  ) -> Result<Vec<String>, anyhow::Error> {
286
291
  let mut files = Vec::new();
287
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
288
299
  for (file_path, f) in &self.files {
289
300
  if !file_path.contains(absolute_path) {
290
301
  continue;
@@ -311,16 +322,11 @@ impl MerkleTree {
311
322
  &self,
312
323
  absolute_path: &str,
313
324
  ) -> Result<Vec<String>, anyhow::Error> {
314
- info!("get_spline called with absolute_path: {}", absolute_path);
315
325
  let mut files = Vec::new();
316
326
 
317
327
  let current_node = match self.files.get(absolute_path) {
318
- Some(node) => {
319
- info!("Found node for absolute_path: {}", absolute_path);
320
- node.node.clone()
321
- }
328
+ Some(node) => node.node.clone(),
322
329
  None => {
323
- info!("File not found for absolute_path: {}", absolute_path);
324
330
  return Err(anyhow::anyhow!("File not found: {}", absolute_path));
325
331
  }
326
332
  };
@@ -331,7 +337,6 @@ impl MerkleTree {
331
337
  while let Some(node) = stack.pop() {
332
338
  let parent = node.read().await.parent.clone();
333
339
  if let Some(parent) = parent {
334
- info!("Adding parent hash to files vector");
335
340
  {
336
341
  let parent_node = parent.read().await;
337
342
  match &parent_node.node_type {
@@ -350,7 +355,6 @@ impl MerkleTree {
350
355
  stack.push(parent);
351
356
  }
352
357
  }
353
- info!("Returning files vector with {} elements", files.len());
354
358
  Ok(files)
355
359
  }
356
360
 
@@ -397,8 +401,9 @@ impl MerkleTree {
397
401
  let new_node = MerkleNode::new(
398
402
  file_path.clone(),
399
403
  Some(ancestor.clone()),
400
- &self.git_ignored_files,
404
+ &self.git_ignored_files_and_dirs,
401
405
  &absolute_root_path.as_str(),
406
+ self.is_git_repo,
402
407
  )
403
408
  .await;
404
409
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -414,8 +419,9 @@ impl MerkleTree {
414
419
  let first_child = MerkleNode::new(
415
420
  first_child_path.clone(),
416
421
  Some(ancestor.clone()),
417
- &self.git_ignored_files,
422
+ &self.git_ignored_files_and_dirs,
418
423
  &absolute_root_path.as_str(),
424
+ self.is_git_repo,
419
425
  )
420
426
  .await;
421
427
 
@@ -703,13 +709,14 @@ impl MerkleNode {
703
709
  parent: ParentPtr,
704
710
  ignored_files: &IgnoredFiles,
705
711
  absolute_root_path: &str,
712
+ is_git_repo: bool,
706
713
  ) -> MerkleNodePtr {
707
- // check if the root is a git directory.
708
- let is_git_repo =
709
- match git_utils::is_git_directory(absolute_root_path).await {
710
- Ok(is_git_repo) => is_git_repo,
711
- Err(e) => false,
712
- };
714
+ // // check if the root is a git directory.
715
+ // let is_git_repo =
716
+ // match git_utils::is_git_directory(absolute_root_path).await {
717
+ // Ok(is_git_repo) => is_git_repo,
718
+ // Err(_e) => false,
719
+ // };
713
720
  let bypass_git = !is_git_repo;
714
721
 
715
722
  MerkleNode::construct_node(
@@ -722,25 +729,20 @@ impl MerkleNode {
722
729
  .await
723
730
  }
724
731
 
732
+ // #[tracing::instrument]
725
733
  async fn new(
726
734
  absolute_file_or_directory: PathBuf,
727
735
  parent: ParentPtr,
728
736
  ignored_files: &IgnoredFiles,
729
737
  absolute_root_path: &str,
738
+ is_git_repo: bool,
730
739
  ) -> MerkleNodePtr {
731
- // check if the root is a git directory.
732
- let is_git_repo =
733
- match git_utils::is_git_directory(absolute_root_path).await {
734
- Ok(is_git_repo) => is_git_repo,
735
- Err(_e) => false,
736
- };
737
740
  let bypass_git = !is_git_repo;
738
741
 
739
742
  info!(
740
743
  "constructing node for absolute_file_or_directory: {:?}",
741
744
  absolute_file_or_directory
742
745
  );
743
- info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
744
746
 
745
747
  MerkleNode::construct_node(
746
748
  Path::new(&absolute_file_or_directory),
@@ -769,6 +771,7 @@ impl MerkleNode {
769
771
  Box::pin(async move {
770
772
  // check if it is a file
771
773
  let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
774
+
772
775
  if absolute_file_or_directory.is_file() {
773
776
  return Arc::new(RwLock::new(
774
777
  MerkleNode::construct_file_node_or_error_node(
@@ -790,22 +793,10 @@ impl MerkleNode {
790
793
  )));
791
794
  }
792
795
 
793
- // check if the directory is git ignored
794
- let is_git_ignored =
795
- match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
796
- .await
797
- {
798
- Ok(is_git_ignored) => is_git_ignored,
799
- Err(e) => {
800
- return Arc::new(RwLock::new(MerkleNode::empty_node(
801
- Some(absolute_file_or_directory),
802
- Some(e.to_string()),
803
- )));
804
- }
805
- };
796
+ let is_git_ignored_dir = ignored_files.contains(&path_str);
806
797
 
807
- if is_git_ignored && !bypass_git {
808
- // println!("skipping directory: {}", path_str);
798
+ if is_git_ignored_dir && !bypass_git {
799
+ tracing::info!("skipping directory: {}", path_str);
809
800
  return Arc::new(RwLock::new(MerkleNode::empty_node(
810
801
  Some(absolute_file_or_directory),
811
802
  Some("Directory is git ignored!".to_string()),
@@ -816,6 +807,7 @@ impl MerkleNode {
816
807
  match entries {
817
808
  Ok(_) => (),
818
809
  Err(e) => {
810
+ tracing::error!("error reading directory: {}", e);
819
811
  return Arc::new(RwLock::new(MerkleNode::empty_node(
820
812
  Some(absolute_file_or_directory),
821
813
  Some(e.to_string()),
@@ -848,6 +840,7 @@ impl MerkleNode {
848
840
  );
849
841
  }
850
842
  Err(e) => {
843
+ tracing::error!("error reading directory: {}", e);
851
844
  children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
852
845
  Some(absolute_file_or_directory),
853
846
  Some(e.to_string()),
@@ -894,18 +887,10 @@ impl MerkleNode {
894
887
  false => {}
895
888
  }
896
889
 
897
- // read the file_content to a buffer
898
- let file_content = match tokio::fs::read(absolute_file_path).await {
899
- Ok(content) => content,
900
- Err(e) => {
901
- return Err(format!("Could not read file! {}", e.to_string()));
902
- }
903
- };
904
-
905
890
  // check if the file passes runtime checks.
906
891
  match file_utils::is_good_file_runtime_check(
907
892
  absolute_file_path,
908
- &file_content,
893
+ // &file_content,
909
894
  )
910
895
  .await
911
896
  {
@@ -915,15 +900,14 @@ impl MerkleNode {
915
900
  }
916
901
  }
917
902
 
918
- let file_content = match std::str::from_utf8(&file_content) {
919
- Ok(content) => content,
920
- Err(e) => {
921
- return Err(format!(
922
- "UTF8 Failure. Could not convert file content to string! {}",
923
- e.to_string()
924
- ));
925
- }
926
- };
903
+ // read the file_content to a buffer
904
+ let file_content =
905
+ match file_utils::read_string_without_bom(absolute_file_path).await {
906
+ Ok(content) => content,
907
+ Err(e) => {
908
+ return Err(format!("Could not read file! {}", e.to_string()));
909
+ }
910
+ };
927
911
 
928
912
  let file_hash = compute_hash(&file_content);
929
913
  let node = MerkleNode {
@@ -950,11 +934,7 @@ impl MerkleNode {
950
934
  .await
951
935
  {
952
936
  Ok(node) => node,
953
- Err(e) => {
954
- // println!("constructing error node. error: {}", e);
955
- // println!("file_path: {:?}", file_path);
956
- MerkleNode::empty_node(Some(absolute_file_path), Some(e))
957
- }
937
+ Err(e) => MerkleNode::empty_node(Some(absolute_file_path), Some(e)),
958
938
  };
959
939
 
960
940
  node
@@ -979,15 +959,50 @@ impl MerkleNode {
979
959
 
980
960
  async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
981
961
  let mut hasher = sha2::Sha256::new();
962
+ let mut names_and_hashes = vec![];
963
+ let mut non_zero_children = 0;
964
+
982
965
  for child in children {
983
966
  // check if it is an error node
984
967
  let child_reader = child.read().await;
985
- if let NodeType::ErrorNode(_) = &child_reader.node_type {
968
+
969
+ match &child_reader.node_type {
970
+ NodeType::File(file_name) => {
971
+ non_zero_children += 1;
972
+ names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
973
+ }
974
+ NodeType::Branch((file_name, _)) => {
975
+ let hash = child_reader.hash.clone();
976
+ if hash == "" {
977
+ continue;
978
+ }
979
+
980
+ non_zero_children += 1;
981
+ names_and_hashes.push((file_name.clone(), hash));
982
+ }
983
+ NodeType::ErrorNode(_) => {
984
+ continue;
985
+ }
986
+ }
987
+ }
988
+
989
+ // sort the list of names and hashes by the hashes!!
990
+ names_and_hashes
991
+ .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
992
+
993
+ for (name, hash) in names_and_hashes {
994
+ if hash == "" {
986
995
  continue;
987
996
  }
997
+ hasher.update(hash);
998
+ }
988
999
 
989
- hasher.update(child_reader.hash.as_bytes());
1000
+ if non_zero_children == 0 {
1001
+ // this means that the branch is empty.
1002
+ // we should return an empty string.
1003
+ return "".to_string();
990
1004
  }
1005
+
991
1006
  let result = hasher.finalize();
992
1007
  format!("{:x}", result)
993
1008
  }
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);