@anysphere/file-service 0.0.0-eb8b99bf → 0.0.0-ebd74baa

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,11 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+ debugfile = []
13
+
9
14
  [dependencies]
10
15
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
16
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -20,6 +25,12 @@ prost = "0.11.9"
20
25
  tracing = "0.1.37"
21
26
  tracing-subscriber = "0.3.17"
22
27
  tracing-appender = "0.2.2"
28
+ binaryornot = "1.0.0"
29
+ dunce = "1.0.1"
30
+ encoding_rs = "0.8.33"
31
+
32
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
33
+ tracing-axiom = "0.4"
23
34
 
24
35
  [build-dependencies]
25
36
  napi-build = "2.0.1"
@@ -27,5 +38,6 @@ tonic-build = "0.9.2"
27
38
  anyhow = "1.0.75"
28
39
  glob = "0.3.0"
29
40
 
41
+
30
42
  [profile.release]
31
43
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -4,9 +4,9 @@
4
4
  /* auto-generated by NAPI-RS */
5
5
 
6
6
  export class MerkleClient {
7
- constructor(rootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
7
+ constructor(absoluteRootDirectory: string)
8
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
9
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
10
  updateFile(filePath: string): Promise<void>
11
11
  deleteFile(filePath: string): Promise<void>
12
12
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-eb8b99bf",
3
+ "version": "0.0.0-ebd74baa",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -36,12 +36,12 @@
36
36
  "version": "napi version"
37
37
  },
38
38
  "optionalDependencies": {
39
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-eb8b99bf",
40
- "@anysphere/file-service-darwin-x64": "0.0.0-eb8b99bf",
41
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-eb8b99bf",
42
- "@anysphere/file-service-darwin-arm64": "0.0.0-eb8b99bf",
43
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-eb8b99bf",
44
- "@anysphere/file-service-darwin-universal": "0.0.0-eb8b99bf",
45
- "@anysphere/file-service-linux-arm64-gnu": "0.0.0-eb8b99bf"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-ebd74baa",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-ebd74baa",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-ebd74baa",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-ebd74baa",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-ebd74baa",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-ebd74baa",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-ebd74baa"
46
46
  }
47
47
  }
package/src/file_utils.rs CHANGED
@@ -5,6 +5,7 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
+ use encoding_rs::UTF_8;
8
9
  use std::path::Path;
9
10
  use tokio::fs;
10
11
 
@@ -43,8 +44,90 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock",
49
+ "bak",
50
+ "tmp",
51
+ "bin",
52
+ "exe",
53
+ "dll",
54
+ "so",
55
+ "lockb",
56
+ "qwoff",
57
+ "isl",
58
+ "csv",
59
+ "pdf",
60
+ // add ms word, excel, powerpoint, etc.
61
+ "doc",
62
+ "docx",
63
+ "xls",
64
+ "xlsx",
65
+ "ppt",
66
+ "pptx",
67
+ "odt",
68
+ "ods",
69
+ "odp",
70
+ "odg",
71
+ "odf",
72
+ "sxw",
73
+ "sxc",
74
+ "sxi",
75
+ "sxd",
76
+ "sdc",
77
+ // add images
78
+ "jpg",
79
+ "jpeg",
80
+ "png",
81
+ "gif",
82
+ "bmp",
83
+ "tif",
84
+ // add audio
85
+ "mp3",
86
+ "wav",
87
+ "wma",
88
+ "ogg",
89
+ "flac",
90
+ "aac",
91
+ // add video
92
+ "mp4",
93
+ "mov",
94
+ "wmv",
95
+ "flv",
96
+ "avi",
97
+ // add archives
98
+ "zip",
99
+ "tar",
100
+ "gz",
101
+ "7z",
102
+ "rar",
103
+ "tgz",
104
+ "dmg",
105
+ "iso",
106
+ "cue",
107
+ "mdf",
108
+ "mds",
109
+ "vcd",
110
+ "toast",
111
+ "img",
112
+ "apk",
113
+ "msi",
114
+ "cab",
115
+ "tar.gz",
116
+ "tar.xz",
117
+ "tar.bz2",
118
+ "tar.lzma",
119
+ "tar.Z",
120
+ "tar.sz",
121
+ "lzma",
122
+ // add fonts
123
+ "ttf",
124
+ "otf",
125
+ "woff",
126
+ "woff2",
127
+ "eot",
128
+ ];
129
+ match bad_extensions.contains(&extension) {
130
+ true => {
48
131
  return Err(anyhow::anyhow!("File is just a lock file"));
49
132
  }
50
133
  _ => {}
@@ -62,7 +145,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
62
145
  Some(extension) => match extension.to_str() {
63
146
  Some(ext_str) => {
64
147
  if bad_extensions.contains(&ext_str) {
65
- return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
148
+ return Err(anyhow::anyhow!("Binary file excluded from indexing."));
66
149
  }
67
150
  }
68
151
  None => {
@@ -88,10 +171,12 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
88
171
  Ok(())
89
172
  }
90
173
 
174
+ // use binaryornot::is_binary;
175
+ // use anyhow::Context;
91
176
  // implement the buffer above:
92
177
  pub async fn is_good_file_runtime_check(
93
178
  file_path: &Path,
94
- buffer: &[u8],
179
+ // _buffer: &[u8],
95
180
  ) -> Result<(), Error> {
96
181
  match get_file_size(file_path).await {
97
182
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -101,16 +186,31 @@ pub async fn is_good_file_runtime_check(
101
186
  _ => {}
102
187
  }
103
188
 
104
- for &byte in buffer.iter().take(2048) {
105
- if byte.is_ascii() {
106
- continue;
107
- } else {
108
- return Err(anyhow::anyhow!("File is not a valid UTF-8 string"));
109
- }
110
- }
189
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
190
+ // return Err(anyhow::anyhow!("File is binary"));
191
+ // }
192
+
111
193
  Ok(())
112
194
  }
113
195
 
196
+ pub async fn read_string_without_bom(
197
+ file_path: &Path,
198
+ ) -> Result<String, Error> {
199
+ let file_buffer = match fs::read(file_path).await {
200
+ Ok(buffer) => buffer,
201
+ Err(e) => {
202
+ return Err(anyhow::anyhow!(
203
+ "Failed to read file buffer: {}",
204
+ e.to_string()
205
+ ))
206
+ }
207
+ };
208
+
209
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
210
+
211
+ Ok(cow.to_string())
212
+ }
213
+
114
214
  pub fn as_relative_path(
115
215
  base_path: &Path,
116
216
  file_path: &Path,
@@ -168,25 +268,40 @@ mod tests {
168
268
  temp_file.write_all(b"Hello, world!").await.unwrap();
169
269
  let buffer = fs::read(&temp_file_path).await.unwrap();
170
270
  assert_eq!(
171
- is_good_file_runtime_check(&temp_file_path, &buffer)
172
- .await
173
- .is_ok(),
271
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
174
272
  true
175
273
  );
176
274
  temp_dir.close().unwrap();
177
275
 
276
+ // let temp_dir = tempfile::tempdir().unwrap();
277
+ // let temp_file_path = temp_dir.path().join("test_file");
278
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
279
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
280
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
281
+ // assert_eq!(
282
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
283
+ // true
284
+ // );
285
+ // temp_dir.close().unwrap();
286
+ }
287
+
288
+ #[tokio::test]
289
+ async fn test_bom_file() {
290
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
291
+ const CONTENT: &str = "Hello, world!";
292
+
293
+ // Write this to a temp file
178
294
  let temp_dir = tempfile::tempdir().unwrap();
179
295
  let temp_file_path = temp_dir.path().join("test_file");
180
296
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
181
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
182
- let buffer = fs::read(&temp_file_path).await.unwrap();
183
- assert_eq!(
184
- is_good_file_runtime_check(&temp_file_path, &buffer)
185
- .await
186
- .is_err(),
187
- true
188
- );
189
- temp_dir.close().unwrap();
297
+ temp_file.write_all(&BOM).await.unwrap();
298
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
299
+
300
+ // Expect the file to be read back as exactly CONTENT, i.e. with the leading BOM removed
301
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
302
+
303
+ // Check string equality of CONTENT (&str) to file_contents (String)
304
+ assert_eq!(CONTENT, file_contents);
190
305
  }
191
306
 
192
307
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,7 +1,8 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
- pub fn list_ignored_files(
5
+ pub fn list_ignored_files_and_directories(
5
6
  workspace_root_path: &str,
6
7
  should_return_absolute_paths: bool,
7
8
  ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
@@ -14,12 +15,12 @@ pub fn list_ignored_files(
14
15
  "--others",
15
16
  "--ignored",
16
17
  "--exclude-standard",
18
+ "--directory",
19
+ "--no-empty-directory"
17
20
  ],
18
21
  // FIXME(sualeh): this is super sketchy and might totally break in a bazillion ways. I don't like it.
19
22
  vec![
20
- "sh",
21
- "-c",
22
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
23
24
  ],
24
25
  ];
25
26
 
@@ -34,9 +35,12 @@ pub fn list_ignored_files(
34
35
  .lines()
35
36
  .filter(|line| !line.is_empty())
36
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
37
40
  if should_return_absolute_paths {
38
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
39
- path.push(line);
42
+
43
+ path.push(line.clone());
40
44
 
41
45
  match path.canonicalize() {
42
46
  Ok(canonical_path) => {
@@ -133,7 +137,8 @@ mod tests {
133
137
  fn test_no_ignored_files() {
134
138
  let dir = tempfile::tempdir().unwrap();
135
139
  let gitignored_files =
136
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
140
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
141
+ .unwrap();
137
142
  Command::new("git")
138
143
  .args(&["init"])
139
144
  .current_dir(dir.path())
@@ -160,7 +165,8 @@ mod tests {
160
165
  .output()
161
166
  .unwrap();
162
167
  let gitignored_files =
163
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
168
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
169
+ .unwrap();
164
170
  println!(
165
171
  "ignored files for test_one_ignored_file: {:?}",
166
172
  gitignored_files
@@ -190,7 +196,8 @@ mod tests {
190
196
  .output()
191
197
  .unwrap();
192
198
  let gitignored_files =
193
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
199
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
200
+ .unwrap();
194
201
  println!(
195
202
  "ignored files for test_multiple_ignored_files: {:?}",
196
203
  gitignored_files
@@ -254,7 +261,8 @@ mod tests {
254
261
  println!("git submodule add output: {:?}", o);
255
262
 
256
263
  let gitignored_files =
257
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
264
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
265
+ .unwrap();
258
266
  println!(
259
267
  "ignored files for test_git_submodule_ignored_files: {:?}",
260
268
  gitignored_files
@@ -265,7 +273,8 @@ mod tests {
265
273
 
266
274
  #[test]
267
275
  fn test_multiple_ignored_files_in_current_dir() {
268
- let gitignored_files = list_ignored_files(".", false).unwrap();
276
+ let gitignored_files =
277
+ list_ignored_files_and_directories(".", false).unwrap();
269
278
  assert!(gitignored_files.len() > 1);
270
279
 
271
280
  // print a sample of the ignored files
package/src/lib.rs CHANGED
@@ -1,15 +1,15 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
+ pub mod logger;
5
6
  pub mod merkle_tree;
6
7
 
7
- use std::vec;
8
+ use std::{collections::HashSet, vec};
8
9
 
10
+ use anyhow::Context;
9
11
  use merkle_tree::{LocalConstruction, MerkleTree};
10
- use tracing::{info, Level};
11
- use tracing_appender::rolling::{RollingFileAppender, Rotation};
12
- use tracing_subscriber::fmt;
12
+ use tracing::{debug, info};
13
13
 
14
14
  #[macro_use]
15
15
  extern crate napi_derive;
@@ -17,47 +17,48 @@ extern crate napi_derive;
17
17
  #[napi]
18
18
  pub struct MerkleClient {
19
19
  tree: MerkleTree,
20
- root_directory: String,
21
- _guard: tracing_appender::non_blocking::WorkerGuard,
22
- }
23
-
24
- pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
25
- let file_appender =
26
- RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
27
- let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
28
- let subscriber = fmt::Subscriber::builder()
29
- .with_max_level(Level::TRACE)
30
- .with_writer(non_blocking)
31
- .with_ansi(false)
32
- .with_line_number(true)
33
- .finish();
34
-
35
- let _ = tracing::subscriber::set_global_default(subscriber);
36
-
37
- _guard
20
+ absolute_root_directory: String,
21
+ _guard: Option<logger::GuardType>,
38
22
  }
39
23
 
40
24
  #[napi]
41
25
  impl MerkleClient {
42
26
  #[napi(constructor)]
43
- pub fn new(root_directory: String) -> MerkleClient {
44
- let _guard = init_logger();
27
+ pub fn new(absolute_root_directory: String) -> MerkleClient {
28
+ let _guard = logger::init_logger();
29
+
30
+ // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
31
+ // use dunce::canonicalize;
32
+ // let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
33
+ // Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
34
+ // Err(e) => {
35
+ // info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
36
+ // absolute_root_directory
37
+ // }
38
+ // };
45
39
 
46
40
  MerkleClient {
47
41
  tree: MerkleTree::empty_tree(),
48
- root_directory,
42
+ absolute_root_directory,
49
43
  _guard,
50
44
  }
51
45
  }
52
46
 
53
47
  #[napi]
54
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
48
+ pub async unsafe fn init(
49
+ &mut self,
50
+ git_ignored_files: Vec<String>,
51
+ is_git_repo: bool,
52
+ ) -> Result<(), napi::Error> {
55
53
  // 1. compute the merkle tree
56
54
  // 2. update the backend
57
55
  // 3. sync with the remote
58
56
  info!("Merkle tree compute started!");
57
+ info!("Root directory: {:?}", self.absolute_root_directory);
59
58
  unsafe {
60
- self.compute_merkle_tree().await?;
59
+ self
60
+ .compute_merkle_tree(git_ignored_files, is_git_repo)
61
+ .await?;
61
62
  }
62
63
 
63
64
  Ok(())
@@ -70,21 +71,25 @@ impl MerkleClient {
70
71
  #[napi]
71
72
  pub async unsafe fn compute_merkle_tree(
72
73
  &mut self,
74
+ git_ignored_files: Vec<String>,
75
+ is_git_repo: bool,
73
76
  ) -> Result<(), napi::Error> {
74
- let t =
75
- MerkleTree::construct_merkle_tree(self.root_directory.clone()).await;
76
-
77
- let files = self.tree.get_all_files().await;
77
+ // make the git ignored files into a hash set
78
+ let mut git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
78
79
 
79
- match files {
80
- Ok(files) => {
81
- info!("files: {:?}", files);
82
- }
83
- Err(e) => {
84
- info!("Error in get_all_files: {:?}", e);
85
- }
80
+ // if the hashset itself contains the root directory, then we should remove it.
81
+ // this is because the root directory is not a file, and we don't want to ignore it.
82
+ if git_ignored_set.contains(&self.absolute_root_directory) {
83
+ git_ignored_set.remove(&self.absolute_root_directory);
86
84
  }
87
85
 
86
+ let t = MerkleTree::construct_merkle_tree(
87
+ self.absolute_root_directory.clone(),
88
+ git_ignored_set,
89
+ is_git_repo,
90
+ )
91
+ .await;
92
+
88
93
  match t {
89
94
  Ok(tree) => {
90
95
  self.tree = tree;
@@ -112,17 +117,50 @@ impl MerkleClient {
112
117
  &self,
113
118
  relative_path: String,
114
119
  ) -> Result<String, napi::Error> {
115
- let absolute_path =
116
- std::path::Path::new(&self.root_directory).join(relative_path);
117
- let canonical_path = absolute_path.canonicalize().unwrap();
120
+ debug!("get_subtree_hash: relative_path: {:?}", relative_path);
121
+
122
+ let relative_path_without_leading_slash = match relative_path
123
+ .strip_prefix('.')
124
+ {
125
+ Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
126
+ None => relative_path.as_str(),
127
+ };
128
+ debug!(
129
+ "relative_path_without_leading_slash: {:?}",
130
+ relative_path_without_leading_slash
131
+ );
132
+
133
+ let absolute_path = if !relative_path_without_leading_slash.is_empty() {
134
+ std::path::Path::new(&self.absolute_root_directory)
135
+ .join(relative_path_without_leading_slash)
136
+ } else {
137
+ std::path::Path::new(&self.absolute_root_directory).to_path_buf()
138
+ };
139
+
140
+ debug!("absolute_path: {:?}", absolute_path);
141
+
142
+ let absolute_path_string = match absolute_path.to_str() {
143
+ Some(path) => path.to_string(),
144
+ None => {
145
+ return Err(napi::Error::new(
146
+ napi::Status::Unknown,
147
+ format!("some string error"),
148
+ ))
149
+ }
150
+ };
118
151
 
119
- let hash = self.tree.get_subtree_hash(canonical_path).await;
152
+ debug!("absolute_path_string: {:?}", absolute_path_string);
153
+
154
+ let hash = self
155
+ .tree
156
+ .get_subtree_hash(absolute_path_string.as_str())
157
+ .await;
120
158
 
121
159
  match hash {
122
160
  Ok(hash) => Ok(hash),
123
161
  Err(e) => Err(napi::Error::new(
124
162
  napi::Status::Unknown,
125
- format!("Error in get_subtree_hash: {:?}", e),
163
+ format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
126
164
  )),
127
165
  }
128
166
  }
@@ -144,7 +182,7 @@ impl MerkleClient {
144
182
  &self,
145
183
  relative_path: String,
146
184
  ) -> Result<i32, napi::Error> {
147
- let absolute_path = std::path::Path::new(&self.root_directory)
185
+ let absolute_path = std::path::Path::new(&self.absolute_root_directory)
148
186
  .join(relative_path)
149
187
  .canonicalize()?;
150
188
 
@@ -180,10 +218,12 @@ impl MerkleClient {
180
218
  &self,
181
219
  absolute_file_path: String,
182
220
  ) -> Result<Vec<String>, napi::Error> {
183
- let absolute_path_str = absolute_file_path.as_str();
221
+ // let absolute_path = absolute_file_path.to_lowercase();
222
+ // let absolute_path_str = absolute_path.as_str();
223
+
184
224
  let files = self
185
225
  .tree
186
- .get_all_dir_files_to_embed(absolute_path_str)
226
+ .get_all_dir_files_to_embed(absolute_file_path.as_str())
187
227
  .await;
188
228
 
189
229
  match files {
@@ -224,8 +264,9 @@ impl MerkleClient {
224
264
  &self,
225
265
  absolute_file_path: String,
226
266
  ) -> Result<Vec<String>, napi::Error> {
227
- let absolute_path_str = absolute_file_path.as_str();
228
- let spline = self.tree.get_spline(absolute_path_str).await;
267
+ // let absolute_path = absolute_file_path.to_lowercase();
268
+ // let absolute_path_str = absolute_path.as_str();
269
+ let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
229
270
 
230
271
  match spline {
231
272
  Ok(spline) => Ok(spline),
@@ -254,6 +295,6 @@ impl MerkleClient {
254
295
 
255
296
  #[napi]
256
297
  pub fn update_root_directory(&mut self, root_directory: String) {
257
- self.root_directory = root_directory;
298
+ self.absolute_root_directory = root_directory;
258
299
  }
259
300
  }
package/src/logger.rs ADDED
@@ -0,0 +1,55 @@
1
+ use tracing::{info, subscriber, Level};
2
+ use tracing_appender::non_blocking::WorkerGuard;
3
+ use tracing_appender::rolling::{RollingFileAppender, Rotation};
4
+ use tracing_subscriber::fmt;
5
+ use tracing_subscriber::prelude::*;
6
+
7
+ pub enum GuardType {
8
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
9
+ Guard(tracing_axiom::Guard),
10
+ WorkerGuard(tracing_appender::non_blocking::WorkerGuard),
11
+ }
12
+
13
+ pub fn init_logger() -> Option<GuardType> {
14
+ #[cfg(feature = "debugfile")]
15
+ let _guard = {
16
+ let file_appender =
17
+ RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
18
+ let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
19
+ let subscriber = fmt::Subscriber::builder()
20
+ .with_max_level(Level::TRACE)
21
+ .with_writer(non_blocking)
22
+ .with_ansi(false)
23
+ .with_line_number(true)
24
+ .finish();
25
+
26
+ let _ = tracing::subscriber::set_global_default(subscriber);
27
+
28
+ Some(GuardType::WorkerGuard(_guard))
29
+ };
30
+
31
+ #[cfg(all(not(feature = "debugfile"), not(target_os = "linux")))]
32
+ let _guard = {
33
+ let (axiom_layer, _guard) = tracing_axiom::builder()
34
+ .with_token("xaat-a51088e6-7889-41c0-b440-cfd4601acdd7")
35
+ .with_dataset("local-indexing")
36
+ .layer()
37
+ .ok()?;
38
+ // let fmt_layer = fmt::layer().with_level(true).with_ansi(false).with_line_number(true);
39
+
40
+ let _ = tracing_subscriber::registry()
41
+ .with(axiom_layer)
42
+ .try_init()
43
+ .ok()?;
44
+ // let _ = tracing::subscriber::set_global_default(subscriber);
45
+
46
+ info!("Tracing initialized! in rust");
47
+
48
+ Some(GuardType::Guard(_guard))
49
+ };
50
+
51
+ #[cfg(all(not(feature = "debugfile"), target_os = "linux"))]
52
+ let _guard = { None };
53
+
54
+ _guard
55
+ }
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -10,11 +9,18 @@ use tonic::async_trait;
10
9
 
11
10
  #[async_trait]
12
11
  impl LocalConstruction for MerkleTree {
12
+ #[tracing::instrument]
13
13
  async fn new(
14
14
  root_directory: Option<String>,
15
15
  ) -> Result<MerkleTree, anyhow::Error> {
16
+ let git_ignored_files = HashSet::<String>::new();
16
17
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
18
+ let n = MerkleTree::construct_merkle_tree(
19
+ root_directory,
20
+ git_ignored_files,
21
+ false,
22
+ )
23
+ .await;
18
24
  return n;
19
25
  }
20
26
 
@@ -30,6 +36,8 @@ impl LocalConstruction for MerkleTree {
30
36
  /// 4. return merkle tree
31
37
  async fn construct_merkle_tree(
32
38
  absolute_path_to_root_directory: String,
39
+ git_ignored_files_and_dirs: HashSet<String>,
40
+ is_git_repo: bool,
33
41
  ) -> Result<MerkleTree, anyhow::Error> {
34
42
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
43
  if !path.exists() {
@@ -38,27 +46,31 @@ impl LocalConstruction for MerkleTree {
38
46
  }
39
47
 
40
48
  // 1. get all the gitignored files
41
- let git_ignored_files = match git_utils::list_ignored_files(
42
- absolute_path_to_root_directory.as_str(),
43
- true,
44
- ) {
45
- Ok(git_ignored) => git_ignored,
46
- Err(_e) => HashSet::new(),
47
- };
49
+ // let git_ignored_files_and_dirs =
50
+ // match git_utils::list_ignored_files_and_directories(
51
+ // absolute_path_to_root_directory.as_str(),
52
+ // true,
53
+ // ) {
54
+ // Ok(git_ignored) => git_ignored,
55
+ // Err(_e) => HashSet::new(),
56
+ // };
48
57
 
49
58
  let root_node = MerkleNode::new(
50
59
  path,
51
60
  None,
52
- &git_ignored_files,
61
+ &git_ignored_files_and_dirs,
53
62
  absolute_path_to_root_directory.as_str(),
63
+ is_git_repo,
54
64
  )
55
65
  .await;
66
+
56
67
  let mut mt = MerkleTree {
57
68
  root: root_node,
58
69
  files: BTreeMap::new(),
59
70
  root_path: absolute_path_to_root_directory,
60
71
  cursor: None,
61
- git_ignored_files,
72
+ git_ignored_files_and_dirs,
73
+ is_git_repo,
62
74
  };
63
75
 
64
76
  // we now iterate over all the nodes and add them to the hashmap
@@ -71,7 +83,6 @@ impl LocalConstruction for MerkleTree {
71
83
  let node_reader = node.read().await;
72
84
  match &node_reader.node_type {
73
85
  NodeType::Branch(n) => {
74
- tracing::info!("Branch: {:?}", n.0);
75
86
  let children = &n.1;
76
87
  files.insert(n.0.clone(), File { node: node.clone() });
77
88
  for child in children {
@@ -80,7 +91,13 @@ impl LocalConstruction for MerkleTree {
80
91
  }
81
92
  NodeType::File(file_name) => {
82
93
  let f = File { node: node.clone() };
83
- tracing::info!("File: {:?}", file_name);
94
+
95
+ // I don't really like this :(((
96
+ // let canonical_file_name = match dunce::canonicalize(file_name) {
97
+ // Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
98
+ // Err(_) => file_name.clone(),
99
+ // };
100
+
84
101
  files.insert(file_name.clone(), f);
85
102
  }
86
103
  NodeType::ErrorNode(_) => {
@@ -92,8 +109,7 @@ impl LocalConstruction for MerkleTree {
92
109
 
93
110
  add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
94
111
 
95
- tracing::info!("Merkle tree compute finished!");
96
- // tracing::info!("Merkle tree: {:?}", mt.files);
112
+ tracing::info!("number of files in the tree: {}", mt.files.len());
97
113
 
98
114
  Ok(mt)
99
115
  }
@@ -137,6 +153,7 @@ impl LocalConstruction for MerkleTree {
137
153
  Ok(())
138
154
  }
139
155
 
156
+ #[tracing::instrument]
140
157
  async fn delete_file(
141
158
  &mut self,
142
159
  file_path: String,
@@ -1,24 +1,26 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
6
4
  use std::path::PathBuf;
5
+ use std::vec;
7
6
  use std::{fs, path::Path, sync::Arc};
8
7
  use tokio::sync::RwLock;
9
8
  use tonic::async_trait;
10
- use tracing::info;
9
+ use tracing::{debug, info};
10
+
11
11
  pub mod local_construction;
12
12
  pub mod test;
13
13
 
14
14
  pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
15
15
 
16
+ #[derive(Debug)]
16
17
  pub struct MerkleTree {
17
18
  root_path: String,
18
19
  root: MerkleNodePtr,
19
20
  files: BTreeMap<String, File>,
20
21
  cursor: Option<usize>,
21
- git_ignored_files: HashSet<String>,
22
+ git_ignored_files_and_dirs: HashSet<String>,
23
+ is_git_repo: bool,
22
24
  }
23
25
 
24
26
  #[derive(Debug)]
@@ -62,6 +64,8 @@ pub trait LocalConstruction {
62
64
 
63
65
  async fn construct_merkle_tree(
64
66
  root_directory: String,
67
+ git_ignored_files_and_dirs: HashSet<String>,
68
+ is_git_repo: bool,
65
69
  ) -> Result<MerkleTree, anyhow::Error>;
66
70
 
67
71
  async fn update_file(
@@ -95,34 +99,34 @@ impl MerkleTree {
95
99
  files: BTreeMap::new(),
96
100
  root_path: "".to_string(),
97
101
  cursor: None,
98
- git_ignored_files: HashSet::new(),
102
+ git_ignored_files_and_dirs: HashSet::new(),
103
+ is_git_repo: false,
99
104
  }
100
105
  }
101
106
 
102
107
  pub async fn get_subtree_hash(
103
108
  &self,
104
- absolute_path: PathBuf,
109
+ absolute_path: &str,
105
110
  ) -> Result<String, anyhow::Error> {
106
- let abs_string = match absolute_path.to_str() {
107
- Some(s) => s.to_string(),
108
- None => {
109
- return Err(anyhow::anyhow!(
110
- "get_subtree_hash: Failed to convert path to string"
111
- ))
112
- }
113
- };
111
+ debug!("get_subtree_hash: absolute_path: {:?}", absolute_path);
114
112
 
115
- let node = match self.files.get(&abs_string) {
113
+ let node = match self.files.get(absolute_path) {
116
114
  Some(file) => file.node.clone(),
117
115
  None => {
118
116
  let all_files: Vec<String> = self.files.keys().cloned().collect();
119
- return Err(anyhow::anyhow!("Could not find file in tree! Looking for: {}. All files: {:?}", abs_string, all_files));
117
+ return Err(anyhow::anyhow!(
118
+ "Could not find file in tree! Looking for: {}. All files: {:?}",
119
+ absolute_path,
120
+ all_files
121
+ ));
120
122
  }
121
123
  };
122
124
 
123
125
  let node_reader = node.read().await;
124
126
  let node_hash = node_reader.hash.clone();
125
127
 
128
+ debug!("node_hash: {:?}", node_hash);
129
+
126
130
  Ok(node_hash)
127
131
  }
128
132
 
@@ -286,6 +290,12 @@ impl MerkleTree {
286
290
  ) -> Result<Vec<String>, anyhow::Error> {
287
291
  let mut files = Vec::new();
288
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
289
299
  for (file_path, f) in &self.files {
290
300
  if !file_path.contains(absolute_path) {
291
301
  continue;
@@ -312,16 +322,11 @@ impl MerkleTree {
312
322
  &self,
313
323
  absolute_path: &str,
314
324
  ) -> Result<Vec<String>, anyhow::Error> {
315
- info!("get_spline called with absolute_path: {}", absolute_path);
316
325
  let mut files = Vec::new();
317
326
 
318
327
  let current_node = match self.files.get(absolute_path) {
319
- Some(node) => {
320
- info!("Found node for absolute_path: {}", absolute_path);
321
- node.node.clone()
322
- }
328
+ Some(node) => node.node.clone(),
323
329
  None => {
324
- info!("File not found for absolute_path: {}", absolute_path);
325
330
  return Err(anyhow::anyhow!("File not found: {}", absolute_path));
326
331
  }
327
332
  };
@@ -332,7 +337,6 @@ impl MerkleTree {
332
337
  while let Some(node) = stack.pop() {
333
338
  let parent = node.read().await.parent.clone();
334
339
  if let Some(parent) = parent {
335
- info!("Adding parent hash to files vector");
336
340
  {
337
341
  let parent_node = parent.read().await;
338
342
  match &parent_node.node_type {
@@ -351,7 +355,6 @@ impl MerkleTree {
351
355
  stack.push(parent);
352
356
  }
353
357
  }
354
- info!("Returning files vector with {} elements", files.len());
355
358
  Ok(files)
356
359
  }
357
360
 
@@ -398,8 +401,9 @@ impl MerkleTree {
398
401
  let new_node = MerkleNode::new(
399
402
  file_path.clone(),
400
403
  Some(ancestor.clone()),
401
- &self.git_ignored_files,
404
+ &self.git_ignored_files_and_dirs,
402
405
  &absolute_root_path.as_str(),
406
+ self.is_git_repo,
403
407
  )
404
408
  .await;
405
409
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -415,8 +419,9 @@ impl MerkleTree {
415
419
  let first_child = MerkleNode::new(
416
420
  first_child_path.clone(),
417
421
  Some(ancestor.clone()),
418
- &self.git_ignored_files,
422
+ &self.git_ignored_files_and_dirs,
419
423
  &absolute_root_path.as_str(),
424
+ self.is_git_repo,
420
425
  )
421
426
  .await;
422
427
 
@@ -704,13 +709,14 @@ impl MerkleNode {
704
709
  parent: ParentPtr,
705
710
  ignored_files: &IgnoredFiles,
706
711
  absolute_root_path: &str,
712
+ is_git_repo: bool,
707
713
  ) -> MerkleNodePtr {
708
- // check if the root is a git directory.
709
- let is_git_repo =
710
- match git_utils::is_git_directory(absolute_root_path).await {
711
- Ok(is_git_repo) => is_git_repo,
712
- Err(e) => false,
713
- };
714
+ // // check if the root is a git directory.
715
+ // let is_git_repo =
716
+ // match git_utils::is_git_directory(absolute_root_path).await {
717
+ // Ok(is_git_repo) => is_git_repo,
718
+ // Err(_e) => false,
719
+ // };
714
720
  let bypass_git = !is_git_repo;
715
721
 
716
722
  MerkleNode::construct_node(
@@ -723,25 +729,20 @@ impl MerkleNode {
723
729
  .await
724
730
  }
725
731
 
732
+ // #[tracing::instrument]
726
733
  async fn new(
727
734
  absolute_file_or_directory: PathBuf,
728
735
  parent: ParentPtr,
729
736
  ignored_files: &IgnoredFiles,
730
737
  absolute_root_path: &str,
738
+ is_git_repo: bool,
731
739
  ) -> MerkleNodePtr {
732
- // check if the root is a git directory.
733
- let is_git_repo =
734
- match git_utils::is_git_directory(absolute_root_path).await {
735
- Ok(is_git_repo) => is_git_repo,
736
- Err(_e) => false,
737
- };
738
740
  let bypass_git = !is_git_repo;
739
741
 
740
742
  info!(
741
743
  "constructing node for absolute_file_or_directory: {:?}",
742
744
  absolute_file_or_directory
743
745
  );
744
- info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
745
746
 
746
747
  MerkleNode::construct_node(
747
748
  Path::new(&absolute_file_or_directory),
@@ -770,6 +771,7 @@ impl MerkleNode {
770
771
  Box::pin(async move {
771
772
  // check if it is a file
772
773
  let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
774
+
773
775
  if absolute_file_or_directory.is_file() {
774
776
  return Arc::new(RwLock::new(
775
777
  MerkleNode::construct_file_node_or_error_node(
@@ -791,26 +793,10 @@ impl MerkleNode {
791
793
  )));
792
794
  }
793
795
 
794
- // check if the directory is git ignored
795
- let is_git_ignored =
796
- match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
797
- .await
798
- {
799
- Ok(is_git_ignored) => is_git_ignored,
800
- Err(e) => {
801
- return Arc::new(RwLock::new(MerkleNode::empty_node(
802
- Some(absolute_file_or_directory),
803
- Some(e.to_string()),
804
- )));
805
- }
806
- };
796
+ let is_git_ignored_dir = ignored_files.contains(&path_str);
807
797
 
808
- if is_git_ignored && !bypass_git {
809
- // println!("skipping directory: {}", path_str);
810
- tracing::info!(
811
- "skipping directory because its git ignored: {}",
812
- path_str
813
- );
798
+ if is_git_ignored_dir && !bypass_git {
799
+ tracing::info!("skipping directory: {}", path_str);
814
800
  return Arc::new(RwLock::new(MerkleNode::empty_node(
815
801
  Some(absolute_file_or_directory),
816
802
  Some("Directory is git ignored!".to_string()),
@@ -821,6 +807,7 @@ impl MerkleNode {
821
807
  match entries {
822
808
  Ok(_) => (),
823
809
  Err(e) => {
810
+ tracing::error!("error reading directory: {}", e);
824
811
  return Arc::new(RwLock::new(MerkleNode::empty_node(
825
812
  Some(absolute_file_or_directory),
826
813
  Some(e.to_string()),
@@ -853,6 +840,7 @@ impl MerkleNode {
853
840
  );
854
841
  }
855
842
  Err(e) => {
843
+ tracing::error!("error reading directory: {}", e);
856
844
  children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
857
845
  Some(absolute_file_or_directory),
858
846
  Some(e.to_string()),
@@ -899,18 +887,10 @@ impl MerkleNode {
899
887
  false => {}
900
888
  }
901
889
 
902
- // read the file_content to a buffer
903
- let file_content = match tokio::fs::read(absolute_file_path).await {
904
- Ok(content) => content,
905
- Err(e) => {
906
- return Err(format!("Could not read file! {}", e.to_string()));
907
- }
908
- };
909
-
910
890
  // check if the file passes runtime checks.
911
891
  match file_utils::is_good_file_runtime_check(
912
892
  absolute_file_path,
913
- &file_content,
893
+ // &file_content,
914
894
  )
915
895
  .await
916
896
  {
@@ -920,15 +900,14 @@ impl MerkleNode {
920
900
  }
921
901
  }
922
902
 
923
- let file_content = match std::str::from_utf8(&file_content) {
924
- Ok(content) => content,
925
- Err(e) => {
926
- return Err(format!(
927
- "UTF8 Failure. Could not convert file content to string! {}",
928
- e.to_string()
929
- ));
930
- }
931
- };
903
+ // read the file_content to a buffer
904
+ let file_content =
905
+ match file_utils::read_string_without_bom(absolute_file_path).await {
906
+ Ok(content) => content,
907
+ Err(e) => {
908
+ return Err(format!("Could not read file! {}", e.to_string()));
909
+ }
910
+ };
932
911
 
933
912
  let file_hash = compute_hash(&file_content);
934
913
  let node = MerkleNode {
@@ -955,11 +934,7 @@ impl MerkleNode {
955
934
  .await
956
935
  {
957
936
  Ok(node) => node,
958
- Err(e) => {
959
- // println!("constructing error node. error: {}", e);
960
- // println!("file_path: {:?}", file_path);
961
- MerkleNode::empty_node(Some(absolute_file_path), Some(e))
962
- }
937
+ Err(e) => MerkleNode::empty_node(Some(absolute_file_path), Some(e)),
963
938
  };
964
939
 
965
940
  node
@@ -984,15 +959,50 @@ impl MerkleNode {
984
959
 
985
960
  async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
986
961
  let mut hasher = sha2::Sha256::new();
962
+ let mut names_and_hashes = vec![];
963
+ let mut non_zero_children = 0;
964
+
987
965
  for child in children {
988
966
  // check if it is an error node
989
967
  let child_reader = child.read().await;
990
- if let NodeType::ErrorNode(_) = &child_reader.node_type {
968
+
969
+ match &child_reader.node_type {
970
+ NodeType::File(file_name) => {
971
+ non_zero_children += 1;
972
+ names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
973
+ }
974
+ NodeType::Branch((file_name, _)) => {
975
+ let hash = child_reader.hash.clone();
976
+ if hash == "" {
977
+ continue;
978
+ }
979
+
980
+ non_zero_children += 1;
981
+ names_and_hashes.push((file_name.clone(), hash));
982
+ }
983
+ NodeType::ErrorNode(_) => {
984
+ continue;
985
+ }
986
+ }
987
+ }
988
+
989
+ // sort the list of names and hashes by the hashes!!
990
+ names_and_hashes
991
+ .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
992
+
993
+ for (name, hash) in names_and_hashes {
994
+ if hash == "" {
991
995
  continue;
992
996
  }
997
+ hasher.update(hash);
998
+ }
993
999
 
994
- hasher.update(child_reader.hash.as_bytes());
1000
+ if non_zero_children == 0 {
1001
+ // this means that the branch is empty.
1002
+ // we should return an empty string.
1003
+ return "".to_string();
995
1004
  }
1005
+
996
1006
  let result = hasher.finalize();
997
1007
  format!("{:x}", result)
998
1008
  }
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);