@anysphere/file-service 0.0.0-e0c70bcd → 0.0.0-e36a46ab

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,10 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+
9
13
  [dependencies]
10
14
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
15
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -21,6 +25,8 @@ tracing = "0.1.37"
21
25
  tracing-subscriber = "0.3.17"
22
26
  tracing-appender = "0.2.2"
23
27
  binaryornot = "1.0.0"
28
+ dunce = "1.0.1"
29
+ encoding_rs = "0.8.33"
24
30
 
25
31
  [build-dependencies]
26
32
  napi-build = "2.0.1"
@@ -28,5 +34,6 @@ tonic-build = "0.9.2"
28
34
  anyhow = "1.0.75"
29
35
  glob = "0.3.0"
30
36
 
37
+
31
38
  [profile.release]
32
39
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -4,9 +4,9 @@
4
4
  /* auto-generated by NAPI-RS */
5
5
 
6
6
  export class MerkleClient {
7
- constructor(rootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
7
+ constructor(absoluteRootDirectory: string)
8
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
9
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
10
  updateFile(filePath: string): Promise<void>
11
11
  deleteFile(filePath: string): Promise<void>
12
12
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-e0c70bcd",
3
+ "version": "0.0.0-e36a46ab",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -36,12 +36,12 @@
36
36
  "version": "napi version"
37
37
  },
38
38
  "optionalDependencies": {
39
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-e0c70bcd",
40
- "@anysphere/file-service-darwin-x64": "0.0.0-e0c70bcd",
41
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-e0c70bcd",
42
- "@anysphere/file-service-darwin-arm64": "0.0.0-e0c70bcd",
43
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-e0c70bcd",
44
- "@anysphere/file-service-darwin-universal": "0.0.0-e0c70bcd",
45
- "@anysphere/file-service-linux-arm64-gnu": "0.0.0-e0c70bcd"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-e36a46ab",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-e36a46ab",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-e36a46ab",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-e36a46ab",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-e36a46ab",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-e36a46ab",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-e36a46ab"
46
46
  }
47
47
  }
package/src/file_utils.rs CHANGED
@@ -5,7 +5,8 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
- use std::{path::Path, any};
8
+ use encoding_rs::UTF_8;
9
+ use std::path::Path;
9
10
  use tokio::fs;
10
11
 
11
12
  pub fn is_in_bad_dir(file_path: &Path) -> Result<bool, Error> {
@@ -43,8 +44,90 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock",
49
+ "bak",
50
+ "tmp",
51
+ "bin",
52
+ "exe",
53
+ "dll",
54
+ "so",
55
+ "lockb",
56
+ "qwoff",
57
+ "isl",
58
+ "csv",
59
+ "pdf",
60
+ // add ms word, excel, powerpoint, etc.
61
+ "doc",
62
+ "docx",
63
+ "xls",
64
+ "xlsx",
65
+ "ppt",
66
+ "pptx",
67
+ "odt",
68
+ "ods",
69
+ "odp",
70
+ "odg",
71
+ "odf",
72
+ "sxw",
73
+ "sxc",
74
+ "sxi",
75
+ "sxd",
76
+ "sdc",
77
+ // add images
78
+ "jpg",
79
+ "jpeg",
80
+ "png",
81
+ "gif",
82
+ "bmp",
83
+ "tif",
84
+ // add audio
85
+ "mp3",
86
+ "wav",
87
+ "wma",
88
+ "ogg",
89
+ "flac",
90
+ "aac",
91
+ // add video
92
+ "mp4",
93
+ "mov",
94
+ "wmv",
95
+ "flv",
96
+ "avi",
97
+ // add archives
98
+ "zip",
99
+ "tar",
100
+ "gz",
101
+ "7z",
102
+ "rar",
103
+ "tgz",
104
+ "dmg",
105
+ "iso",
106
+ "cue",
107
+ "mdf",
108
+ "mds",
109
+ "vcd",
110
+ "toast",
111
+ "img",
112
+ "apk",
113
+ "msi",
114
+ "cab",
115
+ "tar.gz",
116
+ "tar.xz",
117
+ "tar.bz2",
118
+ "tar.lzma",
119
+ "tar.Z",
120
+ "tar.sz",
121
+ "lzma",
122
+ // add fonts
123
+ "ttf",
124
+ "otf",
125
+ "woff",
126
+ "woff2",
127
+ "eot",
128
+ ];
129
+ match bad_extensions.contains(&extension) {
130
+ true => {
48
131
  return Err(anyhow::anyhow!("File is just a lock file"));
49
132
  }
50
133
  _ => {}
@@ -93,7 +176,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
93
176
  // implement the buffer above:
94
177
  pub async fn is_good_file_runtime_check(
95
178
  file_path: &Path,
96
- _buffer: &[u8],
179
+ // _buffer: &[u8],
97
180
  ) -> Result<(), Error> {
98
181
  match get_file_size(file_path).await {
99
182
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -103,13 +186,31 @@ pub async fn is_good_file_runtime_check(
103
186
  _ => {}
104
187
  }
105
188
 
106
- // if is_binary(file_path).context("Failed to check if file is binary")? {
107
- // return Err(anyhow::anyhow!("File is binary"));
108
- // }
189
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
190
+ // return Err(anyhow::anyhow!("File is binary"));
191
+ // }
109
192
 
110
193
  Ok(())
111
194
  }
112
195
 
196
+ pub async fn read_string_without_bom(
197
+ file_path: &Path,
198
+ ) -> Result<String, Error> {
199
+ let file_buffer = match fs::read(file_path).await {
200
+ Ok(buffer) => buffer,
201
+ Err(e) => {
202
+ return Err(anyhow::anyhow!(
203
+ "Failed to read file buffer: {}",
204
+ e.to_string()
205
+ ))
206
+ }
207
+ };
208
+
209
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
210
+
211
+ Ok(cow.to_string())
212
+ }
213
+
113
214
  pub fn as_relative_path(
114
215
  base_path: &Path,
115
216
  file_path: &Path,
@@ -167,25 +268,40 @@ mod tests {
167
268
  temp_file.write_all(b"Hello, world!").await.unwrap();
168
269
  let buffer = fs::read(&temp_file_path).await.unwrap();
169
270
  assert_eq!(
170
- is_good_file_runtime_check(&temp_file_path, &buffer)
171
- .await
172
- .is_ok(),
271
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
173
272
  true
174
273
  );
175
274
  temp_dir.close().unwrap();
176
275
 
276
+ // let temp_dir = tempfile::tempdir().unwrap();
277
+ // let temp_file_path = temp_dir.path().join("test_file");
278
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
279
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
280
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
281
+ // assert_eq!(
282
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
283
+ // true
284
+ // );
285
+ // temp_dir.close().unwrap();
286
+ }
287
+
288
+ #[tokio::test]
289
+ async fn test_bom_file() {
290
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
291
+ const CONTENT: &str = "Hello, world!";
292
+
293
+ // Write this to a temp file
177
294
  let temp_dir = tempfile::tempdir().unwrap();
178
295
  let temp_file_path = temp_dir.path().join("test_file");
179
296
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
180
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
181
- let buffer = fs::read(&temp_file_path).await.unwrap();
182
- assert_eq!(
183
- is_good_file_runtime_check(&temp_file_path, &buffer)
184
- .await
185
- .is_err(),
186
- true
187
- );
188
- temp_dir.close().unwrap();
297
+ temp_file.write_all(&BOM).await.unwrap();
298
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
299
+
300
+ // expect that we read the file with tokio as the CONTENT
301
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
302
+
303
+ // Check string equality of CONTENT (&str) to file_contents (String)
304
+ assert_eq!(CONTENT, file_contents);
189
305
  }
190
306
 
191
307
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,7 +1,8 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
- pub fn list_ignored_files(
5
+ pub fn list_ignored_files_and_directories(
5
6
  workspace_root_path: &str,
6
7
  should_return_absolute_paths: bool,
7
8
  ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
@@ -14,12 +15,12 @@ pub fn list_ignored_files(
14
15
  "--others",
15
16
  "--ignored",
16
17
  "--exclude-standard",
18
+ "--directory",
19
+ "--no-empty-directory"
17
20
  ],
18
21
  // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
19
22
  vec![
20
- "sh",
21
- "-c",
22
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
23
24
  ],
24
25
  ];
25
26
 
@@ -34,9 +35,12 @@ pub fn list_ignored_files(
34
35
  .lines()
35
36
  .filter(|line| !line.is_empty())
36
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
37
40
  if should_return_absolute_paths {
38
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
39
- path.push(line);
42
+
43
+ path.push(line.clone());
40
44
 
41
45
  match path.canonicalize() {
42
46
  Ok(canonical_path) => {
@@ -133,7 +137,8 @@ mod tests {
133
137
  fn test_no_ignored_files() {
134
138
  let dir = tempfile::tempdir().unwrap();
135
139
  let gitignored_files =
136
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
140
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
141
+ .unwrap();
137
142
  Command::new("git")
138
143
  .args(&["init"])
139
144
  .current_dir(dir.path())
@@ -160,7 +165,8 @@ mod tests {
160
165
  .output()
161
166
  .unwrap();
162
167
  let gitignored_files =
163
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
168
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
169
+ .unwrap();
164
170
  println!(
165
171
  "ignored files for test_one_ignored_file: {:?}",
166
172
  gitignored_files
@@ -190,7 +196,8 @@ mod tests {
190
196
  .output()
191
197
  .unwrap();
192
198
  let gitignored_files =
193
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
199
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
200
+ .unwrap();
194
201
  println!(
195
202
  "ignored files for test_multiple_ignored_files: {:?}",
196
203
  gitignored_files
@@ -254,7 +261,8 @@ mod tests {
254
261
  println!("git submodule add output: {:?}", o);
255
262
 
256
263
  let gitignored_files =
257
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
264
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
265
+ .unwrap();
258
266
  println!(
259
267
  "ignored files for test_git_submodule_ignored_files: {:?}",
260
268
  gitignored_files
@@ -265,7 +273,8 @@ mod tests {
265
273
 
266
274
  #[test]
267
275
  fn test_multiple_ignored_files_in_current_dir() {
268
- let gitignored_files = list_ignored_files(".", false).unwrap();
276
+ let gitignored_files =
277
+ list_ignored_files_and_directories(".", false).unwrap();
269
278
  assert!(gitignored_files.len() > 1);
270
279
 
271
280
  // print a sample of the ignored files
package/src/lib.rs CHANGED
@@ -1,11 +1,12 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
5
  pub mod merkle_tree;
6
6
 
7
- use std::vec;
7
+ use std::{vec, collections::HashSet};
8
8
 
9
+ use anyhow::Context;
9
10
  use merkle_tree::{LocalConstruction, MerkleTree};
10
11
  use tracing::{info, Level};
11
12
  use tracing_appender::rolling::{RollingFileAppender, Rotation};
@@ -17,7 +18,7 @@ extern crate napi_derive;
17
18
  #[napi]
18
19
  pub struct MerkleClient {
19
20
  tree: MerkleTree,
20
- root_directory: String,
21
+ absolute_root_directory: String,
21
22
  _guard: tracing_appender::non_blocking::WorkerGuard,
22
23
  }
23
24
 
@@ -40,24 +41,34 @@ pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
40
41
  #[napi]
41
42
  impl MerkleClient {
42
43
  #[napi(constructor)]
43
- pub fn new(root_directory: String) -> MerkleClient {
44
+ pub fn new(absolute_root_directory: String) -> MerkleClient {
44
45
  let _guard = init_logger();
45
46
 
47
+ // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
48
+ // use dunce::canonicalize;
49
+ // let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
50
+ // Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
51
+ // Err(e) => {
52
+ // info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
53
+ // absolute_root_directory
54
+ // }
55
+ // };
56
+
46
57
  MerkleClient {
47
58
  tree: MerkleTree::empty_tree(),
48
- root_directory,
59
+ absolute_root_directory,
49
60
  _guard,
50
61
  }
51
62
  }
52
63
 
53
64
  #[napi]
54
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
65
+ pub async unsafe fn init(&mut self, git_ignored_files: Vec<String>, is_git_repo: bool) -> Result<(), napi::Error> {
55
66
  // 1. compute the merkle tree
56
67
  // 2. update the backend
57
68
  // 3. sync with the remote
58
69
  info!("Merkle tree compute started!");
59
70
  unsafe {
60
- self.compute_merkle_tree().await?;
71
+ self.compute_merkle_tree(git_ignored_files, is_git_repo).await?;
61
72
  }
62
73
 
63
74
  Ok(())
@@ -70,9 +81,15 @@ impl MerkleClient {
70
81
  #[napi]
71
82
  pub async unsafe fn compute_merkle_tree(
72
83
  &mut self,
84
+ git_ignored_files: Vec<String>,
85
+ is_git_repo: bool
73
86
  ) -> Result<(), napi::Error> {
87
+ // make the git ignored files into a hash set
88
+ let git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
89
+
74
90
  let t =
75
- MerkleTree::construct_merkle_tree(self.root_directory.clone()).await;
91
+ MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone(), git_ignored_set, is_git_repo)
92
+ .await;
76
93
 
77
94
  match t {
78
95
  Ok(tree) => {
@@ -101,24 +118,40 @@ impl MerkleClient {
101
118
  &self,
102
119
  relative_path: String,
103
120
  ) -> Result<String, napi::Error> {
104
- let absolute_path =
105
- std::path::Path::new(&self.root_directory).join(&relative_path);
121
+ let relative_path_without_leading_slash = match relative_path
122
+ .strip_prefix('.')
123
+ {
124
+ Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
125
+ None => relative_path.as_str(),
126
+ };
106
127
 
107
- let canonical_path = match absolute_path.canonicalize() {
108
- Ok(path) => path,
109
- Err(e) => return Err(napi::Error::new(
110
- napi::Status::Unknown,
111
- format!("Error in canonicalizing path: {:?}", e),
112
- )),
128
+ let absolute_path = if !relative_path_without_leading_slash.is_empty() {
129
+ std::path::Path::new(&self.absolute_root_directory)
130
+ .join(relative_path_without_leading_slash)
131
+ } else {
132
+ std::path::Path::new(&self.absolute_root_directory).to_path_buf()
113
133
  };
114
134
 
115
- let hash = self.tree.get_subtree_hash(canonical_path.clone()).await;
135
+ let absolute_path_string = match absolute_path.to_str() {
136
+ Some(path) => path.to_string(),
137
+ None => {
138
+ return Err(napi::Error::new(
139
+ napi::Status::Unknown,
140
+ format!("some string error"),
141
+ ))
142
+ }
143
+ };
144
+
145
+ let hash = self
146
+ .tree
147
+ .get_subtree_hash(absolute_path_string.as_str())
148
+ .await;
116
149
 
117
150
  match hash {
118
151
  Ok(hash) => Ok(hash),
119
152
  Err(e) => Err(napi::Error::new(
120
153
  napi::Status::Unknown,
121
- format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nCanonical path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, canonical_path, self.root_directory, e),
154
+ format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
122
155
  )),
123
156
  }
124
157
  }
@@ -140,7 +173,7 @@ impl MerkleClient {
140
173
  &self,
141
174
  relative_path: String,
142
175
  ) -> Result<i32, napi::Error> {
143
- let absolute_path = std::path::Path::new(&self.root_directory)
176
+ let absolute_path = std::path::Path::new(&self.absolute_root_directory)
144
177
  .join(relative_path)
145
178
  .canonicalize()?;
146
179
 
@@ -176,10 +209,12 @@ impl MerkleClient {
176
209
  &self,
177
210
  absolute_file_path: String,
178
211
  ) -> Result<Vec<String>, napi::Error> {
179
- let absolute_path_str = absolute_file_path.as_str();
212
+ // let absolute_path = absolute_file_path.to_lowercase();
213
+ // let absolute_path_str = absolute_path.as_str();
214
+
180
215
  let files = self
181
216
  .tree
182
- .get_all_dir_files_to_embed(absolute_path_str)
217
+ .get_all_dir_files_to_embed(absolute_file_path.as_str())
183
218
  .await;
184
219
 
185
220
  match files {
@@ -220,8 +255,9 @@ impl MerkleClient {
220
255
  &self,
221
256
  absolute_file_path: String,
222
257
  ) -> Result<Vec<String>, napi::Error> {
223
- let absolute_path_str = absolute_file_path.as_str();
224
- let spline = self.tree.get_spline(absolute_path_str).await;
258
+ // let absolute_path = absolute_file_path.to_lowercase();
259
+ // let absolute_path_str = absolute_path.as_str();
260
+ let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
225
261
 
226
262
  match spline {
227
263
  Ok(spline) => Ok(spline),
@@ -250,6 +286,6 @@ impl MerkleClient {
250
286
 
251
287
  #[napi]
252
288
  pub fn update_root_directory(&mut self, root_directory: String) {
253
- self.root_directory = root_directory;
289
+ self.absolute_root_directory = root_directory;
254
290
  }
255
291
  }
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -13,8 +12,13 @@ impl LocalConstruction for MerkleTree {
13
12
  async fn new(
14
13
  root_directory: Option<String>,
15
14
  ) -> Result<MerkleTree, anyhow::Error> {
15
+ let git_ignored_files = HashSet::<String>::new();
16
16
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
17
+ let n = MerkleTree::construct_merkle_tree(
18
+ root_directory,
19
+ git_ignored_files,
20
+ false
21
+ ).await;
18
22
  return n;
19
23
  }
20
24
 
@@ -30,6 +34,8 @@ impl LocalConstruction for MerkleTree {
30
34
  /// 4. return merkle tree
31
35
  async fn construct_merkle_tree(
32
36
  absolute_path_to_root_directory: String,
37
+ git_ignored_files_and_dirs: HashSet<String>,
38
+ is_git_repo: bool
33
39
  ) -> Result<MerkleTree, anyhow::Error> {
34
40
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
41
  if !path.exists() {
@@ -38,19 +44,21 @@ impl LocalConstruction for MerkleTree {
38
44
  }
39
45
 
40
46
  // 1. get all the gitignored files
41
- let git_ignored_files = match git_utils::list_ignored_files(
42
- absolute_path_to_root_directory.as_str(),
43
- true,
44
- ) {
45
- Ok(git_ignored) => git_ignored,
46
- Err(_e) => HashSet::new(),
47
- };
47
+ // let git_ignored_files_and_dirs =
48
+ // match git_utils::list_ignored_files_and_directories(
49
+ // absolute_path_to_root_directory.as_str(),
50
+ // true,
51
+ // ) {
52
+ // Ok(git_ignored) => git_ignored,
53
+ // Err(_e) => HashSet::new(),
54
+ // };
48
55
 
49
56
  let root_node = MerkleNode::new(
50
57
  path,
51
58
  None,
52
- &git_ignored_files,
59
+ &git_ignored_files_and_dirs,
53
60
  absolute_path_to_root_directory.as_str(),
61
+ is_git_repo
54
62
  )
55
63
  .await;
56
64
  let mut mt = MerkleTree {
@@ -58,7 +66,8 @@ impl LocalConstruction for MerkleTree {
58
66
  files: BTreeMap::new(),
59
67
  root_path: absolute_path_to_root_directory,
60
68
  cursor: None,
61
- git_ignored_files,
69
+ git_ignored_files_and_dirs: git_ignored_files_and_dirs,
70
+ is_git_repo
62
71
  };
63
72
 
64
73
  // we now iterate over all the nodes and add them to the hashmap
@@ -80,7 +89,13 @@ impl LocalConstruction for MerkleTree {
80
89
  }
81
90
  NodeType::File(file_name) => {
82
91
  let f = File { node: node.clone() };
83
- tracing::info!("File: {:?}", file_name);
92
+
93
+ // i dont reallly like this :(((
94
+ // let canonical_file_name = match dunce::canonicalize(file_name) {
95
+ // Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
96
+ // Err(_) => file_name.clone(),
97
+ // };
98
+
84
99
  files.insert(file_name.clone(), f);
85
100
  }
86
101
  NodeType::ErrorNode(_) => {
@@ -1,13 +1,13 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
6
4
  use std::path::PathBuf;
5
+ use std::vec;
7
6
  use std::{fs, path::Path, sync::Arc};
8
7
  use tokio::sync::RwLock;
9
8
  use tonic::async_trait;
10
9
  use tracing::info;
10
+
11
11
  pub mod local_construction;
12
12
  pub mod test;
13
13
 
@@ -18,7 +18,8 @@ pub struct MerkleTree {
18
18
  root: MerkleNodePtr,
19
19
  files: BTreeMap<String, File>,
20
20
  cursor: Option<usize>,
21
- git_ignored_files: HashSet<String>,
21
+ git_ignored_files_and_dirs: HashSet<String>,
22
+ is_git_repo: bool,
22
23
  }
23
24
 
24
25
  #[derive(Debug)]
@@ -62,6 +63,8 @@ pub trait LocalConstruction {
62
63
 
63
64
  async fn construct_merkle_tree(
64
65
  root_directory: String,
66
+ git_ignored_files_and_dirs: HashSet<String>,
67
+ is_git_repo: bool
65
68
  ) -> Result<MerkleTree, anyhow::Error>;
66
69
 
67
70
  async fn update_file(
@@ -95,34 +98,35 @@ impl MerkleTree {
95
98
  files: BTreeMap::new(),
96
99
  root_path: "".to_string(),
97
100
  cursor: None,
98
- git_ignored_files: HashSet::new(),
101
+ git_ignored_files_and_dirs: HashSet::new(),
102
+ is_git_repo: false
99
103
  }
100
104
  }
101
105
 
102
106
  pub async fn get_subtree_hash(
103
107
  &self,
104
- absolute_path: PathBuf,
108
+ absolute_path: &str,
105
109
  ) -> Result<String, anyhow::Error> {
106
- let abs_string = match absolute_path.to_str() {
107
- Some(s) => s.to_string(),
108
- None => {
109
- return Err(anyhow::anyhow!(
110
- "get_subtree_hash: Failed to convert path to string"
111
- ))
112
- }
113
- };
114
-
115
- let node = match self.files.get(&abs_string) {
110
+ let node = match self.files.get(absolute_path) {
116
111
  Some(file) => file.node.clone(),
117
112
  None => {
118
113
  let all_files: Vec<String> = self.files.keys().cloned().collect();
119
- return Err(anyhow::anyhow!("Could not find file in tree! Looking for: {}. All files: {:?}", abs_string, all_files));
114
+ return Err(anyhow::anyhow!(
115
+ "Could not find file in tree! Looking for: {}. All files: {:?}",
116
+ absolute_path,
117
+ all_files
118
+ ));
120
119
  }
121
120
  };
122
121
 
123
122
  let node_reader = node.read().await;
124
123
  let node_hash = node_reader.hash.clone();
125
124
 
125
+ info!(
126
+ "get_subtree_hash for path: {}, node_hash: {}",
127
+ absolute_path, node_hash
128
+ );
129
+
126
130
  Ok(node_hash)
127
131
  }
128
132
 
@@ -286,6 +290,12 @@ impl MerkleTree {
286
290
  ) -> Result<Vec<String>, anyhow::Error> {
287
291
  let mut files = Vec::new();
288
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
289
299
  for (file_path, f) in &self.files {
290
300
  if !file_path.contains(absolute_path) {
291
301
  continue;
@@ -398,8 +408,9 @@ impl MerkleTree {
398
408
  let new_node = MerkleNode::new(
399
409
  file_path.clone(),
400
410
  Some(ancestor.clone()),
401
- &self.git_ignored_files,
411
+ &self.git_ignored_files_and_dirs,
402
412
  &absolute_root_path.as_str(),
413
+ self.is_git_repo
403
414
  )
404
415
  .await;
405
416
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -415,8 +426,9 @@ impl MerkleTree {
415
426
  let first_child = MerkleNode::new(
416
427
  first_child_path.clone(),
417
428
  Some(ancestor.clone()),
418
- &self.git_ignored_files,
429
+ &self.git_ignored_files_and_dirs,
419
430
  &absolute_root_path.as_str(),
431
+ self.is_git_repo
420
432
  )
421
433
  .await;
422
434
 
@@ -704,13 +716,14 @@ impl MerkleNode {
704
716
  parent: ParentPtr,
705
717
  ignored_files: &IgnoredFiles,
706
718
  absolute_root_path: &str,
719
+ is_git_repo: bool,
707
720
  ) -> MerkleNodePtr {
708
- // check if the root is a git directory.
709
- let is_git_repo =
710
- match git_utils::is_git_directory(absolute_root_path).await {
711
- Ok(is_git_repo) => is_git_repo,
712
- Err(e) => false,
713
- };
721
+ // // check if the root is a git directory.
722
+ // let is_git_repo =
723
+ // match git_utils::is_git_directory(absolute_root_path).await {
724
+ // Ok(is_git_repo) => is_git_repo,
725
+ // Err(_e) => false,
726
+ // };
714
727
  let bypass_git = !is_git_repo;
715
728
 
716
729
  MerkleNode::construct_node(
@@ -728,13 +741,8 @@ impl MerkleNode {
728
741
  parent: ParentPtr,
729
742
  ignored_files: &IgnoredFiles,
730
743
  absolute_root_path: &str,
744
+ is_git_repo: bool,
731
745
  ) -> MerkleNodePtr {
732
- // check if the root is a git directory.
733
- let is_git_repo =
734
- match git_utils::is_git_directory(absolute_root_path).await {
735
- Ok(is_git_repo) => is_git_repo,
736
- Err(_e) => false,
737
- };
738
746
  let bypass_git = !is_git_repo;
739
747
 
740
748
  info!(
@@ -791,26 +799,9 @@ impl MerkleNode {
791
799
  )));
792
800
  }
793
801
 
794
- // check if the directory is git ignored
795
- let is_git_ignored =
796
- match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
797
- .await
798
- {
799
- Ok(is_git_ignored) => is_git_ignored,
800
- Err(e) => {
801
- return Arc::new(RwLock::new(MerkleNode::empty_node(
802
- Some(absolute_file_or_directory),
803
- Some(e.to_string()),
804
- )));
805
- }
806
- };
802
+ let is_git_ignored_dir = ignored_files.contains(&path_str);
807
803
 
808
- if is_git_ignored && !bypass_git {
809
- // println!("skipping directory: {}", path_str);
810
- tracing::info!(
811
- "skipping directory because its git ignored: {}",
812
- path_str
813
- );
804
+ if is_git_ignored_dir && !bypass_git {
814
805
  return Arc::new(RwLock::new(MerkleNode::empty_node(
815
806
  Some(absolute_file_or_directory),
816
807
  Some("Directory is git ignored!".to_string()),
@@ -899,18 +890,10 @@ impl MerkleNode {
899
890
  false => {}
900
891
  }
901
892
 
902
- // read the file_content to a buffer
903
- let file_content = match tokio::fs::read(absolute_file_path).await {
904
- Ok(content) => content,
905
- Err(e) => {
906
- return Err(format!("Could not read file! {}", e.to_string()));
907
- }
908
- };
909
-
910
893
  // check if the file passes runtime checks.
911
894
  match file_utils::is_good_file_runtime_check(
912
895
  absolute_file_path,
913
- &file_content,
896
+ // &file_content,
914
897
  )
915
898
  .await
916
899
  {
@@ -920,15 +903,14 @@ impl MerkleNode {
920
903
  }
921
904
  }
922
905
 
923
- let file_content = match std::str::from_utf8(&file_content) {
924
- Ok(content) => content,
925
- Err(e) => {
926
- return Err(format!(
927
- "UTF8 Failure. Could not convert file content to string! {}",
928
- e.to_string()
929
- ));
930
- }
931
- };
906
+ // read the file_content to a buffer
907
+ let file_content =
908
+ match file_utils::read_string_without_bom(absolute_file_path).await {
909
+ Ok(content) => content,
910
+ Err(e) => {
911
+ return Err(format!("Could not read file! {}", e.to_string()));
912
+ }
913
+ };
932
914
 
933
915
  let file_hash = compute_hash(&file_content);
934
916
  let node = MerkleNode {
@@ -984,15 +966,51 @@ impl MerkleNode {
984
966
 
985
967
  async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
986
968
  let mut hasher = sha2::Sha256::new();
969
+ let mut names_and_hashes = vec![];
970
+ let mut non_zero_children = 0;
971
+
987
972
  for child in children {
988
973
  // check if it is an error node
989
974
  let child_reader = child.read().await;
990
- if let NodeType::ErrorNode(_) = &child_reader.node_type {
975
+
976
+ match &child_reader.node_type {
977
+ NodeType::File(file_name) => {
978
+ non_zero_children += 1;
979
+ names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
980
+ }
981
+ NodeType::Branch((file_name, _)) => {
982
+ let hash = child_reader.hash.clone();
983
+ if hash == "" {
984
+ continue;
985
+ }
986
+
987
+ non_zero_children += 1;
988
+ names_and_hashes.push((file_name.clone(), hash));
989
+ }
990
+ NodeType::ErrorNode(_) => {
991
+ continue;
992
+ }
993
+ }
994
+ }
995
+
996
+ // sort the list of names and hashes by the hashes!!
997
+ names_and_hashes
998
+ .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
999
+
1000
+ for (name, hash) in names_and_hashes {
1001
+ if hash == "" {
991
1002
  continue;
992
1003
  }
1004
+ info!("name: {}, hash: {}", name, hash);
1005
+ hasher.update(hash);
1006
+ }
993
1007
 
994
- hasher.update(child_reader.hash.as_bytes());
1008
+ if non_zero_children == 0 {
1009
+ // this means that the branch is empty.
1010
+ // we should return an empty string.
1011
+ return "".to_string();
995
1012
  }
1013
+
996
1014
  let result = hasher.finalize();
997
1015
  format!("{:x}", result)
998
1016
  }
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);