@anysphere/file-service 0.0.0-c8c4f8fc → 0.0.0-d50aa568

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -6,6 +6,10 @@ version = "0.0.0"
6
6
  [lib]
7
7
  crate-type = ["cdylib"]
8
8
 
9
+ [features]
10
+ default = ["windows-subsystem"]
11
+ windows-subsystem = []
12
+
9
13
  [dependencies]
10
14
  # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix
11
15
  napi = { version = "2.12.2", default-features = false, features = ["napi4", "async", "tokio_rt"] }
@@ -22,6 +26,7 @@ tracing-subscriber = "0.3.17"
22
26
  tracing-appender = "0.2.2"
23
27
  binaryornot = "1.0.0"
24
28
  dunce = "1.0.1"
29
+ encoding_rs = "0.8.33"
25
30
 
26
31
  [build-dependencies]
27
32
  napi-build = "2.0.1"
@@ -29,5 +34,6 @@ tonic-build = "0.9.2"
29
34
  anyhow = "1.0.75"
30
35
  glob = "0.3.0"
31
36
 
37
+
32
38
  [profile.release]
33
39
  lto = true
package/build.rs CHANGED
@@ -3,6 +3,8 @@ use std::path::Path;
3
3
  extern crate napi_build;
4
4
 
5
5
  fn main() -> Result<(), anyhow::Error> {
6
+ #[cfg(target_os = "windows")]
7
+ println!("cargo:rustc-cdylib-link-arg=/SUBSYSTEM:WINDOWS");
6
8
  napi_build::setup();
7
9
 
8
10
  // print the relative path.
package/index.d.ts CHANGED
@@ -5,8 +5,8 @@
5
5
 
6
6
  export class MerkleClient {
7
7
  constructor(absoluteRootDirectory: string)
8
- init(): Promise<void>
9
- computeMerkleTree(): Promise<void>
8
+ init(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
9
+ computeMerkleTree(gitIgnoredFiles: Array<string>, isGitRepo: boolean): Promise<void>
10
10
  updateFile(filePath: string): Promise<void>
11
11
  deleteFile(filePath: string): Promise<void>
12
12
  getSubtreeHash(relativePath: string): Promise<string>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-c8c4f8fc",
3
+ "version": "0.0.0-d50aa568",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -36,12 +36,12 @@
36
36
  "version": "napi version"
37
37
  },
38
38
  "optionalDependencies": {
39
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-c8c4f8fc",
40
- "@anysphere/file-service-darwin-x64": "0.0.0-c8c4f8fc",
41
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-c8c4f8fc",
42
- "@anysphere/file-service-darwin-arm64": "0.0.0-c8c4f8fc",
43
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-c8c4f8fc",
44
- "@anysphere/file-service-darwin-universal": "0.0.0-c8c4f8fc",
45
- "@anysphere/file-service-linux-arm64-gnu": "0.0.0-c8c4f8fc"
39
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-d50aa568",
40
+ "@anysphere/file-service-darwin-x64": "0.0.0-d50aa568",
41
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-d50aa568",
42
+ "@anysphere/file-service-darwin-arm64": "0.0.0-d50aa568",
43
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-d50aa568",
44
+ "@anysphere/file-service-darwin-universal": "0.0.0-d50aa568",
45
+ "@anysphere/file-service-linux-arm64-gnu": "0.0.0-d50aa568"
46
46
  }
47
47
  }
package/src/file_utils.rs CHANGED
@@ -5,6 +5,7 @@
5
5
  // 4. vscode.fs.stat
6
6
 
7
7
  use anyhow::Error;
8
+ use encoding_rs::UTF_8;
8
9
  use std::path::Path;
9
10
  use tokio::fs;
10
11
 
@@ -43,8 +44,90 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
43
44
  _ => {}
44
45
  }
45
46
 
46
- match extension {
47
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
47
+ let bad_extensions = vec![
48
+ "lock",
49
+ "bak",
50
+ "tmp",
51
+ "bin",
52
+ "exe",
53
+ "dll",
54
+ "so",
55
+ "lockb",
56
+ "qwoff",
57
+ "isl",
58
+ "csv",
59
+ "pdf",
60
+ // add ms word, excel, powerpoint, etc.
61
+ "doc",
62
+ "docx",
63
+ "xls",
64
+ "xlsx",
65
+ "ppt",
66
+ "pptx",
67
+ "odt",
68
+ "ods",
69
+ "odp",
70
+ "odg",
71
+ "odf",
72
+ "sxw",
73
+ "sxc",
74
+ "sxi",
75
+ "sxd",
76
+ "sdc",
77
+ // add images
78
+ "jpg",
79
+ "jpeg",
80
+ "png",
81
+ "gif",
82
+ "bmp",
83
+ "tif",
84
+ // add audio
85
+ "mp3",
86
+ "wav",
87
+ "wma",
88
+ "ogg",
89
+ "flac",
90
+ "aac",
91
+ // add video
92
+ "mp4",
93
+ "mov",
94
+ "wmv",
95
+ "flv",
96
+ "avi",
97
+ // add archives
98
+ "zip",
99
+ "tar",
100
+ "gz",
101
+ "7z",
102
+ "rar",
103
+ "tgz",
104
+ "dmg",
105
+ "iso",
106
+ "cue",
107
+ "mdf",
108
+ "mds",
109
+ "vcd",
110
+ "toast",
111
+ "img",
112
+ "apk",
113
+ "msi",
114
+ "cab",
115
+ "tar.gz",
116
+ "tar.xz",
117
+ "tar.bz2",
118
+ "tar.lzma",
119
+ "tar.Z",
120
+ "tar.sz",
121
+ "lzma",
122
+ // add fonts
123
+ "ttf",
124
+ "otf",
125
+ "woff",
126
+ "woff2",
127
+ "eot",
128
+ ];
129
+ match bad_extensions.contains(&extension) {
130
+ true => {
48
131
  return Err(anyhow::anyhow!("File is just a lock file"));
49
132
  }
50
133
  _ => {}
@@ -93,7 +176,7 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
93
176
  // implement the buffer above:
94
177
  pub async fn is_good_file_runtime_check(
95
178
  file_path: &Path,
96
- _buffer: &[u8],
179
+ // _buffer: &[u8],
97
180
  ) -> Result<(), Error> {
98
181
  match get_file_size(file_path).await {
99
182
  Ok(size) if size > 2 * 1024 * 1024 => {
@@ -103,13 +186,31 @@ pub async fn is_good_file_runtime_check(
103
186
  _ => {}
104
187
  }
105
188
 
106
- // if is_binary(file_path).context("Failed to check if file is binary")? {
107
- // return Err(anyhow::anyhow!("File is binary"));
108
- // }
189
+ // if is_binary(file_path).context("Failed to check if file is binary")? {
190
+ // return Err(anyhow::anyhow!("File is binary"));
191
+ // }
109
192
 
110
193
  Ok(())
111
194
  }
112
195
 
196
+ pub async fn read_string_without_bom(
197
+ file_path: &Path,
198
+ ) -> Result<String, Error> {
199
+ let file_buffer = match fs::read(file_path).await {
200
+ Ok(buffer) => buffer,
201
+ Err(e) => {
202
+ return Err(anyhow::anyhow!(
203
+ "Failed to read file buffer: {}",
204
+ e.to_string()
205
+ ))
206
+ }
207
+ };
208
+
209
+ let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
210
+
211
+ Ok(cow.to_string())
212
+ }
213
+
113
214
  pub fn as_relative_path(
114
215
  base_path: &Path,
115
216
  file_path: &Path,
@@ -167,25 +268,40 @@ mod tests {
167
268
  temp_file.write_all(b"Hello, world!").await.unwrap();
168
269
  let buffer = fs::read(&temp_file_path).await.unwrap();
169
270
  assert_eq!(
170
- is_good_file_runtime_check(&temp_file_path, &buffer)
171
- .await
172
- .is_ok(),
271
+ is_good_file_runtime_check(&temp_file_path).await.is_ok(),
173
272
  true
174
273
  );
175
274
  temp_dir.close().unwrap();
176
275
 
276
+ // let temp_dir = tempfile::tempdir().unwrap();
277
+ // let temp_file_path = temp_dir.path().join("test_file");
278
+ // let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
279
+ // temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
280
+ // let buffer = fs::read(&temp_file_path).await.unwrap();
281
+ // assert_eq!(
282
+ // is_good_file_runtime_check(&temp_file_path).await.is_err(),
283
+ // true
284
+ // );
285
+ // temp_dir.close().unwrap();
286
+ }
287
+
288
+ #[tokio::test]
289
+ async fn test_bom_file() {
290
+ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
291
+ const CONTENT: &str = "Hello, world!";
292
+
293
+ // Write this to a temp file
177
294
  let temp_dir = tempfile::tempdir().unwrap();
178
295
  let temp_file_path = temp_dir.path().join("test_file");
179
296
  let mut temp_file = fs::File::create(&temp_file_path).await.unwrap();
180
- temp_file.write_all(&[0, 159, 146, 150]).await.unwrap(); // Invalid UTF-8 sequence
181
- let buffer = fs::read(&temp_file_path).await.unwrap();
182
- assert_eq!(
183
- is_good_file_runtime_check(&temp_file_path, &buffer)
184
- .await
185
- .is_err(),
186
- true
187
- );
188
- temp_dir.close().unwrap();
297
+ temp_file.write_all(&BOM).await.unwrap();
298
+ temp_file.write_all(CONTENT.as_bytes()).await.unwrap();
299
+
300
+ // expect that we read the file with tokio as the CONTENT
301
+ let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
302
+
303
+ // Check string equality of CONTENT (&str) to file_contents (String)
304
+ assert_eq!(CONTENT, file_contents);
189
305
  }
190
306
 
191
307
  #[test]
package/src/git_utils.rs CHANGED
@@ -1,7 +1,8 @@
1
1
  use std::collections::HashSet;
2
+ use std::path::MAIN_SEPARATOR_STR;
2
3
  use std::process::Command;
3
4
 
4
- pub fn list_ignored_files(
5
+ pub fn list_ignored_files_and_directories(
5
6
  workspace_root_path: &str,
6
7
  should_return_absolute_paths: bool,
7
8
  ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
@@ -14,12 +15,12 @@ pub fn list_ignored_files(
14
15
  "--others",
15
16
  "--ignored",
16
17
  "--exclude-standard",
18
+ "--directory",
19
+ "--no-empty-directory"
17
20
  ],
18
21
  // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
19
22
  vec![
20
- "sh",
21
- "-c",
22
- "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
23
+ "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'",
23
24
  ],
24
25
  ];
25
26
 
@@ -34,9 +35,12 @@ pub fn list_ignored_files(
34
35
  .lines()
35
36
  .filter(|line| !line.is_empty())
36
37
  .map(|line| {
38
+ let line = line.replace("/", MAIN_SEPARATOR_STR);
39
+
37
40
  if should_return_absolute_paths {
38
41
  let mut path = std::path::PathBuf::from(workspace_root_path);
39
- path.push(line);
42
+
43
+ path.push(line.clone());
40
44
 
41
45
  match path.canonicalize() {
42
46
  Ok(canonical_path) => {
@@ -133,7 +137,8 @@ mod tests {
133
137
  fn test_no_ignored_files() {
134
138
  let dir = tempfile::tempdir().unwrap();
135
139
  let gitignored_files =
136
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
140
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
141
+ .unwrap();
137
142
  Command::new("git")
138
143
  .args(&["init"])
139
144
  .current_dir(dir.path())
@@ -160,7 +165,8 @@ mod tests {
160
165
  .output()
161
166
  .unwrap();
162
167
  let gitignored_files =
163
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
168
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
169
+ .unwrap();
164
170
  println!(
165
171
  "ignored files for test_one_ignored_file: {:?}",
166
172
  gitignored_files
@@ -190,7 +196,8 @@ mod tests {
190
196
  .output()
191
197
  .unwrap();
192
198
  let gitignored_files =
193
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
199
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
200
+ .unwrap();
194
201
  println!(
195
202
  "ignored files for test_multiple_ignored_files: {:?}",
196
203
  gitignored_files
@@ -254,7 +261,8 @@ mod tests {
254
261
  println!("git submodule add output: {:?}", o);
255
262
 
256
263
  let gitignored_files =
257
- list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
264
+ list_ignored_files_and_directories(dir.path().to_str().unwrap(), false)
265
+ .unwrap();
258
266
  println!(
259
267
  "ignored files for test_git_submodule_ignored_files: {:?}",
260
268
  gitignored_files
@@ -265,7 +273,8 @@ mod tests {
265
273
 
266
274
  #[test]
267
275
  fn test_multiple_ignored_files_in_current_dir() {
268
- let gitignored_files = list_ignored_files(".", false).unwrap();
276
+ let gitignored_files =
277
+ list_ignored_files_and_directories(".", false).unwrap();
269
278
  assert!(gitignored_files.len() > 1);
270
279
 
271
280
  // print a sample of the ignored files
package/src/lib.rs CHANGED
@@ -1,11 +1,12 @@
1
+ #![windows_subsystem = "windows"]
1
2
  #![deny(clippy::all)]
2
3
  #![deny(unsafe_op_in_unsafe_fn)]
3
4
  pub mod file_utils;
4
- pub mod git_utils;
5
5
  pub mod merkle_tree;
6
6
 
7
- use std::vec;
7
+ use std::{vec, collections::HashSet};
8
8
 
9
+ use anyhow::Context;
9
10
  use merkle_tree::{LocalConstruction, MerkleTree};
10
11
  use tracing::{info, Level};
11
12
  use tracing_appender::rolling::{RollingFileAppender, Rotation};
@@ -43,31 +44,31 @@ impl MerkleClient {
43
44
  pub fn new(absolute_root_directory: String) -> MerkleClient {
44
45
  let _guard = init_logger();
45
46
 
46
- let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
47
+ // let canonical_root_directory = std::path::Path::new(&absolute_root_directory);
47
48
  // use dunce::canonicalize;
48
- let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
49
- Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string(),
50
- Err(e) => {
51
- info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
52
- absolute_root_directory
53
- }
54
- };
49
+ // let canonical_root_directory = match dunce::canonicalize(&canonical_root_directory) {
50
+ // Ok(path) => path.to_str().unwrap_or(&absolute_root_directory).to_string().to_lowercase(),
51
+ // Err(e) => {
52
+ // info!("Error in canonicalizing path: path: {:?}, error {:?}", canonical_root_directory, e);
53
+ // absolute_root_directory
54
+ // }
55
+ // };
55
56
 
56
57
  MerkleClient {
57
58
  tree: MerkleTree::empty_tree(),
58
- absolute_root_directory: canonical_root_directory,
59
+ absolute_root_directory,
59
60
  _guard,
60
61
  }
61
62
  }
62
63
 
63
64
  #[napi]
64
- pub async unsafe fn init(&mut self) -> Result<(), napi::Error> {
65
+ pub async unsafe fn init(&mut self, git_ignored_files: Vec<String>, is_git_repo: bool) -> Result<(), napi::Error> {
65
66
  // 1. compute the merkle tree
66
67
  // 2. update the backend
67
68
  // 3. sync with the remote
68
69
  info!("Merkle tree compute started!");
69
70
  unsafe {
70
- self.compute_merkle_tree().await?;
71
+ self.compute_merkle_tree(git_ignored_files, is_git_repo).await?;
71
72
  }
72
73
 
73
74
  Ok(())
@@ -80,9 +81,15 @@ impl MerkleClient {
80
81
  #[napi]
81
82
  pub async unsafe fn compute_merkle_tree(
82
83
  &mut self,
84
+ git_ignored_files: Vec<String>,
85
+ is_git_repo: bool
83
86
  ) -> Result<(), napi::Error> {
87
+ // make the git ignored files into a hash set
88
+ let git_ignored_set = HashSet::from_iter(git_ignored_files.into_iter());
89
+
84
90
  let t =
85
- MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone()).await;
91
+ MerkleTree::construct_merkle_tree(self.absolute_root_directory.clone(), git_ignored_set, is_git_repo)
92
+ .await;
86
93
 
87
94
  match t {
88
95
  Ok(tree) => {
@@ -111,36 +118,40 @@ impl MerkleClient {
111
118
  &self,
112
119
  relative_path: String,
113
120
  ) -> Result<String, napi::Error> {
114
- let absolute_path =
115
- std::path::Path::new(&self.absolute_root_directory).join(&relative_path);
121
+ let relative_path_without_leading_slash = match relative_path
122
+ .strip_prefix('.')
123
+ {
124
+ Some(path) => path.strip_prefix(std::path::MAIN_SEPARATOR).unwrap_or(""),
125
+ None => relative_path.as_str(),
126
+ };
116
127
 
117
- let canonical_path = match dunce::canonicalize(&absolute_path) {
118
- Ok(path) => path,
119
- Err(e) => {
120
- return Err(napi::Error::new(
121
- napi::Status::Unknown,
122
- format!("Error in canonicalizing path: {:?}", e),
123
- ))
124
- }
128
+ let absolute_path = if !relative_path_without_leading_slash.is_empty() {
129
+ std::path::Path::new(&self.absolute_root_directory)
130
+ .join(relative_path_without_leading_slash)
131
+ } else {
132
+ std::path::Path::new(&self.absolute_root_directory).to_path_buf()
125
133
  };
126
134
 
127
- let cononical_str = match canonical_path.to_str() {
128
- Some(s) => s,
135
+ let absolute_path_string = match absolute_path.to_str() {
136
+ Some(path) => path.to_string(),
129
137
  None => {
130
138
  return Err(napi::Error::new(
131
139
  napi::Status::Unknown,
132
- format!("Error in converting canonical path to string"),
140
+ format!("some string error"),
133
141
  ))
134
142
  }
135
143
  };
136
144
 
137
- let hash = self.tree.get_subtree_hash(cononical_str).await;
145
+ let hash = self
146
+ .tree
147
+ .get_subtree_hash(absolute_path_string.as_str())
148
+ .await;
138
149
 
139
150
  match hash {
140
151
  Ok(hash) => Ok(hash),
141
152
  Err(e) => Err(napi::Error::new(
142
153
  napi::Status::Unknown,
143
- format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nCanonical path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, canonical_path, self.absolute_root_directory, e),
154
+ format!("Error in get_subtree_hash. \nRelative path: {:?}, \nAbsolute path: {:?}, \nRoot directory: {:?}\nError: {:?}", &relative_path, absolute_path, self.absolute_root_directory, e)
144
155
  )),
145
156
  }
146
157
  }
@@ -198,10 +209,12 @@ impl MerkleClient {
198
209
  &self,
199
210
  absolute_file_path: String,
200
211
  ) -> Result<Vec<String>, napi::Error> {
201
- let absolute_path_str = absolute_file_path.as_str();
212
+ // let absolute_path = absolute_file_path.to_lowercase();
213
+ // let absolute_path_str = absolute_path.as_str();
214
+
202
215
  let files = self
203
216
  .tree
204
- .get_all_dir_files_to_embed(absolute_path_str)
217
+ .get_all_dir_files_to_embed(absolute_file_path.as_str())
205
218
  .await;
206
219
 
207
220
  match files {
@@ -242,8 +255,9 @@ impl MerkleClient {
242
255
  &self,
243
256
  absolute_file_path: String,
244
257
  ) -> Result<Vec<String>, napi::Error> {
245
- let absolute_path_str = absolute_file_path.as_str();
246
- let spline = self.tree.get_spline(absolute_path_str).await;
258
+ // let absolute_path = absolute_file_path.to_lowercase();
259
+ // let absolute_path_str = absolute_path.as_str();
260
+ let spline = self.tree.get_spline(absolute_file_path.as_str()).await;
247
261
 
248
262
  match spline {
249
263
  Ok(spline) => Ok(spline),
@@ -1,4 +1,3 @@
1
- use crate::git_utils;
2
1
  use crate::merkle_tree::{
3
2
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
4
3
  };
@@ -13,8 +12,13 @@ impl LocalConstruction for MerkleTree {
13
12
  async fn new(
14
13
  root_directory: Option<String>,
15
14
  ) -> Result<MerkleTree, anyhow::Error> {
15
+ let git_ignored_files = HashSet::<String>::new();
16
16
  if let Some(root_directory) = root_directory {
17
- let n = MerkleTree::construct_merkle_tree(root_directory).await;
17
+ let n = MerkleTree::construct_merkle_tree(
18
+ root_directory,
19
+ git_ignored_files,
20
+ false
21
+ ).await;
18
22
  return n;
19
23
  }
20
24
 
@@ -30,6 +34,8 @@ impl LocalConstruction for MerkleTree {
30
34
  /// 4. return merkle tree
31
35
  async fn construct_merkle_tree(
32
36
  absolute_path_to_root_directory: String,
37
+ git_ignored_files_and_dirs: HashSet<String>,
38
+ is_git_repo: bool
33
39
  ) -> Result<MerkleTree, anyhow::Error> {
34
40
  let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
41
  if !path.exists() {
@@ -38,19 +44,21 @@ impl LocalConstruction for MerkleTree {
38
44
  }
39
45
 
40
46
  // 1. get all the gitignored files
41
- let git_ignored_files = match git_utils::list_ignored_files(
42
- absolute_path_to_root_directory.as_str(),
43
- true,
44
- ) {
45
- Ok(git_ignored) => git_ignored,
46
- Err(_e) => HashSet::new(),
47
- };
47
+ // let git_ignored_files_and_dirs =
48
+ // match git_utils::list_ignored_files_and_directories(
49
+ // absolute_path_to_root_directory.as_str(),
50
+ // true,
51
+ // ) {
52
+ // Ok(git_ignored) => git_ignored,
53
+ // Err(_e) => HashSet::new(),
54
+ // };
48
55
 
49
56
  let root_node = MerkleNode::new(
50
57
  path,
51
58
  None,
52
- &git_ignored_files,
59
+ &git_ignored_files_and_dirs,
53
60
  absolute_path_to_root_directory.as_str(),
61
+ is_git_repo
54
62
  )
55
63
  .await;
56
64
  let mut mt = MerkleTree {
@@ -58,7 +66,8 @@ impl LocalConstruction for MerkleTree {
58
66
  files: BTreeMap::new(),
59
67
  root_path: absolute_path_to_root_directory,
60
68
  cursor: None,
61
- git_ignored_files,
69
+ git_ignored_files_and_dirs: git_ignored_files_and_dirs,
70
+ is_git_repo
62
71
  };
63
72
 
64
73
  // we now iterate over all the nodes and add them to the hashmap
@@ -80,12 +89,14 @@ impl LocalConstruction for MerkleTree {
80
89
  }
81
90
  NodeType::File(file_name) => {
82
91
  let f = File { node: node.clone() };
83
- let canonical_file_name = match dunce::canonicalize(file_name) {
84
- Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
85
- Err(_) => file_name.clone(),
86
- };
87
92
 
88
- files.insert(canonical_file_name, f);
93
+ // i dont reallly like this :(((
94
+ // let canonical_file_name = match dunce::canonicalize(file_name) {
95
+ // Ok(path) => path.to_str().unwrap_or(file_name).to_string(),
96
+ // Err(_) => file_name.clone(),
97
+ // };
98
+
99
+ files.insert(file_name.clone(), f);
89
100
  }
90
101
  NodeType::ErrorNode(_) => {
91
102
  // do nothing
@@ -1,13 +1,13 @@
1
- use crate::git_utils;
2
-
3
1
  use super::file_utils;
4
2
  use sha2::Digest;
5
3
  use std::collections::{BTreeMap, HashSet};
6
4
  use std::path::PathBuf;
5
+ use std::vec;
7
6
  use std::{fs, path::Path, sync::Arc};
8
7
  use tokio::sync::RwLock;
9
8
  use tonic::async_trait;
10
9
  use tracing::info;
10
+
11
11
  pub mod local_construction;
12
12
  pub mod test;
13
13
 
@@ -18,7 +18,8 @@ pub struct MerkleTree {
18
18
  root: MerkleNodePtr,
19
19
  files: BTreeMap<String, File>,
20
20
  cursor: Option<usize>,
21
- git_ignored_files: HashSet<String>,
21
+ git_ignored_files_and_dirs: HashSet<String>,
22
+ is_git_repo: bool,
22
23
  }
23
24
 
24
25
  #[derive(Debug)]
@@ -62,6 +63,8 @@ pub trait LocalConstruction {
62
63
 
63
64
  async fn construct_merkle_tree(
64
65
  root_directory: String,
66
+ git_ignored_files_and_dirs: HashSet<String>,
67
+ is_git_repo: bool
65
68
  ) -> Result<MerkleTree, anyhow::Error>;
66
69
 
67
70
  async fn update_file(
@@ -95,7 +98,8 @@ impl MerkleTree {
95
98
  files: BTreeMap::new(),
96
99
  root_path: "".to_string(),
97
100
  cursor: None,
98
- git_ignored_files: HashSet::new(),
101
+ git_ignored_files_and_dirs: HashSet::new(),
102
+ is_git_repo: false
99
103
  }
100
104
  }
101
105
 
@@ -103,18 +107,26 @@ impl MerkleTree {
103
107
  &self,
104
108
  absolute_path: &str,
105
109
  ) -> Result<String, anyhow::Error> {
106
-
107
110
  let node = match self.files.get(absolute_path) {
108
111
  Some(file) => file.node.clone(),
109
112
  None => {
110
113
  let all_files: Vec<String> = self.files.keys().cloned().collect();
111
- return Err(anyhow::anyhow!("Could not find file in tree! Looking for: {}. All files: {:?}", absolute_path, all_files));
114
+ return Err(anyhow::anyhow!(
115
+ "Could not find file in tree! Looking for: {}. All files: {:?}",
116
+ absolute_path,
117
+ all_files
118
+ ));
112
119
  }
113
120
  };
114
121
 
115
122
  let node_reader = node.read().await;
116
123
  let node_hash = node_reader.hash.clone();
117
124
 
125
+ info!(
126
+ "get_subtree_hash for path: {}, node_hash: {}",
127
+ absolute_path, node_hash
128
+ );
129
+
118
130
  Ok(node_hash)
119
131
  }
120
132
 
@@ -278,6 +290,12 @@ impl MerkleTree {
278
290
  ) -> Result<Vec<String>, anyhow::Error> {
279
291
  let mut files = Vec::new();
280
292
 
293
+ // 1. should check that this absolute path is actually a directory.
294
+ let file_node = self.files.get(absolute_path);
295
+ if file_node.is_none() {
296
+ return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
+ }
298
+
281
299
  for (file_path, f) in &self.files {
282
300
  if !file_path.contains(absolute_path) {
283
301
  continue;
@@ -390,8 +408,9 @@ impl MerkleTree {
390
408
  let new_node = MerkleNode::new(
391
409
  file_path.clone(),
392
410
  Some(ancestor.clone()),
393
- &self.git_ignored_files,
411
+ &self.git_ignored_files_and_dirs,
394
412
  &absolute_root_path.as_str(),
413
+ self.is_git_repo
395
414
  )
396
415
  .await;
397
416
  ancestor.write().await.attach_child(new_node.clone()).await;
@@ -407,8 +426,9 @@ impl MerkleTree {
407
426
  let first_child = MerkleNode::new(
408
427
  first_child_path.clone(),
409
428
  Some(ancestor.clone()),
410
- &self.git_ignored_files,
429
+ &self.git_ignored_files_and_dirs,
411
430
  &absolute_root_path.as_str(),
431
+ self.is_git_repo
412
432
  )
413
433
  .await;
414
434
 
@@ -696,13 +716,14 @@ impl MerkleNode {
696
716
  parent: ParentPtr,
697
717
  ignored_files: &IgnoredFiles,
698
718
  absolute_root_path: &str,
719
+ is_git_repo: bool,
699
720
  ) -> MerkleNodePtr {
700
- // check if the root is a git directory.
701
- let is_git_repo =
702
- match git_utils::is_git_directory(absolute_root_path).await {
703
- Ok(is_git_repo) => is_git_repo,
704
- Err(e) => false,
705
- };
721
+ // // check if the root is a git directory.
722
+ // let is_git_repo =
723
+ // match git_utils::is_git_directory(absolute_root_path).await {
724
+ // Ok(is_git_repo) => is_git_repo,
725
+ // Err(_e) => false,
726
+ // };
706
727
  let bypass_git = !is_git_repo;
707
728
 
708
729
  MerkleNode::construct_node(
@@ -720,13 +741,8 @@ impl MerkleNode {
720
741
  parent: ParentPtr,
721
742
  ignored_files: &IgnoredFiles,
722
743
  absolute_root_path: &str,
744
+ is_git_repo: bool,
723
745
  ) -> MerkleNodePtr {
724
- // check if the root is a git directory.
725
- let is_git_repo =
726
- match git_utils::is_git_directory(absolute_root_path).await {
727
- Ok(is_git_repo) => is_git_repo,
728
- Err(_e) => false,
729
- };
730
746
  let bypass_git = !is_git_repo;
731
747
 
732
748
  info!(
@@ -783,26 +799,9 @@ impl MerkleNode {
783
799
  )));
784
800
  }
785
801
 
786
- // check if the directory is git ignored
787
- let is_git_ignored =
788
- match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
789
- .await
790
- {
791
- Ok(is_git_ignored) => is_git_ignored,
792
- Err(e) => {
793
- return Arc::new(RwLock::new(MerkleNode::empty_node(
794
- Some(absolute_file_or_directory),
795
- Some(e.to_string()),
796
- )));
797
- }
798
- };
802
+ let is_git_ignored_dir = ignored_files.contains(&path_str);
799
803
 
800
- if is_git_ignored && !bypass_git {
801
- // println!("skipping directory: {}", path_str);
802
- tracing::info!(
803
- "skipping directory because its git ignored: {}",
804
- path_str
805
- );
804
+ if is_git_ignored_dir && !bypass_git {
806
805
  return Arc::new(RwLock::new(MerkleNode::empty_node(
807
806
  Some(absolute_file_or_directory),
808
807
  Some("Directory is git ignored!".to_string()),
@@ -891,18 +890,10 @@ impl MerkleNode {
891
890
  false => {}
892
891
  }
893
892
 
894
- // read the file_content to a buffer
895
- let file_content = match tokio::fs::read(absolute_file_path).await {
896
- Ok(content) => content,
897
- Err(e) => {
898
- return Err(format!("Could not read file! {}", e.to_string()));
899
- }
900
- };
901
-
902
893
  // check if the file passes runtime checks.
903
894
  match file_utils::is_good_file_runtime_check(
904
895
  absolute_file_path,
905
- &file_content,
896
+ // &file_content,
906
897
  )
907
898
  .await
908
899
  {
@@ -912,15 +903,14 @@ impl MerkleNode {
912
903
  }
913
904
  }
914
905
 
915
- let file_content = match std::str::from_utf8(&file_content) {
916
- Ok(content) => content,
917
- Err(e) => {
918
- return Err(format!(
919
- "UTF8 Failure. Could not convert file content to string! {}",
920
- e.to_string()
921
- ));
922
- }
923
- };
906
+ // read the file_content to a buffer
907
+ let file_content =
908
+ match file_utils::read_string_without_bom(absolute_file_path).await {
909
+ Ok(content) => content,
910
+ Err(e) => {
911
+ return Err(format!("Could not read file! {}", e.to_string()));
912
+ }
913
+ };
924
914
 
925
915
  let file_hash = compute_hash(&file_content);
926
916
  let node = MerkleNode {
@@ -976,15 +966,51 @@ impl MerkleNode {
976
966
 
977
967
  async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
978
968
  let mut hasher = sha2::Sha256::new();
969
+ let mut names_and_hashes = vec![];
970
+ let mut non_zero_children = 0;
971
+
979
972
  for child in children {
980
973
  // check if it is an error node
981
974
  let child_reader = child.read().await;
982
- if let NodeType::ErrorNode(_) = &child_reader.node_type {
975
+
976
+ match &child_reader.node_type {
977
+ NodeType::File(file_name) => {
978
+ non_zero_children += 1;
979
+ names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
980
+ }
981
+ NodeType::Branch((file_name, _)) => {
982
+ let hash = child_reader.hash.clone();
983
+ if hash == "" {
984
+ continue;
985
+ }
986
+
987
+ non_zero_children += 1;
988
+ names_and_hashes.push((file_name.clone(), hash));
989
+ }
990
+ NodeType::ErrorNode(_) => {
991
+ continue;
992
+ }
993
+ }
994
+ }
995
+
996
+ // sort the list of names and hashes by the hashes!!
997
+ names_and_hashes
998
+ .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
999
+
1000
+ for (name, hash) in names_and_hashes {
1001
+ if hash == "" {
983
1002
  continue;
984
1003
  }
1004
+ info!("name: {}, hash: {}", name, hash);
1005
+ hasher.update(hash);
1006
+ }
985
1007
 
986
- hasher.update(child_reader.hash.as_bytes());
1008
+ if non_zero_children == 0 {
1009
+ // this means that the branch is empty.
1010
+ // we should return an empty string.
1011
+ return "".to_string();
987
1012
  }
1013
+
988
1014
  let result = hasher.finalize();
989
1015
  format!("{:x}", result)
990
1016
  }
@@ -43,8 +43,9 @@ mod tests {
43
43
  // let path = Path::new(&temp_dir_path);
44
44
 
45
45
  // Test construct_merkle_tree() function
46
+ let new_set = std::collections::HashSet::<String>::new();
46
47
  let tree =
47
- MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
48
+ MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
48
49
  let mut tree = match tree {
49
50
  Ok(tree) => {
50
51
  assert_eq!(tree.files.len(), 2);