@anysphere/file-service 0.0.0-a0aa43e6 → 0.0.0-a48d29cb

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -17,6 +17,9 @@ tempfile = "3.8.0"
17
17
  anyhow = "1.0.75"
18
18
  tonic = "0.9.2"
19
19
  prost = "0.11.9"
20
+ tracing = "0.1.37"
21
+ tracing-subscriber = "0.3.17"
22
+ tracing-appender = "0.2.2"
20
23
 
21
24
  [build-dependencies]
22
25
  napi-build = "2.0.1"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@anysphere/file-service",
3
- "version": "0.0.0-a0aa43e6",
3
+ "version": "0.0.0-a48d29cb",
4
4
  "main": "index.js",
5
5
  "types": "index.d.ts",
6
6
  "napi": {
@@ -35,11 +35,11 @@
35
35
  "version": "napi version"
36
36
  },
37
37
  "optionalDependencies": {
38
- "@anysphere/file-service-win32-x64-msvc": "0.0.0-a0aa43e6",
39
- "@anysphere/file-service-darwin-x64": "0.0.0-a0aa43e6",
40
- "@anysphere/file-service-linux-x64-gnu": "0.0.0-a0aa43e6",
41
- "@anysphere/file-service-darwin-arm64": "0.0.0-a0aa43e6",
42
- "@anysphere/file-service-win32-arm64-msvc": "0.0.0-a0aa43e6",
43
- "@anysphere/file-service-darwin-universal": "0.0.0-a0aa43e6"
38
+ "@anysphere/file-service-win32-x64-msvc": "0.0.0-a48d29cb",
39
+ "@anysphere/file-service-darwin-x64": "0.0.0-a48d29cb",
40
+ "@anysphere/file-service-linux-x64-gnu": "0.0.0-a48d29cb",
41
+ "@anysphere/file-service-darwin-arm64": "0.0.0-a48d29cb",
42
+ "@anysphere/file-service-win32-arm64-msvc": "0.0.0-a48d29cb",
43
+ "@anysphere/file-service-darwin-universal": "0.0.0-a48d29cb"
44
44
  }
45
45
  }
package/src/file_utils.rs CHANGED
@@ -12,9 +12,8 @@ pub fn is_in_bad_dir(file_path: &Path) -> Result<bool, Error> {
12
12
  let item_path = file_path
13
13
  .to_str()
14
14
  .ok_or(anyhow::anyhow!("Failed to convert path to string"))?;
15
- let is_bad_dir = (item_path.contains("node_modules")
16
- || item_path.contains(".git"))
17
- && !(item_path.ends_with(".git") || item_path.ends_with("node_modules"));
15
+ let is_bad_dir =
16
+ item_path.contains("node_modules") || item_path.contains(".git");
18
17
  Ok(is_bad_dir)
19
18
  }
20
19
 
@@ -38,14 +37,14 @@ pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
38
37
 
39
38
  match file_name {
40
39
  "package-lock.json" | "pnpm-lock.yaml" | "yarn.lock" | "composer.lock"
41
- | "Gemfile.lock" => {
40
+ | "Gemfile.lock" | "bun.lockb" => {
42
41
  return Err(anyhow::anyhow!("File is just a lock file"));
43
42
  }
44
43
  _ => {}
45
44
  }
46
45
 
47
46
  match extension {
48
- "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" => {
47
+ "lock" | "bak" | "tmp" | "bin" | "exe" | "dll" | "so" | "lockb" => {
49
48
  return Err(anyhow::anyhow!("File is just a lock file"));
50
49
  }
51
50
  _ => {}
package/src/git_utils.rs CHANGED
@@ -1,10 +1,10 @@
1
1
  use std::collections::HashSet;
2
- use std::path::PathBuf;
3
2
  use std::process::Command;
4
3
 
5
4
  pub fn list_ignored_files(
6
5
  workspace_root_path: &str,
7
- ) -> Result<HashSet<PathBuf>, Box<dyn std::error::Error>> {
6
+ should_return_absolute_paths: bool,
7
+ ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
8
8
  let mut gitignored_files = HashSet::new();
9
9
 
10
10
  let commands = vec![
@@ -15,12 +15,66 @@ pub fn list_ignored_files(
15
15
  "--ignored",
16
16
  "--exclude-standard",
17
17
  ],
18
+ // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
19
+ vec![
20
+ "sh",
21
+ "-c",
22
+ "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
23
+ ],
24
+ ];
25
+
26
+ for command in commands {
27
+ let output = Command::new(&command[0])
28
+ .args(&command[1..])
29
+ .current_dir(workspace_root_path)
30
+ .output()?;
31
+
32
+ if output.status.success() {
33
+ let files = String::from_utf8(output.stdout)?
34
+ .lines()
35
+ .filter(|line| !line.is_empty())
36
+ .map(|line| {
37
+ if should_return_absolute_paths {
38
+ let mut path = std::path::PathBuf::from(workspace_root_path);
39
+ path.push(line);
40
+
41
+ match path.canonicalize() {
42
+ Ok(canonical_path) => {
43
+ canonical_path.to_string_lossy().into_owned()
44
+ }
45
+ Err(_) => String::from(line),
46
+ }
47
+ } else {
48
+ String::from(line)
49
+ }
50
+ })
51
+ .collect::<HashSet<_>>();
52
+
53
+ gitignored_files.extend(files);
54
+ }
55
+ }
56
+
57
+ Ok(gitignored_files)
58
+ }
59
+
60
+ pub fn list_ignored_files_with_absolute_paths(
61
+ workspace_root_path: &str,
62
+ ) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
63
+ let mut gitignored_files = HashSet::new();
64
+
65
+ let commands = vec![
18
66
  vec![
19
67
  "git",
20
- "submodule",
21
- "foreach",
22
- "--quiet",
23
- "git ls-files --others --ignored --exclude-standard | sed 's|^|$path/|'",
68
+ "ls-files",
69
+ "--others",
70
+ "--ignored",
71
+ "--exclude-standard",
72
+ ],
73
+ // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
74
+ vec![
75
+ "sh",
76
+ "-c",
77
+ "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
24
78
  ],
25
79
  ];
26
80
 
@@ -34,7 +88,7 @@ pub fn list_ignored_files(
34
88
  let files = String::from_utf8(output.stdout)?
35
89
  .lines()
36
90
  .filter(|line| !line.is_empty())
37
- .map(|line| PathBuf::from(line))
91
+ .map(|line| String::from(line))
38
92
  .collect::<HashSet<_>>();
39
93
 
40
94
  gitignored_files.extend(files);
@@ -57,6 +111,18 @@ pub async fn is_git_ignored(
57
111
  Ok(output.status.success())
58
112
  }
59
113
 
114
+ pub async fn is_git_directory(
115
+ workspace_root_path: &str,
116
+ ) -> Result<bool, anyhow::Error> {
117
+ let output = tokio::process::Command::new("git")
118
+ .args(&["rev-parse", "--is-inside-work-tree"])
119
+ .current_dir(workspace_root_path)
120
+ .output()
121
+ .await?;
122
+
123
+ Ok(output.status.success())
124
+ }
125
+
60
126
  #[cfg(test)]
61
127
  mod tests {
62
128
  use super::*;
@@ -66,7 +132,8 @@ mod tests {
66
132
  #[test]
67
133
  fn test_no_ignored_files() {
68
134
  let dir = tempfile::tempdir().unwrap();
69
- let gitignored_files = list_ignored_files(dir.path().to_str().unwrap()).unwrap();
135
+ let gitignored_files =
136
+ list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
70
137
  Command::new("git")
71
138
  .args(&["init"])
72
139
  .current_dir(dir.path())
@@ -92,13 +159,14 @@ mod tests {
92
159
  .current_dir(dir.path())
93
160
  .output()
94
161
  .unwrap();
95
- let gitignored_files = list_ignored_files(dir.path().to_str().unwrap()).unwrap();
162
+ let gitignored_files =
163
+ list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
96
164
  println!(
97
165
  "ignored files for test_one_ignored_file: {:?}",
98
166
  gitignored_files
99
167
  );
100
168
  // assert_eq!(gitignored_files.len(), 1);
101
- assert!(gitignored_files.contains(&PathBuf::from("ignored.txt")));
169
+ assert!(gitignored_files.contains(&String::from("ignored.txt")));
102
170
  }
103
171
 
104
172
  #[test]
@@ -121,19 +189,83 @@ mod tests {
121
189
  .current_dir(dir.path())
122
190
  .output()
123
191
  .unwrap();
124
- let gitignored_files = list_ignored_files(dir.path().to_str().unwrap()).unwrap();
192
+ let gitignored_files =
193
+ list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
125
194
  println!(
126
195
  "ignored files for test_multiple_ignored_files: {:?}",
127
196
  gitignored_files
128
197
  );
129
198
  // assert_eq!(gitignored_files.len(), 2);
130
- assert!(gitignored_files.contains(&PathBuf::from("ignored1.txt")));
131
- assert!(gitignored_files.contains(&PathBuf::from("ignored2.txt")));
199
+ assert!(gitignored_files.contains(&String::from("ignored1.txt")));
200
+ assert!(gitignored_files.contains(&String::from("ignored2.txt")));
201
+ }
202
+
203
+ #[test]
204
+ fn test_git_submodule_ignored_files() {
205
+ let dir = tempfile::tempdir().unwrap();
206
+ let submodule_path = dir.path().join("submodule");
207
+ std::fs::create_dir(&submodule_path).unwrap();
208
+
209
+ let o = Command::new("git")
210
+ .args(&["init"])
211
+ .current_dir(&submodule_path)
212
+ .output()
213
+ .unwrap();
214
+ println!("git init output: {:?}", o);
215
+
216
+ let file_path = submodule_path.join("ignored.txt");
217
+ let mut file = File::create(&file_path).unwrap();
218
+ writeln!(file, "This is an ignored file.").unwrap();
219
+
220
+ let file2 = submodule_path.join("ignored2.txt");
221
+ let mut file = File::create(&file2).unwrap();
222
+ writeln!(file, "This is another ignored file.").unwrap();
223
+
224
+ let gitignore_path = submodule_path.join(".gitignore");
225
+ let mut gitignore = File::create(&gitignore_path).unwrap();
226
+ writeln!(gitignore, "*.txt").unwrap();
227
+
228
+ let o = Command::new("git")
229
+ .args(&["init"])
230
+ .current_dir(dir.path())
231
+ .output()
232
+ .unwrap();
233
+ println!("git init output: {:?}", o);
234
+
235
+ // make a commit in the submodule
236
+ let o = Command::new("git")
237
+ .args(&["add", "."])
238
+ .current_dir(&submodule_path)
239
+ .output()
240
+ .unwrap();
241
+ println!("git add output: {:?}", o);
242
+ let o = Command::new("git")
243
+ .args(&["commit", "-m", "initial commit"])
244
+ .current_dir(&submodule_path)
245
+ .output()
246
+ .unwrap();
247
+ println!("git commit output: {:?}", o);
248
+
249
+ let o = Command::new("git")
250
+ .args(&["submodule", "add", "./submodule"])
251
+ .current_dir(dir.path())
252
+ .output()
253
+ .unwrap();
254
+ println!("git submodule add output: {:?}", o);
255
+
256
+ let gitignored_files =
257
+ list_ignored_files(dir.path().to_str().unwrap(), false).unwrap();
258
+ println!(
259
+ "ignored files for test_git_submodule_ignored_files: {:?}",
260
+ gitignored_files
261
+ );
262
+ assert!(gitignored_files.contains(&String::from("submodule/ignored.txt")));
263
+ assert!(gitignored_files.contains(&String::from("submodule/ignored2.txt")));
132
264
  }
133
265
 
134
266
  #[test]
135
267
  fn test_multiple_ignored_files_in_current_dir() {
136
- let gitignored_files = list_ignored_files(".").unwrap();
268
+ let gitignored_files = list_ignored_files(".", false).unwrap();
137
269
  assert!(gitignored_files.len() > 1);
138
270
 
139
271
  // print a sample of the ignored files
@@ -147,7 +279,6 @@ mod tests {
147
279
  }
148
280
  }
149
281
 
150
-
151
282
  #[tokio::test]
152
283
  async fn test_file_not_ignored() {
153
284
  let dir = tempfile::tempdir().unwrap();
@@ -160,7 +291,10 @@ mod tests {
160
291
  .current_dir(dir.path())
161
292
  .output()
162
293
  .unwrap();
163
- let is_ignored = is_git_ignored(dir.path().to_str().unwrap(), "not_ignored.txt").await.unwrap();
294
+ let is_ignored =
295
+ is_git_ignored(dir.path().to_str().unwrap(), "not_ignored.txt")
296
+ .await
297
+ .unwrap();
164
298
  assert_eq!(is_ignored, false);
165
299
  }
166
300
 
@@ -180,7 +314,10 @@ mod tests {
180
314
  .current_dir(dir.path())
181
315
  .output()
182
316
  .unwrap();
183
- let is_ignored = is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt").await.unwrap();
317
+ let is_ignored =
318
+ is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt")
319
+ .await
320
+ .unwrap();
184
321
  assert_eq!(is_ignored, true);
185
322
  }
186
323
 
@@ -200,8 +337,10 @@ mod tests {
200
337
  .current_dir(dir.path())
201
338
  .output()
202
339
  .unwrap();
203
- let is_ignored = is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt").await.unwrap();
340
+ let is_ignored =
341
+ is_git_ignored(dir.path().to_str().unwrap(), "ignored.txt")
342
+ .await
343
+ .unwrap();
204
344
  assert_eq!(is_ignored, true);
205
345
  }
206
-
207
346
  }
package/src/lib.rs CHANGED
@@ -1,4 +1,5 @@
1
1
  #![deny(clippy::all)]
2
+ #![deny(unsafe_op_in_unsafe_fn)]
2
3
  pub mod file_utils;
3
4
  pub mod git_utils;
4
5
  pub mod merkle_tree;
@@ -6,6 +7,9 @@ pub mod merkle_tree;
6
7
  use std::vec;
7
8
 
8
9
  use merkle_tree::{LocalConstruction, MerkleTree};
10
+ use tracing::{info, Level};
11
+ use tracing_appender::rolling::{RollingFileAppender, Rotation};
12
+ use tracing_subscriber::fmt;
9
13
 
10
14
  #[macro_use]
11
15
  extern crate napi_derive;
@@ -14,15 +18,35 @@ extern crate napi_derive;
14
18
  pub struct MerkleClient {
15
19
  tree: MerkleTree,
16
20
  root_directory: String,
21
+ _guard: tracing_appender::non_blocking::WorkerGuard,
22
+ }
23
+
24
+ pub fn init_logger() -> tracing_appender::non_blocking::WorkerGuard {
25
+ let file_appender =
26
+ RollingFileAppender::new(Rotation::NEVER, "./", "rust_log.txt");
27
+ let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
28
+ let subscriber = fmt::Subscriber::builder()
29
+ .with_max_level(Level::TRACE)
30
+ .with_writer(non_blocking)
31
+ .with_ansi(false)
32
+ .with_line_number(true)
33
+ .finish();
34
+
35
+ let _ = tracing::subscriber::set_global_default(subscriber);
36
+
37
+ _guard
17
38
  }
18
39
 
19
40
  #[napi]
20
41
  impl MerkleClient {
21
42
  #[napi(constructor)]
22
43
  pub fn new(root_directory: String) -> MerkleClient {
44
+ let _guard = init_logger();
45
+
23
46
  MerkleClient {
24
47
  tree: MerkleTree::empty_tree(),
25
48
  root_directory,
49
+ _guard,
26
50
  }
27
51
  }
28
52
 
@@ -31,7 +55,10 @@ impl MerkleClient {
31
55
  // 1. compute the merkle tree
32
56
  // 2. update the backend
33
57
  // 3. sync with the remote
34
- self.compute_merkle_tree().await?;
58
+ info!("Merkle tree compute started!");
59
+ unsafe {
60
+ self.compute_merkle_tree().await?;
61
+ }
35
62
 
36
63
  Ok(())
37
64
  }
@@ -47,6 +74,17 @@ impl MerkleClient {
47
74
  let t =
48
75
  MerkleTree::construct_merkle_tree(self.root_directory.clone()).await;
49
76
 
77
+ let files = self.tree.get_all_files().await;
78
+
79
+ match files {
80
+ Ok(files) => {
81
+ info!("files: {:?}", files);
82
+ }
83
+ Err(e) => {
84
+ info!("Error in get_all_files: {:?}", e);
85
+ }
86
+ }
87
+
50
88
  match t {
51
89
  Ok(tree) => {
52
90
  self.tree = tree;
@@ -76,7 +114,9 @@ impl MerkleClient {
76
114
  ) -> Result<String, napi::Error> {
77
115
  let absolute_path =
78
116
  std::path::Path::new(&self.root_directory).join(relative_path);
79
- let hash = self.tree.get_subtree_hash(absolute_path).await;
117
+ let canonical_path = absolute_path.canonicalize().unwrap();
118
+
119
+ let hash = self.tree.get_subtree_hash(canonical_path).await;
80
120
 
81
121
  match hash {
82
122
  Ok(hash) => Ok(hash),
@@ -104,8 +144,10 @@ impl MerkleClient {
104
144
  &self,
105
145
  relative_path: String,
106
146
  ) -> Result<i32, napi::Error> {
107
- let absolute_path =
108
- std::path::Path::new(&self.root_directory).join(relative_path);
147
+ let absolute_path = std::path::Path::new(&self.root_directory)
148
+ .join(relative_path)
149
+ .canonicalize()?;
150
+
109
151
  let num = self
110
152
  .tree
111
153
  .get_num_embeddable_files_in_subtree(absolute_path)
@@ -167,7 +209,6 @@ impl MerkleClient {
167
209
 
168
210
  let ret = vec![file];
169
211
  let ret = ret.into_iter().chain(path.into_iter()).collect::<Vec<_>>();
170
-
171
212
  Ok(ret)
172
213
  }
173
214
  Err(e) => Err(napi::Error::new(
@@ -183,17 +224,16 @@ impl MerkleClient {
183
224
  &self,
184
225
  absolute_file_path: String,
185
226
  ) -> Result<Vec<String>, napi::Error> {
186
- // let spline = self.tree.get_spline(absolute_file_path).await;
187
-
188
- return Ok(vec![]);
227
+ let absolute_path_str = absolute_file_path.as_str();
228
+ let spline = self.tree.get_spline(absolute_path_str).await;
189
229
 
190
- // match spline {
191
- // Ok(spline) => Ok(spline),
192
- // Err(e) => Err(napi::Error::new(
193
- // napi::Status::Unknown,
194
- // format!("Error in get_spline: {:?}", e),
195
- // )),
196
- // }
230
+ match spline {
231
+ Ok(spline) => Ok(spline),
232
+ Err(e) => Err(napi::Error::new(
233
+ napi::Status::Unknown,
234
+ format!("Error in get_spline: {:?}", e),
235
+ )),
236
+ }
197
237
  }
198
238
 
199
239
  #[napi]
@@ -1,11 +1,11 @@
1
+ use crate::git_utils;
1
2
  use crate::merkle_tree::{
2
3
  File, MerkleNode, MerkleNodePtr, NodeType, PinnedFuture,
3
4
  };
4
5
 
5
6
  use super::{LocalConstruction, MerkleTree};
6
- use std::collections::BTreeMap;
7
- use std::path::PathBuf;
8
- use std::{collections::HashMap, path::Path, sync::Arc};
7
+ use std::collections::{BTreeMap, HashSet};
8
+ use std::path::{Path, PathBuf};
9
9
  use tonic::async_trait;
10
10
 
11
11
  #[async_trait]
@@ -29,20 +29,36 @@ impl LocalConstruction for MerkleTree {
29
29
  /// 3. construct merkle tree
30
30
  /// 4. return merkle tree
31
31
  async fn construct_merkle_tree(
32
- root_directory: String,
32
+ absolute_path_to_root_directory: String,
33
33
  ) -> Result<MerkleTree, anyhow::Error> {
34
- let path = PathBuf::from(root_directory.clone());
34
+ let path = PathBuf::from(absolute_path_to_root_directory.clone());
35
35
  if !path.exists() {
36
36
  // FIXME: we should report this via a good logger.
37
37
  panic!("Root directory does not exist!");
38
38
  }
39
39
 
40
- let root_node = MerkleNode::new(path, None).await;
40
+ // 1. get all the gitignored files
41
+ let git_ignored_files = match git_utils::list_ignored_files(
42
+ absolute_path_to_root_directory.as_str(),
43
+ true,
44
+ ) {
45
+ Ok(git_ignored) => git_ignored,
46
+ Err(_e) => HashSet::new(),
47
+ };
48
+
49
+ let root_node = MerkleNode::new(
50
+ path,
51
+ None,
52
+ &git_ignored_files,
53
+ absolute_path_to_root_directory.as_str(),
54
+ )
55
+ .await;
41
56
  let mut mt = MerkleTree {
42
57
  root: root_node,
43
58
  files: BTreeMap::new(),
44
- root_path: root_directory,
59
+ root_path: absolute_path_to_root_directory,
45
60
  cursor: None,
61
+ git_ignored_files,
46
62
  };
47
63
 
48
64
  // we now iterate over all the nodes and add them to the hashmap
@@ -55,6 +71,7 @@ impl LocalConstruction for MerkleTree {
55
71
  let node_reader = node.read().await;
56
72
  match &node_reader.node_type {
57
73
  NodeType::Branch(n) => {
74
+ tracing::info!("Branch: {:?}", n.0);
58
75
  let children = &n.1;
59
76
  files.insert(n.0.clone(), File { node: node.clone() });
60
77
  for child in children {
@@ -63,6 +80,7 @@ impl LocalConstruction for MerkleTree {
63
80
  }
64
81
  NodeType::File(file_name) => {
65
82
  let f = File { node: node.clone() };
83
+ tracing::info!("File: {:?}", file_name);
66
84
  files.insert(file_name.clone(), f);
67
85
  }
68
86
  NodeType::ErrorNode(_) => {
@@ -74,6 +92,9 @@ impl LocalConstruction for MerkleTree {
74
92
 
75
93
  add_nodes_to_hashmap(&mt.root, &mut mt.files).await;
76
94
 
95
+ tracing::info!("Merkle tree compute finished!");
96
+ // tracing::info!("Merkle tree: {:?}", mt.files);
97
+
77
98
  Ok(mt)
78
99
  }
79
100
 
@@ -1,10 +1,13 @@
1
+ use crate::git_utils;
2
+
1
3
  use super::file_utils;
2
4
  use sha2::Digest;
3
- use std::collections::BTreeMap;
5
+ use std::collections::{BTreeMap, HashSet};
4
6
  use std::path::PathBuf;
5
7
  use std::{fs, path::Path, sync::Arc};
6
8
  use tokio::sync::RwLock;
7
9
  use tonic::async_trait;
10
+ use tracing::info;
8
11
  pub mod local_construction;
9
12
  pub mod test;
10
13
 
@@ -14,7 +17,8 @@ pub struct MerkleTree {
14
17
  root_path: String,
15
18
  root: MerkleNodePtr,
16
19
  files: BTreeMap<String, File>,
17
- cursor: Option<MerkleNodePtr>,
20
+ cursor: Option<usize>,
21
+ git_ignored_files: HashSet<String>,
18
22
  }
19
23
 
20
24
  #[derive(Debug)]
@@ -91,6 +95,7 @@ impl MerkleTree {
91
95
  files: BTreeMap::new(),
92
96
  root_path: "".to_string(),
93
97
  cursor: None,
98
+ git_ignored_files: HashSet::new(),
94
99
  }
95
100
  }
96
101
 
@@ -110,7 +115,8 @@ impl MerkleTree {
110
115
  let node = match self.files.get(&abs_string) {
111
116
  Some(file) => file.node.clone(),
112
117
  None => {
113
- return Err(anyhow::anyhow!("Could not find file in tree!"));
118
+ let all_files: Vec<String> = self.files.keys().cloned().collect();
119
+ return Err(anyhow::anyhow!("Could not find file in tree! Looking for: {}. All files: {:?}", abs_string, all_files));
114
120
  }
115
121
  };
116
122
 
@@ -156,7 +162,7 @@ impl MerkleTree {
156
162
  }
157
163
  };
158
164
 
159
- // TODO(sualeh): worth keeping this list sorted.
165
+ // TODO(sualeh): worth keeping this list sorted. its now a btree
160
166
 
161
167
  for (_, file) in &self.files {
162
168
  let file_reader = file.node.read().await;
@@ -234,83 +240,44 @@ impl MerkleTree {
234
240
  pub async fn get_next_file_to_embed(
235
241
  &mut self,
236
242
  ) -> Result<(String, Vec<String>), anyhow::Error> {
237
- // the plan is to do an in-order traversal of the tree.
238
-
239
- // first the edge case to deal with:
240
- // cursor == None
241
- if self.cursor.is_none() {
242
- // If the root is a file, return its name.
243
- if let NodeType::File(file_path) = &self.root.read().await.node_type {
244
- return Ok((file_path.clone(), vec![]));
243
+ // if the cursor is none, set it to 0
244
+ let cursor = match self.cursor {
245
+ Some(cursor) => cursor,
246
+ None => {
247
+ self.cursor = Some(0);
248
+ 0
245
249
  }
250
+ };
246
251
 
247
- // if the path is not empty, we can iterate till we find the first child.
248
- let mut potential_first_child = self.root.clone();
249
- let mut is_branch = true;
250
- let mut path = Vec::new();
251
-
252
- while is_branch {
253
- let node = {
254
- let potential_first_child_reader = potential_first_child.read().await;
255
- match &potential_first_child_reader.node_type {
256
- NodeType::Branch(branch) => branch.clone(),
257
- NodeType::File(_) => {
258
- return Err(anyhow::anyhow!(
259
- "get_next_file_to_embed: This should not happen! the branch happened to be file."
260
- ));
261
- }
262
- NodeType::ErrorNode(_) => {
263
- return Err(anyhow::anyhow!("Cursor is an error node!"));
264
- }
265
- }
266
- };
267
-
268
- let current_node_name = &node.0;
269
- let child_list = &node.1;
270
-
271
- if let Some(c) = child_list.first() {
272
- let c_reader = c.read().await;
273
-
274
- match &c_reader.node_type {
275
- NodeType::File(file_path) => {
276
- // must set the cursor!
277
- self.cursor = Some(c.clone());
278
-
279
- return Ok((file_path.clone(), path));
280
- }
281
- NodeType::Branch(_) => {
282
- potential_first_child = c.clone();
283
- is_branch = true;
252
+ // get the thing at the cursor. while we dont find a file, we keep incrementing the cursor.
253
+ let mut cursor = cursor;
254
+ loop {
255
+ // O(log n)
256
+ let file = match self.files.values().nth(cursor) {
257
+ Some(file) => file,
258
+ None => {
259
+ return Err(anyhow::anyhow!("Could not find file to embed!"));
260
+ }
261
+ };
284
262
 
285
- // add the path to the current node.
286
- path.push(current_node_name.clone());
287
- }
288
- NodeType::ErrorNode(_) => {
289
- return Err(anyhow::anyhow!("Cursor is an error node!"));
290
- }
291
- }
292
- } else {
293
- // If the root has no children, return an error.
294
- return Err(anyhow::anyhow!("Root has no children!"));
263
+ let file_reader = file.node.read().await;
264
+ match &file_reader.node_type {
265
+ NodeType::File(f) => {
266
+ // update the cursor.
267
+ self.cursor = Some(cursor + 1);
268
+ let spline = self.get_spline(f).await?;
269
+ return Ok((f.clone(), spline));
270
+ }
271
+ NodeType::Branch(_) => {
272
+ cursor += 1;
273
+ continue;
274
+ }
275
+ NodeType::ErrorNode(_) => {
276
+ cursor += 1;
277
+ continue;
295
278
  }
296
279
  }
297
280
  }
298
-
299
- // THE DEFAULT CASE:
300
- // we already have a cursor at a file.
301
-
302
- // UNWRAP checked and fine. see the none case above.
303
- let cursor_name = self.cursor.as_ref().unwrap();
304
- let cursor_reader = cursor_name.read().await;
305
-
306
- // invariant: you must be a file!!
307
-
308
- // everytime we get to a child list, we will add all the children to a fifo, and then pull from it as long as we need it.
309
-
310
- // algorithm:
311
- // 1.
312
-
313
- Err(anyhow::anyhow!("Could not find file to embed!"))
314
281
  }
315
282
 
316
283
  pub async fn get_all_dir_files_to_embed(
@@ -340,6 +307,54 @@ impl MerkleTree {
340
307
  Ok(files)
341
308
  }
342
309
 
310
+ // TODO(sualeh): i need tests for this!!
311
+ pub async fn get_spline(
312
+ &self,
313
+ absolute_path: &str,
314
+ ) -> Result<Vec<String>, anyhow::Error> {
315
+ info!("get_spline called with absolute_path: {}", absolute_path);
316
+ let mut files = Vec::new();
317
+
318
+ let current_node = match self.files.get(absolute_path) {
319
+ Some(node) => {
320
+ info!("Found node for absolute_path: {}", absolute_path);
321
+ node.node.clone()
322
+ }
323
+ None => {
324
+ info!("File not found for absolute_path: {}", absolute_path);
325
+ return Err(anyhow::anyhow!("File not found: {}", absolute_path));
326
+ }
327
+ };
328
+
329
+ let mut stack = Vec::new();
330
+ stack.push(current_node);
331
+
332
+ while let Some(node) = stack.pop() {
333
+ let parent = node.read().await.parent.clone();
334
+ if let Some(parent) = parent {
335
+ info!("Adding parent hash to files vector");
336
+ {
337
+ let parent_node = parent.read().await;
338
+ match &parent_node.node_type {
339
+ NodeType::File(file_name) => {
340
+ files.push(file_name.clone());
341
+ }
342
+ NodeType::Branch((branch_name, _)) => {
343
+ files.push(branch_name.clone());
344
+ }
345
+ _ => {
346
+ continue;
347
+ }
348
+ }
349
+ }
350
+
351
+ stack.push(parent);
352
+ }
353
+ }
354
+ info!("Returning files vector with {} elements", files.len());
355
+ Ok(files)
356
+ }
357
+
343
358
  /// creates a new node and attaches it to the current tree.
344
359
  /// SPEC:
345
360
  /// - you are allowed to create a file with a node such that the
@@ -375,12 +390,18 @@ impl MerkleTree {
375
390
  // 1. the path is empty. this means that the ancestor is the root.
376
391
  // 2. the path is non-empty. that means there exist a non-empty element btwn till the root.
377
392
 
393
+ let absolute_root_path = self.root_path.clone();
378
394
  let new_node = match path.len() {
379
395
  0 => {
380
396
  // this means that the ancestor is the root.
381
397
  // we need to create a new node and attach it to the ancestor.
382
- let new_node =
383
- MerkleNode::new(file_path.clone(), Some(ancestor.clone())).await;
398
+ let new_node = MerkleNode::new(
399
+ file_path.clone(),
400
+ Some(ancestor.clone()),
401
+ &self.git_ignored_files,
402
+ &absolute_root_path.as_str(),
403
+ )
404
+ .await;
384
405
  ancestor.write().await.attach_child(new_node.clone()).await;
385
406
  new_node
386
407
  }
@@ -391,9 +412,13 @@ impl MerkleTree {
391
412
  // UNSURE: not sure this is the correct thing to do but it is the fastest.
392
413
  // get the last thing that is not in the tree.
393
414
  let first_child_path = path.last().unwrap();
394
- let first_child =
395
- MerkleNode::new(first_child_path.clone(), Some(ancestor.clone()))
396
- .await;
415
+ let first_child = MerkleNode::new(
416
+ first_child_path.clone(),
417
+ Some(ancestor.clone()),
418
+ &self.git_ignored_files,
419
+ &absolute_root_path.as_str(),
420
+ )
421
+ .await;
397
422
 
398
423
  // TODO(sualeh): we should do an assertion check that the entire vec is contained here.
399
424
 
@@ -670,18 +695,62 @@ use std::future::Future;
670
695
  use std::pin::Pin;
671
696
 
672
697
  type PinnedFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
698
+ type IgnoredFiles = HashSet<String>;
673
699
 
674
700
  impl MerkleNode {
675
701
  /// please be careful using this.
676
702
  async fn __new_unchecked(
677
703
  file_or_directory: String,
678
704
  parent: ParentPtr,
705
+ ignored_files: &IgnoredFiles,
706
+ absolute_root_path: &str,
679
707
  ) -> MerkleNodePtr {
680
- MerkleNode::construct_node(Path::new(&file_or_directory), parent).await
708
+ // check if the root is a git directory.
709
+ let is_git_repo =
710
+ match git_utils::is_git_directory(absolute_root_path).await {
711
+ Ok(is_git_repo) => is_git_repo,
712
+ Err(e) => false,
713
+ };
714
+ let bypass_git = !is_git_repo;
715
+
716
+ MerkleNode::construct_node(
717
+ Path::new(&file_or_directory),
718
+ parent,
719
+ ignored_files,
720
+ absolute_root_path,
721
+ bypass_git,
722
+ )
723
+ .await
681
724
  }
682
725
 
683
- async fn new(file_or_directory: PathBuf, parent: ParentPtr) -> MerkleNodePtr {
684
- MerkleNode::construct_node(Path::new(&file_or_directory), parent).await
726
+ async fn new(
727
+ absolute_file_or_directory: PathBuf,
728
+ parent: ParentPtr,
729
+ ignored_files: &IgnoredFiles,
730
+ absolute_root_path: &str,
731
+ ) -> MerkleNodePtr {
732
+ // check if the root is a git directory.
733
+ let is_git_repo =
734
+ match git_utils::is_git_directory(absolute_root_path).await {
735
+ Ok(is_git_repo) => is_git_repo,
736
+ Err(_e) => false,
737
+ };
738
+ let bypass_git = !is_git_repo;
739
+
740
+ info!(
741
+ "constructing node for absolute_file_or_directory: {:?}",
742
+ absolute_file_or_directory
743
+ );
744
+ info!("bypass_git: {}, is_git_repo: {}", bypass_git, is_git_repo);
745
+
746
+ MerkleNode::construct_node(
747
+ Path::new(&absolute_file_or_directory),
748
+ parent,
749
+ ignored_files,
750
+ absolute_root_path,
751
+ bypass_git,
752
+ )
753
+ .await
685
754
  }
686
755
 
687
756
  /// NOT added to the tree by default.
@@ -692,38 +761,68 @@ impl MerkleNode {
692
761
  // let file_hash = self.files.get_mut(&file_path).unwrap();
693
762
 
694
763
  fn construct_node<'a>(
695
- file_or_directory: &'a Path,
764
+ absolute_file_or_directory: &'a Path,
696
765
  parent: ParentPtr,
766
+ ignored_files: &'a IgnoredFiles,
767
+ absolute_root_path: &'a str,
768
+ bypass_git: bool,
697
769
  ) -> PinnedFuture<'a, MerkleNodePtr> {
698
770
  Box::pin(async move {
699
771
  // check if it is a file
700
- let path_str = file_or_directory.to_str().unwrap().to_string();
701
- if file_or_directory.is_file() {
772
+ let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
773
+ if absolute_file_or_directory.is_file() {
702
774
  return Arc::new(RwLock::new(
703
775
  MerkleNode::construct_file_node_or_error_node(
704
- file_or_directory,
776
+ absolute_file_or_directory,
705
777
  parent,
778
+ ignored_files,
706
779
  )
707
780
  .await,
708
781
  ));
709
782
  }
710
783
 
711
784
  // check if the directory fails the bad dir test.
712
- let is_bad_dir = file_utils::is_in_bad_dir(file_or_directory);
785
+ let is_bad_dir = file_utils::is_in_bad_dir(absolute_file_or_directory);
713
786
  if is_bad_dir.is_err() || is_bad_dir.unwrap_or(false) {
714
787
  // println!("skipping directory: {}", path_str);
715
788
  return Arc::new(RwLock::new(MerkleNode::empty_node(
716
- Some(file_or_directory),
789
+ Some(absolute_file_or_directory),
717
790
  Some("Directory is in bad dir!".to_string()),
718
791
  )));
719
792
  }
720
793
 
721
- let entries = fs::read_dir(file_or_directory);
794
+ // check if the directory is git ignored
795
+ let is_git_ignored =
796
+ match git_utils::is_git_ignored(absolute_root_path, path_str.as_str())
797
+ .await
798
+ {
799
+ Ok(is_git_ignored) => is_git_ignored,
800
+ Err(e) => {
801
+ return Arc::new(RwLock::new(MerkleNode::empty_node(
802
+ Some(absolute_file_or_directory),
803
+ Some(e.to_string()),
804
+ )));
805
+ }
806
+ };
807
+
808
+ if is_git_ignored && !bypass_git {
809
+ // println!("skipping directory: {}", path_str);
810
+ tracing::info!(
811
+ "skipping directory because its git ignored: {}",
812
+ path_str
813
+ );
814
+ return Arc::new(RwLock::new(MerkleNode::empty_node(
815
+ Some(absolute_file_or_directory),
816
+ Some("Directory is git ignored!".to_string()),
817
+ )));
818
+ }
819
+
820
+ let entries = fs::read_dir(absolute_file_or_directory);
722
821
  match entries {
723
822
  Ok(_) => (),
724
823
  Err(e) => {
725
824
  return Arc::new(RwLock::new(MerkleNode::empty_node(
726
- Some(file_or_directory),
825
+ Some(absolute_file_or_directory),
727
826
  Some(e.to_string()),
728
827
  )));
729
828
  }
@@ -743,13 +842,19 @@ impl MerkleNode {
743
842
  match entry {
744
843
  Ok(entry) => {
745
844
  children.push(
746
- MerkleNode::construct_node(&entry.path(), Some(node.clone()))
747
- .await,
845
+ MerkleNode::construct_node(
846
+ &entry.path(),
847
+ Some(node.clone()),
848
+ ignored_files,
849
+ absolute_root_path,
850
+ bypass_git,
851
+ )
852
+ .await,
748
853
  );
749
854
  }
750
855
  Err(e) => {
751
856
  children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
752
- Some(file_or_directory),
857
+ Some(absolute_file_or_directory),
753
858
  Some(e.to_string()),
754
859
  ))));
755
860
  }
@@ -769,23 +874,33 @@ impl MerkleNode {
769
874
  }
770
875
 
771
876
  async fn construct_file_node(
772
- file_path: &Path,
877
+ absolute_file_path: &Path,
773
878
  parent: ParentPtr,
879
+ ignored_files: &IgnoredFiles,
774
880
  ) -> Result<MerkleNode, String> {
775
- let file_str = file_path
881
+ let file_str = absolute_file_path
776
882
  .to_str()
777
883
  .ok_or("Could not convert file path to string!")?
778
884
  .to_string();
779
885
  // first see if it passes the
780
- match file_utils::is_good_file(file_path) {
886
+ match file_utils::is_good_file(absolute_file_path) {
781
887
  Ok(_) => {}
782
888
  Err(e) => {
783
889
  return Err(format!("File failed runtime checks! {}", e.to_string()));
784
890
  }
785
891
  }
786
892
 
893
+ // check if the file is in the git ignore buffer.
894
+ // this is a bug right because we are not checking absoluteness here.
895
+ match ignored_files.contains(&file_str) {
896
+ true => {
897
+ return Err(format!("File is in git ignore buffer!"));
898
+ }
899
+ false => {}
900
+ }
901
+
787
902
  // read the file_content to a buffer
788
- let file_content = match tokio::fs::read(file_path).await {
903
+ let file_content = match tokio::fs::read(absolute_file_path).await {
789
904
  Ok(content) => content,
790
905
  Err(e) => {
791
906
  return Err(format!("Could not read file! {}", e.to_string()));
@@ -793,7 +908,11 @@ impl MerkleNode {
793
908
  };
794
909
 
795
910
  // check if the file passes runtime checks.
796
- match file_utils::is_good_file_runtime_check(file_path, &file_content).await
911
+ match file_utils::is_good_file_runtime_check(
912
+ absolute_file_path,
913
+ &file_content,
914
+ )
915
+ .await
797
916
  {
798
917
  Ok(_) => {}
799
918
  Err(e) => {
@@ -824,15 +943,22 @@ impl MerkleNode {
824
943
  }
825
944
 
826
945
  async fn construct_file_node_or_error_node(
827
- file_path: &Path,
946
+ absolute_file_path: &Path,
828
947
  parent: ParentPtr,
948
+ ignored_files: &IgnoredFiles,
829
949
  ) -> MerkleNode {
830
- let node = match MerkleNode::construct_file_node(file_path, parent).await {
950
+ let node = match MerkleNode::construct_file_node(
951
+ absolute_file_path,
952
+ parent,
953
+ ignored_files,
954
+ )
955
+ .await
956
+ {
831
957
  Ok(node) => node,
832
958
  Err(e) => {
833
959
  // println!("constructing error node. error: {}", e);
834
960
  // println!("file_path: {:?}", file_path);
835
- MerkleNode::empty_node(Some(file_path), Some(e))
961
+ MerkleNode::empty_node(Some(absolute_file_path), Some(e))
836
962
  }
837
963
  };
838
964
 
package/src/test.rs ADDED
@@ -0,0 +1,5 @@
1
+ #[cfg(test)]
2
+ mod tests {
3
+ use super::super::*;
4
+ use std::path::PathBuf;
5
+ }