@anysphere/file-service 0.0.0-e3fdf62d → 0.0.0-e7e53a0a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +15 -0
- package/build.rs +2 -0
- package/index.d.ts +7 -3
- package/package.json +10 -8
- package/src/file_utils.rs +141 -27
- package/src/git_utils.rs +168 -20
- package/src/lib.rs +201 -16
- package/src/merkle_tree/local_construction.rs +48 -9
- package/src/merkle_tree/mod.rs +332 -115
- package/src/merkle_tree/test.rs +2 -1
- package/src/test.rs +5 -0
package/src/merkle_tree/mod.rs
CHANGED
|
@@ -1,19 +1,26 @@
|
|
|
1
1
|
use super::file_utils;
|
|
2
2
|
use sha2::Digest;
|
|
3
|
+
use std::collections::{BTreeMap, HashSet};
|
|
3
4
|
use std::path::PathBuf;
|
|
4
|
-
use std::
|
|
5
|
+
use std::vec;
|
|
6
|
+
use std::{fs, path::Path, sync::Arc};
|
|
5
7
|
use tokio::sync::RwLock;
|
|
6
8
|
use tonic::async_trait;
|
|
9
|
+
use tracing::info;
|
|
10
|
+
|
|
7
11
|
pub mod local_construction;
|
|
8
12
|
pub mod test;
|
|
9
13
|
|
|
10
14
|
pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
|
|
11
15
|
|
|
16
|
+
#[derive(Debug)]
|
|
12
17
|
pub struct MerkleTree {
|
|
13
18
|
root_path: String,
|
|
14
19
|
root: MerkleNodePtr,
|
|
15
|
-
files:
|
|
16
|
-
cursor: Option<
|
|
20
|
+
files: BTreeMap<String, File>,
|
|
21
|
+
cursor: Option<usize>,
|
|
22
|
+
git_ignored_files_and_dirs: HashSet<String>,
|
|
23
|
+
is_git_repo: bool,
|
|
17
24
|
}
|
|
18
25
|
|
|
19
26
|
#[derive(Debug)]
|
|
@@ -57,6 +64,8 @@ pub trait LocalConstruction {
|
|
|
57
64
|
|
|
58
65
|
async fn construct_merkle_tree(
|
|
59
66
|
root_directory: String,
|
|
67
|
+
git_ignored_files_and_dirs: HashSet<String>,
|
|
68
|
+
is_git_repo: bool
|
|
60
69
|
) -> Result<MerkleTree, anyhow::Error>;
|
|
61
70
|
|
|
62
71
|
async fn update_file(
|
|
@@ -87,21 +96,28 @@ impl MerkleTree {
|
|
|
87
96
|
pub fn empty_tree() -> MerkleTree {
|
|
88
97
|
MerkleTree {
|
|
89
98
|
root: Arc::new(RwLock::new(MerkleNode::empty_node(None, None))),
|
|
90
|
-
files:
|
|
99
|
+
files: BTreeMap::new(),
|
|
91
100
|
root_path: "".to_string(),
|
|
92
101
|
cursor: None,
|
|
102
|
+
git_ignored_files_and_dirs: HashSet::new(),
|
|
103
|
+
is_git_repo: false
|
|
93
104
|
}
|
|
94
105
|
}
|
|
95
106
|
|
|
107
|
+
#[tracing::instrument]
|
|
96
108
|
pub async fn get_subtree_hash(
|
|
97
109
|
&self,
|
|
98
|
-
|
|
110
|
+
absolute_path: &str,
|
|
99
111
|
) -> Result<String, anyhow::Error> {
|
|
100
|
-
let
|
|
101
|
-
let node = match self.files.get(path.to_str().unwrap()) {
|
|
112
|
+
let node = match self.files.get(absolute_path) {
|
|
102
113
|
Some(file) => file.node.clone(),
|
|
103
114
|
None => {
|
|
104
|
-
|
|
115
|
+
let all_files: Vec<String> = self.files.keys().cloned().collect();
|
|
116
|
+
return Err(anyhow::anyhow!(
|
|
117
|
+
"Could not find file in tree! Looking for: {}. All files: {:?}",
|
|
118
|
+
absolute_path,
|
|
119
|
+
all_files
|
|
120
|
+
));
|
|
105
121
|
}
|
|
106
122
|
};
|
|
107
123
|
|
|
@@ -132,6 +148,43 @@ impl MerkleTree {
|
|
|
132
148
|
Ok(count)
|
|
133
149
|
}
|
|
134
150
|
|
|
151
|
+
pub async fn get_num_embeddable_files_in_subtree(
|
|
152
|
+
&self,
|
|
153
|
+
absolute_path: PathBuf,
|
|
154
|
+
) -> Result<i32, anyhow::Error> {
|
|
155
|
+
let mut count = 0;
|
|
156
|
+
|
|
157
|
+
let absolute_path = match absolute_path.to_str() {
|
|
158
|
+
Some(s) => s.to_string(),
|
|
159
|
+
None => {
|
|
160
|
+
return Err(anyhow::anyhow!(
|
|
161
|
+
"get_num_embeddable_files_in_subtree: Failed to convert path to string"
|
|
162
|
+
))
|
|
163
|
+
}
|
|
164
|
+
};
|
|
165
|
+
|
|
166
|
+
// TODO(sualeh): worth keeping this list sorted. its now a btree
|
|
167
|
+
|
|
168
|
+
for (_, file) in &self.files {
|
|
169
|
+
let file_reader = file.node.read().await;
|
|
170
|
+
match &file_reader.node_type {
|
|
171
|
+
NodeType::File(file_name) => {
|
|
172
|
+
if file_name.contains(&absolute_path) {
|
|
173
|
+
count += 1;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
NodeType::Branch(_) => {
|
|
177
|
+
continue;
|
|
178
|
+
}
|
|
179
|
+
NodeType::ErrorNode(_) => {
|
|
180
|
+
continue;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
Ok(count)
|
|
186
|
+
}
|
|
187
|
+
|
|
135
188
|
pub async fn get_all_files(&self) -> Result<Vec<String>, anyhow::Error> {
|
|
136
189
|
let mut files = Vec::new();
|
|
137
190
|
|
|
@@ -188,83 +241,120 @@ impl MerkleTree {
|
|
|
188
241
|
pub async fn get_next_file_to_embed(
|
|
189
242
|
&mut self,
|
|
190
243
|
) -> Result<(String, Vec<String>), anyhow::Error> {
|
|
191
|
-
// the
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
// if the path is not empty, we can iterate till we find the first child.
|
|
202
|
-
let mut potential_first_child = self.root.clone();
|
|
203
|
-
let mut is_branch = true;
|
|
204
|
-
let mut path = Vec::new();
|
|
205
|
-
|
|
206
|
-
while is_branch {
|
|
207
|
-
let node = {
|
|
208
|
-
let potential_first_child_reader = potential_first_child.read().await;
|
|
209
|
-
match &potential_first_child_reader.node_type {
|
|
210
|
-
NodeType::Branch(branch) => branch.clone(),
|
|
211
|
-
NodeType::File(_) => {
|
|
212
|
-
return Err(anyhow::anyhow!(
|
|
213
|
-
"get_next_file_to_embed: This should not happen! the branch happened to be file."
|
|
214
|
-
));
|
|
215
|
-
}
|
|
216
|
-
NodeType::ErrorNode(_) => {
|
|
217
|
-
return Err(anyhow::anyhow!("Cursor is an error node!"));
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
};
|
|
244
|
+
// if the cursor is none, set it to 0
|
|
245
|
+
let cursor = match self.cursor {
|
|
246
|
+
Some(cursor) => cursor,
|
|
247
|
+
None => {
|
|
248
|
+
self.cursor = Some(0);
|
|
249
|
+
0
|
|
250
|
+
}
|
|
251
|
+
};
|
|
221
252
|
|
|
222
|
-
|
|
223
|
-
|
|
253
|
+
// get the thing at the cursor. while we dont find a file, we keep incrementing the cursor.
|
|
254
|
+
let mut cursor = cursor;
|
|
255
|
+
loop {
|
|
256
|
+
// O(log n)
|
|
257
|
+
let file = match self.files.values().nth(cursor) {
|
|
258
|
+
Some(file) => file,
|
|
259
|
+
None => {
|
|
260
|
+
return Err(anyhow::anyhow!("Could not find file to embed!"));
|
|
261
|
+
}
|
|
262
|
+
};
|
|
263
|
+
|
|
264
|
+
let file_reader = file.node.read().await;
|
|
265
|
+
match &file_reader.node_type {
|
|
266
|
+
NodeType::File(f) => {
|
|
267
|
+
// update the cursor.
|
|
268
|
+
self.cursor = Some(cursor + 1);
|
|
269
|
+
let spline = self.get_spline(f).await?;
|
|
270
|
+
return Ok((f.clone(), spline));
|
|
271
|
+
}
|
|
272
|
+
NodeType::Branch(_) => {
|
|
273
|
+
cursor += 1;
|
|
274
|
+
continue;
|
|
275
|
+
}
|
|
276
|
+
NodeType::ErrorNode(_) => {
|
|
277
|
+
cursor += 1;
|
|
278
|
+
continue;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
224
283
|
|
|
225
|
-
|
|
226
|
-
|
|
284
|
+
pub async fn get_all_dir_files_to_embed(
|
|
285
|
+
&self,
|
|
286
|
+
absolute_path: &str,
|
|
287
|
+
) -> Result<Vec<String>, anyhow::Error> {
|
|
288
|
+
let mut files = Vec::new();
|
|
227
289
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
290
|
+
// 1. should check that this absolute path is actually a directory.
|
|
291
|
+
let file_node = self.files.get(absolute_path);
|
|
292
|
+
if file_node.is_none() {
|
|
293
|
+
return Err(anyhow::anyhow!("Could not find directory the in tree!"));
|
|
294
|
+
}
|
|
232
295
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
is_branch = true;
|
|
296
|
+
for (file_path, f) in &self.files {
|
|
297
|
+
if !file_path.contains(absolute_path) {
|
|
298
|
+
continue;
|
|
299
|
+
}
|
|
238
300
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
return Err(anyhow::anyhow!("Root has no children!"));
|
|
301
|
+
match f.node.read().await.node_type {
|
|
302
|
+
NodeType::File(_) => {
|
|
303
|
+
files.push(file_path.clone());
|
|
304
|
+
}
|
|
305
|
+
NodeType::Branch(_) => {
|
|
306
|
+
continue;
|
|
307
|
+
}
|
|
308
|
+
NodeType::ErrorNode(_) => {
|
|
309
|
+
continue;
|
|
249
310
|
}
|
|
250
311
|
}
|
|
251
312
|
}
|
|
252
313
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
// UNWRAP checked and fine. see the none case above.
|
|
257
|
-
let cursor_name = self.cursor.as_ref().unwrap();
|
|
258
|
-
let cursor_reader = cursor_name.read().await;
|
|
314
|
+
Ok(files)
|
|
315
|
+
}
|
|
259
316
|
|
|
260
|
-
|
|
317
|
+
// TODO(sualeh): i need tests for this!!
|
|
318
|
+
pub async fn get_spline(
|
|
319
|
+
&self,
|
|
320
|
+
absolute_path: &str,
|
|
321
|
+
) -> Result<Vec<String>, anyhow::Error> {
|
|
322
|
+
let mut files = Vec::new();
|
|
261
323
|
|
|
262
|
-
|
|
324
|
+
let current_node = match self.files.get(absolute_path) {
|
|
325
|
+
Some(node) => {
|
|
326
|
+
node.node.clone()
|
|
327
|
+
}
|
|
328
|
+
None => {
|
|
329
|
+
return Err(anyhow::anyhow!("File not found: {}", absolute_path));
|
|
330
|
+
}
|
|
331
|
+
};
|
|
263
332
|
|
|
264
|
-
|
|
265
|
-
|
|
333
|
+
let mut stack = Vec::new();
|
|
334
|
+
stack.push(current_node);
|
|
335
|
+
|
|
336
|
+
while let Some(node) = stack.pop() {
|
|
337
|
+
let parent = node.read().await.parent.clone();
|
|
338
|
+
if let Some(parent) = parent {
|
|
339
|
+
{
|
|
340
|
+
let parent_node = parent.read().await;
|
|
341
|
+
match &parent_node.node_type {
|
|
342
|
+
NodeType::File(file_name) => {
|
|
343
|
+
files.push(file_name.clone());
|
|
344
|
+
}
|
|
345
|
+
NodeType::Branch((branch_name, _)) => {
|
|
346
|
+
files.push(branch_name.clone());
|
|
347
|
+
}
|
|
348
|
+
_ => {
|
|
349
|
+
continue;
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
}
|
|
266
353
|
|
|
267
|
-
|
|
354
|
+
stack.push(parent);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
Ok(files)
|
|
268
358
|
}
|
|
269
359
|
|
|
270
360
|
/// creates a new node and attaches it to the current tree.
|
|
@@ -302,12 +392,19 @@ impl MerkleTree {
|
|
|
302
392
|
// 1. the path is empty. this means that the ancestor is the root.
|
|
303
393
|
// 2. the path is non-empty. that means there exist a non-empty element btwn till the root.
|
|
304
394
|
|
|
395
|
+
let absolute_root_path = self.root_path.clone();
|
|
305
396
|
let new_node = match path.len() {
|
|
306
397
|
0 => {
|
|
307
398
|
// this means that the ancestor is the root.
|
|
308
399
|
// we need to create a new node and attach it to the ancestor.
|
|
309
|
-
let new_node =
|
|
310
|
-
|
|
400
|
+
let new_node = MerkleNode::new(
|
|
401
|
+
file_path.clone(),
|
|
402
|
+
Some(ancestor.clone()),
|
|
403
|
+
&self.git_ignored_files_and_dirs,
|
|
404
|
+
&absolute_root_path.as_str(),
|
|
405
|
+
self.is_git_repo
|
|
406
|
+
)
|
|
407
|
+
.await;
|
|
311
408
|
ancestor.write().await.attach_child(new_node.clone()).await;
|
|
312
409
|
new_node
|
|
313
410
|
}
|
|
@@ -318,9 +415,14 @@ impl MerkleTree {
|
|
|
318
415
|
// UNSURE: not sure this is the correct thing to do but it is the fastest.
|
|
319
416
|
// get the last thing that is not in the tree.
|
|
320
417
|
let first_child_path = path.last().unwrap();
|
|
321
|
-
let first_child =
|
|
322
|
-
|
|
323
|
-
|
|
418
|
+
let first_child = MerkleNode::new(
|
|
419
|
+
first_child_path.clone(),
|
|
420
|
+
Some(ancestor.clone()),
|
|
421
|
+
&self.git_ignored_files_and_dirs,
|
|
422
|
+
&absolute_root_path.as_str(),
|
|
423
|
+
self.is_git_repo
|
|
424
|
+
)
|
|
425
|
+
.await;
|
|
324
426
|
|
|
325
427
|
// TODO(sualeh): we should do an assertion check that the entire vec is contained here.
|
|
326
428
|
|
|
@@ -597,18 +699,58 @@ use std::future::Future;
|
|
|
597
699
|
use std::pin::Pin;
|
|
598
700
|
|
|
599
701
|
type PinnedFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
|
|
702
|
+
type IgnoredFiles = HashSet<String>;
|
|
600
703
|
|
|
601
704
|
impl MerkleNode {
|
|
602
705
|
/// please be careful using this.
|
|
603
706
|
async fn __new_unchecked(
|
|
604
707
|
file_or_directory: String,
|
|
605
708
|
parent: ParentPtr,
|
|
709
|
+
ignored_files: &IgnoredFiles,
|
|
710
|
+
absolute_root_path: &str,
|
|
711
|
+
is_git_repo: bool,
|
|
606
712
|
) -> MerkleNodePtr {
|
|
607
|
-
|
|
713
|
+
// // check if the root is a git directory.
|
|
714
|
+
// let is_git_repo =
|
|
715
|
+
// match git_utils::is_git_directory(absolute_root_path).await {
|
|
716
|
+
// Ok(is_git_repo) => is_git_repo,
|
|
717
|
+
// Err(_e) => false,
|
|
718
|
+
// };
|
|
719
|
+
let bypass_git = !is_git_repo;
|
|
720
|
+
|
|
721
|
+
MerkleNode::construct_node(
|
|
722
|
+
Path::new(&file_or_directory),
|
|
723
|
+
parent,
|
|
724
|
+
ignored_files,
|
|
725
|
+
absolute_root_path,
|
|
726
|
+
bypass_git,
|
|
727
|
+
)
|
|
728
|
+
.await
|
|
608
729
|
}
|
|
609
730
|
|
|
610
|
-
|
|
611
|
-
|
|
731
|
+
#[tracing::instrument]
|
|
732
|
+
async fn new(
|
|
733
|
+
absolute_file_or_directory: PathBuf,
|
|
734
|
+
parent: ParentPtr,
|
|
735
|
+
ignored_files: &IgnoredFiles,
|
|
736
|
+
absolute_root_path: &str,
|
|
737
|
+
is_git_repo: bool,
|
|
738
|
+
) -> MerkleNodePtr {
|
|
739
|
+
let bypass_git = !is_git_repo;
|
|
740
|
+
|
|
741
|
+
info!(
|
|
742
|
+
"constructing node for absolute_file_or_directory: {:?}",
|
|
743
|
+
absolute_file_or_directory
|
|
744
|
+
);
|
|
745
|
+
|
|
746
|
+
MerkleNode::construct_node(
|
|
747
|
+
Path::new(&absolute_file_or_directory),
|
|
748
|
+
parent,
|
|
749
|
+
ignored_files,
|
|
750
|
+
absolute_root_path,
|
|
751
|
+
bypass_git,
|
|
752
|
+
)
|
|
753
|
+
.await
|
|
612
754
|
}
|
|
613
755
|
|
|
614
756
|
/// NOT added to the tree by default.
|
|
@@ -619,38 +761,58 @@ impl MerkleNode {
|
|
|
619
761
|
// let file_hash = self.files.get_mut(&file_path).unwrap();
|
|
620
762
|
|
|
621
763
|
fn construct_node<'a>(
|
|
622
|
-
|
|
764
|
+
absolute_file_or_directory: &'a Path,
|
|
623
765
|
parent: ParentPtr,
|
|
766
|
+
ignored_files: &'a IgnoredFiles,
|
|
767
|
+
absolute_root_path: &'a str,
|
|
768
|
+
bypass_git: bool,
|
|
624
769
|
) -> PinnedFuture<'a, MerkleNodePtr> {
|
|
625
770
|
Box::pin(async move {
|
|
626
771
|
// check if it is a file
|
|
627
|
-
let path_str =
|
|
628
|
-
|
|
772
|
+
let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
|
|
773
|
+
|
|
774
|
+
if absolute_file_or_directory.is_file() {
|
|
775
|
+
tracing::info!("constructing file node for path_str: {}", path_str);
|
|
629
776
|
return Arc::new(RwLock::new(
|
|
630
777
|
MerkleNode::construct_file_node_or_error_node(
|
|
631
|
-
|
|
778
|
+
absolute_file_or_directory,
|
|
632
779
|
parent,
|
|
780
|
+
ignored_files,
|
|
633
781
|
)
|
|
634
782
|
.await,
|
|
635
783
|
));
|
|
636
784
|
}
|
|
637
785
|
|
|
786
|
+
tracing::info!("constructing directory node for path_str: {}", path_str);
|
|
787
|
+
|
|
638
788
|
// check if the directory fails the bad dir test.
|
|
639
|
-
let is_bad_dir = file_utils::is_in_bad_dir(
|
|
789
|
+
let is_bad_dir = file_utils::is_in_bad_dir(absolute_file_or_directory);
|
|
640
790
|
if is_bad_dir.is_err() || is_bad_dir.unwrap_or(false) {
|
|
641
791
|
// println!("skipping directory: {}", path_str);
|
|
642
792
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
643
|
-
Some(
|
|
793
|
+
Some(absolute_file_or_directory),
|
|
644
794
|
Some("Directory is in bad dir!".to_string()),
|
|
645
795
|
)));
|
|
646
796
|
}
|
|
647
797
|
|
|
648
|
-
let
|
|
798
|
+
let is_git_ignored_dir = ignored_files.contains(&path_str);
|
|
799
|
+
tracing::info!("is_git_ignored_dir: {}", is_git_ignored_dir);
|
|
800
|
+
|
|
801
|
+
if is_git_ignored_dir && !bypass_git {
|
|
802
|
+
tracing::info!("skipping directory: {}", path_str);
|
|
803
|
+
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
804
|
+
Some(absolute_file_or_directory),
|
|
805
|
+
Some("Directory is git ignored!".to_string()),
|
|
806
|
+
)));
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
let entries = fs::read_dir(absolute_file_or_directory);
|
|
649
810
|
match entries {
|
|
650
811
|
Ok(_) => (),
|
|
651
812
|
Err(e) => {
|
|
813
|
+
tracing::error!("error reading directory: {}", e);
|
|
652
814
|
return Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
653
|
-
Some(
|
|
815
|
+
Some(absolute_file_or_directory),
|
|
654
816
|
Some(e.to_string()),
|
|
655
817
|
)));
|
|
656
818
|
}
|
|
@@ -670,13 +832,20 @@ impl MerkleNode {
|
|
|
670
832
|
match entry {
|
|
671
833
|
Ok(entry) => {
|
|
672
834
|
children.push(
|
|
673
|
-
MerkleNode::construct_node(
|
|
674
|
-
.
|
|
835
|
+
MerkleNode::construct_node(
|
|
836
|
+
&entry.path(),
|
|
837
|
+
Some(node.clone()),
|
|
838
|
+
ignored_files,
|
|
839
|
+
absolute_root_path,
|
|
840
|
+
bypass_git,
|
|
841
|
+
)
|
|
842
|
+
.await,
|
|
675
843
|
);
|
|
676
844
|
}
|
|
677
845
|
Err(e) => {
|
|
846
|
+
tracing::error!("error reading directory: {}", e);
|
|
678
847
|
children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
|
|
679
|
-
Some(
|
|
848
|
+
Some(absolute_file_or_directory),
|
|
680
849
|
Some(e.to_string()),
|
|
681
850
|
))));
|
|
682
851
|
}
|
|
@@ -696,31 +865,37 @@ impl MerkleNode {
|
|
|
696
865
|
}
|
|
697
866
|
|
|
698
867
|
async fn construct_file_node(
|
|
699
|
-
|
|
868
|
+
absolute_file_path: &Path,
|
|
700
869
|
parent: ParentPtr,
|
|
870
|
+
ignored_files: &IgnoredFiles,
|
|
701
871
|
) -> Result<MerkleNode, String> {
|
|
702
|
-
let file_str =
|
|
872
|
+
let file_str = absolute_file_path
|
|
703
873
|
.to_str()
|
|
704
874
|
.ok_or("Could not convert file path to string!")?
|
|
705
875
|
.to_string();
|
|
706
876
|
// first see if it passes the
|
|
707
|
-
match file_utils::is_good_file(
|
|
877
|
+
match file_utils::is_good_file(absolute_file_path) {
|
|
708
878
|
Ok(_) => {}
|
|
709
879
|
Err(e) => {
|
|
710
880
|
return Err(format!("File failed runtime checks! {}", e.to_string()));
|
|
711
881
|
}
|
|
712
882
|
}
|
|
713
883
|
|
|
714
|
-
//
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
return Err(format!("
|
|
884
|
+
// check if the file is in the git ignore buffer.
|
|
885
|
+
// this is a bug right because we are not checking absoluteness here.
|
|
886
|
+
match ignored_files.contains(&file_str) {
|
|
887
|
+
true => {
|
|
888
|
+
return Err(format!("File is in git ignore buffer!"));
|
|
719
889
|
}
|
|
720
|
-
|
|
890
|
+
false => {}
|
|
891
|
+
}
|
|
721
892
|
|
|
722
893
|
// check if the file passes runtime checks.
|
|
723
|
-
match file_utils::is_good_file_runtime_check(
|
|
894
|
+
match file_utils::is_good_file_runtime_check(
|
|
895
|
+
absolute_file_path,
|
|
896
|
+
// &file_content,
|
|
897
|
+
)
|
|
898
|
+
.await
|
|
724
899
|
{
|
|
725
900
|
Ok(_) => {}
|
|
726
901
|
Err(e) => {
|
|
@@ -728,15 +903,14 @@ impl MerkleNode {
|
|
|
728
903
|
}
|
|
729
904
|
}
|
|
730
905
|
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
e.to_string()
|
|
737
|
-
|
|
738
|
-
}
|
|
739
|
-
};
|
|
906
|
+
// read the file_content to a buffer
|
|
907
|
+
let file_content =
|
|
908
|
+
match file_utils::read_string_without_bom(absolute_file_path).await {
|
|
909
|
+
Ok(content) => content,
|
|
910
|
+
Err(e) => {
|
|
911
|
+
return Err(format!("Could not read file! {}", e.to_string()));
|
|
912
|
+
}
|
|
913
|
+
};
|
|
740
914
|
|
|
741
915
|
let file_hash = compute_hash(&file_content);
|
|
742
916
|
let node = MerkleNode {
|
|
@@ -751,15 +925,23 @@ impl MerkleNode {
|
|
|
751
925
|
}
|
|
752
926
|
|
|
753
927
|
async fn construct_file_node_or_error_node(
|
|
754
|
-
|
|
928
|
+
absolute_file_path: &Path,
|
|
755
929
|
parent: ParentPtr,
|
|
930
|
+
ignored_files: &IgnoredFiles,
|
|
756
931
|
) -> MerkleNode {
|
|
757
|
-
let node = match MerkleNode::construct_file_node(
|
|
932
|
+
let node = match MerkleNode::construct_file_node(
|
|
933
|
+
absolute_file_path,
|
|
934
|
+
parent,
|
|
935
|
+
ignored_files,
|
|
936
|
+
)
|
|
937
|
+
.await
|
|
938
|
+
{
|
|
758
939
|
Ok(node) => node,
|
|
759
940
|
Err(e) => {
|
|
760
941
|
// println!("constructing error node. error: {}", e);
|
|
761
942
|
// println!("file_path: {:?}", file_path);
|
|
762
|
-
|
|
943
|
+
tracing::error!("constructing error node. error: {}", e);
|
|
944
|
+
MerkleNode::empty_node(Some(absolute_file_path), Some(e))
|
|
763
945
|
}
|
|
764
946
|
};
|
|
765
947
|
|
|
@@ -785,15 +967,50 @@ impl MerkleNode {
|
|
|
785
967
|
|
|
786
968
|
async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
|
|
787
969
|
let mut hasher = sha2::Sha256::new();
|
|
970
|
+
let mut names_and_hashes = vec![];
|
|
971
|
+
let mut non_zero_children = 0;
|
|
972
|
+
|
|
788
973
|
for child in children {
|
|
789
974
|
// check if it is an error node
|
|
790
975
|
let child_reader = child.read().await;
|
|
791
|
-
|
|
976
|
+
|
|
977
|
+
match &child_reader.node_type {
|
|
978
|
+
NodeType::File(file_name) => {
|
|
979
|
+
non_zero_children += 1;
|
|
980
|
+
names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
|
|
981
|
+
}
|
|
982
|
+
NodeType::Branch((file_name, _)) => {
|
|
983
|
+
let hash = child_reader.hash.clone();
|
|
984
|
+
if hash == "" {
|
|
985
|
+
continue;
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
non_zero_children += 1;
|
|
989
|
+
names_and_hashes.push((file_name.clone(), hash));
|
|
990
|
+
}
|
|
991
|
+
NodeType::ErrorNode(_) => {
|
|
992
|
+
continue;
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
|
|
997
|
+
// sort the list of names and hashes by the hashes!!
|
|
998
|
+
names_and_hashes
|
|
999
|
+
.sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
|
|
1000
|
+
|
|
1001
|
+
for (name, hash) in names_and_hashes {
|
|
1002
|
+
if hash == "" {
|
|
792
1003
|
continue;
|
|
793
1004
|
}
|
|
1005
|
+
hasher.update(hash);
|
|
1006
|
+
}
|
|
794
1007
|
|
|
795
|
-
|
|
1008
|
+
if non_zero_children == 0 {
|
|
1009
|
+
// this means that the branch is empty.
|
|
1010
|
+
// we should return an empty string.
|
|
1011
|
+
return "".to_string();
|
|
796
1012
|
}
|
|
1013
|
+
|
|
797
1014
|
let result = hasher.finalize();
|
|
798
1015
|
format!("{:x}", result)
|
|
799
1016
|
}
|
package/src/merkle_tree/test.rs
CHANGED
|
@@ -43,8 +43,9 @@ mod tests {
|
|
|
43
43
|
// let path = Path::new(&temp_dir_path);
|
|
44
44
|
|
|
45
45
|
// Test construct_merkle_tree() function
|
|
46
|
+
let new_set = std::collections::HashSet::<String>::new();
|
|
46
47
|
let tree =
|
|
47
|
-
MerkleTree::construct_merkle_tree(temp_dir_path.clone()).await;
|
|
48
|
+
MerkleTree::construct_merkle_tree(temp_dir_path.clone(), new_set, false).await;
|
|
48
49
|
let mut tree = match tree {
|
|
49
50
|
Ok(tree) => {
|
|
50
51
|
assert_eq!(tree.files.len(), 2);
|