@anysphere/file-service 0.0.0-e6124fba → 0.0.0-e68f3241

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1193 +0,0 @@
1
- use super::file_utils;
2
- use sha2::Digest;
3
- use std::collections::{BTreeMap, HashSet};
4
- use std::path::PathBuf;
5
- use std::vec;
6
- use std::{fs, path::Path, sync::Arc};
7
- use tokio::sync::RwLock;
8
- use tonic::async_trait;
9
- use tracing::info;
10
-
11
- pub mod local_construction;
12
- pub mod test;
13
-
14
- pub type MerkleNodePtr = Arc<RwLock<MerkleNode>>;
15
-
16
- pub struct MerkleTree {
17
- root_path: String,
18
- root: MerkleNodePtr,
19
- files: BTreeMap<String, File>,
20
- cursor: Option<usize>,
21
- git_ignored_files_and_dirs: HashSet<String>,
22
- is_git_repo: bool,
23
- }
24
-
25
- #[derive(Debug)]
26
- pub struct File {
27
- node: MerkleNodePtr,
28
- }
29
-
30
- type FileName = String;
31
- type ParentPtr = Option<MerkleNodePtr>;
32
-
33
- #[derive(Debug)]
34
- pub enum NodeType {
35
- Branch((FileName, Vec<MerkleNodePtr>)),
36
- // this is the file_name
37
- File(FileName),
38
- ErrorNode((FileName, String)),
39
- }
40
-
41
- #[derive(Debug)]
42
- pub struct MerkleNode {
43
- pub id: i32,
44
- pub node_type: NodeType,
45
- pub hash: String,
46
- pub error: Option<String>,
47
- pub parent: ParentPtr,
48
- }
49
-
50
- fn get_id() -> i32 {
51
- rand::random::<i16>() as i32
52
- }
53
-
54
- // the merkle tree needs to implement 2 important traits
55
- // 1. the local construction and update trait. this makes sure that it works well locally!
56
- // 2. the remote sync trait. we need to make sure that it can be easily synced with the remote tree when necessary.
57
-
58
- #[async_trait]
59
- pub trait LocalConstruction {
60
- async fn new(
61
- root_directory: Option<String>,
62
- ) -> Result<MerkleTree, anyhow::Error>;
63
-
64
- async fn construct_merkle_tree(
65
- root_directory: String,
66
- git_ignored_files_and_dirs: HashSet<String>,
67
- is_git_repo: bool
68
- ) -> Result<MerkleTree, anyhow::Error>;
69
-
70
- async fn update_file(
71
- &mut self,
72
- file_path: String,
73
- ) -> Result<(), anyhow::Error>;
74
-
75
- async fn delete_file(
76
- &mut self,
77
- file_path: String,
78
- ) -> Result<(), anyhow::Error>;
79
- }
80
-
81
- // #[async_trait]
82
- // pub trait RemoteSync {
83
- // async fn sync_with_remote(
84
- // &mut self,
85
- // client: super::RepositoryClient,
86
- // ) -> Result<Vec<File>, tonic::Status>;
87
- // async fn sync_subtree_node(
88
- // &mut self,
89
- // node: &MerkleNode,
90
- // client: super::RepositoryClient,
91
- // ) -> Result<Vec<File>, tonic::Status>;
92
- // }
93
-
94
- impl MerkleTree {
95
- pub fn empty_tree() -> MerkleTree {
96
- MerkleTree {
97
- root: Arc::new(RwLock::new(MerkleNode::empty_node(None, None))),
98
- files: BTreeMap::new(),
99
- root_path: "".to_string(),
100
- cursor: None,
101
- git_ignored_files_and_dirs: HashSet::new(),
102
- is_git_repo: false
103
- }
104
- }
105
-
106
- pub async fn get_subtree_hash(
107
- &self,
108
- absolute_path: &str,
109
- ) -> Result<String, anyhow::Error> {
110
- let node = match self.files.get(absolute_path) {
111
- Some(file) => file.node.clone(),
112
- None => {
113
- let all_files: Vec<String> = self.files.keys().cloned().collect();
114
- return Err(anyhow::anyhow!(
115
- "Could not find file in tree! Looking for: {}. All files: {:?}",
116
- absolute_path,
117
- all_files
118
- ));
119
- }
120
- };
121
-
122
- let node_reader = node.read().await;
123
- let node_hash = node_reader.hash.clone();
124
-
125
- info!(
126
- "get_subtree_hash for path: {}, node_hash: {}",
127
- absolute_path, node_hash
128
- );
129
-
130
- Ok(node_hash)
131
- }
132
-
133
- pub async fn get_num_embeddable_files(&self) -> Result<i32, anyhow::Error> {
134
- let mut count = 0;
135
-
136
- for (_, file) in &self.files {
137
- let file_reader = file.node.read().await;
138
- match &file_reader.node_type {
139
- NodeType::File(_) => {
140
- count += 1;
141
- }
142
- NodeType::Branch(_) => {
143
- continue;
144
- }
145
- NodeType::ErrorNode(_) => {
146
- continue;
147
- }
148
- }
149
- }
150
-
151
- Ok(count)
152
- }
153
-
154
- pub async fn get_num_embeddable_files_in_subtree(
155
- &self,
156
- absolute_path: PathBuf,
157
- ) -> Result<i32, anyhow::Error> {
158
- let mut count = 0;
159
-
160
- let absolute_path = match absolute_path.to_str() {
161
- Some(s) => s.to_string(),
162
- None => {
163
- return Err(anyhow::anyhow!(
164
- "get_num_embeddable_files_in_subtree: Failed to convert path to string"
165
- ))
166
- }
167
- };
168
-
169
- // TODO(sualeh): worth keeping this list sorted. its now a btree
170
-
171
- for (_, file) in &self.files {
172
- let file_reader = file.node.read().await;
173
- match &file_reader.node_type {
174
- NodeType::File(file_name) => {
175
- if file_name.contains(&absolute_path) {
176
- count += 1;
177
- }
178
- }
179
- NodeType::Branch(_) => {
180
- continue;
181
- }
182
- NodeType::ErrorNode(_) => {
183
- continue;
184
- }
185
- }
186
- }
187
-
188
- Ok(count)
189
- }
190
-
191
- pub async fn get_all_files(&self) -> Result<Vec<String>, anyhow::Error> {
192
- let mut files = Vec::new();
193
-
194
- for (file_name, file) in &self.files {
195
- let file_reader = file.node.read().await;
196
- match &file_reader.node_type {
197
- NodeType::File(_) => {
198
- files.push(file_name.clone());
199
- }
200
- NodeType::Branch(_) => {
201
- continue;
202
- }
203
- NodeType::ErrorNode(_) => {
204
- continue;
205
- }
206
- }
207
- }
208
-
209
- Ok(files)
210
- }
211
-
212
- pub async fn get_hashes_for_files(
213
- &self,
214
- files: Vec<String>,
215
- ) -> Result<Vec<String>, anyhow::Error> {
216
- let mut hashes = Vec::new();
217
-
218
- for file_name in files {
219
- let file = match self.files.get(&file_name) {
220
- Some(file) => file,
221
- None => {
222
- return Err(anyhow::anyhow!("Could not find file in tree!"));
223
- }
224
- };
225
-
226
- let file_reader = file.node.read().await;
227
- match &file_reader.node_type {
228
- NodeType::File(_) => {
229
- hashes.push(file_reader.hash.clone());
230
- }
231
- NodeType::Branch(_) => {
232
- continue;
233
- }
234
- NodeType::ErrorNode(_) => {
235
- continue;
236
- }
237
- }
238
- }
239
-
240
- Ok(hashes)
241
- }
242
-
243
- /// Returns a filename, and then a path from its parent to the root (which can possibly be empty.)
244
- pub async fn get_next_file_to_embed(
245
- &mut self,
246
- ) -> Result<(String, Vec<String>), anyhow::Error> {
247
- // if the cursor is none, set it to 0
248
- let cursor = match self.cursor {
249
- Some(cursor) => cursor,
250
- None => {
251
- self.cursor = Some(0);
252
- 0
253
- }
254
- };
255
-
256
- // get the thing at the cursor. while we dont find a file, we keep incrementing the cursor.
257
- let mut cursor = cursor;
258
- loop {
259
- // O(log n)
260
- let file = match self.files.values().nth(cursor) {
261
- Some(file) => file,
262
- None => {
263
- return Err(anyhow::anyhow!("Could not find file to embed!"));
264
- }
265
- };
266
-
267
- let file_reader = file.node.read().await;
268
- match &file_reader.node_type {
269
- NodeType::File(f) => {
270
- // update the cursor.
271
- self.cursor = Some(cursor + 1);
272
- let spline = self.get_spline(f).await?;
273
- return Ok((f.clone(), spline));
274
- }
275
- NodeType::Branch(_) => {
276
- cursor += 1;
277
- continue;
278
- }
279
- NodeType::ErrorNode(_) => {
280
- cursor += 1;
281
- continue;
282
- }
283
- }
284
- }
285
- }
286
-
287
- pub async fn get_all_dir_files_to_embed(
288
- &self,
289
- absolute_path: &str,
290
- ) -> Result<Vec<String>, anyhow::Error> {
291
- let mut files = Vec::new();
292
-
293
- // 1. should check that this absolute path is actually a directory.
294
- let file_node = self.files.get(absolute_path);
295
- if file_node.is_none() {
296
- return Err(anyhow::anyhow!("Could not find directory the in tree!"));
297
- }
298
-
299
- for (file_path, f) in &self.files {
300
- if !file_path.contains(absolute_path) {
301
- continue;
302
- }
303
-
304
- match f.node.read().await.node_type {
305
- NodeType::File(_) => {
306
- files.push(file_path.clone());
307
- }
308
- NodeType::Branch(_) => {
309
- continue;
310
- }
311
- NodeType::ErrorNode(_) => {
312
- continue;
313
- }
314
- }
315
- }
316
-
317
- Ok(files)
318
- }
319
-
320
- // TODO(sualeh): i need tests for this!!
321
- pub async fn get_spline(
322
- &self,
323
- absolute_path: &str,
324
- ) -> Result<Vec<String>, anyhow::Error> {
325
- let mut files = Vec::new();
326
-
327
- let current_node = match self.files.get(absolute_path) {
328
- Some(node) => {
329
- node.node.clone()
330
- }
331
- None => {
332
- return Err(anyhow::anyhow!("File not found: {}", absolute_path));
333
- }
334
- };
335
-
336
- let mut stack = Vec::new();
337
- stack.push(current_node);
338
-
339
- while let Some(node) = stack.pop() {
340
- let parent = node.read().await.parent.clone();
341
- if let Some(parent) = parent {
342
- {
343
- let parent_node = parent.read().await;
344
- match &parent_node.node_type {
345
- NodeType::File(file_name) => {
346
- files.push(file_name.clone());
347
- }
348
- NodeType::Branch((branch_name, _)) => {
349
- files.push(branch_name.clone());
350
- }
351
- _ => {
352
- continue;
353
- }
354
- }
355
- }
356
-
357
- stack.push(parent);
358
- }
359
- }
360
- Ok(files)
361
- }
362
-
363
- /// creates a new node and attaches it to the current tree.
364
- /// SPEC:
365
- /// - you are allowed to create a file with a node such that the
366
- /// file is contained within the subtree defined by the root.
367
- /// - updates the hash of the ancestor path.
368
- /// - attaches to the ancestor.
369
- /// - doenst add to the filemap
370
- /// Primary usecase:
371
- /// 1. when a new file is created.
372
- /// Don't use it for:
373
- /// 1. creating a full new tree.
374
- /// Returns:
375
- /// - the new node that was created.
376
- async fn create_new_node_and_attach_to_ancestors(
377
- &mut self,
378
- file_path: PathBuf,
379
- ) -> Result<MerkleNodePtr, anyhow::Error> {
380
- // algorithm:
381
- // 1. find the nearest parent before we reach the root.
382
- // 2. if we don't find a parent, we should definitely return an error.
383
- // 3. then we should create the chain of things down. then attach it to the parent we have.
384
- let (ancestor, path) = match self.get_ancestor_in_tree(&file_path) {
385
- Ok((ancestor, path)) => (ancestor, path),
386
- Err(e) => {
387
- return Err(anyhow::anyhow!(
388
- "Could not find ancestor in tree! {}",
389
- e.to_string()
390
- ));
391
- }
392
- };
393
-
394
- // 2 cases:
395
- // 1. the path is empty. this means that the ancestor is the root.
396
- // 2. the path is non-empty. that means there exist a non-empty element btwn till the root.
397
-
398
- let absolute_root_path = self.root_path.clone();
399
- let new_node = match path.len() {
400
- 0 => {
401
- // this means that the ancestor is the root.
402
- // we need to create a new node and attach it to the ancestor.
403
- let new_node = MerkleNode::new(
404
- file_path.clone(),
405
- Some(ancestor.clone()),
406
- &self.git_ignored_files_and_dirs,
407
- &absolute_root_path.as_str(),
408
- self.is_git_repo
409
- )
410
- .await;
411
- ancestor.write().await.attach_child(new_node.clone()).await;
412
- new_node
413
- }
414
- _ => {
415
- // this means that the ancestor is not the root.
416
- // we need to create a new node and attach it to the ancestor.
417
-
418
- // UNSURE: not sure this is the correct thing to do but it is the fastest.
419
- // get the last thing that is not in the tree.
420
- let first_child_path = path.last().unwrap();
421
- let first_child = MerkleNode::new(
422
- first_child_path.clone(),
423
- Some(ancestor.clone()),
424
- &self.git_ignored_files_and_dirs,
425
- &absolute_root_path.as_str(),
426
- self.is_git_repo
427
- )
428
- .await;
429
-
430
- // TODO(sualeh): we should do an assertion check that the entire vec is contained here.
431
-
432
- // now we need to attach the first child to the ancestor.
433
- let mut mut_ancestor = ancestor.write().await;
434
- mut_ancestor.attach_child(first_child.clone()).await;
435
- first_child
436
- }
437
- };
438
-
439
- Ok(new_node)
440
- }
441
-
442
- /// Spec:
443
- /// - adds to the tree but doesnt change things which already exist in the file.
444
- async fn add_subtree_to_filemap(&mut self, node: MerkleNodePtr) {
445
- // Traverse the subtree rooted at 'node' and add all the nodes to the file map
446
- let mut stack = Vec::new();
447
- stack.push(node);
448
-
449
- while !stack.is_empty() {
450
- // UNWRAP CHECKED AND FINE
451
- let current_node = match stack.pop() {
452
- Some(node) => node,
453
- None => continue,
454
- };
455
-
456
- let node = current_node.read().await;
457
- match &node.node_type {
458
- NodeType::File(file_path) => {
459
- // SUALEH BE CAREFUL!
460
- match self.files.get(file_path) {
461
- Some(file) => {
462
- // check if the hash is the same:
463
- let file_hash = file.node.read().await.hash.clone();
464
-
465
- if file_hash != node.hash {
466
- // update the file from the map.
467
- self.files.insert(
468
- file_path.clone(),
469
- File {
470
- node: current_node.clone(),
471
- },
472
- );
473
- }
474
- }
475
- None => {
476
- self.files.insert(
477
- file_path.clone(),
478
- File {
479
- node: current_node.clone(),
480
- },
481
- );
482
- }
483
- }
484
- }
485
- NodeType::Branch(node) => {
486
- let children = &node.1;
487
- for child in children {
488
- stack.push(child.clone());
489
- }
490
- }
491
- NodeType::ErrorNode(_) => {
492
- continue;
493
- }
494
- }
495
- }
496
- }
497
-
498
- /// PRECONDITION: the file exists in the tree!!
499
- /// SPEC:
500
- /// - attaches to the ancestor.
501
- /// - adds to the filemap
502
- /// - updates hashes of ancestor path.
503
- async fn attach_new_node_to_tree(
504
- &mut self,
505
- file_path: String,
506
- ) -> Result<(), anyhow::Error> {
507
- let path = PathBuf::from(file_path.clone());
508
- match self.create_new_node_and_attach_to_ancestors(path).await {
509
- Ok(node_ptr) => {
510
- self.add_subtree_to_filemap(node_ptr).await;
511
- Ok(())
512
- }
513
- Err(e) => Err(anyhow::anyhow!(
514
- "Could not create new node and attach to ancestors! {}",
515
- e.to_string()
516
- )),
517
- }
518
- }
519
-
520
- /// MUTATES MUTATES MUTATES
521
- /// Precondition:
522
- /// - this can only be called on a file node!!!
523
- async fn update_node(
524
- &mut self,
525
- node: MerkleNodePtr,
526
- ) -> Result<(), anyhow::Error> {
527
- let node_reader = node.read().await;
528
-
529
- // first
530
- let file_string = match &node_reader.node_type {
531
- NodeType::File(file_path) => file_path,
532
- NodeType::Branch(_) => {
533
- // FIXME(sualeh): we shouldn't have panics around release time.
534
- panic!("This should not happen!");
535
- }
536
- NodeType::ErrorNode(_) => {
537
- panic!("This should not happen!");
538
- }
539
- };
540
- let file_string = file_string.clone();
541
- let file_path = PathBuf::from(file_string.clone());
542
-
543
- println!("file_string: {:?}", file_string);
544
- // get the parent, and remove this node from the parent.
545
- let parent_node = node_reader.parent.clone();
546
-
547
- match parent_node {
548
- Some(parent_node) => {
549
- let mut mut_parent = parent_node.write().await;
550
-
551
- // check if the parent node has the children i want.
552
- match mut_parent.node_type {
553
- NodeType::Branch(ref mut node) => {
554
- let children = &mut node.1;
555
- let mut found = false;
556
- let mut index = 0;
557
-
558
- for (i, child) in children.iter().enumerate() {
559
- match &child.read().await.node_type {
560
- NodeType::File(child_file_path) => {
561
- if child_file_path == &file_string {
562
- found = true;
563
- index = i;
564
- break;
565
- }
566
- }
567
- NodeType::Branch(_) => {
568
- continue;
569
- }
570
- NodeType::ErrorNode(_) => {
571
- continue;
572
- }
573
- }
574
- }
575
-
576
- if found {
577
- // remove the child from the parent.
578
- let _ = children.remove(index);
579
- }
580
- }
581
- NodeType::File(_) => {
582
- panic!("This should not happen! I am a file node!");
583
- }
584
- NodeType::ErrorNode(_) => {
585
- panic!("This should not happen! I am an error node!");
586
- }
587
- };
588
- }
589
- None => {
590
- panic!("We should always have a parent :( ");
591
- }
592
- }
593
-
594
- let node_ptr = self
595
- .create_new_node_and_attach_to_ancestors(file_path)
596
- .await;
597
-
598
- match node_ptr {
599
- Ok(node_ptr) => {
600
- self.files.insert(
601
- file_string,
602
- File {
603
- node: node_ptr.clone(),
604
- },
605
- );
606
- }
607
- Err(e) => {
608
- return Err(anyhow::anyhow!(
609
- "Could not create new node and attach to ancestors! {}",
610
- e.to_string()
611
- ));
612
- }
613
- }
614
-
615
- Ok(())
616
- }
617
-
618
- fn get_parent_node<'a>(
619
- &self,
620
- file_path: &'a Path,
621
- ) -> Result<MerkleNodePtr, anyhow::Error> {
622
- let parent_name = file_path.parent();
623
-
624
- match parent_name {
625
- Some(parent_name) => {
626
- // UNWRAP CHECKED AND FINE
627
- println!("parent_name: {:?}", parent_name);
628
- let parent_name = parent_name.to_str().unwrap().to_string();
629
- let parent_node = match self.files.get(&parent_name) {
630
- Some(file) => file.node.clone(),
631
- None => {
632
- return Err(anyhow::anyhow!("Could not get parent node! 2"));
633
- }
634
- };
635
-
636
- Ok(parent_node)
637
- }
638
- None => {
639
- return Err(anyhow::anyhow!("Could not get parent node! 3"));
640
- }
641
- }
642
- }
643
-
644
- /// SPEC:
645
- /// This function retrieves the ancestor node in the Merkle tree for a given file path.
646
- /// It returns a tuple containing the ancestor node and a vector of paths from the ancestor to the file.
647
- /// If the file is not in the tree, an error is returned.
648
- ///
649
- /// Postcondition:
650
- /// - the vector contains all the paths not including the ancestor itself.
651
- ///
652
- fn get_ancestor_in_tree<'a>(
653
- &self,
654
- file_path: &'a Path,
655
- ) -> Result<(MerkleNodePtr, Vec<PathBuf>), anyhow::Error> {
656
- let mut current_path = file_path;
657
- let mut path_vec = Vec::new();
658
- let root = self.root_path.clone();
659
-
660
- if !current_path.to_str().unwrap().contains(&root) {
661
- return Err(anyhow::anyhow!("File is not in the tree!"));
662
- }
663
-
664
- loop {
665
- let parent_path = current_path.parent();
666
-
667
- match parent_path {
668
- Some(parent_path) => match parent_path.to_str() {
669
- Some(parent_name) => {
670
- if self.files.contains_key(parent_name) {
671
- match self.files.get(parent_name) {
672
- Some(file) => return Ok((file.node.clone(), path_vec)),
673
- None => {
674
- return Err(anyhow::anyhow!(
675
- "Wow, this is weird. This should not happen!"
676
- ));
677
- }
678
- };
679
- } else {
680
- path_vec.push(parent_path.to_path_buf());
681
- current_path = parent_path;
682
- }
683
- }
684
- None => {
685
- return Err(anyhow::anyhow!("Could not convert path to string!"));
686
- }
687
- },
688
- None => {
689
- return Err(anyhow::anyhow!("Couldn't find ancestor node!"));
690
- }
691
- };
692
-
693
- // check if we are at the root.
694
- if current_path.to_str().unwrap() == &root {
695
- panic!("This should not happen!");
696
- }
697
- }
698
- }
699
- }
700
-
701
- use std::future::Future;
702
- use std::pin::Pin;
703
-
704
- type PinnedFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
705
- type IgnoredFiles = HashSet<String>;
706
-
707
- impl MerkleNode {
708
- /// please be careful using this.
709
- async fn __new_unchecked(
710
- file_or_directory: String,
711
- parent: ParentPtr,
712
- ignored_files: &IgnoredFiles,
713
- absolute_root_path: &str,
714
- is_git_repo: bool,
715
- ) -> MerkleNodePtr {
716
- // // check if the root is a git directory.
717
- // let is_git_repo =
718
- // match git_utils::is_git_directory(absolute_root_path).await {
719
- // Ok(is_git_repo) => is_git_repo,
720
- // Err(_e) => false,
721
- // };
722
- let bypass_git = !is_git_repo;
723
-
724
- MerkleNode::construct_node(
725
- Path::new(&file_or_directory),
726
- parent,
727
- ignored_files,
728
- absolute_root_path,
729
- bypass_git,
730
- )
731
- .await
732
- }
733
-
734
- async fn new(
735
- absolute_file_or_directory: PathBuf,
736
- parent: ParentPtr,
737
- ignored_files: &IgnoredFiles,
738
- absolute_root_path: &str,
739
- is_git_repo: bool,
740
- ) -> MerkleNodePtr {
741
- let bypass_git = !is_git_repo;
742
-
743
- info!(
744
- "constructing node for absolute_file_or_directory: {:?}",
745
- absolute_file_or_directory
746
- );
747
-
748
- MerkleNode::construct_node(
749
- Path::new(&absolute_file_or_directory),
750
- parent,
751
- ignored_files,
752
- absolute_root_path,
753
- bypass_git,
754
- )
755
- .await
756
- }
757
-
758
- /// NOT added to the tree by default.
759
- // async fn from_file(file_path: String) -> MerkleNode {
760
- // let path = Path::new(&file_path);
761
- // if path.exists() {
762
- // // Update the file in two places.
763
- // let file_hash = self.files.get_mut(&file_path).unwrap();
764
-
765
- fn construct_node<'a>(
766
- absolute_file_or_directory: &'a Path,
767
- parent: ParentPtr,
768
- ignored_files: &'a IgnoredFiles,
769
- absolute_root_path: &'a str,
770
- bypass_git: bool,
771
- ) -> PinnedFuture<'a, MerkleNodePtr> {
772
- Box::pin(async move {
773
- // check if it is a file
774
- let path_str = absolute_file_or_directory.to_str().unwrap().to_string();
775
- if absolute_file_or_directory.is_file() {
776
- return Arc::new(RwLock::new(
777
- MerkleNode::construct_file_node_or_error_node(
778
- absolute_file_or_directory,
779
- parent,
780
- ignored_files,
781
- )
782
- .await,
783
- ));
784
- }
785
-
786
- // check if the directory fails the bad dir test.
787
- let is_bad_dir = file_utils::is_in_bad_dir(absolute_file_or_directory);
788
- if is_bad_dir.is_err() || is_bad_dir.unwrap_or(false) {
789
- // println!("skipping directory: {}", path_str);
790
- return Arc::new(RwLock::new(MerkleNode::empty_node(
791
- Some(absolute_file_or_directory),
792
- Some("Directory is in bad dir!".to_string()),
793
- )));
794
- }
795
-
796
- let is_git_ignored_dir = ignored_files.contains(&path_str);
797
-
798
- if is_git_ignored_dir && !bypass_git {
799
- return Arc::new(RwLock::new(MerkleNode::empty_node(
800
- Some(absolute_file_or_directory),
801
- Some("Directory is git ignored!".to_string()),
802
- )));
803
- }
804
-
805
- let entries = fs::read_dir(absolute_file_or_directory);
806
- match entries {
807
- Ok(_) => (),
808
- Err(e) => {
809
- return Arc::new(RwLock::new(MerkleNode::empty_node(
810
- Some(absolute_file_or_directory),
811
- Some(e.to_string()),
812
- )));
813
- }
814
- }
815
- let entries = entries.unwrap();
816
- let mut children = Vec::<MerkleNodePtr>::new();
817
-
818
- let node = Arc::new(RwLock::new(MerkleNode {
819
- id: get_id(),
820
- node_type: NodeType::Branch((path_str.clone(), vec![])),
821
- hash: "".to_string(),
822
- error: None,
823
- parent,
824
- }));
825
-
826
- for entry in entries {
827
- match entry {
828
- Ok(entry) => {
829
- children.push(
830
- MerkleNode::construct_node(
831
- &entry.path(),
832
- Some(node.clone()),
833
- ignored_files,
834
- absolute_root_path,
835
- bypass_git,
836
- )
837
- .await,
838
- );
839
- }
840
- Err(e) => {
841
- children.push(Arc::new(RwLock::new(MerkleNode::empty_node(
842
- Some(absolute_file_or_directory),
843
- Some(e.to_string()),
844
- ))));
845
- }
846
- }
847
- }
848
-
849
- let node_hash = MerkleNode::compute_branch_hash(&children).await;
850
- // get a write lock on the node
851
- let mut node_writer = node.write().await;
852
- node_writer.node_type = NodeType::Branch((path_str.clone(), children));
853
- node_writer.hash = node_hash.clone();
854
- // release the lock
855
- drop(node_writer);
856
-
857
- node
858
- })
859
- }
860
-
861
- async fn construct_file_node(
862
- absolute_file_path: &Path,
863
- parent: ParentPtr,
864
- ignored_files: &IgnoredFiles,
865
- ) -> Result<MerkleNode, String> {
866
- let file_str = absolute_file_path
867
- .to_str()
868
- .ok_or("Could not convert file path to string!")?
869
- .to_string();
870
- // first see if it passes the
871
- match file_utils::is_good_file(absolute_file_path) {
872
- Ok(_) => {}
873
- Err(e) => {
874
- return Err(format!("File failed runtime checks! {}", e.to_string()));
875
- }
876
- }
877
-
878
- // check if the file is in the git ignore buffer.
879
- // this is a bug right because we are not checking absoluteness here.
880
- match ignored_files.contains(&file_str) {
881
- true => {
882
- return Err(format!("File is in git ignore buffer!"));
883
- }
884
- false => {}
885
- }
886
-
887
- // check if the file passes runtime checks.
888
- match file_utils::is_good_file_runtime_check(
889
- absolute_file_path,
890
- // &file_content,
891
- )
892
- .await
893
- {
894
- Ok(_) => {}
895
- Err(e) => {
896
- return Err(format!("File failed runtime checks! {}", e.to_string()));
897
- }
898
- }
899
-
900
- // read the file_content to a buffer
901
- let file_content =
902
- match file_utils::read_string_without_bom(absolute_file_path).await {
903
- Ok(content) => content,
904
- Err(e) => {
905
- return Err(format!("Could not read file! {}", e.to_string()));
906
- }
907
- };
908
-
909
- let file_hash = compute_hash(&file_content);
910
- let node = MerkleNode {
911
- id: get_id(),
912
- node_type: NodeType::File(file_str),
913
- hash: file_hash.clone(),
914
- error: None,
915
- parent,
916
- };
917
-
918
- Ok(node)
919
- }
920
-
921
- async fn construct_file_node_or_error_node(
922
- absolute_file_path: &Path,
923
- parent: ParentPtr,
924
- ignored_files: &IgnoredFiles,
925
- ) -> MerkleNode {
926
- let node = match MerkleNode::construct_file_node(
927
- absolute_file_path,
928
- parent,
929
- ignored_files,
930
- )
931
- .await
932
- {
933
- Ok(node) => node,
934
- Err(e) => {
935
- // println!("constructing error node. error: {}", e);
936
- // println!("file_path: {:?}", file_path);
937
- MerkleNode::empty_node(Some(absolute_file_path), Some(e))
938
- }
939
- };
940
-
941
- node
942
- }
943
-
944
- /// PRECONDITION: this must be a branch node.
945
- async fn attach_child(&mut self, child: MerkleNodePtr) {
946
- match &mut self.node_type {
947
- NodeType::Branch(node) => {
948
- let children = &mut node.1;
949
- children.push(child);
950
- self.update_hash_of_ancestor_path();
951
- }
952
- NodeType::File(_) => {
953
- panic!("This should not happen!");
954
- }
955
- NodeType::ErrorNode(_) => {
956
- panic!("This should not happen!");
957
- }
958
- }
959
- }
960
-
961
- async fn compute_branch_hash(children: &[MerkleNodePtr]) -> String {
962
- let mut hasher = sha2::Sha256::new();
963
- let mut names_and_hashes = vec![];
964
- let mut non_zero_children = 0;
965
-
966
- for child in children {
967
- // check if it is an error node
968
- let child_reader = child.read().await;
969
-
970
- match &child_reader.node_type {
971
- NodeType::File(file_name) => {
972
- non_zero_children += 1;
973
- names_and_hashes.push((file_name.clone(), child_reader.hash.clone()));
974
- }
975
- NodeType::Branch((file_name, _)) => {
976
- let hash = child_reader.hash.clone();
977
- if hash == "" {
978
- continue;
979
- }
980
-
981
- non_zero_children += 1;
982
- names_and_hashes.push((file_name.clone(), hash));
983
- }
984
- NodeType::ErrorNode(_) => {
985
- continue;
986
- }
987
- }
988
- }
989
-
990
- // sort the list of names and hashes by the hashes!!
991
- names_and_hashes
992
- .sort_by(|a, b| a.1.to_lowercase().cmp(&b.1.to_lowercase()));
993
-
994
- for (name, hash) in names_and_hashes {
995
- if hash == "" {
996
- continue;
997
- }
998
- info!("name: {}, hash: {}", name, hash);
999
- hasher.update(hash);
1000
- }
1001
-
1002
- if non_zero_children == 0 {
1003
- // this means that the branch is empty.
1004
- // we should return an empty string.
1005
- return "".to_string();
1006
- }
1007
-
1008
- let result = hasher.finalize();
1009
- format!("{:x}", result)
1010
- }
1011
-
1012
- fn update_hash_of_ancestor_path<'a>(&'a mut self) -> PinnedFuture<'a, ()> {
1013
- Box::pin(async move {
1014
- self.hash = match &self.node_type {
1015
- NodeType::Branch(node) => {
1016
- let children = &node.1;
1017
- MerkleNode::compute_branch_hash(children).await
1018
- }
1019
- NodeType::File(_) => {
1020
- panic!("This should not happen!");
1021
- }
1022
- NodeType::ErrorNode(_) => {
1023
- panic!("This should not happen!");
1024
- }
1025
- };
1026
-
1027
- // propogate it up to the parents as long as they are not null
1028
- match &self.parent {
1029
- Some(parent) => {
1030
- let mut mut_parent = parent.write().await;
1031
- mut_parent.update_hash_of_ancestor_path().await;
1032
- }
1033
- None => {
1034
- println!("This node has no parent!");
1035
- }
1036
- }
1037
- })
1038
- }
1039
-
1040
- fn empty_node(file_name: Option<&Path>, error: Option<String>) -> MerkleNode {
1041
- if let Some(file_name) = file_name {
1042
- let file_name = file_name.to_str();
1043
- match file_name {
1044
- Some(file_name) => {
1045
- return MerkleNode {
1046
- id: get_id(),
1047
- node_type: NodeType::ErrorNode((
1048
- file_name.to_string(),
1049
- error.clone().unwrap_or_default(),
1050
- )),
1051
- hash: "".to_string(),
1052
- error,
1053
- parent: None,
1054
- };
1055
- }
1056
- None => {
1057
- return MerkleNode {
1058
- id: get_id(),
1059
- node_type: NodeType::ErrorNode((
1060
- "".to_string(),
1061
- error.clone().unwrap_or_default(),
1062
- )),
1063
- hash: "".to_string(),
1064
- error: Some(
1065
- "FilenameError: Could not convert file name to string!"
1066
- .to_string()
1067
- + &error.unwrap_or_default(),
1068
- ),
1069
- parent: None,
1070
- };
1071
- }
1072
- }
1073
- } else {
1074
- return MerkleNode {
1075
- id: -1,
1076
- node_type: NodeType::ErrorNode((
1077
- "".to_string(),
1078
- error.clone().unwrap_or_default(),
1079
- )),
1080
- hash: "".to_string(),
1081
- error,
1082
- parent: None,
1083
- };
1084
- }
1085
- }
1086
-
1087
- fn fmt(
1088
- &self,
1089
- f: &mut std::fmt::Formatter<'_>,
1090
- indent: usize,
1091
- ) -> std::fmt::Result {
1092
- match &self.node_type {
1093
- NodeType::Branch(node) => {
1094
- // print yourself
1095
- writeln!(f, "{}BRANCH", " ".repeat(indent * 2))?;
1096
- writeln!(
1097
- f,
1098
- "{}ID: {}, Hash: {}\n",
1099
- " ".repeat(indent * 2),
1100
- self.id,
1101
- self.hash
1102
- )?;
1103
-
1104
- for child in &node.1 {
1105
- let child_read = child.try_read();
1106
- if child_read.is_err() {
1107
- writeln!(f, "------")?;
1108
- writeln!(f, "Could not get read lock on a child node!")?;
1109
- writeln!(f, "------")?;
1110
- }
1111
- child_read.unwrap().fmt(f, indent + 1)?;
1112
- }
1113
- }
1114
- NodeType::File(file_name) => {
1115
- writeln!(f, "{}FILE", " ".repeat(indent * 2))?;
1116
- if let Some(error) = &self.error {
1117
- writeln!(
1118
- f,
1119
- "{}ID: {}, File: {}, Hash: {} Error: {}",
1120
- " ".repeat(indent * 2),
1121
- self.id,
1122
- file_name,
1123
- self.hash,
1124
- error
1125
- )?;
1126
- } else {
1127
- writeln!(
1128
- f,
1129
- "{}ID: {}, File: {}, Hash: {}",
1130
- " ".repeat(indent * 2),
1131
- self.id,
1132
- file_name,
1133
- self.hash,
1134
- )?;
1135
- }
1136
- }
1137
- NodeType::ErrorNode(error) => {
1138
- writeln!(f, "{}Error Node:", " ".repeat(indent * 2))?;
1139
- writeln!(f, "{}file: {}", " ".repeat(indent * 2), error.0)?;
1140
- writeln!(f, "{}error: {}", " ".repeat(indent * 2), error.1)?;
1141
- }
1142
- }
1143
- Ok(())
1144
- }
1145
- }
1146
-
1147
- fn compute_hash(file_content: &str) -> String {
1148
- let mut hasher = sha2::Sha256::new();
1149
- hasher.update(file_content);
1150
- let result = hasher.finalize();
1151
-
1152
- format!("{:x}", result)
1153
- }
1154
-
1155
- impl std::fmt::Display for MerkleTree {
1156
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1157
- write!(f, "---------------------------------\n")?;
1158
- let root = self.root.try_read();
1159
- if root.is_err() {
1160
- write!(f, "Could not get read lock on root node!")?;
1161
- write!(f, "the root path is: {}", self.root_path)?;
1162
- write!(f, "---------------------------------\n")?;
1163
- return Ok(());
1164
- }
1165
-
1166
- // UNWRAP CHECKED AND FINE
1167
- let root = root.unwrap();
1168
- write!(f, "MerkleTree with root hash: {}\n", root.hash)?;
1169
- write!(f, "Tree:\n{}\n", root)?;
1170
- // dont uncomment: this will be an infinite loop
1171
- // write!(f, "Files: {:#?}\n", self.files)?;
1172
- write!(f, "Number of files: {}\n", self.files.len())?;
1173
- write!(f, "---------------------------------\n")
1174
- }
1175
- }
1176
-
1177
- impl std::fmt::Display for MerkleNode {
1178
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1179
- self.fmt(f, 0)
1180
- }
1181
- }
1182
-
1183
- impl std::fmt::Display for NodeType {
1184
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1185
- match self {
1186
- NodeType::Branch(_) => write!(f, "Branch"),
1187
- NodeType::File(file_name) => write!(f, "File: {}", file_name),
1188
- NodeType::ErrorNode((file_name, error)) => {
1189
- write!(f, "ErrorNode: {} {}", file_name, error)
1190
- }
1191
- }
1192
- }
1193
- }