dirshot 0.1.2.tar.gz → 0.1.3.tar.gz

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dirshot
-Version: 0.1.2
+Version: 0.1.3
 Summary: A flexible utility for creating project snapshots and searching for files.
 Author-email: init-helpful <init.helpful@gmail.com>
 Project-URL: Homepage, https://github.com/init-helpful/dirshot
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dirshot"
-version = "0.1.2"
+version = "0.1.3"
 authors = [
   { name="init-helpful", email="init.helpful@gmail.com" },
 ]
@@ -1,7 +1,7 @@
 import os
 import sys
 import re
-import time # Imported for the fallback progress bar
+import time
 from pathlib import Path
 from dataclasses import dataclass, field
 from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
@@ -18,7 +18,9 @@ except ImportError:
     class tqdm:
         """A simple, text-based progress bar fallback if tqdm is not installed."""
 
-        def __init__(self, iterable=None, total=None, desc="", unit="it", **kwargs):
+        def __init__(
+            self, iterable=None, total=None, desc="", unit="it", postfix=None, **kwargs
+        ):
             self.iterable = iterable
             self.total = (
                 total
@@ -30,21 +32,20 @@ except ImportError:
             self.current = 0
             self.start_time = time.time()
             self._last_update_time = 0
+            self._postfix = postfix or {}
 
         def __iter__(self):
+            if self.iterable is None:
+                raise TypeError("tqdm fallback must be initialized with an iterable.")
             for obj in self.iterable:
                 yield obj
                 self.update(1)
-            # The loop is finished, ensure the bar is 100% and close
-            if self.total is not None and self.current < self.total:
-                self.update(self.total - self.current)
             self.close()
 
         def update(self, n=1):
             """Update the progress bar by n steps."""
             self.current += n
             now = time.time()
-            # Throttle screen updates to prevent flickering and performance loss
             if (
                 self.total is None
                 or now - self._last_update_time > 0.1
@@ -58,19 +59,29 @@ except ImportError:
             self.desc = desc
             self._draw()
 
+        def set_postfix_str(self, s: str):
+            self._postfix["info"] = s
+            self._draw()
+
         def _draw(self):
             """Draw the progress bar to the console."""
-            if self.total:
+            postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
+
+            if self.total and self.total > 0:
                 percent = int((self.current / self.total) * 100)
                 bar_length = 25
                 filled_length = int(bar_length * self.current // self.total)
                 bar = "█" * filled_length + "-" * (bar_length - filled_length)
-                # Use carriage return to print on the same line
                 progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
-                sys.stdout.write(progress_line)
             else: # Case where total is not known
-                sys.stdout.write(f"\r{self.desc}: {self.current} {self.unit}")
+                progress_line = f"\r{self.desc}: {self.current} {self.unit}"
+
+            if postfix_str:
+                progress_line += f" [{postfix_str}]"
 
+            # Pad with spaces to clear previous, longer lines
+            terminal_width = 80
+            sys.stdout.write(progress_line.ljust(terminal_width))
             sys.stdout.flush()
 
         def close(self):
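Aside (not part of the diff): the fallback class above drives a single-line console bar by combining a carriage return, ljust() padding, and a time-based redraw throttle. Below is a minimal, self-contained sketch of that same pattern; it is not code from the dirshot package, and all names and the dummy workload are hypothetical.

    # Illustrative sketch only -- a simplified version of the fallback bar above.
    import sys
    import time

    def draw_bar(desc, current, total, width=25, line_width=80):
        # Carriage return redraws in place; ljust() clears leftover characters
        # from a previous, longer line (the same trick the fallback's _draw uses).
        filled = int(width * current // total)
        bar = "#" * filled + "-" * (width - filled)
        percent = int((current / total) * 100)
        line = f"\r{desc}: {percent}%|{bar}| {current}/{total}"
        sys.stdout.write(line.ljust(line_width))
        sys.stdout.flush()

    last_draw = 0.0
    total = 200
    for i in range(1, total + 1):
        time.sleep(0.01)  # stand-in for real work
        now = time.time()
        if now - last_draw > 0.1 or i == total:  # throttle redraws, like update()
            draw_bar("Demo", i, total)
            last_draw = now
    print()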
@@ -88,6 +99,72 @@ TREE_HEADER_TEXT = "Project File Structure"
 FILE_HEADER_PREFIX = "FILE: "
 TOKEN_APPROX_MODE = "CHAR_COUNT"
 
+# List of binary file extensions to skip during content search
+BINARY_FILE_EXTENSIONS = {
+    # Images
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".bmp",
+    ".ico",
+    ".tiff",
+    ".webp",
+    # Documents
+    ".pdf",
+    ".doc",
+    ".docx",
+    ".xls",
+    ".xlsx",
+    ".ppt",
+    ".pptx",
+    ".odt",
+    ".ods",
+    # Archives
+    ".zip",
+    ".gz",
+    ".tar",
+    ".rar",
+    ".7z",
+    ".bz2",
+    ".xz",
+    # Executables & Binaries
+    ".exe",
+    ".dll",
+    ".so",
+    ".o",
+    ".a",
+    ".lib",
+    ".bin",
+    ".dat",
+    ".db",
+    ".sqlite",
+    ".img",
+    ".iso",
+    # Compiled Code
+    ".class",
+    ".jar",
+    ".war",
+    ".pyc",
+    ".pyo",
+    # Audio/Video
+    ".mp3",
+    ".wav",
+    ".flac",
+    ".ogg",
+    ".mp4",
+    ".mkv",
+    ".avi",
+    ".mov",
+    ".wmv",
+    # Fonts
+    ".ttf",
+    ".otf",
+    ".woff",
+    ".woff2",
+}
+
+
 # --- Public Enums for Import and Usage ---
 
 
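Aside (not part of the diff): the new BINARY_FILE_EXTENSIONS set is consulted through a lower-cased suffix check before any file content is read (see the process_file_for_search hunk below). A small illustrative sketch of that check follows; the set here is abbreviated and the helper name is hypothetical.

    # Illustrative sketch only -- abbreviated stand-in for the full set above.
    from pathlib import Path

    BINARY_FILE_EXTENSIONS = {".png", ".zip", ".exe", ".pyc"}

    def looks_binary(path: Path) -> bool:
        # Same shape of check the diff adds: compare the lower-cased suffix
        # against the known-binary extension set.
        return path.suffix.lower() in BINARY_FILE_EXTENSIONS

    print(looks_binary(Path("logo.PNG")))  # True  -> content search is skipped
    print(looks_binary(Path("main.py")))   # False -> file contents are searched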
@@ -273,17 +350,25 @@ def process_file_for_search(
     search_file_contents: bool,
     full_path_compare: bool,
 ) -> Optional[Path]:
+    """
+    Checks a single file for keyword matches. Skips content search for binary files.
+    """
     compare_target = str(file_path) if full_path_compare else file_path.name
     if any(key in compare_target.lower() for key in normalized_keywords):
         return file_path
+
     if search_file_contents:
+        # Before reading content, check if it's a known binary file type
+        if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
+            return None # Do not attempt to read binary file content
+
         try:
             with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
                 for line in f:
                     if any(key in line.lower() for key in normalized_keywords):
                         return file_path
         except (IOError, OSError):
-            pass
+            pass # Ignore files that can't be opened
     return None
 
 
@@ -487,13 +572,12 @@ def _collate_content_to_file(
 ) -> None:
     """
     Collates content to a string buffer, calculates token count,
-    and then writes to the output file.
+    and then writes to the output file with a progress bar.
     """
     output_file_path = Path(output_file_path_str).resolve()
     output_file_path.parent.mkdir(parents=True, exist_ok=True)
     separator_line = separator_char * separator_line_len
 
-    # Use an in-memory buffer to build the output first
     buffer = StringIO()
 
     if tree_content_lines:
@@ -505,40 +589,41 @@
                     "Key: [I: Included f/d | T: Total f/d in original dir]\n"
                     " (f=files, d=directories)\n\n"
                 )
-            else: # ProjectMode.SEARCH
+            else:
                 stats_key = (
                     "Key: [M: Matched files/dirs]\n" " (f=files, d=directories)\n\n"
                 )
             buffer.write(stats_key)
-        tree_content = "\n".join(tree_content_lines)
-        buffer.write(tree_content + "\n")
+        buffer.write("\n".join(tree_content_lines) + "\n")
         buffer.write(f"\n{separator_line}\n\n")
 
-    # This message is for the file content, not the console.
     if not files_to_process:
         message = (
             "No files found matching the specified criteria.\n"
             if mode == ProjectMode.SEARCH
-            else "No files found matching the specified criteria for content aggregation.\n"
+            else "No files found matching specified criteria for content aggregation.\n"
         )
         buffer.write(message)
     else:
-        for file_info in files_to_process:
+
+        collation_bar = tqdm(
+            files_to_process, desc="Phase 3: Collating files", unit="file", leave=False
+        )
+        for file_info in collation_bar:
+            collation_bar.set_postfix_str(file_info.relative_path_posix, refresh=True)
             header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
             buffer.write(header_content)
             try:
                 with open(
                     file_info.absolute_path, "r", encoding=encoding, errors="replace"
                 ) as infile:
-                    file_content = infile.read()
-                    buffer.write(file_content)
+                    buffer.write(infile.read())
                 buffer.write("\n\n")
             except Exception:
                 buffer.write(
                     f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
                 )
 
-    # Get the complete content from the buffer
     final_content = buffer.getvalue()
     total_token_count = 0
     mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
@@ -549,24 +634,19 @@ def _collate_content_to_file(
    elif TOKEN_APPROX_MODE == "WORD_COUNT":
        total_token_count = len(final_content.split())
 
-    # Now, write everything to the actual file
     try:
         with open(output_file_path, "w", encoding=encoding) as outfile:
             if show_token_count:
-                # Add the token count at the top of the file as requested
                 outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
-
-            # Write the main content
             outfile.write(final_content)
     except IOError as e:
         print(f"\nError: Could not write to output file '{output_file_path}': {e}")
         return
 
-    # Final console output for user feedback
     if mode == ProjectMode.SEARCH:
         if files_to_process:
-            print("\nSuccess! Collation complete.")
-    else: # Filter mode has its own messaging pattern
+            print("Success! Collation complete.")
+    else:
         print(f"\nProcess complete. Output written to: {output_file_path}")
         if len(files_to_process) > 0:
             print(
@@ -609,22 +689,34 @@ def filter_and_append_content(
         else None
     )
     files_to_process: List[FileToProcess] = []
-    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
-        current_dir_path = Path(dirpath_str)
-        orig_dirnames = list(dirnames)
-        dirnames[:] = []
-        for d_name in orig_dirnames:
-            dir_abs_path = current_dir_path / d_name
-            if _should_include_entry(dir_abs_path, root_dir, criteria, is_dir=True):
-                dirnames.append(d_name)
-        for filename in filenames:
-            file_abs_path = current_dir_path / filename
-            if _should_include_entry(file_abs_path, root_dir, criteria, is_dir=False):
-                files_to_process.append(
-                    FileToProcess(
-                        file_abs_path, file_abs_path.relative_to(root_dir).as_posix()
-                    )
+
+    with tqdm(desc="Discovering files", unit="dir") as discovery_bar:
+        for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
+            discovery_bar.update(1)
+            discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
+
+            current_dir_path = Path(dirpath_str)
+            orig_dirnames = list(dirnames)
+            dirnames[:] = [
+                d
+                for d in orig_dirnames
+                if _should_include_entry(
+                    current_dir_path / d, root_dir, criteria, is_dir=True
                 )
+            ]
+
+            for filename in filenames:
+                file_abs_path = current_dir_path / filename
+                if _should_include_entry(
+                    file_abs_path, root_dir, criteria, is_dir=False
+                ):
+                    files_to_process.append(
+                        FileToProcess(
+                            file_abs_path,
+                            file_abs_path.relative_to(root_dir).as_posix(),
+                        )
+                    )
+
     files_to_process.sort(key=lambda f_info: f_info.relative_path_posix.lower())
     _collate_content_to_file(
         output_file_path_str,
@@ -654,7 +746,7 @@ def search_and_collate_content(
     show_token_count: bool,
     show_tree_stats: bool,
 ) -> None:
-    """SEARCH MODE: Scans for files that match a substring in their path/name or content."""
+    """SEARCH MODE: Scans for files that match a substring with multi-phase progress bars."""
     criteria = FilterCriteria.normalize_inputs(
         file_extensions_to_check,
         None,
@@ -670,41 +762,52 @@
         print("Error: Search mode requires 'search_keywords' to be provided.")
         return
 
-    print("Phase 1: Finding all matching files...")
     if criteria.ignore_path_components:
         print(
-            f"Ignoring directories and files containing: {', '.join(criteria.ignore_path_components)}"
+            f"Ignoring directories containing: {', '.join(sorted(list(criteria.ignore_path_components)))}"
        )
 
     candidate_files: List[Path] = []
-    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
-        current_dir_path = Path(dirpath_str)
-        # Prune directories based on ignore criteria
-        dirnames[:] = [
-            d
-            for d in dirnames
-            if (current_dir_path / d).name.lower()
-            not in criteria.ignore_path_components
-        ]
 
-        for filename in filenames:
-            file_abs_path = current_dir_path / filename
-            # Also ignore individual files based on path components
-            try:
-                relative_parts = file_abs_path.relative_to(root_dir).parts
-                if any(
-                    part.lower() in criteria.ignore_path_components
-                    for part in relative_parts
-                ):
+    with tqdm(desc="Phase 1: Discovering files", unit="dir") as discovery_bar:
+        for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
+            discovery_bar.update(1)
+            discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
+            current_dir_path = Path(dirpath_str)
+            dirnames[:] = [
+                d
+                for d in dirnames
+                if (current_dir_path / d).name.lower()
+                not in criteria.ignore_path_components
+            ]
+
+            for filename in filenames:
+                file_abs_path = current_dir_path / filename
+                try:
+                    relative_parts = file_abs_path.relative_to(root_dir).parts
+                    if any(
+                        part.lower() in criteria.ignore_path_components
+                        for part in relative_parts
+                    ):
+                        continue
+                except ValueError:
                     continue
-            except ValueError:
-                continue
 
-            if (
-                not criteria.file_extensions
-                or file_abs_path.suffix.lower() in criteria.file_extensions
-            ):
-                candidate_files.append(file_abs_path)
+                if (
+                    not criteria.file_extensions
+                    or file_abs_path.suffix.lower() in criteria.file_extensions
+                ):
+                    candidate_files.append(file_abs_path)
+
+    print(f"Discovered {len(candidate_files)} candidate files to process.")
+
+    if not candidate_files:
+        print(
+            "\nScan complete. No files matched the initial criteria (extensions and ignores)."
+        )
+        with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
+            f_out.write("No files found matching the specified criteria.\n")
+        return
 
     matched_files: Set[Path] = set()
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -718,30 +821,34 @@ def search_and_collate_content(
             ): file
             for file in candidate_files
         }
+
+        progress_bar_desc = f"Phase 2: Processing {len(candidate_files)} files"
         progress_bar = tqdm(
             as_completed(future_to_file),
-            total=len(candidate_files),
+            total=len(future_to_file),
             unit="file",
-            desc="Scanning",
+            desc=progress_bar_desc,
         )
+
        for future in progress_bar:
            result = future.result()
            if result:
                matched_files.add(result)
 
     if not matched_files:
-        print("\nScan complete. No matching files were found.")
-        # Still create the output file with a "not found" message
+        print(
+            "\nScan complete. No matching files were found after processing keywords."
+        )
         with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
-            f_out.write("No files found matching the specified criteria.\n")
+            f_out.write("No files found matching the specified search keywords.\n")
         return
 
     sorted_matched_files = sorted(
         list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
     )
 
-    print(f"\nPhase 1 Complete: Found {len(sorted_matched_files)} matching files.")
-    print(f"\nPhase 2: Generating output file at '{Path(output_file).resolve()}'...")
+    print(f"Found {len(sorted_matched_files)} matching files.")
+    print(f"Generating output file at '{Path(output_file).resolve()}'...")
 
     tree_content_lines = _generate_tree_from_paths(
         root_dir, sorted_matched_files, tree_style, show_tree_stats
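Aside (not part of the diff): Phase 2 above submits each candidate file to a ThreadPoolExecutor and wraps as_completed() in a progress bar. The sketch below shows the same submit/as_completed shape with a hypothetical stand-in worker instead of process_file_for_search; it is illustrative only.

    # Illustrative sketch only -- executor fan-out with results gathered as they finish.
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import Optional

    def check(item: int) -> Optional[int]:
        # Stand-in for the real per-file keyword check.
        return item if item % 3 == 0 else None

    items = list(range(20))
    matches = set()
    with ThreadPoolExecutor(max_workers=4) as executor:
        future_to_item = {executor.submit(check, i): i for i in items}
        # tqdm (or the fallback above) can wrap as_completed() to show progress.
        for future in as_completed(future_to_item):
            result = future.result()
            if result is not None:
                matches.add(result)
    print(sorted(matches))  # -> [0, 3, 6, 9, 12, 15, 18]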
@@ -750,6 +857,7 @@ def search_and_collate_content(
         FileToProcess(f, f.relative_to(root_dir).as_posix())
         for f in sorted_matched_files
     ]
+
     _collate_content_to_file(
         output_file,
         tree_content_lines,
@@ -999,7 +1107,6 @@ __all__ = [
 ]
 
 if __name__ == "__main__":
-    # --- Example: Scan with Custom Filters and the New Readable Stats ---
     print("\n--- Running a custom filter scan with new stats format ---")
     filter_project(
         root_dir_param=".",
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dirshot
-Version: 0.1.2
+Version: 0.1.3
 Summary: A flexible utility for creating project snapshots and searching for files.
 Author-email: init-helpful <init.helpful@gmail.com>
 Project-URL: Homepage, https://github.com/init-helpful/dirshot
5 files without changes