dirshot 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
dirshot/dirshot.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  import sys
3
3
  import re
4
+ import time
4
5
  from pathlib import Path
5
6
  from dataclasses import dataclass, field
6
7
  from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
@@ -13,21 +14,81 @@ try:
13
14
  from tqdm import tqdm
14
15
  except ImportError:
15
16
 
17
+ # Define a functional fallback dummy tqdm class if the import fails.
16
18
  class tqdm:
17
- def __init__(self, iterable=None, **kwargs):
19
+ """A simple, text-based progress bar fallback if tqdm is not installed."""
20
+
21
+ def __init__(
22
+ self, iterable=None, total=None, desc="", unit="it", postfix=None, **kwargs
23
+ ):
18
24
  self.iterable = iterable
25
+ self.total = (
26
+ total
27
+ if total is not None
28
+ else (len(iterable) if hasattr(iterable, "__len__") else None)
29
+ )
30
+ self.desc = desc
31
+ self.unit = unit
32
+ self.current = 0
33
+ self.start_time = time.time()
34
+ self._last_update_time = 0
35
+ self._postfix = postfix or {}
19
36
 
20
37
  def __iter__(self):
21
- return iter(self.iterable)
38
+ if self.iterable is None:
39
+ raise TypeError("tqdm fallback must be initialized with an iterable.")
40
+ for obj in self.iterable:
41
+ yield obj
42
+ self.update(1)
43
+ self.close()
22
44
 
23
45
  def update(self, n=1):
24
- pass
25
-
26
- def set_description(self, desc):
27
- pass
46
+ """Update the progress bar by n steps."""
47
+ self.current += n
48
+ now = time.time()
49
+ if (
50
+ self.total is None
51
+ or now - self._last_update_time > 0.1
52
+ or self.current == self.total
53
+ ):
54
+ self._last_update_time = now
55
+ self._draw()
56
+
57
+ def set_description(self, desc: str):
58
+ """Set the description of the progress bar."""
59
+ self.desc = desc
60
+ self._draw()
61
+
62
+ def set_postfix_str(self, s: str):
63
+ self._postfix["info"] = s
64
+ self._draw()
65
+
66
+ def _draw(self):
67
+ """Draw the progress bar to the console."""
68
+ postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
69
+
70
+ if self.total and self.total > 0:
71
+ percent = int((self.current / self.total) * 100)
72
+ bar_length = 25
73
+ filled_length = int(bar_length * self.current // self.total)
74
+ bar = "█" * filled_length + "-" * (bar_length - filled_length)
75
+ progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
76
+ else: # Case where total is not known
77
+ progress_line = f"\r{self.desc}: {self.current} {self.unit}"
78
+
79
+ if postfix_str:
80
+ progress_line += f" [{postfix_str}]"
81
+
82
+ # Pad with spaces to clear previous, longer lines
83
+ terminal_width = 80
84
+ sys.stdout.write(progress_line.ljust(terminal_width))
85
+ sys.stdout.flush()
28
86
 
29
87
  def close(self):
30
- pass
88
+ """Clean up the progress bar line."""
89
+ # Print a newline to move off the progress bar line
90
+ sys.stdout.write("\n")
91
+ sys.stdout.flush()
31
92
 
32
93
 
33
94
  # --- Configuration Constants ---
@@ -38,6 +99,72 @@ TREE_HEADER_TEXT = "Project File Structure"
38
99
  FILE_HEADER_PREFIX = "FILE: "
39
100
  TOKEN_APPROX_MODE = "CHAR_COUNT"
40
101
 
102
+ # List of binary file extensions to skip during content search
103
+ BINARY_FILE_EXTENSIONS = {
104
+ # Images
105
+ ".png",
106
+ ".jpg",
107
+ ".jpeg",
108
+ ".gif",
109
+ ".bmp",
110
+ ".ico",
111
+ ".tiff",
112
+ ".webp",
113
+ # Documents
114
+ ".pdf",
115
+ ".doc",
116
+ ".docx",
117
+ ".xls",
118
+ ".xlsx",
119
+ ".ppt",
120
+ ".pptx",
121
+ ".odt",
122
+ ".ods",
123
+ # Archives
124
+ ".zip",
125
+ ".gz",
126
+ ".tar",
127
+ ".rar",
128
+ ".7z",
129
+ ".bz2",
130
+ ".xz",
131
+ # Executables & Binaries
132
+ ".exe",
133
+ ".dll",
134
+ ".so",
135
+ ".o",
136
+ ".a",
137
+ ".lib",
138
+ ".bin",
139
+ ".dat",
140
+ ".db",
141
+ ".sqlite",
142
+ ".img",
143
+ ".iso",
144
+ # Compiled Code
145
+ ".class",
146
+ ".jar",
147
+ ".war",
148
+ ".pyc",
149
+ ".pyo",
150
+ # Audio/Video
151
+ ".mp3",
152
+ ".wav",
153
+ ".flac",
154
+ ".ogg",
155
+ ".mp4",
156
+ ".mkv",
157
+ ".avi",
158
+ ".mov",
159
+ ".wmv",
160
+ # Fonts
161
+ ".ttf",
162
+ ".otf",
163
+ ".woff",
164
+ ".woff2",
165
+ }
166
+
167
+
41
168
  # --- Public Enums for Import and Usage ---
42
169
 
43
170
 
@@ -223,17 +350,25 @@ def process_file_for_search(
223
350
  search_file_contents: bool,
224
351
  full_path_compare: bool,
225
352
  ) -> Optional[Path]:
353
+ """
354
+ Checks a single file for keyword matches. Skips content search for binary files.
355
+ """
226
356
  compare_target = str(file_path) if full_path_compare else file_path.name
227
357
  if any(key in compare_target.lower() for key in normalized_keywords):
228
358
  return file_path
359
+
229
360
  if search_file_contents:
361
+ # Before reading content, check if it's a known binary file type
362
+ if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
363
+ return None # Do not attempt to read binary file content
364
+
230
365
  try:
231
366
  with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
232
367
  for line in f:
233
368
  if any(key in line.lower() for key in normalized_keywords):
234
369
  return file_path
235
370
  except (IOError, OSError):
236
- pass
371
+ pass # Ignore files that can't be opened
237
372
  return None
238
373
 
239
374
 
@@ -437,13 +572,12 @@ def _collate_content_to_file(
437
572
  ) -> None:
438
573
  """
439
574
  Collates content to a string buffer, calculates token count,
440
- and then writes to the output file.
575
+ and then writes to the output file with a progress bar.
441
576
  """
442
577
  output_file_path = Path(output_file_path_str).resolve()
443
578
  output_file_path.parent.mkdir(parents=True, exist_ok=True)
444
579
  separator_line = separator_char * separator_line_len
445
580
 
446
- # Use an in-memory buffer to build the output first
447
581
  buffer = StringIO()
448
582
 
449
583
  if tree_content_lines:
@@ -455,37 +589,41 @@ def _collate_content_to_file(
455
589
  "Key: [I: Included f/d | T: Total f/d in original dir]\n"
456
590
  " (f=files, d=directories)\n\n"
457
591
  )
458
- else: # ProjectMode.SEARCH
592
+ else:
459
593
  stats_key = (
460
- "Key: [M: Matched files/dirs]\n"
461
- " (f=files, d=directories)\n\n"
594
+ "Key: [M: Matched files/dirs]\n" " (f=files, d=directories)\n\n"
462
595
  )
463
596
  buffer.write(stats_key)
464
- tree_content = "\n".join(tree_content_lines)
465
- buffer.write(tree_content + "\n")
597
+ buffer.write("\n".join(tree_content_lines) + "\n")
466
598
  buffer.write(f"\n{separator_line}\n\n")
467
599
 
468
- for file_info in files_to_process:
469
- header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
470
- buffer.write(header_content)
471
- try:
472
- with open(
473
- file_info.absolute_path, "r", encoding=encoding, errors="replace"
474
- ) as infile:
475
- file_content = infile.read()
476
- buffer.write(file_content)
477
- buffer.write("\n\n")
478
- except Exception:
479
- buffer.write(
480
- f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
481
- )
482
-
483
- if not files_to_process and not tree_content_lines:
484
- buffer.write(
485
- "No files found matching the specified criteria for content aggregation.\n"
600
+ if not files_to_process:
601
+ message = (
602
+ "No files found matching the specified criteria.\n"
603
+ if mode == ProjectMode.SEARCH
604
+ else "No files found matching specified criteria for content aggregation.\n"
486
605
  )
606
+ buffer.write(message)
607
+ else:
608
+
609
+ collation_bar = tqdm(
610
+ files_to_process, desc="Phase 3: Collating files", unit="file", leave=False
611
+ )
612
+ for file_info in collation_bar:
613
+ collation_bar.set_postfix_str(file_info.relative_path_posix, refresh=True)
614
+ header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
615
+ buffer.write(header_content)
616
+ try:
617
+ with open(
618
+ file_info.absolute_path, "r", encoding=encoding, errors="replace"
619
+ ) as infile:
620
+ buffer.write(infile.read())
621
+ buffer.write("\n\n")
622
+ except Exception:
623
+ buffer.write(
624
+ f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
625
+ )
487
626
 
488
- # Get the complete content from the buffer
489
627
  final_content = buffer.getvalue()
490
628
  total_token_count = 0
491
629
  mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
@@ -496,27 +634,27 @@ def _collate_content_to_file(
496
634
  elif TOKEN_APPROX_MODE == "WORD_COUNT":
497
635
  total_token_count = len(final_content.split())
498
636
 
499
- # Now, write everything to the actual file
500
637
  try:
501
638
  with open(output_file_path, "w", encoding=encoding) as outfile:
502
639
  if show_token_count:
503
- # Add the token count at the top of the file as requested
504
640
  outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
505
-
506
- # Write the main content
507
641
  outfile.write(final_content)
508
642
  except IOError as e:
509
- print(f"Error: Could not write to output file '{output_file_path}': {e}")
643
+ print(f"\nError: Could not write to output file '{output_file_path}': {e}")
510
644
  return
511
645
 
512
- # Final console output remains for user feedback
513
- print(f"\nProcess complete. Output written to: {output_file_path}")
646
+ if mode == ProjectMode.SEARCH:
647
+ if files_to_process:
648
+ print("Success! Collation complete.")
649
+ else:
650
+ print(f"\nProcess complete. Output written to: {output_file_path}")
651
+ if len(files_to_process) > 0:
652
+ print(
653
+ f"Summary: {len(files_to_process)} files selected for content processing."
654
+ )
655
+
514
656
  if show_token_count:
515
657
  print(f"Total Approximated Tokens ({mode_display}): {total_token_count}")
516
- if len(files_to_process) > 0:
517
- print(
518
- f"Summary: {len(files_to_process)} files selected for content processing."
519
- )
520
658
 
521
659
 
522
660
  def filter_and_append_content(
@@ -551,22 +689,34 @@ def filter_and_append_content(
551
689
  else None
552
690
  )
553
691
  files_to_process: List[FileToProcess] = []
554
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
555
- current_dir_path = Path(dirpath_str)
556
- orig_dirnames = list(dirnames)
557
- dirnames[:] = []
558
- for d_name in orig_dirnames:
559
- dir_abs_path = current_dir_path / d_name
560
- if _should_include_entry(dir_abs_path, root_dir, criteria, is_dir=True):
561
- dirnames.append(d_name)
562
- for filename in filenames:
563
- file_abs_path = current_dir_path / filename
564
- if _should_include_entry(file_abs_path, root_dir, criteria, is_dir=False):
565
- files_to_process.append(
566
- FileToProcess(
567
- file_abs_path, file_abs_path.relative_to(root_dir).as_posix()
568
- )
692
+
693
+ with tqdm(desc="Discovering files", unit="dir") as discovery_bar:
694
+ for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
695
+ discovery_bar.update(1)
696
+ discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
697
+
698
+ current_dir_path = Path(dirpath_str)
699
+ orig_dirnames = list(dirnames)
700
+ dirnames[:] = [
701
+ d
702
+ for d in orig_dirnames
703
+ if _should_include_entry(
704
+ current_dir_path / d, root_dir, criteria, is_dir=True
569
705
  )
706
+ ]
707
+
708
+ for filename in filenames:
709
+ file_abs_path = current_dir_path / filename
710
+ if _should_include_entry(
711
+ file_abs_path, root_dir, criteria, is_dir=False
712
+ ):
713
+ files_to_process.append(
714
+ FileToProcess(
715
+ file_abs_path,
716
+ file_abs_path.relative_to(root_dir).as_posix(),
717
+ )
718
+ )
719
+
570
720
  files_to_process.sort(key=lambda f_info: f_info.relative_path_posix.lower())
571
721
  _collate_content_to_file(
572
722
  output_file_path_str,
@@ -596,7 +746,7 @@ def search_and_collate_content(
596
746
  show_token_count: bool,
597
747
  show_tree_stats: bool,
598
748
  ) -> None:
599
- """SEARCH MODE: Scans for files that match a substring in their path/name or content."""
749
+ """SEARCH MODE: Scans for files that match a substring with multi-phase progress bars."""
600
750
  criteria = FilterCriteria.normalize_inputs(
601
751
  file_extensions_to_check,
602
752
  None,
@@ -611,19 +761,54 @@ def search_and_collate_content(
611
761
  if not normalized_keywords:
612
762
  print("Error: Search mode requires 'search_keywords' to be provided.")
613
763
  return
764
+
765
+ if criteria.ignore_path_components:
766
+ print(
767
+ f"Ignoring directories containing: {', '.join(sorted(list(criteria.ignore_path_components)))}"
768
+ )
769
+
614
770
  candidate_files: List[Path] = []
615
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
616
- current_dir_path = Path(dirpath_str)
617
- dirnames[:] = [
618
- d for d in dirnames if d.lower() not in criteria.ignore_path_components
619
- ]
620
- for filename in filenames:
621
- file_abs_path = current_dir_path / filename
622
- if (
623
- file_abs_path.suffix.lower() in criteria.file_extensions
624
- or not criteria.file_extensions
625
- ):
626
- candidate_files.append(file_abs_path)
771
+
772
+ with tqdm(desc="Phase 1: Discovering files", unit="dir") as discovery_bar:
773
+ for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
774
+ discovery_bar.update(1)
775
+ discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
776
+ current_dir_path = Path(dirpath_str)
777
+ dirnames[:] = [
778
+ d
779
+ for d in dirnames
780
+ if (current_dir_path / d).name.lower()
781
+ not in criteria.ignore_path_components
782
+ ]
783
+
784
+ for filename in filenames:
785
+ file_abs_path = current_dir_path / filename
786
+ try:
787
+ relative_parts = file_abs_path.relative_to(root_dir).parts
788
+ if any(
789
+ part.lower() in criteria.ignore_path_components
790
+ for part in relative_parts
791
+ ):
792
+ continue
793
+ except ValueError:
794
+ continue
795
+
796
+ if (
797
+ not criteria.file_extensions
798
+ or file_abs_path.suffix.lower() in criteria.file_extensions
799
+ ):
800
+ candidate_files.append(file_abs_path)
801
+
802
+ print(f"Discovered {len(candidate_files)} candidate files to process.")
803
+
804
+ if not candidate_files:
805
+ print(
806
+ "\nScan complete. No files matched the initial criteria (extensions and ignores)."
807
+ )
808
+ with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
809
+ f_out.write("No files found matching the specified criteria.\n")
810
+ return
811
+
627
812
  matched_files: Set[Path] = set()
628
813
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
629
814
  future_to_file = {
@@ -636,33 +821,35 @@ def search_and_collate_content(
636
821
  ): file
637
822
  for file in candidate_files
638
823
  }
824
+
825
+ progress_bar_desc = f"Phase 2: Processing {len(candidate_files)} files"
639
826
  progress_bar = tqdm(
640
827
  as_completed(future_to_file),
641
- total=len(candidate_files),
828
+ total=len(future_to_file),
642
829
  unit="file",
643
- desc="Scanning",
830
+ desc=progress_bar_desc,
644
831
  )
832
+
645
833
  for future in progress_bar:
646
834
  result = future.result()
647
835
  if result:
648
836
  matched_files.add(result)
837
+
649
838
  if not matched_files:
650
- print("\nScan complete. No matching files were found.")
651
- _collate_content_to_file(
652
- output_file,
653
- None,
654
- [],
655
- DEFAULT_ENCODING,
656
- DEFAULT_SEPARATOR_CHAR,
657
- DEFAULT_SEPARATOR_LINE_LENGTH,
658
- show_token_count,
659
- show_tree_stats,
660
- ProjectMode.SEARCH,
839
+ print(
840
+ "\nScan complete. No matching files were found after processing keywords."
661
841
  )
842
+ with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
843
+ f_out.write("No files found matching the specified search keywords.\n")
662
844
  return
845
+
663
846
  sorted_matched_files = sorted(
664
847
  list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
665
848
  )
849
+
850
+ print(f"Found {len(sorted_matched_files)} matching files.")
851
+ print(f"Generating output file at '{Path(output_file).resolve()}'...")
852
+
666
853
  tree_content_lines = _generate_tree_from_paths(
667
854
  root_dir, sorted_matched_files, tree_style, show_tree_stats
668
855
  )
@@ -670,6 +857,7 @@ def search_and_collate_content(
670
857
  FileToProcess(f, f.relative_to(root_dir).as_posix())
671
858
  for f in sorted_matched_files
672
859
  ]
860
+
673
861
  _collate_content_to_file(
674
862
  output_file,
675
863
  tree_content_lines,
@@ -919,7 +1107,6 @@ __all__ = [
919
1107
  ]
920
1108
 
921
1109
  if __name__ == "__main__":
922
- # --- Example: Scan with Custom Filters and the New Readable Stats ---
923
1110
  print("\n--- Running a custom filter scan with new stats format ---")
924
1111
  filter_project(
925
1112
  root_dir_param=".",
@@ -928,4 +1115,4 @@ if __name__ == "__main__":
928
1115
  ignore_dirs_in_path=["venv", "build", "node_modules", "static", "templates"],
929
1116
  show_tree_stats=True,
930
1117
  show_token_count=True,
931
- )
1118
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dirshot
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: A flexible utility for creating project snapshots and searching for files.
5
5
  Author-email: init-helpful <init.helpful@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/init-helpful/dirshot
@@ -0,0 +1,7 @@
1
+ dirshot/__init__.py,sha256=ss4HC5VTyD9j6GFGCLMU6VxPlXy0qaGFzXlZB3_d2WM,403
2
+ dirshot/dirshot.py,sha256=2zx4ghzYi5Rsh-C0maHATapF2ArremgRLFWJlWlRu34,40365
3
+ dirshot/examples.py,sha256=q--iNqxmA4xX8nyXYdOP-HPsqzpLHBFo1PTseQ9ki7M,2344
4
+ dirshot-0.1.3.dist-info/METADATA,sha256=9mdpQmEFer0rY-kineW0bSU2OZHHYo7FNw1eDCZ_M4I,4172
5
+ dirshot-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ dirshot-0.1.3.dist-info/top_level.txt,sha256=ROGW8gTcmwJ2jJ1Fp7TV1REZLRUGbL3L-Lfoy8tPxOA,8
7
+ dirshot-0.1.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- dirshot/__init__.py,sha256=ss4HC5VTyD9j6GFGCLMU6VxPlXy0qaGFzXlZB3_d2WM,403
2
- dirshot/dirshot.py,sha256=ItCwC4BsSbPzBLlHddiFlYsqdB3Hh3PEpwN89EuplIc,34693
3
- dirshot/examples.py,sha256=q--iNqxmA4xX8nyXYdOP-HPsqzpLHBFo1PTseQ9ki7M,2344
4
- dirshot-0.1.1.dist-info/METADATA,sha256=z72qXvnkUFizL4qkdXEXF6QWu3yZs28szf9wuaru4kI,4172
5
- dirshot-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- dirshot-0.1.1.dist-info/top_level.txt,sha256=ROGW8gTcmwJ2jJ1Fp7TV1REZLRUGbL3L-Lfoy8tPxOA,8
7
- dirshot-0.1.1.dist-info/RECORD,,