dirshot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dirshot/__init__.py ADDED
@@ -0,0 +1,21 @@
1
"""Package entry point: re-exports the public names defined in ``dirshot.dirshot``."""

from .dirshot import (
    process_project,
    filter_project,
    find_in_project,
    deconstruct_snapshot,
    ProjectMode,
    LanguagePreset,
    IgnorePreset,
    TreeStylePreset,
)

# Names made available by ``from dirshot import *``; mirrors the import list above.
__all__ = [
    "process_project",
    "filter_project",
    "find_in_project",
    "deconstruct_snapshot",
    "ProjectMode",
    "LanguagePreset",
    "IgnorePreset",
    "TreeStylePreset",
]
dirshot/dirshot.py ADDED
@@ -0,0 +1,931 @@
1
+ import os
2
+ import sys
3
+ import re
4
+ from pathlib import Path
5
+ from dataclasses import dataclass, field
6
+ from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
7
+ from enum import Enum
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from io import StringIO
10
+
11
+ # --- TQDM Dependency Handler ---
12
try:
    from tqdm import tqdm
except ImportError:

    class tqdm:
        """No-op stand-in used when the optional ``tqdm`` package is not installed.

        It accepts the same call shapes the real progress bar does (positional
        and keyword options are accepted and ignored), iterates the wrapped
        iterable unchanged, and can be used as a context manager so code
        written against the real ``tqdm`` keeps working without it.
        """

        def __init__(self, iterable=None, *args, **kwargs):
            # Only the iterable matters; display options are discarded.
            self.iterable = iterable

        def __iter__(self):
            return iter(self.iterable)

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            self.close()
            # Never suppress an exception raised inside the ``with`` body.
            return False

        def update(self, n=1):
            """Accept a progress increment and do nothing."""

        def set_description(self, desc):
            """Accept a description and do nothing."""

        def close(self):
            """Nothing to release for the stand-in."""
31
+
32
+
33
+ # --- Configuration Constants ---
34
# Character repeated to build the separator lines in the output file.
DEFAULT_SEPARATOR_CHAR = "-"
# Number of separator characters per separator line.
DEFAULT_SEPARATOR_LINE_LENGTH = 80
# Encoding used for reading source files and writing the output file.
DEFAULT_ENCODING = "utf-8"
# Heading written above the directory tree section of the output.
TREE_HEADER_TEXT = "Project File Structure"
# Prefix of the header line that introduces each collated file.
FILE_HEADER_PREFIX = "FILE: "
# How the "token count" is approximated: "CHAR_COUNT" or "WORD_COUNT".
TOKEN_APPROX_MODE = "CHAR_COUNT"
40
+
41
+ # --- Public Enums for Import and Usage ---
42
+
43
+
44
class ProjectMode(Enum):
    """Selects which of the two processing pipelines is run."""

    # Collect files chosen by type / filename criteria.
    FILTER = "filter"
    # Collect files whose path or contents match search keywords.
    SEARCH = "search"
49
+
50
+
51
class LanguagePreset(Enum):
    """Ready-made file-type lists for common languages and frameworks.

    Entries starting with "." are treated as extensions by the filter
    normalisation; any other entry is treated as an exact filename.
    """

    PYTHON = [".py", ".pyw", "setup.py", "requirements.txt", "Pipfile", "pyproject.toml"]
    JAVASCRIPT = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"]
    WEB = [".html", ".css", ".scss", ".less"]
    JAVA = [".java", ".groovy", ".kt", ".gradle", ".properties"]
65
+
66
+
67
class IgnorePreset(Enum):
    """Ready-made ignore lists.

    Each entry is applied both as a path component to skip and as a
    filename substring to skip when the preset is normalised.
    """

    VERSION_CONTROL = [".git", ".svn", ".hg", ".idea"]
    NODE_MODULES = ["node_modules", "package-lock.json", "yarn.lock"]
    PYTHON_ENV = ["__pycache__", "venv", ".venv", "env", "lib", "bin"]
    BUILD_ARTIFACTS = ["dist", "build", "target", "out", "temp", "tmp"]
    TEST_FILES = ["test", "spec", "fixture", "example", "mock"]
75
+
76
+
77
class TreeStylePreset(Enum):
    """Predefined character sets for directory tree rendering.

    Each value is ``(t_connector, l_connector, v_connector, h_spacer)``.
    All four strings of a preset have the same width (4 columns): the
    continuation strings (``v_connector`` / ``h_spacer``) are prepended once
    per nesting level, so they must be as wide as the branch connectors or
    nested entries drift out of alignment with their parents.
    """

    UNICODE = ("├── ", "└── ", "│   ", "    ")
    ASCII = ("|-- ", "+-- ", "|   ", "    ")
    COMPACT = ("|---", "`---", "|   ", "    ")

    def to_style(self) -> "TreeStyle":
        """Return this preset's characters as a ``TreeStyle`` tuple."""
        return TreeStyle(*self.value)
86
+
87
+
88
class TreeStyle(NamedTuple):
    """The four strings used to draw one line of the directory tree."""

    # Connector placed before an entry that has later siblings.
    t_connector: str
    # Connector placed before the last entry of a directory.
    l_connector: str
    # Prefix segment added for children of a directory that is not the last entry.
    v_connector: str
    # Prefix segment added for children of a directory that is the last entry.
    h_spacer: str
95
+
96
+
97
+ # --- Helper Data Structures ---
98
+
99
+
100
@dataclass
class FilterCriteria:
    """Holds normalized (stripped, lower-cased) filter criteria for files and directories."""

    # Extensions including the leading dot, e.g. ".py".
    file_extensions: Set[str] = field(default_factory=set)
    # Complete filenames that count as a type match, e.g. "pipfile".
    exact_filenames: Set[str] = field(default_factory=set)
    # When non-empty, a filename must contain at least one of these.
    whitelist_fname_substrings: Set[str] = field(default_factory=set)
    # A filename containing any of these is rejected.
    ignore_fname_substrings: Set[str] = field(default_factory=set)
    # A path with any component equal to one of these is rejected.
    ignore_path_components: Set[str] = field(default_factory=set)

    @classmethod
    def normalize_inputs(
        cls,
        file_types: Optional[List[str]],
        whitelist_substrings: Optional[List[str]],
        ignore_filename_substrings: Optional[List[str]],
        ignore_path_components_list: Optional[List[str]],
        language_presets: Optional[List["LanguagePreset"]] = None,
        ignore_presets: Optional[List["IgnorePreset"]] = None,
    ) -> "FilterCriteria":
        """Build a ``FilterCriteria`` from raw user lists and presets.

        Every entry is stripped of surrounding whitespace and lower-cased;
        blank entries are dropped. File types starting with "." become
        extensions, all others become exact filenames. Each ignore preset
        contributes its values to both the ignored path components and the
        ignored filename substrings.

        Args:
            file_types: Extensions (".py") and/or exact filenames ("Pipfile").
            whitelist_substrings: Filename substrings to require.
            ignore_filename_substrings: Filename substrings to reject.
            ignore_path_components_list: Path components to reject.
            language_presets: Presets whose ``value`` lists extend ``file_types``.
            ignore_presets: Presets whose ``value`` lists extend both ignore sets.

        Returns:
            The normalized criteria.
        """

        def _clean(values) -> Set[str]:
            # Strip as well as lower-case: a padded entry such as " build "
            # could otherwise never equal a real path component or filename.
            return {v.strip().lower() for v in (values or ()) if v.strip()}

        type_pool = set(file_types or [])
        ignore_path_pool = set(ignore_path_components_list or [])
        ignore_name_pool = set(ignore_filename_substrings or [])
        for preset in language_presets or ():
            type_pool.update(preset.value)
        for preset in ignore_presets or ():
            ignore_path_pool.update(preset.value)
            ignore_name_pool.update(preset.value)

        cleaned_types = _clean(type_pool)
        extensions = {t for t in cleaned_types if t.startswith(".")}
        return cls(
            file_extensions=extensions,
            exact_filenames=cleaned_types - extensions,
            whitelist_fname_substrings=_clean(whitelist_substrings),
            ignore_fname_substrings=_clean(ignore_name_pool),
            ignore_path_components=_clean(ignore_path_pool),
        )
154
+
155
+
156
class FileToProcess(NamedTuple):
    """A file chosen for inclusion in the collated output."""

    # Location used to open and read the file.
    absolute_path: Path
    # Root-relative path with forward slashes, used in headers and for sorting.
    relative_path_posix: str
161
+
162
+
163
+ # --- Helper Functions ---
164
+
165
+
166
def validate_root_directory(root_dir_param: Optional[str]) -> Optional[Path]:
    """Resolve the requested root directory, or report why it is unusable.

    Args:
        root_dir_param: Directory to use; a falsy value selects the current
            working directory.

    Returns:
        The fully resolved directory path, or ``None`` (after printing an
        error message) when the path cannot be resolved or is not a directory.
    """
    label = root_dir_param or "current working directory"
    try:
        # strict=True makes a non-existent path raise instead of resolving lazily.
        root = Path(root_dir_param or Path.cwd()).resolve(strict=True)
    except Exception as exc:
        print(f"Error: Could not resolve root directory '{label}': {exc}")
        return None
    if root.is_dir():
        return root
    print(f"Error: Root path '{root}' is not a directory.")
    return None
181
+
182
+
183
def _should_include_entry(
    entry_path: Path,
    root_dir: Path,
    criteria: FilterCriteria,
    is_dir: bool,
    log_func: Optional[Callable[[str], None]] = None,
) -> bool:
    """Decide whether a file or directory passes the filter criteria.

    Args:
        entry_path: Path of the entry being tested.
        root_dir: Project root; entries outside it are rejected.
        criteria: Normalized (lower-cased) filter criteria.
        is_dir: Whether the entry is a directory.
        log_func: Accepted for interface compatibility; not used here.

    Returns:
        ``True`` when the entry should be kept.
    """
    try:
        rel_parts = entry_path.relative_to(root_dir).parts
    except ValueError:
        # Not located under root_dir.
        return False

    # The entry's own name is one of the parts, so an ignored name rejects
    # files as well as everything below an ignored directory.
    ignored_components = criteria.ignore_path_components
    if ignored_components:
        for component in rel_parts:
            if component.lower() in ignored_components:
                return False

    # Directories are only subject to the path-component check.
    if is_dir:
        return True

    name = entry_path.name.lower()

    # Type filter applies only when at least one extension/filename was given.
    if criteria.file_extensions or criteria.exact_filenames:
        by_extension = entry_path.suffix.lower() in criteria.file_extensions
        if not by_extension and name not in criteria.exact_filenames:
            return False

    required = criteria.whitelist_fname_substrings
    if required and not any(token in name for token in required):
        return False

    if any(token in name for token in criteria.ignore_fname_substrings):
        return False
    return True
218
+
219
+
220
def process_file_for_search(
    file_path: Path,
    normalized_keywords: List[str],
    search_file_contents: bool,
    full_path_compare: bool,
) -> Optional[Path]:
    """Return ``file_path`` if its path/name or contents contain a keyword.

    Args:
        file_path: File to test.
        normalized_keywords: Keywords to look for; compared against
            lower-cased text, so they are expected to be lower-case already.
        search_file_contents: When true, also scan the file line by line.
        full_path_compare: When true the whole path string is compared,
            otherwise only the file name.

    Returns:
        ``file_path`` on a match, otherwise ``None``. Files that cannot be
        read are treated as non-matching.
    """

    def mentions_keyword(text: str) -> bool:
        lowered = text.lower()
        for keyword in normalized_keywords:
            if keyword in lowered:
                return True
        return False

    subject = file_path.name if not full_path_compare else str(file_path)
    if mentions_keyword(subject):
        return file_path

    if not search_file_contents:
        return None

    try:
        # Undecodable bytes are dropped rather than aborting the scan.
        with open(str(file_path), "r", encoding="utf-8", errors="ignore") as handle:
            for row in handle:
                if mentions_keyword(row):
                    return file_path
    except (IOError, OSError):
        pass
    return None
238
+
239
+
240
def _calculate_total_stats(
    root_dir: Path, criteria: FilterCriteria
) -> Dict[Path, Tuple[int, int]]:
    """Count the direct children of every visited directory.

    Only ``criteria.ignore_path_components`` is honoured here; file-type and
    filename filters are not applied, so the counts describe the directory
    contents before filtering. Ignored directories are not descended into.

    Returns:
        Mapping of directory path to ``(file_count, dir_count)``.
    """
    ignored = criteria.ignore_path_components
    totals: Dict[Path, Tuple[int, int]] = {}

    for walk_dir, subdir_names, file_names in os.walk(str(root_dir), topdown=True):
        here = Path(walk_dir)
        file_count = 0
        dir_count = 0

        for child_name in [*subdir_names, *file_names]:
            child = here / child_name
            try:
                child_is_dir = child.is_dir()
            except OSError:
                continue

            if ignored:
                try:
                    rel_parts = child.relative_to(root_dir).parts
                except ValueError:
                    continue
                if not ignored.isdisjoint(part.lower() for part in rel_parts):
                    continue

            if child_is_dir:
                dir_count += 1
            else:
                file_count += 1

        totals[here] = (file_count, dir_count)

        # In-place edit so os.walk (topdown) skips ignored directories.
        subdir_names[:] = [
            name for name in subdir_names if name.lower() not in ignored
        ]

    return totals
276
+
277
+
278
+ # --- Tree Generation Functions ---
279
+
280
+
281
def _generate_tree_lines(
    root_dir: Path, criteria: FilterCriteria, style: TreeStyle, show_stats: bool
) -> List[str]:
    """Render the filtered directory tree below ``root_dir`` as text lines.

    Each directory is listed exactly once: the listing obtained to compute a
    directory's "included" counts is handed to the recursive call that
    renders its children, instead of reading the directory a second time.

    Args:
        root_dir: Directory whose name forms the first line of the tree.
        criteria: Filter deciding which entries are displayed.
        style: Connector / spacer strings used to draw the tree.
        show_stats: When true, directory names get a
            ``[I: included | T: total]`` suffix.

    Returns:
        The tree, one string per line. A directory that cannot be listed
        yields an ``[Error accessing: ...]`` line in place of its children.
    """
    dir_stats: Optional[Dict[Path, Tuple[int, int]]] = (
        _calculate_total_stats(root_dir, criteria) if show_stats else None
    )
    tree_lines: List[str] = []

    def scan(directory: Path):
        """Return ``(displayable_children, error)``; ``error`` is the OSError from listing, if any."""
        try:
            entries = sorted(directory.iterdir(), key=lambda p: p.name.lower())
        except OSError as exc:
            return [], exc
        children: List[Tuple[Path, bool]] = []
        for entry in entries:
            try:
                entry_is_dir = entry.is_dir()
            except OSError:
                # Entry cannot be inspected; leave it out of the tree.
                continue
            if _should_include_entry(
                entry, root_dir, criteria, is_dir=entry_is_dir, log_func=None
            ):
                children.append((entry, entry_is_dir))
        return children, None

    def count_kinds(children: List[Tuple[Path, bool]]) -> Tuple[int, int]:
        """Return ``(files, dirs)`` among the displayable children."""
        dirs = sum(1 for _, child_is_dir in children if child_is_dir)
        return len(children) - dirs, dirs

    def format_dir_name(
        path: Path, path_name: str, included_files: int, included_dirs: int
    ) -> str:
        if not show_stats or not dir_stats:
            return path_name
        total_files, total_dirs = dir_stats.get(path, (0, 0))
        stats_str = f" [I: {included_files}f, {included_dirs}d | T: {total_files}f, {total_dirs}d]"
        return path_name + stats_str

    def build(current_path: Path, prefix_parts: List[str], listing) -> None:
        children, error = listing
        prefix = "".join(prefix_parts)
        if error is not None:
            tree_lines.append(
                prefix
                + style.l_connector
                + f"[Error accessing: {current_path.name} - {error.strerror}]"
            )
            return

        # Only the root call has an empty prefix; it contributes the header line.
        if not prefix_parts:
            tree_lines.append(
                format_dir_name(current_path, current_path.name, *count_kinds(children))
            )

        last_index = len(children) - 1
        for index, (child_path, child_is_dir) in enumerate(children):
            is_last = index == last_index
            connector = style.l_connector if is_last else style.t_connector
            entry_name = child_path.name
            child_listing = None
            if child_is_dir:
                child_listing = scan(child_path)
                # On a listing error the plain name is shown and the recursive
                # call below emits the error line.
                if child_listing[1] is None:
                    entry_name = format_dir_name(
                        child_path, child_path.name, *count_kinds(child_listing[0])
                    )
            tree_lines.append(prefix + connector + entry_name)
            if child_listing is not None:
                continuation = style.h_spacer if is_last else style.v_connector
                build(child_path, prefix_parts + [continuation], child_listing)

    build(root_dir, [], scan(root_dir))
    return tree_lines
372
+
373
+
374
def _generate_tree_from_paths(
    root_dir: Path, file_paths: List[Path], style: TreeStyle, show_stats: bool
) -> List[str]:
    """Render a tree that contains only the given (matched) files.

    The paths are folded into a nested dict keyed by path component; a
    component mapped to an empty dict is a file, a non-empty dict is a
    directory. Directories are listed before files, each group sorted
    case-insensitively.

    Args:
        root_dir: Root that every path in ``file_paths`` lies under.
        file_paths: Matched files to display.
        style: Connector / spacer strings used to draw the tree.
        show_stats: When true, directory names get an ``[M: ...]`` suffix.

    Returns:
        The tree, one string per line, starting with the root name.
    """
    nested: Dict[str, Any] = {}
    for relative in {p.relative_to(root_dir) for p in file_paths}:
        node = nested
        for segment in relative.parts:
            node = node.setdefault(segment, {})

    tree_lines: List[str] = []

    def tally(node: Dict[str, Any]) -> Tuple[int, int]:
        """Return ``(files, dirs)`` directly inside ``node``."""
        dirs = sum(1 for subtree in node.values() if subtree)
        return len(node) - dirs, dirs

    def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
        if show_stats:
            return name + f" [M: {matched_files}f, {matched_dirs}d]"
        return name

    def build_lines(d: Dict[str, Any], prefix: str) -> None:
        # False sorts before True, so non-empty nodes (directories) come first.
        ordered = sorted(d, key=lambda key: (not d[key], key.lower()))
        if not prefix:
            tree_lines.append(format_dir_name_search(root_dir.name, *tally(d)))
        last_index = len(ordered) - 1
        for index, name in enumerate(ordered):
            subtree = d[name]
            is_last = index == last_index
            label = format_dir_name_search(name, *tally(subtree)) if subtree else name
            branch = style.l_connector if is_last else style.t_connector
            tree_lines.append(prefix + branch + label)
            if subtree:
                continuation = style.h_spacer if is_last else style.v_connector
                build_lines(subtree, prefix + continuation)

    build_lines(nested, "")
    return tree_lines
422
+
423
+
424
+ # --- Collation and Main Modes ---
425
+
426
+
427
def _collate_content_to_file(
    output_file_path_str: str,
    tree_content_lines: Optional[List[str]],
    files_to_process: List[FileToProcess],
    encoding: str,
    separator_char: str,
    separator_line_len: int,
    show_token_count: bool,
    show_tree_stats: bool,
    mode: ProjectMode,
) -> None:
    """Assemble the snapshot in memory and write it to the output file.

    The tree section (if any) and every file's content are first written to
    an in-memory buffer so the approximate token count can be computed and
    placed at the top of the output file.

    Args:
        output_file_path_str: Destination path; parent directories are created.
        tree_content_lines: Pre-rendered tree lines, or ``None`` / empty for no tree.
        files_to_process: Files whose contents are appended, in order.
        encoding: Encoding for reading inputs and writing the output.
        separator_char: Character repeated to form separator lines.
        separator_line_len: Length of each separator line.
        show_token_count: Whether to compute and report the token approximation.
        show_tree_stats: Whether to write the legend for tree statistics.
        mode: Decides which statistics legend is written.
    """
    output_file_path = Path(output_file_path_str).resolve()
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    separator_line = separator_char * separator_line_len

    buffer = StringIO()

    # --- Tree section ---
    if tree_content_lines:
        buffer.write(f"{TREE_HEADER_TEXT}\n{separator_line}\n\n")
        if show_tree_stats:
            if mode == ProjectMode.FILTER:
                legend = (
                    "Key: [I: Included f/d | T: Total f/d in original dir]\n"
                    " (f=files, d=directories)\n\n"
                )
            else:  # ProjectMode.SEARCH
                legend = (
                    "Key: [M: Matched files/dirs]\n"
                    " (f=files, d=directories)\n\n"
                )
            buffer.write(legend)
        buffer.write("\n".join(tree_content_lines) + "\n")
        buffer.write(f"\n{separator_line}\n\n")

    # --- File contents ---
    for entry in files_to_process:
        buffer.write(
            f"{separator_line}\n{FILE_HEADER_PREFIX}{entry.relative_path_posix}\n{separator_line}\n\n"
        )
        try:
            # Undecodable bytes are replaced so one bad file cannot abort the run.
            with open(
                entry.absolute_path, "r", encoding=encoding, errors="replace"
            ) as source:
                text = source.read()
                buffer.write(text)
                buffer.write("\n\n")
        except Exception:
            buffer.write(
                f"Error: Could not read file '{entry.relative_path_posix}'.\n\n"
            )

    if not files_to_process and not tree_content_lines:
        buffer.write(
            "No files found matching the specified criteria for content aggregation.\n"
        )

    final_content = buffer.getvalue()

    # --- Token approximation (the count line itself is not included) ---
    mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
    total_token_count = 0
    if show_token_count:
        if TOKEN_APPROX_MODE == "CHAR_COUNT":
            total_token_count = len(final_content)
        elif TOKEN_APPROX_MODE == "WORD_COUNT":
            total_token_count = len(final_content.split())

    # --- Write the output file ---
    try:
        with open(output_file_path, "w", encoding=encoding) as outfile:
            if show_token_count:
                outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
            outfile.write(final_content)
    except IOError as e:
        print(f"Error: Could not write to output file '{output_file_path}': {e}")
        return

    # --- Console summary ---
    print(f"\nProcess complete. Output written to: {output_file_path}")
    if show_token_count:
        print(f"Total Approximated Tokens ({mode_display}): {total_token_count}")
    selected = len(files_to_process)
    if selected > 0:
        print(f"Summary: {selected} files selected for content processing.")
520
+
521
+
522
def filter_and_append_content(
    root_dir: Path,
    output_file_path_str: str,
    tree_style: TreeStyle,
    generate_tree: bool,
    file_types: Optional[List[str]],
    whitelist_substrings_in_filename: Optional[List[str]],
    ignore_substrings_in_filename: Optional[List[str]],
    ignore_dirs_in_path: Optional[List[str]],
    language_presets: Optional[List[LanguagePreset]],
    ignore_presets: Optional[List[IgnorePreset]],
    encoding: str,
    separator_char: str,
    separator_line_len: int,
    show_token_count: bool,
    show_tree_stats: bool,
) -> None:
    """FILTER MODE: pick files by the given criteria and write the snapshot.

    The raw filter inputs are normalized, the tree is rendered when
    ``generate_tree`` is true, the project is walked to collect every file
    that passes the criteria, and the result is handed to
    ``_collate_content_to_file``.
    """
    criteria = FilterCriteria.normalize_inputs(
        file_types,
        whitelist_substrings_in_filename,
        ignore_substrings_in_filename,
        ignore_dirs_in_path,
        language_presets,
        ignore_presets,
    )

    tree_content_lines: Optional[List[str]] = None
    if generate_tree:
        tree_content_lines = _generate_tree_lines(
            root_dir, criteria, tree_style, show_tree_stats
        )

    files_to_process: List[FileToProcess] = []
    for walk_dir, subdir_names, file_names in os.walk(str(root_dir), topdown=True):
        here = Path(walk_dir)
        # In-place edit so os.walk (topdown) does not descend into rejected directories.
        subdir_names[:] = [
            name
            for name in subdir_names
            if _should_include_entry(here / name, root_dir, criteria, is_dir=True)
        ]
        for file_name in file_names:
            candidate = here / file_name
            if _should_include_entry(candidate, root_dir, criteria, is_dir=False):
                files_to_process.append(
                    FileToProcess(candidate, candidate.relative_to(root_dir).as_posix())
                )

    files_to_process.sort(key=lambda item: item.relative_path_posix.lower())

    _collate_content_to_file(
        output_file_path_str,
        tree_content_lines,
        files_to_process,
        encoding,
        separator_char,
        separator_line_len,
        show_token_count,
        show_tree_stats,
        ProjectMode.FILTER,
    )
582
+
583
+
584
def search_and_collate_content(
    root_dir: Path,
    sub_string_match: List[str],
    output_file: str,
    tree_style: TreeStyle,
    file_extensions_to_check: Optional[List[str]],
    ignore_substrings_in_path: Optional[List[str]],
    language_presets: Optional[List[LanguagePreset]],
    ignore_presets: Optional[List[IgnorePreset]],
    search_file_contents: bool,
    max_workers: Optional[int],
    full_path_compare: bool,
    show_token_count: bool,
    show_tree_stats: bool,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """SEARCH MODE: collect files whose path/name or content contains a keyword.

    Candidate files are selected with the same ``_should_include_entry`` rule
    FILTER mode uses, so exact filenames from ``file_extensions_to_check`` /
    language presets (e.g. "requirements.txt") and the filename ignores from
    ignore presets are honoured here too. Candidates are then scanned in a
    thread pool and the matches are written via ``_collate_content_to_file``.

    Args:
        root_dir: Resolved project root.
        sub_string_match: Keywords; matched case-insensitively.
        output_file: Destination path of the snapshot.
        tree_style: Characters used to draw the tree of matches.
        file_extensions_to_check: Extensions / exact filenames to consider;
            empty means every file.
        ignore_substrings_in_path: Path components to skip.
        language_presets: Presets extending the file types.
        ignore_presets: Presets extending the ignore lists.
        search_file_contents: Also scan file contents, not just names/paths.
        max_workers: Thread pool size (``None`` lets the executor decide).
        full_path_compare: Compare keywords against the full path instead of
            the file name only.
        show_token_count: Report the approximate token count.
        show_tree_stats: Annotate the tree with match counts.
        encoding: Encoding used to read matched files and write the output.
        separator_char: Character used for separator lines in the output.
        separator_line_len: Length of separator lines in the output.
    """
    criteria = FilterCriteria.normalize_inputs(
        file_extensions_to_check,
        None,
        None,
        ignore_substrings_in_path,
        language_presets,
        ignore_presets,
    )
    normalized_keywords = [
        sub.lower().strip() for sub in sub_string_match if sub.strip()
    ]
    if not normalized_keywords:
        print("Error: Search mode requires 'search_keywords' to be provided.")
        return

    candidate_files: List[Path] = []
    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
        current_dir_path = Path(dirpath_str)
        # In-place edit so os.walk (topdown) skips ignored directories.
        dirnames[:] = [
            d for d in dirnames if d.lower() not in criteria.ignore_path_components
        ]
        for filename in filenames:
            file_abs_path = current_dir_path / filename
            if _should_include_entry(file_abs_path, root_dir, criteria, is_dir=False):
                candidate_files.append(file_abs_path)

    matched_files: Set[Path] = set()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(
                process_file_for_search,
                candidate,
                normalized_keywords,
                search_file_contents,
                full_path_compare,
            )
            for candidate in candidate_files
        ]
        progress_bar = tqdm(
            as_completed(futures),
            total=len(candidate_files),
            unit="file",
            desc="Scanning",
        )
        for future in progress_bar:
            result = future.result()
            if result:
                matched_files.add(result)

    tree_content_lines: Optional[List[str]] = None
    files_to_process: List[FileToProcess] = []
    if not matched_files:
        # An output file is still written; it will state that nothing was found.
        print("\nScan complete. No matching files were found.")
    else:
        sorted_matched_files = sorted(
            matched_files, key=lambda p: p.relative_to(root_dir).as_posix().lower()
        )
        tree_content_lines = _generate_tree_from_paths(
            root_dir, sorted_matched_files, tree_style, show_tree_stats
        )
        files_to_process = [
            FileToProcess(f, f.relative_to(root_dir).as_posix())
            for f in sorted_matched_files
        ]

    _collate_content_to_file(
        output_file,
        tree_content_lines,
        files_to_process,
        encoding,
        separator_char,
        separator_line_len,
        show_token_count,
        show_tree_stats,
        ProjectMode.SEARCH,
    )


# --- DECONSTRUCTION FUNCTION ---


def deconstruct_snapshot(
    snapshot_file_path: str, separator_char: str = DEFAULT_SEPARATOR_CHAR
) -> Dict[str, Any]:
    """Extract the directory tree lines and file paths from a snapshot file.

    Tree lines keep their leading indentation (only trailing whitespace is
    removed) so the nesting of the tree survives. A ``FILE: `` line is only
    accepted as a file header when the line before it is a separator line,
    which is how headers are written; this keeps file *content* that happens
    to start with ``FILE: `` from being reported as a path. File headers are
    also recognised in snapshots that were written without a tree section.

    Args:
        snapshot_file_path: Path of the snapshot to read.
        separator_char: Separator character the snapshot was written with.

    Returns:
        ``{"tree_lines": [...], "file_paths": [...]}``.

    Raises:
        FileNotFoundError: If ``snapshot_file_path`` is not an existing file.
    """
    snapshot_path = Path(snapshot_file_path)
    if not snapshot_path.is_file():
        raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")

    tree_lines: List[str] = []
    file_paths: List[str] = []
    separator_pattern = re.compile(
        r"^[{0}]{{4,}}[{0}|]*$".format(re.escape(separator_char))
    )
    # Legend lines written between the tree header and the tree itself.
    legend_prefixes = ("Key:", "(f=files")

    state = "LOOKING_FOR_TREE"
    previous_was_separator = False
    with open(snapshot_path, "r", encoding=DEFAULT_ENCODING, errors="replace") as f:
        for raw_line in f:
            text = raw_line.rstrip()
            stripped = text.lstrip()
            is_separator = bool(separator_pattern.match(stripped))

            if state == "READING_TREE":
                if is_separator:
                    # The separator right under the header arrives before any
                    # tree line; only the one after the tree ends the section.
                    if tree_lines:
                        state = "READING_CONTENT"
                elif stripped and not stripped.startswith(legend_prefixes):
                    tree_lines.append(text)
            elif previous_was_separator and text.startswith(FILE_HEADER_PREFIX):
                file_paths.append(text[len(FILE_HEADER_PREFIX):].strip())
                # Once file content has started, stop looking for a tree header.
                state = "READING_CONTENT"
            elif state == "LOOKING_FOR_TREE" and stripped == TREE_HEADER_TEXT:
                state = "READING_TREE"

            previous_was_separator = is_separator

    return {"tree_lines": tree_lines, "file_paths": file_paths}


# --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---


def process_project(
    root_dir_param: Optional[str] = None,
    output_file_name: str = "project_output.txt",
    mode: ProjectMode = ProjectMode.FILTER,
    file_types: Optional[List[str]] = None,
    ignore_dirs_in_path: Optional[List[str]] = None,
    language_presets: Optional[List[LanguagePreset]] = None,
    ignore_presets: Optional[List[IgnorePreset]] = None,
    whitelist_filename_substrings: Optional[List[str]] = None,
    ignore_filename_substrings: Optional[List[str]] = None,
    generate_tree: bool = True,
    search_keywords: Optional[List[str]] = None,
    search_file_contents: bool = False,
    full_path_compare: bool = True,
    max_workers: Optional[int] = None,
    tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
    tree_style_t_connector: Optional[str] = None,
    tree_style_l_connector: Optional[str] = None,
    tree_style_v_connector: Optional[str] = None,
    tree_style_h_spacer: Optional[str] = None,
    show_token_count: bool = False,
    show_tree_stats: bool = False,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """Process a project directory in either FILTER or SEARCH mode.

    The root directory is validated (the process exits with status 1 when it
    is unusable), the tree style is built from the preset with any per-string
    overrides, and the work is dispatched to the mode-specific function.
    ``encoding``, ``separator_char`` and ``separator_line_len`` are forwarded
    in both modes.

    Args:
        root_dir_param: Project root; ``None`` selects the current directory.
        output_file_name: Destination path of the snapshot.
        mode: ``ProjectMode.FILTER`` or ``ProjectMode.SEARCH``.
        file_types: Extensions / exact filenames to include.
        ignore_dirs_in_path: Path components to skip.
        language_presets: Presets extending ``file_types``.
        ignore_presets: Presets extending the ignore lists.
        whitelist_filename_substrings: FILTER mode: required filename substrings.
        ignore_filename_substrings: FILTER mode: rejected filename substrings.
        generate_tree: FILTER mode: whether to render the directory tree.
        search_keywords: SEARCH mode: keywords to look for (required).
        search_file_contents: SEARCH mode: also scan file contents.
        full_path_compare: SEARCH mode: compare against full paths.
        max_workers: SEARCH mode: thread pool size.
        tree_style_preset: Base tree style.
        tree_style_t_connector: Override for the preset's ``t_connector``.
        tree_style_l_connector: Override for the preset's ``l_connector``.
        tree_style_v_connector: Override for the preset's ``v_connector``.
        tree_style_h_spacer: Override for the preset's ``h_spacer``.
        show_token_count: Report the approximate token count.
        show_tree_stats: Annotate the tree with statistics.
        encoding: Encoding for reading inputs and writing the output.
        separator_char: Character used for separator lines.
        separator_line_len: Length of separator lines.
    """
    actual_root_dir = validate_root_directory(root_dir_param)
    if actual_root_dir is None:
        sys.exit(1)

    style = tree_style_preset.to_style()
    # An override that is None or empty falls back to the preset's string.
    final_style = TreeStyle(
        t_connector=tree_style_t_connector or style.t_connector,
        l_connector=tree_style_l_connector or style.l_connector,
        v_connector=tree_style_v_connector or style.v_connector,
        h_spacer=tree_style_h_spacer or style.h_spacer,
    )

    print(f"--- Starting Project Processing in {mode.name} Mode ---")
    if mode == ProjectMode.FILTER:
        filter_and_append_content(
            actual_root_dir,
            output_file_name,
            final_style,
            generate_tree,
            file_types,
            whitelist_filename_substrings,
            ignore_filename_substrings,
            ignore_dirs_in_path,
            language_presets,
            ignore_presets,
            encoding,
            separator_char,
            separator_line_len,
            show_token_count,
            show_tree_stats,
        )
    elif mode == ProjectMode.SEARCH:
        if not search_keywords:
            print("Error: Search mode requires 'search_keywords' to be provided.")
            return
        search_and_collate_content(
            actual_root_dir,
            search_keywords,
            output_file_name,
            final_style,
            file_types,
            ignore_dirs_in_path,
            language_presets,
            ignore_presets,
            search_file_contents,
            max_workers,
            full_path_compare,
            show_token_count,
            show_tree_stats,
            encoding=encoding,
            separator_char=separator_char,
            separator_line_len=separator_line_len,
        )
    print("--- Script Execution Finished ---")
811
+
812
+
813
def filter_project(
    root_dir_param: Optional[str] = None,
    output_file_name: str = "project_filter_output.txt",
    file_types: Optional[List[str]] = None,
    ignore_dirs_in_path: Optional[List[str]] = None,
    language_presets: Optional[List[LanguagePreset]] = None,
    ignore_presets: Optional[List[IgnorePreset]] = None,
    whitelist_filename_substrings: Optional[List[str]] = None,
    ignore_filename_substrings: Optional[List[str]] = None,
    generate_tree: bool = True,
    tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
    tree_style_t_connector: Optional[str] = None,
    tree_style_l_connector: Optional[str] = None,
    tree_style_v_connector: Optional[str] = None,
    tree_style_h_spacer: Optional[str] = None,
    show_token_count: bool = False,
    show_tree_stats: bool = False,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """Convenience wrapper: run ``process_project`` with ``mode=ProjectMode.FILTER``.

    Every argument is forwarded unchanged under the same keyword name.
    """
    selection_options = {
        "file_types": file_types,
        "ignore_dirs_in_path": ignore_dirs_in_path,
        "language_presets": language_presets,
        "ignore_presets": ignore_presets,
        "whitelist_filename_substrings": whitelist_filename_substrings,
        "ignore_filename_substrings": ignore_filename_substrings,
    }
    tree_options = {
        "generate_tree": generate_tree,
        "tree_style_preset": tree_style_preset,
        "tree_style_t_connector": tree_style_t_connector,
        "tree_style_l_connector": tree_style_l_connector,
        "tree_style_v_connector": tree_style_v_connector,
        "tree_style_h_spacer": tree_style_h_spacer,
        "show_tree_stats": show_tree_stats,
    }
    output_options = {
        "show_token_count": show_token_count,
        "encoding": encoding,
        "separator_char": separator_char,
        "separator_line_len": separator_line_len,
    }
    process_project(
        root_dir_param=root_dir_param,
        output_file_name=output_file_name,
        mode=ProjectMode.FILTER,
        **selection_options,
        **tree_options,
        **output_options,
    )
857
+
858
+
859
def find_in_project(
    root_dir_param: Optional[str] = None,
    output_file_name: str = "project_search_output.txt",
    search_keywords: Optional[List[str]] = None,
    file_extensions_to_check: Optional[List[str]] = None,
    ignore_dirs_in_path: Optional[List[str]] = None,
    language_presets: Optional[List[LanguagePreset]] = None,
    ignore_presets: Optional[List[IgnorePreset]] = None,
    search_file_contents: bool = False,
    full_path_compare: bool = True,
    max_workers: Optional[int] = None,
    tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
    tree_style_t_connector: Optional[str] = None,
    tree_style_l_connector: Optional[str] = None,
    tree_style_v_connector: Optional[str] = None,
    tree_style_h_spacer: Optional[str] = None,
    show_token_count: bool = False,
    show_tree_stats: bool = False,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """Convenience wrapper: run ``process_project`` with ``mode=ProjectMode.SEARCH``.

    Prints an error and returns without calling ``process_project`` when
    ``search_keywords`` is missing or empty. ``file_extensions_to_check`` is
    forwarded as ``file_types``; all other arguments keep their names.
    """
    if not search_keywords:
        print("Error: 'search_keywords' must be provided for find_in_project.")
        return

    search_options = {
        "search_keywords": search_keywords,
        "search_file_contents": search_file_contents,
        "full_path_compare": full_path_compare,
        "max_workers": max_workers,
    }
    selection_options = {
        "file_types": file_extensions_to_check,
        "ignore_dirs_in_path": ignore_dirs_in_path,
        "language_presets": language_presets,
        "ignore_presets": ignore_presets,
    }
    tree_options = {
        "tree_style_preset": tree_style_preset,
        "tree_style_t_connector": tree_style_t_connector,
        "tree_style_l_connector": tree_style_l_connector,
        "tree_style_v_connector": tree_style_v_connector,
        "tree_style_h_spacer": tree_style_h_spacer,
        "show_tree_stats": show_tree_stats,
    }
    output_options = {
        "show_token_count": show_token_count,
        "encoding": encoding,
        "separator_char": separator_char,
        "separator_line_len": separator_line_len,
    }
    process_project(
        root_dir_param=root_dir_param,
        output_file_name=output_file_name,
        mode=ProjectMode.SEARCH,
        **selection_options,
        **search_options,
        **tree_options,
        **output_options,
    )
908
+
909
+
910
# Public names of this module (the package ``__init__`` re-exports the same set).
__all__ = [
    "process_project",
    "filter_project",
    "find_in_project",
    "deconstruct_snapshot",
    "ProjectMode",
    "LanguagePreset",
    "IgnorePreset",
    "TreeStylePreset",
]

if __name__ == "__main__":
    # Demo run: snapshot the current directory with custom filters, tree
    # statistics and the token count enabled.
    print("\n--- Running a custom filter scan with new stats format ---")
    demo_file_types = [".py", "requirements.txt", ".sql", ".md"]
    demo_ignored_dirs = ["venv", "build", "node_modules", "static", "templates"]
    filter_project(
        root_dir_param=".",
        output_file_name="custom_snapshot_readable.txt",
        file_types=demo_file_types,
        ignore_dirs_in_path=demo_ignored_dirs,
        show_tree_stats=True,
        show_token_count=True,
    )
dirshot/examples.py ADDED
@@ -0,0 +1,65 @@
1
"""Runnable usage examples for dirshot.

Only one example should be active at a time: leave the one you want to run
uncommented and keep the others commented out.
"""

# Import from the ``dirshot`` package itself. The previous
# ``from src.dirshot.dirshot import *`` pointed at a repository-relative
# ``src.`` path that is not part of the installed distribution, so the module
# failed with ModuleNotFoundError outside the original source checkout.
# ``find_in_project`` and ``LanguagePreset`` are only used by the
# commented-out examples below; they stay imported so those examples work as
# soon as they are uncommented.
from dirshot import (  # noqa: F401
    IgnorePreset,
    LanguagePreset,
    filter_project,
    find_in_project,
)

if __name__ == "__main__":
    # To run a specific example, make sure it is NOT commented out,
    # and the other examples ARE commented out.

    # --- Example 1: Search with NO Presets (Custom Filters) ---
    # Goal: Find the words "API" or "Controller" inside any .java or .js file,
    # while manually ignoring common dependency/build folders.
    # print("\n--- Example 1: Running a custom search with NO presets ---")
    # find_in_project(
    #     root_dir_param="example_project",
    #     output_file_name="search_custom_results.txt",
    #     search_keywords=["API", "Controller"],
    #
    #     # --- NO language_presets ---
    #     # Manually define which file types to scan
    #     file_extensions_to_check=[".java", ".js"],
    #
    #     # --- NO ignore_presets ---
    #     # Manually define which directories to skip
    #     ignore_dirs_in_path=["node_modules", "build", "venv"],
    #
    #     search_file_contents=True,
    #     show_tree_stats=True,
    #     show_token_count=True,
    # )

    # --- Example 2: Search with Python Presets ---
    # Goal: Find the word "Flask" inside any Python-related file.
    # The presets will automatically handle file types and ignore folders like 'venv'.
    # print("\n--- Example 2: Running a search with Python presets ---")
    # find_in_project(
    #     root_dir_param=".",
    #     output_file_name="search_python_preset_results.txt",
    #     search_keywords=["Flask"],
    #
    #     # Use presets for convenience
    #     language_presets=[LanguagePreset.PYTHON],
    #     ignore_presets=[IgnorePreset.PYTHON_ENV],
    #
    #     search_file_contents=True,
    #     show_tree_stats=True,
    #     show_token_count=True,
    # )

    # --- Example 3: Snapshot of .py Files Using Ignore Presets ---
    # Goal: Write every .py file under the current directory to snapshot.txt,
    # skipping ".git" plus whatever the listed ignore presets cover.
    filter_project(
        root_dir_param=".",
        output_file_name="snapshot.txt",
        file_types=[".py"],
        # Use presets to define the scope of the snapshot
        # language_presets=[LanguagePreset.PYTHON],
        ignore_dirs_in_path=[".git"],
        ignore_presets=[
            IgnorePreset.PYTHON_ENV,
            IgnorePreset.NODE_MODULES,
            IgnorePreset.BUILD_ARTIFACTS,
        ],
        show_tree_stats=False,
        show_token_count=True,
    )
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: dirshot
3
+ Version: 0.1.0
4
+ Summary: A flexible utility for creating project snapshots and searching for files.
5
+ Author-email: Your Name <youremail@example.com>
6
+ Project-URL: Homepage, https://github.com/yourusername/dirshot
7
+ Project-URL: Bug Tracker, https://github.com/yourusername/dirshot/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: tqdm
14
+
15
+ # Dirshot: A Flexible Project Snapshot and Search Tool
16
+
17
+ Dirshot is a Python utility that creates snapshots of a project's directory structure and file contents. It can operate in two modes: filtering files based on their type and path, or searching for files based on keywords in their name or content.
18
+
19
+ The script generates a single output file containing a directory tree and the concatenated text of the selected files. This is useful for quickly gathering project context for code analysis, sharing with collaborators, or providing to a Large Language Model (LLM).
20
+
21
+ ## Key Features
22
+
23
+ * **Two Operating Modes**:
24
+ * **Filter Mode**: Create a snapshot of your project by filtering files based on extensions, filenames, and directory paths.
25
+ * **Search Mode**: Search for files containing specific keywords in their name, path, or content.
26
+ * **Customizable Filtering**:
27
+ * Use language presets for popular languages (Python, JavaScript, Java, etc.).
28
+ * Use ignore presets to exclude common files and directories (e.g., `.git`, `node_modules`, `__pycache__`).
29
+ * Define custom file types, and whitelist/blacklist substrings in filenames and paths.
30
+ * **Flexible Tree Generation**:
31
+ * Display a directory tree in various styles (Unicode, ASCII, Compact).
32
+ * Show statistics for included/matched files and directories in the tree.
33
+ * **Content Collation**:
34
+ * Concatenates the content of all selected files into a single output file.
35
+ * Optionally display an approximated token/character count.
36
+ * **Snapshot Deconstruction**:
37
+ * A utility function to parse a generated snapshot file and extract the directory tree and file paths.
38
+
39
+ ## Installation
40
+
41
+ You can install Dirshot from PyPI:
42
+
43
+ ```bash
44
+ pip install dirshot
45
+ ```
46
+
47
+
48
+
49
+ ## Usage
50
+
51
+ Here are some examples of how to use Dirshot in your own Python scripts.
52
+
53
+ ### Example 1: Creating a Snapshot with Presets (Filter Mode)
54
+
55
+ This example creates a snapshot of a Python project, ignoring common virtual-environment, `node_modules`, and build-artifact directories.
56
+
57
+ ```python
58
+ from dirshot import filter_project, LanguagePreset, IgnorePreset
59
+
60
+ filter_project(
61
+ root_dir_param=".",
62
+ output_file_name="project_snapshot.txt",
63
+ language_presets=[LanguagePreset.PYTHON],
64
+ ignore_presets=[
65
+ IgnorePreset.PYTHON_ENV,
66
+ IgnorePreset.NODE_MODULES,
67
+ IgnorePreset.BUILD_ARTIFACTS,
68
+ ],
69
+ show_token_count=True,
70
+ )
71
+ ```
72
+
73
+ ### Example 2: Searching for Keywords in a Project (Search Mode)
74
+
75
+ This example searches for the keywords "API" or "Controller" within `.java` and `.js` files.
76
+
77
+ ```python
78
+ from dirshot import find_in_project
79
+
80
+ find_in_project(
81
+ root_dir_param="example_project",
82
+ output_file_name="search_results.txt",
83
+ search_keywords=["API", "Controller"],
84
+ file_extensions_to_check=[".java", ".js"],
85
+ ignore_dirs_in_path=["node_modules", "build"],
86
+ search_file_contents=True,
87
+ show_tree_stats=True,
88
+ )
89
+ ```
90
+
91
+ ### Deconstructing a Snapshot
92
+
93
+ You can also parse a previously generated snapshot file to extract the directory structure and the list of included files.
94
+
95
+ ```python
96
+ from dirshot import deconstruct_snapshot
97
+
98
+ snapshot_data = deconstruct_snapshot("project_snapshot.txt")
99
+ print("Directory Tree:")
100
+ for line in snapshot_data["tree_lines"]:
101
+ print(line)
102
+
103
+ print("\nIncluded Files:")
104
+ for file_path in snapshot_data["file_paths"]:
105
+ print(file_path)
106
+ ```
107
+
108
+ ## Contributing
109
+
110
+ Contributions are welcome! Please feel free to submit a pull request or open an issue on the project's GitHub repository.
@@ -0,0 +1,7 @@
1
+ dirshot/__init__.py,sha256=ss4HC5VTyD9j6GFGCLMU6VxPlXy0qaGFzXlZB3_d2WM,403
2
+ dirshot/dirshot.py,sha256=ItCwC4BsSbPzBLlHddiFlYsqdB3Hh3PEpwN89EuplIc,34693
3
+ dirshot/examples.py,sha256=q--iNqxmA4xX8nyXYdOP-HPsqzpLHBFo1PTseQ9ki7M,2344
4
+ dirshot-0.1.0.dist-info/METADATA,sha256=IxRaINwm34SoIrMoBRO0y_oMNhg0KP21ww4wkYqKA5M,4168
5
+ dirshot-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ dirshot-0.1.0.dist-info/top_level.txt,sha256=ROGW8gTcmwJ2jJ1Fp7TV1REZLRUGbL3L-Lfoy8tPxOA,8
7
+ dirshot-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ dirshot