dirshot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dirshot/__init__.py +21 -0
- dirshot/dirshot.py +931 -0
- dirshot/examples.py +65 -0
- dirshot-0.1.0.dist-info/METADATA +110 -0
- dirshot-0.1.0.dist-info/RECORD +7 -0
- dirshot-0.1.0.dist-info/WHEEL +5 -0
- dirshot-0.1.0.dist-info/top_level.txt +1 -0
dirshot/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .dirshot import (
|
|
2
|
+
process_project,
|
|
3
|
+
filter_project,
|
|
4
|
+
find_in_project,
|
|
5
|
+
deconstruct_snapshot,
|
|
6
|
+
ProjectMode,
|
|
7
|
+
LanguagePreset,
|
|
8
|
+
IgnorePreset,
|
|
9
|
+
TreeStylePreset,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"process_project",
|
|
14
|
+
"filter_project",
|
|
15
|
+
"find_in_project",
|
|
16
|
+
"deconstruct_snapshot",
|
|
17
|
+
"ProjectMode",
|
|
18
|
+
"LanguagePreset",
|
|
19
|
+
"IgnorePreset",
|
|
20
|
+
"TreeStylePreset",
|
|
21
|
+
]
|
dirshot/dirshot.py
ADDED
|
@@ -0,0 +1,931 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
from io import StringIO
|
|
10
|
+
|
|
11
|
+
# --- TQDM Dependency Handler ---
|
|
12
|
+
try:
    from tqdm import tqdm
except ImportError:

    class tqdm:
        """No-op stand-in used when the optional ``tqdm`` package is not installed.

        It offers only iteration over the wrapped iterable and inert
        ``update`` / ``set_description`` / ``close`` methods.
        """

        def __init__(self, iterable=None, **kwargs):
            # Display options (total, unit, desc, ...) are accepted and discarded.
            self.iterable = iterable

        def __iter__(self):
            return iter(self.iterable)

        def update(self, n=1):
            """Accept a progress increment and ignore it."""

        def set_description(self, desc):
            """Accept a description and ignore it."""

        def close(self):
            """Do nothing; there is no display to tear down."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# --- Configuration Constants ---
|
|
34
|
+
# Separator line drawn between sections of the output: character and width.
DEFAULT_SEPARATOR_CHAR = "-"
DEFAULT_SEPARATOR_LINE_LENGTH = 80

# Default text encoding for reading source files and writing the output file.
DEFAULT_ENCODING = "utf-8"

# Marker strings written into the output and matched again by deconstruct_snapshot.
TREE_HEADER_TEXT = "Project File Structure"
FILE_HEADER_PREFIX = "FILE: "

# Token approximation strategy: "CHAR_COUNT" or "WORD_COUNT".
TOKEN_APPROX_MODE = "CHAR_COUNT"
|
|
40
|
+
|
|
41
|
+
# --- Public Enums for Import and Usage ---
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ProjectMode(Enum):
    """Selects how a project directory is processed.

    FILTER selects files by explicit type/name criteria; SEARCH selects files
    whose path, name or content matches keywords.
    """

    FILTER = "filter"
    SEARCH = "search"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class LanguagePreset(Enum):
    """Named bundles of file types for common languages and frameworks.

    Entries starting with "." are treated as extensions; any other entry is
    treated as a complete file name when the preset is normalized.
    """

    PYTHON = [
        ".py", ".pyw",
        "setup.py", "requirements.txt", "Pipfile", "pyproject.toml",
    ]
    JAVASCRIPT = [
        ".js", ".jsx",
        ".ts", ".tsx",
        ".mjs", ".cjs",
    ]
    WEB = [
        ".html",
        ".css", ".scss", ".less",
    ]
    JAVA = [
        ".java", ".groovy", ".kt",
        ".gradle", ".properties",
    ]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class IgnorePreset(Enum):
    """Named bundles of names to exclude from processing.

    When a preset is normalized, every entry is used both as a path component
    to skip and as a filename substring to reject.
    """

    VERSION_CONTROL = [
        ".git", ".svn", ".hg",
        ".idea",
    ]
    NODE_MODULES = [
        "node_modules",
        "package-lock.json", "yarn.lock",
    ]
    PYTHON_ENV = [
        "__pycache__",
        "venv", ".venv", "env",
        "lib", "bin",
    ]
    BUILD_ARTIFACTS = [
        "dist", "build", "target", "out",
        "temp", "tmp",
    ]
    TEST_FILES = [
        "test", "spec",
        "fixture", "example", "mock",
    ]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class TreeStylePreset(Enum):
    """Predefined character sets for directory tree rendering.

    Each value is ``(t_connector, l_connector, v_connector, h_spacer)``.
    All four parts of a preset are the same width: a continuation prefix
    (``v_connector`` / ``h_spacer``) must occupy exactly as many columns as the
    connector drawn above it, otherwise nested entries do not line up.
    """

    UNICODE = ("├── ", "└── ", "│   ", "    ")
    ASCII = ("|-- ", "+-- ", "|   ", "    ")
    COMPACT = ("|---", "`---", "|   ", "    ")

    def to_style(self) -> "TreeStyle":
        """Return this preset's characters as a ``TreeStyle``."""
        return TreeStyle(*self.value)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class TreeStyle(NamedTuple):
    """Character set used to draw one directory tree."""

    # Drawn before an entry that has further siblings below it.
    t_connector: str
    # Drawn before the last entry of a directory.
    l_connector: str
    # Prefix carried into the children of a directory that is not the last entry.
    v_connector: str
    # Prefix carried into the children of a directory that is the last entry.
    h_spacer: str
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# --- Helper Data Structures ---
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass
class FilterCriteria:
    """Holds normalized filter criteria for files and directories.

    Every entry is stored lower-cased and without surrounding whitespace so it
    can be compared directly against lower-cased path parts and file names.
    """

    # Extensions including the leading dot, e.g. ".py".
    file_extensions: Set[str] = field(default_factory=set)
    # Complete file names accepted regardless of extension, e.g. "pipfile".
    exact_filenames: Set[str] = field(default_factory=set)
    # If non-empty, a file name must contain at least one of these substrings.
    whitelist_fname_substrings: Set[str] = field(default_factory=set)
    # A file name containing any of these substrings is rejected.
    ignore_fname_substrings: Set[str] = field(default_factory=set)
    # A path with any part equal to one of these names is rejected.
    ignore_path_components: Set[str] = field(default_factory=set)

    @classmethod
    def normalize_inputs(
        cls,
        file_types: Optional[List[str]],
        whitelist_substrings: Optional[List[str]],
        ignore_filename_substrings: Optional[List[str]],
        ignore_path_components_list: Optional[List[str]],
        language_presets: Optional[List["LanguagePreset"]] = None,
        ignore_presets: Optional[List["IgnorePreset"]] = None,
    ) -> "FilterCriteria":
        """Build criteria from raw user input and presets.

        Args:
            file_types: Extensions (leading ".") and/or exact file names.
            whitelist_substrings: Substrings a file name must contain.
            ignore_filename_substrings: Substrings that reject a file name.
            ignore_path_components_list: Path part names to skip.
            language_presets: Presets whose values are added to ``file_types``.
            ignore_presets: Presets whose values are added to both the ignored
                path components and the ignored filename substrings.

        Returns:
            A ``FilterCriteria`` whose sets hold lower-cased, stripped,
            non-empty strings.
        """

        def _clean(values) -> Set[str]:
            # One normaliser for every input, so padded entries such as
            # " node_modules " still match a path part; blanks are dropped.
            cleaned = (value.lower().strip() for value in values or ())
            return {value for value in cleaned if value}

        type_entries = set(file_types or [])
        for preset in language_presets or ():
            type_entries.update(preset.value)

        ignore_paths = set(ignore_path_components_list or [])
        ignore_fnames = set(ignore_filename_substrings or [])
        for preset in ignore_presets or ():
            ignore_paths.update(preset.value)
            ignore_fnames.update(preset.value)

        normalized_types = _clean(type_entries)
        return cls(
            file_extensions={t for t in normalized_types if t.startswith(".")},
            exact_filenames={t for t in normalized_types if not t.startswith(".")},
            whitelist_fname_substrings=_clean(whitelist_substrings),
            ignore_fname_substrings=_clean(ignore_fnames),
            ignore_path_components=_clean(ignore_paths),
        )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class FileToProcess(NamedTuple):
    """A file chosen for inclusion in the collated output."""

    # Location used to open and read the file.
    absolute_path: Path
    # Path relative to the project root, "/"-separated; written in the file header.
    relative_path_posix: str
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# --- Helper Functions ---
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def validate_root_directory(root_dir_param: Optional[str]) -> Optional[Path]:
    """Resolve the requested root directory, or report why it is unusable.

    Args:
        root_dir_param: Directory to use; a falsy value selects the current
            working directory.

    Returns:
        The fully resolved directory path, or ``None`` (after printing an
        error) when the path cannot be resolved or is not a directory.
    """
    label = root_dir_param or "current working directory"
    try:
        # strict=True makes a missing path raise instead of resolving lazily.
        resolved_path = Path(root_dir_param or Path.cwd()).resolve(strict=True)
    except Exception as e:
        print(f"Error: Could not resolve root directory '{label}': {e}")
        return None

    if resolved_path.is_dir():
        return resolved_path

    print(f"Error: Root path '{resolved_path}' is not a directory.")
    return None
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _should_include_entry(
|
|
184
|
+
entry_path: Path,
|
|
185
|
+
root_dir: Path,
|
|
186
|
+
criteria: FilterCriteria,
|
|
187
|
+
is_dir: bool,
|
|
188
|
+
log_func: Optional[Callable[[str], None]] = None,
|
|
189
|
+
) -> bool:
|
|
190
|
+
try:
|
|
191
|
+
relative_path = entry_path.relative_to(root_dir)
|
|
192
|
+
except ValueError:
|
|
193
|
+
return False
|
|
194
|
+
entry_name_lower = entry_path.name.lower()
|
|
195
|
+
if criteria.ignore_path_components and any(
|
|
196
|
+
part.lower() in criteria.ignore_path_components for part in relative_path.parts
|
|
197
|
+
):
|
|
198
|
+
return False
|
|
199
|
+
if is_dir:
|
|
200
|
+
return True
|
|
201
|
+
file_ext_lower = entry_path.suffix.lower()
|
|
202
|
+
matched_type = (file_ext_lower in criteria.file_extensions) or (
|
|
203
|
+
entry_name_lower in criteria.exact_filenames
|
|
204
|
+
)
|
|
205
|
+
if not criteria.file_extensions and not criteria.exact_filenames:
|
|
206
|
+
matched_type = True
|
|
207
|
+
if not matched_type:
|
|
208
|
+
return False
|
|
209
|
+
if criteria.whitelist_fname_substrings and not any(
|
|
210
|
+
sub in entry_name_lower for sub in criteria.whitelist_fname_substrings
|
|
211
|
+
):
|
|
212
|
+
return False
|
|
213
|
+
if criteria.ignore_fname_substrings and any(
|
|
214
|
+
sub in entry_name_lower for sub in criteria.ignore_fname_substrings
|
|
215
|
+
):
|
|
216
|
+
return False
|
|
217
|
+
return True
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def process_file_for_search(
    file_path: Path,
    normalized_keywords: List[str],
    search_file_contents: bool,
    full_path_compare: bool,
) -> Optional[Path]:
    """Return ``file_path`` if it matches any keyword, otherwise ``None``.

    Args:
        file_path: File to test.
        normalized_keywords: Keywords to look for; compared against
            lower-cased text, so they are expected to be lower-case already.
        search_file_contents: Also scan the file line by line when the
            path/name does not match.
        full_path_compare: Match against the whole path string instead of
            only the file name.
    """

    def _hits(text: str) -> bool:
        lowered = text.lower()
        return any(key in lowered for key in normalized_keywords)

    if _hits(str(file_path) if full_path_compare else file_path.name):
        return file_path

    if not search_file_contents:
        return None

    try:
        # Undecodable bytes are skipped so binary-ish files do not abort the scan.
        with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
            if any(_hits(line) for line in f):
                return file_path
    except (IOError, OSError):
        # Unreadable files are treated as non-matching.
        pass
    return None
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _calculate_total_stats(
|
|
241
|
+
root_dir: Path, criteria: FilterCriteria
|
|
242
|
+
) -> Dict[Path, Tuple[int, int]]:
|
|
243
|
+
stats: Dict[Path, Tuple[int, int]] = {}
|
|
244
|
+
for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
|
|
245
|
+
current_dir = Path(dirpath_str)
|
|
246
|
+
all_children = [current_dir / d for d in dirnames] + [
|
|
247
|
+
current_dir / f for f in filenames
|
|
248
|
+
]
|
|
249
|
+
total_files, total_dirs = 0, 0
|
|
250
|
+
for child_path in all_children:
|
|
251
|
+
try:
|
|
252
|
+
is_dir = child_path.is_dir()
|
|
253
|
+
except OSError:
|
|
254
|
+
continue
|
|
255
|
+
if criteria.ignore_path_components:
|
|
256
|
+
try:
|
|
257
|
+
relative_path = child_path.relative_to(root_dir)
|
|
258
|
+
except ValueError:
|
|
259
|
+
continue
|
|
260
|
+
if any(
|
|
261
|
+
part.lower() in criteria.ignore_path_components
|
|
262
|
+
for part in relative_path.parts
|
|
263
|
+
):
|
|
264
|
+
continue
|
|
265
|
+
if is_dir:
|
|
266
|
+
total_dirs += 1
|
|
267
|
+
else:
|
|
268
|
+
total_files += 1
|
|
269
|
+
stats[current_dir] = (total_files, total_dirs)
|
|
270
|
+
dirnames[:] = [
|
|
271
|
+
d
|
|
272
|
+
for d in dirnames
|
|
273
|
+
if (current_dir / d).name.lower() not in criteria.ignore_path_components
|
|
274
|
+
]
|
|
275
|
+
return stats
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# --- Tree Generation Functions ---
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _generate_tree_lines(
    root_dir: Path, criteria: "FilterCriteria", style: "TreeStyle", show_stats: bool
) -> List[str]:
    """Render the filtered directory tree under ``root_dir`` as text lines.

    Args:
        root_dir: Directory whose name forms the first line of the tree.
        criteria: Filter deciding which entries are displayed.
        style: Connector and spacer strings used for drawing.
        show_stats: Append ``[I: ... | T: ...]`` counts to directory names,
            where I counts displayed children and T counts all children found
            by ``_calculate_total_stats``.

    Returns:
        One string per tree line, root first.
    """
    dir_stats: Optional[Dict[Path, Tuple[int, int]]] = None
    if show_stats:
        dir_stats = _calculate_total_stats(root_dir, criteria)
    tree_lines: List[str] = []

    def _sorted_entries(directory: Path) -> List[Path]:
        return sorted(directory.iterdir(), key=lambda p: p.name.lower())

    def _tally(children: List[Tuple[Path, bool]]) -> Tuple[int, int]:
        # Returns (files, dirs) among (path, is_dir) pairs.
        n_dirs = sum(1 for _, flag in children if flag)
        return len(children) - n_dirs, n_dirs

    def _label(path: Path, children: List[Tuple[Path, bool]]) -> str:
        if not show_stats or not dir_stats:
            return path.name
        included_files, included_dirs = _tally(children)
        total_files, total_dirs = dir_stats.get(path, (0, 0))
        return (
            path.name
            + f" [I: {included_files}f, {included_dirs}d | T: {total_files}f, {total_dirs}d]"
        )

    def _walk(directory: Path, prefix_parts: List[str]) -> None:
        try:
            entries = _sorted_entries(directory)
        except OSError as exc:
            tree_lines.append(
                "".join(prefix_parts)
                + style.l_connector
                + f"[Error accessing: {directory.name} - {exc.strerror}]"
            )
            return

        visible: List[Tuple[Path, bool]] = []
        for entry in entries:
            try:
                entry_is_dir = entry.is_dir()
            except OSError:
                continue
            if _should_include_entry(
                entry, root_dir, criteria, is_dir=entry_is_dir, log_func=None
            ):
                visible.append((entry, entry_is_dir))

        # Only the top-level call has an empty prefix; it emits the root line.
        if not prefix_parts:
            tree_lines.append(_label(directory, visible))

        last_index = len(visible) - 1
        for index, (child, child_is_dir) in enumerate(visible):
            is_last = index == last_index
            label = child.name
            if child_is_dir:
                # Peek at the child's own visible entries to label it with
                # counts; if it cannot be read, the plain name is kept and the
                # recursive call below reports the error.
                try:
                    grandchildren = [
                        (g, g.is_dir())
                        for g in _sorted_entries(child)
                        if _should_include_entry(
                            g, root_dir, criteria, is_dir=g.is_dir(), log_func=None
                        )
                    ]
                    label = _label(child, grandchildren)
                except OSError:
                    pass

            connector = style.l_connector if is_last else style.t_connector
            tree_lines.append("".join(prefix_parts) + connector + label)

            if child_is_dir:
                continuation = style.h_spacer if is_last else style.v_connector
                _walk(child, prefix_parts + [continuation])

    _walk(root_dir, [])
    return tree_lines
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _generate_tree_from_paths(
|
|
375
|
+
root_dir: Path, file_paths: List[Path], style: TreeStyle, show_stats: bool
|
|
376
|
+
) -> List[str]:
|
|
377
|
+
"""Generates a directory tree structure from a list of *matched* file paths using the given style."""
|
|
378
|
+
tree_dict: Dict[str, Any] = {}
|
|
379
|
+
matched_paths = {p.relative_to(root_dir) for p in file_paths}
|
|
380
|
+
for rel_path in matched_paths:
|
|
381
|
+
parts = rel_path.parts
|
|
382
|
+
current_level = tree_dict
|
|
383
|
+
for part in parts:
|
|
384
|
+
current_level = current_level.setdefault(part, {})
|
|
385
|
+
tree_lines: List[str] = []
|
|
386
|
+
|
|
387
|
+
def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
|
|
388
|
+
if not show_stats:
|
|
389
|
+
return name
|
|
390
|
+
|
|
391
|
+
stats_str = f" [M: {matched_files}f, {matched_dirs}d]"
|
|
392
|
+
return name + stats_str
|
|
393
|
+
|
|
394
|
+
def build_lines(d: Dict[str, Any], prefix: str):
|
|
395
|
+
items = sorted(d.keys(), key=lambda k: (len(d[k]) == 0, k.lower()))
|
|
396
|
+
num_children = len(items)
|
|
397
|
+
matched_files_in_level = sum(1 for k in items if not d[k])
|
|
398
|
+
matched_dirs_in_level = sum(1 for k in items if d[k])
|
|
399
|
+
if not prefix:
|
|
400
|
+
tree_lines.append(
|
|
401
|
+
format_dir_name_search(
|
|
402
|
+
root_dir.name, matched_files_in_level, matched_dirs_in_level
|
|
403
|
+
)
|
|
404
|
+
)
|
|
405
|
+
for i, name in enumerate(items):
|
|
406
|
+
is_last = i == num_children - 1
|
|
407
|
+
connector = style.l_connector if is_last else style.t_connector
|
|
408
|
+
entry_name = name
|
|
409
|
+
if d[name]:
|
|
410
|
+
child_matched_files = sum(1 for k in d[name] if not d[name][k])
|
|
411
|
+
child_matched_dirs = sum(1 for k in d[name] if d[name][k])
|
|
412
|
+
entry_name = format_dir_name_search(
|
|
413
|
+
name, child_matched_files, child_matched_dirs
|
|
414
|
+
)
|
|
415
|
+
tree_lines.append(prefix + connector + entry_name)
|
|
416
|
+
if d[name]:
|
|
417
|
+
extension = style.h_spacer if is_last else style.v_connector
|
|
418
|
+
build_lines(d[name], prefix + extension)
|
|
419
|
+
|
|
420
|
+
build_lines(tree_dict, "")
|
|
421
|
+
return tree_lines
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
# --- Collation and Main Modes ---
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _collate_content_to_file(
    output_file_path_str: str,
    tree_content_lines: Optional[List[str]],
    files_to_process: List["FileToProcess"],
    encoding: str,
    separator_char: str,
    separator_line_len: int,
    show_token_count: bool,
    show_tree_stats: bool,
    mode: "ProjectMode",
) -> None:
    """Assemble the tree and file contents in memory, then write the output file.

    Args:
        output_file_path_str: Destination path; parent directories are created.
        tree_content_lines: Pre-rendered tree lines, or a falsy value to omit
            the tree section.
        files_to_process: Files whose contents are appended, in this order.
        encoding: Encoding used to read the files and write the output.
        separator_char: Character repeated to form separator lines.
        separator_line_len: Number of repetitions per separator line.
        show_token_count: Prepend an approximate token count to the file and
            print it to the console.
        show_tree_stats: Emit the legend explaining the tree statistics.
        mode: Selects which legend text is used.
    """
    output_file_path = Path(output_file_path_str).resolve()
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    separator_line = separator_char * separator_line_len

    # Everything is collected first so the token count can cover the whole body.
    chunks: List[str] = []

    if tree_content_lines:
        chunks.append(f"{TREE_HEADER_TEXT}\n{separator_line}\n\n")
        if show_tree_stats:
            if mode == ProjectMode.FILTER:
                chunks.append(
                    "Key: [I: Included f/d | T: Total f/d in original dir]\n"
                    " (f=files, d=directories)\n\n"
                )
            else:  # ProjectMode.SEARCH
                chunks.append(
                    "Key: [M: Matched files/dirs]\n"
                    " (f=files, d=directories)\n\n"
                )
        chunks.append("\n".join(tree_content_lines) + "\n")
        chunks.append(f"\n{separator_line}\n\n")

    for file_info in files_to_process:
        chunks.append(
            f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
        )
        try:
            with open(
                file_info.absolute_path, "r", encoding=encoding, errors="replace"
            ) as infile:
                file_content = infile.read()
        except Exception:
            chunks.append(
                f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
            )
        else:
            chunks.append(file_content)
            chunks.append("\n\n")

    if not files_to_process and not tree_content_lines:
        chunks.append(
            "No files found matching the specified criteria for content aggregation.\n"
        )

    final_content = "".join(chunks)

    mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
    total_token_count = 0
    if show_token_count:
        if TOKEN_APPROX_MODE == "CHAR_COUNT":
            total_token_count = len(final_content)
        elif TOKEN_APPROX_MODE == "WORD_COUNT":
            total_token_count = len(final_content.split())

    try:
        with open(output_file_path, "w", encoding=encoding) as outfile:
            if show_token_count:
                # The count goes first in the file, ahead of the collated body.
                outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
            outfile.write(final_content)
    except IOError as e:
        print(f"Error: Could not write to output file '{output_file_path}': {e}")
        return

    print(f"\nProcess complete. Output written to: {output_file_path}")
    if show_token_count:
        print(f"Total Approximated Tokens ({mode_display}): {total_token_count}")
    if len(files_to_process) > 0:
        print(
            f"Summary: {len(files_to_process)} files selected for content processing."
        )
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def filter_and_append_content(
    root_dir: Path,
    output_file_path_str: str,
    tree_style: "TreeStyle",
    generate_tree: bool,
    file_types: Optional[List[str]],
    whitelist_substrings_in_filename: Optional[List[str]],
    ignore_substrings_in_filename: Optional[List[str]],
    ignore_dirs_in_path: Optional[List[str]],
    language_presets: Optional[List["LanguagePreset"]],
    ignore_presets: Optional[List["IgnorePreset"]],
    encoding: str,
    separator_char: str,
    separator_line_len: int,
    show_token_count: bool,
    show_tree_stats: bool,
) -> None:
    """FILTER MODE: pick files by explicit criteria and write tree plus contents.

    The raw filter arguments are normalized into a ``FilterCriteria``, the
    project is walked once to collect matching files (skipping rejected
    directories entirely), and the result is handed to
    ``_collate_content_to_file``. The tree section is rendered only when
    ``generate_tree`` is true.
    """
    criteria = FilterCriteria.normalize_inputs(
        file_types=file_types,
        whitelist_substrings=whitelist_substrings_in_filename,
        ignore_filename_substrings=ignore_substrings_in_filename,
        ignore_path_components_list=ignore_dirs_in_path,
        language_presets=language_presets,
        ignore_presets=ignore_presets,
    )

    tree_content_lines: Optional[List[str]] = None
    if generate_tree:
        tree_content_lines = _generate_tree_lines(
            root_dir, criteria, tree_style, show_tree_stats
        )

    files_to_process: List["FileToProcess"] = []
    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
        here = Path(dirpath_str)
        # In-place pruning keeps os.walk out of rejected directories.
        dirnames[:] = [
            d_name
            for d_name in dirnames
            if _should_include_entry(here / d_name, root_dir, criteria, is_dir=True)
        ]
        for filename in filenames:
            candidate = here / filename
            if not _should_include_entry(candidate, root_dir, criteria, is_dir=False):
                continue
            files_to_process.append(
                FileToProcess(candidate, candidate.relative_to(root_dir).as_posix())
            )

    files_to_process.sort(key=lambda info: info.relative_path_posix.lower())

    _collate_content_to_file(
        output_file_path_str,
        tree_content_lines,
        files_to_process,
        encoding,
        separator_char,
        separator_line_len,
        show_token_count,
        show_tree_stats,
        ProjectMode.FILTER,
    )
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def search_and_collate_content(
    root_dir: Path,
    sub_string_match: List[str],
    output_file: str,
    tree_style: "TreeStyle",
    file_extensions_to_check: Optional[List[str]],
    ignore_substrings_in_path: Optional[List[str]],
    language_presets: Optional[List["LanguagePreset"]],
    ignore_presets: Optional[List["IgnorePreset"]],
    search_file_contents: bool,
    max_workers: Optional[int],
    full_path_compare: bool,
    show_token_count: bool,
    show_tree_stats: bool,
) -> None:
    """SEARCH MODE: collect files whose path/name or content matches a keyword.

    Args:
        root_dir: Project root to scan.
        sub_string_match: Keywords; matching is case-insensitive.
        output_file: Destination file for the collated result.
        tree_style: Characters used to draw the tree of matches.
        file_extensions_to_check: Extensions and/or exact file names limiting
            which files are scanned; no type criteria means every file.
        ignore_substrings_in_path: Directory names that are not descended into.
        language_presets: Presets added to the type filter.
        ignore_presets: Presets added to the ignored directory names.
        search_file_contents: Also scan file contents, not only paths/names.
        max_workers: Thread pool size (``None`` uses the executor default).
        full_path_compare: Match keywords against the full path, not just the name.
        show_token_count: Include an approximate token count in the output.
        show_tree_stats: Annotate tree directories with match counts.

    The output always uses the module default encoding and separator settings.
    """
    criteria = FilterCriteria.normalize_inputs(
        file_extensions_to_check,
        None,
        None,
        ignore_substrings_in_path,
        language_presets,
        ignore_presets,
    )
    normalized_keywords = [
        sub.lower().strip() for sub in sub_string_match if sub.strip()
    ]
    if not normalized_keywords:
        print("Error: Search mode requires 'search_keywords' to be provided.")
        return

    # A type filter exists when either extensions or exact names were given.
    # Exact names (e.g. "requirements.txt" from a language preset) must be
    # honoured too, otherwise they are silently dropped from the search.
    has_type_filter = bool(criteria.file_extensions or criteria.exact_filenames)

    candidate_files: List[Path] = []
    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
        current_dir_path = Path(dirpath_str)
        # In-place pruning keeps os.walk out of ignored directories.
        dirnames[:] = [
            d for d in dirnames if d.lower() not in criteria.ignore_path_components
        ]
        for filename in filenames:
            file_abs_path = current_dir_path / filename
            if (
                not has_type_filter
                or file_abs_path.suffix.lower() in criteria.file_extensions
                or filename.lower() in criteria.exact_filenames
            ):
                candidate_files.append(file_abs_path)

    matched_files: Set[Path] = set()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_file = {
            executor.submit(
                process_file_for_search,
                file,
                normalized_keywords,
                search_file_contents,
                full_path_compare,
            ): file
            for file in candidate_files
        }
        progress_bar = tqdm(
            as_completed(future_to_file),
            total=len(candidate_files),
            unit="file",
            desc="Scanning",
        )
        for future in progress_bar:
            result = future.result()
            if result:
                matched_files.add(result)

    tree_content_lines: Optional[List[str]] = None
    files_to_process: List["FileToProcess"] = []
    if matched_files:
        sorted_matched_files = sorted(
            matched_files, key=lambda p: p.relative_to(root_dir).as_posix().lower()
        )
        tree_content_lines = _generate_tree_from_paths(
            root_dir, sorted_matched_files, tree_style, show_tree_stats
        )
        files_to_process = [
            FileToProcess(f, f.relative_to(root_dir).as_posix())
            for f in sorted_matched_files
        ]
    else:
        # An output file is still written so the "no files found" note is recorded.
        print("\nScan complete. No matching files were found.")

    _collate_content_to_file(
        output_file,
        tree_content_lines,
        files_to_process,
        DEFAULT_ENCODING,
        DEFAULT_SEPARATOR_CHAR,
        DEFAULT_SEPARATOR_LINE_LENGTH,
        show_token_count,
        show_tree_stats,
        ProjectMode.SEARCH,
    )
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
# --- DECONSTRUCTION FUNCTION ---
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def deconstruct_snapshot(snapshot_file_path: str) -> Dict[str, Any]:
    """Extract the directory tree lines and file paths from a snapshot file.

    Args:
        snapshot_file_path: Path to a file previously written by this module
            using the default separator character.

    Returns:
        ``{"tree_lines": [...], "file_paths": [...]}``. Tree lines keep their
        leading indentation (it encodes nesting depth) and have trailing
        whitespace removed; legend lines are excluded. File paths are taken
        from the ``FILE: `` headers and are collected whether or not the
        snapshot contains a tree section.

    Raises:
        FileNotFoundError: If ``snapshot_file_path`` is not an existing file.
    """
    snapshot_path = Path(snapshot_file_path)
    if not snapshot_path.is_file():
        raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")

    tree_lines: List[str] = []
    file_paths: List[str] = []
    separator_pattern = re.compile(
        r"^[{}]{{4,}}[{}|]*$".format(
            re.escape(DEFAULT_SEPARATOR_CHAR), re.escape(DEFAULT_SEPARATOR_CHAR)
        )
    )
    legend_prefixes = ("Key:", "(f=files")

    state = "LOOKING_FOR_TREE"
    with open(snapshot_path, "r", encoding=DEFAULT_ENCODING, errors="replace") as f:
        for raw_line in f:
            line = raw_line.strip()

            if state == "READING_TREE":
                if separator_pattern.match(line):
                    # The separator directly under the header arrives before
                    # any tree line; only a later one closes the tree section.
                    if tree_lines:
                        state = "READING_CONTENT"
                    continue
                if not line or line.startswith(legend_prefixes):
                    continue
                # Keep the leading prefix: stripping it would flatten entries
                # nested under a last-child directory.
                tree_lines.append(raw_line.rstrip())
                continue

            if state == "LOOKING_FOR_TREE" and line == TREE_HEADER_TEXT:
                state = "READING_TREE"
            elif line.startswith(FILE_HEADER_PREFIX):
                file_paths.append(line[len(FILE_HEADER_PREFIX):].strip())
                # Once file content has started, a later line equal to the tree
                # header is file content, not a tree section.
                state = "READING_CONTENT"

    return {"tree_lines": tree_lines, "file_paths": file_paths}
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
# --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def process_project(
    root_dir_param: Optional[str] = None,
    output_file_name: str = "project_output.txt",
    mode: ProjectMode = ProjectMode.FILTER,
    file_types: Optional[List[str]] = None,
    ignore_dirs_in_path: Optional[List[str]] = None,
    language_presets: Optional[List[LanguagePreset]] = None,
    ignore_presets: Optional[List[IgnorePreset]] = None,
    whitelist_filename_substrings: Optional[List[str]] = None,
    ignore_filename_substrings: Optional[List[str]] = None,
    generate_tree: bool = True,
    search_keywords: Optional[List[str]] = None,
    search_file_contents: bool = False,
    full_path_compare: bool = True,
    max_workers: Optional[int] = None,
    tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
    tree_style_t_connector: Optional[str] = None,
    tree_style_l_connector: Optional[str] = None,
    tree_style_v_connector: Optional[str] = None,
    tree_style_h_spacer: Optional[str] = None,
    show_token_count: bool = False,
    show_tree_stats: bool = False,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """Process a project directory in either FILTER or SEARCH mode.

    The root directory is validated first, the tree style is resolved from
    the preset plus any per-connector overrides, and the work is then handed
    to ``filter_and_append_content`` (FILTER) or
    ``search_and_collate_content`` (SEARCH).

    Args:
        root_dir_param: Root directory, passed to ``validate_root_directory``.
        output_file_name: Name of the output file handed to the worker.
        mode: Selects the FILTER or SEARCH code path.
        file_types, ignore_dirs_in_path, language_presets, ignore_presets:
            Forwarded unchanged to the worker of either mode.
        whitelist_filename_substrings, ignore_filename_substrings,
        generate_tree: Forwarded in FILTER mode only.
        search_keywords, search_file_contents, full_path_compare,
        max_workers: Forwarded in SEARCH mode only. ``search_keywords`` must
            be non-empty in SEARCH mode.
        tree_style_preset: Base tree style.
        tree_style_t_connector, tree_style_l_connector,
        tree_style_v_connector, tree_style_h_spacer: Optional overrides for
            individual parts of the preset. ``None`` keeps the preset value;
            any string, including the empty string, replaces it.
        show_token_count, show_tree_stats: Forwarded in both modes.
        encoding, separator_char, separator_line_len: Forwarded in FILTER
            mode only.

    Raises:
        SystemExit: With status 1 when ``validate_root_directory`` returns
            ``None``.
    """
    actual_root_dir = validate_root_directory(root_dir_param)
    if actual_root_dir is None:
        sys.exit(1)

    # Compare against None rather than relying on truthiness: with
    # `override or preset` an explicit empty-string override (for example an
    # empty spacer) was silently discarded in favour of the preset value.
    style = tree_style_preset.to_style()
    final_style = TreeStyle(
        t_connector=(
            tree_style_t_connector
            if tree_style_t_connector is not None
            else style.t_connector
        ),
        l_connector=(
            tree_style_l_connector
            if tree_style_l_connector is not None
            else style.l_connector
        ),
        v_connector=(
            tree_style_v_connector
            if tree_style_v_connector is not None
            else style.v_connector
        ),
        h_spacer=(
            tree_style_h_spacer
            if tree_style_h_spacer is not None
            else style.h_spacer
        ),
    )

    print(f"--- Starting Project Processing in {mode.name} Mode ---")
    if mode == ProjectMode.FILTER:
        filter_and_append_content(
            actual_root_dir,
            output_file_name,
            final_style,
            generate_tree,
            file_types,
            whitelist_filename_substrings,
            ignore_filename_substrings,
            ignore_dirs_in_path,
            language_presets,
            ignore_presets,
            encoding,
            separator_char,
            separator_line_len,
            show_token_count,
            show_tree_stats,
        )
    elif mode == ProjectMode.SEARCH:
        if not search_keywords:
            # Early exit: the closing banner below is intentionally skipped.
            print("Error: Search mode requires 'search_keywords' to be provided.")
            return
        # NOTE(review): encoding, separator_char and separator_line_len are
        # not forwarded on this path - confirm whether
        # search_and_collate_content is meant to accept them.
        search_and_collate_content(
            actual_root_dir,
            search_keywords,
            output_file_name,
            final_style,
            file_types,
            ignore_dirs_in_path,
            language_presets,
            ignore_presets,
            search_file_contents,
            max_workers,
            full_path_compare,
            show_token_count,
            show_tree_stats,
        )
    print("--- Script Execution Finished ---")
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def filter_project(
    root_dir_param: Optional[str] = None,
    output_file_name: str = "project_filter_output.txt",
    file_types: Optional[List[str]] = None,
    ignore_dirs_in_path: Optional[List[str]] = None,
    language_presets: Optional[List[LanguagePreset]] = None,
    ignore_presets: Optional[List[IgnorePreset]] = None,
    whitelist_filename_substrings: Optional[List[str]] = None,
    ignore_filename_substrings: Optional[List[str]] = None,
    generate_tree: bool = True,
    tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
    tree_style_t_connector: Optional[str] = None,
    tree_style_l_connector: Optional[str] = None,
    tree_style_v_connector: Optional[str] = None,
    tree_style_h_spacer: Optional[str] = None,
    show_token_count: bool = False,
    show_tree_stats: bool = False,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """Run ``process_project`` with the mode fixed to ``ProjectMode.FILTER``.

    Every argument is forwarded unchanged under the same keyword name; the
    search-only parameters of ``process_project`` keep their defaults.
    """
    # What to include and what to skip.
    selection_options = {
        "file_types": file_types,
        "ignore_dirs_in_path": ignore_dirs_in_path,
        "language_presets": language_presets,
        "ignore_presets": ignore_presets,
        "whitelist_filename_substrings": whitelist_filename_substrings,
        "ignore_filename_substrings": ignore_filename_substrings,
    }
    # How the directory tree is rendered.
    tree_options = {
        "generate_tree": generate_tree,
        "tree_style_preset": tree_style_preset,
        "tree_style_t_connector": tree_style_t_connector,
        "tree_style_l_connector": tree_style_l_connector,
        "tree_style_v_connector": tree_style_v_connector,
        "tree_style_h_spacer": tree_style_h_spacer,
        "show_tree_stats": show_tree_stats,
    }
    # How the collated output is written.
    output_options = {
        "show_token_count": show_token_count,
        "encoding": encoding,
        "separator_char": separator_char,
        "separator_line_len": separator_line_len,
    }
    process_project(
        root_dir_param=root_dir_param,
        output_file_name=output_file_name,
        mode=ProjectMode.FILTER,
        **selection_options,
        **tree_options,
        **output_options,
    )
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def find_in_project(
    root_dir_param: Optional[str] = None,
    output_file_name: str = "project_search_output.txt",
    search_keywords: Optional[List[str]] = None,
    file_extensions_to_check: Optional[List[str]] = None,
    ignore_dirs_in_path: Optional[List[str]] = None,
    language_presets: Optional[List[LanguagePreset]] = None,
    ignore_presets: Optional[List[IgnorePreset]] = None,
    search_file_contents: bool = False,
    full_path_compare: bool = True,
    max_workers: Optional[int] = None,
    tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
    tree_style_t_connector: Optional[str] = None,
    tree_style_l_connector: Optional[str] = None,
    tree_style_v_connector: Optional[str] = None,
    tree_style_h_spacer: Optional[str] = None,
    show_token_count: bool = False,
    show_tree_stats: bool = False,
    encoding: str = DEFAULT_ENCODING,
    separator_char: str = DEFAULT_SEPARATOR_CHAR,
    separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
) -> None:
    """Run ``process_project`` with the mode fixed to ``ProjectMode.SEARCH``.

    ``file_extensions_to_check`` is handed to ``process_project`` as its
    ``file_types`` argument; every other argument is forwarded under its own
    name. When ``search_keywords`` is missing or empty, an error message is
    printed and nothing else happens.
    """
    if search_keywords:
        # Search scope and matching behaviour.
        search_options = {
            "file_types": file_extensions_to_check,
            "ignore_dirs_in_path": ignore_dirs_in_path,
            "language_presets": language_presets,
            "ignore_presets": ignore_presets,
            "search_keywords": search_keywords,
            "search_file_contents": search_file_contents,
            "full_path_compare": full_path_compare,
            "max_workers": max_workers,
        }
        # Tree rendering and output formatting.
        presentation_options = {
            "tree_style_preset": tree_style_preset,
            "tree_style_t_connector": tree_style_t_connector,
            "tree_style_l_connector": tree_style_l_connector,
            "tree_style_v_connector": tree_style_v_connector,
            "tree_style_h_spacer": tree_style_h_spacer,
            "show_token_count": show_token_count,
            "show_tree_stats": show_tree_stats,
            "encoding": encoding,
            "separator_char": separator_char,
            "separator_line_len": separator_line_len,
        }
        process_project(
            root_dir_param=root_dir_param,
            output_file_name=output_file_name,
            mode=ProjectMode.SEARCH,
            **search_options,
            **presentation_options,
        )
    else:
        print("Error: 'search_keywords' must be provided for find_in_project.")
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
# Public API of this module: the names exported by `from ... import *`.
__all__ = [
    # Entry points
    "process_project",
    "filter_project",
    "find_in_project",
    "deconstruct_snapshot",
    # Configuration enums
    "ProjectMode",
    "LanguagePreset",
    "IgnorePreset",
    "TreeStylePreset",
]
|
|
920
|
+
|
|
921
|
+
if __name__ == "__main__":
    # Demo run when the module is executed directly: snapshot the current
    # directory with hand-picked filters, with tree statistics and the token
    # count both enabled.
    print("\n--- Running a custom filter scan with new stats format ---")
    filter_project(
        root_dir_param=".",
        output_file_name="custom_snapshot_readable.txt",
        # Selection: which files to include, which directories to skip.
        file_types=[".py", "requirements.txt", ".sql", ".md"],
        ignore_dirs_in_path=["venv", "build", "node_modules", "static", "templates"],
        # Reporting extras.
        show_token_count=True,
        show_tree_stats=True,
    )
|
dirshot/examples.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Runnable usage examples for dirshot.

Each example is a small function. Run this file as a script to execute the
one selected in the ``__main__`` block at the bottom, or import the functions
and call them individually.
"""

# Import by the installed name: the distribution ships a top-level `dirshot`
# package, so the source-checkout path `src.dirshot.dirshot` does not resolve
# after installation. Explicit names replace the previous wildcard import.
from dirshot import (
    IgnorePreset,
    LanguagePreset,
    filter_project,
    find_in_project,
)


def example_custom_search() -> None:
    """Example 1: search with NO presets (custom filters).

    Goal: find the words "API" or "Controller" inside any .java or .js file,
    while manually ignoring common dependency/build folders.
    """
    print("\n--- Example 1: Running a custom search with NO presets ---")
    find_in_project(
        root_dir_param="example_project",
        output_file_name="search_custom_results.txt",
        search_keywords=["API", "Controller"],
        # No language_presets: manually define which file types to scan.
        file_extensions_to_check=[".java", ".js"],
        # No ignore_presets: manually define which directories to skip.
        ignore_dirs_in_path=["node_modules", "build", "venv"],
        search_file_contents=True,
        show_tree_stats=True,
        show_token_count=True,
    )


def example_python_preset_search() -> None:
    """Example 2: search with Python presets.

    Goal: find the word "Flask" inside any Python-related file. File types
    and ignored folders come from the presets instead of explicit lists.
    """
    print("\n--- Example 2: Running a search with Python presets ---")
    find_in_project(
        root_dir_param=".",
        output_file_name="search_python_preset_results.txt",
        # The keyword was previously an empty string, which did not match
        # the stated goal of this example.
        search_keywords=["Flask"],
        # Use presets for convenience.
        language_presets=[LanguagePreset.PYTHON],
        ignore_presets=[IgnorePreset.PYTHON_ENV],
        search_file_contents=True,
        show_tree_stats=True,
        show_token_count=True,
    )


def example_python_snapshot() -> None:
    """Example 3: snapshot every .py file under the current directory."""
    filter_project(
        root_dir_param=".",
        output_file_name="snapshot.txt",
        # `language_presets=[LanguagePreset.PYTHON]` could be used here
        # instead of listing file types explicitly.
        file_types=[".py"],
        ignore_dirs_in_path=[".git"],
        ignore_presets=[
            IgnorePreset.PYTHON_ENV,
            IgnorePreset.NODE_MODULES,
            IgnorePreset.BUILD_ARTIFACTS,
        ],
        show_tree_stats=False,
        show_token_count=True,
    )


if __name__ == "__main__":
    # Only the snapshot example runs by default; swap in one of the other
    # example functions to try a search instead.
    example_python_snapshot()
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dirshot
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A flexible utility for creating project snapshots and searching for files.
|
|
5
|
+
Author-email: Your Name <youremail@example.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourusername/dirshot
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/yourusername/dirshot/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: tqdm
|
|
14
|
+
|
|
15
|
+
# Dirshot: A Flexible Project Snapshot and Search Tool
|
|
16
|
+
|
|
17
|
+
Dirshot is a Python utility that creates snapshots of a project's directory structure and file contents. It can operate in two modes: filtering files based on their type and path, or searching for files based on keywords in their name or content.
|
|
18
|
+
|
|
19
|
+
The script generates a single output file containing a directory tree and the concatenated text of the selected files. This is useful for quickly gathering project context for code analysis, sharing with collaborators, or providing to a Large Language Model (LLM).
|
|
20
|
+
|
|
21
|
+
## Key Features
|
|
22
|
+
|
|
23
|
+
* **Two Operating Modes**:
|
|
24
|
+
* **Filter Mode**: Create a snapshot of your project by filtering files based on extensions, filenames, and directory paths.
|
|
25
|
+
* **Search Mode**: Search for files containing specific keywords in their name, path, or content.
|
|
26
|
+
* **Customizable Filtering**:
|
|
27
|
+
* Use language presets for popular languages (Python, JavaScript, Java, etc.).
|
|
28
|
+
* Use ignore presets to exclude common files and directories (e.g., `.git`, `node_modules`, `__pycache__`).
|
|
29
|
+
* Define custom file types, and whitelist/blacklist substrings in filenames and paths.
|
|
30
|
+
* **Flexible Tree Generation**:
|
|
31
|
+
* Display a directory tree in various styles (Unicode, ASCII, Compact).
|
|
32
|
+
* Show statistics for included/matched files and directories in the tree.
|
|
33
|
+
* **Content Collation**:
|
|
34
|
+
* Concatenates the content of all selected files into a single output file.
|
|
35
|
+
* Optionally display an approximated token/character count.
|
|
36
|
+
* **Snapshot Deconstruction**:
|
|
37
|
+
* A utility function to parse a generated snapshot file and extract the directory tree and file paths.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
You can install Dirshot from PyPI:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install dirshot
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
Here are some examples of how to use Dirshot in your own Python scripts.
|
|
52
|
+
|
|
53
|
+
#### Example 1: Creating a Snapshot with Presets (Filter Mode)
|
|
54
|
+
|
|
55
|
+
This example creates a snapshot of a Python project, ignoring common virtual environment, `node_modules`, and build-artifact directories.
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from dirshot import filter_project, LanguagePreset, IgnorePreset
|
|
59
|
+
|
|
60
|
+
filter_project(
|
|
61
|
+
root_dir_param=".",
|
|
62
|
+
output_file_name="project_snapshot.txt",
|
|
63
|
+
language_presets=[LanguagePreset.PYTHON],
|
|
64
|
+
ignore_presets=[
|
|
65
|
+
IgnorePreset.PYTHON_ENV,
|
|
66
|
+
IgnorePreset.NODE_MODULES,
|
|
67
|
+
IgnorePreset.BUILD_ARTIFACTS,
|
|
68
|
+
],
|
|
69
|
+
show_token_count=True,
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
#### Example 2: Searching for Keywords in a Project (Search Mode)
|
|
74
|
+
|
|
75
|
+
This example searches for the keywords "API" or "Controller" within `.java` and `.js` files.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from dirshot import find_in_project
|
|
79
|
+
|
|
80
|
+
find_in_project(
|
|
81
|
+
root_dir_param="example_project",
|
|
82
|
+
output_file_name="search_results.txt",
|
|
83
|
+
search_keywords=["API", "Controller"],
|
|
84
|
+
file_extensions_to_check=[".java", ".js"],
|
|
85
|
+
ignore_dirs_in_path=["node_modules", "build"],
|
|
86
|
+
search_file_contents=True,
|
|
87
|
+
show_tree_stats=True,
|
|
88
|
+
)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Deconstructing a Snapshot
|
|
92
|
+
|
|
93
|
+
You can also parse a previously generated snapshot file to extract the directory structure and the list of included files.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from dirshot import deconstruct_snapshot
|
|
97
|
+
|
|
98
|
+
snapshot_data = deconstruct_snapshot("project_snapshot.txt")
|
|
99
|
+
print("Directory Tree:")
|
|
100
|
+
for line in snapshot_data["tree_lines"]:
|
|
101
|
+
print(line)
|
|
102
|
+
|
|
103
|
+
print("\nIncluded Files:")
|
|
104
|
+
for file_path in snapshot_data["file_paths"]:
|
|
105
|
+
print(file_path)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Contributing
|
|
109
|
+
|
|
110
|
+
Contributions are welcome! Please feel free to submit a pull request or open an issue on the project's GitHub repository.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
dirshot/__init__.py,sha256=ss4HC5VTyD9j6GFGCLMU6VxPlXy0qaGFzXlZB3_d2WM,403
|
|
2
|
+
dirshot/dirshot.py,sha256=ItCwC4BsSbPzBLlHddiFlYsqdB3Hh3PEpwN89EuplIc,34693
|
|
3
|
+
dirshot/examples.py,sha256=q--iNqxmA4xX8nyXYdOP-HPsqzpLHBFo1PTseQ9ki7M,2344
|
|
4
|
+
dirshot-0.1.0.dist-info/METADATA,sha256=IxRaINwm34SoIrMoBRO0y_oMNhg0KP21ww4wkYqKA5M,4168
|
|
5
|
+
dirshot-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
dirshot-0.1.0.dist-info/top_level.txt,sha256=ROGW8gTcmwJ2jJ1Fp7TV1REZLRUGbL3L-Lfoy8tPxOA,8
|
|
7
|
+
dirshot-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dirshot
|