dirshot 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dirshot/__init__.py +14 -13
- dirshot/dirshot.py +955 -989
- dirshot/reconstruct.py +110 -0
- dirshot-0.3.0.dist-info/METADATA +197 -0
- dirshot-0.3.0.dist-info/RECORD +7 -0
- {dirshot-0.1.3.dist-info → dirshot-0.3.0.dist-info}/WHEEL +1 -1
- dirshot/examples.py +0 -65
- dirshot-0.1.3.dist-info/METADATA +0 -110
- dirshot-0.1.3.dist-info/RECORD +0 -7
- {dirshot-0.1.3.dist-info → dirshot-0.3.0.dist-info}/top_level.txt +0 -0
dirshot/dirshot.py
CHANGED
|
@@ -2,1117 +2,1083 @@ import os
|
|
|
2
2
|
import sys
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
|
+
import threading
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from dataclasses import dataclass, field
|
|
7
|
-
from typing import List, Optional, Set, Tuple,
|
|
8
|
+
from typing import List, Optional, Set, Tuple, NamedTuple, Dict, Any
|
|
8
9
|
from enum import Enum
|
|
9
10
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
10
11
|
from io import StringIO
|
|
12
|
+
from contextlib import contextmanager
|
|
11
13
|
|
|
12
|
-
# --- TQDM Dependency Handler ---
|
|
13
|
-
try:
|
|
14
|
-
from tqdm import tqdm
|
|
15
|
-
except ImportError:
|
|
16
14
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
def strip_markup(text: str) -> str:
    """Removes rich-style markup tags from a string (e.g., [bold red]Error[/]).

    Only bracketed spans that look like rich tags are removed: the first
    character after ``[`` must be a lowercase letter, ``#``, ``/`` or ``@``.
    Other bracketed text such as ``[1]`` or ``[12:30:01]`` is kept, so file
    names and timestamps survive when output falls back to plain text.

    Args:
        text: The value to clean; it is converted with ``str()`` first.

    Returns:
        str: The text with markup tags removed.
    """
    return re.sub(r"\[[a-z#/@][^\[\]]*\]", "", str(text))
|
|
18
|
+
|
|
20
19
|
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
# --- Dependency & Console Management ---
|
|
21
|
+
try:
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
from rich.progress import (
|
|
24
|
+
Progress,
|
|
25
|
+
SpinnerColumn,
|
|
26
|
+
BarColumn,
|
|
27
|
+
TextColumn,
|
|
28
|
+
TimeElapsedColumn,
|
|
29
|
+
)
|
|
30
|
+
from rich.table import Table
|
|
31
|
+
from rich.live import Live
|
|
32
|
+
from rich.panel import Panel
|
|
33
|
+
from rich.text import Text
|
|
34
|
+
|
|
35
|
+
RICH_AVAILABLE = True
|
|
36
|
+
except ImportError:
|
|
37
|
+
RICH_AVAILABLE = False
|
|
38
|
+
|
|
39
|
+
class FallbackProgress:
    """A simple, dependency-free progress handler for when 'rich' is not installed.

    Exposes ``add_task``, ``update`` and the context-manager protocol so it can
    be used where a progress object is expected. All tasks share one console
    line, which is redrawn in place with a carriage return.
    """

    def __init__(self):
        # tasks: task_id -> {"d": description, "t": total or None, "c": completed}
        # task_count: next task id to hand out
        # active_line: last line written, used to blank out leftovers on redraw
        self.tasks, self.task_count, self.active_line = {}, 0, ""

    def add_task(self, description, total=None, **kwargs):
        """Registers a new task and returns its integer id.

        Extra keyword arguments are accepted and ignored.
        """
        task_id = self.task_count
        self.tasks[task_id] = {
            "d": strip_markup(description),
            "t": total,
            "c": 0,
        }
        self.task_count += 1
        return task_id

    def update(
        self,
        task_id,
        advance=0,
        completed=None,
        description=None,
        total=None,
        **kwargs,
    ):
        """Updates a task and redraws the progress line.

        Args:
            task_id: Id returned by ``add_task``; unknown ids are ignored.
            advance: Amount to add to the completed count. ``None`` is
                treated as 0.
            completed: Absolute completed count; takes precedence over
                ``advance`` when given.
            description: New description; markup tags are stripped.
            total: New total for the task. Previously this keyword was
                swallowed by ``**kwargs`` and had no effect.
            **kwargs: Accepted and ignored.
        """
        if task_id not in self.tasks:
            return
        task = self.tasks[task_id]
        if description:
            task["d"] = strip_markup(description)
        if total is not None:
            task["t"] = total
        if completed is not None:
            task["c"] = completed
        else:
            task["c"] = task["c"] + (advance or 0)

        # Simple progress string
        count_str = f"{task['c']}"
        if task["t"]:
            percent = (task["c"] / task["t"]) * 100
            count_str += f"/{task['t']} ({percent:.0f}%)"

        line = f"-> {task['d']}: {count_str}"

        # Pad with spaces to clear previous longer lines
        padding = max(0, len(self.active_line) - len(line))
        sys.stdout.write("\r" + line + " " * padding)
        sys.stdout.flush()
        self.active_line = line

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Move off the in-place progress line so later output starts cleanly.
        sys.stdout.write("\n")
        sys.stdout.flush()
|
|
92
85
|
|
|
93
86
|
|
|
87
|
+
class ConsoleManager:
    """A wrapper to gracefully handle console output with or without 'rich'."""

    def __init__(self):
        """Initializes the ConsoleManager, detecting if 'rich' is available."""
        self.console = Console() if RICH_AVAILABLE else None

    def log(self, message: str, style: str = ""):
        """Logs a message to the console, applying a style if 'rich' is available.

        Without rich, markup tags are stripped and the message is printed
        with an ``[HH:MM:SS]`` prefix.
        """
        if self.console:
            self.console.log(message, style=style)
        else:
            clean_msg = strip_markup(message)
            print(f"[{time.strftime('%H:%M:%S')}] {clean_msg}")

    def print_table(self, title: str, columns: List[str], rows: List[List[str]]):
        """Prints a formatted table to the console.

        Args:
            title: Table title.
            columns: Column headers.
            rows: One list of cell values per row.

        Without rich, an ASCII table is printed: markup is stripped from the
        title, headers and cells, rows shorter than the header are padded with
        empty cells, and cells beyond the last column are dropped.
        """
        if self.console:
            table = Table(
                title=title,
                show_header=True,
                header_style="bold magenta",
                border_style="dim",
            )
            for col in columns:
                table.add_column(col)
            for row in rows:
                table.add_row(*row)
            self.console.print(table)
            return

        # Fallback ASCII table
        print(f"\n{strip_markup(title)}")

        # Clean data and normalize every row to exactly one cell per column
        clean_cols = [strip_markup(c) for c in columns]
        n_cols = len(clean_cols)
        clean_rows = []
        for r in rows:
            cells = [strip_markup(c) for c in r][:n_cols]
            cells += [""] * (n_cols - len(cells))
            clean_rows.append(cells)

        col_widths = [len(c) for c in clean_cols]
        for cells in clean_rows:
            for i, cell in enumerate(cells):
                col_widths[i] = max(col_widths[i], len(cell))

        def print_sep(char="-", cross="+"):
            print(cross + cross.join(char * (w + 2) for w in col_widths) + cross)

        def print_cells(cells):
            # Each cell takes w + 2 characters between bars, which matches
            # print_sep; padding the cells again would make rows wider than
            # the separator lines.
            body = " | ".join(f"{c:<{w}}" for c, w in zip(cells, col_widths))
            print(f"| {body} |")

        print_sep()
        print_cells(clean_cols)
        print_sep("=")
        for cells in clean_rows:
            print_cells(cells)
        print_sep()
|
|
147
|
+
|
|
148
|
+
|
|
94
149
|
# --- Configuration Constants ---
|
|
95
|
-
DEFAULT_SEPARATOR_CHAR = "-"
|
|
96
|
-
|
|
97
|
-
DEFAULT_ENCODING = "utf-8"
|
|
98
|
-
TREE_HEADER_TEXT = "Project File Structure"
|
|
99
|
-
FILE_HEADER_PREFIX = "FILE: "
|
|
100
|
-
TOKEN_APPROX_MODE = "CHAR_COUNT"
|
|
101
|
-
|
|
102
|
-
# List of binary file extensions to skip during content search
|
|
150
|
+
DEFAULT_SEPARATOR_CHAR, DEFAULT_ENCODING = "-", "utf-8"
|
|
151
|
+
TREE_HEADER_TEXT, FILE_HEADER_PREFIX = "Project File Structure", "FILE: "
|
|
103
152
|
BINARY_FILE_EXTENSIONS = {
|
|
104
|
-
# Images
|
|
105
153
|
".png",
|
|
106
154
|
".jpg",
|
|
107
155
|
".jpeg",
|
|
108
156
|
".gif",
|
|
109
|
-
".bmp",
|
|
110
|
-
".ico",
|
|
111
|
-
".tiff",
|
|
112
|
-
".webp",
|
|
113
|
-
# Documents
|
|
114
157
|
".pdf",
|
|
115
|
-
".doc",
|
|
116
|
-
".docx",
|
|
117
|
-
".xls",
|
|
118
|
-
".xlsx",
|
|
119
|
-
".ppt",
|
|
120
|
-
".pptx",
|
|
121
|
-
".odt",
|
|
122
|
-
".ods",
|
|
123
|
-
# Archives
|
|
124
158
|
".zip",
|
|
125
|
-
".gz",
|
|
126
|
-
".tar",
|
|
127
|
-
".rar",
|
|
128
|
-
".7z",
|
|
129
|
-
".bz2",
|
|
130
|
-
".xz",
|
|
131
|
-
# Executables & Binaries
|
|
132
159
|
".exe",
|
|
133
160
|
".dll",
|
|
134
161
|
".so",
|
|
135
|
-
".o",
|
|
136
|
-
".a",
|
|
137
|
-
".lib",
|
|
138
|
-
".bin",
|
|
139
|
-
".dat",
|
|
140
|
-
".db",
|
|
141
|
-
".sqlite",
|
|
142
|
-
".img",
|
|
143
|
-
".iso",
|
|
144
|
-
# Compiled Code
|
|
145
|
-
".class",
|
|
146
162
|
".jar",
|
|
147
|
-
".war",
|
|
148
163
|
".pyc",
|
|
149
|
-
".pyo",
|
|
150
|
-
# Audio/Video
|
|
151
164
|
".mp3",
|
|
152
|
-
".wav",
|
|
153
|
-
".flac",
|
|
154
|
-
".ogg",
|
|
155
165
|
".mp4",
|
|
156
|
-
".mkv",
|
|
157
|
-
".avi",
|
|
158
|
-
".mov",
|
|
159
|
-
".wmv",
|
|
160
|
-
# Fonts
|
|
161
|
-
".ttf",
|
|
162
|
-
".otf",
|
|
163
|
-
".woff",
|
|
164
|
-
".woff2",
|
|
165
166
|
}
|
|
166
167
|
|
|
167
168
|
|
|
168
|
-
# ---
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
"
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
169
|
+
# --- Base Lists for Presets ---
|
|
170
|
+
# These are defined outside the enums to allow for safe composition.
|
|
171
|
+
_PYTHON_BASE = [
|
|
172
|
+
".py",
|
|
173
|
+
".pyw",
|
|
174
|
+
"requirements.txt",
|
|
175
|
+
"Pipfile",
|
|
176
|
+
"pyproject.toml",
|
|
177
|
+
"setup.py",
|
|
178
|
+
]
|
|
179
|
+
_JAVASCRIPT_BASE = [
|
|
180
|
+
".js",
|
|
181
|
+
".jsx",
|
|
182
|
+
".ts",
|
|
183
|
+
".tsx",
|
|
184
|
+
".mjs",
|
|
185
|
+
".cjs",
|
|
186
|
+
"package.json",
|
|
187
|
+
"jsconfig.json",
|
|
188
|
+
"tsconfig.json",
|
|
189
|
+
]
|
|
190
|
+
_RUBY_BASE = [".rb", "Gemfile", "Rakefile", ".gemspec"]
|
|
191
|
+
_PHP_BASE = [".php", "composer.json", "index.php"]
|
|
192
|
+
_JAVA_BASE = [".java", ".jar", ".war", "pom.xml", ".properties"]
|
|
193
|
+
_KOTLIN_BASE = [".kt", ".kts", ".gradle", "build.gradle.kts"]
|
|
194
|
+
_CSHARP_BASE = [".cs", ".csproj", ".sln", "appsettings.json", "Web.config", ".csx"]
|
|
195
|
+
_C_CPP_BASE = [".c", ".cpp", ".h", ".hpp", "Makefile", "CMakeLists.txt", ".cxx", ".hxx"]
|
|
196
|
+
_RUST_BASE = [".rs", "Cargo.toml", "Cargo.lock"]
|
|
197
|
+
_SWIFT_BASE = [".swift", "Package.swift"]
|
|
198
|
+
_OBJECTIVE_C_BASE = [".m", ".mm", ".h"]
|
|
199
|
+
_ELIXIR_BASE = [".ex", ".exs", "mix.exs"]
|
|
200
|
+
_DART_BASE = [".dart", "pubspec.yaml"]
|
|
201
|
+
_SCALA_BASE = [".scala", ".sbt", "build.sbt"]
|
|
202
|
+
_R_LANG_BASE = [".r", ".R", ".Rmd"]
|
|
203
|
+
_LUA_BASE = [".lua"]
|
|
204
|
+
|
|
205
|
+
_IDE_VSCODE = [".vscode"]
|
|
206
|
+
_IDE_JETBRAINS = [".idea"]
|
|
207
|
+
_IDE_SUBLIME = ["*.sublime-project", "*.sublime-workspace"]
|
|
208
|
+
_IDE_ECLIPSE = [".project", ".settings", ".classpath"]
|
|
209
|
+
_IDE_NETBEANS = ["nbproject"]
|
|
210
|
+
_IDE_ATOM = [".atom"]
|
|
211
|
+
_IDE_VIM = ["*.swp", "*.swo"]
|
|
212
|
+
_IDE_XCODE = ["*.xcodeproj", "*.xcworkspace", "xcuserdata"]
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# --- Enums and Data Structures ---
|
|
178
216
|
class LanguagePreset(Enum):
|
|
179
|
-
"""
|
|
180
|
-
|
|
181
|
-
PYTHON =
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
217
|
+
"""Provides an extensive list of presets for common language file extensions and key project files."""
|
|
218
|
+
|
|
219
|
+
PYTHON = _PYTHON_BASE
|
|
220
|
+
JAVASCRIPT = _JAVASCRIPT_BASE
|
|
221
|
+
JAVA = _JAVA_BASE
|
|
222
|
+
KOTLIN = _KOTLIN_BASE
|
|
223
|
+
C_CPP = _C_CPP_BASE
|
|
224
|
+
C_SHARP = _CSHARP_BASE
|
|
225
|
+
GO = [".go", "go.mod", "go.sum"]
|
|
226
|
+
RUST = _RUST_BASE
|
|
227
|
+
RUBY = _RUBY_BASE
|
|
228
|
+
PHP = _PHP_BASE
|
|
229
|
+
SWIFT = _SWIFT_BASE
|
|
230
|
+
OBJECTIVE_C = _OBJECTIVE_C_BASE
|
|
231
|
+
DART = _DART_BASE
|
|
232
|
+
LUA = _LUA_BASE
|
|
233
|
+
PERL = [".pl", ".pm", ".t"]
|
|
234
|
+
R_LANG = _R_LANG_BASE
|
|
235
|
+
SCALA = _SCALA_BASE
|
|
236
|
+
GROOVY = [".groovy", ".gvy", ".gy", ".gsh"]
|
|
237
|
+
HASKELL = [".hs", ".lhs", "cabal.project"]
|
|
238
|
+
JULIA = [".jl"]
|
|
239
|
+
ZIG = [".zig", "build.zig"]
|
|
240
|
+
NIM = [".nim", ".nimble"]
|
|
241
|
+
ELIXIR = _ELIXIR_BASE
|
|
242
|
+
CLOJURE = [".clj", ".cljs", ".cljc", "project.clj", "deps.edn"]
|
|
243
|
+
F_SHARP = [".fs", ".fsi", ".fsx"]
|
|
244
|
+
OCAML = [".ml", ".mli", "dune-project"]
|
|
245
|
+
ELM = [".elm", "elm.json"]
|
|
246
|
+
PURE_SCRIPT = [".purs", "spago.dhall"]
|
|
247
|
+
COMMON_LISP = [".lisp", ".cl", ".asd"]
|
|
248
|
+
SCHEME = [".scm", ".ss"]
|
|
249
|
+
RACKET = [".rkt"]
|
|
250
|
+
WEB_FRONTEND = [".html", ".htm", ".css", ".scss", ".sass", ".less", ".styl"]
|
|
251
|
+
REACT = _JAVASCRIPT_BASE
|
|
252
|
+
NODE_JS = _JAVASCRIPT_BASE
|
|
253
|
+
EXPRESS_JS = _JAVASCRIPT_BASE
|
|
254
|
+
NEST_JS = _JAVASCRIPT_BASE + ["nest-cli.json"]
|
|
255
|
+
VUE = _JAVASCRIPT_BASE + [".vue", "vue.config.js"]
|
|
256
|
+
ANGULAR = _JAVASCRIPT_BASE + ["angular.json"]
|
|
257
|
+
SVELTE = _JAVASCRIPT_BASE + [".svelte", "svelte.config.js"]
|
|
258
|
+
EMBER = _JAVASCRIPT_BASE + ["ember-cli-build.js"]
|
|
259
|
+
PUG = [".pug", ".jade"]
|
|
260
|
+
HANDLEBARS = [".hbs", ".handlebars"]
|
|
261
|
+
EJS = [".ejs"]
|
|
262
|
+
DJANGO = _PYTHON_BASE + ["manage.py", "wsgi.py", "asgi.py", ".jinja", ".jinja2"]
|
|
263
|
+
FLASK = _PYTHON_BASE + ["app.py", "wsgi.py"]
|
|
264
|
+
RAILS = _RUBY_BASE + ["routes.rb", ".erb", ".haml", ".slim", "config.ru"]
|
|
265
|
+
LARAVEL = _PHP_BASE + [".blade.php", "artisan"]
|
|
266
|
+
SYMFONY = _PHP_BASE + ["symfony.lock"]
|
|
267
|
+
PHOENIX = _ELIXIR_BASE
|
|
268
|
+
SPRING = _JAVA_BASE + ["application.properties", "application.yml"]
|
|
269
|
+
ASP_NET = _CSHARP_BASE + ["*.cshtml", "*.vbhtml", "*.razor"]
|
|
270
|
+
ROCKET_RS = _RUST_BASE + ["Rocket.toml"]
|
|
271
|
+
ACTIX_WEB = _RUST_BASE
|
|
272
|
+
IOS_NATIVE = (
|
|
273
|
+
_SWIFT_BASE
|
|
274
|
+
+ _OBJECTIVE_C_BASE
|
|
275
|
+
+ [".storyboard", ".xib", "Info.plist", ".pbxproj"]
|
|
276
|
+
)
|
|
277
|
+
ANDROID_NATIVE = _JAVA_BASE + _KOTLIN_BASE + ["AndroidManifest.xml", ".xml"]
|
|
278
|
+
FLUTTER = _DART_BASE
|
|
279
|
+
REACT_NATIVE = _JAVASCRIPT_BASE + ["app.json"]
|
|
280
|
+
XAMARIN = _CSHARP_BASE + [".xaml"]
|
|
281
|
+
DOTNET_MAUI = XAMARIN
|
|
282
|
+
NATIVESCRIPT = _JAVASCRIPT_BASE + ["nativescript.config.ts"]
|
|
283
|
+
UNITY = _CSHARP_BASE + [".unity", ".prefab", ".asset", ".mat", ".unitypackage"]
|
|
284
|
+
UNREAL_ENGINE = _C_CPP_BASE + [".uproject", ".uasset", ".ini"]
|
|
285
|
+
GODOT = [".gd", ".tscn", ".tres", "project.godot"]
|
|
286
|
+
LOVE2D = _LUA_BASE + ["conf.lua", "main.lua"]
|
|
287
|
+
MONOGAME = _CSHARP_BASE + [".mgcb"]
|
|
288
|
+
DOCKER = ["Dockerfile", ".dockerignore", "docker-compose.yml"]
|
|
289
|
+
TERRAFORM = [".tf", ".tfvars", ".tf.json"]
|
|
290
|
+
ANSIBLE = ["ansible.cfg", "inventory.ini"]
|
|
291
|
+
PULUMI = ["Pulumi.yaml"]
|
|
292
|
+
CHEF = _RUBY_BASE
|
|
293
|
+
PUPPET = [".pp"]
|
|
294
|
+
VAGRANT = ["Vagrantfile"]
|
|
295
|
+
GITHUB_ACTIONS = [".yml", ".yaml"]
|
|
296
|
+
GITLAB_CI = [".gitlab-ci.yml"]
|
|
297
|
+
JENKINS = ["Jenkinsfile"]
|
|
298
|
+
CIRCLE_CI = ["config.yml"]
|
|
299
|
+
KUBERNETES = [".yml", ".yaml"]
|
|
300
|
+
BICEP = [".bicep"]
|
|
301
|
+
CLOUDFORMATION = [".json", ".yml"]
|
|
302
|
+
DATA_SCIENCE_NOTEBOOKS = [".ipynb", ".Rmd"]
|
|
303
|
+
SQL = [".sql", ".ddl", ".dml"]
|
|
304
|
+
APACHE_SPARK = list(set(_SCALA_BASE + _PYTHON_BASE + _JAVA_BASE + _R_LANG_BASE))
|
|
305
|
+
ML_CONFIG = ["params.yaml"]
|
|
306
|
+
ELECTRON = _JAVASCRIPT_BASE
|
|
307
|
+
TAURI = _RUST_BASE + ["tauri.conf.json"]
|
|
308
|
+
QT = _C_CPP_BASE + [".pro", ".ui", ".qml"]
|
|
309
|
+
GTK = _C_CPP_BASE + [".ui", "meson.build"]
|
|
310
|
+
WPF = _CSHARP_BASE + [".xaml"]
|
|
311
|
+
WINDOWS_FORMS = _CSHARP_BASE
|
|
312
|
+
BASH = [".sh", ".bash"]
|
|
313
|
+
POWERSHELL = [".ps1", ".psm1"]
|
|
314
|
+
BATCH = [".bat", ".cmd"]
|
|
315
|
+
SOLIDITY = [".sol"]
|
|
316
|
+
VYPER = [".vy"]
|
|
317
|
+
VERILOG = [".v", ".vh"]
|
|
318
|
+
VHDL = [".vhd", ".vhdl"]
|
|
319
|
+
MARKUP = [".md", ".markdown", ".rst", ".adoc", ".asciidoc", ".tex", ".bib"]
|
|
320
|
+
CONFIGURATION = [
|
|
321
|
+
".json",
|
|
322
|
+
".xml",
|
|
323
|
+
".yml",
|
|
324
|
+
".yaml",
|
|
325
|
+
".ini",
|
|
326
|
+
".toml",
|
|
327
|
+
".env",
|
|
328
|
+
".conf",
|
|
329
|
+
".cfg",
|
|
188
330
|
]
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
331
|
+
EDITOR_CONFIG = [".editorconfig"]
|
|
332
|
+
LICENSE = ["LICENSE", "LICENSE.md", "COPYING"]
|
|
333
|
+
CHANGELOG = ["CHANGELOG", "CHANGELOG.md"]
|
|
192
334
|
|
|
193
335
|
|
|
194
336
|
class IgnorePreset(Enum):
|
|
195
|
-
"""
|
|
196
|
-
|
|
197
|
-
VERSION_CONTROL = [".git", ".svn", ".hg", ".
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
337
|
+
"""Provides an extensive list of presets for common directories, files, and patterns to ignore."""
|
|
338
|
+
|
|
339
|
+
VERSION_CONTROL = [".git", ".svn", ".hg", ".bzr", ".gitignore", ".gitattributes"]
|
|
340
|
+
OS_FILES = [".DS_Store", "Thumbs.db", "desktop.ini", "ehthumbs.db"]
|
|
341
|
+
BUILD_ARTIFACTS = [
|
|
342
|
+
"dist",
|
|
343
|
+
"build",
|
|
344
|
+
"target",
|
|
345
|
+
"out",
|
|
346
|
+
"bin",
|
|
347
|
+
"obj",
|
|
348
|
+
"release",
|
|
349
|
+
"debug",
|
|
350
|
+
]
|
|
351
|
+
LOGS = ["*.log", "logs", "npm-debug.log*", "yarn-debug.log*", "yarn-error.log*"]
|
|
352
|
+
TEMP_FILES = ["temp", "tmp", "*.tmp", "*~", "*.bak", "*.swp", "*.swo"]
|
|
353
|
+
SECRET_FILES = [
|
|
354
|
+
".env",
|
|
355
|
+
"*.pem",
|
|
356
|
+
"*.key",
|
|
357
|
+
"credentials.json",
|
|
358
|
+
"*.p12",
|
|
359
|
+
"*.pfx",
|
|
360
|
+
"secrets.yml",
|
|
361
|
+
".env.local",
|
|
362
|
+
]
|
|
363
|
+
COMPRESSED_ARCHIVES = ["*.zip", "*.tar", "*.gz", "*.rar", "*.7z", "*.tgz"]
|
|
364
|
+
IDE_METADATA_VSCODE = _IDE_VSCODE
|
|
365
|
+
IDE_METADATA_JETBRAINS = _IDE_JETBRAINS
|
|
366
|
+
IDE_METADATA_SUBLIME = _IDE_SUBLIME
|
|
367
|
+
IDE_METADATA_ECLIPSE = _IDE_ECLIPSE
|
|
368
|
+
IDE_METADATA_NETBEANS = _IDE_NETBEANS
|
|
369
|
+
IDE_METADATA_ATOM = _IDE_ATOM
|
|
370
|
+
IDE_METADATA_VIM = _IDE_VIM
|
|
371
|
+
IDE_METADATA_XCODE = _IDE_XCODE
|
|
372
|
+
IDE_METADATA = list(
|
|
373
|
+
set(
|
|
374
|
+
_IDE_VSCODE
|
|
375
|
+
+ _IDE_JETBRAINS
|
|
376
|
+
+ _IDE_SUBLIME
|
|
377
|
+
+ _IDE_ECLIPSE
|
|
378
|
+
+ _IDE_NETBEANS
|
|
379
|
+
+ _IDE_ATOM
|
|
380
|
+
+ _IDE_VIM
|
|
381
|
+
+ _IDE_XCODE
|
|
382
|
+
)
|
|
383
|
+
)
|
|
384
|
+
NODE_JS = [
|
|
385
|
+
"node_modules",
|
|
386
|
+
"package-lock.json",
|
|
387
|
+
"yarn.lock",
|
|
388
|
+
"pnpm-lock.yaml",
|
|
389
|
+
".npm",
|
|
390
|
+
]
|
|
391
|
+
PYTHON = [
|
|
392
|
+
"__pycache__",
|
|
393
|
+
"venv",
|
|
394
|
+
".venv",
|
|
395
|
+
"env",
|
|
396
|
+
"lib",
|
|
397
|
+
"lib64",
|
|
398
|
+
".pytest_cache",
|
|
399
|
+
".tox",
|
|
400
|
+
"*.pyc",
|
|
401
|
+
".mypy_cache",
|
|
402
|
+
"htmlcov",
|
|
403
|
+
".coverage",
|
|
404
|
+
]
|
|
405
|
+
RUBY = ["vendor/bundle", ".bundle", "Gemfile.lock", ".gem", "coverage"]
|
|
406
|
+
PHP = ["vendor", "composer.lock"]
|
|
407
|
+
DOTNET = ["bin", "obj", "*.user", "*.suo"]
|
|
408
|
+
RUST = ["target", "Cargo.lock"]
|
|
409
|
+
GO = ["vendor", "go.sum"]
|
|
410
|
+
JAVA_MAVEN = ["target"]
|
|
411
|
+
JAVA_GRADLE = [".gradle", "build"]
|
|
412
|
+
ELIXIR = ["_build", "deps", "mix.lock"]
|
|
413
|
+
DART_FLUTTER = [".dart_tool", ".packages", "build", ".flutter-plugins"]
|
|
414
|
+
ELM = ["elm-stuff"]
|
|
415
|
+
HASKELL = ["dist-newstyle", ".stack-work"]
|
|
416
|
+
TESTING_REPORTS = ["coverage", "junit.xml", "lcov.info", ".nyc_output"]
|
|
417
|
+
STATIC_SITE_GENERATORS = ["_site", "public", "resources"]
|
|
418
|
+
CMS_UPLOADS = ["wp-content/uploads"]
|
|
419
|
+
TERRAFORM = [".terraform", "*.tfstate", "*.tfstate.backup", ".terraform.lock.hcl"]
|
|
420
|
+
JUPYTER_NOTEBOOKS = [".ipynb_checkpoints"]
|
|
421
|
+
ANDROID = [".gradle", "build", "local.properties", "*.apk", "*.aab", "captures"]
|
|
422
|
+
IOS = ["Pods", "Carthage", "DerivedData", "build"]
|
|
423
|
+
UNITY = [
|
|
424
|
+
"Library",
|
|
425
|
+
"Temp",
|
|
426
|
+
"Logs",
|
|
427
|
+
"UserSettings",
|
|
428
|
+
"MemoryCaptures",
|
|
429
|
+
"Assets/AssetStoreTools",
|
|
430
|
+
]
|
|
431
|
+
UNREAL_ENGINE = ["Intermediate", "Saved", "DerivedDataCache", ".vs"]
|
|
432
|
+
GODOT_ENGINE = [".import", "export_presets.cfg"]
|
|
433
|
+
SERVERLESS_FRAMEWORK = [".serverless"]
|
|
434
|
+
AWS = [".aws-sam"]
|
|
435
|
+
VERCEL = [".vercel"]
|
|
436
|
+
NETLIFY = [".netlify"]
|
|
437
|
+
MACOS = [
|
|
438
|
+
".DS_Store",
|
|
439
|
+
".AppleDouble",
|
|
440
|
+
".LSOverride",
|
|
441
|
+
"._*",
|
|
442
|
+
".Spotlight-V100",
|
|
443
|
+
".Trashes",
|
|
444
|
+
]
|
|
445
|
+
WINDOWS = ["Thumbs.db", "ehthumbs.db", "$RECYCLE.BIN/", "Desktop.ini"]
|
|
446
|
+
DEPRECATED_DEPENDENCIES = ["bower_components"]
|
|
217
447
|
|
|
218
|
-
t_connector: str
|
|
219
|
-
l_connector: str
|
|
220
|
-
v_connector: str
|
|
221
|
-
h_spacer: str
|
|
222
448
|
|
|
449
|
+
class FileToProcess(NamedTuple):
|
|
450
|
+
"""Represents a file that needs to be processed and included in the output."""
|
|
223
451
|
|
|
224
|
-
|
|
452
|
+
absolute_path: Path
|
|
453
|
+
relative_path_posix: str
|
|
225
454
|
|
|
226
455
|
|
|
227
456
|
@dataclass
class FilterCriteria:
    """Holds the combined filter criteria for scanning files and directories."""

    # Inclusion entries collected from `file_types` and language presets.
    file_extensions: Set[str] = field(default_factory=set)
    # Ignore entries collected from `ignore_if_in_path` and ignore presets.
    ignore_if_in_path: Set[str] = field(default_factory=set)
    # Extensions collected from `ignore_extensions`.
    ignore_extensions: Set[str] = field(default_factory=set)
    # File names collected from `files`.
    specific_files: Set[str] = field(default_factory=set)
    # When False, normalize_inputs lower-cases every entry.
    case_sensitive: bool = False

    @classmethod
    def normalize_inputs(
        cls,
        file_types: Optional[List[str]] = None,
        ignore_if_in_path: Optional[List[str]] = None,
        ignore_extensions: Optional[List[str]] = None,
        lang_presets: "Optional[List[LanguagePreset]]" = None,
        ignore_presets: "Optional[List[IgnorePreset]]" = None,
        files: Optional[List[str]] = None,
        case_sensitive: bool = False,
    ) -> "FilterCriteria":
        """
        Consolidates various filter inputs into a single FilterCriteria object.

        Every entry is stripped of surrounding whitespace and, unless
        ``case_sensitive`` is True, lower-cased. Blank entries are dropped
        from the ignore-path and specific-file sets: an empty string is a
        substring of every path, and a set holding only an empty name can
        never match a real file.

        Args:
            file_types (list, optional): A list of file extensions to include.
            ignore_if_in_path (list, optional): A list of directory/file substring names to ignore.
            ignore_extensions (list, optional): A list of file extensions to ignore.
            lang_presets (list, optional): A list of LanguagePreset enums.
            ignore_presets (list, optional): A list of IgnorePreset enums.
            files (list, optional): A list of specific filenames to include.
            case_sensitive (bool): If True, filters are case sensitive.

        Returns:
            FilterCriteria: An object containing the combined sets of filters.
        """

        def clean(s):
            s = s.strip()
            return s if case_sensitive else s.lower()

        def clean_nonblank(items):
            return {cleaned for cleaned in map(clean, items) if cleaned}

        all_exts = {clean(ft) for ft in file_types or []}
        all_ignore_paths = clean_nonblank(ignore_if_in_path or [])
        all_ignore_exts = {clean(ie) for ie in ignore_extensions or []}
        all_specific_files = clean_nonblank(files or [])

        for p in lang_presets or []:
            all_exts.update(clean(item) for item in p.value)

        for p in ignore_presets or []:
            all_ignore_paths |= clean_nonblank(p.value)

        return cls(
            file_extensions=all_exts,
            ignore_if_in_path=all_ignore_paths,
            ignore_extensions=all_ignore_exts,
            specific_files=all_specific_files,
            case_sensitive=case_sensitive,
        )
|
|
281
517
|
|
|
282
518
|
|
|
283
|
-
|
|
284
|
-
|
|
519
|
+
# --- Core Logic Functions ---
|
|
520
|
+
def _discover_files(
    root_dir: Path, criteria: "FilterCriteria", progress: Any, task_id: Any
) -> List[Path]:
    """
    Recursively scans a directory to find all files matching the criteria.

    Ignore rules: an entry (file or directory) is skipped when any string in
    ``criteria.ignore_if_in_path`` is a substring of its path relative to
    ``root_dir``. Entries containing glob characters (``*``, ``?``, ``[``),
    such as ``*.log``, are additionally matched as glob patterns against the
    entry name and the relative path. The relative path is checked both in
    native and in ``/``-separated form so entries like ``vendor/bundle`` work
    on every platform.

    Inclusion rules for files: when ``criteria.specific_files`` is non-empty
    only those file names pass. Otherwise a file passes when no inclusion
    filter is set, or when its suffix or its full name is listed in
    ``criteria.file_extensions`` (names such as ``Dockerfile`` have no usable
    suffix). Files whose suffix is in ``criteria.ignore_extensions`` are
    always dropped.

    Comparisons are done on lower-cased names unless
    ``criteria.case_sensitive`` is set; the criteria sets are expected to be
    normalized the same way.

    Args:
        root_dir (Path): The directory to start the scan from.
        criteria (FilterCriteria): The filtering criteria to apply.
        progress (Any): The progress bar object (from rich or fallback).
        task_id (Any): The ID of the progress bar task to update.

    Returns:
        List[Path]: A list of paths to the candidate files.
    """
    # Local import so the block does not depend on the module import list.
    from fnmatch import fnmatchcase

    ignore_entries = criteria.ignore_if_in_path
    glob_entries = [
        pattern
        for pattern in ignore_entries
        if any(ch in pattern for ch in "*?[")
    ]
    candidate_files, dirs_scanned = [], 0

    def is_ignored(entry_name: str, rel_forms) -> bool:
        # Substring rule first (the original behavior), then glob patterns.
        for rel in rel_forms:
            if any(ignored in rel for ignored in ignore_entries):
                return True
        for pattern in glob_entries:
            if fnmatchcase(entry_name, pattern):
                return True
            if any(fnmatchcase(rel, pattern) for rel in rel_forms):
                return True
        return False

    def recursive_scan(current_path: Path):
        nonlocal dirs_scanned
        try:
            # The context manager closes the directory handle even when the
            # scan of this directory is cut short by an error.
            with os.scandir(current_path) as entries:
                for entry in entries:
                    # Path relative to the project root, used for the ignore check
                    rel_path = Path(entry.path).relative_to(root_dir)
                    rel_forms = {str(rel_path), rel_path.as_posix()}
                    entry_name = entry.name

                    # Normalize for case check
                    if not criteria.case_sensitive:
                        rel_forms = {rel.lower() for rel in rel_forms}
                        entry_name = entry_name.lower()

                    if is_ignored(entry_name, rel_forms):
                        continue

                    # NOTE(review): is_dir() follows symlinks, so a link that
                    # points back to an ancestor directory is descended into
                    # again — confirm whether links should be skipped.
                    if entry.is_dir():
                        recursive_scan(Path(entry.path))
                        dirs_scanned += 1
                        if progress:
                            progress.update(
                                task_id,
                                completed=dirs_scanned,
                                description=f"Discovering files in [cyan]{entry.name}[/cyan]",
                            )
                    elif entry.is_file():
                        # Specific File Inclusion: acts as an exclusive whitelist
                        if (
                            criteria.specific_files
                            and entry_name not in criteria.specific_files
                        ):
                            continue

                        # Extension filtering
                        file_ext = Path(entry.path).suffix
                        if not criteria.case_sensitive:
                            file_ext = file_ext.lower()

                        if file_ext in criteria.ignore_extensions:
                            continue

                        # Inclusion Logic
                        # Include if no inclusion filters are set OR the suffix
                        # or full name is allowed OR file is specifically allowed
                        if (
                            not criteria.file_extensions
                            or file_ext in criteria.file_extensions
                            or entry_name in criteria.file_extensions
                            or entry_name in criteria.specific_files
                        ):
                            candidate_files.append(Path(entry.path))

        except (PermissionError, FileNotFoundError):
            # Best effort: directories that cannot be read, or that vanish
            # while being scanned, are skipped rather than aborting the run.
            pass

    recursive_scan(root_dir)
    return candidate_files
|
|
345
604
|
|
|
346
605
|
|
|
347
606
|
def process_file_for_search(
    file_path: Path,
    keywords: List[str],
    search_content: bool,
    full_path: bool,
    activity: Dict,
    read_binary_files: bool,
    case_sensitive: bool,
) -> Optional[Path]:
    """
    Processes a single file to see if it matches the search criteria.

    A match can occur if a keyword is found in the filename (or in the whole
    path when ``full_path`` is set) or, if enabled, within the file's content.

    Args:
        file_path: File to test.
        keywords: Substrings to look for.
        search_content: When True, the file's text is scanned line by line if
            the name/path did not already match.
        full_path: Compare against ``str(file_path)`` instead of only the
            file name.
        activity: Shared mapping of thread id -> file name currently being
            processed; this function sets its own entry on entry and resets
            it to ``""`` on exit.
        read_binary_files: When False, files whose suffix is listed in
            ``BINARY_FILE_EXTENSIONS`` are not opened for content search.
        case_sensitive: When False, both the keywords and the text being
            searched are lower-cased before comparison.

    Returns:
        ``file_path`` if any keyword matched, otherwise ``None``.
    """
    thread_id = threading.get_ident()
    activity[thread_id] = file_path.name
    try:
        if not case_sensitive:
            # Lower-casing is idempotent, so callers that already normalized
            # their keywords are unaffected, while direct callers passing
            # mixed-case keywords still get a case-insensitive match.
            keywords = [key.lower() for key in keywords]

        compare_target = str(file_path) if full_path else file_path.name
        if not case_sensitive:
            compare_target = compare_target.lower()

        if any(key in compare_target for key in keywords):
            return file_path

        if search_content and (
            read_binary_files
            or file_path.suffix.lower() not in BINARY_FILE_EXTENSIONS
        ):
            try:
                # errors="ignore" lets undecodable bytes be skipped instead of
                # aborting the scan of this file.
                with file_path.open("r", encoding="utf-8", errors="ignore") as f:
                    for line in f:
                        haystack = line if case_sensitive else line.lower()
                        if any(key in haystack for key in keywords):
                            return file_path
            except OSError:
                # A file that cannot be opened or read is treated as a
                # non-match rather than failing the whole search.
                pass
        return None
    finally:
        # Always clear this thread's slot so the activity display does not
        # keep showing a file that is no longer being processed.
        activity[thread_id] = ""
|
|
411
648
|
|
|
412
649
|
|
|
413
|
-
|
|
650
|
+
def _process_files_concurrently(
|
|
651
|
+
files: List[Path],
|
|
652
|
+
keywords: List[str],
|
|
653
|
+
search_content: bool,
|
|
654
|
+
full_path: bool,
|
|
655
|
+
max_workers: Optional[int],
|
|
656
|
+
progress: Any,
|
|
657
|
+
task_id: Any,
|
|
658
|
+
read_binary_files: bool,
|
|
659
|
+
case_sensitive: bool,
|
|
660
|
+
) -> Set[Path]:
|
|
661
|
+
"""
|
|
662
|
+
Uses a thread pool to process a list of files for search matches concurrently.
|
|
663
|
+
"""
|
|
664
|
+
matched_files, thread_activity = set(), {}
|
|
665
|
+
|
|
666
|
+
# Normalize keywords once if case insensitive
|
|
667
|
+
if not case_sensitive:
|
|
668
|
+
keywords = [k.lower() for k in keywords]
|
|
669
|
+
|
|
670
|
+
with ThreadPoolExecutor(
|
|
671
|
+
max_workers=max_workers or (os.cpu_count() or 1) + 4,
|
|
672
|
+
thread_name_prefix="scanner",
|
|
673
|
+
) as executor:
|
|
674
|
+
future_to_file = {
|
|
675
|
+
executor.submit(
|
|
676
|
+
process_file_for_search,
|
|
677
|
+
f,
|
|
678
|
+
keywords,
|
|
679
|
+
search_content,
|
|
680
|
+
full_path,
|
|
681
|
+
thread_activity,
|
|
682
|
+
read_binary_files,
|
|
683
|
+
case_sensitive,
|
|
684
|
+
): f
|
|
685
|
+
for f in files
|
|
686
|
+
}
|
|
687
|
+
for future in as_completed(future_to_file):
|
|
688
|
+
if progress:
|
|
689
|
+
active_threads = {
|
|
690
|
+
f"T{str(tid)[-3:]}": name
|
|
691
|
+
for tid, name in thread_activity.items()
|
|
692
|
+
if name
|
|
693
|
+
}
|
|
694
|
+
progress.update(
|
|
695
|
+
task_id,
|
|
696
|
+
advance=1,
|
|
697
|
+
description=f"Processing [yellow]{len(active_threads)} threads[/yellow]",
|
|
698
|
+
)
|
|
699
|
+
if RICH_AVAILABLE:
|
|
700
|
+
status_panel = Panel(
|
|
701
|
+
Text(
|
|
702
|
+
"\n".join(
|
|
703
|
+
f"[bold cyan]{k}[/]: {v}"
|
|
704
|
+
for k, v in active_threads.items()
|
|
705
|
+
)
|
|
706
|
+
),
|
|
707
|
+
border_style="dim",
|
|
708
|
+
title="[dim]Thread Activity",
|
|
709
|
+
)
|
|
710
|
+
progress.update(task_id, status=status_panel)
|
|
711
|
+
if result := future.result():
|
|
712
|
+
matched_files.add(result)
|
|
713
|
+
if progress and RICH_AVAILABLE:
|
|
714
|
+
progress.update(task_id, status="[bold green]Done![/bold green]")
|
|
715
|
+
return matched_files
|
|
414
716
|
|
|
415
717
|
|
|
416
|
-
def
|
|
417
|
-
root_dir: Path,
|
|
718
|
+
def _generate_tree_with_stats(
|
|
719
|
+
root_dir: Path, file_paths: List[Path], show_stats: bool
|
|
418
720
|
) -> List[str]:
|
|
419
|
-
"""Generates a
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
721
|
+
"""Generates a directory tree structure from a list of file paths."""
|
|
722
|
+
tree_dict: Dict[str, Any] = {}
|
|
723
|
+
for path in file_paths:
|
|
724
|
+
level = tree_dict
|
|
725
|
+
for part in path.relative_to(root_dir).parts:
|
|
726
|
+
level = level.setdefault(part, {})
|
|
424
727
|
|
|
425
|
-
def
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
return path_name
|
|
430
|
-
total_files, total_dirs = dir_stats.get(path, (0, 0))
|
|
728
|
+
def count_children(d: Dict) -> Tuple[int, int]:
|
|
729
|
+
files = sum(1 for v in d.values() if not v)
|
|
730
|
+
dirs = len(d) - files
|
|
731
|
+
return files, dirs
|
|
431
732
|
|
|
432
|
-
|
|
433
|
-
|
|
733
|
+
lines = []
|
|
734
|
+
style = ("├── ", "└── ", "│ ", " ")
|
|
434
735
|
|
|
435
|
-
def
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
error_prefix + f"[Error accessing: {current_path.name} - {e.strerror}]"
|
|
442
|
-
)
|
|
443
|
-
return
|
|
444
|
-
displayable_children: List[Tuple[Path, bool]] = []
|
|
445
|
-
for e in entries:
|
|
446
|
-
try:
|
|
447
|
-
is_dir = e.is_dir()
|
|
448
|
-
except OSError:
|
|
449
|
-
continue
|
|
450
|
-
if _should_include_entry(
|
|
451
|
-
e, root_dir, criteria, is_dir=is_dir, log_func=None
|
|
452
|
-
):
|
|
453
|
-
displayable_children.append((e, is_dir))
|
|
454
|
-
num_children = len(displayable_children)
|
|
455
|
-
included_files_in_level = sum(
|
|
456
|
-
1 for _, is_dir in displayable_children if not is_dir
|
|
457
|
-
)
|
|
458
|
-
included_dirs_in_level = sum(1 for _, is_dir in displayable_children if is_dir)
|
|
459
|
-
if not prefix_parts:
|
|
460
|
-
tree_lines.append(
|
|
461
|
-
format_dir_name(
|
|
462
|
-
current_path,
|
|
463
|
-
current_path.name,
|
|
464
|
-
included_files_in_level,
|
|
465
|
-
included_dirs_in_level,
|
|
466
|
-
)
|
|
467
|
-
)
|
|
468
|
-
for i, (child_path, child_is_dir) in enumerate(displayable_children):
|
|
469
|
-
is_last = i == num_children - 1
|
|
470
|
-
connector = style.l_connector if is_last else style.t_connector
|
|
471
|
-
entry_name = child_path.name
|
|
472
|
-
if child_is_dir:
|
|
473
|
-
try:
|
|
474
|
-
child_entries = sorted(
|
|
475
|
-
child_path.iterdir(), key=lambda p: p.name.lower()
|
|
476
|
-
)
|
|
477
|
-
child_displayable_children = [
|
|
478
|
-
(e, e.is_dir())
|
|
479
|
-
for e in child_entries
|
|
480
|
-
if _should_include_entry(
|
|
481
|
-
e, root_dir, criteria, is_dir=e.is_dir(), log_func=None
|
|
482
|
-
)
|
|
483
|
-
]
|
|
484
|
-
child_included_files = sum(
|
|
485
|
-
1 for _, is_dir in child_displayable_children if not is_dir
|
|
486
|
-
)
|
|
487
|
-
child_included_dirs = sum(
|
|
488
|
-
1 for _, is_dir in child_displayable_children if is_dir
|
|
489
|
-
)
|
|
490
|
-
entry_name = format_dir_name(
|
|
491
|
-
child_path,
|
|
492
|
-
child_path.name,
|
|
493
|
-
child_included_files,
|
|
494
|
-
child_included_dirs,
|
|
495
|
-
)
|
|
496
|
-
except OSError:
|
|
497
|
-
pass
|
|
498
|
-
tree_lines.append("".join(prefix_parts) + connector + entry_name)
|
|
499
|
-
if child_is_dir:
|
|
500
|
-
new_prefix_parts = prefix_parts + [
|
|
501
|
-
style.h_spacer if is_last else style.v_connector
|
|
502
|
-
]
|
|
503
|
-
_recursive_build(child_path, new_prefix_parts)
|
|
736
|
+
def build_lines_recursive(d: Dict, prefix: str = ""):
|
|
737
|
+
items = sorted(d.keys(), key=lambda k: (not d[k], k.lower()))
|
|
738
|
+
for i, name in enumerate(items):
|
|
739
|
+
is_last = i == len(items) - 1
|
|
740
|
+
connector = style[1] if is_last else style[0]
|
|
741
|
+
display_name = name
|
|
504
742
|
|
|
505
|
-
|
|
506
|
-
|
|
743
|
+
if d[name]:
|
|
744
|
+
if show_stats:
|
|
745
|
+
files, dirs = count_children(d[name])
|
|
746
|
+
display_name += f" [dim][M: {files}f, {dirs}d][/dim]"
|
|
507
747
|
|
|
748
|
+
lines.append(f"{prefix}{connector}{display_name}")
|
|
508
749
|
|
|
509
|
-
def _generate_tree_from_paths(
|
|
510
|
-
root_dir: Path, file_paths: List[Path], style: TreeStyle, show_stats: bool
|
|
511
|
-
) -> List[str]:
|
|
512
|
-
"""Generates a directory tree structure from a list of *matched* file paths using the given style."""
|
|
513
|
-
tree_dict: Dict[str, Any] = {}
|
|
514
|
-
matched_paths = {p.relative_to(root_dir) for p in file_paths}
|
|
515
|
-
for rel_path in matched_paths:
|
|
516
|
-
parts = rel_path.parts
|
|
517
|
-
current_level = tree_dict
|
|
518
|
-
for part in parts:
|
|
519
|
-
current_level = current_level.setdefault(part, {})
|
|
520
|
-
tree_lines: List[str] = []
|
|
521
|
-
|
|
522
|
-
def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
|
|
523
|
-
if not show_stats:
|
|
524
|
-
return name
|
|
525
|
-
|
|
526
|
-
stats_str = f" [M: {matched_files}f, {matched_dirs}d]"
|
|
527
|
-
return name + stats_str
|
|
528
|
-
|
|
529
|
-
def build_lines(d: Dict[str, Any], prefix: str):
|
|
530
|
-
items = sorted(d.keys(), key=lambda k: (len(d[k]) == 0, k.lower()))
|
|
531
|
-
num_children = len(items)
|
|
532
|
-
matched_files_in_level = sum(1 for k in items if not d[k])
|
|
533
|
-
matched_dirs_in_level = sum(1 for k in items if d[k])
|
|
534
|
-
if not prefix:
|
|
535
|
-
tree_lines.append(
|
|
536
|
-
format_dir_name_search(
|
|
537
|
-
root_dir.name, matched_files_in_level, matched_dirs_in_level
|
|
538
|
-
)
|
|
539
|
-
)
|
|
540
|
-
for i, name in enumerate(items):
|
|
541
|
-
is_last = i == num_children - 1
|
|
542
|
-
connector = style.l_connector if is_last else style.t_connector
|
|
543
|
-
entry_name = name
|
|
544
750
|
if d[name]:
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
entry_name = format_dir_name_search(
|
|
548
|
-
name, child_matched_files, child_matched_dirs
|
|
549
|
-
)
|
|
550
|
-
tree_lines.append(prefix + connector + entry_name)
|
|
551
|
-
if d[name]:
|
|
552
|
-
extension = style.h_spacer if is_last else style.v_connector
|
|
553
|
-
build_lines(d[name], prefix + extension)
|
|
751
|
+
extension = style[3] if is_last else style[2]
|
|
752
|
+
build_lines_recursive(d[name], prefix + extension)
|
|
554
753
|
|
|
555
|
-
|
|
556
|
-
|
|
754
|
+
root_name = f"[bold cyan]{root_dir.name}[/bold cyan]"
|
|
755
|
+
if show_stats:
|
|
756
|
+
files, dirs = count_children(tree_dict)
|
|
757
|
+
root_name += f" [dim][M: {files}f, {dirs}d][/dim]"
|
|
758
|
+
lines.append(root_name)
|
|
557
759
|
|
|
558
|
-
|
|
559
|
-
|
|
760
|
+
build_lines_recursive(tree_dict)
|
|
761
|
+
return lines
|
|
560
762
|
|
|
561
763
|
|
|
562
764
|
def _collate_content_to_file(
    output_path: Path,
    tree_lines: List,
    files: List[FileToProcess],
    show_tree_stats: bool,
    show_token_count: bool,
    exclude_whitespace: bool,
    progress: Any,
    task_id: Any,
    only_show_tree: bool,
) -> Tuple[float, int]:
    """
    Collates the file tree and file contents into a single output file.

    Args:
        output_path: Destination file; missing parent directories are created.
        tree_lines: Tree lines (may contain rich-style markup). When empty,
            no tree section is written.
        files: Files whose contents are appended, each under a ``FILE:``
            header.
        show_tree_stats: When True, a legend for the ``[M: ...]`` counters is
            written above the tree.
        show_token_count: When True, a character count of the collated text
            is computed and written as the first line of the output.
        exclude_whitespace: When True, whitespace is not counted.
        progress: Optional progress object; when truthy its ``update`` method
            is called once per file.
        task_id: Task identifier passed to ``progress.update``.
        only_show_tree: When True, only the tree section is written and the
            function returns ``(0, 0)``.

    Returns:
        ``(total_bytes, token_count)``: the encoded size of all file contents
        that were read, and the character count (0 unless
        ``show_token_count`` is set).
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    buffer, total_bytes, token_count = StringIO(), 0, 0
    separator = "-" * 80

    if tree_lines:
        buffer.write(f"{TREE_HEADER_TEXT}\n" + separator + "\n\n")
        if show_tree_stats:
            buffer.write(
                "Key: [M: Matched files/dirs]\n (f=files, d=directories)\n\n"
            )

        if RICH_AVAILABLE:
            content = "\n".join(Text.from_markup(line).plain for line in tree_lines)
        else:
            # Strip only real markup tags (they start with a lowercase letter,
            # "#", "/" or "@"). The generic strip_markup() helper removes any
            # bracketed text, which would also delete the "[M: 1f, 0d]" stats
            # that the rich branch keeps.
            markup_tag = re.compile(r"\[[a-z#/@][^\[\]]*\]")
            content = "\n".join(markup_tag.sub("", str(line)) for line in tree_lines)
        buffer.write(content + "\n\n")

    if only_show_tree:
        with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
            outfile.write(buffer.getvalue())
        return total_bytes, token_count

    for file_info in files:
        if progress:
            progress.update(
                task_id,
                advance=1,
                description=f"Collating [green]{file_info.relative_path_posix}[/green]",
            )
        buffer.write(
            f"{separator}\nFILE: {file_info.relative_path_posix}\n{separator}\n\n"
        )
        try:
            content = file_info.absolute_path.read_text(
                encoding=DEFAULT_ENCODING, errors="replace"
            )
            buffer.write(content + "\n\n")
            total_bytes += len(content.encode(DEFAULT_ENCODING))
        except Exception as e:
            # Best effort: one unreadable file is reported inside the
            # snapshot instead of aborting the whole collation.
            buffer.write(f"Error: Could not read file. Issue: {e}\n\n")

    final_content = buffer.getvalue()
    if show_token_count:
        content_for_count = (
            re.sub(r"\s", "", final_content) if exclude_whitespace else final_content
        )
        token_count = len(content_for_count)

    with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
        if show_token_count:
            mode = "chars, no whitespace" if exclude_whitespace else "characters"
            outfile.write(f"Token Count ({mode}): {token_count}\n\n")
        outfile.write(final_content)

    return total_bytes, token_count
|
|
732
828
|
|
|
733
829
|
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
full_path_compare: bool,
|
|
746
|
-
|
|
747
|
-
|
|
830
|
+
# --- Main Entry Point ---
|
|
831
|
+
def generate_snapshot(
|
|
832
|
+
root_directory: str = ".",
|
|
833
|
+
output_file_name: str = "project_snapshot.txt",
|
|
834
|
+
search_keywords: Optional[List[str]] = None,
|
|
835
|
+
file_extensions: Optional[List[str]] = None,
|
|
836
|
+
ignore_if_in_path: Optional[List[str]] = None,
|
|
837
|
+
ignore_extensions: Optional[List[str]] = None,
|
|
838
|
+
language_presets: Optional[List[LanguagePreset]] = None,
|
|
839
|
+
ignore_presets: Optional[List[IgnorePreset]] = None,
|
|
840
|
+
search_file_contents: bool = True,
|
|
841
|
+
full_path_compare: bool = True,
|
|
842
|
+
max_workers: Optional[int] = None,
|
|
843
|
+
generate_tree: bool = True,
|
|
844
|
+
show_tree_stats: bool = False,
|
|
845
|
+
show_token_count: bool = False,
|
|
846
|
+
exclude_whitespace_in_token_count: bool = False,
|
|
847
|
+
read_binary_files: bool = False,
|
|
848
|
+
files: Optional[List[str]] = None,
|
|
849
|
+
only_show_tree: bool = False,
|
|
850
|
+
case_sensitive_filter: bool = False,
|
|
851
|
+
case_sensitive_search: bool = False,
|
|
748
852
|
) -> None:
|
|
749
|
-
"""
|
|
853
|
+
"""
|
|
854
|
+
Orchestrates the entire process of scanning, filtering, and collating project files.
|
|
855
|
+
"""
|
|
856
|
+
console, start_time = ConsoleManager(), time.perf_counter()
|
|
857
|
+
root_dir = Path(root_directory or ".").resolve()
|
|
858
|
+
if not root_dir.is_dir():
|
|
859
|
+
console.log(f"Error: Root directory '{root_dir}' not found.", style="bold red")
|
|
860
|
+
return
|
|
861
|
+
|
|
862
|
+
# Normalize keywords for display/logic
|
|
863
|
+
keywords = [k.strip() for k in search_keywords or [] if k.strip()]
|
|
864
|
+
if not case_sensitive_search:
|
|
865
|
+
# We don't lower here for the variable passed to functions,
|
|
866
|
+
# but for consistent display in the table we might want to.
|
|
867
|
+
# However, logic downstream handles lowering if case_sensitive_search is False.
|
|
868
|
+
pass
|
|
869
|
+
|
|
870
|
+
snapshot_mode = not keywords
|
|
871
|
+
|
|
872
|
+
# Normalize filtering criteria
|
|
750
873
|
criteria = FilterCriteria.normalize_inputs(
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
874
|
+
file_types=file_extensions,
|
|
875
|
+
ignore_if_in_path=ignore_if_in_path,
|
|
876
|
+
ignore_extensions=ignore_extensions,
|
|
877
|
+
lang_presets=language_presets,
|
|
878
|
+
ignore_presets=ignore_presets,
|
|
879
|
+
files=files,
|
|
880
|
+
case_sensitive=case_sensitive_filter,
|
|
757
881
|
)
|
|
758
|
-
normalized_keywords = [
|
|
759
|
-
sub.lower().strip() for sub in sub_string_match if sub.strip()
|
|
760
|
-
]
|
|
761
|
-
if not normalized_keywords:
|
|
762
|
-
print("Error: Search mode requires 'search_keywords' to be provided.")
|
|
763
|
-
return
|
|
764
882
|
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
883
|
+
config_rows = [
|
|
884
|
+
["Root Directory", str(root_dir)],
|
|
885
|
+
["File Types", ", ".join(criteria.file_extensions) or "All"],
|
|
886
|
+
["Specific Files", ", ".join(criteria.specific_files) or "None"],
|
|
887
|
+
["Ignore Paths", ", ".join(criteria.ignore_if_in_path) or "None"],
|
|
888
|
+
["Ignore Extensions", ", ".join(criteria.ignore_extensions) or "None"],
|
|
889
|
+
["Generate Tree", "[green]Yes[/green]" if generate_tree else "[red]No[/red]"],
|
|
890
|
+
]
|
|
891
|
+
if generate_tree:
|
|
892
|
+
config_rows.append(
|
|
893
|
+
["Tree Stats", "[green]Yes[/green]" if show_tree_stats else "[red]No[/red]"]
|
|
768
894
|
)
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
895
|
+
config_rows.append(
|
|
896
|
+
[
|
|
897
|
+
"Show Token Count",
|
|
898
|
+
"[green]Yes[/green]" if show_token_count else "[red]No[/red]",
|
|
899
|
+
]
|
|
900
|
+
)
|
|
901
|
+
if show_token_count:
|
|
902
|
+
config_rows.append(
|
|
903
|
+
[
|
|
904
|
+
"Exclude Whitespace",
|
|
905
|
+
(
|
|
906
|
+
"[green]Yes[/green]"
|
|
907
|
+
if exclude_whitespace_in_token_count
|
|
908
|
+
else "[red]No[/red]"
|
|
909
|
+
),
|
|
782
910
|
]
|
|
783
|
-
|
|
784
|
-
for filename in filenames:
|
|
785
|
-
file_abs_path = current_dir_path / filename
|
|
786
|
-
try:
|
|
787
|
-
relative_parts = file_abs_path.relative_to(root_dir).parts
|
|
788
|
-
if any(
|
|
789
|
-
part.lower() in criteria.ignore_path_components
|
|
790
|
-
for part in relative_parts
|
|
791
|
-
):
|
|
792
|
-
continue
|
|
793
|
-
except ValueError:
|
|
794
|
-
continue
|
|
795
|
-
|
|
796
|
-
if (
|
|
797
|
-
not criteria.file_extensions
|
|
798
|
-
or file_abs_path.suffix.lower() in criteria.file_extensions
|
|
799
|
-
):
|
|
800
|
-
candidate_files.append(file_abs_path)
|
|
801
|
-
|
|
802
|
-
print(f"Discovered {len(candidate_files)} candidate files to process.")
|
|
803
|
-
|
|
804
|
-
if not candidate_files:
|
|
805
|
-
print(
|
|
806
|
-
"\nScan complete. No files matched the initial criteria (extensions and ignores)."
|
|
807
911
|
)
|
|
808
|
-
with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
|
|
809
|
-
f_out.write("No files found matching the specified criteria.\n")
|
|
810
|
-
return
|
|
811
|
-
|
|
812
|
-
matched_files: Set[Path] = set()
|
|
813
|
-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
814
|
-
future_to_file = {
|
|
815
|
-
executor.submit(
|
|
816
|
-
process_file_for_search,
|
|
817
|
-
file,
|
|
818
|
-
normalized_keywords,
|
|
819
|
-
search_file_contents,
|
|
820
|
-
full_path_compare,
|
|
821
|
-
): file
|
|
822
|
-
for file in candidate_files
|
|
823
|
-
}
|
|
824
912
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
913
|
+
if snapshot_mode:
|
|
914
|
+
config_rows.insert(1, ["Mode", "[bold blue]Snapshot[/bold blue]"])
|
|
915
|
+
config_rows.append(
|
|
916
|
+
[
|
|
917
|
+
"Case Sensitive Filter",
|
|
918
|
+
"[green]Yes[/green]" if case_sensitive_filter else "[red]No[/red]",
|
|
919
|
+
]
|
|
831
920
|
)
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
matched_files.add(result)
|
|
837
|
-
|
|
838
|
-
if not matched_files:
|
|
839
|
-
print(
|
|
840
|
-
"\nScan complete. No matching files were found after processing keywords."
|
|
921
|
+
else:
|
|
922
|
+
config_rows.insert(1, ["Mode", "[bold yellow]Search[/bold yellow]"])
|
|
923
|
+
config_rows.insert(
|
|
924
|
+
2, ["Search Keywords", f"[yellow]{', '.join(keywords)}[/yellow]"]
|
|
841
925
|
)
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
|
|
848
|
-
)
|
|
849
|
-
|
|
850
|
-
print(f"Found {len(sorted_matched_files)} matching files.")
|
|
851
|
-
print(f"Generating output file at '{Path(output_file).resolve()}'...")
|
|
852
|
-
|
|
853
|
-
tree_content_lines = _generate_tree_from_paths(
|
|
854
|
-
root_dir, sorted_matched_files, tree_style, show_tree_stats
|
|
855
|
-
)
|
|
856
|
-
files_to_process = [
|
|
857
|
-
FileToProcess(f, f.relative_to(root_dir).as_posix())
|
|
858
|
-
for f in sorted_matched_files
|
|
859
|
-
]
|
|
860
|
-
|
|
861
|
-
_collate_content_to_file(
|
|
862
|
-
output_file,
|
|
863
|
-
tree_content_lines,
|
|
864
|
-
files_to_process,
|
|
865
|
-
DEFAULT_ENCODING,
|
|
866
|
-
DEFAULT_SEPARATOR_CHAR,
|
|
867
|
-
DEFAULT_SEPARATOR_LINE_LENGTH,
|
|
868
|
-
show_token_count,
|
|
869
|
-
show_tree_stats,
|
|
870
|
-
ProjectMode.SEARCH,
|
|
871
|
-
)
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
# --- DECONSTRUCTION FUNCTION ---
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
def deconstruct_snapshot(snapshot_file_path: str) -> Dict[str, Any]:
|
|
878
|
-
"""Scans a compiled snapshot file, extracts the directory tree lines and file paths."""
|
|
879
|
-
snapshot_path = Path(snapshot_file_path)
|
|
880
|
-
if not snapshot_path.is_file():
|
|
881
|
-
raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")
|
|
882
|
-
tree_lines: List[str] = []
|
|
883
|
-
file_paths: List[str] = []
|
|
884
|
-
separator_pattern = re.compile(
|
|
885
|
-
r"^[{}]{{4,}}[{}|]*$".format(
|
|
886
|
-
re.escape(DEFAULT_SEPARATOR_CHAR), re.escape(DEFAULT_SEPARATOR_CHAR)
|
|
926
|
+
config_rows.append(
|
|
927
|
+
[
|
|
928
|
+
"Search Content",
|
|
929
|
+
"[green]Yes[/green]" if search_file_contents else "[red]No[/red]",
|
|
930
|
+
]
|
|
887
931
|
)
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
if state == "LOOKING_FOR_TREE":
|
|
894
|
-
if line == TREE_HEADER_TEXT:
|
|
895
|
-
state = "READING_TREE"
|
|
896
|
-
elif state == "READING_TREE":
|
|
897
|
-
if not line or separator_pattern.match(line):
|
|
898
|
-
if tree_lines and separator_pattern.match(line):
|
|
899
|
-
state = "LOOKING_FOR_CONTENT"
|
|
900
|
-
continue
|
|
901
|
-
if state == "READING_TREE" and not line.startswith("Key:"):
|
|
902
|
-
tree_lines.append(line)
|
|
903
|
-
elif state == "LOOKING_FOR_CONTENT":
|
|
904
|
-
if line.startswith(FILE_HEADER_PREFIX):
|
|
905
|
-
file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
|
|
906
|
-
state = "READING_CONTENT"
|
|
907
|
-
elif state == "READING_CONTENT":
|
|
908
|
-
if line.startswith(FILE_HEADER_PREFIX):
|
|
909
|
-
file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
|
|
910
|
-
# Post-process to remove the key lines if they were accidentally captured
|
|
911
|
-
tree_lines = [
|
|
912
|
-
line
|
|
913
|
-
for line in tree_lines
|
|
914
|
-
if not line.strip().startswith("Key:")
|
|
915
|
-
and not line.strip().startswith("(f=files")
|
|
916
|
-
]
|
|
917
|
-
return {"tree_lines": tree_lines, "file_paths": file_paths}
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
# --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
def process_project(
|
|
924
|
-
root_dir_param: Optional[str] = None,
|
|
925
|
-
output_file_name: str = "project_output.txt",
|
|
926
|
-
mode: ProjectMode = ProjectMode.FILTER,
|
|
927
|
-
file_types: Optional[List[str]] = None,
|
|
928
|
-
ignore_dirs_in_path: Optional[List[str]] = None,
|
|
929
|
-
language_presets: Optional[List[LanguagePreset]] = None,
|
|
930
|
-
ignore_presets: Optional[List[IgnorePreset]] = None,
|
|
931
|
-
whitelist_filename_substrings: Optional[List[str]] = None,
|
|
932
|
-
ignore_filename_substrings: Optional[List[str]] = None,
|
|
933
|
-
generate_tree: bool = True,
|
|
934
|
-
search_keywords: Optional[List[str]] = None,
|
|
935
|
-
search_file_contents: bool = False,
|
|
936
|
-
full_path_compare: bool = True,
|
|
937
|
-
max_workers: Optional[int] = None,
|
|
938
|
-
tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
|
|
939
|
-
tree_style_t_connector: Optional[str] = None,
|
|
940
|
-
tree_style_l_connector: Optional[str] = None,
|
|
941
|
-
tree_style_v_connector: Optional[str] = None,
|
|
942
|
-
tree_style_h_spacer: Optional[str] = None,
|
|
943
|
-
show_token_count: bool = False,
|
|
944
|
-
show_tree_stats: bool = False,
|
|
945
|
-
encoding: str = DEFAULT_ENCODING,
|
|
946
|
-
separator_char: str = DEFAULT_SEPARATOR_CHAR,
|
|
947
|
-
separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
|
|
948
|
-
) -> None:
|
|
949
|
-
"""Main function to process a project directory in either FILTER or SEARCH mode."""
|
|
950
|
-
actual_root_dir = validate_root_directory(root_dir_param)
|
|
951
|
-
if actual_root_dir is None:
|
|
952
|
-
sys.exit(1)
|
|
953
|
-
style = tree_style_preset.to_style()
|
|
954
|
-
final_style = TreeStyle(
|
|
955
|
-
t_connector=tree_style_t_connector or style.t_connector,
|
|
956
|
-
l_connector=tree_style_l_connector or style.l_connector,
|
|
957
|
-
v_connector=tree_style_v_connector or style.v_connector,
|
|
958
|
-
h_spacer=tree_style_h_spacer or style.h_spacer,
|
|
959
|
-
)
|
|
960
|
-
print(f"--- Starting Project Processing in {mode.name} Mode ---")
|
|
961
|
-
if mode == ProjectMode.FILTER:
|
|
962
|
-
filter_and_append_content(
|
|
963
|
-
actual_root_dir,
|
|
964
|
-
output_file_name,
|
|
965
|
-
final_style,
|
|
966
|
-
generate_tree,
|
|
967
|
-
file_types,
|
|
968
|
-
whitelist_filename_substrings,
|
|
969
|
-
ignore_filename_substrings,
|
|
970
|
-
ignore_dirs_in_path,
|
|
971
|
-
language_presets,
|
|
972
|
-
ignore_presets,
|
|
973
|
-
encoding,
|
|
974
|
-
separator_char,
|
|
975
|
-
separator_line_len,
|
|
976
|
-
show_token_count,
|
|
977
|
-
show_tree_stats,
|
|
932
|
+
config_rows.append(
|
|
933
|
+
[
|
|
934
|
+
"Read Binary Files",
|
|
935
|
+
"[green]Yes[/green]" if read_binary_files else "[red]No[/red]",
|
|
936
|
+
]
|
|
978
937
|
)
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
actual_root_dir,
|
|
985
|
-
search_keywords,
|
|
986
|
-
output_file_name,
|
|
987
|
-
final_style,
|
|
988
|
-
file_types,
|
|
989
|
-
ignore_dirs_in_path,
|
|
990
|
-
language_presets,
|
|
991
|
-
ignore_presets,
|
|
992
|
-
search_file_contents,
|
|
993
|
-
max_workers,
|
|
994
|
-
full_path_compare,
|
|
995
|
-
show_token_count,
|
|
996
|
-
show_tree_stats,
|
|
938
|
+
config_rows.append(
|
|
939
|
+
[
|
|
940
|
+
"Case Sensitive Search",
|
|
941
|
+
"[green]Yes[/green]" if case_sensitive_search else "[red]No[/red]",
|
|
942
|
+
]
|
|
997
943
|
)
|
|
998
|
-
print("--- Script Execution Finished ---")
|
|
999
944
|
|
|
945
|
+
if only_show_tree:
|
|
946
|
+
config_rows.append(["Output Content", "[yellow]Tree Only[/yellow]"])
|
|
1000
947
|
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
output_file_name: str = "project_filter_output.txt",
|
|
1004
|
-
file_types: Optional[List[str]] = None,
|
|
1005
|
-
ignore_dirs_in_path: Optional[List[str]] = None,
|
|
1006
|
-
language_presets: Optional[List[LanguagePreset]] = None,
|
|
1007
|
-
ignore_presets: Optional[List[IgnorePreset]] = None,
|
|
1008
|
-
whitelist_filename_substrings: Optional[List[str]] = None,
|
|
1009
|
-
ignore_filename_substrings: Optional[List[str]] = None,
|
|
1010
|
-
generate_tree: bool = True,
|
|
1011
|
-
tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
|
|
1012
|
-
tree_style_t_connector: Optional[str] = None,
|
|
1013
|
-
tree_style_l_connector: Optional[str] = None,
|
|
1014
|
-
tree_style_v_connector: Optional[str] = None,
|
|
1015
|
-
tree_style_h_spacer: Optional[str] = None,
|
|
1016
|
-
show_token_count: bool = False,
|
|
1017
|
-
show_tree_stats: bool = False,
|
|
1018
|
-
encoding: str = DEFAULT_ENCODING,
|
|
1019
|
-
separator_char: str = DEFAULT_SEPARATOR_CHAR,
|
|
1020
|
-
separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
|
|
1021
|
-
) -> None:
|
|
1022
|
-
"""Utility wrapper for process_project in FILTER mode."""
|
|
1023
|
-
process_project(
|
|
1024
|
-
root_dir_param=root_dir_param,
|
|
1025
|
-
output_file_name=output_file_name,
|
|
1026
|
-
mode=ProjectMode.FILTER,
|
|
1027
|
-
file_types=file_types,
|
|
1028
|
-
ignore_dirs_in_path=ignore_dirs_in_path,
|
|
1029
|
-
language_presets=language_presets,
|
|
1030
|
-
ignore_presets=ignore_presets,
|
|
1031
|
-
whitelist_filename_substrings=whitelist_filename_substrings,
|
|
1032
|
-
ignore_filename_substrings=ignore_filename_substrings,
|
|
1033
|
-
generate_tree=generate_tree,
|
|
1034
|
-
tree_style_preset=tree_style_preset,
|
|
1035
|
-
tree_style_t_connector=tree_style_t_connector,
|
|
1036
|
-
tree_style_l_connector=tree_style_l_connector,
|
|
1037
|
-
tree_style_v_connector=tree_style_v_connector,
|
|
1038
|
-
tree_style_h_spacer=tree_style_h_spacer,
|
|
1039
|
-
show_token_count=show_token_count,
|
|
1040
|
-
show_tree_stats=show_tree_stats,
|
|
1041
|
-
encoding=encoding,
|
|
1042
|
-
separator_char=separator_char,
|
|
1043
|
-
separator_line_len=separator_line_len,
|
|
948
|
+
console.print_table(
|
|
949
|
+
"Project Scan Configuration", ["Parameter", "Value"], config_rows
|
|
1044
950
|
)
|
|
1045
951
|
|
|
952
|
+
@contextmanager
|
|
953
|
+
def progress_manager():
|
|
954
|
+
if RICH_AVAILABLE:
|
|
955
|
+
progress = Progress(
|
|
956
|
+
TextColumn("[progress.description]{task.description}"),
|
|
957
|
+
BarColumn(),
|
|
958
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
959
|
+
SpinnerColumn(),
|
|
960
|
+
TimeElapsedColumn(),
|
|
961
|
+
"{task.fields[status]}",
|
|
962
|
+
expand=True,
|
|
963
|
+
)
|
|
964
|
+
with Live(progress, console=console.console, refresh_per_second=10) as live:
|
|
965
|
+
yield progress
|
|
966
|
+
else:
|
|
967
|
+
with FallbackProgress() as progress:
|
|
968
|
+
yield progress
|
|
969
|
+
|
|
970
|
+
with progress_manager() as progress:
|
|
971
|
+
discover_task = progress.add_task("Discovering files", total=None, status="")
|
|
972
|
+
candidate_files = _discover_files(root_dir, criteria, progress, discover_task)
|
|
973
|
+
if RICH_AVAILABLE:
|
|
974
|
+
progress.update(
|
|
975
|
+
discover_task,
|
|
976
|
+
description=f"Discovered [bold green]{len(candidate_files)}[/bold green] candidates",
|
|
977
|
+
status="",
|
|
978
|
+
)
|
|
979
|
+
else:
|
|
980
|
+
progress.update(
|
|
981
|
+
discover_task,
|
|
982
|
+
description=f"Discovered {len(candidate_files)} candidates",
|
|
983
|
+
)
|
|
1046
984
|
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
output_file_name=output_file_name,
|
|
1076
|
-
mode=ProjectMode.SEARCH,
|
|
1077
|
-
file_types=file_extensions_to_check,
|
|
1078
|
-
ignore_dirs_in_path=ignore_dirs_in_path,
|
|
1079
|
-
language_presets=language_presets,
|
|
1080
|
-
ignore_presets=ignore_presets,
|
|
1081
|
-
search_keywords=search_keywords,
|
|
1082
|
-
search_file_contents=search_file_contents,
|
|
1083
|
-
full_path_compare=full_path_compare,
|
|
1084
|
-
max_workers=max_workers,
|
|
1085
|
-
tree_style_preset=tree_style_preset,
|
|
1086
|
-
tree_style_t_connector=tree_style_t_connector,
|
|
1087
|
-
tree_style_l_connector=tree_style_l_connector,
|
|
1088
|
-
tree_style_v_connector=tree_style_v_connector,
|
|
1089
|
-
tree_style_h_spacer=tree_style_h_spacer,
|
|
1090
|
-
show_token_count=show_token_count,
|
|
1091
|
-
show_tree_stats=show_tree_stats,
|
|
1092
|
-
encoding=encoding,
|
|
1093
|
-
separator_char=separator_char,
|
|
1094
|
-
separator_line_len=separator_line_len,
|
|
1095
|
-
)
|
|
985
|
+
matched_files = set()
|
|
986
|
+
if candidate_files:
|
|
987
|
+
if snapshot_mode:
|
|
988
|
+
matched_files = set(candidate_files)
|
|
989
|
+
if RICH_AVAILABLE:
|
|
990
|
+
progress.add_task(
|
|
991
|
+
"[dim]Keyword Processing[/dim]",
|
|
992
|
+
total=1,
|
|
993
|
+
completed=1,
|
|
994
|
+
status="[bold blue](Snapshot Mode)[/bold blue]",
|
|
995
|
+
)
|
|
996
|
+
else:
|
|
997
|
+
process_task = progress.add_task(
|
|
998
|
+
f"Processing {len(candidate_files)} files",
|
|
999
|
+
total=len(candidate_files),
|
|
1000
|
+
status="",
|
|
1001
|
+
)
|
|
1002
|
+
matched_files = _process_files_concurrently(
|
|
1003
|
+
candidate_files,
|
|
1004
|
+
keywords,
|
|
1005
|
+
search_file_contents,
|
|
1006
|
+
full_path_compare,
|
|
1007
|
+
max_workers,
|
|
1008
|
+
progress,
|
|
1009
|
+
process_task,
|
|
1010
|
+
read_binary_files,
|
|
1011
|
+
case_sensitive_search,
|
|
1012
|
+
)
|
|
1096
1013
|
|
|
1014
|
+
output_path, total_bytes, token_count = None, 0, 0
|
|
1015
|
+
if matched_files:
|
|
1016
|
+
sorted_files = sorted(
|
|
1017
|
+
list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix()
|
|
1018
|
+
)
|
|
1019
|
+
tree_lines = []
|
|
1020
|
+
if generate_tree:
|
|
1021
|
+
tree_task = progress.add_task(
|
|
1022
|
+
"Generating file tree...", total=1, status=""
|
|
1023
|
+
)
|
|
1024
|
+
tree_lines = _generate_tree_with_stats(
|
|
1025
|
+
root_dir, sorted_files, show_tree_stats
|
|
1026
|
+
)
|
|
1027
|
+
progress.update(
|
|
1028
|
+
tree_task, completed=1, description="Generated file tree"
|
|
1029
|
+
)
|
|
1030
|
+
|
|
1031
|
+
collate_task = progress.add_task(
|
|
1032
|
+
f"Collating {len(sorted_files)} files",
|
|
1033
|
+
total=len(sorted_files),
|
|
1034
|
+
status="",
|
|
1035
|
+
)
|
|
1036
|
+
files_to_process = [
|
|
1037
|
+
FileToProcess(f, f.relative_to(root_dir).as_posix())
|
|
1038
|
+
for f in sorted_files
|
|
1039
|
+
]
|
|
1040
|
+
output_path = Path(output_file_name).resolve()
|
|
1041
|
+
total_bytes, token_count = _collate_content_to_file(
|
|
1042
|
+
output_path,
|
|
1043
|
+
tree_lines,
|
|
1044
|
+
files_to_process,
|
|
1045
|
+
show_tree_stats,
|
|
1046
|
+
show_token_count,
|
|
1047
|
+
exclude_whitespace_in_token_count,
|
|
1048
|
+
progress,
|
|
1049
|
+
collate_task,
|
|
1050
|
+
only_show_tree,
|
|
1051
|
+
)
|
|
1052
|
+
|
|
1053
|
+
end_time = time.perf_counter()
|
|
1054
|
+
summary_rows = [
|
|
1055
|
+
["Candidate Files", f"{len(candidate_files)}"],
|
|
1056
|
+
["Files Matched", f"[bold green]{len(matched_files)}[/bold green]"],
|
|
1057
|
+
["Total Time", f"{end_time - start_time:.2f} seconds"],
|
|
1058
|
+
["Output Size", f"{total_bytes / 1024:.2f} KB"],
|
|
1059
|
+
]
|
|
1060
|
+
if show_token_count:
|
|
1061
|
+
summary_rows.append(["Approximated Tokens", f"{token_count:,}"])
|
|
1062
|
+
summary_rows.append(["Output File", str(output_path or "N/A")])
|
|
1063
|
+
console.print_table("Scan Complete", ["Metric", "Value"], summary_rows)
|
|
1097
1064
|
|
|
1098
|
-
__all__ = [
|
|
1099
|
-
"process_project",
|
|
1100
|
-
"filter_project",
|
|
1101
|
-
"find_in_project",
|
|
1102
|
-
"deconstruct_snapshot",
|
|
1103
|
-
"ProjectMode",
|
|
1104
|
-
"LanguagePreset",
|
|
1105
|
-
"IgnorePreset",
|
|
1106
|
-
"TreeStylePreset",
|
|
1107
|
-
]
|
|
1108
1065
|
|
|
1109
1066
|
if __name__ == "__main__":
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1067
|
+
generate_snapshot(
|
|
1068
|
+
root_directory=".",
|
|
1069
|
+
output_file_name="project_snapshot_final.txt",
|
|
1070
|
+
# No search keywords triggers Snapshot Mode
|
|
1071
|
+
language_presets=[LanguagePreset.PYTHON],
|
|
1072
|
+
ignore_presets=[
|
|
1073
|
+
IgnorePreset.PYTHON,
|
|
1074
|
+
IgnorePreset.BUILD_ARTIFACTS,
|
|
1075
|
+
IgnorePreset.VERSION_CONTROL,
|
|
1076
|
+
IgnorePreset.NODE_JS,
|
|
1077
|
+
IgnorePreset.IDE_METADATA,
|
|
1078
|
+
],
|
|
1079
|
+
ignore_extensions=[".log", ".tmp"], # Example of new functionality
|
|
1080
|
+
generate_tree=True,
|
|
1116
1081
|
show_tree_stats=True,
|
|
1117
1082
|
show_token_count=True,
|
|
1118
|
-
|
|
1083
|
+
exclude_whitespace_in_token_count=True,
|
|
1084
|
+
)
|