dirshot 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dirshot/__init__.py +14 -13
- dirshot/dirshot.py +890 -986
- dirshot-0.2.0.dist-info/METADATA +194 -0
- dirshot-0.2.0.dist-info/RECORD +6 -0
- dirshot/examples.py +0 -65
- dirshot-0.1.3.dist-info/METADATA +0 -110
- dirshot-0.1.3.dist-info/RECORD +0 -7
- {dirshot-0.1.3.dist-info → dirshot-0.2.0.dist-info}/WHEEL +0 -0
- {dirshot-0.1.3.dist-info → dirshot-0.2.0.dist-info}/top_level.txt +0 -0
dirshot/dirshot.py
CHANGED
|
@@ -2,1117 +2,1021 @@ import os
|
|
|
2
2
|
import sys
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
|
+
import threading
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from dataclasses import dataclass, field
|
|
7
|
-
from typing import List, Optional, Set, Tuple,
|
|
8
|
+
from typing import List, Optional, Set, Tuple, NamedTuple, Dict, Any
|
|
8
9
|
from enum import Enum
|
|
9
10
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
10
11
|
from io import StringIO
|
|
12
|
+
from contextlib import contextmanager
|
|
11
13
|
|
|
12
|
-
# ---
|
|
14
|
+
# --- Dependency & Console Management ---
|
|
13
15
|
try:
|
|
14
|
-
from
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
from rich.progress import (
|
|
18
|
+
Progress,
|
|
19
|
+
SpinnerColumn,
|
|
20
|
+
BarColumn,
|
|
21
|
+
TextColumn,
|
|
22
|
+
TimeElapsedColumn,
|
|
23
|
+
)
|
|
24
|
+
from rich.table import Table
|
|
25
|
+
from rich.live import Live
|
|
26
|
+
from rich.panel import Panel
|
|
27
|
+
from rich.text import Text
|
|
28
|
+
|
|
29
|
+
RICH_AVAILABLE = True
|
|
15
30
|
except ImportError:
|
|
31
|
+
RICH_AVAILABLE = False
|
|
32
|
+
|
|
33
|
+
class FallbackProgress:
|
|
34
|
+
"""A simple, dependency-free progress handler for when 'rich' is not installed."""
|
|
35
|
+
|
|
36
|
+
def __init__(self):
|
|
37
|
+
self.tasks, self.task_count, self.active_line = {}, 0, ""
|
|
16
38
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
39
|
+
def add_task(self, description, total=None, **kwargs):
|
|
40
|
+
task_id = self.task_count
|
|
41
|
+
self.tasks[task_id] = {"d": description, "t": total, "c": 0}
|
|
42
|
+
self.task_count += 1
|
|
43
|
+
return task_id
|
|
20
44
|
|
|
21
|
-
def
|
|
22
|
-
self,
|
|
45
|
+
def update(
|
|
46
|
+
self, task_id, advance=0, completed=None, description=None, **kwargs
|
|
23
47
|
):
|
|
24
|
-
self.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
48
|
+
if task_id not in self.tasks:
|
|
49
|
+
return
|
|
50
|
+
task = self.tasks[task_id]
|
|
51
|
+
if description:
|
|
52
|
+
task["d"] = description
|
|
53
|
+
task["c"] = completed if completed is not None else task["c"] + advance
|
|
54
|
+
line = f"-> {task['d']}: {task['c']}" + (
|
|
55
|
+
f"/{task['t']}" if task["t"] else ""
|
|
29
56
|
)
|
|
30
|
-
self.
|
|
31
|
-
self.unit = unit
|
|
32
|
-
self.current = 0
|
|
33
|
-
self.start_time = time.time()
|
|
34
|
-
self._last_update_time = 0
|
|
35
|
-
self._postfix = postfix or {}
|
|
36
|
-
|
|
37
|
-
def __iter__(self):
|
|
38
|
-
if self.iterable is None:
|
|
39
|
-
raise TypeError("tqdm fallback must be initialized with an iterable.")
|
|
40
|
-
for obj in self.iterable:
|
|
41
|
-
yield obj
|
|
42
|
-
self.update(1)
|
|
43
|
-
self.close()
|
|
44
|
-
|
|
45
|
-
def update(self, n=1):
|
|
46
|
-
"""Update the progress bar by n steps."""
|
|
47
|
-
self.current += n
|
|
48
|
-
now = time.time()
|
|
49
|
-
if (
|
|
50
|
-
self.total is None
|
|
51
|
-
or now - self._last_update_time > 0.1
|
|
52
|
-
or self.current == self.total
|
|
53
|
-
):
|
|
54
|
-
self._last_update_time = now
|
|
55
|
-
self._draw()
|
|
56
|
-
|
|
57
|
-
def set_description(self, desc: str):
|
|
58
|
-
"""Set the description of the progress bar."""
|
|
59
|
-
self.desc = desc
|
|
60
|
-
self._draw()
|
|
61
|
-
|
|
62
|
-
def set_postfix_str(self, s: str):
|
|
63
|
-
self._postfix["info"] = s
|
|
64
|
-
self._draw()
|
|
65
|
-
|
|
66
|
-
def _draw(self):
|
|
67
|
-
"""Draw the progress bar to the console."""
|
|
68
|
-
postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
|
|
69
|
-
|
|
70
|
-
if self.total and self.total > 0:
|
|
71
|
-
percent = int((self.current / self.total) * 100)
|
|
72
|
-
bar_length = 25
|
|
73
|
-
filled_length = int(bar_length * self.current // self.total)
|
|
74
|
-
bar = "█" * filled_length + "-" * (bar_length - filled_length)
|
|
75
|
-
progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
|
|
76
|
-
else: # Case where total is not known
|
|
77
|
-
progress_line = f"\r{self.desc}: {self.current} {self.unit}"
|
|
78
|
-
|
|
79
|
-
if postfix_str:
|
|
80
|
-
progress_line += f" [{postfix_str}]"
|
|
81
|
-
|
|
82
|
-
# Pad with spaces to clear previous, longer lines
|
|
83
|
-
terminal_width = 80
|
|
84
|
-
sys.stdout.write(progress_line.ljust(terminal_width))
|
|
57
|
+
sys.stdout.write("\r" + line.ljust(len(self.active_line) + 2))
|
|
85
58
|
sys.stdout.flush()
|
|
59
|
+
self.active_line = line
|
|
86
60
|
|
|
87
|
-
def
|
|
88
|
-
|
|
89
|
-
|
|
61
|
+
def __enter__(self):
|
|
62
|
+
return self
|
|
63
|
+
|
|
64
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
90
65
|
sys.stdout.write("\n")
|
|
91
66
|
sys.stdout.flush()
|
|
92
67
|
|
|
93
68
|
|
|
69
|
+
class ConsoleManager:
|
|
70
|
+
"""A wrapper to gracefully handle console output with or without 'rich'."""
|
|
71
|
+
|
|
72
|
+
def __init__(self):
|
|
73
|
+
"""Initializes the ConsoleManager, detecting if 'rich' is available."""
|
|
74
|
+
self.console = Console() if RICH_AVAILABLE else None
|
|
75
|
+
|
|
76
|
+
def log(self, message: str, style: str = ""):
|
|
77
|
+
"""Logs a message to the console, applying a style if 'rich' is available."""
|
|
78
|
+
if self.console:
|
|
79
|
+
self.console.log(message, style=style)
|
|
80
|
+
else:
|
|
81
|
+
print(f"[{time.strftime('%H:%M:%S')}] {message}")
|
|
82
|
+
|
|
83
|
+
def print_table(self, title: str, columns: List[str], rows: List[List[str]]):
|
|
84
|
+
"""Prints a formatted table to the console."""
|
|
85
|
+
if self.console:
|
|
86
|
+
table = Table(
|
|
87
|
+
title=title,
|
|
88
|
+
show_header=True,
|
|
89
|
+
header_style="bold magenta",
|
|
90
|
+
border_style="dim",
|
|
91
|
+
)
|
|
92
|
+
for col in columns:
|
|
93
|
+
table.add_column(col)
|
|
94
|
+
for row in rows:
|
|
95
|
+
table.add_row(*row)
|
|
96
|
+
self.console.print(table)
|
|
97
|
+
else:
|
|
98
|
+
print(f"\n--- {title} ---")
|
|
99
|
+
print(" | ".join(columns))
|
|
100
|
+
for row in rows:
|
|
101
|
+
print(" | ".join(row))
|
|
102
|
+
print("-" * (len(title) + 6))
|
|
103
|
+
|
|
104
|
+
|
|
94
105
|
# --- Configuration Constants ---
|
|
95
|
-
DEFAULT_SEPARATOR_CHAR = "-"
|
|
96
|
-
|
|
97
|
-
DEFAULT_ENCODING = "utf-8"
|
|
98
|
-
TREE_HEADER_TEXT = "Project File Structure"
|
|
99
|
-
FILE_HEADER_PREFIX = "FILE: "
|
|
100
|
-
TOKEN_APPROX_MODE = "CHAR_COUNT"
|
|
101
|
-
|
|
102
|
-
# List of binary file extensions to skip during content search
|
|
106
|
+
DEFAULT_SEPARATOR_CHAR, DEFAULT_ENCODING = "-", "utf-8"
|
|
107
|
+
TREE_HEADER_TEXT, FILE_HEADER_PREFIX = "Project File Structure", "FILE: "
|
|
103
108
|
BINARY_FILE_EXTENSIONS = {
|
|
104
|
-
# Images
|
|
105
109
|
".png",
|
|
106
110
|
".jpg",
|
|
107
111
|
".jpeg",
|
|
108
112
|
".gif",
|
|
109
|
-
".bmp",
|
|
110
|
-
".ico",
|
|
111
|
-
".tiff",
|
|
112
|
-
".webp",
|
|
113
|
-
# Documents
|
|
114
113
|
".pdf",
|
|
115
|
-
".doc",
|
|
116
|
-
".docx",
|
|
117
|
-
".xls",
|
|
118
|
-
".xlsx",
|
|
119
|
-
".ppt",
|
|
120
|
-
".pptx",
|
|
121
|
-
".odt",
|
|
122
|
-
".ods",
|
|
123
|
-
# Archives
|
|
124
114
|
".zip",
|
|
125
|
-
".gz",
|
|
126
|
-
".tar",
|
|
127
|
-
".rar",
|
|
128
|
-
".7z",
|
|
129
|
-
".bz2",
|
|
130
|
-
".xz",
|
|
131
|
-
# Executables & Binaries
|
|
132
115
|
".exe",
|
|
133
116
|
".dll",
|
|
134
117
|
".so",
|
|
135
|
-
".o",
|
|
136
|
-
".a",
|
|
137
|
-
".lib",
|
|
138
|
-
".bin",
|
|
139
|
-
".dat",
|
|
140
|
-
".db",
|
|
141
|
-
".sqlite",
|
|
142
|
-
".img",
|
|
143
|
-
".iso",
|
|
144
|
-
# Compiled Code
|
|
145
|
-
".class",
|
|
146
118
|
".jar",
|
|
147
|
-
".war",
|
|
148
119
|
".pyc",
|
|
149
|
-
".pyo",
|
|
150
|
-
# Audio/Video
|
|
151
120
|
".mp3",
|
|
152
|
-
".wav",
|
|
153
|
-
".flac",
|
|
154
|
-
".ogg",
|
|
155
121
|
".mp4",
|
|
156
|
-
".mkv",
|
|
157
|
-
".avi",
|
|
158
|
-
".mov",
|
|
159
|
-
".wmv",
|
|
160
|
-
# Fonts
|
|
161
|
-
".ttf",
|
|
162
|
-
".otf",
|
|
163
|
-
".woff",
|
|
164
|
-
".woff2",
|
|
165
122
|
}
|
|
166
123
|
|
|
167
124
|
|
|
168
|
-
# ---
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
"
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
125
|
+
# --- Base Lists for Presets ---
|
|
126
|
+
# These are defined outside the enums to allow for safe composition.
|
|
127
|
+
_PYTHON_BASE = [
|
|
128
|
+
".py",
|
|
129
|
+
".pyw",
|
|
130
|
+
"requirements.txt",
|
|
131
|
+
"Pipfile",
|
|
132
|
+
"pyproject.toml",
|
|
133
|
+
"setup.py",
|
|
134
|
+
]
|
|
135
|
+
_JAVASCRIPT_BASE = [
|
|
136
|
+
".js",
|
|
137
|
+
".jsx",
|
|
138
|
+
".ts",
|
|
139
|
+
".tsx",
|
|
140
|
+
".mjs",
|
|
141
|
+
".cjs",
|
|
142
|
+
"package.json",
|
|
143
|
+
"jsconfig.json",
|
|
144
|
+
"tsconfig.json",
|
|
145
|
+
]
|
|
146
|
+
_RUBY_BASE = [".rb", "Gemfile", "Rakefile", ".gemspec"]
|
|
147
|
+
_PHP_BASE = [".php", "composer.json", "index.php"]
|
|
148
|
+
_JAVA_BASE = [".java", ".jar", ".war", "pom.xml", ".properties"]
|
|
149
|
+
_KOTLIN_BASE = [".kt", ".kts", ".gradle", "build.gradle.kts"]
|
|
150
|
+
_CSHARP_BASE = [".cs", ".csproj", ".sln", "appsettings.json", "Web.config", ".csx"]
|
|
151
|
+
_C_CPP_BASE = [".c", ".cpp", ".h", ".hpp", "Makefile", "CMakeLists.txt", ".cxx", ".hxx"]
|
|
152
|
+
_RUST_BASE = [".rs", "Cargo.toml", "Cargo.lock"]
|
|
153
|
+
_SWIFT_BASE = [".swift", "Package.swift"]
|
|
154
|
+
_OBJECTIVE_C_BASE = [".m", ".mm", ".h"]
|
|
155
|
+
_ELIXIR_BASE = [".ex", ".exs", "mix.exs"]
|
|
156
|
+
_DART_BASE = [".dart", "pubspec.yaml"]
|
|
157
|
+
_SCALA_BASE = [".scala", ".sbt", "build.sbt"]
|
|
158
|
+
_R_LANG_BASE = [".r", ".R", ".Rmd"]
|
|
159
|
+
_LUA_BASE = [".lua"]
|
|
160
|
+
|
|
161
|
+
_IDE_VSCODE = [".vscode"]
|
|
162
|
+
_IDE_JETBRAINS = [".idea"]
|
|
163
|
+
_IDE_SUBLIME = ["*.sublime-project", "*.sublime-workspace"]
|
|
164
|
+
_IDE_ECLIPSE = [".project", ".settings", ".classpath"]
|
|
165
|
+
_IDE_NETBEANS = ["nbproject"]
|
|
166
|
+
_IDE_ATOM = [".atom"]
|
|
167
|
+
_IDE_VIM = ["*.swp", "*.swo"]
|
|
168
|
+
_IDE_XCODE = ["*.xcodeproj", "*.xcworkspace", "xcuserdata"]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# --- Enums and Data Structures ---
|
|
178
172
|
class LanguagePreset(Enum):
|
|
179
|
-
"""
|
|
180
|
-
|
|
181
|
-
PYTHON =
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
173
|
+
"""Provides an extensive list of presets for common language file extensions and key project files."""
|
|
174
|
+
|
|
175
|
+
PYTHON = _PYTHON_BASE
|
|
176
|
+
JAVASCRIPT = _JAVASCRIPT_BASE
|
|
177
|
+
JAVA = _JAVA_BASE
|
|
178
|
+
KOTLIN = _KOTLIN_BASE
|
|
179
|
+
C_CPP = _C_CPP_BASE
|
|
180
|
+
C_SHARP = _CSHARP_BASE
|
|
181
|
+
GO = [".go", "go.mod", "go.sum"]
|
|
182
|
+
RUST = _RUST_BASE
|
|
183
|
+
RUBY = _RUBY_BASE
|
|
184
|
+
PHP = _PHP_BASE
|
|
185
|
+
SWIFT = _SWIFT_BASE
|
|
186
|
+
OBJECTIVE_C = _OBJECTIVE_C_BASE
|
|
187
|
+
DART = _DART_BASE
|
|
188
|
+
LUA = _LUA_BASE
|
|
189
|
+
PERL = [".pl", ".pm", ".t"]
|
|
190
|
+
R_LANG = _R_LANG_BASE
|
|
191
|
+
SCALA = _SCALA_BASE
|
|
192
|
+
GROOVY = [".groovy", ".gvy", ".gy", ".gsh"]
|
|
193
|
+
HASKELL = [".hs", ".lhs", "cabal.project"]
|
|
194
|
+
JULIA = [".jl"]
|
|
195
|
+
ZIG = [".zig", "build.zig"]
|
|
196
|
+
NIM = [".nim", ".nimble"]
|
|
197
|
+
ELIXIR = _ELIXIR_BASE
|
|
198
|
+
CLOJURE = [".clj", ".cljs", ".cljc", "project.clj", "deps.edn"]
|
|
199
|
+
F_SHARP = [".fs", ".fsi", ".fsx"]
|
|
200
|
+
OCAML = [".ml", ".mli", "dune-project"]
|
|
201
|
+
ELM = [".elm", "elm.json"]
|
|
202
|
+
PURE_SCRIPT = [".purs", "spago.dhall"]
|
|
203
|
+
COMMON_LISP = [".lisp", ".cl", ".asd"]
|
|
204
|
+
SCHEME = [".scm", ".ss"]
|
|
205
|
+
RACKET = [".rkt"]
|
|
206
|
+
WEB_FRONTEND = [".html", ".htm", ".css", ".scss", ".sass", ".less", ".styl"]
|
|
207
|
+
REACT = _JAVASCRIPT_BASE
|
|
208
|
+
NODE_JS = _JAVASCRIPT_BASE
|
|
209
|
+
EXPRESS_JS = _JAVASCRIPT_BASE
|
|
210
|
+
NEST_JS = _JAVASCRIPT_BASE + ["nest-cli.json"]
|
|
211
|
+
VUE = _JAVASCRIPT_BASE + [".vue", "vue.config.js"]
|
|
212
|
+
ANGULAR = _JAVASCRIPT_BASE + ["angular.json"]
|
|
213
|
+
SVELTE = _JAVASCRIPT_BASE + [".svelte", "svelte.config.js"]
|
|
214
|
+
EMBER = _JAVASCRIPT_BASE + ["ember-cli-build.js"]
|
|
215
|
+
PUG = [".pug", ".jade"]
|
|
216
|
+
HANDLEBARS = [".hbs", ".handlebars"]
|
|
217
|
+
EJS = [".ejs"]
|
|
218
|
+
DJANGO = _PYTHON_BASE + ["manage.py", "wsgi.py", "asgi.py", ".jinja", ".jinja2"]
|
|
219
|
+
FLASK = _PYTHON_BASE + ["app.py", "wsgi.py"]
|
|
220
|
+
RAILS = _RUBY_BASE + ["routes.rb", ".erb", ".haml", ".slim", "config.ru"]
|
|
221
|
+
LARAVEL = _PHP_BASE + [".blade.php", "artisan"]
|
|
222
|
+
SYMFONY = _PHP_BASE + ["symfony.lock"]
|
|
223
|
+
PHOENIX = _ELIXIR_BASE
|
|
224
|
+
SPRING = _JAVA_BASE + ["application.properties", "application.yml"]
|
|
225
|
+
ASP_NET = _CSHARP_BASE + ["*.cshtml", "*.vbhtml", "*.razor"]
|
|
226
|
+
ROCKET_RS = _RUST_BASE + ["Rocket.toml"]
|
|
227
|
+
ACTIX_WEB = _RUST_BASE
|
|
228
|
+
IOS_NATIVE = (
|
|
229
|
+
_SWIFT_BASE
|
|
230
|
+
+ _OBJECTIVE_C_BASE
|
|
231
|
+
+ [".storyboard", ".xib", "Info.plist", ".pbxproj"]
|
|
232
|
+
)
|
|
233
|
+
ANDROID_NATIVE = _JAVA_BASE + _KOTLIN_BASE + ["AndroidManifest.xml", ".xml"]
|
|
234
|
+
FLUTTER = _DART_BASE
|
|
235
|
+
REACT_NATIVE = _JAVASCRIPT_BASE + ["app.json"]
|
|
236
|
+
XAMARIN = _CSHARP_BASE + [".xaml"]
|
|
237
|
+
DOTNET_MAUI = XAMARIN
|
|
238
|
+
NATIVESCRIPT = _JAVASCRIPT_BASE + ["nativescript.config.ts"]
|
|
239
|
+
UNITY = _CSHARP_BASE + [".unity", ".prefab", ".asset", ".mat", ".unitypackage"]
|
|
240
|
+
UNREAL_ENGINE = _C_CPP_BASE + [".uproject", ".uasset", ".ini"]
|
|
241
|
+
GODOT = [".gd", ".tscn", ".tres", "project.godot"]
|
|
242
|
+
LOVE2D = _LUA_BASE + ["conf.lua", "main.lua"]
|
|
243
|
+
MONOGAME = _CSHARP_BASE + [".mgcb"]
|
|
244
|
+
DOCKER = ["Dockerfile", ".dockerignore", "docker-compose.yml"]
|
|
245
|
+
TERRAFORM = [".tf", ".tfvars", ".tf.json"]
|
|
246
|
+
ANSIBLE = ["ansible.cfg", "inventory.ini"]
|
|
247
|
+
PULUMI = ["Pulumi.yaml"]
|
|
248
|
+
CHEF = _RUBY_BASE
|
|
249
|
+
PUPPET = [".pp"]
|
|
250
|
+
VAGRANT = ["Vagrantfile"]
|
|
251
|
+
GITHUB_ACTIONS = [".yml", ".yaml"]
|
|
252
|
+
GITLAB_CI = [".gitlab-ci.yml"]
|
|
253
|
+
JENKINS = ["Jenkinsfile"]
|
|
254
|
+
CIRCLE_CI = ["config.yml"]
|
|
255
|
+
KUBERNETES = [".yml", ".yaml"]
|
|
256
|
+
BICEP = [".bicep"]
|
|
257
|
+
CLOUDFORMATION = [".json", ".yml"]
|
|
258
|
+
DATA_SCIENCE_NOTEBOOKS = [".ipynb", ".Rmd"]
|
|
259
|
+
SQL = [".sql", ".ddl", ".dml"]
|
|
260
|
+
APACHE_SPARK = list(set(_SCALA_BASE + _PYTHON_BASE + _JAVA_BASE + _R_LANG_BASE))
|
|
261
|
+
ML_CONFIG = ["params.yaml"]
|
|
262
|
+
ELECTRON = _JAVASCRIPT_BASE
|
|
263
|
+
TAURI = _RUST_BASE + ["tauri.conf.json"]
|
|
264
|
+
QT = _C_CPP_BASE + [".pro", ".ui", ".qml"]
|
|
265
|
+
GTK = _C_CPP_BASE + [".ui", "meson.build"]
|
|
266
|
+
WPF = _CSHARP_BASE + [".xaml"]
|
|
267
|
+
WINDOWS_FORMS = _CSHARP_BASE
|
|
268
|
+
BASH = [".sh", ".bash"]
|
|
269
|
+
POWERSHELL = [".ps1", ".psm1"]
|
|
270
|
+
BATCH = [".bat", ".cmd"]
|
|
271
|
+
SOLIDITY = [".sol"]
|
|
272
|
+
VYPER = [".vy"]
|
|
273
|
+
VERILOG = [".v", ".vh"]
|
|
274
|
+
VHDL = [".vhd", ".vhdl"]
|
|
275
|
+
MARKUP = [".md", ".markdown", ".rst", ".adoc", ".asciidoc", ".tex", ".bib"]
|
|
276
|
+
CONFIGURATION = [
|
|
277
|
+
".json",
|
|
278
|
+
".xml",
|
|
279
|
+
".yml",
|
|
280
|
+
".yaml",
|
|
281
|
+
".ini",
|
|
282
|
+
".toml",
|
|
283
|
+
".env",
|
|
284
|
+
".conf",
|
|
285
|
+
".cfg",
|
|
188
286
|
]
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
287
|
+
EDITOR_CONFIG = [".editorconfig"]
|
|
288
|
+
LICENSE = ["LICENSE", "LICENSE.md", "COPYING"]
|
|
289
|
+
CHANGELOG = ["CHANGELOG", "CHANGELOG.md"]
|
|
192
290
|
|
|
193
291
|
|
|
194
292
|
class IgnorePreset(Enum):
|
|
195
|
-
"""
|
|
196
|
-
|
|
197
|
-
VERSION_CONTROL = [".git", ".svn", ".hg", ".
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
293
|
+
"""Provides an extensive list of presets for common directories, files, and patterns to ignore."""
|
|
294
|
+
|
|
295
|
+
VERSION_CONTROL = [".git", ".svn", ".hg", ".bzr", ".gitignore", ".gitattributes"]
|
|
296
|
+
OS_FILES = [".DS_Store", "Thumbs.db", "desktop.ini", "ehthumbs.db"]
|
|
297
|
+
BUILD_ARTIFACTS = [
|
|
298
|
+
"dist",
|
|
299
|
+
"build",
|
|
300
|
+
"target",
|
|
301
|
+
"out",
|
|
302
|
+
"bin",
|
|
303
|
+
"obj",
|
|
304
|
+
"release",
|
|
305
|
+
"debug",
|
|
306
|
+
]
|
|
307
|
+
LOGS = ["*.log", "logs", "npm-debug.log*", "yarn-debug.log*", "yarn-error.log*"]
|
|
308
|
+
TEMP_FILES = ["temp", "tmp", "*.tmp", "*~", "*.bak", "*.swp", "*.swo"]
|
|
309
|
+
SECRET_FILES = [
|
|
310
|
+
".env",
|
|
311
|
+
"*.pem",
|
|
312
|
+
"*.key",
|
|
313
|
+
"credentials.json",
|
|
314
|
+
"*.p12",
|
|
315
|
+
"*.pfx",
|
|
316
|
+
"secrets.yml",
|
|
317
|
+
".env.local",
|
|
318
|
+
]
|
|
319
|
+
COMPRESSED_ARCHIVES = ["*.zip", "*.tar", "*.gz", "*.rar", "*.7z", "*.tgz"]
|
|
320
|
+
IDE_METADATA_VSCODE = _IDE_VSCODE
|
|
321
|
+
IDE_METADATA_JETBRAINS = _IDE_JETBRAINS
|
|
322
|
+
IDE_METADATA_SUBLIME = _IDE_SUBLIME
|
|
323
|
+
IDE_METADATA_ECLIPSE = _IDE_ECLIPSE
|
|
324
|
+
IDE_METADATA_NETBEANS = _IDE_NETBEANS
|
|
325
|
+
IDE_METADATA_ATOM = _IDE_ATOM
|
|
326
|
+
IDE_METADATA_VIM = _IDE_VIM
|
|
327
|
+
IDE_METADATA_XCODE = _IDE_XCODE
|
|
328
|
+
IDE_METADATA = list(
|
|
329
|
+
set(
|
|
330
|
+
_IDE_VSCODE
|
|
331
|
+
+ _IDE_JETBRAINS
|
|
332
|
+
+ _IDE_SUBLIME
|
|
333
|
+
+ _IDE_ECLIPSE
|
|
334
|
+
+ _IDE_NETBEANS
|
|
335
|
+
+ _IDE_ATOM
|
|
336
|
+
+ _IDE_VIM
|
|
337
|
+
+ _IDE_XCODE
|
|
338
|
+
)
|
|
339
|
+
)
|
|
340
|
+
NODE_JS = [
|
|
341
|
+
"node_modules",
|
|
342
|
+
"package-lock.json",
|
|
343
|
+
"yarn.lock",
|
|
344
|
+
"pnpm-lock.yaml",
|
|
345
|
+
".npm",
|
|
346
|
+
]
|
|
347
|
+
PYTHON = [
|
|
348
|
+
"__pycache__",
|
|
349
|
+
"venv",
|
|
350
|
+
".venv",
|
|
351
|
+
"env",
|
|
352
|
+
"lib",
|
|
353
|
+
"lib64",
|
|
354
|
+
".pytest_cache",
|
|
355
|
+
".tox",
|
|
356
|
+
"*.pyc",
|
|
357
|
+
".mypy_cache",
|
|
358
|
+
"htmlcov",
|
|
359
|
+
".coverage",
|
|
360
|
+
]
|
|
361
|
+
RUBY = ["vendor/bundle", ".bundle", "Gemfile.lock", ".gem", "coverage"]
|
|
362
|
+
PHP = ["vendor", "composer.lock"]
|
|
363
|
+
DOTNET = ["bin", "obj", "*.user", "*.suo"]
|
|
364
|
+
RUST = ["target", "Cargo.lock"]
|
|
365
|
+
GO = ["vendor", "go.sum"]
|
|
366
|
+
JAVA_MAVEN = ["target"]
|
|
367
|
+
JAVA_GRADLE = [".gradle", "build"]
|
|
368
|
+
ELIXIR = ["_build", "deps", "mix.lock"]
|
|
369
|
+
DART_FLUTTER = [".dart_tool", ".packages", "build", ".flutter-plugins"]
|
|
370
|
+
ELM = ["elm-stuff"]
|
|
371
|
+
HASKELL = ["dist-newstyle", ".stack-work"]
|
|
372
|
+
TESTING_REPORTS = ["coverage", "junit.xml", "lcov.info", ".nyc_output"]
|
|
373
|
+
STATIC_SITE_GENERATORS = ["_site", "public", "resources"]
|
|
374
|
+
CMS_UPLOADS = ["wp-content/uploads"]
|
|
375
|
+
TERRAFORM = [".terraform", "*.tfstate", "*.tfstate.backup", ".terraform.lock.hcl"]
|
|
376
|
+
JUPYTER_NOTEBOOKS = [".ipynb_checkpoints"]
|
|
377
|
+
ANDROID = [".gradle", "build", "local.properties", "*.apk", "*.aab", "captures"]
|
|
378
|
+
IOS = ["Pods", "Carthage", "DerivedData", "build"]
|
|
379
|
+
UNITY = [
|
|
380
|
+
"Library",
|
|
381
|
+
"Temp",
|
|
382
|
+
"Logs",
|
|
383
|
+
"UserSettings",
|
|
384
|
+
"MemoryCaptures",
|
|
385
|
+
"Assets/AssetStoreTools",
|
|
386
|
+
]
|
|
387
|
+
UNREAL_ENGINE = ["Intermediate", "Saved", "DerivedDataCache", ".vs"]
|
|
388
|
+
GODOT_ENGINE = [".import", "export_presets.cfg"]
|
|
389
|
+
SERVERLESS_FRAMEWORK = [".serverless"]
|
|
390
|
+
AWS = [".aws-sam"]
|
|
391
|
+
VERCEL = [".vercel"]
|
|
392
|
+
NETLIFY = [".netlify"]
|
|
393
|
+
MACOS = [
|
|
394
|
+
".DS_Store",
|
|
395
|
+
".AppleDouble",
|
|
396
|
+
".LSOverride",
|
|
397
|
+
"._*",
|
|
398
|
+
".Spotlight-V100",
|
|
399
|
+
".Trashes",
|
|
400
|
+
]
|
|
401
|
+
WINDOWS = ["Thumbs.db", "ehthumbs.db", "$RECYCLE.BIN/", "Desktop.ini"]
|
|
402
|
+
DEPRECATED_DEPENDENCIES = ["bower_components"]
|
|
217
403
|
|
|
218
|
-
t_connector: str
|
|
219
|
-
l_connector: str
|
|
220
|
-
v_connector: str
|
|
221
|
-
h_spacer: str
|
|
222
404
|
|
|
405
|
+
class FileToProcess(NamedTuple):
|
|
406
|
+
"""Represents a file that needs to be processed and included in the output."""
|
|
223
407
|
|
|
224
|
-
|
|
408
|
+
absolute_path: Path
|
|
409
|
+
relative_path_posix: str
|
|
225
410
|
|
|
226
411
|
|
|
227
412
|
@dataclass
|
|
228
413
|
class FilterCriteria:
|
|
229
|
-
"""Holds
|
|
414
|
+
"""Holds the combined filter criteria for scanning files and directories."""
|
|
230
415
|
|
|
231
416
|
file_extensions: Set[str] = field(default_factory=set)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
ignore_fname_substrings: Set[str] = field(default_factory=set)
|
|
235
|
-
ignore_path_components: Set[str] = field(default_factory=set)
|
|
417
|
+
ignore_if_in_path: Set[str] = field(default_factory=set)
|
|
418
|
+
ignore_extensions: Set[str] = field(default_factory=set)
|
|
236
419
|
|
|
237
420
|
@classmethod
|
|
238
421
|
def normalize_inputs(
|
|
239
422
|
cls,
|
|
240
|
-
file_types: Optional[List[str]],
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
language_presets: Optional[List[LanguagePreset]] = None,
|
|
423
|
+
file_types: Optional[List[str]] = None,
|
|
424
|
+
ignore_if_in_path: Optional[List[str]] = None,
|
|
425
|
+
ignore_extensions: Optional[List[str]] = None,
|
|
426
|
+
lang_presets: Optional[List[LanguagePreset]] = None,
|
|
245
427
|
ignore_presets: Optional[List[IgnorePreset]] = None,
|
|
246
428
|
) -> "FilterCriteria":
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
for ft in
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
429
|
+
"""
|
|
430
|
+
Consolidates various filter inputs into a single FilterCriteria object.
|
|
431
|
+
|
|
432
|
+
Args:
|
|
433
|
+
file_types (list, optional): A list of file extensions to include.
|
|
434
|
+
ignore_if_in_path (list, optional): A list of directory/file names to ignore.
|
|
435
|
+
ignore_extensions (list, optional): A list of file extensions to ignore.
|
|
436
|
+
lang_presets (list, optional): A list of LanguagePreset enums.
|
|
437
|
+
ignore_presets (list, optional): A list of IgnorePreset enums.
|
|
438
|
+
|
|
439
|
+
Returns:
|
|
440
|
+
FilterCriteria: An object containing the combined sets of filters.
|
|
441
|
+
"""
|
|
442
|
+
all_exts = {ft.lower().strip() for ft in file_types or []}
|
|
443
|
+
all_ignore_paths = {ip.lower().strip() for ip in ignore_if_in_path or []}
|
|
444
|
+
all_ignore_exts = {ie.lower().strip() for ie in ignore_extensions or []}
|
|
445
|
+
|
|
446
|
+
for p in lang_presets or []:
|
|
447
|
+
all_exts.update(p.value)
|
|
448
|
+
for p in ignore_presets or []:
|
|
449
|
+
all_ignore_paths.update(p.value)
|
|
450
|
+
|
|
266
451
|
return cls(
|
|
267
|
-
file_extensions=
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
set(s.lower() for s in whitelist_substrings if s.strip())
|
|
271
|
-
if whitelist_substrings
|
|
272
|
-
else set()
|
|
273
|
-
),
|
|
274
|
-
ignore_fname_substrings=set(
|
|
275
|
-
s.lower() for s in all_ignore_fnames if s.strip()
|
|
276
|
-
),
|
|
277
|
-
ignore_path_components=set(
|
|
278
|
-
d.lower() for d in all_ignore_paths if d.strip()
|
|
279
|
-
),
|
|
452
|
+
file_extensions=all_exts,
|
|
453
|
+
ignore_if_in_path=all_ignore_paths,
|
|
454
|
+
ignore_extensions=all_ignore_exts,
|
|
280
455
|
)
|
|
281
456
|
|
|
282
457
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# --- Helper Functions ---
|
|
458
|
+
# --- Core Logic Functions ---
|
|
459
|
+
def _discover_files(
|
|
460
|
+
root_dir: Path, criteria: FilterCriteria, progress: Any, task_id: Any
|
|
461
|
+
) -> List[Path]:
|
|
462
|
+
"""
|
|
463
|
+
Recursively scans a directory to find all files matching the criteria.
|
|
291
464
|
|
|
465
|
+
Args:
|
|
466
|
+
root_dir (Path): The directory to start the scan from.
|
|
467
|
+
criteria (FilterCriteria): The filtering criteria to apply.
|
|
468
|
+
progress (Any): The progress bar object (from rich or fallback).
|
|
469
|
+
task_id (Any): The ID of the progress bar task to update.
|
|
292
470
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
try:
|
|
298
|
-
resolved_path = Path(root_dir_param or Path.cwd()).resolve(strict=True)
|
|
299
|
-
except Exception as e:
|
|
300
|
-
print(
|
|
301
|
-
f"Error: Could not resolve root directory '{original_param_for_messaging}': {e}"
|
|
302
|
-
)
|
|
303
|
-
return None
|
|
304
|
-
if not resolved_path.is_dir():
|
|
305
|
-
print(f"Error: Root path '{resolved_path}' is not a directory.")
|
|
306
|
-
return None
|
|
307
|
-
return resolved_path
|
|
471
|
+
Returns:
|
|
472
|
+
List[Path]: A list of absolute paths to the candidate files.
|
|
473
|
+
"""
|
|
474
|
+
candidate_files, dirs_scanned = [], 0
|
|
308
475
|
|
|
476
|
+
def recursive_scan(current_path: Path):
|
|
477
|
+
nonlocal dirs_scanned
|
|
478
|
+
try:
|
|
479
|
+
for entry in os.scandir(current_path):
|
|
480
|
+
entry_path, entry_lower = Path(entry.path), entry.name.lower()
|
|
481
|
+
if entry_lower in criteria.ignore_if_in_path:
|
|
482
|
+
continue
|
|
483
|
+
if entry.is_dir():
|
|
484
|
+
recursive_scan(entry_path)
|
|
485
|
+
dirs_scanned += 1
|
|
486
|
+
if progress:
|
|
487
|
+
progress.update(
|
|
488
|
+
task_id,
|
|
489
|
+
completed=dirs_scanned,
|
|
490
|
+
description=f"Discovering files in [cyan]{entry.name}[/cyan]",
|
|
491
|
+
)
|
|
492
|
+
elif entry.is_file():
|
|
493
|
+
file_ext = entry_path.suffix.lower()
|
|
494
|
+
if (
|
|
495
|
+
criteria.ignore_extensions
|
|
496
|
+
and file_ext in criteria.ignore_extensions
|
|
497
|
+
):
|
|
498
|
+
continue
|
|
499
|
+
if (
|
|
500
|
+
not criteria.file_extensions
|
|
501
|
+
or file_ext in criteria.file_extensions
|
|
502
|
+
):
|
|
503
|
+
candidate_files.append(entry_path)
|
|
504
|
+
except (PermissionError, FileNotFoundError):
|
|
505
|
+
pass
|
|
309
506
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
root_dir: Path,
|
|
313
|
-
criteria: FilterCriteria,
|
|
314
|
-
is_dir: bool,
|
|
315
|
-
log_func: Optional[Callable[[str], None]] = None,
|
|
316
|
-
) -> bool:
|
|
317
|
-
try:
|
|
318
|
-
relative_path = entry_path.relative_to(root_dir)
|
|
319
|
-
except ValueError:
|
|
320
|
-
return False
|
|
321
|
-
entry_name_lower = entry_path.name.lower()
|
|
322
|
-
if criteria.ignore_path_components and any(
|
|
323
|
-
part.lower() in criteria.ignore_path_components for part in relative_path.parts
|
|
324
|
-
):
|
|
325
|
-
return False
|
|
326
|
-
if is_dir:
|
|
327
|
-
return True
|
|
328
|
-
file_ext_lower = entry_path.suffix.lower()
|
|
329
|
-
matched_type = (file_ext_lower in criteria.file_extensions) or (
|
|
330
|
-
entry_name_lower in criteria.exact_filenames
|
|
331
|
-
)
|
|
332
|
-
if not criteria.file_extensions and not criteria.exact_filenames:
|
|
333
|
-
matched_type = True
|
|
334
|
-
if not matched_type:
|
|
335
|
-
return False
|
|
336
|
-
if criteria.whitelist_fname_substrings and not any(
|
|
337
|
-
sub in entry_name_lower for sub in criteria.whitelist_fname_substrings
|
|
338
|
-
):
|
|
339
|
-
return False
|
|
340
|
-
if criteria.ignore_fname_substrings and any(
|
|
341
|
-
sub in entry_name_lower for sub in criteria.ignore_fname_substrings
|
|
342
|
-
):
|
|
343
|
-
return False
|
|
344
|
-
return True
|
|
507
|
+
recursive_scan(root_dir)
|
|
508
|
+
return candidate_files
|
|
345
509
|
|
|
346
510
|
|
|
347
511
|
def process_file_for_search(
|
|
348
512
|
file_path: Path,
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
513
|
+
keywords: List[str],
|
|
514
|
+
search_content: bool,
|
|
515
|
+
full_path: bool,
|
|
516
|
+
activity: Dict,
|
|
517
|
+
read_binary_files: bool,
|
|
352
518
|
) -> Optional[Path]:
|
|
353
519
|
"""
|
|
354
|
-
|
|
355
|
-
"""
|
|
356
|
-
compare_target = str(file_path) if full_path_compare else file_path.name
|
|
357
|
-
if any(key in compare_target.lower() for key in normalized_keywords):
|
|
358
|
-
return file_path
|
|
520
|
+
Processes a single file to see if it matches the search criteria.
|
|
359
521
|
|
|
360
|
-
if
|
|
361
|
-
|
|
362
|
-
if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
|
|
363
|
-
return None # Do not attempt to read binary file content
|
|
522
|
+
A match can occur if a keyword is found in the filename or, if enabled,
|
|
523
|
+
within the file's content.
|
|
364
524
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
525
|
+
Args:
|
|
526
|
+
file_path (Path): The absolute path to the file to process.
|
|
527
|
+
keywords (List[str]): A list of keywords to search for.
|
|
528
|
+
search_content (bool): If True, search the content of the file.
|
|
529
|
+
full_path (bool): If True, compare keywords against the full file path.
|
|
530
|
+
activity (Dict): A dictionary to track thread activity.
|
|
531
|
+
read_binary_files (bool): If True, attempt to read and search binary files.
|
|
532
|
+
|
|
533
|
+
Returns:
|
|
534
|
+
Optional[Path]: The path to the file if it's a match, otherwise None.
|
|
535
|
+
"""
|
|
536
|
+
thread_id = threading.get_ident()
|
|
537
|
+
activity[thread_id] = file_path.name
|
|
538
|
+
try:
|
|
539
|
+
compare_target = str(file_path) if full_path else file_path.name
|
|
540
|
+
if any(key in compare_target.lower() for key in keywords):
|
|
541
|
+
return file_path
|
|
542
|
+
|
|
543
|
+
if search_content and (
|
|
544
|
+
read_binary_files or file_path.suffix.lower() not in BINARY_FILE_EXTENSIONS
|
|
545
|
+
):
|
|
386
546
|
try:
|
|
387
|
-
|
|
547
|
+
with file_path.open("r", encoding="utf-8", errors="ignore") as f:
|
|
548
|
+
for line in f:
|
|
549
|
+
if any(key in line.lower() for key in keywords):
|
|
550
|
+
return file_path
|
|
388
551
|
except OSError:
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
except ValueError:
|
|
394
|
-
continue
|
|
395
|
-
if any(
|
|
396
|
-
part.lower() in criteria.ignore_path_components
|
|
397
|
-
for part in relative_path.parts
|
|
398
|
-
):
|
|
399
|
-
continue
|
|
400
|
-
if is_dir:
|
|
401
|
-
total_dirs += 1
|
|
402
|
-
else:
|
|
403
|
-
total_files += 1
|
|
404
|
-
stats[current_dir] = (total_files, total_dirs)
|
|
405
|
-
dirnames[:] = [
|
|
406
|
-
d
|
|
407
|
-
for d in dirnames
|
|
408
|
-
if (current_dir / d).name.lower() not in criteria.ignore_path_components
|
|
409
|
-
]
|
|
410
|
-
return stats
|
|
552
|
+
pass
|
|
553
|
+
return None
|
|
554
|
+
finally:
|
|
555
|
+
activity[thread_id] = ""
|
|
411
556
|
|
|
412
557
|
|
|
413
|
-
|
|
558
|
+
def _process_files_concurrently(
    files: List[Path],
    keywords: List[str],
    search_content: bool,
    full_path: bool,
    max_workers: Optional[int],
    progress: Any,
    task_id: Any,
    read_binary_files: bool,
) -> Set[Path]:
    """
    Uses a thread pool to process a list of files for search matches concurrently.

    Args:
        files (List[Path]): The list of candidate files to search through.
        keywords (List[str]): The keywords to search for.
        search_content (bool): Whether to search inside file contents.
        full_path (bool): Whether to compare keywords against the full path.
        max_workers (Optional[int]): The maximum number of threads to use.
            Defaults to (CPU count or 1) + 4 when None.
        progress (Any): The progress bar object (rich Progress or fallback).
        task_id (Any): The ID of the processing task on the progress bar.
        read_binary_files (bool): If True, search the content of binary files.

    Returns:
        Set[Path]: A set of absolute paths for all files that matched.
    """
    matched_files, thread_activity = set(), {}
    with ThreadPoolExecutor(
        max_workers=max_workers or (os.cpu_count() or 1) + 4,
        thread_name_prefix="scanner",
    ) as executor:
        # Workers record the file they are currently scanning in
        # thread_activity (keyed by thread id) so the UI can show live status.
        future_to_file = {
            executor.submit(
                process_file_for_search,
                f,
                keywords,
                search_content,
                full_path,
                thread_activity,
                read_binary_files,
            ): f
            for f in files
        }
        for future in as_completed(future_to_file):
            if progress:
                active_threads = {
                    f"T{str(tid)[-3:]}": name
                    for tid, name in thread_activity.items()
                    if name
                }
                progress.update(
                    task_id,
                    advance=1,
                    description=f"Processing [yellow]{len(active_threads)} threads[/yellow]",
                )
                if RICH_AVAILABLE:
                    # BUGFIX: use Text.from_markup — the plain Text() constructor
                    # does not parse console markup, so the literal tags
                    # "[bold cyan]...[/]" were rendered verbatim in the panel.
                    # Matches the Text.from_markup usage in _collate_content_to_file.
                    status_panel = Panel(
                        Text.from_markup(
                            "\n".join(
                                f"[bold cyan]{k}[/]: {v}"
                                for k, v in active_threads.items()
                            )
                        ),
                        border_style="dim",
                        title="[dim]Thread Activity",
                    )
                    progress.update(task_id, status=status_panel)
            if result := future.result():
                matched_files.add(result)
    if progress and RICH_AVAILABLE:
        progress.update(task_id, status="[bold green]Done![/bold green]")
    return matched_files
|
|
414
630
|
|
|
415
631
|
|
|
416
|
-
def
|
|
417
|
-
root_dir: Path,
|
|
632
|
+
def _generate_tree_with_stats(
|
|
633
|
+
root_dir: Path, file_paths: List[Path], show_stats: bool
|
|
418
634
|
) -> List[str]:
|
|
419
|
-
"""
|
|
420
|
-
|
|
421
|
-
_calculate_total_stats(root_dir, criteria) if show_stats else None
|
|
422
|
-
)
|
|
423
|
-
tree_lines: List[str] = []
|
|
424
|
-
|
|
425
|
-
def format_dir_name(
|
|
426
|
-
path: Path, path_name: str, included_files: int, included_dirs: int
|
|
427
|
-
) -> str:
|
|
428
|
-
if not show_stats or not dir_stats:
|
|
429
|
-
return path_name
|
|
430
|
-
total_files, total_dirs = dir_stats.get(path, (0, 0))
|
|
635
|
+
"""
|
|
636
|
+
Generates a directory tree structure from a list of file paths.
|
|
431
637
|
|
|
432
|
-
|
|
433
|
-
|
|
638
|
+
Args:
|
|
639
|
+
root_dir (Path): The root directory of the project, used as the tree's base.
|
|
640
|
+
file_paths (List[Path]): A list of file paths to include in the tree.
|
|
641
|
+
show_stats (bool): If True, include file and directory counts in the tree.
|
|
434
642
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
)
|
|
443
|
-
return
|
|
444
|
-
displayable_children: List[Tuple[Path, bool]] = []
|
|
445
|
-
for e in entries:
|
|
446
|
-
try:
|
|
447
|
-
is_dir = e.is_dir()
|
|
448
|
-
except OSError:
|
|
449
|
-
continue
|
|
450
|
-
if _should_include_entry(
|
|
451
|
-
e, root_dir, criteria, is_dir=is_dir, log_func=None
|
|
452
|
-
):
|
|
453
|
-
displayable_children.append((e, is_dir))
|
|
454
|
-
num_children = len(displayable_children)
|
|
455
|
-
included_files_in_level = sum(
|
|
456
|
-
1 for _, is_dir in displayable_children if not is_dir
|
|
457
|
-
)
|
|
458
|
-
included_dirs_in_level = sum(1 for _, is_dir in displayable_children if is_dir)
|
|
459
|
-
if not prefix_parts:
|
|
460
|
-
tree_lines.append(
|
|
461
|
-
format_dir_name(
|
|
462
|
-
current_path,
|
|
463
|
-
current_path.name,
|
|
464
|
-
included_files_in_level,
|
|
465
|
-
included_dirs_in_level,
|
|
466
|
-
)
|
|
467
|
-
)
|
|
468
|
-
for i, (child_path, child_is_dir) in enumerate(displayable_children):
|
|
469
|
-
is_last = i == num_children - 1
|
|
470
|
-
connector = style.l_connector if is_last else style.t_connector
|
|
471
|
-
entry_name = child_path.name
|
|
472
|
-
if child_is_dir:
|
|
473
|
-
try:
|
|
474
|
-
child_entries = sorted(
|
|
475
|
-
child_path.iterdir(), key=lambda p: p.name.lower()
|
|
476
|
-
)
|
|
477
|
-
child_displayable_children = [
|
|
478
|
-
(e, e.is_dir())
|
|
479
|
-
for e in child_entries
|
|
480
|
-
if _should_include_entry(
|
|
481
|
-
e, root_dir, criteria, is_dir=e.is_dir(), log_func=None
|
|
482
|
-
)
|
|
483
|
-
]
|
|
484
|
-
child_included_files = sum(
|
|
485
|
-
1 for _, is_dir in child_displayable_children if not is_dir
|
|
486
|
-
)
|
|
487
|
-
child_included_dirs = sum(
|
|
488
|
-
1 for _, is_dir in child_displayable_children if is_dir
|
|
489
|
-
)
|
|
490
|
-
entry_name = format_dir_name(
|
|
491
|
-
child_path,
|
|
492
|
-
child_path.name,
|
|
493
|
-
child_included_files,
|
|
494
|
-
child_included_dirs,
|
|
495
|
-
)
|
|
496
|
-
except OSError:
|
|
497
|
-
pass
|
|
498
|
-
tree_lines.append("".join(prefix_parts) + connector + entry_name)
|
|
499
|
-
if child_is_dir:
|
|
500
|
-
new_prefix_parts = prefix_parts + [
|
|
501
|
-
style.h_spacer if is_last else style.v_connector
|
|
502
|
-
]
|
|
503
|
-
_recursive_build(child_path, new_prefix_parts)
|
|
643
|
+
Returns:
|
|
644
|
+
List[str]: A list of strings, where each string is a line in the tree.
|
|
645
|
+
"""
|
|
646
|
+
tree_dict: Dict[str, Any] = {}
|
|
647
|
+
for path in file_paths:
|
|
648
|
+
level = tree_dict
|
|
649
|
+
for part in path.relative_to(root_dir).parts:
|
|
650
|
+
level = level.setdefault(part, {})
|
|
504
651
|
|
|
505
|
-
|
|
506
|
-
|
|
652
|
+
def count_children(d: Dict) -> Tuple[int, int]:
|
|
653
|
+
files = sum(1 for v in d.values() if not v)
|
|
654
|
+
dirs = len(d) - files
|
|
655
|
+
return files, dirs
|
|
507
656
|
|
|
657
|
+
lines = []
|
|
658
|
+
style = ("├── ", "└── ", "│ ", " ")
|
|
508
659
|
|
|
509
|
-
def
|
|
510
|
-
|
|
511
|
-
) -> List[str]:
|
|
512
|
-
"""Generates a directory tree structure from a list of *matched* file paths using the given style."""
|
|
513
|
-
tree_dict: Dict[str, Any] = {}
|
|
514
|
-
matched_paths = {p.relative_to(root_dir) for p in file_paths}
|
|
515
|
-
for rel_path in matched_paths:
|
|
516
|
-
parts = rel_path.parts
|
|
517
|
-
current_level = tree_dict
|
|
518
|
-
for part in parts:
|
|
519
|
-
current_level = current_level.setdefault(part, {})
|
|
520
|
-
tree_lines: List[str] = []
|
|
521
|
-
|
|
522
|
-
def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
|
|
523
|
-
if not show_stats:
|
|
524
|
-
return name
|
|
525
|
-
|
|
526
|
-
stats_str = f" [M: {matched_files}f, {matched_dirs}d]"
|
|
527
|
-
return name + stats_str
|
|
528
|
-
|
|
529
|
-
def build_lines(d: Dict[str, Any], prefix: str):
|
|
530
|
-
items = sorted(d.keys(), key=lambda k: (len(d[k]) == 0, k.lower()))
|
|
531
|
-
num_children = len(items)
|
|
532
|
-
matched_files_in_level = sum(1 for k in items if not d[k])
|
|
533
|
-
matched_dirs_in_level = sum(1 for k in items if d[k])
|
|
534
|
-
if not prefix:
|
|
535
|
-
tree_lines.append(
|
|
536
|
-
format_dir_name_search(
|
|
537
|
-
root_dir.name, matched_files_in_level, matched_dirs_in_level
|
|
538
|
-
)
|
|
539
|
-
)
|
|
660
|
+
def build_lines_recursive(d: Dict, prefix: str = ""):
|
|
661
|
+
items = sorted(d.keys(), key=lambda k: (not d[k], k.lower()))
|
|
540
662
|
for i, name in enumerate(items):
|
|
541
|
-
is_last = i ==
|
|
542
|
-
connector = style
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
child_matched_files = sum(1 for k in d[name] if not d[name][k])
|
|
546
|
-
child_matched_dirs = sum(1 for k in d[name] if d[name][k])
|
|
547
|
-
entry_name = format_dir_name_search(
|
|
548
|
-
name, child_matched_files, child_matched_dirs
|
|
549
|
-
)
|
|
550
|
-
tree_lines.append(prefix + connector + entry_name)
|
|
663
|
+
is_last = i == len(items) - 1
|
|
664
|
+
connector = style[1] if is_last else style[0]
|
|
665
|
+
display_name = name
|
|
666
|
+
|
|
551
667
|
if d[name]:
|
|
552
|
-
|
|
553
|
-
|
|
668
|
+
if show_stats:
|
|
669
|
+
files, dirs = count_children(d[name])
|
|
670
|
+
display_name += f" [dim][M: {files}f, {dirs}d][/dim]"
|
|
671
|
+
|
|
672
|
+
lines.append(f"{prefix}{connector}{display_name}")
|
|
554
673
|
|
|
555
|
-
|
|
556
|
-
|
|
674
|
+
if d[name]:
|
|
675
|
+
extension = style[3] if is_last else style[2]
|
|
676
|
+
build_lines_recursive(d[name], prefix + extension)
|
|
557
677
|
|
|
678
|
+
root_name = f"[bold cyan]{root_dir.name}[/bold cyan]"
|
|
679
|
+
if show_stats:
|
|
680
|
+
files, dirs = count_children(tree_dict)
|
|
681
|
+
root_name += f" [dim][M: {files}f, {dirs}d][/dim]"
|
|
682
|
+
lines.append(root_name)
|
|
558
683
|
|
|
559
|
-
|
|
684
|
+
build_lines_recursive(tree_dict)
|
|
685
|
+
return lines
|
|
560
686
|
|
|
561
687
|
|
|
562
688
|
def _collate_content_to_file(
    output_path: Path,
    tree_lines: List[str],
    files: List[FileToProcess],
    show_tree_stats: bool,
    show_token_count: bool,
    exclude_whitespace: bool,
    progress: Any,
    task_id: Any,
) -> Tuple[int, int]:
    """
    Collates the file tree and file contents into a single output file.

    Args:
        output_path (Path): The path to the final output file.
        tree_lines (List[str]): The generated file tree lines (may contain
            rich markup, which is stripped before writing when rich is available).
        files (List[FileToProcess]): The files whose content needs to be collated.
        show_tree_stats (bool): Whether to include the stats key in the header.
        show_token_count (bool): Whether to calculate and include the token count.
        exclude_whitespace (bool): If True, exclude whitespace from token counting.
        progress (Any): The progress bar object.
        task_id (Any): The ID of the collation task on the progress bar.

    Returns:
        Tuple[int, int]: The total number of bytes of file content read
        (headers and the tree are not counted) and the approximate token count
        (0 when show_token_count is False).
    """
    # FIX: annotated return type was Tuple[float, int]; total_bytes is a sum of
    # byte-string lengths and is always an int.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    buffer, total_bytes, token_count = StringIO(), 0, 0

    if tree_lines:
        buffer.write(f"{TREE_HEADER_TEXT}\n" + "-" * 80 + "\n\n")
        if show_tree_stats:
            buffer.write(
                "Key: [M: Matched files/dirs]\n (f=files, d=directories)\n\n"
            )

        if RICH_AVAILABLE:
            # Strip rich markup so the output file contains plain text.
            content = "\n".join(Text.from_markup(line).plain for line in tree_lines)
        else:
            content = "\n".join(tree_lines)
        buffer.write(content + "\n\n")

    for file_info in files:
        if progress:
            progress.update(
                task_id,
                advance=1,
                description=f"Collating [green]{file_info.relative_path_posix}[/green]",
            )
        buffer.write(f"{'-'*80}\nFILE: {file_info.relative_path_posix}\n{'-'*80}\n\n")
        try:
            content = file_info.absolute_path.read_text(
                encoding=DEFAULT_ENCODING, errors="replace"
            )
            buffer.write(content + "\n\n")
            total_bytes += len(content.encode(DEFAULT_ENCODING))
        except Exception as e:
            # Best-effort: record the failure in the output instead of aborting.
            buffer.write(f"Error: Could not read file. Issue: {e}\n\n")

    final_content = buffer.getvalue()
    if show_token_count:
        # Token count approximation is character-based, optionally ignoring
        # all whitespace for a more compact estimate.
        content_for_count = (
            re.sub(r"\s", "", final_content) if exclude_whitespace else final_content
        )
        token_count = len(content_for_count)

    with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
        if show_token_count:
            mode = "chars, no whitespace" if exclude_whitespace else "characters"
            outfile.write(f"Token Count ({mode}): {token_count}\n\n")
        outfile.write(final_content)

    return total_bytes, token_count
|
|
732
761
|
|
|
733
762
|
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
full_path_compare: bool,
|
|
746
|
-
|
|
747
|
-
|
|
763
|
+
# --- Main Entry Point ---
|
|
764
|
+
def generate_snapshot(
|
|
765
|
+
root_directory: str = ".",
|
|
766
|
+
output_file_name: str = "project_snapshot.txt",
|
|
767
|
+
search_keywords: Optional[List[str]] = None,
|
|
768
|
+
file_extensions: Optional[List[str]] = None,
|
|
769
|
+
ignore_if_in_path: Optional[List[str]] = None,
|
|
770
|
+
ignore_extensions: Optional[List[str]] = None,
|
|
771
|
+
language_presets: Optional[List[LanguagePreset]] = None,
|
|
772
|
+
ignore_presets: Optional[List[IgnorePreset]] = None,
|
|
773
|
+
search_file_contents: bool = True,
|
|
774
|
+
full_path_compare: bool = True,
|
|
775
|
+
max_workers: Optional[int] = None,
|
|
776
|
+
generate_tree: bool = True,
|
|
777
|
+
show_tree_stats: bool = False,
|
|
778
|
+
show_token_count: bool = False,
|
|
779
|
+
exclude_whitespace_in_token_count: bool = False,
|
|
780
|
+
read_binary_files: bool = False,
|
|
748
781
|
) -> None:
|
|
749
|
-
"""
|
|
782
|
+
"""
|
|
783
|
+
Orchestrates the entire process of scanning, filtering, and collating project files.
|
|
784
|
+
|
|
785
|
+
This function serves as the main entry point for the utility. It can be used
|
|
786
|
+
to create a full "snapshot" of a project's source code or to search for
|
|
787
|
+
specific keywords within file names and/or contents. It is highly configurable
|
|
788
|
+
through presets and manual overrides.
|
|
789
|
+
|
|
790
|
+
Args:
|
|
791
|
+
root_directory (str): The starting directory for the scan. Defaults to ".".
|
|
792
|
+
output_file_name (str): The name of the file to save the results to.
|
|
793
|
+
Defaults to "project_snapshot.txt".
|
|
794
|
+
search_keywords (List[str], optional): A list of keywords to search for. If
|
|
795
|
+
None or empty, the function runs in "snapshot" mode, including all
|
|
796
|
+
files that match the other criteria. Defaults to None.
|
|
797
|
+
file_extensions (List[str], optional): A list of specific file
|
|
798
|
+
extensions to include (e.g., [".py", ".md"]). Defaults to None.
|
|
799
|
+
ignore_if_in_path (List[str], optional): A list of directory or file
|
|
800
|
+
names to exclude from the scan. Defaults to None.
|
|
801
|
+
ignore_extensions (List[str], optional): A list of file extensions to
|
|
802
|
+
explicitly ignore (e.g., [".log", ".tmp"]). Defaults to None.
|
|
803
|
+
language_presets (List[LanguagePreset], optional): A list of LanguagePreset
|
|
804
|
+
enums for common file types (e.g., [LanguagePreset.PYTHON]). Defaults to None.
|
|
805
|
+
ignore_presets (List[IgnorePreset], optional): A list of IgnorePreset enums
|
|
806
|
+
for common ignore patterns (e.g., [IgnorePreset.PYTHON]). Defaults to None.
|
|
807
|
+
search_file_contents (bool): If True, search for keywords within file
|
|
808
|
+
contents. Defaults to True.
|
|
809
|
+
full_path_compare (bool): If True, search for keywords in the full file path,
|
|
810
|
+
not just the filename. Defaults to True.
|
|
811
|
+
max_workers (Optional[int]): The maximum number of worker threads for
|
|
812
|
+
concurrent processing. Defaults to CPU count + 4.
|
|
813
|
+
generate_tree (bool): If True, a file tree of the matched files will be
|
|
814
|
+
included at the top of the output file. Defaults to True.
|
|
815
|
+
show_tree_stats (bool): If True, display file and directory counts in the
|
|
816
|
+
generated tree. Defaults to False.
|
|
817
|
+
show_token_count (bool): If True, display an approximated token count in the
|
|
818
|
+
summary and output file. Defaults to False.
|
|
819
|
+
exclude_whitespace_in_token_count (bool): If True, whitespace is removed
|
|
820
|
+
before counting tokens, giving a more compact count. Defaults to False.
|
|
821
|
+
read_binary_files (bool): If True, the content search will attempt to read
|
|
822
|
+
and search through binary files. Defaults to False.
|
|
823
|
+
"""
|
|
824
|
+
console, start_time = ConsoleManager(), time.perf_counter()
|
|
825
|
+
root_dir = Path(root_directory or ".").resolve()
|
|
826
|
+
if not root_dir.is_dir():
|
|
827
|
+
console.log(f"Error: Root directory '{root_dir}' not found.", style="bold red")
|
|
828
|
+
return
|
|
829
|
+
|
|
830
|
+
keywords = [k.lower().strip() for k in search_keywords or [] if k.strip()]
|
|
831
|
+
snapshot_mode = not keywords
|
|
750
832
|
criteria = FilterCriteria.normalize_inputs(
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
ignore_presets,
|
|
833
|
+
file_types=file_extensions,
|
|
834
|
+
ignore_if_in_path=ignore_if_in_path,
|
|
835
|
+
ignore_extensions=ignore_extensions,
|
|
836
|
+
lang_presets=language_presets,
|
|
837
|
+
ignore_presets=ignore_presets,
|
|
757
838
|
)
|
|
758
|
-
normalized_keywords = [
|
|
759
|
-
sub.lower().strip() for sub in sub_string_match if sub.strip()
|
|
760
|
-
]
|
|
761
|
-
if not normalized_keywords:
|
|
762
|
-
print("Error: Search mode requires 'search_keywords' to be provided.")
|
|
763
|
-
return
|
|
764
839
|
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
840
|
+
config_rows = [
|
|
841
|
+
["Root Directory", str(root_dir)],
|
|
842
|
+
["File Types", ", ".join(criteria.file_extensions) or "All"],
|
|
843
|
+
["Ignore Paths", ", ".join(criteria.ignore_if_in_path) or "None"],
|
|
844
|
+
["Ignore Extensions", ", ".join(criteria.ignore_extensions) or "None"],
|
|
845
|
+
["Generate Tree", "[green]Yes[/green]" if generate_tree else "[red]No[/red]"],
|
|
846
|
+
]
|
|
847
|
+
if generate_tree:
|
|
848
|
+
config_rows.append(
|
|
849
|
+
["Tree Stats", "[green]Yes[/green]" if show_tree_stats else "[red]No[/red]"]
|
|
768
850
|
)
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
851
|
+
config_rows.append(
|
|
852
|
+
[
|
|
853
|
+
"Show Token Count",
|
|
854
|
+
"[green]Yes[/green]" if show_token_count else "[red]No[/red]",
|
|
855
|
+
]
|
|
856
|
+
)
|
|
857
|
+
if show_token_count:
|
|
858
|
+
config_rows.append(
|
|
859
|
+
[
|
|
860
|
+
"Exclude Whitespace",
|
|
861
|
+
(
|
|
862
|
+
"[green]Yes[/green]"
|
|
863
|
+
if exclude_whitespace_in_token_count
|
|
864
|
+
else "[red]No[/red]"
|
|
865
|
+
),
|
|
782
866
|
]
|
|
783
|
-
|
|
784
|
-
for filename in filenames:
|
|
785
|
-
file_abs_path = current_dir_path / filename
|
|
786
|
-
try:
|
|
787
|
-
relative_parts = file_abs_path.relative_to(root_dir).parts
|
|
788
|
-
if any(
|
|
789
|
-
part.lower() in criteria.ignore_path_components
|
|
790
|
-
for part in relative_parts
|
|
791
|
-
):
|
|
792
|
-
continue
|
|
793
|
-
except ValueError:
|
|
794
|
-
continue
|
|
795
|
-
|
|
796
|
-
if (
|
|
797
|
-
not criteria.file_extensions
|
|
798
|
-
or file_abs_path.suffix.lower() in criteria.file_extensions
|
|
799
|
-
):
|
|
800
|
-
candidate_files.append(file_abs_path)
|
|
801
|
-
|
|
802
|
-
print(f"Discovered {len(candidate_files)} candidate files to process.")
|
|
803
|
-
|
|
804
|
-
if not candidate_files:
|
|
805
|
-
print(
|
|
806
|
-
"\nScan complete. No files matched the initial criteria (extensions and ignores)."
|
|
807
867
|
)
|
|
808
|
-
with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
|
|
809
|
-
f_out.write("No files found matching the specified criteria.\n")
|
|
810
|
-
return
|
|
811
|
-
|
|
812
|
-
matched_files: Set[Path] = set()
|
|
813
|
-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
814
|
-
future_to_file = {
|
|
815
|
-
executor.submit(
|
|
816
|
-
process_file_for_search,
|
|
817
|
-
file,
|
|
818
|
-
normalized_keywords,
|
|
819
|
-
search_file_contents,
|
|
820
|
-
full_path_compare,
|
|
821
|
-
): file
|
|
822
|
-
for file in candidate_files
|
|
823
|
-
}
|
|
824
868
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
869
|
+
if snapshot_mode:
|
|
870
|
+
config_rows.insert(1, ["Mode", "[bold blue]Snapshot[/bold blue]"])
|
|
871
|
+
else:
|
|
872
|
+
config_rows.insert(1, ["Mode", "[bold yellow]Search[/bold yellow]"])
|
|
873
|
+
config_rows.insert(
|
|
874
|
+
2, ["Search Keywords", f"[yellow]{', '.join(keywords)}[/yellow]"]
|
|
831
875
|
)
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
if not matched_files:
|
|
839
|
-
print(
|
|
840
|
-
"\nScan complete. No matching files were found after processing keywords."
|
|
876
|
+
config_rows.append(
|
|
877
|
+
[
|
|
878
|
+
"Search Content",
|
|
879
|
+
"[green]Yes[/green]" if search_file_contents else "[red]No[/red]",
|
|
880
|
+
]
|
|
841
881
|
)
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
|
|
848
|
-
)
|
|
849
|
-
|
|
850
|
-
print(f"Found {len(sorted_matched_files)} matching files.")
|
|
851
|
-
print(f"Generating output file at '{Path(output_file).resolve()}'...")
|
|
852
|
-
|
|
853
|
-
tree_content_lines = _generate_tree_from_paths(
|
|
854
|
-
root_dir, sorted_matched_files, tree_style, show_tree_stats
|
|
855
|
-
)
|
|
856
|
-
files_to_process = [
|
|
857
|
-
FileToProcess(f, f.relative_to(root_dir).as_posix())
|
|
858
|
-
for f in sorted_matched_files
|
|
859
|
-
]
|
|
860
|
-
|
|
861
|
-
_collate_content_to_file(
|
|
862
|
-
output_file,
|
|
863
|
-
tree_content_lines,
|
|
864
|
-
files_to_process,
|
|
865
|
-
DEFAULT_ENCODING,
|
|
866
|
-
DEFAULT_SEPARATOR_CHAR,
|
|
867
|
-
DEFAULT_SEPARATOR_LINE_LENGTH,
|
|
868
|
-
show_token_count,
|
|
869
|
-
show_tree_stats,
|
|
870
|
-
ProjectMode.SEARCH,
|
|
871
|
-
)
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
# --- DECONSTRUCTION FUNCTION ---
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
def deconstruct_snapshot(snapshot_file_path: str) -> Dict[str, Any]:
|
|
878
|
-
"""Scans a compiled snapshot file, extracts the directory tree lines and file paths."""
|
|
879
|
-
snapshot_path = Path(snapshot_file_path)
|
|
880
|
-
if not snapshot_path.is_file():
|
|
881
|
-
raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")
|
|
882
|
-
tree_lines: List[str] = []
|
|
883
|
-
file_paths: List[str] = []
|
|
884
|
-
separator_pattern = re.compile(
|
|
885
|
-
r"^[{}]{{4,}}[{}|]*$".format(
|
|
886
|
-
re.escape(DEFAULT_SEPARATOR_CHAR), re.escape(DEFAULT_SEPARATOR_CHAR)
|
|
882
|
+
config_rows.append(
|
|
883
|
+
[
|
|
884
|
+
"Read Binary Files",
|
|
885
|
+
"[green]Yes[/green]" if read_binary_files else "[red]No[/red]",
|
|
886
|
+
]
|
|
887
887
|
)
|
|
888
|
+
console.print_table(
|
|
889
|
+
"Project Scan Configuration", ["Parameter", "Value"], config_rows
|
|
888
890
|
)
|
|
889
|
-
state = "LOOKING_FOR_TREE"
|
|
890
|
-
with open(snapshot_path, "r", encoding=DEFAULT_ENCODING, errors="replace") as f:
|
|
891
|
-
for line in f:
|
|
892
|
-
line = line.strip()
|
|
893
|
-
if state == "LOOKING_FOR_TREE":
|
|
894
|
-
if line == TREE_HEADER_TEXT:
|
|
895
|
-
state = "READING_TREE"
|
|
896
|
-
elif state == "READING_TREE":
|
|
897
|
-
if not line or separator_pattern.match(line):
|
|
898
|
-
if tree_lines and separator_pattern.match(line):
|
|
899
|
-
state = "LOOKING_FOR_CONTENT"
|
|
900
|
-
continue
|
|
901
|
-
if state == "READING_TREE" and not line.startswith("Key:"):
|
|
902
|
-
tree_lines.append(line)
|
|
903
|
-
elif state == "LOOKING_FOR_CONTENT":
|
|
904
|
-
if line.startswith(FILE_HEADER_PREFIX):
|
|
905
|
-
file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
|
|
906
|
-
state = "READING_CONTENT"
|
|
907
|
-
elif state == "READING_CONTENT":
|
|
908
|
-
if line.startswith(FILE_HEADER_PREFIX):
|
|
909
|
-
file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
|
|
910
|
-
# Post-process to remove the key lines if they were accidentally captured
|
|
911
|
-
tree_lines = [
|
|
912
|
-
line
|
|
913
|
-
for line in tree_lines
|
|
914
|
-
if not line.strip().startswith("Key:")
|
|
915
|
-
and not line.strip().startswith("(f=files")
|
|
916
|
-
]
|
|
917
|
-
return {"tree_lines": tree_lines, "file_paths": file_paths}
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
# --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
def process_project(
|
|
924
|
-
root_dir_param: Optional[str] = None,
|
|
925
|
-
output_file_name: str = "project_output.txt",
|
|
926
|
-
mode: ProjectMode = ProjectMode.FILTER,
|
|
927
|
-
file_types: Optional[List[str]] = None,
|
|
928
|
-
ignore_dirs_in_path: Optional[List[str]] = None,
|
|
929
|
-
language_presets: Optional[List[LanguagePreset]] = None,
|
|
930
|
-
ignore_presets: Optional[List[IgnorePreset]] = None,
|
|
931
|
-
whitelist_filename_substrings: Optional[List[str]] = None,
|
|
932
|
-
ignore_filename_substrings: Optional[List[str]] = None,
|
|
933
|
-
generate_tree: bool = True,
|
|
934
|
-
search_keywords: Optional[List[str]] = None,
|
|
935
|
-
search_file_contents: bool = False,
|
|
936
|
-
full_path_compare: bool = True,
|
|
937
|
-
max_workers: Optional[int] = None,
|
|
938
|
-
tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
|
|
939
|
-
tree_style_t_connector: Optional[str] = None,
|
|
940
|
-
tree_style_l_connector: Optional[str] = None,
|
|
941
|
-
tree_style_v_connector: Optional[str] = None,
|
|
942
|
-
tree_style_h_spacer: Optional[str] = None,
|
|
943
|
-
show_token_count: bool = False,
|
|
944
|
-
show_tree_stats: bool = False,
|
|
945
|
-
encoding: str = DEFAULT_ENCODING,
|
|
946
|
-
separator_char: str = DEFAULT_SEPARATOR_CHAR,
|
|
947
|
-
separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
|
|
948
|
-
) -> None:
|
|
949
|
-
"""Main function to process a project directory in either FILTER or SEARCH mode."""
|
|
950
|
-
actual_root_dir = validate_root_directory(root_dir_param)
|
|
951
|
-
if actual_root_dir is None:
|
|
952
|
-
sys.exit(1)
|
|
953
|
-
style = tree_style_preset.to_style()
|
|
954
|
-
final_style = TreeStyle(
|
|
955
|
-
t_connector=tree_style_t_connector or style.t_connector,
|
|
956
|
-
l_connector=tree_style_l_connector or style.l_connector,
|
|
957
|
-
v_connector=tree_style_v_connector or style.v_connector,
|
|
958
|
-
h_spacer=tree_style_h_spacer or style.h_spacer,
|
|
959
|
-
)
|
|
960
|
-
print(f"--- Starting Project Processing in {mode.name} Mode ---")
|
|
961
|
-
if mode == ProjectMode.FILTER:
|
|
962
|
-
filter_and_append_content(
|
|
963
|
-
actual_root_dir,
|
|
964
|
-
output_file_name,
|
|
965
|
-
final_style,
|
|
966
|
-
generate_tree,
|
|
967
|
-
file_types,
|
|
968
|
-
whitelist_filename_substrings,
|
|
969
|
-
ignore_filename_substrings,
|
|
970
|
-
ignore_dirs_in_path,
|
|
971
|
-
language_presets,
|
|
972
|
-
ignore_presets,
|
|
973
|
-
encoding,
|
|
974
|
-
separator_char,
|
|
975
|
-
separator_line_len,
|
|
976
|
-
show_token_count,
|
|
977
|
-
show_tree_stats,
|
|
978
|
-
)
|
|
979
|
-
elif mode == ProjectMode.SEARCH:
|
|
980
|
-
if not search_keywords:
|
|
981
|
-
print("Error: Search mode requires 'search_keywords' to be provided.")
|
|
982
|
-
return
|
|
983
|
-
search_and_collate_content(
|
|
984
|
-
actual_root_dir,
|
|
985
|
-
search_keywords,
|
|
986
|
-
output_file_name,
|
|
987
|
-
final_style,
|
|
988
|
-
file_types,
|
|
989
|
-
ignore_dirs_in_path,
|
|
990
|
-
language_presets,
|
|
991
|
-
ignore_presets,
|
|
992
|
-
search_file_contents,
|
|
993
|
-
max_workers,
|
|
994
|
-
full_path_compare,
|
|
995
|
-
show_token_count,
|
|
996
|
-
show_tree_stats,
|
|
997
|
-
)
|
|
998
|
-
print("--- Script Execution Finished ---")
|
|
999
891
|
|
|
892
|
+
@contextmanager
|
|
893
|
+
def progress_manager():
|
|
894
|
+
if RICH_AVAILABLE:
|
|
895
|
+
progress = Progress(
|
|
896
|
+
TextColumn("[progress.description]{task.description}"),
|
|
897
|
+
BarColumn(),
|
|
898
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
899
|
+
SpinnerColumn(),
|
|
900
|
+
TimeElapsedColumn(),
|
|
901
|
+
"{task.fields[status]}",
|
|
902
|
+
expand=True,
|
|
903
|
+
)
|
|
904
|
+
with Live(progress, console=console.console, refresh_per_second=10) as live:
|
|
905
|
+
yield progress
|
|
906
|
+
else:
|
|
907
|
+
with FallbackProgress() as progress:
|
|
908
|
+
yield progress
|
|
909
|
+
|
|
910
|
+
with progress_manager() as progress:
|
|
911
|
+
discover_task = progress.add_task("Discovering files", total=None, status="")
|
|
912
|
+
candidate_files = _discover_files(root_dir, criteria, progress, discover_task)
|
|
913
|
+
if RICH_AVAILABLE:
|
|
914
|
+
progress.update(
|
|
915
|
+
discover_task,
|
|
916
|
+
description=f"Discovered [bold green]{len(candidate_files)}[/bold green] candidates",
|
|
917
|
+
status="",
|
|
918
|
+
)
|
|
919
|
+
else:
|
|
920
|
+
progress.update(
|
|
921
|
+
discover_task,
|
|
922
|
+
description=f"Discovered {len(candidate_files)} candidates",
|
|
923
|
+
)
|
|
1000
924
|
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
ignore_dirs_in_path=ignore_dirs_in_path,
|
|
1029
|
-
language_presets=language_presets,
|
|
1030
|
-
ignore_presets=ignore_presets,
|
|
1031
|
-
whitelist_filename_substrings=whitelist_filename_substrings,
|
|
1032
|
-
ignore_filename_substrings=ignore_filename_substrings,
|
|
1033
|
-
generate_tree=generate_tree,
|
|
1034
|
-
tree_style_preset=tree_style_preset,
|
|
1035
|
-
tree_style_t_connector=tree_style_t_connector,
|
|
1036
|
-
tree_style_l_connector=tree_style_l_connector,
|
|
1037
|
-
tree_style_v_connector=tree_style_v_connector,
|
|
1038
|
-
tree_style_h_spacer=tree_style_h_spacer,
|
|
1039
|
-
show_token_count=show_token_count,
|
|
1040
|
-
show_tree_stats=show_tree_stats,
|
|
1041
|
-
encoding=encoding,
|
|
1042
|
-
separator_char=separator_char,
|
|
1043
|
-
separator_line_len=separator_line_len,
|
|
1044
|
-
)
|
|
925
|
+
matched_files = set()
|
|
926
|
+
if candidate_files:
|
|
927
|
+
if snapshot_mode:
|
|
928
|
+
matched_files = set(candidate_files)
|
|
929
|
+
if RICH_AVAILABLE:
|
|
930
|
+
progress.add_task(
|
|
931
|
+
"[dim]Keyword Processing[/dim]",
|
|
932
|
+
total=1,
|
|
933
|
+
completed=1,
|
|
934
|
+
status="[bold blue](Snapshot Mode)[/bold blue]",
|
|
935
|
+
)
|
|
936
|
+
else:
|
|
937
|
+
process_task = progress.add_task(
|
|
938
|
+
f"Processing {len(candidate_files)} files",
|
|
939
|
+
total=len(candidate_files),
|
|
940
|
+
status="",
|
|
941
|
+
)
|
|
942
|
+
matched_files = _process_files_concurrently(
|
|
943
|
+
candidate_files,
|
|
944
|
+
keywords,
|
|
945
|
+
search_file_contents,
|
|
946
|
+
full_path_compare,
|
|
947
|
+
max_workers,
|
|
948
|
+
progress,
|
|
949
|
+
process_task,
|
|
950
|
+
read_binary_files,
|
|
951
|
+
)
|
|
1045
952
|
|
|
953
|
+
output_path, total_bytes, token_count = None, 0, 0
|
|
954
|
+
if matched_files:
|
|
955
|
+
sorted_files = sorted(
|
|
956
|
+
list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix()
|
|
957
|
+
)
|
|
958
|
+
tree_lines = []
|
|
959
|
+
if generate_tree:
|
|
960
|
+
tree_task = progress.add_task(
|
|
961
|
+
"Generating file tree...", total=1, status=""
|
|
962
|
+
)
|
|
963
|
+
tree_lines = _generate_tree_with_stats(
|
|
964
|
+
root_dir, sorted_files, show_tree_stats
|
|
965
|
+
)
|
|
966
|
+
progress.update(
|
|
967
|
+
tree_task, completed=1, description="Generated file tree"
|
|
968
|
+
)
|
|
1046
969
|
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
|
|
1068
|
-
) -> None:
|
|
1069
|
-
"""Utility wrapper for process_project in SEARCH mode."""
|
|
1070
|
-
if not search_keywords:
|
|
1071
|
-
print("Error: 'search_keywords' must be provided for find_in_project.")
|
|
1072
|
-
return
|
|
1073
|
-
process_project(
|
|
1074
|
-
root_dir_param=root_dir_param,
|
|
1075
|
-
output_file_name=output_file_name,
|
|
1076
|
-
mode=ProjectMode.SEARCH,
|
|
1077
|
-
file_types=file_extensions_to_check,
|
|
1078
|
-
ignore_dirs_in_path=ignore_dirs_in_path,
|
|
1079
|
-
language_presets=language_presets,
|
|
1080
|
-
ignore_presets=ignore_presets,
|
|
1081
|
-
search_keywords=search_keywords,
|
|
1082
|
-
search_file_contents=search_file_contents,
|
|
1083
|
-
full_path_compare=full_path_compare,
|
|
1084
|
-
max_workers=max_workers,
|
|
1085
|
-
tree_style_preset=tree_style_preset,
|
|
1086
|
-
tree_style_t_connector=tree_style_t_connector,
|
|
1087
|
-
tree_style_l_connector=tree_style_l_connector,
|
|
1088
|
-
tree_style_v_connector=tree_style_v_connector,
|
|
1089
|
-
tree_style_h_spacer=tree_style_h_spacer,
|
|
1090
|
-
show_token_count=show_token_count,
|
|
1091
|
-
show_tree_stats=show_tree_stats,
|
|
1092
|
-
encoding=encoding,
|
|
1093
|
-
separator_char=separator_char,
|
|
1094
|
-
separator_line_len=separator_line_len,
|
|
1095
|
-
)
|
|
970
|
+
collate_task = progress.add_task(
|
|
971
|
+
f"Collating {len(sorted_files)} files",
|
|
972
|
+
total=len(sorted_files),
|
|
973
|
+
status="",
|
|
974
|
+
)
|
|
975
|
+
files_to_process = [
|
|
976
|
+
FileToProcess(f, f.relative_to(root_dir).as_posix())
|
|
977
|
+
for f in sorted_files
|
|
978
|
+
]
|
|
979
|
+
output_path = Path(output_file_name).resolve()
|
|
980
|
+
total_bytes, token_count = _collate_content_to_file(
|
|
981
|
+
output_path,
|
|
982
|
+
tree_lines,
|
|
983
|
+
files_to_process,
|
|
984
|
+
show_tree_stats,
|
|
985
|
+
show_token_count,
|
|
986
|
+
exclude_whitespace_in_token_count,
|
|
987
|
+
progress,
|
|
988
|
+
collate_task,
|
|
989
|
+
)
|
|
1096
990
|
|
|
991
|
+
end_time = time.perf_counter()
|
|
992
|
+
summary_rows = [
|
|
993
|
+
["Candidate Files", f"{len(candidate_files)}"],
|
|
994
|
+
["Files Matched", f"[bold green]{len(matched_files)}[/bold green]"],
|
|
995
|
+
["Total Time", f"{end_time - start_time:.2f} seconds"],
|
|
996
|
+
["Output Size", f"{total_bytes / 1024:.2f} KB"],
|
|
997
|
+
]
|
|
998
|
+
if show_token_count:
|
|
999
|
+
summary_rows.append(["Approximated Tokens", f"{token_count:,}"])
|
|
1000
|
+
summary_rows.append(["Output File", str(output_path or "N/A")])
|
|
1001
|
+
console.print_table("Scan Complete", ["Metric", "Value"], summary_rows)
|
|
1097
1002
|
|
|
1098
|
-
__all__ = [
|
|
1099
|
-
"process_project",
|
|
1100
|
-
"filter_project",
|
|
1101
|
-
"find_in_project",
|
|
1102
|
-
"deconstruct_snapshot",
|
|
1103
|
-
"ProjectMode",
|
|
1104
|
-
"LanguagePreset",
|
|
1105
|
-
"IgnorePreset",
|
|
1106
|
-
"TreeStylePreset",
|
|
1107
|
-
]
|
|
1108
1003
|
|
|
1109
1004
|
if __name__ == "__main__":
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1005
|
+
generate_snapshot(
|
|
1006
|
+
root_directory=".",
|
|
1007
|
+
output_file_name="project_snapshot_final.txt",
|
|
1008
|
+
# No search keywords triggers Snapshot Mode
|
|
1009
|
+
language_presets=[LanguagePreset.PYTHON],
|
|
1010
|
+
ignore_presets=[
|
|
1011
|
+
IgnorePreset.PYTHON,
|
|
1012
|
+
IgnorePreset.BUILD_ARTIFACTS,
|
|
1013
|
+
IgnorePreset.VERSION_CONTROL,
|
|
1014
|
+
IgnorePreset.NODE_JS,
|
|
1015
|
+
IgnorePreset.IDE_METADATA,
|
|
1016
|
+
],
|
|
1017
|
+
ignore_extensions=[".log", ".tmp"], # Example of new functionality
|
|
1018
|
+
generate_tree=True,
|
|
1116
1019
|
show_tree_stats=True,
|
|
1117
1020
|
show_token_count=True,
|
|
1021
|
+
exclude_whitespace_in_token_count=True,
|
|
1118
1022
|
)
|