dirshot 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dirshot/dirshot.py CHANGED
@@ -2,1117 +2,1021 @@ import os
2
2
  import sys
3
3
  import re
4
4
  import time
5
+ import threading
5
6
  from pathlib import Path
6
7
  from dataclasses import dataclass, field
7
- from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
8
+ from typing import List, Optional, Set, Tuple, NamedTuple, Dict, Any
8
9
  from enum import Enum
9
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
10
11
  from io import StringIO
12
+ from contextlib import contextmanager
11
13
 
12
- # --- TQDM Dependency Handler ---
14
+ # --- Dependency & Console Management ---
13
15
  try:
14
- from tqdm import tqdm
16
+ from rich.console import Console
17
+ from rich.progress import (
18
+ Progress,
19
+ SpinnerColumn,
20
+ BarColumn,
21
+ TextColumn,
22
+ TimeElapsedColumn,
23
+ )
24
+ from rich.table import Table
25
+ from rich.live import Live
26
+ from rich.panel import Panel
27
+ from rich.text import Text
28
+
29
+ RICH_AVAILABLE = True
15
30
  except ImportError:
31
+ RICH_AVAILABLE = False
32
+
33
+ class FallbackProgress:
34
+ """A simple, dependency-free progress handler for when 'rich' is not installed."""
35
+
36
+ def __init__(self):
37
+ self.tasks, self.task_count, self.active_line = {}, 0, ""
16
38
 
17
- # Define a functional fallback dummy tqdm class if the import fails.
18
- class tqdm:
19
- """A simple, text-based progress bar fallback if tqdm is not installed."""
39
+ def add_task(self, description, total=None, **kwargs):
40
+ task_id = self.task_count
41
+ self.tasks[task_id] = {"d": description, "t": total, "c": 0}
42
+ self.task_count += 1
43
+ return task_id
20
44
 
21
- def __init__(
22
- self, iterable=None, total=None, desc="", unit="it", postfix=None, **kwargs
45
+ def update(
46
+ self, task_id, advance=0, completed=None, description=None, **kwargs
23
47
  ):
24
- self.iterable = iterable
25
- self.total = (
26
- total
27
- if total is not None
28
- else (len(iterable) if hasattr(iterable, "__len__") else None)
48
+ if task_id not in self.tasks:
49
+ return
50
+ task = self.tasks[task_id]
51
+ if description:
52
+ task["d"] = description
53
+ task["c"] = completed if completed is not None else task["c"] + advance
54
+ line = f"-> {task['d']}: {task['c']}" + (
55
+ f"/{task['t']}" if task["t"] else ""
29
56
  )
30
- self.desc = desc
31
- self.unit = unit
32
- self.current = 0
33
- self.start_time = time.time()
34
- self._last_update_time = 0
35
- self._postfix = postfix or {}
36
-
37
- def __iter__(self):
38
- if self.iterable is None:
39
- raise TypeError("tqdm fallback must be initialized with an iterable.")
40
- for obj in self.iterable:
41
- yield obj
42
- self.update(1)
43
- self.close()
44
-
45
- def update(self, n=1):
46
- """Update the progress bar by n steps."""
47
- self.current += n
48
- now = time.time()
49
- if (
50
- self.total is None
51
- or now - self._last_update_time > 0.1
52
- or self.current == self.total
53
- ):
54
- self._last_update_time = now
55
- self._draw()
56
-
57
- def set_description(self, desc: str):
58
- """Set the description of the progress bar."""
59
- self.desc = desc
60
- self._draw()
61
-
62
- def set_postfix_str(self, s: str):
63
- self._postfix["info"] = s
64
- self._draw()
65
-
66
- def _draw(self):
67
- """Draw the progress bar to the console."""
68
- postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
69
-
70
- if self.total and self.total > 0:
71
- percent = int((self.current / self.total) * 100)
72
- bar_length = 25
73
- filled_length = int(bar_length * self.current // self.total)
74
- bar = "█" * filled_length + "-" * (bar_length - filled_length)
75
- progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
76
- else: # Case where total is not known
77
- progress_line = f"\r{self.desc}: {self.current} {self.unit}"
78
-
79
- if postfix_str:
80
- progress_line += f" [{postfix_str}]"
81
-
82
- # Pad with spaces to clear previous, longer lines
83
- terminal_width = 80
84
- sys.stdout.write(progress_line.ljust(terminal_width))
57
+ sys.stdout.write("\r" + line.ljust(len(self.active_line) + 2))
85
58
  sys.stdout.flush()
59
+ self.active_line = line
86
60
 
87
- def close(self):
88
- """Clean up the progress bar line."""
89
- # Print a newline to move off the progress bar line
61
+ def __enter__(self):
62
+ return self
63
+
64
+ def __exit__(self, exc_type, exc_val, exc_tb):
90
65
  sys.stdout.write("\n")
91
66
  sys.stdout.flush()
92
67
 
93
68
 
69
+ class ConsoleManager:
70
+ """A wrapper to gracefully handle console output with or without 'rich'."""
71
+
72
+ def __init__(self):
73
+ """Initializes the ConsoleManager, detecting if 'rich' is available."""
74
+ self.console = Console() if RICH_AVAILABLE else None
75
+
76
+ def log(self, message: str, style: str = ""):
77
+ """Logs a message to the console, applying a style if 'rich' is available."""
78
+ if self.console:
79
+ self.console.log(message, style=style)
80
+ else:
81
+ print(f"[{time.strftime('%H:%M:%S')}] {message}")
82
+
83
+ def print_table(self, title: str, columns: List[str], rows: List[List[str]]):
84
+ """Prints a formatted table to the console."""
85
+ if self.console:
86
+ table = Table(
87
+ title=title,
88
+ show_header=True,
89
+ header_style="bold magenta",
90
+ border_style="dim",
91
+ )
92
+ for col in columns:
93
+ table.add_column(col)
94
+ for row in rows:
95
+ table.add_row(*row)
96
+ self.console.print(table)
97
+ else:
98
+ print(f"\n--- {title} ---")
99
+ print(" | ".join(columns))
100
+ for row in rows:
101
+ print(" | ".join(row))
102
+ print("-" * (len(title) + 6))
103
+
104
+
94
105
  # --- Configuration Constants ---
95
- DEFAULT_SEPARATOR_CHAR = "-"
96
- DEFAULT_SEPARATOR_LINE_LENGTH = 80
97
- DEFAULT_ENCODING = "utf-8"
98
- TREE_HEADER_TEXT = "Project File Structure"
99
- FILE_HEADER_PREFIX = "FILE: "
100
- TOKEN_APPROX_MODE = "CHAR_COUNT"
101
-
102
- # List of binary file extensions to skip during content search
106
+ DEFAULT_SEPARATOR_CHAR, DEFAULT_ENCODING = "-", "utf-8"
107
+ TREE_HEADER_TEXT, FILE_HEADER_PREFIX = "Project File Structure", "FILE: "
103
108
  BINARY_FILE_EXTENSIONS = {
104
- # Images
105
109
  ".png",
106
110
  ".jpg",
107
111
  ".jpeg",
108
112
  ".gif",
109
- ".bmp",
110
- ".ico",
111
- ".tiff",
112
- ".webp",
113
- # Documents
114
113
  ".pdf",
115
- ".doc",
116
- ".docx",
117
- ".xls",
118
- ".xlsx",
119
- ".ppt",
120
- ".pptx",
121
- ".odt",
122
- ".ods",
123
- # Archives
124
114
  ".zip",
125
- ".gz",
126
- ".tar",
127
- ".rar",
128
- ".7z",
129
- ".bz2",
130
- ".xz",
131
- # Executables & Binaries
132
115
  ".exe",
133
116
  ".dll",
134
117
  ".so",
135
- ".o",
136
- ".a",
137
- ".lib",
138
- ".bin",
139
- ".dat",
140
- ".db",
141
- ".sqlite",
142
- ".img",
143
- ".iso",
144
- # Compiled Code
145
- ".class",
146
118
  ".jar",
147
- ".war",
148
119
  ".pyc",
149
- ".pyo",
150
- # Audio/Video
151
120
  ".mp3",
152
- ".wav",
153
- ".flac",
154
- ".ogg",
155
121
  ".mp4",
156
- ".mkv",
157
- ".avi",
158
- ".mov",
159
- ".wmv",
160
- # Fonts
161
- ".ttf",
162
- ".otf",
163
- ".woff",
164
- ".woff2",
165
122
  }
166
123
 
167
124
 
168
- # --- Public Enums for Import and Usage ---
169
-
170
-
171
- class ProjectMode(Enum):
172
- """The mode of operation for the script."""
173
-
174
- FILTER = "filter"
175
- SEARCH = "search"
176
-
177
-
125
+ # --- Base Lists for Presets ---
126
+ # These are defined outside the enums to allow for safe composition.
127
+ _PYTHON_BASE = [
128
+ ".py",
129
+ ".pyw",
130
+ "requirements.txt",
131
+ "Pipfile",
132
+ "pyproject.toml",
133
+ "setup.py",
134
+ ]
135
+ _JAVASCRIPT_BASE = [
136
+ ".js",
137
+ ".jsx",
138
+ ".ts",
139
+ ".tsx",
140
+ ".mjs",
141
+ ".cjs",
142
+ "package.json",
143
+ "jsconfig.json",
144
+ "tsconfig.json",
145
+ ]
146
+ _RUBY_BASE = [".rb", "Gemfile", "Rakefile", ".gemspec"]
147
+ _PHP_BASE = [".php", "composer.json", "index.php"]
148
+ _JAVA_BASE = [".java", ".jar", ".war", "pom.xml", ".properties"]
149
+ _KOTLIN_BASE = [".kt", ".kts", ".gradle", "build.gradle.kts"]
150
+ _CSHARP_BASE = [".cs", ".csproj", ".sln", "appsettings.json", "Web.config", ".csx"]
151
+ _C_CPP_BASE = [".c", ".cpp", ".h", ".hpp", "Makefile", "CMakeLists.txt", ".cxx", ".hxx"]
152
+ _RUST_BASE = [".rs", "Cargo.toml", "Cargo.lock"]
153
+ _SWIFT_BASE = [".swift", "Package.swift"]
154
+ _OBJECTIVE_C_BASE = [".m", ".mm", ".h"]
155
+ _ELIXIR_BASE = [".ex", ".exs", "mix.exs"]
156
+ _DART_BASE = [".dart", "pubspec.yaml"]
157
+ _SCALA_BASE = [".scala", ".sbt", "build.sbt"]
158
+ _R_LANG_BASE = [".r", ".R", ".Rmd"]
159
+ _LUA_BASE = [".lua"]
160
+
161
+ _IDE_VSCODE = [".vscode"]
162
+ _IDE_JETBRAINS = [".idea"]
163
+ _IDE_SUBLIME = ["*.sublime-project", "*.sublime-workspace"]
164
+ _IDE_ECLIPSE = [".project", ".settings", ".classpath"]
165
+ _IDE_NETBEANS = ["nbproject"]
166
+ _IDE_ATOM = [".atom"]
167
+ _IDE_VIM = ["*.swp", "*.swo"]
168
+ _IDE_XCODE = ["*.xcodeproj", "*.xcworkspace", "xcuserdata"]
169
+
170
+
171
+ # --- Enums and Data Structures ---
178
172
  class LanguagePreset(Enum):
179
- """Predefined sets of file extensions/names for common languages/frameworks."""
180
-
181
- PYTHON = [
182
- ".py",
183
- ".pyw",
184
- "setup.py",
185
- "requirements.txt",
186
- "Pipfile",
187
- "pyproject.toml",
173
+ """Provides an extensive list of presets for common language file extensions and key project files."""
174
+
175
+ PYTHON = _PYTHON_BASE
176
+ JAVASCRIPT = _JAVASCRIPT_BASE
177
+ JAVA = _JAVA_BASE
178
+ KOTLIN = _KOTLIN_BASE
179
+ C_CPP = _C_CPP_BASE
180
+ C_SHARP = _CSHARP_BASE
181
+ GO = [".go", "go.mod", "go.sum"]
182
+ RUST = _RUST_BASE
183
+ RUBY = _RUBY_BASE
184
+ PHP = _PHP_BASE
185
+ SWIFT = _SWIFT_BASE
186
+ OBJECTIVE_C = _OBJECTIVE_C_BASE
187
+ DART = _DART_BASE
188
+ LUA = _LUA_BASE
189
+ PERL = [".pl", ".pm", ".t"]
190
+ R_LANG = _R_LANG_BASE
191
+ SCALA = _SCALA_BASE
192
+ GROOVY = [".groovy", ".gvy", ".gy", ".gsh"]
193
+ HASKELL = [".hs", ".lhs", "cabal.project"]
194
+ JULIA = [".jl"]
195
+ ZIG = [".zig", "build.zig"]
196
+ NIM = [".nim", ".nimble"]
197
+ ELIXIR = _ELIXIR_BASE
198
+ CLOJURE = [".clj", ".cljs", ".cljc", "project.clj", "deps.edn"]
199
+ F_SHARP = [".fs", ".fsi", ".fsx"]
200
+ OCAML = [".ml", ".mli", "dune-project"]
201
+ ELM = [".elm", "elm.json"]
202
+ PURE_SCRIPT = [".purs", "spago.dhall"]
203
+ COMMON_LISP = [".lisp", ".cl", ".asd"]
204
+ SCHEME = [".scm", ".ss"]
205
+ RACKET = [".rkt"]
206
+ WEB_FRONTEND = [".html", ".htm", ".css", ".scss", ".sass", ".less", ".styl"]
207
+ REACT = _JAVASCRIPT_BASE
208
+ NODE_JS = _JAVASCRIPT_BASE
209
+ EXPRESS_JS = _JAVASCRIPT_BASE
210
+ NEST_JS = _JAVASCRIPT_BASE + ["nest-cli.json"]
211
+ VUE = _JAVASCRIPT_BASE + [".vue", "vue.config.js"]
212
+ ANGULAR = _JAVASCRIPT_BASE + ["angular.json"]
213
+ SVELTE = _JAVASCRIPT_BASE + [".svelte", "svelte.config.js"]
214
+ EMBER = _JAVASCRIPT_BASE + ["ember-cli-build.js"]
215
+ PUG = [".pug", ".jade"]
216
+ HANDLEBARS = [".hbs", ".handlebars"]
217
+ EJS = [".ejs"]
218
+ DJANGO = _PYTHON_BASE + ["manage.py", "wsgi.py", "asgi.py", ".jinja", ".jinja2"]
219
+ FLASK = _PYTHON_BASE + ["app.py", "wsgi.py"]
220
+ RAILS = _RUBY_BASE + ["routes.rb", ".erb", ".haml", ".slim", "config.ru"]
221
+ LARAVEL = _PHP_BASE + [".blade.php", "artisan"]
222
+ SYMFONY = _PHP_BASE + ["symfony.lock"]
223
+ PHOENIX = _ELIXIR_BASE
224
+ SPRING = _JAVA_BASE + ["application.properties", "application.yml"]
225
+ ASP_NET = _CSHARP_BASE + ["*.cshtml", "*.vbhtml", "*.razor"]
226
+ ROCKET_RS = _RUST_BASE + ["Rocket.toml"]
227
+ ACTIX_WEB = _RUST_BASE
228
+ IOS_NATIVE = (
229
+ _SWIFT_BASE
230
+ + _OBJECTIVE_C_BASE
231
+ + [".storyboard", ".xib", "Info.plist", ".pbxproj"]
232
+ )
233
+ ANDROID_NATIVE = _JAVA_BASE + _KOTLIN_BASE + ["AndroidManifest.xml", ".xml"]
234
+ FLUTTER = _DART_BASE
235
+ REACT_NATIVE = _JAVASCRIPT_BASE + ["app.json"]
236
+ XAMARIN = _CSHARP_BASE + [".xaml"]
237
+ DOTNET_MAUI = XAMARIN
238
+ NATIVESCRIPT = _JAVASCRIPT_BASE + ["nativescript.config.ts"]
239
+ UNITY = _CSHARP_BASE + [".unity", ".prefab", ".asset", ".mat", ".unitypackage"]
240
+ UNREAL_ENGINE = _C_CPP_BASE + [".uproject", ".uasset", ".ini"]
241
+ GODOT = [".gd", ".tscn", ".tres", "project.godot"]
242
+ LOVE2D = _LUA_BASE + ["conf.lua", "main.lua"]
243
+ MONOGAME = _CSHARP_BASE + [".mgcb"]
244
+ DOCKER = ["Dockerfile", ".dockerignore", "docker-compose.yml"]
245
+ TERRAFORM = [".tf", ".tfvars", ".tf.json"]
246
+ ANSIBLE = ["ansible.cfg", "inventory.ini"]
247
+ PULUMI = ["Pulumi.yaml"]
248
+ CHEF = _RUBY_BASE
249
+ PUPPET = [".pp"]
250
+ VAGRANT = ["Vagrantfile"]
251
+ GITHUB_ACTIONS = [".yml", ".yaml"]
252
+ GITLAB_CI = [".gitlab-ci.yml"]
253
+ JENKINS = ["Jenkinsfile"]
254
+ CIRCLE_CI = ["config.yml"]
255
+ KUBERNETES = [".yml", ".yaml"]
256
+ BICEP = [".bicep"]
257
+ CLOUDFORMATION = [".json", ".yml"]
258
+ DATA_SCIENCE_NOTEBOOKS = [".ipynb", ".Rmd"]
259
+ SQL = [".sql", ".ddl", ".dml"]
260
+ APACHE_SPARK = list(set(_SCALA_BASE + _PYTHON_BASE + _JAVA_BASE + _R_LANG_BASE))
261
+ ML_CONFIG = ["params.yaml"]
262
+ ELECTRON = _JAVASCRIPT_BASE
263
+ TAURI = _RUST_BASE + ["tauri.conf.json"]
264
+ QT = _C_CPP_BASE + [".pro", ".ui", ".qml"]
265
+ GTK = _C_CPP_BASE + [".ui", "meson.build"]
266
+ WPF = _CSHARP_BASE + [".xaml"]
267
+ WINDOWS_FORMS = _CSHARP_BASE
268
+ BASH = [".sh", ".bash"]
269
+ POWERSHELL = [".ps1", ".psm1"]
270
+ BATCH = [".bat", ".cmd"]
271
+ SOLIDITY = [".sol"]
272
+ VYPER = [".vy"]
273
+ VERILOG = [".v", ".vh"]
274
+ VHDL = [".vhd", ".vhdl"]
275
+ MARKUP = [".md", ".markdown", ".rst", ".adoc", ".asciidoc", ".tex", ".bib"]
276
+ CONFIGURATION = [
277
+ ".json",
278
+ ".xml",
279
+ ".yml",
280
+ ".yaml",
281
+ ".ini",
282
+ ".toml",
283
+ ".env",
284
+ ".conf",
285
+ ".cfg",
188
286
  ]
189
- JAVASCRIPT = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"]
190
- WEB = [".html", ".css", ".scss", ".less"]
191
- JAVA = [".java", ".groovy", ".kt", ".gradle", ".properties"]
287
+ EDITOR_CONFIG = [".editorconfig"]
288
+ LICENSE = ["LICENSE", "LICENSE.md", "COPYING"]
289
+ CHANGELOG = ["CHANGELOG", "CHANGELOG.md"]
192
290
 
193
291
 
194
292
  class IgnorePreset(Enum):
195
- """Predefined sets of path components and filename substrings to ignore."""
196
-
197
- VERSION_CONTROL = [".git", ".svn", ".hg", ".idea"]
198
- NODE_MODULES = ["node_modules", "package-lock.json", "yarn.lock"]
199
- PYTHON_ENV = ["__pycache__", "venv", ".venv", "env", "lib", "bin"]
200
- BUILD_ARTIFACTS = ["dist", "build", "target", "out", "temp", "tmp"]
201
- TEST_FILES = ["test", "spec", "fixture", "example", "mock"]
202
-
203
-
204
- class TreeStylePreset(Enum):
205
- """Predefined character sets for directory tree rendering."""
206
-
207
- UNICODE = ("├── ", "└── ", "│ ", " ")
208
- ASCII = ("|-- ", "+-- ", "| ", " ")
209
- COMPACT = ("|---", "`---", "| ", " ")
210
-
211
- def to_style(self) -> "TreeStyle":
212
- return TreeStyle(self.value[0], self.value[1], self.value[2], self.value[3])
213
-
214
-
215
- class TreeStyle(NamedTuple):
216
- """Holds the characters used to render the directory tree."""
293
+ """Provides an extensive list of presets for common directories, files, and patterns to ignore."""
294
+
295
+ VERSION_CONTROL = [".git", ".svn", ".hg", ".bzr", ".gitignore", ".gitattributes"]
296
+ OS_FILES = [".DS_Store", "Thumbs.db", "desktop.ini", "ehthumbs.db"]
297
+ BUILD_ARTIFACTS = [
298
+ "dist",
299
+ "build",
300
+ "target",
301
+ "out",
302
+ "bin",
303
+ "obj",
304
+ "release",
305
+ "debug",
306
+ ]
307
+ LOGS = ["*.log", "logs", "npm-debug.log*", "yarn-debug.log*", "yarn-error.log*"]
308
+ TEMP_FILES = ["temp", "tmp", "*.tmp", "*~", "*.bak", "*.swp", "*.swo"]
309
+ SECRET_FILES = [
310
+ ".env",
311
+ "*.pem",
312
+ "*.key",
313
+ "credentials.json",
314
+ "*.p12",
315
+ "*.pfx",
316
+ "secrets.yml",
317
+ ".env.local",
318
+ ]
319
+ COMPRESSED_ARCHIVES = ["*.zip", "*.tar", "*.gz", "*.rar", "*.7z", "*.tgz"]
320
+ IDE_METADATA_VSCODE = _IDE_VSCODE
321
+ IDE_METADATA_JETBRAINS = _IDE_JETBRAINS
322
+ IDE_METADATA_SUBLIME = _IDE_SUBLIME
323
+ IDE_METADATA_ECLIPSE = _IDE_ECLIPSE
324
+ IDE_METADATA_NETBEANS = _IDE_NETBEANS
325
+ IDE_METADATA_ATOM = _IDE_ATOM
326
+ IDE_METADATA_VIM = _IDE_VIM
327
+ IDE_METADATA_XCODE = _IDE_XCODE
328
+ IDE_METADATA = list(
329
+ set(
330
+ _IDE_VSCODE
331
+ + _IDE_JETBRAINS
332
+ + _IDE_SUBLIME
333
+ + _IDE_ECLIPSE
334
+ + _IDE_NETBEANS
335
+ + _IDE_ATOM
336
+ + _IDE_VIM
337
+ + _IDE_XCODE
338
+ )
339
+ )
340
+ NODE_JS = [
341
+ "node_modules",
342
+ "package-lock.json",
343
+ "yarn.lock",
344
+ "pnpm-lock.yaml",
345
+ ".npm",
346
+ ]
347
+ PYTHON = [
348
+ "__pycache__",
349
+ "venv",
350
+ ".venv",
351
+ "env",
352
+ "lib",
353
+ "lib64",
354
+ ".pytest_cache",
355
+ ".tox",
356
+ "*.pyc",
357
+ ".mypy_cache",
358
+ "htmlcov",
359
+ ".coverage",
360
+ ]
361
+ RUBY = ["vendor/bundle", ".bundle", "Gemfile.lock", ".gem", "coverage"]
362
+ PHP = ["vendor", "composer.lock"]
363
+ DOTNET = ["bin", "obj", "*.user", "*.suo"]
364
+ RUST = ["target", "Cargo.lock"]
365
+ GO = ["vendor", "go.sum"]
366
+ JAVA_MAVEN = ["target"]
367
+ JAVA_GRADLE = [".gradle", "build"]
368
+ ELIXIR = ["_build", "deps", "mix.lock"]
369
+ DART_FLUTTER = [".dart_tool", ".packages", "build", ".flutter-plugins"]
370
+ ELM = ["elm-stuff"]
371
+ HASKELL = ["dist-newstyle", ".stack-work"]
372
+ TESTING_REPORTS = ["coverage", "junit.xml", "lcov.info", ".nyc_output"]
373
+ STATIC_SITE_GENERATORS = ["_site", "public", "resources"]
374
+ CMS_UPLOADS = ["wp-content/uploads"]
375
+ TERRAFORM = [".terraform", "*.tfstate", "*.tfstate.backup", ".terraform.lock.hcl"]
376
+ JUPYTER_NOTEBOOKS = [".ipynb_checkpoints"]
377
+ ANDROID = [".gradle", "build", "local.properties", "*.apk", "*.aab", "captures"]
378
+ IOS = ["Pods", "Carthage", "DerivedData", "build"]
379
+ UNITY = [
380
+ "Library",
381
+ "Temp",
382
+ "Logs",
383
+ "UserSettings",
384
+ "MemoryCaptures",
385
+ "Assets/AssetStoreTools",
386
+ ]
387
+ UNREAL_ENGINE = ["Intermediate", "Saved", "DerivedDataCache", ".vs"]
388
+ GODOT_ENGINE = [".import", "export_presets.cfg"]
389
+ SERVERLESS_FRAMEWORK = [".serverless"]
390
+ AWS = [".aws-sam"]
391
+ VERCEL = [".vercel"]
392
+ NETLIFY = [".netlify"]
393
+ MACOS = [
394
+ ".DS_Store",
395
+ ".AppleDouble",
396
+ ".LSOverride",
397
+ "._*",
398
+ ".Spotlight-V100",
399
+ ".Trashes",
400
+ ]
401
+ WINDOWS = ["Thumbs.db", "ehthumbs.db", "$RECYCLE.BIN/", "Desktop.ini"]
402
+ DEPRECATED_DEPENDENCIES = ["bower_components"]
217
403
 
218
- t_connector: str
219
- l_connector: str
220
- v_connector: str
221
- h_spacer: str
222
404
 
405
+ class FileToProcess(NamedTuple):
406
+ """Represents a file that needs to be processed and included in the output."""
223
407
 
224
- # --- Helper Data Structures ---
408
+ absolute_path: Path
409
+ relative_path_posix: str
225
410
 
226
411
 
227
412
  @dataclass
228
413
  class FilterCriteria:
229
- """Holds normalized filter criteria for files and directories."""
414
+ """Holds the combined filter criteria for scanning files and directories."""
230
415
 
231
416
  file_extensions: Set[str] = field(default_factory=set)
232
- exact_filenames: Set[str] = field(default_factory=set)
233
- whitelist_fname_substrings: Set[str] = field(default_factory=set)
234
- ignore_fname_substrings: Set[str] = field(default_factory=set)
235
- ignore_path_components: Set[str] = field(default_factory=set)
417
+ ignore_if_in_path: Set[str] = field(default_factory=set)
418
+ ignore_extensions: Set[str] = field(default_factory=set)
236
419
 
237
420
  @classmethod
238
421
  def normalize_inputs(
239
422
  cls,
240
- file_types: Optional[List[str]],
241
- whitelist_substrings: Optional[List[str]],
242
- ignore_filename_substrings: Optional[List[str]],
243
- ignore_path_components_list: Optional[List[str]],
244
- language_presets: Optional[List[LanguagePreset]] = None,
423
+ file_types: Optional[List[str]] = None,
424
+ ignore_if_in_path: Optional[List[str]] = None,
425
+ ignore_extensions: Optional[List[str]] = None,
426
+ lang_presets: Optional[List[LanguagePreset]] = None,
245
427
  ignore_presets: Optional[List[IgnorePreset]] = None,
246
428
  ) -> "FilterCriteria":
247
- all_file_types, all_ignore_paths, all_ignore_fnames = (
248
- set(file_types or []),
249
- set(ignore_path_components_list or []),
250
- set(ignore_filename_substrings or []),
251
- )
252
- if language_presets:
253
- for preset in language_presets:
254
- all_file_types.update(preset.value)
255
- if ignore_presets:
256
- for preset in ignore_presets:
257
- all_ignore_paths.update(preset.value)
258
- all_ignore_fnames.update(preset.value)
259
- norm_exts, norm_exact_fnames = set(), set()
260
- for ft in all_file_types:
261
- ft_lower = ft.lower().strip()
262
- if ft_lower.startswith("."):
263
- norm_exts.add(ft_lower)
264
- elif ft_lower:
265
- norm_exact_fnames.add(ft_lower)
429
+ """
430
+ Consolidates various filter inputs into a single FilterCriteria object.
431
+
432
+ Args:
433
+ file_types (list, optional): A list of file extensions to include.
434
+ ignore_if_in_path (list, optional): A list of directory/file names to ignore.
435
+ ignore_extensions (list, optional): A list of file extensions to ignore.
436
+ lang_presets (list, optional): A list of LanguagePreset enums.
437
+ ignore_presets (list, optional): A list of IgnorePreset enums.
438
+
439
+ Returns:
440
+ FilterCriteria: An object containing the combined sets of filters.
441
+ """
442
+ all_exts = {ft.lower().strip() for ft in file_types or []}
443
+ all_ignore_paths = {ip.lower().strip() for ip in ignore_if_in_path or []}
444
+ all_ignore_exts = {ie.lower().strip() for ie in ignore_extensions or []}
445
+
446
+ for p in lang_presets or []:
447
+ all_exts.update(p.value)
448
+ for p in ignore_presets or []:
449
+ all_ignore_paths.update(p.value)
450
+
266
451
  return cls(
267
- file_extensions=norm_exts,
268
- exact_filenames=norm_exact_fnames,
269
- whitelist_fname_substrings=(
270
- set(s.lower() for s in whitelist_substrings if s.strip())
271
- if whitelist_substrings
272
- else set()
273
- ),
274
- ignore_fname_substrings=set(
275
- s.lower() for s in all_ignore_fnames if s.strip()
276
- ),
277
- ignore_path_components=set(
278
- d.lower() for d in all_ignore_paths if d.strip()
279
- ),
452
+ file_extensions=all_exts,
453
+ ignore_if_in_path=all_ignore_paths,
454
+ ignore_extensions=all_ignore_exts,
280
455
  )
281
456
 
282
457
 
283
- class FileToProcess(NamedTuple):
284
- """Represents a file selected for content processing."""
285
-
286
- absolute_path: Path
287
- relative_path_posix: str
288
-
289
-
290
- # --- Helper Functions ---
458
+ # --- Core Logic Functions ---
459
+ def _discover_files(
460
+ root_dir: Path, criteria: FilterCriteria, progress: Any, task_id: Any
461
+ ) -> List[Path]:
462
+ """
463
+ Recursively scans a directory to find all files matching the criteria.
291
464
 
465
+ Args:
466
+ root_dir (Path): The directory to start the scan from.
467
+ criteria (FilterCriteria): The filtering criteria to apply.
468
+ progress (Any): The progress bar object (from rich or fallback).
469
+ task_id (Any): The ID of the progress bar task to update.
292
470
 
293
- def validate_root_directory(root_dir_param: Optional[str]) -> Optional[Path]:
294
- original_param_for_messaging = (
295
- root_dir_param if root_dir_param else "current working directory"
296
- )
297
- try:
298
- resolved_path = Path(root_dir_param or Path.cwd()).resolve(strict=True)
299
- except Exception as e:
300
- print(
301
- f"Error: Could not resolve root directory '{original_param_for_messaging}': {e}"
302
- )
303
- return None
304
- if not resolved_path.is_dir():
305
- print(f"Error: Root path '{resolved_path}' is not a directory.")
306
- return None
307
- return resolved_path
471
+ Returns:
472
+ List[Path]: A list of absolute paths to the candidate files.
473
+ """
474
+ candidate_files, dirs_scanned = [], 0
308
475
 
476
+ def recursive_scan(current_path: Path):
477
+ nonlocal dirs_scanned
478
+ try:
479
+ for entry in os.scandir(current_path):
480
+ entry_path, entry_lower = Path(entry.path), entry.name.lower()
481
+ if entry_lower in criteria.ignore_if_in_path:
482
+ continue
483
+ if entry.is_dir():
484
+ recursive_scan(entry_path)
485
+ dirs_scanned += 1
486
+ if progress:
487
+ progress.update(
488
+ task_id,
489
+ completed=dirs_scanned,
490
+ description=f"Discovering files in [cyan]{entry.name}[/cyan]",
491
+ )
492
+ elif entry.is_file():
493
+ file_ext = entry_path.suffix.lower()
494
+ if (
495
+ criteria.ignore_extensions
496
+ and file_ext in criteria.ignore_extensions
497
+ ):
498
+ continue
499
+ if (
500
+ not criteria.file_extensions
501
+ or file_ext in criteria.file_extensions
502
+ ):
503
+ candidate_files.append(entry_path)
504
+ except (PermissionError, FileNotFoundError):
505
+ pass
309
506
 
310
- def _should_include_entry(
311
- entry_path: Path,
312
- root_dir: Path,
313
- criteria: FilterCriteria,
314
- is_dir: bool,
315
- log_func: Optional[Callable[[str], None]] = None,
316
- ) -> bool:
317
- try:
318
- relative_path = entry_path.relative_to(root_dir)
319
- except ValueError:
320
- return False
321
- entry_name_lower = entry_path.name.lower()
322
- if criteria.ignore_path_components and any(
323
- part.lower() in criteria.ignore_path_components for part in relative_path.parts
324
- ):
325
- return False
326
- if is_dir:
327
- return True
328
- file_ext_lower = entry_path.suffix.lower()
329
- matched_type = (file_ext_lower in criteria.file_extensions) or (
330
- entry_name_lower in criteria.exact_filenames
331
- )
332
- if not criteria.file_extensions and not criteria.exact_filenames:
333
- matched_type = True
334
- if not matched_type:
335
- return False
336
- if criteria.whitelist_fname_substrings and not any(
337
- sub in entry_name_lower for sub in criteria.whitelist_fname_substrings
338
- ):
339
- return False
340
- if criteria.ignore_fname_substrings and any(
341
- sub in entry_name_lower for sub in criteria.ignore_fname_substrings
342
- ):
343
- return False
344
- return True
507
+ recursive_scan(root_dir)
508
+ return candidate_files
345
509
 
346
510
 
347
511
  def process_file_for_search(
348
512
  file_path: Path,
349
- normalized_keywords: List[str],
350
- search_file_contents: bool,
351
- full_path_compare: bool,
513
+ keywords: List[str],
514
+ search_content: bool,
515
+ full_path: bool,
516
+ activity: Dict,
517
+ read_binary_files: bool,
352
518
  ) -> Optional[Path]:
353
519
  """
354
- Checks a single file for keyword matches. Skips content search for binary files.
355
- """
356
- compare_target = str(file_path) if full_path_compare else file_path.name
357
- if any(key in compare_target.lower() for key in normalized_keywords):
358
- return file_path
520
+ Processes a single file to see if it matches the search criteria.
359
521
 
360
- if search_file_contents:
361
- # Before reading content, check if it's a known binary file type
362
- if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
363
- return None # Do not attempt to read binary file content
522
+ A match can occur if a keyword is found in the filename or, if enabled,
523
+ within the file's content.
364
524
 
365
- try:
366
- with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
367
- for line in f:
368
- if any(key in line.lower() for key in normalized_keywords):
369
- return file_path
370
- except (IOError, OSError):
371
- pass # Ignore files that can't be opened
372
- return None
373
-
374
-
375
- def _calculate_total_stats(
376
- root_dir: Path, criteria: FilterCriteria
377
- ) -> Dict[Path, Tuple[int, int]]:
378
- stats: Dict[Path, Tuple[int, int]] = {}
379
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
380
- current_dir = Path(dirpath_str)
381
- all_children = [current_dir / d for d in dirnames] + [
382
- current_dir / f for f in filenames
383
- ]
384
- total_files, total_dirs = 0, 0
385
- for child_path in all_children:
525
+ Args:
526
+ file_path (Path): The absolute path to the file to process.
527
+ keywords (List[str]): A list of keywords to search for.
528
+ search_content (bool): If True, search the content of the file.
529
+ full_path (bool): If True, compare keywords against the full file path.
530
+ activity (Dict): A dictionary to track thread activity.
531
+ read_binary_files (bool): If True, attempt to read and search binary files.
532
+
533
+ Returns:
534
+ Optional[Path]: The path to the file if it's a match, otherwise None.
535
+ """
536
+ thread_id = threading.get_ident()
537
+ activity[thread_id] = file_path.name
538
+ try:
539
+ compare_target = str(file_path) if full_path else file_path.name
540
+ if any(key in compare_target.lower() for key in keywords):
541
+ return file_path
542
+
543
+ if search_content and (
544
+ read_binary_files or file_path.suffix.lower() not in BINARY_FILE_EXTENSIONS
545
+ ):
386
546
  try:
387
- is_dir = child_path.is_dir()
547
+ with file_path.open("r", encoding="utf-8", errors="ignore") as f:
548
+ for line in f:
549
+ if any(key in line.lower() for key in keywords):
550
+ return file_path
388
551
  except OSError:
389
- continue
390
- if criteria.ignore_path_components:
391
- try:
392
- relative_path = child_path.relative_to(root_dir)
393
- except ValueError:
394
- continue
395
- if any(
396
- part.lower() in criteria.ignore_path_components
397
- for part in relative_path.parts
398
- ):
399
- continue
400
- if is_dir:
401
- total_dirs += 1
402
- else:
403
- total_files += 1
404
- stats[current_dir] = (total_files, total_dirs)
405
- dirnames[:] = [
406
- d
407
- for d in dirnames
408
- if (current_dir / d).name.lower() not in criteria.ignore_path_components
409
- ]
410
- return stats
552
+ pass
553
+ return None
554
+ finally:
555
+ activity[thread_id] = ""
411
556
 
412
557
 
413
- # --- Tree Generation Functions ---
558
+ def _process_files_concurrently(
559
+ files: List[Path],
560
+ keywords: List[str],
561
+ search_content: bool,
562
+ full_path: bool,
563
+ max_workers: Optional[int],
564
+ progress: Any,
565
+ task_id: Any,
566
+ read_binary_files: bool,
567
+ ) -> Set[Path]:
568
+ """
569
+ Uses a thread pool to process a list of files for search matches concurrently.
570
+
571
+ Args:
572
+ files (List[Path]): The list of candidate files to search through.
573
+ keywords (List[str]): The keywords to search for.
574
+ search_content (bool): Whether to search inside file contents.
575
+ full_path (bool): Whether to compare keywords against the full path.
576
+ max_workers (Optional[int]): The maximum number of threads to use.
577
+ progress (Any): The progress bar object.
578
+ task_id (Any): The ID of the processing task on the progress bar.
579
+ read_binary_files (bool): If True, search the content of binary files.
580
+
581
+ Returns:
582
+ Set[Path]: A set of absolute paths for all files that matched.
583
+ """
584
+ matched_files, thread_activity = set(), {}
585
+ with ThreadPoolExecutor(
586
+ max_workers=max_workers or (os.cpu_count() or 1) + 4,
587
+ thread_name_prefix="scanner",
588
+ ) as executor:
589
+ future_to_file = {
590
+ executor.submit(
591
+ process_file_for_search,
592
+ f,
593
+ keywords,
594
+ search_content,
595
+ full_path,
596
+ thread_activity,
597
+ read_binary_files,
598
+ ): f
599
+ for f in files
600
+ }
601
+ for future in as_completed(future_to_file):
602
+ if progress:
603
+ active_threads = {
604
+ f"T{str(tid)[-3:]}": name
605
+ for tid, name in thread_activity.items()
606
+ if name
607
+ }
608
+ progress.update(
609
+ task_id,
610
+ advance=1,
611
+ description=f"Processing [yellow]{len(active_threads)} threads[/yellow]",
612
+ )
613
+ if RICH_AVAILABLE:
614
+ status_panel = Panel(
615
+ Text(
616
+ "\n".join(
617
+ f"[bold cyan]{k}[/]: {v}"
618
+ for k, v in active_threads.items()
619
+ )
620
+ ),
621
+ border_style="dim",
622
+ title="[dim]Thread Activity",
623
+ )
624
+ progress.update(task_id, status=status_panel)
625
+ if result := future.result():
626
+ matched_files.add(result)
627
+ if progress and RICH_AVAILABLE:
628
+ progress.update(task_id, status="[bold green]Done![/bold green]")
629
+ return matched_files
414
630
 
415
631
 
416
- def _generate_tree_lines(
417
- root_dir: Path, criteria: FilterCriteria, style: TreeStyle, show_stats: bool
632
+ def _generate_tree_with_stats(
633
+ root_dir: Path, file_paths: List[Path], show_stats: bool
418
634
  ) -> List[str]:
419
- """Generates a list of strings representing the directory tree based on criteria, style, and stats."""
420
- dir_stats: Optional[Dict[Path, Tuple[int, int]]] = (
421
- _calculate_total_stats(root_dir, criteria) if show_stats else None
422
- )
423
- tree_lines: List[str] = []
424
-
425
- def format_dir_name(
426
- path: Path, path_name: str, included_files: int, included_dirs: int
427
- ) -> str:
428
- if not show_stats or not dir_stats:
429
- return path_name
430
- total_files, total_dirs = dir_stats.get(path, (0, 0))
635
+ """
636
+ Generates a directory tree structure from a list of file paths.
431
637
 
432
- stats_str = f" [I: {included_files}f, {included_dirs}d | T: {total_files}f, {total_dirs}d]"
433
- return path_name + stats_str
638
+ Args:
639
+ root_dir (Path): The root directory of the project, used as the tree's base.
640
+ file_paths (List[Path]): A list of file paths to include in the tree.
641
+ show_stats (bool): If True, include file and directory counts in the tree.
434
642
 
435
- def _recursive_build(current_path: Path, prefix_parts: List[str]):
436
- try:
437
- entries = sorted(current_path.iterdir(), key=lambda p: p.name.lower())
438
- except OSError as e:
439
- error_prefix = "".join(prefix_parts) + style.l_connector
440
- tree_lines.append(
441
- error_prefix + f"[Error accessing: {current_path.name} - {e.strerror}]"
442
- )
443
- return
444
- displayable_children: List[Tuple[Path, bool]] = []
445
- for e in entries:
446
- try:
447
- is_dir = e.is_dir()
448
- except OSError:
449
- continue
450
- if _should_include_entry(
451
- e, root_dir, criteria, is_dir=is_dir, log_func=None
452
- ):
453
- displayable_children.append((e, is_dir))
454
- num_children = len(displayable_children)
455
- included_files_in_level = sum(
456
- 1 for _, is_dir in displayable_children if not is_dir
457
- )
458
- included_dirs_in_level = sum(1 for _, is_dir in displayable_children if is_dir)
459
- if not prefix_parts:
460
- tree_lines.append(
461
- format_dir_name(
462
- current_path,
463
- current_path.name,
464
- included_files_in_level,
465
- included_dirs_in_level,
466
- )
467
- )
468
- for i, (child_path, child_is_dir) in enumerate(displayable_children):
469
- is_last = i == num_children - 1
470
- connector = style.l_connector if is_last else style.t_connector
471
- entry_name = child_path.name
472
- if child_is_dir:
473
- try:
474
- child_entries = sorted(
475
- child_path.iterdir(), key=lambda p: p.name.lower()
476
- )
477
- child_displayable_children = [
478
- (e, e.is_dir())
479
- for e in child_entries
480
- if _should_include_entry(
481
- e, root_dir, criteria, is_dir=e.is_dir(), log_func=None
482
- )
483
- ]
484
- child_included_files = sum(
485
- 1 for _, is_dir in child_displayable_children if not is_dir
486
- )
487
- child_included_dirs = sum(
488
- 1 for _, is_dir in child_displayable_children if is_dir
489
- )
490
- entry_name = format_dir_name(
491
- child_path,
492
- child_path.name,
493
- child_included_files,
494
- child_included_dirs,
495
- )
496
- except OSError:
497
- pass
498
- tree_lines.append("".join(prefix_parts) + connector + entry_name)
499
- if child_is_dir:
500
- new_prefix_parts = prefix_parts + [
501
- style.h_spacer if is_last else style.v_connector
502
- ]
503
- _recursive_build(child_path, new_prefix_parts)
643
+ Returns:
644
+ List[str]: A list of strings, where each string is a line in the tree.
645
+ """
646
+ tree_dict: Dict[str, Any] = {}
647
+ for path in file_paths:
648
+ level = tree_dict
649
+ for part in path.relative_to(root_dir).parts:
650
+ level = level.setdefault(part, {})
504
651
 
505
- _recursive_build(root_dir, [])
506
- return tree_lines
652
+ def count_children(d: Dict) -> Tuple[int, int]:
653
+ files = sum(1 for v in d.values() if not v)
654
+ dirs = len(d) - files
655
+ return files, dirs
507
656
 
657
+ lines = []
658
+ style = ("├── ", "└── ", "│ ", " ")
508
659
 
509
- def _generate_tree_from_paths(
510
- root_dir: Path, file_paths: List[Path], style: TreeStyle, show_stats: bool
511
- ) -> List[str]:
512
- """Generates a directory tree structure from a list of *matched* file paths using the given style."""
513
- tree_dict: Dict[str, Any] = {}
514
- matched_paths = {p.relative_to(root_dir) for p in file_paths}
515
- for rel_path in matched_paths:
516
- parts = rel_path.parts
517
- current_level = tree_dict
518
- for part in parts:
519
- current_level = current_level.setdefault(part, {})
520
- tree_lines: List[str] = []
521
-
522
- def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
523
- if not show_stats:
524
- return name
525
-
526
- stats_str = f" [M: {matched_files}f, {matched_dirs}d]"
527
- return name + stats_str
528
-
529
- def build_lines(d: Dict[str, Any], prefix: str):
530
- items = sorted(d.keys(), key=lambda k: (len(d[k]) == 0, k.lower()))
531
- num_children = len(items)
532
- matched_files_in_level = sum(1 for k in items if not d[k])
533
- matched_dirs_in_level = sum(1 for k in items if d[k])
534
- if not prefix:
535
- tree_lines.append(
536
- format_dir_name_search(
537
- root_dir.name, matched_files_in_level, matched_dirs_in_level
538
- )
539
- )
660
+ def build_lines_recursive(d: Dict, prefix: str = ""):
661
+ items = sorted(d.keys(), key=lambda k: (not d[k], k.lower()))
540
662
  for i, name in enumerate(items):
541
- is_last = i == num_children - 1
542
- connector = style.l_connector if is_last else style.t_connector
543
- entry_name = name
544
- if d[name]:
545
- child_matched_files = sum(1 for k in d[name] if not d[name][k])
546
- child_matched_dirs = sum(1 for k in d[name] if d[name][k])
547
- entry_name = format_dir_name_search(
548
- name, child_matched_files, child_matched_dirs
549
- )
550
- tree_lines.append(prefix + connector + entry_name)
663
+ is_last = i == len(items) - 1
664
+ connector = style[1] if is_last else style[0]
665
+ display_name = name
666
+
551
667
  if d[name]:
552
- extension = style.h_spacer if is_last else style.v_connector
553
- build_lines(d[name], prefix + extension)
668
+ if show_stats:
669
+ files, dirs = count_children(d[name])
670
+ display_name += f" [dim][M: {files}f, {dirs}d][/dim]"
671
+
672
+ lines.append(f"{prefix}{connector}{display_name}")
554
673
 
555
- build_lines(tree_dict, "")
556
- return tree_lines
674
+ if d[name]:
675
+ extension = style[3] if is_last else style[2]
676
+ build_lines_recursive(d[name], prefix + extension)
557
677
 
678
+ root_name = f"[bold cyan]{root_dir.name}[/bold cyan]"
679
+ if show_stats:
680
+ files, dirs = count_children(tree_dict)
681
+ root_name += f" [dim][M: {files}f, {dirs}d][/dim]"
682
+ lines.append(root_name)
558
683
 
559
- # --- Collation and Main Modes ---
684
+ build_lines_recursive(tree_dict)
685
+ return lines
560
686
 
561
687
 
562
688
  def _collate_content_to_file(
563
- output_file_path_str: str,
564
- tree_content_lines: Optional[List[str]],
565
- files_to_process: List[FileToProcess],
566
- encoding: str,
567
- separator_char: str,
568
- separator_line_len: int,
569
- show_token_count: bool,
689
+ output_path: Path,
690
+ tree_lines: List,
691
+ files: List[FileToProcess],
570
692
  show_tree_stats: bool,
571
- mode: ProjectMode,
572
- ) -> None:
693
+ show_token_count: bool,
694
+ exclude_whitespace: bool,
695
+ progress: Any,
696
+ task_id: Any,
697
+ ) -> Tuple[float, int]:
573
698
  """
574
- Collates content to a string buffer, calculates token count,
575
- and then writes to the output file with a progress bar.
699
+ Collates the file tree and file contents into a single output file.
700
+
701
+ Args:
702
+ output_path (Path): The path to the final output file.
703
+ tree_lines (List): The generated file tree lines.
704
+ files (List[FileToProcess]): The files whose content needs to be collated.
705
+ show_tree_stats (bool): Whether to include the stats key in the header.
706
+ show_token_count (bool): Whether to calculate and include the token count.
707
+ exclude_whitespace (bool): If True, exclude whitespace from token counting.
708
+ progress (Any): The progress bar object.
709
+ task_id (Any): The ID of the collation task on the progress bar.
710
+
711
+ Returns:
712
+ Tuple[float, int]: A tuple containing the total bytes written and the token count.
576
713
  """
577
- output_file_path = Path(output_file_path_str).resolve()
578
- output_file_path.parent.mkdir(parents=True, exist_ok=True)
579
- separator_line = separator_char * separator_line_len
714
+ output_path.parent.mkdir(parents=True, exist_ok=True)
715
+ buffer, total_bytes, token_count = StringIO(), 0, 0
580
716
 
581
- buffer = StringIO()
582
-
583
- if tree_content_lines:
584
- buffer.write(f"{TREE_HEADER_TEXT}\n{separator_line}\n\n")
585
- stats_key = ""
717
+ if tree_lines:
718
+ buffer.write(f"{TREE_HEADER_TEXT}\n" + "-" * 80 + "\n\n")
586
719
  if show_tree_stats:
587
- if mode == ProjectMode.FILTER:
588
- stats_key = (
589
- "Key: [I: Included f/d | T: Total f/d in original dir]\n"
590
- " (f=files, d=directories)\n\n"
591
- )
592
- else:
593
- stats_key = (
594
- "Key: [M: Matched files/dirs]\n" " (f=files, d=directories)\n\n"
595
- )
596
- buffer.write(stats_key)
597
- buffer.write("\n".join(tree_content_lines) + "\n")
598
- buffer.write(f"\n{separator_line}\n\n")
599
-
600
- if not files_to_process:
601
- message = (
602
- "No files found matching the specified criteria.\n"
603
- if mode == ProjectMode.SEARCH
604
- else "No files found matching specified criteria for content aggregation.\n"
605
- )
606
- buffer.write(message)
607
- else:
608
-
609
- collation_bar = tqdm(
610
- files_to_process, desc="Phase 3: Collating files", unit="file", leave=False
611
- )
612
- for file_info in collation_bar:
613
- collation_bar.set_postfix_str(file_info.relative_path_posix, refresh=True)
614
- header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
615
- buffer.write(header_content)
616
- try:
617
- with open(
618
- file_info.absolute_path, "r", encoding=encoding, errors="replace"
619
- ) as infile:
620
- buffer.write(infile.read())
621
- buffer.write("\n\n")
622
- except Exception:
623
- buffer.write(
624
- f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
625
- )
626
-
627
- final_content = buffer.getvalue()
628
- total_token_count = 0
629
- mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
630
-
631
- if show_token_count:
632
- if TOKEN_APPROX_MODE == "CHAR_COUNT":
633
- total_token_count = len(final_content)
634
- elif TOKEN_APPROX_MODE == "WORD_COUNT":
635
- total_token_count = len(final_content.split())
636
-
637
- try:
638
- with open(output_file_path, "w", encoding=encoding) as outfile:
639
- if show_token_count:
640
- outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
641
- outfile.write(final_content)
642
- except IOError as e:
643
- print(f"\nError: Could not write to output file '{output_file_path}': {e}")
644
- return
720
+ buffer.write(
721
+ "Key: [M: Matched files/dirs]\n (f=files, d=directories)\n\n"
722
+ )
645
723
 
646
- if mode == ProjectMode.SEARCH:
647
- if files_to_process:
648
- print("Success! Collation complete.")
649
- else:
650
- print(f"\nProcess complete. Output written to: {output_file_path}")
651
- if len(files_to_process) > 0:
652
- print(
653
- f"Summary: {len(files_to_process)} files selected for content processing."
724
+ if RICH_AVAILABLE:
725
+ content = "\n".join(Text.from_markup(line).plain for line in tree_lines)
726
+ else:
727
+ content = "\n".join(tree_lines)
728
+ buffer.write(content + "\n\n")
729
+
730
+ for file_info in files:
731
+ if progress:
732
+ progress.update(
733
+ task_id,
734
+ advance=1,
735
+ description=f"Collating [green]{file_info.relative_path_posix}[/green]",
736
+ )
737
+ buffer.write(f"{'-'*80}\nFILE: {file_info.relative_path_posix}\n{'-'*80}\n\n")
738
+ try:
739
+ content = file_info.absolute_path.read_text(
740
+ encoding=DEFAULT_ENCODING, errors="replace"
654
741
  )
742
+ buffer.write(content + "\n\n")
743
+ total_bytes += len(content.encode(DEFAULT_ENCODING))
744
+ except Exception as e:
745
+ buffer.write(f"Error: Could not read file. Issue: {e}\n\n")
655
746
 
747
+ final_content = buffer.getvalue()
656
748
  if show_token_count:
657
- print(f"Total Approximated Tokens ({mode_display}): {total_token_count}")
658
-
659
-
660
- def filter_and_append_content(
661
- root_dir: Path,
662
- output_file_path_str: str,
663
- tree_style: TreeStyle,
664
- generate_tree: bool,
665
- file_types: Optional[List[str]],
666
- whitelist_substrings_in_filename: Optional[List[str]],
667
- ignore_substrings_in_filename: Optional[List[str]],
668
- ignore_dirs_in_path: Optional[List[str]],
669
- language_presets: Optional[List[LanguagePreset]],
670
- ignore_presets: Optional[List[IgnorePreset]],
671
- encoding: str,
672
- separator_char: str,
673
- separator_line_len: int,
674
- show_token_count: bool,
675
- show_tree_stats: bool,
676
- ) -> None:
677
- """FILTER MODE: Selects files based on explicit criteria and prepares content/tree."""
678
- criteria = FilterCriteria.normalize_inputs(
679
- file_types,
680
- whitelist_substrings_in_filename,
681
- ignore_substrings_in_filename,
682
- ignore_dirs_in_path,
683
- language_presets,
684
- ignore_presets,
685
- )
686
- tree_content_lines: Optional[List[str]] = (
687
- _generate_tree_lines(root_dir, criteria, tree_style, show_tree_stats)
688
- if generate_tree
689
- else None
690
- )
691
- files_to_process: List[FileToProcess] = []
692
-
693
- with tqdm(desc="Discovering files", unit="dir") as discovery_bar:
694
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
695
- discovery_bar.update(1)
696
- discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
697
-
698
- current_dir_path = Path(dirpath_str)
699
- orig_dirnames = list(dirnames)
700
- dirnames[:] = [
701
- d
702
- for d in orig_dirnames
703
- if _should_include_entry(
704
- current_dir_path / d, root_dir, criteria, is_dir=True
705
- )
706
- ]
749
+ content_for_count = (
750
+ re.sub(r"\s", "", final_content) if exclude_whitespace else final_content
751
+ )
752
+ token_count = len(content_for_count)
707
753
 
708
- for filename in filenames:
709
- file_abs_path = current_dir_path / filename
710
- if _should_include_entry(
711
- file_abs_path, root_dir, criteria, is_dir=False
712
- ):
713
- files_to_process.append(
714
- FileToProcess(
715
- file_abs_path,
716
- file_abs_path.relative_to(root_dir).as_posix(),
717
- )
718
- )
754
+ with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
755
+ if show_token_count:
756
+ mode = "chars, no whitespace" if exclude_whitespace else "characters"
757
+ outfile.write(f"Token Count ({mode}): {token_count}\n\n")
758
+ outfile.write(final_content)
719
759
 
720
- files_to_process.sort(key=lambda f_info: f_info.relative_path_posix.lower())
721
- _collate_content_to_file(
722
- output_file_path_str,
723
- tree_content_lines,
724
- files_to_process,
725
- encoding,
726
- separator_char,
727
- separator_line_len,
728
- show_token_count,
729
- show_tree_stats,
730
- ProjectMode.FILTER,
731
- )
760
+ return total_bytes, token_count
732
761
 
733
762
 
734
- def search_and_collate_content(
735
- root_dir: Path,
736
- sub_string_match: List[str],
737
- output_file: str,
738
- tree_style: TreeStyle,
739
- file_extensions_to_check: Optional[List[str]],
740
- ignore_substrings_in_path: Optional[List[str]],
741
- language_presets: Optional[List[LanguagePreset]],
742
- ignore_presets: Optional[List[IgnorePreset]],
743
- search_file_contents: bool,
744
- max_workers: Optional[int],
745
- full_path_compare: bool,
746
- show_token_count: bool,
747
- show_tree_stats: bool,
763
+ # --- Main Entry Point ---
764
+ def generate_snapshot(
765
+ root_directory: str = ".",
766
+ output_file_name: str = "project_snapshot.txt",
767
+ search_keywords: Optional[List[str]] = None,
768
+ file_extensions: Optional[List[str]] = None,
769
+ ignore_if_in_path: Optional[List[str]] = None,
770
+ ignore_extensions: Optional[List[str]] = None,
771
+ language_presets: Optional[List[LanguagePreset]] = None,
772
+ ignore_presets: Optional[List[IgnorePreset]] = None,
773
+ search_file_contents: bool = True,
774
+ full_path_compare: bool = True,
775
+ max_workers: Optional[int] = None,
776
+ generate_tree: bool = True,
777
+ show_tree_stats: bool = False,
778
+ show_token_count: bool = False,
779
+ exclude_whitespace_in_token_count: bool = False,
780
+ read_binary_files: bool = False,
748
781
  ) -> None:
749
- """SEARCH MODE: Scans for files that match a substring with multi-phase progress bars."""
782
+ """
783
+ Orchestrates the entire process of scanning, filtering, and collating project files.
784
+
785
+ This function serves as the main entry point for the utility. It can be used
786
+ to create a full "snapshot" of a project's source code or to search for
787
+ specific keywords within file names and/or contents. It is highly configurable
788
+ through presets and manual overrides.
789
+
790
+ Args:
791
+ root_directory (str): The starting directory for the scan. Defaults to ".".
792
+ output_file_name (str): The name of the file to save the results to.
793
+ Defaults to "project_snapshot.txt".
794
+ search_keywords (List[str], optional): A list of keywords to search for. If
795
+ None or empty, the function runs in "snapshot" mode, including all
796
+ files that match the other criteria. Defaults to None.
797
+ file_extensions (List[str], optional): A list of specific file
798
+ extensions to include (e.g., [".py", ".md"]). Defaults to None.
799
+ ignore_if_in_path (List[str], optional): A list of directory or file
800
+ names to exclude from the scan. Defaults to None.
801
+ ignore_extensions (List[str], optional): A list of file extensions to
802
+ explicitly ignore (e.g., [".log", ".tmp"]). Defaults to None.
803
+ language_presets (List[LanguagePreset], optional): A list of LanguagePreset
804
+ enums for common file types (e.g., [LanguagePreset.PYTHON]). Defaults to None.
805
+ ignore_presets (List[IgnorePreset], optional): A list of IgnorePreset enums
806
+ for common ignore patterns (e.g., [IgnorePreset.PYTHON]). Defaults to None.
807
+ search_file_contents (bool): If True, search for keywords within file
808
+ contents. Defaults to True.
809
+ full_path_compare (bool): If True, search for keywords in the full file path,
810
+ not just the filename. Defaults to True.
811
+ max_workers (Optional[int]): The maximum number of worker threads for
812
+ concurrent processing. Defaults to CPU count + 4.
813
+ generate_tree (bool): If True, a file tree of the matched files will be
814
+ included at the top of the output file. Defaults to True.
815
+ show_tree_stats (bool): If True, display file and directory counts in the
816
+ generated tree. Defaults to False.
817
+ show_token_count (bool): If True, display an approximated token count in the
818
+ summary and output file. Defaults to False.
819
+ exclude_whitespace_in_token_count (bool): If True, whitespace is removed
820
+ before counting tokens, giving a more compact count. Defaults to False.
821
+ read_binary_files (bool): If True, the content search will attempt to read
822
+ and search through binary files. Defaults to False.
823
+ """
824
+ console, start_time = ConsoleManager(), time.perf_counter()
825
+ root_dir = Path(root_directory or ".").resolve()
826
+ if not root_dir.is_dir():
827
+ console.log(f"Error: Root directory '{root_dir}' not found.", style="bold red")
828
+ return
829
+
830
+ keywords = [k.lower().strip() for k in search_keywords or [] if k.strip()]
831
+ snapshot_mode = not keywords
750
832
  criteria = FilterCriteria.normalize_inputs(
751
- file_extensions_to_check,
752
- None,
753
- None,
754
- ignore_substrings_in_path,
755
- language_presets,
756
- ignore_presets,
833
+ file_types=file_extensions,
834
+ ignore_if_in_path=ignore_if_in_path,
835
+ ignore_extensions=ignore_extensions,
836
+ lang_presets=language_presets,
837
+ ignore_presets=ignore_presets,
757
838
  )
758
- normalized_keywords = [
759
- sub.lower().strip() for sub in sub_string_match if sub.strip()
760
- ]
761
- if not normalized_keywords:
762
- print("Error: Search mode requires 'search_keywords' to be provided.")
763
- return
764
839
 
765
- if criteria.ignore_path_components:
766
- print(
767
- f"Ignoring directories containing: {', '.join(sorted(list(criteria.ignore_path_components)))}"
840
+ config_rows = [
841
+ ["Root Directory", str(root_dir)],
842
+ ["File Types", ", ".join(criteria.file_extensions) or "All"],
843
+ ["Ignore Paths", ", ".join(criteria.ignore_if_in_path) or "None"],
844
+ ["Ignore Extensions", ", ".join(criteria.ignore_extensions) or "None"],
845
+ ["Generate Tree", "[green]Yes[/green]" if generate_tree else "[red]No[/red]"],
846
+ ]
847
+ if generate_tree:
848
+ config_rows.append(
849
+ ["Tree Stats", "[green]Yes[/green]" if show_tree_stats else "[red]No[/red]"]
768
850
  )
769
-
770
- candidate_files: List[Path] = []
771
-
772
- with tqdm(desc="Phase 1: Discovering files", unit="dir") as discovery_bar:
773
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
774
- discovery_bar.update(1)
775
- discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
776
- current_dir_path = Path(dirpath_str)
777
- dirnames[:] = [
778
- d
779
- for d in dirnames
780
- if (current_dir_path / d).name.lower()
781
- not in criteria.ignore_path_components
851
+ config_rows.append(
852
+ [
853
+ "Show Token Count",
854
+ "[green]Yes[/green]" if show_token_count else "[red]No[/red]",
855
+ ]
856
+ )
857
+ if show_token_count:
858
+ config_rows.append(
859
+ [
860
+ "Exclude Whitespace",
861
+ (
862
+ "[green]Yes[/green]"
863
+ if exclude_whitespace_in_token_count
864
+ else "[red]No[/red]"
865
+ ),
782
866
  ]
783
-
784
- for filename in filenames:
785
- file_abs_path = current_dir_path / filename
786
- try:
787
- relative_parts = file_abs_path.relative_to(root_dir).parts
788
- if any(
789
- part.lower() in criteria.ignore_path_components
790
- for part in relative_parts
791
- ):
792
- continue
793
- except ValueError:
794
- continue
795
-
796
- if (
797
- not criteria.file_extensions
798
- or file_abs_path.suffix.lower() in criteria.file_extensions
799
- ):
800
- candidate_files.append(file_abs_path)
801
-
802
- print(f"Discovered {len(candidate_files)} candidate files to process.")
803
-
804
- if not candidate_files:
805
- print(
806
- "\nScan complete. No files matched the initial criteria (extensions and ignores)."
807
867
  )
808
- with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
809
- f_out.write("No files found matching the specified criteria.\n")
810
- return
811
-
812
- matched_files: Set[Path] = set()
813
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
814
- future_to_file = {
815
- executor.submit(
816
- process_file_for_search,
817
- file,
818
- normalized_keywords,
819
- search_file_contents,
820
- full_path_compare,
821
- ): file
822
- for file in candidate_files
823
- }
824
868
 
825
- progress_bar_desc = f"Phase 2: Processing {len(candidate_files)} files"
826
- progress_bar = tqdm(
827
- as_completed(future_to_file),
828
- total=len(future_to_file),
829
- unit="file",
830
- desc=progress_bar_desc,
869
+ if snapshot_mode:
870
+ config_rows.insert(1, ["Mode", "[bold blue]Snapshot[/bold blue]"])
871
+ else:
872
+ config_rows.insert(1, ["Mode", "[bold yellow]Search[/bold yellow]"])
873
+ config_rows.insert(
874
+ 2, ["Search Keywords", f"[yellow]{', '.join(keywords)}[/yellow]"]
831
875
  )
832
-
833
- for future in progress_bar:
834
- result = future.result()
835
- if result:
836
- matched_files.add(result)
837
-
838
- if not matched_files:
839
- print(
840
- "\nScan complete. No matching files were found after processing keywords."
876
+ config_rows.append(
877
+ [
878
+ "Search Content",
879
+ "[green]Yes[/green]" if search_file_contents else "[red]No[/red]",
880
+ ]
841
881
  )
842
- with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
843
- f_out.write("No files found matching the specified search keywords.\n")
844
- return
845
-
846
- sorted_matched_files = sorted(
847
- list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
848
- )
849
-
850
- print(f"Found {len(sorted_matched_files)} matching files.")
851
- print(f"Generating output file at '{Path(output_file).resolve()}'...")
852
-
853
- tree_content_lines = _generate_tree_from_paths(
854
- root_dir, sorted_matched_files, tree_style, show_tree_stats
855
- )
856
- files_to_process = [
857
- FileToProcess(f, f.relative_to(root_dir).as_posix())
858
- for f in sorted_matched_files
859
- ]
860
-
861
- _collate_content_to_file(
862
- output_file,
863
- tree_content_lines,
864
- files_to_process,
865
- DEFAULT_ENCODING,
866
- DEFAULT_SEPARATOR_CHAR,
867
- DEFAULT_SEPARATOR_LINE_LENGTH,
868
- show_token_count,
869
- show_tree_stats,
870
- ProjectMode.SEARCH,
871
- )
872
-
873
-
874
- # --- DECONSTRUCTION FUNCTION ---
875
-
876
-
877
- def deconstruct_snapshot(snapshot_file_path: str) -> Dict[str, Any]:
878
- """Scans a compiled snapshot file, extracts the directory tree lines and file paths."""
879
- snapshot_path = Path(snapshot_file_path)
880
- if not snapshot_path.is_file():
881
- raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")
882
- tree_lines: List[str] = []
883
- file_paths: List[str] = []
884
- separator_pattern = re.compile(
885
- r"^[{}]{{4,}}[{}|]*$".format(
886
- re.escape(DEFAULT_SEPARATOR_CHAR), re.escape(DEFAULT_SEPARATOR_CHAR)
882
+ config_rows.append(
883
+ [
884
+ "Read Binary Files",
885
+ "[green]Yes[/green]" if read_binary_files else "[red]No[/red]",
886
+ ]
887
887
  )
888
+ console.print_table(
889
+ "Project Scan Configuration", ["Parameter", "Value"], config_rows
888
890
  )
889
- state = "LOOKING_FOR_TREE"
890
- with open(snapshot_path, "r", encoding=DEFAULT_ENCODING, errors="replace") as f:
891
- for line in f:
892
- line = line.strip()
893
- if state == "LOOKING_FOR_TREE":
894
- if line == TREE_HEADER_TEXT:
895
- state = "READING_TREE"
896
- elif state == "READING_TREE":
897
- if not line or separator_pattern.match(line):
898
- if tree_lines and separator_pattern.match(line):
899
- state = "LOOKING_FOR_CONTENT"
900
- continue
901
- if state == "READING_TREE" and not line.startswith("Key:"):
902
- tree_lines.append(line)
903
- elif state == "LOOKING_FOR_CONTENT":
904
- if line.startswith(FILE_HEADER_PREFIX):
905
- file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
906
- state = "READING_CONTENT"
907
- elif state == "READING_CONTENT":
908
- if line.startswith(FILE_HEADER_PREFIX):
909
- file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
910
- # Post-process to remove the key lines if they were accidentally captured
911
- tree_lines = [
912
- line
913
- for line in tree_lines
914
- if not line.strip().startswith("Key:")
915
- and not line.strip().startswith("(f=files")
916
- ]
917
- return {"tree_lines": tree_lines, "file_paths": file_paths}
918
-
919
-
920
- # --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---
921
-
922
-
923
- def process_project(
924
- root_dir_param: Optional[str] = None,
925
- output_file_name: str = "project_output.txt",
926
- mode: ProjectMode = ProjectMode.FILTER,
927
- file_types: Optional[List[str]] = None,
928
- ignore_dirs_in_path: Optional[List[str]] = None,
929
- language_presets: Optional[List[LanguagePreset]] = None,
930
- ignore_presets: Optional[List[IgnorePreset]] = None,
931
- whitelist_filename_substrings: Optional[List[str]] = None,
932
- ignore_filename_substrings: Optional[List[str]] = None,
933
- generate_tree: bool = True,
934
- search_keywords: Optional[List[str]] = None,
935
- search_file_contents: bool = False,
936
- full_path_compare: bool = True,
937
- max_workers: Optional[int] = None,
938
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
939
- tree_style_t_connector: Optional[str] = None,
940
- tree_style_l_connector: Optional[str] = None,
941
- tree_style_v_connector: Optional[str] = None,
942
- tree_style_h_spacer: Optional[str] = None,
943
- show_token_count: bool = False,
944
- show_tree_stats: bool = False,
945
- encoding: str = DEFAULT_ENCODING,
946
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
947
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
948
- ) -> None:
949
- """Main function to process a project directory in either FILTER or SEARCH mode."""
950
- actual_root_dir = validate_root_directory(root_dir_param)
951
- if actual_root_dir is None:
952
- sys.exit(1)
953
- style = tree_style_preset.to_style()
954
- final_style = TreeStyle(
955
- t_connector=tree_style_t_connector or style.t_connector,
956
- l_connector=tree_style_l_connector or style.l_connector,
957
- v_connector=tree_style_v_connector or style.v_connector,
958
- h_spacer=tree_style_h_spacer or style.h_spacer,
959
- )
960
- print(f"--- Starting Project Processing in {mode.name} Mode ---")
961
- if mode == ProjectMode.FILTER:
962
- filter_and_append_content(
963
- actual_root_dir,
964
- output_file_name,
965
- final_style,
966
- generate_tree,
967
- file_types,
968
- whitelist_filename_substrings,
969
- ignore_filename_substrings,
970
- ignore_dirs_in_path,
971
- language_presets,
972
- ignore_presets,
973
- encoding,
974
- separator_char,
975
- separator_line_len,
976
- show_token_count,
977
- show_tree_stats,
978
- )
979
- elif mode == ProjectMode.SEARCH:
980
- if not search_keywords:
981
- print("Error: Search mode requires 'search_keywords' to be provided.")
982
- return
983
- search_and_collate_content(
984
- actual_root_dir,
985
- search_keywords,
986
- output_file_name,
987
- final_style,
988
- file_types,
989
- ignore_dirs_in_path,
990
- language_presets,
991
- ignore_presets,
992
- search_file_contents,
993
- max_workers,
994
- full_path_compare,
995
- show_token_count,
996
- show_tree_stats,
997
- )
998
- print("--- Script Execution Finished ---")
999
891
 
892
+ @contextmanager
893
+ def progress_manager():
894
+ if RICH_AVAILABLE:
895
+ progress = Progress(
896
+ TextColumn("[progress.description]{task.description}"),
897
+ BarColumn(),
898
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
899
+ SpinnerColumn(),
900
+ TimeElapsedColumn(),
901
+ "{task.fields[status]}",
902
+ expand=True,
903
+ )
904
+ with Live(progress, console=console.console, refresh_per_second=10) as live:
905
+ yield progress
906
+ else:
907
+ with FallbackProgress() as progress:
908
+ yield progress
909
+
910
+ with progress_manager() as progress:
911
+ discover_task = progress.add_task("Discovering files", total=None, status="")
912
+ candidate_files = _discover_files(root_dir, criteria, progress, discover_task)
913
+ if RICH_AVAILABLE:
914
+ progress.update(
915
+ discover_task,
916
+ description=f"Discovered [bold green]{len(candidate_files)}[/bold green] candidates",
917
+ status="",
918
+ )
919
+ else:
920
+ progress.update(
921
+ discover_task,
922
+ description=f"Discovered {len(candidate_files)} candidates",
923
+ )
1000
924
 
1001
- def filter_project(
1002
- root_dir_param: Optional[str] = None,
1003
- output_file_name: str = "project_filter_output.txt",
1004
- file_types: Optional[List[str]] = None,
1005
- ignore_dirs_in_path: Optional[List[str]] = None,
1006
- language_presets: Optional[List[LanguagePreset]] = None,
1007
- ignore_presets: Optional[List[IgnorePreset]] = None,
1008
- whitelist_filename_substrings: Optional[List[str]] = None,
1009
- ignore_filename_substrings: Optional[List[str]] = None,
1010
- generate_tree: bool = True,
1011
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
1012
- tree_style_t_connector: Optional[str] = None,
1013
- tree_style_l_connector: Optional[str] = None,
1014
- tree_style_v_connector: Optional[str] = None,
1015
- tree_style_h_spacer: Optional[str] = None,
1016
- show_token_count: bool = False,
1017
- show_tree_stats: bool = False,
1018
- encoding: str = DEFAULT_ENCODING,
1019
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
1020
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
1021
- ) -> None:
1022
- """Utility wrapper for process_project in FILTER mode."""
1023
- process_project(
1024
- root_dir_param=root_dir_param,
1025
- output_file_name=output_file_name,
1026
- mode=ProjectMode.FILTER,
1027
- file_types=file_types,
1028
- ignore_dirs_in_path=ignore_dirs_in_path,
1029
- language_presets=language_presets,
1030
- ignore_presets=ignore_presets,
1031
- whitelist_filename_substrings=whitelist_filename_substrings,
1032
- ignore_filename_substrings=ignore_filename_substrings,
1033
- generate_tree=generate_tree,
1034
- tree_style_preset=tree_style_preset,
1035
- tree_style_t_connector=tree_style_t_connector,
1036
- tree_style_l_connector=tree_style_l_connector,
1037
- tree_style_v_connector=tree_style_v_connector,
1038
- tree_style_h_spacer=tree_style_h_spacer,
1039
- show_token_count=show_token_count,
1040
- show_tree_stats=show_tree_stats,
1041
- encoding=encoding,
1042
- separator_char=separator_char,
1043
- separator_line_len=separator_line_len,
1044
- )
925
+ matched_files = set()
926
+ if candidate_files:
927
+ if snapshot_mode:
928
+ matched_files = set(candidate_files)
929
+ if RICH_AVAILABLE:
930
+ progress.add_task(
931
+ "[dim]Keyword Processing[/dim]",
932
+ total=1,
933
+ completed=1,
934
+ status="[bold blue](Snapshot Mode)[/bold blue]",
935
+ )
936
+ else:
937
+ process_task = progress.add_task(
938
+ f"Processing {len(candidate_files)} files",
939
+ total=len(candidate_files),
940
+ status="",
941
+ )
942
+ matched_files = _process_files_concurrently(
943
+ candidate_files,
944
+ keywords,
945
+ search_file_contents,
946
+ full_path_compare,
947
+ max_workers,
948
+ progress,
949
+ process_task,
950
+ read_binary_files,
951
+ )
1045
952
 
953
+ output_path, total_bytes, token_count = None, 0, 0
954
+ if matched_files:
955
+ sorted_files = sorted(
956
+ list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix()
957
+ )
958
+ tree_lines = []
959
+ if generate_tree:
960
+ tree_task = progress.add_task(
961
+ "Generating file tree...", total=1, status=""
962
+ )
963
+ tree_lines = _generate_tree_with_stats(
964
+ root_dir, sorted_files, show_tree_stats
965
+ )
966
+ progress.update(
967
+ tree_task, completed=1, description="Generated file tree"
968
+ )
1046
969
 
1047
- def find_in_project(
1048
- root_dir_param: Optional[str] = None,
1049
- output_file_name: str = "project_search_output.txt",
1050
- search_keywords: Optional[List[str]] = None,
1051
- file_extensions_to_check: Optional[List[str]] = None,
1052
- ignore_dirs_in_path: Optional[List[str]] = None,
1053
- language_presets: Optional[List[LanguagePreset]] = None,
1054
- ignore_presets: Optional[List[IgnorePreset]] = None,
1055
- search_file_contents: bool = False,
1056
- full_path_compare: bool = True,
1057
- max_workers: Optional[int] = None,
1058
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
1059
- tree_style_t_connector: Optional[str] = None,
1060
- tree_style_l_connector: Optional[str] = None,
1061
- tree_style_v_connector: Optional[str] = None,
1062
- tree_style_h_spacer: Optional[str] = None,
1063
- show_token_count: bool = False,
1064
- show_tree_stats: bool = False,
1065
- encoding: str = DEFAULT_ENCODING,
1066
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
1067
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
1068
- ) -> None:
1069
- """Utility wrapper for process_project in SEARCH mode."""
1070
- if not search_keywords:
1071
- print("Error: 'search_keywords' must be provided for find_in_project.")
1072
- return
1073
- process_project(
1074
- root_dir_param=root_dir_param,
1075
- output_file_name=output_file_name,
1076
- mode=ProjectMode.SEARCH,
1077
- file_types=file_extensions_to_check,
1078
- ignore_dirs_in_path=ignore_dirs_in_path,
1079
- language_presets=language_presets,
1080
- ignore_presets=ignore_presets,
1081
- search_keywords=search_keywords,
1082
- search_file_contents=search_file_contents,
1083
- full_path_compare=full_path_compare,
1084
- max_workers=max_workers,
1085
- tree_style_preset=tree_style_preset,
1086
- tree_style_t_connector=tree_style_t_connector,
1087
- tree_style_l_connector=tree_style_l_connector,
1088
- tree_style_v_connector=tree_style_v_connector,
1089
- tree_style_h_spacer=tree_style_h_spacer,
1090
- show_token_count=show_token_count,
1091
- show_tree_stats=show_tree_stats,
1092
- encoding=encoding,
1093
- separator_char=separator_char,
1094
- separator_line_len=separator_line_len,
1095
- )
970
+ collate_task = progress.add_task(
971
+ f"Collating {len(sorted_files)} files",
972
+ total=len(sorted_files),
973
+ status="",
974
+ )
975
+ files_to_process = [
976
+ FileToProcess(f, f.relative_to(root_dir).as_posix())
977
+ for f in sorted_files
978
+ ]
979
+ output_path = Path(output_file_name).resolve()
980
+ total_bytes, token_count = _collate_content_to_file(
981
+ output_path,
982
+ tree_lines,
983
+ files_to_process,
984
+ show_tree_stats,
985
+ show_token_count,
986
+ exclude_whitespace_in_token_count,
987
+ progress,
988
+ collate_task,
989
+ )
1096
990
 
991
+ end_time = time.perf_counter()
992
+ summary_rows = [
993
+ ["Candidate Files", f"{len(candidate_files)}"],
994
+ ["Files Matched", f"[bold green]{len(matched_files)}[/bold green]"],
995
+ ["Total Time", f"{end_time - start_time:.2f} seconds"],
996
+ ["Output Size", f"{total_bytes / 1024:.2f} KB"],
997
+ ]
998
+ if show_token_count:
999
+ summary_rows.append(["Approximated Tokens", f"{token_count:,}"])
1000
+ summary_rows.append(["Output File", str(output_path or "N/A")])
1001
+ console.print_table("Scan Complete", ["Metric", "Value"], summary_rows)
1097
1002
 
1098
- __all__ = [
1099
- "process_project",
1100
- "filter_project",
1101
- "find_in_project",
1102
- "deconstruct_snapshot",
1103
- "ProjectMode",
1104
- "LanguagePreset",
1105
- "IgnorePreset",
1106
- "TreeStylePreset",
1107
- ]
1108
1003
 
1109
1004
  if __name__ == "__main__":
1110
- print("\n--- Running a custom filter scan with new stats format ---")
1111
- filter_project(
1112
- root_dir_param=".",
1113
- output_file_name="custom_snapshot_readable.txt",
1114
- file_types=[".py", "requirements.txt", ".sql", ".md"],
1115
- ignore_dirs_in_path=["venv", "build", "node_modules", "static", "templates"],
1005
+ generate_snapshot(
1006
+ root_directory=".",
1007
+ output_file_name="project_snapshot_final.txt",
1008
+ # No search keywords triggers Snapshot Mode
1009
+ language_presets=[LanguagePreset.PYTHON],
1010
+ ignore_presets=[
1011
+ IgnorePreset.PYTHON,
1012
+ IgnorePreset.BUILD_ARTIFACTS,
1013
+ IgnorePreset.VERSION_CONTROL,
1014
+ IgnorePreset.NODE_JS,
1015
+ IgnorePreset.IDE_METADATA,
1016
+ ],
1017
+ ignore_extensions=[".log", ".tmp"], # Example of new functionality
1018
+ generate_tree=True,
1116
1019
  show_tree_stats=True,
1117
1020
  show_token_count=True,
1021
+ exclude_whitespace_in_token_count=True,
1118
1022
  )