dirshot 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dirshot/dirshot.py CHANGED
@@ -2,1117 +2,1083 @@ import os
2
2
  import sys
3
3
  import re
4
4
  import time
5
+ import threading
5
6
  from pathlib import Path
6
7
  from dataclasses import dataclass, field
7
- from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
8
+ from typing import List, Optional, Set, Tuple, NamedTuple, Dict, Any
8
9
  from enum import Enum
9
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
10
11
  from io import StringIO
12
+ from contextlib import contextmanager
11
13
 
12
- # --- TQDM Dependency Handler ---
13
- try:
14
- from tqdm import tqdm
15
- except ImportError:
16
14
 
17
- # Define a functional fallback dummy tqdm class if the import fails.
18
- class tqdm:
19
- """A simple, text-based progress bar fallback if tqdm is not installed."""
15
+ def strip_markup(text: str) -> str:
16
+ """Removes rich-style markup tags from a string (e.g., [bold red]Error[/])"""
17
+ return re.sub(r"\[/?[^\]]+\]", "", str(text))
18
+
20
19
 
21
- def __init__(
22
- self, iterable=None, total=None, desc="", unit="it", postfix=None, **kwargs
20
+ # --- Dependency & Console Management ---
21
+ try:
22
+ from rich.console import Console
23
+ from rich.progress import (
24
+ Progress,
25
+ SpinnerColumn,
26
+ BarColumn,
27
+ TextColumn,
28
+ TimeElapsedColumn,
29
+ )
30
+ from rich.table import Table
31
+ from rich.live import Live
32
+ from rich.panel import Panel
33
+ from rich.text import Text
34
+
35
+ RICH_AVAILABLE = True
36
+ except ImportError:
37
+ RICH_AVAILABLE = False
38
+
39
+ class FallbackProgress:
40
+ """A simple, dependency-free progress handler for when 'rich' is not installed."""
41
+
42
+ def __init__(self):
43
+ self.tasks, self.task_count, self.active_line = {}, 0, ""
44
+
45
+ def add_task(self, description, total=None, **kwargs):
46
+ task_id = self.task_count
47
+ self.tasks[task_id] = {
48
+ "d": strip_markup(description),
49
+ "t": total,
50
+ "c": 0,
51
+ }
52
+ self.task_count += 1
53
+ return task_id
54
+
55
+ def update(
56
+ self, task_id, advance=0, completed=None, description=None, **kwargs
23
57
  ):
24
- self.iterable = iterable
25
- self.total = (
26
- total
27
- if total is not None
28
- else (len(iterable) if hasattr(iterable, "__len__") else None)
29
- )
30
- self.desc = desc
31
- self.unit = unit
32
- self.current = 0
33
- self.start_time = time.time()
34
- self._last_update_time = 0
35
- self._postfix = postfix or {}
36
-
37
- def __iter__(self):
38
- if self.iterable is None:
39
- raise TypeError("tqdm fallback must be initialized with an iterable.")
40
- for obj in self.iterable:
41
- yield obj
42
- self.update(1)
43
- self.close()
44
-
45
- def update(self, n=1):
46
- """Update the progress bar by n steps."""
47
- self.current += n
48
- now = time.time()
49
- if (
50
- self.total is None
51
- or now - self._last_update_time > 0.1
52
- or self.current == self.total
53
- ):
54
- self._last_update_time = now
55
- self._draw()
56
-
57
- def set_description(self, desc: str):
58
- """Set the description of the progress bar."""
59
- self.desc = desc
60
- self._draw()
61
-
62
- def set_postfix_str(self, s: str):
63
- self._postfix["info"] = s
64
- self._draw()
65
-
66
- def _draw(self):
67
- """Draw the progress bar to the console."""
68
- postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
69
-
70
- if self.total and self.total > 0:
71
- percent = int((self.current / self.total) * 100)
72
- bar_length = 25
73
- filled_length = int(bar_length * self.current // self.total)
74
- bar = "█" * filled_length + "-" * (bar_length - filled_length)
75
- progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
76
- else: # Case where total is not known
77
- progress_line = f"\r{self.desc}: {self.current} {self.unit}"
78
-
79
- if postfix_str:
80
- progress_line += f" [{postfix_str}]"
81
-
82
- # Pad with spaces to clear previous, longer lines
83
- terminal_width = 80
84
- sys.stdout.write(progress_line.ljust(terminal_width))
58
+ if task_id not in self.tasks:
59
+ return
60
+ task = self.tasks[task_id]
61
+ if description:
62
+ task["d"] = strip_markup(description)
63
+ task["c"] = completed if completed is not None else task["c"] + advance
64
+
65
+ # Simple progress string
66
+ count_str = f"{task['c']}"
67
+ if task["t"]:
68
+ percent = (task["c"] / task["t"]) * 100
69
+ count_str += f"/{task['t']} ({percent:.0f}%)"
70
+
71
+ line = f"-> {task['d']}: {count_str}"
72
+
73
+ # Pad with spaces to clear previous longer lines
74
+ padding = max(0, len(self.active_line) - len(line))
75
+ sys.stdout.write("\r" + line + " " * padding)
85
76
  sys.stdout.flush()
77
+ self.active_line = line
78
+
79
+ def __enter__(self):
80
+ return self
86
81
 
87
- def close(self):
88
- """Clean up the progress bar line."""
89
- # Print a newline to move off the progress bar line
82
+ def __exit__(self, exc_type, exc_val, exc_tb):
90
83
  sys.stdout.write("\n")
91
84
  sys.stdout.flush()
92
85
 
93
86
 
87
+ class ConsoleManager:
88
+ """A wrapper to gracefully handle console output with or without 'rich'."""
89
+
90
+ def __init__(self):
91
+ """Initializes the ConsoleManager, detecting if 'rich' is available."""
92
+ self.console = Console() if RICH_AVAILABLE else None
93
+
94
+ def log(self, message: str, style: str = ""):
95
+ """Logs a message to the console, applying a style if 'rich' is available."""
96
+ if self.console:
97
+ self.console.log(message, style=style)
98
+ else:
99
+ clean_msg = strip_markup(message)
100
+ print(f"[{time.strftime('%H:%M:%S')}] {clean_msg}")
101
+
102
+ def print_table(self, title: str, columns: List[str], rows: List[List[str]]):
103
+ """Prints a formatted table to the console."""
104
+ if self.console:
105
+ table = Table(
106
+ title=title,
107
+ show_header=True,
108
+ header_style="bold magenta",
109
+ border_style="dim",
110
+ )
111
+ for col in columns:
112
+ table.add_column(col)
113
+ for row in rows:
114
+ table.add_row(*row)
115
+ self.console.print(table)
116
+ else:
117
+ # Fallback ASCII table
118
+ print(f"\n{title}")
119
+
120
+ # Clean data and calculate widths
121
+ clean_cols = [strip_markup(c) for c in columns]
122
+ clean_rows = [[strip_markup(c) for c in r] for r in rows]
123
+
124
+ col_widths = [len(c) for c in clean_cols]
125
+ for row in clean_rows:
126
+ for i, cell in enumerate(row):
127
+ if i < len(col_widths):
128
+ col_widths[i] = max(col_widths[i], len(cell))
129
+
130
+ def print_sep(char="-", cross="+"):
131
+ print(cross + cross.join(char * (w + 2) for w in col_widths) + cross)
132
+
133
+ print_sep()
134
+ # Header
135
+ header_str = " | ".join(
136
+ f" {c:<{w}} " for c, w in zip(clean_cols, col_widths)
137
+ )
138
+ print(f"| {header_str} |")
139
+ print_sep("=")
140
+
141
+ # Rows
142
+ for row in clean_rows:
143
+ row_str = " | ".join(f" {c:<{w}} " for c, w in zip(row, col_widths))
144
+ print(f"| {row_str} |")
145
+
146
+ print_sep()
147
+
148
+
94
149
  # --- Configuration Constants ---
95
- DEFAULT_SEPARATOR_CHAR = "-"
96
- DEFAULT_SEPARATOR_LINE_LENGTH = 80
97
- DEFAULT_ENCODING = "utf-8"
98
- TREE_HEADER_TEXT = "Project File Structure"
99
- FILE_HEADER_PREFIX = "FILE: "
100
- TOKEN_APPROX_MODE = "CHAR_COUNT"
101
-
102
- # List of binary file extensions to skip during content search
150
+ DEFAULT_SEPARATOR_CHAR, DEFAULT_ENCODING = "-", "utf-8"
151
+ TREE_HEADER_TEXT, FILE_HEADER_PREFIX = "Project File Structure", "FILE: "
103
152
  BINARY_FILE_EXTENSIONS = {
104
- # Images
105
153
  ".png",
106
154
  ".jpg",
107
155
  ".jpeg",
108
156
  ".gif",
109
- ".bmp",
110
- ".ico",
111
- ".tiff",
112
- ".webp",
113
- # Documents
114
157
  ".pdf",
115
- ".doc",
116
- ".docx",
117
- ".xls",
118
- ".xlsx",
119
- ".ppt",
120
- ".pptx",
121
- ".odt",
122
- ".ods",
123
- # Archives
124
158
  ".zip",
125
- ".gz",
126
- ".tar",
127
- ".rar",
128
- ".7z",
129
- ".bz2",
130
- ".xz",
131
- # Executables & Binaries
132
159
  ".exe",
133
160
  ".dll",
134
161
  ".so",
135
- ".o",
136
- ".a",
137
- ".lib",
138
- ".bin",
139
- ".dat",
140
- ".db",
141
- ".sqlite",
142
- ".img",
143
- ".iso",
144
- # Compiled Code
145
- ".class",
146
162
  ".jar",
147
- ".war",
148
163
  ".pyc",
149
- ".pyo",
150
- # Audio/Video
151
164
  ".mp3",
152
- ".wav",
153
- ".flac",
154
- ".ogg",
155
165
  ".mp4",
156
- ".mkv",
157
- ".avi",
158
- ".mov",
159
- ".wmv",
160
- # Fonts
161
- ".ttf",
162
- ".otf",
163
- ".woff",
164
- ".woff2",
165
166
  }
166
167
 
167
168
 
168
- # --- Public Enums for Import and Usage ---
169
-
170
-
171
- class ProjectMode(Enum):
172
- """The mode of operation for the script."""
173
-
174
- FILTER = "filter"
175
- SEARCH = "search"
176
-
177
-
169
+ # --- Base Lists for Presets ---
170
+ # These are defined outside the enums to allow for safe composition.
171
+ _PYTHON_BASE = [
172
+ ".py",
173
+ ".pyw",
174
+ "requirements.txt",
175
+ "Pipfile",
176
+ "pyproject.toml",
177
+ "setup.py",
178
+ ]
179
+ _JAVASCRIPT_BASE = [
180
+ ".js",
181
+ ".jsx",
182
+ ".ts",
183
+ ".tsx",
184
+ ".mjs",
185
+ ".cjs",
186
+ "package.json",
187
+ "jsconfig.json",
188
+ "tsconfig.json",
189
+ ]
190
+ _RUBY_BASE = [".rb", "Gemfile", "Rakefile", ".gemspec"]
191
+ _PHP_BASE = [".php", "composer.json", "index.php"]
192
+ _JAVA_BASE = [".java", ".jar", ".war", "pom.xml", ".properties"]
193
+ _KOTLIN_BASE = [".kt", ".kts", ".gradle", "build.gradle.kts"]
194
+ _CSHARP_BASE = [".cs", ".csproj", ".sln", "appsettings.json", "Web.config", ".csx"]
195
+ _C_CPP_BASE = [".c", ".cpp", ".h", ".hpp", "Makefile", "CMakeLists.txt", ".cxx", ".hxx"]
196
+ _RUST_BASE = [".rs", "Cargo.toml", "Cargo.lock"]
197
+ _SWIFT_BASE = [".swift", "Package.swift"]
198
+ _OBJECTIVE_C_BASE = [".m", ".mm", ".h"]
199
+ _ELIXIR_BASE = [".ex", ".exs", "mix.exs"]
200
+ _DART_BASE = [".dart", "pubspec.yaml"]
201
+ _SCALA_BASE = [".scala", ".sbt", "build.sbt"]
202
+ _R_LANG_BASE = [".r", ".R", ".Rmd"]
203
+ _LUA_BASE = [".lua"]
204
+
205
+ _IDE_VSCODE = [".vscode"]
206
+ _IDE_JETBRAINS = [".idea"]
207
+ _IDE_SUBLIME = ["*.sublime-project", "*.sublime-workspace"]
208
+ _IDE_ECLIPSE = [".project", ".settings", ".classpath"]
209
+ _IDE_NETBEANS = ["nbproject"]
210
+ _IDE_ATOM = [".atom"]
211
+ _IDE_VIM = ["*.swp", "*.swo"]
212
+ _IDE_XCODE = ["*.xcodeproj", "*.xcworkspace", "xcuserdata"]
213
+
214
+
215
+ # --- Enums and Data Structures ---
178
216
  class LanguagePreset(Enum):
179
- """Predefined sets of file extensions/names for common languages/frameworks."""
180
-
181
- PYTHON = [
182
- ".py",
183
- ".pyw",
184
- "setup.py",
185
- "requirements.txt",
186
- "Pipfile",
187
- "pyproject.toml",
217
+ """Provides an extensive list of presets for common language file extensions and key project files."""
218
+
219
+ PYTHON = _PYTHON_BASE
220
+ JAVASCRIPT = _JAVASCRIPT_BASE
221
+ JAVA = _JAVA_BASE
222
+ KOTLIN = _KOTLIN_BASE
223
+ C_CPP = _C_CPP_BASE
224
+ C_SHARP = _CSHARP_BASE
225
+ GO = [".go", "go.mod", "go.sum"]
226
+ RUST = _RUST_BASE
227
+ RUBY = _RUBY_BASE
228
+ PHP = _PHP_BASE
229
+ SWIFT = _SWIFT_BASE
230
+ OBJECTIVE_C = _OBJECTIVE_C_BASE
231
+ DART = _DART_BASE
232
+ LUA = _LUA_BASE
233
+ PERL = [".pl", ".pm", ".t"]
234
+ R_LANG = _R_LANG_BASE
235
+ SCALA = _SCALA_BASE
236
+ GROOVY = [".groovy", ".gvy", ".gy", ".gsh"]
237
+ HASKELL = [".hs", ".lhs", "cabal.project"]
238
+ JULIA = [".jl"]
239
+ ZIG = [".zig", "build.zig"]
240
+ NIM = [".nim", ".nimble"]
241
+ ELIXIR = _ELIXIR_BASE
242
+ CLOJURE = [".clj", ".cljs", ".cljc", "project.clj", "deps.edn"]
243
+ F_SHARP = [".fs", ".fsi", ".fsx"]
244
+ OCAML = [".ml", ".mli", "dune-project"]
245
+ ELM = [".elm", "elm.json"]
246
+ PURE_SCRIPT = [".purs", "spago.dhall"]
247
+ COMMON_LISP = [".lisp", ".cl", ".asd"]
248
+ SCHEME = [".scm", ".ss"]
249
+ RACKET = [".rkt"]
250
+ WEB_FRONTEND = [".html", ".htm", ".css", ".scss", ".sass", ".less", ".styl"]
251
+ REACT = _JAVASCRIPT_BASE
252
+ NODE_JS = _JAVASCRIPT_BASE
253
+ EXPRESS_JS = _JAVASCRIPT_BASE
254
+ NEST_JS = _JAVASCRIPT_BASE + ["nest-cli.json"]
255
+ VUE = _JAVASCRIPT_BASE + [".vue", "vue.config.js"]
256
+ ANGULAR = _JAVASCRIPT_BASE + ["angular.json"]
257
+ SVELTE = _JAVASCRIPT_BASE + [".svelte", "svelte.config.js"]
258
+ EMBER = _JAVASCRIPT_BASE + ["ember-cli-build.js"]
259
+ PUG = [".pug", ".jade"]
260
+ HANDLEBARS = [".hbs", ".handlebars"]
261
+ EJS = [".ejs"]
262
+ DJANGO = _PYTHON_BASE + ["manage.py", "wsgi.py", "asgi.py", ".jinja", ".jinja2"]
263
+ FLASK = _PYTHON_BASE + ["app.py", "wsgi.py"]
264
+ RAILS = _RUBY_BASE + ["routes.rb", ".erb", ".haml", ".slim", "config.ru"]
265
+ LARAVEL = _PHP_BASE + [".blade.php", "artisan"]
266
+ SYMFONY = _PHP_BASE + ["symfony.lock"]
267
+ PHOENIX = _ELIXIR_BASE
268
+ SPRING = _JAVA_BASE + ["application.properties", "application.yml"]
269
+ ASP_NET = _CSHARP_BASE + ["*.cshtml", "*.vbhtml", "*.razor"]
270
+ ROCKET_RS = _RUST_BASE + ["Rocket.toml"]
271
+ ACTIX_WEB = _RUST_BASE
272
+ IOS_NATIVE = (
273
+ _SWIFT_BASE
274
+ + _OBJECTIVE_C_BASE
275
+ + [".storyboard", ".xib", "Info.plist", ".pbxproj"]
276
+ )
277
+ ANDROID_NATIVE = _JAVA_BASE + _KOTLIN_BASE + ["AndroidManifest.xml", ".xml"]
278
+ FLUTTER = _DART_BASE
279
+ REACT_NATIVE = _JAVASCRIPT_BASE + ["app.json"]
280
+ XAMARIN = _CSHARP_BASE + [".xaml"]
281
+ DOTNET_MAUI = XAMARIN
282
+ NATIVESCRIPT = _JAVASCRIPT_BASE + ["nativescript.config.ts"]
283
+ UNITY = _CSHARP_BASE + [".unity", ".prefab", ".asset", ".mat", ".unitypackage"]
284
+ UNREAL_ENGINE = _C_CPP_BASE + [".uproject", ".uasset", ".ini"]
285
+ GODOT = [".gd", ".tscn", ".tres", "project.godot"]
286
+ LOVE2D = _LUA_BASE + ["conf.lua", "main.lua"]
287
+ MONOGAME = _CSHARP_BASE + [".mgcb"]
288
+ DOCKER = ["Dockerfile", ".dockerignore", "docker-compose.yml"]
289
+ TERRAFORM = [".tf", ".tfvars", ".tf.json"]
290
+ ANSIBLE = ["ansible.cfg", "inventory.ini"]
291
+ PULUMI = ["Pulumi.yaml"]
292
+ CHEF = _RUBY_BASE
293
+ PUPPET = [".pp"]
294
+ VAGRANT = ["Vagrantfile"]
295
+ GITHUB_ACTIONS = [".yml", ".yaml"]
296
+ GITLAB_CI = [".gitlab-ci.yml"]
297
+ JENKINS = ["Jenkinsfile"]
298
+ CIRCLE_CI = ["config.yml"]
299
+ KUBERNETES = [".yml", ".yaml"]
300
+ BICEP = [".bicep"]
301
+ CLOUDFORMATION = [".json", ".yml"]
302
+ DATA_SCIENCE_NOTEBOOKS = [".ipynb", ".Rmd"]
303
+ SQL = [".sql", ".ddl", ".dml"]
304
+ APACHE_SPARK = list(set(_SCALA_BASE + _PYTHON_BASE + _JAVA_BASE + _R_LANG_BASE))
305
+ ML_CONFIG = ["params.yaml"]
306
+ ELECTRON = _JAVASCRIPT_BASE
307
+ TAURI = _RUST_BASE + ["tauri.conf.json"]
308
+ QT = _C_CPP_BASE + [".pro", ".ui", ".qml"]
309
+ GTK = _C_CPP_BASE + [".ui", "meson.build"]
310
+ WPF = _CSHARP_BASE + [".xaml"]
311
+ WINDOWS_FORMS = _CSHARP_BASE
312
+ BASH = [".sh", ".bash"]
313
+ POWERSHELL = [".ps1", ".psm1"]
314
+ BATCH = [".bat", ".cmd"]
315
+ SOLIDITY = [".sol"]
316
+ VYPER = [".vy"]
317
+ VERILOG = [".v", ".vh"]
318
+ VHDL = [".vhd", ".vhdl"]
319
+ MARKUP = [".md", ".markdown", ".rst", ".adoc", ".asciidoc", ".tex", ".bib"]
320
+ CONFIGURATION = [
321
+ ".json",
322
+ ".xml",
323
+ ".yml",
324
+ ".yaml",
325
+ ".ini",
326
+ ".toml",
327
+ ".env",
328
+ ".conf",
329
+ ".cfg",
188
330
  ]
189
- JAVASCRIPT = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"]
190
- WEB = [".html", ".css", ".scss", ".less"]
191
- JAVA = [".java", ".groovy", ".kt", ".gradle", ".properties"]
331
+ EDITOR_CONFIG = [".editorconfig"]
332
+ LICENSE = ["LICENSE", "LICENSE.md", "COPYING"]
333
+ CHANGELOG = ["CHANGELOG", "CHANGELOG.md"]
192
334
 
193
335
 
194
336
  class IgnorePreset(Enum):
195
- """Predefined sets of path components and filename substrings to ignore."""
196
-
197
- VERSION_CONTROL = [".git", ".svn", ".hg", ".idea"]
198
- NODE_MODULES = ["node_modules", "package-lock.json", "yarn.lock"]
199
- PYTHON_ENV = ["__pycache__", "venv", ".venv", "env", "lib", "bin"]
200
- BUILD_ARTIFACTS = ["dist", "build", "target", "out", "temp", "tmp"]
201
- TEST_FILES = ["test", "spec", "fixture", "example", "mock"]
202
-
203
-
204
- class TreeStylePreset(Enum):
205
- """Predefined character sets for directory tree rendering."""
206
-
207
- UNICODE = ("├── ", "└── ", "│ ", " ")
208
- ASCII = ("|-- ", "+-- ", "| ", " ")
209
- COMPACT = ("|---", "`---", "| ", " ")
210
-
211
- def to_style(self) -> "TreeStyle":
212
- return TreeStyle(self.value[0], self.value[1], self.value[2], self.value[3])
213
-
214
-
215
- class TreeStyle(NamedTuple):
216
- """Holds the characters used to render the directory tree."""
337
+ """Provides an extensive list of presets for common directories, files, and patterns to ignore."""
338
+
339
+ VERSION_CONTROL = [".git", ".svn", ".hg", ".bzr", ".gitignore", ".gitattributes"]
340
+ OS_FILES = [".DS_Store", "Thumbs.db", "desktop.ini", "ehthumbs.db"]
341
+ BUILD_ARTIFACTS = [
342
+ "dist",
343
+ "build",
344
+ "target",
345
+ "out",
346
+ "bin",
347
+ "obj",
348
+ "release",
349
+ "debug",
350
+ ]
351
+ LOGS = ["*.log", "logs", "npm-debug.log*", "yarn-debug.log*", "yarn-error.log*"]
352
+ TEMP_FILES = ["temp", "tmp", "*.tmp", "*~", "*.bak", "*.swp", "*.swo"]
353
+ SECRET_FILES = [
354
+ ".env",
355
+ "*.pem",
356
+ "*.key",
357
+ "credentials.json",
358
+ "*.p12",
359
+ "*.pfx",
360
+ "secrets.yml",
361
+ ".env.local",
362
+ ]
363
+ COMPRESSED_ARCHIVES = ["*.zip", "*.tar", "*.gz", "*.rar", "*.7z", "*.tgz"]
364
+ IDE_METADATA_VSCODE = _IDE_VSCODE
365
+ IDE_METADATA_JETBRAINS = _IDE_JETBRAINS
366
+ IDE_METADATA_SUBLIME = _IDE_SUBLIME
367
+ IDE_METADATA_ECLIPSE = _IDE_ECLIPSE
368
+ IDE_METADATA_NETBEANS = _IDE_NETBEANS
369
+ IDE_METADATA_ATOM = _IDE_ATOM
370
+ IDE_METADATA_VIM = _IDE_VIM
371
+ IDE_METADATA_XCODE = _IDE_XCODE
372
+ IDE_METADATA = list(
373
+ set(
374
+ _IDE_VSCODE
375
+ + _IDE_JETBRAINS
376
+ + _IDE_SUBLIME
377
+ + _IDE_ECLIPSE
378
+ + _IDE_NETBEANS
379
+ + _IDE_ATOM
380
+ + _IDE_VIM
381
+ + _IDE_XCODE
382
+ )
383
+ )
384
+ NODE_JS = [
385
+ "node_modules",
386
+ "package-lock.json",
387
+ "yarn.lock",
388
+ "pnpm-lock.yaml",
389
+ ".npm",
390
+ ]
391
+ PYTHON = [
392
+ "__pycache__",
393
+ "venv",
394
+ ".venv",
395
+ "env",
396
+ "lib",
397
+ "lib64",
398
+ ".pytest_cache",
399
+ ".tox",
400
+ "*.pyc",
401
+ ".mypy_cache",
402
+ "htmlcov",
403
+ ".coverage",
404
+ ]
405
+ RUBY = ["vendor/bundle", ".bundle", "Gemfile.lock", ".gem", "coverage"]
406
+ PHP = ["vendor", "composer.lock"]
407
+ DOTNET = ["bin", "obj", "*.user", "*.suo"]
408
+ RUST = ["target", "Cargo.lock"]
409
+ GO = ["vendor", "go.sum"]
410
+ JAVA_MAVEN = ["target"]
411
+ JAVA_GRADLE = [".gradle", "build"]
412
+ ELIXIR = ["_build", "deps", "mix.lock"]
413
+ DART_FLUTTER = [".dart_tool", ".packages", "build", ".flutter-plugins"]
414
+ ELM = ["elm-stuff"]
415
+ HASKELL = ["dist-newstyle", ".stack-work"]
416
+ TESTING_REPORTS = ["coverage", "junit.xml", "lcov.info", ".nyc_output"]
417
+ STATIC_SITE_GENERATORS = ["_site", "public", "resources"]
418
+ CMS_UPLOADS = ["wp-content/uploads"]
419
+ TERRAFORM = [".terraform", "*.tfstate", "*.tfstate.backup", ".terraform.lock.hcl"]
420
+ JUPYTER_NOTEBOOKS = [".ipynb_checkpoints"]
421
+ ANDROID = [".gradle", "build", "local.properties", "*.apk", "*.aab", "captures"]
422
+ IOS = ["Pods", "Carthage", "DerivedData", "build"]
423
+ UNITY = [
424
+ "Library",
425
+ "Temp",
426
+ "Logs",
427
+ "UserSettings",
428
+ "MemoryCaptures",
429
+ "Assets/AssetStoreTools",
430
+ ]
431
+ UNREAL_ENGINE = ["Intermediate", "Saved", "DerivedDataCache", ".vs"]
432
+ GODOT_ENGINE = [".import", "export_presets.cfg"]
433
+ SERVERLESS_FRAMEWORK = [".serverless"]
434
+ AWS = [".aws-sam"]
435
+ VERCEL = [".vercel"]
436
+ NETLIFY = [".netlify"]
437
+ MACOS = [
438
+ ".DS_Store",
439
+ ".AppleDouble",
440
+ ".LSOverride",
441
+ "._*",
442
+ ".Spotlight-V100",
443
+ ".Trashes",
444
+ ]
445
+ WINDOWS = ["Thumbs.db", "ehthumbs.db", "$RECYCLE.BIN/", "Desktop.ini"]
446
+ DEPRECATED_DEPENDENCIES = ["bower_components"]
217
447
 
218
- t_connector: str
219
- l_connector: str
220
- v_connector: str
221
- h_spacer: str
222
448
 
449
+ class FileToProcess(NamedTuple):
450
+ """Represents a file that needs to be processed and included in the output."""
223
451
 
224
- # --- Helper Data Structures ---
452
+ absolute_path: Path
453
+ relative_path_posix: str
225
454
 
226
455
 
227
456
  @dataclass
228
457
  class FilterCriteria:
229
- """Holds normalized filter criteria for files and directories."""
458
+ """Holds the combined filter criteria for scanning files and directories."""
230
459
 
231
460
  file_extensions: Set[str] = field(default_factory=set)
232
- exact_filenames: Set[str] = field(default_factory=set)
233
- whitelist_fname_substrings: Set[str] = field(default_factory=set)
234
- ignore_fname_substrings: Set[str] = field(default_factory=set)
235
- ignore_path_components: Set[str] = field(default_factory=set)
461
+ ignore_if_in_path: Set[str] = field(default_factory=set)
462
+ ignore_extensions: Set[str] = field(default_factory=set)
463
+ specific_files: Set[str] = field(default_factory=set)
464
+ case_sensitive: bool = False
236
465
 
237
466
  @classmethod
238
467
  def normalize_inputs(
239
468
  cls,
240
- file_types: Optional[List[str]],
241
- whitelist_substrings: Optional[List[str]],
242
- ignore_filename_substrings: Optional[List[str]],
243
- ignore_path_components_list: Optional[List[str]],
244
- language_presets: Optional[List[LanguagePreset]] = None,
469
+ file_types: Optional[List[str]] = None,
470
+ ignore_if_in_path: Optional[List[str]] = None,
471
+ ignore_extensions: Optional[List[str]] = None,
472
+ lang_presets: Optional[List[LanguagePreset]] = None,
245
473
  ignore_presets: Optional[List[IgnorePreset]] = None,
474
+ files: Optional[List[str]] = None,
475
+ case_sensitive: bool = False,
246
476
  ) -> "FilterCriteria":
247
- all_file_types, all_ignore_paths, all_ignore_fnames = (
248
- set(file_types or []),
249
- set(ignore_path_components_list or []),
250
- set(ignore_filename_substrings or []),
251
- )
252
- if language_presets:
253
- for preset in language_presets:
254
- all_file_types.update(preset.value)
255
- if ignore_presets:
256
- for preset in ignore_presets:
257
- all_ignore_paths.update(preset.value)
258
- all_ignore_fnames.update(preset.value)
259
- norm_exts, norm_exact_fnames = set(), set()
260
- for ft in all_file_types:
261
- ft_lower = ft.lower().strip()
262
- if ft_lower.startswith("."):
263
- norm_exts.add(ft_lower)
264
- elif ft_lower:
265
- norm_exact_fnames.add(ft_lower)
477
+ """
478
+ Consolidates various filter inputs into a single FilterCriteria object.
479
+
480
+ Args:
481
+ file_types (list, optional): A list of file extensions to include.
482
+ ignore_if_in_path (list, optional): A list of directory/file substring names to ignore.
483
+ ignore_extensions (list, optional): A list of file extensions to ignore.
484
+ lang_presets (list, optional): A list of LanguagePreset enums.
485
+ ignore_presets (list, optional): A list of IgnorePreset enums.
486
+ files (list, optional): A list of specific filenames to include.
487
+ case_sensitive (bool): If True, filters are case sensitive.
488
+
489
+ Returns:
490
+ FilterCriteria: An object containing the combined sets of filters.
491
+ """
492
+
493
+ def clean(s):
494
+ s = s.strip()
495
+ return s if case_sensitive else s.lower()
496
+
497
+ all_exts = {clean(ft) for ft in file_types or []}
498
+ all_ignore_paths = {clean(ip) for ip in ignore_if_in_path or []}
499
+ all_ignore_exts = {clean(ie) for ie in ignore_extensions or []}
500
+ all_specific_files = {clean(f) for f in files or []}
501
+
502
+ for p in lang_presets or []:
503
+ for item in p.value:
504
+ all_exts.add(clean(item))
505
+
506
+ for p in ignore_presets or []:
507
+ for item in p.value:
508
+ all_ignore_paths.add(clean(item))
509
+
266
510
  return cls(
267
- file_extensions=norm_exts,
268
- exact_filenames=norm_exact_fnames,
269
- whitelist_fname_substrings=(
270
- set(s.lower() for s in whitelist_substrings if s.strip())
271
- if whitelist_substrings
272
- else set()
273
- ),
274
- ignore_fname_substrings=set(
275
- s.lower() for s in all_ignore_fnames if s.strip()
276
- ),
277
- ignore_path_components=set(
278
- d.lower() for d in all_ignore_paths if d.strip()
279
- ),
511
+ file_extensions=all_exts,
512
+ ignore_if_in_path=all_ignore_paths,
513
+ ignore_extensions=all_ignore_exts,
514
+ specific_files=all_specific_files,
515
+ case_sensitive=case_sensitive,
280
516
  )
281
517
 
282
518
 
283
- class FileToProcess(NamedTuple):
284
- """Represents a file selected for content processing."""
519
+ # --- Core Logic Functions ---
520
+ def _discover_files(
521
+ root_dir: Path, criteria: FilterCriteria, progress: Any, task_id: Any
522
+ ) -> List[Path]:
523
+ """
524
+ Recursively scans a directory to find all files matching the criteria.
285
525
 
286
- absolute_path: Path
287
- relative_path_posix: str
526
+ Args:
527
+ root_dir (Path): The directory to start the scan from.
528
+ criteria (FilterCriteria): The filtering criteria to apply.
529
+ progress (Any): The progress bar object (from rich or fallback).
530
+ task_id (Any): The ID of the progress bar task to update.
288
531
 
532
+ Returns:
533
+ List[Path]: A list of absolute paths to the candidate files.
534
+ """
535
+ candidate_files, dirs_scanned = [], 0
289
536
 
290
- # --- Helper Functions ---
537
+ def recursive_scan(current_path: Path):
538
+ nonlocal dirs_scanned
539
+ try:
540
+ for entry in os.scandir(current_path):
541
+ # Path relative to the project root, used for substring check in path
542
+ # We use string representation for the check
543
+ rel_path = Path(entry.path).relative_to(root_dir)
544
+ rel_path_str = str(rel_path)
545
+ entry_name = entry.name
546
+
547
+ # Normalize for case check
548
+ if not criteria.case_sensitive:
549
+ rel_path_str = rel_path_str.lower()
550
+ entry_name = entry_name.lower()
551
+
552
+ # Ignore Logic: Substring matching in the path
553
+ # If any ignore string is a substring of the relative path, skip it.
554
+ if any(
555
+ ignored in rel_path_str for ignored in criteria.ignore_if_in_path
556
+ ):
557
+ continue
291
558
 
559
+ if entry.is_dir():
560
+ recursive_scan(Path(entry.path))
561
+ dirs_scanned += 1
562
+ if progress:
563
+ progress.update(
564
+ task_id,
565
+ completed=dirs_scanned,
566
+ description=f"Discovering files in [cyan]{entry.name}[/cyan]",
567
+ )
568
+ elif entry.is_file():
569
+ # Specific File Inclusion
570
+ if (
571
+ criteria.specific_files
572
+ and entry_name not in criteria.specific_files
573
+ ):
574
+ continue
292
575
 
293
- def validate_root_directory(root_dir_param: Optional[str]) -> Optional[Path]:
294
- original_param_for_messaging = (
295
- root_dir_param if root_dir_param else "current working directory"
296
- )
297
- try:
298
- resolved_path = Path(root_dir_param or Path.cwd()).resolve(strict=True)
299
- except Exception as e:
300
- print(
301
- f"Error: Could not resolve root directory '{original_param_for_messaging}': {e}"
302
- )
303
- return None
304
- if not resolved_path.is_dir():
305
- print(f"Error: Root path '{resolved_path}' is not a directory.")
306
- return None
307
- return resolved_path
576
+ # Extension filtering
577
+ file_ext = Path(entry.path).suffix
578
+ if not criteria.case_sensitive:
579
+ file_ext = file_ext.lower()
308
580
 
581
+ if (
582
+ criteria.ignore_extensions
583
+ and file_ext in criteria.ignore_extensions
584
+ ):
585
+ continue
586
+
587
+ # Inclusion Logic
588
+ # Include if no inclusion filters are set OR ext is allowed OR file is specifically allowed
589
+ if (
590
+ not criteria.file_extensions
591
+ or file_ext in criteria.file_extensions
592
+ or (
593
+ criteria.specific_files
594
+ and entry_name in criteria.specific_files
595
+ )
596
+ ):
597
+ candidate_files.append(Path(entry.path))
309
598
 
310
- def _should_include_entry(
311
- entry_path: Path,
312
- root_dir: Path,
313
- criteria: FilterCriteria,
314
- is_dir: bool,
315
- log_func: Optional[Callable[[str], None]] = None,
316
- ) -> bool:
317
- try:
318
- relative_path = entry_path.relative_to(root_dir)
319
- except ValueError:
320
- return False
321
- entry_name_lower = entry_path.name.lower()
322
- if criteria.ignore_path_components and any(
323
- part.lower() in criteria.ignore_path_components for part in relative_path.parts
324
- ):
325
- return False
326
- if is_dir:
327
- return True
328
- file_ext_lower = entry_path.suffix.lower()
329
- matched_type = (file_ext_lower in criteria.file_extensions) or (
330
- entry_name_lower in criteria.exact_filenames
331
- )
332
- if not criteria.file_extensions and not criteria.exact_filenames:
333
- matched_type = True
334
- if not matched_type:
335
- return False
336
- if criteria.whitelist_fname_substrings and not any(
337
- sub in entry_name_lower for sub in criteria.whitelist_fname_substrings
338
- ):
339
- return False
340
- if criteria.ignore_fname_substrings and any(
341
- sub in entry_name_lower for sub in criteria.ignore_fname_substrings
342
- ):
343
- return False
344
- return True
599
+ except (PermissionError, FileNotFoundError):
600
+ pass
601
+
602
+ recursive_scan(root_dir)
603
+ return candidate_files
345
604
 
346
605
 
347
606
  def process_file_for_search(
348
607
  file_path: Path,
349
- normalized_keywords: List[str],
350
- search_file_contents: bool,
351
- full_path_compare: bool,
608
+ keywords: List[str],
609
+ search_content: bool,
610
+ full_path: bool,
611
+ activity: Dict,
612
+ read_binary_files: bool,
613
+ case_sensitive: bool,
352
614
  ) -> Optional[Path]:
353
615
  """
354
- Checks a single file for keyword matches. Skips content search for binary files.
355
- """
356
- compare_target = str(file_path) if full_path_compare else file_path.name
357
- if any(key in compare_target.lower() for key in normalized_keywords):
358
- return file_path
616
+ Processes a single file to see if it matches the search criteria.
359
617
 
360
- if search_file_contents:
361
- # Before reading content, check if it's a known binary file type
362
- if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
363
- return None # Do not attempt to read binary file content
618
+ A match can occur if a keyword is found in the filename or, if enabled,
619
+ within the file's content.
620
+ """
621
+ thread_id = threading.get_ident()
622
+ activity[thread_id] = file_path.name
623
+ try:
624
+ compare_target = str(file_path) if full_path else file_path.name
625
+
626
+ if not case_sensitive:
627
+ compare_target = compare_target.lower()
628
+ # Keywords should already be normalized by the caller if not case_sensitive
629
+
630
+ if any(key in compare_target for key in keywords):
631
+ return file_path
364
632
 
365
- try:
366
- with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
367
- for line in f:
368
- if any(key in line.lower() for key in normalized_keywords):
369
- return file_path
370
- except (IOError, OSError):
371
- pass # Ignore files that can't be opened
372
- return None
373
-
374
-
375
- def _calculate_total_stats(
376
- root_dir: Path, criteria: FilterCriteria
377
- ) -> Dict[Path, Tuple[int, int]]:
378
- stats: Dict[Path, Tuple[int, int]] = {}
379
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
380
- current_dir = Path(dirpath_str)
381
- all_children = [current_dir / d for d in dirnames] + [
382
- current_dir / f for f in filenames
383
- ]
384
- total_files, total_dirs = 0, 0
385
- for child_path in all_children:
633
+ if search_content and (
634
+ read_binary_files or file_path.suffix.lower() not in BINARY_FILE_EXTENSIONS
635
+ ):
386
636
  try:
387
- is_dir = child_path.is_dir()
637
+ with file_path.open("r", encoding="utf-8", errors="ignore") as f:
638
+ for line in f:
639
+ if not case_sensitive:
640
+ line = line.lower()
641
+ if any(key in line for key in keywords):
642
+ return file_path
388
643
  except OSError:
389
- continue
390
- if criteria.ignore_path_components:
391
- try:
392
- relative_path = child_path.relative_to(root_dir)
393
- except ValueError:
394
- continue
395
- if any(
396
- part.lower() in criteria.ignore_path_components
397
- for part in relative_path.parts
398
- ):
399
- continue
400
- if is_dir:
401
- total_dirs += 1
402
- else:
403
- total_files += 1
404
- stats[current_dir] = (total_files, total_dirs)
405
- dirnames[:] = [
406
- d
407
- for d in dirnames
408
- if (current_dir / d).name.lower() not in criteria.ignore_path_components
409
- ]
410
- return stats
644
+ pass
645
+ return None
646
+ finally:
647
+ activity[thread_id] = ""
411
648
 
412
649
 
413
- # --- Tree Generation Functions ---
650
def _process_files_concurrently(
    files: List[Path],
    keywords: List[str],
    search_content: bool,
    full_path: bool,
    max_workers: Optional[int],
    progress: Any,
    task_id: Any,
    read_binary_files: bool,
    case_sensitive: bool,
) -> Set[Path]:
    """
    Fan the candidate files out over a thread pool and collect the matches.

    Each worker runs ``process_file_for_search``; while futures complete, the
    shared activity map is rendered into the progress display (a rich Panel
    when available). Returns the set of paths that matched.
    """
    matched: Set[Path] = set()
    activity: Dict[int, str] = {}  # thread id -> file currently being scanned

    if not case_sensitive:
        # Lower-case once up front so workers compare without re-normalizing.
        keywords = [keyword.lower() for keyword in keywords]

    worker_count = max_workers or (os.cpu_count() or 1) + 4
    with ThreadPoolExecutor(
        max_workers=worker_count, thread_name_prefix="scanner"
    ) as pool:
        pending = {}
        for candidate in files:
            future = pool.submit(
                process_file_for_search,
                candidate,
                keywords,
                search_content,
                full_path,
                activity,
                read_binary_files,
                case_sensitive,
            )
            pending[future] = candidate

        for done in as_completed(pending):
            if progress:
                # Snapshot of which worker threads are mid-file right now.
                busy = {
                    f"T{str(tid)[-3:]}": filename
                    for tid, filename in activity.items()
                    if filename
                }
                progress.update(
                    task_id,
                    advance=1,
                    description=f"Processing [yellow]{len(busy)} threads[/yellow]",
                )
                if RICH_AVAILABLE:
                    panel = Panel(
                        Text(
                            "\n".join(
                                f"[bold cyan]{label}[/]: {filename}"
                                for label, filename in busy.items()
                            )
                        ),
                        border_style="dim",
                        title="[dim]Thread Activity",
                    )
                    progress.update(task_id, status=panel)
            if hit := done.result():
                matched.add(hit)

    if progress and RICH_AVAILABLE:
        progress.update(task_id, status="[bold green]Done![/bold green]")
    return matched
+ return matched_files
414
716
 
415
717
 
416
- def _generate_tree_lines(
417
- root_dir: Path, criteria: FilterCriteria, style: TreeStyle, show_stats: bool
718
+ def _generate_tree_with_stats(
719
+ root_dir: Path, file_paths: List[Path], show_stats: bool
418
720
  ) -> List[str]:
419
- """Generates a list of strings representing the directory tree based on criteria, style, and stats."""
420
- dir_stats: Optional[Dict[Path, Tuple[int, int]]] = (
421
- _calculate_total_stats(root_dir, criteria) if show_stats else None
422
- )
423
- tree_lines: List[str] = []
721
+ """Generates a directory tree structure from a list of file paths."""
722
+ tree_dict: Dict[str, Any] = {}
723
+ for path in file_paths:
724
+ level = tree_dict
725
+ for part in path.relative_to(root_dir).parts:
726
+ level = level.setdefault(part, {})
424
727
 
425
- def format_dir_name(
426
- path: Path, path_name: str, included_files: int, included_dirs: int
427
- ) -> str:
428
- if not show_stats or not dir_stats:
429
- return path_name
430
- total_files, total_dirs = dir_stats.get(path, (0, 0))
728
+ def count_children(d: Dict) -> Tuple[int, int]:
729
+ files = sum(1 for v in d.values() if not v)
730
+ dirs = len(d) - files
731
+ return files, dirs
431
732
 
432
- stats_str = f" [I: {included_files}f, {included_dirs}d | T: {total_files}f, {total_dirs}d]"
433
- return path_name + stats_str
733
+ lines = []
734
+ style = ("├── ", "└── ", "│ ", " ")
434
735
 
435
- def _recursive_build(current_path: Path, prefix_parts: List[str]):
436
- try:
437
- entries = sorted(current_path.iterdir(), key=lambda p: p.name.lower())
438
- except OSError as e:
439
- error_prefix = "".join(prefix_parts) + style.l_connector
440
- tree_lines.append(
441
- error_prefix + f"[Error accessing: {current_path.name} - {e.strerror}]"
442
- )
443
- return
444
- displayable_children: List[Tuple[Path, bool]] = []
445
- for e in entries:
446
- try:
447
- is_dir = e.is_dir()
448
- except OSError:
449
- continue
450
- if _should_include_entry(
451
- e, root_dir, criteria, is_dir=is_dir, log_func=None
452
- ):
453
- displayable_children.append((e, is_dir))
454
- num_children = len(displayable_children)
455
- included_files_in_level = sum(
456
- 1 for _, is_dir in displayable_children if not is_dir
457
- )
458
- included_dirs_in_level = sum(1 for _, is_dir in displayable_children if is_dir)
459
- if not prefix_parts:
460
- tree_lines.append(
461
- format_dir_name(
462
- current_path,
463
- current_path.name,
464
- included_files_in_level,
465
- included_dirs_in_level,
466
- )
467
- )
468
- for i, (child_path, child_is_dir) in enumerate(displayable_children):
469
- is_last = i == num_children - 1
470
- connector = style.l_connector if is_last else style.t_connector
471
- entry_name = child_path.name
472
- if child_is_dir:
473
- try:
474
- child_entries = sorted(
475
- child_path.iterdir(), key=lambda p: p.name.lower()
476
- )
477
- child_displayable_children = [
478
- (e, e.is_dir())
479
- for e in child_entries
480
- if _should_include_entry(
481
- e, root_dir, criteria, is_dir=e.is_dir(), log_func=None
482
- )
483
- ]
484
- child_included_files = sum(
485
- 1 for _, is_dir in child_displayable_children if not is_dir
486
- )
487
- child_included_dirs = sum(
488
- 1 for _, is_dir in child_displayable_children if is_dir
489
- )
490
- entry_name = format_dir_name(
491
- child_path,
492
- child_path.name,
493
- child_included_files,
494
- child_included_dirs,
495
- )
496
- except OSError:
497
- pass
498
- tree_lines.append("".join(prefix_parts) + connector + entry_name)
499
- if child_is_dir:
500
- new_prefix_parts = prefix_parts + [
501
- style.h_spacer if is_last else style.v_connector
502
- ]
503
- _recursive_build(child_path, new_prefix_parts)
736
+ def build_lines_recursive(d: Dict, prefix: str = ""):
737
+ items = sorted(d.keys(), key=lambda k: (not d[k], k.lower()))
738
+ for i, name in enumerate(items):
739
+ is_last = i == len(items) - 1
740
+ connector = style[1] if is_last else style[0]
741
+ display_name = name
504
742
 
505
- _recursive_build(root_dir, [])
506
- return tree_lines
743
+ if d[name]:
744
+ if show_stats:
745
+ files, dirs = count_children(d[name])
746
+ display_name += f" [dim][M: {files}f, {dirs}d][/dim]"
507
747
 
748
+ lines.append(f"{prefix}{connector}{display_name}")
508
749
 
509
- def _generate_tree_from_paths(
510
- root_dir: Path, file_paths: List[Path], style: TreeStyle, show_stats: bool
511
- ) -> List[str]:
512
- """Generates a directory tree structure from a list of *matched* file paths using the given style."""
513
- tree_dict: Dict[str, Any] = {}
514
- matched_paths = {p.relative_to(root_dir) for p in file_paths}
515
- for rel_path in matched_paths:
516
- parts = rel_path.parts
517
- current_level = tree_dict
518
- for part in parts:
519
- current_level = current_level.setdefault(part, {})
520
- tree_lines: List[str] = []
521
-
522
- def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
523
- if not show_stats:
524
- return name
525
-
526
- stats_str = f" [M: {matched_files}f, {matched_dirs}d]"
527
- return name + stats_str
528
-
529
- def build_lines(d: Dict[str, Any], prefix: str):
530
- items = sorted(d.keys(), key=lambda k: (len(d[k]) == 0, k.lower()))
531
- num_children = len(items)
532
- matched_files_in_level = sum(1 for k in items if not d[k])
533
- matched_dirs_in_level = sum(1 for k in items if d[k])
534
- if not prefix:
535
- tree_lines.append(
536
- format_dir_name_search(
537
- root_dir.name, matched_files_in_level, matched_dirs_in_level
538
- )
539
- )
540
- for i, name in enumerate(items):
541
- is_last = i == num_children - 1
542
- connector = style.l_connector if is_last else style.t_connector
543
- entry_name = name
544
750
  if d[name]:
545
- child_matched_files = sum(1 for k in d[name] if not d[name][k])
546
- child_matched_dirs = sum(1 for k in d[name] if d[name][k])
547
- entry_name = format_dir_name_search(
548
- name, child_matched_files, child_matched_dirs
549
- )
550
- tree_lines.append(prefix + connector + entry_name)
551
- if d[name]:
552
- extension = style.h_spacer if is_last else style.v_connector
553
- build_lines(d[name], prefix + extension)
751
+ extension = style[3] if is_last else style[2]
752
+ build_lines_recursive(d[name], prefix + extension)
554
753
 
555
- build_lines(tree_dict, "")
556
- return tree_lines
754
+ root_name = f"[bold cyan]{root_dir.name}[/bold cyan]"
755
+ if show_stats:
756
+ files, dirs = count_children(tree_dict)
757
+ root_name += f" [dim][M: {files}f, {dirs}d][/dim]"
758
+ lines.append(root_name)
557
759
 
558
-
559
- # --- Collation and Main Modes ---
760
+ build_lines_recursive(tree_dict)
761
+ return lines
560
762
 
561
763
 
562
764
def _collate_content_to_file(
    output_path: Path,
    tree_lines: List[str],
    files: List[FileToProcess],
    show_tree_stats: bool,
    show_token_count: bool,
    exclude_whitespace: bool,
    progress: Any,
    task_id: Any,
    only_show_tree: bool,
) -> Tuple[int, int]:
    """Collate the file tree and file contents into a single output file.

    Args:
        output_path: Destination file; parent directories are created.
        tree_lines: Rendered tree lines (may contain rich markup, which is
            stripped before writing).
        files: Files whose contents are appended after the tree.
        show_tree_stats: If True, write the stats key line under the header.
        show_token_count: If True, prepend an approximate token count.
        exclude_whitespace: Count tokens with all whitespace removed.
        progress: Progress handler (rich Progress or fallback), or falsy.
        task_id: Task handle for ``progress.update``.
        only_show_tree: If True, write only the tree and return early.

    Returns:
        ``(total_bytes, token_count)`` — bytes of file content collated and
        the character-based token approximation (0 unless requested).
        NOTE: the original annotated the first element as ``float``, but both
        values are always ints; the annotation is corrected here.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    buffer, total_bytes, token_count = StringIO(), 0, 0

    if tree_lines:
        buffer.write(f"{TREE_HEADER_TEXT}\n" + "-" * 80 + "\n\n")
        if show_tree_stats:
            buffer.write(
                "Key: [M: Matched files/dirs]\n    (f=files, d=directories)\n\n"
            )

        # Tree lines carry display markup; strip it for the plain-text file.
        if RICH_AVAILABLE:
            content = "\n".join(Text.from_markup(line).plain for line in tree_lines)
        else:
            content = "\n".join(strip_markup(line) for line in tree_lines)
        buffer.write(content + "\n\n")

    if only_show_tree:
        with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
            outfile.write(buffer.getvalue())
        return total_bytes, token_count

    for file_info in files:
        if progress:
            progress.update(
                task_id,
                advance=1,
                description=f"Collating [green]{file_info.relative_path_posix}[/green]",
            )
        buffer.write(f"{'-'*80}\nFILE: {file_info.relative_path_posix}\n{'-'*80}\n\n")
        try:
            content = file_info.absolute_path.read_text(
                encoding=DEFAULT_ENCODING, errors="replace"
            )
            buffer.write(content + "\n\n")
            total_bytes += len(content.encode(DEFAULT_ENCODING))
        except Exception as e:
            # Best-effort: record the failure inline instead of aborting the run.
            buffer.write(f"Error: Could not read file. Issue: {e}\n\n")

    final_content = buffer.getvalue()
    if show_token_count:
        content_for_count = (
            re.sub(r"\s", "", final_content) if exclude_whitespace else final_content
        )
        token_count = len(content_for_count)

    with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
        if show_token_count:
            mode = "chars, no whitespace" if exclude_whitespace else "characters"
            outfile.write(f"Token Count ({mode}): {token_count}\n\n")
        outfile.write(final_content)

    return total_bytes, token_count
732
828
 
733
829
 
734
- def search_and_collate_content(
735
- root_dir: Path,
736
- sub_string_match: List[str],
737
- output_file: str,
738
- tree_style: TreeStyle,
739
- file_extensions_to_check: Optional[List[str]],
740
- ignore_substrings_in_path: Optional[List[str]],
741
- language_presets: Optional[List[LanguagePreset]],
742
- ignore_presets: Optional[List[IgnorePreset]],
743
- search_file_contents: bool,
744
- max_workers: Optional[int],
745
- full_path_compare: bool,
746
- show_token_count: bool,
747
- show_tree_stats: bool,
830
+ # --- Main Entry Point ---
831
+ def generate_snapshot(
832
+ root_directory: str = ".",
833
+ output_file_name: str = "project_snapshot.txt",
834
+ search_keywords: Optional[List[str]] = None,
835
+ file_extensions: Optional[List[str]] = None,
836
+ ignore_if_in_path: Optional[List[str]] = None,
837
+ ignore_extensions: Optional[List[str]] = None,
838
+ language_presets: Optional[List[LanguagePreset]] = None,
839
+ ignore_presets: Optional[List[IgnorePreset]] = None,
840
+ search_file_contents: bool = True,
841
+ full_path_compare: bool = True,
842
+ max_workers: Optional[int] = None,
843
+ generate_tree: bool = True,
844
+ show_tree_stats: bool = False,
845
+ show_token_count: bool = False,
846
+ exclude_whitespace_in_token_count: bool = False,
847
+ read_binary_files: bool = False,
848
+ files: Optional[List[str]] = None,
849
+ only_show_tree: bool = False,
850
+ case_sensitive_filter: bool = False,
851
+ case_sensitive_search: bool = False,
748
852
  ) -> None:
749
- """SEARCH MODE: Scans for files that match a substring with multi-phase progress bars."""
853
+ """
854
+ Orchestrates the entire process of scanning, filtering, and collating project files.
855
+ """
856
+ console, start_time = ConsoleManager(), time.perf_counter()
857
+ root_dir = Path(root_directory or ".").resolve()
858
+ if not root_dir.is_dir():
859
+ console.log(f"Error: Root directory '{root_dir}' not found.", style="bold red")
860
+ return
861
+
862
+ # Normalize keywords for display/logic
863
+ keywords = [k.strip() for k in search_keywords or [] if k.strip()]
864
+ if not case_sensitive_search:
865
+ # We don't lower here for the variable passed to functions,
866
+ # but for consistent display in the table we might want to.
867
+ # However, logic downstream handles lowering if case_sensitive_search is False.
868
+ pass
869
+
870
+ snapshot_mode = not keywords
871
+
872
+ # Normalize filtering criteria
750
873
  criteria = FilterCriteria.normalize_inputs(
751
- file_extensions_to_check,
752
- None,
753
- None,
754
- ignore_substrings_in_path,
755
- language_presets,
756
- ignore_presets,
874
+ file_types=file_extensions,
875
+ ignore_if_in_path=ignore_if_in_path,
876
+ ignore_extensions=ignore_extensions,
877
+ lang_presets=language_presets,
878
+ ignore_presets=ignore_presets,
879
+ files=files,
880
+ case_sensitive=case_sensitive_filter,
757
881
  )
758
- normalized_keywords = [
759
- sub.lower().strip() for sub in sub_string_match if sub.strip()
760
- ]
761
- if not normalized_keywords:
762
- print("Error: Search mode requires 'search_keywords' to be provided.")
763
- return
764
882
 
765
- if criteria.ignore_path_components:
766
- print(
767
- f"Ignoring directories containing: {', '.join(sorted(list(criteria.ignore_path_components)))}"
883
+ config_rows = [
884
+ ["Root Directory", str(root_dir)],
885
+ ["File Types", ", ".join(criteria.file_extensions) or "All"],
886
+ ["Specific Files", ", ".join(criteria.specific_files) or "None"],
887
+ ["Ignore Paths", ", ".join(criteria.ignore_if_in_path) or "None"],
888
+ ["Ignore Extensions", ", ".join(criteria.ignore_extensions) or "None"],
889
+ ["Generate Tree", "[green]Yes[/green]" if generate_tree else "[red]No[/red]"],
890
+ ]
891
+ if generate_tree:
892
+ config_rows.append(
893
+ ["Tree Stats", "[green]Yes[/green]" if show_tree_stats else "[red]No[/red]"]
768
894
  )
769
-
770
- candidate_files: List[Path] = []
771
-
772
- with tqdm(desc="Phase 1: Discovering files", unit="dir") as discovery_bar:
773
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
774
- discovery_bar.update(1)
775
- discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
776
- current_dir_path = Path(dirpath_str)
777
- dirnames[:] = [
778
- d
779
- for d in dirnames
780
- if (current_dir_path / d).name.lower()
781
- not in criteria.ignore_path_components
895
+ config_rows.append(
896
+ [
897
+ "Show Token Count",
898
+ "[green]Yes[/green]" if show_token_count else "[red]No[/red]",
899
+ ]
900
+ )
901
+ if show_token_count:
902
+ config_rows.append(
903
+ [
904
+ "Exclude Whitespace",
905
+ (
906
+ "[green]Yes[/green]"
907
+ if exclude_whitespace_in_token_count
908
+ else "[red]No[/red]"
909
+ ),
782
910
  ]
783
-
784
- for filename in filenames:
785
- file_abs_path = current_dir_path / filename
786
- try:
787
- relative_parts = file_abs_path.relative_to(root_dir).parts
788
- if any(
789
- part.lower() in criteria.ignore_path_components
790
- for part in relative_parts
791
- ):
792
- continue
793
- except ValueError:
794
- continue
795
-
796
- if (
797
- not criteria.file_extensions
798
- or file_abs_path.suffix.lower() in criteria.file_extensions
799
- ):
800
- candidate_files.append(file_abs_path)
801
-
802
- print(f"Discovered {len(candidate_files)} candidate files to process.")
803
-
804
- if not candidate_files:
805
- print(
806
- "\nScan complete. No files matched the initial criteria (extensions and ignores)."
807
911
  )
808
- with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
809
- f_out.write("No files found matching the specified criteria.\n")
810
- return
811
-
812
- matched_files: Set[Path] = set()
813
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
814
- future_to_file = {
815
- executor.submit(
816
- process_file_for_search,
817
- file,
818
- normalized_keywords,
819
- search_file_contents,
820
- full_path_compare,
821
- ): file
822
- for file in candidate_files
823
- }
824
912
 
825
- progress_bar_desc = f"Phase 2: Processing {len(candidate_files)} files"
826
- progress_bar = tqdm(
827
- as_completed(future_to_file),
828
- total=len(future_to_file),
829
- unit="file",
830
- desc=progress_bar_desc,
913
+ if snapshot_mode:
914
+ config_rows.insert(1, ["Mode", "[bold blue]Snapshot[/bold blue]"])
915
+ config_rows.append(
916
+ [
917
+ "Case Sensitive Filter",
918
+ "[green]Yes[/green]" if case_sensitive_filter else "[red]No[/red]",
919
+ ]
831
920
  )
832
-
833
- for future in progress_bar:
834
- result = future.result()
835
- if result:
836
- matched_files.add(result)
837
-
838
- if not matched_files:
839
- print(
840
- "\nScan complete. No matching files were found after processing keywords."
921
+ else:
922
+ config_rows.insert(1, ["Mode", "[bold yellow]Search[/bold yellow]"])
923
+ config_rows.insert(
924
+ 2, ["Search Keywords", f"[yellow]{', '.join(keywords)}[/yellow]"]
841
925
  )
842
- with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
843
- f_out.write("No files found matching the specified search keywords.\n")
844
- return
845
-
846
- sorted_matched_files = sorted(
847
- list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
848
- )
849
-
850
- print(f"Found {len(sorted_matched_files)} matching files.")
851
- print(f"Generating output file at '{Path(output_file).resolve()}'...")
852
-
853
- tree_content_lines = _generate_tree_from_paths(
854
- root_dir, sorted_matched_files, tree_style, show_tree_stats
855
- )
856
- files_to_process = [
857
- FileToProcess(f, f.relative_to(root_dir).as_posix())
858
- for f in sorted_matched_files
859
- ]
860
-
861
- _collate_content_to_file(
862
- output_file,
863
- tree_content_lines,
864
- files_to_process,
865
- DEFAULT_ENCODING,
866
- DEFAULT_SEPARATOR_CHAR,
867
- DEFAULT_SEPARATOR_LINE_LENGTH,
868
- show_token_count,
869
- show_tree_stats,
870
- ProjectMode.SEARCH,
871
- )
872
-
873
-
874
- # --- DECONSTRUCTION FUNCTION ---
875
-
876
-
877
- def deconstruct_snapshot(snapshot_file_path: str) -> Dict[str, Any]:
878
- """Scans a compiled snapshot file, extracts the directory tree lines and file paths."""
879
- snapshot_path = Path(snapshot_file_path)
880
- if not snapshot_path.is_file():
881
- raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")
882
- tree_lines: List[str] = []
883
- file_paths: List[str] = []
884
- separator_pattern = re.compile(
885
- r"^[{}]{{4,}}[{}|]*$".format(
886
- re.escape(DEFAULT_SEPARATOR_CHAR), re.escape(DEFAULT_SEPARATOR_CHAR)
926
+ config_rows.append(
927
+ [
928
+ "Search Content",
929
+ "[green]Yes[/green]" if search_file_contents else "[red]No[/red]",
930
+ ]
887
931
  )
888
- )
889
- state = "LOOKING_FOR_TREE"
890
- with open(snapshot_path, "r", encoding=DEFAULT_ENCODING, errors="replace") as f:
891
- for line in f:
892
- line = line.strip()
893
- if state == "LOOKING_FOR_TREE":
894
- if line == TREE_HEADER_TEXT:
895
- state = "READING_TREE"
896
- elif state == "READING_TREE":
897
- if not line or separator_pattern.match(line):
898
- if tree_lines and separator_pattern.match(line):
899
- state = "LOOKING_FOR_CONTENT"
900
- continue
901
- if state == "READING_TREE" and not line.startswith("Key:"):
902
- tree_lines.append(line)
903
- elif state == "LOOKING_FOR_CONTENT":
904
- if line.startswith(FILE_HEADER_PREFIX):
905
- file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
906
- state = "READING_CONTENT"
907
- elif state == "READING_CONTENT":
908
- if line.startswith(FILE_HEADER_PREFIX):
909
- file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
910
- # Post-process to remove the key lines if they were accidentally captured
911
- tree_lines = [
912
- line
913
- for line in tree_lines
914
- if not line.strip().startswith("Key:")
915
- and not line.strip().startswith("(f=files")
916
- ]
917
- return {"tree_lines": tree_lines, "file_paths": file_paths}
918
-
919
-
920
- # --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---
921
-
922
-
923
- def process_project(
924
- root_dir_param: Optional[str] = None,
925
- output_file_name: str = "project_output.txt",
926
- mode: ProjectMode = ProjectMode.FILTER,
927
- file_types: Optional[List[str]] = None,
928
- ignore_dirs_in_path: Optional[List[str]] = None,
929
- language_presets: Optional[List[LanguagePreset]] = None,
930
- ignore_presets: Optional[List[IgnorePreset]] = None,
931
- whitelist_filename_substrings: Optional[List[str]] = None,
932
- ignore_filename_substrings: Optional[List[str]] = None,
933
- generate_tree: bool = True,
934
- search_keywords: Optional[List[str]] = None,
935
- search_file_contents: bool = False,
936
- full_path_compare: bool = True,
937
- max_workers: Optional[int] = None,
938
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
939
- tree_style_t_connector: Optional[str] = None,
940
- tree_style_l_connector: Optional[str] = None,
941
- tree_style_v_connector: Optional[str] = None,
942
- tree_style_h_spacer: Optional[str] = None,
943
- show_token_count: bool = False,
944
- show_tree_stats: bool = False,
945
- encoding: str = DEFAULT_ENCODING,
946
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
947
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
948
- ) -> None:
949
- """Main function to process a project directory in either FILTER or SEARCH mode."""
950
- actual_root_dir = validate_root_directory(root_dir_param)
951
- if actual_root_dir is None:
952
- sys.exit(1)
953
- style = tree_style_preset.to_style()
954
- final_style = TreeStyle(
955
- t_connector=tree_style_t_connector or style.t_connector,
956
- l_connector=tree_style_l_connector or style.l_connector,
957
- v_connector=tree_style_v_connector or style.v_connector,
958
- h_spacer=tree_style_h_spacer or style.h_spacer,
959
- )
960
- print(f"--- Starting Project Processing in {mode.name} Mode ---")
961
- if mode == ProjectMode.FILTER:
962
- filter_and_append_content(
963
- actual_root_dir,
964
- output_file_name,
965
- final_style,
966
- generate_tree,
967
- file_types,
968
- whitelist_filename_substrings,
969
- ignore_filename_substrings,
970
- ignore_dirs_in_path,
971
- language_presets,
972
- ignore_presets,
973
- encoding,
974
- separator_char,
975
- separator_line_len,
976
- show_token_count,
977
- show_tree_stats,
932
+ config_rows.append(
933
+ [
934
+ "Read Binary Files",
935
+ "[green]Yes[/green]" if read_binary_files else "[red]No[/red]",
936
+ ]
978
937
  )
979
- elif mode == ProjectMode.SEARCH:
980
- if not search_keywords:
981
- print("Error: Search mode requires 'search_keywords' to be provided.")
982
- return
983
- search_and_collate_content(
984
- actual_root_dir,
985
- search_keywords,
986
- output_file_name,
987
- final_style,
988
- file_types,
989
- ignore_dirs_in_path,
990
- language_presets,
991
- ignore_presets,
992
- search_file_contents,
993
- max_workers,
994
- full_path_compare,
995
- show_token_count,
996
- show_tree_stats,
938
+ config_rows.append(
939
+ [
940
+ "Case Sensitive Search",
941
+ "[green]Yes[/green]" if case_sensitive_search else "[red]No[/red]",
942
+ ]
997
943
  )
998
- print("--- Script Execution Finished ---")
999
944
 
945
+ if only_show_tree:
946
+ config_rows.append(["Output Content", "[yellow]Tree Only[/yellow]"])
1000
947
 
1001
- def filter_project(
1002
- root_dir_param: Optional[str] = None,
1003
- output_file_name: str = "project_filter_output.txt",
1004
- file_types: Optional[List[str]] = None,
1005
- ignore_dirs_in_path: Optional[List[str]] = None,
1006
- language_presets: Optional[List[LanguagePreset]] = None,
1007
- ignore_presets: Optional[List[IgnorePreset]] = None,
1008
- whitelist_filename_substrings: Optional[List[str]] = None,
1009
- ignore_filename_substrings: Optional[List[str]] = None,
1010
- generate_tree: bool = True,
1011
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
1012
- tree_style_t_connector: Optional[str] = None,
1013
- tree_style_l_connector: Optional[str] = None,
1014
- tree_style_v_connector: Optional[str] = None,
1015
- tree_style_h_spacer: Optional[str] = None,
1016
- show_token_count: bool = False,
1017
- show_tree_stats: bool = False,
1018
- encoding: str = DEFAULT_ENCODING,
1019
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
1020
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
1021
- ) -> None:
1022
- """Utility wrapper for process_project in FILTER mode."""
1023
- process_project(
1024
- root_dir_param=root_dir_param,
1025
- output_file_name=output_file_name,
1026
- mode=ProjectMode.FILTER,
1027
- file_types=file_types,
1028
- ignore_dirs_in_path=ignore_dirs_in_path,
1029
- language_presets=language_presets,
1030
- ignore_presets=ignore_presets,
1031
- whitelist_filename_substrings=whitelist_filename_substrings,
1032
- ignore_filename_substrings=ignore_filename_substrings,
1033
- generate_tree=generate_tree,
1034
- tree_style_preset=tree_style_preset,
1035
- tree_style_t_connector=tree_style_t_connector,
1036
- tree_style_l_connector=tree_style_l_connector,
1037
- tree_style_v_connector=tree_style_v_connector,
1038
- tree_style_h_spacer=tree_style_h_spacer,
1039
- show_token_count=show_token_count,
1040
- show_tree_stats=show_tree_stats,
1041
- encoding=encoding,
1042
- separator_char=separator_char,
1043
- separator_line_len=separator_line_len,
948
+ console.print_table(
949
+ "Project Scan Configuration", ["Parameter", "Value"], config_rows
1044
950
  )
1045
951
 
952
+ @contextmanager
953
+ def progress_manager():
954
+ if RICH_AVAILABLE:
955
+ progress = Progress(
956
+ TextColumn("[progress.description]{task.description}"),
957
+ BarColumn(),
958
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
959
+ SpinnerColumn(),
960
+ TimeElapsedColumn(),
961
+ "{task.fields[status]}",
962
+ expand=True,
963
+ )
964
+ with Live(progress, console=console.console, refresh_per_second=10) as live:
965
+ yield progress
966
+ else:
967
+ with FallbackProgress() as progress:
968
+ yield progress
969
+
970
+ with progress_manager() as progress:
971
+ discover_task = progress.add_task("Discovering files", total=None, status="")
972
+ candidate_files = _discover_files(root_dir, criteria, progress, discover_task)
973
+ if RICH_AVAILABLE:
974
+ progress.update(
975
+ discover_task,
976
+ description=f"Discovered [bold green]{len(candidate_files)}[/bold green] candidates",
977
+ status="",
978
+ )
979
+ else:
980
+ progress.update(
981
+ discover_task,
982
+ description=f"Discovered {len(candidate_files)} candidates",
983
+ )
1046
984
 
1047
- def find_in_project(
1048
- root_dir_param: Optional[str] = None,
1049
- output_file_name: str = "project_search_output.txt",
1050
- search_keywords: Optional[List[str]] = None,
1051
- file_extensions_to_check: Optional[List[str]] = None,
1052
- ignore_dirs_in_path: Optional[List[str]] = None,
1053
- language_presets: Optional[List[LanguagePreset]] = None,
1054
- ignore_presets: Optional[List[IgnorePreset]] = None,
1055
- search_file_contents: bool = False,
1056
- full_path_compare: bool = True,
1057
- max_workers: Optional[int] = None,
1058
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
1059
- tree_style_t_connector: Optional[str] = None,
1060
- tree_style_l_connector: Optional[str] = None,
1061
- tree_style_v_connector: Optional[str] = None,
1062
- tree_style_h_spacer: Optional[str] = None,
1063
- show_token_count: bool = False,
1064
- show_tree_stats: bool = False,
1065
- encoding: str = DEFAULT_ENCODING,
1066
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
1067
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
1068
- ) -> None:
1069
- """Utility wrapper for process_project in SEARCH mode."""
1070
- if not search_keywords:
1071
- print("Error: 'search_keywords' must be provided for find_in_project.")
1072
- return
1073
- process_project(
1074
- root_dir_param=root_dir_param,
1075
- output_file_name=output_file_name,
1076
- mode=ProjectMode.SEARCH,
1077
- file_types=file_extensions_to_check,
1078
- ignore_dirs_in_path=ignore_dirs_in_path,
1079
- language_presets=language_presets,
1080
- ignore_presets=ignore_presets,
1081
- search_keywords=search_keywords,
1082
- search_file_contents=search_file_contents,
1083
- full_path_compare=full_path_compare,
1084
- max_workers=max_workers,
1085
- tree_style_preset=tree_style_preset,
1086
- tree_style_t_connector=tree_style_t_connector,
1087
- tree_style_l_connector=tree_style_l_connector,
1088
- tree_style_v_connector=tree_style_v_connector,
1089
- tree_style_h_spacer=tree_style_h_spacer,
1090
- show_token_count=show_token_count,
1091
- show_tree_stats=show_tree_stats,
1092
- encoding=encoding,
1093
- separator_char=separator_char,
1094
- separator_line_len=separator_line_len,
1095
- )
985
+ matched_files = set()
986
+ if candidate_files:
987
+ if snapshot_mode:
988
+ matched_files = set(candidate_files)
989
+ if RICH_AVAILABLE:
990
+ progress.add_task(
991
+ "[dim]Keyword Processing[/dim]",
992
+ total=1,
993
+ completed=1,
994
+ status="[bold blue](Snapshot Mode)[/bold blue]",
995
+ )
996
+ else:
997
+ process_task = progress.add_task(
998
+ f"Processing {len(candidate_files)} files",
999
+ total=len(candidate_files),
1000
+ status="",
1001
+ )
1002
+ matched_files = _process_files_concurrently(
1003
+ candidate_files,
1004
+ keywords,
1005
+ search_file_contents,
1006
+ full_path_compare,
1007
+ max_workers,
1008
+ progress,
1009
+ process_task,
1010
+ read_binary_files,
1011
+ case_sensitive_search,
1012
+ )
1096
1013
 
1014
+ output_path, total_bytes, token_count = None, 0, 0
1015
+ if matched_files:
1016
+ sorted_files = sorted(
1017
+ list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix()
1018
+ )
1019
+ tree_lines = []
1020
+ if generate_tree:
1021
+ tree_task = progress.add_task(
1022
+ "Generating file tree...", total=1, status=""
1023
+ )
1024
+ tree_lines = _generate_tree_with_stats(
1025
+ root_dir, sorted_files, show_tree_stats
1026
+ )
1027
+ progress.update(
1028
+ tree_task, completed=1, description="Generated file tree"
1029
+ )
1030
+
1031
+ collate_task = progress.add_task(
1032
+ f"Collating {len(sorted_files)} files",
1033
+ total=len(sorted_files),
1034
+ status="",
1035
+ )
1036
+ files_to_process = [
1037
+ FileToProcess(f, f.relative_to(root_dir).as_posix())
1038
+ for f in sorted_files
1039
+ ]
1040
+ output_path = Path(output_file_name).resolve()
1041
+ total_bytes, token_count = _collate_content_to_file(
1042
+ output_path,
1043
+ tree_lines,
1044
+ files_to_process,
1045
+ show_tree_stats,
1046
+ show_token_count,
1047
+ exclude_whitespace_in_token_count,
1048
+ progress,
1049
+ collate_task,
1050
+ only_show_tree,
1051
+ )
1052
+
1053
+ end_time = time.perf_counter()
1054
+ summary_rows = [
1055
+ ["Candidate Files", f"{len(candidate_files)}"],
1056
+ ["Files Matched", f"[bold green]{len(matched_files)}[/bold green]"],
1057
+ ["Total Time", f"{end_time - start_time:.2f} seconds"],
1058
+ ["Output Size", f"{total_bytes / 1024:.2f} KB"],
1059
+ ]
1060
+ if show_token_count:
1061
+ summary_rows.append(["Approximated Tokens", f"{token_count:,}"])
1062
+ summary_rows.append(["Output File", str(output_path or "N/A")])
1063
+ console.print_table("Scan Complete", ["Metric", "Value"], summary_rows)
1097
1064
 
1098
- __all__ = [
1099
- "process_project",
1100
- "filter_project",
1101
- "find_in_project",
1102
- "deconstruct_snapshot",
1103
- "ProjectMode",
1104
- "LanguagePreset",
1105
- "IgnorePreset",
1106
- "TreeStylePreset",
1107
- ]
1108
1065
 
1109
1066
  if __name__ == "__main__":
1110
- print("\n--- Running a custom filter scan with new stats format ---")
1111
- filter_project(
1112
- root_dir_param=".",
1113
- output_file_name="custom_snapshot_readable.txt",
1114
- file_types=[".py", "requirements.txt", ".sql", ".md"],
1115
- ignore_dirs_in_path=["venv", "build", "node_modules", "static", "templates"],
1067
+ generate_snapshot(
1068
+ root_directory=".",
1069
+ output_file_name="project_snapshot_final.txt",
1070
+ # No search keywords triggers Snapshot Mode
1071
+ language_presets=[LanguagePreset.PYTHON],
1072
+ ignore_presets=[
1073
+ IgnorePreset.PYTHON,
1074
+ IgnorePreset.BUILD_ARTIFACTS,
1075
+ IgnorePreset.VERSION_CONTROL,
1076
+ IgnorePreset.NODE_JS,
1077
+ IgnorePreset.IDE_METADATA,
1078
+ ],
1079
+ ignore_extensions=[".log", ".tmp"], # Example of new functionality
1080
+ generate_tree=True,
1116
1081
  show_tree_stats=True,
1117
1082
  show_token_count=True,
1118
- )
1083
+ exclude_whitespace_in_token_count=True,
1084
+ )