dirshot 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dirshot/dirshot.py CHANGED
@@ -1,1011 +1,1022 @@
1
1
  import os
2
2
  import sys
3
3
  import re
4
- import time # Imported for the fallback progress bar
4
+ import time
5
+ import threading
5
6
  from pathlib import Path
6
7
  from dataclasses import dataclass, field
7
- from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
8
+ from typing import List, Optional, Set, Tuple, NamedTuple, Dict, Any
8
9
  from enum import Enum
9
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
10
11
  from io import StringIO
12
+ from contextlib import contextmanager
11
13
 
12
- # --- TQDM Dependency Handler ---
14
+ # --- Dependency & Console Management ---
13
15
  try:
14
- from tqdm import tqdm
15
- except ImportError:
16
-
17
- # Define a functional fallback dummy tqdm class if the import fails.
18
- class tqdm:
19
- """A simple, text-based progress bar fallback if tqdm is not installed."""
16
+ from rich.console import Console
17
+ from rich.progress import (
18
+ Progress,
19
+ SpinnerColumn,
20
+ BarColumn,
21
+ TextColumn,
22
+ TimeElapsedColumn,
23
+ )
24
+ from rich.table import Table
25
+ from rich.live import Live
26
+ from rich.panel import Panel
27
+ from rich.text import Text
20
28
 
21
- def __init__(self, iterable=None, total=None, desc="", unit="it", **kwargs):
22
- self.iterable = iterable
23
- self.total = (
24
- total
25
- if total is not None
26
- else (len(iterable) if hasattr(iterable, "__len__") else None)
29
+ RICH_AVAILABLE = True
30
+ except ImportError:
31
+ RICH_AVAILABLE = False
32
+
33
+ class FallbackProgress:
34
+ """A simple, dependency-free progress handler for when 'rich' is not installed."""
35
+
36
+ def __init__(self):
37
+ self.tasks, self.task_count, self.active_line = {}, 0, ""
38
+
39
+ def add_task(self, description, total=None, **kwargs):
40
+ task_id = self.task_count
41
+ self.tasks[task_id] = {"d": description, "t": total, "c": 0}
42
+ self.task_count += 1
43
+ return task_id
44
+
45
+ def update(
46
+ self, task_id, advance=0, completed=None, description=None, **kwargs
47
+ ):
48
+ if task_id not in self.tasks:
49
+ return
50
+ task = self.tasks[task_id]
51
+ if description:
52
+ task["d"] = description
53
+ task["c"] = completed if completed is not None else task["c"] + advance
54
+ line = f"-> {task['d']}: {task['c']}" + (
55
+ f"/{task['t']}" if task["t"] else ""
27
56
  )
28
- self.desc = desc
29
- self.unit = unit
30
- self.current = 0
31
- self.start_time = time.time()
32
- self._last_update_time = 0
33
-
34
- def __iter__(self):
35
- for obj in self.iterable:
36
- yield obj
37
- self.update(1)
38
- # The loop is finished, ensure the bar is 100% and close
39
- if self.total is not None and self.current < self.total:
40
- self.update(self.total - self.current)
41
- self.close()
42
-
43
- def update(self, n=1):
44
- """Update the progress bar by n steps."""
45
- self.current += n
46
- now = time.time()
47
- # Throttle screen updates to prevent flickering and performance loss
48
- if (
49
- self.total is None
50
- or now - self._last_update_time > 0.1
51
- or self.current == self.total
52
- ):
53
- self._last_update_time = now
54
- self._draw()
55
-
56
- def set_description(self, desc: str):
57
- """Set the description of the progress bar."""
58
- self.desc = desc
59
- self._draw()
60
-
61
- def _draw(self):
62
- """Draw the progress bar to the console."""
63
- if self.total:
64
- percent = int((self.current / self.total) * 100)
65
- bar_length = 25
66
- filled_length = int(bar_length * self.current // self.total)
67
- bar = "█" * filled_length + "-" * (bar_length - filled_length)
68
- # Use carriage return to print on the same line
69
- progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
70
- sys.stdout.write(progress_line)
71
- else: # Case where total is not known
72
- sys.stdout.write(f"\r{self.desc}: {self.current} {self.unit}")
73
-
57
+ sys.stdout.write("\r" + line.ljust(len(self.active_line) + 2))
74
58
  sys.stdout.flush()
59
+ self.active_line = line
60
+
61
+ def __enter__(self):
62
+ return self
75
63
 
76
- def close(self):
77
- """Clean up the progress bar line."""
78
- # Print a newline to move off the progress bar line
64
+ def __exit__(self, exc_type, exc_val, exc_tb):
79
65
  sys.stdout.write("\n")
80
66
  sys.stdout.flush()
81
67
 
82
68
 
83
- # --- Configuration Constants ---
84
- DEFAULT_SEPARATOR_CHAR = "-"
85
- DEFAULT_SEPARATOR_LINE_LENGTH = 80
86
- DEFAULT_ENCODING = "utf-8"
87
- TREE_HEADER_TEXT = "Project File Structure"
88
- FILE_HEADER_PREFIX = "FILE: "
89
- TOKEN_APPROX_MODE = "CHAR_COUNT"
90
-
91
- # --- Public Enums for Import and Usage ---
92
-
93
-
94
- class ProjectMode(Enum):
95
- """The mode of operation for the script."""
96
-
97
- FILTER = "filter"
98
- SEARCH = "search"
69
+ class ConsoleManager:
70
+ """A wrapper to gracefully handle console output with or without 'rich'."""
71
+
72
+ def __init__(self):
73
+ """Initializes the ConsoleManager, detecting if 'rich' is available."""
74
+ self.console = Console() if RICH_AVAILABLE else None
75
+
76
+ def log(self, message: str, style: str = ""):
77
+ """Logs a message to the console, applying a style if 'rich' is available."""
78
+ if self.console:
79
+ self.console.log(message, style=style)
80
+ else:
81
+ print(f"[{time.strftime('%H:%M:%S')}] {message}")
82
+
83
+ def print_table(self, title: str, columns: List[str], rows: List[List[str]]):
84
+ """Prints a formatted table to the console."""
85
+ if self.console:
86
+ table = Table(
87
+ title=title,
88
+ show_header=True,
89
+ header_style="bold magenta",
90
+ border_style="dim",
91
+ )
92
+ for col in columns:
93
+ table.add_column(col)
94
+ for row in rows:
95
+ table.add_row(*row)
96
+ self.console.print(table)
97
+ else:
98
+ print(f"\n--- {title} ---")
99
+ print(" | ".join(columns))
100
+ for row in rows:
101
+ print(" | ".join(row))
102
+ print("-" * (len(title) + 6))
99
103
 
100
104
 
105
+ # --- Configuration Constants ---
106
+ DEFAULT_SEPARATOR_CHAR, DEFAULT_ENCODING = "-", "utf-8"
107
+ TREE_HEADER_TEXT, FILE_HEADER_PREFIX = "Project File Structure", "FILE: "
108
+ BINARY_FILE_EXTENSIONS = {
109
+ ".png",
110
+ ".jpg",
111
+ ".jpeg",
112
+ ".gif",
113
+ ".pdf",
114
+ ".zip",
115
+ ".exe",
116
+ ".dll",
117
+ ".so",
118
+ ".jar",
119
+ ".pyc",
120
+ ".mp3",
121
+ ".mp4",
122
+ }
123
+
124
+
125
+ # --- Base Lists for Presets ---
126
+ # These are defined outside the enums to allow for safe composition.
127
+ _PYTHON_BASE = [
128
+ ".py",
129
+ ".pyw",
130
+ "requirements.txt",
131
+ "Pipfile",
132
+ "pyproject.toml",
133
+ "setup.py",
134
+ ]
135
+ _JAVASCRIPT_BASE = [
136
+ ".js",
137
+ ".jsx",
138
+ ".ts",
139
+ ".tsx",
140
+ ".mjs",
141
+ ".cjs",
142
+ "package.json",
143
+ "jsconfig.json",
144
+ "tsconfig.json",
145
+ ]
146
+ _RUBY_BASE = [".rb", "Gemfile", "Rakefile", ".gemspec"]
147
+ _PHP_BASE = [".php", "composer.json", "index.php"]
148
+ _JAVA_BASE = [".java", ".jar", ".war", "pom.xml", ".properties"]
149
+ _KOTLIN_BASE = [".kt", ".kts", ".gradle", "build.gradle.kts"]
150
+ _CSHARP_BASE = [".cs", ".csproj", ".sln", "appsettings.json", "Web.config", ".csx"]
151
+ _C_CPP_BASE = [".c", ".cpp", ".h", ".hpp", "Makefile", "CMakeLists.txt", ".cxx", ".hxx"]
152
+ _RUST_BASE = [".rs", "Cargo.toml", "Cargo.lock"]
153
+ _SWIFT_BASE = [".swift", "Package.swift"]
154
+ _OBJECTIVE_C_BASE = [".m", ".mm", ".h"]
155
+ _ELIXIR_BASE = [".ex", ".exs", "mix.exs"]
156
+ _DART_BASE = [".dart", "pubspec.yaml"]
157
+ _SCALA_BASE = [".scala", ".sbt", "build.sbt"]
158
+ _R_LANG_BASE = [".r", ".R", ".Rmd"]
159
+ _LUA_BASE = [".lua"]
160
+
161
+ _IDE_VSCODE = [".vscode"]
162
+ _IDE_JETBRAINS = [".idea"]
163
+ _IDE_SUBLIME = ["*.sublime-project", "*.sublime-workspace"]
164
+ _IDE_ECLIPSE = [".project", ".settings", ".classpath"]
165
+ _IDE_NETBEANS = ["nbproject"]
166
+ _IDE_ATOM = [".atom"]
167
+ _IDE_VIM = ["*.swp", "*.swo"]
168
+ _IDE_XCODE = ["*.xcodeproj", "*.xcworkspace", "xcuserdata"]
169
+
170
+
171
+ # --- Enums and Data Structures ---
101
172
  class LanguagePreset(Enum):
102
- """Predefined sets of file extensions/names for common languages/frameworks."""
103
-
104
- PYTHON = [
105
- ".py",
106
- ".pyw",
107
- "setup.py",
108
- "requirements.txt",
109
- "Pipfile",
110
- "pyproject.toml",
173
+ """Provides an extensive list of presets for common language file extensions and key project files."""
174
+
175
+ PYTHON = _PYTHON_BASE
176
+ JAVASCRIPT = _JAVASCRIPT_BASE
177
+ JAVA = _JAVA_BASE
178
+ KOTLIN = _KOTLIN_BASE
179
+ C_CPP = _C_CPP_BASE
180
+ C_SHARP = _CSHARP_BASE
181
+ GO = [".go", "go.mod", "go.sum"]
182
+ RUST = _RUST_BASE
183
+ RUBY = _RUBY_BASE
184
+ PHP = _PHP_BASE
185
+ SWIFT = _SWIFT_BASE
186
+ OBJECTIVE_C = _OBJECTIVE_C_BASE
187
+ DART = _DART_BASE
188
+ LUA = _LUA_BASE
189
+ PERL = [".pl", ".pm", ".t"]
190
+ R_LANG = _R_LANG_BASE
191
+ SCALA = _SCALA_BASE
192
+ GROOVY = [".groovy", ".gvy", ".gy", ".gsh"]
193
+ HASKELL = [".hs", ".lhs", "cabal.project"]
194
+ JULIA = [".jl"]
195
+ ZIG = [".zig", "build.zig"]
196
+ NIM = [".nim", ".nimble"]
197
+ ELIXIR = _ELIXIR_BASE
198
+ CLOJURE = [".clj", ".cljs", ".cljc", "project.clj", "deps.edn"]
199
+ F_SHARP = [".fs", ".fsi", ".fsx"]
200
+ OCAML = [".ml", ".mli", "dune-project"]
201
+ ELM = [".elm", "elm.json"]
202
+ PURE_SCRIPT = [".purs", "spago.dhall"]
203
+ COMMON_LISP = [".lisp", ".cl", ".asd"]
204
+ SCHEME = [".scm", ".ss"]
205
+ RACKET = [".rkt"]
206
+ WEB_FRONTEND = [".html", ".htm", ".css", ".scss", ".sass", ".less", ".styl"]
207
+ REACT = _JAVASCRIPT_BASE
208
+ NODE_JS = _JAVASCRIPT_BASE
209
+ EXPRESS_JS = _JAVASCRIPT_BASE
210
+ NEST_JS = _JAVASCRIPT_BASE + ["nest-cli.json"]
211
+ VUE = _JAVASCRIPT_BASE + [".vue", "vue.config.js"]
212
+ ANGULAR = _JAVASCRIPT_BASE + ["angular.json"]
213
+ SVELTE = _JAVASCRIPT_BASE + [".svelte", "svelte.config.js"]
214
+ EMBER = _JAVASCRIPT_BASE + ["ember-cli-build.js"]
215
+ PUG = [".pug", ".jade"]
216
+ HANDLEBARS = [".hbs", ".handlebars"]
217
+ EJS = [".ejs"]
218
+ DJANGO = _PYTHON_BASE + ["manage.py", "wsgi.py", "asgi.py", ".jinja", ".jinja2"]
219
+ FLASK = _PYTHON_BASE + ["app.py", "wsgi.py"]
220
+ RAILS = _RUBY_BASE + ["routes.rb", ".erb", ".haml", ".slim", "config.ru"]
221
+ LARAVEL = _PHP_BASE + [".blade.php", "artisan"]
222
+ SYMFONY = _PHP_BASE + ["symfony.lock"]
223
+ PHOENIX = _ELIXIR_BASE
224
+ SPRING = _JAVA_BASE + ["application.properties", "application.yml"]
225
+ ASP_NET = _CSHARP_BASE + ["*.cshtml", "*.vbhtml", "*.razor"]
226
+ ROCKET_RS = _RUST_BASE + ["Rocket.toml"]
227
+ ACTIX_WEB = _RUST_BASE
228
+ IOS_NATIVE = (
229
+ _SWIFT_BASE
230
+ + _OBJECTIVE_C_BASE
231
+ + [".storyboard", ".xib", "Info.plist", ".pbxproj"]
232
+ )
233
+ ANDROID_NATIVE = _JAVA_BASE + _KOTLIN_BASE + ["AndroidManifest.xml", ".xml"]
234
+ FLUTTER = _DART_BASE
235
+ REACT_NATIVE = _JAVASCRIPT_BASE + ["app.json"]
236
+ XAMARIN = _CSHARP_BASE + [".xaml"]
237
+ DOTNET_MAUI = XAMARIN
238
+ NATIVESCRIPT = _JAVASCRIPT_BASE + ["nativescript.config.ts"]
239
+ UNITY = _CSHARP_BASE + [".unity", ".prefab", ".asset", ".mat", ".unitypackage"]
240
+ UNREAL_ENGINE = _C_CPP_BASE + [".uproject", ".uasset", ".ini"]
241
+ GODOT = [".gd", ".tscn", ".tres", "project.godot"]
242
+ LOVE2D = _LUA_BASE + ["conf.lua", "main.lua"]
243
+ MONOGAME = _CSHARP_BASE + [".mgcb"]
244
+ DOCKER = ["Dockerfile", ".dockerignore", "docker-compose.yml"]
245
+ TERRAFORM = [".tf", ".tfvars", ".tf.json"]
246
+ ANSIBLE = ["ansible.cfg", "inventory.ini"]
247
+ PULUMI = ["Pulumi.yaml"]
248
+ CHEF = _RUBY_BASE
249
+ PUPPET = [".pp"]
250
+ VAGRANT = ["Vagrantfile"]
251
+ GITHUB_ACTIONS = [".yml", ".yaml"]
252
+ GITLAB_CI = [".gitlab-ci.yml"]
253
+ JENKINS = ["Jenkinsfile"]
254
+ CIRCLE_CI = ["config.yml"]
255
+ KUBERNETES = [".yml", ".yaml"]
256
+ BICEP = [".bicep"]
257
+ CLOUDFORMATION = [".json", ".yml"]
258
+ DATA_SCIENCE_NOTEBOOKS = [".ipynb", ".Rmd"]
259
+ SQL = [".sql", ".ddl", ".dml"]
260
+ APACHE_SPARK = list(set(_SCALA_BASE + _PYTHON_BASE + _JAVA_BASE + _R_LANG_BASE))
261
+ ML_CONFIG = ["params.yaml"]
262
+ ELECTRON = _JAVASCRIPT_BASE
263
+ TAURI = _RUST_BASE + ["tauri.conf.json"]
264
+ QT = _C_CPP_BASE + [".pro", ".ui", ".qml"]
265
+ GTK = _C_CPP_BASE + [".ui", "meson.build"]
266
+ WPF = _CSHARP_BASE + [".xaml"]
267
+ WINDOWS_FORMS = _CSHARP_BASE
268
+ BASH = [".sh", ".bash"]
269
+ POWERSHELL = [".ps1", ".psm1"]
270
+ BATCH = [".bat", ".cmd"]
271
+ SOLIDITY = [".sol"]
272
+ VYPER = [".vy"]
273
+ VERILOG = [".v", ".vh"]
274
+ VHDL = [".vhd", ".vhdl"]
275
+ MARKUP = [".md", ".markdown", ".rst", ".adoc", ".asciidoc", ".tex", ".bib"]
276
+ CONFIGURATION = [
277
+ ".json",
278
+ ".xml",
279
+ ".yml",
280
+ ".yaml",
281
+ ".ini",
282
+ ".toml",
283
+ ".env",
284
+ ".conf",
285
+ ".cfg",
111
286
  ]
112
- JAVASCRIPT = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"]
113
- WEB = [".html", ".css", ".scss", ".less"]
114
- JAVA = [".java", ".groovy", ".kt", ".gradle", ".properties"]
287
+ EDITOR_CONFIG = [".editorconfig"]
288
+ LICENSE = ["LICENSE", "LICENSE.md", "COPYING"]
289
+ CHANGELOG = ["CHANGELOG", "CHANGELOG.md"]
115
290
 
116
291
 
117
292
  class IgnorePreset(Enum):
118
- """Predefined sets of path components and filename substrings to ignore."""
119
-
120
- VERSION_CONTROL = [".git", ".svn", ".hg", ".idea"]
121
- NODE_MODULES = ["node_modules", "package-lock.json", "yarn.lock"]
122
- PYTHON_ENV = ["__pycache__", "venv", ".venv", "env", "lib", "bin"]
123
- BUILD_ARTIFACTS = ["dist", "build", "target", "out", "temp", "tmp"]
124
- TEST_FILES = ["test", "spec", "fixture", "example", "mock"]
125
-
126
-
127
- class TreeStylePreset(Enum):
128
- """Predefined character sets for directory tree rendering."""
129
-
130
- UNICODE = ("├── ", "└── ", "│ ", " ")
131
- ASCII = ("|-- ", "+-- ", "| ", " ")
132
- COMPACT = ("|---", "`---", "| ", " ")
133
-
134
- def to_style(self) -> "TreeStyle":
135
- return TreeStyle(self.value[0], self.value[1], self.value[2], self.value[3])
136
-
137
-
138
- class TreeStyle(NamedTuple):
139
- """Holds the characters used to render the directory tree."""
293
+ """Provides an extensive list of presets for common directories, files, and patterns to ignore."""
294
+
295
+ VERSION_CONTROL = [".git", ".svn", ".hg", ".bzr", ".gitignore", ".gitattributes"]
296
+ OS_FILES = [".DS_Store", "Thumbs.db", "desktop.ini", "ehthumbs.db"]
297
+ BUILD_ARTIFACTS = [
298
+ "dist",
299
+ "build",
300
+ "target",
301
+ "out",
302
+ "bin",
303
+ "obj",
304
+ "release",
305
+ "debug",
306
+ ]
307
+ LOGS = ["*.log", "logs", "npm-debug.log*", "yarn-debug.log*", "yarn-error.log*"]
308
+ TEMP_FILES = ["temp", "tmp", "*.tmp", "*~", "*.bak", "*.swp", "*.swo"]
309
+ SECRET_FILES = [
310
+ ".env",
311
+ "*.pem",
312
+ "*.key",
313
+ "credentials.json",
314
+ "*.p12",
315
+ "*.pfx",
316
+ "secrets.yml",
317
+ ".env.local",
318
+ ]
319
+ COMPRESSED_ARCHIVES = ["*.zip", "*.tar", "*.gz", "*.rar", "*.7z", "*.tgz"]
320
+ IDE_METADATA_VSCODE = _IDE_VSCODE
321
+ IDE_METADATA_JETBRAINS = _IDE_JETBRAINS
322
+ IDE_METADATA_SUBLIME = _IDE_SUBLIME
323
+ IDE_METADATA_ECLIPSE = _IDE_ECLIPSE
324
+ IDE_METADATA_NETBEANS = _IDE_NETBEANS
325
+ IDE_METADATA_ATOM = _IDE_ATOM
326
+ IDE_METADATA_VIM = _IDE_VIM
327
+ IDE_METADATA_XCODE = _IDE_XCODE
328
+ IDE_METADATA = list(
329
+ set(
330
+ _IDE_VSCODE
331
+ + _IDE_JETBRAINS
332
+ + _IDE_SUBLIME
333
+ + _IDE_ECLIPSE
334
+ + _IDE_NETBEANS
335
+ + _IDE_ATOM
336
+ + _IDE_VIM
337
+ + _IDE_XCODE
338
+ )
339
+ )
340
+ NODE_JS = [
341
+ "node_modules",
342
+ "package-lock.json",
343
+ "yarn.lock",
344
+ "pnpm-lock.yaml",
345
+ ".npm",
346
+ ]
347
+ PYTHON = [
348
+ "__pycache__",
349
+ "venv",
350
+ ".venv",
351
+ "env",
352
+ "lib",
353
+ "lib64",
354
+ ".pytest_cache",
355
+ ".tox",
356
+ "*.pyc",
357
+ ".mypy_cache",
358
+ "htmlcov",
359
+ ".coverage",
360
+ ]
361
+ RUBY = ["vendor/bundle", ".bundle", "Gemfile.lock", ".gem", "coverage"]
362
+ PHP = ["vendor", "composer.lock"]
363
+ DOTNET = ["bin", "obj", "*.user", "*.suo"]
364
+ RUST = ["target", "Cargo.lock"]
365
+ GO = ["vendor", "go.sum"]
366
+ JAVA_MAVEN = ["target"]
367
+ JAVA_GRADLE = [".gradle", "build"]
368
+ ELIXIR = ["_build", "deps", "mix.lock"]
369
+ DART_FLUTTER = [".dart_tool", ".packages", "build", ".flutter-plugins"]
370
+ ELM = ["elm-stuff"]
371
+ HASKELL = ["dist-newstyle", ".stack-work"]
372
+ TESTING_REPORTS = ["coverage", "junit.xml", "lcov.info", ".nyc_output"]
373
+ STATIC_SITE_GENERATORS = ["_site", "public", "resources"]
374
+ CMS_UPLOADS = ["wp-content/uploads"]
375
+ TERRAFORM = [".terraform", "*.tfstate", "*.tfstate.backup", ".terraform.lock.hcl"]
376
+ JUPYTER_NOTEBOOKS = [".ipynb_checkpoints"]
377
+ ANDROID = [".gradle", "build", "local.properties", "*.apk", "*.aab", "captures"]
378
+ IOS = ["Pods", "Carthage", "DerivedData", "build"]
379
+ UNITY = [
380
+ "Library",
381
+ "Temp",
382
+ "Logs",
383
+ "UserSettings",
384
+ "MemoryCaptures",
385
+ "Assets/AssetStoreTools",
386
+ ]
387
+ UNREAL_ENGINE = ["Intermediate", "Saved", "DerivedDataCache", ".vs"]
388
+ GODOT_ENGINE = [".import", "export_presets.cfg"]
389
+ SERVERLESS_FRAMEWORK = [".serverless"]
390
+ AWS = [".aws-sam"]
391
+ VERCEL = [".vercel"]
392
+ NETLIFY = [".netlify"]
393
+ MACOS = [
394
+ ".DS_Store",
395
+ ".AppleDouble",
396
+ ".LSOverride",
397
+ "._*",
398
+ ".Spotlight-V100",
399
+ ".Trashes",
400
+ ]
401
+ WINDOWS = ["Thumbs.db", "ehthumbs.db", "$RECYCLE.BIN/", "Desktop.ini"]
402
+ DEPRECATED_DEPENDENCIES = ["bower_components"]
140
403
 
141
- t_connector: str
142
- l_connector: str
143
- v_connector: str
144
- h_spacer: str
145
404
 
405
+ class FileToProcess(NamedTuple):
406
+ """Represents a file that needs to be processed and included in the output."""
146
407
 
147
- # --- Helper Data Structures ---
408
+ absolute_path: Path
409
+ relative_path_posix: str
148
410
 
149
411
 
150
412
  @dataclass
151
413
  class FilterCriteria:
152
- """Holds normalized filter criteria for files and directories."""
414
+ """Holds the combined filter criteria for scanning files and directories."""
153
415
 
154
416
  file_extensions: Set[str] = field(default_factory=set)
155
- exact_filenames: Set[str] = field(default_factory=set)
156
- whitelist_fname_substrings: Set[str] = field(default_factory=set)
157
- ignore_fname_substrings: Set[str] = field(default_factory=set)
158
- ignore_path_components: Set[str] = field(default_factory=set)
417
+ ignore_if_in_path: Set[str] = field(default_factory=set)
418
+ ignore_extensions: Set[str] = field(default_factory=set)
159
419
 
160
420
  @classmethod
161
421
  def normalize_inputs(
162
422
  cls,
163
- file_types: Optional[List[str]],
164
- whitelist_substrings: Optional[List[str]],
165
- ignore_filename_substrings: Optional[List[str]],
166
- ignore_path_components_list: Optional[List[str]],
167
- language_presets: Optional[List[LanguagePreset]] = None,
423
+ file_types: Optional[List[str]] = None,
424
+ ignore_if_in_path: Optional[List[str]] = None,
425
+ ignore_extensions: Optional[List[str]] = None,
426
+ lang_presets: Optional[List[LanguagePreset]] = None,
168
427
  ignore_presets: Optional[List[IgnorePreset]] = None,
169
428
  ) -> "FilterCriteria":
170
- all_file_types, all_ignore_paths, all_ignore_fnames = (
171
- set(file_types or []),
172
- set(ignore_path_components_list or []),
173
- set(ignore_filename_substrings or []),
174
- )
175
- if language_presets:
176
- for preset in language_presets:
177
- all_file_types.update(preset.value)
178
- if ignore_presets:
179
- for preset in ignore_presets:
180
- all_ignore_paths.update(preset.value)
181
- all_ignore_fnames.update(preset.value)
182
- norm_exts, norm_exact_fnames = set(), set()
183
- for ft in all_file_types:
184
- ft_lower = ft.lower().strip()
185
- if ft_lower.startswith("."):
186
- norm_exts.add(ft_lower)
187
- elif ft_lower:
188
- norm_exact_fnames.add(ft_lower)
429
+ """
430
+ Consolidates various filter inputs into a single FilterCriteria object.
431
+
432
+ Args:
433
+ file_types (list, optional): A list of file extensions to include.
434
+ ignore_if_in_path (list, optional): A list of directory/file names to ignore.
435
+ ignore_extensions (list, optional): A list of file extensions to ignore.
436
+ lang_presets (list, optional): A list of LanguagePreset enums.
437
+ ignore_presets (list, optional): A list of IgnorePreset enums.
438
+
439
+ Returns:
440
+ FilterCriteria: An object containing the combined sets of filters.
441
+ """
442
+ all_exts = {ft.lower().strip() for ft in file_types or []}
443
+ all_ignore_paths = {ip.lower().strip() for ip in ignore_if_in_path or []}
444
+ all_ignore_exts = {ie.lower().strip() for ie in ignore_extensions or []}
445
+
446
+ for p in lang_presets or []:
447
+ all_exts.update(p.value)
448
+ for p in ignore_presets or []:
449
+ all_ignore_paths.update(p.value)
450
+
189
451
  return cls(
190
- file_extensions=norm_exts,
191
- exact_filenames=norm_exact_fnames,
192
- whitelist_fname_substrings=(
193
- set(s.lower() for s in whitelist_substrings if s.strip())
194
- if whitelist_substrings
195
- else set()
196
- ),
197
- ignore_fname_substrings=set(
198
- s.lower() for s in all_ignore_fnames if s.strip()
199
- ),
200
- ignore_path_components=set(
201
- d.lower() for d in all_ignore_paths if d.strip()
202
- ),
452
+ file_extensions=all_exts,
453
+ ignore_if_in_path=all_ignore_paths,
454
+ ignore_extensions=all_ignore_exts,
203
455
  )
204
456
 
205
457
 
206
- class FileToProcess(NamedTuple):
207
- """Represents a file selected for content processing."""
208
-
209
- absolute_path: Path
210
- relative_path_posix: str
211
-
212
-
213
- # --- Helper Functions ---
458
+ # --- Core Logic Functions ---
459
+ def _discover_files(
460
+ root_dir: Path, criteria: FilterCriteria, progress: Any, task_id: Any
461
+ ) -> List[Path]:
462
+ """
463
+ Recursively scans a directory to find all files matching the criteria.
214
464
 
465
+ Args:
466
+ root_dir (Path): The directory to start the scan from.
467
+ criteria (FilterCriteria): The filtering criteria to apply.
468
+ progress (Any): The progress bar object (from rich or fallback).
469
+ task_id (Any): The ID of the progress bar task to update.
215
470
 
216
- def validate_root_directory(root_dir_param: Optional[str]) -> Optional[Path]:
217
- original_param_for_messaging = (
218
- root_dir_param if root_dir_param else "current working directory"
219
- )
220
- try:
221
- resolved_path = Path(root_dir_param or Path.cwd()).resolve(strict=True)
222
- except Exception as e:
223
- print(
224
- f"Error: Could not resolve root directory '{original_param_for_messaging}': {e}"
225
- )
226
- return None
227
- if not resolved_path.is_dir():
228
- print(f"Error: Root path '{resolved_path}' is not a directory.")
229
- return None
230
- return resolved_path
471
+ Returns:
472
+ List[Path]: A list of absolute paths to the candidate files.
473
+ """
474
+ candidate_files, dirs_scanned = [], 0
231
475
 
476
+ def recursive_scan(current_path: Path):
477
+ nonlocal dirs_scanned
478
+ try:
479
+ for entry in os.scandir(current_path):
480
+ entry_path, entry_lower = Path(entry.path), entry.name.lower()
481
+ if entry_lower in criteria.ignore_if_in_path:
482
+ continue
483
+ if entry.is_dir():
484
+ recursive_scan(entry_path)
485
+ dirs_scanned += 1
486
+ if progress:
487
+ progress.update(
488
+ task_id,
489
+ completed=dirs_scanned,
490
+ description=f"Discovering files in [cyan]{entry.name}[/cyan]",
491
+ )
492
+ elif entry.is_file():
493
+ file_ext = entry_path.suffix.lower()
494
+ if (
495
+ criteria.ignore_extensions
496
+ and file_ext in criteria.ignore_extensions
497
+ ):
498
+ continue
499
+ if (
500
+ not criteria.file_extensions
501
+ or file_ext in criteria.file_extensions
502
+ ):
503
+ candidate_files.append(entry_path)
504
+ except (PermissionError, FileNotFoundError):
505
+ pass
232
506
 
233
- def _should_include_entry(
234
- entry_path: Path,
235
- root_dir: Path,
236
- criteria: FilterCriteria,
237
- is_dir: bool,
238
- log_func: Optional[Callable[[str], None]] = None,
239
- ) -> bool:
240
- try:
241
- relative_path = entry_path.relative_to(root_dir)
242
- except ValueError:
243
- return False
244
- entry_name_lower = entry_path.name.lower()
245
- if criteria.ignore_path_components and any(
246
- part.lower() in criteria.ignore_path_components for part in relative_path.parts
247
- ):
248
- return False
249
- if is_dir:
250
- return True
251
- file_ext_lower = entry_path.suffix.lower()
252
- matched_type = (file_ext_lower in criteria.file_extensions) or (
253
- entry_name_lower in criteria.exact_filenames
254
- )
255
- if not criteria.file_extensions and not criteria.exact_filenames:
256
- matched_type = True
257
- if not matched_type:
258
- return False
259
- if criteria.whitelist_fname_substrings and not any(
260
- sub in entry_name_lower for sub in criteria.whitelist_fname_substrings
261
- ):
262
- return False
263
- if criteria.ignore_fname_substrings and any(
264
- sub in entry_name_lower for sub in criteria.ignore_fname_substrings
265
- ):
266
- return False
267
- return True
507
+ recursive_scan(root_dir)
508
+ return candidate_files
268
509
 
269
510
 
270
511
  def process_file_for_search(
271
512
  file_path: Path,
272
- normalized_keywords: List[str],
273
- search_file_contents: bool,
274
- full_path_compare: bool,
513
+ keywords: List[str],
514
+ search_content: bool,
515
+ full_path: bool,
516
+ activity: Dict,
517
+ read_binary_files: bool,
275
518
  ) -> Optional[Path]:
276
- compare_target = str(file_path) if full_path_compare else file_path.name
277
- if any(key in compare_target.lower() for key in normalized_keywords):
278
- return file_path
279
- if search_file_contents:
280
- try:
281
- with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
282
- for line in f:
283
- if any(key in line.lower() for key in normalized_keywords):
284
- return file_path
285
- except (IOError, OSError):
286
- pass
287
- return None
519
+ """
520
+ Processes a single file to see if it matches the search criteria.
288
521
 
522
+ A match can occur if a keyword is found in the filename or, if enabled,
523
+ within the file's content.
289
524
 
290
- def _calculate_total_stats(
291
- root_dir: Path, criteria: FilterCriteria
292
- ) -> Dict[Path, Tuple[int, int]]:
293
- stats: Dict[Path, Tuple[int, int]] = {}
294
- for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
295
- current_dir = Path(dirpath_str)
296
- all_children = [current_dir / d for d in dirnames] + [
297
- current_dir / f for f in filenames
298
- ]
299
- total_files, total_dirs = 0, 0
300
- for child_path in all_children:
525
+ Args:
526
+ file_path (Path): The absolute path to the file to process.
527
+ keywords (List[str]): A list of keywords to search for.
528
+ search_content (bool): If True, search the content of the file.
529
+ full_path (bool): If True, compare keywords against the full file path.
530
+ activity (Dict): A dictionary to track thread activity.
531
+ read_binary_files (bool): If True, attempt to read and search binary files.
532
+
533
+ Returns:
534
+ Optional[Path]: The path to the file if it's a match, otherwise None.
535
+ """
536
+ thread_id = threading.get_ident()
537
+ activity[thread_id] = file_path.name
538
+ try:
539
+ compare_target = str(file_path) if full_path else file_path.name
540
+ if any(key in compare_target.lower() for key in keywords):
541
+ return file_path
542
+
543
+ if search_content and (
544
+ read_binary_files or file_path.suffix.lower() not in BINARY_FILE_EXTENSIONS
545
+ ):
301
546
  try:
302
- is_dir = child_path.is_dir()
547
+ with file_path.open("r", encoding="utf-8", errors="ignore") as f:
548
+ for line in f:
549
+ if any(key in line.lower() for key in keywords):
550
+ return file_path
303
551
  except OSError:
304
- continue
305
- if criteria.ignore_path_components:
306
- try:
307
- relative_path = child_path.relative_to(root_dir)
308
- except ValueError:
309
- continue
310
- if any(
311
- part.lower() in criteria.ignore_path_components
312
- for part in relative_path.parts
313
- ):
314
- continue
315
- if is_dir:
316
- total_dirs += 1
317
- else:
318
- total_files += 1
319
- stats[current_dir] = (total_files, total_dirs)
320
- dirnames[:] = [
321
- d
322
- for d in dirnames
323
- if (current_dir / d).name.lower() not in criteria.ignore_path_components
324
- ]
325
- return stats
552
+ pass
553
+ return None
554
+ finally:
555
+ activity[thread_id] = ""
326
556
 
327
557
 
328
- # --- Tree Generation Functions ---
558
+ def _process_files_concurrently(
559
+ files: List[Path],
560
+ keywords: List[str],
561
+ search_content: bool,
562
+ full_path: bool,
563
+ max_workers: Optional[int],
564
+ progress: Any,
565
+ task_id: Any,
566
+ read_binary_files: bool,
567
+ ) -> Set[Path]:
568
+ """
569
+ Uses a thread pool to process a list of files for search matches concurrently.
570
+
571
+ Args:
572
+ files (List[Path]): The list of candidate files to search through.
573
+ keywords (List[str]): The keywords to search for.
574
+ search_content (bool): Whether to search inside file contents.
575
+ full_path (bool): Whether to compare keywords against the full path.
576
+ max_workers (Optional[int]): The maximum number of threads to use.
577
+ progress (Any): The progress bar object.
578
+ task_id (Any): The ID of the processing task on the progress bar.
579
+ read_binary_files (bool): If True, search the content of binary files.
580
+
581
+ Returns:
582
+ Set[Path]: A set of absolute paths for all files that matched.
583
+ """
584
+ matched_files, thread_activity = set(), {}
585
+ with ThreadPoolExecutor(
586
+ max_workers=max_workers or (os.cpu_count() or 1) + 4,
587
+ thread_name_prefix="scanner",
588
+ ) as executor:
589
+ future_to_file = {
590
+ executor.submit(
591
+ process_file_for_search,
592
+ f,
593
+ keywords,
594
+ search_content,
595
+ full_path,
596
+ thread_activity,
597
+ read_binary_files,
598
+ ): f
599
+ for f in files
600
+ }
601
+ for future in as_completed(future_to_file):
602
+ if progress:
603
+ active_threads = {
604
+ f"T{str(tid)[-3:]}": name
605
+ for tid, name in thread_activity.items()
606
+ if name
607
+ }
608
+ progress.update(
609
+ task_id,
610
+ advance=1,
611
+ description=f"Processing [yellow]{len(active_threads)} threads[/yellow]",
612
+ )
613
+ if RICH_AVAILABLE:
614
+ status_panel = Panel(
615
+ Text(
616
+ "\n".join(
617
+ f"[bold cyan]{k}[/]: {v}"
618
+ for k, v in active_threads.items()
619
+ )
620
+ ),
621
+ border_style="dim",
622
+ title="[dim]Thread Activity",
623
+ )
624
+ progress.update(task_id, status=status_panel)
625
+ if result := future.result():
626
+ matched_files.add(result)
627
+ if progress and RICH_AVAILABLE:
628
+ progress.update(task_id, status="[bold green]Done![/bold green]")
629
+ return matched_files
329
630
 
330
631
 
331
- def _generate_tree_lines(
332
- root_dir: Path, criteria: FilterCriteria, style: TreeStyle, show_stats: bool
632
+ def _generate_tree_with_stats(
633
+ root_dir: Path, file_paths: List[Path], show_stats: bool
333
634
  ) -> List[str]:
334
- """Generates a list of strings representing the directory tree based on criteria, style, and stats."""
335
- dir_stats: Optional[Dict[Path, Tuple[int, int]]] = (
336
- _calculate_total_stats(root_dir, criteria) if show_stats else None
337
- )
338
- tree_lines: List[str] = []
339
-
340
- def format_dir_name(
341
- path: Path, path_name: str, included_files: int, included_dirs: int
342
- ) -> str:
343
- if not show_stats or not dir_stats:
344
- return path_name
345
- total_files, total_dirs = dir_stats.get(path, (0, 0))
635
+ """
636
+ Generates a directory tree structure from a list of file paths.
346
637
 
347
- stats_str = f" [I: {included_files}f, {included_dirs}d | T: {total_files}f, {total_dirs}d]"
348
- return path_name + stats_str
638
+ Args:
639
+ root_dir (Path): The root directory of the project, used as the tree's base.
640
+ file_paths (List[Path]): A list of file paths to include in the tree.
641
+ show_stats (bool): If True, include file and directory counts in the tree.
349
642
 
350
- def _recursive_build(current_path: Path, prefix_parts: List[str]):
351
- try:
352
- entries = sorted(current_path.iterdir(), key=lambda p: p.name.lower())
353
- except OSError as e:
354
- error_prefix = "".join(prefix_parts) + style.l_connector
355
- tree_lines.append(
356
- error_prefix + f"[Error accessing: {current_path.name} - {e.strerror}]"
357
- )
358
- return
359
- displayable_children: List[Tuple[Path, bool]] = []
360
- for e in entries:
361
- try:
362
- is_dir = e.is_dir()
363
- except OSError:
364
- continue
365
- if _should_include_entry(
366
- e, root_dir, criteria, is_dir=is_dir, log_func=None
367
- ):
368
- displayable_children.append((e, is_dir))
369
- num_children = len(displayable_children)
370
- included_files_in_level = sum(
371
- 1 for _, is_dir in displayable_children if not is_dir
372
- )
373
- included_dirs_in_level = sum(1 for _, is_dir in displayable_children if is_dir)
374
- if not prefix_parts:
375
- tree_lines.append(
376
- format_dir_name(
377
- current_path,
378
- current_path.name,
379
- included_files_in_level,
380
- included_dirs_in_level,
381
- )
382
- )
383
- for i, (child_path, child_is_dir) in enumerate(displayable_children):
384
- is_last = i == num_children - 1
385
- connector = style.l_connector if is_last else style.t_connector
386
- entry_name = child_path.name
387
- if child_is_dir:
388
- try:
389
- child_entries = sorted(
390
- child_path.iterdir(), key=lambda p: p.name.lower()
391
- )
392
- child_displayable_children = [
393
- (e, e.is_dir())
394
- for e in child_entries
395
- if _should_include_entry(
396
- e, root_dir, criteria, is_dir=e.is_dir(), log_func=None
397
- )
398
- ]
399
- child_included_files = sum(
400
- 1 for _, is_dir in child_displayable_children if not is_dir
401
- )
402
- child_included_dirs = sum(
403
- 1 for _, is_dir in child_displayable_children if is_dir
404
- )
405
- entry_name = format_dir_name(
406
- child_path,
407
- child_path.name,
408
- child_included_files,
409
- child_included_dirs,
410
- )
411
- except OSError:
412
- pass
413
- tree_lines.append("".join(prefix_parts) + connector + entry_name)
414
- if child_is_dir:
415
- new_prefix_parts = prefix_parts + [
416
- style.h_spacer if is_last else style.v_connector
417
- ]
418
- _recursive_build(child_path, new_prefix_parts)
643
+ Returns:
644
+ List[str]: A list of strings, where each string is a line in the tree.
645
+ """
646
+ tree_dict: Dict[str, Any] = {}
647
+ for path in file_paths:
648
+ level = tree_dict
649
+ for part in path.relative_to(root_dir).parts:
650
+ level = level.setdefault(part, {})
419
651
 
420
- _recursive_build(root_dir, [])
421
- return tree_lines
652
+ def count_children(d: Dict) -> Tuple[int, int]:
653
+ files = sum(1 for v in d.values() if not v)
654
+ dirs = len(d) - files
655
+ return files, dirs
422
656
 
657
+ lines = []
658
+ style = ("├── ", "└── ", "│ ", " ")
423
659
 
424
- def _generate_tree_from_paths(
425
- root_dir: Path, file_paths: List[Path], style: TreeStyle, show_stats: bool
426
- ) -> List[str]:
427
- """Generates a directory tree structure from a list of *matched* file paths using the given style."""
428
- tree_dict: Dict[str, Any] = {}
429
- matched_paths = {p.relative_to(root_dir) for p in file_paths}
430
- for rel_path in matched_paths:
431
- parts = rel_path.parts
432
- current_level = tree_dict
433
- for part in parts:
434
- current_level = current_level.setdefault(part, {})
435
- tree_lines: List[str] = []
436
-
437
- def format_dir_name_search(name: str, matched_files: int, matched_dirs: int) -> str:
438
- if not show_stats:
439
- return name
440
-
441
- stats_str = f" [M: {matched_files}f, {matched_dirs}d]"
442
- return name + stats_str
443
-
444
- def build_lines(d: Dict[str, Any], prefix: str):
445
- items = sorted(d.keys(), key=lambda k: (len(d[k]) == 0, k.lower()))
446
- num_children = len(items)
447
- matched_files_in_level = sum(1 for k in items if not d[k])
448
- matched_dirs_in_level = sum(1 for k in items if d[k])
449
- if not prefix:
450
- tree_lines.append(
451
- format_dir_name_search(
452
- root_dir.name, matched_files_in_level, matched_dirs_in_level
453
- )
454
- )
660
+ def build_lines_recursive(d: Dict, prefix: str = ""):
661
+ items = sorted(d.keys(), key=lambda k: (not d[k], k.lower()))
455
662
  for i, name in enumerate(items):
456
- is_last = i == num_children - 1
457
- connector = style.l_connector if is_last else style.t_connector
458
- entry_name = name
459
- if d[name]:
460
- child_matched_files = sum(1 for k in d[name] if not d[name][k])
461
- child_matched_dirs = sum(1 for k in d[name] if d[name][k])
462
- entry_name = format_dir_name_search(
463
- name, child_matched_files, child_matched_dirs
464
- )
465
- tree_lines.append(prefix + connector + entry_name)
663
+ is_last = i == len(items) - 1
664
+ connector = style[1] if is_last else style[0]
665
+ display_name = name
666
+
466
667
  if d[name]:
467
- extension = style.h_spacer if is_last else style.v_connector
468
- build_lines(d[name], prefix + extension)
668
+ if show_stats:
669
+ files, dirs = count_children(d[name])
670
+ display_name += f" [dim][M: {files}f, {dirs}d][/dim]"
469
671
 
470
- build_lines(tree_dict, "")
471
- return tree_lines
672
+ lines.append(f"{prefix}{connector}{display_name}")
673
+
674
+ if d[name]:
675
+ extension = style[3] if is_last else style[2]
676
+ build_lines_recursive(d[name], prefix + extension)
472
677
 
678
+ root_name = f"[bold cyan]{root_dir.name}[/bold cyan]"
679
+ if show_stats:
680
+ files, dirs = count_children(tree_dict)
681
+ root_name += f" [dim][M: {files}f, {dirs}d][/dim]"
682
+ lines.append(root_name)
473
683
 
474
- # --- Collation and Main Modes ---
684
+ build_lines_recursive(tree_dict)
685
+ return lines
475
686
 
476
687
 
477
688
def _collate_content_to_file(
    output_path: Path,
    tree_lines: List,
    files: List[FileToProcess],
    show_tree_stats: bool,
    show_token_count: bool,
    exclude_whitespace: bool,
    progress: Any,
    task_id: Any,
) -> Tuple[float, int]:
    """
    Collates the file tree and file contents into a single output file.

    The whole document is assembled in memory first so that the optional
    token-count line can be written at the top of the file, ahead of the
    content it describes.

    Args:
        output_path (Path): The path to the final output file. Missing parent
            directories are created.
        tree_lines (List): The generated file tree lines. Console markup in the
            lines is removed before they are written.
        files (List[FileToProcess]): The files whose content needs to be collated.
        show_tree_stats (bool): Whether to include the stats key in the header.
        show_token_count (bool): Whether to calculate and include the token count.
        exclude_whitespace (bool): If True, exclude whitespace from token counting.
        progress (Any): The progress bar object; skipped when falsy.
        task_id (Any): The ID of the collation task on the progress bar.

    Returns:
        Tuple[float, int]: The number of bytes of file content that was read
        successfully (encoded with DEFAULT_ENCODING; headers and the tree are
        not counted) and the character-based token count (0 when
        ``show_token_count`` is False).
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    buffer, total_bytes, token_count = StringIO(), 0, 0

    if RICH_AVAILABLE:
        # Local import: only resolvable when rich is installed.
        from rich.markup import escape as escape_markup
    else:

        def escape_markup(text: str) -> str:
            """Returns ``text`` unchanged; no markup is interpreted without rich."""
            return text

    if tree_lines:
        buffer.write(f"{TREE_HEADER_TEXT}\n" + "-" * 80 + "\n\n")
        if show_tree_stats:
            buffer.write(
                "Key: [M: Matched files/dirs]\n (f=files, d=directories)\n\n"
            )

        if RICH_AVAILABLE:
            plain_lines = [Text.from_markup(line).plain for line in tree_lines]
        else:
            # Without rich the tags cannot be parsed, so remove just the
            # decoration tags the tree generator emits instead of writing
            # them into the output file.
            decoration_tags = re.compile(r"\[/?(?:bold cyan|dim)\]")
            plain_lines = [decoration_tags.sub("", line) for line in tree_lines]
        buffer.write("\n".join(plain_lines) + "\n\n")

    for file_info in files:
        if progress:
            # Escape the path so brackets in it are shown literally rather
            # than being parsed as (possibly invalid) style tags.
            shown_path = escape_markup(file_info.relative_path_posix)
            progress.update(
                task_id,
                advance=1,
                description=f"Collating [green]{shown_path}[/green]",
            )
        buffer.write(f"{'-'*80}\nFILE: {file_info.relative_path_posix}\n{'-'*80}\n\n")
        try:
            content = file_info.absolute_path.read_text(
                encoding=DEFAULT_ENCODING, errors="replace"
            )
            buffer.write(content + "\n\n")
            total_bytes += len(content.encode(DEFAULT_ENCODING))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole
            # snapshot, so record the problem in the output and carry on.
            buffer.write(f"Error: Could not read file. Issue: {e}\n\n")

    final_content = buffer.getvalue()
    if show_token_count:
        content_for_count = (
            re.sub(r"\s", "", final_content) if exclude_whitespace else final_content
        )
        # Tokens are approximated by the number of characters.
        token_count = len(content_for_count)

    with output_path.open("w", encoding=DEFAULT_ENCODING) as outfile:
        if show_token_count:
            mode = "chars, no whitespace" if exclude_whitespace else "characters"
            outfile.write(f"Token Count ({mode}): {token_count}\n\n")
        outfile.write(final_content)

    return total_bytes, token_count
702
761
 
703
- if (
704
- not criteria.file_extensions
705
- or file_abs_path.suffix.lower() in criteria.file_extensions
706
- ):
707
- candidate_files.append(file_abs_path)
708
762
 
709
- matched_files: Set[Path] = set()
710
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
711
- future_to_file = {
712
- executor.submit(
713
- process_file_for_search,
714
- file,
715
- normalized_keywords,
716
- search_file_contents,
717
- full_path_compare,
718
- ): file
719
- for file in candidate_files
720
- }
721
- progress_bar = tqdm(
722
- as_completed(future_to_file),
723
- total=len(candidate_files),
724
- unit="file",
725
- desc="Scanning",
726
- )
727
- for future in progress_bar:
728
- result = future.result()
729
- if result:
730
- matched_files.add(result)
731
-
732
- if not matched_files:
733
- print("\nScan complete. No matching files were found.")
734
- # Still create the output file with a "not found" message
735
- with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
736
- f_out.write("No files found matching the specified criteria.\n")
763
+ # --- Main Entry Point ---
764
+ def generate_snapshot(
765
+ root_directory: str = ".",
766
+ output_file_name: str = "project_snapshot.txt",
767
+ search_keywords: Optional[List[str]] = None,
768
+ file_extensions: Optional[List[str]] = None,
769
+ ignore_if_in_path: Optional[List[str]] = None,
770
+ ignore_extensions: Optional[List[str]] = None,
771
+ language_presets: Optional[List[LanguagePreset]] = None,
772
+ ignore_presets: Optional[List[IgnorePreset]] = None,
773
+ search_file_contents: bool = True,
774
+ full_path_compare: bool = True,
775
+ max_workers: Optional[int] = None,
776
+ generate_tree: bool = True,
777
+ show_tree_stats: bool = False,
778
+ show_token_count: bool = False,
779
+ exclude_whitespace_in_token_count: bool = False,
780
+ read_binary_files: bool = False,
781
+ ) -> None:
782
+ """
783
+ Orchestrates the entire process of scanning, filtering, and collating project files.
784
+
785
+ This function serves as the main entry point for the utility. It can be used
786
+ to create a full "snapshot" of a project's source code or to search for
787
+ specific keywords within file names and/or contents. It is highly configurable
788
+ through presets and manual overrides.
789
+
790
+ Args:
791
+ root_directory (str): The starting directory for the scan. Defaults to ".".
792
+ output_file_name (str): The name of the file to save the results to.
793
+ Defaults to "project_snapshot.txt".
794
+ search_keywords (List[str], optional): A list of keywords to search for. If
795
+ None or empty, the function runs in "snapshot" mode, including all
796
+ files that match the other criteria. Defaults to None.
797
+ file_extensions (List[str], optional): A list of specific file
798
+ extensions to include (e.g., [".py", ".md"]). Defaults to None.
799
+ ignore_if_in_path (List[str], optional): A list of directory or file
800
+ names to exclude from the scan. Defaults to None.
801
+ ignore_extensions (List[str], optional): A list of file extensions to
802
+ explicitly ignore (e.g., [".log", ".tmp"]). Defaults to None.
803
+ language_presets (List[LanguagePreset], optional): A list of LanguagePreset
804
+ enums for common file types (e.g., [LanguagePreset.PYTHON]). Defaults to None.
805
+ ignore_presets (List[IgnorePreset], optional): A list of IgnorePreset enums
806
+ for common ignore patterns (e.g., [IgnorePreset.PYTHON]). Defaults to None.
807
+ search_file_contents (bool): If True, search for keywords within file
808
+ contents. Defaults to True.
809
+ full_path_compare (bool): If True, search for keywords in the full file path,
810
+ not just the filename. Defaults to True.
811
+ max_workers (Optional[int]): The maximum number of worker threads for
812
+ concurrent processing. Defaults to CPU count + 4.
813
+ generate_tree (bool): If True, a file tree of the matched files will be
814
+ included at the top of the output file. Defaults to True.
815
+ show_tree_stats (bool): If True, display file and directory counts in the
816
+ generated tree. Defaults to False.
817
+ show_token_count (bool): If True, display an approximated token count in the
818
+ summary and output file. Defaults to False.
819
+ exclude_whitespace_in_token_count (bool): If True, whitespace is removed
820
+ before counting tokens, giving a more compact count. Defaults to False.
821
+ read_binary_files (bool): If True, the content search will attempt to read
822
+ and search through binary files. Defaults to False.
823
+ """
824
+ console, start_time = ConsoleManager(), time.perf_counter()
825
+ root_dir = Path(root_directory or ".").resolve()
826
+ if not root_dir.is_dir():
827
+ console.log(f"Error: Root directory '{root_dir}' not found.", style="bold red")
737
828
  return
738
829
 
739
- sorted_matched_files = sorted(
740
- list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
830
+ keywords = [k.lower().strip() for k in search_keywords or [] if k.strip()]
831
+ snapshot_mode = not keywords
832
+ criteria = FilterCriteria.normalize_inputs(
833
+ file_types=file_extensions,
834
+ ignore_if_in_path=ignore_if_in_path,
835
+ ignore_extensions=ignore_extensions,
836
+ lang_presets=language_presets,
837
+ ignore_presets=ignore_presets,
741
838
  )
742
839
 
743
- print(f"\nPhase 1 Complete: Found {len(sorted_matched_files)} matching files.")
744
- print(f"\nPhase 2: Generating output file at '{Path(output_file).resolve()}'...")
745
-
746
- tree_content_lines = _generate_tree_from_paths(
747
- root_dir, sorted_matched_files, tree_style, show_tree_stats
748
- )
749
- files_to_process = [
750
- FileToProcess(f, f.relative_to(root_dir).as_posix())
751
- for f in sorted_matched_files
840
+ config_rows = [
841
+ ["Root Directory", str(root_dir)],
842
+ ["File Types", ", ".join(criteria.file_extensions) or "All"],
843
+ ["Ignore Paths", ", ".join(criteria.ignore_if_in_path) or "None"],
844
+ ["Ignore Extensions", ", ".join(criteria.ignore_extensions) or "None"],
845
+ ["Generate Tree", "[green]Yes[/green]" if generate_tree else "[red]No[/red]"],
752
846
  ]
753
- _collate_content_to_file(
754
- output_file,
755
- tree_content_lines,
756
- files_to_process,
757
- DEFAULT_ENCODING,
758
- DEFAULT_SEPARATOR_CHAR,
759
- DEFAULT_SEPARATOR_LINE_LENGTH,
760
- show_token_count,
761
- show_tree_stats,
762
- ProjectMode.SEARCH,
763
- )
764
-
765
-
766
- # --- DECONSTRUCTION FUNCTION ---
767
-
768
-
769
- def deconstruct_snapshot(snapshot_file_path: str) -> Dict[str, Any]:
770
- """Scans a compiled snapshot file, extracts the directory tree lines and file paths."""
771
- snapshot_path = Path(snapshot_file_path)
772
- if not snapshot_path.is_file():
773
- raise FileNotFoundError(f"Snapshot file not found: {snapshot_file_path}")
774
- tree_lines: List[str] = []
775
- file_paths: List[str] = []
776
- separator_pattern = re.compile(
777
- r"^[{}]{{4,}}[{}|]*$".format(
778
- re.escape(DEFAULT_SEPARATOR_CHAR), re.escape(DEFAULT_SEPARATOR_CHAR)
847
+ if generate_tree:
848
+ config_rows.append(
849
+ ["Tree Stats", "[green]Yes[/green]" if show_tree_stats else "[red]No[/red]"]
779
850
  )
851
+ config_rows.append(
852
+ [
853
+ "Show Token Count",
854
+ "[green]Yes[/green]" if show_token_count else "[red]No[/red]",
855
+ ]
780
856
  )
781
- state = "LOOKING_FOR_TREE"
782
- with open(snapshot_path, "r", encoding=DEFAULT_ENCODING, errors="replace") as f:
783
- for line in f:
784
- line = line.strip()
785
- if state == "LOOKING_FOR_TREE":
786
- if line == TREE_HEADER_TEXT:
787
- state = "READING_TREE"
788
- elif state == "READING_TREE":
789
- if not line or separator_pattern.match(line):
790
- if tree_lines and separator_pattern.match(line):
791
- state = "LOOKING_FOR_CONTENT"
792
- continue
793
- if state == "READING_TREE" and not line.startswith("Key:"):
794
- tree_lines.append(line)
795
- elif state == "LOOKING_FOR_CONTENT":
796
- if line.startswith(FILE_HEADER_PREFIX):
797
- file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
798
- state = "READING_CONTENT"
799
- elif state == "READING_CONTENT":
800
- if line.startswith(FILE_HEADER_PREFIX):
801
- file_paths.append(line[len(FILE_HEADER_PREFIX) :].strip())
802
- # Post-process to remove the key lines if they were accidentally captured
803
- tree_lines = [
804
- line
805
- for line in tree_lines
806
- if not line.strip().startswith("Key:")
807
- and not line.strip().startswith("(f=files")
808
- ]
809
- return {"tree_lines": tree_lines, "file_paths": file_paths}
810
-
811
-
812
- # --- UNIFIED ENTRY POINT AND UTILITY WRAPPERS ---
813
-
857
+ if show_token_count:
858
+ config_rows.append(
859
+ [
860
+ "Exclude Whitespace",
861
+ (
862
+ "[green]Yes[/green]"
863
+ if exclude_whitespace_in_token_count
864
+ else "[red]No[/red]"
865
+ ),
866
+ ]
867
+ )
814
868
 
815
- def process_project(
816
- root_dir_param: Optional[str] = None,
817
- output_file_name: str = "project_output.txt",
818
- mode: ProjectMode = ProjectMode.FILTER,
819
- file_types: Optional[List[str]] = None,
820
- ignore_dirs_in_path: Optional[List[str]] = None,
821
- language_presets: Optional[List[LanguagePreset]] = None,
822
- ignore_presets: Optional[List[IgnorePreset]] = None,
823
- whitelist_filename_substrings: Optional[List[str]] = None,
824
- ignore_filename_substrings: Optional[List[str]] = None,
825
- generate_tree: bool = True,
826
- search_keywords: Optional[List[str]] = None,
827
- search_file_contents: bool = False,
828
- full_path_compare: bool = True,
829
- max_workers: Optional[int] = None,
830
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
831
- tree_style_t_connector: Optional[str] = None,
832
- tree_style_l_connector: Optional[str] = None,
833
- tree_style_v_connector: Optional[str] = None,
834
- tree_style_h_spacer: Optional[str] = None,
835
- show_token_count: bool = False,
836
- show_tree_stats: bool = False,
837
- encoding: str = DEFAULT_ENCODING,
838
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
839
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
840
- ) -> None:
841
- """Main function to process a project directory in either FILTER or SEARCH mode."""
842
- actual_root_dir = validate_root_directory(root_dir_param)
843
- if actual_root_dir is None:
844
- sys.exit(1)
845
- style = tree_style_preset.to_style()
846
- final_style = TreeStyle(
847
- t_connector=tree_style_t_connector or style.t_connector,
848
- l_connector=tree_style_l_connector or style.l_connector,
849
- v_connector=tree_style_v_connector or style.v_connector,
850
- h_spacer=tree_style_h_spacer or style.h_spacer,
851
- )
852
- print(f"--- Starting Project Processing in {mode.name} Mode ---")
853
- if mode == ProjectMode.FILTER:
854
- filter_and_append_content(
855
- actual_root_dir,
856
- output_file_name,
857
- final_style,
858
- generate_tree,
859
- file_types,
860
- whitelist_filename_substrings,
861
- ignore_filename_substrings,
862
- ignore_dirs_in_path,
863
- language_presets,
864
- ignore_presets,
865
- encoding,
866
- separator_char,
867
- separator_line_len,
868
- show_token_count,
869
- show_tree_stats,
869
+ if snapshot_mode:
870
+ config_rows.insert(1, ["Mode", "[bold blue]Snapshot[/bold blue]"])
871
+ else:
872
+ config_rows.insert(1, ["Mode", "[bold yellow]Search[/bold yellow]"])
873
+ config_rows.insert(
874
+ 2, ["Search Keywords", f"[yellow]{', '.join(keywords)}[/yellow]"]
875
+ )
876
+ config_rows.append(
877
+ [
878
+ "Search Content",
879
+ "[green]Yes[/green]" if search_file_contents else "[red]No[/red]",
880
+ ]
870
881
  )
871
- elif mode == ProjectMode.SEARCH:
872
- if not search_keywords:
873
- print("Error: Search mode requires 'search_keywords' to be provided.")
874
- return
875
- search_and_collate_content(
876
- actual_root_dir,
877
- search_keywords,
878
- output_file_name,
879
- final_style,
880
- file_types,
881
- ignore_dirs_in_path,
882
- language_presets,
883
- ignore_presets,
884
- search_file_contents,
885
- max_workers,
886
- full_path_compare,
887
- show_token_count,
888
- show_tree_stats,
882
+ config_rows.append(
883
+ [
884
+ "Read Binary Files",
885
+ "[green]Yes[/green]" if read_binary_files else "[red]No[/red]",
886
+ ]
889
887
  )
890
- print("--- Script Execution Finished ---")
888
+ console.print_table(
889
+ "Project Scan Configuration", ["Parameter", "Value"], config_rows
890
+ )
891
891
 
892
+ @contextmanager
893
+ def progress_manager():
894
+ if RICH_AVAILABLE:
895
+ progress = Progress(
896
+ TextColumn("[progress.description]{task.description}"),
897
+ BarColumn(),
898
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
899
+ SpinnerColumn(),
900
+ TimeElapsedColumn(),
901
+ "{task.fields[status]}",
902
+ expand=True,
903
+ )
904
+ with Live(progress, console=console.console, refresh_per_second=10) as live:
905
+ yield progress
906
+ else:
907
+ with FallbackProgress() as progress:
908
+ yield progress
909
+
910
+ with progress_manager() as progress:
911
+ discover_task = progress.add_task("Discovering files", total=None, status="")
912
+ candidate_files = _discover_files(root_dir, criteria, progress, discover_task)
913
+ if RICH_AVAILABLE:
914
+ progress.update(
915
+ discover_task,
916
+ description=f"Discovered [bold green]{len(candidate_files)}[/bold green] candidates",
917
+ status="",
918
+ )
919
+ else:
920
+ progress.update(
921
+ discover_task,
922
+ description=f"Discovered {len(candidate_files)} candidates",
923
+ )
892
924
 
893
- def filter_project(
894
- root_dir_param: Optional[str] = None,
895
- output_file_name: str = "project_filter_output.txt",
896
- file_types: Optional[List[str]] = None,
897
- ignore_dirs_in_path: Optional[List[str]] = None,
898
- language_presets: Optional[List[LanguagePreset]] = None,
899
- ignore_presets: Optional[List[IgnorePreset]] = None,
900
- whitelist_filename_substrings: Optional[List[str]] = None,
901
- ignore_filename_substrings: Optional[List[str]] = None,
902
- generate_tree: bool = True,
903
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
904
- tree_style_t_connector: Optional[str] = None,
905
- tree_style_l_connector: Optional[str] = None,
906
- tree_style_v_connector: Optional[str] = None,
907
- tree_style_h_spacer: Optional[str] = None,
908
- show_token_count: bool = False,
909
- show_tree_stats: bool = False,
910
- encoding: str = DEFAULT_ENCODING,
911
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
912
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
913
- ) -> None:
914
- """Utility wrapper for process_project in FILTER mode."""
915
- process_project(
916
- root_dir_param=root_dir_param,
917
- output_file_name=output_file_name,
918
- mode=ProjectMode.FILTER,
919
- file_types=file_types,
920
- ignore_dirs_in_path=ignore_dirs_in_path,
921
- language_presets=language_presets,
922
- ignore_presets=ignore_presets,
923
- whitelist_filename_substrings=whitelist_filename_substrings,
924
- ignore_filename_substrings=ignore_filename_substrings,
925
- generate_tree=generate_tree,
926
- tree_style_preset=tree_style_preset,
927
- tree_style_t_connector=tree_style_t_connector,
928
- tree_style_l_connector=tree_style_l_connector,
929
- tree_style_v_connector=tree_style_v_connector,
930
- tree_style_h_spacer=tree_style_h_spacer,
931
- show_token_count=show_token_count,
932
- show_tree_stats=show_tree_stats,
933
- encoding=encoding,
934
- separator_char=separator_char,
935
- separator_line_len=separator_line_len,
936
- )
925
+ matched_files = set()
926
+ if candidate_files:
927
+ if snapshot_mode:
928
+ matched_files = set(candidate_files)
929
+ if RICH_AVAILABLE:
930
+ progress.add_task(
931
+ "[dim]Keyword Processing[/dim]",
932
+ total=1,
933
+ completed=1,
934
+ status="[bold blue](Snapshot Mode)[/bold blue]",
935
+ )
936
+ else:
937
+ process_task = progress.add_task(
938
+ f"Processing {len(candidate_files)} files",
939
+ total=len(candidate_files),
940
+ status="",
941
+ )
942
+ matched_files = _process_files_concurrently(
943
+ candidate_files,
944
+ keywords,
945
+ search_file_contents,
946
+ full_path_compare,
947
+ max_workers,
948
+ progress,
949
+ process_task,
950
+ read_binary_files,
951
+ )
937
952
 
953
+ output_path, total_bytes, token_count = None, 0, 0
954
+ if matched_files:
955
+ sorted_files = sorted(
956
+ list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix()
957
+ )
958
+ tree_lines = []
959
+ if generate_tree:
960
+ tree_task = progress.add_task(
961
+ "Generating file tree...", total=1, status=""
962
+ )
963
+ tree_lines = _generate_tree_with_stats(
964
+ root_dir, sorted_files, show_tree_stats
965
+ )
966
+ progress.update(
967
+ tree_task, completed=1, description="Generated file tree"
968
+ )
938
969
 
939
- def find_in_project(
940
- root_dir_param: Optional[str] = None,
941
- output_file_name: str = "project_search_output.txt",
942
- search_keywords: Optional[List[str]] = None,
943
- file_extensions_to_check: Optional[List[str]] = None,
944
- ignore_dirs_in_path: Optional[List[str]] = None,
945
- language_presets: Optional[List[LanguagePreset]] = None,
946
- ignore_presets: Optional[List[IgnorePreset]] = None,
947
- search_file_contents: bool = False,
948
- full_path_compare: bool = True,
949
- max_workers: Optional[int] = None,
950
- tree_style_preset: TreeStylePreset = TreeStylePreset.UNICODE,
951
- tree_style_t_connector: Optional[str] = None,
952
- tree_style_l_connector: Optional[str] = None,
953
- tree_style_v_connector: Optional[str] = None,
954
- tree_style_h_spacer: Optional[str] = None,
955
- show_token_count: bool = False,
956
- show_tree_stats: bool = False,
957
- encoding: str = DEFAULT_ENCODING,
958
- separator_char: str = DEFAULT_SEPARATOR_CHAR,
959
- separator_line_len: int = DEFAULT_SEPARATOR_LINE_LENGTH,
960
- ) -> None:
961
- """Utility wrapper for process_project in SEARCH mode."""
962
- if not search_keywords:
963
- print("Error: 'search_keywords' must be provided for find_in_project.")
964
- return
965
- process_project(
966
- root_dir_param=root_dir_param,
967
- output_file_name=output_file_name,
968
- mode=ProjectMode.SEARCH,
969
- file_types=file_extensions_to_check,
970
- ignore_dirs_in_path=ignore_dirs_in_path,
971
- language_presets=language_presets,
972
- ignore_presets=ignore_presets,
973
- search_keywords=search_keywords,
974
- search_file_contents=search_file_contents,
975
- full_path_compare=full_path_compare,
976
- max_workers=max_workers,
977
- tree_style_preset=tree_style_preset,
978
- tree_style_t_connector=tree_style_t_connector,
979
- tree_style_l_connector=tree_style_l_connector,
980
- tree_style_v_connector=tree_style_v_connector,
981
- tree_style_h_spacer=tree_style_h_spacer,
982
- show_token_count=show_token_count,
983
- show_tree_stats=show_tree_stats,
984
- encoding=encoding,
985
- separator_char=separator_char,
986
- separator_line_len=separator_line_len,
987
- )
970
+ collate_task = progress.add_task(
971
+ f"Collating {len(sorted_files)} files",
972
+ total=len(sorted_files),
973
+ status="",
974
+ )
975
+ files_to_process = [
976
+ FileToProcess(f, f.relative_to(root_dir).as_posix())
977
+ for f in sorted_files
978
+ ]
979
+ output_path = Path(output_file_name).resolve()
980
+ total_bytes, token_count = _collate_content_to_file(
981
+ output_path,
982
+ tree_lines,
983
+ files_to_process,
984
+ show_tree_stats,
985
+ show_token_count,
986
+ exclude_whitespace_in_token_count,
987
+ progress,
988
+ collate_task,
989
+ )
988
990
 
991
+ end_time = time.perf_counter()
992
+ summary_rows = [
993
+ ["Candidate Files", f"{len(candidate_files)}"],
994
+ ["Files Matched", f"[bold green]{len(matched_files)}[/bold green]"],
995
+ ["Total Time", f"{end_time - start_time:.2f} seconds"],
996
+ ["Output Size", f"{total_bytes / 1024:.2f} KB"],
997
+ ]
998
+ if show_token_count:
999
+ summary_rows.append(["Approximated Tokens", f"{token_count:,}"])
1000
+ summary_rows.append(["Output File", str(output_path or "N/A")])
1001
+ console.print_table("Scan Complete", ["Metric", "Value"], summary_rows)
989
1002
 
990
- __all__ = [
991
- "process_project",
992
- "filter_project",
993
- "find_in_project",
994
- "deconstruct_snapshot",
995
- "ProjectMode",
996
- "LanguagePreset",
997
- "IgnorePreset",
998
- "TreeStylePreset",
999
- ]
1000
1003
 
1001
1004
  if __name__ == "__main__":
1002
- # --- Example: Scan with Custom Filters and the New Readable Stats ---
1003
- print("\n--- Running a custom filter scan with new stats format ---")
1004
- filter_project(
1005
- root_dir_param=".",
1006
- output_file_name="custom_snapshot_readable.txt",
1007
- file_types=[".py", "requirements.txt", ".sql", ".md"],
1008
- ignore_dirs_in_path=["venv", "build", "node_modules", "static", "templates"],
1005
+ generate_snapshot(
1006
+ root_directory=".",
1007
+ output_file_name="project_snapshot_final.txt",
1008
+ # No search keywords triggers Snapshot Mode
1009
+ language_presets=[LanguagePreset.PYTHON],
1010
+ ignore_presets=[
1011
+ IgnorePreset.PYTHON,
1012
+ IgnorePreset.BUILD_ARTIFACTS,
1013
+ IgnorePreset.VERSION_CONTROL,
1014
+ IgnorePreset.NODE_JS,
1015
+ IgnorePreset.IDE_METADATA,
1016
+ ],
1017
+ ignore_extensions=[".log", ".tmp"], # Example of new functionality
1018
+ generate_tree=True,
1009
1019
  show_tree_stats=True,
1010
1020
  show_token_count=True,
1021
+ exclude_whitespace_in_token_count=True,
1011
1022
  )