kopipasta 0.2.0__tar.gz → 0.45.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. kopipasta-0.45.0/PKG-INFO +161 -0
  2. kopipasta-0.45.0/README.md +133 -0
  3. kopipasta-0.45.0/kopipasta/cache.py +40 -0
  4. kopipasta-0.45.0/kopipasta/file.py +322 -0
  5. kopipasta-0.45.0/kopipasta/import_parser.py +356 -0
  6. kopipasta-0.45.0/kopipasta/main.py +265 -0
  7. kopipasta-0.45.0/kopipasta/ops.py +450 -0
  8. kopipasta-0.45.0/kopipasta/patcher.py +447 -0
  9. kopipasta-0.45.0/kopipasta/prompt.py +293 -0
  10. kopipasta-0.45.0/kopipasta/tree_selector.py +834 -0
  11. kopipasta-0.45.0/kopipasta.egg-info/PKG-INFO +161 -0
  12. kopipasta-0.45.0/kopipasta.egg-info/SOURCES.txt +27 -0
  13. kopipasta-0.45.0/kopipasta.egg-info/requires.txt +6 -0
  14. kopipasta-0.45.0/requirements.txt +6 -0
  15. {kopipasta-0.2.0 → kopipasta-0.45.0}/setup.py +8 -5
  16. kopipasta-0.45.0/tests/test_file.py +67 -0
  17. kopipasta-0.45.0/tests/test_patcher.py +246 -0
  18. kopipasta-0.45.0/tests/test_patcher_edge_cases.py +115 -0
  19. kopipasta-0.45.0/tests/test_patcher_regex.py +67 -0
  20. kopipasta-0.45.0/tests/test_patcher_repro.py +100 -0
  21. kopipasta-0.45.0/tests/test_patcher_repro_failures.py +118 -0
  22. kopipasta-0.45.0/tests/test_tree_selector.py +118 -0
  23. kopipasta-0.2.0/PKG-INFO +0 -122
  24. kopipasta-0.2.0/README.md +0 -99
  25. kopipasta-0.2.0/kopipasta/main.py +0 -251
  26. kopipasta-0.2.0/kopipasta.egg-info/PKG-INFO +0 -122
  27. kopipasta-0.2.0/kopipasta.egg-info/SOURCES.txt +0 -13
  28. kopipasta-0.2.0/kopipasta.egg-info/requires.txt +0 -1
  29. kopipasta-0.2.0/requirements.txt +0 -1
  30. {kopipasta-0.2.0 → kopipasta-0.45.0}/LICENSE +0 -0
  31. {kopipasta-0.2.0 → kopipasta-0.45.0}/MANIFEST.in +0 -0
  32. {kopipasta-0.2.0 → kopipasta-0.45.0}/kopipasta/__init__.py +0 -0
  33. {kopipasta-0.2.0 → kopipasta-0.45.0}/kopipasta.egg-info/dependency_links.txt +0 -0
  34. {kopipasta-0.2.0 → kopipasta-0.45.0}/kopipasta.egg-info/entry_points.txt +0 -0
  35. {kopipasta-0.2.0 → kopipasta-0.45.0}/kopipasta.egg-info/top_level.txt +0 -0
  36. {kopipasta-0.2.0 → kopipasta-0.45.0}/setup.cfg +0 -0
@@ -0,0 +1,161 @@
1
+ Metadata-Version: 2.1
2
+ Name: kopipasta
3
+ Version: 0.45.0
4
+ Summary: A CLI tool to generate prompts with project structure and file contents
5
+ Home-page: https://github.com/mkorpela/kopipasta
6
+ Author: Mikko Korpela
7
+ Author-email: mikko.korpela@gmail.com
8
+ License: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: pyperclip==1.9.0
23
+ Requires-Dist: requests==2.32.3
24
+ Requires-Dist: Pygments==2.18.0
25
+ Requires-Dist: rich==13.8.1
26
+ Requires-Dist: click==8.2.1
27
+ Requires-Dist: prompt-toolkit==3.0.52
28
+
29
+ # kopipasta
30
+
31
+ [![Version](https://img.shields.io/pypi/v/kopipasta.svg)](https://pypi.python.org/pypi/kopipasta)
32
+ [![Downloads](http://pepy.tech/badge/kopipasta)](http://pepy.tech/project/kopipasta)
33
+
34
+ **kopipasta bridges the gap between your local file system and LLM context windows.**
35
+
36
+ A CLI tool for taking **full, transparent control** of your prompt. No black boxes.
37
+
38
+ ```text
39
+ ➜ ~ kopipasta
40
+
41
+ 📁 Project Files
42
+ |-- 📂 src/
43
+ | |-- ● 📄 main.py (4.2 KB)
44
+ | |-- ○ 📄 utils.py (1.5 KB)
45
+ |-- ○ 📄 README.md (2.1 KB)
46
+
47
+ Current: src/main.py | Selected: 1 full | ~4,200 chars
48
+ ```
49
+
50
+ ## You Control the Context
51
+
52
+ Many AI coding assistants automatically find what *they think* is relevant context. This is a black box. When the LLM gives a bad answer, you can't debug it because you don't know what context it was actually given.
53
+
54
+ **`kopipasta` is the opposite.** I built it on the principle of **explicit context control**. You are in the driver's seat. You decide *exactly* what files, functions, and snippets go into the prompt.
55
+
56
+ It's a "smart copy" command for your project, not a magic wand.
57
+
58
+ ## How It Works
59
+
60
+ The workflow is a fast, iterative cycle:
61
+
62
+ 1. **Context:** Run `kopipasta` to select files and define your task.
63
+ 2. **Generate:** Paste the prompt into your LLM (ChatGPT, Claude, etc.).
64
+ 3. **Patch:** Press `p` in `kopipasta` and paste the LLM's response to apply changes locally.
65
+ 4. **Iterate:** Review with `git diff`, then repeat for the next step.
66
+
67
+ ## Use Cases
68
+
69
+ * **Targeted Refactoring:** Select just the module you are cleaning up and its immediate dependencies.
70
+ * **Test Generation:** Pipe your implementation file and a similar existing test file to the LLM to generate consistent new tests.
71
+ * **Docs to Code:** Select an API documentation file (or web URL) and your source file to implement a feature against a spec.
72
+ * **Bug Fixing:** Grab the relevant traceback files and the config to diagnose issues without distracting the LLM with the whole repo.
73
+
74
+ ## Installation
75
+
76
+ ```bash
77
+ # Using pipx (recommended for CLI tools)
78
+ pipx install kopipasta
79
+
80
+ # Or using standard pip
81
+ pip install kopipasta
82
+ ```
83
+
84
+ ## Usage
85
+
86
+ `kopipasta` has two main modes: creating prompts and applying patches.
87
+
88
+ ### Creating a Prompt
89
+
90
+ By default, `kopipasta` opens the tree selector in the current directory.
91
+
92
+ You may also use the command line arguments:
93
+ ```bash
94
+ kopipasta [options] [files_or_directories_or_urls...]
95
+ ```
96
+
97
+ **Arguments:**
98
+
99
+ * `[files_or_directories_or_urls...]`: One or more paths to files, directories, or web URLs to use as the starting point for your context.
100
+
101
+ **Options:**
102
+
103
+ * `-t TASK`, `--task TASK`: Provide the task description directly on the command line, skipping the editor.
104
+
105
+ ### Applying Patches
106
+
107
+ `kopipasta` automatically injects strict instructions into your prompt, teaching the LLM how to format code for this tool.
108
+ `kopipasta` can apply changes suggested by an LLM directly to your codebase, assuming you are in a Git repository.
109
+
110
+ 1. Press `p` in the file selector.
111
+ 2. Paste the **entire** markdown response from your LLM.
112
+ 3. The tool robustly detects code blocks, handles indentation quirks, and applies changes (full files or diffs).
113
+ 4. If a patch fails, the tool provides **diagnostic feedback** telling you exactly why (e.g., missing headers).
114
+ 5. **Always** review changes with `git diff` before committing.
115
+
116
+ **Example of supported LLM output formats:**
117
+
118
+ ```python
119
+ # FILE: src/utils.py
120
+ def new_feature():
121
+ print("kopipasta handles full file creation")
122
+ ```
123
+
124
+ ```diff
125
+ # FILE: src/main.py
126
+ @@ -10,2 +10,3 @@
127
+ def main():
128
+ - pass
129
+ + new_feature()
130
+ ```
131
+
132
+ ## Key Features
133
+
134
+ * **Total Context Control:** Interactively select files, directories, or snippets. You see everything that goes into the prompt.
135
+ * **Smart Dependency Analysis:** Press `d` on a Python or TypeScript/JavaScript file, and `kopipasta` will scan imports to find and add related local files to your context automatically.
136
+ * **Robust Code Patcher:** Applies LLM suggestions directly. Handles indentation, various comment styles (`#`, `//`, `<!--`), and multiple files per block.
137
+ * **Built-in Search:** Press `g` to grep for text patterns inside directories to find relevant files.
138
+ * **Transparent & Explicit:** No hidden RAG. You know exactly what's in the prompt because you built it. This makes debugging LLM failures possible.
139
+ * **Web-Aware:** Pulls in content directly from URLs—perfect for API documentation.
140
+ * **Safety First:**
141
+ * Automatically respects your `.gitignore` rules.
142
+ * Detects if you're about to include secrets from a `.env` file and asks what to do.
143
+ * **Context-Aware:** Keeps a running total of the prompt size (in characters and estimated tokens) so you don't overload the LLM's context window.
144
+ * **Developer-Friendly:**
145
+ * Provides a rich, interactive prompt for writing task descriptions in the terminal.
146
+ * Copies the final prompt directly to your clipboard.
147
+ * Provides syntax highlighting during chunk selection.
148
+
149
+ ## Interactive Controls
150
+
151
+ | Key | Action |
152
+ | :--- | :--- |
153
+ | `Space` | Toggle file/directory selection |
154
+ | `s` | Toggle **Snippet Mode** (include only the first 50 lines) |
155
+ | `d` | **Analyze Dependencies** (find and add imported files) |
156
+ | `g` | **Grep** (search text in directory) |
157
+ | `a` | Add all files in directory |
158
+ | `p` | **Apply Patch** (paste LLM response) |
159
+ | `r` | Reuse selection from previous run |
160
+ | `Enter` | Expand/Collapse directory |
161
+ | `q` | Quit and finalize selection |
@@ -0,0 +1,133 @@
1
+ # kopipasta
2
+
3
+ [![Version](https://img.shields.io/pypi/v/kopipasta.svg)](https://pypi.python.org/pypi/kopipasta)
4
+ [![Downloads](http://pepy.tech/badge/kopipasta)](http://pepy.tech/project/kopipasta)
5
+
6
+ **kopipasta bridges the gap between your local file system and LLM context windows.**
7
+
8
+ A CLI tool for taking **full, transparent control** of your prompt. No black boxes.
9
+
10
+ ```text
11
+ ➜ ~ kopipasta
12
+
13
+ 📁 Project Files
14
+ |-- 📂 src/
15
+ | |-- ● 📄 main.py (4.2 KB)
16
+ | |-- ○ 📄 utils.py (1.5 KB)
17
+ |-- ○ 📄 README.md (2.1 KB)
18
+
19
+ Current: src/main.py | Selected: 1 full | ~4,200 chars
20
+ ```
21
+
22
+ ## You Control the Context
23
+
24
+ Many AI coding assistants automatically find what *they think* is relevant context. This is a black box. When the LLM gives a bad answer, you can't debug it because you don't know what context it was actually given.
25
+
26
+ **`kopipasta` is the opposite.** I built it on the principle of **explicit context control**. You are in the driver's seat. You decide *exactly* what files, functions, and snippets go into the prompt.
27
+
28
+ It's a "smart copy" command for your project, not a magic wand.
29
+
30
+ ## How It Works
31
+
32
+ The workflow is a fast, iterative cycle:
33
+
34
+ 1. **Context:** Run `kopipasta` to select files and define your task.
35
+ 2. **Generate:** Paste the prompt into your LLM (ChatGPT, Claude, etc.).
36
+ 3. **Patch:** Press `p` in `kopipasta` and paste the LLM's response to apply changes locally.
37
+ 4. **Iterate:** Review with `git diff`, then repeat for the next step.
38
+
39
+ ## Use Cases
40
+
41
+ * **Targeted Refactoring:** Select just the module you are cleaning up and its immediate dependencies.
42
+ * **Test Generation:** Pipe your implementation file and a similar existing test file to the LLM to generate consistent new tests.
43
+ * **Docs to Code:** Select an API documentation file (or web URL) and your source file to implement a feature against a spec.
44
+ * **Bug Fixing:** Grab the relevant traceback files and the config to diagnose issues without distracting the LLM with the whole repo.
45
+
46
+ ## Installation
47
+
48
+ ```bash
49
+ # Using pipx (recommended for CLI tools)
50
+ pipx install kopipasta
51
+
52
+ # Or using standard pip
53
+ pip install kopipasta
54
+ ```
55
+
56
+ ## Usage
57
+
58
+ `kopipasta` has two main modes: creating prompts and applying patches.
59
+
60
+ ### Creating a Prompt
61
+
62
+ By default, `kopipasta` opens the tree selector in the current directory.
63
+
64
+ You may also use the command line arguments:
65
+ ```bash
66
+ kopipasta [options] [files_or_directories_or_urls...]
67
+ ```
68
+
69
+ **Arguments:**
70
+
71
+ * `[files_or_directories_or_urls...]`: One or more paths to files, directories, or web URLs to use as the starting point for your context.
72
+
73
+ **Options:**
74
+
75
+ * `-t TASK`, `--task TASK`: Provide the task description directly on the command line, skipping the editor.
76
+
77
+ ### Applying Patches
78
+
79
+ `kopipasta` automatically injects strict instructions into your prompt, teaching the LLM how to format code for this tool.
80
+ `kopipasta` can apply changes suggested by an LLM directly to your codebase, assuming you are in a Git repository.
81
+
82
+ 1. Press `p` in the file selector.
83
+ 2. Paste the **entire** markdown response from your LLM.
84
+ 3. The tool robustly detects code blocks, handles indentation quirks, and applies changes (full files or diffs).
85
+ 4. If a patch fails, the tool provides **diagnostic feedback** telling you exactly why (e.g., missing headers).
86
+ 5. **Always** review changes with `git diff` before committing.
87
+
88
+ **Example of supported LLM output formats:**
89
+
90
+ ```python
91
+ # FILE: src/utils.py
92
+ def new_feature():
93
+ print("kopipasta handles full file creation")
94
+ ```
95
+
96
+ ```diff
97
+ # FILE: src/main.py
98
+ @@ -10,2 +10,3 @@
99
+ def main():
100
+ - pass
101
+ + new_feature()
102
+ ```
103
+
104
+ ## Key Features
105
+
106
+ * **Total Context Control:** Interactively select files, directories, or snippets. You see everything that goes into the prompt.
107
+ * **Smart Dependency Analysis:** Press `d` on a Python or TypeScript/JavaScript file, and `kopipasta` will scan imports to find and add related local files to your context automatically.
108
+ * **Robust Code Patcher:** Applies LLM suggestions directly. Handles indentation, various comment styles (`#`, `//`, `<!--`), and multiple files per block.
109
+ * **Built-in Search:** Press `g` to grep for text patterns inside directories to find relevant files.
110
+ * **Transparent & Explicit:** No hidden RAG. You know exactly what's in the prompt because you built it. This makes debugging LLM failures possible.
111
+ * **Web-Aware:** Pulls in content directly from URLs—perfect for API documentation.
112
+ * **Safety First:**
113
+ * Automatically respects your `.gitignore` rules.
114
+ * Detects if you're about to include secrets from a `.env` file and asks what to do.
115
+ * **Context-Aware:** Keeps a running total of the prompt size (in characters and estimated tokens) so you don't overload the LLM's context window.
116
+ * **Developer-Friendly:**
117
+ * Provides a rich, interactive prompt for writing task descriptions in the terminal.
118
+ * Copies the final prompt directly to your clipboard.
119
+ * Provides syntax highlighting during chunk selection.
120
+
121
+ ## Interactive Controls
122
+
123
+ | Key | Action |
124
+ | :--- | :--- |
125
+ | `Space` | Toggle file/directory selection |
126
+ | `s` | Toggle **Snippet Mode** (include only the first 50 lines) |
127
+ | `d` | **Analyze Dependencies** (find and add imported files) |
128
+ | `g` | **Grep** (search text in directory) |
129
+ | `a` | Add all files in directory |
130
+ | `p` | **Apply Patch** (paste LLM response) |
131
+ | `r` | Reuse selection from previous run |
132
+ | `Enter` | Expand/Collapse directory |
133
+ | `q` | Quit and finalize selection |
@@ -0,0 +1,40 @@
1
import json
import os
from pathlib import Path
from typing import List, Optional, Tuple
5
+
6
# Define FileTuple for type hinting.
# The third slot is spelled Optional[List[str]] rather than `List[str] | None`:
# this alias is evaluated at import time, and the `|` operator on typing
# generics raises TypeError on Python versions before 3.10.
FileTuple = Tuple[str, bool, Optional[List[str]], str]
8
+
9
+
10
def get_cache_file_path() -> Path:
    """Return the path of the JSON file that remembers the last selection.

    The file is ``~/.cache/kopipasta/last_selection.json``. The containing
    directory is created on demand; the file itself is not touched.
    """
    directory = Path.home().joinpath(".cache", "kopipasta")
    # Create missing parents too; an already existing directory is not an error.
    directory.mkdir(parents=True, exist_ok=True)
    return directory.joinpath("last_selection.json")
15
+
16
+
17
def save_selection_to_cache(files_to_include: List[FileTuple]):
    """Persist the paths of the selected files to the cache file.

    The first element of every entry is converted to a path relative to the
    current working directory; the sorted list of those paths is written as
    indented JSON. A failure to write is printed as a warning, not raised.
    """
    target = get_cache_file_path()
    rel_paths = [os.path.relpath(entry[0]) for entry in files_to_include]
    rel_paths.sort()
    try:
        with open(target, "w", encoding="utf-8") as handle:
            json.dump(rel_paths, handle, indent=2)
    except IOError as e:
        print(f"\nWarning: Could not save selection to cache: {e}")
26
+
27
+
28
def load_selection_from_cache() -> List[str]:
    """Load the previously saved selection from the cache file.

    Returns:
        The cached paths that are strings and still exist on disk. An empty
        list is returned when the cache file is missing, unreadable, not
        valid UTF-8/JSON, or does not contain a JSON list.
    """
    cache_file = get_cache_file_path()
    if not cache_file.exists():
        return []
    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            paths = json.load(f)
    # ValueError covers both json.JSONDecodeError and the UnicodeDecodeError
    # raised when the file is not valid UTF-8.
    except (IOError, ValueError) as e:
        print(f"\nWarning: Could not load previous selection from cache: {e}")
        return []

    if not isinstance(paths, list):
        # A hand-edited or corrupted cache may hold any JSON value.
        print(
            "\nWarning: Could not load previous selection from cache: "
            "unexpected format"
        )
        return []

    # Filter out paths that no longer exist. Non-string entries are dropped
    # first because os.path.exists() would treat an int as a file descriptor.
    return [p for p in paths if isinstance(p, str) and os.path.exists(p)]
@@ -0,0 +1,322 @@
1
+ import fnmatch
2
+ import os
3
+ from typing import List, Optional, Tuple, Set
4
+ from pathlib import Path
5
+
6
FileTuple = Tuple[str, bool, Optional[List[str]], str]

# --- Caches ---
# Module-level memo tables shared by the helpers below. The annotations are
# quoted so they are not evaluated at import time: subscripting the builtin
# dict/list types raises TypeError on Python 3.8.
# .gitignore file path -> patterns read from that file
_gitignore_cache: "dict[str, list[str]]" = {}
# absolute path -> result of is_ignored()
_is_ignored_cache: "dict[str, bool]" = {}
# file path (as passed by the caller) -> result of is_binary()
_is_binary_cache: "dict[str, bool]" = {}
12
+
13
+ # --- Known File Extensions for is_binary ---
14
+ # Using sets for O(1) average time complexity lookups
15
# Lower-case names recognised as text without reading the file.
TEXT_EXTENSIONS = {
    # Code
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h", ".hpp",
    ".cs", ".go", ".rs", ".sh", ".bash", ".ps1", ".rb", ".php", ".swift",
    ".kt", ".kts", ".scala", ".pl", ".pm", ".tcl",
    # Markup & Data
    ".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less", ".json",
    ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".md", ".txt", ".rtf",
    ".csv", ".tsv", ".sql", ".graphql", ".gql",
    # Config & Other
    ".gitignore", ".dockerfile", "dockerfile", ".env", ".properties", ".mdx",
}
73
+
74
# Lower-case extensions recognised as binary without reading the file.
BINARY_EXTENSIONS = {
    # Images
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico", ".webp", ".svg",
    # Audio/Video
    ".mp3", ".wav", ".ogg", ".flac", ".mp4", ".avi", ".mov", ".wmv", ".mkv",
    # Archives
    ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz",
    # Documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt",
    # Executables & Compiled
    ".exe", ".dll", ".so", ".dylib", ".class", ".jar", ".pyc", ".pyd", ".whl",
    # Databases & Other
    ".db", ".sqlite", ".sqlite3", ".db-wal", ".db-shm", ".lock", ".bak",
    ".swo", ".swp",
}
133
+
134
+
135
def _read_gitignore_patterns(gitignore_path: str) -> List[str]:
    """Read the patterns of a single .gitignore file, memoising the result.

    Blank lines and lines starting with ``#`` are skipped; every other line is
    returned with surrounding whitespace stripped.

    Args:
        gitignore_path: Path of the .gitignore file to read.

    Returns:
        The list of patterns. It is empty when the path is not a regular
        file; if reading fails part-way, the patterns collected so far are
        returned. The result is stored in ``_gitignore_cache`` under
        ``gitignore_path`` and reused on later calls.
    """
    if gitignore_path in _gitignore_cache:
        return _gitignore_cache[gitignore_path]

    patterns: List[str] = []
    if os.path.isfile(gitignore_path):
        try:
            with open(gitignore_path, "r", encoding="utf-8") as f:
                for line in f:
                    stripped_line = line.strip()
                    if stripped_line and not stripped_line.startswith("#"):
                        patterns.append(stripped_line)
        except (IOError, UnicodeDecodeError):
            # Best effort: an unreadable or non-UTF-8 .gitignore must not
            # abort the scan; keep whatever was read before the failure.
            pass

    _gitignore_cache[gitignore_path] = patterns
    return patterns
153
+
154
+
155
def is_ignored(
    path: str, default_ignore_patterns: List[str], project_root: Optional[str] = None
) -> bool:
    """
    Checks if a path should be ignored by splitting patterns into fast (basename)
    and slow (full path) checks, with heavy caching and optimized inner loops.

    Patterns come from ``default_ignore_patterns`` plus whatever
    ``get_all_patterns`` collects. Patterns without a ``/`` are matched with
    ``fnmatch`` against the basename only. Patterns containing a ``/`` are
    matched against every leading prefix of the path relative to the project
    root, so a match on a parent directory also ignores what lies below it.

    Results are memoised in ``_is_ignored_cache`` keyed by the absolute path
    alone; the patterns and the project root are not part of the key.

    Args:
        path: Path to test; it is made absolute before use.
        default_ignore_patterns: Patterns applied in addition to .gitignore files.
        project_root: Root that path patterns are relative to. Defaults to
            the current working directory.

    Returns:
        True if the path is ignored, False otherwise.
    """
    path_abs = os.path.abspath(path)
    if path_abs in _is_ignored_cache:
        return _is_ignored_cache[path_abs]

    # A path below a directory already known to be ignored is ignored as well.
    parent_dir = os.path.dirname(path_abs)
    if parent_dir != path_abs and _is_ignored_cache.get(parent_dir, False):
        _is_ignored_cache[path_abs] = True
        return True

    if project_root is None:
        project_root = os.getcwd()
    project_root_abs = os.path.abspath(project_root)

    basename_patterns, path_patterns = get_all_patterns(
        default_ignore_patterns, path_abs, project_root_abs
    )

    # --- Step 1: Fast check for basename patterns ---
    path_basename = os.path.basename(path_abs)
    if any(fnmatch.fnmatch(path_basename, pattern) for pattern in basename_patterns):
        _is_ignored_cache[path_abs] = True
        return True

    # --- Step 2: Optimized nested check for path patterns ---
    try:
        path_rel_to_root = os.path.relpath(path_abs, project_root_abs)
    except ValueError:
        # relpath cannot relate the two paths; treat the path as not ignored.
        _is_ignored_cache[path_abs] = False
        return False

    # Pre-calculate all path prefixes to check, avoiding re-joins in the loop.
    path_parts = Path(path_rel_to_root).parts
    path_prefixes = [
        os.path.join(*path_parts[:i]) for i in range(1, len(path_parts) + 1)
    ]

    # Pre-process patterns to remove trailing slashes once.
    processed_path_patterns = [p.rstrip("/") for p in path_patterns]

    for prefix in path_prefixes:
        for pattern in processed_path_patterns:
            if fnmatch.fnmatch(prefix, pattern):
                _is_ignored_cache[path_abs] = True
                return True

    _is_ignored_cache[path_abs] = False
    return False
210
+
211
+
212
def get_all_patterns(
    default_ignore_patterns, path_abs, project_root_abs
) -> Tuple[Set[str], Set[str]]:
    """
    Gathers all applicable ignore patterns, splitting them into two sets
    for optimized checking: one for basenames, one for full paths.

    The default patterns are taken first. Then the .gitignore files are read,
    starting in the directory of ``path_abs`` (or ``path_abs`` itself when it
    is a directory) and walking up until the project root has been processed.
    A start directory outside the project root contributes only its own
    .gitignore.

    Args:
        default_ignore_patterns: Iterable of patterns that always apply.
        path_abs: Absolute path whose applicable patterns are wanted.
        project_root_abs: Absolute path of the project root.

    Returns:
        ``(basename_patterns, path_patterns)``. A pattern containing ``/``
        goes into the second set; .gitignore path patterns are prefixed with
        the .gitignore directory relative to the project root. Every other
        pattern goes into the first set.
    """
    basename_patterns = set()
    path_patterns = set()

    for p in default_ignore_patterns:
        if "/" in p:
            path_patterns.add(p)
        else:
            basename_patterns.add(p)

    # Root with a trailing separator, so the containment test below matches
    # whole path components only ("/work/project-old" is NOT inside
    # "/work/project"). os.path.join leaves a bare filesystem root untouched.
    root_prefix = os.path.join(project_root_abs, "")

    current_dir = path_abs if os.path.isdir(path_abs) else os.path.dirname(path_abs)
    while True:
        gitignore_path = os.path.join(current_dir, ".gitignore")
        patterns_from_file = _read_gitignore_patterns(gitignore_path)

        if patterns_from_file:
            gitignore_dir_rel = os.path.relpath(current_dir, project_root_abs)
            if gitignore_dir_rel == ".":
                gitignore_dir_rel = ""

            for p in patterns_from_file:
                if "/" in p:
                    # Path patterns are relative to the .gitignore file's location
                    path_patterns.add(os.path.join(gitignore_dir_rel, p.lstrip("/")))
                else:
                    basename_patterns.add(p)

        at_root = current_dir == project_root_abs
        inside_root = at_root or current_dir.startswith(root_prefix)
        if at_root or not inside_root:
            break
        parent = os.path.dirname(current_dir)
        if parent == current_dir:
            # Reached the filesystem root.
            break
        current_dir = parent
    return basename_patterns, path_patterns
259
+
260
+
261
def read_file_contents(file_path):
    """Return the UTF-8 decoded text of *file_path*.

    If the file cannot be opened, read or decoded, the error is printed and a
    placeholder string of the form ``<.. Error reading ... ..>`` is returned
    instead of raising.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            text = handle.read()
    except (IOError, UnicodeDecodeError) as e:
        failure = f"Error reading {file_path}: {e}"
        print(failure)
        return f"<.. {failure} ..>"
    return text
269
+
270
+
271
def is_binary(file_path: str) -> bool:
    """
    Efficiently checks if a file is binary.

    The check follows a fast, multi-step process to minimize I/O:
    1. Checks a memory cache for a previously determined result.
    2. Checks the file extension, and the whole file name, against the known
       text file types.
    3. Checks the same two candidates against the known binary file types.
    4. As a last resort, reads the first 512 bytes of the file to check for
       a null byte, a common indicator of a binary file.

    The whole file name is checked as well because ``os.path.splitext``
    reports an empty extension for names such as ``.gitignore``, ``.env`` or
    ``Dockerfile``; without it those entries of the known-type sets could
    never match.

    Args:
        file_path: Path of the file to classify; also used as the cache key.

    Returns:
        True if the file is considered binary (or cannot be opened),
        False otherwise.
    """
    # Step 1: Check cache first for fastest response
    if file_path in _is_binary_cache:
        return _is_binary_cache[file_path]

    # Step 2: Fast check based on known text/binary names (no I/O)
    name = os.path.basename(file_path).lower()
    _, extension = os.path.splitext(name)
    candidates = (extension, name)

    if any(candidate in TEXT_EXTENSIONS for candidate in candidates):
        result = False
    elif any(candidate in BINARY_EXTENSIONS for candidate in candidates):
        result = True
    else:
        # Step 3: Fallback to content analysis for unknown extensions
        try:
            with open(file_path, "rb") as file:
                # 512 bytes is usually enough to find a null byte; with no
                # null byte the file is assumed to be text.
                result = b"\0" in file.read(512)
        except IOError:
            # If we can't open it, treat it as binary to be safe
            result = True

    _is_binary_cache[file_path] = result
    return result
312
+
313
+
314
def get_human_readable_size(size):
    """Format a byte count as a string with two decimals and a unit.

    The value is divided by 1024 until it drops below 1024, stepping through
    B, KB, MB, GB and TB. Anything of 1024 TB or more is reported in PB, so a
    string is always returned.

    Args:
        size: Size in bytes.

    Returns:
        A string such as ``"1.50 KB"``.
    """
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if size < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    # The loop has already divided by 1024**5, so the remainder is in PB.
    return f"{size:.2f} PB"
319
+
320
+
321
def is_large_file(file_path, threshold=102400):
    """Tell whether the file at *file_path* is strictly larger than *threshold* bytes."""
    size_in_bytes = os.stat(file_path).st_size
    return size_in_bytes > threshold