kopipasta 0.2.0__tar.gz → 0.41.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. kopipasta-0.41.0/PKG-INFO +140 -0
  2. kopipasta-0.41.0/README.md +112 -0
  3. kopipasta-0.41.0/kopipasta/cache.py +40 -0
  4. kopipasta-0.41.0/kopipasta/file.py +322 -0
  5. kopipasta-0.41.0/kopipasta/import_parser.py +356 -0
  6. kopipasta-0.41.0/kopipasta/main.py +262 -0
  7. kopipasta-0.41.0/kopipasta/ops.py +476 -0
  8. kopipasta-0.41.0/kopipasta/patcher.py +245 -0
  9. kopipasta-0.41.0/kopipasta/prompt.py +271 -0
  10. kopipasta-0.41.0/kopipasta/tree_selector.py +831 -0
  11. kopipasta-0.41.0/kopipasta.egg-info/PKG-INFO +140 -0
  12. kopipasta-0.41.0/kopipasta.egg-info/SOURCES.txt +25 -0
  13. kopipasta-0.41.0/kopipasta.egg-info/requires.txt +6 -0
  14. kopipasta-0.41.0/requirements.txt +6 -0
  15. {kopipasta-0.2.0 → kopipasta-0.41.0}/setup.py +8 -5
  16. kopipasta-0.41.0/tests/test_file.py +67 -0
  17. kopipasta-0.41.0/tests/test_patcher.py +246 -0
  18. kopipasta-0.41.0/tests/test_patcher_edge_cases.py +111 -0
  19. kopipasta-0.41.0/tests/test_patcher_regex.py +34 -0
  20. kopipasta-0.41.0/tests/test_tree_selector.py +118 -0
  21. kopipasta-0.2.0/PKG-INFO +0 -122
  22. kopipasta-0.2.0/README.md +0 -99
  23. kopipasta-0.2.0/kopipasta/main.py +0 -251
  24. kopipasta-0.2.0/kopipasta.egg-info/PKG-INFO +0 -122
  25. kopipasta-0.2.0/kopipasta.egg-info/SOURCES.txt +0 -13
  26. kopipasta-0.2.0/kopipasta.egg-info/requires.txt +0 -1
  27. kopipasta-0.2.0/requirements.txt +0 -1
  28. {kopipasta-0.2.0 → kopipasta-0.41.0}/LICENSE +0 -0
  29. {kopipasta-0.2.0 → kopipasta-0.41.0}/MANIFEST.in +0 -0
  30. {kopipasta-0.2.0 → kopipasta-0.41.0}/kopipasta/__init__.py +0 -0
  31. {kopipasta-0.2.0 → kopipasta-0.41.0}/kopipasta.egg-info/dependency_links.txt +0 -0
  32. {kopipasta-0.2.0 → kopipasta-0.41.0}/kopipasta.egg-info/entry_points.txt +0 -0
  33. {kopipasta-0.2.0 → kopipasta-0.41.0}/kopipasta.egg-info/top_level.txt +0 -0
  34. {kopipasta-0.2.0 → kopipasta-0.41.0}/setup.cfg +0 -0
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.1
2
+ Name: kopipasta
3
+ Version: 0.41.0
4
+ Summary: A CLI tool to generate prompts with project structure and file contents
5
+ Home-page: https://github.com/mkorpela/kopipasta
6
+ Author: Mikko Korpela
7
+ Author-email: mikko.korpela@gmail.com
8
+ License: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: pyperclip==1.9.0
23
+ Requires-Dist: requests==2.32.3
24
+ Requires-Dist: Pygments==2.18.0
25
+ Requires-Dist: rich==13.8.1
26
+ Requires-Dist: click==8.2.1
27
+ Requires-Dist: prompt-toolkit==3.0.52
28
+
29
+ # kopipasta
30
+
31
+ [![Version](https://img.shields.io/pypi/v/kopipasta.svg)](https://pypi.python.org/pypi/kopipasta)
32
+ [![Downloads](http://pepy.tech/badge/kopipasta)](http://pepy.tech/project/kopipasta)
33
+
34
+ **kopipasta bridges the gap between your local file system and LLM context windows.**
35
+
36
+ A CLI tool for taking **full, transparent control** of your prompt. No black boxes.
37
+
38
+ <img src="kopipasta.jpg" alt="kopipasta" width="300">
39
+
40
+ - An LLM told me that "kopi" means Coffee in some languages... and a Diffusion model then made this delicious soup.
41
+
42
+ ## The Philosophy: You Control the Context
43
+
44
+ Many AI coding assistants use Retrieval-Augmented Generation (RAG) to automatically find what *they think* is relevant context. This is a black box. When the LLM gives a bad answer, you can't debug it because you don't know what context it was actually given.
45
+
46
+ **`kopipasta` is the opposite.** I built it for myself on the principle of **explicit context control**. You are in the driver's seat. You decide *exactly* what files, functions, and snippets go into the prompt. This transparency is the key to getting reliable, debuggable results from an LLM.
47
+
48
+ It's a "smart copy" command for your project, not a magic wand.
49
+
50
+ ## How It Works
51
+
52
+ The workflow is dead simple:
53
+
54
+ 1. **Gather:** Run `kopipasta` and point it at the files, directories, and URLs that matter for your task.
55
+ 2. **Select:** The tool interactively helps you choose what to include. For large files, you can send just a snippet or even hand-pick individual functions.
56
+ 3. **Define:** Write your instructions to the LLM in an interactive prompt directly in your terminal.
57
+ 4. **Paste:** The final, comprehensive prompt is now on your clipboard, ready to be pasted into ChatGPT, Gemini, Claude, or your LLM of choice.
58
+ 5. **Apply:** Inside the file selector, press `p`, paste the LLM's markdown response, and the tool will automatically patch your local files.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ # Using pipx (recommended for CLI tools)
64
+ pipx install kopipasta
65
+
66
+ # Or using standard pip
67
+ pip install kopipasta
68
+ ```
69
+
70
+ ## Usage
71
+
72
+ `kopipasta` has two main modes: creating prompts and applying patches.
73
+
74
+ ### Creating a Prompt
75
+
76
+ ```bash
77
+ kopipasta [options] [files_or_directories_or_urls...]
78
+ ```
79
+
80
+ **Arguments:**
81
+
82
+ * `[files_or_directories_or_urls...]`: One or more paths to files, directories, or web URLs to use as the starting point for your context.
83
+
84
+ **Options:**
85
+
86
+ * `-t TASK`, `--task TASK`: Provide the task description directly on the command line, skipping the editor.
87
+
88
+ ### Applying Patches
89
+
90
+ `kopipasta` automatically injects strict instructions into your prompt, teaching the LLM how to format code for this tool.
91
+ `kopipasta` can apply changes suggested by an LLM directly to your codebase, assuming you are in a Git repository.
92
+
93
+ 1. While running `kopipasta` in the interactive file selector, press the `p` key.
94
+ 2. Paste the entire markdown response from your LLM into the terminal prompt and submit.
95
+ 3. The tool will find code blocks with file paths (e.g., `// FILE: src/main.py`) and immediately write those changes to your local files.
96
+ 4. After applying, use standard Git commands like `git diff` to review the changes before staging and committing them.
97
+
98
+ ## Key Features
99
+
100
+ * **Total Context Control:** Interactively select files, directories, or snippets. You see everything that goes into the prompt.
101
+ * **Smart Dependency Analysis:** Press `d` on a Python or TypeScript/JavaScript file, and `kopipasta` will scan imports to find and add related local files to your context automatically.
102
+ * **Interactive Code Patcher:** Press `p` in the file selector to paste and apply LLM-suggested changes directly to your local files. Relies on your version control (like Git) for safety, enabling a fast workflow.
103
+ * **Built-in Search:** Press `g` to grep for text patterns inside directories to find relevant files.
104
+ * **Transparent & Explicit:** No hidden RAG. You know exactly what's in the prompt because you built it. This makes debugging LLM failures possible.
105
+ * **Web-Aware:** Pulls in content directly from URLs—perfect for API documentation.
106
+ * **Safety First:**
107
+ * Automatically respects your `.gitignore` rules.
108
+ * Detects if you're about to include secrets from a `.env` file and asks what to do.
109
+ * **Context-Aware:** Keeps a running total of the prompt size (in characters and estimated tokens) so you don't overload the LLM's context window.
110
+ * **Developer-Friendly:**
111
+ * Provides a rich, interactive prompt for writing task descriptions in the terminal.
112
+ * Copies the final prompt directly to your clipboard.
113
+ * Provides syntax highlighting during chunk selection.
114
+
115
+ ## Interactive Controls
116
+
117
+ | Key | Action |
118
+ | :--- | :--- |
119
+ | `Space` | Toggle file/directory selection |
120
+ | `s` | Toggle **Snippet Mode** (include only the first 50 lines) |
121
+ | `d` | **Analyze Dependencies** (find and add imported files) |
122
+ | `g` | **Grep** (search text in directory) |
123
+ | `a` | Add all files in directory |
124
+ | `p` | **Apply Patch** (paste LLM response) |
125
+ | `r` | Reuse selection from previous run |
126
+ | `Enter` | Expand/Collapse directory |
127
+ | `q` | Quit and finalize selection |
128
+
129
+ ## A Real-World Example
130
+
131
+ I had a bug where my `setup.py` didn't include all the dependencies from `requirements.txt`.
132
+
133
+ 1. I ran `kopipasta -t "Update setup.py to read dependencies dynamically from requirements.txt" setup.py requirements.txt`.
134
+ 2. The tool confirmed the inclusion of both files and copied the complete prompt to my clipboard.
135
+ 3. I pasted the prompt into my LLM chat window.
136
+ 4. I copied the LLM's response (which included a modified `setup.py` in a markdown code block).
137
+ 5. Inside `kopipasta`, I pressed `p`, pasted the response, and my local `setup.py` was updated.
138
+ 6. I ran `git diff` to review the changes, then tested and committed.
139
+
140
+ No manual file reading, no clumsy copy-pasting, just a clean, context-rich prompt that I had full control over, and a seamless way to apply the results.
@@ -0,0 +1,112 @@
1
+ # kopipasta
2
+
3
+ [![Version](https://img.shields.io/pypi/v/kopipasta.svg)](https://pypi.python.org/pypi/kopipasta)
4
+ [![Downloads](http://pepy.tech/badge/kopipasta)](http://pepy.tech/project/kopipasta)
5
+
6
+ **kopipasta bridges the gap between your local file system and LLM context windows.**
7
+
8
+ A CLI tool for taking **full, transparent control** of your prompt. No black boxes.
9
+
10
+ <img src="kopipasta.jpg" alt="kopipasta" width="300">
11
+
12
+ - An LLM told me that "kopi" means Coffee in some languages... and a Diffusion model then made this delicious soup.
13
+
14
+ ## The Philosophy: You Control the Context
15
+
16
+ Many AI coding assistants use Retrieval-Augmented Generation (RAG) to automatically find what *they think* is relevant context. This is a black box. When the LLM gives a bad answer, you can't debug it because you don't know what context it was actually given.
17
+
18
+ **`kopipasta` is the opposite.** I built it for myself on the principle of **explicit context control**. You are in the driver's seat. You decide *exactly* what files, functions, and snippets go into the prompt. This transparency is the key to getting reliable, debuggable results from an LLM.
19
+
20
+ It's a "smart copy" command for your project, not a magic wand.
21
+
22
+ ## How It Works
23
+
24
+ The workflow is dead simple:
25
+
26
+ 1. **Gather:** Run `kopipasta` and point it at the files, directories, and URLs that matter for your task.
27
+ 2. **Select:** The tool interactively helps you choose what to include. For large files, you can send just a snippet or even hand-pick individual functions.
28
+ 3. **Define:** Write your instructions to the LLM in an interactive prompt directly in your terminal.
29
+ 4. **Paste:** The final, comprehensive prompt is now on your clipboard, ready to be pasted into ChatGPT, Gemini, Claude, or your LLM of choice.
30
+ 5. **Apply:** Inside the file selector, press `p`, paste the LLM's markdown response, and the tool will automatically patch your local files.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ # Using pipx (recommended for CLI tools)
36
+ pipx install kopipasta
37
+
38
+ # Or using standard pip
39
+ pip install kopipasta
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ `kopipasta` has two main modes: creating prompts and applying patches.
45
+
46
+ ### Creating a Prompt
47
+
48
+ ```bash
49
+ kopipasta [options] [files_or_directories_or_urls...]
50
+ ```
51
+
52
+ **Arguments:**
53
+
54
+ * `[files_or_directories_or_urls...]`: One or more paths to files, directories, or web URLs to use as the starting point for your context.
55
+
56
+ **Options:**
57
+
58
+ * `-t TASK`, `--task TASK`: Provide the task description directly on the command line, skipping the editor.
59
+
60
+ ### Applying Patches
61
+
62
+ `kopipasta` automatically injects strict instructions into your prompt, teaching the LLM how to format code for this tool.
63
+ `kopipasta` can apply changes suggested by an LLM directly to your codebase, assuming you are in a Git repository.
64
+
65
+ 1. While running `kopipasta` in the interactive file selector, press the `p` key.
66
+ 2. Paste the entire markdown response from your LLM into the terminal prompt and submit.
67
+ 3. The tool will find code blocks with file paths (e.g., `// FILE: src/main.py`) and immediately write those changes to your local files.
68
+ 4. After applying, use standard Git commands like `git diff` to review the changes before staging and committing them.
69
+
70
+ ## Key Features
71
+
72
+ * **Total Context Control:** Interactively select files, directories, or snippets. You see everything that goes into the prompt.
73
+ * **Smart Dependency Analysis:** Press `d` on a Python or TypeScript/JavaScript file, and `kopipasta` will scan imports to find and add related local files to your context automatically.
74
+ * **Interactive Code Patcher:** Press `p` in the file selector to paste and apply LLM-suggested changes directly to your local files. Relies on your version control (like Git) for safety, enabling a fast workflow.
75
+ * **Built-in Search:** Press `g` to grep for text patterns inside directories to find relevant files.
76
+ * **Transparent & Explicit:** No hidden RAG. You know exactly what's in the prompt because you built it. This makes debugging LLM failures possible.
77
+ * **Web-Aware:** Pulls in content directly from URLs—perfect for API documentation.
78
+ * **Safety First:**
79
+ * Automatically respects your `.gitignore` rules.
80
+ * Detects if you're about to include secrets from a `.env` file and asks what to do.
81
+ * **Context-Aware:** Keeps a running total of the prompt size (in characters and estimated tokens) so you don't overload the LLM's context window.
82
+ * **Developer-Friendly:**
83
+ * Provides a rich, interactive prompt for writing task descriptions in the terminal.
84
+ * Copies the final prompt directly to your clipboard.
85
+ * Provides syntax highlighting during chunk selection.
86
+
87
+ ## Interactive Controls
88
+
89
+ | Key | Action |
90
+ | :--- | :--- |
91
+ | `Space` | Toggle file/directory selection |
92
+ | `s` | Toggle **Snippet Mode** (include only the first 50 lines) |
93
+ | `d` | **Analyze Dependencies** (find and add imported files) |
94
+ | `g` | **Grep** (search text in directory) |
95
+ | `a` | Add all files in directory |
96
+ | `p` | **Apply Patch** (paste LLM response) |
97
+ | `r` | Reuse selection from previous run |
98
+ | `Enter` | Expand/Collapse directory |
99
+ | `q` | Quit and finalize selection |
100
+
101
+ ## A Real-World Example
102
+
103
+ I had a bug where my `setup.py` didn't include all the dependencies from `requirements.txt`.
104
+
105
+ 1. I ran `kopipasta -t "Update setup.py to read dependencies dynamically from requirements.txt" setup.py requirements.txt`.
106
+ 2. The tool confirmed the inclusion of both files and copied the complete prompt to my clipboard.
107
+ 3. I pasted the prompt into my LLM chat window.
108
+ 4. I copied the LLM's response (which included a modified `setup.py` in a markdown code block).
109
+ 5. Inside `kopipasta`, I pressed `p`, pasted the response, and my local `setup.py` was updated.
110
+ 6. I ran `git diff` to review the changes, then tested and committed.
111
+
112
+ No manual file reading, no clumsy copy-pasting, just a clean, context-rich prompt that I had full control over, and a seamless way to apply the results.
@@ -0,0 +1,40 @@
1
import json
import os
from pathlib import Path
from typing import List, Optional, Tuple
5
+
6
# FileTuple is the type of one selected-file entry; element 0 is the file path
# (it is what save_selection_to_cache passes to os.path.relpath).
# Optional[...] is used rather than ``List[str] | None`` because the ``|``
# operator on typing generics is evaluated at import time and raises
# TypeError on Python versions older than 3.10.
FileTuple = Tuple[str, bool, Optional[List[str]], str]
8
+
9
+
10
def get_cache_file_path() -> Path:
    """Return the path of the JSON file that stores the last selection.

    The file lives in ``<home>/.cache/kopipasta``; that directory (and any
    missing parents) is created as a side effect of calling this function.
    """
    directory = Path.home().joinpath(".cache", "kopipasta")
    directory.mkdir(parents=True, exist_ok=True)
    return directory.joinpath("last_selection.json")
15
+
16
+
17
def save_selection_to_cache(files_to_include: List[FileTuple]):
    """Persist the selected files' paths to the cache as a sorted JSON list.

    Only element 0 (the path) of each entry is stored, converted with
    ``os.path.relpath`` so it is relative to the current working directory.
    A failure to write the file is reported as a warning on stdout instead
    of being raised.
    """
    target = get_cache_file_path()
    rel_paths = [os.path.relpath(entry[0]) for entry in files_to_include]
    rel_paths.sort()
    try:
        with open(target, "w", encoding="utf-8") as handle:
            json.dump(rel_paths, handle, indent=2)
    except IOError as err:
        print(f"\nWarning: Could not save selection to cache: {err}")
26
+
27
+
28
def load_selection_from_cache() -> List[str]:
    """Load the previously saved selection from the cache file.

    Returns:
        The cached paths that still exist on disk. An empty list is returned
        when there is no cache file, when it cannot be read or decoded, or
        when its content is not a JSON list.
    """
    cache_file = get_cache_file_path()
    if not cache_file.exists():
        return []
    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            paths = json.load(f)
    except (IOError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # UnicodeDecodeError covers a cache file that is not valid UTF-8.
        print(f"\nWarning: Could not load previous selection from cache: {e}")
        return []

    # The file may hold valid JSON that is not what we wrote (hand-edited or
    # produced by another version); treat that like a corrupt cache.
    if not isinstance(paths, list):
        print(
            "\nWarning: Could not load previous selection from cache: "
            "unexpected content"
        )
        return []

    # Filter out non-string entries and paths that no longer exist.
    return [p for p in paths if isinstance(p, str) and os.path.exists(p)]
@@ -0,0 +1,322 @@
1
+ import fnmatch
2
+ import os
3
+ from typing import List, Optional, Tuple, Set
4
+ from pathlib import Path
5
+
6
# Type of one file entry handled by the tool.
# NOTE(review): presumably element 0 is the file path; the meaning of the
# bool, Optional[List[str]] and trailing str fields is not visible in this
# module — confirm against the callers.
FileTuple = Tuple[str, bool, Optional[List[str]], str]

# --- Caches ---
# .gitignore file path -> pattern lines read from it (blank lines and
# "#" comments removed); filled by _read_gitignore_patterns.
_gitignore_cache: dict[str, list[str]] = {}
# Absolute path -> verdict previously computed by is_ignored.
_is_ignored_cache: dict[str, bool] = {}
# File path exactly as passed to is_binary -> verdict previously computed.
_is_binary_cache: dict[str, bool] = {}
12
+
13
# --- Known File Extensions for is_binary ---
# Using sets for O(1) average time complexity lookups
# is_binary compares the lower-cased suffix returned by os.path.splitext
# against these entries, so each entry is lower case and normally starts
# with a dot.  A match means "text" without opening the file.
# NOTE(review): "dockerfile" has no leading dot, so it can only ever match a
# lookup by whole file name, never a splitext() suffix.
TEXT_EXTENSIONS = {
    # Code
    ".py",
    ".js",
    ".ts",
    ".jsx",
    ".tsx",
    ".java",
    ".c",
    ".cpp",
    ".h",
    ".hpp",
    ".cs",
    ".go",
    ".rs",
    ".sh",
    ".bash",
    ".ps1",
    ".rb",
    ".php",
    ".swift",
    ".kt",
    ".kts",
    ".scala",
    ".pl",
    ".pm",
    ".tcl",
    # Markup & Data
    ".html",
    ".htm",
    ".xml",
    ".css",
    ".scss",
    ".sass",
    ".less",
    ".json",
    ".yaml",
    ".yml",
    ".toml",
    ".ini",
    ".cfg",
    ".conf",
    ".md",
    ".txt",
    ".rtf",
    ".csv",
    ".tsv",
    ".sql",
    ".graphql",
    ".gql",
    # Config & Other
    ".gitignore",
    ".dockerfile",
    "dockerfile",
    ".env",
    ".properties",
    ".mdx",
}
73
+
74
# Lower-case file suffixes that is_binary reports as binary without opening
# the file.  The set is consulted only after TEXT_EXTENSIONS did not match.
# NOTE(review): ".svg" and ".lock" files are frequently plain text; listing
# them here looks like a deliberate choice to treat them as binary — confirm.
BINARY_EXTENSIONS = {
    # Images
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".bmp",
    ".tiff",
    ".ico",
    ".webp",
    ".svg",
    # Audio/Video
    ".mp3",
    ".wav",
    ".ogg",
    ".flac",
    ".mp4",
    ".avi",
    ".mov",
    ".wmv",
    ".mkv",
    # Archives
    ".zip",
    ".rar",
    ".7z",
    ".tar",
    ".gz",
    ".bz2",
    ".xz",
    # Documents
    ".pdf",
    ".doc",
    ".docx",
    ".xls",
    ".xlsx",
    ".ppt",
    ".pptx",
    ".odt",
    # Executables & Compiled
    ".exe",
    ".dll",
    ".so",
    ".dylib",
    ".class",
    ".jar",
    ".pyc",
    ".pyd",
    ".whl",
    # Databases & Other
    ".db",
    ".sqlite",
    ".sqlite3",
    ".db-wal",
    ".db-shm",
    ".lock",
    ".bak",
    ".swo",
    ".swp",
}
133
+
134
+
135
def _read_gitignore_patterns(gitignore_path: str) -> list[str]:
    """Read the patterns of a single .gitignore file, with caching.

    Args:
        gitignore_path: Path of the .gitignore file to read.

    Returns:
        The stripped, non-empty lines that do not start with ``#``. A path
        that is not a regular file yields an empty list. The result is
        stored in ``_gitignore_cache`` so each path is read at most once.
    """
    if gitignore_path in _gitignore_cache:
        return _gitignore_cache[gitignore_path]

    patterns: list[str] = []
    if os.path.isfile(gitignore_path):
        try:
            with open(gitignore_path, "r", encoding="utf-8") as f:
                for line in f:
                    stripped_line = line.strip()
                    if stripped_line and not stripped_line.startswith("#"):
                        patterns.append(stripped_line)
        except (IOError, UnicodeDecodeError):
            # Best effort: an unreadable or non-UTF-8 .gitignore must not
            # abort the scan.  UnicodeDecodeError is a ValueError, so it has
            # to be listed explicitly.  Patterns read before the failure
            # are kept.
            pass

    _gitignore_cache[gitignore_path] = patterns
    return patterns
153
+
154
+
155
def is_ignored(
    path: str, default_ignore_patterns: list[str], project_root: Optional[str] = None
) -> bool:
    """Decide whether *path* is excluded by the ignore patterns.

    The verdict for every absolute path is memoized in ``_is_ignored_cache``.
    A path whose parent directory is already cached as ignored is ignored
    too. Otherwise the patterns collected by ``get_all_patterns`` are
    applied: slash-free patterns are matched against the basename, and the
    remaining patterns (trailing ``/`` removed) are matched against every
    leading prefix of the path relative to the project root.

    Args:
        path: File or directory path to test.
        default_ignore_patterns: Patterns applied in addition to those read
            from .gitignore files.
        project_root: Root used for relative matching; defaults to the
            current working directory.

    Returns:
        True if the path is ignored, False otherwise (including when no
        relative path to the project root can be computed).
    """
    target = os.path.abspath(path)
    try:
        return _is_ignored_cache[target]
    except KeyError:
        pass

    def _record(verdict: bool) -> bool:
        # Store the verdict before handing it back to the caller.
        _is_ignored_cache[target] = verdict
        return verdict

    # Inherit an "ignored" verdict already known for the containing directory.
    containing_dir = os.path.dirname(target)
    if containing_dir != target and _is_ignored_cache.get(containing_dir, False):
        return _record(True)

    root = os.path.abspath(os.getcwd() if project_root is None else project_root)
    name_patterns, rooted_patterns = get_all_patterns(
        default_ignore_patterns, target, root
    )

    # Cheap pass: patterns without a slash only look at the final component.
    leaf = os.path.basename(target)
    if any(fnmatch.fnmatch(leaf, pat) for pat in name_patterns):
        return _record(True)

    # Expensive pass: match each leading prefix of the root-relative path.
    try:
        relative = os.path.relpath(target, root)
    except ValueError:
        return _record(False)

    parts = Path(relative).parts
    prefixes = [os.path.join(*parts[:end]) for end in range(1, len(parts) + 1)]
    trimmed = [pat.rstrip("/") for pat in rooted_patterns]

    matched = any(
        fnmatch.fnmatch(prefix, pat) for prefix in prefixes for pat in trimmed
    )
    return _record(matched)
210
+
211
+
212
def get_all_patterns(
    default_ignore_patterns, path_abs, project_root_abs
) -> Tuple[Set[str], Set[str]]:
    """Gather every ignore pattern that applies to *path_abs*.

    Patterns come from *default_ignore_patterns* and from the .gitignore
    files found while walking from the path's directory up to the project
    root. They are split into two sets for optimized checking.

    Args:
        default_ignore_patterns: Iterable of built-in patterns.
        path_abs: Absolute path being tested.
        project_root_abs: Absolute path of the project root.

    Returns:
        ``(basename_patterns, path_patterns)``. Patterns without ``/`` go in
        the first set. Patterns containing ``/`` go in the second; those read
        from a .gitignore are prefixed with that file's directory relative
        to the project root.
    """
    basename_patterns: Set[str] = set()
    path_patterns: Set[str] = set()

    for p in default_ignore_patterns:
        if "/" in p:
            path_patterns.add(p)
        else:
            basename_patterns.add(p)

    search_start_dir = (
        path_abs if os.path.isdir(path_abs) else os.path.dirname(path_abs)
    )

    # A bare startswith(project_root_abs) would treat "/proj2" as being
    # inside "/proj"; comparing against the root plus a separator makes the
    # containment test respect path-component boundaries.
    root_prefix = project_root_abs.rstrip(os.sep) + os.sep

    current_dir = search_start_dir
    while True:
        gitignore_path = os.path.join(current_dir, ".gitignore")
        patterns_from_file = _read_gitignore_patterns(gitignore_path)

        if patterns_from_file:
            gitignore_dir_rel = os.path.relpath(current_dir, project_root_abs)
            if gitignore_dir_rel == ".":
                gitignore_dir_rel = ""

            for p in patterns_from_file:
                if "/" in p:
                    # Path patterns are relative to the .gitignore file's location
                    path_patterns.add(os.path.join(gitignore_dir_rel, p.lstrip("/")))
                else:
                    basename_patterns.add(p)

        # Stop at the project root, or as soon as we are outside of it.
        if current_dir == project_root_abs or not current_dir.startswith(
            root_prefix
        ):
            break
        parent = os.path.dirname(current_dir)
        if parent == current_dir:
            # Reached the filesystem root.
            break
        current_dir = parent
    return basename_patterns, path_patterns
259
+
260
+
261
def read_file_contents(file_path):
    """Return the UTF-8 decoded text of *file_path*.

    If the file cannot be read or decoded, the error is printed and a
    placeholder string of the form ``<.. Error reading ... ..>`` is returned
    instead of raising.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            text = handle.read()
    except (IOError, UnicodeDecodeError) as err:
        message = f"Error reading {file_path}: {err}"
        print(message)
        return f"<.. {message} ..>"
    return text
269
+
270
+
271
def is_binary(file_path: str) -> bool:
    """
    Efficiently checks if a file is binary.

    The check follows a fast, multi-step process to minimize I/O:
    1. Checks a memory cache for a previously determined result.
    2. Checks the file extension, and the whole file name, against the
       known text file types. The file name lookup is needed because
       os.path.splitext yields an empty extension for names such as
       "Dockerfile", ".env" or ".gitignore".
    3. Checks the file extension against the known binary file types.
    4. As a last resort, reads the first 512 bytes of the file to check for
       a null byte, a common indicator of a binary file.

    Args:
        file_path: Path of the file to classify; also used as the cache key.

    Returns:
        True if the file is considered binary (or cannot be opened),
        False if it is considered text.
    """
    # Step 1: Check cache first for fastest response
    if file_path in _is_binary_cache:
        return _is_binary_cache[file_path]

    # Steps 2 and 3: Fast checks based on known names/extensions (no I/O)
    extension = os.path.splitext(file_path)[1].lower()
    file_name = os.path.basename(file_path).lower()

    if extension in TEXT_EXTENSIONS or file_name in TEXT_EXTENSIONS:
        _is_binary_cache[file_path] = False
        return False
    if extension in BINARY_EXTENSIONS:
        _is_binary_cache[file_path] = True
        return True

    # Step 4: Fallback to content analysis for unknown extensions
    try:
        with open(file_path, "rb") as file:
            # Read a small chunk, 512 bytes is usually enough to find a null byte
            chunk = file.read(512)
    except IOError:
        # If we can't open it, treat it as binary to be safe
        _is_binary_cache[file_path] = True
        return True

    # A null byte marks the file as binary; otherwise assume it is text.
    result = b"\0" in chunk
    _is_binary_cache[file_path] = result
    return result
312
+
313
+
314
def get_human_readable_size(size):
    """Format a byte count as a string with two decimals and a unit.

    The value is divided by 1024 until it drops below 1024, walking through
    B, KB, MB, GB and TB. Anything of 1024 TB or more is expressed in PB;
    previously such sizes fell off the loop and returned None.

    Args:
        size: Size in bytes (int or float).

    Returns:
        A string such as ``"1.50 KB"``.
    """
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if size < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    # Largest supported unit: never return None for very large sizes.
    return f"{size:.2f} PB"
319
+
320
+
321
def is_large_file(file_path, threshold=102400):
    """Return True when the file is strictly larger than *threshold* bytes.

    The default threshold is 102400 bytes. ``os.path.getsize`` raises
    ``OSError`` if the file does not exist or is inaccessible.
    """
    size_in_bytes = os.path.getsize(file_path)
    return threshold < size_in_bytes