kopipasta 0.35.0__py3-none-any.whl → 0.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kopipasta might be problematic. Click here for more details.
- kopipasta/cache.py +6 -3
- kopipasta/file.py +156 -60
- kopipasta/import_parser.py +141 -69
- kopipasta/main.py +611 -302
- kopipasta/prompt.py +50 -39
- kopipasta/tree_selector.py +243 -149
- {kopipasta-0.35.0.dist-info → kopipasta-0.37.0.dist-info}/METADATA +1 -1
- kopipasta-0.37.0.dist-info/RECORD +13 -0
- kopipasta-0.35.0.dist-info/RECORD +0 -13
- {kopipasta-0.35.0.dist-info → kopipasta-0.37.0.dist-info}/LICENSE +0 -0
- {kopipasta-0.35.0.dist-info → kopipasta-0.37.0.dist-info}/WHEEL +0 -0
- {kopipasta-0.35.0.dist-info → kopipasta-0.37.0.dist-info}/entry_points.txt +0 -0
- {kopipasta-0.35.0.dist-info → kopipasta-0.37.0.dist-info}/top_level.txt +0 -0
kopipasta/cache.py
CHANGED
|
@@ -6,32 +6,35 @@ from typing import List, Tuple
|
|
|
6
6
|
# Define FileTuple for type hinting
|
|
7
7
|
FileTuple = Tuple[str, bool, List[str] | None, str]
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
def get_cache_file_path() -> Path:
|
|
10
11
|
"""Gets the cross-platform path to the cache file for the last selection."""
|
|
11
12
|
cache_dir = Path.home() / ".cache" / "kopipasta"
|
|
12
13
|
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
13
14
|
return cache_dir / "last_selection.json"
|
|
14
15
|
|
|
16
|
+
|
|
15
17
|
def save_selection_to_cache(files_to_include: List[FileTuple]):
|
|
16
18
|
"""Saves the list of selected file relative paths to the cache."""
|
|
17
19
|
cache_file = get_cache_file_path()
|
|
18
20
|
relative_paths = sorted([os.path.relpath(f[0]) for f in files_to_include])
|
|
19
21
|
try:
|
|
20
|
-
with open(cache_file,
|
|
22
|
+
with open(cache_file, "w", encoding="utf-8") as f:
|
|
21
23
|
json.dump(relative_paths, f, indent=2)
|
|
22
24
|
except IOError as e:
|
|
23
25
|
print(f"\nWarning: Could not save selection to cache: {e}")
|
|
24
26
|
|
|
27
|
+
|
|
25
28
|
def load_selection_from_cache() -> List[str]:
|
|
26
29
|
"""Loads the list of selected files from the cache file."""
|
|
27
30
|
cache_file = get_cache_file_path()
|
|
28
31
|
if not cache_file.exists():
|
|
29
32
|
return []
|
|
30
33
|
try:
|
|
31
|
-
with open(cache_file,
|
|
34
|
+
with open(cache_file, "r", encoding="utf-8") as f:
|
|
32
35
|
paths = json.load(f)
|
|
33
36
|
# Filter out paths that no longer exist
|
|
34
37
|
return [p for p in paths if os.path.exists(p)]
|
|
35
38
|
except (IOError, json.JSONDecodeError) as e:
|
|
36
39
|
print(f"\nWarning: Could not load previous selection from cache: {e}")
|
|
37
|
-
return []
|
|
40
|
+
return []
|
kopipasta/file.py
CHANGED
|
@@ -1,14 +1,45 @@
|
|
|
1
1
|
import fnmatch
|
|
2
2
|
import os
|
|
3
|
-
from typing import List, Optional, Tuple
|
|
3
|
+
from typing import List, Optional, Tuple, Set
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
FileTuple = Tuple[str, bool, Optional[List[str]], str]
|
|
7
7
|
|
|
8
|
-
# ---
|
|
9
|
-
# Key: Directory path
|
|
10
|
-
# Value: List of patterns
|
|
8
|
+
# --- Caches ---
|
|
11
9
|
_gitignore_cache: dict[str, list[str]] = {}
|
|
10
|
+
_is_ignored_cache: dict[str, bool] = {}
|
|
11
|
+
_is_binary_cache: dict[str, bool] = {}
|
|
12
|
+
|
|
13
|
+
# --- Known File Extensions for is_binary ---
|
|
14
|
+
# Using sets for O(1) average time complexity lookups
|
|
15
|
+
TEXT_EXTENSIONS = {
|
|
16
|
+
# Code
|
|
17
|
+
".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h", ".hpp",
|
|
18
|
+
".cs", ".go", ".rs", ".sh", ".bash", ".ps1", ".rb", ".php", ".swift",
|
|
19
|
+
".kt", ".kts", ".scala", ".pl", ".pm", ".tcl",
|
|
20
|
+
# Markup & Data
|
|
21
|
+
".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less", ".json",
|
|
22
|
+
".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".md", ".txt", ".rtf",
|
|
23
|
+
".csv", ".tsv", ".sql", ".graphql", ".gql",
|
|
24
|
+
# Config & Other
|
|
25
|
+
".gitignore", ".dockerfile", "dockerfile", ".env", ".properties", ".mdx",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
BINARY_EXTENSIONS = {
|
|
29
|
+
# Images
|
|
30
|
+
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico", ".webp", ".svg",
|
|
31
|
+
# Audio/Video
|
|
32
|
+
".mp3", ".wav", ".ogg", ".flac", ".mp4", ".avi", ".mov", ".wmv", ".mkv",
|
|
33
|
+
# Archives
|
|
34
|
+
".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz",
|
|
35
|
+
# Documents
|
|
36
|
+
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt",
|
|
37
|
+
# Executables & Compiled
|
|
38
|
+
".exe", ".dll", ".so", ".dylib", ".class", ".jar", ".pyc", ".pyd", ".whl",
|
|
39
|
+
# Databases & Other
|
|
40
|
+
".db", ".sqlite", ".sqlite3", ".db-wal", ".db-shm", ".lock",
|
|
41
|
+
".bak", ".swo", ".swp",
|
|
42
|
+
}
|
|
12
43
|
|
|
13
44
|
def _read_gitignore_patterns(gitignore_path: str) -> list[str]:
|
|
14
45
|
"""Reads patterns from a single .gitignore file and caches them."""
|
|
@@ -19,31 +50,88 @@ def _read_gitignore_patterns(gitignore_path: str) -> list[str]:
|
|
|
19
50
|
return []
|
|
20
51
|
patterns = []
|
|
21
52
|
try:
|
|
22
|
-
with open(gitignore_path,
|
|
53
|
+
with open(gitignore_path, "r", encoding="utf-8") as f:
|
|
23
54
|
for line in f:
|
|
24
55
|
stripped_line = line.strip()
|
|
25
|
-
if stripped_line and not stripped_line.startswith(
|
|
56
|
+
if stripped_line and not stripped_line.startswith("#"):
|
|
26
57
|
patterns.append(stripped_line)
|
|
27
58
|
except IOError:
|
|
28
59
|
pass
|
|
29
60
|
_gitignore_cache[gitignore_path] = patterns
|
|
30
61
|
return patterns
|
|
31
62
|
|
|
32
|
-
|
|
63
|
+
|
|
64
|
+
def is_ignored(
|
|
65
|
+
path: str, default_ignore_patterns: list[str], project_root: Optional[str] = None
|
|
66
|
+
) -> bool:
|
|
33
67
|
"""
|
|
34
|
-
Checks if a path should be ignored
|
|
35
|
-
|
|
68
|
+
Checks if a path should be ignored by splitting patterns into fast (basename)
|
|
69
|
+
and slow (full path) checks, with heavy caching and optimized inner loops.
|
|
36
70
|
"""
|
|
37
71
|
path_abs = os.path.abspath(path)
|
|
72
|
+
if path_abs in _is_ignored_cache:
|
|
73
|
+
return _is_ignored_cache[path_abs]
|
|
74
|
+
|
|
75
|
+
parent_dir = os.path.dirname(path_abs)
|
|
76
|
+
if parent_dir != path_abs and _is_ignored_cache.get(parent_dir, False):
|
|
77
|
+
_is_ignored_cache[path_abs] = True
|
|
78
|
+
return True
|
|
79
|
+
|
|
38
80
|
if project_root is None:
|
|
39
81
|
project_root = os.getcwd()
|
|
40
82
|
project_root_abs = os.path.abspath(project_root)
|
|
41
83
|
|
|
42
|
-
|
|
43
|
-
|
|
84
|
+
basename_patterns, path_patterns = get_all_patterns(
|
|
85
|
+
default_ignore_patterns, path_abs, project_root_abs
|
|
86
|
+
)
|
|
44
87
|
|
|
45
|
-
#
|
|
46
|
-
|
|
88
|
+
# --- Step 1: Fast check for basename patterns ---
|
|
89
|
+
path_basename = os.path.basename(path_abs)
|
|
90
|
+
for pattern in basename_patterns:
|
|
91
|
+
if fnmatch.fnmatch(path_basename, pattern):
|
|
92
|
+
_is_ignored_cache[path_abs] = True
|
|
93
|
+
return True
|
|
94
|
+
|
|
95
|
+
# --- Step 2: Optimized nested check for path patterns ---
|
|
96
|
+
try:
|
|
97
|
+
path_rel_to_root = os.path.relpath(path_abs, project_root_abs)
|
|
98
|
+
except ValueError:
|
|
99
|
+
_is_ignored_cache[path_abs] = False
|
|
100
|
+
return False
|
|
101
|
+
|
|
102
|
+
# Pre-calculate all path prefixes to check, avoiding re-joins in the loop.
|
|
103
|
+
path_parts = Path(path_rel_to_root).parts
|
|
104
|
+
path_prefixes = [os.path.join(*path_parts[:i]) for i in range(1, len(path_parts) + 1)]
|
|
105
|
+
|
|
106
|
+
# Pre-process patterns to remove trailing slashes once.
|
|
107
|
+
processed_path_patterns = [p.rstrip("/") for p in path_patterns]
|
|
108
|
+
|
|
109
|
+
for prefix in path_prefixes:
|
|
110
|
+
for pattern in processed_path_patterns:
|
|
111
|
+
if fnmatch.fnmatch(prefix, pattern):
|
|
112
|
+
_is_ignored_cache[path_abs] = True
|
|
113
|
+
return True
|
|
114
|
+
|
|
115
|
+
_is_ignored_cache[path_abs] = False
|
|
116
|
+
return False
|
|
117
|
+
|
|
118
|
+
def get_all_patterns(default_ignore_patterns, path_abs, project_root_abs) -> Tuple[Set[str], Set[str]]:
|
|
119
|
+
"""
|
|
120
|
+
Gathers all applicable ignore patterns, splitting them into two sets
|
|
121
|
+
for optimized checking: one for basenames, one for full paths.
|
|
122
|
+
"""
|
|
123
|
+
basename_patterns = set()
|
|
124
|
+
path_patterns = set()
|
|
125
|
+
|
|
126
|
+
for p in default_ignore_patterns:
|
|
127
|
+
if "/" in p:
|
|
128
|
+
path_patterns.add(p)
|
|
129
|
+
else:
|
|
130
|
+
basename_patterns.add(p)
|
|
131
|
+
|
|
132
|
+
search_start_dir = (
|
|
133
|
+
path_abs if os.path.isdir(path_abs) else os.path.dirname(path_abs)
|
|
134
|
+
)
|
|
47
135
|
|
|
48
136
|
current_dir = search_start_dir
|
|
49
137
|
while True:
|
|
@@ -52,78 +140,86 @@ def is_ignored(path: str, default_ignore_patterns: list[str], project_root: Opti
|
|
|
52
140
|
|
|
53
141
|
if patterns_from_file:
|
|
54
142
|
gitignore_dir_rel = os.path.relpath(current_dir, project_root_abs)
|
|
55
|
-
if gitignore_dir_rel ==
|
|
143
|
+
if gitignore_dir_rel == ".":
|
|
144
|
+
gitignore_dir_rel = ""
|
|
56
145
|
|
|
57
146
|
for p in patterns_from_file:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
all_patterns.add(os.path.join(gitignore_dir_rel, p.lstrip('/')))
|
|
147
|
+
if "/" in p:
|
|
148
|
+
# Path patterns are relative to the .gitignore file's location
|
|
149
|
+
path_patterns.add(os.path.join(gitignore_dir_rel, p.lstrip("/")))
|
|
62
150
|
else:
|
|
63
|
-
|
|
64
|
-
all_patterns.add(p)
|
|
151
|
+
basename_patterns.add(p)
|
|
65
152
|
|
|
66
|
-
if
|
|
153
|
+
if (
|
|
154
|
+
not current_dir.startswith(project_root_abs)
|
|
155
|
+
or current_dir == project_root_abs
|
|
156
|
+
):
|
|
67
157
|
break
|
|
68
158
|
parent = os.path.dirname(current_dir)
|
|
69
|
-
if parent == current_dir:
|
|
159
|
+
if parent == current_dir:
|
|
160
|
+
break
|
|
70
161
|
current_dir = parent
|
|
162
|
+
return basename_patterns, path_patterns
|
|
71
163
|
|
|
72
|
-
# --- Step 2: Check the path and its parents against the patterns ---
|
|
73
|
-
try:
|
|
74
|
-
path_rel_to_root = os.path.relpath(path_abs, project_root_abs)
|
|
75
|
-
except ValueError:
|
|
76
|
-
return False # Path is outside the project root
|
|
77
|
-
|
|
78
|
-
path_parts = Path(path_rel_to_root).parts
|
|
79
|
-
|
|
80
|
-
for pattern in all_patterns:
|
|
81
|
-
# Check against basename for simple wildcards (e.g., `*.log`, `__pycache__`)
|
|
82
|
-
# This is a primary matching mechanism.
|
|
83
|
-
if fnmatch.fnmatch(os.path.basename(path_abs), pattern):
|
|
84
|
-
return True
|
|
85
|
-
|
|
86
|
-
# Check the full path and its parent directories against the pattern.
|
|
87
|
-
# This handles directory ignores (`node_modules/`) and specific path ignores (`src/*.tmp`).
|
|
88
|
-
for i in range(len(path_parts)):
|
|
89
|
-
current_check_path = os.path.join(*path_parts[:i+1])
|
|
90
|
-
|
|
91
|
-
# Handle directory patterns like `node_modules/`
|
|
92
|
-
if pattern.endswith('/'):
|
|
93
|
-
if fnmatch.fnmatch(current_check_path, pattern.rstrip('/')):
|
|
94
|
-
return True
|
|
95
|
-
# Handle full path patterns
|
|
96
|
-
else:
|
|
97
|
-
if fnmatch.fnmatch(current_check_path, pattern):
|
|
98
|
-
return True
|
|
99
|
-
|
|
100
|
-
return False
|
|
101
164
|
|
|
102
165
|
def read_file_contents(file_path):
|
|
103
166
|
try:
|
|
104
|
-
with open(file_path,
|
|
167
|
+
with open(file_path, "r") as file:
|
|
105
168
|
return file.read()
|
|
106
169
|
except Exception as e:
|
|
107
170
|
print(f"Error reading {file_path}: {e}")
|
|
108
171
|
return ""
|
|
109
172
|
|
|
110
|
-
|
|
173
|
+
|
|
174
|
+
def is_binary(file_path: str) -> bool:
|
|
175
|
+
"""
|
|
176
|
+
Efficiently checks if a file is binary.
|
|
177
|
+
|
|
178
|
+
The check follows a fast, multi-step process to minimize I/O:
|
|
179
|
+
1. Checks a memory cache for a previously determined result.
|
|
180
|
+
2. Checks the file extension against a list of known text file types.
|
|
181
|
+
3. Checks the file extension against a list of known binary file types.
|
|
182
|
+
4. As a last resort, reads the first 512 bytes of the file to check for
|
|
183
|
+
a null byte, a common indicator of a binary file.
|
|
184
|
+
"""
|
|
185
|
+
# Step 1: Check cache first for fastest response
|
|
186
|
+
if file_path in _is_binary_cache:
|
|
187
|
+
return _is_binary_cache[file_path]
|
|
188
|
+
|
|
189
|
+
# Step 2: Fast check based on known text/binary extensions (no I/O)
|
|
190
|
+
_, extension = os.path.splitext(file_path)
|
|
191
|
+
extension = extension.lower()
|
|
192
|
+
|
|
193
|
+
if extension in TEXT_EXTENSIONS:
|
|
194
|
+
_is_binary_cache[file_path] = False
|
|
195
|
+
return False
|
|
196
|
+
if extension in BINARY_EXTENSIONS:
|
|
197
|
+
_is_binary_cache[file_path] = True
|
|
198
|
+
return True
|
|
199
|
+
|
|
200
|
+
# Step 3: Fallback to content analysis for unknown extensions
|
|
111
201
|
try:
|
|
112
|
-
with open(file_path,
|
|
113
|
-
chunk
|
|
114
|
-
|
|
202
|
+
with open(file_path, "rb") as file:
|
|
203
|
+
# Read a smaller chunk, 512 bytes is usually enough to find a null byte
|
|
204
|
+
chunk = file.read(512)
|
|
205
|
+
if b"\0" in chunk:
|
|
206
|
+
_is_binary_cache[file_path] = True
|
|
115
207
|
return True
|
|
116
|
-
|
|
117
|
-
|
|
208
|
+
# If no null byte, assume it's a text file
|
|
209
|
+
_is_binary_cache[file_path] = False
|
|
118
210
|
return False
|
|
119
211
|
except IOError:
|
|
120
|
-
|
|
212
|
+
# If we can't open it, treat it as binary to be safe
|
|
213
|
+
_is_binary_cache[file_path] = True
|
|
214
|
+
return True
|
|
215
|
+
|
|
121
216
|
|
|
122
217
|
def get_human_readable_size(size):
|
|
123
|
-
for unit in [
|
|
218
|
+
for unit in ["B", "KB", "MB", "GB", "TB"]:
|
|
124
219
|
if size < 1024.0:
|
|
125
220
|
return f"{size:.2f} {unit}"
|
|
126
221
|
size /= 1024.0
|
|
127
222
|
|
|
223
|
+
|
|
128
224
|
def is_large_file(file_path, threshold=102400):
|
|
129
225
|
return os.path.getsize(file_path) > threshold
|