kopipasta 0.36.0__tar.gz → 0.37.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kopipasta might be problematic. Click here for more details.
- {kopipasta-0.36.0/kopipasta.egg-info → kopipasta-0.37.0}/PKG-INFO +1 -1
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/file.py +68 -7
- {kopipasta-0.36.0 → kopipasta-0.37.0/kopipasta.egg-info}/PKG-INFO +1 -1
- {kopipasta-0.36.0 → kopipasta-0.37.0}/setup.py +1 -1
- {kopipasta-0.36.0 → kopipasta-0.37.0}/LICENSE +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/MANIFEST.in +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/README.md +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/__init__.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/cache.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/import_parser.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/main.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/prompt.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/tree_selector.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/SOURCES.txt +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/dependency_links.txt +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/entry_points.txt +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/requires.txt +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/top_level.txt +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/requirements.txt +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/setup.cfg +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/tests/test_file.py +0 -0
- {kopipasta-0.36.0 → kopipasta-0.37.0}/tests/test_tree_selector.py +0 -0
|
@@ -8,7 +8,38 @@ FileTuple = Tuple[str, bool, Optional[List[str]], str]
|
|
|
8
8
|
# --- Caches ---
|
|
9
9
|
_gitignore_cache: dict[str, list[str]] = {}
|
|
10
10
|
_is_ignored_cache: dict[str, bool] = {}
|
|
11
|
-
|
|
11
|
+
_is_binary_cache: dict[str, bool] = {}
|
|
12
|
+
|
|
13
|
+
# --- Known File Extensions for is_binary ---
|
|
14
|
+
# Using sets for O(1) average time complexity lookups
|
|
15
|
+
TEXT_EXTENSIONS = {
|
|
16
|
+
# Code
|
|
17
|
+
".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h", ".hpp",
|
|
18
|
+
".cs", ".go", ".rs", ".sh", ".bash", ".ps1", ".rb", ".php", ".swift",
|
|
19
|
+
".kt", ".kts", ".scala", ".pl", ".pm", ".tcl",
|
|
20
|
+
# Markup & Data
|
|
21
|
+
".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less", ".json",
|
|
22
|
+
".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".md", ".txt", ".rtf",
|
|
23
|
+
".csv", ".tsv", ".sql", ".graphql", ".gql",
|
|
24
|
+
# Config & Other
|
|
25
|
+
".gitignore", ".dockerfile", "dockerfile", ".env", ".properties", ".mdx",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
BINARY_EXTENSIONS = {
|
|
29
|
+
# Images
|
|
30
|
+
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico", ".webp", ".svg",
|
|
31
|
+
# Audio/Video
|
|
32
|
+
".mp3", ".wav", ".ogg", ".flac", ".mp4", ".avi", ".mov", ".wmv", ".mkv",
|
|
33
|
+
# Archives
|
|
34
|
+
".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz",
|
|
35
|
+
# Documents
|
|
36
|
+
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt",
|
|
37
|
+
# Executables & Compiled
|
|
38
|
+
".exe", ".dll", ".so", ".dylib", ".class", ".jar", ".pyc", ".pyd", ".whl",
|
|
39
|
+
# Databases & Other
|
|
40
|
+
".db", ".sqlite", ".sqlite3", ".db-wal", ".db-shm", ".lock",
|
|
41
|
+
".bak", ".swo", ".swp",
|
|
42
|
+
}
|
|
12
43
|
|
|
13
44
|
def _read_gitignore_patterns(gitignore_path: str) -> list[str]:
|
|
14
45
|
"""Reads patterns from a single .gitignore file and caches them."""
|
|
@@ -70,7 +101,7 @@ def is_ignored(
|
|
|
70
101
|
|
|
71
102
|
# Pre-calculate all path prefixes to check, avoiding re-joins in the loop.
|
|
72
103
|
path_parts = Path(path_rel_to_root).parts
|
|
73
|
-
path_prefixes = [os.path.join(*path_parts[:i
|
|
104
|
+
path_prefixes = [os.path.join(*path_parts[:i]) for i in range(1, len(path_parts) + 1)]
|
|
74
105
|
|
|
75
106
|
# Pre-process patterns to remove trailing slashes once.
|
|
76
107
|
processed_path_patterns = [p.rstrip("/") for p in path_patterns]
|
|
@@ -140,17 +171,47 @@ def read_file_contents(file_path):
|
|
|
140
171
|
return ""
|
|
141
172
|
|
|
142
173
|
|
|
143
|
-
def is_binary(file_path):
|
|
174
|
+
def is_binary(file_path: str) -> bool:
|
|
175
|
+
"""
|
|
176
|
+
Efficiently checks if a file is binary.
|
|
177
|
+
|
|
178
|
+
The check follows a fast, multi-step process to minimize I/O:
|
|
179
|
+
1. Checks a memory cache for a previously determined result.
|
|
180
|
+
2. Checks the file extension against a list of known text file types.
|
|
181
|
+
3. Checks the file extension against a list of known binary file types.
|
|
182
|
+
4. As a last resort, reads the first 512 bytes of the file to check for
|
|
183
|
+
a null byte, a common indicator of a binary file.
|
|
184
|
+
"""
|
|
185
|
+
# Step 1: Check cache first for fastest response
|
|
186
|
+
if file_path in _is_binary_cache:
|
|
187
|
+
return _is_binary_cache[file_path]
|
|
188
|
+
|
|
189
|
+
# Step 2: Fast check based on known text/binary extensions (no I/O)
|
|
190
|
+
_, extension = os.path.splitext(file_path)
|
|
191
|
+
extension = extension.lower()
|
|
192
|
+
|
|
193
|
+
if extension in TEXT_EXTENSIONS:
|
|
194
|
+
_is_binary_cache[file_path] = False
|
|
195
|
+
return False
|
|
196
|
+
if extension in BINARY_EXTENSIONS:
|
|
197
|
+
_is_binary_cache[file_path] = True
|
|
198
|
+
return True
|
|
199
|
+
|
|
200
|
+
# Step 3: Fallback to content analysis for unknown extensions
|
|
144
201
|
try:
|
|
145
202
|
with open(file_path, "rb") as file:
|
|
146
|
-
chunk
|
|
203
|
+
# Read a smaller chunk, 512 bytes is usually enough to find a null byte
|
|
204
|
+
chunk = file.read(512)
|
|
147
205
|
if b"\0" in chunk:
|
|
206
|
+
_is_binary_cache[file_path] = True
|
|
148
207
|
return True
|
|
149
|
-
|
|
150
|
-
|
|
208
|
+
# If no null byte, assume it's a text file
|
|
209
|
+
_is_binary_cache[file_path] = False
|
|
151
210
|
return False
|
|
152
211
|
except IOError:
|
|
153
|
-
|
|
212
|
+
# If we can't open it, treat it as binary to be safe
|
|
213
|
+
_is_binary_cache[file_path] = True
|
|
214
|
+
return True
|
|
154
215
|
|
|
155
216
|
|
|
156
217
|
def get_human_readable_size(size):
|
|
@@ -10,7 +10,7 @@ with open("requirements.txt", "r", encoding="utf-8") as f:
|
|
|
10
10
|
|
|
11
11
|
setup(
|
|
12
12
|
name="kopipasta",
|
|
13
|
-
version="0.36.0",
|
|
13
|
+
version="0.37.0",
|
|
14
14
|
author="Mikko Korpela",
|
|
15
15
|
author_email="mikko.korpela@gmail.com",
|
|
16
16
|
description="A CLI tool to generate prompts with project structure and file contents",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|