kopipasta 0.36.0__tar.gz → 0.37.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kopipasta might be problematic. Click here for more details.

Files changed (22)
  1. {kopipasta-0.36.0/kopipasta.egg-info → kopipasta-0.37.0}/PKG-INFO +1 -1
  2. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/file.py +68 -7
  3. {kopipasta-0.36.0 → kopipasta-0.37.0/kopipasta.egg-info}/PKG-INFO +1 -1
  4. {kopipasta-0.36.0 → kopipasta-0.37.0}/setup.py +1 -1
  5. {kopipasta-0.36.0 → kopipasta-0.37.0}/LICENSE +0 -0
  6. {kopipasta-0.36.0 → kopipasta-0.37.0}/MANIFEST.in +0 -0
  7. {kopipasta-0.36.0 → kopipasta-0.37.0}/README.md +0 -0
  8. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/__init__.py +0 -0
  9. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/cache.py +0 -0
  10. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/import_parser.py +0 -0
  11. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/main.py +0 -0
  12. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/prompt.py +0 -0
  13. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta/tree_selector.py +0 -0
  14. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/SOURCES.txt +0 -0
  15. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/dependency_links.txt +0 -0
  16. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/entry_points.txt +0 -0
  17. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/requires.txt +0 -0
  18. {kopipasta-0.36.0 → kopipasta-0.37.0}/kopipasta.egg-info/top_level.txt +0 -0
  19. {kopipasta-0.36.0 → kopipasta-0.37.0}/requirements.txt +0 -0
  20. {kopipasta-0.36.0 → kopipasta-0.37.0}/setup.cfg +0 -0
  21. {kopipasta-0.36.0 → kopipasta-0.37.0}/tests/test_file.py +0 -0
  22. {kopipasta-0.36.0 → kopipasta-0.37.0}/tests/test_tree_selector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kopipasta
3
- Version: 0.36.0
3
+ Version: 0.37.0
4
4
  Summary: A CLI tool to generate prompts with project structure and file contents
5
5
  Home-page: https://github.com/mkorpela/kopipasta
6
6
  Author: Mikko Korpela
@@ -8,7 +8,38 @@ FileTuple = Tuple[str, bool, Optional[List[str]], str]
8
8
  # --- Caches ---
9
9
  _gitignore_cache: dict[str, list[str]] = {}
10
10
  _is_ignored_cache: dict[str, bool] = {}
11
-
11
+ _is_binary_cache: dict[str, bool] = {}
12
+
13
+ # --- Known File Extensions for is_binary ---
14
+ # Using sets for O(1) average time complexity lookups
15
+ TEXT_EXTENSIONS = {
16
+ # Code
17
+ ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h", ".hpp",
18
+ ".cs", ".go", ".rs", ".sh", ".bash", ".ps1", ".rb", ".php", ".swift",
19
+ ".kt", ".kts", ".scala", ".pl", ".pm", ".tcl",
20
+ # Markup & Data
21
+ ".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less", ".json",
22
+ ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".md", ".txt", ".rtf",
23
+ ".csv", ".tsv", ".sql", ".graphql", ".gql",
24
+ # Config & Other
25
+ ".gitignore", ".dockerfile", "dockerfile", ".env", ".properties", ".mdx",
26
+ }
27
+
28
+ BINARY_EXTENSIONS = {
29
+ # Images
30
+ ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico", ".webp", ".svg",
31
+ # Audio/Video
32
+ ".mp3", ".wav", ".ogg", ".flac", ".mp4", ".avi", ".mov", ".wmv", ".mkv",
33
+ # Archives
34
+ ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz",
35
+ # Documents
36
+ ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt",
37
+ # Executables & Compiled
38
+ ".exe", ".dll", ".so", ".dylib", ".class", ".jar", ".pyc", ".pyd", ".whl",
39
+ # Databases & Other
40
+ ".db", ".sqlite", ".sqlite3", ".db-wal", ".db-shm", ".lock",
41
+ ".bak", ".swo", ".swp",
42
+ }
12
43
 
13
44
  def _read_gitignore_patterns(gitignore_path: str) -> list[str]:
14
45
  """Reads patterns from a single .gitignore file and caches them."""
@@ -70,7 +101,7 @@ def is_ignored(
70
101
 
71
102
  # Pre-calculate all path prefixes to check, avoiding re-joins in the loop.
72
103
  path_parts = Path(path_rel_to_root).parts
73
- path_prefixes = [os.path.join(*path_parts[:i + 1]) for i in range(1, len(path_parts) + 1)]
104
+ path_prefixes = [os.path.join(*path_parts[:i]) for i in range(1, len(path_parts) + 1)]
74
105
 
75
106
  # Pre-process patterns to remove trailing slashes once.
76
107
  processed_path_patterns = [p.rstrip("/") for p in path_patterns]
@@ -140,17 +171,47 @@ def read_file_contents(file_path):
140
171
  return ""
141
172
 
142
173
 
143
- def is_binary(file_path):
174
+ def is_binary(file_path: str) -> bool:
175
+ """
176
+ Efficiently checks if a file is binary.
177
+
178
+ The check follows a fast, multi-step process to minimize I/O:
179
+ 1. Checks a memory cache for a previously determined result.
180
+ 2. Checks the file extension against a list of known text file types.
181
+ 3. Checks the file extension against a list of known binary file types.
182
+ 4. As a last resort, reads the first 512 bytes of the file to check for
183
+ a null byte, a common indicator of a binary file.
184
+ """
185
+ # Step 1: Check cache first for fastest response
186
+ if file_path in _is_binary_cache:
187
+ return _is_binary_cache[file_path]
188
+
189
+ # Step 2: Fast check based on known text/binary extensions (no I/O)
190
+ _, extension = os.path.splitext(file_path)
191
+ extension = extension.lower()
192
+
193
+ if extension in TEXT_EXTENSIONS:
194
+ _is_binary_cache[file_path] = False
195
+ return False
196
+ if extension in BINARY_EXTENSIONS:
197
+ _is_binary_cache[file_path] = True
198
+ return True
199
+
200
+ # Step 3: Fallback to content analysis for unknown extensions
144
201
  try:
145
202
  with open(file_path, "rb") as file:
146
- chunk = file.read(1024)
203
+ # Read a smaller chunk, 512 bytes is usually enough to find a null byte
204
+ chunk = file.read(512)
147
205
  if b"\0" in chunk:
206
+ _is_binary_cache[file_path] = True
148
207
  return True
149
- if file_path.lower().endswith((".json", ".csv")):
150
- return False
208
+ # If no null byte, assume it's a text file
209
+ _is_binary_cache[file_path] = False
151
210
  return False
152
211
  except IOError:
153
- return False
212
+ # If we can't open it, treat it as binary to be safe
213
+ _is_binary_cache[file_path] = True
214
+ return True
154
215
 
155
216
 
156
217
  def get_human_readable_size(size):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kopipasta
3
- Version: 0.36.0
3
+ Version: 0.37.0
4
4
  Summary: A CLI tool to generate prompts with project structure and file contents
5
5
  Home-page: https://github.com/mkorpela/kopipasta
6
6
  Author: Mikko Korpela
@@ -10,7 +10,7 @@ with open("requirements.txt", "r", encoding="utf-8") as f:
10
10
 
11
11
  setup(
12
12
  name="kopipasta",
13
- version="0.36.0",
13
+ version="0.37.0",
14
14
  author="Mikko Korpela",
15
15
  author_email="mikko.korpela@gmail.com",
16
16
  description="A CLI tool to generate prompts with project structure and file contents",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes