kopipasta 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kopipasta might be problematic. Click here for more details.

kopipasta/main.py CHANGED
@@ -1,7 +1,9 @@
1
1
  #!/usr/bin/env python3
2
2
  import os
3
3
  import argparse
4
+ import ast
4
5
  import re
6
+ from textwrap import dedent
5
7
  import pyperclip
6
8
  import fnmatch
7
9
 
@@ -88,6 +90,7 @@ def get_language_for_file(file_path):
88
90
  '.ts': 'typescript',
89
91
  '.tsx': 'tsx',
90
92
  '.html': 'html',
93
+ '.htm': 'html',
91
94
  '.css': 'css',
92
95
  '.json': 'json',
93
96
  '.md': 'markdown',
@@ -96,10 +99,306 @@ def get_language_for_file(file_path):
96
99
  '.yml': 'yaml',
97
100
  '.yaml': 'yaml',
98
101
  '.go': 'go',
99
- '.toml': 'toml'
102
+ '.toml': 'toml',
103
+ '.c': 'c',
104
+ '.cpp': 'cpp',
105
+ '.cc': 'cpp',
106
+ '.h': 'cpp',
107
+ '.hpp': 'cpp',
100
108
  }
101
109
  return language_map.get(extension, '')
102
110
 
111
def split_python_file(file_content):
    """
    Splits Python code into logical chunks using the AST module.

    Each top-level statement (function, class, assignment, ...) is a
    candidate chunk; consecutive statements are grouped until a chunk spans
    at least 10 lines. Decorators are kept together with the definition they
    decorate. Non-blank text that sits between two statements (comments, for
    example) is emitted as its own chunk before the following statement.

    If the source cannot be parsed, the whole file is returned as a single
    chunk instead of raising, so callers can still offer it for selection.

    Returns a list of tuples: (chunk_code, start_line, end_line), where the
    line numbers are 0-indexed and end_line is exclusive.
    """
    import ast

    lines = file_content.splitlines(keepends=True)

    try:
        tree = ast.parse(file_content)
    except (SyntaxError, ValueError):
        # Not valid Python for this interpreter (or contains null bytes):
        # fall back to one chunk, like the other splitters do when they
        # find nothing to split on.
        return [(file_content, 0, len(lines))]

    def get_code(start, end):
        return ''.join(lines[start:end])

    def node_span(node):
        """Return the 0-indexed (start, exclusive end) line span of a node."""
        first_line = node.lineno
        # On Python 3.8+ ``lineno`` of a decorated definition refers to the
        # ``def``/``class`` line, so widen the span to cover the decorators.
        for decorator in getattr(node, 'decorator_list', ()):
            first_line = min(first_line, decorator.lineno)
        start = first_line - 1
        end = getattr(node, 'end_lineno', None)
        if end is None:
            # ``end_lineno`` is missing on older interpreters; assume one line.
            end = start + 1
        return start, end

    nodes = [node for node in ast.iter_child_nodes(tree) if hasattr(node, 'lineno')]

    chunks = []
    prev_end = 0
    i = 0
    while i < len(nodes):
        chunk_start, chunk_end = node_span(nodes[i])

        # Absorb following statements until the chunk is at least 10 lines.
        while (chunk_end - chunk_start) < 10 and i + 1 < len(nodes):
            i += 1
            _, chunk_end = node_span(nodes[i])

        # Add code before the node (e.g., comments between statements).
        if prev_end < chunk_start:
            code = get_code(prev_end, chunk_start)
            if code.strip():
                chunks.append((code, prev_end, chunk_start))

        # Add the merged chunk.
        chunks.append((get_code(chunk_start, chunk_end), chunk_start, chunk_end))
        prev_end = chunk_end
        i += 1

    # Add any remaining code at the end (e.g., trailing comments).
    if prev_end < len(lines):
        code = get_code(prev_end, len(lines))
        if code.strip():
            chunks.append((code, prev_end, len(lines)))

    return merge_small_chunks(chunks)
165
+
166
def merge_small_chunks(chunks, min_lines=10):
    """
    Merges chunks to ensure each has at least min_lines lines.

    Consecutive chunks are concatenated until the span from the first
    chunk's start line to the last chunk's end line reaches min_lines.
    A short remainder at the end of the input is appended to the last
    emitted chunk; it is only returned on its own when no chunk reached
    min_lines at all.

    Args:
        chunks: iterable of (code, start_line, end_line) tuples in file order.
        min_lines: minimum line span of each returned chunk.

    Returns:
        A list of (code, start_line, end_line) tuples.
    """
    merged_chunks = []
    pending_parts = []
    # ``None`` marks an empty buffer. The buffered text itself is not a safe
    # marker because a chunk may legitimately carry an empty code string.
    pending_start = None
    pending_end = None

    for code, start_line, end_line in chunks:
        if pending_start is None:
            pending_start = start_line
        pending_parts.append(code)
        pending_end = end_line

        if (pending_end - pending_start) >= min_lines:
            merged_chunks.append((''.join(pending_parts), pending_start, pending_end))
            pending_parts = []
            pending_start = None
            pending_end = None

    leftover = ''.join(pending_parts)
    if leftover:
        if merged_chunks:
            # Fold the short tail into the previous chunk so that no
            # undersized chunk is produced.
            prev_code, prev_start, _ = merged_chunks[-1]
            merged_chunks[-1] = (prev_code + leftover, prev_start, pending_end)
        else:
            merged_chunks.append((leftover, pending_start, pending_end))

    return merged_chunks
195
+
196
def split_javascript_file(file_content):
    """
    Splits JavaScript code into logical chunks using regular expressions.

    A chunk starts at each function declaration, class declaration or
    parenthesised arrow-function assignment and runs until the braces opened
    from that line onwards are balanced again. Text between two such blocks
    becomes its own chunk. Brace counting is purely textual, so braces
    inside strings or comments are counted as well.

    Definitions found inside a block that has already been emitted (methods,
    nested functions) do not start a new chunk; otherwise their code would
    be included twice.

    Returns a list of tuples: (chunk_code, start_line, end_line), where the
    line numbers are 0-indexed and end_line is exclusive.
    """
    lines = file_content.splitlines(keepends=True)
    chunks = []
    pattern = re.compile(
        r'^\s*(export\s+)?(async\s+)?(function\s+\w+|class\s+\w+|\w+\s*=\s*\(.*?\)\s*=>)',
        re.MULTILINE
    )
    matches = list(pattern.finditer(file_content))

    if not matches:
        return [(file_content, 0, len(lines))]

    prev_end_line = 0
    for match in matches:
        start_line = file_content.count('\n', 0, match.start())
        if start_line < prev_end_line:
            # Nested inside the block emitted for an earlier match.
            continue

        if prev_end_line < start_line:
            code = ''.join(lines[prev_end_line:start_line])
            chunks.append((code, prev_end_line, start_line))

        # Scan forward until the braces balance; if they never do, the
        # block extends to the end of the file.
        end_line = len(lines)
        brace_count = 0
        in_block = False
        for i in range(start_line, len(lines)):
            line = lines[i]
            brace_count += line.count('{') - line.count('}')
            if '{' in line:
                in_block = True
            if in_block and brace_count == 0:
                end_line = i + 1
                break

        chunks.append((''.join(lines[start_line:end_line]), start_line, end_line))
        prev_end_line = end_line

    if prev_end_line < len(lines):
        code = ''.join(lines[prev_end_line:])
        chunks.append((code, prev_end_line, len(lines)))

    return merge_small_chunks(chunks)
246
+
247
def split_html_file(file_content):
    """
    Splits HTML code into logical chunks based on top-level elements using regular expressions.

    Each regex match (an opening tag up to the first matching closing tag)
    becomes a chunk made of whole lines, including the line that holds the
    closing tag. Text between matches becomes its own chunk. When several
    elements share a line, the line is emitted once with the first of them.

    Returns a list of tuples: (chunk_code, start_line, end_line), where the
    line numbers are 0-indexed and end_line is exclusive.
    """
    pattern = re.compile(r'<(?P<tag>\w+)(\s|>).*?</(?P=tag)>', re.DOTALL)
    lines = file_content.splitlines(keepends=True)
    chunks = []
    matches = list(pattern.finditer(file_content))

    if not matches:
        return [(file_content, 0, len(lines))]

    prev_end = 0
    for match in matches:
        # Never start before the previous chunk's end: an element that
        # begins on a line already emitted must not repeat that line.
        start_line = max(file_content.count('\n', 0, match.start()), prev_end)
        # +1 turns the index of the closing tag's line into an exclusive
        # bound, so the closing line is part of the chunk.
        end_line = file_content.count('\n', 0, match.end()) + 1

        if end_line <= prev_end:
            # The whole element sits on a line that was already emitted.
            continue

        if prev_end < start_line:
            code = ''.join(lines[prev_end:start_line])
            chunks.append((code, prev_end, start_line))

        code = ''.join(lines[start_line:end_line])
        chunks.append((code, start_line, end_line))
        prev_end = end_line

    if prev_end < len(lines):
        code = ''.join(lines[prev_end:])
        chunks.append((code, prev_end, len(lines)))

    return merge_small_chunks(chunks)
280
+
281
def split_c_file(file_content):
    """
    Splits C/C++ code into logical chunks using regular expressions.

    A chunk starts wherever the pattern finds something shaped like a
    function definition (``type name(args) {``) and runs until the braces
    opened from that line onwards are balanced again. Text between two such
    blocks becomes its own chunk. Brace counting is purely textual, so
    braces inside strings or comments are counted as well.

    The pattern also matches some statements inside function bodies (for
    example ``else if (...) {`` on its own line). Matches that start inside
    a block that has already been emitted are skipped so that no code is
    included twice.

    Returns a list of tuples: (chunk_code, start_line, end_line), where the
    line numbers are 0-indexed and end_line is exclusive.
    """
    pattern = re.compile(r'^\s*(?:[\w\*\s]+)\s+(\w+)\s*\([^)]*\)\s*\{', re.MULTILINE)
    lines = file_content.splitlines(keepends=True)
    chunks = []
    matches = list(pattern.finditer(file_content))

    if not matches:
        return [(file_content, 0, len(lines))]

    prev_end_line = 0
    for match in matches:
        start_line = file_content.count('\n', 0, match.start())
        if start_line < prev_end_line:
            # Inside the block emitted for an earlier match.
            continue

        if prev_end_line < start_line:
            code = ''.join(lines[prev_end_line:start_line])
            chunks.append((code, prev_end_line, start_line))

        # Scan forward until the braces balance; if they never do, the
        # block extends to the end of the file.
        end_line = len(lines)
        brace_count = 0
        in_function = False
        for i in range(start_line, len(lines)):
            line = lines[i]
            brace_count += line.count('{') - line.count('}')
            if '{' in line:
                in_function = True
            if in_function and brace_count == 0:
                end_line = i + 1
                break

        chunks.append((''.join(lines[start_line:end_line]), start_line, end_line))
        prev_end_line = end_line

    if prev_end_line < len(lines):
        code = ''.join(lines[prev_end_line:])
        chunks.append((code, prev_end_line, len(lines)))

    return merge_small_chunks(chunks)
328
+
329
def split_generic_file(file_content):
    """
    Splits generic text files into chunks based on double newlines.

    A new chunk starts at the first non-blank line that follows one or more
    blank lines. The blank lines stay attached to the end of the preceding
    chunk (leading blank lines stay with the first chunk), so the chunks
    are contiguous and joining them reproduces the original text. A file
    that contains only blank lines yields no chunks.

    Returns a list of tuples: (chunk_code, start_line, end_line), where the
    line numbers are 0-indexed and end_line is exclusive.
    """
    lines = file_content.splitlines(keepends=True)
    chunks = []
    start = 0
    seen_text = False
    prev_blank = False

    for i, line in enumerate(lines):
        blank = line.strip() == ''
        if not blank and prev_blank and seen_text:
            # First line of a new paragraph: close the previous chunk,
            # keeping its trailing blank lines.
            chunks.append((''.join(lines[start:i]), start, i))
            start = i
        if not blank:
            seen_text = True
        prev_blank = blank

    if seen_text and start < len(lines):
        chunks.append((''.join(lines[start:]), start, len(lines)))

    return merge_small_chunks(chunks)
347
+
348
def select_file_patches(file_path):
    """
    Interactively asks which chunks of a file should be included.

    The file is split with the splitter matching its detected language
    (generic blank-line splitting for anything else). Each chunk is printed
    and the user answers:
      y - include the chunk
      n - replace the chunk with a language-appropriate placeholder comment
      q - stop and return what has been collected so far

    Returns a tuple (chunks, total_char_count): the list of selected chunk
    texts and placeholders, and the sum of their lengths. If no chunk was
    actually included, ([], 0) is returned so that callers guarding with
    ``if chunks:`` do not add a file made only of placeholders.
    """
    file_content = read_file_contents(file_path)
    language = get_language_for_file(file_path)

    if language == 'python':
        code_chunks = split_python_file(file_content)
    elif language == 'javascript':
        code_chunks = split_javascript_file(file_content)
    elif language == 'html':
        code_chunks = split_html_file(file_content)
    elif language in ['c', 'cpp']:
        code_chunks = split_c_file(file_content)
    else:
        code_chunks = split_generic_file(file_content)

    chunks = []
    total_char_count = 0
    included_any = False

    def selection():
        # Placeholders alone carry no content, so report "nothing selected".
        if not included_any:
            return [], 0
        return chunks, total_char_count

    print(f"\nSelecting patches for {file_path}")
    for index, (chunk_code, start_line, end_line) in enumerate(code_chunks):
        # start_line is 0-indexed and end_line exclusive, so this prints an
        # inclusive 1-indexed range.
        print(f"\nChunk {index + 1} (Lines {start_line + 1}-{end_line}):")
        print(f"```{language}\n{chunk_code}\n```")
        while True:
            # Strip so that accidental surrounding whitespace is accepted.
            choice = input("(y)es include / (n)o skip / (q)uit rest of file? ").strip().lower()
            if choice == 'y':
                chunks.append(chunk_code)
                total_char_count += len(chunk_code)
                included_any = True
                break
            elif choice == 'n':
                placeholder = get_placeholder_comment(language)
                chunks.append(placeholder)
                total_char_count += len(placeholder)
                break
            elif choice == 'q':
                print("Skipping the rest of the file.")
                return selection()
            else:
                print("Invalid choice. Please enter 'y', 'n', or 'q'.")

    return selection()
387
+
388
def get_placeholder_comment(language):
    """
    Returns the one-line comment that stands in for a skipped chunk.

    The comment uses the comment syntax of the given language name; unknown
    languages (including the empty string) get a ``#`` comment.

    Args:
        language: language name such as 'python', 'tsx' or 'html'.

    Returns:
        The placeholder text, terminated by a newline.
    """
    line_comment = '// Skipped content\n'
    markup_comment = '<!-- Skipped content -->\n'
    comments = {
        'python': '# Skipped content\n',
        'javascript': line_comment,
        'typescript': line_comment,
        # tsx and go are also `//` languages; '#' is not a comment there.
        'tsx': line_comment,
        'go': line_comment,
        'java': line_comment,
        'c': line_comment,
        'cpp': line_comment,
        'html': markup_comment,
        # Markdown has no comment syntax of its own; HTML comments are valid.
        'markdown': markup_comment,
        'css': '/* Skipped content */\n',
        'default': '# Skipped content\n'
    }
    return comments.get(language, comments['default'])
401
+
103
402
  def get_file_snippet(file_path, max_lines=50, max_bytes=4096):
104
403
  snippet = ""
105
404
  byte_count = 0
@@ -166,7 +465,7 @@ def select_files_in_directory(directory, ignore_patterns, current_char_count=0):
166
465
  while True:
167
466
  if current_char_count > 0:
168
467
  print_char_count(current_char_count)
169
- file_choice = input(f"{file} ({file_size_readable}, ~{file_char_estimate} chars, ~{file_token_estimate} tokens) (y/n/q)? ").lower()
468
+ file_choice = input(f"{file} ({file_size_readable}, ~{file_char_estimate} chars, ~{file_token_estimate} tokens) (y/n/p/q)? ").lower()
170
469
  if file_choice == 'y':
171
470
  if is_large_file(file_path):
172
471
  while True:
@@ -186,11 +485,17 @@ def select_files_in_directory(directory, ignore_patterns, current_char_count=0):
186
485
  break
187
486
  elif file_choice == 'n':
188
487
  break
488
+ elif file_choice == 'p':
489
+ chunks, char_count = select_file_patches(file_path)
490
+ if chunks:
491
+ selected_files.append((file_path, False, chunks))
492
+ current_char_count += char_count
493
+ break
189
494
  elif file_choice == 'q':
190
495
  print(f"Quitting selection for {directory}")
191
496
  return selected_files, current_char_count
192
497
  else:
193
- print("Invalid choice. Please enter 'y', 'n', or 'q'.")
498
+ print("Invalid choice. Please enter 'y', 'n', 'p', or 'q'.")
194
499
  print(f"Added {len(selected_files)} files from {directory}")
195
500
  return selected_files, current_char_count
196
501
  elif choice == 'q':
@@ -277,10 +582,22 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
277
582
  prompt += get_project_structure(ignore_patterns)
278
583
  prompt += "\n```\n\n"
279
584
  prompt += "## File Contents\n\n"
280
- for file, use_snippet in files_to_include:
585
+ for file_tuple in files_to_include:
586
+ if len(file_tuple) == 3:
587
+ file, use_snippet, chunks = file_tuple
588
+ else:
589
+ file, use_snippet = file_tuple
590
+ chunks = None
591
+
281
592
  relative_path = get_relative_path(file)
282
593
  language = get_language_for_file(file)
283
- if use_snippet:
594
+
595
+ if chunks is not None:
596
+ prompt += f"### {relative_path} (selected patches)\n\n```{language}\n"
597
+ for chunk in chunks:
598
+ prompt += f"{chunk}\n"
599
+ prompt += "```\n\n"
600
+ elif use_snippet:
284
601
  file_content = get_file_snippet(file)
285
602
  prompt += f"### {relative_path} (snippet)\n\n```{language}\n{file_content}\n```\n\n"
286
603
  else:
@@ -332,13 +649,30 @@ def main():
332
649
  print(f"Added web content from: {input_path}")
333
650
  elif os.path.isfile(input_path):
334
651
  if not is_ignored(input_path, ignore_patterns) and not is_binary(input_path):
335
- use_snippet = is_large_file(input_path)
336
- files_to_include.append((input_path, use_snippet))
337
- if use_snippet:
338
- current_char_count += len(get_file_snippet(input_path))
339
- else:
340
- current_char_count += os.path.getsize(input_path)
341
- print(f"Added file: {input_path}{' (snippet)' if use_snippet else ''}")
652
+ while True:
653
+ file_choice = input(f"{input_path} (y)es include / (n)o skip / (p)atches / (q)uit? ").lower()
654
+ if file_choice == 'y':
655
+ use_snippet = is_large_file(input_path)
656
+ files_to_include.append((input_path, use_snippet))
657
+ if use_snippet:
658
+ current_char_count += len(get_file_snippet(input_path))
659
+ else:
660
+ current_char_count += os.path.getsize(input_path)
661
+ print(f"Added file: {input_path}{' (snippet)' if use_snippet else ''}")
662
+ break
663
+ elif file_choice == 'n':
664
+ break
665
+ elif file_choice == 'p':
666
+ chunks, char_count = select_file_patches(input_path)
667
+ if chunks:
668
+ files_to_include.append((input_path, False, chunks))
669
+ current_char_count += char_count
670
+ break
671
+ elif file_choice == 'q':
672
+ print("Quitting.")
673
+ return
674
+ else:
675
+ print("Invalid choice. Please enter 'y', 'n', 'p', or 'q'.")
342
676
  else:
343
677
  print(f"Ignored file: {input_path}")
344
678
  elif os.path.isdir(input_path):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kopipasta
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: A CLI tool to generate prompts with project structure and file contents
5
5
  Home-page: https://github.com/mkorpela/kopipasta
6
6
  Author: Mikko Korpela
@@ -0,0 +1,8 @@
1
+ kopipasta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ kopipasta/main.py,sha256=OmS54IQo4mJLUu_t6RYrEuLuJlFmnN9lEoZbZya3m-A,27657
3
+ kopipasta-0.6.0.dist-info/LICENSE,sha256=xw4C9TAU7LFu4r_MwSbky90uzkzNtRwAo3c51IWR8lk,1091
4
+ kopipasta-0.6.0.dist-info/METADATA,sha256=F3sWcMABaUy5hT0RWuZwpCkIsvhMNxLya8mhV6nXcFs,5431
5
+ kopipasta-0.6.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
6
+ kopipasta-0.6.0.dist-info/entry_points.txt,sha256=but54qDNz1-F8fVvGstq_QID5tHjczP7bO7rWLFkc6Y,50
7
+ kopipasta-0.6.0.dist-info/top_level.txt,sha256=iXohixMuCdw8UjGDUp0ouICLYBDrx207sgZIJ9lxn0o,10
8
+ kopipasta-0.6.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- kopipasta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- kopipasta/main.py,sha256=5-zAPtdF4PU5xbMFJk5x5lmVAAXSIQ0FoKko4znch3k,15829
3
- kopipasta-0.4.0.dist-info/LICENSE,sha256=xw4C9TAU7LFu4r_MwSbky90uzkzNtRwAo3c51IWR8lk,1091
4
- kopipasta-0.4.0.dist-info/METADATA,sha256=8xv0cFWAnXYKS1_gUUCycrL_KGhWG1OUML0la5YAhiE,5431
5
- kopipasta-0.4.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
6
- kopipasta-0.4.0.dist-info/entry_points.txt,sha256=but54qDNz1-F8fVvGstq_QID5tHjczP7bO7rWLFkc6Y,50
7
- kopipasta-0.4.0.dist-info/top_level.txt,sha256=iXohixMuCdw8UjGDUp0ouICLYBDrx207sgZIJ9lxn0o,10
8
- kopipasta-0.4.0.dist-info/RECORD,,