kopipasta 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kopipasta might be problematic.
- kopipasta/main.py +133 -40
- {kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/METADATA +1 -1
- kopipasta-0.11.0.dist-info/RECORD +8 -0
- kopipasta-0.10.0.dist-info/RECORD +0 -8
- {kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/LICENSE +0 -0
- {kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/WHEEL +0 -0
- {kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/entry_points.txt +0 -0
- {kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/top_level.txt +0 -0
kopipasta/main.py
CHANGED
@@ -1,4 +1,7 @@
 #!/usr/bin/env python3
+import csv
+import io
+import json
 import os
 import argparse
 import ast
@@ -44,7 +47,12 @@ def is_ignored(path, ignore_patterns):
 def is_binary(file_path):
     try:
         with open(file_path, 'rb') as file:
-
+            chunk = file.read(1024)
+            if b'\0' in chunk:  # null bytes indicate binary file
+                return True
+            if file_path.lower().endswith(('.json', '.csv')):
+                return False
+            return False
     except IOError:
         return False
 
@@ -413,6 +421,10 @@ def get_file_snippet(file_path, max_lines=50, max_bytes=4096):
         byte_count += len(line.encode('utf-8'))
     return snippet
 
+def print_char_count(count):
+    token_estimate = count // 4
+    print(f"\rCurrent prompt size: {count} characters (~ {token_estimate} tokens)", flush=True)
+
 def select_files_in_directory(directory, ignore_patterns, current_char_count=0):
     files = [f for f in os.listdir(directory)
              if os.path.isfile(os.path.join(directory, f)) and not is_ignored(os.path.join(directory, f), ignore_patterns) and not is_binary(os.path.join(directory, f))]
@@ -534,13 +546,77 @@ def fetch_web_content(url):
     try:
         response = requests.get(url)
         response.raise_for_status()
-
-
-
+        content_type = response.headers.get('content-type', '').lower()
+        if 'json' in content_type:
+            return response.json(), 'json'
+        elif 'csv' in content_type:
+            return response.text, 'csv'
+        else:
+            return response.text, 'text'
     except requests.RequestException as e:
         print(f"Error fetching content from {url}: {e}")
         return None, None
 
+def read_file_content(file_path):
+    _, ext = os.path.splitext(file_path)
+    if ext.lower() == '.json':
+        with open(file_path, 'r') as f:
+            return json.load(f), 'json'
+    elif ext.lower() == '.csv':
+        with open(file_path, 'r') as f:
+            return f.read(), 'csv'
+    else:
+        with open(file_path, 'r') as f:
+            return f.read(), 'text'
+
+def get_content_snippet(content, content_type, max_lines=50, max_chars=4096):
+    if content_type == 'json':
+        return json.dumps(content, indent=2)[:max_chars]
+    elif content_type == 'csv':
+        csv_content = content if isinstance(content, str) else content.getvalue()
+        csv_reader = csv.reader(io.StringIO(csv_content))
+        rows = list(csv_reader)[:max_lines]
+        output = io.StringIO()
+        csv.writer(output).writerows(rows)
+        return output.getvalue()[:max_chars]
+    else:
+        return '\n'.join(content.split('\n')[:max_lines])[:max_chars]
+
+def handle_content(content, content_type, file_or_url):
+    is_large = len(json.dumps(content)) > 102400 if content_type == 'json' else len(content) > 102400
+
+    if is_large:
+        while True:
+            choice = input(f"{file_or_url} is large. View (f)ull content, (s)nippet, or (p)review? ").lower()
+            if choice in ['f', 's', 'p']:
+                break
+            print("Invalid choice. Please enter 'f', 's', or 'p'.")
+
+        if choice == 'f':
+            return content, False
+        elif choice == 's':
+            return get_content_snippet(content, content_type), True
+        else:  # preview
+            preview = get_content_preview(content, content_type)
+            print(f"\nPreview of {file_or_url}:\n{preview}\n")
+            return handle_content(content, content_type, file_or_url)
+    else:
+        return content, False
+
+
+def get_content_preview(content, content_type):
+    if content_type == 'json':
+        return json.dumps(content, indent=2)[:1000] + "\n..."
+    elif content_type == 'csv':
+        csv_content = content if isinstance(content, str) else content.getvalue()
+        csv_reader = csv.reader(io.StringIO(csv_content))
+        rows = list(csv_reader)[:10]
+        output = io.StringIO()
+        csv.writer(output).writerows(rows)
+        return output.getvalue() + "\n..."
+    else:
+        return '\n'.join(content.split('\n')[:20]) + "\n..."
+
 def read_env_file():
     env_vars = {}
     if os.path.exists('.env'):
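For orientation, here is a brief, hypothetical usage sketch of the content helpers added above (read_file_content and get_content_snippet). It assumes the 0.11.0 wheel is installed and that a local file named example.csv exists; neither the snippet nor that file is part of the package.

from kopipasta.main import read_file_content, get_content_snippet

# "example.csv" is an assumed local file, used only for illustration.
content, content_type = read_file_content("example.csv")
print(content_type)  # -> 'csv' (chosen from the file extension)
snippet = get_content_snippet(content, content_type, max_lines=10)
print(snippet)       # first 10 CSV rows, capped at 4096 characters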
@@ -591,33 +667,29 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
     prompt += "\n```\n\n"
     prompt += "## File Contents\n\n"
     for file_tuple in files_to_include:
-        if len(file_tuple) ==
-            file,
-        else:
-            file, use_snippet = file_tuple
+        if len(file_tuple) == 4:
+            file, content, is_snippet, content_type = file_tuple
             chunks = None
-
+        else:
+            file, content, is_snippet, content_type, chunks = file_tuple
         relative_path = get_relative_path(file)
-        language = get_language_for_file(file)
+        language = get_language_for_file(file) if content_type == 'text' else content_type
 
         if chunks is not None:
             prompt += f"### {relative_path} (selected patches)\n\n```{language}\n"
             for chunk in chunks:
                 prompt += f"{chunk}\n"
             prompt += "```\n\n"
-        elif use_snippet:
-            file_content = get_file_snippet(file)
-            prompt += f"### {relative_path} (snippet)\n\n```{language}\n{file_content}\n```\n\n"
         else:
-
-
-            prompt += f"### {relative_path}\n\n```{language}\n{file_content}\n```\n\n"
+            content = handle_env_variables(content, env_vars)
+            prompt += f"### {relative_path}{' (snippet)' if is_snippet else ''}\n\n```{language}\n{content}\n```\n\n"
 
     if web_contents:
         prompt += "## Web Content\n\n"
-        for url, (
-            content = handle_env_variables(
-
+        for url, (content, is_snippet, content_type) in web_contents.items():
+            content = handle_env_variables(content, env_vars)
+            language = content_type if content_type in ['json', 'csv'] else ''
+            prompt += f"### {url}{' (snippet)' if is_snippet else ''}\n\n```{language}\n{content}\n```\n\n"
 
     prompt += "## Task Instructions\n\n"
     task_instructions = input("Enter the task instructions: ")
@@ -631,10 +703,6 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
     prompt += analysis_text
     return prompt
 
-def print_char_count(count):
-    token_estimate = count // 4
-    print(f"\rCurrent prompt size: {count} characters (~ {token_estimate} tokens)", flush=True)
-
 def main():
     parser = argparse.ArgumentParser(description="Generate a prompt with project structure, file contents, and web content.")
     parser.add_argument('inputs', nargs='+', help='Files, directories, or URLs to include in the prompt')
@@ -644,36 +712,34 @@ def main():
     env_vars = read_env_file()
 
     files_to_include = []
-    processed_dirs = set()
     web_contents = {}
     current_char_count = 0
 
     for input_path in args.inputs:
         if input_path.startswith(('http://', 'https://')):
-
-            if
-
-
+            content, content_type = fetch_web_content(input_path)
+            if content:
+                content, is_snippet = handle_content(content, content_type, input_path)
+                web_contents[input_path] = (content, is_snippet, content_type)
+                current_char_count += len(json.dumps(content)) if content_type == 'json' else len(content)
             print(f"Added web content from: {input_path}")
         elif os.path.isfile(input_path):
             if not is_ignored(input_path, ignore_patterns) and not is_binary(input_path):
                 while True:
                     file_choice = input(f"{input_path} (y)es include / (n)o skip / (p)atches / (q)uit? ").lower()
                     if file_choice == 'y':
-
-
-
-
-                    else
-                        current_char_count += os.path.getsize(input_path)
-                        print(f"Added file: {input_path}{' (snippet)' if use_snippet else ''}")
+                        content, content_type = read_file_content(input_path)
+                        content, is_snippet = handle_content(content, content_type, input_path)
+                        files_to_include.append((input_path, content, is_snippet, content_type))
+                        current_char_count += len(json.dumps(content)) if content_type == 'json' else len(content)
+                        print(f"Added file: {input_path}{' (snippet)' if is_snippet else ''}")
                         break
                     elif file_choice == 'n':
                         break
                     elif file_choice == 'p':
                         chunks, char_count = select_file_patches(input_path)
                         if chunks:
-                            files_to_include.append((input_path, False, chunks))
+                            files_to_include.append((input_path, None, False, 'text', chunks))
                             current_char_count += char_count
                             break
                     elif file_choice == 'q':
@@ -684,9 +750,36 @@ def main():
             else:
                 print(f"Ignored file: {input_path}")
         elif os.path.isdir(input_path):
-
-
-
+            for root, _, files in os.walk(input_path):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    if not is_ignored(file_path, ignore_patterns) and not is_binary(file_path):
+                        while True:
+                            file_choice = input(f"{file_path} (y)es include / (n)o skip / (p)atches / (q)uit? ").lower()
+                            if file_choice == 'y':
+                                content, content_type = read_file_content(file_path)
+                                content, is_snippet = handle_content(content, content_type, file_path)
+                                files_to_include.append((file_path, content, is_snippet, content_type))
+                                current_char_count += len(json.dumps(content)) if content_type == 'json' else len(content)
+                                print(f"Added file: {file_path}{' (snippet)' if is_snippet else ''}")
+                                break
+                            elif file_choice == 'n':
+                                break
+                            elif file_choice == 'p':
+                                chunks, char_count = select_file_patches(file_path)
+                                if chunks:
+                                    files_to_include.append((file_path, None, False, 'text', chunks))
+                                    current_char_count += char_count
+                                    break
+                            elif file_choice == 'q':
+                                print("Quitting directory processing.")
+                                break
+                            else:
+                                print("Invalid choice. Please enter 'y', 'n', 'p', or 'q'.")
+                        if file_choice == 'q':
+                            break
+                if file_choice == 'q':
+                    break
         else:
             print(f"Warning: {input_path} is not a valid file, directory, or URL. Skipping.")
@@ -696,7 +789,7 @@ def main():
 
     print("\nFile and web content selection complete.")
     print_char_count(current_char_count)
-    print(f"Summary: Added {len(files_to_include)} files
+    print(f"Summary: Added {len(files_to_include)} files and {len(web_contents)} web sources.")
 
     prompt = generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars)
    print("\n\nGenerated prompt:")
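Before the RECORD changes, a hedged sketch (not shipped in the package) of the two tuple shapes main() now appends to files_to_include and that generate_prompt() distinguishes by length; the paths and contents below are invented for illustration.

# Invented example data; only the tuple shapes mirror the diff above.
full_entry = ("src/app.py", "print('hello')", False, 'text')             # whole file or snippet: 4-tuple
patch_entry = ("src/big.py", None, False, 'text', ["@@ patch text @@"])  # selected patches: 5-tuple

for file_tuple in (full_entry, patch_entry):
    if len(file_tuple) == 4:
        path, content, is_snippet, content_type = file_tuple
        chunks = None
    else:
        path, content, is_snippet, content_type, chunks = file_tuple
    mode = "patches" if chunks is not None else ("snippet" if is_snippet else "full")
    print(f"{path}: {content_type}, {mode}")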
kopipasta-0.11.0.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+kopipasta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kopipasta/main.py,sha256=RtRWeLc5tbCPoYJIzW03iyQQnffKEp7e7eH2iW5hLs4,32427
+kopipasta-0.11.0.dist-info/LICENSE,sha256=xw4C9TAU7LFu4r_MwSbky90uzkzNtRwAo3c51IWR8lk,1091
+kopipasta-0.11.0.dist-info/METADATA,sha256=fJsC-ljDacn9r1aj19n5YG6TxNbbpbowcqPKi72EQoY,5646
+kopipasta-0.11.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+kopipasta-0.11.0.dist-info/entry_points.txt,sha256=but54qDNz1-F8fVvGstq_QID5tHjczP7bO7rWLFkc6Y,50
+kopipasta-0.11.0.dist-info/top_level.txt,sha256=iXohixMuCdw8UjGDUp0ouICLYBDrx207sgZIJ9lxn0o,10
+kopipasta-0.11.0.dist-info/RECORD,,
kopipasta-0.10.0.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-kopipasta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kopipasta/main.py,sha256=JBVB1IEVs11XYZ0K98WACDTAH9CYSdVtehvX4Z8tmYU,28029
-kopipasta-0.10.0.dist-info/LICENSE,sha256=xw4C9TAU7LFu4r_MwSbky90uzkzNtRwAo3c51IWR8lk,1091
-kopipasta-0.10.0.dist-info/METADATA,sha256=TZx-QEJGaEvF-kuPQRr1E1HKTADb37H3salKkj6o_Do,5646
-kopipasta-0.10.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-kopipasta-0.10.0.dist-info/entry_points.txt,sha256=but54qDNz1-F8fVvGstq_QID5tHjczP7bO7rWLFkc6Y,50
-kopipasta-0.10.0.dist-info/top_level.txt,sha256=iXohixMuCdw8UjGDUp0ouICLYBDrx207sgZIJ9lxn0o,10
-kopipasta-0.10.0.dist-info/RECORD,,
{kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/LICENSE
File without changes
{kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/WHEEL
File without changes
{kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/entry_points.txt
File without changes
{kopipasta-0.10.0.dist-info → kopipasta-0.11.0.dist-info}/top_level.txt
File without changes