kopipasta 0.10.0__tar.gz → 0.12.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kopipasta might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kopipasta
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: A CLI tool to generate prompts with project structure and file contents
5
5
  Home-page: https://github.com/mkorpela/kopipasta
6
6
  Author: Mikko Korpela
@@ -1,4 +1,7 @@
1
1
  #!/usr/bin/env python3
2
+ import csv
3
+ import io
4
+ import json
2
5
  import os
3
6
  import argparse
4
7
  import ast
@@ -44,7 +47,12 @@ def is_ignored(path, ignore_patterns):
44
47
  def is_binary(file_path):
45
48
  try:
46
49
  with open(file_path, 'rb') as file:
47
- return b'\0' in file.read(1024)
50
+ chunk = file.read(1024)
51
+ if b'\0' in chunk: # null bytes indicate binary file
52
+ return True
53
+ if file_path.lower().endswith(('.json', '.csv')):
54
+ return False
55
+ return False
48
56
  except IOError:
49
57
  return False
50
58
 
@@ -413,6 +421,10 @@ def get_file_snippet(file_path, max_lines=50, max_bytes=4096):
413
421
  byte_count += len(line.encode('utf-8'))
414
422
  return snippet
415
423
 
424
+ def print_char_count(count):
425
+ token_estimate = count // 4
426
+ print(f"\rCurrent prompt size: {count} characters (~ {token_estimate} tokens)", flush=True)
427
+
416
428
  def select_files_in_directory(directory, ignore_patterns, current_char_count=0):
417
429
  files = [f for f in os.listdir(directory)
418
430
  if os.path.isfile(os.path.join(directory, f)) and not is_ignored(os.path.join(directory, f), ignore_patterns) and not is_binary(os.path.join(directory, f))]
@@ -534,13 +546,77 @@ def fetch_web_content(url):
534
546
  try:
535
547
  response = requests.get(url)
536
548
  response.raise_for_status()
537
- full_content = response.text
538
- snippet = full_content[:1000] if len(full_content) > 10000 else full_content
539
- return full_content, snippet
549
+ content_type = response.headers.get('content-type', '').lower()
550
+ if 'json' in content_type:
551
+ return response.json(), 'json'
552
+ elif 'csv' in content_type:
553
+ return response.text, 'csv'
554
+ else:
555
+ return response.text, 'text'
540
556
  except requests.RequestException as e:
541
557
  print(f"Error fetching content from {url}: {e}")
542
558
  return None, None
543
559
 
560
+ def read_file_content(file_path):
561
+ _, ext = os.path.splitext(file_path)
562
+ if ext.lower() == '.json':
563
+ with open(file_path, 'r') as f:
564
+ return json.load(f), 'json'
565
+ elif ext.lower() == '.csv':
566
+ with open(file_path, 'r') as f:
567
+ return f.read(), 'csv'
568
+ else:
569
+ with open(file_path, 'r') as f:
570
+ return f.read(), 'text'
571
+
572
+ def get_content_snippet(content, content_type, max_lines=50, max_chars=4096):
573
+ if content_type == 'json':
574
+ return json.dumps(content, indent=2)[:max_chars]
575
+ elif content_type == 'csv':
576
+ csv_content = content if isinstance(content, str) else content.getvalue()
577
+ csv_reader = csv.reader(io.StringIO(csv_content))
578
+ rows = list(csv_reader)[:max_lines]
579
+ output = io.StringIO()
580
+ csv.writer(output).writerows(rows)
581
+ return output.getvalue()[:max_chars]
582
+ else:
583
+ return '\n'.join(content.split('\n')[:max_lines])[:max_chars]
584
+
585
+ def handle_content(content, content_type, file_or_url):
586
+ is_large = len(json.dumps(content)) > 102400 if content_type == 'json' else len(content) > 102400
587
+
588
+ if is_large:
589
+ while True:
590
+ choice = input(f"{file_or_url} is large. View (f)ull content, (s)nippet, or (p)review? ").lower()
591
+ if choice in ['f', 's', 'p']:
592
+ break
593
+ print("Invalid choice. Please enter 'f', 's', or 'p'.")
594
+
595
+ if choice == 'f':
596
+ return content, False
597
+ elif choice == 's':
598
+ return get_content_snippet(content, content_type), True
599
+ else: # preview
600
+ preview = get_content_preview(content, content_type)
601
+ print(f"\nPreview of {file_or_url}:\n{preview}\n")
602
+ return handle_content(content, content_type, file_or_url)
603
+ else:
604
+ return content, False
605
+
606
+
607
+ def get_content_preview(content, content_type):
608
+ if content_type == 'json':
609
+ return json.dumps(content, indent=2)[:1000] + "\n..."
610
+ elif content_type == 'csv':
611
+ csv_content = content if isinstance(content, str) else content.getvalue()
612
+ csv_reader = csv.reader(io.StringIO(csv_content))
613
+ rows = list(csv_reader)[:10]
614
+ output = io.StringIO()
615
+ csv.writer(output).writerows(rows)
616
+ return output.getvalue() + "\n..."
617
+ else:
618
+ return '\n'.join(content.split('\n')[:20]) + "\n..."
619
+
544
620
  def read_env_file():
545
621
  env_vars = {}
546
622
  if os.path.exists('.env'):
@@ -591,33 +667,29 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
591
667
  prompt += "\n```\n\n"
592
668
  prompt += "## File Contents\n\n"
593
669
  for file_tuple in files_to_include:
594
- if len(file_tuple) == 3:
595
- file, use_snippet, chunks = file_tuple
596
- else:
597
- file, use_snippet = file_tuple
670
+ if len(file_tuple) == 4:
671
+ file, content, is_snippet, content_type = file_tuple
598
672
  chunks = None
599
-
673
+ else:
674
+ file, content, is_snippet, content_type, chunks = file_tuple
600
675
  relative_path = get_relative_path(file)
601
- language = get_language_for_file(file)
676
+ language = get_language_for_file(file) if content_type == 'text' else content_type
602
677
 
603
678
  if chunks is not None:
604
679
  prompt += f"### {relative_path} (selected patches)\n\n```{language}\n"
605
680
  for chunk in chunks:
606
681
  prompt += f"{chunk}\n"
607
682
  prompt += "```\n\n"
608
- elif use_snippet:
609
- file_content = get_file_snippet(file)
610
- prompt += f"### {relative_path} (snippet)\n\n```{language}\n{file_content}\n```\n\n"
611
683
  else:
612
- file_content = read_file_contents(file)
613
- file_content = handle_env_variables(file_content, env_vars)
614
- prompt += f"### {relative_path}\n\n```{language}\n{file_content}\n```\n\n"
684
+ content = handle_env_variables(content, env_vars)
685
+ prompt += f"### {relative_path}{' (snippet)' if is_snippet else ''}\n\n```{language}\n{content}\n```\n\n"
615
686
 
616
687
  if web_contents:
617
688
  prompt += "## Web Content\n\n"
618
- for url, (full_content, snippet) in web_contents.items():
619
- content = handle_env_variables(snippet if len(full_content) > 10000 else full_content, env_vars)
620
- prompt += f"### {url}{' (snippet)' if len(full_content) > 10000 else ''}\n\n```\n{content}\n```\n\n"
689
+ for url, (content, is_snippet, content_type) in web_contents.items():
690
+ content = handle_env_variables(content, env_vars)
691
+ language = content_type if content_type in ['json', 'csv'] else ''
692
+ prompt += f"### {url}{' (snippet)' if is_snippet else ''}\n\n```{language}\n{content}\n```\n\n"
621
693
 
622
694
  prompt += "## Task Instructions\n\n"
623
695
  task_instructions = input("Enter the task instructions: ")
@@ -631,10 +703,6 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
631
703
  prompt += analysis_text
632
704
  return prompt
633
705
 
634
- def print_char_count(count):
635
- token_estimate = count // 4
636
- print(f"\rCurrent prompt size: {count} characters (~ {token_estimate} tokens)", flush=True)
637
-
638
706
  def main():
639
707
  parser = argparse.ArgumentParser(description="Generate a prompt with project structure, file contents, and web content.")
640
708
  parser.add_argument('inputs', nargs='+', help='Files, directories, or URLs to include in the prompt')
@@ -644,37 +712,78 @@ def main():
644
712
  env_vars = read_env_file()
645
713
 
646
714
  files_to_include = []
647
- processed_dirs = set()
648
715
  web_contents = {}
649
- current_char_count = 0
716
+
717
+ def process_directory(directory):
718
+ files = [f for f in os.listdir(directory)
719
+ if os.path.isfile(os.path.join(directory, f)) and not is_ignored(os.path.join(directory, f), ignore_patterns) and not is_binary(os.path.join(directory, f))]
720
+
721
+ if not files:
722
+ return []
723
+
724
+ print(f"\nDirectory: {directory}")
725
+ print("Files:")
726
+ for file in files:
727
+ file_path = os.path.join(directory, file)
728
+ file_size = os.path.getsize(file_path)
729
+ file_size_readable = get_human_readable_size(file_size)
730
+ print(f"- {file} ({file_size_readable})")
731
+
732
+ while True:
733
+ choice = input("(y)es add all / (n)o ignore all / (s)elect individually / (q)uit? ").lower()
734
+ if choice == 'y':
735
+ return [(os.path.join(directory, f), False) for f in files]
736
+ elif choice == 'n':
737
+ return []
738
+ elif choice == 's':
739
+ selected_files = []
740
+ for file in files:
741
+ file_path = os.path.join(directory, file)
742
+ while True:
743
+ file_choice = input(f"{file} (y/n/p/q)? ").lower()
744
+ if file_choice == 'y':
745
+ selected_files.append((file_path, False))
746
+ break
747
+ elif file_choice == 'n':
748
+ break
749
+ elif file_choice == 'p':
750
+ chunks, _ = select_file_patches(file_path)
751
+ if chunks:
752
+ selected_files.append((file_path, True, chunks))
753
+ break
754
+ elif file_choice == 'q':
755
+ return selected_files
756
+ else:
757
+ print("Invalid choice. Please enter 'y', 'n', 'p', or 'q'.")
758
+ return selected_files
759
+ elif choice == 'q':
760
+ return []
761
+ else:
762
+ print("Invalid choice. Please try again.")
650
763
 
651
764
  for input_path in args.inputs:
652
765
  if input_path.startswith(('http://', 'https://')):
653
- full_content, snippet = fetch_web_content(input_path)
654
- if full_content:
655
- web_contents[input_path] = (full_content, snippet)
656
- current_char_count += len(snippet if len(full_content) > 10000 else full_content)
766
+ content, content_type = fetch_web_content(input_path)
767
+ if content:
768
+ content, is_snippet = handle_content(content, content_type, input_path)
769
+ web_contents[input_path] = (content, is_snippet, content_type)
657
770
  print(f"Added web content from: {input_path}")
658
771
  elif os.path.isfile(input_path):
659
772
  if not is_ignored(input_path, ignore_patterns) and not is_binary(input_path):
660
773
  while True:
661
774
  file_choice = input(f"{input_path} (y)es include / (n)o skip / (p)atches / (q)uit? ").lower()
662
775
  if file_choice == 'y':
663
- use_snippet = is_large_file(input_path)
664
- files_to_include.append((input_path, use_snippet))
665
- if use_snippet:
666
- current_char_count += len(get_file_snippet(input_path))
667
- else:
668
- current_char_count += os.path.getsize(input_path)
669
- print(f"Added file: {input_path}{' (snippet)' if use_snippet else ''}")
776
+ content, content_type = read_file_content(input_path)
777
+ content, is_snippet = handle_content(content, content_type, input_path)
778
+ files_to_include.append((input_path, content, is_snippet, content_type))
779
+ print(f"Added file: {input_path}{' (snippet)' if is_snippet else ''}")
670
780
  break
671
781
  elif file_choice == 'n':
672
782
  break
673
783
  elif file_choice == 'p':
674
- chunks, char_count = select_file_patches(input_path)
784
+ chunks, _ = select_file_patches(input_path)
675
785
  if chunks:
676
- files_to_include.append((input_path, False, chunks))
677
- current_char_count += char_count
786
+ files_to_include.append((input_path, None, False, 'text', chunks))
678
787
  break
679
788
  elif file_choice == 'q':
680
789
  print("Quitting.")
@@ -684,9 +793,16 @@ def main():
684
793
  else:
685
794
  print(f"Ignored file: {input_path}")
686
795
  elif os.path.isdir(input_path):
687
- dir_files, dir_processed, current_char_count = process_directory(input_path, ignore_patterns, current_char_count)
688
- files_to_include.extend(dir_files)
689
- processed_dirs.update(dir_processed)
796
+ selected_files = process_directory(input_path)
797
+ for file_info in selected_files:
798
+ if len(file_info) == 2:
799
+ file_path, use_snippet = file_info
800
+ content, content_type = read_file_content(file_path)
801
+ content, is_snippet = handle_content(content, content_type, file_path)
802
+ files_to_include.append((file_path, content, is_snippet, content_type))
803
+ else:
804
+ file_path, _, chunks = file_info
805
+ files_to_include.append((file_path, None, False, 'text', chunks))
690
806
  else:
691
807
  print(f"Warning: {input_path} is not a valid file, directory, or URL. Skipping.")
692
808
 
@@ -695,8 +811,7 @@ def main():
695
811
  return
696
812
 
697
813
  print("\nFile and web content selection complete.")
698
- print_char_count(current_char_count)
699
- print(f"Summary: Added {len(files_to_include)} files from {len(processed_dirs)} directories and {len(web_contents)} web sources.")
814
+ print(f"Summary: Added {len(files_to_include)} files and {len(web_contents)} web sources.")
700
815
 
701
816
  prompt = generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars)
702
817
  print("\n\nGenerated prompt:")
@@ -707,7 +822,6 @@ def main():
707
822
  pyperclip.copy(prompt)
708
823
  separator = "\n" + "=" * 40 + "\n☕🍝 Kopipasta Complete! 🍝☕\n" + "=" * 40 + "\n"
709
824
  print(separator)
710
- final_char_count = len(prompt)
711
825
  final_token_estimate = final_char_count // 4
712
826
  print(f"Prompt has been copied to clipboard. Final size: {final_char_count} characters (~ {final_token_estimate} tokens)")
713
827
  except pyperclip.PyperclipException as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kopipasta
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: A CLI tool to generate prompts with project structure and file contents
5
5
  Home-page: https://github.com/mkorpela/kopipasta
6
6
  Author: Mikko Korpela
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="kopipasta",
8
- version="0.10.0",
8
+ version="0.12.0",
9
9
  author="Mikko Korpela",
10
10
  author_email="mikko.korpela@gmail.com",
11
11
  description="A CLI tool to generate prompts with project structure and file contents",
File without changes
File without changes
File without changes
File without changes
File without changes