kopipasta-0.10.0-py3-none-any.whl → kopipasta-0.11.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kopipasta might be problematic. Click here for more details.

kopipasta/main.py CHANGED
@@ -1,4 +1,7 @@
1
1
  #!/usr/bin/env python3
2
+ import csv
3
+ import io
4
+ import json
2
5
  import os
3
6
  import argparse
4
7
  import ast
@@ -44,7 +47,12 @@ def is_ignored(path, ignore_patterns):
44
47
  def is_binary(file_path):
45
48
  try:
46
49
  with open(file_path, 'rb') as file:
47
- return b'\0' in file.read(1024)
50
+ chunk = file.read(1024)
51
+ if b'\0' in chunk: # null bytes indicate binary file
52
+ return True
53
+ if file_path.lower().endswith(('.json', '.csv')):
54
+ return False
55
+ return False
48
56
  except IOError:
49
57
  return False
50
58
 
@@ -413,6 +421,10 @@ def get_file_snippet(file_path, max_lines=50, max_bytes=4096):
413
421
  byte_count += len(line.encode('utf-8'))
414
422
  return snippet
415
423
 
424
+ def print_char_count(count):
425
+ token_estimate = count // 4
426
+ print(f"\rCurrent prompt size: {count} characters (~ {token_estimate} tokens)", flush=True)
427
+
416
428
  def select_files_in_directory(directory, ignore_patterns, current_char_count=0):
417
429
  files = [f for f in os.listdir(directory)
418
430
  if os.path.isfile(os.path.join(directory, f)) and not is_ignored(os.path.join(directory, f), ignore_patterns) and not is_binary(os.path.join(directory, f))]
@@ -534,13 +546,77 @@ def fetch_web_content(url):
534
546
  try:
535
547
  response = requests.get(url)
536
548
  response.raise_for_status()
537
- full_content = response.text
538
- snippet = full_content[:1000] if len(full_content) > 10000 else full_content
539
- return full_content, snippet
549
+ content_type = response.headers.get('content-type', '').lower()
550
+ if 'json' in content_type:
551
+ return response.json(), 'json'
552
+ elif 'csv' in content_type:
553
+ return response.text, 'csv'
554
+ else:
555
+ return response.text, 'text'
540
556
  except requests.RequestException as e:
541
557
  print(f"Error fetching content from {url}: {e}")
542
558
  return None, None
543
559
 
560
+ def read_file_content(file_path):
561
+ _, ext = os.path.splitext(file_path)
562
+ if ext.lower() == '.json':
563
+ with open(file_path, 'r') as f:
564
+ return json.load(f), 'json'
565
+ elif ext.lower() == '.csv':
566
+ with open(file_path, 'r') as f:
567
+ return f.read(), 'csv'
568
+ else:
569
+ with open(file_path, 'r') as f:
570
+ return f.read(), 'text'
571
+
572
+ def get_content_snippet(content, content_type, max_lines=50, max_chars=4096):
573
+ if content_type == 'json':
574
+ return json.dumps(content, indent=2)[:max_chars]
575
+ elif content_type == 'csv':
576
+ csv_content = content if isinstance(content, str) else content.getvalue()
577
+ csv_reader = csv.reader(io.StringIO(csv_content))
578
+ rows = list(csv_reader)[:max_lines]
579
+ output = io.StringIO()
580
+ csv.writer(output).writerows(rows)
581
+ return output.getvalue()[:max_chars]
582
+ else:
583
+ return '\n'.join(content.split('\n')[:max_lines])[:max_chars]
584
+
585
+ def handle_content(content, content_type, file_or_url):
586
+ is_large = len(json.dumps(content)) > 102400 if content_type == 'json' else len(content) > 102400
587
+
588
+ if is_large:
589
+ while True:
590
+ choice = input(f"{file_or_url} is large. View (f)ull content, (s)nippet, or (p)review? ").lower()
591
+ if choice in ['f', 's', 'p']:
592
+ break
593
+ print("Invalid choice. Please enter 'f', 's', or 'p'.")
594
+
595
+ if choice == 'f':
596
+ return content, False
597
+ elif choice == 's':
598
+ return get_content_snippet(content, content_type), True
599
+ else: # preview
600
+ preview = get_content_preview(content, content_type)
601
+ print(f"\nPreview of {file_or_url}:\n{preview}\n")
602
+ return handle_content(content, content_type, file_or_url)
603
+ else:
604
+ return content, False
605
+
606
+
607
+ def get_content_preview(content, content_type):
608
+ if content_type == 'json':
609
+ return json.dumps(content, indent=2)[:1000] + "\n..."
610
+ elif content_type == 'csv':
611
+ csv_content = content if isinstance(content, str) else content.getvalue()
612
+ csv_reader = csv.reader(io.StringIO(csv_content))
613
+ rows = list(csv_reader)[:10]
614
+ output = io.StringIO()
615
+ csv.writer(output).writerows(rows)
616
+ return output.getvalue() + "\n..."
617
+ else:
618
+ return '\n'.join(content.split('\n')[:20]) + "\n..."
619
+
544
620
  def read_env_file():
545
621
  env_vars = {}
546
622
  if os.path.exists('.env'):
@@ -591,33 +667,29 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
591
667
  prompt += "\n```\n\n"
592
668
  prompt += "## File Contents\n\n"
593
669
  for file_tuple in files_to_include:
594
- if len(file_tuple) == 3:
595
- file, use_snippet, chunks = file_tuple
596
- else:
597
- file, use_snippet = file_tuple
670
+ if len(file_tuple) == 4:
671
+ file, content, is_snippet, content_type = file_tuple
598
672
  chunks = None
599
-
673
+ else:
674
+ file, content, is_snippet, content_type, chunks = file_tuple
600
675
  relative_path = get_relative_path(file)
601
- language = get_language_for_file(file)
676
+ language = get_language_for_file(file) if content_type == 'text' else content_type
602
677
 
603
678
  if chunks is not None:
604
679
  prompt += f"### {relative_path} (selected patches)\n\n```{language}\n"
605
680
  for chunk in chunks:
606
681
  prompt += f"{chunk}\n"
607
682
  prompt += "```\n\n"
608
- elif use_snippet:
609
- file_content = get_file_snippet(file)
610
- prompt += f"### {relative_path} (snippet)\n\n```{language}\n{file_content}\n```\n\n"
611
683
  else:
612
- file_content = read_file_contents(file)
613
- file_content = handle_env_variables(file_content, env_vars)
614
- prompt += f"### {relative_path}\n\n```{language}\n{file_content}\n```\n\n"
684
+ content = handle_env_variables(content, env_vars)
685
+ prompt += f"### {relative_path}{' (snippet)' if is_snippet else ''}\n\n```{language}\n{content}\n```\n\n"
615
686
 
616
687
  if web_contents:
617
688
  prompt += "## Web Content\n\n"
618
- for url, (full_content, snippet) in web_contents.items():
619
- content = handle_env_variables(snippet if len(full_content) > 10000 else full_content, env_vars)
620
- prompt += f"### {url}{' (snippet)' if len(full_content) > 10000 else ''}\n\n```\n{content}\n```\n\n"
689
+ for url, (content, is_snippet, content_type) in web_contents.items():
690
+ content = handle_env_variables(content, env_vars)
691
+ language = content_type if content_type in ['json', 'csv'] else ''
692
+ prompt += f"### {url}{' (snippet)' if is_snippet else ''}\n\n```{language}\n{content}\n```\n\n"
621
693
 
622
694
  prompt += "## Task Instructions\n\n"
623
695
  task_instructions = input("Enter the task instructions: ")
@@ -631,10 +703,6 @@ def generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars):
631
703
  prompt += analysis_text
632
704
  return prompt
633
705
 
634
- def print_char_count(count):
635
- token_estimate = count // 4
636
- print(f"\rCurrent prompt size: {count} characters (~ {token_estimate} tokens)", flush=True)
637
-
638
706
  def main():
639
707
  parser = argparse.ArgumentParser(description="Generate a prompt with project structure, file contents, and web content.")
640
708
  parser.add_argument('inputs', nargs='+', help='Files, directories, or URLs to include in the prompt')
@@ -644,36 +712,34 @@ def main():
644
712
  env_vars = read_env_file()
645
713
 
646
714
  files_to_include = []
647
- processed_dirs = set()
648
715
  web_contents = {}
649
716
  current_char_count = 0
650
717
 
651
718
  for input_path in args.inputs:
652
719
  if input_path.startswith(('http://', 'https://')):
653
- full_content, snippet = fetch_web_content(input_path)
654
- if full_content:
655
- web_contents[input_path] = (full_content, snippet)
656
- current_char_count += len(snippet if len(full_content) > 10000 else full_content)
720
+ content, content_type = fetch_web_content(input_path)
721
+ if content:
722
+ content, is_snippet = handle_content(content, content_type, input_path)
723
+ web_contents[input_path] = (content, is_snippet, content_type)
724
+ current_char_count += len(json.dumps(content)) if content_type == 'json' else len(content)
657
725
  print(f"Added web content from: {input_path}")
658
726
  elif os.path.isfile(input_path):
659
727
  if not is_ignored(input_path, ignore_patterns) and not is_binary(input_path):
660
728
  while True:
661
729
  file_choice = input(f"{input_path} (y)es include / (n)o skip / (p)atches / (q)uit? ").lower()
662
730
  if file_choice == 'y':
663
- use_snippet = is_large_file(input_path)
664
- files_to_include.append((input_path, use_snippet))
665
- if use_snippet:
666
- current_char_count += len(get_file_snippet(input_path))
667
- else:
668
- current_char_count += os.path.getsize(input_path)
669
- print(f"Added file: {input_path}{' (snippet)' if use_snippet else ''}")
731
+ content, content_type = read_file_content(input_path)
732
+ content, is_snippet = handle_content(content, content_type, input_path)
733
+ files_to_include.append((input_path, content, is_snippet, content_type))
734
+ current_char_count += len(json.dumps(content)) if content_type == 'json' else len(content)
735
+ print(f"Added file: {input_path}{' (snippet)' if is_snippet else ''}")
670
736
  break
671
737
  elif file_choice == 'n':
672
738
  break
673
739
  elif file_choice == 'p':
674
740
  chunks, char_count = select_file_patches(input_path)
675
741
  if chunks:
676
- files_to_include.append((input_path, False, chunks))
742
+ files_to_include.append((input_path, None, False, 'text', chunks))
677
743
  current_char_count += char_count
678
744
  break
679
745
  elif file_choice == 'q':
@@ -684,9 +750,36 @@ def main():
684
750
  else:
685
751
  print(f"Ignored file: {input_path}")
686
752
  elif os.path.isdir(input_path):
687
- dir_files, dir_processed, current_char_count = process_directory(input_path, ignore_patterns, current_char_count)
688
- files_to_include.extend(dir_files)
689
- processed_dirs.update(dir_processed)
753
+ for root, _, files in os.walk(input_path):
754
+ for file in files:
755
+ file_path = os.path.join(root, file)
756
+ if not is_ignored(file_path, ignore_patterns) and not is_binary(file_path):
757
+ while True:
758
+ file_choice = input(f"{file_path} (y)es include / (n)o skip / (p)atches / (q)uit? ").lower()
759
+ if file_choice == 'y':
760
+ content, content_type = read_file_content(file_path)
761
+ content, is_snippet = handle_content(content, content_type, file_path)
762
+ files_to_include.append((file_path, content, is_snippet, content_type))
763
+ current_char_count += len(json.dumps(content)) if content_type == 'json' else len(content)
764
+ print(f"Added file: {file_path}{' (snippet)' if is_snippet else ''}")
765
+ break
766
+ elif file_choice == 'n':
767
+ break
768
+ elif file_choice == 'p':
769
+ chunks, char_count = select_file_patches(file_path)
770
+ if chunks:
771
+ files_to_include.append((file_path, None, False, 'text', chunks))
772
+ current_char_count += char_count
773
+ break
774
+ elif file_choice == 'q':
775
+ print("Quitting directory processing.")
776
+ break
777
+ else:
778
+ print("Invalid choice. Please enter 'y', 'n', 'p', or 'q'.")
779
+ if file_choice == 'q':
780
+ break
781
+ if file_choice == 'q':
782
+ break
690
783
  else:
691
784
  print(f"Warning: {input_path} is not a valid file, directory, or URL. Skipping.")
692
785
 
@@ -696,7 +789,7 @@ def main():
696
789
 
697
790
  print("\nFile and web content selection complete.")
698
791
  print_char_count(current_char_count)
699
- print(f"Summary: Added {len(files_to_include)} files from {len(processed_dirs)} directories and {len(web_contents)} web sources.")
792
+ print(f"Summary: Added {len(files_to_include)} files and {len(web_contents)} web sources.")
700
793
 
701
794
  prompt = generate_prompt(files_to_include, ignore_patterns, web_contents, env_vars)
702
795
  print("\n\nGenerated prompt:")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kopipasta
3
- Version: 0.10.0
3
+ Version: 0.11.0
4
4
  Summary: A CLI tool to generate prompts with project structure and file contents
5
5
  Home-page: https://github.com/mkorpela/kopipasta
6
6
  Author: Mikko Korpela
@@ -0,0 +1,8 @@
1
+ kopipasta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ kopipasta/main.py,sha256=RtRWeLc5tbCPoYJIzW03iyQQnffKEp7e7eH2iW5hLs4,32427
3
+ kopipasta-0.11.0.dist-info/LICENSE,sha256=xw4C9TAU7LFu4r_MwSbky90uzkzNtRwAo3c51IWR8lk,1091
4
+ kopipasta-0.11.0.dist-info/METADATA,sha256=fJsC-ljDacn9r1aj19n5YG6TxNbbpbowcqPKi72EQoY,5646
5
+ kopipasta-0.11.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
6
+ kopipasta-0.11.0.dist-info/entry_points.txt,sha256=but54qDNz1-F8fVvGstq_QID5tHjczP7bO7rWLFkc6Y,50
7
+ kopipasta-0.11.0.dist-info/top_level.txt,sha256=iXohixMuCdw8UjGDUp0ouICLYBDrx207sgZIJ9lxn0o,10
8
+ kopipasta-0.11.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- kopipasta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- kopipasta/main.py,sha256=JBVB1IEVs11XYZ0K98WACDTAH9CYSdVtehvX4Z8tmYU,28029
3
- kopipasta-0.10.0.dist-info/LICENSE,sha256=xw4C9TAU7LFu4r_MwSbky90uzkzNtRwAo3c51IWR8lk,1091
4
- kopipasta-0.10.0.dist-info/METADATA,sha256=TZx-QEJGaEvF-kuPQRr1E1HKTADb37H3salKkj6o_Do,5646
5
- kopipasta-0.10.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
6
- kopipasta-0.10.0.dist-info/entry_points.txt,sha256=but54qDNz1-F8fVvGstq_QID5tHjczP7bO7rWLFkc6Y,50
7
- kopipasta-0.10.0.dist-info/top_level.txt,sha256=iXohixMuCdw8UjGDUp0ouICLYBDrx207sgZIJ9lxn0o,10
8
- kopipasta-0.10.0.dist-info/RECORD,,