auto-coder 0.1.231__py3-none-any.whl → 0.1.233__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

autocoder/index/index.py CHANGED
@@ -6,37 +6,22 @@ from datetime import datetime
6
6
  from autocoder.common import SourceCode, AutoCoderArgs
7
7
  from autocoder.index.symbols_utils import (
8
8
  extract_symbols,
9
- SymbolsInfo,
10
9
  SymbolType,
11
10
  symbols_info_to_str,
12
11
  )
13
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
14
13
  import threading
15
14
 
16
- import pydantic
17
15
  import byzerllm
18
16
  import hashlib
19
- import textwrap
20
- import tabulate
21
- from rich.console import Console
22
- from rich.table import Table
23
- from rich.panel import Panel
24
- from rich.text import Text
25
17
 
26
18
  from loguru import logger
27
- from autocoder.utils.queue_communicate import (
28
- queue_communicate,
29
- CommunicateEvent,
30
- CommunicateEventType,
31
- )
32
19
  from autocoder.index.types import (
33
20
  IndexItem,
34
21
  TargetFile,
35
- VerifyFileRelevance,
36
22
  FileList,
37
23
  )
38
24
 
39
-
40
25
  class IndexManager:
41
26
  def __init__(
42
27
  self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -53,6 +38,11 @@ class IndexManager:
53
38
  else:
54
39
  self.index_llm = llm
55
40
 
41
+ if llm and (s := llm.get_sub_client("index_filter_model")):
42
+ self.index_filter_llm = s
43
+ else:
44
+ self.index_filter_llm = llm
45
+
56
46
  self.llm = llm
57
47
  self.args = args
58
48
  self.max_input_length = (
@@ -63,6 +53,7 @@ class IndexManager:
63
53
  if not os.path.exists(self.index_dir):
64
54
  os.makedirs(self.index_dir)
65
55
 
56
+
66
57
  @byzerllm.prompt()
67
58
  def verify_file_relevance(self, file_content: str, query: str) -> str:
68
59
  """
@@ -565,381 +556,3 @@ class IndexManager:
565
556
  请确保结果的准确性和完整性,包括所有可能相关的文件。
566
557
  """
567
558
 
568
-
569
- def build_index_and_filter_files(
570
- llm, args: AutoCoderArgs, sources: List[SourceCode]
571
- ) -> str:
572
- # Initialize timing and statistics
573
- total_start_time = time.monotonic()
574
- stats = {
575
- "total_files": len(sources),
576
- "indexed_files": 0,
577
- "level1_filtered": 0,
578
- "level2_filtered": 0,
579
- "verified_files": 0,
580
- "final_files": 0,
581
- "timings": {
582
- "process_tagged_sources": 0.0,
583
- "build_index": 0.0,
584
- "level1_filter": 0.0,
585
- "level2_filter": 0.0,
586
- "relevance_verification": 0.0,
587
- "file_selection": 0.0,
588
- "prepare_output": 0.0,
589
- "total": 0.0
590
- }
591
- }
592
-
593
- def get_file_path(file_path):
594
- if file_path.startswith("##"):
595
- return file_path.strip()[2:]
596
- return file_path
597
-
598
- final_files: Dict[str, TargetFile] = {}
599
-
600
- # Phase 1: Process REST/RAG/Search sources
601
- logger.info("Phase 1: Processing REST/RAG/Search sources...")
602
- phase_start = time.monotonic()
603
- for source in sources:
604
- if source.tag in ["REST", "RAG", "SEARCH"]:
605
- final_files[get_file_path(source.module_name)] = TargetFile(
606
- file_path=source.module_name, reason="Rest/Rag/Search"
607
- )
608
- phase_end = time.monotonic()
609
- stats["timings"]["process_tagged_sources"] = phase_end - phase_start
610
-
611
- if not args.skip_build_index and llm:
612
- # Phase 2: Build index
613
- if args.request_id and not args.skip_events:
614
- queue_communicate.send_event(
615
- request_id=args.request_id,
616
- event=CommunicateEvent(
617
- event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
618
- data=json.dumps({"total_files": len(sources)})
619
- )
620
- )
621
-
622
- logger.info("Phase 2: Building index for all files...")
623
- phase_start = time.monotonic()
624
- index_manager = IndexManager(llm=llm, sources=sources, args=args)
625
- index_data = index_manager.build_index()
626
- stats["indexed_files"] = len(index_data) if index_data else 0
627
- phase_end = time.monotonic()
628
- stats["timings"]["build_index"] = phase_end - phase_start
629
-
630
- if args.request_id and not args.skip_events:
631
- queue_communicate.send_event(
632
- request_id=args.request_id,
633
- event=CommunicateEvent(
634
- event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
635
- data=json.dumps({
636
- "indexed_files": stats["indexed_files"],
637
- "build_index_time": stats["timings"]["build_index"],
638
- })
639
- )
640
- )
641
-
642
- if not args.skip_filter_index:
643
- if args.request_id and not args.skip_events:
644
- queue_communicate.send_event(
645
- request_id=args.request_id,
646
- event=CommunicateEvent(
647
- event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
648
- data=json.dumps({})
649
- )
650
- )
651
- # Phase 3: Level 1 filtering - Query-based
652
- logger.info(
653
- "Phase 3: Performing Level 1 filtering (query-based)...")
654
-
655
- phase_start = time.monotonic()
656
- target_files = index_manager.get_target_files_by_query(args.query)
657
-
658
- if target_files:
659
- for file in target_files.file_list:
660
- file_path = file.file_path.strip()
661
- final_files[get_file_path(file_path)] = file
662
- stats["level1_filtered"] = len(target_files.file_list)
663
- phase_end = time.monotonic()
664
- stats["timings"]["level1_filter"] = phase_end - phase_start
665
-
666
- # Phase 4: Level 2 filtering - Related files
667
- if target_files is not None and args.index_filter_level >= 2:
668
- logger.info(
669
- "Phase 4: Performing Level 2 filtering (related files)...")
670
- if args.request_id and not args.skip_events:
671
- queue_communicate.send_event(
672
- request_id=args.request_id,
673
- event=CommunicateEvent(
674
- event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
675
- data=json.dumps({})
676
- )
677
- )
678
- phase_start = time.monotonic()
679
- related_files = index_manager.get_related_files(
680
- [file.file_path for file in target_files.file_list]
681
- )
682
- if related_files is not None:
683
- for file in related_files.file_list:
684
- file_path = file.file_path.strip()
685
- final_files[get_file_path(file_path)] = file
686
- stats["level2_filtered"] = len(related_files.file_list)
687
- phase_end = time.monotonic()
688
- stats["timings"]["level2_filter"] = phase_end - phase_start
689
-
690
- if not final_files:
691
- logger.warning("No related files found, using all files")
692
- for source in sources:
693
- final_files[get_file_path(source.module_name)] = TargetFile(
694
- file_path=source.module_name,
695
- reason="No related files found, use all files",
696
- )
697
-
698
- # Phase 5: Relevance verification
699
- logger.info("Phase 5: Performing relevance verification...")
700
- if args.index_filter_enable_relevance_verification:
701
- phase_start = time.monotonic()
702
- verified_files = {}
703
- temp_files = list(final_files.values())
704
- verification_results = []
705
-
706
- def print_verification_results(results):
707
- from rich.table import Table
708
- from rich.console import Console
709
-
710
- console = Console()
711
- table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
712
- table.add_column("File Path", style="cyan", no_wrap=True)
713
- table.add_column("Score", justify="right", style="green")
714
- table.add_column("Status", style="yellow")
715
- table.add_column("Reason/Error")
716
-
717
- for file_path, score, status, reason in results:
718
- table.add_row(
719
- file_path,
720
- str(score) if score is not None else "N/A",
721
- status,
722
- reason
723
- )
724
-
725
- console.print(table)
726
-
727
- def verify_single_file(file: TargetFile):
728
- for source in sources:
729
- if source.module_name == file.file_path:
730
- file_content = source.source_code
731
- try:
732
- result = index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
733
- file_content=file_content,
734
- query=args.query
735
- )
736
- if result.relevant_score >= args.verify_file_relevance_score:
737
- verified_files[file.file_path] = TargetFile(
738
- file_path=file.file_path,
739
- reason=f"Score:{result.relevant_score}, {result.reason}"
740
- )
741
- return file.file_path, result.relevant_score, "PASS", result.reason
742
- else:
743
- return file.file_path, result.relevant_score, "FAIL", result.reason
744
- except Exception as e:
745
- error_msg = str(e)
746
- verified_files[file.file_path] = TargetFile(
747
- file_path=file.file_path,
748
- reason=f"Verification failed: {error_msg}"
749
- )
750
- return file.file_path, None, "ERROR", error_msg
751
- return None
752
-
753
- with ThreadPoolExecutor(max_workers=args.index_filter_workers) as executor:
754
- futures = [executor.submit(verify_single_file, file)
755
- for file in temp_files]
756
- for future in as_completed(futures):
757
- result = future.result()
758
- if result:
759
- verification_results.append(result)
760
- time.sleep(args.anti_quota_limit)
761
-
762
- # Print verification results in a table
763
- print_verification_results(verification_results)
764
-
765
- stats["verified_files"] = len(verified_files)
766
- phase_end = time.monotonic()
767
- stats["timings"]["relevance_verification"] = phase_end - phase_start
768
-
769
- # Keep all files, not just verified ones
770
- final_files = verified_files
771
-
772
- def display_table_and_get_selections(data):
773
- from prompt_toolkit.shortcuts import checkboxlist_dialog
774
- from prompt_toolkit.styles import Style
775
-
776
- choices = [(file, f"{file} - {reason}") for file, reason in data]
777
- selected_files = [file for file, _ in choices]
778
-
779
- style = Style.from_dict(
780
- {
781
- "dialog": "bg:#88ff88",
782
- "dialog frame.label": "bg:#ffffff #000000",
783
- "dialog.body": "bg:#88ff88 #000000",
784
- "dialog shadow": "bg:#00aa00",
785
- }
786
- )
787
-
788
- result = checkboxlist_dialog(
789
- title="Target Files",
790
- text="Tab to switch between buttons, and Space/Enter to select/deselect.",
791
- values=choices,
792
- style=style,
793
- default_values=selected_files,
794
- ).run()
795
-
796
- return [file for file in result] if result else []
797
-
798
- def print_selected(data):
799
- console = Console()
800
-
801
- table = Table(
802
- title="Files Used as Context",
803
- show_header=True,
804
- header_style="bold magenta",
805
- )
806
- table.add_column("File Path", style="cyan", no_wrap=True)
807
- table.add_column("Reason", style="green")
808
-
809
- for file, reason in data:
810
- table.add_row(file, reason)
811
-
812
- panel = Panel(
813
- table,
814
- expand=False,
815
- border_style="bold blue",
816
- padding=(1, 1),
817
- )
818
-
819
- console.print(panel)
820
-
821
- # Phase 6: File selection and limitation
822
- logger.info("Phase 6: Processing file selection and limits...")
823
- phase_start = time.monotonic()
824
-
825
- if args.index_filter_file_num > 0:
826
- logger.info(
827
- f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
828
-
829
- if args.skip_confirm:
830
- final_filenames = [file.file_path for file in final_files.values()]
831
- if args.index_filter_file_num > 0:
832
- final_filenames = final_filenames[: args.index_filter_file_num]
833
- else:
834
- target_files_data = [
835
- (file.file_path, file.reason) for file in final_files.values()
836
- ]
837
- if not target_files_data:
838
- logger.warning(
839
- "No target files found, you may need to rewrite the query and try again."
840
- )
841
- final_filenames = []
842
- else:
843
- final_filenames = display_table_and_get_selections(
844
- target_files_data)
845
-
846
- if args.index_filter_file_num > 0:
847
- final_filenames = final_filenames[: args.index_filter_file_num]
848
-
849
- phase_end = time.monotonic()
850
- stats["timings"]["file_selection"] = phase_end - phase_start
851
-
852
- # Phase 7: Display results and prepare output
853
- logger.info("Phase 7: Preparing final output...")
854
- phase_start = time.monotonic()
855
- try:
856
- print_selected(
857
- [
858
- (file.file_path, file.reason)
859
- for file in final_files.values()
860
- if file.file_path in final_filenames
861
- ]
862
- )
863
- except Exception as e:
864
- logger.warning(
865
- "Failed to display selected files in terminal mode. Falling back to simple print."
866
- )
867
- print("Target Files Selected:")
868
- for file in final_filenames:
869
- print(f"{file} - {final_files[file].reason}")
870
-
871
- source_code = ""
872
- depulicated_sources = set()
873
-
874
- for file in sources:
875
- if file.module_name in final_filenames:
876
- if file.module_name in depulicated_sources:
877
- continue
878
- depulicated_sources.add(file.module_name)
879
- source_code += f"##File: {file.module_name}\n"
880
- source_code += f"{file.source_code}\n\n"
881
-
882
- if args.request_id and not args.skip_events:
883
- queue_communicate.send_event(
884
- request_id=args.request_id,
885
- event=CommunicateEvent(
886
- event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
887
- data=json.dumps([
888
- (file.file_path, file.reason)
889
- for file in final_files.values()
890
- if file.file_path in depulicated_sources
891
- ])
892
- )
893
- )
894
-
895
- stats["final_files"] = len(depulicated_sources)
896
- phase_end = time.monotonic()
897
- stats["timings"]["prepare_output"] = phase_end - phase_start
898
-
899
- # Calculate total time and print summary
900
- total_end_time = time.monotonic()
901
- total_time = total_end_time - total_start_time
902
- stats["timings"]["total"] = total_time
903
-
904
- # Calculate total filter time
905
- total_filter_time = (
906
- stats["timings"]["level1_filter"] +
907
- stats["timings"]["level2_filter"] +
908
- stats["timings"]["relevance_verification"]
909
- )
910
-
911
- # Print final statistics in a more structured way
912
- summary = f"""
913
- === Indexing and Filtering Summary ===
914
- • Total files scanned: {stats['total_files']}
915
- • Files indexed: {stats['indexed_files']}
916
- • Files filtered:
917
- - Level 1 (query-based): {stats['level1_filtered']}
918
- - Level 2 (related files): {stats['level2_filtered']}
919
- - Relevance verified: {stats.get('verified_files', 0)}
920
- • Final files selected: {stats['final_files']}
921
-
922
- === Time Breakdown ===
923
- • Index build: {stats['timings'].get('build_index', 0):.2f}s
924
- • Level 1 filter: {stats['timings'].get('level1_filter', 0):.2f}s
925
- • Level 2 filter: {stats['timings'].get('level2_filter', 0):.2f}s
926
- • Relevance check: {stats['timings'].get('relevance_verification', 0):.2f}s
927
- • File selection: {stats['timings'].get('file_selection', 0):.2f}s
928
- • Total time: {total_time:.2f}s
929
- ====================================
930
- """
931
- logger.info(summary)
932
-
933
- if args.request_id and not args.skip_events:
934
- queue_communicate.send_event(
935
- request_id=args.request_id,
936
- event=CommunicateEvent(
937
- event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
938
- data=json.dumps({
939
- "filtered_files": stats["final_files"],
940
- "filter_time": total_filter_time
941
- })
942
- )
943
- )
944
-
945
- return source_code
autocoder/index/types.py CHANGED
@@ -21,4 +21,7 @@ class VerifyFileRelevance(pydantic.BaseModel):
21
21
 
22
22
 
23
23
  class FileList(pydantic.BaseModel):
24
- file_list: List[TargetFile]
24
+ file_list: List[TargetFile]
25
+
26
+ class FileNumberList(pydantic.BaseModel):
27
+ file_list: List[int]
@@ -39,7 +39,7 @@ def stream_out(
39
39
  console=console
40
40
  ) as live:
41
41
  for res in stream_generator:
42
- last_meta = res[1]
42
+ last_meta = res[1]
43
43
  content = res[0]
44
44
  assistant_response += content
45
45
 
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.231"
1
+ __version__ = "0.1.233"