auto-coder 0.1.232__py3-none-any.whl → 0.1.235__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

autocoder/index/index.py CHANGED
@@ -6,37 +6,23 @@ from datetime import datetime
 from autocoder.common import SourceCode, AutoCoderArgs
 from autocoder.index.symbols_utils import (
     extract_symbols,
-    SymbolsInfo,
     SymbolType,
     symbols_info_to_str,
 )
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 
-import pydantic
 import byzerllm
 import hashlib
-import textwrap
-import tabulate
-from rich.console import Console
-from rich.table import Table
-from rich.panel import Panel
-from rich.text import Text
-
-from loguru import logger
-from autocoder.utils.queue_communicate import (
-    queue_communicate,
-    CommunicateEvent,
-    CommunicateEventType,
-)
+
+from autocoder.common.printer import Printer
+from autocoder.common.auto_coder_lang import get_message
 from autocoder.index.types import (
     IndexItem,
     TargetFile,
-    VerifyFileRelevance,
     FileList,
 )
 
-
 class IndexManager:
     def __init__(
         self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -53,16 +39,23 @@ class IndexManager:
         else:
             self.index_llm = llm
 
+        if llm and (s := llm.get_sub_client("index_filter_model")):
+            self.index_filter_llm = s
+        else:
+            self.index_filter_llm = llm
+
         self.llm = llm
         self.args = args
         self.max_input_length = (
             args.index_model_max_input_length or args.model_max_input_length
         )
+        self.printer = Printer()
 
         # Create the index directory if it does not exist
         if not os.path.exists(self.index_dir):
             os.makedirs(self.index_dir)
 
+
     @byzerllm.prompt()
     def verify_file_relevance(self, file_content: str, query: str) -> str:
         """
@@ -215,8 +208,12 @@ class IndexManager:
             start_time = time.monotonic()
             source_code = source.source_code
             if len(source.source_code) > self.max_input_length:
-                logger.warning(
-                    f"Warning[Build Index]: The length of source code({source.module_name}) is too long ({len(source.source_code)}) > model_max_input_length({self.max_input_length}), splitting into chunks..."
+                self.printer.print_in_terminal(
+                    "index_file_too_large",
+                    style="yellow",
+                    file_path=source.module_name,
+                    file_size=len(source.source_code),
+                    max_length=self.max_input_length
                 )
                 chunks = self.split_text_into_chunks(
                     source_code, self.max_input_length - 1000
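
When a file exceeds max_input_length, the builder splits it into chunks,
reserving 1000 characters of headroom below the model limit.
split_text_into_chunks itself is not part of this diff; a minimal
fixed-size splitter with that contract could look like:

    # Hypothetical splitter; the packaged split_text_into_chunks
    # implementation is not shown in this diff.
    def split_text_into_chunks(text: str, max_chunk_size: int) -> list:
        return [text[i:i + max_chunk_size]
                for i in range(0, len(text), max_chunk_size)]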
@@ -233,12 +230,23 @@ class IndexManager:
                         self.index_llm).run(source.module_name, source_code)
                     time.sleep(self.anti_quota_limit)
 
-                logger.info(
-                    f"Parse and update index for {file_path} md5: {md5} took {time.monotonic() - start_time:.2f}s"
+                self.printer.print_in_terminal(
+                    "index_update_success",
+                    style="green",
+                    file_path=file_path,
+                    md5=md5,
+                    duration=time.monotonic() - start_time
                 )
 
             except Exception as e:
-                logger.warning(f"Error: {e}")
+                # import traceback
+                # traceback.print_exc()
+                self.printer.print_in_terminal(
+                    "index_build_error",
+                    style="red",
+                    file_path=file_path,
+                    error=str(e)
+                )
                 return None
 
             return {
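
Throughout this release, direct loguru calls give way to
Printer.print_in_terminal, which takes a message key plus keyword
arguments instead of a preformatted string, so output can be localized
via autocoder.common.auto_coder_lang.get_message. The Printer internals
are not included in this diff; a minimal sketch of the interface these
call sites assume:

    # Hypothetical sketch; the packaged autocoder.common.printer
    # implementation is not shown in this diff.
    from rich.console import Console
    from autocoder.common.auto_coder_lang import get_message

    class Printer:
        def __init__(self):
            self.console = Console()

        def print_in_terminal(self, key: str, style: str = "", **kwargs):
            # get_message is assumed to map a key such as
            # "index_update_success" to a localized template with
            # {file_path}-style placeholders.
            self.console.print(get_message(key).format(**kwargs), style=style)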
@@ -264,8 +272,11 @@ class IndexManager:
 
         for item in index_data.keys():
             if not item.startswith(self.source_dir):
-                logger.warning(
-                    error_message(source_dir=self.source_dir, file_path=item)
+                self.printer.print_in_terminal(
+                    "index_source_dir_mismatch",
+                    style="yellow",
+                    source_dir=self.source_dir,
+                    file_path=item
                 )
                 break
 
@@ -300,8 +311,12 @@ class IndexManager:
             counter = 0
             num_files = len(wait_to_build_files)
             total_files = len(self.sources)
-            logger.info(
-                f"Total Files: {total_files}, Need to Build Index: {num_files}")
+            self.printer.print_in_terminal(
+                "index_build_summary",
+                style="bold blue",
+                total_files=total_files,
+                num_files=num_files
+            )
 
             futures = [
                 executor.submit(self.build_index_for_single_source, source)
@@ -311,7 +326,12 @@ class IndexManager:
                 result = future.result()
                 if result is not None:
                     counter += 1
-                    logger.info(f"Building Index:{counter}/{num_files}...")
+                    self.printer.print_in_terminal(
+                        "building_index_progress",
+                        style="blue",
+                        counter=counter,
+                        num_files=num_files
+                    )
                     module_name = result["module_name"]
                     index_data[module_name] = result
                     updated_sources.append(module_name)
@@ -413,8 +433,10 @@ class IndexManager:
                 with lock:
                     all_results.extend(result.file_list)
             else:
-                logger.warning(
-                    f"Fail to find related files for chunk {chunk_count}. This may be caused by the model limit or the query not being suitable for the files."
+                self.printer.print_in_terminal(
+                    "index_related_files_fail",
+                    style="yellow",
+                    chunk_count=chunk_count
                 )
             time.sleep(self.args.anti_quota_limit)
 
@@ -451,8 +473,10 @@ class IndexManager:
                     all_results.extend(result.file_list)
                     completed_threads += 1
             else:
-                logger.warning(
-                    f"Fail to find target files for chunk. This is caused by the model response not being in JSON format or the JSON being empty."
+                self.printer.print_in_terminal(
+                    "index_related_files_fail",
+                    style="yellow",
+                    chunk_count="unknown"
                 )
             time.sleep(self.args.anti_quota_limit)
 
@@ -466,7 +490,12 @@ class IndexManager:
         for future in as_completed(futures):
             future.result()
 
-        logger.info(f"Completed {completed_threads}/{total_threads} threads")
+        self.printer.print_in_terminal(
+            "index_threads_completed",
+            style="green",
+            completed_threads=completed_threads,
+            total_threads=total_threads
+        )
         return all_results, total_threads, completed_threads
 
     def get_target_files_by_query(self, query: str) -> FileList:
@@ -565,381 +594,3 @@ class IndexManager:
         Please ensure the accuracy and completeness of the results, including all potentially relevant files.
         """
 
-
-def build_index_and_filter_files(
-    llm, args: AutoCoderArgs, sources: List[SourceCode]
-) -> str:
-    # Initialize timing and statistics
-    total_start_time = time.monotonic()
-    stats = {
-        "total_files": len(sources),
-        "indexed_files": 0,
-        "level1_filtered": 0,
-        "level2_filtered": 0,
-        "verified_files": 0,
-        "final_files": 0,
-        "timings": {
-            "process_tagged_sources": 0.0,
-            "build_index": 0.0,
-            "level1_filter": 0.0,
-            "level2_filter": 0.0,
-            "relevance_verification": 0.0,
-            "file_selection": 0.0,
-            "prepare_output": 0.0,
-            "total": 0.0
-        }
-    }
-
-    def get_file_path(file_path):
-        if file_path.startswith("##"):
-            return file_path.strip()[2:]
-        return file_path
-
-    final_files: Dict[str, TargetFile] = {}
-
-    # Phase 1: Process REST/RAG/Search sources
-    logger.info("Phase 1: Processing REST/RAG/Search sources...")
-    phase_start = time.monotonic()
-    for source in sources:
-        if source.tag in ["REST", "RAG", "SEARCH"]:
-            final_files[get_file_path(source.module_name)] = TargetFile(
-                file_path=source.module_name, reason="Rest/Rag/Search"
-            )
-    phase_end = time.monotonic()
-    stats["timings"]["process_tagged_sources"] = phase_end - phase_start
-
-    if not args.skip_build_index and llm:
-        # Phase 2: Build index
-        if args.request_id and not args.skip_events:
-            queue_communicate.send_event(
-                request_id=args.request_id,
-                event=CommunicateEvent(
-                    event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
-                    data=json.dumps({"total_files": len(sources)})
-                )
-            )
-
-        logger.info("Phase 2: Building index for all files...")
-        phase_start = time.monotonic()
-        index_manager = IndexManager(llm=llm, sources=sources, args=args)
-        index_data = index_manager.build_index()
-        stats["indexed_files"] = len(index_data) if index_data else 0
-        phase_end = time.monotonic()
-        stats["timings"]["build_index"] = phase_end - phase_start
-
-        if args.request_id and not args.skip_events:
-            queue_communicate.send_event(
-                request_id=args.request_id,
-                event=CommunicateEvent(
-                    event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
-                    data=json.dumps({
-                        "indexed_files": stats["indexed_files"],
-                        "build_index_time": stats["timings"]["build_index"],
-                    })
-                )
-            )
-
-        if not args.skip_filter_index:
-            if args.request_id and not args.skip_events:
-                queue_communicate.send_event(
-                    request_id=args.request_id,
-                    event=CommunicateEvent(
-                        event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
-                        data=json.dumps({})
-                    )
-                )
-            # Phase 3: Level 1 filtering - Query-based
-            logger.info(
-                "Phase 3: Performing Level 1 filtering (query-based)...")
-
-            phase_start = time.monotonic()
-            target_files = index_manager.get_target_files_by_query(args.query)
-
-            if target_files:
-                for file in target_files.file_list:
-                    file_path = file.file_path.strip()
-                    final_files[get_file_path(file_path)] = file
-                stats["level1_filtered"] = len(target_files.file_list)
-            phase_end = time.monotonic()
-            stats["timings"]["level1_filter"] = phase_end - phase_start
-
-            # Phase 4: Level 2 filtering - Related files
-            if target_files is not None and args.index_filter_level >= 2:
-                logger.info(
-                    "Phase 4: Performing Level 2 filtering (related files)...")
-                if args.request_id and not args.skip_events:
-                    queue_communicate.send_event(
-                        request_id=args.request_id,
-                        event=CommunicateEvent(
-                            event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
-                            data=json.dumps({})
-                        )
-                    )
-                phase_start = time.monotonic()
-                related_files = index_manager.get_related_files(
-                    [file.file_path for file in target_files.file_list]
-                )
-                if related_files is not None:
-                    for file in related_files.file_list:
-                        file_path = file.file_path.strip()
-                        final_files[get_file_path(file_path)] = file
-                    stats["level2_filtered"] = len(related_files.file_list)
-                phase_end = time.monotonic()
-                stats["timings"]["level2_filter"] = phase_end - phase_start
-
-            if not final_files:
-                logger.warning("No related files found, using all files")
-                for source in sources:
-                    final_files[get_file_path(source.module_name)] = TargetFile(
-                        file_path=source.module_name,
-                        reason="No related files found, use all files",
-                    )
-
-            # Phase 5: Relevance verification
-            logger.info("Phase 5: Performing relevance verification...")
-            if args.index_filter_enable_relevance_verification:
-                phase_start = time.monotonic()
-                verified_files = {}
-                temp_files = list(final_files.values())
-                verification_results = []
-
-                def print_verification_results(results):
-                    from rich.table import Table
-                    from rich.console import Console
-
-                    console = Console()
-                    table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
-                    table.add_column("File Path", style="cyan", no_wrap=True)
-                    table.add_column("Score", justify="right", style="green")
-                    table.add_column("Status", style="yellow")
-                    table.add_column("Reason/Error")
-
-                    for file_path, score, status, reason in results:
-                        table.add_row(
-                            file_path,
-                            str(score) if score is not None else "N/A",
-                            status,
-                            reason
-                        )
-
-                    console.print(table)
-
-                def verify_single_file(file: TargetFile):
-                    for source in sources:
-                        if source.module_name == file.file_path:
-                            file_content = source.source_code
-                            try:
-                                result = index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
-                                    file_content=file_content,
-                                    query=args.query
-                                )
-                                if result.relevant_score >= args.verify_file_relevance_score:
-                                    verified_files[file.file_path] = TargetFile(
-                                        file_path=file.file_path,
-                                        reason=f"Score:{result.relevant_score}, {result.reason}"
-                                    )
-                                    return file.file_path, result.relevant_score, "PASS", result.reason
-                                else:
-                                    return file.file_path, result.relevant_score, "FAIL", result.reason
-                            except Exception as e:
-                                error_msg = str(e)
-                                verified_files[file.file_path] = TargetFile(
-                                    file_path=file.file_path,
-                                    reason=f"Verification failed: {error_msg}"
-                                )
-                                return file.file_path, None, "ERROR", error_msg
-                    return None
-
-                with ThreadPoolExecutor(max_workers=args.index_filter_workers) as executor:
-                    futures = [executor.submit(verify_single_file, file)
-                               for file in temp_files]
-                    for future in as_completed(futures):
-                        result = future.result()
-                        if result:
-                            verification_results.append(result)
-                        time.sleep(args.anti_quota_limit)
-
-                # Print verification results in a table
-                print_verification_results(verification_results)
-
-                stats["verified_files"] = len(verified_files)
-                phase_end = time.monotonic()
-                stats["timings"]["relevance_verification"] = phase_end - phase_start
-
-                # Keep all files, not just verified ones
-                final_files = verified_files
-
-    def display_table_and_get_selections(data):
-        from prompt_toolkit.shortcuts import checkboxlist_dialog
-        from prompt_toolkit.styles import Style
-
-        choices = [(file, f"{file} - {reason}") for file, reason in data]
-        selected_files = [file for file, _ in choices]
-
-        style = Style.from_dict(
-            {
-                "dialog": "bg:#88ff88",
-                "dialog frame.label": "bg:#ffffff #000000",
-                "dialog.body": "bg:#88ff88 #000000",
-                "dialog shadow": "bg:#00aa00",
-            }
-        )
-
-        result = checkboxlist_dialog(
-            title="Target Files",
-            text="Tab to switch between buttons, and Space/Enter to select/deselect.",
-            values=choices,
-            style=style,
-            default_values=selected_files,
-        ).run()
-
-        return [file for file in result] if result else []
-
-    def print_selected(data):
-        console = Console()
-
-        table = Table(
-            title="Files Used as Context",
-            show_header=True,
-            header_style="bold magenta",
-        )
-        table.add_column("File Path", style="cyan", no_wrap=True)
-        table.add_column("Reason", style="green")
-
-        for file, reason in data:
-            table.add_row(file, reason)
-
-        panel = Panel(
-            table,
-            expand=False,
-            border_style="bold blue",
-            padding=(1, 1),
-        )
-
-        console.print(panel)
-
-    # Phase 6: File selection and limitation
-    logger.info("Phase 6: Processing file selection and limits...")
-    phase_start = time.monotonic()
-
-    if args.index_filter_file_num > 0:
-        logger.info(
-            f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
-
-    if args.skip_confirm:
-        final_filenames = [file.file_path for file in final_files.values()]
-        if args.index_filter_file_num > 0:
-            final_filenames = final_filenames[: args.index_filter_file_num]
-    else:
-        target_files_data = [
-            (file.file_path, file.reason) for file in final_files.values()
-        ]
-        if not target_files_data:
-            logger.warning(
-                "No target files found, you may need to rewrite the query and try again."
-            )
-            final_filenames = []
-        else:
-            final_filenames = display_table_and_get_selections(
-                target_files_data)
-
-        if args.index_filter_file_num > 0:
-            final_filenames = final_filenames[: args.index_filter_file_num]
-
-    phase_end = time.monotonic()
-    stats["timings"]["file_selection"] = phase_end - phase_start
-
-    # Phase 7: Display results and prepare output
-    logger.info("Phase 7: Preparing final output...")
-    phase_start = time.monotonic()
-    try:
-        print_selected(
-            [
-                (file.file_path, file.reason)
-                for file in final_files.values()
-                if file.file_path in final_filenames
-            ]
-        )
-    except Exception as e:
-        logger.warning(
-            "Failed to display selected files in terminal mode. Falling back to simple print."
-        )
-        print("Target Files Selected:")
-        for file in final_filenames:
-            print(f"{file} - {final_files[file].reason}")
-
-    source_code = ""
-    depulicated_sources = set()
-
-    for file in sources:
-        if file.module_name in final_filenames:
-            if file.module_name in depulicated_sources:
-                continue
-            depulicated_sources.add(file.module_name)
-            source_code += f"##File: {file.module_name}\n"
-            source_code += f"{file.source_code}\n\n"
-
-    if args.request_id and not args.skip_events:
-        queue_communicate.send_event(
-            request_id=args.request_id,
-            event=CommunicateEvent(
-                event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
-                data=json.dumps([
-                    (file.file_path, file.reason)
-                    for file in final_files.values()
-                    if file.file_path in depulicated_sources
-                ])
-            )
-        )
-
-    stats["final_files"] = len(depulicated_sources)
-    phase_end = time.monotonic()
-    stats["timings"]["prepare_output"] = phase_end - phase_start
-
-    # Calculate total time and print summary
-    total_end_time = time.monotonic()
-    total_time = total_end_time - total_start_time
-    stats["timings"]["total"] = total_time
-
-    # Calculate total filter time
-    total_filter_time = (
-        stats["timings"]["level1_filter"] +
-        stats["timings"]["level2_filter"] +
-        stats["timings"]["relevance_verification"]
-    )
-
-    # Print final statistics in a more structured way
-    summary = f"""
-=== Indexing and Filtering Summary ===
-• Total files scanned: {stats['total_files']}
-• Files indexed: {stats['indexed_files']}
-• Files filtered:
-  - Level 1 (query-based): {stats['level1_filtered']}
-  - Level 2 (related files): {stats['level2_filtered']}
-  - Relevance verified: {stats.get('verified_files', 0)}
-• Final files selected: {stats['final_files']}
-
-=== Time Breakdown ===
-• Index build: {stats['timings'].get('build_index', 0):.2f}s
-• Level 1 filter: {stats['timings'].get('level1_filter', 0):.2f}s
-• Level 2 filter: {stats['timings'].get('level2_filter', 0):.2f}s
-• Relevance check: {stats['timings'].get('relevance_verification', 0):.2f}s
-• File selection: {stats['timings'].get('file_selection', 0):.2f}s
-• Total time: {total_time:.2f}s
-====================================
-"""
-    logger.info(summary)
-
-    if args.request_id and not args.skip_events:
-        queue_communicate.send_event(
-            request_id=args.request_id,
-            event=CommunicateEvent(
-                event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
-                data=json.dumps({
-                    "filtered_files": stats["final_files"],
-                    "filter_time": total_filter_time
-                })
-            )
-        )
-
-    return source_code
autocoder/index/types.py CHANGED
@@ -21,4 +21,7 @@ class VerifyFileRelevance(pydantic.BaseModel):
 
 
 class FileList(pydantic.BaseModel):
-    file_list: List[TargetFile]
+    file_list: List[TargetFile]
+
+class FileNumberList(pydantic.BaseModel):
+    file_list: List[int]
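
FileNumberList mirrors FileList but carries integer indices rather than
TargetFile entries, which fits prompts where the model answers with
numbers from an enumerated file list instead of repeating full paths. A
small usage sketch (the call site is an assumption and does not appear
in this diff):

    # Hypothetical usage: map an LLM reply of indices back to paths.
    import json
    import pydantic
    from typing import List

    class FileNumberList(pydantic.BaseModel):  # as added in types.py above
        file_list: List[int]

    candidates = ["main.py", "utils.py", "index.py", "types.py"]  # illustrative
    reply = json.loads('{"file_list": [0, 2, 3]}')  # e.g. the model's JSON answer
    selected = [candidates[i] for i in FileNumberList(**reply).file_list]
    # selected == ["main.py", "index.py", "types.py"]
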
autocoder/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.232"
+__version__ = "0.1.235"