auto-coder 0.1.361__py3-none-any.whl → 0.1.363__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (57)
  1. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/METADATA +2 -1
  2. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/RECORD +57 -29
  3. autocoder/agent/auto_learn.py +249 -262
  4. autocoder/agent/base_agentic/__init__.py +0 -0
  5. autocoder/agent/base_agentic/agent_hub.py +169 -0
  6. autocoder/agent/base_agentic/agentic_lang.py +112 -0
  7. autocoder/agent/base_agentic/agentic_tool_display.py +180 -0
  8. autocoder/agent/base_agentic/base_agent.py +1582 -0
  9. autocoder/agent/base_agentic/default_tools.py +683 -0
  10. autocoder/agent/base_agentic/test_base_agent.py +82 -0
  11. autocoder/agent/base_agentic/tool_registry.py +425 -0
  12. autocoder/agent/base_agentic/tools/__init__.py +12 -0
  13. autocoder/agent/base_agentic/tools/ask_followup_question_tool_resolver.py +72 -0
  14. autocoder/agent/base_agentic/tools/attempt_completion_tool_resolver.py +37 -0
  15. autocoder/agent/base_agentic/tools/base_tool_resolver.py +35 -0
  16. autocoder/agent/base_agentic/tools/example_tool_resolver.py +46 -0
  17. autocoder/agent/base_agentic/tools/execute_command_tool_resolver.py +72 -0
  18. autocoder/agent/base_agentic/tools/list_files_tool_resolver.py +110 -0
  19. autocoder/agent/base_agentic/tools/plan_mode_respond_tool_resolver.py +35 -0
  20. autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +54 -0
  21. autocoder/agent/base_agentic/tools/replace_in_file_tool_resolver.py +156 -0
  22. autocoder/agent/base_agentic/tools/search_files_tool_resolver.py +134 -0
  23. autocoder/agent/base_agentic/tools/talk_to_group_tool_resolver.py +96 -0
  24. autocoder/agent/base_agentic/tools/talk_to_tool_resolver.py +79 -0
  25. autocoder/agent/base_agentic/tools/use_mcp_tool_resolver.py +44 -0
  26. autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +58 -0
  27. autocoder/agent/base_agentic/types.py +189 -0
  28. autocoder/agent/base_agentic/utils.py +100 -0
  29. autocoder/auto_coder.py +1 -1
  30. autocoder/auto_coder_runner.py +36 -14
  31. autocoder/chat/conf_command.py +11 -10
  32. autocoder/commands/auto_command.py +227 -159
  33. autocoder/common/__init__.py +2 -2
  34. autocoder/common/ignorefiles/ignore_file_utils.py +12 -8
  35. autocoder/common/result_manager.py +10 -2
  36. autocoder/common/rulefiles/autocoderrules_utils.py +169 -0
  37. autocoder/common/save_formatted_log.py +1 -1
  38. autocoder/common/v2/agent/agentic_edit.py +53 -41
  39. autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +15 -12
  40. autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +73 -1
  41. autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +132 -4
  42. autocoder/common/v2/agent/agentic_edit_types.py +1 -2
  43. autocoder/common/v2/agent/agentic_tool_display.py +2 -3
  44. autocoder/common/v2/code_auto_generate_editblock.py +3 -1
  45. autocoder/index/index.py +14 -8
  46. autocoder/privacy/model_filter.py +297 -35
  47. autocoder/rag/long_context_rag.py +424 -397
  48. autocoder/rag/test_doc_filter.py +393 -0
  49. autocoder/rag/test_long_context_rag.py +473 -0
  50. autocoder/rag/test_token_limiter.py +342 -0
  51. autocoder/shadows/shadow_manager.py +1 -3
  52. autocoder/utils/_markitdown.py +22 -3
  53. autocoder/version.py +1 -1
  54. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/LICENSE +0 -0
  55. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/WHEEL +0 -0
  56. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/entry_points.txt +0 -0
  57. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/top_level.txt +0 -0
@@ -3,30 +3,17 @@ import os
  import time
  from typing import Any, Dict, Generator, List, Optional, Tuple

- import byzerllm
- import pandas as pd
  import pathspec
  from byzerllm import ByzerLLM
  from loguru import logger
  from openai import OpenAI
- from rich.console import Console
- from rich.panel import Panel
- from rich.table import Table
  import statistics
  import traceback

  from autocoder.common import AutoCoderArgs, SourceCode
  from autocoder.rag.doc_filter import DocFilter
  from autocoder.rag.document_retriever import LocalDocumentRetriever
- from autocoder.rag.relevant_utils import (
-     DocRelevance,
-     FilterDoc,
-     TaskTiming,
-     parse_relevance,
-     ProgressUpdate,
-     DocFilterResult
- )
- from autocoder.rag.token_checker import check_token_limit
+ from autocoder.rag.relevant_utils import DocFilterResult
  from autocoder.rag.token_counter import RemoteTokenCounter, TokenCounter,count_tokens
  from autocoder.rag.token_limiter import TokenLimiter
  from tokenizers import Tokenizer
@@ -235,35 +222,7 @@ class LongContextRAG:
      def count_tokens(self, text: str) -> int:
          if self.tokenizer is None:
              return -1
-         return self.tokenizer.count_tokens(text)
-
-     @byzerllm.prompt()
-     def extract_relevance_info_from_docs_with_conversation(
-         self, conversations: List[Dict[str, str]], documents: List[str]
-     ) -> str:
-         """
-         使用以下文档和对话历史来提取相关信息。
-
-         文档:
-         <documents>
-         {% for doc in documents %}
-         {{ doc }}
-         {% endfor %}
-         </documents>
-
-         对话历史:
-         <conversations>
-         {% for msg in conversations %}
-         [{{ msg.role }}]:
-         {{ msg.content }}
-
-         {% endfor %}
-         </conversations>
-
-         请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
-         如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
-         提取的信息尽量保持和原文中的一样,并且只输出这些信息。
-         """
+         return self.tokenizer.count_tokens(text)

      def _get_document_retriever_class(self):
          """Get the document retriever class based on configuration."""
@@ -338,7 +297,8 @@ class LongContextRAG:
              conversations=[{"role": "user", "content": target_query}]
          )
          url = ",".join(contexts)
-         return [SourceCode(module_name=f"RAG:{url}", source_code="".join(v))]
+         result = (item for (item,_) in v)
+         return [SourceCode(module_name=f"RAG:{url}", source_code="".join(result))]

      def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
          query = conversations[-1]["content"]
@@ -407,6 +367,12 @@ class LongContextRAG:
          llm_config: Dict[str, Any] = {},
          extra_request_params: Dict[str, Any] = {}
      ):
+         if not llm_config:
+             llm_config = {}
+
+         if extra_request_params:
+             llm_config.update(extra_request_params)
+
          conversations = OpenAIContentProcessor.process_conversations(conversations)
          if self.client:
              model = model or self.args.model
@@ -423,31 +389,8 @@ class LongContextRAG:
          if self.llm.get_sub_client("qa_model"):
              target_llm = self.llm.get_sub_client("qa_model")

-         query = conversations[-1]["content"]
-
+         query = conversations[-1]["content"]
          context = []
-
-         if (
-             "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
-             in query
-             or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
-             in query
-         ):
-
-             chunks = target_llm.stream_chat_oai(
-                 conversations=conversations,
-                 model=model,
-                 role_mapping=role_mapping,
-                 llm_config=llm_config,
-                 delta_mode=True,
-                 extra_request_params=extra_request_params
-             )
-
-             def generate_chunks():
-                 for chunk in chunks:
-                     yield chunk
-             return generate_chunks(), context
-
          try:
              request_params = json.loads(query)
              if "request_id" in request_params:
@@ -539,352 +482,436 @@ class LongContextRAG:

          context = []

-         def generate_sream():
-             nonlocal context
-
-             yield ("", SingleOutputMeta(input_tokens_count=0,
-                 generated_tokens_count=0,
-                 reasoning_content=get_message_with_format_and_newline(
-                     "rag_searching_docs",
-                     model=rag_stat.recall_stat.model_name
-                 )
-             ))
-
-             doc_filter_result = DocFilterResult(
-                 docs=[],
-                 raw_docs=[],
-                 input_tokens_counts=[],
-                 generated_tokens_counts=[],
-                 durations=[],
-                 model_name=rag_stat.recall_stat.model_name
-             )
-             query = conversations[-1]["content"]
-             queries = extract_search_queries(
-                 conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
-             documents = self._retrieve_documents(
-                 options={"queries": [query] + [query.query for query in queries]})
-
-             # 使用带进度报告的过滤方法
-             for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
-                 if result is not None:
-                     doc_filter_result = result
-                 else:
-                     # 生成进度更新
-                     yield ("", SingleOutputMeta(
-                         input_tokens_count=rag_stat.recall_stat.total_input_tokens,
-                         generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
-                         reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
+         return self._generate_sream(
+             conversations=conversations,
+             query=query,
+             only_contexts=only_contexts,
+             start_time=start_time,
+             rag_stat=rag_stat,
+             context=context,
+             target_llm=target_llm,
+             model=model,
+             role_mapping=role_mapping,
+             llm_config=llm_config,
+             extra_request_params=extra_request_params
+         ), context
+
+
+     def _generate_sream(
+         self,
+         conversations,
+         query,
+         only_contexts,
+         start_time,
+         rag_stat,
+         context,
+         target_llm,
+         model=None,
+         role_mapping=None,
+         llm_config=None,
+         extra_request_params=None
+     ):
+         """将RAG流程分为三个主要阶段的生成器函数"""
+         # 第一阶段:文档召回和过滤
+         doc_retrieval_generator = self._process_document_retrieval(
+             conversations=conversations,
+             query=query,
+             rag_stat=rag_stat
+         )
+
+         # 处理第一阶段结果
+         for item in doc_retrieval_generator:
+             if isinstance(item, tuple) and len(item) == 2:
+                 # 正常的生成器项,包含yield内容和元数据
+                 yield item
+             elif isinstance(item, dict) and "result" in item:
+                 # 如果是只返回上下文的情况
+                 if only_contexts:
+                     try:
+                         searcher = SearchableResults()
+                         result = searcher.reorder(docs=item["result"])
+                         yield (json.dumps(result.model_dump(), ensure_ascii=False), SingleOutputMeta(
+                             input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                             generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                         ))
+                         return
+                     except Exception as e:
+                         yield (str(e), SingleOutputMeta(
+                             input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                             generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                         ))
+                         return
+
+                 # 如果没有找到相关文档
+                 if not item["result"]:
+                     yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(
+                         input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                         generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
                      ))
-
-             rag_stat.recall_stat.total_input_tokens += sum(
-                 doc_filter_result.input_tokens_counts)
-             rag_stat.recall_stat.total_generated_tokens += sum(
-                 doc_filter_result.generated_tokens_counts)
-             rag_stat.recall_stat.model_name = doc_filter_result.model_name
-
-             relevant_docs: List[FilterDoc] = doc_filter_result.docs
-             filter_time = time.time() - start_time
-
-             yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens,
-                 generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
-                 reasoning_content=get_message_with_format_and_newline(
-                     "rag_docs_filter_result",
-                     filter_time=filter_time,
-                     docs_num=len(relevant_docs),
-                     input_tokens=rag_stat.recall_stat.total_input_tokens,
-                     output_tokens=rag_stat.recall_stat.total_generated_tokens,
-                     model=rag_stat.recall_stat.model_name
-                 )
-             ))
-
-             # Filter relevant_docs to only include those with is_relevant=True
-             highly_relevant_docs = [
-                 doc for doc in relevant_docs if doc.relevance.is_relevant
-             ]
-
-             if highly_relevant_docs:
-                 relevant_docs = highly_relevant_docs
-                 logger.info(
-                     f"Found {len(relevant_docs)} highly relevant documents")
-
-             logger.info(
-                 f"Filter time: {filter_time:.2f} seconds with {len(relevant_docs)} docs"
-             )
-
-             if only_contexts:
-                 try:
-                     searcher = SearchableResults()
-                     result = searcher.reorder(docs=relevant_docs)
-                     yield (json.dumps(result.model_dump(), ensure_ascii=False), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                         generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                         rag_stat.chunk_stat.total_generated_tokens,
-                     ))
-                 except Exception as e:
-                     yield (str(e), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                         generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                         rag_stat.chunk_stat.total_generated_tokens,
-                     ))
-                 return
-
-             if not relevant_docs:
-                 yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                     generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                     rag_stat.chunk_stat.total_generated_tokens,
-                 ))
-                 return
-
-             context = [doc.source_code.module_name for doc in relevant_docs]
-
-             yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                 generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                 rag_stat.chunk_stat.total_generated_tokens,
-                 reasoning_content=get_message_with_format_and_newline(
-                     "context_docs_names",
-                     context_docs_names=",".join(
-                         context))
-             ))
-
-             # 将 FilterDoc 转化为 SourceCode 方便后续的逻辑继续做处理
-             relevant_docs = [doc.source_code for doc in relevant_docs]
-
-             logger.info(f"=== RAG Search Results ===")
-             logger.info(f"Query: {query}")
-             logger.info(f"Found relevant docs: {len(relevant_docs)}")
-
-             # 记录相关文档信息
-             relevant_docs_info = []
-             for i, doc in enumerate(relevant_docs):
-                 doc_path = doc.module_name.replace(self.path, '', 1)
-                 info = f"{i+1}. {doc_path}"
-                 if "original_docs" in doc.metadata:
-                     original_docs = ", ".join(
-                         [
-                             doc.replace(self.path, "", 1)
-                             for doc in doc.metadata["original_docs"]
-                         ]
+                     return
+
+                 # 更新上下文
+                 context.extend([doc.source_code.module_name for doc in item["result"]])
+
+                 # 输出上下文文档名称
+                 yield ("", SingleOutputMeta(
+                     input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                     generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                     reasoning_content=get_message_with_format_and_newline(
+                         "context_docs_names",
+                         context_docs_names=",".join(context)
                      )
-                     info += f" (Original docs: {original_docs})"
-                     relevant_docs_info.append(info)
-
-             if relevant_docs_info:
-                 logger.info(
-                     f"Relevant documents list:"
-                     + "".join([f"\n * {info}" for info in relevant_docs_info])
-                 )
-
-             yield ("", SingleOutputMeta(generated_tokens_count=0,
-                 reasoning_content=get_message_with_format_and_newline(
-                     "dynamic_chunking_start",
-                     model=rag_stat.chunk_stat.model_name
-                 )
-             ))
-             first_round_full_docs = []
-             second_round_extracted_docs = []
-             sencond_round_time = 0
-
-             if self.tokenizer is not None:
-
-                 token_limiter = TokenLimiter(
-                     count_tokens=self.count_tokens,
-                     full_text_limit=self.full_text_limit,
-                     segment_limit=self.segment_limit,
-                     buff_limit=self.buff_limit,
-                     llm=self.llm,
-                     disable_segment_reorder=self.args.disable_segment_reorder,
-                 )
-
-                 token_limiter_result = token_limiter.limit_tokens(
+                 ))
+
+                 # 记录信息到日志
+                 logger.info(f"=== RAG Search Results ===")
+                 logger.info(f"Query: {query}")
+                 relevant_docs = [doc.source_code for doc in item["result"]]
+                 logger.info(f"Found relevant docs: {len(relevant_docs)}")
+
+                 # 记录相关文档信息
+                 relevant_docs_info = []
+                 for i, doc in enumerate(relevant_docs):
+                     doc_path = doc.module_name.replace(self.path, '', 1)
+                     info = f"{i+1}. {doc_path}"
+                     if "original_docs" in doc.metadata:
+                         original_docs = ", ".join(
+                             [
+                                 doc.replace(self.path, "", 1)
+                                 for doc in doc.metadata["original_docs"]
+                             ]
+                         )
+                         info += f" (Original docs: {original_docs})"
+                         relevant_docs_info.append(info)
+
+                 if relevant_docs_info:
+                     logger.info(
+                         f"Relevant documents list:"
+                         + "".join([f"\n * {info}" for info in relevant_docs_info])
+                     )
+
+                 # 第二阶段:文档分块与重排序
+                 doc_chunking_generator = self._process_document_chunking(
                      relevant_docs=relevant_docs,
                      conversations=conversations,
-                     index_filter_workers=self.args.index_filter_workers or 5,
+                     rag_stat=rag_stat,
+                     filter_time=(time.time() - start_time)
                  )
+
+                 for chunking_item in doc_chunking_generator:
+                     if isinstance(chunking_item, tuple) and len(chunking_item) == 2:
+                         # 正常的生成器项
+                         yield chunking_item
+                     elif isinstance(chunking_item, dict) and "result" in chunking_item:
+                         processed_docs = chunking_item["result"]
+                         filter_time = chunking_item.get("filter_time", 0)
+                         first_round_full_docs = chunking_item.get("first_round_full_docs", [])
+                         second_round_extracted_docs = chunking_item.get("second_round_extracted_docs", [])
+                         sencond_round_time = chunking_item.get("sencond_round_time", 0)
+
+                         # 记录最终选择的文档详情
+                         final_relevant_docs_info = []
+                         for i, doc in enumerate(processed_docs):
+                             doc_path = doc.module_name.replace(self.path, '', 1)
+                             info = f"{i+1}. {doc_path}"
+
+                             metadata_info = []
+                             if "original_docs" in doc.metadata:
+                                 original_docs = ", ".join(
+                                     [
+                                         od.replace(self.path, "", 1)
+                                         for od in doc.metadata["original_docs"]
+                                     ]
+                                 )
+                                 metadata_info.append(f"Original docs: {original_docs}")
+
+                             if "chunk_ranges" in doc.metadata:
+                                 chunk_ranges = json.dumps(
+                                     doc.metadata["chunk_ranges"], ensure_ascii=False
+                                 )
+                                 metadata_info.append(f"Chunk ranges: {chunk_ranges}")
+
+                             if "processing_time" in doc.metadata:
+                                 metadata_info.append(
+                                     f"Processing time: {doc.metadata['processing_time']:.2f}s")
+
+                             if metadata_info:
+                                 info += f" ({'; '.join(metadata_info)})"
+
+                             final_relevant_docs_info.append(info)
+
+                         if final_relevant_docs_info:
+                             logger.info(
+                                 f"Final documents to be sent to model:"
+                                 + "".join([f"\n * {info}" for info in final_relevant_docs_info])
+                             )
+
+                         # 记录令牌统计
+                         request_tokens = sum([count_tokens(doc.source_code) for doc in processed_docs])
+                         target_model = target_llm.default_model_name
+                         logger.info(
+                             f"=== LLM Request ===\n"
+                             f" * Target model: {target_model}\n"
+                             f" * Total tokens: {request_tokens}"
+                         )
+
+                         logger.info(
+                             f"Start to send to model {target_model} with {request_tokens} tokens")
+
+                         yield ("", SingleOutputMeta(
+                             input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                             generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                             reasoning_content=get_message_with_format_and_newline(
+                                 "send_to_model",
+                                 model=target_model,
+                                 tokens=request_tokens
+                             )
+                         ))
+
+                         yield ("", SingleOutputMeta(
+                             input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                             generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                             reasoning_content="qa_model_thinking"
+                         ))
+
+                         # 第三阶段:大模型问答生成
+                         qa_generation_generator = self._process_qa_generation(
+                             relevant_docs=processed_docs,
+                             conversations=conversations,
+                             target_llm=target_llm,
+                             rag_stat=rag_stat,
+                             model=model,
+                             role_mapping=role_mapping,
+                             llm_config=llm_config,
+                             extra_request_params=extra_request_params
+                         )
+
+                         for gen_item in qa_generation_generator:
+                             yield gen_item
+
+                         # 打印最终的统计信息
+                         self._print_rag_stats(rag_stat)
+                         return
+
+     def _process_document_retrieval(self, conversations,
+                                     query, rag_stat):
+         """第一阶段:文档召回和过滤"""
+         yield ("", SingleOutputMeta(
+             input_tokens_count=0,
+             generated_tokens_count=0,
+             reasoning_content=get_message_with_format_and_newline(
+                 "rag_searching_docs",
+                 model=rag_stat.recall_stat.model_name
+             )
+         ))
+
+         doc_filter_result = DocFilterResult(
+             docs=[],
+             raw_docs=[],
+             input_tokens_counts=[],
+             generated_tokens_counts=[],
+             durations=[],
+             model_name=rag_stat.recall_stat.model_name
+         )
+
+         # 提取查询并检索候选文档
+         queries = extract_search_queries(
+             conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
+         documents = self._retrieve_documents(
+             options={"queries": [query] + [query.query for query in queries]})

-                 rag_stat.chunk_stat.total_input_tokens += sum(
-                     token_limiter_result.input_tokens_counts)
-                 rag_stat.chunk_stat.total_generated_tokens += sum(
-                     token_limiter_result.generated_tokens_counts)
-                 rag_stat.chunk_stat.model_name = token_limiter_result.model_name
-
-                 final_relevant_docs = token_limiter_result.docs
-                 first_round_full_docs = token_limiter.first_round_full_docs
-                 second_round_extracted_docs = token_limiter.second_round_extracted_docs
-                 sencond_round_time = token_limiter.sencond_round_time
-
-                 relevant_docs = final_relevant_docs
+         # 使用带进度报告的过滤方法
+         for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
+             if result is not None:
+                 doc_filter_result = result
              else:
-                 relevant_docs = relevant_docs[: self.args.index_filter_file_num]
-
-             logger.info(f"Finally send to model: {len(relevant_docs)}")
-             # 记录分段处理的统计信息
-             logger.info(
-                 f"=== Token Management ===\n"
-                 f" * Only contexts: {only_contexts}\n"
-                 f" * Filter time: {filter_time:.2f} seconds\n"
-                 f" * Final relevant docs: {len(relevant_docs)}\n"
-                 f" * First round full docs: {len(first_round_full_docs)}\n"
-                 f" * Second round extracted docs: {len(second_round_extracted_docs)}\n"
-                 f" * Second round time: {sencond_round_time:.2f} seconds"
+                 # 生成进度更新
+                 yield ("", SingleOutputMeta(
+                     input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+                     generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+                     reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
+                 ))
+
+         # 更新统计信息
+         rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
+         rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
+         rag_stat.recall_stat.model_name = doc_filter_result.model_name
+
+         relevant_docs = doc_filter_result.docs
+
+         yield ("", SingleOutputMeta(
+             input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+             generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+             reasoning_content=get_message_with_format_and_newline(
+                 "rag_docs_filter_result",
+                 filter_time=0, # 这里实际应该计算时间,但由于重构,我们需要在外部计算
+                 docs_num=len(relevant_docs),
+                 input_tokens=rag_stat.recall_stat.total_input_tokens,
+                 output_tokens=rag_stat.recall_stat.total_generated_tokens,
+                 model=rag_stat.recall_stat.model_name
              )
+         ))

-             yield ("", SingleOutputMeta(generated_tokens_count=rag_stat.chunk_stat.total_generated_tokens + rag_stat.recall_stat.total_generated_tokens,
-                 input_tokens_count=rag_stat.chunk_stat.total_input_tokens +
-                 rag_stat.recall_stat.total_input_tokens,
-                 reasoning_content=get_message_with_format_and_newline(
-                     "dynamic_chunking_result",
-                     model=rag_stat.chunk_stat.model_name,
-                     docs_num=len(relevant_docs),
-                     filter_time=filter_time,
-                     sencond_round_time=sencond_round_time,
-                     first_round_full_docs=len(
-                         first_round_full_docs),
-                     second_round_extracted_docs=len(
-                         second_round_extracted_docs),
-                     input_tokens=rag_stat.chunk_stat.total_input_tokens,
-                     output_tokens=rag_stat.chunk_stat.total_generated_tokens
-                 )
-             ))
-
-             # 记录最终选择的文档详情
-             final_relevant_docs_info = []
-             for i, doc in enumerate(relevant_docs):
-                 doc_path = doc.module_name.replace(self.path, '', 1)
-                 info = f"{i+1}. {doc_path}"
-
-                 metadata_info = []
-                 if "original_docs" in doc.metadata:
-                     original_docs = ", ".join(
-                         [
-                             od.replace(self.path, "", 1)
-                             for od in doc.metadata["original_docs"]
-                         ]
-                     )
-                     metadata_info.append(f"Original docs: {original_docs}")
-
-                 if "chunk_ranges" in doc.metadata:
-                     chunk_ranges = json.dumps(
-                         doc.metadata["chunk_ranges"], ensure_ascii=False
-                     )
-                     metadata_info.append(f"Chunk ranges: {chunk_ranges}")
-
-                 if "processing_time" in doc.metadata:
-                     metadata_info.append(
-                         f"Processing time: {doc.metadata['processing_time']:.2f}s")
-
-                 if metadata_info:
-                     info += f" ({'; '.join(metadata_info)})"
-
-                 final_relevant_docs_info.append(info)
-
-             if final_relevant_docs_info:
-                 logger.info(
-                     f"Final documents to be sent to model:"
-                     + "".join([f"\n * {info}" for info in final_relevant_docs_info])
-                 )
-
-             # 记录令牌统计
-             request_tokens = sum([count_tokens(doc.source_code) for doc in relevant_docs])
-             target_model = target_llm.default_model_name
-             logger.info(
-                 f"=== LLM Request ===\n"
-                 f" * Target model: {target_model}\n"
-                 f" * Total tokens: {request_tokens}"
+         # 仅保留高相关性文档
+         highly_relevant_docs = [doc for doc in relevant_docs if doc.relevance.is_relevant]
+         if highly_relevant_docs:
+             relevant_docs = highly_relevant_docs
+             logger.info(f"Found {len(relevant_docs)} highly relevant documents")
+
+         # 返回结果
+         yield {"result": relevant_docs}
+
+     def _process_document_chunking(self, relevant_docs, conversations, rag_stat, filter_time):
+         """第二阶段:文档分块与重排序"""
+         yield ("", SingleOutputMeta(
+             generated_tokens_count=0,
+             reasoning_content=get_message_with_format_and_newline(
+                 "dynamic_chunking_start",
+                 model=rag_stat.chunk_stat.model_name
+             )
+         ))
+
+         # 默认值
+         first_round_full_docs = []
+         second_round_extracted_docs = []
+         sencond_round_time = 0
+
+         if self.tokenizer is not None:
+             token_limiter = TokenLimiter(
+                 count_tokens=self.count_tokens,
+                 full_text_limit=self.full_text_limit,
+                 segment_limit=self.segment_limit,
+                 buff_limit=self.buff_limit,
+                 llm=self.llm,
+                 disable_segment_reorder=self.args.disable_segment_reorder,
              )

-             logger.info(
-                 f"Start to send to model {target_model} with {request_tokens} tokens")
-
-             yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                 generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                 rag_stat.chunk_stat.total_generated_tokens,
-                 reasoning_content=get_message_with_format_and_newline(
-                     "send_to_model",
-                     model=target_model,
-                     tokens=request_tokens
-                 )
-             ))
-
-             yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                 generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                 rag_stat.chunk_stat.total_generated_tokens,
-                 reasoning_content="qa_model_thinking"
-             ))
-
-             if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
-                 llm_compute_engine = LLMComputeEngine(
-                     llm=target_llm,
-                     inference_enhance=not self.args.disable_inference_enhance,
-                     inference_deep_thought=self.args.inference_deep_thought,
-                     precision=self.args.inference_compute_precision,
-                     data_cells_max_num=self.args.data_cells_max_num,
-                     debug=False,
-                 )
-                 new_conversations = llm_compute_engine.process_conversation(
-                     conversations, query, [
-                         doc.source_code for doc in relevant_docs]
-                 )
-                 chunks = llm_compute_engine.stream_chat_oai(
-                     conversations=new_conversations,
-                     model=model,
-                     role_mapping=role_mapping,
-                     llm_config=llm_config,
-                     delta_mode=True,
-                 )
-
-                 for chunk in chunks:
-                     if chunk[1] is not None:
-                         rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
-                         rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
-                         chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
-                             rag_stat.chunk_stat.total_input_tokens + \
-                             rag_stat.answer_stat.total_input_tokens
-                         chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
-                             rag_stat.chunk_stat.total_generated_tokens + \
-                             rag_stat.answer_stat.total_generated_tokens
-                     yield chunk
-
-                 self._print_rag_stats(rag_stat)
-             else:
+             token_limiter_result = token_limiter.limit_tokens(
+                 relevant_docs=relevant_docs,
+                 conversations=conversations,
+                 index_filter_workers=self.args.index_filter_workers or 5,
+             )

-                 qa_strategy = get_qa_strategy(self.args)
-                 new_conversations = qa_strategy.create_conversation(
-                     documents=[doc.source_code for doc in relevant_docs],
-                     conversations=conversations, local_image_host=self.args.local_image_host
-                 )
+             # 更新统计信息
+             rag_stat.chunk_stat.total_input_tokens += sum(token_limiter_result.input_tokens_counts)
+             rag_stat.chunk_stat.total_generated_tokens += sum(token_limiter_result.generated_tokens_counts)
+             rag_stat.chunk_stat.model_name = token_limiter_result.model_name

-                 # 保存 new_conversations
-                 try:
-                     logger.info(f"Saving new_conversations log to {self.args.source_dir}/.cache/logs")
-                     project_root = self.args.source_dir
-                     json_text = json.dumps(new_conversations, ensure_ascii=False)
-                     save_formatted_log(project_root, json_text, "rag_conversation")
-                 except Exception as e:
-                     logger.warning(f"Failed to save new_conversations log: {e}")
-
-                 chunks = target_llm.stream_chat_oai(
-                     conversations=new_conversations,
-                     model=model,
-                     role_mapping=role_mapping,
-                     llm_config=llm_config,
-                     delta_mode=True,
-                     extra_request_params=extra_request_params
-                 )
+             final_relevant_docs = token_limiter_result.docs
+             first_round_full_docs = token_limiter.first_round_full_docs
+             second_round_extracted_docs = token_limiter.second_round_extracted_docs
+             sencond_round_time = token_limiter.sencond_round_time
+         else:
+             # 如果没有tokenizer,直接限制文档数量
+             final_relevant_docs = relevant_docs[: self.args.index_filter_file_num]
+
+         # 输出分块结果统计
+         yield ("", SingleOutputMeta(
+             generated_tokens_count=rag_stat.chunk_stat.total_generated_tokens + rag_stat.recall_stat.total_generated_tokens,
+             input_tokens_count=rag_stat.chunk_stat.total_input_tokens + rag_stat.recall_stat.total_input_tokens,
+             reasoning_content=get_message_with_format_and_newline(
+                 "dynamic_chunking_result",
+                 model=rag_stat.chunk_stat.model_name,
+                 docs_num=len(final_relevant_docs),
+                 filter_time=filter_time,
+                 sencond_round_time=sencond_round_time,
+                 first_round_full_docs=len(first_round_full_docs),
+                 second_round_extracted_docs=len(second_round_extracted_docs),
+                 input_tokens=rag_stat.chunk_stat.total_input_tokens,
+                 output_tokens=rag_stat.chunk_stat.total_generated_tokens
+             )
+         ))
+
+         # 返回处理结果和相关统计信息
+         yield {
+             "result": final_relevant_docs,
+             "filter_time": filter_time,
+             "first_round_full_docs": first_round_full_docs,
+             "second_round_extracted_docs": second_round_extracted_docs,
+             "sencond_round_time": sencond_round_time
+         }
+
+     def _process_qa_generation(self, relevant_docs, conversations,
+                                target_llm,
+                                rag_stat,
+                                model=None,
+                                role_mapping=None,
+                                llm_config={},
+                                extra_request_params={}):
+         """第三阶段:大模型问答生成"""
+
+         # 使用LLMComputeEngine增强处理(如果可用)
+         if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
+             llm_compute_engine = LLMComputeEngine(
+                 llm=target_llm,
+                 inference_enhance=not self.args.disable_inference_enhance,
+                 inference_deep_thought=self.args.inference_deep_thought,
+                 precision=self.args.inference_compute_precision,
+                 data_cells_max_num=self.args.data_cells_max_num,
+                 debug=False,
+             )
+             query = conversations[-1]["content"]
+             new_conversations = llm_compute_engine.process_conversation(
+                 conversations, query, [doc.source_code for doc in relevant_docs]
+             )
+             chunks = llm_compute_engine.stream_chat_oai(
+                 conversations=new_conversations,
+                 model=model,
+                 role_mapping=role_mapping,
+                 llm_config=llm_config,
+                 delta_mode=True,
+             )

-                 for chunk in chunks:
-                     if chunk[1] is not None:
-                         rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
-                         rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
-                         chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
-                             rag_stat.chunk_stat.total_input_tokens + \
-                             rag_stat.answer_stat.total_input_tokens
-                         chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
-                             rag_stat.chunk_stat.total_generated_tokens + \
-                             rag_stat.answer_stat.total_generated_tokens
+             for chunk in chunks:
+                 if chunk[1] is not None:
+                     rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+                     rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                     chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
+                         rag_stat.chunk_stat.total_input_tokens + \
+                         rag_stat.answer_stat.total_input_tokens
+                     chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
+                         rag_stat.chunk_stat.total_generated_tokens + \
+                         rag_stat.answer_stat.total_generated_tokens
+                 yield chunk
+         else:
+             # 常规QA处理路径
+             qa_strategy = get_qa_strategy(self.args)
+             new_conversations = qa_strategy.create_conversation(
+                 documents=[doc.source_code for doc in relevant_docs],
+                 conversations=conversations, local_image_host=self.args.local_image_host
+             )

-                     yield chunk
+             # 保存对话日志
+             try:
+                 logger.info(f"Saving new_conversations log to {self.args.source_dir}/.cache/logs")
+                 project_root = self.args.source_dir
+                 json_text = json.dumps(new_conversations, ensure_ascii=False)
+                 save_formatted_log(project_root, json_text, "rag_conversation")
+             except Exception as e:
+                 logger.warning(f"Failed to save new_conversations log: {e}")

-                 self._print_rag_stats(rag_stat)
+             # 流式生成回答
+             chunks = target_llm.stream_chat_oai(
+                 conversations=new_conversations,
+                 model=model,
+                 role_mapping=role_mapping,
+                 llm_config=llm_config,
+                 delta_mode=True,
+                 extra_request_params=extra_request_params
+             )

-         return generate_sream(), context
+             # 返回结果并更新统计信息
+             for chunk in chunks:
+                 if chunk[1] is not None:
+                     rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+                     rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                     chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
+                         rag_stat.chunk_stat.total_input_tokens + \
+                         rag_stat.answer_stat.total_input_tokens
+                     chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
+                         rag_stat.chunk_stat.total_generated_tokens + \
+                         rag_stat.answer_stat.total_generated_tokens
+                 yield chunk


      def _print_rag_stats(self, rag_stat: RAGStat) -> None:
          """打印RAG执行的详细统计信息"""