auto-coder 0.1.362-py3-none-any.whl → 0.1.364-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic.
- {auto_coder-0.1.362.dist-info → auto_coder-0.1.364.dist-info}/METADATA +2 -2
- {auto_coder-0.1.362.dist-info → auto_coder-0.1.364.dist-info}/RECORD +65 -22
- autocoder/agent/base_agentic/__init__.py +0 -0
- autocoder/agent/base_agentic/agent_hub.py +169 -0
- autocoder/agent/base_agentic/agentic_lang.py +112 -0
- autocoder/agent/base_agentic/agentic_tool_display.py +180 -0
- autocoder/agent/base_agentic/base_agent.py +1582 -0
- autocoder/agent/base_agentic/default_tools.py +683 -0
- autocoder/agent/base_agentic/test_base_agent.py +82 -0
- autocoder/agent/base_agentic/tool_registry.py +425 -0
- autocoder/agent/base_agentic/tools/__init__.py +12 -0
- autocoder/agent/base_agentic/tools/ask_followup_question_tool_resolver.py +72 -0
- autocoder/agent/base_agentic/tools/attempt_completion_tool_resolver.py +37 -0
- autocoder/agent/base_agentic/tools/base_tool_resolver.py +35 -0
- autocoder/agent/base_agentic/tools/example_tool_resolver.py +46 -0
- autocoder/agent/base_agentic/tools/execute_command_tool_resolver.py +72 -0
- autocoder/agent/base_agentic/tools/list_files_tool_resolver.py +110 -0
- autocoder/agent/base_agentic/tools/plan_mode_respond_tool_resolver.py +35 -0
- autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +54 -0
- autocoder/agent/base_agentic/tools/replace_in_file_tool_resolver.py +156 -0
- autocoder/agent/base_agentic/tools/search_files_tool_resolver.py +134 -0
- autocoder/agent/base_agentic/tools/talk_to_group_tool_resolver.py +96 -0
- autocoder/agent/base_agentic/tools/talk_to_tool_resolver.py +79 -0
- autocoder/agent/base_agentic/tools/use_mcp_tool_resolver.py +44 -0
- autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +58 -0
- autocoder/agent/base_agentic/types.py +189 -0
- autocoder/agent/base_agentic/utils.py +100 -0
- autocoder/auto_coder_runner.py +6 -4
- autocoder/chat/conf_command.py +11 -10
- autocoder/common/__init__.py +2 -0
- autocoder/common/file_checkpoint/__init__.py +21 -0
- autocoder/common/file_checkpoint/backup.py +264 -0
- autocoder/common/file_checkpoint/examples.py +217 -0
- autocoder/common/file_checkpoint/manager.py +404 -0
- autocoder/common/file_checkpoint/models.py +156 -0
- autocoder/common/file_checkpoint/store.py +383 -0
- autocoder/common/file_checkpoint/test_backup.py +242 -0
- autocoder/common/file_checkpoint/test_manager.py +570 -0
- autocoder/common/file_checkpoint/test_models.py +360 -0
- autocoder/common/file_checkpoint/test_store.py +327 -0
- autocoder/common/file_checkpoint/test_utils.py +297 -0
- autocoder/common/file_checkpoint/utils.py +119 -0
- autocoder/common/rulefiles/autocoderrules_utils.py +138 -55
- autocoder/common/save_formatted_log.py +76 -5
- autocoder/common/v2/agent/agentic_edit.py +339 -216
- autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +2 -2
- autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +100 -5
- autocoder/common/v2/agent/agentic_edit_tools/test_write_to_file_tool_resolver.py +322 -0
- autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +160 -10
- autocoder/common/v2/agent/agentic_edit_types.py +1 -2
- autocoder/common/v2/agent/agentic_tool_display.py +2 -3
- autocoder/compilers/normal_compiler.py +64 -0
- autocoder/events/event_manager_singleton.py +133 -4
- autocoder/linters/normal_linter.py +373 -0
- autocoder/linters/python_linter.py +4 -2
- autocoder/rag/long_context_rag.py +424 -397
- autocoder/rag/test_doc_filter.py +393 -0
- autocoder/rag/test_long_context_rag.py +473 -0
- autocoder/rag/test_token_limiter.py +342 -0
- autocoder/shadows/shadow_manager.py +1 -3
- autocoder/version.py +1 -1
- {auto_coder-0.1.362.dist-info → auto_coder-0.1.364.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.362.dist-info → auto_coder-0.1.364.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.362.dist-info → auto_coder-0.1.364.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.362.dist-info → auto_coder-0.1.364.dist-info}/top_level.txt +0 -0
--- a/autocoder/rag/long_context_rag.py
+++ b/autocoder/rag/long_context_rag.py
@@ -3,30 +3,17 @@ import os
 import time
 from typing import Any, Dict, Generator, List, Optional, Tuple
 
-import byzerllm
-import pandas as pd
 import pathspec
 from byzerllm import ByzerLLM
 from loguru import logger
 from openai import OpenAI
-from rich.console import Console
-from rich.panel import Panel
-from rich.table import Table
 import statistics
 import traceback
 
 from autocoder.common import AutoCoderArgs, SourceCode
 from autocoder.rag.doc_filter import DocFilter
 from autocoder.rag.document_retriever import LocalDocumentRetriever
-from autocoder.rag.relevant_utils import (
-    DocRelevance,
-    FilterDoc,
-    TaskTiming,
-    parse_relevance,
-    ProgressUpdate,
-    DocFilterResult
-)
-from autocoder.rag.token_checker import check_token_limit
+from autocoder.rag.relevant_utils import DocFilterResult
 from autocoder.rag.token_counter import RemoteTokenCounter, TokenCounter,count_tokens
 from autocoder.rag.token_limiter import TokenLimiter
 from tokenizers import Tokenizer
@@ -235,35 +222,7 @@ class LongContextRAG:
     def count_tokens(self, text: str) -> int:
         if self.tokenizer is None:
             return -1
-        return self.tokenizer.count_tokens(text)
-
-    @byzerllm.prompt()
-    def extract_relevance_info_from_docs_with_conversation(
-        self, conversations: List[Dict[str, str]], documents: List[str]
-    ) -> str:
-        """
-        使用以下文档和对话历史来提取相关信息。
-
-        文档:
-        <documents>
-        {% for doc in documents %}
-        {{ doc }}
-        {% endfor %}
-        </documents>
-
-        对话历史:
-        <conversations>
-        {% for msg in conversations %}
-        [{{ msg.role }}]:
-        {{ msg.content }}
-
-        {% endfor %}
-        </conversations>
-
-        请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
-        如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
-        提取的信息尽量保持和原文中的一样,并且只输出这些信息。
-        """
+        return self.tokenizer.count_tokens(text)
 
     def _get_document_retriever_class(self):
         """Get the document retriever class based on configuration."""
@@ -338,7 +297,8 @@ class LongContextRAG:
                 conversations=[{"role": "user", "content": target_query}]
             )
             url = ",".join(contexts)
-
+            result = (item for (item,_) in v)
+            return [SourceCode(module_name=f"RAG:{url}", source_code="".join(result))]
 
     def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
         query = conversations[-1]["content"]
@@ -407,6 +367,12 @@ class LongContextRAG:
         llm_config: Dict[str, Any] = {},
         extra_request_params: Dict[str, Any] = {}
     ):
+        if not llm_config:
+            llm_config = {}
+
+        if extra_request_params:
+            llm_config.update(extra_request_params)
+
         conversations = OpenAIContentProcessor.process_conversations(conversations)
         if self.client:
             model = model or self.args.model
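The hunk above normalizes the two dict parameters at the top of the call: an empty llm_config default is replaced with a fresh dict, and any extra_request_params are merged into it. A minimal standalone sketch of that normalize-then-merge pattern (the function name and values below are illustrative, not code from the package):

def build_llm_config(llm_config=None, extra_request_params=None):
    # Replace a falsy or shared-default dict with a fresh one.
    if not llm_config:
        llm_config = {}
    # Per-request extras extend or override the base config.
    if extra_request_params:
        llm_config.update(extra_request_params)
    return llm_config

# Example: prints {'temperature': 0.7, 'top_p': 0.9}
print(build_llm_config({"temperature": 0.7}, {"top_p": 0.9}))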
@@ -423,31 +389,8 @@ class LongContextRAG:
         if self.llm.get_sub_client("qa_model"):
             target_llm = self.llm.get_sub_client("qa_model")
 
-        query = conversations[-1]["content"]
-
+        query = conversations[-1]["content"]
         context = []
-
-        if (
-            "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
-            in query
-            or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
-            in query
-        ):
-
-            chunks = target_llm.stream_chat_oai(
-                conversations=conversations,
-                model=model,
-                role_mapping=role_mapping,
-                llm_config=llm_config,
-                delta_mode=True,
-                extra_request_params=extra_request_params
-            )
-
-            def generate_chunks():
-                for chunk in chunks:
-                    yield chunk
-            return generate_chunks(), context
-
         try:
             request_params = json.loads(query)
             if "request_id" in request_params:
@@ -539,352 +482,436 @@ class LongContextRAG:
 
         context = []
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return self._generate_sream(
+            conversations=conversations,
+            query=query,
+            only_contexts=only_contexts,
+            start_time=start_time,
+            rag_stat=rag_stat,
+            context=context,
+            target_llm=target_llm,
+            model=model,
+            role_mapping=role_mapping,
+            llm_config=llm_config,
+            extra_request_params=extra_request_params
+        ), context
+
+
+    def _generate_sream(
+        self,
+        conversations,
+        query,
+        only_contexts,
+        start_time,
+        rag_stat,
+        context,
+        target_llm,
+        model=None,
+        role_mapping=None,
+        llm_config=None,
+        extra_request_params=None
+    ):
+        """将RAG流程分为三个主要阶段的生成器函数"""
+        # 第一阶段:文档召回和过滤
+        doc_retrieval_generator = self._process_document_retrieval(
+            conversations=conversations,
+            query=query,
+            rag_stat=rag_stat
+        )
+
+        # 处理第一阶段结果
+        for item in doc_retrieval_generator:
+            if isinstance(item, tuple) and len(item) == 2:
+                # 正常的生成器项,包含yield内容和元数据
+                yield item
+            elif isinstance(item, dict) and "result" in item:
+                # 如果是只返回上下文的情况
+                if only_contexts:
+                    try:
+                        searcher = SearchableResults()
+                        result = searcher.reorder(docs=item["result"])
+                        yield (json.dumps(result.model_dump(), ensure_ascii=False), SingleOutputMeta(
+                            input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                            generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                        ))
+                        return
+                    except Exception as e:
+                        yield (str(e), SingleOutputMeta(
+                            input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                            generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                        ))
+                        return
+
+                # 如果没有找到相关文档
+                if not item["result"]:
+                    yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(
+                        input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
                     ))
-
-
-
-
-
-
-
-
-
-
-
-
-            reasoning_content=get_message_with_format_and_newline(
-                "rag_docs_filter_result",
-                filter_time=filter_time,
-                docs_num=len(relevant_docs),
-                input_tokens=rag_stat.recall_stat.total_input_tokens,
-                output_tokens=rag_stat.recall_stat.total_generated_tokens,
-                model=rag_stat.recall_stat.model_name
-            )
-        ))
-
-        # Filter relevant_docs to only include those with is_relevant=True
-        highly_relevant_docs = [
-            doc for doc in relevant_docs if doc.relevance.is_relevant
-        ]
-
-        if highly_relevant_docs:
-            relevant_docs = highly_relevant_docs
-            logger.info(
-                f"Found {len(relevant_docs)} highly relevant documents")
-
-        logger.info(
-            f"Filter time: {filter_time:.2f} seconds with {len(relevant_docs)} docs"
-        )
-
-        if only_contexts:
-            try:
-                searcher = SearchableResults()
-                result = searcher.reorder(docs=relevant_docs)
-                yield (json.dumps(result.model_dump(), ensure_ascii=False), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                       generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                       rag_stat.chunk_stat.total_generated_tokens,
-                       ))
-            except Exception as e:
-                yield (str(e), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                       generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                       rag_stat.chunk_stat.total_generated_tokens,
-                       ))
-                return
-
-        if not relevant_docs:
-            yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                   generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                   rag_stat.chunk_stat.total_generated_tokens,
-                   ))
-            return
-
-        context = [doc.source_code.module_name for doc in relevant_docs]
-
-        yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-               generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-               rag_stat.chunk_stat.total_generated_tokens,
-               reasoning_content=get_message_with_format_and_newline(
-                   "context_docs_names",
-                   context_docs_names=",".join(
-                       context))
-               ))
-
-        # 将 FilterDoc 转化为 SourceCode 方便后续的逻辑继续做处理
-        relevant_docs = [doc.source_code for doc in relevant_docs]
-
-        logger.info(f"=== RAG Search Results ===")
-        logger.info(f"Query: {query}")
-        logger.info(f"Found relevant docs: {len(relevant_docs)}")
-
-        # 记录相关文档信息
-        relevant_docs_info = []
-        for i, doc in enumerate(relevant_docs):
-            doc_path = doc.module_name.replace(self.path, '', 1)
-            info = f"{i+1}. {doc_path}"
-            if "original_docs" in doc.metadata:
-                original_docs = ", ".join(
-                    [
-                        doc.replace(self.path, "", 1)
-                        for doc in doc.metadata["original_docs"]
-                    ]
+                    return
+
+                # 更新上下文
+                context.extend([doc.source_code.module_name for doc in item["result"]])
+
+                # 输出上下文文档名称
+                yield ("", SingleOutputMeta(
+                    input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                    generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                    reasoning_content=get_message_with_format_and_newline(
+                        "context_docs_names",
+                        context_docs_names=",".join(context)
                     )
-
-
-
-            logger.info(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                ))
+
+                # 记录信息到日志
+                logger.info(f"=== RAG Search Results ===")
+                logger.info(f"Query: {query}")
+                relevant_docs = [doc.source_code for doc in item["result"]]
+                logger.info(f"Found relevant docs: {len(relevant_docs)}")
+
+                # 记录相关文档信息
+                relevant_docs_info = []
+                for i, doc in enumerate(relevant_docs):
+                    doc_path = doc.module_name.replace(self.path, '', 1)
+                    info = f"{i+1}. {doc_path}"
+                    if "original_docs" in doc.metadata:
+                        original_docs = ", ".join(
+                            [
+                                doc.replace(self.path, "", 1)
+                                for doc in doc.metadata["original_docs"]
+                            ]
+                        )
+                        info += f" (Original docs: {original_docs})"
+                    relevant_docs_info.append(info)
+
+                if relevant_docs_info:
+                    logger.info(
+                        f"Relevant documents list:"
+                        + "".join([f"\n * {info}" for info in relevant_docs_info])
+                    )
+
+                # 第二阶段:文档分块与重排序
+                doc_chunking_generator = self._process_document_chunking(
                     relevant_docs=relevant_docs,
                     conversations=conversations,
-
+                    rag_stat=rag_stat,
+                    filter_time=(time.time() - start_time)
                 )
+
+                for chunking_item in doc_chunking_generator:
+                    if isinstance(chunking_item, tuple) and len(chunking_item) == 2:
+                        # 正常的生成器项
+                        yield chunking_item
+                    elif isinstance(chunking_item, dict) and "result" in chunking_item:
+                        processed_docs = chunking_item["result"]
+                        filter_time = chunking_item.get("filter_time", 0)
+                        first_round_full_docs = chunking_item.get("first_round_full_docs", [])
+                        second_round_extracted_docs = chunking_item.get("second_round_extracted_docs", [])
+                        sencond_round_time = chunking_item.get("sencond_round_time", 0)
+
+                        # 记录最终选择的文档详情
+                        final_relevant_docs_info = []
+                        for i, doc in enumerate(processed_docs):
+                            doc_path = doc.module_name.replace(self.path, '', 1)
+                            info = f"{i+1}. {doc_path}"
+
+                            metadata_info = []
+                            if "original_docs" in doc.metadata:
+                                original_docs = ", ".join(
+                                    [
+                                        od.replace(self.path, "", 1)
+                                        for od in doc.metadata["original_docs"]
+                                    ]
+                                )
+                                metadata_info.append(f"Original docs: {original_docs}")
+
+                            if "chunk_ranges" in doc.metadata:
+                                chunk_ranges = json.dumps(
+                                    doc.metadata["chunk_ranges"], ensure_ascii=False
+                                )
+                                metadata_info.append(f"Chunk ranges: {chunk_ranges}")
+
+                            if "processing_time" in doc.metadata:
+                                metadata_info.append(
+                                    f"Processing time: {doc.metadata['processing_time']:.2f}s")
+
+                            if metadata_info:
+                                info += f" ({'; '.join(metadata_info)})"
+
+                            final_relevant_docs_info.append(info)
+
+                        if final_relevant_docs_info:
+                            logger.info(
+                                f"Final documents to be sent to model:"
+                                + "".join([f"\n * {info}" for info in final_relevant_docs_info])
+                            )
+
+                        # 记录令牌统计
+                        request_tokens = sum([count_tokens(doc.source_code) for doc in processed_docs])
+                        target_model = target_llm.default_model_name
+                        logger.info(
+                            f"=== LLM Request ===\n"
+                            f" * Target model: {target_model}\n"
+                            f" * Total tokens: {request_tokens}"
+                        )
+
+                        logger.info(
+                            f"Start to send to model {target_model} with {request_tokens} tokens")
+
+                        yield ("", SingleOutputMeta(
+                            input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                            generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                            reasoning_content=get_message_with_format_and_newline(
+                                "send_to_model",
+                                model=target_model,
+                                tokens=request_tokens
+                            )
+                        ))
+
+                        yield ("", SingleOutputMeta(
+                            input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                            generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens,
+                            reasoning_content="qa_model_thinking"
+                        ))
+
+                        # 第三阶段:大模型问答生成
+                        qa_generation_generator = self._process_qa_generation(
+                            relevant_docs=processed_docs,
+                            conversations=conversations,
+                            target_llm=target_llm,
+                            rag_stat=rag_stat,
+                            model=model,
+                            role_mapping=role_mapping,
+                            llm_config=llm_config,
+                            extra_request_params=extra_request_params
+                        )
+
+                        for gen_item in qa_generation_generator:
+                            yield gen_item
+
+                        # 打印最终的统计信息
+                        self._print_rag_stats(rag_stat)
+                        return
+
+    def _process_document_retrieval(self, conversations,
+                                    query, rag_stat):
+        """第一阶段:文档召回和过滤"""
+        yield ("", SingleOutputMeta(
+            input_tokens_count=0,
+            generated_tokens_count=0,
+            reasoning_content=get_message_with_format_and_newline(
+                "rag_searching_docs",
+                model=rag_stat.recall_stat.model_name
+            )
+        ))
+
+        doc_filter_result = DocFilterResult(
+            docs=[],
+            raw_docs=[],
+            input_tokens_counts=[],
+            generated_tokens_counts=[],
+            durations=[],
+            model_name=rag_stat.recall_stat.model_name
+        )
+
+        # 提取查询并检索候选文档
+        queries = extract_search_queries(
+            conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
+        documents = self._retrieve_documents(
+            options={"queries": [query] + [query.query for query in queries]})
 
-
-
-
-
-        rag_stat.chunk_stat.model_name = token_limiter_result.model_name
-
-        final_relevant_docs = token_limiter_result.docs
-        first_round_full_docs = token_limiter.first_round_full_docs
-        second_round_extracted_docs = token_limiter.second_round_extracted_docs
-        sencond_round_time = token_limiter.sencond_round_time
-
-        relevant_docs = final_relevant_docs
+        # 使用带进度报告的过滤方法
+        for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
+            if result is not None:
+                doc_filter_result = result
             else:
-
-
-
-
-
-
-
-
-
-
-
-
+                # 生成进度更新
+                yield ("", SingleOutputMeta(
+                    input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+                    generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+                    reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
+                ))
+
+        # 更新统计信息
+        rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
+        rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
+        rag_stat.recall_stat.model_name = doc_filter_result.model_name
+
+        relevant_docs = doc_filter_result.docs
+
+        yield ("", SingleOutputMeta(
+            input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+            generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+            reasoning_content=get_message_with_format_and_newline(
+                "rag_docs_filter_result",
+                filter_time=0, # 这里实际应该计算时间,但由于重构,我们需要在外部计算
+                docs_num=len(relevant_docs),
+                input_tokens=rag_stat.recall_stat.total_input_tokens,
+                output_tokens=rag_stat.recall_stat.total_generated_tokens,
+                model=rag_stat.recall_stat.model_name
             )
+        ))
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                metadata_info.append(f"Original docs: {original_docs}")
-
-            if "chunk_ranges" in doc.metadata:
-                chunk_ranges = json.dumps(
-                    doc.metadata["chunk_ranges"], ensure_ascii=False
-                )
-                metadata_info.append(f"Chunk ranges: {chunk_ranges}")
-
-            if "processing_time" in doc.metadata:
-                metadata_info.append(
-                    f"Processing time: {doc.metadata['processing_time']:.2f}s")
-
-            if metadata_info:
-                info += f" ({'; '.join(metadata_info)})"
-
-            final_relevant_docs_info.append(info)
-
-        if final_relevant_docs_info:
-            logger.info(
-                f"Final documents to be sent to model:"
-                + "".join([f"\n * {info}" for info in final_relevant_docs_info])
-            )
-
-        # 记录令牌统计
-        request_tokens = sum([count_tokens(doc.source_code) for doc in relevant_docs])
-        target_model = target_llm.default_model_name
-        logger.info(
-            f"=== LLM Request ===\n"
-            f" * Target model: {target_model}\n"
-            f" * Total tokens: {request_tokens}"
+        # 仅保留高相关性文档
+        highly_relevant_docs = [doc for doc in relevant_docs if doc.relevance.is_relevant]
+        if highly_relevant_docs:
+            relevant_docs = highly_relevant_docs
+            logger.info(f"Found {len(relevant_docs)} highly relevant documents")
+
+        # 返回结果
+        yield {"result": relevant_docs}
+
+    def _process_document_chunking(self, relevant_docs, conversations, rag_stat, filter_time):
+        """第二阶段:文档分块与重排序"""
+        yield ("", SingleOutputMeta(
+            generated_tokens_count=0,
+            reasoning_content=get_message_with_format_and_newline(
+                "dynamic_chunking_start",
+                model=rag_stat.chunk_stat.model_name
+            )
+        ))
+
+        # 默认值
+        first_round_full_docs = []
+        second_round_extracted_docs = []
+        sencond_round_time = 0
+
+        if self.tokenizer is not None:
+            token_limiter = TokenLimiter(
+                count_tokens=self.count_tokens,
+                full_text_limit=self.full_text_limit,
+                segment_limit=self.segment_limit,
+                buff_limit=self.buff_limit,
+                llm=self.llm,
+                disable_segment_reorder=self.args.disable_segment_reorder,
             )
 
-
-
-
-
-               rag_stat.chunk_stat.total_generated_tokens,
-               reasoning_content=get_message_with_format_and_newline(
-                   "send_to_model",
-                   model=target_model,
-                   tokens=request_tokens
-               )
-               ))
-
-        yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-               generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-               rag_stat.chunk_stat.total_generated_tokens,
-               reasoning_content="qa_model_thinking"
-               ))
-
-        if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
-            llm_compute_engine = LLMComputeEngine(
-                llm=target_llm,
-                inference_enhance=not self.args.disable_inference_enhance,
-                inference_deep_thought=self.args.inference_deep_thought,
-                precision=self.args.inference_compute_precision,
-                data_cells_max_num=self.args.data_cells_max_num,
-                debug=False,
-            )
-            new_conversations = llm_compute_engine.process_conversation(
-                conversations, query, [
-                    doc.source_code for doc in relevant_docs]
-            )
-            chunks = llm_compute_engine.stream_chat_oai(
-                conversations=new_conversations,
-                model=model,
-                role_mapping=role_mapping,
-                llm_config=llm_config,
-                delta_mode=True,
-            )
-
-            for chunk in chunks:
-                if chunk[1] is not None:
-                    rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
-                    rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
-                    chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
-                        rag_stat.chunk_stat.total_input_tokens + \
-                        rag_stat.answer_stat.total_input_tokens
-                    chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
-                        rag_stat.chunk_stat.total_generated_tokens + \
-                        rag_stat.answer_stat.total_generated_tokens
-                yield chunk
-
-            self._print_rag_stats(rag_stat)
-        else:
+            token_limiter_result = token_limiter.limit_tokens(
+                relevant_docs=relevant_docs,
+                conversations=conversations,
+                index_filter_workers=self.args.index_filter_workers or 5,
+            )
 
-
-
-
-        )
+            # 更新统计信息
+            rag_stat.chunk_stat.total_input_tokens += sum(token_limiter_result.input_tokens_counts)
+            rag_stat.chunk_stat.total_generated_tokens += sum(token_limiter_result.generated_tokens_counts)
+            rag_stat.chunk_stat.model_name = token_limiter_result.model_name
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            final_relevant_docs = token_limiter_result.docs
+            first_round_full_docs = token_limiter.first_round_full_docs
+            second_round_extracted_docs = token_limiter.second_round_extracted_docs
+            sencond_round_time = token_limiter.sencond_round_time
+        else:
+            # 如果没有tokenizer,直接限制文档数量
+            final_relevant_docs = relevant_docs[: self.args.index_filter_file_num]
+
+        # 输出分块结果统计
+        yield ("", SingleOutputMeta(
+            generated_tokens_count=rag_stat.chunk_stat.total_generated_tokens + rag_stat.recall_stat.total_generated_tokens,
+            input_tokens_count=rag_stat.chunk_stat.total_input_tokens + rag_stat.recall_stat.total_input_tokens,
+            reasoning_content=get_message_with_format_and_newline(
+                "dynamic_chunking_result",
+                model=rag_stat.chunk_stat.model_name,
+                docs_num=len(final_relevant_docs),
+                filter_time=filter_time,
+                sencond_round_time=sencond_round_time,
+                first_round_full_docs=len(first_round_full_docs),
+                second_round_extracted_docs=len(second_round_extracted_docs),
+                input_tokens=rag_stat.chunk_stat.total_input_tokens,
+                output_tokens=rag_stat.chunk_stat.total_generated_tokens
+            )
+        ))
+
+        # 返回处理结果和相关统计信息
+        yield {
+            "result": final_relevant_docs,
+            "filter_time": filter_time,
+            "first_round_full_docs": first_round_full_docs,
+            "second_round_extracted_docs": second_round_extracted_docs,
+            "sencond_round_time": sencond_round_time
+        }
+
+    def _process_qa_generation(self, relevant_docs, conversations,
+                               target_llm,
+                               rag_stat,
+                               model=None,
+                               role_mapping=None,
+                               llm_config={},
+                               extra_request_params={}):
+        """第三阶段:大模型问答生成"""
+
+        # 使用LLMComputeEngine增强处理(如果可用)
+        if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
+            llm_compute_engine = LLMComputeEngine(
+                llm=target_llm,
+                inference_enhance=not self.args.disable_inference_enhance,
+                inference_deep_thought=self.args.inference_deep_thought,
+                precision=self.args.inference_compute_precision,
+                data_cells_max_num=self.args.data_cells_max_num,
+                debug=False,
+            )
+            query = conversations[-1]["content"]
+            new_conversations = llm_compute_engine.process_conversation(
+                conversations, query, [doc.source_code for doc in relevant_docs]
+            )
+            chunks = llm_compute_engine.stream_chat_oai(
+                conversations=new_conversations,
+                model=model,
+                role_mapping=role_mapping,
+                llm_config=llm_config,
+                delta_mode=True,
+            )
 
-
-
-
-
-
-
-
-
-
-
+            for chunk in chunks:
+                if chunk[1] is not None:
+                    rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+                    rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                    chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
+                        rag_stat.chunk_stat.total_input_tokens + \
+                        rag_stat.answer_stat.total_input_tokens
+                    chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
+                        rag_stat.chunk_stat.total_generated_tokens + \
+                        rag_stat.answer_stat.total_generated_tokens
+                yield chunk
+        else:
+            # 常规QA处理路径
+            qa_strategy = get_qa_strategy(self.args)
+            new_conversations = qa_strategy.create_conversation(
+                documents=[doc.source_code for doc in relevant_docs],
+                conversations=conversations, local_image_host=self.args.local_image_host
+            )
 
-
+            # 保存对话日志
+            try:
+                logger.info(f"Saving new_conversations log to {self.args.source_dir}/.cache/logs")
+                project_root = self.args.source_dir
+                json_text = json.dumps(new_conversations, ensure_ascii=False)
+                save_formatted_log(project_root, json_text, "rag_conversation")
+            except Exception as e:
+                logger.warning(f"Failed to save new_conversations log: {e}")
 
-
+            # 流式生成回答
+            chunks = target_llm.stream_chat_oai(
+                conversations=new_conversations,
+                model=model,
+                role_mapping=role_mapping,
+                llm_config=llm_config,
+                delta_mode=True,
+                extra_request_params=extra_request_params
+            )
 
-
+            # 返回结果并更新统计信息
+            for chunk in chunks:
+                if chunk[1] is not None:
+                    rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+                    rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                    chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
+                        rag_stat.chunk_stat.total_input_tokens + \
+                        rag_stat.answer_stat.total_input_tokens
+                    chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
+                        rag_stat.chunk_stat.total_generated_tokens + \
+                        rag_stat.answer_stat.total_generated_tokens
+                yield chunk
 
     def _print_rag_stats(self, rag_stat: RAGStat) -> None:
         """打印RAG执行的详细统计信息"""