PyPI - auto-coder - Versions diffs - 0.1.279__py3-none-any.whl → 0.1.280__py3-none-any.whl - Mend

auto-coder 0.1.279 (py3-none-any.whl) → 0.1.280 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (12)

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/METADATA +1 -1
{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/RECORD +12 -10
autocoder/rag/doc_filter.py +104 -29
autocoder/rag/lang.py +50 -0
autocoder/rag/long_context_rag.py +217 -102
autocoder/rag/relevant_utils.py +10 -0
autocoder/utils/stream_thinking.py +193 -0
autocoder/version.py +1 -1
{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/LICENSE +0 -0
{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/WHEEL +0 -0
{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/entry_points.txt +0 -0
{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/top_level.txt +0 -0

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.279
+Version: 0.1.280
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/RECORD RENAMED Viewed

@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
-autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
+autocoder/version.py,sha256=mNnPow60dgdANkDcEoYTXr9_lpoMQZSEy1-LRu7QFHs,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -108,14 +108,15 @@ autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwT
 autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
-autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
+autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
 autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
+autocoder/rag/lang.py,sha256=TVNx5m7OtBcdfahzI29tMj9m1yrEm32G1c1zc4ZNIPs,3130
 autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
-autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
+autocoder/rag/long_context_rag.py,sha256=3CAlf7GM-LgewS5j9XGKvsKSO4MM6M8TTkKxAGzqVY0,39308
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
-autocoder/rag/relevant_utils.py,sha256=tgTKGbojCrxuZ7dKbyPh2rCw9TIhwE6ltRxJosaA97U,1267
+autocoder/rag/relevant_utils.py,sha256=tnv_g25DDWYPGT-mpfubIyZv86_g2gPXjM4FPvdeIEE,1739
 autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
 autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
 autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
@@ -159,15 +160,16 @@ autocoder/utils/queue_communicate.py,sha256=buyEzdvab1QA4i2QKbq35rG5v_9x9PWVLWWM
 autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1vmRciKdi4,2095
 autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
 autocoder/utils/rest.py,sha256=hLBhr78y-WVnV0oQf9Rxc22EwqF78KINkScvYa1MuYA,6435
+autocoder/utils/stream_thinking.py,sha256=vbDObflBFW53eWEjMTEHf3nyL167_cqpDLh9zRx7Yk8,7015
 autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
 autocoder/utils/thread_utils.py,sha256=tv9fhFZOjI18AxVUJbpe_xjBGMpkqgDcOlz9pnDtNik,8583
 autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
-auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
-auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
-auto_coder-0.1.279.dist-info/RECORD,,
+auto_coder-0.1.280.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.280.dist-info/METADATA,sha256=SDBMvUk6v6YP7RSwlAWHFGfa3LTOUj3fky1Yz0hlFB0,2643
+auto_coder-0.1.280.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.280.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.280.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.280.dist-info/RECORD,,

autocoder/rag/doc_filter.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import time
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Generator, Tuple
 from loguru import logger
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from autocoder.rag.lang import get_message_with_format_and_newline
 from autocoder.rag.relevant_utils import (
     parse_relevance,
     FilterDoc,
     TaskTiming,
-    DocFilterResult
+    DocFilterResult,
+    ProgressUpdate
 )
 from autocoder.common import SourceCode, AutoCoderArgs
@@ -49,6 +51,7 @@ def _check_relevance_with_conversation(
     其中， <relevant> 是你认为文档中和问题的相关度，0-10之间的数字，数字越大表示相关度越高。
     """
 class DocFilter:
     def __init__(
         self,
@@ -73,10 +76,10 @@ class DocFilter:
     ) -> DocFilterResult:
         return self.filter_docs_with_threads(conversations, documents)
-    def filter_docs_with_threads(
+    def filter_docs_with_progress(
         self, conversations: List[Dict[str, str]], documents: List[SourceCode]
-    ) -> DocFilterResult:
+    ) -> Generator[Tuple[ProgressUpdate, Optional[DocFilterResult]], None, DocFilterResult]:
+        """使用线程过滤文档，同时产生进度更新"""
         start_time = time.time()
         logger.info(f"=== DocFilter Starting ===")
         logger.info(
@@ -93,6 +96,16 @@ class DocFilter:
         relevant_count = 0
         model_name = self.recall_llm.default_model_name or "unknown"
+        doc_filter_result = DocFilterResult(
+            docs=[],
+            raw_docs=[],
+            input_tokens_counts=[],
+            generated_tokens_counts=[],
+            durations=[],
+            model_name=model_name
+        )
+        relevant_docs = doc_filter_result.docs
         with ThreadPoolExecutor(
             max_workers=self.args.index_filter_workers or 5
         ) as executor:
@@ -141,16 +154,19 @@ class DocFilter:
             logger.info(
                 f"Submitted {submitted_tasks} document filtering tasks to thread pool")
+            # 发送初始进度更新
+            yield (ProgressUpdate(
+                phase="doc_filter",
+                completed=0,
+                total=len(documents),
+                relevant_count=0,
+                message=get_message_with_format_and_newline(
+                    "doc_filter_start",
+                    total=len(documents)
+                )
+            ), None)
             # 处理完成的任务
-            doc_filter_result = DocFilterResult(
-                docs=[],
-                raw_docs=[],
-                input_tokens_counts=[],
-                generated_tokens_counts=[],
-                durations=[],
-                model_name=model_name
-            )
-            relevant_docs = doc_filter_result.docs
             for future in as_completed(list(future_to_doc.keys())):
                 try:
                     doc, submit_time = future_to_doc[future]
@@ -194,32 +210,50 @@ class DocFilter:
                         f"\n  - Timing: Duration={task_timing.duration:.2f}s, Processing={task_timing.real_duration:.2f}s, Queue={queue_time:.2f}s"
                         f"\n  - Response: {v}"
                     )
                     if "rag" not in doc.metadata:
                         doc.metadata["rag"] = {}
                     doc.metadata["rag"]["recall"] = {
                         "input_tokens_count": input_tokens_count,
                         "generated_tokens_count": generated_tokens_count,
                         "recall_model": model_name,
-                        "duration": task_timing.real_duration
+                        "duration": task_timing.real_duration
                     }
-                    doc_filter_result.input_tokens_counts.append(input_tokens_count)
-                    doc_filter_result.generated_tokens_counts.append(generated_tokens_count)
-                    doc_filter_result.durations.append(task_timing.real_duration)
+                    doc_filter_result.input_tokens_counts.append(
+                        input_tokens_count)
+                    doc_filter_result.generated_tokens_counts.append(
+                        generated_tokens_count)
+                    doc_filter_result.durations.append(
+                        task_timing.real_duration)
                     new_filter_doc = FilterDoc(
-                            source_code=doc,
-                            relevance=relevance,
-                            task_timing=task_timing,
-                        )
+                        source_code=doc,
+                        relevance=relevance,
+                        task_timing=task_timing,
+                    )
                     doc_filter_result.raw_docs.append(new_filter_doc)
                     if is_relevant:
                         relevant_docs.append(
                             new_filter_doc
                         )
+                    # 产生进度更新
+                    yield (ProgressUpdate(
+                        phase="doc_filter",
+                        completed=completed_tasks,
+                        total=len(documents),
+                        relevant_count=relevant_count,
+                        message=get_message_with_format_and_newline(
+                            "doc_filter_progress",
+                            progress_percent=progress_percent,
+                            relevant_count=relevant_count,
+                            total=len(documents)
+                        )
+                    ), None)
                 except Exception as exc:
                     try:
                         doc, submit_time = future_to_doc[future]
@@ -236,7 +270,7 @@ class DocFilter:
                             FilterDoc(
                                 source_code=doc,
                                 relevance=None,
-                                task_timing=TaskTiming(),
+                                task_timing=TaskTiming(),
                             )
                         )
                     except Exception as e:
@@ -244,6 +278,18 @@ class DocFilter:
                             f"Document filtering error in task tracking: {exc}"
                         )
+                    # 报告错误进度
+                    yield (ProgressUpdate(
+                        phase="doc_filter",
+                        completed=completed_tasks,
+                        total=len(documents),
+                        relevant_count=relevant_count,
+                        message=get_message_with_format_and_newline(
+                            "doc_filter_error",
+                            error=str(exc)
+                        )
+                    ), None)
         # Sort relevant_docs by relevance score in descending order
         relevant_docs.sort(
             key=lambda x: x.relevance.relevant_score, reverse=True)
@@ -254,7 +300,7 @@ class DocFilter:
             doc.task_timing.real_duration for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
         avg_queue_time = sum(doc.task_timing.real_start_time -
                              doc.task_timing.submit_time for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
         total_input_tokens = sum(doc_filter_result.input_tokens_counts)
         total_generated_tokens = sum(doc_filter_result.generated_tokens_counts)
@@ -278,4 +324,33 @@ class DocFilter:
         else:
             logger.warning("No relevant documents found!")
-        return doc_filter_result
+        # 返回最终结果
+        yield (ProgressUpdate(
+            phase="doc_filter",
+            completed=len(documents),
+            total=len(documents),
+            relevant_count=relevant_count,
+            message=get_message_with_format_and_newline(
+                "doc_filter_complete",
+                total_time=total_time,
+                relevant_count=relevant_count
+            )
+        ), doc_filter_result)
+    def filter_docs_with_threads(
+        self, conversations: List[Dict[str, str]], documents: List[SourceCode]
+    ) -> DocFilterResult:
+        # 保持兼容性的接口
+        for _, result in self.filter_docs_with_progress(conversations, documents):
+            if result is not None:
+                return result
+        # 这是一个应急情况，不应该到达这里
+        return DocFilterResult(
+            docs=[],
+            raw_docs=[],
+            input_tokens_counts=[],
+            generated_tokens_counts=[],
+            durations=[],
+            model_name=self.recall_llm.default_model_name or "unknown"
+        )

autocoder/rag/lang.py ADDED Viewed

@@ -0,0 +1,50 @@
+import locale
+from byzerllm.utils import format_str_jinja2
+MESSAGES = {
+    "en": {
+        "rag_error_title": "RAG Error",
+        "rag_error_message": "Failed to generate response: {{error}}",
+        "rag_searching_docs": "Searching documents with {{model}}...",
+        "rag_docs_filter_result": "{{model}} processed {{docs_num}} documents, cost {{filter_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}",
+        "dynamic_chunking_start": "Dynamic chunking start with {{model}}",
+        "dynamic_chunking_result": "Dynamic chunking result with {{model}}, first round cost {{first_round_time}} seconds, second round cost {{sencond_round_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}, first round full docs: {{first_round_full_docs}}, second round extracted docs: {{second_round_extracted_docs}}",
+        "send_to_model": "Send to model {{model}} with {{tokens}} tokens",
+        "doc_filter_start": "Document filtering start, total {{total}} documents",
+        "doc_filter_progress": "Document filtering progress: {{progress_percent}}% processed {{relevant_count}}/{{total}} documents",
+        "doc_filter_error": "Document filtering error: {{error}}",
+        "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents"
+    },
+    "zh": {
+        "rag_error_title": "RAG 错误",
+        "rag_error_message": "生成响应失败: {{error}}",
+        "rag_searching_docs": "正在使用 {{model}} 搜索文档...",
+        "rag_docs_filter_result": "{{model}} 处理了 {{docs_num}} 个文档, 耗时 {{filter_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}",
+        "dynamic_chunking_start": "使用 {{model}} 进行动态分块",
+        "dynamic_chunking_result": "使用 {{model}} 进行动态分块, 第一轮耗时 {{first_round_time}} 秒, 第二轮耗时 {{sencond_round_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}, 第一轮全量文档: {{first_round_full_docs}}, 第二轮提取文档: {{second_round_extracted_docs}}",
+        "send_to_model": "发送给模型 {{model}} 的 tokens 数量预估为 {{tokens}}",
+        "doc_filter_start": "开始过滤文档，共 {{total}} 个文档",
+        "doc_filter_progress": "文档过滤进度：{{progress_percent}}%，处理了 {{relevant_count}}/{{total}} 个文档",
+        "doc_filter_error": "文档过滤错误：{{error}}",
+        "doc_filter_complete": "文档过滤完成，耗时 {{total_time}} 秒，找到 {{relevant_count}} 个相关文档"
+    }
+}
+def get_system_language():
+    try:
+        return locale.getdefaultlocale()[0][:2]
+    except:
+        return 'en'
+def get_message(key):
+    lang = get_system_language()
+    return MESSAGES.get(lang, MESSAGES['en']).get(key, MESSAGES['en'][key])
+def get_message_with_format(msg_key: str, **kwargs):
+    return format_str_jinja2(get_message(msg_key), **kwargs)
+def get_message_with_format_and_newline(msg_key: str, **kwargs):
+    return format_str_jinja2(get_message(msg_key), **kwargs) + "\n"

autocoder/rag/long_context_rag.py CHANGED Viewed

@@ -23,6 +23,8 @@ from autocoder.rag.relevant_utils import (
     FilterDoc,
     TaskTiming,
     parse_relevance,
+    ProgressUpdate,
+    DocFilterResult
 )
 from autocoder.rag.token_checker import check_token_limit
 from autocoder.rag.token_counter import RemoteTokenCounter, TokenCounter
@@ -34,14 +36,17 @@ from autocoder.rag.stream_event import event_writer
 from autocoder.rag.relevant_utils import DocFilterResult
 from pydantic import BaseModel
 from byzerllm.utils.types import SingleOutputMeta
+from autocoder.rag.lang import get_message_with_format_and_newline
-try:
+try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
     pro_version = version("auto-coder-pro")
     autocoder_version = version("auto-coder")
-    logger.warning(f"auto-coder-pro({pro_version}) plugin is enabled in auto-coder.rag({autocoder_version})")
+    logger.warning(
+        f"auto-coder-pro({pro_version}) plugin is enabled in auto-coder.rag({autocoder_version})")
 except ImportError:
-    logger.warning("Please install auto-coder-pro to enhance llm compute ability")
+    logger.warning(
+        "Please install auto-coder-pro to enhance llm compute ability")
     LLMComputeEngine = None
@@ -49,20 +54,26 @@ class RecallStat(BaseModel):
     total_input_tokens: int
     total_generated_tokens: int
     model_name: str = "unknown"
 class ChunkStat(BaseModel):
     total_input_tokens: int
-    total_generated_tokens: int
+    total_generated_tokens: int
     model_name: str = "unknown"
 class AnswerStat(BaseModel):
     total_input_tokens: int
     total_generated_tokens: int
     model_name: str = "unknown"
 class RAGStat(BaseModel):
     recall_stat: RecallStat
     chunk_stat: ChunkStat
     answer_stat: AnswerStat
 class LongContextRAG:
     def __init__(
         self,
@@ -86,7 +97,7 @@ class LongContextRAG:
             self.chunk_llm = self.llm.get_sub_client("chunk_model")
         self.args = args
         self.path = path
         self.relevant_score = self.args.rag_doc_filter_relevance or 5
@@ -99,8 +110,10 @@ class LongContextRAG:
                 "The sum of full_text_ratio and segment_ratio must be less than or equal to 1.0"
             )
-        self.full_text_limit = int(args.rag_context_window_limit * self.full_text_ratio)
-        self.segment_limit = int(args.rag_context_window_limit * self.segment_ratio)
+        self.full_text_limit = int(
+            args.rag_context_window_limit * self.full_text_ratio)
+        self.segment_limit = int(
+            args.rag_context_window_limit * self.segment_ratio)
         self.buff_limit = int(args.rag_context_window_limit * self.buff_ratio)
         self.tokenizer = None
@@ -109,7 +122,8 @@ class LongContextRAG:
         if self.tokenizer_path:
             VariableHolder.TOKENIZER_PATH = self.tokenizer_path
-            VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(self.tokenizer_path)
+            VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(
+                self.tokenizer_path)
             self.tokenizer = TokenCounter(self.tokenizer_path)
         else:
             if llm.is_model_exist("deepseek_tokenizer"):
@@ -161,9 +175,9 @@ class LongContextRAG:
             self.required_exts,
             self.on_ray,
             self.monitor_mode,
-            ## 确保全文区至少能放下一个文件
+            # 确保全文区至少能放下一个文件
             single_file_token_limit=self.full_text_limit - 100,
-            disable_auto_window=self.args.disable_auto_window,
+            disable_auto_window=self.args.disable_auto_window,
             enable_hybrid_index=self.args.enable_hybrid_index,
             extra_params=self.args
         )
@@ -224,14 +238,14 @@ class LongContextRAG:
         {% for msg in conversations %}
         [{{ msg.role }}]:
         {{ msg.content }}
         {% endfor %}
         </conversations>
         请根据提供的文档内容、用户对话历史以及最后一个问题，提取并总结文档中与问题相关的重要信息。
         如果文档中没有相关信息，请回复"该文档中没有与问题相关的信息"。
         提取的信息尽量保持和原文中的一样，并且只输出这些信息。
-        """
+        """
     @byzerllm.prompt()
     def _answer_question(
@@ -266,26 +280,25 @@ class LongContextRAG:
         """Get the document retriever class based on configuration."""
         # Default to LocalDocumentRetriever if not specified
         return LocalDocumentRetriever
     def _load_ignore_file(self):
         serveignore_path = os.path.join(self.path, ".serveignore")
         gitignore_path = os.path.join(self.path, ".gitignore")
         if os.path.exists(serveignore_path):
-            with open(serveignore_path, "r",encoding="utf-8") as ignore_file:
+            with open(serveignore_path, "r", encoding="utf-8") as ignore_file:
                 return pathspec.PathSpec.from_lines("gitwildmatch", ignore_file)
         elif os.path.exists(gitignore_path):
-            with open(gitignore_path, "r",encoding="utf-8") as ignore_file:
+            with open(gitignore_path, "r", encoding="utf-8") as ignore_file:
                 return pathspec.PathSpec.from_lines("gitwildmatch", ignore_file)
         return None
-    def _retrieve_documents(self,options:Optional[Dict[str,Any]]=None) -> Generator[SourceCode, None, None]:
+    def _retrieve_documents(self, options: Optional[Dict[str, Any]] = None) -> Generator[SourceCode, None, None]:
         return self.document_retriever.retrieve_documents(options=options)
     def build(self):
         pass
     def search(self, query: str) -> List[SourceCode]:
         target_query = query
         only_contexts = False
@@ -300,7 +313,8 @@ class LongContextRAG:
             only_contexts = True
         logger.info("Search from RAG.....")
-        logger.info(f"Query: {target_query[0:100]}... only_contexts: {only_contexts}")
+        logger.info(
+            f"Query: {target_query[0:100]}... only_contexts: {only_contexts}")
         if self.client:
             new_query = json.dumps(
@@ -316,7 +330,8 @@ class LongContextRAG:
             if not only_contexts:
                 return [SourceCode(module_name=f"RAG:{target_query}", source_code=v)]
-            json_lines = [json.loads(line) for line in v.split("\n") if line.strip()]
+            json_lines = [json.loads(line)
+                          for line in v.split("\n") if line.strip()]
             return [SourceCode.model_validate(json_line) for json_line in json_lines]
         else:
             if only_contexts:
@@ -335,7 +350,7 @@ class LongContextRAG:
     def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
         query = conversations[-1]["content"]
-        documents = self._retrieve_documents(options={"query":query})
+        documents = self._retrieve_documents(options={"query": query})
         return self.doc_filter.filter_docs(
             conversations=conversations, documents=documents
         )
@@ -360,9 +375,8 @@ class LongContextRAG:
             logger.error(f"Error in stream_chat_oai: {str(e)}")
             traceback.print_exc()
             return ["出现错误，请稍后再试。"], []
-    def _stream_chatfrom_openai_sdk(self,response):
+    def _stream_chatfrom_openai_sdk(self, response):
         for chunk in response:
             if hasattr(chunk, "usage") and chunk.usage:
                 input_tokens_count = chunk.usage.prompt_tokens
@@ -386,9 +400,9 @@ class LongContextRAG:
                 reasoning_text = chunk.choices[0].delta.reasoning_content or ""
             last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
-                                            generated_tokens_count=generated_tokens_count,
-                                            reasoning_content=reasoning_text,
-                                            finish_reason=chunk.choices[0].finish_reason)
+                                         generated_tokens_count=generated_tokens_count,
+                                         reasoning_content=reasoning_text,
+                                         finish_reason=chunk.choices[0].finish_reason)
             yield (content, last_meta)
     def _stream_chat_oai(
@@ -398,7 +412,7 @@ class LongContextRAG:
         role_mapping=None,
         llm_config: Dict[str, Any] = {},
         extra_request_params: Dict[str, Any] = {}
-    ):
+    ):
         if self.client:
             model = model or self.args.model
             response = self.client.chat.completions.create(
@@ -407,8 +421,8 @@ class LongContextRAG:
                 stream=True,
                 max_tokens=self.args.rag_params_max_tokens,
                 extra_body=extra_request_params
-            )
-            return self._stream_chatfrom_openai_sdk(response), []
+            )
+            return self._stream_chatfrom_openai_sdk(response), []
         target_llm = self.llm
         if self.llm.get_sub_client("qa_model"):
@@ -422,7 +436,7 @@ class LongContextRAG:
             in query
             or "简要总结一下对话内容，用作后续的上下文提示 prompt，控制在 200 字以内"
             in query
-        ):
+        ):
             chunks = target_llm.stream_chat_oai(
                 conversations=conversations,
@@ -432,22 +446,24 @@ class LongContextRAG:
                 delta_mode=True,
                 extra_request_params=extra_request_params
             )
             def generate_chunks():
                 for chunk in chunks:
                     yield chunk
             return generate_chunks(), context
-        try:
+        try:
             request_params = json.loads(query)
-            if "request_id" in request_params:
+            if "request_id" in request_params:
                 request_id = request_params["request_id"]
                 index = request_params["index"]
-                file_path = event_writer.get_event_file_path(request_id)
-                logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
+                file_path = event_writer.get_event_file_path(request_id)
+                logger.info(
+                    f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
                 events = []
                 if not os.path.exists(file_path):
-                    return [],context
+                    return [], context
                 with open(file_path, "r") as f:
                     for line in f:
@@ -455,8 +471,8 @@ class LongContextRAG:
                         if event["index"] >= index:
                             events.append(event)
                 return [json.dumps({
-                    "events": [event for event in events],
-                },ensure_ascii=False)], context
+                    "events": [event for event in events],
+                }, ensure_ascii=False)], context
         except json.JSONDecodeError:
             pass
@@ -465,7 +481,7 @@ class LongContextRAG:
                 llm=target_llm,
                 inference_enhance=not self.args.disable_inference_enhance,
                 inference_deep_thought=self.args.inference_deep_thought,
-                inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
+                inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
                 precision=self.args.inference_compute_precision,
                 data_cells_max_num=self.args.data_cells_max_num,
             )
@@ -474,14 +490,14 @@ class LongContextRAG:
                 conversations, query, []
             )
             chunks = llm_compute_engine.stream_chat_oai(
-                    conversations=new_conversations,
-                    model=model,
-                    role_mapping=role_mapping,
-                    llm_config=llm_config,
-                    delta_mode=True,
-                    extra_request_params=extra_request_params
-                )
+                conversations=new_conversations,
+                model=model,
+                role_mapping=role_mapping,
+                llm_config=llm_config,
+                delta_mode=True,
+                extra_request_params=extra_request_params
+            )
             def generate_chunks():
                 for chunk in chunks:
                     yield chunk
@@ -491,7 +507,6 @@ class LongContextRAG:
                 context,
             )
         only_contexts = False
         try:
             v = json.loads(query)
@@ -504,7 +519,6 @@ class LongContextRAG:
         logger.info(f"Query: {query} only_contexts: {only_contexts}")
         start_time = time.time()
         rag_stat = RAGStat(
             recall_stat=RecallStat(
@@ -525,17 +539,62 @@ class LongContextRAG:
         )
         context = []
         def generate_sream():
             nonlocal context
-            doc_filter_result = self._filter_docs(conversations)
-            rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
-            rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
+            yield ("", SingleOutputMeta(input_tokens_count=0,
+                                        generated_tokens_count=0,
+                                        reasoning_content=get_message_with_format_and_newline(
+                                            "rag_searching_docs",
+                                            model=rag_stat.recall_stat.model_name
+                                        )
+                                        ))
+            doc_filter_result = DocFilterResult(
+                docs=[],
+                raw_docs=[],
+                input_tokens_counts=[],
+                generated_tokens_counts=[],
+                durations=[],
+                model_name=rag_stat.recall_stat.model_name
+            )
+            query = conversations[-1]["content"]
+            documents = self._retrieve_documents(options={"query": query})
+            # 使用带进度报告的过滤方法
+            for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
+                if result is not None:
+                    doc_filter_result = result
+                else:
+                    # 生成进度更新
+                    yield ("", SingleOutputMeta(
+                        input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+                        reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
+                    ))
+            rag_stat.recall_stat.total_input_tokens += sum(
+                doc_filter_result.input_tokens_counts)
+            rag_stat.recall_stat.total_generated_tokens += sum(
+                doc_filter_result.generated_tokens_counts)
             rag_stat.recall_stat.model_name = doc_filter_result.model_name
             relevant_docs: List[FilterDoc] = doc_filter_result.docs
             filter_time = time.time() - start_time
+            yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+                                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+                                        reasoning_content=get_message_with_format_and_newline(
+                                            "rag_docs_filter_result",
+                                            filter_time=filter_time,
+                                            docs_num=len(relevant_docs),
+                                            input_tokens=rag_stat.recall_stat.total_input_tokens,
+                                            output_tokens=rag_stat.recall_stat.total_generated_tokens,
+                                            model=rag_stat.recall_stat.model_name
+                                        )
+                                        ))
             # Filter relevant_docs to only include those with is_relevant=True
             highly_relevant_docs = [
                 doc for doc in relevant_docs if doc.relevance.is_relevant
@@ -543,7 +602,8 @@ class LongContextRAG:
             if highly_relevant_docs:
                 relevant_docs = highly_relevant_docs
-                logger.info(f"Found {len(relevant_docs)} highly relevant documents")
+                logger.info(
+                    f"Found {len(relevant_docs)} highly relevant documents")
             logger.info(
                 f"Filter time: {filter_time:.2f} seconds with {len(relevant_docs)} docs"
@@ -553,7 +613,7 @@ class LongContextRAG:
                 final_docs = []
                 for doc in relevant_docs:
                     final_docs.append(doc.model_dump())
-                return [json.dumps(final_docs,ensure_ascii=False)], []
+                return [json.dumps(final_docs, ensure_ascii=False)], []
             if not relevant_docs:
                 return ["没有找到相关的文档来回答这个问题。"], []
@@ -588,6 +648,12 @@ class LongContextRAG:
                     + "".join([f"\n  * {info}" for info in relevant_docs_info])
                 )
+            yield ("", SingleOutputMeta(generated_tokens_count=0,
+                                        reasoning_content=get_message_with_format_and_newline(
+                                            "dynamic_chunking_start",
+                                            model=rag_stat.chunk_stat.model_name
+                                        )
+                                        ))
             first_round_full_docs = []
             second_round_extracted_docs = []
             sencond_round_time = 0
@@ -602,17 +668,19 @@ class LongContextRAG:
                     llm=self.llm,
                     disable_segment_reorder=self.args.disable_segment_reorder,
                 )
                 token_limiter_result = token_limiter.limit_tokens(
                     relevant_docs=relevant_docs,
                     conversations=conversations,
                     index_filter_workers=self.args.index_filter_workers or 5,
                 )
-                rag_stat.chunk_stat.total_input_tokens += sum(token_limiter_result.input_tokens_counts)
-                rag_stat.chunk_stat.total_generated_tokens += sum(token_limiter_result.generated_tokens_counts)
+                rag_stat.chunk_stat.total_input_tokens += sum(
+                    token_limiter_result.input_tokens_counts)
+                rag_stat.chunk_stat.total_generated_tokens += sum(
+                    token_limiter_result.generated_tokens_counts)
                 rag_stat.chunk_stat.model_name = token_limiter_result.model_name
                 final_relevant_docs = token_limiter_result.docs
                 first_round_full_docs = token_limiter.first_round_full_docs
                 second_round_extracted_docs = token_limiter.second_round_extracted_docs
@@ -623,24 +691,41 @@ class LongContextRAG:
                 relevant_docs = relevant_docs[: self.args.index_filter_file_num]
             logger.info(f"Finally send to model: {len(relevant_docs)}")
             # 记录分段处理的统计信息
             logger.info(
                 f"=== Token Management ===\n"
                 f"  * Only contexts: {only_contexts}\n"
-                f"  * Filter time: {filter_time:.2f} seconds\n"
+                f"  * Filter time: {filter_time:.2f} seconds\n"
                 f"  * Final relevant docs: {len(relevant_docs)}\n"
                 f"  * First round full docs: {len(first_round_full_docs)}\n"
                 f"  * Second round extracted docs: {len(second_round_extracted_docs)}\n"
                 f"  * Second round time: {sencond_round_time:.2f} seconds"
             )
+            yield ("", SingleOutputMeta(generated_tokens_count=rag_stat.chunk_stat.total_generated_tokens + rag_stat.recall_stat.total_generated_tokens,
+                                        input_tokens_count=rag_stat.chunk_stat.total_input_tokens +
+                                        rag_stat.recall_stat.total_input_tokens,
+                                        reasoning_content=get_message_with_format_and_newline(
+                                            "dynamic_chunking_result",
+                                            model=rag_stat.chunk_stat.model_name,
+                                            docs_num=len(relevant_docs),
+                                            filter_time=filter_time,
+                                            sencond_round_time=sencond_round_time,
+                                            first_round_full_docs=len(
+                                                first_round_full_docs),
+                                            second_round_extracted_docs=len(
+                                                second_round_extracted_docs),
+                                            input_tokens=rag_stat.chunk_stat.total_input_tokens,
+                                            output_tokens=rag_stat.chunk_stat.total_generated_tokens
+                                        )
+                                        ))
             # 记录最终选择的文档详情
             final_relevant_docs_info = []
             for i, doc in enumerate(relevant_docs):
                 doc_path = doc.module_name.replace(self.path, '', 1)
                 info = f"{i+1}. {doc_path}"
                 metadata_info = []
                 if "original_docs" in doc.metadata:
                     original_docs = ", ".join(
@@ -650,26 +735,27 @@ class LongContextRAG:
                         ]
                     )
                     metadata_info.append(f"Original docs: {original_docs}")
                 if "chunk_ranges" in doc.metadata:
                     chunk_ranges = json.dumps(
                         doc.metadata["chunk_ranges"], ensure_ascii=False
                     )
                     metadata_info.append(f"Chunk ranges: {chunk_ranges}")
                 if "processing_time" in doc.metadata:
-                    metadata_info.append(f"Processing time: {doc.metadata['processing_time']:.2f}s")
+                    metadata_info.append(
+                        f"Processing time: {doc.metadata['processing_time']:.2f}s")
                 if metadata_info:
                     info += f" ({'; '.join(metadata_info)})"
                 final_relevant_docs_info.append(info)
             if final_relevant_docs_info:
                 logger.info(
                     f"Final documents to be sent to model:"
                     + "".join([f"\n  * {info}" for info in final_relevant_docs_info])
-            )
+                )
             # 记录令牌统计
             request_tokens = sum([doc.tokens for doc in relevant_docs])
@@ -680,7 +766,18 @@ class LongContextRAG:
                 f"  * Total tokens: {request_tokens}"
             )
-            logger.info(f"Start to send to model {target_model} with {request_tokens} tokens")
+            logger.info(
+                f"Start to send to model {target_model} with {request_tokens} tokens")
+            yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
+                                        rag_stat.chunk_stat.total_generated_tokens,
+                                        reasoning_content=get_message_with_format_and_newline(
+                                            "send_to_model",
+                                            model=target_model,
+                                            tokens=request_tokens
+                                        )
+                                        ))
             if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
                 llm_compute_engine = LLMComputeEngine(
@@ -692,33 +789,42 @@ class LongContextRAG:
                     debug=False,
                 )
                 new_conversations = llm_compute_engine.process_conversation(
-                    conversations, query, [doc.source_code for doc in relevant_docs]
+                    conversations, query, [
+                        doc.source_code for doc in relevant_docs]
                 )
                 chunks = llm_compute_engine.stream_chat_oai(
-                        conversations=new_conversations,
-                        model=model,
-                        role_mapping=role_mapping,
-                        llm_config=llm_config,
-                        delta_mode=True,
-                    )
+                    conversations=new_conversations,
+                    model=model,
+                    role_mapping=role_mapping,
+                    llm_config=llm_config,
+                    delta_mode=True,
+                )
                 for chunk in chunks:
-                    yield chunk
                     if chunk[1] is not None:
                         rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
-                        rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
-                self._print_rag_stats(rag_stat)
-            else:
+                        rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                        chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
+                            rag_stat.chunk_stat.total_input_tokens + \
+                            rag_stat.answer_stat.total_input_tokens
+                        chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
+                            rag_stat.chunk_stat.total_generated_tokens + \
+                            rag_stat.answer_stat.total_generated_tokens
+                    yield chunk
+                self._print_rag_stats(rag_stat)
+            else:
                 new_conversations = conversations[:-1] + [
                     {
                         "role": "user",
                         "content": self._answer_question.prompt(
                             query=query,
-                            relevant_docs=[doc.source_code for doc in relevant_docs],
+                            relevant_docs=[
+                                doc.source_code for doc in relevant_docs],
                         ),
                     }
                 ]
                 chunks = target_llm.stream_chat_oai(
                     conversations=new_conversations,
                     model=model,
@@ -727,17 +833,22 @@ class LongContextRAG:
                     delta_mode=True,
                     extra_request_params=extra_request_params
                 )
                 for chunk in chunks:
-                    yield chunk
                     if chunk[1] is not None:
                         rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
-                        rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
-                self._print_rag_stats(rag_stat)
+                        rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                        chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
+                            rag_stat.chunk_stat.total_input_tokens + \
+                            rag_stat.answer_stat.total_input_tokens
+                        chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
+                            rag_stat.chunk_stat.total_generated_tokens + \
+                            rag_stat.answer_stat.total_generated_tokens
+                    yield chunk
-        return generate_sream(),context
+                self._print_rag_stats(rag_stat)
+        return generate_sream(), context
     def _print_rag_stats(self, rag_stat: RAGStat) -> None:
         """打印RAG执行的详细统计信息"""
@@ -748,19 +859,22 @@ class LongContextRAG:
         )
         total_generated_tokens = (
             rag_stat.recall_stat.total_generated_tokens +
-            rag_stat.chunk_stat.total_generated_tokens +
+            rag_stat.chunk_stat.total_generated_tokens +
             rag_stat.answer_stat.total_generated_tokens
         )
         total_tokens = total_input_tokens + total_generated_tokens
         # 避免除以零错误
         if total_tokens == 0:
             recall_percent = chunk_percent = answer_percent = 0
         else:
-            recall_percent = (rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
-            chunk_percent = (rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
-            answer_percent = (rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
+            recall_percent = (rag_stat.recall_stat.total_input_tokens +
+                              rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
+            chunk_percent = (rag_stat.chunk_stat.total_input_tokens +
+                             rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
+            answer_percent = (rag_stat.answer_stat.total_input_tokens +
+                              rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
         logger.info(
             f"=== RAG 执行统计信息 ===\n"
             f"总令牌使用: {total_tokens} 令牌\n"
@@ -791,21 +905,22 @@ class LongContextRAG:
             f"  - 文档分块: {chunk_percent:.1f}%\n"
             f"  - 答案生成: {answer_percent:.1f}%\n"
         )
         # 记录原始统计数据，以便调试
         logger.debug(f"RAG Stat 原始数据: {rag_stat}")
         # 返回成本估算
-        estimated_cost = self._estimate_token_cost(total_input_tokens, total_generated_tokens)
+        estimated_cost = self._estimate_token_cost(
+            total_input_tokens, total_generated_tokens)
         if estimated_cost > 0:
             logger.info(f"估计成本: 约 ${estimated_cost:.4f} 人民币")
     def _estimate_token_cost(self, input_tokens: int, output_tokens: int) -> float:
-        """估算当前请求的令牌成本（人民币）"""
-        # 实际应用中，可以根据不同模型设置不同价格
-        input_cost_per_1m = 2.0/1000000   # 每百万输入令牌的成本
-        output_cost_per_1m = 8.0/100000   # 每百万输出令牌的成本
-        cost = (input_tokens * input_cost_per_1m / 1000000) + (output_tokens* output_cost_per_1m/1000000)
+        """Estimate the token cost of the current request, in RMB.
+        Args:
+            input_tokens: Number of input (prompt) tokens consumed.
+            output_tokens: Number of generated (output) tokens.
+        Returns:
+            The estimated cost as a float.
+        """
+        # Flat placeholder prices; a real deployment could price per model.
+        input_cost_per_1m = 2.0   # cost per one million input tokens
+        output_cost_per_1m = 8.0  # cost per one million output tokens
+        # The prices are per million tokens, so scale by 1,000,000 exactly
+        # once, here, rather than also pre-dividing the constants.
+        cost = (input_tokens * input_cost_per_1m / 1000000) + \
+            (output_tokens * output_cost_per_1m / 1000000)
         return cost

autocoder/rag/relevant_utils.py CHANGED Viewed

@@ -34,6 +34,16 @@ class DocFilterResult(BaseModel):
     model_name: str = "unknown"
+class ProgressUpdate:
+    """Progress report emitted while a processing phase is running."""
+    def __init__(self, phase: str, completed: int, total: int, relevant_count: int, message: str):
+        # phase: current processing stage, e.g. doc_filter or token_check
+        # completed / total: number of finished tasks out of all tasks
+        self.phase, self.completed, self.total = phase, completed, total
+        # relevant_count: number of relevant documents found
+        # message: progress message text
+        self.relevant_count, self.message = relevant_count, message
 def parse_relevance(text: Optional[str]) -> Optional[DocRelevance]:
     if text is None:
         return None

autocoder/utils/stream_thinking.py ADDED Viewed

@@ -0,0 +1,193 @@
+import inspect
+def stream_with_thinking(response):
+    """
+    Flatten an OpenAI-style streaming response into a single text stream.
+    Regular content is yielded unchanged; each run of reasoning_content is
+    wrapped between a <thinking> opening marker and a </thinking> closing marker.
+    Chunks that carry no choices are skipped.
+    Args:
+        response: An iterable of streaming chunks exposing choices[0].delta
+    Yields:
+        str: Formatted output with thinking sections marked
+    """
+    start_mark = "<thinking>\n"
+    end_mark = "\n</thinking>\n"
+    is_thinking = False  # True while a thinking section is open
+    for chunk in response:
+        # A chunk may have an empty choices list (for example a usage-only
+        # chunk); it has no text to emit, so skip it instead of failing on
+        # choices[0].
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            continue
+        delta = choices[0].delta
+        content = getattr(delta, "content", None)
+        if content:
+            # Regular content closes any thinking section that is still open.
+            if is_thinking:
+                yield end_mark
+                is_thinking = False
+            yield content
+            continue
+        reasoning = getattr(delta, "reasoning_content", None)
+        if reasoning:
+            # The first reasoning text of a run opens the thinking section.
+            if not is_thinking:
+                yield start_mark
+                is_thinking = True
+            yield reasoning
+    # Close a thinking section that is still open when the stream ends.
+    if is_thinking:
+        yield end_mark
+async def stream_with_thinking_async(response):
+    """
+    Flatten an OpenAI-style async streaming response into a single text stream.
+    Regular content is yielded unchanged; each run of reasoning_content is
+    wrapped between a <thinking> opening marker and a </thinking> closing marker.
+    Chunks that carry no choices are skipped.
+    Args:
+        response: An async iterable of streaming chunks exposing choices[0].delta
+    Yields:
+        str: Formatted output with thinking sections marked
+    """
+    start_mark = "<thinking>\n"
+    end_mark = "\n</thinking>\n"
+    is_thinking = False  # True while a thinking section is open
+    async for chunk in response:
+        # A chunk may have an empty choices list (for example a usage-only
+        # chunk); it has no text to emit, so skip it instead of failing on
+        # choices[0].
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            continue
+        delta = choices[0].delta
+        content = getattr(delta, "content", None)
+        if content:
+            # Regular content closes any thinking section that is still open.
+            if is_thinking:
+                yield end_mark
+                is_thinking = False
+            yield content
+            continue
+        reasoning = getattr(delta, "reasoning_content", None)
+        if reasoning:
+            # The first reasoning text of a run opens the thinking section.
+            if not is_thinking:
+                yield start_mark
+                is_thinking = True
+            yield reasoning
+    # Close a thinking section that is still open when the stream ends.
+    if is_thinking:
+        yield end_mark
+def process_streaming_response(response):
+    """
+    Wrap a streaming response with the matching thinking-aware formatter.
+    A response that supports asynchronous iteration (it defines __aiter__) is
+    passed to stream_with_thinking_async; anything else is treated as a
+    synchronous iterable and passed to stream_with_thinking.
+    Args:
+        response: A synchronous or asynchronous streaming response
+    Returns:
+        A generator (consume with `for`) or an async generator (consume with
+        `async for` inside a coroutine) that yields formatted output
+    """
+    # inspect.isasyncgen() only recognises native async generator objects, so
+    # test for the async-iteration protocol itself; this also covers stream
+    # wrapper objects that implement __aiter__ without being generators.
+    if hasattr(response, "__aiter__"):
+        return stream_with_thinking_async(response)
+    return stream_with_thinking(response)
+def print_streaming_response(response):
+    """
+    Write a streaming response to standard output as it arrives, using the
+    thinking-section markers produced by stream_with_thinking.
+    Args:
+        response: An OpenAI streaming response
+    """
+    formatted = stream_with_thinking(response)
+    for piece in formatted:
+        # Flush after every piece so text shows up while the stream is live.
+        print(piece, end="", flush=True)
+async def print_streaming_response_async(response):
+    """
+    Write an async streaming response to standard output as it arrives, using
+    the thinking-section markers produced by stream_with_thinking_async.
+    Args:
+        response: An OpenAI async streaming response
+    """
+    formatted = stream_with_thinking_async(response)
+    async for piece in formatted:
+        # Flush after every piece so text shows up while the stream is live.
+        print(piece, end="", flush=True)
+def separate_stream_thinking(response):
+    """
+    Split an OpenAI-style streaming response into two generators: one for
+    reasoning text and one for regular content.
+    Both generators read from the same underlying stream. Consume the thinking
+    generator first: it stops at the first content chunk and hands that chunk
+    over to the content generator. Chunks that carry no choices are skipped.
+    Args:
+        response: An iterable of streaming chunks (generator or sequence)
+    Returns:
+        tuple: (thinking_generator, content_generator)
+    """
+    # Share a single iterator so the content generator resumes where the
+    # thinking generator stopped, even when response is a re-iterable
+    # sequence such as a list (which would otherwise restart from the top).
+    stream = iter(response)
+    pending_content_chunk = None
+    def _first_delta(chunk):
+        # Chunks with an empty choices list carry no text; report no delta.
+        choices = getattr(chunk, "choices", None)
+        return choices[0].delta if choices else None
+    def thinking_generator():
+        nonlocal pending_content_chunk
+        for chunk in stream:
+            delta = _first_delta(chunk)
+            reasoning = getattr(delta, "reasoning_content", None)
+            if reasoning:
+                yield reasoning
+            elif getattr(delta, "content", None):
+                # Reasoning is over: keep this chunk for the content generator
+                # and stop without consuming anything further.
+                pending_content_chunk = chunk
+                break
+    def content_generator():
+        nonlocal pending_content_chunk
+        # First emit the content chunk handed over by the thinking generator.
+        if pending_content_chunk is not None:
+            yield pending_content_chunk.choices[0].delta.content
+            pending_content_chunk = None
+        # Then continue with the rest of the stream.
+        for chunk in stream:
+            content = getattr(_first_delta(chunk), "content", None)
+            if content:
+                yield content
+    return thinking_generator(), content_generator()
+async def separate_stream_thinking_async(response):
+    """
+    Split an OpenAI-style async streaming response into two async generators:
+    one for reasoning text and one for regular content.
+    This is a coroutine function, so await it to obtain the generator pair.
+    Consume the thinking generator first: it stops at the first content chunk
+    and hands that chunk over to the content generator. Chunks that carry no
+    choices are skipped.
+    Args:
+        response: An OpenAI async streaming response
+    Returns:
+        tuple: (thinking_generator, content_generator)
+    """
+    pending_content_chunk = None
+    def _first_delta(chunk):
+        # Chunks with an empty choices list carry no text; report no delta.
+        choices = getattr(chunk, "choices", None)
+        return choices[0].delta if choices else None
+    async def thinking_generator():
+        nonlocal pending_content_chunk
+        async for chunk in response:
+            delta = _first_delta(chunk)
+            reasoning = getattr(delta, "reasoning_content", None)
+            if reasoning:
+                yield reasoning
+            elif getattr(delta, "content", None):
+                # Reasoning is over: keep this chunk for the content generator
+                # and stop without consuming anything further.
+                pending_content_chunk = chunk
+                break
+    async def content_generator():
+        nonlocal pending_content_chunk
+        # First emit the content chunk handed over by the thinking generator.
+        if pending_content_chunk is not None:
+            yield pending_content_chunk.choices[0].delta.content
+            pending_content_chunk = None
+        # Then continue with the rest of the response.
+        async for chunk in response:
+            content = getattr(_first_delta(chunk), "content", None)
+            if content:
+                yield content
+    return thinking_generator(), content_generator()

autocoder/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.1.279"
1	+ __version__ = "0.1.280"

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/LICENSE RENAMED Viewed

File without changes

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/WHEEL RENAMED Viewed

File without changes

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/top_level.txt RENAMED Viewed

File without changes

auto-coder 0.1.279__py3-none-any.whl → 0.1.280__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.279py3-none-any.whl → 0.1.280py3-none-any.whl