auto-coder 0.1.277__py3-none-any.whl → 0.1.279__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.

Potentially problematic release.

This version of auto-coder might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.277
+ Version: 0.1.279
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
  Requires-Dist: jupyter-client
  Requires-Dist: prompt-toolkit
  Requires-Dist: tokenizers
- Requires-Dist: byzerllm[saas] >=0.1.169
+ Requires-Dist: byzerllm[saas] >=0.1.170
  Requires-Dist: patch
  Requires-Dist: diff-match-patch
  Requires-Dist: GitPython
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
- autocoder/version.py,sha256=aQmPFbK421hxX_q_qH6lzGAzBJ-yTN3E_wgJvqVGg9k,23
+ autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -26,7 +26,7 @@ autocoder/agent/planner.py,sha256=SZTSZHxHzDmuWZo3K5fs79RwvJLWurg-nbJRRNbX65o,91
  autocoder/agent/project_reader.py,sha256=tWLaPoLw1gI6kO_NzivQj28KbobU2ceOLuppHMbfGl8,18234
  autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/commands/auto_command.py,sha256=Qv5hObiIioIOAVkucFB2na6aLUuxSIgLVO9PfnXwyIo,52496
+ autocoder/commands/auto_command.py,sha256=3ZQvG_JX2oWxTv_xiXQDQwMfTAVK-Tynqo6mC9fXb60,52671
  autocoder/commands/tools.py,sha256=lanjoBGR6H8HDJSY3KrM6ibrtHZbgKX6mKJHSSE66dg,20493
  autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
  autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
@@ -75,7 +75,7 @@ autocoder/common/result_manager.py,sha256=nBcFRj5reBC7vp13M91f4B8iPW8B8OehayHlUd
  autocoder/common/screenshots.py,sha256=_gA-z1HxGjPShBrtgkdideq58MG6rqFB2qMUJKjrycs,3769
  autocoder/common/search.py,sha256=245iPFgWhMldoUK3CqCP89ltaxZiNPK73evoG6Fp1h8,16518
  autocoder/common/search_replace.py,sha256=GphFkc57Hb673CAwmbiocqTbw8vrV7TrZxtOhD0332g,22147
- autocoder/common/shells.py,sha256=-5j45qb1SVmkZaORqDZ5EM2zJ16b5QGM1wHDfBfGejk,18944
+ autocoder/common/shells.py,sha256=elminFpNosnV0hsEUcsugDxlGO8NfH96uah-8bkaBvA,19929
  autocoder/common/stats_panel.py,sha256=wGl9O45pjVVDxhNumLv4_NfLYSlUP_18Tw4hcJSjw50,4596
  autocoder/common/sys_prompt.py,sha256=JlexfjZt554faqbgkCmzOJqYUzDHfbnxly5ugFfHfEE,26403
  autocoder/common/text.py,sha256=KGRQq314GHBmY4MWG8ossRoQi1_DTotvhxchpn78c-k,1003
@@ -107,11 +107,11 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
+ autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
  autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
- autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
- autocoder/rag/long_context_rag.py,sha256=qFlNmbgQnstCSCb0SxfkMEYtZRr8p6YEc6u0jpve4Q0,32002
+ autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
+ autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -165,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- auto_coder-0.1.277.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.277.dist-info/METADATA,sha256=m2MjOLFknaEjczW5V_NfTL4jj7bikJe0jbn_tuYRfdQ,2643
- auto_coder-0.1.277.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- auto_coder-0.1.277.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.277.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.277.dist-info/RECORD,,
+ auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
+ auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.279.dist-info/RECORD,,
autocoder/commands/auto_command.py CHANGED
@@ -23,6 +23,7 @@ from autocoder.rag.token_counter import count_tokens
  from autocoder.common.global_cancel import global_cancel
  from autocoder.common.auto_configure import config_readme
  from autocoder.utils.auto_project_type import ProjectTypeAnalyzer
+ from rich.text import Text

  class CommandMessage(BaseModel):
  role: str
@@ -435,8 +436,10 @@ class CommandAutoTuner:
  "command_execution_result",
  action=action
  )
+ # 转义内容,避免Rich将内容中的[]解释为markup语法
+ text_content = Text(truncated_content)
  console.print(Panel(
- truncated_content,
+ text_content,
  title=title,
  border_style="blue",
  padding=(1, 2)
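The Chinese comment introduced above translates roughly to: "escape the content so that Rich does not interpret `[]` inside it as markup syntax." Wrapping the panel body in a `rich.text.Text` object achieves that. Below is a minimal standalone sketch (not code from the package) of the difference; passing the raw string would let Rich treat bracketed segments such as `[bold]` as style tags, which is exactly what command output should not trigger.

```python
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.text import Text

console = Console()
raw = "result: [bold]looks like markup[/bold] and [1, 2, 3]"

# A Text object is rendered character-for-character, so the brackets
# survive exactly as the command printed them.
console.print(Panel(Text(raw), title="literal output"))

# Equivalent alternative: escape the markup before printing a plain string.
console.print(Panel(escape(raw), title="escaped output"))
```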
autocoder/common/shells.py CHANGED
@@ -11,6 +11,9 @@ from rich.panel import Panel
  from rich.text import Text
  from rich.live import Live
  import getpass
+ from rich.markup import escape
+ import threading
+ import queue

  from autocoder.common.result_manager import ResultManager

@@ -356,7 +359,7 @@ def _win_code_page_to_encoding(code_page: str) -> str:

  def execute_shell_command(command: str):
  """
- Execute a shell command with cross-platform encoding support.
+ Execute a shell command with cross-platform encoding support and streaming output.

  Args:
  command (str): The shell command to execute
@@ -446,81 +449,95 @@ set PYTHONIOENCODING=utf-8
  encoding='utf-8', # 直接指定 UTF-8 编码
  errors='replace', # 处理无法解码的字符
  env=env, # 传递修改后的环境变量
- startupinfo=startupinfo
+ startupinfo=startupinfo,
+ bufsize=1, # Line buffering for immediate flushing
+ universal_newlines=True # Use text mode to handle platform line endings
  )

  # Safe decoding helper (for binary output)
- def safe_decode(byte_stream, encoding):
- if isinstance(byte_stream, str):
- return byte_stream.strip()
+ def safe_decode(text, encoding):
+ if isinstance(text, str):
+ return text.strip()
  try:
- # 首先尝试 UTF-8
- return byte_stream.decode('utf-8').strip()
+ # Try UTF-8 first
+ return text.decode('utf-8').strip()
  except UnicodeDecodeError:
  try:
- # 如果失败,尝试 GBK
- return byte_stream.decode('gbk').strip()
+ # If that fails, try GBK
+ return text.decode('gbk').strip()
  except UnicodeDecodeError:
- # 最后使用替换模式
- return byte_stream.decode(encoding, errors='replace').strip()
+ # Finally use replacement mode
+ return text.decode(encoding, errors='replace').strip()

  output = []
- with Live(console=console, refresh_per_second=4) as live:
- while True:
- # Read output streams
- output_bytes = process.stdout.readline()
- error_bytes = process.stderr.readline()
-
- # Handle standard output
- if output_bytes:
- output_line = safe_decode(output_bytes, encoding)
- output.append(output_line)
- live.update(
- Panel(
- Text("\n".join(output[-20:])),
- title="Shell Output",
- border_style="green",
- )
- )
-
- # Handle error output
- if error_bytes:
- error_line = safe_decode(error_bytes, encoding)
- output.append(f"ERROR: {error_line}")
- live.update(
- Panel(
- Text("\n".join(output[-20:])),
- title="Shell Output",
- border_style="red",
- )
- )
-
- # Check if process has ended
- if process.poll() is not None:
+ # Use direct printing for streaming output, not a Live object
+ console.print(f"[bold blue]Running command:[/bold blue] {command}")
+ console.print("[bold blue]Output streaming:[/bold blue]")
+
+ output_queue = queue.Queue()
+
+ def read_stream(stream, stream_name):
+ """Read data from stream and put in queue"""
+ for line in stream:
+ line = line.rstrip() if isinstance(line, str) else safe_decode(line, encoding)
+ prefix = "[ERROR] " if stream_name == "stderr" else ""
+ output_queue.put((stream_name, f"{prefix}{line}"))
+ output_queue.put((stream_name, None)) # Mark stream end
+
+ # Create threads to read stdout and stderr
+ stdout_thread = threading.Thread(target=read_stream, args=(process.stdout, "stdout"))
+ stderr_thread = threading.Thread(target=read_stream, args=(process.stderr, "stderr"))
+ stdout_thread.daemon = True
+ stderr_thread.daemon = True
+ stdout_thread.start()
+ stderr_thread.start()
+
+ # Track number of active streams
+ active_streams = 2
+
+ # Process output from queue
+ while active_streams > 0:
+ try:
+ stream_name, line = output_queue.get(timeout=0.1)
+ if line is None:
+ active_streams -= 1
+ continue
+
+ output.append(line)
+ # Print each line directly for true streaming output
+ if stream_name == "stderr":
+ console.print(f"[red]{line}[/red]")
+ else:
+ console.print(line)
+
+ except queue.Empty:
+ # Check if process is still running
+ if process.poll() is not None and active_streams == 2:
+ # If process ended but threads are still running, may have no output
  break
-
- # Get remaining output
- remaining_out, remaining_err = process.communicate()
- if remaining_out:
- output.append(safe_decode(remaining_out, encoding))
- if remaining_err:
- output.append(f"ERROR: {safe_decode(remaining_err, encoding)}")
+ continue
+
+ # Wait for threads to finish
+ stdout_thread.join()
+ stderr_thread.join()

- result_manager.add_result(content="\n".join(output),meta={
+ # Wait for process to end and get return code
+ return_code = process.wait()
+
+ # Compile results
+ result_content = "\n".join(output)
+ result_manager.add_result(content=result_content, meta={
  "action": "execute_shell_command",
  "input": {
  "command": command
- }
+ },
+ "return_code": return_code
  })
- # Show final output
- console.print(
- Panel(
- Text("\n".join(output)),
- title="Final Output",
- border_style="blue",
- subtitle=f"Encoding: {encoding} | OS: {sys.platform}"
- )
- )
+
+ # Show command completion info
+ completion_message = f"Command completed with return code: {return_code}"
+ style = "green" if return_code == 0 else "red"
+ console.print(f"[bold {style}]{escape(completion_message)}[/bold {style}]")

  except FileNotFoundError:
  result_manager.add_result(content=f"[bold red]Command not found:[/bold red] [yellow]{command}[/yellow]",meta={
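The rewritten body of `execute_shell_command` drops the `rich.live.Live` polling loop in favour of one daemon reader thread per pipe feeding a shared `queue.Queue`, printing lines as they arrive and recording the process return code. The following self-contained sketch shows the same pattern with plain `print` in place of the package's Rich console and `ResultManager`; all names in it are illustrative, not auto-coder's. Draining both pipes concurrently also avoids the stall a single `readline()` loop can hit when the other pipe's buffer fills up.

```python
import queue
import subprocess
import threading


def run_streaming(command: str) -> int:
    """Run a shell command, echoing stdout/stderr lines as they arrive."""
    process = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,              # decode to str for us
        encoding="utf-8",
        errors="replace",
        bufsize=1,              # line-buffered, so lines show up promptly
    )
    out_q: "queue.Queue[tuple[str, str | None]]" = queue.Queue()

    def pump(stream, name: str) -> None:
        # Each reader thread forwards its lines, then a None sentinel.
        for line in stream:
            out_q.put((name, line.rstrip()))
        out_q.put((name, None))

    threads = [
        threading.Thread(target=pump, args=(process.stdout, "stdout"), daemon=True),
        threading.Thread(target=pump, args=(process.stderr, "stderr"), daemon=True),
    ]
    for t in threads:
        t.start()

    active = len(threads)
    while active:
        name, line = out_q.get()  # blocks until a line or a sentinel arrives
        if line is None:
            active -= 1
            continue
        prefix = "[ERROR] " if name == "stderr" else ""
        print(f"{prefix}{line}")

    for t in threads:
        t.join()
    return process.wait()


if __name__ == "__main__":
    print("return code:", run_streaming("echo hello && echo oops 1>&2"))
```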
autocoder/rag/api_server.py CHANGED
@@ -103,9 +103,7 @@ async def create_chat_completion(
  NOTE: Currently we do not support the following features:
  - function_call (Users should implement this by themselves)
  - logit_bias (to be supported by vLLM engine)
- """
- # async with async_timeout.timeout(TIMEOUT):
-
+ """
  generator = await openai_serving_chat.create_chat_completion(body, request)
  if isinstance(generator, ErrorResponse):
  return JSONResponse(
autocoder/rag/llm_wrapper.py CHANGED
@@ -1,61 +1,91 @@
- from typing import Any, Dict, List, Optional, Union,Callable
- from byzerllm.utils.client.types import (
+ from typing import Any, Dict, List, Optional, Union, Callable
+ from byzerllm.utils.client.types import (
  LLMFunctionCallResponse,
- LLMClassResponse,LLMResponse
+ LLMClassResponse, LLMResponse
  )
  import pydantic
  from byzerllm import ByzerLLM
  from byzerllm.utils.client import LLMResponse
  from byzerllm.utils.types import SingleOutputMeta
  from autocoder.rag.simple_rag import SimpleRAG
+ from autocoder.rag.long_context_rag import LongContextRAG
  from loguru import logger
  from byzerllm.utils.langutil import asyncfy_with_semaphore

+
  class LLWrapper:

- def __init__(self,llm:ByzerLLM,rag:SimpleRAG):
+ def __init__(self, llm: ByzerLLM, rag: Union[SimpleRAG, LongContextRAG]):
  self.llm = llm
  self.rag = rag

  def chat_oai(self,
  conversations,
- tools:List[Union[Callable,str]]=[],
- tool_choice:Optional[Union[Callable,str]]=None,
- execute_tool:bool=False,
- impl_func:Optional[Callable]=None,
- execute_impl_func:bool=False,
- impl_func_params:Optional[Dict[str,Any]]=None,
- func_params:Optional[Dict[str,Any]]=None,
- response_class:Optional[Union[pydantic.BaseModel,str]] = None,
- response_after_chat:Optional[Union[pydantic.BaseModel,str]] = False,
- enable_default_sys_message:bool=True,
- model:Optional[str] = None,
- role_mapping=None,llm_config:Dict[str,Any]={}
- )->Union[List[LLMResponse],List[LLMFunctionCallResponse],List[LLMClassResponse]]:
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- s = "".join(res)
- return [LLMResponse(output=s,metadata={},input="")]
-
- def stream_chat_oai(self,conversations,
- model:Optional[str]=None,
- role_mapping=None,
- delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
-
- async def async_stream_chat_oai(self,conversations,
- model:Optional[str]=None,
+ tools: List[Union[Callable, str]] = [],
+ tool_choice: Optional[Union[Callable, str]] = None,
+ execute_tool: bool = False,
+ impl_func: Optional[Callable] = None,
+ execute_impl_func: bool = False,
+ impl_func_params: Optional[Dict[str, Any]] = None,
+ func_params: Optional[Dict[str, Any]] = None,
+ response_class: Optional[Union[pydantic.BaseModel, str]] = None,
+ response_after_chat: Optional[Union[pydantic.BaseModel, str]] = False,
+ enable_default_sys_message: bool = True,
+ model: Optional[str] = None,
+ role_mapping=None,
+ llm_config: Dict[str, Any] = {},
+ only_return_prompt: bool = False,
+ extra_request_params: Dict[str, Any] = {}
+ ) -> Union[List[LLMResponse], List[LLMFunctionCallResponse], List[LLMClassResponse]]:
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+ metadata = {"request_id":""}
+ output = ""
+ for chunk in res:
+ output += chunk[0]
+ metadata["input_tokens_count"] = chunk[1].input_tokens_count
+ metadata["generated_tokens_count"] = chunk[1].generated_tokens_count
+ metadata["reasoning_content"] = chunk[1].reasoning_content
+ metadata["finish_reason"] = chunk[1].finish_reason
+ metadata["first_token_time"] = chunk[1].first_token_time
+
+ return [LLMResponse(output=output, metadata=metadata, input="")]
+
+ def stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
  role_mapping=None,
  delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+
+ if isinstance(res, tuple):
+ for (t, metadata) in res:
+ yield (t, SingleOutputMeta(
+ input_tokens_count=metadata.get("input_tokens_count", 0),
+ generated_tokens_count=metadata.get(
+ "generated_tokens_count", 0),
+ reasoning_content=metadata.get("reasoning_content", ""),
+ finish_reason=metadata.get("finish_reason", "stop"),
+ first_token_time=metadata.get("first_token_time", 0)
+ ))
+ else:
+ for t in res:
+ yield (t, SingleOutputMeta(0, 0))
+
+ async def async_stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
+ role_mapping=None,
+ delta_mode=False,
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations, llm_config=llm_config, extra_request_params=extra_request_params))()
  # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
+ for t in res:
+ yield t

- def __getattr__(self, name):
- return getattr(self.llm, name)
+ def __getattr__(self, name):
+ return getattr(self.llm, name)
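In the rewritten `chat_oai`, the wrapper no longer joins a list of plain strings; it walks a `(text, meta)` stream and folds each chunk's token counts and finish reason into a single metadata dict returned with the final `LLMResponse`. A rough standalone illustration of that folding step follows; `ChunkMeta` is a stand-in for byzerllm's `SingleOutputMeta`, not its real definition.

```python
from dataclasses import dataclass
from typing import Dict, Iterable, Tuple


@dataclass
class ChunkMeta:
    # Illustrative stand-in for the SingleOutputMeta fields used in the diff.
    input_tokens_count: int = 0
    generated_tokens_count: int = 0
    reasoning_content: str = ""
    finish_reason: str = "stop"
    first_token_time: float = 0.0


def collect(stream: Iterable[Tuple[str, ChunkMeta]]) -> Tuple[str, Dict]:
    """Concatenate streamed text; keep the metadata of the last chunk seen."""
    output, metadata = "", {"request_id": ""}
    for text, meta in stream:
        output += text
        metadata.update(
            input_tokens_count=meta.input_tokens_count,
            generated_tokens_count=meta.generated_tokens_count,
            reasoning_content=meta.reasoning_content,
            finish_reason=meta.finish_reason,
            first_token_time=meta.first_token_time,
        )
    return output, metadata


chunks = [("Hel", ChunkMeta(12, 1)), ("lo", ChunkMeta(12, 2))]
print(collect(chunks))  # ('Hello', {...counts taken from the final chunk...})
```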
autocoder/rag/long_context_rag.py CHANGED
@@ -33,6 +33,7 @@ from importlib.metadata import version
  from autocoder.rag.stream_event import event_writer
  from autocoder.rag.relevant_utils import DocFilterResult
  from pydantic import BaseModel
+ from byzerllm.utils.types import SingleOutputMeta

  try:
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -284,6 +285,7 @@ class LongContextRAG:
  def build(self):
  pass

+
  def search(self, query: str) -> List[SourceCode]:
  target_query = query
  only_contexts = False
@@ -344,6 +346,7 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
  ):
  try:
  return self._stream_chat_oai(
@@ -351,11 +354,42 @@
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
+ extra_request_params=extra_request_params
  )
  except Exception as e:
  logger.error(f"Error in stream_chat_oai: {str(e)}")
  traceback.print_exc()
  return ["出现错误,请稍后再试。"], []
+
+
+ def _stream_chatfrom_openai_sdk(self,response):
+ for chunk in response:
+ if hasattr(chunk, "usage") and chunk.usage:
+ input_tokens_count = chunk.usage.prompt_tokens
+ generated_tokens_count = chunk.usage.completion_tokens
+ else:
+ input_tokens_count = 0
+ generated_tokens_count = 0
+
+ if not chunk.choices:
+ if last_meta:
+ yield ("", SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content="",
+ finish_reason=last_meta.finish_reason))
+ continue
+
+ content = chunk.choices[0].delta.content or ""
+
+ reasoning_text = ""
+ if hasattr(chunk.choices[0].delta, "reasoning_content"):
+ reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+ last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content=reasoning_text,
+ finish_reason=chunk.choices[0].finish_reason)
+ yield (content, last_meta)

  def _stream_chat_oai(
  self,
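The new `_stream_chatfrom_openai_sdk` helper adapts the OpenAI SDK's streamed chunks into `(content, SingleOutputMeta)` tuples, reading `chunk.usage` when the server includes it and `delta.reasoning_content` when the backend exposes one (as written it assumes any usage-only, choices-less chunk arrives after at least one content chunk, since `last_meta` is only bound inside the loop). A hedged standalone sketch of the same adaptation against the `openai` v1 client, yielding a plain dict instead of `SingleOutputMeta`:

```python
from typing import Iterator, Tuple

from openai import OpenAI  # assumes the openai v1 SDK is installed


def stream_with_usage(client: OpenAI, model: str, messages: list) -> Iterator[Tuple[str, dict]]:
    """Yield (text, meta) pairs from a streamed chat completion."""
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        # Ask the server to append a final usage-only chunk; the official
        # OpenAI API supports this, other OpenAI-compatible servers may not.
        stream_options={"include_usage": True},
    )
    for chunk in response:
        usage = getattr(chunk, "usage", None)
        meta = {
            "input_tokens": usage.prompt_tokens if usage else 0,
            "generated_tokens": usage.completion_tokens if usage else 0,
            "finish_reason": chunk.choices[0].finish_reason if chunk.choices else None,
        }
        # Usage-only chunks have an empty choices list and carry no text.
        text = (chunk.choices[0].delta.content or "") if chunk.choices else ""
        yield text, meta
```

The `stream_options` flag is an assumption about the serving side; servers that ignore it simply never populate `usage`, in which case the token counts stay at zero.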
@@ -363,128 +397,136 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
- ):
+ extra_request_params: Dict[str, Any] = {}
+ ):
  if self.client:
  model = model or self.args.model
  response = self.client.chat.completions.create(
  model=model,
  messages=conversations,
  stream=True,
- max_tokens=self.args.rag_params_max_tokens
- )
-
- def response_generator():
- for chunk in response:
- if chunk.choices[0].delta.content is not None:
- yield chunk.choices[0].delta.content
+ max_tokens=self.args.rag_params_max_tokens,
+ extra_body=extra_request_params
+ )
+ return self._stream_chatfrom_openai_sdk(response), []

- return response_generator(), []
- else:
-
- target_llm = self.llm
- if self.llm.get_sub_client("qa_model"):
- target_llm = self.llm.get_sub_client("qa_model")
+ target_llm = self.llm
+ if self.llm.get_sub_client("qa_model"):
+ target_llm = self.llm.get_sub_client("qa_model")

- query = conversations[-1]["content"]
- context = []
+ query = conversations[-1]["content"]
+ context = []

- if (
- "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
- in query
- or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
- in query
- ):
+ if (
+ "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
+ in query
+ or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
+ in query
+ ):

- chunks = target_llm.stream_chat_oai(
- conversations=conversations,
+ chunks = target_llm.stream_chat_oai(
+ conversations=conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk
+ return generate_chunks(), context
+
+ try:
+ request_params = json.loads(query)
+ if "request_id" in request_params:
+ request_id = request_params["request_id"]
+ index = request_params["index"]
+
+ file_path = event_writer.get_event_file_path(request_id)
+ logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
+ events = []
+ if not os.path.exists(file_path):
+ return [],context
+
+ with open(file_path, "r") as f:
+ for line in f:
+ event = json.loads(line)
+ if event["index"] >= index:
+ events.append(event)
+ return [json.dumps({
+ "events": [event for event in events],
+ },ensure_ascii=False)], context
+ except json.JSONDecodeError:
+ pass
+
+ if self.args.without_contexts and LLMComputeEngine is not None:
+ llm_compute_engine = LLMComputeEngine(
+ llm=target_llm,
+ inference_enhance=not self.args.disable_inference_enhance,
+ inference_deep_thought=self.args.inference_deep_thought,
+ inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
+ precision=self.args.inference_compute_precision,
+ data_cells_max_num=self.args.data_cells_max_num,
+ )
+ conversations = conversations[:-1]
+ new_conversations = llm_compute_engine.process_conversation(
+ conversations, query, []
+ )
+ chunks = llm_compute_engine.stream_chat_oai(
+ conversations=new_conversations,
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
  delta_mode=True,
+ extra_request_params=extra_request_params
  )
- return (chunk[0] for chunk in chunks), context

- try:
- request_params = json.loads(query)
- if "request_id" in request_params:
- request_id = request_params["request_id"]
- index = request_params["index"]
-
- file_path = event_writer.get_event_file_path(request_id)
- logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
- events = []
- if not os.path.exists(file_path):
- return [],context
-
- with open(file_path, "r") as f:
- for line in f:
- event = json.loads(line)
- if event["index"] >= index:
- events.append(event)
- return [json.dumps({
- "events": [event for event in events],
- },ensure_ascii=False)], context
- except json.JSONDecodeError:
- pass
-
- if self.args.without_contexts and LLMComputeEngine is not None:
- llm_compute_engine = LLMComputeEngine(
- llm=target_llm,
- inference_enhance=not self.args.disable_inference_enhance,
- inference_deep_thought=self.args.inference_deep_thought,
- inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
- precision=self.args.inference_compute_precision,
- data_cells_max_num=self.args.data_cells_max_num,
- )
- conversations = conversations[:-1]
- new_conversations = llm_compute_engine.process_conversation(
- conversations, query, []
- )
-
- return (
- llm_compute_engine.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- ),
- context,
- )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk

+ return (
+ generate_chunks(),
+ context,
+ )

- only_contexts = False
- try:
- v = json.loads(query)
- if "only_contexts" in v:
- query = v["query"]
- only_contexts = v["only_contexts"]
- conversations[-1]["content"] = query
- except json.JSONDecodeError:
- pass

- logger.info(f"Query: {query} only_contexts: {only_contexts}")
- start_time = time.time()
-
+ only_contexts = False
+ try:
+ v = json.loads(query)
+ if "only_contexts" in v:
+ query = v["query"]
+ only_contexts = v["only_contexts"]
+ conversations[-1]["content"] = query
+ except json.JSONDecodeError:
+ pass
+
+ logger.info(f"Query: {query} only_contexts: {only_contexts}")
+ start_time = time.time()
+

- rag_stat = RAGStat(
- recall_stat=RecallStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.recall_llm.default_model_name,
- ),
- chunk_stat=ChunkStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.chunk_llm.default_model_name,
- ),
- answer_stat=AnswerStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.qa_llm.default_model_name,
- ),
- )
+ rag_stat = RAGStat(
+ recall_stat=RecallStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.recall_llm.default_model_name,
+ ),
+ chunk_stat=ChunkStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.chunk_llm.default_model_name,
+ ),
+ answer_stat=AnswerStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.qa_llm.default_model_name,
+ ),
+ )

+ context = []
+ def generate_sream():
+ nonlocal context
  doc_filter_result = self._filter_docs(conversations)

  rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
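Throughout `long_context_rag.py` the new `extra_request_params` argument is threaded down to the underlying call; in the OpenAI-client branch above it lands in `extra_body`, which the `openai` v1 SDK merges into the request JSON. A minimal sketch of that mechanism follows; the endpoint, model name, and extra fields are placeholders, not values from auto-coder.

```python
from openai import OpenAI

# base_url, api_key and model are placeholders for whatever
# OpenAI-compatible endpoint the RAG client is configured against.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-used")

extra_request_params = {"top_k": 20, "repetition_penalty": 1.05}  # hypothetical extras

stream = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
    # extra_body is merged into the outgoing JSON payload, which lets callers
    # forward server-specific parameters the SDK does not model explicitly.
    extra_body=extra_request_params,
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
```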
@@ -659,42 +701,41 @@
  llm_config=llm_config,
  delta_mode=True,
  )
+
+ for chunk in chunks:
+ yield chunk
+ if chunk[1] is not None:
+ rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+ self._print_rag_stats(rag_stat)
+ else:
+ new_conversations = conversations[:-1] + [
+ {
+ "role": "user",
+ "content": self._answer_question.prompt(
+ query=query,
+ relevant_docs=[doc.source_code for doc in relevant_docs],
+ ),
+ }
+ ]

- def generate_chunks():
- for chunk in chunks:
- yield chunk[0]
- if chunk[1] is not None:
- rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
- rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
- self._print_rag_stats(rag_stat)
- return generate_chunks(), context
-
- new_conversations = conversations[:-1] + [
- {
- "role": "user",
- "content": self._answer_question.prompt(
- query=query,
- relevant_docs=[doc.source_code for doc in relevant_docs],
- ),
- }
- ]
-
- chunks = target_llm.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- )
-
- def generate_chunks():
+ chunks = target_llm.stream_chat_oai(
+ conversations=new_conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+
  for chunk in chunks:
- yield chunk[0]
+ yield chunk
  if chunk[1] is not None:
  rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
  rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
  self._print_rag_stats(rag_stat)
- return generate_chunks(), context
+
+ return generate_sream(),context

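The restructured tail of `_stream_chat_oai` (this hunk and the previous one) moves the answer pipeline into a single outer generator, `generate_sream` [sic], which yields full `(text, meta)` chunks and accumulates token counts into `rag_stat` as they stream past. Below is a toy reduction of that pass-through accounting pattern, with illustrative names only, not the package's code.

```python
from dataclasses import dataclass
from typing import Dict, Iterator, Tuple


@dataclass
class AnswerStat:
    total_input_tokens: int = 0
    total_generated_tokens: int = 0


def stream_with_stats(chunks: Iterator[Tuple[str, Dict[str, int]]],
                      stat: AnswerStat) -> Iterator[Tuple[str, Dict[str, int]]]:
    """Pass chunks through unchanged while folding token counts into `stat`."""
    for text, meta in chunks:
        yield text, meta
        stat.total_input_tokens += meta.get("input_tokens_count", 0)
        stat.total_generated_tokens += meta.get("generated_tokens_count", 0)
    print(f"answer stats: {stat}")  # reported once the stream is exhausted


fake = iter([("Hi ", {"input_tokens_count": 30, "generated_tokens_count": 1}),
             ("there", {"input_tokens_count": 0, "generated_tokens_count": 2})])
stat = AnswerStat()
print("".join(text for text, _ in stream_with_stats(fake, stat)))
```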
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.277"
+ __version__ = "0.1.279"