auto-coder 0.1.278__py3-none-any.whl → 0.1.279__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.278
+ Version: 0.1.279
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
  Requires-Dist: jupyter-client
  Requires-Dist: prompt-toolkit
  Requires-Dist: tokenizers
- Requires-Dist: byzerllm[saas] >=0.1.169
+ Requires-Dist: byzerllm[saas] >=0.1.170
  Requires-Dist: patch
  Requires-Dist: diff-match-patch
  Requires-Dist: GitPython
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
- autocoder/version.py,sha256=Q3OPt1PiXvKx5xTUCPxUFZP2zsuajLbxdpiOEm_K2L4,23
+ autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -107,11 +107,11 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
+ autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
  autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
- autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
- autocoder/rag/long_context_rag.py,sha256=qFlNmbgQnstCSCb0SxfkMEYtZRr8p6YEc6u0jpve4Q0,32002
+ autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
+ autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -165,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- auto_coder-0.1.278.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.278.dist-info/METADATA,sha256=W8ANCnQ9mFraLjuXIUaggoO_jH5Yx-bmm4FNxHLfOvo,2643
- auto_coder-0.1.278.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- auto_coder-0.1.278.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.278.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.278.dist-info/RECORD,,
+ auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
+ auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.279.dist-info/RECORD,,
autocoder/rag/api_server.py CHANGED
@@ -103,9 +103,7 @@ async def create_chat_completion(
  NOTE: Currently we do not support the following features:
      - function_call (Users should implement this by themselves)
      - logit_bias (to be supported by vLLM engine)
- """
- # async with async_timeout.timeout(TIMEOUT):
-
+ """
  generator = await openai_serving_chat.create_chat_completion(body, request)
  if isinstance(generator, ErrorResponse):
      return JSONResponse(
autocoder/rag/llm_wrapper.py CHANGED
@@ -1,61 +1,91 @@
- from typing import Any, Dict, List, Optional, Union,Callable
- from byzerllm.utils.client.types import (
+ from typing import Any, Dict, List, Optional, Union, Callable
+ from byzerllm.utils.client.types import (
  LLMFunctionCallResponse,
- LLMClassResponse,LLMResponse
+ LLMClassResponse, LLMResponse
  )
  import pydantic
  from byzerllm import ByzerLLM
  from byzerllm.utils.client import LLMResponse
  from byzerllm.utils.types import SingleOutputMeta
  from autocoder.rag.simple_rag import SimpleRAG
+ from autocoder.rag.long_context_rag import LongContextRAG
  from loguru import logger
  from byzerllm.utils.langutil import asyncfy_with_semaphore

+
  class LLWrapper:

- def __init__(self,llm:ByzerLLM,rag:SimpleRAG):
+ def __init__(self, llm: ByzerLLM, rag: Union[SimpleRAG, LongContextRAG]):
  self.llm = llm
  self.rag = rag

  def chat_oai(self,
  conversations,
- tools:List[Union[Callable,str]]=[],
- tool_choice:Optional[Union[Callable,str]]=None,
- execute_tool:bool=False,
- impl_func:Optional[Callable]=None,
- execute_impl_func:bool=False,
- impl_func_params:Optional[Dict[str,Any]]=None,
- func_params:Optional[Dict[str,Any]]=None,
- response_class:Optional[Union[pydantic.BaseModel,str]] = None,
- response_after_chat:Optional[Union[pydantic.BaseModel,str]] = False,
- enable_default_sys_message:bool=True,
- model:Optional[str] = None,
- role_mapping=None,llm_config:Dict[str,Any]={}
- )->Union[List[LLMResponse],List[LLMFunctionCallResponse],List[LLMClassResponse]]:
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- s = "".join(res)
- return [LLMResponse(output=s,metadata={},input="")]
-
- def stream_chat_oai(self,conversations,
- model:Optional[str]=None,
- role_mapping=None,
- delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
-
- async def async_stream_chat_oai(self,conversations,
- model:Optional[str]=None,
+ tools: List[Union[Callable, str]] = [],
+ tool_choice: Optional[Union[Callable, str]] = None,
+ execute_tool: bool = False,
+ impl_func: Optional[Callable] = None,
+ execute_impl_func: bool = False,
+ impl_func_params: Optional[Dict[str, Any]] = None,
+ func_params: Optional[Dict[str, Any]] = None,
+ response_class: Optional[Union[pydantic.BaseModel, str]] = None,
+ response_after_chat: Optional[Union[pydantic.BaseModel, str]] = False,
+ enable_default_sys_message: bool = True,
+ model: Optional[str] = None,
+ role_mapping=None,
+ llm_config: Dict[str, Any] = {},
+ only_return_prompt: bool = False,
+ extra_request_params: Dict[str, Any] = {}
+ ) -> Union[List[LLMResponse], List[LLMFunctionCallResponse], List[LLMClassResponse]]:
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+ metadata = {"request_id":""}
+ output = ""
+ for chunk in res:
+ output += chunk[0]
+ metadata["input_tokens_count"] = chunk[1].input_tokens_count
+ metadata["generated_tokens_count"] = chunk[1].generated_tokens_count
+ metadata["reasoning_content"] = chunk[1].reasoning_content
+ metadata["finish_reason"] = chunk[1].finish_reason
+ metadata["first_token_time"] = chunk[1].first_token_time
+
+ return [LLMResponse(output=output, metadata=metadata, input="")]
+
+ def stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
  role_mapping=None,
  delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+
+ if isinstance(res, tuple):
+ for (t, metadata) in res:
+ yield (t, SingleOutputMeta(
+ input_tokens_count=metadata.get("input_tokens_count", 0),
+ generated_tokens_count=metadata.get(
+ "generated_tokens_count", 0),
+ reasoning_content=metadata.get("reasoning_content", ""),
+ finish_reason=metadata.get("finish_reason", "stop"),
+ first_token_time=metadata.get("first_token_time", 0)
+ ))
+ else:
+ for t in res:
+ yield (t, SingleOutputMeta(0, 0))
+
+ async def async_stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
+ role_mapping=None,
+ delta_mode=False,
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations, llm_config=llm_config, extra_request_params=extra_request_params))()
  # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
+ for t in res:
+ yield t

- def __getattr__(self, name):
- return getattr(self.llm, name)
+ def __getattr__(self, name):
+ return getattr(self.llm, name)
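Taken together, the llm_wrapper.py changes above mean that LLWrapper.stream_chat_oai now yields (text, SingleOutputMeta) pairs instead of bare strings and threads extra_request_params through to the underlying RAG implementation. A minimal consumption sketch, assuming an already-configured ByzerLLM client and a LongContextRAG instance (the names and the extra parameter value below are placeholders, not values defined by the package):

```python
# Sketch only: `llm` and `rag` are assumed to exist (a configured ByzerLLM
# client and a LongContextRAG instance); the extra_request_params value is a
# hypothetical pass-through field, not something auto-coder itself defines.
from autocoder.rag.llm_wrapper import LLWrapper


def run_query(llm, rag):
    wrapper = LLWrapper(llm, rag)
    conversations = [{"role": "user", "content": "Summarize the indexed docs."}]

    output = ""
    for text, meta in wrapper.stream_chat_oai(
        conversations,
        extra_request_params={"top_p": 0.9},  # hypothetical pass-through value
    ):
        output += text
        # meta is a SingleOutputMeta carrying input/generated token counts,
        # reasoning_content, finish_reason and first_token_time in this release.
    return output
```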
autocoder/rag/long_context_rag.py CHANGED
@@ -33,6 +33,7 @@ from importlib.metadata import version
  from autocoder.rag.stream_event import event_writer
  from autocoder.rag.relevant_utils import DocFilterResult
  from pydantic import BaseModel
+ from byzerllm.utils.types import SingleOutputMeta

  try:
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -284,6 +285,7 @@ class LongContextRAG:
  def build(self):
  pass

+
  def search(self, query: str) -> List[SourceCode]:
  target_query = query
  only_contexts = False
@@ -344,6 +346,7 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
  ):
  try:
  return self._stream_chat_oai(
@@ -351,11 +354,42 @@ class LongContextRAG:
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
+ extra_request_params=extra_request_params
  )
  except Exception as e:
  logger.error(f"Error in stream_chat_oai: {str(e)}")
  traceback.print_exc()
  return ["出现错误,请稍后再试。"], []
+
+
+ def _stream_chatfrom_openai_sdk(self,response):
+ for chunk in response:
+ if hasattr(chunk, "usage") and chunk.usage:
+ input_tokens_count = chunk.usage.prompt_tokens
+ generated_tokens_count = chunk.usage.completion_tokens
+ else:
+ input_tokens_count = 0
+ generated_tokens_count = 0
+
+ if not chunk.choices:
+ if last_meta:
+ yield ("", SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content="",
+ finish_reason=last_meta.finish_reason))
+ continue
+
+ content = chunk.choices[0].delta.content or ""
+
+ reasoning_text = ""
+ if hasattr(chunk.choices[0].delta, "reasoning_content"):
+ reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+ last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content=reasoning_text,
+ finish_reason=chunk.choices[0].finish_reason)
+ yield (content, last_meta)

  def _stream_chat_oai(
  self,
@@ -363,128 +397,136 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
- ):
+ extra_request_params: Dict[str, Any] = {}
+ ):
  if self.client:
  model = model or self.args.model
  response = self.client.chat.completions.create(
  model=model,
  messages=conversations,
  stream=True,
- max_tokens=self.args.rag_params_max_tokens
- )
-
- def response_generator():
- for chunk in response:
- if chunk.choices[0].delta.content is not None:
- yield chunk.choices[0].delta.content
+ max_tokens=self.args.rag_params_max_tokens,
+ extra_body=extra_request_params
+ )
+ return self._stream_chatfrom_openai_sdk(response), []

- return response_generator(), []
- else:
-
- target_llm = self.llm
- if self.llm.get_sub_client("qa_model"):
- target_llm = self.llm.get_sub_client("qa_model")
+ target_llm = self.llm
+ if self.llm.get_sub_client("qa_model"):
+ target_llm = self.llm.get_sub_client("qa_model")

- query = conversations[-1]["content"]
- context = []
+ query = conversations[-1]["content"]
+ context = []

- if (
- "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
- in query
- or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
- in query
- ):
+ if (
+ "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
+ in query
+ or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
+ in query
+ ):

- chunks = target_llm.stream_chat_oai(
- conversations=conversations,
+ chunks = target_llm.stream_chat_oai(
+ conversations=conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk
+ return generate_chunks(), context
+
+ try:
+ request_params = json.loads(query)
+ if "request_id" in request_params:
+ request_id = request_params["request_id"]
+ index = request_params["index"]
+
+ file_path = event_writer.get_event_file_path(request_id)
+ logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
+ events = []
+ if not os.path.exists(file_path):
+ return [],context
+
+ with open(file_path, "r") as f:
+ for line in f:
+ event = json.loads(line)
+ if event["index"] >= index:
+ events.append(event)
+ return [json.dumps({
+ "events": [event for event in events],
+ },ensure_ascii=False)], context
+ except json.JSONDecodeError:
+ pass
+
+ if self.args.without_contexts and LLMComputeEngine is not None:
+ llm_compute_engine = LLMComputeEngine(
+ llm=target_llm,
+ inference_enhance=not self.args.disable_inference_enhance,
+ inference_deep_thought=self.args.inference_deep_thought,
+ inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
+ precision=self.args.inference_compute_precision,
+ data_cells_max_num=self.args.data_cells_max_num,
+ )
+ conversations = conversations[:-1]
+ new_conversations = llm_compute_engine.process_conversation(
+ conversations, query, []
+ )
+ chunks = llm_compute_engine.stream_chat_oai(
+ conversations=new_conversations,
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
  delta_mode=True,
+ extra_request_params=extra_request_params
  )
- return (chunk[0] for chunk in chunks), context

- try:
- request_params = json.loads(query)
- if "request_id" in request_params:
- request_id = request_params["request_id"]
- index = request_params["index"]
-
- file_path = event_writer.get_event_file_path(request_id)
- logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
- events = []
- if not os.path.exists(file_path):
- return [],context
-
- with open(file_path, "r") as f:
- for line in f:
- event = json.loads(line)
- if event["index"] >= index:
- events.append(event)
- return [json.dumps({
- "events": [event for event in events],
- },ensure_ascii=False)], context
- except json.JSONDecodeError:
- pass
-
- if self.args.without_contexts and LLMComputeEngine is not None:
- llm_compute_engine = LLMComputeEngine(
- llm=target_llm,
- inference_enhance=not self.args.disable_inference_enhance,
- inference_deep_thought=self.args.inference_deep_thought,
- inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
- precision=self.args.inference_compute_precision,
- data_cells_max_num=self.args.data_cells_max_num,
- )
- conversations = conversations[:-1]
- new_conversations = llm_compute_engine.process_conversation(
- conversations, query, []
- )
-
- return (
- llm_compute_engine.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- ),
- context,
- )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk

+ return (
+ generate_chunks(),
+ context,
+ )

- only_contexts = False
- try:
- v = json.loads(query)
- if "only_contexts" in v:
- query = v["query"]
- only_contexts = v["only_contexts"]
- conversations[-1]["content"] = query
- except json.JSONDecodeError:
- pass

- logger.info(f"Query: {query} only_contexts: {only_contexts}")
- start_time = time.time()
-
+ only_contexts = False
+ try:
+ v = json.loads(query)
+ if "only_contexts" in v:
+ query = v["query"]
+ only_contexts = v["only_contexts"]
+ conversations[-1]["content"] = query
+ except json.JSONDecodeError:
+ pass
+
+ logger.info(f"Query: {query} only_contexts: {only_contexts}")
+ start_time = time.time()
+

- rag_stat = RAGStat(
- recall_stat=RecallStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.recall_llm.default_model_name,
- ),
- chunk_stat=ChunkStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.chunk_llm.default_model_name,
- ),
- answer_stat=AnswerStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.qa_llm.default_model_name,
- ),
- )
+ rag_stat = RAGStat(
+ recall_stat=RecallStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.recall_llm.default_model_name,
+ ),
+ chunk_stat=ChunkStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.chunk_llm.default_model_name,
+ ),
+ answer_stat=AnswerStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.qa_llm.default_model_name,
+ ),
+ )

+ context = []
+ def generate_sream():
+ nonlocal context
  doc_filter_result = self._filter_docs(conversations)

  rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
@@ -659,42 +701,41 @@ class LongContextRAG:
  llm_config=llm_config,
  delta_mode=True,
  )
+
+ for chunk in chunks:
+ yield chunk
+ if chunk[1] is not None:
+ rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+ self._print_rag_stats(rag_stat)
+ else:
+ new_conversations = conversations[:-1] + [
+ {
+ "role": "user",
+ "content": self._answer_question.prompt(
+ query=query,
+ relevant_docs=[doc.source_code for doc in relevant_docs],
+ ),
+ }
+ ]

- def generate_chunks():
- for chunk in chunks:
- yield chunk[0]
- if chunk[1] is not None:
- rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
- rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
- self._print_rag_stats(rag_stat)
- return generate_chunks(), context
-
- new_conversations = conversations[:-1] + [
- {
- "role": "user",
- "content": self._answer_question.prompt(
- query=query,
- relevant_docs=[doc.source_code for doc in relevant_docs],
- ),
- }
- ]
-
- chunks = target_llm.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- )
-
- def generate_chunks():
+ chunks = target_llm.stream_chat_oai(
+ conversations=new_conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+
  for chunk in chunks:
- yield chunk[0]
+ yield chunk
  if chunk[1] is not None:
  rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
  rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
  self._print_rag_stats(rag_stat)
- return generate_chunks(), context
+
+ return generate_sream(),context

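When an OpenAI-compatible client is configured, the net effect of the long_context_rag.py changes above is that extra_request_params is forwarded as the SDK's extra_body and the stream is re-emitted as (content, SingleOutputMeta) pairs by the new _stream_chatfrom_openai_sdk helper. A rough standalone sketch of that SDK-level pattern, with a placeholder endpoint, model name, and extra field:

```python
# Standalone sketch of the extra_body pass-through pattern; base_url, api_key,
# model and the "enable_thinking" field are placeholders, not values used by
# auto-coder itself.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="dummy")
extra_request_params = {"enable_thinking": True}  # hypothetical extra field

response = client.chat.completions.create(
    model="placeholder-model",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
    max_tokens=512,
    extra_body=extra_request_params,  # same mechanism the new code uses
)
for chunk in response:
    # Each streamed chunk carries a delta; empty chunks (e.g. usage-only) are skipped.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
```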
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.278"
+ __version__ = "0.1.279"