auto-coder 0.1.278__py3-none-any.whl → 0.1.279__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/METADATA +2 -2
- {auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/RECORD +10 -10
- autocoder/rag/api_server.py +1 -3
- autocoder/rag/llm_wrapper.py +70 -40
- autocoder/rag/long_context_rag.py +172 -131
- autocoder/version.py +1 -1
- {auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/top_level.txt +0 -0
{auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.
+Version: 0.1.279
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
 Requires-Dist: jupyter-client
 Requires-Dist: prompt-toolkit
 Requires-Dist: tokenizers
-Requires-Dist: byzerllm[saas] >=0.1.
+Requires-Dist: byzerllm[saas] >=0.1.170
 Requires-Dist: patch
 Requires-Dist: diff-match-patch
 Requires-Dist: GitPython
{auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/RECORD
CHANGED

@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
-autocoder/version.py,sha256=
+autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -107,11 +107,11 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
 autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
 autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/rag/api_server.py,sha256=
+autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
 autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
 autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
-autocoder/rag/llm_wrapper.py,sha256=
-autocoder/rag/long_context_rag.py,sha256=
+autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
+autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -165,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
+auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
+auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.279.dist-info/RECORD,,
autocoder/rag/api_server.py
CHANGED

@@ -103,9 +103,7 @@ async def create_chat_completion(
     NOTE: Currently we do not support the following features:
         - function_call (Users should implement this by themselves)
        - logit_bias (to be supported by vLLM engine)
-    """
-    # async with async_timeout.timeout(TIMEOUT):
-
+    """
     generator = await openai_serving_chat.create_chat_completion(body, request)
     if isinstance(generator, ErrorResponse):
         return JSONResponse(
autocoder/rag/llm_wrapper.py
CHANGED

@@ -1,61 +1,91 @@
-from typing import Any, Dict, List, Optional, Union,Callable
-from byzerllm.utils.client.types import (
+from typing import Any, Dict, List, Optional, Union, Callable
+from byzerllm.utils.client.types import (
     LLMFunctionCallResponse,
-    LLMClassResponse,LLMResponse
+    LLMClassResponse, LLMResponse
 )
 import pydantic
 from byzerllm import ByzerLLM
 from byzerllm.utils.client import LLMResponse
 from byzerllm.utils.types import SingleOutputMeta
 from autocoder.rag.simple_rag import SimpleRAG
+from autocoder.rag.long_context_rag import LongContextRAG
 from loguru import logger
 from byzerllm.utils.langutil import asyncfy_with_semaphore

+
 class LLWrapper:

-    def __init__(self,llm:ByzerLLM,rag:SimpleRAG):
+    def __init__(self, llm: ByzerLLM, rag: Union[SimpleRAG, LongContextRAG]):
         self.llm = llm
         self.rag = rag

     def chat_oai(self,
                  conversations,
-                 tools:List[Union[Callable,str]]=[],
-                 tool_choice:Optional[Union[Callable,str]]=None,
-                 execute_tool:bool=False,
-                 impl_func:Optional[Callable]=None,
-                 execute_impl_func:bool=False,
-                 impl_func_params:Optional[Dict[str,Any]]=None,
-                 func_params:Optional[Dict[str,Any]]=None,
-                 response_class:Optional[Union[pydantic.BaseModel,str]] = None,
-                 response_after_chat:Optional[Union[pydantic.BaseModel,str]] = False,
-                 enable_default_sys_message:bool=True,
-                 model:Optional[str] = None,
-                 role_mapping=None,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                 tools: List[Union[Callable, str]] = [],
+                 tool_choice: Optional[Union[Callable, str]] = None,
+                 execute_tool: bool = False,
+                 impl_func: Optional[Callable] = None,
+                 execute_impl_func: bool = False,
+                 impl_func_params: Optional[Dict[str, Any]] = None,
+                 func_params: Optional[Dict[str, Any]] = None,
+                 response_class: Optional[Union[pydantic.BaseModel, str]] = None,
+                 response_after_chat: Optional[Union[pydantic.BaseModel, str]] = False,
+                 enable_default_sys_message: bool = True,
+                 model: Optional[str] = None,
+                 role_mapping=None,
+                 llm_config: Dict[str, Any] = {},
+                 only_return_prompt: bool = False,
+                 extra_request_params: Dict[str, Any] = {}
+                 ) -> Union[List[LLMResponse], List[LLMFunctionCallResponse], List[LLMClassResponse]]:
+        res, contexts = self.rag.stream_chat_oai(
+            conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+        metadata = {"request_id":""}
+        output = ""
+        for chunk in res:
+            output += chunk[0]
+            metadata["input_tokens_count"] = chunk[1].input_tokens_count
+            metadata["generated_tokens_count"] = chunk[1].generated_tokens_count
+            metadata["reasoning_content"] = chunk[1].reasoning_content
+            metadata["finish_reason"] = chunk[1].finish_reason
+            metadata["first_token_time"] = chunk[1].first_token_time
+
+        return [LLMResponse(output=output, metadata=metadata, input="")]
+
+    def stream_chat_oai(self, conversations,
+                        model: Optional[str] = None,
                         role_mapping=None,
                         delta_mode=False,
-                        llm_config:Dict[str,Any]={}
-
+                        llm_config: Dict[str, Any] = {},
+                        extra_request_params: Dict[str, Any] = {}
+                        ):
+        res, contexts = self.rag.stream_chat_oai(
+            conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+
+        if isinstance(res, tuple):
+            for (t, metadata) in res:
+                yield (t, SingleOutputMeta(
+                    input_tokens_count=metadata.get("input_tokens_count", 0),
+                    generated_tokens_count=metadata.get(
+                        "generated_tokens_count", 0),
+                    reasoning_content=metadata.get("reasoning_content", ""),
+                    finish_reason=metadata.get("finish_reason", "stop"),
+                    first_token_time=metadata.get("first_token_time", 0)
+                ))
+        else:
+            for t in res:
+                yield (t, SingleOutputMeta(0, 0))
+
+    async def async_stream_chat_oai(self, conversations,
+                                    model: Optional[str] = None,
+                                    role_mapping=None,
+                                    delta_mode=False,
+                                    llm_config: Dict[str, Any] = {},
+                                    extra_request_params: Dict[str, Any] = {}
+                                    ):
+        res, contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations, llm_config=llm_config, extra_request_params=extra_request_params))()
         # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
-        for t in res:
-            yield
-
+        for t in res:
+            yield t

-    def __getattr__(self, name):
-        return getattr(self.llm, name)
+    def __getattr__(self, name):
+        return getattr(self.llm, name)
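For context on the change above: the reworked wrapper now yields (text, SingleOutputMeta) pairs from stream_chat_oai and forwards extra_request_params down to the RAG backend. A minimal consumption sketch, assuming `llm` and `rag` objects are already constructed elsewhere (the `run_query` helper and its arguments are illustrative, not part of this release):

```python
# Illustrative only: how the updated LLWrapper streaming API could be consumed.
from autocoder.rag.llm_wrapper import LLWrapper


def run_query(llm, rag, question: str) -> str:
    wrapper = LLWrapper(llm=llm, rag=rag)
    conversations = [{"role": "user", "content": question}]
    answer = ""
    # Each item is a (text, SingleOutputMeta) tuple; the metadata carries
    # token counts, reasoning content, finish reason and first-token time.
    for text, meta in wrapper.stream_chat_oai(
        conversations,
        llm_config={},
        extra_request_params={},  # forwarded to the RAG backend unchanged
    ):
        answer += text
    return answer
```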
autocoder/rag/long_context_rag.py
CHANGED

@@ -33,6 +33,7 @@ from importlib.metadata import version
 from autocoder.rag.stream_event import event_writer
 from autocoder.rag.relevant_utils import DocFilterResult
 from pydantic import BaseModel
+from byzerllm.utils.types import SingleOutputMeta

 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -284,6 +285,7 @@ class LongContextRAG:
     def build(self):
         pass

+
     def search(self, query: str) -> List[SourceCode]:
         target_query = query
         only_contexts = False
@@ -344,6 +346,7 @@ class LongContextRAG:
         model: Optional[str] = None,
         role_mapping=None,
         llm_config: Dict[str, Any] = {},
+        extra_request_params: Dict[str, Any] = {}
     ):
         try:
             return self._stream_chat_oai(
@@ -351,11 +354,42 @@ class LongContextRAG:
                 model=model,
                 role_mapping=role_mapping,
                 llm_config=llm_config,
+                extra_request_params=extra_request_params
             )
         except Exception as e:
             logger.error(f"Error in stream_chat_oai: {str(e)}")
             traceback.print_exc()
             return ["出现错误,请稍后再试。"], []
+
+
+    def _stream_chatfrom_openai_sdk(self,response):
+        for chunk in response:
+            if hasattr(chunk, "usage") and chunk.usage:
+                input_tokens_count = chunk.usage.prompt_tokens
+                generated_tokens_count = chunk.usage.completion_tokens
+            else:
+                input_tokens_count = 0
+                generated_tokens_count = 0
+
+            if not chunk.choices:
+                if last_meta:
+                    yield ("", SingleOutputMeta(input_tokens_count=input_tokens_count,
+                                                generated_tokens_count=generated_tokens_count,
+                                                reasoning_content="",
+                                                finish_reason=last_meta.finish_reason))
+                continue
+
+            content = chunk.choices[0].delta.content or ""
+
+            reasoning_text = ""
+            if hasattr(chunk.choices[0].delta, "reasoning_content"):
+                reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+            last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
+                                         generated_tokens_count=generated_tokens_count,
+                                         reasoning_content=reasoning_text,
+                                         finish_reason=chunk.choices[0].finish_reason)
+            yield (content, last_meta)

     def _stream_chat_oai(
         self,
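The new _stream_chatfrom_openai_sdk helper adapts raw OpenAI-SDK streaming chunks into the (text, SingleOutputMeta) tuples the rest of the RAG stack expects. Below is a self-contained sketch of that adaptation pattern; it uses stub chunk and metadata types rather than the real openai and byzerllm classes, so the names here are assumptions made only for illustration:

```python
from dataclasses import dataclass, field
from typing import Iterator, List, Optional, Tuple


# Stub types standing in for the SDK's streamed chunk objects and for
# byzerllm's SingleOutputMeta; field names mirror what the diff relies on.
@dataclass
class Usage:
    prompt_tokens: int = 0
    completion_tokens: int = 0


@dataclass
class Delta:
    content: Optional[str] = None
    reasoning_content: Optional[str] = None


@dataclass
class Choice:
    delta: Delta = field(default_factory=Delta)
    finish_reason: Optional[str] = None


@dataclass
class Chunk:
    choices: List[Choice] = field(default_factory=list)
    usage: Optional[Usage] = None


@dataclass
class Meta:
    input_tokens_count: int = 0
    generated_tokens_count: int = 0
    reasoning_content: str = ""
    finish_reason: Optional[str] = None


def adapt_stream(chunks: Iterator[Chunk]) -> Iterator[Tuple[str, Meta]]:
    """Turn SDK-style chunks into (text, metadata) pairs, same shape as the diff."""
    for chunk in chunks:
        usage = chunk.usage or Usage()
        if not chunk.choices:
            # Usage-only trailer chunk: emit empty text carrying final token counts.
            yield ("", Meta(usage.prompt_tokens, usage.completion_tokens))
            continue
        delta = chunk.choices[0].delta
        yield (
            delta.content or "",
            Meta(
                input_tokens_count=usage.prompt_tokens,
                generated_tokens_count=usage.completion_tokens,
                reasoning_content=delta.reasoning_content or "",
                finish_reason=chunk.choices[0].finish_reason,
            ),
        )


# Example: two content chunks followed by a usage-only trailer.
stream = [
    Chunk(choices=[Choice(delta=Delta(content="Hello"))]),
    Chunk(choices=[Choice(delta=Delta(content=" world"), finish_reason="stop")]),
    Chunk(usage=Usage(prompt_tokens=12, completion_tokens=2)),
]
for text, meta in adapt_stream(stream):
    print(repr(text), meta.generated_tokens_count, meta.finish_reason)
```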
@@ -363,128 +397,136 @@ class LongContextRAG:
         model: Optional[str] = None,
         role_mapping=None,
         llm_config: Dict[str, Any] = {},
-
+        extra_request_params: Dict[str, Any] = {}
+    ):
         if self.client:
             model = model or self.args.model
             response = self.client.chat.completions.create(
                 model=model,
                 messages=conversations,
                 stream=True,
-                max_tokens=self.args.rag_params_max_tokens
-
-
-
-            for chunk in response:
-                if chunk.choices[0].delta.content is not None:
-                    yield chunk.choices[0].delta.content
+                max_tokens=self.args.rag_params_max_tokens,
+                extra_body=extra_request_params
+            )
+            return self._stream_chatfrom_openai_sdk(response), []

-
-
-
-        target_llm = self.llm
-        if self.llm.get_sub_client("qa_model"):
-            target_llm = self.llm.get_sub_client("qa_model")
+        target_llm = self.llm
+        if self.llm.get_sub_client("qa_model"):
+            target_llm = self.llm.get_sub_client("qa_model")

-
-
+        query = conversations[-1]["content"]
+        context = []

-
-
-
-
-
-
+        if (
+            "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
+            in query
+            or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
+            in query
+        ):

-
-
+            chunks = target_llm.stream_chat_oai(
+                conversations=conversations,
+                model=model,
+                role_mapping=role_mapping,
+                llm_config=llm_config,
+                delta_mode=True,
+                extra_request_params=extra_request_params
+            )
+            def generate_chunks():
+                for chunk in chunks:
+                    yield chunk
+            return generate_chunks(), context
+
+        try:
+            request_params = json.loads(query)
+            if "request_id" in request_params:
+                request_id = request_params["request_id"]
+                index = request_params["index"]
+
+                file_path = event_writer.get_event_file_path(request_id)
+                logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
+                events = []
+                if not os.path.exists(file_path):
+                    return [],context
+
+                with open(file_path, "r") as f:
+                    for line in f:
+                        event = json.loads(line)
+                        if event["index"] >= index:
+                            events.append(event)
+                return [json.dumps({
+                    "events": [event for event in events],
+                },ensure_ascii=False)], context
+        except json.JSONDecodeError:
+            pass
+
+        if self.args.without_contexts and LLMComputeEngine is not None:
+            llm_compute_engine = LLMComputeEngine(
+                llm=target_llm,
+                inference_enhance=not self.args.disable_inference_enhance,
+                inference_deep_thought=self.args.inference_deep_thought,
+                inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
+                precision=self.args.inference_compute_precision,
+                data_cells_max_num=self.args.data_cells_max_num,
+            )
+            conversations = conversations[:-1]
+            new_conversations = llm_compute_engine.process_conversation(
+                conversations, query, []
+            )
+            chunks = llm_compute_engine.stream_chat_oai(
+                conversations=new_conversations,
                 model=model,
                 role_mapping=role_mapping,
                 llm_config=llm_config,
                 delta_mode=True,
+                extra_request_params=extra_request_params
             )
-            return (chunk[0] for chunk in chunks), context

-
-
-
-                request_id = request_params["request_id"]
-                index = request_params["index"]
-
-                file_path = event_writer.get_event_file_path(request_id)
-                logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
-                events = []
-                if not os.path.exists(file_path):
-                    return [],context
-
-                with open(file_path, "r") as f:
-                    for line in f:
-                        event = json.loads(line)
-                        if event["index"] >= index:
-                            events.append(event)
-                return [json.dumps({
-                    "events": [event for event in events],
-                },ensure_ascii=False)], context
-        except json.JSONDecodeError:
-            pass
-
-        if self.args.without_contexts and LLMComputeEngine is not None:
-            llm_compute_engine = LLMComputeEngine(
-                llm=target_llm,
-                inference_enhance=not self.args.disable_inference_enhance,
-                inference_deep_thought=self.args.inference_deep_thought,
-                inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
-                precision=self.args.inference_compute_precision,
-                data_cells_max_num=self.args.data_cells_max_num,
-            )
-            conversations = conversations[:-1]
-            new_conversations = llm_compute_engine.process_conversation(
-                conversations, query, []
-            )
-
-            return (
-                llm_compute_engine.stream_chat_oai(
-                    conversations=new_conversations,
-                    model=model,
-                    role_mapping=role_mapping,
-                    llm_config=llm_config,
-                    delta_mode=True,
-                ),
-                context,
-            )
+            def generate_chunks():
+                for chunk in chunks:
+                    yield chunk

+            return (
+                generate_chunks(),
+                context,
+            )

-        only_contexts = False
-        try:
-            v = json.loads(query)
-            if "only_contexts" in v:
-                query = v["query"]
-                only_contexts = v["only_contexts"]
-                conversations[-1]["content"] = query
-        except json.JSONDecodeError:
-            pass

-
-
-
+        only_contexts = False
+        try:
+            v = json.loads(query)
+            if "only_contexts" in v:
+                query = v["query"]
+                only_contexts = v["only_contexts"]
+                conversations[-1]["content"] = query
+        except json.JSONDecodeError:
+            pass
+
+        logger.info(f"Query: {query} only_contexts: {only_contexts}")
+        start_time = time.time()
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        rag_stat = RAGStat(
+            recall_stat=RecallStat(
+                total_input_tokens=0,
+                total_generated_tokens=0,
+                model_name=self.recall_llm.default_model_name,
+            ),
+            chunk_stat=ChunkStat(
+                total_input_tokens=0,
+                total_generated_tokens=0,
+                model_name=self.chunk_llm.default_model_name,
+            ),
+            answer_stat=AnswerStat(
+                total_input_tokens=0,
+                total_generated_tokens=0,
+                model_name=self.qa_llm.default_model_name,
+            ),
+        )

+        context = []
+        def generate_sream():
+            nonlocal context
             doc_filter_result = self._filter_docs(conversations)

             rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
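The rewritten _stream_chat_oai threads a per-request accounting object through the recall, chunk and answer stages. The RAGStat, RecallStat, ChunkStat and AnswerStat definitions are not part of this diff; the sketch below assumes they are simple models with the three fields the hunk touches, and shows the yield-then-accumulate pattern in isolation:

```python
from dataclasses import dataclass


# Assumed shapes: the hunk only reads and writes these fields.
@dataclass
class StageStat:
    total_input_tokens: int = 0
    total_generated_tokens: int = 0
    model_name: str = ""


@dataclass
class RagStat:
    recall_stat: StageStat
    chunk_stat: StageStat
    answer_stat: StageStat


@dataclass
class Meta:
    input_tokens_count: int = 0
    generated_tokens_count: int = 0


def accumulate_answer_tokens(rag_stat: RagStat, chunks):
    """Mirror of the streaming loop: re-yield each chunk, then add its token counts."""
    for text, meta in chunks:
        yield text, meta
        if meta is not None:
            rag_stat.answer_stat.total_input_tokens += meta.input_tokens_count
            rag_stat.answer_stat.total_generated_tokens += meta.generated_tokens_count


stats = RagStat(StageStat(model_name="recall"),
                StageStat(model_name="chunk"),
                StageStat(model_name="qa"))
pieces = [("Hello", Meta(10, 1)), (" world", Meta(0, 1))]
for _ in accumulate_answer_tokens(stats, pieces):
    pass
print(stats.answer_stat.total_generated_tokens)  # 2
```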
@@ -659,42 +701,41 @@ class LongContextRAG:
                 llm_config=llm_config,
                 delta_mode=True,
             )
+
+            for chunk in chunks:
+                yield chunk
+                if chunk[1] is not None:
+                    rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+                    rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+                    self._print_rag_stats(rag_stat)
+            else:
+                new_conversations = conversations[:-1] + [
+                    {
+                        "role": "user",
+                        "content": self._answer_question.prompt(
+                            query=query,
+                            relevant_docs=[doc.source_code for doc in relevant_docs],
+                        ),
+                    }
+                ]

-
-
-
-
-
-
-
-
-
-            new_conversations = conversations[:-1] + [
-                {
-                    "role": "user",
-                    "content": self._answer_question.prompt(
-                        query=query,
-                        relevant_docs=[doc.source_code for doc in relevant_docs],
-                    ),
-                }
-            ]
-
-            chunks = target_llm.stream_chat_oai(
-                conversations=new_conversations,
-                model=model,
-                role_mapping=role_mapping,
-                llm_config=llm_config,
-                delta_mode=True,
-            )
-
-            def generate_chunks():
+                chunks = target_llm.stream_chat_oai(
+                    conversations=new_conversations,
+                    model=model,
+                    role_mapping=role_mapping,
+                    llm_config=llm_config,
+                    delta_mode=True,
+                    extra_request_params=extra_request_params
+                )
+
                 for chunk in chunks:
-                    yield chunk
+                    yield chunk
                     if chunk[1] is not None:
                         rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
                         rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
                         self._print_rag_stats(rag_stat)
-
+
+        return generate_sream(),context



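When LongContextRAG is pointed at an OpenAI-compatible endpoint (the self.client branch above), the new extra_request_params dict is forwarded as extra_body on the streaming completion call. A hedged sketch of that call shape; the endpoint, model name and extra-body key are placeholders, not values taken from this release:

```python
from openai import OpenAI

# Placeholder endpoint, key and model; substitute whatever the deployment actually uses.
client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="placeholder")

response = client.chat.completions.create(
    model="placeholder-model",
    messages=[{"role": "user", "content": "Summarize the indexed docs."}],
    stream=True,
    max_tokens=4096,                      # the diff takes this from args.rag_params_max_tokens
    extra_body={"example_option": True},  # whatever extra_request_params carries
)

for chunk in response:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```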
autocoder/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.
+__version__ = "0.1.279"
{auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/LICENSE
File without changes

{auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/WHEEL
File without changes

{auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/entry_points.txt
File without changes

{auto_coder-0.1.278.dist-info → auto_coder-0.1.279.dist-info}/top_level.txt
File without changes