auto-coder 0.1.278__py3-none-any.whl → 0.1.279__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.278
+ Version: 0.1.279
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
  Requires-Dist: jupyter-client
  Requires-Dist: prompt-toolkit
  Requires-Dist: tokenizers
- Requires-Dist: byzerllm[saas] >=0.1.169
+ Requires-Dist: byzerllm[saas] >=0.1.170
  Requires-Dist: patch
  Requires-Dist: diff-match-patch
  Requires-Dist: GitPython
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
- autocoder/version.py,sha256=Q3OPt1PiXvKx5xTUCPxUFZP2zsuajLbxdpiOEm_K2L4,23
+ autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -107,11 +107,11 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
+ autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
  autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
- autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
- autocoder/rag/long_context_rag.py,sha256=qFlNmbgQnstCSCb0SxfkMEYtZRr8p6YEc6u0jpve4Q0,32002
+ autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
+ autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -165,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- auto_coder-0.1.278.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.278.dist-info/METADATA,sha256=W8ANCnQ9mFraLjuXIUaggoO_jH5Yx-bmm4FNxHLfOvo,2643
- auto_coder-0.1.278.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- auto_coder-0.1.278.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.278.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.278.dist-info/RECORD,,
+ auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
+ auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.279.dist-info/RECORD,,
autocoder/rag/api_server.py CHANGED
@@ -103,9 +103,7 @@ async def create_chat_completion(
  NOTE: Currently we do not support the following features:
      - function_call (Users should implement this by themselves)
      - logit_bias (to be supported by vLLM engine)
- """
- # async with async_timeout.timeout(TIMEOUT):
-
+ """
  generator = await openai_serving_chat.create_chat_completion(body, request)
  if isinstance(generator, ErrorResponse):
      return JSONResponse(
autocoder/rag/llm_wrapper.py CHANGED
@@ -1,61 +1,91 @@
- from typing import Any, Dict, List, Optional, Union,Callable
- from byzerllm.utils.client.types import (
+ from typing import Any, Dict, List, Optional, Union, Callable
+ from byzerllm.utils.client.types import (
  LLMFunctionCallResponse,
- LLMClassResponse,LLMResponse
+ LLMClassResponse, LLMResponse
  )
  import pydantic
  from byzerllm import ByzerLLM
  from byzerllm.utils.client import LLMResponse
  from byzerllm.utils.types import SingleOutputMeta
  from autocoder.rag.simple_rag import SimpleRAG
+ from autocoder.rag.long_context_rag import LongContextRAG
  from loguru import logger
  from byzerllm.utils.langutil import asyncfy_with_semaphore

+
  class LLWrapper:

- def __init__(self,llm:ByzerLLM,rag:SimpleRAG):
+ def __init__(self, llm: ByzerLLM, rag: Union[SimpleRAG, LongContextRAG]):
  self.llm = llm
  self.rag = rag

  def chat_oai(self,
  conversations,
- tools:List[Union[Callable,str]]=[],
- tool_choice:Optional[Union[Callable,str]]=None,
- execute_tool:bool=False,
- impl_func:Optional[Callable]=None,
- execute_impl_func:bool=False,
- impl_func_params:Optional[Dict[str,Any]]=None,
- func_params:Optional[Dict[str,Any]]=None,
- response_class:Optional[Union[pydantic.BaseModel,str]] = None,
- response_after_chat:Optional[Union[pydantic.BaseModel,str]] = False,
- enable_default_sys_message:bool=True,
- model:Optional[str] = None,
- role_mapping=None,llm_config:Dict[str,Any]={}
- )->Union[List[LLMResponse],List[LLMFunctionCallResponse],List[LLMClassResponse]]:
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- s = "".join(res)
- return [LLMResponse(output=s,metadata={},input="")]
-
- def stream_chat_oai(self,conversations,
- model:Optional[str]=None,
- role_mapping=None,
- delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
-
- async def async_stream_chat_oai(self,conversations,
- model:Optional[str]=None,
+ tools: List[Union[Callable, str]] = [],
+ tool_choice: Optional[Union[Callable, str]] = None,
+ execute_tool: bool = False,
+ impl_func: Optional[Callable] = None,
+ execute_impl_func: bool = False,
+ impl_func_params: Optional[Dict[str, Any]] = None,
+ func_params: Optional[Dict[str, Any]] = None,
+ response_class: Optional[Union[pydantic.BaseModel, str]] = None,
+ response_after_chat: Optional[Union[pydantic.BaseModel, str]] = False,
+ enable_default_sys_message: bool = True,
+ model: Optional[str] = None,
+ role_mapping=None,
+ llm_config: Dict[str, Any] = {},
+ only_return_prompt: bool = False,
+ extra_request_params: Dict[str, Any] = {}
+ ) -> Union[List[LLMResponse], List[LLMFunctionCallResponse], List[LLMClassResponse]]:
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+ metadata = {"request_id":""}
+ output = ""
+ for chunk in res:
+ output += chunk[0]
+ metadata["input_tokens_count"] = chunk[1].input_tokens_count
+ metadata["generated_tokens_count"] = chunk[1].generated_tokens_count
+ metadata["reasoning_content"] = chunk[1].reasoning_content
+ metadata["finish_reason"] = chunk[1].finish_reason
+ metadata["first_token_time"] = chunk[1].first_token_time
+
+ return [LLMResponse(output=output, metadata=metadata, input="")]
+
+ def stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
  role_mapping=None,
  delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+
+ if isinstance(res, tuple):
+ for (t, metadata) in res:
+ yield (t, SingleOutputMeta(
+ input_tokens_count=metadata.get("input_tokens_count", 0),
+ generated_tokens_count=metadata.get(
+ "generated_tokens_count", 0),
+ reasoning_content=metadata.get("reasoning_content", ""),
+ finish_reason=metadata.get("finish_reason", "stop"),
+ first_token_time=metadata.get("first_token_time", 0)
+ ))
+ else:
+ for t in res:
+ yield (t, SingleOutputMeta(0, 0))
+
+ async def async_stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
+ role_mapping=None,
+ delta_mode=False,
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations, llm_config=llm_config, extra_request_params=extra_request_params))()
  # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
+ for t in res:
+ yield t

- def __getattr__(self, name):
- return getattr(self.llm, name)
+ def __getattr__(self, name):
+ return getattr(self.llm, name)
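Taken together, the llm_wrapper.py changes above mean that LLWrapper.stream_chat_oai now yields (text, SingleOutputMeta) pairs instead of bare strings and threads extra_request_params through to the underlying RAG implementation. A minimal consumption sketch, assuming an already-configured ByzerLLM client and a LongContextRAG instance (the names and the extra parameter value below are placeholders, not values defined by the package):

```python
# Sketch only: `llm` and `rag` are assumed to exist (a configured ByzerLLM
# client and a LongContextRAG instance); the extra_request_params value is a
# hypothetical pass-through field, not something auto-coder itself defines.
from autocoder.rag.llm_wrapper import LLWrapper


def run_query(llm, rag):
    wrapper = LLWrapper(llm, rag)
    conversations = [{"role": "user", "content": "Summarize the indexed docs."}]

    output = ""
    for text, meta in wrapper.stream_chat_oai(
        conversations,
        extra_request_params={"top_p": 0.9},  # hypothetical pass-through value
    ):
        output += text
        # meta is a SingleOutputMeta carrying input/generated token counts,
        # reasoning_content, finish_reason and first_token_time in this release.
    return output
```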
autocoder/rag/long_context_rag.py CHANGED
@@ -33,6 +33,7 @@ from importlib.metadata import version
  from autocoder.rag.stream_event import event_writer
  from autocoder.rag.relevant_utils import DocFilterResult
  from pydantic import BaseModel
+ from byzerllm.utils.types import SingleOutputMeta

  try:
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -284,6 +285,7 @@ class LongContextRAG:
  def build(self):
  pass

+
  def search(self, query: str) -> List[SourceCode]:
  target_query = query
  only_contexts = False
@@ -344,6 +346,7 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
  ):
  try:
  return self._stream_chat_oai(
@@ -351,11 +354,42 @@ class LongContextRAG:
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
+ extra_request_params=extra_request_params
  )
  except Exception as e:
  logger.error(f"Error in stream_chat_oai: {str(e)}")
  traceback.print_exc()
  return ["出现错误,请稍后再试。"], []
+
+
+ def _stream_chatfrom_openai_sdk(self,response):
+ for chunk in response:
+ if hasattr(chunk, "usage") and chunk.usage:
+ input_tokens_count = chunk.usage.prompt_tokens
+ generated_tokens_count = chunk.usage.completion_tokens
+ else:
+ input_tokens_count = 0
+ generated_tokens_count = 0
+
+ if not chunk.choices:
+ if last_meta:
+ yield ("", SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content="",
+ finish_reason=last_meta.finish_reason))
+ continue
+
+ content = chunk.choices[0].delta.content or ""
+
+ reasoning_text = ""
+ if hasattr(chunk.choices[0].delta, "reasoning_content"):
+ reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+ last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content=reasoning_text,
+ finish_reason=chunk.choices[0].finish_reason)
+ yield (content, last_meta)

  def _stream_chat_oai(
  self,
@@ -363,128 +397,136 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
- ):
+ extra_request_params: Dict[str, Any] = {}
+ ):
  if self.client:
  model = model or self.args.model
  response = self.client.chat.completions.create(
  model=model,
  messages=conversations,
  stream=True,
- max_tokens=self.args.rag_params_max_tokens
- )
-
- def response_generator():
- for chunk in response:
- if chunk.choices[0].delta.content is not None:
- yield chunk.choices[0].delta.content
+ max_tokens=self.args.rag_params_max_tokens,
+ extra_body=extra_request_params
+ )
+ return self._stream_chatfrom_openai_sdk(response), []

- return response_generator(), []
- else:
-
- target_llm = self.llm
- if self.llm.get_sub_client("qa_model"):
- target_llm = self.llm.get_sub_client("qa_model")
+ target_llm = self.llm
+ if self.llm.get_sub_client("qa_model"):
+ target_llm = self.llm.get_sub_client("qa_model")

- query = conversations[-1]["content"]
- context = []
+ query = conversations[-1]["content"]
+ context = []

- if (
- "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
- in query
- or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
- in query
- ):
+ if (
+ "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
+ in query
+ or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
+ in query
+ ):

- chunks = target_llm.stream_chat_oai(
- conversations=conversations,
+ chunks = target_llm.stream_chat_oai(
+ conversations=conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk
+ return generate_chunks(), context
+
+ try:
+ request_params = json.loads(query)
+ if "request_id" in request_params:
+ request_id = request_params["request_id"]
+ index = request_params["index"]
+
+ file_path = event_writer.get_event_file_path(request_id)
+ logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
+ events = []
+ if not os.path.exists(file_path):
+ return [],context
+
+ with open(file_path, "r") as f:
+ for line in f:
+ event = json.loads(line)
+ if event["index"] >= index:
+ events.append(event)
+ return [json.dumps({
+ "events": [event for event in events],
+ },ensure_ascii=False)], context
+ except json.JSONDecodeError:
+ pass
+
+ if self.args.without_contexts and LLMComputeEngine is not None:
+ llm_compute_engine = LLMComputeEngine(
+ llm=target_llm,
+ inference_enhance=not self.args.disable_inference_enhance,
+ inference_deep_thought=self.args.inference_deep_thought,
+ inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
+ precision=self.args.inference_compute_precision,
+ data_cells_max_num=self.args.data_cells_max_num,
+ )
+ conversations = conversations[:-1]
+ new_conversations = llm_compute_engine.process_conversation(
+ conversations, query, []
+ )
+ chunks = llm_compute_engine.stream_chat_oai(
+ conversations=new_conversations,
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
  delta_mode=True,
+ extra_request_params=extra_request_params
  )
- return (chunk[0] for chunk in chunks), context

- try:
- request_params = json.loads(query)
- if "request_id" in request_params:
- request_id = request_params["request_id"]
- index = request_params["index"]
-
- file_path = event_writer.get_event_file_path(request_id)
- logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
- events = []
- if not os.path.exists(file_path):
- return [],context
-
- with open(file_path, "r") as f:
- for line in f:
- event = json.loads(line)
- if event["index"] >= index:
- events.append(event)
- return [json.dumps({
- "events": [event for event in events],
- },ensure_ascii=False)], context
- except json.JSONDecodeError:
- pass
-
- if self.args.without_contexts and LLMComputeEngine is not None:
- llm_compute_engine = LLMComputeEngine(
- llm=target_llm,
- inference_enhance=not self.args.disable_inference_enhance,
- inference_deep_thought=self.args.inference_deep_thought,
- inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
- precision=self.args.inference_compute_precision,
- data_cells_max_num=self.args.data_cells_max_num,
- )
- conversations = conversations[:-1]
- new_conversations = llm_compute_engine.process_conversation(
- conversations, query, []
- )
-
- return (
- llm_compute_engine.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- ),
- context,
- )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk

+ return (
+ generate_chunks(),
+ context,
+ )

- only_contexts = False
- try:
- v = json.loads(query)
- if "only_contexts" in v:
- query = v["query"]
- only_contexts = v["only_contexts"]
- conversations[-1]["content"] = query
- except json.JSONDecodeError:
- pass

- logger.info(f"Query: {query} only_contexts: {only_contexts}")
- start_time = time.time()
-
+ only_contexts = False
+ try:
+ v = json.loads(query)
+ if "only_contexts" in v:
+ query = v["query"]
+ only_contexts = v["only_contexts"]
+ conversations[-1]["content"] = query
+ except json.JSONDecodeError:
+ pass
+
+ logger.info(f"Query: {query} only_contexts: {only_contexts}")
+ start_time = time.time()
+

- rag_stat = RAGStat(
- recall_stat=RecallStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.recall_llm.default_model_name,
- ),
- chunk_stat=ChunkStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.chunk_llm.default_model_name,
- ),
- answer_stat=AnswerStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.qa_llm.default_model_name,
- ),
- )
+ rag_stat = RAGStat(
+ recall_stat=RecallStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.recall_llm.default_model_name,
+ ),
+ chunk_stat=ChunkStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.chunk_llm.default_model_name,
+ ),
+ answer_stat=AnswerStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.qa_llm.default_model_name,
+ ),
+ )

+ context = []
+ def generate_sream():
+ nonlocal context
  doc_filter_result = self._filter_docs(conversations)

  rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
@@ -659,42 +701,41 @@ class LongContextRAG:
  llm_config=llm_config,
  delta_mode=True,
  )
+
+ for chunk in chunks:
+ yield chunk
+ if chunk[1] is not None:
+ rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+ self._print_rag_stats(rag_stat)
+ else:
+ new_conversations = conversations[:-1] + [
+ {
+ "role": "user",
+ "content": self._answer_question.prompt(
+ query=query,
+ relevant_docs=[doc.source_code for doc in relevant_docs],
+ ),
+ }
+ ]

- def generate_chunks():
- for chunk in chunks:
- yield chunk[0]
- if chunk[1] is not None:
- rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
- rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
- self._print_rag_stats(rag_stat)
- return generate_chunks(), context
-
- new_conversations = conversations[:-1] + [
- {
- "role": "user",
- "content": self._answer_question.prompt(
- query=query,
- relevant_docs=[doc.source_code for doc in relevant_docs],
- ),
- }
- ]
-
- chunks = target_llm.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- )
-
- def generate_chunks():
+ chunks = target_llm.stream_chat_oai(
+ conversations=new_conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+
  for chunk in chunks:
- yield chunk[0]
+ yield chunk
  if chunk[1] is not None:
  rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
  rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
  self._print_rag_stats(rag_stat)
- return generate_chunks(), context
+
+ return generate_sream(),context

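When an OpenAI-compatible client is configured, the net effect of the long_context_rag.py changes above is that extra_request_params is forwarded as the SDK's extra_body and the stream is re-emitted as (content, SingleOutputMeta) pairs by the new _stream_chatfrom_openai_sdk helper. A rough standalone sketch of that SDK-level pattern, with a placeholder endpoint, model name, and extra field:

```python
# Standalone sketch of the extra_body pass-through pattern; base_url, api_key,
# model and the "enable_thinking" field are placeholders, not values used by
# auto-coder itself.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="dummy")
extra_request_params = {"enable_thinking": True}  # hypothetical extra field

response = client.chat.completions.create(
    model="placeholder-model",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
    max_tokens=512,
    extra_body=extra_request_params,  # same mechanism the new code uses
)
for chunk in response:
    # Each streamed chunk carries a delta; empty chunks (e.g. usage-only) are skipped.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
```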
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.278"
+ __version__ = "0.1.279"