auto-coder 0.1.277__py3-none-any.whl → 0.1.279__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.

Potentially problematic release.

This version of auto-coder might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.277
+ Version: 0.1.279
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
  Requires-Dist: jupyter-client
  Requires-Dist: prompt-toolkit
  Requires-Dist: tokenizers
- Requires-Dist: byzerllm[saas] >=0.1.169
+ Requires-Dist: byzerllm[saas] >=0.1.170
  Requires-Dist: patch
  Requires-Dist: diff-match-patch
  Requires-Dist: GitPython
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
- autocoder/version.py,sha256=aQmPFbK421hxX_q_qH6lzGAzBJ-yTN3E_wgJvqVGg9k,23
+ autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -26,7 +26,7 @@ autocoder/agent/planner.py,sha256=SZTSZHxHzDmuWZo3K5fs79RwvJLWurg-nbJRRNbX65o,91
  autocoder/agent/project_reader.py,sha256=tWLaPoLw1gI6kO_NzivQj28KbobU2ceOLuppHMbfGl8,18234
  autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/commands/auto_command.py,sha256=Qv5hObiIioIOAVkucFB2na6aLUuxSIgLVO9PfnXwyIo,52496
+ autocoder/commands/auto_command.py,sha256=3ZQvG_JX2oWxTv_xiXQDQwMfTAVK-Tynqo6mC9fXb60,52671
  autocoder/commands/tools.py,sha256=lanjoBGR6H8HDJSY3KrM6ibrtHZbgKX6mKJHSSE66dg,20493
  autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
  autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
@@ -75,7 +75,7 @@ autocoder/common/result_manager.py,sha256=nBcFRj5reBC7vp13M91f4B8iPW8B8OehayHlUd
  autocoder/common/screenshots.py,sha256=_gA-z1HxGjPShBrtgkdideq58MG6rqFB2qMUJKjrycs,3769
  autocoder/common/search.py,sha256=245iPFgWhMldoUK3CqCP89ltaxZiNPK73evoG6Fp1h8,16518
  autocoder/common/search_replace.py,sha256=GphFkc57Hb673CAwmbiocqTbw8vrV7TrZxtOhD0332g,22147
- autocoder/common/shells.py,sha256=-5j45qb1SVmkZaORqDZ5EM2zJ16b5QGM1wHDfBfGejk,18944
+ autocoder/common/shells.py,sha256=elminFpNosnV0hsEUcsugDxlGO8NfH96uah-8bkaBvA,19929
  autocoder/common/stats_panel.py,sha256=wGl9O45pjVVDxhNumLv4_NfLYSlUP_18Tw4hcJSjw50,4596
  autocoder/common/sys_prompt.py,sha256=JlexfjZt554faqbgkCmzOJqYUzDHfbnxly5ugFfHfEE,26403
  autocoder/common/text.py,sha256=KGRQq314GHBmY4MWG8ossRoQi1_DTotvhxchpn78c-k,1003
@@ -107,11 +107,11 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
+ autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
  autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
- autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
- autocoder/rag/long_context_rag.py,sha256=qFlNmbgQnstCSCb0SxfkMEYtZRr8p6YEc6u0jpve4Q0,32002
+ autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
+ autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -165,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- auto_coder-0.1.277.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.277.dist-info/METADATA,sha256=m2MjOLFknaEjczW5V_NfTL4jj7bikJe0jbn_tuYRfdQ,2643
- auto_coder-0.1.277.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- auto_coder-0.1.277.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.277.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.277.dist-info/RECORD,,
+ auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
+ auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.279.dist-info/RECORD,,
autocoder/commands/auto_command.py CHANGED
@@ -23,6 +23,7 @@ from autocoder.rag.token_counter import count_tokens
  from autocoder.common.global_cancel import global_cancel
  from autocoder.common.auto_configure import config_readme
  from autocoder.utils.auto_project_type import ProjectTypeAnalyzer
+ from rich.text import Text

  class CommandMessage(BaseModel):
  role: str
@@ -435,8 +436,10 @@ class CommandAutoTuner:
  "command_execution_result",
  action=action
  )
+ # 转义内容,避免Rich将内容中的[]解释为markup语法
+ text_content = Text(truncated_content)
  console.print(Panel(
- truncated_content,
+ text_content,
  title=title,
  border_style="blue",
  padding=(1, 2)
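The Chinese comment introduced above translates roughly to: "escape the content so that Rich does not interpret `[]` inside it as markup syntax." Wrapping the panel body in a `rich.text.Text` object achieves that. Below is a minimal standalone sketch (not code from the package) of the difference; passing the raw string would let Rich treat bracketed segments such as `[bold]` as style tags, which is exactly what command output should not trigger.

```python
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.text import Text

console = Console()
raw = "result: [bold]looks like markup[/bold] and [1, 2, 3]"

# A Text object is rendered character-for-character, so the brackets
# survive exactly as the command printed them.
console.print(Panel(Text(raw), title="literal output"))

# Equivalent alternative: escape the markup before printing a plain string.
console.print(Panel(escape(raw), title="escaped output"))
```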
autocoder/common/shells.py CHANGED
@@ -11,6 +11,9 @@ from rich.panel import Panel
  from rich.text import Text
  from rich.live import Live
  import getpass
+ from rich.markup import escape
+ import threading
+ import queue

  from autocoder.common.result_manager import ResultManager

@@ -356,7 +359,7 @@ def _win_code_page_to_encoding(code_page: str) -> str:

  def execute_shell_command(command: str):
  """
- Execute a shell command with cross-platform encoding support.
+ Execute a shell command with cross-platform encoding support and streaming output.

  Args:
  command (str): The shell command to execute
@@ -446,81 +449,95 @@ set PYTHONIOENCODING=utf-8
  encoding='utf-8', # 直接指定 UTF-8 编码
  errors='replace', # 处理无法解码的字符
  env=env, # 传递修改后的环境变量
- startupinfo=startupinfo
+ startupinfo=startupinfo,
+ bufsize=1, # Line buffering for immediate flushing
+ universal_newlines=True # Use text mode to handle platform line endings
  )

  # Safe decoding helper (for binary output)
- def safe_decode(byte_stream, encoding):
- if isinstance(byte_stream, str):
- return byte_stream.strip()
+ def safe_decode(text, encoding):
+ if isinstance(text, str):
+ return text.strip()
  try:
- # 首先尝试 UTF-8
- return byte_stream.decode('utf-8').strip()
+ # Try UTF-8 first
+ return text.decode('utf-8').strip()
  except UnicodeDecodeError:
  try:
- # 如果失败,尝试 GBK
- return byte_stream.decode('gbk').strip()
+ # If that fails, try GBK
+ return text.decode('gbk').strip()
  except UnicodeDecodeError:
- # 最后使用替换模式
- return byte_stream.decode(encoding, errors='replace').strip()
+ # Finally use replacement mode
+ return text.decode(encoding, errors='replace').strip()

  output = []
- with Live(console=console, refresh_per_second=4) as live:
- while True:
- # Read output streams
- output_bytes = process.stdout.readline()
- error_bytes = process.stderr.readline()
-
- # Handle standard output
- if output_bytes:
- output_line = safe_decode(output_bytes, encoding)
- output.append(output_line)
- live.update(
- Panel(
- Text("\n".join(output[-20:])),
- title="Shell Output",
- border_style="green",
- )
- )
-
- # Handle error output
- if error_bytes:
- error_line = safe_decode(error_bytes, encoding)
- output.append(f"ERROR: {error_line}")
- live.update(
- Panel(
- Text("\n".join(output[-20:])),
- title="Shell Output",
- border_style="red",
- )
- )
-
- # Check if process has ended
- if process.poll() is not None:
+ # Use direct printing for streaming output, not a Live object
+ console.print(f"[bold blue]Running command:[/bold blue] {command}")
+ console.print("[bold blue]Output streaming:[/bold blue]")
+
+ output_queue = queue.Queue()
+
+ def read_stream(stream, stream_name):
+ """Read data from stream and put in queue"""
+ for line in stream:
+ line = line.rstrip() if isinstance(line, str) else safe_decode(line, encoding)
+ prefix = "[ERROR] " if stream_name == "stderr" else ""
+ output_queue.put((stream_name, f"{prefix}{line}"))
+ output_queue.put((stream_name, None)) # Mark stream end
+
+ # Create threads to read stdout and stderr
+ stdout_thread = threading.Thread(target=read_stream, args=(process.stdout, "stdout"))
+ stderr_thread = threading.Thread(target=read_stream, args=(process.stderr, "stderr"))
+ stdout_thread.daemon = True
+ stderr_thread.daemon = True
+ stdout_thread.start()
+ stderr_thread.start()
+
+ # Track number of active streams
+ active_streams = 2
+
+ # Process output from queue
+ while active_streams > 0:
+ try:
+ stream_name, line = output_queue.get(timeout=0.1)
+ if line is None:
+ active_streams -= 1
+ continue
+
+ output.append(line)
+ # Print each line directly for true streaming output
+ if stream_name == "stderr":
+ console.print(f"[red]{line}[/red]")
+ else:
+ console.print(line)
+
+ except queue.Empty:
+ # Check if process is still running
+ if process.poll() is not None and active_streams == 2:
+ # If process ended but threads are still running, may have no output
  break
-
- # Get remaining output
- remaining_out, remaining_err = process.communicate()
- if remaining_out:
- output.append(safe_decode(remaining_out, encoding))
- if remaining_err:
- output.append(f"ERROR: {safe_decode(remaining_err, encoding)}")
+ continue
+
+ # Wait for threads to finish
+ stdout_thread.join()
+ stderr_thread.join()

- result_manager.add_result(content="\n".join(output),meta={
+ # Wait for process to end and get return code
+ return_code = process.wait()
+
+ # Compile results
+ result_content = "\n".join(output)
+ result_manager.add_result(content=result_content, meta={
  "action": "execute_shell_command",
  "input": {
  "command": command
- }
+ },
+ "return_code": return_code
  })
- # Show final output
- console.print(
- Panel(
- Text("\n".join(output)),
- title="Final Output",
- border_style="blue",
- subtitle=f"Encoding: {encoding} | OS: {sys.platform}"
- )
- )
+
+ # Show command completion info
+ completion_message = f"Command completed with return code: {return_code}"
+ style = "green" if return_code == 0 else "red"
+ console.print(f"[bold {style}]{escape(completion_message)}[/bold {style}]")

  except FileNotFoundError:
  result_manager.add_result(content=f"[bold red]Command not found:[/bold red] [yellow]{command}[/yellow]",meta={
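The rewritten body of `execute_shell_command` drops the `rich.live.Live` polling loop in favour of one daemon reader thread per pipe feeding a shared `queue.Queue`, printing lines as they arrive and recording the process return code. The following self-contained sketch shows the same pattern with plain `print` in place of the package's Rich console and `ResultManager`; all names in it are illustrative, not auto-coder's. Draining both pipes concurrently also avoids the stall a single `readline()` loop can hit when the other pipe's buffer fills up.

```python
import queue
import subprocess
import threading


def run_streaming(command: str) -> int:
    """Run a shell command, echoing stdout/stderr lines as they arrive."""
    process = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,              # decode to str for us
        encoding="utf-8",
        errors="replace",
        bufsize=1,              # line-buffered, so lines show up promptly
    )
    out_q: "queue.Queue[tuple[str, str | None]]" = queue.Queue()

    def pump(stream, name: str) -> None:
        # Each reader thread forwards its lines, then a None sentinel.
        for line in stream:
            out_q.put((name, line.rstrip()))
        out_q.put((name, None))

    threads = [
        threading.Thread(target=pump, args=(process.stdout, "stdout"), daemon=True),
        threading.Thread(target=pump, args=(process.stderr, "stderr"), daemon=True),
    ]
    for t in threads:
        t.start()

    active = len(threads)
    while active:
        name, line = out_q.get()  # blocks until a line or a sentinel arrives
        if line is None:
            active -= 1
            continue
        prefix = "[ERROR] " if name == "stderr" else ""
        print(f"{prefix}{line}")

    for t in threads:
        t.join()
    return process.wait()


if __name__ == "__main__":
    print("return code:", run_streaming("echo hello && echo oops 1>&2"))
```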
autocoder/rag/api_server.py CHANGED
@@ -103,9 +103,7 @@ async def create_chat_completion(
  NOTE: Currently we do not support the following features:
  - function_call (Users should implement this by themselves)
  - logit_bias (to be supported by vLLM engine)
- """
- # async with async_timeout.timeout(TIMEOUT):
-
+ """
  generator = await openai_serving_chat.create_chat_completion(body, request)
  if isinstance(generator, ErrorResponse):
  return JSONResponse(
autocoder/rag/llm_wrapper.py CHANGED
@@ -1,61 +1,91 @@
- from typing import Any, Dict, List, Optional, Union,Callable
- from byzerllm.utils.client.types import (
+ from typing import Any, Dict, List, Optional, Union, Callable
+ from byzerllm.utils.client.types import (
  LLMFunctionCallResponse,
- LLMClassResponse,LLMResponse
+ LLMClassResponse, LLMResponse
  )
  import pydantic
  from byzerllm import ByzerLLM
  from byzerllm.utils.client import LLMResponse
  from byzerllm.utils.types import SingleOutputMeta
  from autocoder.rag.simple_rag import SimpleRAG
+ from autocoder.rag.long_context_rag import LongContextRAG
  from loguru import logger
  from byzerllm.utils.langutil import asyncfy_with_semaphore

+
  class LLWrapper:

- def __init__(self,llm:ByzerLLM,rag:SimpleRAG):
+ def __init__(self, llm: ByzerLLM, rag: Union[SimpleRAG, LongContextRAG]):
  self.llm = llm
  self.rag = rag

  def chat_oai(self,
  conversations,
- tools:List[Union[Callable,str]]=[],
- tool_choice:Optional[Union[Callable,str]]=None,
- execute_tool:bool=False,
- impl_func:Optional[Callable]=None,
- execute_impl_func:bool=False,
- impl_func_params:Optional[Dict[str,Any]]=None,
- func_params:Optional[Dict[str,Any]]=None,
- response_class:Optional[Union[pydantic.BaseModel,str]] = None,
- response_after_chat:Optional[Union[pydantic.BaseModel,str]] = False,
- enable_default_sys_message:bool=True,
- model:Optional[str] = None,
- role_mapping=None,llm_config:Dict[str,Any]={}
- )->Union[List[LLMResponse],List[LLMFunctionCallResponse],List[LLMClassResponse]]:
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- s = "".join(res)
- return [LLMResponse(output=s,metadata={},input="")]
-
- def stream_chat_oai(self,conversations,
- model:Optional[str]=None,
- role_mapping=None,
- delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
-
- async def async_stream_chat_oai(self,conversations,
- model:Optional[str]=None,
+ tools: List[Union[Callable, str]] = [],
+ tool_choice: Optional[Union[Callable, str]] = None,
+ execute_tool: bool = False,
+ impl_func: Optional[Callable] = None,
+ execute_impl_func: bool = False,
+ impl_func_params: Optional[Dict[str, Any]] = None,
+ func_params: Optional[Dict[str, Any]] = None,
+ response_class: Optional[Union[pydantic.BaseModel, str]] = None,
+ response_after_chat: Optional[Union[pydantic.BaseModel, str]] = False,
+ enable_default_sys_message: bool = True,
+ model: Optional[str] = None,
+ role_mapping=None,
+ llm_config: Dict[str, Any] = {},
+ only_return_prompt: bool = False,
+ extra_request_params: Dict[str, Any] = {}
+ ) -> Union[List[LLMResponse], List[LLMFunctionCallResponse], List[LLMClassResponse]]:
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+ metadata = {"request_id":""}
+ output = ""
+ for chunk in res:
+ output += chunk[0]
+ metadata["input_tokens_count"] = chunk[1].input_tokens_count
+ metadata["generated_tokens_count"] = chunk[1].generated_tokens_count
+ metadata["reasoning_content"] = chunk[1].reasoning_content
+ metadata["finish_reason"] = chunk[1].finish_reason
+ metadata["first_token_time"] = chunk[1].first_token_time
+
+ return [LLMResponse(output=output, metadata=metadata, input="")]
+
+ def stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
  role_mapping=None,
  delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+
+ if isinstance(res, tuple):
+ for (t, metadata) in res:
+ yield (t, SingleOutputMeta(
+ input_tokens_count=metadata.get("input_tokens_count", 0),
+ generated_tokens_count=metadata.get(
+ "generated_tokens_count", 0),
+ reasoning_content=metadata.get("reasoning_content", ""),
+ finish_reason=metadata.get("finish_reason", "stop"),
+ first_token_time=metadata.get("first_token_time", 0)
+ ))
+ else:
+ for t in res:
+ yield (t, SingleOutputMeta(0, 0))
+
+ async def async_stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
+ role_mapping=None,
+ delta_mode=False,
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations, llm_config=llm_config, extra_request_params=extra_request_params))()
  # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
+ for t in res:
+ yield t

- def __getattr__(self, name):
- return getattr(self.llm, name)
+ def __getattr__(self, name):
+ return getattr(self.llm, name)
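In the rewritten `chat_oai`, the wrapper no longer joins a list of plain strings; it walks a `(text, meta)` stream and folds each chunk's token counts and finish reason into a single metadata dict returned with the final `LLMResponse`. A rough standalone illustration of that folding step follows; `ChunkMeta` is a stand-in for byzerllm's `SingleOutputMeta`, not its real definition.

```python
from dataclasses import dataclass
from typing import Dict, Iterable, Tuple


@dataclass
class ChunkMeta:
    # Illustrative stand-in for the SingleOutputMeta fields used in the diff.
    input_tokens_count: int = 0
    generated_tokens_count: int = 0
    reasoning_content: str = ""
    finish_reason: str = "stop"
    first_token_time: float = 0.0


def collect(stream: Iterable[Tuple[str, ChunkMeta]]) -> Tuple[str, Dict]:
    """Concatenate streamed text; keep the metadata of the last chunk seen."""
    output, metadata = "", {"request_id": ""}
    for text, meta in stream:
        output += text
        metadata.update(
            input_tokens_count=meta.input_tokens_count,
            generated_tokens_count=meta.generated_tokens_count,
            reasoning_content=meta.reasoning_content,
            finish_reason=meta.finish_reason,
            first_token_time=meta.first_token_time,
        )
    return output, metadata


chunks = [("Hel", ChunkMeta(12, 1)), ("lo", ChunkMeta(12, 2))]
print(collect(chunks))  # ('Hello', {...counts taken from the final chunk...})
```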
autocoder/rag/long_context_rag.py CHANGED
@@ -33,6 +33,7 @@ from importlib.metadata import version
  from autocoder.rag.stream_event import event_writer
  from autocoder.rag.relevant_utils import DocFilterResult
  from pydantic import BaseModel
+ from byzerllm.utils.types import SingleOutputMeta

  try:
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -284,6 +285,7 @@ class LongContextRAG:
  def build(self):
  pass

+
  def search(self, query: str) -> List[SourceCode]:
  target_query = query
  only_contexts = False
@@ -344,6 +346,7 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
  ):
  try:
  return self._stream_chat_oai(
@@ -351,11 +354,42 @@
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
+ extra_request_params=extra_request_params
  )
  except Exception as e:
  logger.error(f"Error in stream_chat_oai: {str(e)}")
  traceback.print_exc()
  return ["出现错误,请稍后再试。"], []
+
+
+ def _stream_chatfrom_openai_sdk(self,response):
+ for chunk in response:
+ if hasattr(chunk, "usage") and chunk.usage:
+ input_tokens_count = chunk.usage.prompt_tokens
+ generated_tokens_count = chunk.usage.completion_tokens
+ else:
+ input_tokens_count = 0
+ generated_tokens_count = 0
+
+ if not chunk.choices:
+ if last_meta:
+ yield ("", SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content="",
+ finish_reason=last_meta.finish_reason))
+ continue
+
+ content = chunk.choices[0].delta.content or ""
+
+ reasoning_text = ""
+ if hasattr(chunk.choices[0].delta, "reasoning_content"):
+ reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+ last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
+ generated_tokens_count=generated_tokens_count,
+ reasoning_content=reasoning_text,
+ finish_reason=chunk.choices[0].finish_reason)
+ yield (content, last_meta)

  def _stream_chat_oai(
  self,
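The new `_stream_chatfrom_openai_sdk` helper adapts the OpenAI SDK's streamed chunks into `(content, SingleOutputMeta)` tuples, reading `chunk.usage` when the server includes it and `delta.reasoning_content` when the backend exposes one (as written it assumes any usage-only, choices-less chunk arrives after at least one content chunk, since `last_meta` is only bound inside the loop). A hedged standalone sketch of the same adaptation against the `openai` v1 client, yielding a plain dict instead of `SingleOutputMeta`:

```python
from typing import Iterator, Tuple

from openai import OpenAI  # assumes the openai v1 SDK is installed


def stream_with_usage(client: OpenAI, model: str, messages: list) -> Iterator[Tuple[str, dict]]:
    """Yield (text, meta) pairs from a streamed chat completion."""
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        # Ask the server to append a final usage-only chunk; the official
        # OpenAI API supports this, other OpenAI-compatible servers may not.
        stream_options={"include_usage": True},
    )
    for chunk in response:
        usage = getattr(chunk, "usage", None)
        meta = {
            "input_tokens": usage.prompt_tokens if usage else 0,
            "generated_tokens": usage.completion_tokens if usage else 0,
            "finish_reason": chunk.choices[0].finish_reason if chunk.choices else None,
        }
        # Usage-only chunks have an empty choices list and carry no text.
        text = (chunk.choices[0].delta.content or "") if chunk.choices else ""
        yield text, meta
```

The `stream_options` flag is an assumption about the serving side; servers that ignore it simply never populate `usage`, in which case the token counts stay at zero.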
@@ -363,128 +397,136 @@ class LongContextRAG:
  model: Optional[str] = None,
  role_mapping=None,
  llm_config: Dict[str, Any] = {},
- ):
+ extra_request_params: Dict[str, Any] = {}
+ ):
  if self.client:
  model = model or self.args.model
  response = self.client.chat.completions.create(
  model=model,
  messages=conversations,
  stream=True,
- max_tokens=self.args.rag_params_max_tokens
- )
-
- def response_generator():
- for chunk in response:
- if chunk.choices[0].delta.content is not None:
- yield chunk.choices[0].delta.content
+ max_tokens=self.args.rag_params_max_tokens,
+ extra_body=extra_request_params
+ )
+ return self._stream_chatfrom_openai_sdk(response), []

- return response_generator(), []
- else:
-
- target_llm = self.llm
- if self.llm.get_sub_client("qa_model"):
- target_llm = self.llm.get_sub_client("qa_model")
+ target_llm = self.llm
+ if self.llm.get_sub_client("qa_model"):
+ target_llm = self.llm.get_sub_client("qa_model")

- query = conversations[-1]["content"]
- context = []
+ query = conversations[-1]["content"]
+ context = []

- if (
- "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
- in query
- or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
- in query
- ):
+ if (
+ "使用四到五个字直接返回这句话的简要主题,不要解释、不要标点、不要语气词、不要多余文本,不要加粗,如果没有主题"
+ in query
+ or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
+ in query
+ ):

- chunks = target_llm.stream_chat_oai(
- conversations=conversations,
+ chunks = target_llm.stream_chat_oai(
+ conversations=conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk
+ return generate_chunks(), context
+
+ try:
+ request_params = json.loads(query)
+ if "request_id" in request_params:
+ request_id = request_params["request_id"]
+ index = request_params["index"]
+
+ file_path = event_writer.get_event_file_path(request_id)
+ logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
+ events = []
+ if not os.path.exists(file_path):
+ return [],context
+
+ with open(file_path, "r") as f:
+ for line in f:
+ event = json.loads(line)
+ if event["index"] >= index:
+ events.append(event)
+ return [json.dumps({
+ "events": [event for event in events],
+ },ensure_ascii=False)], context
+ except json.JSONDecodeError:
+ pass
+
+ if self.args.without_contexts and LLMComputeEngine is not None:
+ llm_compute_engine = LLMComputeEngine(
+ llm=target_llm,
+ inference_enhance=not self.args.disable_inference_enhance,
+ inference_deep_thought=self.args.inference_deep_thought,
+ inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
+ precision=self.args.inference_compute_precision,
+ data_cells_max_num=self.args.data_cells_max_num,
+ )
+ conversations = conversations[:-1]
+ new_conversations = llm_compute_engine.process_conversation(
+ conversations, query, []
+ )
+ chunks = llm_compute_engine.stream_chat_oai(
+ conversations=new_conversations,
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
  delta_mode=True,
+ extra_request_params=extra_request_params
  )
- return (chunk[0] for chunk in chunks), context

- try:
- request_params = json.loads(query)
- if "request_id" in request_params:
- request_id = request_params["request_id"]
- index = request_params["index"]
-
- file_path = event_writer.get_event_file_path(request_id)
- logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
- events = []
- if not os.path.exists(file_path):
- return [],context
-
- with open(file_path, "r") as f:
- for line in f:
- event = json.loads(line)
- if event["index"] >= index:
- events.append(event)
- return [json.dumps({
- "events": [event for event in events],
- },ensure_ascii=False)], context
- except json.JSONDecodeError:
- pass
-
- if self.args.without_contexts and LLMComputeEngine is not None:
- llm_compute_engine = LLMComputeEngine(
- llm=target_llm,
- inference_enhance=not self.args.disable_inference_enhance,
- inference_deep_thought=self.args.inference_deep_thought,
- inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
- precision=self.args.inference_compute_precision,
- data_cells_max_num=self.args.data_cells_max_num,
- )
- conversations = conversations[:-1]
- new_conversations = llm_compute_engine.process_conversation(
- conversations, query, []
- )
-
- return (
- llm_compute_engine.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- ),
- context,
- )
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk

+ return (
+ generate_chunks(),
+ context,
+ )

- only_contexts = False
- try:
- v = json.loads(query)
- if "only_contexts" in v:
- query = v["query"]
- only_contexts = v["only_contexts"]
- conversations[-1]["content"] = query
- except json.JSONDecodeError:
- pass

- logger.info(f"Query: {query} only_contexts: {only_contexts}")
- start_time = time.time()
-
+ only_contexts = False
+ try:
+ v = json.loads(query)
+ if "only_contexts" in v:
+ query = v["query"]
+ only_contexts = v["only_contexts"]
+ conversations[-1]["content"] = query
+ except json.JSONDecodeError:
+ pass
+
+ logger.info(f"Query: {query} only_contexts: {only_contexts}")
+ start_time = time.time()
+

- rag_stat = RAGStat(
- recall_stat=RecallStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.recall_llm.default_model_name,
- ),
- chunk_stat=ChunkStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.chunk_llm.default_model_name,
- ),
- answer_stat=AnswerStat(
- total_input_tokens=0,
- total_generated_tokens=0,
- model_name=self.qa_llm.default_model_name,
- ),
- )
+ rag_stat = RAGStat(
+ recall_stat=RecallStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.recall_llm.default_model_name,
+ ),
+ chunk_stat=ChunkStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.chunk_llm.default_model_name,
+ ),
+ answer_stat=AnswerStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.qa_llm.default_model_name,
+ ),
+ )

+ context = []
+ def generate_sream():
+ nonlocal context
  doc_filter_result = self._filter_docs(conversations)

  rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
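Throughout `long_context_rag.py` the new `extra_request_params` argument is threaded down to the underlying call; in the OpenAI-client branch above it lands in `extra_body`, which the `openai` v1 SDK merges into the request JSON. A minimal sketch of that mechanism follows; the endpoint, model name, and extra fields are placeholders, not values from auto-coder.

```python
from openai import OpenAI

# base_url, api_key and model are placeholders for whatever
# OpenAI-compatible endpoint the RAG client is configured against.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-used")

extra_request_params = {"top_k": 20, "repetition_penalty": 1.05}  # hypothetical extras

stream = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
    # extra_body is merged into the outgoing JSON payload, which lets callers
    # forward server-specific parameters the SDK does not model explicitly.
    extra_body=extra_request_params,
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
```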
@@ -659,42 +701,41 @@
  llm_config=llm_config,
  delta_mode=True,
  )
+
+ for chunk in chunks:
+ yield chunk
+ if chunk[1] is not None:
+ rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+ self._print_rag_stats(rag_stat)
+ else:
+ new_conversations = conversations[:-1] + [
+ {
+ "role": "user",
+ "content": self._answer_question.prompt(
+ query=query,
+ relevant_docs=[doc.source_code for doc in relevant_docs],
+ ),
+ }
+ ]

- def generate_chunks():
- for chunk in chunks:
- yield chunk[0]
- if chunk[1] is not None:
- rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
- rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
- self._print_rag_stats(rag_stat)
- return generate_chunks(), context
-
- new_conversations = conversations[:-1] + [
- {
- "role": "user",
- "content": self._answer_question.prompt(
- query=query,
- relevant_docs=[doc.source_code for doc in relevant_docs],
- ),
- }
- ]
-
- chunks = target_llm.stream_chat_oai(
- conversations=new_conversations,
- model=model,
- role_mapping=role_mapping,
- llm_config=llm_config,
- delta_mode=True,
- )
-
- def generate_chunks():
+ chunks = target_llm.stream_chat_oai(
+ conversations=new_conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ extra_request_params=extra_request_params
+ )
+
  for chunk in chunks:
- yield chunk[0]
+ yield chunk
  if chunk[1] is not None:
  rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
  rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
  self._print_rag_stats(rag_stat)
- return generate_chunks(), context
+
+ return generate_sream(),context

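The restructured tail of `_stream_chat_oai` (this hunk and the previous one) moves the answer pipeline into a single outer generator, `generate_sream` [sic], which yields full `(text, meta)` chunks and accumulates token counts into `rag_stat` as they stream past. Below is a toy reduction of that pass-through accounting pattern, with illustrative names only, not the package's code.

```python
from dataclasses import dataclass
from typing import Dict, Iterator, Tuple


@dataclass
class AnswerStat:
    total_input_tokens: int = 0
    total_generated_tokens: int = 0


def stream_with_stats(chunks: Iterator[Tuple[str, Dict[str, int]]],
                      stat: AnswerStat) -> Iterator[Tuple[str, Dict[str, int]]]:
    """Pass chunks through unchanged while folding token counts into `stat`."""
    for text, meta in chunks:
        yield text, meta
        stat.total_input_tokens += meta.get("input_tokens_count", 0)
        stat.total_generated_tokens += meta.get("generated_tokens_count", 0)
    print(f"answer stats: {stat}")  # reported once the stream is exhausted


fake = iter([("Hi ", {"input_tokens_count": 30, "generated_tokens_count": 1}),
             ("there", {"input_tokens_count": 0, "generated_tokens_count": 2})])
stat = AnswerStat()
print("".join(text for text, _ in stream_with_stats(fake, stat)))
```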
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.277"
+ __version__ = "0.1.279"