auto-coder-web 0.1.95__py3-none-any.whl → 0.1.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ """
2
+ Message parser module for processing agentic messages.
3
+ This module provides functionality to parse and process messages from different tools.
4
+ """
5
+ from .message_parser import parse_message, register_parser, parse_messages
6
+
7
+ # Import tool parsers to register them
8
+ from . import tool_parsers
9
+
10
+ __all__ = ['parse_message', 'register_parser', 'parse_messages']
@@ -0,0 +1,79 @@
1
+ """
2
+ Message parser for processing agentic messages.
3
+
4
+ This module provides a registry-based approach to parse and process messages
5
+ from different tools. New parsers can be easily registered to handle different
6
+ tool types.
7
+ """
8
+ import json
9
+ from typing import Dict, Any, Callable, Optional, List, TypeVar, cast
10
+
11
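+ # Type definitions: T is a generic type variable; ParserFunc (below) is the parser signature - it takes the decoded content object and the original message and returns the processed message, or None to decline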
12
+ T = TypeVar('T')
13
+ ParserFunc = Callable[[Dict[str, Any], Dict[str, Any]], Optional[Dict[str, Any]]]
14
+
15
+ # Registry to store message parsers
16
+ _PARSERS: Dict[str, ParserFunc] = {}
17
+
18
def register_parser(tool_name: str):
    """
    Build a decorator that records a parser function under ``tool_name``.

    Args:
        tool_name: Key under which the decorated parser is stored in the
            module-level ``_PARSERS`` registry. Registering another parser
            under the same name replaces the earlier entry.

    Returns:
        A decorator that stores the function it receives and hands the very
        same function back, so the decorated name stays directly callable.
    """
    def _store(func: ParserFunc) -> ParserFunc:
        # Record the parser, then return it untouched
        _PARSERS.update({tool_name: func})
        return func

    return _store
32
+
33
def parse_message(message: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run a message through the registered parsers and return the first result.

    The message's ``content`` is decoded as JSON and offered to every parser
    in ``_PARSERS`` (in the registry's iteration order). Each parser decides
    for itself whether it handles the message; the first non-``None`` result
    is returned.

    Args:
        message: The message to parse. This function does not modify it.

    Returns:
        The first non-``None`` parser result, or a shallow copy of ``message``
        when ``content`` is not a string, is not valid JSON, or no parser
        accepts it.
    """
    content = message.get("content", "")
    if not isinstance(content, str):
        return message.copy()

    # Ordinary chat text is not JSON; that is expected, not an error.
    try:
        content_obj = json.loads(content)
    except json.JSONDecodeError:
        return message.copy()

    for parser in _PARSERS.values():
        try:
            result = parser(content_obj, message)
        except (TypeError, AttributeError):
            # Best effort: a parser that chokes on an unexpected payload shape
            # must not stop the remaining parsers from being tried.
            continue
        if result is not None:
            return result

    return message.copy()
65
+
66
def parse_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Apply ``parse_message`` to every message, preserving order.

    Args:
        messages: Messages to process.

    Returns:
        A new list holding the processed counterpart of each input message.
    """
    return list(map(parse_message, messages))
77
+
78
+ # Tool-specific parsers are defined in tool_parsers.py
79
+ # and automatically registered when that module is imported
@@ -0,0 +1,70 @@
1
+ """
2
+ Tool-specific parsers for processing messages from different tools.
3
+
4
+ This module contains parser implementations for various tools.
5
+ New parsers can be added here and will be automatically registered.
6
+ """
7
+ import json
8
+ from typing import Dict, Any, Optional
9
+ from .message_parser import register_parser
10
+
11
@register_parser("ReadFileTool")
def read_file_tool_parser(
    content_obj: Dict[str, Any],
    message: Dict[str, Any],
    max_content_length: int = 200,
) -> Optional[Dict[str, Any]]:
    """
    Parser for ReadFileTool messages.

    Shortens the file content carried in the payload: when it is a string
    longer than ``max_content_length`` characters, it is cut to that length
    and ``"..."`` is appended (so the stored text is ``max_content_length + 3``
    characters long). Neither ``content_obj`` nor ``message`` is modified.

    Args:
        content_obj: The decoded content object.
        message: The original message.
        max_content_length: Number of characters of file content to keep
            before the ``"..."`` marker. Defaults to 200.

    Returns:
        A copy of ``message`` (with ``content`` re-serialized when truncation
        happened) if the payload is a ReadFileTool result, ``None`` otherwise.
    """
    required_keys = ("success", "message", "content")
    is_read_file_result = (
        isinstance(content_obj, dict)
        and content_obj.get("tool_name") == "ReadFileTool"
        and all(key in content_obj for key in required_keys)
    )
    if not is_read_file_result:
        return None

    processed_message = message.copy()
    file_content = content_obj["content"]
    if isinstance(file_content, str) and len(file_content) > max_content_length:
        # Build a new payload instead of editing the caller's dict in place
        shortened = {
            **content_obj,
            "content": file_content[:max_content_length] + "...",
        }
        processed_message["content"] = json.dumps(shortened)

    return processed_message
39
+
40
+ # Example of how to add more parsers in the future:
41
+ #
42
+ # @register_parser("CodeSearchTool")
43
+ # def code_search_tool_parser(content_obj: Dict[str, Any], message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
44
+ # """
45
+ # Parser for CodeSearchTool messages.
46
+ # Truncates search results if they're too long.
47
+ #
48
+ # Args:
49
+ # content_obj: The parsed content object
50
+ # message: The original message
51
+ #
52
+ # Returns:
53
+ # The processed message if this parser can handle it, None otherwise
54
+ # """
55
+ # # Validate if this is a CodeSearchTool message
56
+ # if not (isinstance(content_obj, dict) and
57
+ # content_obj.get("tool_name") == "CodeSearchTool" and
58
+ # "success" in content_obj and
59
+ # "message" in content_obj and
60
+ # "content" in content_obj):
61
+ # return None
62
+ #
63
+ # # Process the content
64
+ # processed_message = message.copy()
65
+ # if isinstance(content_obj["content"], list) and len(content_obj["content"]) > 5:
66
+ # content_obj["message"] = f"Showing first 5 of {len(content_obj['content'])} results"
67
+ # content_obj["content"] = content_obj["content"][:5]
68
+ # processed_message["content"] = json.dumps(content_obj)
69
+ #
70
+ # return processed_message
@@ -16,7 +16,7 @@ def _get_chat_list_file_path(project_path: str, name: str) -> str:
16
16
  chat_lists_dir = _get_chat_lists_dir(project_path)
17
17
  return os.path.join(chat_lists_dir, f"{name}.json")
18
18
 
19
- async def save_chat_list(project_path: str, name: str, messages: List[Dict[str, Any]]) -> None:
19
+ async def save_chat_list(project_path: str, name: str, messages: List[Dict[str, Any]], metadata: dict = None) -> None:
20
20
  """
21
21
  保存聊天列表到文件
22
22
 
@@ -24,14 +24,21 @@ async def save_chat_list(project_path: str, name: str, messages: List[Dict[str,
24
24
  project_path: 项目路径
25
25
  name: 聊天列表名称
26
26
  messages: 聊天消息列表
27
+ metadata: 聊天元数据
27
28
 
28
29
  Raises:
29
30
  Exception: 如果保存失败
30
31
  """
31
32
  file_path = _get_chat_list_file_path(project_path, name)
32
33
  try:
34
+ data = {
35
+ "name": name,
36
+ "messages": messages
37
+ }
38
+ if metadata is not None:
39
+ data["metadata"] = metadata
33
40
  async with aiofiles.open(file_path, 'w') as f:
34
- await f.write(json.dumps({"messages": messages}, indent=2, ensure_ascii=False))
41
+ await f.write(json.dumps(data, indent=2, ensure_ascii=False))
35
42
  except Exception as e:
36
43
  logger.error(f"Error saving chat list {name}: {str(e)}")
37
44
  raise e
@@ -73,18 +80,7 @@ async def get_chat_lists(project_path: str) -> List[str]:
73
80
 
74
81
  async def get_chat_list(project_path: str, name: str) -> Dict[str, Any]:
75
82
  """
76
- 获取特定聊天列表的内容
77
-
78
- Args:
79
- project_path: 项目路径
80
- name: 聊天列表名称
81
-
82
- Returns:
83
- 聊天列表内容
84
-
85
- Raises:
86
- FileNotFoundError: 如果聊天列表不存在
87
- Exception: 如果读取失败
83
+ 获取特定聊天列表的内容(兼容旧结构)
88
84
  """
89
85
  file_path = _get_chat_list_file_path(project_path, name)
90
86
  if not os.path.exists(file_path):
@@ -93,7 +89,13 @@ async def get_chat_list(project_path: str, name: str) -> Dict[str, Any]:
93
89
  try:
94
90
  async with aiofiles.open(file_path, 'r') as f:
95
91
  content = await f.read()
96
- return json.loads(content)
92
+ data = json.loads(content)
93
+ # 兼容旧数据结构(只有messages)
94
+ if "name" not in data:
95
+ data["name"] = name
96
+ if "metadata" not in data:
97
+ data["metadata"] = None
98
+ return data
97
99
  except json.JSONDecodeError as e:
98
100
  logger.error(f"Invalid JSON in chat list {name}: {str(e)}")
99
101
  raise Exception(f"Invalid JSON in chat list file: {str(e)}")
@@ -33,8 +33,8 @@ router = APIRouter()
33
33
  @router.post("/api/chat-lists/save")
34
34
  async def save_chat_list_endpoint(chat_list: ChatList, project_path: str = Depends(get_project_path)):
35
35
  try:
36
- # 调用管理模块保存聊天列表
37
- await save_chat_list(project_path, chat_list.name, chat_list.messages)
36
+ # 调用管理模块保存聊天列表,支持 metadata
37
+ await save_chat_list(project_path, chat_list.name, chat_list.messages, metadata=chat_list.metadata.dict() if chat_list.metadata else None)
38
38
  return {"status": "success", "message": f"Chat list {chat_list.name} saved successfully"}
39
39
  except Exception as e:
40
40
  raise HTTPException(status_code=500, detail=str(e))
@@ -8,6 +8,11 @@ import os
8
8
  from autocoder.rag.token_counter import count_tokens
9
9
  import aiofiles
10
10
  from loguru import logger
11
+ from autocoder.rag.loaders import (
12
+ extract_text_from_pdf,
13
+ extract_text_from_docx,
14
+ extract_text_from_ppt
15
+ )
11
16
 
12
17
  router = APIRouter()
13
18
 
@@ -147,6 +152,53 @@ async def auto_create_groups(
147
152
  raise HTTPException(status_code=500, detail=str(e))
148
153
 
149
154
 
155
async def _read_file(file_path_to_read: str) -> str:
    """Read a file's content on a worker thread so the event loop is not blocked.

    The reader is picked from the lower-cased file extension: ``.pdf``,
    ``.docx`` and ``.pptx``/``.ppt`` go through the matching
    ``extract_text_from_*`` loader; every other extension is read as UTF-8
    text with undecodable bytes replaced.

    Args:
        file_path_to_read: Path of the file to read.

    Returns:
        str: The file content as produced by the selected reader.
    """
    def _from_pdf():
        logger.info(f"Extracting text from PDF: {file_path_to_read}")
        return extract_text_from_pdf(file_path_to_read)

    def _from_docx():
        logger.info(f"Extracting text from DOCX: {file_path_to_read}")
        return extract_text_from_docx(file_path_to_read)

    def _from_slides():
        logger.info(f"Extracting text from PPT/PPTX: {file_path_to_read}")
        # One headed section per slide, sections separated by a blank line
        return "\n\n".join(
            f"--- Slide {slide_id} ---\n{slide_text}"
            for slide_id, slide_text in extract_text_from_ppt(file_path_to_read)
        )

    def _from_plain_text():
        logger.info(f"Reading plain text file: {file_path_to_read}")
        with open(file_path_to_read, 'r', encoding='utf-8', errors='replace') as handle:
            return handle.read()

    suffix = os.path.splitext(file_path_to_read)[1].lower()

    # Extension -> reader; anything not listed falls back to plain text
    readers_by_suffix = {
        '.pdf': _from_pdf,
        '.docx': _from_docx,
        '.pptx': _from_slides,
        '.ppt': _from_slides,
    }
    reader = readers_by_suffix.get(suffix, _from_plain_text)

    # The readers are blocking, so run the chosen one in a thread
    return await asyncio.to_thread(reader)
200
+
201
+
150
202
  async def count_tokens_from_file(file_path: str) -> int:
151
203
  """异步计算文件的token数
152
204
 
@@ -156,19 +208,17 @@ async def count_tokens_from_file(file_path: str) -> int:
156
208
  Returns:
157
209
  int: token数量,出错时返回0
158
210
  """
159
- try:
160
- if not os.path.exists(file_path):
161
- logger.warning(f"文件不存在: {file_path}")
162
- return 0
163
-
211
+ try:
164
212
  logger.info(f"计算文件token: {file_path}")
165
- async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
166
- content = await f.read()
213
+ content = await _read_file(file_path)
214
+
215
+ if content is None:
216
+ return 0
167
217
 
168
218
  file_tokens = count_tokens(content)
169
219
  return file_tokens if file_tokens > 0 else 0
170
220
  except Exception as e:
171
- logger.error(f"读取或计算文件token出错: {file_path}, 错误: {str(e)}")
221
+ logger.error(f"计算文件token出错: {file_path}, 错误: {str(e)}")
172
222
  return 0
173
223
 
174
224
 
@@ -69,22 +69,30 @@ def ensure_task_dir(project_path: str) -> str:
69
69
 
70
70
  @byzerllm.prompt()
71
71
  def coding_prompt(messages: List[Dict[str, Any]], query: str):
72
- '''
73
- 下面是我们已经产生的一个消息列表,其中 USER_RESPONSE 表示用户的输入,其他都是你的输出:
74
- <messages>
72
+ '''
73
+ 【历史对话】按时间顺序排列,从旧到新:
75
74
  {% for message in messages %}
76
75
  <message>
77
- <type>{{ message.type }}</type>
78
- <content>{{ message.content }}</content>
76
+ {% if message.type == "USER" or message.type == "USER_RESPONSE" or message.metadata.path == "/agent/edit/tool/result" %}【用户】{% else %}【助手】{% endif %}
77
+ <content>
78
+ {{ message.content }}
79
+ </content>
79
80
  </message>
80
81
  {% endfor %}
81
- </messages>
82
82
 
83
- 下面是用户的最新需求:
84
- <request>
85
- {{ query }}
86
- </request>
83
+ 【当前问题】用户的最新需求如下:
84
+ <current_query>
85
+ {{ query }}
86
+ </current_query>
87
87
  '''
88
+ # 使用消息解析器处理消息
89
+ from auto_coder_web.agentic_message_parser import parse_messages
90
+ processed_messages = parse_messages(messages)
91
+
92
+ return {
93
+ "messages": processed_messages,
94
+ "query": query
95
+ }
88
96
 
89
97
 
90
98
 
@@ -148,13 +156,13 @@ async def auto_command(request: AutoCommandRequest, project_path: str = Depends(
148
156
 
149
157
  # 调用auto_command_wrapper方法
150
158
  logger.info(f"Executing auto command {file_id} with prompt: {prompt_text}")
151
- result = wrapper.auto_command_wrapper(prompt_text, {
159
+ wrapper.auto_command_wrapper(prompt_text, {
152
160
  "event_file_id": file_id
153
161
  })
154
- get_event_manager(event_file).write_completion(
155
- EventContentCreator.create_completion(
156
- "200", "completed", result).to_dict()
157
- )
162
+ # get_event_manager(event_file).write_completion(
163
+ # EventContentCreator.create_completion(
164
+ # "200", "completed", result).to_dict()
165
+ # )
158
166
  logger.info(f"Event file id: {file_id} completed successfully")
159
167
  except Exception as e:
160
168
  logger.error(f"Error executing auto command {file_id}: {str(e)}")
auto_coder_web/types.py CHANGED
@@ -24,9 +24,15 @@ class CompletionResponse(BaseModel):
24
24
  completions: List[CompletionItem]
25
25
 
26
26
 
27
+ class ChatMetadata(BaseModel): # optional metadata stored alongside a chat list; every field may be omitted
28
+ token_usage: Optional[int] = None # tokens consumed
29
+ cost: Optional[float] = None # cost
30
+ window_size: Optional[int] = None # window size
31
+
27
32
  class ChatList(BaseModel):
28
33
  name: str
29
34
  messages: List[Dict[str, Any]]
35
+ metadata: Optional[ChatMetadata] = None # newly added metadata field; optional, defaults to None
30
36
 
31
37
 
32
38
  class HistoryQuery(BaseModel):
auto_coder_web/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.95"
1
+ __version__ = "0.1.97"