auto-coder 0.1.308__py3-none-any.whl → 0.1.310__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: auto-coder
3
- Version: 0.1.308
3
+ Version: 0.1.310
4
4
  Summary: AutoCoder: AutoCoder
5
5
  Author: allwefantasy
6
6
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -61,6 +61,7 @@ Requires-Dist: pydub
61
61
  Requires-Dist: youtube-transcript-api
62
62
  Requires-Dist: SpeechRecognition
63
63
  Requires-Dist: pathvalidate
64
+ Requires-Dist: setuptools
64
65
  Requires-Dist: mcp ; python_version >= "3.10"
65
66
 
66
67
  <p align="center">
@@ -1,7 +1,7 @@
1
1
  autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  autocoder/auto_coder.py,sha256=ifhdnd39tOIDu_4LdYTxjVCnwmpDoOC90RRwD8bhIKU,65983
3
3
  autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,3229
4
- autocoder/auto_coder_rag.py,sha256=5TtAfbEBwyt-cB4WcI8eQ1G3AuKij0056wFYRViDhLs,34036
4
+ autocoder/auto_coder_rag.py,sha256=vOqwBHdK_KwMNUUc8ji_tlZ5DoALAG1rDjWAic3rM-4,34561
5
5
  autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
6
6
  autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
7
7
  autocoder/auto_coder_runner.py,sha256=bvd1UXYzVT2L-I2ZCkdxy9Ap8P2Q6F2JD-F7QLvaIPc,106545
@@ -9,12 +9,12 @@ autocoder/auto_coder_server.py,sha256=E3Z829TPSooRSNhuh3_x9yaZi0f5G0Lm0ntoZhjGao
9
9
  autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
10
10
  autocoder/chat_auto_coder.py,sha256=Cp5_m3pCxEDcRrVG1uojTfD8xecdl9FvYtD948TvLsg,25223
11
11
  autocoder/chat_auto_coder_lang.py,sha256=p1SUPw1_YBHK69yNViXr6iFhHL-PjFnrXExA2mXJ5ko,21655
12
- autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
12
+ autocoder/command_args.py,sha256=Sfn3TVCoijSm937ZFT_JTsjRIB1gtUr-OZvnWLeS2s8,30732
13
13
  autocoder/command_parser.py,sha256=fx1g9E6GaM273lGTcJqaFQ-hoksS_Ik2glBMnVltPCE,10013
14
14
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
15
15
  autocoder/models.py,sha256=AyoZ-Pzy0oyYUmWCxOIRiOImsqboSfRET7LO9-UOuxI,11172
16
16
  autocoder/run_context.py,sha256=IUfSO6_gp2Wt1blFWAmOpN0b0nDrTTk4LmtCYUBIoro,1643
17
- autocoder/version.py,sha256=Cn-FGSwetliy8k_Sn6xMPmQzEopzQ5Jw26xsX1g7uA8,23
17
+ autocoder/version.py,sha256=Yva2ub3_rI3hSMIe4yqnO-D1-Taf21vJw07BOhnUd5E,23
18
18
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  autocoder/agent/auto_demand_organizer.py,sha256=URAq0gSEiHeV_W4zwhOI_83kHz0Ryfj1gcfh5jwCv_w,6501
20
20
  autocoder/agent/auto_filegroup.py,sha256=pBsAkBcpFTff-9L5OwI8xhf2xPKpl-aZwz-skF2B6dc,6296
@@ -74,6 +74,7 @@ autocoder/common/mcp_server.py,sha256=1SCtpBRmN299xWX-0aV0imWS2CX6zBUOZBocbV_J6B
74
74
  autocoder/common/mcp_tools.py,sha256=YdEhDzRnwAr2J3D-23ExIQFWbrNO-EUpIxg179qs9Sw,12666
75
75
  autocoder/common/memory_manager.py,sha256=Xx6Yv0ULxVfcFfmD36hdHFFhxCgRAs-5fTd0fLHJrpQ,3773
76
76
  autocoder/common/model_speed_test.py,sha256=U48xUUpOnbwUal1cdij4YAn_H2PD2pNaqrMHaYtQRfI,15200
77
+ autocoder/common/openai_content.py,sha256=M_V_UyHrqNVWjgrYvxfAupZw2I0Nr3iilYv6SxSvfLA,8091
77
78
  autocoder/common/printer.py,sha256=P1WU0QjlfnjqTP5uA55GkHZCpFzRPFkc34DMMandreg,2023
78
79
  autocoder/common/recall_validation.py,sha256=Avt9Q9dX3kG6Pf2zsdlOHmsjd-OeSj7U1PFBDp_Cve0,1700
79
80
  autocoder/common/result_manager.py,sha256=nBcFRj5reBC7vp13M91f4B8iPW8B8OehayHlUdeAt1g,3776
@@ -131,13 +132,13 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
131
132
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
132
133
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
133
134
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- autocoder/rag/api_server.py,sha256=gsk450_B-qGtBwJ1niG9-QFJAG0RGr2s2KdiMrzzbyQ,9582
135
+ autocoder/rag/api_server.py,sha256=StGyxrM-7-W2vYHJq-i_Fv-MHrl9UgVWY272Hd-6VJ4,13090
135
136
  autocoder/rag/conversation_to_queries.py,sha256=xwmErn4WbdADnhK1me-h_6fV3KYrl_y1qPNQl1aoI6o,4810
136
137
  autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
137
138
  autocoder/rag/document_retriever.py,sha256=5BDqKVJqLPScEnua5S5suXhWuCaALIfPf5obXeJoWfs,8461
138
139
  autocoder/rag/lang.py,sha256=_jmUtxZDG1fmF4b2mhMJbYS1YQDb2ZE8nyAn5_vrvjA,3350
139
140
  autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
140
- autocoder/rag/long_context_rag.py,sha256=6rqq0pvYe9N4TvyLwd2OB21ZUrPC4FfxZuks0weAz4A,41935
141
+ autocoder/rag/long_context_rag.py,sha256=RE4xse3XxSC_HQA5erqrx6MhanP_29mBRdYOTJQZYGc,42106
141
142
  autocoder/rag/qa_conversation_strategy.py,sha256=1AcHV0MU00yTls20LlCPO-Un_OhSrr_p-H5lxLleAq4,6060
142
143
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
143
144
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
@@ -198,9 +199,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
198
199
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
200
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=xuBeWD0YOckqRo8JB1WkVIMOYH6c24m7JfV4svBfPDo,15113
200
201
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
201
- auto_coder-0.1.308.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
202
- auto_coder-0.1.308.dist-info/METADATA,sha256=9Eqj3xOim16B-cWJRnUGOaF16HLZeCYW2wIIRaNJWAk,2721
203
- auto_coder-0.1.308.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
204
- auto_coder-0.1.308.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
205
- auto_coder-0.1.308.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
206
- auto_coder-0.1.308.dist-info/RECORD,,
202
+ auto_coder-0.1.310.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
203
+ auto_coder-0.1.310.dist-info/METADATA,sha256=zh8Gtsl5ahulVrleWS6UchHXOiJfeG-8SFeikpSffSg,2747
204
+ auto_coder-0.1.310.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
205
+ auto_coder-0.1.310.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
206
+ auto_coder-0.1.310.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
207
+ auto_coder-0.1.310.dist-info/RECORD,,
@@ -289,7 +289,11 @@ def main(input_args: Optional[List[str]] = None):
289
289
  serve_parser.add_argument("--ssl_keyfile", default="", help="")
290
290
  serve_parser.add_argument("--ssl_certfile", default="", help="")
291
291
  serve_parser.add_argument("--response_role", default="assistant", help="")
292
- serve_parser.add_argument("--doc_dir", default="", help="")
292
+ serve_parser.add_argument(
293
+ "--doc_dir",
294
+ default="",
295
+ help="Document directory path, also used as the root directory for serving static files"
296
+ )
293
297
  serve_parser.add_argument("--enable_local_image_host", action="store_true", help=" enable local image host for local Chat app")
294
298
  serve_parser.add_argument("--tokenizer_path", default=tokenizer_path, help="")
295
299
  serve_parser.add_argument(
@@ -305,7 +309,17 @@ def main(input_args: Optional[List[str]] = None):
305
309
  action="store_true",
306
310
  help="Monitor mode for the doc update",
307
311
  )
308
-
312
+ serve_parser.add_argument(
313
+ "--max_static_path_length",
314
+ type=int,
315
+ default=3000,
316
+ help="Maximum length allowed for static file paths (larger value to better support Chinese characters)"
317
+ )
318
+ serve_parser.add_argument(
319
+ "--enable_nginx_x_accel",
320
+ action="store_true",
321
+ help="Enable Nginx X-Accel-Redirect for static file serving when behind Nginx"
322
+ )
309
323
  serve_parser.add_argument(
310
324
  "--disable_auto_window",
311
325
  action="store_true",
autocoder/command_args.py CHANGED
@@ -433,7 +433,11 @@ def parse_args(input_args: Optional[List[str]] = None) -> AutoCoderArgs:
433
433
  doc_serve_parse.add_argument("--ssl_certfile", default="", help="")
434
434
  doc_serve_parse.add_argument(
435
435
  "--response_role", default="assistant", help="")
436
- doc_serve_parse.add_argument("--doc_dir", default="", help="")
436
+ doc_serve_parse.add_argument(
437
+ "--doc_dir",
438
+ default="",
439
+ help="Document directory path, also used as the root directory for serving static files"
440
+ )
437
441
  doc_serve_parse.add_argument("--tokenizer_path", default="", help="")
438
442
  doc_serve_parse.add_argument(
439
443
  "--collections", default="", help="Collection name for indexing"
@@ -453,6 +457,12 @@ def parse_args(input_args: Optional[List[str]] = None) -> AutoCoderArgs:
453
457
  action="store_true",
454
458
  help="Monitor mode for the doc update",
455
459
  )
460
+ doc_serve_parse.add_argument(
461
+ "--max_static_path_length",
462
+ type=int,
463
+ default=1000,
464
+ help="Maximum length allowed for static file paths"
465
+ )
456
466
 
457
467
  agent_parser = subparsers.add_parser("agent", help="Run an agent")
458
468
  agent_subparsers = agent_parser.add_subparsers(dest="agent_command")
@@ -0,0 +1,256 @@
1
+ from typing import Any, Dict, List, Optional, Union
2
+ import base64
3
+ import os
4
+ from enum import Enum
5
+ from pydantic import BaseModel, Field, validator
6
+
7
+
8
class ContentType(str, Enum):
    """Discriminator values for the parts of a structured chat message.

    Members also derive from ``str``, so each one compares equal to its raw
    string value.
    """

    # Part that carries a ``text`` payload.
    TEXT = "text"
    # Part that carries an ``image_url`` payload.
    IMAGE_URL = "image_url"
12
+
13
+
14
class ImageUrl(BaseModel):
    """Holder for the ``url`` of an image attached to a chat message."""

    url: str = Field(..., description="URL of the image, can be http(s) or data URI")

    @validator('url')
    def validate_url(cls, v):
        """Accept http(s) URLs and ``data:image/`` URIs; reject anything else.

        Raises:
            ValueError: If ``v`` starts with none of the accepted prefixes.
        """
        accepted_prefixes = ('http://', 'https://', 'data:image/')
        if not v.startswith(accepted_prefixes):
            raise ValueError("Image URL must be http(s) or data URI format")
        return v
27
+
28
+
29
class TextContent(BaseModel):
    """A text part of a structured chat message."""

    # Discriminator for this part; defaults to the text content type.
    type: str = ContentType.TEXT
    # The text carried by this part.
    text: str
33
+
34
+
35
class ImageUrlContent(BaseModel):
    """An image part of a structured chat message."""

    # Discriminator for this part; defaults to the image-url content type.
    type: str = ContentType.IMAGE_URL
    # Either a bare URL string or an ``ImageUrl`` wrapper.
    image_url: Union[str, ImageUrl]

    @validator('image_url')
    def validate_image_url(cls, v):
        """Wrap a bare string in ``ImageUrl``; return any other value as-is."""
        return ImageUrl(url=v) if isinstance(v, str) else v
46
+
47
+
48
+ ContentItem = Union[TextContent, ImageUrlContent]
49
+
50
+
51
class OpenAIMessage(BaseModel):
    """A single chat message in OpenAI format."""

    # Role of the sender.
    role: str
    # Either plain text or a list of structured content parts.
    content: Union[str, List[ContentItem]]
    # Optional sender name.
    name: Optional[str] = None
56
+
57
+
58
class OpenAIConversation(BaseModel):
    """A conversation expressed as a list of chat messages."""

    # Messages that make up the conversation.
    messages: List[OpenAIMessage]
61
+
62
+
63
def is_structured_content(content: Any) -> bool:
    """
    Tell whether ``content`` is a non-empty list of dicts that each carry a
    ``'type'`` key.

    Args:
        content: The value to inspect

    Returns:
        bool: True only for a non-empty list in which every element is a
        dict containing ``'type'``; False for anything else
    """
    # Anything that is not a list, or is an empty list, is not structured.
    if not isinstance(content, list) or not content:
        return False

    # A single element that is not a typed dict disqualifies the whole list.
    for entry in content:
        if not isinstance(entry, dict) or 'type' not in entry:
            return False
    return True
81
+
82
+
83
def encode_image_to_base64(image_path: str) -> str:
    """
    Read an image file and return it as a base64 ``data:`` URI.

    The MIME type is taken from the file extension. A small built-in table
    covers the common formats; other extensions are looked up with
    ``mimetypes`` and used only when the result is an ``image/*`` type.
    Anything else falls back to ``image/jpeg``.

    Args:
        image_path: Path to the image file

    Returns:
        str: A string of the form ``data:<mime>;base64,<payload>``

    Raises:
        FileNotFoundError: If ``image_path`` does not exist
    """
    # Local import keeps this block self-contained; only the fallback needs it.
    import mimetypes

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')

    # Determine content type based on file extension
    file_ext = os.path.splitext(image_path)[1].lower()
    known_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    content_type = known_types.get(file_ext)
    if content_type is None:
        # Guess from the extension alone so that unusual characters in the
        # directory part of the path cannot influence the lookup.
        guessed, _ = mimetypes.guess_type("file" + file_ext)
        if guessed and guessed.startswith('image/'):
            content_type = guessed
        else:
            # Keep the historical default for unknown or non-image extensions.
            content_type = 'image/jpeg'

    return f"data:{content_type};base64,{encoded_string}"
110
+
111
+
112
def create_text_content(text: str) -> TextContent:
    """
    Build a ``TextContent`` item holding the given text.

    Args:
        text: The text to wrap

    Returns:
        TextContent: A new text content item
    """
    text_item = TextContent(text=text)
    return text_item
123
+
124
+
125
def create_image_content(image_path_or_url: str) -> ImageUrlContent:
    """
    Build an ``ImageUrlContent`` item from a URL, a data URI, or a file path.

    Args:
        image_path_or_url: An ``http(s)://`` URL, a ``data:`` URI, or a path
            to an image file

    Returns:
        ImageUrlContent: A new image content item
    """
    already_a_url = image_path_or_url.startswith(('http://', 'https://', 'data:'))
    if already_a_url:
        # URLs and data URIs are used verbatim.
        image_source = image_path_or_url
    else:
        # Anything else is treated as a file path and encoded.
        image_source = encode_image_to_base64(image_path_or_url)
    return ImageUrlContent(image_url=image_source)
141
+
142
+
143
def normalize_content(content: Any) -> Union[str, List[ContentItem]]:
    """
    Normalize content to either a string or a list of structured content items.

    Handling, in order:
      * a string is returned unchanged;
      * a non-empty list made up only of ``TextContent`` / ``ImageUrlContent``
        objects is returned as a shallow copy, not stringified;
      * a non-empty list of dicts that each carry a ``'type'`` key is
        converted item by item; items whose type is neither text nor
        image_url are dropped;
      * anything else is converted with ``str()``.

    Args:
        content: The content to normalize

    Returns:
        Union[str, List[ContentItem]]: Normalized content

    Raises:
        KeyError: If a text item has no ``'text'`` key or an image item has
            no ``'image_url'`` key
    """
    if isinstance(content, str):
        return content

    # Already-built content items are the list shape that create_message
    # advertises; previously they fell through to str(content).
    if isinstance(content, list) and content and all(
        isinstance(item, (TextContent, ImageUrlContent)) for item in content
    ):
        return list(content)

    if is_structured_content(content):
        normalized_items = []
        for item in content:
            if item['type'] == ContentType.TEXT:
                normalized_items.append(create_text_content(item['text']))
            elif item['type'] == ContentType.IMAGE_URL:
                normalized_items.append(ImageUrlContent(image_url=item['image_url']))
            # Items of any other type are skipped.
        return normalized_items

    # If it's neither a string nor structured content, convert to string
    return str(content)
167
+
168
+
169
def create_message(role: str, content: Union[str, List[ContentItem]], name: Optional[str] = None) -> OpenAIMessage:
    """
    Build an ``OpenAIMessage`` after normalizing its content.

    Args:
        role: Role of the message sender (system, user, assistant)
        content: Message content, passed through ``normalize_content``
        name: Optional name of the sender

    Returns:
        OpenAIMessage: The constructed chat message
    """
    normalized = normalize_content(content)
    return OpenAIMessage(role=role, content=normalized, name=name)
186
+
187
+
188
def process_conversations(conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Flatten a list of chat messages so that every message has plain-text content.

    For each message:
      * ``role`` is required and copied through;
      * missing or ``None`` content, and an empty content list, become ``""``;
      * structured content (a list of dicts with a ``'type'`` key) is reduced
        to its text parts, joined with newlines; non-text parts such as
        images are dropped, as are text parts whose ``text`` is ``None``;
      * any other content is converted with ``str()``;
      * a truthy ``name`` is copied through.

    Example: a user message whose content is a text part "What's in this
    image?" followed by an image_url part comes back as
    ``{"role": "user", "content": "What's in this image?"}``.

    Args:
        conversations: Messages in possibly mixed formats

    Returns:
        List[Dict[str, Any]]: New message dicts, each with ``role`` and
        string ``content`` (plus ``name`` when present); the input is not
        modified

    Raises:
        ValueError: If a message has no ``role`` key
    """
    processed_conversations = []

    for message in conversations:
        if "role" not in message:
            raise ValueError(f"Message missing 'role' field: {message}")

        content = message.get("content")

        if content is None or (isinstance(content, list) and not content):
            # Absent content and an empty parts list both mean "no text";
            # an empty list used to be rendered as the literal string "[]".
            processed_content = ""
        elif is_structured_content(content):
            # Keep only the text parts; skip None so the join cannot fail on it.
            processed_content = '\n'.join(
                item['text']
                for item in content
                if item.get('type') == ContentType.TEXT
                and item.get('text') is not None
            )
        else:
            processed_content = str(content)

        processed_message = {"role": message["role"], "content": processed_content}

        if message.get("name"):
            processed_message["name"] = message["name"]

        processed_conversations.append(processed_message)

    return processed_conversations
@@ -49,6 +49,8 @@ TIMEOUT_KEEP_ALIVE = 5 # seconds
49
49
  # timeout in 10 minutes. Streaming can take longer than 3 min
50
50
  TIMEOUT = float(os.environ.get("BYZERLLM_APISERVER_HTTP_TIMEOUT", 600))
51
51
 
52
+ # Static file serving security settings
53
+
52
54
  router_app = FastAPI()
53
55
 
54
56
 
@@ -178,46 +180,51 @@ async def embed(body: EmbeddingCompletionRequest):
178
180
  )
179
181
 
180
182
  @router_app.get("/static/{full_path:path}")
181
- async def serve_image(full_path: str, request: Request):
182
-
183
- allowed_file_type = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp']
183
+ async def serve_static_file(full_path: str, request: Request):
184
184
 
185
- if any(full_path.endswith(ext) for ext in allowed_file_type):
186
- try:
187
- # 获取文件的完整路径,并进行URL解码
188
- file_path = unquote(full_path)
189
- # 使用 os.path.normpath 来标准化路径,自动处理不同操作系统的路径分隔符
190
- file_path = os.path.normpath(file_path)
191
- if not os.path.isabs(file_path):
192
- file_path = os.path.join("/", file_path)
193
-
194
- # 检查文件是否存在
195
- if not os.path.exists(file_path):
196
- raise FileNotFoundError(f"File not found: {file_path}")
197
-
198
- # 异步读取文件内容
199
- async with aiofiles.open(file_path, "rb") as f:
200
- content = await f.read()
201
-
185
+ try:
186
+ # 路径安全检查已经在中间件中完成
187
+ # 直接使用规范化的路径
188
+ file_path = os.path.join("/", os.path.normpath(unquote(full_path)))
189
+
190
+ # 检查文件是否存在
191
+ if not os.path.exists(file_path):
192
+ raise FileNotFoundError(f"File not found: {file_path}")
193
+
194
+ # 如果启用了Nginx X-Accel-Redirect,使用X-Accel特性
195
+ if hasattr(request.app.state, "enable_nginx_x_accel") and request.app.state.enable_nginx_x_accel:
202
196
  # 获取文件的 MIME 类型
203
197
  content_type = mimetypes.guess_type(file_path)[0]
204
198
  if not content_type:
205
199
  content_type = "application/octet-stream"
206
200
 
207
- # 返回文件内容
208
- return Response(content=content, media_type=content_type)
209
- except FileNotFoundError as e:
210
- logger.error(f"Image not found: {str(e)}")
211
- raise HTTPException(status_code=404, detail=f"Image not found: {str(e)}")
212
- except PermissionError as e:
213
- logger.error(f"Permission denied: {str(e)}")
214
- raise HTTPException(status_code=403, detail=f"Permission denied: {str(e)}")
215
- except Exception as e:
216
- logger.error(f"Error serving image: {str(e)}")
217
- raise HTTPException(status_code=500, detail=f"Error serving image: {str(e)}")
218
-
219
- # 如果路径中没有图片, 返回 404
220
- raise HTTPException(status_code=404, detail="Only images are supported")
201
+ # 返回带X-Accel-Redirect头的响应
202
+ # 通过添加X-Accel-Redirect头告诉Nginx直接提供该文件
203
+ # 注意:Nginx配置必须正确设置内部路径映射
204
+ response = Response(content="", media_type=content_type)
205
+ response.headers["X-Accel-Redirect"] = f"/internal{file_path}"
206
+ return response
207
+
208
+ # 默认行为:异步读取文件内容
209
+ async with aiofiles.open(file_path, "rb") as f:
210
+ content = await f.read()
211
+
212
+ # 获取文件的 MIME 类型
213
+ content_type = mimetypes.guess_type(file_path)[0]
214
+ if not content_type:
215
+ content_type = "application/octet-stream"
216
+
217
+ # 返回文件内容
218
+ return Response(content=content, media_type=content_type)
219
+ except FileNotFoundError as e:
220
+ logger.error(f"File not found: {str(e)}")
221
+ raise HTTPException(status_code=404, detail=f"File not found: {str(e)}")
222
+ except PermissionError as e:
223
+ logger.error(f"Permission denied: {str(e)}")
224
+ raise HTTPException(status_code=403, detail=f"Permission denied: {str(e)}")
225
+ except Exception as e:
226
+ logger.error(f"Error serving file: {str(e)}")
227
+ raise HTTPException(status_code=500, detail=f"Error serving file: {str(e)}")
221
228
 
222
229
  class ServerArgs(BaseModel):
223
230
  host: str = None
@@ -234,14 +241,38 @@ class ServerArgs(BaseModel):
234
241
  response_role: str = "assistant"
235
242
  ssl_keyfile: str = None
236
243
  ssl_certfile: str = None
237
- doc_dir: str = ""
238
- tokenizer_path: Optional[str] = None
244
+ doc_dir: str = "" # Document directory path, also used as the root directory for serving static files
245
+ tokenizer_path: Optional[str] = None
246
+ max_static_path_length: int = int(os.environ.get("BYZERLLM_MAX_STATIC_PATH_LENGTH", 3000)) # Maximum length allowed for static file paths (larger value to better support Chinese characters)
247
+ enable_nginx_x_accel: bool = False # Enable Nginx X-Accel-Redirect for static file serving
239
248
 
240
249
  def serve(llm:ByzerLLM, args: ServerArgs):
241
250
 
242
251
  logger.info(f"ByzerLLM API server version {version}")
243
252
  logger.info(f"args: {args}")
244
253
 
254
+ # 设置静态文件路径长度限制
255
+ max_path_length = args.max_static_path_length
256
+ logger.info(f"Maximum static file path length: {max_path_length}")
257
+
258
+ # 存储Nginx X-Accel设置到应用状态
259
+ router_app.state.enable_nginx_x_accel = args.enable_nginx_x_accel
260
+ if args.enable_nginx_x_accel:
261
+ logger.info("Nginx X-Accel-Redirect enabled for static file serving")
262
+
263
+ # 确定允许访问的静态文件目录
264
+ # 优先级:1. 环境变量 BYZERLLM_ALLOWED_STATIC_DIR
265
+ # 2. 命令行参数 doc_dir
266
+ # 3. 默认值 "/tmp"
267
+ allowed_static_dir = os.environ.get("BYZERLLM_ALLOWED_STATIC_DIR")
268
+ if not allowed_static_dir and args.doc_dir:
269
+ allowed_static_dir = args.doc_dir
270
+ if not allowed_static_dir:
271
+ allowed_static_dir = "/tmp"
272
+
273
+ allowed_static_abs = os.path.abspath(allowed_static_dir)
274
+ logger.info(f"Static files root directory: {allowed_static_abs}")
275
+
245
276
  router_app.add_middleware(
246
277
  CORSMiddleware,
247
278
  allow_origins=args.allowed_origins,
@@ -250,6 +281,47 @@ def serve(llm:ByzerLLM, args: ServerArgs):
250
281
  allow_headers=args.allowed_headers,
251
282
  )
252
283
 
284
+ # Add static file security middleware
285
@router_app.middleware("http")
async def static_file_security(request: Request, call_next):
    """Reject ``/static/`` requests whose path is too long or outside the allowed root.

    Requests for other paths are forwarded untouched. For ``/static/``
    requests the remainder of the path is length-checked against
    ``max_path_length``, URL-decoded, normalized, anchored at ``/`` and then
    required to lie inside ``allowed_static_abs``.

    NOTE(review): the containment check is lexical; symlinks below the
    allowed root are not resolved here.
    """
    # Only apply to static routes
    if not request.url.path.startswith("/static/"):
        return await call_next(request)

    # Everything after the first "/static/" is the requested file path.
    full_path = request.url.path.split("/static/", 1)[1]
    path_length = len(full_path)

    if path_length > max_path_length:
        logger.warning(f"Path too long: {len(full_path)} > {max_path_length}")
        return JSONResponse(
            content={"error": "Path too long"},
            status_code=401
        )

    # Warn when the path length approaches the limit (80% of max)
    if path_length > (max_path_length * 0.8):
        logger.warning(f"Path length approaching limit: {len(full_path)} is {(len(full_path) / max_path_length * 100):.1f}% of max ({max_path_length})")

    # Decode and normalize, then anchor at the filesystem root.
    normalized_path = os.path.normpath(unquote(full_path))
    abs_path = os.path.abspath(os.path.join("/", normalized_path))

    # Compare whole path components. A plain string-prefix test would also
    # accept sibling directories such as "/tmp_other" when the root is "/tmp".
    try:
        common_root = os.path.commonpath([abs_path, allowed_static_abs])
        is_allowed = os.path.normcase(common_root) == os.path.normcase(allowed_static_abs)
    except ValueError:
        # Raised e.g. for paths on different drives: treat as outside the root.
        is_allowed = False

    if not is_allowed:
        logger.warning(f"Unauthorized path access: {abs_path}")
        return JSONResponse(
            content={"error": "Unauthorized path"},
            status_code=401
        )

    return await call_next(request)
324
+
253
325
  if token := os.environ.get("BYZERLLM_API_KEY") or args.api_key:
254
326
 
255
327
  @router_app.middleware("http")
@@ -40,6 +40,7 @@ from autocoder.rag.lang import get_message_with_format_and_newline
40
40
  from autocoder.rag.qa_conversation_strategy import get_qa_strategy
41
41
  from autocoder.rag.searchable import SearchableResults
42
42
  from autocoder.rag.conversation_to_queries import extract_search_queries
43
+ from autocoder.common import openai_content as OpenAIContentProcessor
43
44
  try:
44
45
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
45
46
  pro_version = version("auto-coder-pro")
@@ -348,7 +349,7 @@ class LongContextRAG:
348
349
  role_mapping=None,
349
350
  llm_config: Dict[str, Any] = {},
350
351
  extra_request_params: Dict[str, Any] = {}
351
- ):
352
+ ):
352
353
  try:
353
354
  return self._stream_chat_oai(
354
355
  conversations,
@@ -399,6 +400,7 @@ class LongContextRAG:
399
400
  llm_config: Dict[str, Any] = {},
400
401
  extra_request_params: Dict[str, Any] = {}
401
402
  ):
403
+ conversations = OpenAIContentProcessor.process_conversations(conversations)
402
404
  if self.client:
403
405
  model = model or self.args.model
404
406
  response = self.client.chat.completions.create(
@@ -415,6 +417,7 @@ class LongContextRAG:
415
417
  target_llm = self.llm.get_sub_client("qa_model")
416
418
 
417
419
  query = conversations[-1]["content"]
420
+
418
421
  context = []
419
422
 
420
423
  if (
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.308"
1
+ __version__ = "0.1.310"