PyPI - auto-coder - Versions diffs - 0.1.308__py3-none-any.whl → 0.1.310__py3-none-any.whl - Mend

auto-coder 0.1.308__py3-none-any.whl → 0.1.310__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (12) hide show

{auto_coder-0.1.308.dist-info → auto_coder-0.1.310.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.308
+Version: 0.1.310
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -61,6 +61,7 @@ Requires-Dist: pydub
 Requires-Dist: youtube-transcript-api
 Requires-Dist: SpeechRecognition
 Requires-Dist: pathvalidate
+Requires-Dist: setuptools
 Requires-Dist: mcp ; python_version >= "3.10"
 <p align="center">

{auto_coder-0.1.308.dist-info → auto_coder-0.1.310.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/auto_coder.py,sha256=ifhdnd39tOIDu_4LdYTxjVCnwmpDoOC90RRwD8bhIKU,65983
 autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,3229
-autocoder/auto_coder_rag.py,sha256=5TtAfbEBwyt-cB4WcI8eQ1G3AuKij0056wFYRViDhLs,34036
+autocoder/auto_coder_rag.py,sha256=vOqwBHdK_KwMNUUc8ji_tlZ5DoALAG1rDjWAic3rM-4,34561
 autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
 autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
 autocoder/auto_coder_runner.py,sha256=bvd1UXYzVT2L-I2ZCkdxy9Ap8P2Q6F2JD-F7QLvaIPc,106545
@@ -9,12 +9,12 @@ autocoder/auto_coder_server.py,sha256=E3Z829TPSooRSNhuh3_x9yaZi0f5G0Lm0ntoZhjGao
 autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
 autocoder/chat_auto_coder.py,sha256=Cp5_m3pCxEDcRrVG1uojTfD8xecdl9FvYtD948TvLsg,25223
 autocoder/chat_auto_coder_lang.py,sha256=p1SUPw1_YBHK69yNViXr6iFhHL-PjFnrXExA2mXJ5ko,21655
-autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
+autocoder/command_args.py,sha256=Sfn3TVCoijSm937ZFT_JTsjRIB1gtUr-OZvnWLeS2s8,30732
 autocoder/command_parser.py,sha256=fx1g9E6GaM273lGTcJqaFQ-hoksS_Ik2glBMnVltPCE,10013
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=AyoZ-Pzy0oyYUmWCxOIRiOImsqboSfRET7LO9-UOuxI,11172
 autocoder/run_context.py,sha256=IUfSO6_gp2Wt1blFWAmOpN0b0nDrTTk4LmtCYUBIoro,1643
-autocoder/version.py,sha256=Cn-FGSwetliy8k_Sn6xMPmQzEopzQ5Jw26xsX1g7uA8,23
+autocoder/version.py,sha256=Yva2ub3_rI3hSMIe4yqnO-D1-Taf21vJw07BOhnUd5E,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=URAq0gSEiHeV_W4zwhOI_83kHz0Ryfj1gcfh5jwCv_w,6501
 autocoder/agent/auto_filegroup.py,sha256=pBsAkBcpFTff-9L5OwI8xhf2xPKpl-aZwz-skF2B6dc,6296
@@ -74,6 +74,7 @@ autocoder/common/mcp_server.py,sha256=1SCtpBRmN299xWX-0aV0imWS2CX6zBUOZBocbV_J6B
 autocoder/common/mcp_tools.py,sha256=YdEhDzRnwAr2J3D-23ExIQFWbrNO-EUpIxg179qs9Sw,12666
 autocoder/common/memory_manager.py,sha256=Xx6Yv0ULxVfcFfmD36hdHFFhxCgRAs-5fTd0fLHJrpQ,3773
 autocoder/common/model_speed_test.py,sha256=U48xUUpOnbwUal1cdij4YAn_H2PD2pNaqrMHaYtQRfI,15200
+autocoder/common/openai_content.py,sha256=M_V_UyHrqNVWjgrYvxfAupZw2I0Nr3iilYv6SxSvfLA,8091
 autocoder/common/printer.py,sha256=P1WU0QjlfnjqTP5uA55GkHZCpFzRPFkc34DMMandreg,2023
 autocoder/common/recall_validation.py,sha256=Avt9Q9dX3kG6Pf2zsdlOHmsjd-OeSj7U1PFBDp_Cve0,1700
 autocoder/common/result_manager.py,sha256=nBcFRj5reBC7vp13M91f4B8iPW8B8OehayHlUdeAt1g,3776
@@ -131,13 +132,13 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
 autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
 autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/rag/api_server.py,sha256=gsk450_B-qGtBwJ1niG9-QFJAG0RGr2s2KdiMrzzbyQ,9582
+autocoder/rag/api_server.py,sha256=StGyxrM-7-W2vYHJq-i_Fv-MHrl9UgVWY272Hd-6VJ4,13090
 autocoder/rag/conversation_to_queries.py,sha256=xwmErn4WbdADnhK1me-h_6fV3KYrl_y1qPNQl1aoI6o,4810
 autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
 autocoder/rag/document_retriever.py,sha256=5BDqKVJqLPScEnua5S5suXhWuCaALIfPf5obXeJoWfs,8461
 autocoder/rag/lang.py,sha256=_jmUtxZDG1fmF4b2mhMJbYS1YQDb2ZE8nyAn5_vrvjA,3350
 autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
-autocoder/rag/long_context_rag.py,sha256=6rqq0pvYe9N4TvyLwd2OB21ZUrPC4FfxZuks0weAz4A,41935
+autocoder/rag/long_context_rag.py,sha256=RE4xse3XxSC_HQA5erqrx6MhanP_29mBRdYOTJQZYGc,42106
 autocoder/rag/qa_conversation_strategy.py,sha256=1AcHV0MU00yTls20LlCPO-Un_OhSrr_p-H5lxLleAq4,6060
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
@@ -198,9 +199,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=xuBeWD0YOckqRo8JB1WkVIMOYH6c24m7JfV4svBfPDo,15113
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.308.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-auto_coder-0.1.308.dist-info/METADATA,sha256=9Eqj3xOim16B-cWJRnUGOaF16HLZeCYW2wIIRaNJWAk,2721
-auto_coder-0.1.308.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-auto_coder-0.1.308.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
-auto_coder-0.1.308.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
-auto_coder-0.1.308.dist-info/RECORD,,
+auto_coder-0.1.310.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.310.dist-info/METADATA,sha256=zh8Gtsl5ahulVrleWS6UchHXOiJfeG-8SFeikpSffSg,2747
+auto_coder-0.1.310.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.310.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.310.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.310.dist-info/RECORD,,

autocoder/auto_coder_rag.py CHANGED Viewed

@@ -289,7 +289,11 @@ def main(input_args: Optional[List[str]] = None):
     serve_parser.add_argument("--ssl_keyfile", default="", help="")
     serve_parser.add_argument("--ssl_certfile", default="", help="")
     serve_parser.add_argument("--response_role", default="assistant", help="")
-    serve_parser.add_argument("--doc_dir", default="", help="")
+    serve_parser.add_argument(
+        "--doc_dir",
+        default="",
+        help="Document directory path, also used as the root directory for serving static files"
+    )
     serve_parser.add_argument("--enable_local_image_host", action="store_true", help=" enable local image host for local Chat app")
     serve_parser.add_argument("--tokenizer_path", default=tokenizer_path, help="")
     serve_parser.add_argument(
@@ -305,7 +309,17 @@ def main(input_args: Optional[List[str]] = None):
         action="store_true",
         help="Monitor mode for the doc update",
     )
+    serve_parser.add_argument(
+        "--max_static_path_length",
+        type=int,
+        default=3000,
+        help="Maximum length allowed for static file paths (larger value to better support Chinese characters)"
+    )
+    serve_parser.add_argument(
+        "--enable_nginx_x_accel",
+        action="store_true",
+        help="Enable Nginx X-Accel-Redirect for static file serving when behind Nginx"
+    )
     serve_parser.add_argument(
         "--disable_auto_window",
         action="store_true",

autocoder/command_args.py CHANGED Viewed

@@ -433,7 +433,11 @@ def parse_args(input_args: Optional[List[str]] = None) -> AutoCoderArgs:
     doc_serve_parse.add_argument("--ssl_certfile", default="", help="")
     doc_serve_parse.add_argument(
         "--response_role", default="assistant", help="")
-    doc_serve_parse.add_argument("--doc_dir", default="", help="")
+    doc_serve_parse.add_argument(
+        "--doc_dir",
+        default="",
+        help="Document directory path, also used as the root directory for serving static files"
+    )
     doc_serve_parse.add_argument("--tokenizer_path", default="", help="")
     doc_serve_parse.add_argument(
         "--collections", default="", help="Collection name for indexing"
@@ -453,6 +457,12 @@ def parse_args(input_args: Optional[List[str]] = None) -> AutoCoderArgs:
         action="store_true",
         help="Monitor mode for the doc update",
     )
+    doc_serve_parse.add_argument(
+        "--max_static_path_length",
+        type=int,
+        default=1000,
+        help="Maximum length allowed for static file paths"
+    )
     agent_parser = subparsers.add_parser("agent", help="Run an agent")
     agent_subparsers = agent_parser.add_subparsers(dest="agent_command")

autocoder/common/openai_content.py ADDED Viewed

@@ -0,0 +1,256 @@
+from typing import Any, Dict, List, Optional, Union
+import base64
+import os
+from enum import Enum
+from pydantic import BaseModel, Field, validator
+class ContentType(str, Enum):
+    """Type of content in the OpenAI chat message."""
+    TEXT = "text"
+    IMAGE_URL = "image_url"
+class ImageUrl(BaseModel):
+    """Image URL structure in OpenAI chat messages."""
+    url: str = Field(..., description="URL of the image, can be http(s) or data URI")
+    @validator('url')
+    def validate_url(cls, v):
+        """Validate that URL is either an http(s) URL or a valid data URI."""
+        if v.startswith(('http://', 'https://')):
+            return v
+        elif v.startswith('data:image/'):
+            return v
+        else:
+            raise ValueError("Image URL must be http(s) or data URI format")
+class TextContent(BaseModel):
+    """Text content in OpenAI chat messages."""
+    type: str = ContentType.TEXT
+    text: str
+class ImageUrlContent(BaseModel):
+    """Image URL content in OpenAI chat messages."""
+    type: str = ContentType.IMAGE_URL
+    image_url: Union[str, ImageUrl]
+    @validator('image_url')
+    def validate_image_url(cls, v):
+        """Convert string to ImageUrl if necessary."""
+        if isinstance(v, str):
+            return ImageUrl(url=v)
+        return v
+ContentItem = Union[TextContent, ImageUrlContent]
+class OpenAIMessage(BaseModel):
+    """Model for an OpenAI chat message."""
+    role: str
+    content: Union[str, List[ContentItem]]
+    name: Optional[str] = None
+class OpenAIConversation(BaseModel):
+    """Model for a conversation with OpenAI."""
+    messages: List[OpenAIMessage]
+def is_structured_content(content: Any) -> bool:
+    """
+    Check if the content is structured (list of items with type field).
+    Args:
+        content: The content to check
+    Returns:
+        bool: True if the content is structured, False otherwise
+    """
+    if not isinstance(content, list):
+        return False
+    if not content:
+        return False
+    # Check if all items have a 'type' field
+    return all(isinstance(item, dict) and 'type' in item for item in content)
+def encode_image_to_base64(image_path: str) -> str:
+    """
+    Encode an image file to base64.
+    Args:
+        image_path: Path to the image file
+    Returns:
+        str: Base64-encoded image data
+    """
+    if not os.path.exists(image_path):
+        raise FileNotFoundError(f"Image file not found: {image_path}")
+    with open(image_path, "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+    # Determine content type based on file extension
+    file_ext = os.path.splitext(image_path)[1].lower()
+    content_type = {
+        '.jpg': 'image/jpeg',
+        '.jpeg': 'image/jpeg',
+        '.png': 'image/png',
+        '.gif': 'image/gif',
+        '.webp': 'image/webp',
+    }.get(file_ext, 'image/jpeg')
+    return f"data:{content_type};base64,{encoded_string}"
+def create_text_content(text: str) -> TextContent:
+    """
+    Create a text content item.
+    Args:
+        text: The text content
+    Returns:
+        TextContent: A text content item
+    """
+    return TextContent(text=text)
+def create_image_content(image_path_or_url: str) -> ImageUrlContent:
+    """
+    Create an image content item from a file path or URL.
+    Args:
+        image_path_or_url: Path to the image file or an image URL
+    Returns:
+        ImageUrlContent: An image content item
+    """
+    # If it's a URL already, use it directly
+    if image_path_or_url.startswith(('http://', 'https://', 'data:')):
+        return ImageUrlContent(image_url=image_path_or_url)
+    # Otherwise, treat it as a file path and encode it
+    return ImageUrlContent(image_url=encode_image_to_base64(image_path_or_url))
+def normalize_content(content: Any) -> Union[str, List[ContentItem]]:
+    """
+    Normalize content to either a string or a list of structured content items.
+    Args:
+        content: The content to normalize
+    Returns:
+        Union[str, List[ContentItem]]: Normalized content
+    """
+    if isinstance(content, str):
+        return content
+    if is_structured_content(content):
+        normalized_items = []
+        for item in content:
+            if item['type'] == ContentType.TEXT:
+                normalized_items.append(create_text_content(item['text']))
+            elif item['type'] == ContentType.IMAGE_URL:
+                normalized_items.append(ImageUrlContent(image_url=item['image_url']))
+        return normalized_items
+    # If it's neither a string nor structured content, convert to string
+    return str(content)
+def create_message(role: str, content: Union[str, List[ContentItem]], name: Optional[str] = None) -> OpenAIMessage:
+    """
+    Create an OpenAI chat message.
+    Args:
+        role: Role of the message sender (system, user, assistant)
+        content: Content of the message (string or structured content)
+        name: Optional name of the sender
+    Returns:
+        OpenAIMessage: An OpenAI chat message
+    """
+    return OpenAIMessage(
+        role=role,
+        content=normalize_content(content),
+        name=name
+    )
+def process_conversations(conversations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    处理会话列表，确保每个消息都符合标准格式，并只保留文本内容。
+    Args:
+        conversations: 会话列表，可能包含各种格式的消息
+    Returns:
+        List[Dict[str, Any]]: 标准化后的会话列表，每个消息都有 role 和 content 字段
+    例子:
+        >>> conversations = [
+        ...     {"role": "user", "content": "Hello"},
+        ...     {"role": "assistant", "content": "Hi, how can I help?"},
+        ...     {"role": "user", "content": [
+        ...         {"type": "text", "text": "What's in this image?"},
+        ...         {"type": "image_url", "image_url": "data:image/jpeg;base64,/9j/4AAQ..."}
+        ...     ]}
+        ... ]
+        >>> processed = process_conversations(conversations)
+        >>> # 结果保持相同的结构，但确保格式一致性
+        输出格式要是这样的：
+        [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi, how can I help?"},
+            {"role": "user", "content": "What's in this image?"}
+        ]
+        只保留 text 内容。如果有多个 text 内容，用换行符连接弄成一个。
+    """
+    processed_conversations = []
+    for message in conversations:
+        # 确保消息有 role 字段
+        if "role" not in message:
+            raise ValueError(f"Message missing 'role' field: {message}")
+        role = message["role"]
+        # 处理 content 字段，确保存在
+        if "content" not in message:
+            processed_content = ""  # 如果不存在，设置为空字符串
+        else:
+            content = message["content"]
+            # 处理结构化内容
+            if isinstance(content, list) and is_structured_content(content):
+                # 提取所有文本内容并用换行符连接
+                text_contents = []
+                for item in content:
+                    if item.get('type') == ContentType.TEXT and 'text' in item:
+                        text_contents.append(item['text'])
+                processed_content = '\n'.join(text_contents)
+            else:
+                # 如果是字符串或其他类型，确保转换为字符串
+                processed_content = str(content) if content is not None else ""
+        # 构建标准化的消息
+        processed_message = {"role": role, "content": processed_content}
+        # 如果原消息有 name 字段，也加入
+        if "name" in message and message["name"]:
+            processed_message["name"] = message["name"]
+        processed_conversations.append(processed_message)
+    return processed_conversations

autocoder/rag/api_server.py CHANGED Viewed

@@ -49,6 +49,8 @@ TIMEOUT_KEEP_ALIVE = 5  # seconds
 # timeout in 10 minutes. Streaming can take longer than 3 min
 TIMEOUT = float(os.environ.get("BYZERLLM_APISERVER_HTTP_TIMEOUT", 600))
+# Static file serving security settings
 router_app = FastAPI()
@@ -178,46 +180,51 @@ async def embed(body: EmbeddingCompletionRequest):
     )
 @router_app.get("/static/{full_path:path}")
-async def serve_image(full_path: str, request: Request):
-    allowed_file_type = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp']
+async def serve_static_file(full_path: str, request: Request):
-    if any(full_path.endswith(ext) for ext in allowed_file_type):
-        try:
-            # 获取文件的完整路径，并进行URL解码
-            file_path = unquote(full_path)
-            # 使用 os.path.normpath 来标准化路径，自动处理不同操作系统的路径分隔符
-            file_path = os.path.normpath(file_path)
-            if not os.path.isabs(file_path):
-                file_path = os.path.join("/", file_path)
-            # 检查文件是否存在
-            if not os.path.exists(file_path):
-                raise FileNotFoundError(f"File not found: {file_path}")
-            # 异步读取文件内容
-            async with aiofiles.open(file_path, "rb") as f:
-                content = await f.read()
+    try:
+        # 路径安全检查已经在中间件中完成
+        # 直接使用规范化的路径
+        file_path = os.path.join("/", os.path.normpath(unquote(full_path)))
+        # 检查文件是否存在
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+        # 如果启用了Nginx X-Accel-Redirect，使用X-Accel特性
+        if hasattr(request.app.state, "enable_nginx_x_accel") and request.app.state.enable_nginx_x_accel:
             # 获取文件的 MIME 类型
             content_type = mimetypes.guess_type(file_path)[0]
             if not content_type:
                 content_type = "application/octet-stream"
-            # 返回文件内容
-            return Response(content=content, media_type=content_type)
-        except FileNotFoundError as e:
-            logger.error(f"Image not found: {str(e)}")
-            raise HTTPException(status_code=404, detail=f"Image not found: {str(e)}")
-        except PermissionError as e:
-            logger.error(f"Permission denied: {str(e)}")
-            raise HTTPException(status_code=403, detail=f"Permission denied: {str(e)}")
-        except Exception as e:
-            logger.error(f"Error serving image: {str(e)}")
-            raise HTTPException(status_code=500, detail=f"Error serving image: {str(e)}")
-    # 如果路径中没有图片, 返回 404
-    raise HTTPException(status_code=404, detail="Only images are supported")
+            # 返回带X-Accel-Redirect头的响应
+            # 通过添加X-Accel-Redirect头告诉Nginx直接提供该文件
+            # 注意：Nginx配置必须正确设置内部路径映射
+            response = Response(content="", media_type=content_type)
+            response.headers["X-Accel-Redirect"] = f"/internal{file_path}"
+            return response
+        # 默认行为：异步读取文件内容
+        async with aiofiles.open(file_path, "rb") as f:
+            content = await f.read()
+        # 获取文件的 MIME 类型
+        content_type = mimetypes.guess_type(file_path)[0]
+        if not content_type:
+            content_type = "application/octet-stream"
+        # 返回文件内容
+        return Response(content=content, media_type=content_type)
+    except FileNotFoundError as e:
+        logger.error(f"File not found: {str(e)}")
+        raise HTTPException(status_code=404, detail=f"File not found: {str(e)}")
+    except PermissionError as e:
+        logger.error(f"Permission denied: {str(e)}")
+        raise HTTPException(status_code=403, detail=f"Permission denied: {str(e)}")
+    except Exception as e:
+        logger.error(f"Error serving file: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error serving file: {str(e)}")
 class ServerArgs(BaseModel):
     host: str = None
@@ -234,14 +241,38 @@ class ServerArgs(BaseModel):
     response_role: str = "assistant"
     ssl_keyfile: str = None
     ssl_certfile: str = None
-    doc_dir: str = ""
-    tokenizer_path: Optional[str] = None
+    doc_dir: str = ""  # Document directory path, also used as the root directory for serving static files
+    tokenizer_path: Optional[str] = None
+    max_static_path_length: int = int(os.environ.get("BYZERLLM_MAX_STATIC_PATH_LENGTH", 3000))  # Maximum length allowed for static file paths (larger value to better support Chinese characters)
+    enable_nginx_x_accel: bool = False  # Enable Nginx X-Accel-Redirect for static file serving
 def serve(llm:ByzerLLM, args: ServerArgs):
     logger.info(f"ByzerLLM API server version {version}")
     logger.info(f"args: {args}")
+    # 设置静态文件路径长度限制
+    max_path_length = args.max_static_path_length
+    logger.info(f"Maximum static file path length: {max_path_length}")
+    # 存储Nginx X-Accel设置到应用状态
+    router_app.state.enable_nginx_x_accel = args.enable_nginx_x_accel
+    if args.enable_nginx_x_accel:
+        logger.info("Nginx X-Accel-Redirect enabled for static file serving")
+    # 确定允许访问的静态文件目录
+    # 优先级：1. 环境变量 BYZERLLM_ALLOWED_STATIC_DIR
+    #        2. 命令行参数 doc_dir
+    #        3. 默认值 "/tmp"
+    allowed_static_dir = os.environ.get("BYZERLLM_ALLOWED_STATIC_DIR")
+    if not allowed_static_dir and args.doc_dir:
+        allowed_static_dir = args.doc_dir
+    if not allowed_static_dir:
+        allowed_static_dir = "/tmp"
+    allowed_static_abs = os.path.abspath(allowed_static_dir)
+    logger.info(f"Static files root directory: {allowed_static_abs}")
     router_app.add_middleware(
         CORSMiddleware,
         allow_origins=args.allowed_origins,
@@ -250,6 +281,47 @@ def serve(llm:ByzerLLM, args: ServerArgs):
         allow_headers=args.allowed_headers,
     )
+    # Add static file security middleware
+    @router_app.middleware("http")
+    async def static_file_security(request: Request, call_next):
+        # Only apply to static routes
+        if request.url.path.startswith("/static/"):
+            # Extract the full_path from the URL
+            path_parts = request.url.path.split("/static/", 1)
+            if len(path_parts) > 1:
+                full_path = path_parts[1]
+                # Check path length
+                if len(full_path) > max_path_length:
+                    logger.warning(f"Path too long: {len(full_path)} > {max_path_length}")
+                    return JSONResponse(
+                        content={"error": "Path too long"},
+                        status_code=401
+                    )
+                # Add warning when path length approaches the limit (80% of max)
+                if len(full_path) > (max_path_length * 0.8):
+                    logger.warning(f"Path length approaching limit: {len(full_path)} is {(len(full_path) / max_path_length * 100):.1f}% of max ({max_path_length})")
+                # Decode and normalize path
+                decoded_path = unquote(full_path)
+                normalized_path = os.path.normpath(decoded_path)
+                # Check if path is in allowed directory
+                abs_path = os.path.abspath(os.path.join("/", normalized_path))
+                # 使用预先计算好的allowed_static_abs
+                is_allowed = abs_path.startswith(allowed_static_abs)
+                if not is_allowed:
+                    logger.warning(f"Unauthorized path access: {abs_path}")
+                    return JSONResponse(
+                        content={"error": "Unauthorized path"},
+                        status_code=401
+                    )
+        return await call_next(request)
     if token := os.environ.get("BYZERLLM_API_KEY") or args.api_key:
         @router_app.middleware("http")

autocoder/rag/long_context_rag.py CHANGED Viewed

@@ -40,6 +40,7 @@ from autocoder.rag.lang import get_message_with_format_and_newline
 from autocoder.rag.qa_conversation_strategy import get_qa_strategy
 from autocoder.rag.searchable import SearchableResults
 from autocoder.rag.conversation_to_queries import extract_search_queries
+from autocoder.common import openai_content as OpenAIContentProcessor
 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
     pro_version = version("auto-coder-pro")
@@ -348,7 +349,7 @@ class LongContextRAG:
         role_mapping=None,
         llm_config: Dict[str, Any] = {},
         extra_request_params: Dict[str, Any] = {}
-    ):
+    ):
         try:
             return self._stream_chat_oai(
                 conversations,
@@ -399,6 +400,7 @@ class LongContextRAG:
         llm_config: Dict[str, Any] = {},
         extra_request_params: Dict[str, Any] = {}
     ):
+        conversations = OpenAIContentProcessor.process_conversations(conversations)
         if self.client:
             model = model or self.args.model
             response = self.client.chat.completions.create(
@@ -415,6 +417,7 @@ class LongContextRAG:
             target_llm = self.llm.get_sub_client("qa_model")
         query = conversations[-1]["content"]
         context = []
         if (

autocoder/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.1.308"
1	+ __version__ = "0.1.310"

{auto_coder-0.1.308.dist-info → auto_coder-0.1.310.dist-info}/LICENSE RENAMED Viewed

File without changes

{auto_coder-0.1.308.dist-info → auto_coder-0.1.310.dist-info}/WHEEL RENAMED Viewed

File without changes

{auto_coder-0.1.308.dist-info → auto_coder-0.1.310.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{auto_coder-0.1.308.dist-info → auto_coder-0.1.310.dist-info}/top_level.txt RENAMED Viewed

File without changes

auto-coder 0.1.308__py3-none-any.whl → 0.1.310__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.308__py3-none-any.whl → 0.1.310__py3-none-any.whl