PyPI - SimpleLLMFunc - Versions diffs - 0.2.3__tar.gz → 0.2.6__tar.gz - Mend

SimpleLLMFunc 0.2.3__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{simplellmfunc-0.2.3 → simplellmfunc-0.2.6}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.3
 Name: SimpleLLMFunc
-Version: 0.2.3
+Version: 0.2.6
 Summary: 一个轻量但完备的LLM/Agent应用开发框架，提供装饰器实现将函数DocString作为Prompt而无需函数体具体实现但能够享受函数定义和类型标注带来效率提升的开发体验。以最Code的方式，用最少的代码将LLM能力集成到任意Python项目中。
 Author: Ni Jingzhe
-Author-email: nijingzhe@zjue.edu.cn
+Author-email: nijingzhe@zju.edu.cn
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: httpx[socks] (>=0.28.1,<0.29.0)
+Requires-Dist: nest-asyncio (>=1.6.0,<2.0.0)
 Requires-Dist: openai (>=1.84.0,<2.0.0)
 Requires-Dist: pydantic (>=2.11.5,<3.0.0)
 Requires-Dist: pydantic-settings (>=2.9.1,<3.0.0)

{simplellmfunc-0.2.3 → simplellmfunc-0.2.6}/SimpleLLMFunc/__init__.py RENAMED Viewed

@@ -1,3 +1,7 @@
+import nest_asyncio
+nest_asyncio.apply()
 from SimpleLLMFunc.llm_decorator import *
 from SimpleLLMFunc.logger import *
 from SimpleLLMFunc.interface import *

{simplellmfunc-0.2.3 → simplellmfunc-0.2.6}/SimpleLLMFunc/interface/__init__.py RENAMED Viewed

@@ -1,7 +1,11 @@
 from SimpleLLMFunc.interface.key_pool import APIKeyPool
 from SimpleLLMFunc.interface.openai_compatible import OpenAICompatible
+from SimpleLLMFunc.interface.token_bucket import TokenBucket, RateLimitManager, rate_limit_manager
 __all__ = [
     "APIKeyPool",
     "OpenAICompatible",
+    "TokenBucket",
+    "RateLimitManager",
+    "rate_limit_manager",
 ]

{simplellmfunc-0.2.3 → simplellmfunc-0.2.6}/SimpleLLMFunc/interface/openai_compatible.py RENAMED Viewed

@@ -7,6 +7,7 @@ from typing import Optional, Dict, Literal, Iterable, Any, AsyncGenerator
 from openai import AsyncOpenAI
 from SimpleLLMFunc.interface.llm_interface import LLM_Interface
 from SimpleLLMFunc.interface.key_pool import APIKeyPool
+from SimpleLLMFunc.interface.token_bucket import TokenBucket, rate_limit_manager
 from SimpleLLMFunc.logger import (
     app_log,
     push_warning,
@@ -62,32 +63,40 @@ class OpenAICompatible(LLM_Interface):
                     {
                         "model_name": "gpt-3.5-turbo",
                         "api_keys": [key1, key2, key3],
-                        "base_url": "https://api.openai.com/v1"
+                        "base_url": "https://api.openai.com/v1",
                         "max_retries": 5,
-                        "retry_delay": 1.0
+                        "retry_delay": 1.0,
+                        "rate_limit_capacity": 10,
+                        "rate_limit_refill_rate": 1.0
                     },
                     {
                         "model_name": "gpt-4",
                         "api_keys": [key1, key2, key3],
-                        "base_url": "https://api.openai.com/v1"
+                        "base_url": "https://api.openai.com/v1",
                         "max_retries": 5,
-                        "retry_delay": 1.0
+                        "retry_delay": 1.0,
+                        "rate_limit_capacity": 5,
+                        "rate_limit_refill_rate": 0.5
                     }
                 ],
                 "zhipu": [
                     {
                         "model_name": "gpt-3.5-turbo",
                         "api_keys": [key1, key2, key3],
-                        "base_url": "https://open.bigmodel.cn/api/paas/v4/"
+                        "base_url": "https://open.bigmodel.cn/api/paas/v4/",
                         "max_retries": 5,
-                        "retry_delay": 1.0
+                        "retry_delay": 1.0,
+                        "rate_limit_capacity": 15,
+                        "rate_limit_refill_rate": 2.0
                     },
                     {
                         "model_name": "gpt-4",
                         "api_keys": [key1, key2, key3],
-                        "base_url": "https://open.bigmodel.cn/api/paas/v4/"
+                        "base_url": "https://open.bigmodel.cn/api/paas/v4/",
                         "max_retries": 5,
-                        "retry_delay": 1.0
+                        "retry_delay": 1.0,
+                        "rate_limit_capacity": 8,
+                        "rate_limit_refill_rate": 1.5
                     }
                 ]
             }
@@ -145,6 +154,8 @@ class OpenAICompatible(LLM_Interface):
                     base_url = model_info["base_url"]
                     max_retries = model_info.get("max_retries", 5)
                     retry_delay = model_info.get("retry_delay", 1.0)
+                    rate_limit_capacity = model_info.get("rate_limit_capacity", 10)
+                    rate_limit_refill_rate = model_info.get("rate_limit_refill_rate", 1.0)
                     # 创建APIKeyPool实例
                     key_pool = APIKeyPool(api_keys, f"{provider_id}-{model_name}")
@@ -156,6 +167,8 @@ class OpenAICompatible(LLM_Interface):
                         base_url=base_url,
                         max_retries=max_retries,
                         retry_delay=retry_delay,
+                        rate_limit_capacity=rate_limit_capacity,
+                        rate_limit_refill_rate=rate_limit_refill_rate,
                     )
                     all_providers_dict[provider_id][model_name] = instance
@@ -190,6 +203,18 @@ class OpenAICompatible(LLM_Interface):
             f"OpenAICompatible(model_name={self.model_name}, base_url={self.base_url})"
         )
+    def get_rate_limit_status(self) -> Dict[str, Any]:
+        """Report the current state of this instance's token bucket.
+        Returns:
+            The dictionary returned by the bucket's ``get_info()``.
+        """
+        bucket = self.token_bucket
+        return bucket.get_info()
+    def reset_rate_limit(self) -> None:
+        """Reset this instance's token bucket by delegating to its ``reset()``."""
+        bucket = self.token_bucket
+        bucket.reset()
     def __init__(
         self,
         api_key_pool: APIKeyPool,
@@ -197,6 +222,8 @@ class OpenAICompatible(LLM_Interface):
         base_url: str,
         max_retries: int = 5,
         retry_delay: float = 1.0,
+        rate_limit_capacity: int = 10,
+        rate_limit_refill_rate: float = 1.0,
     ):
         """初始化OpenAI兼容的LLM接口
@@ -206,6 +233,8 @@ class OpenAICompatible(LLM_Interface):
             base_url: API基础URL，例如"https://api.openai.com/v1"或"https://open.bigmodel.cn/api/paas/v4/"
             max_retries: 最大重试次数
             retry_delay: 重试间隔时间（秒）
+            rate_limit_capacity: 令牌桶容量（最大令牌数）
+            rate_limit_refill_rate: 令牌补充速率（令牌数/秒）
         """
         super().__init__(api_key_pool, model_name)
         self.max_retries = max_retries
@@ -215,10 +244,49 @@ class OpenAICompatible(LLM_Interface):
         self.model_name = model_name
         self.key_pool = api_key_pool
+        # 创建令牌桶，使用provider和model作为唯一标识
+        bucket_id = f"{base_url}_{model_name}"
+        self.token_bucket = rate_limit_manager.get_or_create_bucket(
+            bucket_id=bucket_id,
+            capacity=rate_limit_capacity,
+            refill_rate=rate_limit_refill_rate
+        )
         self.client = AsyncOpenAI(
             api_key=api_key_pool.get_least_loaded_key(), base_url=self.base_url
         )
+    async def _get_or_create_client(self, key: str) -> AsyncOpenAI:
+        """Return a client bound to ``key``, replacing the cached one when it does not match."""
+        cached = getattr(self, 'client', None)
+        # Reuse the cached client only when it exists and was built for this exact key
+        if cached is not None and hasattr(self, '_current_key') and self._current_key == key:
+            return cached
+        # Dispose of the stale client before replacing it
+        if cached is not None:
+            try:
+                await cached.close()  # type: ignore
+            except Exception:
+                # Closing is best-effort; failures are deliberately ignored
+                pass
+        # Build a fresh client and remember which key it belongs to
+        self.client = AsyncOpenAI(api_key=key, base_url=self.base_url)
+        self._current_key = key
+        return self.client
+    async def aclose(self):
+        """Close the cached client, if any, and clear the reference to it."""
+        current = getattr(self, 'client', None)
+        if current is None:
+            return
+        try:
+            await current.close()  # type: ignore
+        except Exception:
+            # Closing is best-effort; failures are deliberately ignored
+            pass
+        finally:
+            self.client = None
     async def chat(
         self,
         trace_id: str = get_current_trace_id(),
@@ -246,18 +314,27 @@ class OpenAICompatible(LLM_Interface):
             LLM的响应内容
         """
         key = self.key_pool.get_least_loaded_key()
-        self.client = AsyncOpenAI(api_key=key, base_url=self.base_url)
+        client = await self._get_or_create_client(key)
         attempt = 0
         while attempt < self.max_retries:
             try:
+                # 获取令牌桶令牌，设置30秒超时
+                token_acquired = await self.token_bucket.acquire(tokens_needed=1, timeout=30.0)
+                if not token_acquired:
+                    push_warning(
+                        f"{self.model_name} 令牌桶获取令牌超时，跳过此次请求",
+                        location=get_location(),
+                    )
+                    raise Exception("Rate limit: 令牌桶获取令牌超时")
                 self.key_pool.increment_task_count(key)
                 data = json.dumps(messages, ensure_ascii=False, indent=4)
                 push_debug(
                     f"OpenAICompatible::chat: {self.model_name} request with API key: {key}, and message: {data}",
                     location=get_location(),
                 )
-                response: Dict[Any, Any] = await self.client.chat.completions.create(  # type: ignore
+                response: Dict[Any, Any] = await client.chat.completions.create(  # type: ignore
                     messages=messages,  # type: ignore
                     model=self.model_name,
                     stream=stream,
@@ -295,7 +372,7 @@ class OpenAICompatible(LLM_Interface):
                 )
                 key = self.key_pool.get_least_loaded_key()
-                self.client = AsyncOpenAI(api_key=key, base_url=self.base_url)
+                client = await self._get_or_create_client(key)
                 if attempt >= self.max_retries:
                     push_error(
@@ -333,18 +410,27 @@ class OpenAICompatible(LLM_Interface):
             LLM的响应块
         """
         key = self.key_pool.get_least_loaded_key()
-        self.client = AsyncOpenAI(api_key=key, base_url=self.base_url)
+        client = await self._get_or_create_client(key)
         attempt = 0
         while attempt < self.max_retries:
             try:
+                # 获取令牌桶令牌，设置30秒超时
+                token_acquired = await self.token_bucket.acquire(tokens_needed=1, timeout=30.0)
+                if not token_acquired:
+                    push_warning(
+                        f"{self.model_name} 流式请求令牌桶获取令牌超时，跳过此次请求",
+                        location=get_location(),
+                    )
+                    raise Exception("Rate limit: 令牌桶获取令牌超时")
                 self.key_pool.increment_task_count(key)
                 data = json.dumps(messages, ensure_ascii=False, indent=4)
                 push_debug(
                     f"OpenAICompatible::chat_stream: {self.model_name} request with API key: {key}, and message: {data}",
                     location=get_location(),
                 )
-                response = await self.client.chat.completions.create(  # type: ignore
+                response = await client.chat.completions.create(  # type: ignore
                     messages=messages,  # type: ignore
                     model=self.model_name,
                     stream=stream,
@@ -388,7 +474,7 @@ class OpenAICompatible(LLM_Interface):
                 )
                 key = self.key_pool.get_least_loaded_key()
-                self.client = AsyncOpenAI(api_key=key, base_url=self.base_url)
+                client = await self._get_or_create_client(key)
                 if attempt >= self.max_retries:
                     push_error(

simplellmfunc-0.2.6/SimpleLLMFunc/interface/token_bucket.py ADDED Viewed

@@ -0,0 +1,232 @@
+import asyncio
+import time
+from typing import Optional, Dict, Any
+import threading
+from SimpleLLMFunc.logger import push_debug, push_warning, get_location
+class TokenBucket:
+    """Token-bucket rate limiter used to throttle API requests.
+    A bucket holds at most ``capacity`` tokens and is refilled at
+    ``refill_rate`` tokens per second, so short bursts are allowed while the
+    long-term average rate stays bounded.
+    Instances are cached per ``bucket_id``: constructing a bucket with an id
+    that already exists returns the existing instance, and the ``capacity`` /
+    ``refill_rate`` passed to that later call are ignored.
+    """
+    # Registry of the per-id cached instances
+    _instances: Dict[str, 'TokenBucket'] = {}
+    # Class-level lock guarding the registry. Initialised instances shadow this
+    # name with their own lock, so the registry lock must be reached via the class.
+    _lock = threading.Lock()
+    def __new__(cls, bucket_id: str, capacity: int = 10, refill_rate: float = 1.0) -> 'TokenBucket':
+        """Return the cached instance for ``bucket_id``, creating it on first use."""
+        with cls._lock:
+            if bucket_id not in cls._instances:
+                instance = super(TokenBucket, cls).__new__(cls)
+                cls._instances[bucket_id] = instance
+            return cls._instances[bucket_id]
+    def __init__(self, bucket_id: str, capacity: int = 10, refill_rate: float = 1.0):
+        """Initialise the bucket the first time this instance is constructed.
+        Args:
+            bucket_id: Unique identifier of the bucket.
+            capacity: Maximum number of tokens the bucket can hold.
+            refill_rate: Number of tokens added per second.
+        """
+        # __init__ also runs when __new__ handed back a cached instance;
+        # leave that instance's state untouched.
+        if getattr(self, 'initialized', False):
+            return
+        self.bucket_id = bucket_id
+        self.capacity = capacity
+        self.refill_rate = refill_rate
+        self.tokens = float(capacity)  # the bucket starts full
+        self.last_refill_time = time.time()
+        # A threading lock guards all state; it is never held across an await,
+        # so taking it inside coroutines does not stall the event loop for long.
+        self._lock = threading.Lock()
+        self.initialized = True
+        push_debug(
+            f"TokenBucket {bucket_id} 初始化完成: capacity={capacity}, refill_rate={refill_rate}",
+            location=get_location()
+        )
+    def _refill_tokens(self) -> None:
+        """Credit the tokens accrued since the last refill.
+        Every caller in this class invokes it while holding ``self._lock``.
+        """
+        current_time = time.time()
+        # time.time() is a wall clock and may step backwards; clamp the
+        # interval so a clock adjustment can never take tokens away.
+        time_passed = max(0.0, current_time - self.last_refill_time)
+        # Tokens earned over the elapsed interval
+        tokens_to_add = time_passed * self.refill_rate
+        # Never exceed the configured capacity
+        self.tokens = min(self.capacity, self.tokens + tokens_to_add)
+        self.last_refill_time = current_time
+        push_debug(
+            f"TokenBucket {self.bucket_id} 补充令牌: 添加={tokens_to_add:.2f}, 当前={self.tokens:.2f}",
+            location=get_location()
+        )
+    async def acquire(self, tokens_needed: int = 1, timeout: Optional[float] = None) -> bool:
+        """Wait until ``tokens_needed`` tokens are available and consume them.
+        Args:
+            tokens_needed: Number of tokens to take.
+            timeout: Maximum time to wait in seconds; ``None`` waits indefinitely.
+        Returns:
+            True if the tokens were taken. False if the wait timed out, or if
+            the request exceeds the bucket capacity and can never be satisfied.
+        """
+        if tokens_needed > self.capacity:
+            # The bucket never holds more than `capacity` tokens, so waiting
+            # would otherwise poll forever when no timeout is given.
+            push_warning(
+                f"TokenBucket {self.bucket_id} 请求的令牌数超过桶容量: 需要={tokens_needed}, 容量={self.capacity}",
+                location=get_location()
+            )
+            return False
+        # Monotonic clock: the timeout must not be affected by wall-clock changes
+        start_time = time.monotonic()
+        while True:
+            # Critical section: refill, then either consume or work out the wait
+            with self._lock:
+                self._refill_tokens()
+                if self.tokens >= tokens_needed:
+                    self.tokens -= tokens_needed
+                    push_debug(
+                        f"TokenBucket {self.bucket_id} 成功获取 {tokens_needed} 个令牌, 剩余={self.tokens:.2f}",
+                        location=get_location()
+                    )
+                    return True
+                # Snapshot taken under the lock so the log lines below are consistent
+                available = self.tokens
+                deficit = tokens_needed - available
+                if self.refill_rate > 0:
+                    # Time needed for the missing tokens to be refilled
+                    wait_time = deficit / self.refill_rate
+                else:
+                    # A non-positive rate never refills on its own (only reset()
+                    # can); poll at the maximum interval instead of dividing by zero.
+                    wait_time = 0.1
+            # Timeout check happens outside the lock
+            if timeout is not None:
+                remaining = timeout - (time.monotonic() - start_time)
+                if remaining <= 0:
+                    push_warning(
+                        f"TokenBucket {self.bucket_id} 获取令牌超时: 需要={tokens_needed}, 可用={available:.2f}",
+                        location=get_location()
+                    )
+                    return False
+                # Do not sleep past the caller's deadline
+                wait_time = min(wait_time, remaining)
+            # Sleep at most 100ms per round so the state is re-checked regularly
+            wait_time = min(wait_time, 0.1)
+            push_debug(
+                f"TokenBucket {self.bucket_id} 等待令牌补充: 需要={tokens_needed}, 可用={available:.2f}, 等待={wait_time:.3f}s",
+                location=get_location()
+            )
+            await asyncio.sleep(wait_time)
+    def try_acquire(self, tokens_needed: int = 1) -> bool:
+        """Try to take tokens synchronously without waiting.
+        Args:
+            tokens_needed: Number of tokens to take.
+        Returns:
+            True if the tokens were taken, False if not enough were available.
+        """
+        with self._lock:
+            self._refill_tokens()
+            if self.tokens >= tokens_needed:
+                self.tokens -= tokens_needed
+                push_debug(
+                    f"TokenBucket {self.bucket_id} 同步获取 {tokens_needed} 个令牌成功, 剩余={self.tokens:.2f}",
+                    location=get_location()
+                )
+                return True
+            else:
+                push_debug(
+                    f"TokenBucket {self.bucket_id} 同步获取 {tokens_needed} 个令牌失败, 可用={self.tokens:.2f}",
+                    location=get_location()
+                )
+                return False
+    def get_available_tokens(self) -> float:
+        """Return the number of tokens currently available, after refilling."""
+        with self._lock:
+            self._refill_tokens()
+            return self.tokens
+    def get_info(self) -> Dict[str, Any]:
+        """Return a snapshot of the bucket's configuration and state, after refilling."""
+        with self._lock:
+            self._refill_tokens()
+            return {
+                "bucket_id": self.bucket_id,
+                "capacity": self.capacity,
+                "refill_rate": self.refill_rate,
+                "available_tokens": self.tokens,
+                "last_refill_time": self.last_refill_time
+            }
+    def reset(self) -> None:
+        """Refill the bucket to full capacity and restart the refill clock."""
+        with self._lock:
+            self.tokens = float(self.capacity)
+            self.last_refill_time = time.time()
+            push_debug(
+                f"TokenBucket {self.bucket_id} 已重置，令牌数={self.tokens}",
+                location=get_location()
+            )
+    def __repr__(self) -> str:
+        """Return a debugging representation of the bucket."""
+        return (
+            f"TokenBucket(id={self.bucket_id}, capacity={self.capacity}, "
+            f"refill_rate={self.refill_rate}, tokens={self.tokens:.2f})"
+        )
+class RateLimitManager:
+    """Registry of TokenBucket instances keyed by bucket id.
+    All access to the internal mapping goes through ``self._lock``.
+    """
+    def __init__(self):
+        self._buckets: Dict[str, TokenBucket] = {}
+        self._lock = threading.Lock()
+    def get_or_create_bucket(
+        self,
+        bucket_id: str,
+        capacity: int = 10,
+        refill_rate: float = 1.0
+    ) -> TokenBucket:
+        """Return the bucket registered under ``bucket_id``, creating it if missing.
+        Args:
+            bucket_id: Identifier of the bucket.
+            capacity: Bucket capacity, used only when the bucket is created.
+            refill_rate: Refill rate, used only when the bucket is created.
+        Returns:
+            The TokenBucket instance for ``bucket_id``.
+        """
+        with self._lock:
+            if bucket_id not in self._buckets:
+                self._buckets[bucket_id] = TokenBucket(bucket_id, capacity, refill_rate)
+            return self._buckets[bucket_id]
+    def get_bucket(self, bucket_id: str) -> Optional[TokenBucket]:
+        """Return the bucket registered under ``bucket_id``, or None if there is none."""
+        with self._lock:
+            return self._buckets.get(bucket_id)
+    def remove_bucket(self, bucket_id: str) -> bool:
+        """Remove the bucket registered under ``bucket_id``.
+        Returns:
+            True if a bucket was removed, False if the id was not registered.
+        """
+        with self._lock:
+            if bucket_id not in self._buckets:
+                return False
+            removed = self._buckets.pop(bucket_id)
+        # TokenBucket keeps its own per-id instance cache. Without evicting the
+        # entry there, re-creating this id would silently hand back the removed
+        # bucket with its old capacity and refill rate. The manager lock is
+        # released first so the two locks are never held in this order.
+        with TokenBucket._lock:
+            if TokenBucket._instances.get(bucket_id) is removed:
+                del TokenBucket._instances[bucket_id]
+        return True
+    def list_buckets(self) -> Dict[str, Dict[str, Any]]:
+        """Return the ``get_info()`` snapshot of every registered bucket, keyed by id."""
+        # Copy under the lock so a concurrent add/remove cannot break the iteration
+        with self._lock:
+            snapshot = list(self._buckets.items())
+        return {bucket_id: bucket.get_info() for bucket_id, bucket in snapshot}
+    def reset_all(self) -> None:
+        """Reset every registered bucket."""
+        # Copy under the lock so a concurrent add/remove cannot break the iteration
+        with self._lock:
+            buckets = list(self._buckets.values())
+        for bucket in buckets:
+            bucket.reset()
+# Global rate-limit manager instance
+rate_limit_manager = RateLimitManager()

SimpleLLMFunc 0.2.3__tar.gz → 0.2.6__tar.gz

SimpleLLMFunc 0.2.3__tar.gz → 0.2.6__tar.gz