llm-engine-kitty 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_engine/schemas.py ADDED
@@ -0,0 +1,251 @@
1
+ # llm_engine/schemas.py
2
+
3
+ import asyncio
4
+ import threading
5
+ import time
6
+
7
+ from enum import Enum
8
+ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
9
+ from typing import Any, Callable, Dict, List, Optional
10
+
11
+
12
class MessageRole(str, Enum):
    """Author roles for a chat message.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
16
+
17
+
18
class Message(BaseModel):
    # One chat message: the author role plus the text content.
    # `role` is a free-form string; it is not restricted to MessageRole values.
    role: str = Field(description="消息角色")
    content: str = Field(description="消息内容")

    def to_dict(self) -> Dict[str, str]:
        """Return the message as a plain dict with ``role`` and ``content`` keys."""
        return dict(role=self.role, content=self.content)

    def __repr__(self) -> str:
        """Return a compact repr.

        Content longer than 48 characters is abbreviated to its first and
        last 16 characters joined by ``......``.
        """
        text = self.content
        shown = text if len(text) <= 48 else f"{text[:16]}......{text[-16:]}"
        return f"Message(role={self.role!r}, content={shown!r})"
35
+
36
+
37
class InferenceParameters(BaseModel):
    # Optional sampling/generation parameters. Every declared field defaults
    # to None, and keys that are not declared here are accepted and kept
    # because of extra="allow".
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="allow",
        populate_by_name=True,
        str_strip_whitespace=True,
        use_enum_values=True,
        validate_assignment=True,
    )

    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    frequency_penalty: Optional[float] = None
    n: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the parameters as a dict, leaving out every entry whose value is None."""
        dumped = self.model_dump(exclude_none=True)
        return dumped
56
+
57
+
58
class InferenceRequest(BaseModel):
    # User-facing description of one inference call: the chat messages plus
    # optional model selection, credentials, sampling parameters and
    # per-request overrides.
    #
    # `model_name` starts with "model_", which pydantic treats as a protected
    # namespace in the v2 releases whose default is ("model_",); declaring the
    # field there emits a UserWarning at import time. Clearing the protected
    # namespaces keeps the public field name and silences the warning.
    model_config = ConfigDict(protected_namespaces=())

    model_name: Optional[str] = Field(default=None)
    # NOTE(review): api_key is an ordinary field, so it appears in
    # model_dump()/repr output of this model and of any model embedding it.
    api_key: Optional[str] = Field(default=None)
    messages: List[Message]
    metadata: Dict[str, Any] = Field(default_factory=dict)
    inference_parameters: Optional[InferenceParameters] = Field(default=None)
    stream: Optional[bool] = Field(default=None)
    timeout: Optional[float] = Field(default=None)

    # Caller-supplied additions; presumably merged into the outgoing HTTP
    # headers / JSON payload by the engine — confirm against the caller.
    extra_headers: Dict[str, str] = Field(default_factory=dict)
    extra_payload: Dict[str, Any] = Field(default_factory=dict)
69
+
70
+
71
class PreparedRequest(BaseModel):
    # A request resolved to concrete values: model name, target URL(s),
    # headers and payload.
    #
    # `model_name` starts with "model_", which pydantic treats as a protected
    # namespace in the v2 releases whose default is ("model_",); clearing the
    # protected namespaces avoids the resulting UserWarning without renaming
    # the field.
    model_config = ConfigDict(protected_namespaces=())

    model_name: str
    url: str
    # Uses typing.List like the rest of this module. How `urls` relates to
    # `url` (e.g. fallbacks) is not visible here — confirm against the engine.
    urls: List[str] = Field(default_factory=list)
    headers: Dict[str, str]
    payload: Dict[str, Any]
77
+
78
+
79
class TaskStatus(str, Enum):
    """Lifecycle states of a task; members are also plain strings."""

    # ---- non-terminal states ----
    # Submitted, awaiting validation (e.g. model resolution, registry lookup).
    SUBMITTED = "submitted"
    # Validation passed; about to be queued to wait for the semaphore.
    ACCEPTED = "accepted"
    # Queued and waiting for a semaphore slot.
    PENDING = "pending"
    # Semaphore acquired; currently executing.
    RUNNING = "running"

    # ---- terminal states ----
    SUCCESS = "success"
    # Failed during execution (HTTP / parsing / network and other non-timeout errors).
    FAILED = "failed"
    # Timed out during execution (triggered by task_timeout).
    TIMEOUT = "timeout"
    CANCELLED = "cancelled"
    # Failed during validation (invalid task that never reached execution).
    REJECTED = "rejected"
92
+
93
+
94
class ModelOutput(BaseModel):
    # Parsed output of one model reply. Only `content` is required.
    role: Optional[str] = Field(default=None)
    reasoning: Optional[str] = Field(default=None)
    content: str
    finish_reason: Optional[str] = Field(default=None)
    usage: Optional[Dict[str, Any]] = Field(default=None)

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a dict, including those whose value is None."""
        dumped = self.model_dump(exclude_none=False)
        return dumped
103
+
104
+
105
class InferenceRequestResult(BaseModel):
    # Outcome of one inference request: success flag, the originating
    # request, and either the model output or an error message.
    #
    # `model_output` starts with "model_", which pydantic treats as a
    # protected namespace in the v2 releases whose default is ("model_",);
    # clearing the protected namespaces avoids the resulting UserWarning
    # without renaming the field.
    model_config = ConfigDict(protected_namespaces=())

    success: bool

    task_id: str
    request: InferenceRequest
    model_output: Optional[ModelOutput] = None
    error_message: Optional[str] = None
    duration: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every field (nested models included) as a dict, keeping None values."""
        return self.model_dump(exclude_none=False)
117
+
118
+
119
class TaskHandle(BaseModel):
    # Mutable tracking record for one submitted task: its request, status,
    # result/error, timing information, streamed partial output, and
    # runtime-only objects (asyncio task, completion events, callbacks).

    # Core configuration: allow non-pydantic field types (e.g. Exception)
    # and re-validate fields whenever they are assigned.
    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)

    # Public data fields
    task_id: str
    request: InferenceRequest
    status: TaskStatus = TaskStatus.SUBMITTED
    result: Optional[InferenceRequestResult] = None
    error: Optional[str] = None
    error_exception: Optional[Exception] = None
    tag: Optional[str] = None
    submit_time: float = Field(default_factory=time.time)
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    task_timeout: Optional[float] = None
    persist: bool = False

    # Streamed content accumulated so far
    partial_response: str = ""
    partial_reasoning: str = ""

    # Runtime objects: not serialized and not accepted as constructor arguments
    _asyncio_task: Optional[asyncio.Task] = PrivateAttr(default=None)
    _done_event: Optional[asyncio.Event] = PrivateAttr(default=None)
    _done_threading_event: Optional[threading.Event] = PrivateAttr(default=None)

    # Callbacks
    _on_token: Optional[Callable] = PrivateAttr(default=None)
    _on_success: Optional[Callable] = PrivateAttr(default=None)
    _on_failure: Optional[Callable] = PrivateAttr(default=None)
    _on_retry: Optional[Callable] = PrivateAttr(default=None)

    @property
    def duration(self) -> Optional[float]:
        """Return seconds elapsed since ``start_time``.

        Returns None when the task has not started. While ``end_time`` is
        unset the value is measured up to the current time; otherwise it is
        ``end_time - start_time``.
        """
        if self.start_time is None:
            return None
        # Compare against None explicitly: a legitimate end_time of 0.0 is
        # falsy and must not be replaced by the current time.
        end = self.end_time if self.end_time is not None else time.time()
        return end - self.start_time

    def is_finished(self) -> bool:
        """Return True when ``status`` is one of the terminal states."""
        return self.status in (
            TaskStatus.SUCCESS,
            TaskStatus.FAILED,
            TaskStatus.TIMEOUT,
            TaskStatus.CANCELLED,
            TaskStatus.REJECTED,
        )

    def is_timeout(self) -> bool:
        """Return True when more than ``task_timeout`` seconds have passed since ``submit_time``.

        Always False when ``task_timeout`` is None. The clock starts at
        submission, not at ``start_time``, and the check does not look at
        ``status``.
        """
        # NOTE(review): elapsed time is measured from submit_time, so time
        # spent before the task starts counts too — confirm this is intended.
        if self.task_timeout is None:
            return False
        return (time.time() - self.submit_time) > self.task_timeout
172
+
173
+
174
class EngineStats(BaseModel):
    # Aggregate counters; every numeric field is validated to be >= 0.
    # from_attributes=True lets the model be validated from attribute-bearing objects.
    model_config = ConfigDict(from_attributes=True)

    # Task state counters
    total_submitted: int = Field(default=0, ge=0)
    total_pending: int = Field(default=0, ge=0)
    total_running: int = Field(default=0, ge=0)
    total_done: int = Field(default=0, ge=0)
    total_failed: int = Field(default=0, ge=0)
    total_cancelled: int = Field(default=0, ge=0)

    # Resource consumption statistics
    total_input_tokens: int = Field(default=0, ge=0)
    total_output_tokens: int = Field(default=0, ge=0)
    total_tokens: int = Field(default=0, ge=0)
    total_duration: float = Field(default=0.0, ge=0.0)
    estimated_cost: float = Field(default=0.0, ge=0.0)

    def to_dict(self) -> Dict[str, Any]:
        """Return all counters as a plain dict."""
        dumped = self.model_dump()
        return dumped

    @property
    def success_rate(self) -> float:
        """Return ``total_done / (total_done + total_failed)``, or 0.0 when that sum is zero."""
        finished = self.total_done + self.total_failed
        if finished == 0:
            return 0.0
        return self.total_done / finished
201
+
202
+
203
class ChunkDelta(BaseModel):
    # Incremental piece of a streamed message; every part is optional.
    role: Optional[str] = Field(default=None)
    content: Optional[str] = Field(default=None)
    reasoning_content: Optional[str] = Field(default=None)
207
+
208
+
209
class ChunkChoice(BaseModel):
    # One choice entry inside a streamed chunk: its index and delta, plus
    # an optional finish reason and untyped logprobs.
    index: int
    delta: ChunkDelta
    finish_reason: Optional[str] = Field(default=None)
    logprobs: Optional[Any] = Field(default=None)
214
+
215
+
216
class TokenUsage(BaseModel):
    # Token accounting; the three counters default to 0 and the details
    # mapping is optional.
    prompt_tokens: int = Field(default=0)
    completion_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    completion_tokens_details: Optional[Dict[str, Any]] = Field(default=None)
221
+
222
+
223
class ChatCompletionChunk(BaseModel):
    # One streamed chunk of a chat completion.
    # Presumably mirrors the OpenAI-compatible streaming wire format — confirm
    # against the client code that parses it.
    id: str
    object: str
    created: int
    model: str
    choices: List[ChunkChoice]
    usage: Optional[TokenUsage] = Field(default=None)
    system_fingerprint: Optional[str] = Field(default=None)
231
+
232
+
233
class ChatMessage(BaseModel):
    # A complete (non-streamed) message: required role and content, plus
    # optional reasoning text.
    role: str
    content: str
    reasoning_content: Optional[str] = Field(default=None)
237
+
238
+
239
class ChatCompletionChoice(BaseModel):
    # One choice of a non-streamed completion: its index, the full message,
    # and an optional finish reason.
    index: int
    message: ChatMessage
    finish_reason: Optional[str] = Field(default=None)
243
+
244
+
245
class ChatCompletionResponse(BaseModel):
    # A full (non-streamed) chat completion response.
    # Presumably mirrors the OpenAI-compatible response wire format — confirm
    # against the client code that parses it.
    id: str
    object: str
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Optional[TokenUsage] = Field(default=None)
llm_engine/utils.py ADDED
@@ -0,0 +1,34 @@
1
+ # llm_engine/utils.py
2
+
3
+ import time
4
+ import uuid
5
+
6
+ from typing import Optional
7
+
8
+ import kitty_logger
9
+
10
+ logger = kitty_logger.getLogger(__name__)
11
+
12
+
13
def get_timestamp() -> str:
    """Return the current epoch time in nanoseconds as a zero-padded 20-character string."""
    return str(time.time_ns()).zfill(20)
16
+
17
+
18
def get_timestamp_h() -> str:
    """Return the current local time as ``YYYY_MM_DD_HH_MM_SS_<nanoseconds>``.

    The trailing nanosecond part is always nine digits, zero-padded.
    """
    now_ns = time.time_ns()
    whole_seconds = now_ns // 1_000_000_000
    frac_ns = now_ns % 1_000_000_000

    # Format the whole-second part in local time, then append the sub-second part.
    stamp = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(whole_seconds))
    return "_".join((stamp, f"{frac_ns:09d}"))
29
+
30
+
31
def gen_unique_id(prefix: Optional[str] = None) -> str:
    """Return an id of the form ``[<prefix>_]<timestamp>_<uuid4>``.

    The timestamp comes from ``get_timestamp_h()``. A falsy prefix (None or
    an empty string) is omitted together with its separator.
    """
    random_part = uuid.uuid4()
    time_part = get_timestamp_h()
    core = f"{time_part}_{random_part}"
    if not prefix:
        return core
    return f"{prefix}_{core}"
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-engine-kitty
3
+ Version: 0.1.0.dev0
4
+ Summary: A lightweight LLM inference engine supporting OpenAI-compatible APIs.
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: pydantic>=2.0
7
+ Requires-Dist: httpx>=0.28
8
+ Requires-Dist: tqdm>=4.0
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: msgpack>=1.0
11
+ Requires-Dist: kitty-logger>=0.2.0.dev3
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=7.0; extra == "dev"
14
+ Requires-Dist: black; extra == "dev"
15
+ Requires-Dist: python-dotenv; extra == "dev"
@@ -0,0 +1,18 @@
1
+ llm_engine/__init__.py,sha256=-7ezeNIQ1gDo_xyhZ2rtf1eDh10_F78x4aRujbXkdog,1396
2
+ llm_engine/engine.py,sha256=ZHY7oisg4BGGWDAu0oqf2x-lQaC_8umPzEPMOqBmaNo,34125
3
+ llm_engine/general_engine.py,sha256=AHlCCRvMO5wzGlSGO3dJS-YPlMVXebDjf3VPjKJFbs4,24398
4
+ llm_engine/model_config.py,sha256=1THnQ5HmDxBOABRLIvVo-f-yTnGeEUsQSCwHQbraV14,3800
5
+ llm_engine/schemas.py,sha256=nxfClEEkwGjL4am42VkGQ6uYVj-vCUqzbFqjvYjLn-w,7366
6
+ llm_engine/utils.py,sha256=4z99QOzqW7qo5H_S-TQhuc9biTZH80lBrA0YzJE8e6M,681
7
+ llm_engine/kitty/__init__.py,sha256=9b1ESBxdjmwZCAd1HgP7XyqEm7fHZIuDWNkaY7eXGRg,236
8
+ llm_engine/kitty/__main__.py,sha256=f4Qufj3ZCh6s5ajuIKwV9bl4RPhWbDm_rOChsf1HbgQ,1244
9
+ llm_engine/kitty/client.py,sha256=rO-7AESgO4wchjRE4CXiy4Norrs8S-BIuzCYxzsP9Q0,22652
10
+ llm_engine/kitty/config.py,sha256=BVtnEwDC4MHcaHto3tioGlO_tb8i6W2TbupnbdRrl-4,3403
11
+ llm_engine/kitty/engine.py,sha256=mtTKMfROSNDR0PD54MQI_tHR6bJO_aodEHFQIFvFvCs,50564
12
+ llm_engine/kitty/protocol.py,sha256=m-DlhxwqUVENPZg5BvfeR84BuLSeCy-l_33riq7oTlM,7331
13
+ llm_engine/kitty/schemas.py,sha256=GAHBLEUCtnTf2heiYIEI8IIWKtwBrdiQPsZ7kOINjsA,3029
14
+ llm_engine/kitty/server.py,sha256=Hnrasl_0sDTgb-_vrq9y0DE7QPfFFYX4WHUbQvFzSTU,18223
15
+ llm_engine_kitty-0.1.0.dev0.dist-info/METADATA,sha256=BYz6YIXkRLOUwvkzlEj-VEm5xIZREXf2S2VZRob9tWY,489
16
+ llm_engine_kitty-0.1.0.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
17
+ llm_engine_kitty-0.1.0.dev0.dist-info/top_level.txt,sha256=az8036N8tKuDqgMteGVC6Q86IC92YXspiWgo440tARo,11
18
+ llm_engine_kitty-0.1.0.dev0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ llm_engine