llm-engine-kitty 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_engine/__init__.py +54 -0
- llm_engine/engine.py +771 -0
- llm_engine/general_engine.py +562 -0
- llm_engine/kitty/__init__.py +8 -0
- llm_engine/kitty/__main__.py +46 -0
- llm_engine/kitty/client.py +550 -0
- llm_engine/kitty/config.py +83 -0
- llm_engine/kitty/engine.py +1077 -0
- llm_engine/kitty/protocol.py +213 -0
- llm_engine/kitty/schemas.py +89 -0
- llm_engine/kitty/server.py +408 -0
- llm_engine/model_config.py +112 -0
- llm_engine/schemas.py +251 -0
- llm_engine/utils.py +34 -0
- llm_engine_kitty-0.1.0.dev0.dist-info/METADATA +15 -0
- llm_engine_kitty-0.1.0.dev0.dist-info/RECORD +18 -0
- llm_engine_kitty-0.1.0.dev0.dist-info/WHEEL +5 -0
- llm_engine_kitty-0.1.0.dev0.dist-info/top_level.txt +1 -0
llm_engine/schemas.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# llm_engine/schemas.py
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
|
9
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MessageRole(str, Enum):
    # Role labels for chat messages. The `str` mixin makes every member
    # compare equal to its plain string value.
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Message(BaseModel):
    # One chat message: a role label plus its text content.
    role: str = Field(description="消息角色")
    content: str = Field(description="消息内容")

    def to_dict(self) -> Dict[str, str]:
        """Return the message as a plain dict with ``role`` and ``content`` keys."""
        return dict(role=self.role, content=self.content)

    def __repr__(self) -> str:
        """Return a compact repr of the message.

        Content of 48 characters or fewer is shown in full; longer content is
        abbreviated to its first 16 and last 16 characters joined by six dots.
        """
        text = self.content
        if len(text) <= 48:
            return f"Message(role={self.role!r}, content={text!r})"
        shortened = f"{text[:16]}......{text[-16:]}"
        return f"Message(role={self.role!r}, content={shortened!r})"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class InferenceParameters(BaseModel):
    # Optional inference parameters. Every declared field defaults to None,
    # and undeclared extra fields are accepted and kept (extra="allow").
    model_config = ConfigDict(
        extra="allow",
        populate_by_name=True,
        str_strip_whitespace=True,
        validate_assignment=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
    )

    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    frequency_penalty: Optional[float] = None
    n: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Dump the model to a dict, leaving out every field whose value is None."""
        return self.model_dump(exclude_none=True)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class InferenceRequest(BaseModel):
    # A caller-supplied inference request. Only `messages` is required;
    # every other field is optional or defaults to an empty dict.
    model_name: Optional[str] = None
    api_key: Optional[str] = None
    messages: List[Message]
    metadata: Dict[str, Any] = Field(default_factory=dict)
    inference_parameters: Optional[InferenceParameters] = None
    stream: Optional[bool] = None
    timeout: Optional[float] = None

    # Free-form additions; how they are merged into the outgoing request is
    # decided by the consumer of this model, not here.
    extra_headers: Dict[str, str] = Field(default_factory=dict)
    extra_payload: Dict[str, Any] = Field(default_factory=dict)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class PreparedRequest(BaseModel):
    # A request resolved into concrete pieces: model name, target URL,
    # headers and payload. `urls` is an additional list of URLs that is
    # empty by default.
    # NOTE(review): presumably `urls` holds alternative endpoints - confirm with the caller.
    model_name: str
    url: str
    urls: list[str] = Field(default_factory=list)
    headers: Dict[str, str]
    payload: Dict[str, Any]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TaskStatus(str, Enum):
    # Lifecycle states of a task. Members are also plain `str` values.

    # -- Non-terminal states --
    SUBMITTED = "submitted"  # submitted, awaiting validation (e.g. model resolution, registry lookup)
    ACCEPTED = "accepted"  # validation passed, about to be enqueued to wait for the semaphore
    PENDING = "pending"  # enqueued, waiting for a semaphore slot
    RUNNING = "running"  # semaphore acquired, currently executing

    # -- Terminal states --
    SUCCESS = "success"
    FAILED = "failed"  # failed during execution (HTTP / parsing / network or other non-timeout error)
    TIMEOUT = "timeout"  # timed out during execution (triggered by task_timeout)
    CANCELLED = "cancelled"
    REJECTED = "rejected"  # failed validation (invalid task, never reached execution)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ModelOutput(BaseModel):
    # Output produced by a model call. `content` is required; role,
    # reasoning, finish_reason and usage are optional and default to None.
    role: Optional[str] = None
    reasoning: Optional[str] = None
    content: str
    finish_reason: Optional[str] = None
    usage: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Dump the model to a dict, keeping fields whose value is None."""
        # exclude_none already defaults to False, so the plain call is equivalent.
        return self.model_dump()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class InferenceRequestResult(BaseModel):
    # Outcome of one inference request: a success flag, the task id, the
    # originating request, and optional output, error message and duration.
    success: bool

    task_id: str
    request: InferenceRequest
    model_output: Optional[ModelOutput] = None
    error_message: Optional[str] = None
    duration: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Dump the result to a dict, keeping fields whose value is None."""
        # exclude_none already defaults to False, so the plain call is equivalent.
        return self.model_dump()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class TaskHandle(BaseModel):
    # Handle for one submitted task: the request, its status, result or
    # error, timing fields, streamed partial output, and runtime-only
    # private attributes (asyncio task, completion events, callbacks).

    # Core configuration: arbitrary types are needed for the `Exception`
    # field; assignments are re-validated.
    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)

    # Public data fields
    task_id: str
    request: InferenceRequest
    status: TaskStatus = TaskStatus.SUBMITTED
    result: Optional[InferenceRequestResult] = None
    error: Optional[str] = None
    error_exception: Optional[Exception] = None
    tag: Optional[str] = None
    submit_time: float = Field(default_factory=time.time)
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    task_timeout: Optional[float] = None
    persist: bool = False

    # Streaming content
    partial_response: str = ""
    partial_reasoning: str = ""

    # Runtime objects: private attributes, so they are neither serialized
    # nor accepted as constructor arguments.
    _asyncio_task: Optional[asyncio.Task] = PrivateAttr(default=None)
    _done_event: Optional[asyncio.Event] = PrivateAttr(default=None)
    _done_threading_event: Optional[threading.Event] = PrivateAttr(default=None)

    # Callbacks
    _on_token: Optional[Callable] = PrivateAttr(default=None)
    _on_success: Optional[Callable] = PrivateAttr(default=None)
    _on_failure: Optional[Callable] = PrivateAttr(default=None)
    _on_retry: Optional[Callable] = PrivateAttr(default=None)

    @property
    def duration(self) -> Optional[float]:
        """Return seconds elapsed since ``start_time``.

        Returns:
            None if the task has not started; ``end_time - start_time`` once
            ``end_time`` is set; otherwise the time elapsed up to now.
        """
        if self.start_time is None:
            return None
        # Test against None explicitly: a falsy but valid end_time (0.0)
        # must not be replaced by the current time.
        end = time.time() if self.end_time is None else self.end_time
        return end - self.start_time

    def is_finished(self) -> bool:
        """Return True when ``status`` is one of the terminal states."""
        terminal_states = (
            TaskStatus.SUCCESS,
            TaskStatus.FAILED,
            TaskStatus.TIMEOUT,
            TaskStatus.CANCELLED,
            TaskStatus.REJECTED,
        )
        return self.status in terminal_states

    def is_timeout(self) -> bool:
        """Return True when more than ``task_timeout`` seconds have passed since ``submit_time``.

        Always False when ``task_timeout`` is None. The clock starts at
        submission, not at ``start_time``.
        """
        if self.task_timeout is None:
            return False
        return (time.time() - self.submit_time) > self.task_timeout
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class EngineStats(BaseModel):
    # Aggregate engine counters. Every field is constrained to be >= 0 and
    # the model can be built from attribute-bearing objects
    # (from_attributes=True).
    model_config = ConfigDict(from_attributes=True)

    # Task status counters
    total_submitted: int = Field(default=0, ge=0)
    total_pending: int = Field(default=0, ge=0)
    total_running: int = Field(default=0, ge=0)
    total_done: int = Field(default=0, ge=0)
    total_failed: int = Field(default=0, ge=0)
    total_cancelled: int = Field(default=0, ge=0)

    # Resource consumption statistics
    total_input_tokens: int = Field(default=0, ge=0)
    total_output_tokens: int = Field(default=0, ge=0)
    total_tokens: int = Field(default=0, ge=0)
    total_duration: float = Field(default=0.0, ge=0.0)
    estimated_cost: float = Field(default=0.0, ge=0.0)

    def to_dict(self) -> Dict[str, Any]:
        """Dump all counters to a plain dict."""
        return self.model_dump()

    @property
    def success_rate(self) -> float:
        """Return ``total_done / (total_done + total_failed)``, or 0.0 when both are zero."""
        completed = self.total_done + self.total_failed
        if completed == 0:
            return 0.0
        return self.total_done / completed
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class ChunkDelta(BaseModel):
    # Incremental piece carried by one streamed choice; every field is
    # optional and defaults to None.
    role: Optional[str] = None
    content: Optional[str] = None
    reasoning_content: Optional[str] = None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class ChunkChoice(BaseModel):
    # One choice inside a streamed chunk: its index and delta are required;
    # finish_reason and logprobs default to None.
    index: int
    delta: ChunkDelta
    finish_reason: Optional[str] = None
    logprobs: Optional[Any] = None
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class TokenUsage(BaseModel):
    # Token counters, all defaulting to 0, plus an optional free-form
    # details mapping for completion tokens.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    completion_tokens_details: Optional[Dict[str, Any]] = None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class ChatCompletionChunk(BaseModel):
    # One streamed chat-completion chunk.
    # NOTE(review): field names look like the OpenAI-compatible streaming
    # schema - confirm against the server being targeted.
    id: str
    object: str
    created: int
    model: str
    choices: List[ChunkChoice]
    usage: Optional[TokenUsage] = None
    system_fingerprint: Optional[str] = None
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class ChatMessage(BaseModel):
    # Message inside a completed (non-streamed) choice: role and content
    # are required; reasoning_content defaults to None.
    role: str
    content: str
    reasoning_content: Optional[str] = None
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class ChatCompletionChoice(BaseModel):
    # One choice of a completed response: its index and message are
    # required; finish_reason defaults to None.
    index: int
    message: ChatMessage
    finish_reason: Optional[str] = None
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class ChatCompletionResponse(BaseModel):
    # A complete (non-streamed) chat-completion response.
    # NOTE(review): field names look like the OpenAI-compatible response
    # schema - confirm against the server being targeted.
    id: str
    object: str
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Optional[TokenUsage] = None
|
llm_engine/utils.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# llm_engine/utils.py
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import kitty_logger
|
|
9
|
+
|
|
10
|
+
logger = kitty_logger.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_timestamp() -> str:
    """Return the current epoch time in nanoseconds, zero-padded to 20 digits."""
    return format(time.time_ns(), "020d")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_timestamp_h() -> str:
    """Return the current local time as ``YYYY_MM_DD_HH_MM_SS_<9-digit nanoseconds>``."""
    now_ns = time.time_ns()
    # Split into whole seconds and the sub-second remainder.
    whole_seconds = now_ns // 1_000_000_000
    remainder_ns = now_ns % 1_000_000_000
    stamp = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(whole_seconds))
    return f"{stamp}_{remainder_ns:09d}"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def gen_unique_id(prefix: Optional[str] = None) -> str:
    """Build an id from a human-readable timestamp and a random UUID4.

    Args:
        prefix: Optional label placed in front of the id; ignored when it is
            None or empty.

    Returns:
        ``"<prefix>_<timestamp>_<uuid>"`` when a non-empty prefix is given,
        otherwise ``"<timestamp>_<uuid>"``.
    """
    unique = uuid.uuid4()
    stamp = get_timestamp_h()
    base = f"{stamp}_{unique}"
    if not prefix:
        return base
    return f"{prefix}_{base}"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-engine-kitty
|
|
3
|
+
Version: 0.1.0.dev0
|
|
4
|
+
Summary: A lightweight LLM inference engine supporting OpenAI-compatible APIs.
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: pydantic>=2.0
|
|
7
|
+
Requires-Dist: httpx>=0.28
|
|
8
|
+
Requires-Dist: tqdm>=4.0
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Requires-Dist: msgpack>=1.0
|
|
11
|
+
Requires-Dist: kitty-logger>=0.2.0.dev3
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
Requires-Dist: black; extra == "dev"
|
|
15
|
+
Requires-Dist: python-dotenv; extra == "dev"
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
llm_engine/__init__.py,sha256=-7ezeNIQ1gDo_xyhZ2rtf1eDh10_F78x4aRujbXkdog,1396
|
|
2
|
+
llm_engine/engine.py,sha256=ZHY7oisg4BGGWDAu0oqf2x-lQaC_8umPzEPMOqBmaNo,34125
|
|
3
|
+
llm_engine/general_engine.py,sha256=AHlCCRvMO5wzGlSGO3dJS-YPlMVXebDjf3VPjKJFbs4,24398
|
|
4
|
+
llm_engine/model_config.py,sha256=1THnQ5HmDxBOABRLIvVo-f-yTnGeEUsQSCwHQbraV14,3800
|
|
5
|
+
llm_engine/schemas.py,sha256=nxfClEEkwGjL4am42VkGQ6uYVj-vCUqzbFqjvYjLn-w,7366
|
|
6
|
+
llm_engine/utils.py,sha256=4z99QOzqW7qo5H_S-TQhuc9biTZH80lBrA0YzJE8e6M,681
|
|
7
|
+
llm_engine/kitty/__init__.py,sha256=9b1ESBxdjmwZCAd1HgP7XyqEm7fHZIuDWNkaY7eXGRg,236
|
|
8
|
+
llm_engine/kitty/__main__.py,sha256=f4Qufj3ZCh6s5ajuIKwV9bl4RPhWbDm_rOChsf1HbgQ,1244
|
|
9
|
+
llm_engine/kitty/client.py,sha256=rO-7AESgO4wchjRE4CXiy4Norrs8S-BIuzCYxzsP9Q0,22652
|
|
10
|
+
llm_engine/kitty/config.py,sha256=BVtnEwDC4MHcaHto3tioGlO_tb8i6W2TbupnbdRrl-4,3403
|
|
11
|
+
llm_engine/kitty/engine.py,sha256=mtTKMfROSNDR0PD54MQI_tHR6bJO_aodEHFQIFvFvCs,50564
|
|
12
|
+
llm_engine/kitty/protocol.py,sha256=m-DlhxwqUVENPZg5BvfeR84BuLSeCy-l_33riq7oTlM,7331
|
|
13
|
+
llm_engine/kitty/schemas.py,sha256=GAHBLEUCtnTf2heiYIEI8IIWKtwBrdiQPsZ7kOINjsA,3029
|
|
14
|
+
llm_engine/kitty/server.py,sha256=Hnrasl_0sDTgb-_vrq9y0DE7QPfFFYX4WHUbQvFzSTU,18223
|
|
15
|
+
llm_engine_kitty-0.1.0.dev0.dist-info/METADATA,sha256=BYz6YIXkRLOUwvkzlEj-VEm5xIZREXf2S2VZRob9tWY,489
|
|
16
|
+
llm_engine_kitty-0.1.0.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
17
|
+
llm_engine_kitty-0.1.0.dev0.dist-info/top_level.txt,sha256=az8036N8tKuDqgMteGVC6Q86IC92YXspiWgo440tARo,11
|
|
18
|
+
llm_engine_kitty-0.1.0.dev0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
llm_engine
|