isagellm 0.1.0.6__cp311-none-any.whl → 0.2.2.0__cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {isagellm-0.1.0.6.dist-info → isagellm-0.2.2.0.dist-info}/METADATA +52 -74
- isagellm-0.2.2.0.dist-info/RECORD +11 -0
- sagellm/__init__.py +9 -4
- sagellm/__init__.pyc +0 -0
- sagellm/cli.pyc +0 -0
- isagellm-0.1.0.6.dist-info/RECORD +0 -11
- {isagellm-0.1.0.6.dist-info → isagellm-0.2.2.0.dist-info}/WHEEL +0 -0
- {isagellm-0.1.0.6.dist-info → isagellm-0.2.2.0.dist-info}/entry_points.txt +0 -0
- {isagellm-0.1.0.6.dist-info → isagellm-0.2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: isagellm
|
|
3
|
-
Version: 0.1.0.6
|
|
3
|
+
Version: 0.2.2.0
|
|
4
4
|
Summary: sageLLM: Modular LLM inference engine for domestic computing power (Huawei Ascend, NVIDIA)
|
|
5
5
|
Author: IntelliStream Team
|
|
6
6
|
License: Proprietary - IntelliStream
|
|
@@ -17,10 +17,10 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
17
17
|
Requires-Python: ==3.11.*
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
Requires-Dist: isagellm-protocol<0.2.0,>=0.1.0
|
|
20
|
-
Requires-Dist: isagellm-backend<0.
|
|
21
|
-
Requires-Dist: isagellm-core<0.
|
|
22
|
-
Requires-Dist: isagellm-control-plane<0.2.0,>=0.1.0.
|
|
23
|
-
Requires-Dist: isagellm-gateway<0.2.0,>=0.1.0.
|
|
20
|
+
Requires-Dist: isagellm-backend<0.3.0,>=0.2.1.0
|
|
21
|
+
Requires-Dist: isagellm-core<0.3.0,>=0.2.2.0
|
|
22
|
+
Requires-Dist: isagellm-control-plane<0.2.0,>=0.1.0.4
|
|
23
|
+
Requires-Dist: isagellm-gateway<0.2.0,>=0.1.0.4
|
|
24
24
|
Requires-Dist: isagellm-kv-cache<0.2.0,>=0.1.0
|
|
25
25
|
Requires-Dist: isagellm-comm<0.2.0,>=0.1.0
|
|
26
26
|
Requires-Dist: isagellm-compression<0.2.0,>=0.1.0
|
|
@@ -51,7 +51,7 @@ Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
|
51
51
|
Ollama-like experience for Chinese hardware ecosystems (Huawei Ascend, NVIDIA)
|
|
52
52
|
</p>
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
______________________________________________________________________
|
|
55
55
|
|
|
56
56
|
## ✨ Features
|
|
57
57
|
|
|
@@ -85,45 +85,37 @@ pip install 'isagellm[all]'
|
|
|
85
85
|
|
|
86
86
|
## 🚀 Quick Start
|
|
87
87
|
|
|
88
|
-
### 启动模式选择
|
|
89
|
-
|
|
90
|
-
sageLLM 支持两种启动模式,满足不同场景需求:
|
|
91
|
-
|
|
92
|
-
| 模式 | 使用场景 | 依赖 | 命令示例 |
|
|
93
|
-
|------|---------|------|---------|
|
|
94
|
-
| **Mock** | CI/测试/本地开发 | 无需 GPU | `sage-llm serve --mock` |
|
|
95
|
-
| **生产** | 真实推理服务 | GPU/CPU 后端 | `sage-llm serve --control-plane` |
|
|
96
|
-
|
|
97
|
-
**⚠️ Fail-Fast 保证**:非 mock 模式下,若依赖缺失或配置错误,系统将**立即报错退出**,不会静默回退到 mock 模式。
|
|
98
|
-
|
|
99
88
|
### CLI (like ollama)
|
|
100
89
|
|
|
101
90
|
```bash
|
|
102
91
|
# Show system info
|
|
103
92
|
sage-llm info
|
|
104
93
|
|
|
105
|
-
#
|
|
94
|
+
# Default mode (CPU engine, no GPU required)
|
|
95
|
+
sage-llm serve
|
|
96
|
+
sage-llm run -p "What is LLM inference?"
|
|
97
|
+
|
|
98
|
+
# Mock mode (CI / fast smoke tests)
|
|
106
99
|
sage-llm serve --mock
|
|
107
100
|
sage-llm run -p "What is LLM inference?" --mock
|
|
108
|
-
sage-llm demo --workload year1 --mock
|
|
109
101
|
|
|
110
|
-
#
|
|
102
|
+
# Production mode (requires control-plane)
|
|
111
103
|
# pip install 'isagellm[server]'
|
|
112
104
|
sage-llm serve --control-plane
|
|
113
|
-
sage-llm gateway --
|
|
114
|
-
|
|
115
|
-
# 如果缺少依赖,将看到:
|
|
116
|
-
# ❌ Error: Control Plane required but not installed
|
|
117
|
-
# Install: pip install 'isagellm[control-plane]'
|
|
105
|
+
sage-llm gateway --port 8080
|
|
118
106
|
```
|
|
119
107
|
|
|
120
108
|
### Python API
|
|
121
109
|
|
|
122
110
|
```python
|
|
123
|
-
from sagellm import Request,
|
|
111
|
+
from sagellm import BackendConfig, EngineConfig, Request, create_backend, create_engine
|
|
124
112
|
|
|
125
|
-
# Create
|
|
126
|
-
|
|
113
|
+
# Create CPU backend + engine (no GPU needed)
|
|
114
|
+
backend = create_backend(BackendConfig(kind="cpu", device="cpu"))
|
|
115
|
+
engine = create_engine(
|
|
116
|
+
EngineConfig(kind="cpu", model="sshleifer/tiny-gpt2", device="cpu"),
|
|
117
|
+
backend,
|
|
118
|
+
)
|
|
127
119
|
|
|
128
120
|
# Run inference
|
|
129
121
|
request = Request(
|
|
@@ -143,39 +135,20 @@ print(f"Throughput: {response.metrics.throughput_tps:.2f} tokens/s")
|
|
|
143
135
|
```yaml
|
|
144
136
|
# ~/.sage-llm/config.yaml
|
|
145
137
|
backend:
|
|
146
|
-
kind:
|
|
147
|
-
|
|
148
|
-
# Fail-fast 配置:如果指定了非 mock 后端但不可用,将报错退出
|
|
149
|
-
strict_mode: true # 默认为 true,符合申报书要求
|
|
138
|
+
kind: cpu # Options: cpu, mock, cuda, ascend
|
|
139
|
+
device: cpu
|
|
150
140
|
|
|
151
141
|
engine:
|
|
152
|
-
kind:
|
|
153
|
-
model:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
enabled: false # true 时强制使用 mock,无论其他配置
|
|
158
|
-
deterministic: true # mock 输出是否固定(用于回归测试)
|
|
159
|
-
|
|
160
|
-
# 生产模式最低要求
|
|
161
|
-
production:
|
|
162
|
-
control_plane:
|
|
163
|
-
required: true # true 时缺少 control-plane 将报错(非 mock 模式)
|
|
164
|
-
endpoint: "localhost:8080"
|
|
165
|
-
backend:
|
|
166
|
-
required: true # true 时缺少真实后端将报错
|
|
167
|
-
fallback_to_mock: false # 禁止自动降级到 mock(fail-fast)
|
|
168
|
-
|
|
169
|
-
workload:
|
|
170
|
-
segments:
|
|
171
|
-
- short # 128 in → 128 out
|
|
172
|
-
- long # 2048 in → 512 out
|
|
173
|
-
- stress # concurrent requests
|
|
142
|
+
kind: cpu
|
|
143
|
+
model: sshleifer/tiny-gpt2
|
|
144
|
+
|
|
145
|
+
control_plane:
|
|
146
|
+
endpoint: "localhost:8080"
|
|
174
147
|
```
|
|
175
148
|
|
|
176
|
-
## 📊
|
|
149
|
+
## 📊 Metrics & Validation
|
|
177
150
|
|
|
178
|
-
sageLLM must produce these metrics for validation:
|
|
151
|
+
sageLLM provides comprehensive performance metrics:
|
|
179
152
|
|
|
180
153
|
```json
|
|
181
154
|
{
|
|
@@ -184,12 +157,12 @@ sageLLM must produce these metrics for validation:
|
|
|
184
157
|
"throughput_tps": 80.0,
|
|
185
158
|
"peak_mem_mb": 24576,
|
|
186
159
|
"kv_used_tokens": 4096,
|
|
187
|
-
"prefix_hit_rate": 0.85
|
|
188
|
-
"evict_count": 3
|
|
160
|
+
"prefix_hit_rate": 0.85
|
|
189
161
|
}
|
|
190
162
|
```
|
|
191
163
|
|
|
192
|
-
Run
|
|
164
|
+
Run benchmarks:
|
|
165
|
+
|
|
193
166
|
```bash
|
|
194
167
|
sage-llm demo --workload year1 --output metrics.json
|
|
195
168
|
```
|
|
@@ -259,59 +232,64 @@ python scripts/verify_dependencies.py
|
|
|
259
232
|
- **[INFERENCE_FLOW.md](docs/INFERENCE_FLOW.md)** - 推理流程详解
|
|
260
233
|
- **[PR_CHECKLIST.md](docs/PR_CHECKLIST.md)** - Pull Request 检查清单
|
|
261
234
|
|
|
262
|
-
|
|
235
|
+
______________________________________________________________________
|
|
263
236
|
|
|
264
237
|
## 📚 Documentation Index
|
|
265
238
|
|
|
266
239
|
### 用户文档
|
|
240
|
+
|
|
267
241
|
- [快速开始](README.md#-quick-start) - 5 分钟上手
|
|
268
242
|
- [部署指南](docs/DEPLOYMENT_GUIDE.md) - 生产环境部署
|
|
269
|
-
- [配置参考](docs/DEPLOYMENT_GUIDE.md
|
|
243
|
+
- [配置参考](docs/DEPLOYMENT_GUIDE.md#%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E8%AF%B4%E6%98%8E) - 完整配置选项
|
|
270
244
|
- [环境变量](docs/ENVIRONMENT_VARIABLES.md) - 环境变量参考
|
|
271
245
|
- [故障排查](docs/TROUBLESHOOTING.md) - 常见问题解决
|
|
272
246
|
|
|
273
247
|
### 开发者文档
|
|
248
|
+
|
|
274
249
|
- [开发指南](docs/DEVELOPER_GUIDE.md) - 贡献代码
|
|
275
250
|
- [架构设计](README.md#-architecture) - 系统架构
|
|
276
251
|
- [Workspace 使用](docs/WORKSPACE_GUIDE.md) - Multi-root 工作区
|
|
277
252
|
- [PR 检查清单](docs/PR_CHECKLIST.md) - 提交前检查
|
|
278
253
|
|
|
279
254
|
### API 文档
|
|
255
|
+
|
|
280
256
|
- OpenAI 兼容 API - 参见 [sagellm-gateway](https://github.com/intellistream/sagellm-gateway)
|
|
281
257
|
- Python API - 参见 [API_REFERENCE.md](docs/API_REFERENCE.md)(待补充)
|
|
282
258
|
|
|
283
259
|
### 子包文档
|
|
260
|
+
|
|
284
261
|
- [sagellm-protocol](https://github.com/intellistream/sagellm-protocol) - 协议定义
|
|
262
|
+
|
|
285
263
|
- [sagellm-backend](https://github.com/intellistream/sagellm-backend) - 后端抽象
|
|
264
|
+
|
|
286
265
|
- [sagellm-core](https://github.com/intellistream/sagellm-core) - 引擎核心
|
|
266
|
+
|
|
287
267
|
- [sagellm-control-plane](https://github.com/intellistream/sagellm-control-plane) - 控制面
|
|
268
|
+
|
|
288
269
|
- [sagellm-gateway](https://github.com/intellistream/sagellm-gateway) - API 网关
|
|
270
|
+
|
|
289
271
|
- [sagellm-benchmark](https://github.com/intellistream/sagellm-benchmark) - 基准测试
|
|
290
272
|
|
|
291
273
|
- [**DEVELOPER_GUIDE.md**](DEVELOPER_GUIDE.md) - 架构规范与开发指南
|
|
274
|
+
|
|
292
275
|
- [**PR_CHECKLIST.md**](PR_CHECKLIST.md) - Pull Request 审查清单
|
|
276
|
+
|
|
293
277
|
- [**scripts/verify_dependencies.py**](scripts/verify_dependencies.py) - 依赖层次验证
|
|
294
278
|
|
|
295
279
|
## 📚 Package Details
|
|
296
280
|
|
|
297
|
-
| Package
|
|
298
|
-
|
|
299
|
-
| sagellm
|
|
300
|
-
| sagellm-protocol | `isagellm-protocol` | `sagellm_protocol` | Protocol v0.1 types
|
|
301
|
-
| sagellm-core
|
|
302
|
-
| sagellm-backend
|
|
303
|
-
|
|
304
|
-
## 🎯 Roadmap
|
|
305
|
-
|
|
306
|
-
- **Year 1**: Core inference with KV cache, prefix sharing, basic eviction
|
|
307
|
-
- **Year 2**: Multi-node inference, advanced scheduling
|
|
308
|
-
- **Year 3**: Full production-ready deployment
|
|
281
|
+
| Package | PyPI Name | Import Name | Description |
|
|
282
|
+
| ---------------- | ------------------- | ------------------ | ------------------------------- |
|
|
283
|
+
| sagellm | `isagellm` | `sagellm` | Umbrella package (install this) |
|
|
284
|
+
| sagellm-protocol | `isagellm-protocol` | `sagellm_protocol` | Protocol v0.1 types |
|
|
285
|
+
| sagellm-core | `isagellm-core` | `sagellm_core` | Runtime & config |
|
|
286
|
+
| sagellm-backend | `isagellm-backend` | `sagellm_backend` | Hardware abstraction |
|
|
309
287
|
|
|
310
288
|
## 📄 License
|
|
311
289
|
|
|
312
290
|
Proprietary - IntelliStream. Internal use only.
|
|
313
291
|
|
|
314
|
-
|
|
292
|
+
______________________________________________________________________
|
|
315
293
|
|
|
316
294
|
<p align="center">
|
|
317
295
|
<sub>Built with ❤️ by IntelliStream Team for domestic AI infrastructure</sub>
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
sagellm/__init__.py,sha256=UCOHeECsFoHYcSqdcsY0eObBDS18HPzj9GW-fIgCNaM,5647
|
|
2
|
+
sagellm/__init__.pyc,sha256=-JtSOMiNExRaYR1aBwkXZiDsvScL1EFDOX2vs_iU8JU,4860
|
|
3
|
+
sagellm/cli.pyc,sha256=rgkJu6Npz3Kf1WppnYt1K3JvO8IBfJpLlry_idWI1OU,53692
|
|
4
|
+
sagellm/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
|
|
5
|
+
sagellm/__pycache__/__init__.cpython-311.pyc,sha256=xGiRA2rVPAyoIUzq5phb-y3bV3VRpjYU3eIJbmGhID0,4616
|
|
6
|
+
sagellm/__pycache__/cli.cpython-311.pyc,sha256=n-HafVk9XrfnPpl56eonKAeIF2XCoAaUvdkJ0unLJBY,52767
|
|
7
|
+
isagellm-0.2.2.0.dist-info/METADATA,sha256=X7lblvqCgr6U8XPcGAE18DQJ8RGukmbtpIPtYVw_Yqo,9128
|
|
8
|
+
isagellm-0.2.2.0.dist-info/WHEEL,sha256=ZJeWpR6hcCRGwxVKXlDk-HsGwijNyTq4fszaDj4Ycyo,93
|
|
9
|
+
isagellm-0.2.2.0.dist-info/entry_points.txt,sha256=NqSiD9EEbziWs94BYtKFUzrnKTyCFG2MuZcvrRryhtg,73
|
|
10
|
+
isagellm-0.2.2.0.dist-info/top_level.txt,sha256=q-O8RUHV2YT7pQv12AYgFiK7PNvB9cHVg_7s5Tp08xI,8
|
|
11
|
+
isagellm-0.2.2.0.dist-info/RECORD,,
|
sagellm/__init__.py
CHANGED
|
@@ -7,15 +7,20 @@ Quick Start:
|
|
|
7
7
|
pip install isagellm
|
|
8
8
|
|
|
9
9
|
# CLI usage (like ollama)
|
|
10
|
-
sage-llm serve
|
|
10
|
+
sage-llm serve # Start CPU engine server
|
|
11
11
|
sage-llm run -p "Hello world" # Single inference
|
|
12
|
+
sage-llm serve --mock # Mock server for CI
|
|
12
13
|
sage-llm demo --workload year1 # Run Year1 demo validation
|
|
13
14
|
sage-llm info # Show system info
|
|
14
15
|
|
|
15
16
|
# Python API
|
|
16
|
-
from sagellm import Request,
|
|
17
|
+
from sagellm import BackendConfig, EngineConfig, Request, create_backend, create_engine
|
|
17
18
|
|
|
18
|
-
|
|
19
|
+
backend = create_backend(BackendConfig(kind="cpu", device="cpu"))
|
|
20
|
+
engine = create_engine(
|
|
21
|
+
EngineConfig(kind="cpu", model="sshleifer/tiny-gpt2", device="cpu"),
|
|
22
|
+
backend,
|
|
23
|
+
)
|
|
19
24
|
response = engine.generate(Request(prompt="Hello", max_tokens=128))
|
|
20
25
|
print(response.text)
|
|
21
26
|
|
|
@@ -28,7 +33,7 @@ Architecture:
|
|
|
28
33
|
|
|
29
34
|
from __future__ import annotations
|
|
30
35
|
|
|
31
|
-
__version__ = "0.1.0.6"
|
|
36
|
+
__version__ = "0.2.2.0"
|
|
32
37
|
|
|
33
38
|
# Lazy imports to handle installation order
|
|
34
39
|
_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
|
sagellm/__init__.pyc
CHANGED
|
Binary file
|
sagellm/cli.pyc
CHANGED
|
Binary file
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
sagellm/__init__.py,sha256=fvvYT8rqWc77Q6mHyEZp5o4BiTwa1fpmxHjzXypz5xo,5379
|
|
2
|
-
sagellm/__init__.pyc,sha256=GwhdZnZYMqAatjKR1T-2j1l7gDvAmRgGLg8QFseZcgw,4591
|
|
3
|
-
sagellm/cli.pyc,sha256=pUhwr6R1ChduZB-Z8x_-kgRXy0OtDFqnM_r3XSSlp_s,52742
|
|
4
|
-
sagellm/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
|
|
5
|
-
sagellm/__pycache__/__init__.cpython-311.pyc,sha256=xGiRA2rVPAyoIUzq5phb-y3bV3VRpjYU3eIJbmGhID0,4616
|
|
6
|
-
sagellm/__pycache__/cli.cpython-311.pyc,sha256=n-HafVk9XrfnPpl56eonKAeIF2XCoAaUvdkJ0unLJBY,52767
|
|
7
|
-
isagellm-0.1.0.6.dist-info/METADATA,sha256=9VO23GmlyIdekx0baW9SS3H5nhXHMbhWQIdpkjSzgP8,10008
|
|
8
|
-
isagellm-0.1.0.6.dist-info/WHEEL,sha256=ZJeWpR6hcCRGwxVKXlDk-HsGwijNyTq4fszaDj4Ycyo,93
|
|
9
|
-
isagellm-0.1.0.6.dist-info/entry_points.txt,sha256=NqSiD9EEbziWs94BYtKFUzrnKTyCFG2MuZcvrRryhtg,73
|
|
10
|
-
isagellm-0.1.0.6.dist-info/top_level.txt,sha256=q-O8RUHV2YT7pQv12AYgFiK7PNvB9cHVg_7s5Tp08xI,8
|
|
11
|
-
isagellm-0.1.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|