isagellm 0.1.0.6__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,318 @@
1
+ Metadata-Version: 2.1
2
+ Name: isagellm
3
+ Version: 0.1.0.6
4
+ Summary: sageLLM: Modular LLM inference engine for domestic computing power (Huawei Ascend, NVIDIA)
5
+ Author: IntelliStream Team
6
+ License: Proprietary - IntelliStream
7
+ Project-URL: Homepage, https://github.com/IntelliStream/sagellm
8
+ Project-URL: Documentation, https://github.com/IntelliStream/sagellm#readme
9
+ Project-URL: Repository, https://github.com/IntelliStream/sagellm
10
+ Keywords: llm,inference,ascend,huawei,npu,cuda,domestic
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: ==3.11.*
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: isagellm-protocol<0.2.0,>=0.1.0
20
+ Requires-Dist: isagellm-backend<0.2.0,>=0.1.1.3
21
+ Requires-Dist: isagellm-core<0.2.0,>=0.1.0.2
22
+ Requires-Dist: isagellm-control-plane<0.2.0,>=0.1.0.2
23
+ Requires-Dist: isagellm-gateway<0.2.0,>=0.1.0.2
24
+ Requires-Dist: isagellm-kv-cache<0.2.0,>=0.1.0
25
+ Requires-Dist: isagellm-comm<0.2.0,>=0.1.0
26
+ Requires-Dist: isagellm-compression<0.2.0,>=0.1.0
27
+ Requires-Dist: click>=8.0.0
28
+ Requires-Dist: rich>=13.0.0
29
+ Requires-Dist: pyyaml>=6.0.0
30
+ Provides-Extra: all
31
+ Requires-Dist: isagellm[benchmark,cuda]; extra == "all"
32
+ Provides-Extra: ascend
33
+ Requires-Dist: torch>=2.0.0; extra == "ascend"
34
+ Provides-Extra: benchmark
35
+ Requires-Dist: isagellm-benchmark>=0.1.0; extra == "benchmark"
36
+ Provides-Extra: cuda
37
+ Requires-Dist: torch>=2.0.0; extra == "cuda"
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
40
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
41
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
42
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
43
+
44
+ # sageLLM
45
+
46
+ <p align="center">
47
+ <strong>🚀 Modular LLM Inference Engine for Domestic Computing Power</strong>
48
+ </p>
49
+
50
+ <p align="center">
51
+ Ollama-like experience for Chinese hardware ecosystems (Huawei Ascend, NVIDIA)
52
+ </p>
53
+
54
+ ---
55
+
56
+ ## ✨ Features
57
+
58
+ - 🎯 **One-Click Install** - `pip install isagellm` gets you started immediately
59
+ - 🔌 **Mock-First** - Test without GPU, perfect for CI/CD
60
+ - 🇨🇳 **Domestic Hardware** - First-class support for Huawei Ascend NPU
61
+ - 📊 **Observable** - Built-in metrics (TTFT, TBT, throughput, KV usage)
62
+ - 🧩 **Plugin System** - Extend with custom backends and engines
63
+
64
+ ## 📦 Quick Install
65
+
66
+ ```bash
67
+ # Install sageLLM (includes mock backend, no GPU required)
68
+ pip install isagellm
69
+
70
+ # With Control Plane (request routing & scheduling)
71
+ pip install 'isagellm[control-plane]'
72
+
73
+ # With API Gateway (OpenAI-compatible REST API)
74
+ pip install 'isagellm[gateway]'
75
+
76
+ # Full server (Control Plane + Gateway)
77
+ pip install 'isagellm[server]'
78
+
79
+ # With CUDA support
80
+ pip install 'isagellm[cuda]'
81
+
82
+ # All features
83
+ pip install 'isagellm[all]'
84
+ ```
85
+
86
+ ## 🚀 Quick Start
87
+
88
+ ### 启动模式选择
89
+
90
+ sageLLM 支持两种启动模式,满足不同场景需求:
91
+
92
+ | 模式 | 使用场景 | 依赖 | 命令示例 |
93
+ |------|---------|------|---------|
94
+ | **Mock** | CI/测试/本地开发 | 无需 GPU | `sage-llm serve --mock` |
95
+ | **生产** | 真实推理服务 | GPU/CPU 后端 | `sage-llm serve --control-plane` |
96
+
97
+ **⚠️ Fail-Fast 保证**:非 mock 模式下,若依赖缺失或配置错误,系统将**立即报错退出**,不会静默回退到 mock 模式。
98
+
99
+ ### CLI (like ollama)
100
+
101
+ ```bash
102
+ # Show system info
103
+ sage-llm info
104
+
105
+ # Mock 模式(无 GPU 依赖)
106
+ sage-llm serve --mock
107
+ sage-llm run -p "What is LLM inference?" --mock
108
+ sage-llm demo --workload year1 --mock
109
+
110
+ # 生产模式(需要安装 control-plane)
111
+ # pip install 'isagellm[server]'
112
+ sage-llm serve --control-plane
113
+ sage-llm gateway --control-plane --port 8080
114
+
115
+ # 如果缺少依赖,将看到:
116
+ # ❌ Error: Control Plane required but not installed
117
+ # Install: pip install 'isagellm[control-plane]'
118
+ ```
119
+
120
+ ### Python API
121
+
122
+ ```python
123
+ from sagellm import Request, MockEngine
124
+
125
+ # Create mock engine (no GPU needed)
126
+ engine = MockEngine()
127
+
128
+ # Run inference
129
+ request = Request(
130
+ request_id="demo-001",
131
+ prompt="Hello, world!",
132
+ max_tokens=128,
133
+ )
134
+ response = engine.generate(request)
135
+
136
+ print(f"Response: {response.text}")
137
+ print(f"TTFT: {response.metrics.ttft_ms:.2f} ms")
138
+ print(f"Throughput: {response.metrics.throughput_tps:.2f} tokens/s")
139
+ ```
140
+
141
+ ### Configuration
142
+
143
+ ```yaml
144
+ # ~/.sage-llm/config.yaml
145
+ backend:
146
+ kind: mock # Options: mock, cpu, cuda, ascend
147
+
148
+ # Fail-fast 配置:如果指定了非 mock 后端但不可用,将报错退出
149
+ strict_mode: true # 默认为 true,符合申报书要求
150
+
151
+ engine:
152
+ kind: mock
153
+ model: Qwen/Qwen2-7B
154
+
155
+ # Mock 模式配置
156
+ mock:
157
+ enabled: false # true 时强制使用 mock,无论其他配置
158
+ deterministic: true # mock 输出是否固定(用于回归测试)
159
+
160
+ # 生产模式最低要求
161
+ production:
162
+ control_plane:
163
+ required: true # true 时缺少 control-plane 将报错(非 mock 模式)
164
+ endpoint: "localhost:8080"
165
+ backend:
166
+ required: true # true 时缺少真实后端将报错
167
+ fallback_to_mock: false # 禁止自动降级到 mock(fail-fast)
168
+
169
+ workload:
170
+ segments:
171
+ - short # 128 in → 128 out
172
+ - long # 2048 in → 512 out
173
+ - stress # concurrent requests
174
+ ```
175
+
176
+ ## 📊 Year 1 Demo Contract
177
+
178
+ sageLLM must produce these metrics for validation:
179
+
180
+ ```json
181
+ {
182
+ "ttft_ms": 45.2,
183
+ "tbt_ms": 12.5,
184
+ "throughput_tps": 80.0,
185
+ "peak_mem_mb": 24576,
186
+ "kv_used_tokens": 4096,
187
+ "prefix_hit_rate": 0.85,
188
+ "evict_count": 3
189
+ }
190
+ ```
191
+
192
+ Run validation:
193
+ ```bash
194
+ sage-llm demo --workload year1 --output metrics.json
195
+ ```
196
+
197
+ ## 🏗️ Architecture
198
+
199
+ ```
200
+ isagellm (umbrella package)
201
+ ├── isagellm-protocol # Protocol v0.1 types
202
+ │ └── Request, Response, Metrics, Error, StreamEvent
203
+ ├── isagellm-core # Runtime & Demo Runner
204
+ │ └── Config, Engine, Factory, DemoRunner
205
+ ├── isagellm-backend # Hardware abstraction
206
+ │ └── BackendProvider, MockBackend, (CUDABackend, AscendBackend)
207
+ ├── isagellm-control-plane # Request routing & scheduling (optional)
208
+ │ └── ControlPlaneManager, Router, Policies, Lifecycle
209
+ └── isagellm-gateway # OpenAI-compatible REST API (optional)
210
+ └── FastAPI server, /v1/chat/completions, Session management
211
+ ```
212
+
213
+ ## 🔧 Development
214
+
215
+ ### Quick Setup (Development Mode)
216
+
217
+ ```bash
218
+ # Clone all repositories
219
+ ./scripts/clone-all-repos.sh
220
+
221
+ # Install all packages in editable mode
222
+ ./quickstart.sh
223
+
224
+ # Open all repos in VS Code Multi-root Workspace
225
+ code sagellm.code-workspace
226
+ ```
227
+
228
+ **📖 See [WORKSPACE_GUIDE.md](docs/WORKSPACE_GUIDE.md) for Multi-root Workspace usage.**
229
+
230
+ ### Testing
231
+
232
+ ```bash
233
+ # Clone and setup
234
+ git clone https://github.com/IntelliStream/sagellm.git
235
+ cd sagellm
236
+ pip install -e ".[dev]"
237
+
238
+ # Run tests
239
+ pytest -v
240
+
241
+ # Format & lint
242
+ ruff format .
243
+ ruff check . --fix
244
+
245
+ # Type check
246
+ mypy src/sagellm/
247
+
248
+ # Verify dependency hierarchy
249
+ python scripts/verify_dependencies.py
250
+ ```
251
+
252
+ ### 📖 Development Resources
253
+
254
+ - **[DEPLOYMENT_GUIDE.md](docs/DEPLOYMENT_GUIDE.md)** - 完整部署与配置指南
255
+ - **[TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md)** - 故障排查快速参考
256
+ - **[ENVIRONMENT_VARIABLES.md](docs/ENVIRONMENT_VARIABLES.md)** - 环境变量完整参考
257
+ - **[DEVELOPER_GUIDE.md](docs/DEVELOPER_GUIDE.md)** - 开发者指南
258
+ - **[WORKSPACE_GUIDE.md](docs/WORKSPACE_GUIDE.md)** - Multi-root Workspace 使用
259
+ - **[INFERENCE_FLOW.md](docs/INFERENCE_FLOW.md)** - 推理流程详解
260
+ - **[PR_CHECKLIST.md](docs/PR_CHECKLIST.md)** - Pull Request 检查清单
261
+
262
+ ---
263
+
264
+ ## 📚 Documentation Index
265
+
266
+ ### 用户文档
267
+ - [快速开始](README.md#-quick-start) - 5 分钟上手
268
+ - [部署指南](docs/DEPLOYMENT_GUIDE.md) - 生产环境部署
269
+ - [配置参考](docs/DEPLOYMENT_GUIDE.md#配置文件说明) - 完整配置选项
270
+ - [环境变量](docs/ENVIRONMENT_VARIABLES.md) - 环境变量参考
271
+ - [故障排查](docs/TROUBLESHOOTING.md) - 常见问题解决
272
+
273
+ ### 开发者文档
274
+ - [开发指南](docs/DEVELOPER_GUIDE.md) - 贡献代码
275
+ - [架构设计](README.md#-architecture) - 系统架构
276
+ - [Workspace 使用](docs/WORKSPACE_GUIDE.md) - Multi-root 工作区
277
+ - [PR 检查清单](docs/PR_CHECKLIST.md) - 提交前检查
278
+
279
+ ### API 文档
280
+ - OpenAI 兼容 API - 参见 [sagellm-gateway](https://github.com/intellistream/sagellm-gateway)
281
+ - Python API - 参见 [API_REFERENCE.md](docs/API_REFERENCE.md)(待补充)
282
+
283
+ ### 子包文档
284
+ - [sagellm-protocol](https://github.com/intellistream/sagellm-protocol) - 协议定义
285
+ - [sagellm-backend](https://github.com/intellistream/sagellm-backend) - 后端抽象
286
+ - [sagellm-core](https://github.com/intellistream/sagellm-core) - 引擎核心
287
+ - [sagellm-control-plane](https://github.com/intellistream/sagellm-control-plane) - 控制面
288
+ - [sagellm-gateway](https://github.com/intellistream/sagellm-gateway) - API 网关
289
+ - [sagellm-benchmark](https://github.com/intellistream/sagellm-benchmark) - 基准测试
290
+
291
+ - [**DEVELOPER_GUIDE.md**](docs/DEVELOPER_GUIDE.md) - 架构规范与开发指南
292
+ - [**PR_CHECKLIST.md**](docs/PR_CHECKLIST.md) - Pull Request 审查清单
293
+ - [**scripts/verify_dependencies.py**](scripts/verify_dependencies.py) - 依赖层次验证
294
+
295
+ ## 📚 Package Details
296
+
297
+ | Package | PyPI Name | Import Name | Description |
298
+ |---------|-----------|-------------|-------------|
299
+ | sagellm | `isagellm` | `sagellm` | Umbrella package (install this) |
300
+ | sagellm-protocol | `isagellm-protocol` | `sagellm_protocol` | Protocol v0.1 types |
301
+ | sagellm-core | `isagellm-core` | `sagellm_core` | Runtime & config |
302
+ | sagellm-backend | `isagellm-backend` | `sagellm_backend` | Hardware abstraction |
303
+
304
+ ## 🎯 Roadmap
305
+
306
+ - **Year 1**: Core inference with KV cache, prefix sharing, basic eviction
307
+ - **Year 2**: Multi-node inference, advanced scheduling
308
+ - **Year 3**: Full production-ready deployment
309
+
310
+ ## 📄 License
311
+
312
+ Proprietary - IntelliStream. Internal use only.
313
+
314
+ ---
315
+
316
+ <p align="center">
317
+ <sub>Built with ❤️ by IntelliStream Team for domestic AI infrastructure</sub>
318
+ </p>
@@ -0,0 +1,11 @@
1
+ sagellm/__init__.py,sha256=fvvYT8rqWc77Q6mHyEZp5o4BiTwa1fpmxHjzXypz5xo,5379
2
+ sagellm/__init__.pyc,sha256=GwhdZnZYMqAatjKR1T-2j1l7gDvAmRgGLg8QFseZcgw,4591
3
+ sagellm/cli.pyc,sha256=pUhwr6R1ChduZB-Z8x_-kgRXy0OtDFqnM_r3XSSlp_s,52742
4
+ sagellm/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
5
+ sagellm/__pycache__/__init__.cpython-311.pyc,sha256=xGiRA2rVPAyoIUzq5phb-y3bV3VRpjYU3eIJbmGhID0,4616
6
+ sagellm/__pycache__/cli.cpython-311.pyc,sha256=n-HafVk9XrfnPpl56eonKAeIF2XCoAaUvdkJ0unLJBY,52767
7
+ isagellm-0.1.0.6.dist-info/METADATA,sha256=9VO23GmlyIdekx0baW9SS3H5nhXHMbhWQIdpkjSzgP8,10008
8
+ isagellm-0.1.0.6.dist-info/WHEEL,sha256=ZJeWpR6hcCRGwxVKXlDk-HsGwijNyTq4fszaDj4Ycyo,93
9
+ isagellm-0.1.0.6.dist-info/entry_points.txt,sha256=NqSiD9EEbziWs94BYtKFUzrnKTyCFG2MuZcvrRryhtg,73
10
+ isagellm-0.1.0.6.dist-info/top_level.txt,sha256=q-O8RUHV2YT7pQv12AYgFiK7PNvB9cHVg_7s5Tp08xI,8
11
+ isagellm-0.1.0.6.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (73.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: cp311-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ sage-llm = sagellm.cli:main
3
+ sagellm = sagellm.cli:main
@@ -0,0 +1 @@
1
+ sagellm
sagellm/__init__.py ADDED
@@ -0,0 +1,158 @@
1
+ """sageLLM: Modular LLM inference engine for domestic computing power.
2
+
3
+ Ollama-like experience for Chinese hardware ecosystems (Huawei Ascend, NVIDIA).
4
+
5
+ Quick Start:
6
+ # Install
7
+ pip install isagellm
8
+
9
+ # CLI usage (like ollama)
10
+ sage-llm serve --mock # Start mock server
11
+ sage-llm run -p "Hello world" # Single inference
12
+ sage-llm demo --workload year1 # Run Year1 demo validation
13
+ sage-llm info # Show system info
14
+
15
+ # Python API
16
+ from sagellm import Request, MockEngine, create_engine
17
+
18
+ engine = MockEngine()
19
+ response = engine.generate(Request(prompt="Hello", max_tokens=128))
20
+ print(response.text)
21
+
22
+ Architecture:
23
+ sagellm (umbrella)
24
+ ├── sagellm-protocol # Protocol v0.1 types (Request, Response, Metrics, Error)
25
+ ├── sagellm-core # Runtime (config, engine factory, demo runner)
26
+ └── sagellm-backend # Hardware abstraction (CUDA, Ascend, Mock)
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ __version__ = "0.1.0.6"
32
+
33
+ # Lazy imports to handle installation order
34
+ _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
35
+ # Protocol types
36
+ "Request": ("sagellm_protocol", "Request"),
37
+ "Response": ("sagellm_protocol", "Response"),
38
+ "Metrics": ("sagellm_protocol", "Metrics"),
39
+ "Error": ("sagellm_protocol", "Error"),
40
+ "ErrorCode": ("sagellm_protocol", "ErrorCode"),
41
+ "Timestamps": ("sagellm_protocol", "Timestamps"),
42
+ "StreamEvent": ("sagellm_protocol", "StreamEvent"),
43
+ "StreamEventStart": ("sagellm_protocol", "StreamEventStart"),
44
+ "StreamEventDelta": ("sagellm_protocol", "StreamEventDelta"),
45
+ "StreamEventEnd": ("sagellm_protocol", "StreamEventEnd"),
46
+ # KV hooks
47
+ "KVAllocateParams": ("sagellm_protocol", "KVAllocateParams"),
48
+ "KVHandle": ("sagellm_protocol", "KVHandle"),
49
+ "KVMigrateParams": ("sagellm_protocol", "KVMigrateParams"),
50
+ # Backend
51
+ "BackendProvider": ("sagellm_backend", "BackendProvider"),
52
+ "CapabilityDescriptor": ("sagellm_backend", "CapabilityDescriptor"),
53
+ "DType": ("sagellm_backend", "DType"),
54
+ "KernelKind": ("sagellm_backend", "KernelKind"),
55
+ "MockBackendProvider": ("sagellm_backend", "MockBackendProvider"),
56
+ "create_mock_backend": ("sagellm_backend", "create_mock_backend"),
57
+ # Core - Config
58
+ "BackendConfig": ("sagellm_core", "BackendConfig"),
59
+ "EngineConfig": ("sagellm_core", "EngineConfig"),
60
+ "DemoConfig": ("sagellm_core", "DemoConfig"),
61
+ "WorkloadConfig": ("sagellm_core", "WorkloadConfig"),
62
+ "WorkloadSegment": ("sagellm_core", "WorkloadSegment"),
63
+ "load_config": ("sagellm_core", "load_config"),
64
+ # Core - Engine
65
+ "BaseEngine": ("sagellm_core", "BaseEngine"),
66
+ "MockEngine": ("sagellm_core", "MockEngine"),
67
+ "create_engine": ("sagellm_core", "create_engine"),
68
+ "create_backend": ("sagellm_core", "create_backend"),
69
+ # Core - Demo
70
+ "DemoRunner": ("sagellm_core", "DemoRunner"),
71
+ "demo_main": ("sagellm_core", "demo_main"),
72
+ # Control Plane (optional - install with isagellm[control-plane])
73
+ "ControlPlaneManager": ("sagellm_control", "ControlPlaneManager"),
74
+ "MockControlPlane": ("sagellm_control", "MockControlPlane"),
75
+ "EngineInfo": ("sagellm_control", "EngineInfo"),
76
+ "EngineState": ("sagellm_control", "EngineState"),
77
+ "SchedulingDecision": ("sagellm_control", "SchedulingDecision"),
78
+ }
79
+
80
+
81
+ def __getattr__(name: str) -> object:
82
+ """Lazy import for all exported symbols."""
83
+ if name in _LAZY_IMPORTS:
84
+ module_name, attr_name = _LAZY_IMPORTS[name]
85
+ import importlib
86
+
87
+ try:
88
+ module = importlib.import_module(module_name)
89
+ return getattr(module, attr_name)
90
+ except ImportError as e:
91
+ # Provide helpful error for optional dependencies
92
+ if module_name == "sagellm_control":
93
+ raise ImportError(
94
+ f"{name} requires sagellm-control-plane. "
95
+ "Install with: pip install 'isagellm[control-plane]'"
96
+ ) from e
97
+ if module_name == "sagellm_gateway":
98
+ raise ImportError(
99
+ f"{name} requires sagellm-gateway. "
100
+ "Install with: pip install 'isagellm[gateway]'"
101
+ ) from e
102
+ raise
103
+ raise AttributeError(f"module 'sagellm' has no attribute {name!r}")
104
+
105
+
106
def __dir__() -> list[str]:
    """Expose the lazily-provided public API to ``dir()`` (PEP 562)."""
    return [*__all__]


# Public API surface. Every non-dunder entry resolves through __getattr__,
# so listing a name here does not import its backing module eagerly.
__all__ = [
    "__version__",
    # Protocol: core types
    "Request",
    "Response",
    "Metrics",
    "Error",
    "ErrorCode",
    "Timestamps",
    # Protocol: streaming events
    "StreamEvent",
    "StreamEventStart",
    "StreamEventDelta",
    "StreamEventEnd",
    # Protocol: KV-cache hooks
    "KVAllocateParams",
    "KVHandle",
    "KVMigrateParams",
    # Backend abstraction
    "BackendProvider",
    "CapabilityDescriptor",
    "DType",
    "KernelKind",
    "MockBackendProvider",
    "create_mock_backend",
    # Core: configuration
    "BackendConfig",
    "EngineConfig",
    "DemoConfig",
    "WorkloadConfig",
    "WorkloadSegment",
    "load_config",
    # Core: engines
    "BaseEngine",
    "MockEngine",
    "create_engine",
    "create_backend",
    # Core: demo runner
    "DemoRunner",
    "demo_main",
    # Control plane (optional extra)
    "ControlPlaneManager",
    "MockControlPlane",
    "EngineInfo",
    "EngineState",
    "SchedulingDecision",
]
sagellm/__init__.pyc ADDED
Binary file
Binary file
sagellm/cli.pyc ADDED
Binary file
sagellm/py.typed ADDED
@@ -0,0 +1,2 @@
1
+ # Marker file for PEP 561
2
+ # This package supports type checking