isagellm-core 0.3.0.9__tar.gz → 0.4.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {isagellm_core-0.3.0.9/src/isagellm_core.egg-info → isagellm_core-0.4.0.0}/PKG-INFO +11 -12
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/README.md +6 -7
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/pyproject.toml +17 -10
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0/src/isagellm_core.egg-info}/PKG-INFO +11 -12
- isagellm_core-0.4.0.0/src/isagellm_core.egg-info/SOURCES.txt +129 -0
- isagellm_core-0.4.0.0/src/isagellm_core.egg-info/entry_points.txt +2 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/isagellm_core.egg-info/requires.txt +4 -4
- isagellm_core-0.4.0.0/src/sagellm_core/__init__.py +192 -0
- isagellm_core-0.4.0.0/src/sagellm_core/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/engine_server.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/__pycache__/llm_engine.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/pd_executor.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/runner.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/distributed/__init__.py +15 -0
- isagellm_core-0.4.0.0/src/sagellm_core/distributed/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/distributed/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/distributed/__pycache__/strategies.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/distributed/strategies.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__init__.py +12 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__pycache__/engine_core.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/engine_core.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__init__.py +19 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/scheduler.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engine_server.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engines/__init__.py +29 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engines/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/embedding.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/embedding.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/__init__.py +16 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/__pycache__/executor_base.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/__pycache__/uniproc_executor.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/executor_base.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/executor/uniproc_executor.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/__init__.py +12 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/__pycache__/processor.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/__pycache__/tokenizer_utils.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/processor.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/inputs/tokenizer_utils.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/llm_engine.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/__init__.py +13 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/__pycache__/model_loader.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/__pycache__/weight_utils.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/model_loader.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/model/weight_utils.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/__init__.py +16 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/__pycache__/logger.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/__pycache__/metrics.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/logger.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/observability/metrics.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/pd_executor.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/runner.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/__init__.py +14 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/__pycache__/params.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/__pycache__/sampler.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/params.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/sampling/sampler.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/__init__.py +11 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/__pycache__/worker.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__init__.py +8 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__init__.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__pycache__/model_runner.cpython-311.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/model_runner.pyc +0 -0
- isagellm_core-0.4.0.0/src/sagellm_core/worker/worker.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_ci_smoke.py +19 -22
- isagellm_core-0.3.0.9/tests/test_e2e_cpu_integration.py → isagellm_core-0.4.0.0/tests/test_e2e_llm_integration.py +69 -31
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_engine.py +29 -35
- isagellm_core-0.4.0.0/tests/test_engine_behavior_parity.py +124 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_engine_contract_simplified.py +4 -2
- isagellm_core-0.3.0.9/tests/test_cpu_engine_contract.py → isagellm_core-0.4.0.0/tests/test_llm_engine_contract.py +108 -71
- isagellm_core-0.3.0.9/tests/test_cpu_engine_error_handling.py → isagellm_core-0.4.0.0/tests/test_llm_engine_error_handling.py +86 -60
- isagellm_core-0.4.0.0/tests/test_model_loader.py +49 -0
- isagellm_core-0.4.0.0/tests/test_observability.py +89 -0
- isagellm_core-0.4.0.0/tests/test_pd_separation.py +108 -0
- isagellm_core-0.4.0.0/tests/test_sampling.py +80 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_streaming_pd.py +24 -16
- isagellm_core-0.3.0.9/src/isagellm_core.egg-info/SOURCES.txt +0 -71
- isagellm_core-0.3.0.9/src/isagellm_core.egg-info/entry_points.txt +0 -8
- isagellm_core-0.3.0.9/src/sagellm_core/__init__.py +0 -146
- isagellm_core-0.3.0.9/src/sagellm_core/__init__.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/__init__.py +0 -45
- isagellm_core-0.3.0.9/src/sagellm_core/engines/__init__.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/ascend.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/cpu.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/hf_cuda.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/pytorch.pyc +0 -0
- isagellm_core-0.3.0.9/src/sagellm_core/engines/pytorch_engine.pyc +0 -0
- isagellm_core-0.3.0.9/tests/test_engine_behavior_parity.py +0 -154
- isagellm_core-0.3.0.9/tests/test_engine_contract.py +0 -361
- isagellm_core-0.3.0.9/tests/test_pd_separation.py +0 -207
- isagellm_core-0.3.0.9/tests/test_pytorch_engine.py +0 -81
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/MANIFEST.in +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/setup.cfg +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/setup.py +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/isagellm_core.egg-info/dependency_links.txt +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/isagellm_core.egg-info/top_level.txt +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__main__.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/base_engine.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/config.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/demo.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/engine.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/engine_factory.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/factory.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/health.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/mock_engine.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/plugins.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/runtime.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/workload.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/config.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/demo.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engine.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engine_factory.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/mock.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/factory.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/health.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/plugins.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/py.typed +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/runtime.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/workload.pyc +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_config.py +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_engine_server.py +0 -0
- {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_task0_10_workload.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: isagellm-core
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0.0
|
|
4
4
|
Summary: sageLLM core runtime with PD separation (MVP)
|
|
5
5
|
Author: IntelliStream Team
|
|
6
6
|
License: Proprietary - IntelliStream
|
|
@@ -13,10 +13,10 @@ Requires-Python: ==3.11.*
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: pydantic>=2.0.0
|
|
15
15
|
Requires-Dist: pyyaml>=6.0.0
|
|
16
|
-
Requires-Dist: isagellm-protocol<0.
|
|
17
|
-
Requires-Dist: isagellm-backend<0.
|
|
18
|
-
Requires-Dist: isagellm-comm<0.
|
|
19
|
-
Requires-Dist: isagellm-kv-cache<0.
|
|
16
|
+
Requires-Dist: isagellm-protocol<0.5.0,>=0.3.0.2
|
|
17
|
+
Requires-Dist: isagellm-backend<0.5.0,>=0.4.0.0
|
|
18
|
+
Requires-Dist: isagellm-comm<0.5.0,>=0.4.0.0
|
|
19
|
+
Requires-Dist: isagellm-kv-cache<0.5.0,>=0.3.0.1
|
|
20
20
|
Requires-Dist: fastapi>=0.100.0
|
|
21
21
|
Requires-Dist: uvicorn>=0.22.0
|
|
22
22
|
Provides-Extra: dev
|
|
@@ -50,10 +50,10 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
50
50
|
┌─────────────────────────────────────────────────────────────┐
|
|
51
51
|
│ sagellm-core (引擎协调层) ← 本仓库 │
|
|
52
52
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
53
|
-
│ │
|
|
54
|
-
│ │ •
|
|
55
|
-
│ │ •
|
|
56
|
-
│ │ •
|
|
53
|
+
│ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
|
|
54
|
+
│ │ • 统一推理接口: generate, stream, execute │ │
|
|
55
|
+
│ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
|
|
56
|
+
│ │ • 配置驱动 (LLMEngineConfig) │ │
|
|
57
57
|
│ └─────────────────────────────────────────────────────┘ │
|
|
58
58
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
59
59
|
│ │ Configuration System (config.py) │ │
|
|
@@ -68,9 +68,8 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
68
68
|
```
|
|
69
69
|
|
|
70
70
|
**职责分离**:
|
|
71
|
-
-
|
|
72
|
-
- ✅ **
|
|
73
|
-
- ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
|
|
71
|
+
- ✅ **Core 负责**:LLMEngine (硬件无关)、配置、协调
|
|
72
|
+
- ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
|
|
74
73
|
|
|
75
74
|
## Features
|
|
76
75
|
|
|
@@ -19,10 +19,10 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
19
19
|
┌─────────────────────────────────────────────────────────────┐
|
|
20
20
|
│ sagellm-core (引擎协调层) ← 本仓库 │
|
|
21
21
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
22
|
-
│ │
|
|
23
|
-
│ │ •
|
|
24
|
-
│ │ •
|
|
25
|
-
│ │ •
|
|
22
|
+
│ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
|
|
23
|
+
│ │ • 统一推理接口: generate, stream, execute │ │
|
|
24
|
+
│ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
|
|
25
|
+
│ │ • 配置驱动 (LLMEngineConfig) │ │
|
|
26
26
|
│ └─────────────────────────────────────────────────────┘ │
|
|
27
27
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
28
28
|
│ │ Configuration System (config.py) │ │
|
|
@@ -37,9 +37,8 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
37
37
|
```
|
|
38
38
|
|
|
39
39
|
**职责分离**:
|
|
40
|
-
-
|
|
41
|
-
- ✅ **
|
|
42
|
-
- ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
|
|
40
|
+
- ✅ **Core 负责**:LLMEngine (硬件无关)、配置、协调
|
|
41
|
+
- ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
|
|
43
42
|
|
|
44
43
|
## Features
|
|
45
44
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "isagellm-core"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.4.0.0"
|
|
8
8
|
description = "sageLLM core runtime with PD separation (MVP)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = "==3.11.*"
|
|
@@ -20,10 +20,10 @@ classifiers = [
|
|
|
20
20
|
dependencies = [
|
|
21
21
|
"pydantic>=2.0.0",
|
|
22
22
|
"pyyaml>=6.0.0",
|
|
23
|
-
"isagellm-protocol>=0.3.0.2,<0.
|
|
24
|
-
"isagellm-backend>=0.
|
|
25
|
-
"isagellm-comm>=0.
|
|
26
|
-
"isagellm-kv-cache>=0.3.0.1,<0.
|
|
23
|
+
"isagellm-protocol>=0.3.0.2,<0.5.0",
|
|
24
|
+
"isagellm-backend>=0.4.0.0,<0.5.0",
|
|
25
|
+
"isagellm-comm>=0.4.0.0,<0.5.0",
|
|
26
|
+
"isagellm-kv-cache>=0.3.0.1,<0.5.0",
|
|
27
27
|
"fastapi>=0.100.0",
|
|
28
28
|
"uvicorn>=0.22.0",
|
|
29
29
|
]
|
|
@@ -57,11 +57,18 @@ line-length = 100
|
|
|
57
57
|
[project.scripts]
|
|
58
58
|
sage-engine = "sagellm_core.engine_server:main"
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
60
|
+
# DEPRECATED: Old hardware-specific engines have been removed
|
|
61
|
+
# Use LLMEngine with BackendProvider instead:
|
|
62
|
+
# from sagellm_core import LLMEngine, LLMEngineConfig
|
|
63
|
+
# engine = LLMEngine(LLMEngineConfig(model="...", backend="cpu"))
|
|
64
|
+
#
|
|
65
|
+
# The entry-points system is being phased out in favor of the unified
|
|
66
|
+
# LLMEngine + BackendProvider architecture (vLLM v1 style).
|
|
67
|
+
# [project.entry-points."sagellm.engines"]
|
|
68
|
+
# cpu = "sagellm_core.engines.cpu:create_cpu_engine"
|
|
69
|
+
# hf-cuda = "sagellm_core.engines.hf_cuda:create_hf_cuda_engine"
|
|
70
|
+
# pytorch = "sagellm_core.engines.pytorch:create_pytorch_engine"
|
|
71
|
+
# ascend = "sagellm_core.engines.ascend:create_ascend_engine"
|
|
65
72
|
|
|
66
73
|
[tool.mypy]
|
|
67
74
|
python_version = "3.10"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: isagellm-core
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0.0
|
|
4
4
|
Summary: sageLLM core runtime with PD separation (MVP)
|
|
5
5
|
Author: IntelliStream Team
|
|
6
6
|
License: Proprietary - IntelliStream
|
|
@@ -13,10 +13,10 @@ Requires-Python: ==3.11.*
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: pydantic>=2.0.0
|
|
15
15
|
Requires-Dist: pyyaml>=6.0.0
|
|
16
|
-
Requires-Dist: isagellm-protocol<0.
|
|
17
|
-
Requires-Dist: isagellm-backend<0.
|
|
18
|
-
Requires-Dist: isagellm-comm<0.
|
|
19
|
-
Requires-Dist: isagellm-kv-cache<0.
|
|
16
|
+
Requires-Dist: isagellm-protocol<0.5.0,>=0.3.0.2
|
|
17
|
+
Requires-Dist: isagellm-backend<0.5.0,>=0.4.0.0
|
|
18
|
+
Requires-Dist: isagellm-comm<0.5.0,>=0.4.0.0
|
|
19
|
+
Requires-Dist: isagellm-kv-cache<0.5.0,>=0.3.0.1
|
|
20
20
|
Requires-Dist: fastapi>=0.100.0
|
|
21
21
|
Requires-Dist: uvicorn>=0.22.0
|
|
22
22
|
Provides-Extra: dev
|
|
@@ -50,10 +50,10 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
50
50
|
┌─────────────────────────────────────────────────────────────┐
|
|
51
51
|
│ sagellm-core (引擎协调层) ← 本仓库 │
|
|
52
52
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
53
|
-
│ │
|
|
54
|
-
│ │ •
|
|
55
|
-
│ │ •
|
|
56
|
-
│ │ •
|
|
53
|
+
│ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
|
|
54
|
+
│ │ • 统一推理接口: generate, stream, execute │ │
|
|
55
|
+
│ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
|
|
56
|
+
│ │ • 配置驱动 (LLMEngineConfig) │ │
|
|
57
57
|
│ └─────────────────────────────────────────────────────┘ │
|
|
58
58
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
59
59
|
│ │ Configuration System (config.py) │ │
|
|
@@ -68,9 +68,8 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
68
68
|
```
|
|
69
69
|
|
|
70
70
|
**职责分离**:
|
|
71
|
-
-
|
|
72
|
-
- ✅ **
|
|
73
|
-
- ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
|
|
71
|
+
- ✅ **Core 负责**:LLMEngine (硬件无关)、配置、协调
|
|
72
|
+
- ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
|
|
74
73
|
|
|
75
74
|
## Features
|
|
76
75
|
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
src/isagellm_core.egg-info/PKG-INFO
|
|
6
|
+
src/isagellm_core.egg-info/SOURCES.txt
|
|
7
|
+
src/isagellm_core.egg-info/dependency_links.txt
|
|
8
|
+
src/isagellm_core.egg-info/entry_points.txt
|
|
9
|
+
src/isagellm_core.egg-info/requires.txt
|
|
10
|
+
src/isagellm_core.egg-info/top_level.txt
|
|
11
|
+
src/sagellm_core/__init__.py
|
|
12
|
+
src/sagellm_core/__init__.pyc
|
|
13
|
+
src/sagellm_core/__main__.pyc
|
|
14
|
+
src/sagellm_core/config.pyc
|
|
15
|
+
src/sagellm_core/demo.pyc
|
|
16
|
+
src/sagellm_core/engine.pyc
|
|
17
|
+
src/sagellm_core/engine_factory.pyc
|
|
18
|
+
src/sagellm_core/engine_server.pyc
|
|
19
|
+
src/sagellm_core/factory.pyc
|
|
20
|
+
src/sagellm_core/health.pyc
|
|
21
|
+
src/sagellm_core/llm_engine.pyc
|
|
22
|
+
src/sagellm_core/pd_executor.pyc
|
|
23
|
+
src/sagellm_core/plugins.pyc
|
|
24
|
+
src/sagellm_core/py.typed
|
|
25
|
+
src/sagellm_core/runner.pyc
|
|
26
|
+
src/sagellm_core/runtime.pyc
|
|
27
|
+
src/sagellm_core/workload.pyc
|
|
28
|
+
src/sagellm_core/__pycache__/__init__.cpython-311.pyc
|
|
29
|
+
src/sagellm_core/__pycache__/base_engine.cpython-311.pyc
|
|
30
|
+
src/sagellm_core/__pycache__/config.cpython-311.pyc
|
|
31
|
+
src/sagellm_core/__pycache__/demo.cpython-311.pyc
|
|
32
|
+
src/sagellm_core/__pycache__/engine.cpython-311.pyc
|
|
33
|
+
src/sagellm_core/__pycache__/engine_factory.cpython-311.pyc
|
|
34
|
+
src/sagellm_core/__pycache__/engine_server.cpython-311.pyc
|
|
35
|
+
src/sagellm_core/__pycache__/factory.cpython-311.pyc
|
|
36
|
+
src/sagellm_core/__pycache__/health.cpython-311.pyc
|
|
37
|
+
src/sagellm_core/__pycache__/llm_engine.cpython-311.pyc
|
|
38
|
+
src/sagellm_core/__pycache__/mock_engine.cpython-311.pyc
|
|
39
|
+
src/sagellm_core/__pycache__/pd_executor.cpython-311.pyc
|
|
40
|
+
src/sagellm_core/__pycache__/plugins.cpython-311.pyc
|
|
41
|
+
src/sagellm_core/__pycache__/runner.cpython-311.pyc
|
|
42
|
+
src/sagellm_core/__pycache__/runtime.cpython-311.pyc
|
|
43
|
+
src/sagellm_core/__pycache__/workload.cpython-311.pyc
|
|
44
|
+
src/sagellm_core/distributed/__init__.py
|
|
45
|
+
src/sagellm_core/distributed/__init__.pyc
|
|
46
|
+
src/sagellm_core/distributed/strategies.pyc
|
|
47
|
+
src/sagellm_core/distributed/__pycache__/__init__.cpython-311.pyc
|
|
48
|
+
src/sagellm_core/distributed/__pycache__/strategies.cpython-311.pyc
|
|
49
|
+
src/sagellm_core/engine_core/__init__.py
|
|
50
|
+
src/sagellm_core/engine_core/__init__.pyc
|
|
51
|
+
src/sagellm_core/engine_core/engine_core.pyc
|
|
52
|
+
src/sagellm_core/engine_core/__pycache__/__init__.cpython-311.pyc
|
|
53
|
+
src/sagellm_core/engine_core/__pycache__/engine_core.cpython-311.pyc
|
|
54
|
+
src/sagellm_core/engine_core/scheduler/__init__.py
|
|
55
|
+
src/sagellm_core/engine_core/scheduler/__init__.pyc
|
|
56
|
+
src/sagellm_core/engine_core/scheduler/scheduler.pyc
|
|
57
|
+
src/sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc
|
|
58
|
+
src/sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc
|
|
59
|
+
src/sagellm_core/engines/__init__.py
|
|
60
|
+
src/sagellm_core/engines/__init__.pyc
|
|
61
|
+
src/sagellm_core/engines/embedding.pyc
|
|
62
|
+
src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc
|
|
63
|
+
src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc
|
|
64
|
+
src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc
|
|
65
|
+
src/sagellm_core/engines/__pycache__/embedding.cpython-311.pyc
|
|
66
|
+
src/sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc
|
|
67
|
+
src/sagellm_core/engines/__pycache__/mock.cpython-311.pyc
|
|
68
|
+
src/sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc
|
|
69
|
+
src/sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc
|
|
70
|
+
src/sagellm_core/executor/__init__.py
|
|
71
|
+
src/sagellm_core/executor/__init__.pyc
|
|
72
|
+
src/sagellm_core/executor/executor_base.pyc
|
|
73
|
+
src/sagellm_core/executor/uniproc_executor.pyc
|
|
74
|
+
src/sagellm_core/executor/__pycache__/__init__.cpython-311.pyc
|
|
75
|
+
src/sagellm_core/executor/__pycache__/executor_base.cpython-311.pyc
|
|
76
|
+
src/sagellm_core/executor/__pycache__/uniproc_executor.cpython-311.pyc
|
|
77
|
+
src/sagellm_core/inputs/__init__.py
|
|
78
|
+
src/sagellm_core/inputs/__init__.pyc
|
|
79
|
+
src/sagellm_core/inputs/processor.pyc
|
|
80
|
+
src/sagellm_core/inputs/tokenizer_utils.pyc
|
|
81
|
+
src/sagellm_core/inputs/__pycache__/__init__.cpython-311.pyc
|
|
82
|
+
src/sagellm_core/inputs/__pycache__/processor.cpython-311.pyc
|
|
83
|
+
src/sagellm_core/inputs/__pycache__/tokenizer_utils.cpython-311.pyc
|
|
84
|
+
src/sagellm_core/model/__init__.py
|
|
85
|
+
src/sagellm_core/model/__init__.pyc
|
|
86
|
+
src/sagellm_core/model/model_loader.pyc
|
|
87
|
+
src/sagellm_core/model/weight_utils.pyc
|
|
88
|
+
src/sagellm_core/model/__pycache__/__init__.cpython-311.pyc
|
|
89
|
+
src/sagellm_core/model/__pycache__/model_loader.cpython-311.pyc
|
|
90
|
+
src/sagellm_core/model/__pycache__/weight_utils.cpython-311.pyc
|
|
91
|
+
src/sagellm_core/observability/__init__.py
|
|
92
|
+
src/sagellm_core/observability/__init__.pyc
|
|
93
|
+
src/sagellm_core/observability/logger.pyc
|
|
94
|
+
src/sagellm_core/observability/metrics.pyc
|
|
95
|
+
src/sagellm_core/observability/__pycache__/__init__.cpython-311.pyc
|
|
96
|
+
src/sagellm_core/observability/__pycache__/logger.cpython-311.pyc
|
|
97
|
+
src/sagellm_core/observability/__pycache__/metrics.cpython-311.pyc
|
|
98
|
+
src/sagellm_core/sampling/__init__.py
|
|
99
|
+
src/sagellm_core/sampling/__init__.pyc
|
|
100
|
+
src/sagellm_core/sampling/params.pyc
|
|
101
|
+
src/sagellm_core/sampling/sampler.pyc
|
|
102
|
+
src/sagellm_core/sampling/__pycache__/__init__.cpython-311.pyc
|
|
103
|
+
src/sagellm_core/sampling/__pycache__/params.cpython-311.pyc
|
|
104
|
+
src/sagellm_core/sampling/__pycache__/sampler.cpython-311.pyc
|
|
105
|
+
src/sagellm_core/worker/__init__.py
|
|
106
|
+
src/sagellm_core/worker/__init__.pyc
|
|
107
|
+
src/sagellm_core/worker/worker.pyc
|
|
108
|
+
src/sagellm_core/worker/__pycache__/__init__.cpython-311.pyc
|
|
109
|
+
src/sagellm_core/worker/__pycache__/worker.cpython-311.pyc
|
|
110
|
+
src/sagellm_core/worker/model_runner/__init__.py
|
|
111
|
+
src/sagellm_core/worker/model_runner/__init__.pyc
|
|
112
|
+
src/sagellm_core/worker/model_runner/model_runner.pyc
|
|
113
|
+
src/sagellm_core/worker/model_runner/__pycache__/__init__.cpython-311.pyc
|
|
114
|
+
src/sagellm_core/worker/model_runner/__pycache__/model_runner.cpython-311.pyc
|
|
115
|
+
tests/test_ci_smoke.py
|
|
116
|
+
tests/test_config.py
|
|
117
|
+
tests/test_e2e_llm_integration.py
|
|
118
|
+
tests/test_engine.py
|
|
119
|
+
tests/test_engine_behavior_parity.py
|
|
120
|
+
tests/test_engine_contract_simplified.py
|
|
121
|
+
tests/test_engine_server.py
|
|
122
|
+
tests/test_llm_engine_contract.py
|
|
123
|
+
tests/test_llm_engine_error_handling.py
|
|
124
|
+
tests/test_model_loader.py
|
|
125
|
+
tests/test_observability.py
|
|
126
|
+
tests/test_pd_separation.py
|
|
127
|
+
tests/test_sampling.py
|
|
128
|
+
tests/test_streaming_pd.py
|
|
129
|
+
tests/test_task0_10_workload.py
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
pydantic>=2.0.0
|
|
2
2
|
pyyaml>=6.0.0
|
|
3
|
-
isagellm-protocol<0.
|
|
4
|
-
isagellm-backend<0.
|
|
5
|
-
isagellm-comm<0.
|
|
6
|
-
isagellm-kv-cache<0.
|
|
3
|
+
isagellm-protocol<0.5.0,>=0.3.0.2
|
|
4
|
+
isagellm-backend<0.5.0,>=0.4.0.0
|
|
5
|
+
isagellm-comm<0.5.0,>=0.4.0.0
|
|
6
|
+
isagellm-kv-cache<0.5.0,>=0.3.0.1
|
|
7
7
|
fastapi>=0.100.0
|
|
8
8
|
uvicorn>=0.22.0
|
|
9
9
|
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""sageLLM Core 运行时。
|
|
2
|
+
|
|
3
|
+
本包提供 sageLLM 的核心运行时组件:
|
|
4
|
+
- LLMEngine: 统一的硬件无关推理引擎(vLLM v1 风格)
|
|
5
|
+
- EngineCore: 协调 Scheduler 和 Executor
|
|
6
|
+
- Scheduler: Continuous Batching 调度器
|
|
7
|
+
- Executor: 管理 Worker 执行
|
|
8
|
+
- Worker/ModelRunner: 模型前向传播
|
|
9
|
+
- 配置 schema 与校验
|
|
10
|
+
- Engine 工厂函数
|
|
11
|
+
- 插件系统
|
|
12
|
+
- Demo Runner
|
|
13
|
+
- 分布式 Runtime(PD 分离 MVP)
|
|
14
|
+
- PD 分离执行器
|
|
15
|
+
|
|
16
|
+
Architecture (vLLM v1 style):
|
|
17
|
+
LLMEngine (hardware-agnostic)
|
|
18
|
+
├── EngineCore (coordinates Scheduler and Executor)
|
|
19
|
+
│ └── Scheduler (Continuous Batching)
|
|
20
|
+
└── Executor
|
|
21
|
+
└── Worker
|
|
22
|
+
└── ModelRunner
|
|
23
|
+
├── uses BackendProvider (from sagellm-backend)
|
|
24
|
+
└── uses CommBackend (from sagellm-comm)
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
__version__ = "0.4.0.0"
|
|
30
|
+
|
|
31
|
+
# ============================================================================
|
|
32
|
+
# New Architecture (vLLM v1 style) - Hardware Agnostic
|
|
33
|
+
# ============================================================================
|
|
34
|
+
from sagellm_core.llm_engine import LLMEngine, LLMEngineConfig
|
|
35
|
+
from sagellm_core.engine_core import EngineCore
|
|
36
|
+
from sagellm_core.engine_core.engine_core import EngineCoreConfig
|
|
37
|
+
from sagellm_core.engine_core.scheduler import (
|
|
38
|
+
ContinuousBatchingScheduler,
|
|
39
|
+
SchedulerConfig,
|
|
40
|
+
SchedulerOutput,
|
|
41
|
+
)
|
|
42
|
+
from sagellm_core.executor import ExecutorBase, UniprocExecutor
|
|
43
|
+
from sagellm_core.executor.executor_base import ExecutorConfig
|
|
44
|
+
from sagellm_core.worker import Worker
|
|
45
|
+
from sagellm_core.worker.model_runner import ModelRunner
|
|
46
|
+
|
|
47
|
+
# ============================================================================
|
|
48
|
+
# Legacy Architecture (still supported, being refactored)
|
|
49
|
+
# ============================================================================
|
|
50
|
+
from sagellm_core.engine import BaseEngine, EngineInstanceConfig
|
|
51
|
+
from sagellm_core.config import (
|
|
52
|
+
BackendConfig,
|
|
53
|
+
DemoConfig,
|
|
54
|
+
EngineConfig,
|
|
55
|
+
OutputConfig,
|
|
56
|
+
WorkloadConfig,
|
|
57
|
+
WorkloadSegment,
|
|
58
|
+
load_config,
|
|
59
|
+
)
|
|
60
|
+
from sagellm_core.demo import main as demo_main
|
|
61
|
+
from sagellm_core.engine_factory import EngineFactory
|
|
62
|
+
|
|
63
|
+
# Only EmbeddingEngine remains from legacy engines
|
|
64
|
+
from sagellm_core.engines import (
|
|
65
|
+
EmbeddingEngine,
|
|
66
|
+
EmbeddingEngineConfig,
|
|
67
|
+
)
|
|
68
|
+
from sagellm_core.factory import create_backend, create_engine
|
|
69
|
+
from sagellm_core.health import HealthStatus
|
|
70
|
+
from sagellm_core.plugins import PluginResolutionError, list_entry_points, resolve_kind
|
|
71
|
+
from sagellm_core.runner import DemoRunner, RunnerContext
|
|
72
|
+
|
|
73
|
+
# PD 分离 MVP 模块
|
|
74
|
+
from sagellm_core.runtime import DistributedConfig, DistributedRuntime, RuntimeState
|
|
75
|
+
from sagellm_core.pd_executor import PDExecutionContext, PDSeparatedExecutor
|
|
76
|
+
|
|
77
|
+
# Engine HTTP Server
|
|
78
|
+
from sagellm_core.engine_server import app as engine_server_app
|
|
79
|
+
from sagellm_core.engine_server import main as serve_engine
|
|
80
|
+
|
|
81
|
+
# ============================================================================
|
|
82
|
+
# Phase 2: New Modules (P2 Priority)
|
|
83
|
+
# ============================================================================
|
|
84
|
+
# Model loading utilities
|
|
85
|
+
from sagellm_core.model import ModelLoader, load_model
|
|
86
|
+
|
|
87
|
+
# Input processing
|
|
88
|
+
from sagellm_core.inputs import InputProcessor, ProcessedInput, TokenizerWrapper
|
|
89
|
+
|
|
90
|
+
# Sampling utilities
|
|
91
|
+
from sagellm_core.sampling import SamplingParams, Sampler, GreedySampler
|
|
92
|
+
|
|
93
|
+
# Distributed strategies
|
|
94
|
+
from sagellm_core.distributed import DistributedStrategy, TensorParallelStrategy
|
|
95
|
+
|
|
96
|
+
# Observability
|
|
97
|
+
from sagellm_core.observability import MetricsCollector, EngineMetrics, setup_logger
|
|
98
|
+
|
|
99
|
+
# PyTorch engine placeholders (the optional import below is disabled, so these stay None)
|
|
100
|
+
PyTorchEngine = None
|
|
101
|
+
create_pytorch_engine = None
|
|
102
|
+
|
|
103
|
+
# Optional PyTorchEngine import (deprecated, use LLMEngine)
|
|
104
|
+
# try:
|
|
105
|
+
# from sagellm_core.engines.pytorch_engine import (
|
|
106
|
+
# PyTorchEngine,
|
|
107
|
+
# create_pytorch_engine,
|
|
108
|
+
# )
|
|
109
|
+
# except ImportError:
|
|
110
|
+
# pass # torch or transformers not available
|
|
111
|
+
|
|
112
|
+
# =========================================================================
|
|
113
|
+
# DEPRECATED: Old hardware-specific engines have been removed
|
|
114
|
+
# Use LLMEngine instead:
|
|
115
|
+
# from sagellm_core import LLMEngine, LLMEngineConfig
|
|
116
|
+
# engine = LLMEngine(LLMEngineConfig(model_path="..."))
|
|
117
|
+
#
|
|
118
|
+
# The following engines no longer exist:
|
|
119
|
+
# - CPUEngine → use LLMEngine with LLMEngineConfig(backend_type="cpu")
|
|
120
|
+
# - HFCudaEngine → use LLMEngine with LLMEngineConfig(backend_type="cuda")
|
|
121
|
+
# - AscendEngine → use LLMEngine with LLMEngineConfig(backend_type="ascend")
|
|
122
|
+
# - PyTorchEngine → use LLMEngine
|
|
123
|
+
#
|
|
124
|
+
# EmbeddingEngine is still available for embedding-only use cases.
|
|
125
|
+
# =========================================================================
|
|
126
|
+
|
|
127
|
+
# Version is defined at the top of the file (line 29)
|
|
128
|
+
|
|
129
|
+
__all__ = [
|
|
130
|
+
# Version
|
|
131
|
+
"__version__",
|
|
132
|
+
# =========================================================================
|
|
133
|
+
# New Architecture (vLLM v1 style) - RECOMMENDED
|
|
134
|
+
# =========================================================================
|
|
135
|
+
# LLMEngine - Unified hardware-agnostic engine
|
|
136
|
+
"LLMEngine",
|
|
137
|
+
"LLMEngineConfig",
|
|
138
|
+
# EngineCore - Coordinates Scheduler and Executor
|
|
139
|
+
"EngineCore",
|
|
140
|
+
"EngineCoreConfig",
|
|
141
|
+
# Scheduler - Continuous Batching
|
|
142
|
+
"ContinuousBatchingScheduler",
|
|
143
|
+
"SchedulerConfig",
|
|
144
|
+
"SchedulerOutput",
|
|
145
|
+
# Executor - Manages Workers
|
|
146
|
+
"ExecutorBase",
|
|
147
|
+
"ExecutorConfig",
|
|
148
|
+
"UniprocExecutor",
|
|
149
|
+
# Worker - Model execution
|
|
150
|
+
"Worker",
|
|
151
|
+
"ModelRunner",
|
|
152
|
+
# =========================================================================
|
|
153
|
+
# Configuration (for YAML/config files)
|
|
154
|
+
# =========================================================================
|
|
155
|
+
"BackendConfig",
|
|
156
|
+
"DemoConfig",
|
|
157
|
+
"EngineConfig",
|
|
158
|
+
"OutputConfig",
|
|
159
|
+
"WorkloadConfig",
|
|
160
|
+
"WorkloadSegment",
|
|
161
|
+
"load_config",
|
|
162
|
+
# Engine abstraction
|
|
163
|
+
"BaseEngine",
|
|
164
|
+
"EngineInstanceConfig", # For runtime engine instantiation
|
|
165
|
+
"HealthStatus",
|
|
166
|
+
# Engine implementations
|
|
167
|
+
# DEPRECATED: Old engines removed, use LLMEngine instead
|
|
168
|
+
# Only EmbeddingEngine remains for embedding-only use cases
|
|
169
|
+
"EmbeddingEngine",
|
|
170
|
+
"EmbeddingEngineConfig",
|
|
171
|
+
# Factory functions
|
|
172
|
+
"create_backend",
|
|
173
|
+
"create_engine",
|
|
174
|
+
"EngineFactory",
|
|
175
|
+
# Plugin system
|
|
176
|
+
"PluginResolutionError",
|
|
177
|
+
"list_entry_points",
|
|
178
|
+
"resolve_kind",
|
|
179
|
+
# Demo runner
|
|
180
|
+
"demo_main",
|
|
181
|
+
"DemoRunner",
|
|
182
|
+
"RunnerContext",
|
|
183
|
+
# PD Separation MVP
|
|
184
|
+
"DistributedConfig",
|
|
185
|
+
"DistributedRuntime",
|
|
186
|
+
"RuntimeState",
|
|
187
|
+
"PDExecutionContext",
|
|
188
|
+
"PDSeparatedExecutor",
|
|
189
|
+
# Engine HTTP Server
|
|
190
|
+
"engine_server_app",
|
|
191
|
+
"serve_engine",
|
|
192
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
{isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/runner.cpython-311.pyc
RENAMED
|
Binary file
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Distributed inference strategies for sageLLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from sagellm_core.distributed.strategies import (
|
|
6
|
+
DistributedStrategy,
|
|
7
|
+
TensorParallelStrategy,
|
|
8
|
+
PipelineParallelStrategy,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"DistributedStrategy",
|
|
13
|
+
"TensorParallelStrategy",
|
|
14
|
+
"PipelineParallelStrategy",
|
|
15
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""EngineCore - Coordinates Scheduler and Executor.
|
|
2
|
+
|
|
3
|
+
The EngineCore is responsible for:
|
|
4
|
+
1. Managing request queues
|
|
5
|
+
2. Coordinating with Scheduler for batch formation
|
|
6
|
+
3. Dispatching batches to Executor
|
|
7
|
+
4. Collecting results
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from sagellm_core.engine_core.engine_core import EngineCore
|
|
11
|
+
|
|
12
|
+
__all__ = ["EngineCore"]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Scheduler - Request scheduling for Continuous Batching.
|
|
2
|
+
|
|
3
|
+
The Scheduler is responsible for:
|
|
4
|
+
1. Selecting which requests to run in the next step
|
|
5
|
+
2. Managing prefill vs decode scheduling
|
|
6
|
+
3. Preemption decisions
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from sagellm_core.engine_core.scheduler.scheduler import (
|
|
10
|
+
ContinuousBatchingScheduler,
|
|
11
|
+
SchedulerConfig,
|
|
12
|
+
SchedulerOutput,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"ContinuousBatchingScheduler",
|
|
17
|
+
"SchedulerConfig",
|
|
18
|
+
"SchedulerOutput",
|
|
19
|
+
]
|
isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file
|
isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc
ADDED
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Engine implementations for sageLLM Core.
|
|
2
|
+
|
|
3
|
+
DEPRECATED: This module contains legacy engine implementations.
|
|
4
|
+
Use LLMEngine from sagellm_core instead:
|
|
5
|
+
|
|
6
|
+
from sagellm_core import LLMEngine, LLMEngineConfig
|
|
7
|
+
|
|
8
|
+
config = LLMEngineConfig(
|
|
9
|
+
model_path="Qwen/Qwen2-7B",
|
|
10
|
+
backend_type="cuda", # or "cpu", "ascend", "auto"
|
|
11
|
+
)
|
|
12
|
+
engine = LLMEngine(config)
|
|
13
|
+
await engine.start()
|
|
14
|
+
response = await engine.generate("Hello!")
|
|
15
|
+
|
|
16
|
+
Remaining engines:
|
|
17
|
+
- EmbeddingEngine: Embedding model inference (not yet migrated to LLMEngine)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
# Only EmbeddingEngine remains - others have been migrated to LLMEngine
|
|
23
|
+
from sagellm_core.engines.embedding import EmbeddingEngine, EmbeddingEngineConfig
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
# Embedding engine (still needed for embedding-only models)
|
|
27
|
+
"EmbeddingEngine",
|
|
28
|
+
"EmbeddingEngineConfig",
|
|
29
|
+
]
|
|
Binary file
|
|
Binary file
|