isagellm-core 0.3.0.10__py2.py3-none-any.whl → 0.4.0.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {isagellm_core-0.3.0.10.dist-info → isagellm_core-0.4.0.0.dist-info}/METADATA +11 -12
- isagellm_core-0.4.0.0.dist-info/RECORD +109 -0
- isagellm_core-0.4.0.0.dist-info/entry_points.txt +2 -0
- sagellm_core/__init__.py +104 -58
- sagellm_core/__init__.pyc +0 -0
- sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/__pycache__/engine_server.cpython-311.pyc +0 -0
- sagellm_core/__pycache__/llm_engine.cpython-311.pyc +0 -0
- sagellm_core/__pycache__/pd_executor.cpython-311.pyc +0 -0
- sagellm_core/__pycache__/runner.cpython-311.pyc +0 -0
- sagellm_core/distributed/__init__.py +15 -0
- sagellm_core/distributed/__init__.pyc +0 -0
- sagellm_core/distributed/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/distributed/__pycache__/strategies.cpython-311.pyc +0 -0
- sagellm_core/distributed/strategies.pyc +0 -0
- sagellm_core/engine_core/__init__.py +12 -0
- sagellm_core/engine_core/__init__.pyc +0 -0
- sagellm_core/engine_core/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/engine_core/__pycache__/engine_core.cpython-311.pyc +0 -0
- sagellm_core/engine_core/engine_core.pyc +0 -0
- sagellm_core/engine_core/scheduler/__init__.py +19 -0
- sagellm_core/engine_core/scheduler/__init__.pyc +0 -0
- sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc +0 -0
- sagellm_core/engine_core/scheduler/scheduler.pyc +0 -0
- sagellm_core/engine_server.pyc +0 -0
- sagellm_core/engines/__init__.py +17 -33
- sagellm_core/engines/__init__.pyc +0 -0
- sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/engines/__pycache__/ascend.cpython-311.pyc +0 -0
- sagellm_core/engines/__pycache__/embedding.cpython-311.pyc +0 -0
- sagellm_core/engines/embedding.pyc +0 -0
- sagellm_core/executor/__init__.py +16 -0
- sagellm_core/executor/__init__.pyc +0 -0
- sagellm_core/executor/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/executor/__pycache__/executor_base.cpython-311.pyc +0 -0
- sagellm_core/executor/__pycache__/uniproc_executor.cpython-311.pyc +0 -0
- sagellm_core/executor/executor_base.pyc +0 -0
- sagellm_core/executor/uniproc_executor.pyc +0 -0
- sagellm_core/inputs/__init__.py +12 -0
- sagellm_core/inputs/__init__.pyc +0 -0
- sagellm_core/inputs/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/inputs/__pycache__/processor.cpython-311.pyc +0 -0
- sagellm_core/inputs/__pycache__/tokenizer_utils.cpython-311.pyc +0 -0
- sagellm_core/inputs/processor.pyc +0 -0
- sagellm_core/inputs/tokenizer_utils.pyc +0 -0
- sagellm_core/llm_engine.pyc +0 -0
- sagellm_core/model/__init__.py +13 -0
- sagellm_core/model/__init__.pyc +0 -0
- sagellm_core/model/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/model/__pycache__/model_loader.cpython-311.pyc +0 -0
- sagellm_core/model/__pycache__/weight_utils.cpython-311.pyc +0 -0
- sagellm_core/model/model_loader.pyc +0 -0
- sagellm_core/model/weight_utils.pyc +0 -0
- sagellm_core/observability/__init__.py +16 -0
- sagellm_core/observability/__init__.pyc +0 -0
- sagellm_core/observability/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/observability/__pycache__/logger.cpython-311.pyc +0 -0
- sagellm_core/observability/__pycache__/metrics.cpython-311.pyc +0 -0
- sagellm_core/observability/logger.pyc +0 -0
- sagellm_core/observability/metrics.pyc +0 -0
- sagellm_core/pd_executor.pyc +0 -0
- sagellm_core/runner.pyc +0 -0
- sagellm_core/sampling/__init__.py +14 -0
- sagellm_core/sampling/__init__.pyc +0 -0
- sagellm_core/sampling/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/sampling/__pycache__/params.cpython-311.pyc +0 -0
- sagellm_core/sampling/__pycache__/sampler.cpython-311.pyc +0 -0
- sagellm_core/sampling/params.pyc +0 -0
- sagellm_core/sampling/sampler.pyc +0 -0
- sagellm_core/worker/__init__.py +11 -0
- sagellm_core/worker/__init__.pyc +0 -0
- sagellm_core/worker/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/worker/__pycache__/worker.cpython-311.pyc +0 -0
- sagellm_core/worker/model_runner/__init__.py +8 -0
- sagellm_core/worker/model_runner/__init__.pyc +0 -0
- sagellm_core/worker/model_runner/__pycache__/__init__.cpython-311.pyc +0 -0
- sagellm_core/worker/model_runner/__pycache__/model_runner.cpython-311.pyc +0 -0
- sagellm_core/worker/model_runner/model_runner.pyc +0 -0
- sagellm_core/worker/worker.pyc +0 -0
- isagellm_core-0.3.0.10.dist-info/RECORD +0 -52
- isagellm_core-0.3.0.10.dist-info/entry_points.txt +0 -8
- sagellm_core/engines/ascend.pyc +0 -0
- sagellm_core/engines/cpu.pyc +0 -0
- sagellm_core/engines/hf_cuda.pyc +0 -0
- sagellm_core/engines/pytorch.pyc +0 -0
- sagellm_core/engines/pytorch_engine.pyc +0 -0
- {isagellm_core-0.3.0.10.dist-info → isagellm_core-0.4.0.0.dist-info}/WHEEL +0 -0
- {isagellm_core-0.3.0.10.dist-info → isagellm_core-0.4.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: isagellm-core
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0.0
|
|
4
4
|
Summary: sageLLM core runtime with PD separation (MVP)
|
|
5
5
|
Author: IntelliStream Team
|
|
6
6
|
License: Proprietary - IntelliStream
|
|
@@ -13,10 +13,10 @@ Requires-Python: ==3.11.*
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: pydantic>=2.0.0
|
|
15
15
|
Requires-Dist: pyyaml>=6.0.0
|
|
16
|
-
Requires-Dist: isagellm-protocol<0.
|
|
17
|
-
Requires-Dist: isagellm-backend<0.
|
|
18
|
-
Requires-Dist: isagellm-comm<0.
|
|
19
|
-
Requires-Dist: isagellm-kv-cache<0.
|
|
16
|
+
Requires-Dist: isagellm-protocol<0.5.0,>=0.3.0.2
|
|
17
|
+
Requires-Dist: isagellm-backend<0.5.0,>=0.4.0.0
|
|
18
|
+
Requires-Dist: isagellm-comm<0.5.0,>=0.4.0.0
|
|
19
|
+
Requires-Dist: isagellm-kv-cache<0.5.0,>=0.3.0.1
|
|
20
20
|
Requires-Dist: fastapi>=0.100.0
|
|
21
21
|
Requires-Dist: uvicorn>=0.22.0
|
|
22
22
|
Provides-Extra: dev
|
|
@@ -50,10 +50,10 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
50
50
|
┌─────────────────────────────────────────────────────────────┐
|
|
51
51
|
│ sagellm-core (引擎协调层) ← 本仓库 │
|
|
52
52
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
53
|
-
│ │
|
|
54
|
-
│ │ •
|
|
55
|
-
│ │ •
|
|
56
|
-
│ │ •
|
|
53
|
+
│ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
|
|
54
|
+
│ │ • 统一推理接口: generate, stream, execute │ │
|
|
55
|
+
│ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
|
|
56
|
+
│ │ • 配置驱动 (LLMEngineConfig) │ │
|
|
57
57
|
│ └─────────────────────────────────────────────────────┘ │
|
|
58
58
|
│ ┌─────────────────────────────────────────────────────┐ │
|
|
59
59
|
│ │ Configuration System (config.py) │ │
|
|
@@ -68,9 +68,8 @@ sageLLM Core - 引擎协调层与运行时系统
|
|
|
68
68
|
```
|
|
69
69
|
|
|
70
70
|
**职责分离**:
|
|
71
|
-
-
|
|
72
|
-
- ✅ **
|
|
73
|
-
- ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
|
|
71
|
+
- ✅ **Core 负责**:LLMEngine (硬件无关)、配置、协调
|
|
72
|
+
- ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
|
|
74
73
|
|
|
75
74
|
## Features
|
|
76
75
|
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
sagellm_core/__init__.py,sha256=xw3aVDgBsbemqEsfM2dLd9bqDBi1edXdKrz2jIS44UE,6615
|
|
2
|
+
sagellm_core/__init__.pyc,sha256=vwQCwAbKpbKiofl2DFQWLVEhY9sO4vAuRo2ne-LrLi0,4414
|
|
3
|
+
sagellm_core/__main__.pyc,sha256=48Ej1ycqV-z87qawGOTNBKNgY3EcbffUOOXrDecOR3g,384
|
|
4
|
+
sagellm_core/config.pyc,sha256=cCMLvMVb_s3HzPYSfqlbRWodQ4Uby8U962l1vmunT5Y,8453
|
|
5
|
+
sagellm_core/demo.pyc,sha256=0hZkAJch6ETjSDsPq8XkDLNcd8qAys_hqOH1qi0xBzg,6028
|
|
6
|
+
sagellm_core/engine.pyc,sha256=o5yzFfLhpRx5lEYC2E75q9b04UqsDY63fC4nZdkHyeU,12258
|
|
7
|
+
sagellm_core/engine_factory.pyc,sha256=cNbEImM0MNcCgVeC0zViAT1AZkPAt2WARrVd1U__tS4,14886
|
|
8
|
+
sagellm_core/engine_server.pyc,sha256=ldiQFB0nYDX6jSGtAduDqdUitBIoMwwnWsblPauQ5qI,19120
|
|
9
|
+
sagellm_core/factory.pyc,sha256=B0s_JSOjVPwE95Oj2moP_vVA3vCrWsi2753UrwF8-eM,2737
|
|
10
|
+
sagellm_core/health.pyc,sha256=c4A60b1ZANbAqck59AV-kczK2b355EPi9JxoYHRlhl8,684
|
|
11
|
+
sagellm_core/llm_engine.pyc,sha256=rc8esb3UDLmwM6LtYDH2olHYWQtqpiuEUshI58XNW-U,22251
|
|
12
|
+
sagellm_core/pd_executor.pyc,sha256=FqwEBT_ooJzJUpFoVr9_aoJmVK1gXVZhhkoZEHABRSQ,17970
|
|
13
|
+
sagellm_core/plugins.pyc,sha256=g6eke_yH65_7Bu6yfT7v19cZ7gjIAWbkyG7gv1JSXy0,2617
|
|
14
|
+
sagellm_core/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
|
|
15
|
+
sagellm_core/runner.pyc,sha256=AqOu9y2avlD4SNb89YBGV_WgNdC2CTjJ8WFDEtrpdkM,14180
|
|
16
|
+
sagellm_core/runtime.pyc,sha256=RFPMN7Vu9M5O1ZYgFQ9REz4wkMFznRPVPt2ra3u2vns,10018
|
|
17
|
+
sagellm_core/workload.pyc,sha256=G9yMm2L4ZyZpGsCxuEOQ83t78Y_lCQmbg5p6OHl5rn0,4127
|
|
18
|
+
sagellm_core/__pycache__/__init__.cpython-311.pyc,sha256=nBqzd2ITLq8p3n22-0ntfsziD1qImxCYVFUGYIuay1U,4444
|
|
19
|
+
sagellm_core/__pycache__/base_engine.cpython-311.pyc,sha256=XiynMTxG3srYsUwzIoEbwEJOmpjBBB6UNEoAXDxDgWM,9563
|
|
20
|
+
sagellm_core/__pycache__/config.cpython-311.pyc,sha256=6gUdzUs3m31zGQVa2OzlPcPbX1it2Ziag0cQbCaXvIU,8483
|
|
21
|
+
sagellm_core/__pycache__/demo.cpython-311.pyc,sha256=PN6yZ3PgPWjiZ4XqpqRT-s5YYWitMwR_muYFvPmXdWA,6058
|
|
22
|
+
sagellm_core/__pycache__/engine.cpython-311.pyc,sha256=B6ynwwETTD_kjZ2CvUao_QRjM0EluEWg60Be-nRiVeE,12288
|
|
23
|
+
sagellm_core/__pycache__/engine_factory.cpython-311.pyc,sha256=Qte3LXKSpcOWGhu-Hns7xPvTdkyYry3v_EeYQbfVp4w,14916
|
|
24
|
+
sagellm_core/__pycache__/engine_server.cpython-311.pyc,sha256=Hg9T6xwjqulMcyB8MaIdbP9S9ci7CwDmYZ9E9zPS4mc,19150
|
|
25
|
+
sagellm_core/__pycache__/factory.cpython-311.pyc,sha256=R4MbGyreD3U2iWmwQ0r14R347GAY9RM_I1xOiWovozo,2767
|
|
26
|
+
sagellm_core/__pycache__/health.cpython-311.pyc,sha256=Z2mo-4iGINZp0w9AmCJomhHGRSzvbwxCfmdMAjg5vc4,714
|
|
27
|
+
sagellm_core/__pycache__/llm_engine.cpython-311.pyc,sha256=nBLF5BI0rRHRl_PE_1R_XPI-d5tjvczQ3tMi5k2eB0Y,22281
|
|
28
|
+
sagellm_core/__pycache__/mock_engine.cpython-311.pyc,sha256=zCHNncwIzEgqZ0lc7kiWkS-zOHf7CQ73UhCXztR6mFM,581
|
|
29
|
+
sagellm_core/__pycache__/pd_executor.cpython-311.pyc,sha256=zzZ2Cbitv9BsWrqWMqKxE2DbtKkkW3UFacP4WaK2FZA,18000
|
|
30
|
+
sagellm_core/__pycache__/plugins.cpython-311.pyc,sha256=gaoaHApylGSrmMxXVwFs2v4idiZJnisttawlXTbXBWY,2647
|
|
31
|
+
sagellm_core/__pycache__/runner.cpython-311.pyc,sha256=9BLsarR-ouiHIpPpKmJYlo51zBE7PcA5ePvD4PGjbak,14210
|
|
32
|
+
sagellm_core/__pycache__/runtime.cpython-311.pyc,sha256=STMbXptWWqislgsqEk4kIXmUYXdFzGDihSeXGWLy_6k,10048
|
|
33
|
+
sagellm_core/__pycache__/workload.cpython-311.pyc,sha256=Y01UUCDE85D8GY5kF_UsHyNCBbMkmsoB3vOUJV-vulA,4157
|
|
34
|
+
sagellm_core/distributed/__init__.py,sha256=8WTrKkZmYGrV7pj3L083496LskHAv3BOULEIZflgBTc,328
|
|
35
|
+
sagellm_core/distributed/__init__.pyc,sha256=Ur1qc3njwxEPY7PweXoIHTgo1zMblp7HUX84hT0tRAo,498
|
|
36
|
+
sagellm_core/distributed/strategies.pyc,sha256=caK8josx0icKJ0vDwo1UQFeVLzbZf4KB6iUq30ClVnk,6061
|
|
37
|
+
sagellm_core/distributed/__pycache__/__init__.cpython-311.pyc,sha256=4Fv2xNuHYN0Sf98_c9KfU0SbafKhWANL8s4YeTLxiVk,528
|
|
38
|
+
sagellm_core/distributed/__pycache__/strategies.cpython-311.pyc,sha256=MOa7p3j41XvmkJ2tr4OnuHID6gZm6vF6tDWRPe3Uzb8,6091
|
|
39
|
+
sagellm_core/engine_core/__init__.py,sha256=wG6ONHCRWk4ScU6WwW8f-qL3xITF5OcqbxBHGKAAwd4,314
|
|
40
|
+
sagellm_core/engine_core/__init__.pyc,sha256=tnc2J-6ys0KEntuHngGj2vMa_dr6O2NMwkNB2X_BhLQ,502
|
|
41
|
+
sagellm_core/engine_core/engine_core.pyc,sha256=IH5gVbWk3H4Bd2FHqOdb2NVwqhYWcO3qzPKFmLaSfXE,8790
|
|
42
|
+
sagellm_core/engine_core/__pycache__/__init__.cpython-311.pyc,sha256=z_4RxeRts-lRdyGW8z7GVqpcJUNnqK79ep-8GozJGgs,532
|
|
43
|
+
sagellm_core/engine_core/__pycache__/engine_core.cpython-311.pyc,sha256=GnFylOovdrLKBY9wAYymsr_BHMfHpezgEHYTCr9cVI8,8820
|
|
44
|
+
sagellm_core/engine_core/scheduler/__init__.py,sha256=V-XPvuplqlFusYZE7ta2NjLivYiN1ALDLkH7H6BbegU,448
|
|
45
|
+
sagellm_core/engine_core/scheduler/__init__.pyc,sha256=OGnLLe6IHOa0LNy0cO2ojDE8e_vRNsSlEq3za6r5SLU,614
|
|
46
|
+
sagellm_core/engine_core/scheduler/scheduler.pyc,sha256=hrlGzabvyv1mXbB2H17aVIrAPkAdP9GpRekXmiALzzI,8974
|
|
47
|
+
sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc,sha256=no6FHo-7naNZChZLX4Bx6EV5fThrKUXPKjzeT6e7HkM,644
|
|
48
|
+
sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc,sha256=C_MblVBp8AwnDeE_35J9OS3IMC1HZhZ3-LRp5J6G89o,9004
|
|
49
|
+
sagellm_core/engines/__init__.py,sha256=eRxLCXebcl0DqhMh4J3yPcpmoSMtZzjWjCXNku0hcek,865
|
|
50
|
+
sagellm_core/engines/__init__.pyc,sha256=sOw8q-14ii7rYPyRgXMbyYmDArYQ7fXclqZ86eWhmkE,934
|
|
51
|
+
sagellm_core/engines/embedding.pyc,sha256=bdCOYYasewAb2q5aeAksOEFKGG7VfBjNGt4Ms1_uHcc,12189
|
|
52
|
+
sagellm_core/engines/__pycache__/__init__.cpython-311.pyc,sha256=lDgW-nA9PGC1xxQOYI-2xNMlj5mi5DoB7Re26E334_k,964
|
|
53
|
+
sagellm_core/engines/__pycache__/ascend.cpython-311.pyc,sha256=A3F0gZAhTeUDM7Fpur1skw1DaWBX-W88oXCs9L-0qm0,34653
|
|
54
|
+
sagellm_core/engines/__pycache__/cpu.cpython-311.pyc,sha256=lxuj9LGXjsg7WwB0GXQUsYBphevFENmeLAjKYiswNxY,27863
|
|
55
|
+
sagellm_core/engines/__pycache__/embedding.cpython-311.pyc,sha256=OOCYxcrrEQjzLw7pHCkw_jIYv3t5ihYZ-e9DbEbiRrU,12219
|
|
56
|
+
sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc,sha256=WcWhj1ktaGONooxWJZKRv-os5Hw-InlfV2ZfZ6myrLY,61133
|
|
57
|
+
sagellm_core/engines/__pycache__/mock.cpython-311.pyc,sha256=1g9YyKfo9yv-6VTjzzfacv-9ZICHjsVXjyJ_IajRnQo,14641
|
|
58
|
+
sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc,sha256=MRSQN2ZBJmuIxFIA_yflaxarFhslfT0SG0Lw7-EFyDQ,18438
|
|
59
|
+
sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc,sha256=dLBcBODxEqgmH25nCBPWhwy3uu3-dqzyzOwQy-izN7A,11675
|
|
60
|
+
sagellm_core/executor/__init__.py,sha256=Acm1uxVigM823XnTaJ2UC_mSgTIlvtHb6uX076tMSN0,507
|
|
61
|
+
sagellm_core/executor/__init__.pyc,sha256=shUq-xmHO7s6GZGQgUH5XCTJaowF_Pu4KfhcKAbO8KU,705
|
|
62
|
+
sagellm_core/executor/executor_base.pyc,sha256=8ayzIqog994GKozxRQfHHpm5di9HTlcQ-QSAaQit08w,4759
|
|
63
|
+
sagellm_core/executor/uniproc_executor.pyc,sha256=5JQOhTzMFK5onf19qbnzNXV1S_j3xw4p2Vgd8VWmhEY,4940
|
|
64
|
+
sagellm_core/executor/__pycache__/__init__.cpython-311.pyc,sha256=QLugLvDhoO3nbdW-f_3g-HQXKIK437_QkgcLcPdQabA,735
|
|
65
|
+
sagellm_core/executor/__pycache__/executor_base.cpython-311.pyc,sha256=soUj35YdORDrUQG99uxMBlL0MDXwowPAPRR38TtQ7N4,4789
|
|
66
|
+
sagellm_core/executor/__pycache__/uniproc_executor.cpython-311.pyc,sha256=4UEx0RnvX1AoVHvv22OasO0o70KApSNopdSih5l9iDk,4970
|
|
67
|
+
sagellm_core/inputs/__init__.py,sha256=t_DP0n7Cx-L9BiniDcKHBcg6IK6EVCs8J7GRbAwPbLk,301
|
|
68
|
+
sagellm_core/inputs/__init__.pyc,sha256=HcZwXTd9lUm2sOjAz6EP9vLXq9CyQap9GJyRHxeTJMc,513
|
|
69
|
+
sagellm_core/inputs/processor.pyc,sha256=1V5pK5K7_fH6Il93uUoa6317SdiV_6zrZ75U-RFZ_BU,3641
|
|
70
|
+
sagellm_core/inputs/tokenizer_utils.pyc,sha256=6US7QanjlsJhNl5wy9jFQPt3gA8ik2Um3QvFO2_9-gk,2871
|
|
71
|
+
sagellm_core/inputs/__pycache__/__init__.cpython-311.pyc,sha256=1VPyuhLyj5pwRrAcZvoEVIzNiASV38linDfG3RsZw7w,543
|
|
72
|
+
sagellm_core/inputs/__pycache__/processor.cpython-311.pyc,sha256=-6LVrSIfXnca6BK3aji1hcAbk1yuB0jIjp1J9_WQ8y4,3671
|
|
73
|
+
sagellm_core/inputs/__pycache__/tokenizer_utils.cpython-311.pyc,sha256=sOa5f9-SDvPIbLnVgfrYyM4TP7pNuiNbpOJkaFQn7Ds,2901
|
|
74
|
+
sagellm_core/model/__init__.py,sha256=WKrGGLrz5yhK5R1-YapCiV8eBec7kH-NvBOEIm6VD90,329
|
|
75
|
+
sagellm_core/model/__init__.pyc,sha256=yqCreHdJZqQ32Mgg-syjtxuDSZcJXRX5vBYjPkjPopA,542
|
|
76
|
+
sagellm_core/model/model_loader.pyc,sha256=g3Xtz87w22DCdPVbV_eOkd7W_iH7E6Bbc6O23EAYMQI,5713
|
|
77
|
+
sagellm_core/model/weight_utils.pyc,sha256=QHflYnXneKpU4UHGb3RP51_Bx5UiHtYKVsaH_MfvcCg,1830
|
|
78
|
+
sagellm_core/model/__pycache__/__init__.cpython-311.pyc,sha256=o1Raqg_7W-OQzqIDEB_tA-ekmbbIZF9b9TNRM_sRs-k,572
|
|
79
|
+
sagellm_core/model/__pycache__/model_loader.cpython-311.pyc,sha256=fPtDeWbF0fbc0lFxcoCI3RFC84wNjlASypdEniuYMMU,5743
|
|
80
|
+
sagellm_core/model/__pycache__/weight_utils.cpython-311.pyc,sha256=dBd4nMqBX9fyHMFjLoL45-oQ3lQfP1eqeZm47jgz6-M,1860
|
|
81
|
+
sagellm_core/observability/__init__.py,sha256=lT3qt-69vwH3KlxqN0H_hjknmaXl8Yx2Q3PrWifNvd8,378
|
|
82
|
+
sagellm_core/observability/__init__.pyc,sha256=1K3GCd5Gi7770Bx6tLW665_C6KNK3-j447miFkM8GVg,608
|
|
83
|
+
sagellm_core/observability/logger.pyc,sha256=Ng9CYnn9Xr-rJUhethU6HN3oSy5lalmupOtbxk6-g84,1910
|
|
84
|
+
sagellm_core/observability/metrics.pyc,sha256=DZ-bXLTQfnt54BN17PRFtdMZkvIBB4btoEmBzzYmSng,6314
|
|
85
|
+
sagellm_core/observability/__pycache__/__init__.cpython-311.pyc,sha256=xgniiPnO5ImAeCfmvTW6w78ezs8c7aB6vW0XHWn8o7c,638
|
|
86
|
+
sagellm_core/observability/__pycache__/logger.cpython-311.pyc,sha256=7mFsPuy8eHu1nRKi4M5_d4iRSPEPf9fa_jqM8aEDZFY,1940
|
|
87
|
+
sagellm_core/observability/__pycache__/metrics.cpython-311.pyc,sha256=FdoJItSrs_g5_6GTJk7vqjTrpoqc7UthuAuq2m40uw0,6344
|
|
88
|
+
sagellm_core/sampling/__init__.py,sha256=H8b1UnwiYfMP2rODrjDJXvoI8fRTyfCoDImHRstIDGg,330
|
|
89
|
+
sagellm_core/sampling/__init__.pyc,sha256=36Xya9y4YCKQQx8a7EzKJ6htImljpW_dE18TLZMX_OU,556
|
|
90
|
+
sagellm_core/sampling/params.pyc,sha256=LMPOgZL2nkpDAYHC5jG80uMD9vHkBZXM4N3eGFt9PIw,3391
|
|
91
|
+
sagellm_core/sampling/sampler.pyc,sha256=g2tr8FU-71rMvlpWRNNdiY-d6oTAzJ5v-NfRMFpsLYg,6721
|
|
92
|
+
sagellm_core/sampling/__pycache__/__init__.cpython-311.pyc,sha256=aHSszIvPeMHCRRbWg7CSbuXNI5JVBJ3W2tU5Y254wUA,586
|
|
93
|
+
sagellm_core/sampling/__pycache__/params.cpython-311.pyc,sha256=COqJpqpVd1VUFsrZVkblWhTwkBp7A-tmeOh5saIIDoU,3421
|
|
94
|
+
sagellm_core/sampling/__pycache__/sampler.cpython-311.pyc,sha256=-24n6bhHzS3paTzCIrfpbX50XL7mtB4NehbD4jn_ltk,6751
|
|
95
|
+
sagellm_core/worker/__init__.py,sha256=isHAvgRYHX_HBRQsJtB72ziV4xeSDyjR1xFH3h_HEHQ,258
|
|
96
|
+
sagellm_core/worker/__init__.pyc,sha256=mf0kfM381gC3ghpXYQfUOsT9OJGBXy4kdz6Q__qYnIw,445
|
|
97
|
+
sagellm_core/worker/worker.pyc,sha256=yPn1TqZdy7fvPQ_EuImwW5R9-MCnCcCxWwjHeqqf-5k,4772
|
|
98
|
+
sagellm_core/worker/__pycache__/__init__.cpython-311.pyc,sha256=zAe99l1KXH7X1sFabMubGLK0psDOyEv4zRMTF23-fTg,475
|
|
99
|
+
sagellm_core/worker/__pycache__/worker.cpython-311.pyc,sha256=lotaWHeDvpNoyrOCWHKvGjzDdAVBKGwj0h1z4R9frdI,4802
|
|
100
|
+
sagellm_core/worker/model_runner/__init__.py,sha256=w4cq2JoxfHYNUQI3jXO-XhFCoYxNg6YHkioK-S3Ayro,246
|
|
101
|
+
sagellm_core/worker/model_runner/__init__.pyc,sha256=QhC7F-6BIerf9GdwkEPegvF5lQfiepVzhUkMuEstpD8,447
|
|
102
|
+
sagellm_core/worker/model_runner/model_runner.pyc,sha256=wTv8K-F4qtwlQCXpLr1LGIYD-Gae0rzMayp84U5BJWY,16106
|
|
103
|
+
sagellm_core/worker/model_runner/__pycache__/__init__.cpython-311.pyc,sha256=ykIwBnnZYUKPe0foKGwG9rovfzNuSEO6f-4t5DLpMeY,477
|
|
104
|
+
sagellm_core/worker/model_runner/__pycache__/model_runner.cpython-311.pyc,sha256=CLEz8NGJRepPauN_YjT5O4Wrb_FTQ_C8rS1QPbew5TE,16136
|
|
105
|
+
isagellm_core-0.4.0.0.dist-info/METADATA,sha256=T3grOEyLR-G7-hQiCx4U3xGVoVueLAVeAdIC233fxkw,9168
|
|
106
|
+
isagellm_core-0.4.0.0.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
|
|
107
|
+
isagellm_core-0.4.0.0.dist-info/entry_points.txt,sha256=w0kgbHe7jVNOuy785wFMEBwqoZUnfU24HCImsQII3y0,64
|
|
108
|
+
isagellm_core-0.4.0.0.dist-info/top_level.txt,sha256=wcgdWrvkaoYYh_dWSFI5Toi8PZsHutVqfhTB2tb0K6g,13
|
|
109
|
+
isagellm_core-0.4.0.0.dist-info/RECORD,,
|
sagellm_core/__init__.py
CHANGED
|
@@ -1,19 +1,52 @@
|
|
|
1
1
|
"""sageLLM Core 运行时。
|
|
2
2
|
|
|
3
3
|
本包提供 sageLLM 的核心运行时组件:
|
|
4
|
+
- LLMEngine: 统一的硬件无关推理引擎(vLLM v1 风格)
|
|
5
|
+
- EngineCore: 协调 Scheduler 和 Executor
|
|
6
|
+
- Scheduler: Continuous Batching 调度器
|
|
7
|
+
- Executor: 管理 Worker 执行
|
|
8
|
+
- Worker/ModelRunner: 模型前向传播
|
|
4
9
|
- 配置 schema 与校验
|
|
5
|
-
- Engine 抽象接口与实现
|
|
6
10
|
- Engine 工厂函数
|
|
7
11
|
- 插件系统
|
|
8
12
|
- Demo Runner
|
|
9
13
|
- 分布式 Runtime(PD 分离 MVP)
|
|
10
14
|
- PD 分离执行器
|
|
15
|
+
|
|
16
|
+
Architecture (vLLM v1 style):
|
|
17
|
+
LLMEngine (hardware-agnostic)
|
|
18
|
+
├── EngineCore (coordinates Scheduler and Executor)
|
|
19
|
+
│ └── Scheduler (Continuous Batching)
|
|
20
|
+
└── Executor
|
|
21
|
+
└── Worker
|
|
22
|
+
└── ModelRunner
|
|
23
|
+
├── uses BackendProvider (from sagellm-backend)
|
|
24
|
+
└── uses CommBackend (from sagellm-comm)
|
|
11
25
|
"""
|
|
12
26
|
|
|
13
27
|
from __future__ import annotations
|
|
14
28
|
|
|
15
|
-
__version__ = "0.
|
|
29
|
+
__version__ = "0.4.0.0"
|
|
30
|
+
|
|
31
|
+
# ============================================================================
|
|
32
|
+
# New Architecture (vLLM v1 style) - Hardware Agnostic
|
|
33
|
+
# ============================================================================
|
|
34
|
+
from sagellm_core.llm_engine import LLMEngine, LLMEngineConfig
|
|
35
|
+
from sagellm_core.engine_core import EngineCore
|
|
36
|
+
from sagellm_core.engine_core.engine_core import EngineCoreConfig
|
|
37
|
+
from sagellm_core.engine_core.scheduler import (
|
|
38
|
+
ContinuousBatchingScheduler,
|
|
39
|
+
SchedulerConfig,
|
|
40
|
+
SchedulerOutput,
|
|
41
|
+
)
|
|
42
|
+
from sagellm_core.executor import ExecutorBase, UniprocExecutor
|
|
43
|
+
from sagellm_core.executor.executor_base import ExecutorConfig
|
|
44
|
+
from sagellm_core.worker import Worker
|
|
45
|
+
from sagellm_core.worker.model_runner import ModelRunner
|
|
16
46
|
|
|
47
|
+
# ============================================================================
|
|
48
|
+
# Legacy Architecture (still supported, being refactored)
|
|
49
|
+
# ============================================================================
|
|
17
50
|
from sagellm_core.engine import BaseEngine, EngineInstanceConfig
|
|
18
51
|
from sagellm_core.config import (
|
|
19
52
|
BackendConfig,
|
|
@@ -26,15 +59,11 @@ from sagellm_core.config import (
|
|
|
26
59
|
)
|
|
27
60
|
from sagellm_core.demo import main as demo_main
|
|
28
61
|
from sagellm_core.engine_factory import EngineFactory
|
|
62
|
+
|
|
63
|
+
# Only EmbeddingEngine remains from legacy engines
|
|
29
64
|
from sagellm_core.engines import (
|
|
30
|
-
AscendEngine,
|
|
31
|
-
AscendEngineConfig,
|
|
32
|
-
CPUEngine,
|
|
33
65
|
EmbeddingEngine,
|
|
34
66
|
EmbeddingEngineConfig,
|
|
35
|
-
HFCudaEngine,
|
|
36
|
-
HFCudaEngineConfig,
|
|
37
|
-
create_ascend_engine,
|
|
38
67
|
)
|
|
39
68
|
from sagellm_core.factory import create_backend, create_engine
|
|
40
69
|
from sagellm_core.health import HealthStatus
|
|
@@ -49,55 +78,80 @@ from sagellm_core.pd_executor import PDExecutionContext, PDSeparatedExecutor
|
|
|
49
78
|
from sagellm_core.engine_server import app as engine_server_app
|
|
50
79
|
from sagellm_core.engine_server import main as serve_engine
|
|
51
80
|
|
|
81
|
+
# ============================================================================
|
|
82
|
+
# Phase 2: New Modules (P2 Priority)
|
|
83
|
+
# ============================================================================
|
|
84
|
+
# Model loading utilities
|
|
85
|
+
from sagellm_core.model import ModelLoader, load_model
|
|
86
|
+
|
|
87
|
+
# Input processing
|
|
88
|
+
from sagellm_core.inputs import InputProcessor, ProcessedInput, TokenizerWrapper
|
|
89
|
+
|
|
90
|
+
# Sampling utilities
|
|
91
|
+
from sagellm_core.sampling import SamplingParams, Sampler, GreedySampler
|
|
92
|
+
|
|
93
|
+
# Distributed strategies
|
|
94
|
+
from sagellm_core.distributed import DistributedStrategy, TensorParallelStrategy
|
|
95
|
+
|
|
96
|
+
# Observability
|
|
97
|
+
from sagellm_core.observability import MetricsCollector, EngineMetrics, setup_logger
|
|
98
|
+
|
|
52
99
|
# PyTorch engine (optional, loaded lazily)
|
|
53
100
|
PyTorchEngine = None
|
|
54
|
-
PyTorchEngineConfig = None
|
|
55
|
-
PyTorchEngineInstanceConfig = None
|
|
56
101
|
create_pytorch_engine = None
|
|
57
102
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
try:
|
|
85
|
-
EngineFactory.register(EmbeddingEngine)
|
|
86
|
-
except Exception:
|
|
87
|
-
pass # sentence-transformers not available
|
|
88
|
-
|
|
89
|
-
try:
|
|
90
|
-
if PyTorchEngine is not None:
|
|
91
|
-
EngineFactory.register(PyTorchEngine)
|
|
92
|
-
except Exception:
|
|
93
|
-
pass # torch or transformers not available
|
|
94
|
-
|
|
95
|
-
__version__ = "0.3.0.9"
|
|
103
|
+
# Optional PyTorchEngine import (deprecated, use LLMEngine)
|
|
104
|
+
# try:
|
|
105
|
+
# from sagellm_core.engines.pytorch_engine import (
|
|
106
|
+
# PyTorchEngine,
|
|
107
|
+
# create_pytorch_engine,
|
|
108
|
+
# )
|
|
109
|
+
# except ImportError:
|
|
110
|
+
# pass # torch or transformers not available
|
|
111
|
+
|
|
112
|
+
# =========================================================================
|
|
113
|
+
# DEPRECATED: Old hardware-specific engines have been removed
|
|
114
|
+
# Use LLMEngine instead:
|
|
115
|
+
# from sagellm_core import LLMEngine, LLMEngineConfig
|
|
116
|
+
# engine = LLMEngine(LLMEngineConfig(model="..."))
|
|
117
|
+
#
|
|
118
|
+
# The following engines no longer exist:
|
|
119
|
+
# - CPUEngine → use LLMEngine(backend="cpu")
|
|
120
|
+
# - HFCudaEngine → use LLMEngine(backend="cuda")
|
|
121
|
+
# - AscendEngine → use LLMEngine(backend="ascend")
|
|
122
|
+
# - PyTorchEngine → use LLMEngine
|
|
123
|
+
#
|
|
124
|
+
# EmbeddingEngine is still available for embedding-only use cases.
|
|
125
|
+
# =========================================================================
|
|
126
|
+
|
|
127
|
+
# Version is defined at the top of the file (line 29)
|
|
96
128
|
|
|
97
129
|
__all__ = [
|
|
98
130
|
# Version
|
|
99
131
|
"__version__",
|
|
132
|
+
# =========================================================================
|
|
133
|
+
# New Architecture (vLLM v1 style) - RECOMMENDED
|
|
134
|
+
# =========================================================================
|
|
135
|
+
# LLMEngine - Unified hardware-agnostic engine
|
|
136
|
+
"LLMEngine",
|
|
137
|
+
"LLMEngineConfig",
|
|
138
|
+
# EngineCore - Coordinates Scheduler and Executor
|
|
139
|
+
"EngineCore",
|
|
140
|
+
"EngineCoreConfig",
|
|
141
|
+
# Scheduler - Continuous Batching
|
|
142
|
+
"ContinuousBatchingScheduler",
|
|
143
|
+
"SchedulerConfig",
|
|
144
|
+
"SchedulerOutput",
|
|
145
|
+
# Executor - Manages Workers
|
|
146
|
+
"ExecutorBase",
|
|
147
|
+
"ExecutorConfig",
|
|
148
|
+
"UniprocExecutor",
|
|
149
|
+
# Worker - Model execution
|
|
150
|
+
"Worker",
|
|
151
|
+
"ModelRunner",
|
|
152
|
+
# =========================================================================
|
|
100
153
|
# Configuration (for YAML/config files)
|
|
154
|
+
# =========================================================================
|
|
101
155
|
"BackendConfig",
|
|
102
156
|
"DemoConfig",
|
|
103
157
|
"EngineConfig",
|
|
@@ -110,18 +164,10 @@ __all__ = [
|
|
|
110
164
|
"EngineInstanceConfig", # For runtime engine instantiation
|
|
111
165
|
"HealthStatus",
|
|
112
166
|
# Engine implementations
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
"create_ascend_engine",
|
|
116
|
-
"CPUEngine",
|
|
117
|
-
"HFCudaEngine",
|
|
118
|
-
"HFCudaEngineConfig",
|
|
167
|
+
# DEPRECATED: Old engines removed, use LLMEngine instead
|
|
168
|
+
# Only EmbeddingEngine remains for embedding-only use cases
|
|
119
169
|
"EmbeddingEngine",
|
|
120
170
|
"EmbeddingEngineConfig",
|
|
121
|
-
"PyTorchEngine",
|
|
122
|
-
"PyTorchEngineConfig",
|
|
123
|
-
"PyTorchEngineInstanceConfig",
|
|
124
|
-
"create_pytorch_engine",
|
|
125
171
|
# Factory functions
|
|
126
172
|
"create_backend",
|
|
127
173
|
"create_engine",
|
sagellm_core/__init__.pyc
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Distributed inference strategies for sageLLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from sagellm_core.distributed.strategies import (
|
|
6
|
+
DistributedStrategy,
|
|
7
|
+
TensorParallelStrategy,
|
|
8
|
+
PipelineParallelStrategy,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"DistributedStrategy",
|
|
13
|
+
"TensorParallelStrategy",
|
|
14
|
+
"PipelineParallelStrategy",
|
|
15
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""EngineCore - Coordinates Scheduler and Executor.
|
|
2
|
+
|
|
3
|
+
The EngineCore is responsible for:
|
|
4
|
+
1. Managing request queues
|
|
5
|
+
2. Coordinating with Scheduler for batch formation
|
|
6
|
+
3. Dispatching batches to Executor
|
|
7
|
+
4. Collecting results
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from sagellm_core.engine_core.engine_core import EngineCore
|
|
11
|
+
|
|
12
|
+
__all__ = ["EngineCore"]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Scheduler - Request scheduling for Continuous Batching.
|
|
2
|
+
|
|
3
|
+
The Scheduler is responsible for:
|
|
4
|
+
1. Selecting which requests to run in the next step
|
|
5
|
+
2. Managing prefill vs decode scheduling
|
|
6
|
+
3. Preemption decisions
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from sagellm_core.engine_core.scheduler.scheduler import (
|
|
10
|
+
ContinuousBatchingScheduler,
|
|
11
|
+
SchedulerConfig,
|
|
12
|
+
SchedulerOutput,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"ContinuousBatchingScheduler",
|
|
17
|
+
"SchedulerConfig",
|
|
18
|
+
"SchedulerOutput",
|
|
19
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
sagellm_core/engine_server.pyc
CHANGED
|
Binary file
|
sagellm_core/engines/__init__.py
CHANGED
|
@@ -1,45 +1,29 @@
|
|
|
1
1
|
"""Engine implementations for sageLLM Core.
|
|
2
2
|
|
|
3
|
-
This module
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
DEPRECATED: This module contains legacy engine implementations.
|
|
4
|
+
Use LLMEngine from sagellm_core instead:
|
|
5
|
+
|
|
6
|
+
from sagellm_core import LLMEngine, LLMEngineConfig
|
|
7
|
+
|
|
8
|
+
config = LLMEngineConfig(
|
|
9
|
+
model_path="Qwen/Qwen2-7B",
|
|
10
|
+
backend_type="cuda", # or "cpu", "ascend", "auto"
|
|
11
|
+
)
|
|
12
|
+
engine = LLMEngine(config)
|
|
13
|
+
await engine.start()
|
|
14
|
+
response = await engine.generate("Hello!")
|
|
15
|
+
|
|
16
|
+
Remaining engines:
|
|
17
|
+
- EmbeddingEngine: Embedding model inference (not yet migrated to LLMEngine)
|
|
9
18
|
"""
|
|
10
19
|
|
|
11
20
|
from __future__ import annotations
|
|
12
21
|
|
|
13
|
-
|
|
14
|
-
from sagellm_core.engines.cpu import CPUEngine, create_cpu_engine
|
|
22
|
+
# Only EmbeddingEngine remains - others have been migrated to LLMEngine
|
|
15
23
|
from sagellm_core.engines.embedding import EmbeddingEngine, EmbeddingEngineConfig
|
|
16
|
-
from sagellm_core.engines.hf_cuda import (
|
|
17
|
-
HFCudaEngine,
|
|
18
|
-
HFCudaEngineInstanceConfig,
|
|
19
|
-
create_hf_cuda_engine,
|
|
20
|
-
)
|
|
21
|
-
from sagellm_core.engines.pytorch_engine import PyTorchEngine, create_pytorch_engine
|
|
22
|
-
|
|
23
|
-
# Export aliases for backward compatibility
|
|
24
|
-
HFCudaEngineConfig = HFCudaEngineInstanceConfig
|
|
25
24
|
|
|
26
25
|
__all__ = [
|
|
27
|
-
#
|
|
28
|
-
"AscendEngine",
|
|
29
|
-
"AscendEngineConfig",
|
|
30
|
-
"create_ascend_engine",
|
|
31
|
-
# CPU engine
|
|
32
|
-
"CPUEngine",
|
|
33
|
-
"create_cpu_engine",
|
|
34
|
-
# CUDA engine
|
|
35
|
-
"HFCudaEngine",
|
|
36
|
-
"HFCudaEngineConfig",
|
|
37
|
-
"HFCudaEngineInstanceConfig",
|
|
38
|
-
"create_hf_cuda_engine",
|
|
39
|
-
# PyTorch engine (unified)
|
|
40
|
-
"PyTorchEngine",
|
|
41
|
-
"create_pytorch_engine",
|
|
42
|
-
# Embedding engine
|
|
26
|
+
# Embedding engine (still needed for embedding-only models)
|
|
43
27
|
"EmbeddingEngine",
|
|
44
28
|
"EmbeddingEngineConfig",
|
|
45
29
|
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Executor - Manages Workers for model execution.
|
|
2
|
+
|
|
3
|
+
Executors dispatch scheduled batches to Workers for execution.
|
|
4
|
+
Different executor types handle different parallelism patterns:
|
|
5
|
+
- UniprocExecutor: Single-process execution
|
|
6
|
+
- MultiprocessExecutor: Multi-process execution (planned, not yet available)
|
|
7
|
+
- RayExecutor: Ray-based distributed execution (planned, not yet available)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from sagellm_core.executor.executor_base import ExecutorBase
|
|
11
|
+
from sagellm_core.executor.uniproc_executor import UniprocExecutor
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ExecutorBase",
|
|
15
|
+
"UniprocExecutor",
|
|
16
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Input processing module for sageLLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from sagellm_core.inputs.processor import InputProcessor, ProcessedInput
|
|
6
|
+
from sagellm_core.inputs.tokenizer_utils import TokenizerWrapper
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"InputProcessor",
|
|
10
|
+
"ProcessedInput",
|
|
11
|
+
"TokenizerWrapper",
|
|
12
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Model loading utilities for sageLLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from sagellm_core.model.model_loader import ModelLoader, load_model
|
|
6
|
+
from sagellm_core.model.weight_utils import WeightLoader, QuantizedWeightLoader
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"ModelLoader",
|
|
10
|
+
"load_model",
|
|
11
|
+
"WeightLoader",
|
|
12
|
+
"QuantizedWeightLoader",
|
|
13
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Observability module for sageLLM.
|
|
2
|
+
|
|
3
|
+
Provides structured logging, metrics, and tracing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from sagellm_core.observability.metrics import MetricsCollector, EngineMetrics
|
|
9
|
+
from sagellm_core.observability.logger import setup_logger, get_logger
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"MetricsCollector",
|
|
13
|
+
"EngineMetrics",
|
|
14
|
+
"setup_logger",
|
|
15
|
+
"get_logger",
|
|
16
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
sagellm_core/pd_executor.pyc
CHANGED
|
Binary file
|
sagellm_core/runner.pyc
CHANGED
|
Binary file
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Sampling module for sageLLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from sagellm_core.sampling.params import SamplingParams
|
|
6
|
+
from sagellm_core.sampling.sampler import Sampler, GreedySampler, TopKSampler, TopPSampler
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"SamplingParams",
|
|
10
|
+
"Sampler",
|
|
11
|
+
"GreedySampler",
|
|
12
|
+
"TopKSampler",
|
|
13
|
+
"TopPSampler",
|
|
14
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Worker - Executes model forward passes.
|
|
2
|
+
|
|
3
|
+
Workers are responsible for:
|
|
4
|
+
1. Loading model weights
|
|
5
|
+
2. Running ModelRunner for forward passes
|
|
6
|
+
3. Managing GPU memory (via BackendProvider)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from sagellm_core.worker.worker import Worker
|
|
10
|
+
|
|
11
|
+
__all__ = ["Worker"]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
sagellm_core/__init__.py,sha256=SD78wlVmkv9ZhzcdebPEy2Qr6uWA4FGLf1UJPQOnICE,3691
|
|
2
|
-
sagellm_core/__init__.pyc,sha256=E0UCvZzZHJrdpIFw0xLRu873Vqe3XoaN57Nwu-sdroY,3525
|
|
3
|
-
sagellm_core/__main__.pyc,sha256=48Ej1ycqV-z87qawGOTNBKNgY3EcbffUOOXrDecOR3g,384
|
|
4
|
-
sagellm_core/config.pyc,sha256=cCMLvMVb_s3HzPYSfqlbRWodQ4Uby8U962l1vmunT5Y,8453
|
|
5
|
-
sagellm_core/demo.pyc,sha256=0hZkAJch6ETjSDsPq8XkDLNcd8qAys_hqOH1qi0xBzg,6028
|
|
6
|
-
sagellm_core/engine.pyc,sha256=o5yzFfLhpRx5lEYC2E75q9b04UqsDY63fC4nZdkHyeU,12258
|
|
7
|
-
sagellm_core/engine_factory.pyc,sha256=cNbEImM0MNcCgVeC0zViAT1AZkPAt2WARrVd1U__tS4,14886
|
|
8
|
-
sagellm_core/engine_server.pyc,sha256=oKPJNu_s5v_wBvhkoMfPfCU-bD_pVA9syLcHLfN-rs4,19389
|
|
9
|
-
sagellm_core/factory.pyc,sha256=B0s_JSOjVPwE95Oj2moP_vVA3vCrWsi2753UrwF8-eM,2737
|
|
10
|
-
sagellm_core/health.pyc,sha256=c4A60b1ZANbAqck59AV-kczK2b355EPi9JxoYHRlhl8,684
|
|
11
|
-
sagellm_core/pd_executor.pyc,sha256=1ytnDJi56sTSnZjAa7cRlN_1yQU4t7OJwXYgeUgjqA4,17784
|
|
12
|
-
sagellm_core/plugins.pyc,sha256=g6eke_yH65_7Bu6yfT7v19cZ7gjIAWbkyG7gv1JSXy0,2617
|
|
13
|
-
sagellm_core/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
|
|
14
|
-
sagellm_core/runner.pyc,sha256=GK_t15IH2etN6flRqUui4iUPkD39zl3_rw1wnOSG5Jg,14238
|
|
15
|
-
sagellm_core/runtime.pyc,sha256=RFPMN7Vu9M5O1ZYgFQ9REz4wkMFznRPVPt2ra3u2vns,10018
|
|
16
|
-
sagellm_core/workload.pyc,sha256=G9yMm2L4ZyZpGsCxuEOQ83t78Y_lCQmbg5p6OHl5rn0,4127
|
|
17
|
-
sagellm_core/__pycache__/__init__.cpython-311.pyc,sha256=Ic7SZye-CxAZ_1ftXwr1XnyqmJWXcy9CCoJkRNg9ZGQ,3555
|
|
18
|
-
sagellm_core/__pycache__/base_engine.cpython-311.pyc,sha256=XiynMTxG3srYsUwzIoEbwEJOmpjBBB6UNEoAXDxDgWM,9563
|
|
19
|
-
sagellm_core/__pycache__/config.cpython-311.pyc,sha256=6gUdzUs3m31zGQVa2OzlPcPbX1it2Ziag0cQbCaXvIU,8483
|
|
20
|
-
sagellm_core/__pycache__/demo.cpython-311.pyc,sha256=PN6yZ3PgPWjiZ4XqpqRT-s5YYWitMwR_muYFvPmXdWA,6058
|
|
21
|
-
sagellm_core/__pycache__/engine.cpython-311.pyc,sha256=B6ynwwETTD_kjZ2CvUao_QRjM0EluEWg60Be-nRiVeE,12288
|
|
22
|
-
sagellm_core/__pycache__/engine_factory.cpython-311.pyc,sha256=Qte3LXKSpcOWGhu-Hns7xPvTdkyYry3v_EeYQbfVp4w,14916
|
|
23
|
-
sagellm_core/__pycache__/engine_server.cpython-311.pyc,sha256=m5YWclUbXt_IHuJLesJwEtFQF6zgTVrykD5T_9YznMk,19419
|
|
24
|
-
sagellm_core/__pycache__/factory.cpython-311.pyc,sha256=R4MbGyreD3U2iWmwQ0r14R347GAY9RM_I1xOiWovozo,2767
|
|
25
|
-
sagellm_core/__pycache__/health.cpython-311.pyc,sha256=Z2mo-4iGINZp0w9AmCJomhHGRSzvbwxCfmdMAjg5vc4,714
|
|
26
|
-
sagellm_core/__pycache__/mock_engine.cpython-311.pyc,sha256=zCHNncwIzEgqZ0lc7kiWkS-zOHf7CQ73UhCXztR6mFM,581
|
|
27
|
-
sagellm_core/__pycache__/pd_executor.cpython-311.pyc,sha256=XuMKNOZEYiupvhyQjgN1iM3p2Q0daXQvTNK745BPL5U,17814
|
|
28
|
-
sagellm_core/__pycache__/plugins.cpython-311.pyc,sha256=gaoaHApylGSrmMxXVwFs2v4idiZJnisttawlXTbXBWY,2647
|
|
29
|
-
sagellm_core/__pycache__/runner.cpython-311.pyc,sha256=krW5MagsiQcX5J9fljblEshfsQB_gAIAWR7S0uD-TzU,14268
|
|
30
|
-
sagellm_core/__pycache__/runtime.cpython-311.pyc,sha256=STMbXptWWqislgsqEk4kIXmUYXdFzGDihSeXGWLy_6k,10048
|
|
31
|
-
sagellm_core/__pycache__/workload.cpython-311.pyc,sha256=Y01UUCDE85D8GY5kF_UsHyNCBbMkmsoB3vOUJV-vulA,4157
|
|
32
|
-
sagellm_core/engines/__init__.py,sha256=dK5-xPVkGftvSUDg7B6rNuaMohibckfXrklaWFzz_Jo,1418
|
|
33
|
-
sagellm_core/engines/__init__.pyc,sha256=k_8HvoXGNmRVikV24Dxy0QIoLJ_XLP2aKwu7Dw3CIkQ,1438
|
|
34
|
-
sagellm_core/engines/ascend.pyc,sha256=T2qveWOhxV1AyVwJ5n_q8-TLg0uBJN_G6EZuVpA5rCU,17263
|
|
35
|
-
sagellm_core/engines/cpu.pyc,sha256=CLLNyuRLDIER5Zgny91-eGYUSMxmVVAw4E0rUMJDDsU,27833
|
|
36
|
-
sagellm_core/engines/embedding.pyc,sha256=Be4To-qpamFuGcyFZ1MKPk6XOf_HDBKnBGoIisxNemE,11476
|
|
37
|
-
sagellm_core/engines/hf_cuda.pyc,sha256=zo-Ht70Q3aba7L5xugpc6fwTIRrpkEFe9SeTy3exI-s,61103
|
|
38
|
-
sagellm_core/engines/pytorch.pyc,sha256=MLcymoDnwMduWoWVDZmYuG6UoQavNO-FjddPQ8ccccI,18408
|
|
39
|
-
sagellm_core/engines/pytorch_engine.pyc,sha256=fdzf-VN2hVRWzP8PCmGV-pg9qUIJWWqEH662CRmh5_0,11645
|
|
40
|
-
sagellm_core/engines/__pycache__/__init__.cpython-311.pyc,sha256=dI0jrBb6-rlnTvVQZojWA2n-lUhE9j0aGn4cz3SxmqY,1468
|
|
41
|
-
sagellm_core/engines/__pycache__/ascend.cpython-311.pyc,sha256=UvdVYqCptZqrPhyrbqK2dxnVa9NY-HH_vsXffsblzC8,17293
|
|
42
|
-
sagellm_core/engines/__pycache__/cpu.cpython-311.pyc,sha256=lxuj9LGXjsg7WwB0GXQUsYBphevFENmeLAjKYiswNxY,27863
|
|
43
|
-
sagellm_core/engines/__pycache__/embedding.cpython-311.pyc,sha256=ZNqFpuCsS4Wp0xg7yV3GBbaJ4G0saqGNaKMdivxEBkA,11506
|
|
44
|
-
sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc,sha256=WcWhj1ktaGONooxWJZKRv-os5Hw-InlfV2ZfZ6myrLY,61133
|
|
45
|
-
sagellm_core/engines/__pycache__/mock.cpython-311.pyc,sha256=1g9YyKfo9yv-6VTjzzfacv-9ZICHjsVXjyJ_IajRnQo,14641
|
|
46
|
-
sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc,sha256=MRSQN2ZBJmuIxFIA_yflaxarFhslfT0SG0Lw7-EFyDQ,18438
|
|
47
|
-
sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc,sha256=dLBcBODxEqgmH25nCBPWhwy3uu3-dqzyzOwQy-izN7A,11675
|
|
48
|
-
isagellm_core-0.3.0.10.dist-info/METADATA,sha256=xTzEVbHCbeZr5S3gTu6lTvDy9QR_1I34DiU2iWvS2-0,9254
|
|
49
|
-
isagellm_core-0.3.0.10.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
|
|
50
|
-
isagellm_core-0.3.0.10.dist-info/entry_points.txt,sha256=QWQd0kFD5erCgpS6DqfLpgkbD03vLf1ouojaUXHGX8w,312
|
|
51
|
-
isagellm_core-0.3.0.10.dist-info/top_level.txt,sha256=wcgdWrvkaoYYh_dWSFI5Toi8PZsHutVqfhTB2tb0K6g,13
|
|
52
|
-
isagellm_core-0.3.0.10.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
[console_scripts]
|
|
2
|
-
sage-engine = sagellm_core.engine_server:main
|
|
3
|
-
|
|
4
|
-
[sagellm.engines]
|
|
5
|
-
ascend = sagellm_core.engines.ascend:create_ascend_engine
|
|
6
|
-
cpu = sagellm_core.engines.cpu:create_cpu_engine
|
|
7
|
-
hf-cuda = sagellm_core.engines.hf_cuda:create_hf_cuda_engine
|
|
8
|
-
pytorch = sagellm_core.engines.pytorch:create_pytorch_engine
|
sagellm_core/engines/ascend.pyc
DELETED
|
Binary file
|
sagellm_core/engines/cpu.pyc
DELETED
|
Binary file
|
sagellm_core/engines/hf_cuda.pyc
DELETED
|
Binary file
|
sagellm_core/engines/pytorch.pyc
DELETED
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
File without changes
|