isagellm-core 0.3.0.9__tar.gz → 0.4.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. {isagellm_core-0.3.0.9/src/isagellm_core.egg-info → isagellm_core-0.4.0.0}/PKG-INFO +11 -12
  2. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/README.md +6 -7
  3. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/pyproject.toml +17 -10
  4. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0/src/isagellm_core.egg-info}/PKG-INFO +11 -12
  5. isagellm_core-0.4.0.0/src/isagellm_core.egg-info/SOURCES.txt +129 -0
  6. isagellm_core-0.4.0.0/src/isagellm_core.egg-info/entry_points.txt +2 -0
  7. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/isagellm_core.egg-info/requires.txt +4 -4
  8. isagellm_core-0.4.0.0/src/sagellm_core/__init__.py +192 -0
  9. isagellm_core-0.4.0.0/src/sagellm_core/__init__.pyc +0 -0
  10. isagellm_core-0.4.0.0/src/sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
  11. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/engine_server.cpython-311.pyc +0 -0
  12. isagellm_core-0.4.0.0/src/sagellm_core/__pycache__/llm_engine.cpython-311.pyc +0 -0
  13. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/pd_executor.cpython-311.pyc +0 -0
  14. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/runner.cpython-311.pyc +0 -0
  15. isagellm_core-0.4.0.0/src/sagellm_core/distributed/__init__.py +15 -0
  16. isagellm_core-0.4.0.0/src/sagellm_core/distributed/__init__.pyc +0 -0
  17. isagellm_core-0.4.0.0/src/sagellm_core/distributed/__pycache__/__init__.cpython-311.pyc +0 -0
  18. isagellm_core-0.4.0.0/src/sagellm_core/distributed/__pycache__/strategies.cpython-311.pyc +0 -0
  19. isagellm_core-0.4.0.0/src/sagellm_core/distributed/strategies.pyc +0 -0
  20. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__init__.py +12 -0
  21. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__init__.pyc +0 -0
  22. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__pycache__/__init__.cpython-311.pyc +0 -0
  23. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/__pycache__/engine_core.cpython-311.pyc +0 -0
  24. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/engine_core.pyc +0 -0
  25. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__init__.py +19 -0
  26. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__init__.pyc +0 -0
  27. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc +0 -0
  28. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc +0 -0
  29. isagellm_core-0.4.0.0/src/sagellm_core/engine_core/scheduler/scheduler.pyc +0 -0
  30. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engine_server.pyc +0 -0
  31. isagellm_core-0.4.0.0/src/sagellm_core/engines/__init__.py +29 -0
  32. isagellm_core-0.4.0.0/src/sagellm_core/engines/__init__.pyc +0 -0
  33. isagellm_core-0.4.0.0/src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
  34. isagellm_core-0.4.0.0/src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc +0 -0
  35. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/embedding.cpython-311.pyc +0 -0
  36. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/embedding.pyc +0 -0
  37. isagellm_core-0.4.0.0/src/sagellm_core/executor/__init__.py +16 -0
  38. isagellm_core-0.4.0.0/src/sagellm_core/executor/__init__.pyc +0 -0
  39. isagellm_core-0.4.0.0/src/sagellm_core/executor/__pycache__/__init__.cpython-311.pyc +0 -0
  40. isagellm_core-0.4.0.0/src/sagellm_core/executor/__pycache__/executor_base.cpython-311.pyc +0 -0
  41. isagellm_core-0.4.0.0/src/sagellm_core/executor/__pycache__/uniproc_executor.cpython-311.pyc +0 -0
  42. isagellm_core-0.4.0.0/src/sagellm_core/executor/executor_base.pyc +0 -0
  43. isagellm_core-0.4.0.0/src/sagellm_core/executor/uniproc_executor.pyc +0 -0
  44. isagellm_core-0.4.0.0/src/sagellm_core/inputs/__init__.py +12 -0
  45. isagellm_core-0.4.0.0/src/sagellm_core/inputs/__init__.pyc +0 -0
  46. isagellm_core-0.4.0.0/src/sagellm_core/inputs/__pycache__/__init__.cpython-311.pyc +0 -0
  47. isagellm_core-0.4.0.0/src/sagellm_core/inputs/__pycache__/processor.cpython-311.pyc +0 -0
  48. isagellm_core-0.4.0.0/src/sagellm_core/inputs/__pycache__/tokenizer_utils.cpython-311.pyc +0 -0
  49. isagellm_core-0.4.0.0/src/sagellm_core/inputs/processor.pyc +0 -0
  50. isagellm_core-0.4.0.0/src/sagellm_core/inputs/tokenizer_utils.pyc +0 -0
  51. isagellm_core-0.4.0.0/src/sagellm_core/llm_engine.pyc +0 -0
  52. isagellm_core-0.4.0.0/src/sagellm_core/model/__init__.py +13 -0
  53. isagellm_core-0.4.0.0/src/sagellm_core/model/__init__.pyc +0 -0
  54. isagellm_core-0.4.0.0/src/sagellm_core/model/__pycache__/__init__.cpython-311.pyc +0 -0
  55. isagellm_core-0.4.0.0/src/sagellm_core/model/__pycache__/model_loader.cpython-311.pyc +0 -0
  56. isagellm_core-0.4.0.0/src/sagellm_core/model/__pycache__/weight_utils.cpython-311.pyc +0 -0
  57. isagellm_core-0.4.0.0/src/sagellm_core/model/model_loader.pyc +0 -0
  58. isagellm_core-0.4.0.0/src/sagellm_core/model/weight_utils.pyc +0 -0
  59. isagellm_core-0.4.0.0/src/sagellm_core/observability/__init__.py +16 -0
  60. isagellm_core-0.4.0.0/src/sagellm_core/observability/__init__.pyc +0 -0
  61. isagellm_core-0.4.0.0/src/sagellm_core/observability/__pycache__/__init__.cpython-311.pyc +0 -0
  62. isagellm_core-0.4.0.0/src/sagellm_core/observability/__pycache__/logger.cpython-311.pyc +0 -0
  63. isagellm_core-0.4.0.0/src/sagellm_core/observability/__pycache__/metrics.cpython-311.pyc +0 -0
  64. isagellm_core-0.4.0.0/src/sagellm_core/observability/logger.pyc +0 -0
  65. isagellm_core-0.4.0.0/src/sagellm_core/observability/metrics.pyc +0 -0
  66. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/pd_executor.pyc +0 -0
  67. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/runner.pyc +0 -0
  68. isagellm_core-0.4.0.0/src/sagellm_core/sampling/__init__.py +14 -0
  69. isagellm_core-0.4.0.0/src/sagellm_core/sampling/__init__.pyc +0 -0
  70. isagellm_core-0.4.0.0/src/sagellm_core/sampling/__pycache__/__init__.cpython-311.pyc +0 -0
  71. isagellm_core-0.4.0.0/src/sagellm_core/sampling/__pycache__/params.cpython-311.pyc +0 -0
  72. isagellm_core-0.4.0.0/src/sagellm_core/sampling/__pycache__/sampler.cpython-311.pyc +0 -0
  73. isagellm_core-0.4.0.0/src/sagellm_core/sampling/params.pyc +0 -0
  74. isagellm_core-0.4.0.0/src/sagellm_core/sampling/sampler.pyc +0 -0
  75. isagellm_core-0.4.0.0/src/sagellm_core/worker/__init__.py +11 -0
  76. isagellm_core-0.4.0.0/src/sagellm_core/worker/__init__.pyc +0 -0
  77. isagellm_core-0.4.0.0/src/sagellm_core/worker/__pycache__/__init__.cpython-311.pyc +0 -0
  78. isagellm_core-0.4.0.0/src/sagellm_core/worker/__pycache__/worker.cpython-311.pyc +0 -0
  79. isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__init__.py +8 -0
  80. isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__init__.pyc +0 -0
  81. isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__pycache__/__init__.cpython-311.pyc +0 -0
  82. isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/__pycache__/model_runner.cpython-311.pyc +0 -0
  83. isagellm_core-0.4.0.0/src/sagellm_core/worker/model_runner/model_runner.pyc +0 -0
  84. isagellm_core-0.4.0.0/src/sagellm_core/worker/worker.pyc +0 -0
  85. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_ci_smoke.py +19 -22
  86. isagellm_core-0.3.0.9/tests/test_e2e_cpu_integration.py → isagellm_core-0.4.0.0/tests/test_e2e_llm_integration.py +69 -31
  87. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_engine.py +29 -35
  88. isagellm_core-0.4.0.0/tests/test_engine_behavior_parity.py +124 -0
  89. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_engine_contract_simplified.py +4 -2
  90. isagellm_core-0.3.0.9/tests/test_cpu_engine_contract.py → isagellm_core-0.4.0.0/tests/test_llm_engine_contract.py +108 -71
  91. isagellm_core-0.3.0.9/tests/test_cpu_engine_error_handling.py → isagellm_core-0.4.0.0/tests/test_llm_engine_error_handling.py +86 -60
  92. isagellm_core-0.4.0.0/tests/test_model_loader.py +49 -0
  93. isagellm_core-0.4.0.0/tests/test_observability.py +89 -0
  94. isagellm_core-0.4.0.0/tests/test_pd_separation.py +108 -0
  95. isagellm_core-0.4.0.0/tests/test_sampling.py +80 -0
  96. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_streaming_pd.py +24 -16
  97. isagellm_core-0.3.0.9/src/isagellm_core.egg-info/SOURCES.txt +0 -71
  98. isagellm_core-0.3.0.9/src/isagellm_core.egg-info/entry_points.txt +0 -8
  99. isagellm_core-0.3.0.9/src/sagellm_core/__init__.py +0 -146
  100. isagellm_core-0.3.0.9/src/sagellm_core/__init__.pyc +0 -0
  101. isagellm_core-0.3.0.9/src/sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
  102. isagellm_core-0.3.0.9/src/sagellm_core/engines/__init__.py +0 -45
  103. isagellm_core-0.3.0.9/src/sagellm_core/engines/__init__.pyc +0 -0
  104. isagellm_core-0.3.0.9/src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
  105. isagellm_core-0.3.0.9/src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc +0 -0
  106. isagellm_core-0.3.0.9/src/sagellm_core/engines/ascend.pyc +0 -0
  107. isagellm_core-0.3.0.9/src/sagellm_core/engines/cpu.pyc +0 -0
  108. isagellm_core-0.3.0.9/src/sagellm_core/engines/hf_cuda.pyc +0 -0
  109. isagellm_core-0.3.0.9/src/sagellm_core/engines/pytorch.pyc +0 -0
  110. isagellm_core-0.3.0.9/src/sagellm_core/engines/pytorch_engine.pyc +0 -0
  111. isagellm_core-0.3.0.9/tests/test_engine_behavior_parity.py +0 -154
  112. isagellm_core-0.3.0.9/tests/test_engine_contract.py +0 -361
  113. isagellm_core-0.3.0.9/tests/test_pd_separation.py +0 -207
  114. isagellm_core-0.3.0.9/tests/test_pytorch_engine.py +0 -81
  115. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/MANIFEST.in +0 -0
  116. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/setup.cfg +0 -0
  117. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/setup.py +0 -0
  118. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/isagellm_core.egg-info/dependency_links.txt +0 -0
  119. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/isagellm_core.egg-info/top_level.txt +0 -0
  120. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__main__.pyc +0 -0
  121. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/base_engine.cpython-311.pyc +0 -0
  122. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/config.cpython-311.pyc +0 -0
  123. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/demo.cpython-311.pyc +0 -0
  124. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/engine.cpython-311.pyc +0 -0
  125. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/engine_factory.cpython-311.pyc +0 -0
  126. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/factory.cpython-311.pyc +0 -0
  127. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/health.cpython-311.pyc +0 -0
  128. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/mock_engine.cpython-311.pyc +0 -0
  129. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/plugins.cpython-311.pyc +0 -0
  130. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/runtime.cpython-311.pyc +0 -0
  131. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/__pycache__/workload.cpython-311.pyc +0 -0
  132. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/config.pyc +0 -0
  133. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/demo.pyc +0 -0
  134. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engine.pyc +0 -0
  135. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engine_factory.pyc +0 -0
  136. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc +0 -0
  137. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc +0 -0
  138. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/mock.cpython-311.pyc +0 -0
  139. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc +0 -0
  140. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc +0 -0
  141. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/factory.pyc +0 -0
  142. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/health.pyc +0 -0
  143. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/plugins.pyc +0 -0
  144. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/py.typed +0 -0
  145. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/runtime.pyc +0 -0
  146. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/src/sagellm_core/workload.pyc +0 -0
  147. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_config.py +0 -0
  148. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_engine_server.py +0 -0
  149. {isagellm_core-0.3.0.9 → isagellm_core-0.4.0.0}/tests/test_task0_10_workload.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isagellm-core
3
- Version: 0.3.0.9
3
+ Version: 0.4.0.0
4
4
  Summary: sageLLM core runtime with PD separation (MVP)
5
5
  Author: IntelliStream Team
6
6
  License: Proprietary - IntelliStream
@@ -13,10 +13,10 @@ Requires-Python: ==3.11.*
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: pydantic>=2.0.0
15
15
  Requires-Dist: pyyaml>=6.0.0
16
- Requires-Dist: isagellm-protocol<0.4.0,>=0.3.0.2
17
- Requires-Dist: isagellm-backend<0.4.0,>=0.3.0.5
18
- Requires-Dist: isagellm-comm<0.4.0,>=0.3.0.1
19
- Requires-Dist: isagellm-kv-cache<0.4.0,>=0.3.0.1
16
+ Requires-Dist: isagellm-protocol<0.5.0,>=0.3.0.2
17
+ Requires-Dist: isagellm-backend<0.5.0,>=0.4.0.0
18
+ Requires-Dist: isagellm-comm<0.5.0,>=0.4.0.0
19
+ Requires-Dist: isagellm-kv-cache<0.5.0,>=0.3.0.1
20
20
  Requires-Dist: fastapi>=0.100.0
21
21
  Requires-Dist: uvicorn>=0.22.0
22
22
  Provides-Extra: dev
@@ -50,10 +50,10 @@ sageLLM Core - 引擎协调层与运行时系统
50
50
  ┌─────────────────────────────────────────────────────────────┐
51
51
  │ sagellm-core (引擎协调层) ← 本仓库 │
52
52
  │ ┌─────────────────────────────────────────────────────┐ │
53
- │ │ Engine Abstraction (BaseEngine, EngineFactory) │ │
54
- │ │ • CPUEngine, HFCudaEngine │ │
55
- │ │ • 自描述架构 (is_available, priority) │ │
56
- │ │ • 引擎自动发现与注册 │ │
53
+ │ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
54
+ │ │ • 统一推理接口: generate, stream, execute │ │
55
+ │ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
56
+ │ │ • 配置驱动 (LLMEngineConfig) │ │
57
57
  │ └─────────────────────────────────────────────────────┘ │
58
58
  │ ┌─────────────────────────────────────────────────────┐ │
59
59
  │ │ Configuration System (config.py) │ │
@@ -68,9 +68,8 @@ sageLLM Core - 引擎协调层与运行时系统
68
68
  ```
69
69
 
70
70
  **职责分离**:
71
- - **Backend 不再包含**:BaseEngine, EngineFactory(已移至 core)
72
- - ✅ **Core 负责**:引擎接口、工厂、运行时、配置、协调
73
- - ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
71
+ - **Core 负责**:LLMEngine (硬件无关)、配置、协调
72
+ - ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
74
73
 
75
74
  ## Features
76
75
 
@@ -19,10 +19,10 @@ sageLLM Core - 引擎协调层与运行时系统
19
19
  ┌─────────────────────────────────────────────────────────────┐
20
20
  │ sagellm-core (引擎协调层) ← 本仓库 │
21
21
  │ ┌─────────────────────────────────────────────────────┐ │
22
- │ │ Engine Abstraction (BaseEngine, EngineFactory) │ │
23
- │ │ • CPUEngine, HFCudaEngine │ │
24
- │ │ • 自描述架构 (is_available, priority) │ │
25
- │ │ • 引擎自动发现与注册 │ │
22
+ │ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
23
+ │ │ • 统一推理接口: generate, stream, execute │ │
24
+ │ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
25
+ │ │ • 配置驱动 (LLMEngineConfig) │ │
26
26
  │ └─────────────────────────────────────────────────────┘ │
27
27
  │ ┌─────────────────────────────────────────────────────┐ │
28
28
  │ │ Configuration System (config.py) │ │
@@ -37,9 +37,8 @@ sageLLM Core - 引擎协调层与运行时系统
37
37
  ```
38
38
 
39
39
  **职责分离**:
40
- - **Backend 不再包含**:BaseEngine, EngineFactory(已移至 core)
41
- - ✅ **Core 负责**:引擎接口、工厂、运行时、配置、协调
42
- - ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
40
+ - **Core 负责**:LLMEngine (硬件无关)、配置、协调
41
+ - ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
43
42
 
44
43
  ## Features
45
44
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "isagellm-core"
7
- version = "0.3.0.9"
7
+ version = "0.4.0.0"
8
8
  description = "sageLLM core runtime with PD separation (MVP)"
9
9
  readme = "README.md"
10
10
  requires-python = "==3.11.*"
@@ -20,10 +20,10 @@ classifiers = [
20
20
  dependencies = [
21
21
  "pydantic>=2.0.0",
22
22
  "pyyaml>=6.0.0",
23
- "isagellm-protocol>=0.3.0.2,<0.4.0",
24
- "isagellm-backend>=0.3.0.5,<0.4.0",
25
- "isagellm-comm>=0.3.0.1,<0.4.0",
26
- "isagellm-kv-cache>=0.3.0.1,<0.4.0",
23
+ "isagellm-protocol>=0.3.0.2,<0.5.0",
24
+ "isagellm-backend>=0.4.0.0,<0.5.0",
25
+ "isagellm-comm>=0.4.0.0,<0.5.0",
26
+ "isagellm-kv-cache>=0.3.0.1,<0.5.0",
27
27
  "fastapi>=0.100.0",
28
28
  "uvicorn>=0.22.0",
29
29
  ]
@@ -57,11 +57,18 @@ line-length = 100
57
57
  [project.scripts]
58
58
  sage-engine = "sagellm_core.engine_server:main"
59
59
 
60
- [project.entry-points."sagellm.engines"]
61
- cpu = "sagellm_core.engines.cpu:create_cpu_engine"
62
- hf-cuda = "sagellm_core.engines.hf_cuda:create_hf_cuda_engine"
63
- pytorch = "sagellm_core.engines.pytorch:create_pytorch_engine"
64
- ascend = "sagellm_core.engines.ascend:create_ascend_engine"
60
+ # DEPRECATED: Old hardware-specific engines have been removed
61
+ # Use LLMEngine with BackendProvider instead:
62
+ # from sagellm_core import LLMEngine, LLMEngineConfig
63
+ # engine = LLMEngine(LLMEngineConfig(model="...", backend="cpu"))
64
+ #
65
+ # The entry-points system is being phased out in favor of the unified
66
+ # LLMEngine + BackendProvider architecture (vLLM v1 style).
67
+ # [project.entry-points."sagellm.engines"]
68
+ # cpu = "sagellm_core.engines.cpu:create_cpu_engine"
69
+ # hf-cuda = "sagellm_core.engines.hf_cuda:create_hf_cuda_engine"
70
+ # pytorch = "sagellm_core.engines.pytorch:create_pytorch_engine"
71
+ # ascend = "sagellm_core.engines.ascend:create_ascend_engine"
65
72
 
66
73
  [tool.mypy]
67
74
  python_version = "3.10"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isagellm-core
3
- Version: 0.3.0.9
3
+ Version: 0.4.0.0
4
4
  Summary: sageLLM core runtime with PD separation (MVP)
5
5
  Author: IntelliStream Team
6
6
  License: Proprietary - IntelliStream
@@ -13,10 +13,10 @@ Requires-Python: ==3.11.*
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: pydantic>=2.0.0
15
15
  Requires-Dist: pyyaml>=6.0.0
16
- Requires-Dist: isagellm-protocol<0.4.0,>=0.3.0.2
17
- Requires-Dist: isagellm-backend<0.4.0,>=0.3.0.5
18
- Requires-Dist: isagellm-comm<0.4.0,>=0.3.0.1
19
- Requires-Dist: isagellm-kv-cache<0.4.0,>=0.3.0.1
16
+ Requires-Dist: isagellm-protocol<0.5.0,>=0.3.0.2
17
+ Requires-Dist: isagellm-backend<0.5.0,>=0.4.0.0
18
+ Requires-Dist: isagellm-comm<0.5.0,>=0.4.0.0
19
+ Requires-Dist: isagellm-kv-cache<0.5.0,>=0.3.0.1
20
20
  Requires-Dist: fastapi>=0.100.0
21
21
  Requires-Dist: uvicorn>=0.22.0
22
22
  Provides-Extra: dev
@@ -50,10 +50,10 @@ sageLLM Core - 引擎协调层与运行时系统
50
50
  ┌─────────────────────────────────────────────────────────────┐
51
51
  │ sagellm-core (引擎协调层) ← 本仓库 │
52
52
  │ ┌─────────────────────────────────────────────────────┐ │
53
- │ │ Engine Abstraction (BaseEngine, EngineFactory) │ │
54
- │ │ • CPUEngine, HFCudaEngine │ │
55
- │ │ • 自描述架构 (is_available, priority) │ │
56
- │ │ • 引擎自动发现与注册 │ │
53
+ │ │ LLMEngine (Hardware-Agnostic, vLLM v1 style) │ │
54
+ │ │ • 统一推理接口: generate, stream, execute │ │
55
+ │ │ • 自动后端选择 (auto-detect cuda/ascend/cpu) │ │
56
+ │ │ • 配置驱动 (LLMEngineConfig) │ │
57
57
  │ └─────────────────────────────────────────────────────┘ │
58
58
  │ ┌─────────────────────────────────────────────────────┐ │
59
59
  │ │ Configuration System (config.py) │ │
@@ -68,9 +68,8 @@ sageLLM Core - 引擎协调层与运行时系统
68
68
  ```
69
69
 
70
70
  **职责分离**:
71
- - **Backend 不再包含**:BaseEngine, EngineFactory(已移至 core)
72
- - ✅ **Core 负责**:引擎接口、工厂、运行时、配置、协调
73
- - ✅ **Backend 负责**:硬件抽象、设备管理、内存原语
71
+ - **Core 负责**:LLMEngine (硬件无关)、配置、协调
72
+ - ✅ **Backend 负责**:硬件抽象、设备管理、Provider 实现
74
73
 
75
74
  ## Features
76
75
 
@@ -0,0 +1,129 @@
1
+ MANIFEST.in
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ src/isagellm_core.egg-info/PKG-INFO
6
+ src/isagellm_core.egg-info/SOURCES.txt
7
+ src/isagellm_core.egg-info/dependency_links.txt
8
+ src/isagellm_core.egg-info/entry_points.txt
9
+ src/isagellm_core.egg-info/requires.txt
10
+ src/isagellm_core.egg-info/top_level.txt
11
+ src/sagellm_core/__init__.py
12
+ src/sagellm_core/__init__.pyc
13
+ src/sagellm_core/__main__.pyc
14
+ src/sagellm_core/config.pyc
15
+ src/sagellm_core/demo.pyc
16
+ src/sagellm_core/engine.pyc
17
+ src/sagellm_core/engine_factory.pyc
18
+ src/sagellm_core/engine_server.pyc
19
+ src/sagellm_core/factory.pyc
20
+ src/sagellm_core/health.pyc
21
+ src/sagellm_core/llm_engine.pyc
22
+ src/sagellm_core/pd_executor.pyc
23
+ src/sagellm_core/plugins.pyc
24
+ src/sagellm_core/py.typed
25
+ src/sagellm_core/runner.pyc
26
+ src/sagellm_core/runtime.pyc
27
+ src/sagellm_core/workload.pyc
28
+ src/sagellm_core/__pycache__/__init__.cpython-311.pyc
29
+ src/sagellm_core/__pycache__/base_engine.cpython-311.pyc
30
+ src/sagellm_core/__pycache__/config.cpython-311.pyc
31
+ src/sagellm_core/__pycache__/demo.cpython-311.pyc
32
+ src/sagellm_core/__pycache__/engine.cpython-311.pyc
33
+ src/sagellm_core/__pycache__/engine_factory.cpython-311.pyc
34
+ src/sagellm_core/__pycache__/engine_server.cpython-311.pyc
35
+ src/sagellm_core/__pycache__/factory.cpython-311.pyc
36
+ src/sagellm_core/__pycache__/health.cpython-311.pyc
37
+ src/sagellm_core/__pycache__/llm_engine.cpython-311.pyc
38
+ src/sagellm_core/__pycache__/mock_engine.cpython-311.pyc
39
+ src/sagellm_core/__pycache__/pd_executor.cpython-311.pyc
40
+ src/sagellm_core/__pycache__/plugins.cpython-311.pyc
41
+ src/sagellm_core/__pycache__/runner.cpython-311.pyc
42
+ src/sagellm_core/__pycache__/runtime.cpython-311.pyc
43
+ src/sagellm_core/__pycache__/workload.cpython-311.pyc
44
+ src/sagellm_core/distributed/__init__.py
45
+ src/sagellm_core/distributed/__init__.pyc
46
+ src/sagellm_core/distributed/strategies.pyc
47
+ src/sagellm_core/distributed/__pycache__/__init__.cpython-311.pyc
48
+ src/sagellm_core/distributed/__pycache__/strategies.cpython-311.pyc
49
+ src/sagellm_core/engine_core/__init__.py
50
+ src/sagellm_core/engine_core/__init__.pyc
51
+ src/sagellm_core/engine_core/engine_core.pyc
52
+ src/sagellm_core/engine_core/__pycache__/__init__.cpython-311.pyc
53
+ src/sagellm_core/engine_core/__pycache__/engine_core.cpython-311.pyc
54
+ src/sagellm_core/engine_core/scheduler/__init__.py
55
+ src/sagellm_core/engine_core/scheduler/__init__.pyc
56
+ src/sagellm_core/engine_core/scheduler/scheduler.pyc
57
+ src/sagellm_core/engine_core/scheduler/__pycache__/__init__.cpython-311.pyc
58
+ src/sagellm_core/engine_core/scheduler/__pycache__/scheduler.cpython-311.pyc
59
+ src/sagellm_core/engines/__init__.py
60
+ src/sagellm_core/engines/__init__.pyc
61
+ src/sagellm_core/engines/embedding.pyc
62
+ src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc
63
+ src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc
64
+ src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc
65
+ src/sagellm_core/engines/__pycache__/embedding.cpython-311.pyc
66
+ src/sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc
67
+ src/sagellm_core/engines/__pycache__/mock.cpython-311.pyc
68
+ src/sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc
69
+ src/sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc
70
+ src/sagellm_core/executor/__init__.py
71
+ src/sagellm_core/executor/__init__.pyc
72
+ src/sagellm_core/executor/executor_base.pyc
73
+ src/sagellm_core/executor/uniproc_executor.pyc
74
+ src/sagellm_core/executor/__pycache__/__init__.cpython-311.pyc
75
+ src/sagellm_core/executor/__pycache__/executor_base.cpython-311.pyc
76
+ src/sagellm_core/executor/__pycache__/uniproc_executor.cpython-311.pyc
77
+ src/sagellm_core/inputs/__init__.py
78
+ src/sagellm_core/inputs/__init__.pyc
79
+ src/sagellm_core/inputs/processor.pyc
80
+ src/sagellm_core/inputs/tokenizer_utils.pyc
81
+ src/sagellm_core/inputs/__pycache__/__init__.cpython-311.pyc
82
+ src/sagellm_core/inputs/__pycache__/processor.cpython-311.pyc
83
+ src/sagellm_core/inputs/__pycache__/tokenizer_utils.cpython-311.pyc
84
+ src/sagellm_core/model/__init__.py
85
+ src/sagellm_core/model/__init__.pyc
86
+ src/sagellm_core/model/model_loader.pyc
87
+ src/sagellm_core/model/weight_utils.pyc
88
+ src/sagellm_core/model/__pycache__/__init__.cpython-311.pyc
89
+ src/sagellm_core/model/__pycache__/model_loader.cpython-311.pyc
90
+ src/sagellm_core/model/__pycache__/weight_utils.cpython-311.pyc
91
+ src/sagellm_core/observability/__init__.py
92
+ src/sagellm_core/observability/__init__.pyc
93
+ src/sagellm_core/observability/logger.pyc
94
+ src/sagellm_core/observability/metrics.pyc
95
+ src/sagellm_core/observability/__pycache__/__init__.cpython-311.pyc
96
+ src/sagellm_core/observability/__pycache__/logger.cpython-311.pyc
97
+ src/sagellm_core/observability/__pycache__/metrics.cpython-311.pyc
98
+ src/sagellm_core/sampling/__init__.py
99
+ src/sagellm_core/sampling/__init__.pyc
100
+ src/sagellm_core/sampling/params.pyc
101
+ src/sagellm_core/sampling/sampler.pyc
102
+ src/sagellm_core/sampling/__pycache__/__init__.cpython-311.pyc
103
+ src/sagellm_core/sampling/__pycache__/params.cpython-311.pyc
104
+ src/sagellm_core/sampling/__pycache__/sampler.cpython-311.pyc
105
+ src/sagellm_core/worker/__init__.py
106
+ src/sagellm_core/worker/__init__.pyc
107
+ src/sagellm_core/worker/worker.pyc
108
+ src/sagellm_core/worker/__pycache__/__init__.cpython-311.pyc
109
+ src/sagellm_core/worker/__pycache__/worker.cpython-311.pyc
110
+ src/sagellm_core/worker/model_runner/__init__.py
111
+ src/sagellm_core/worker/model_runner/__init__.pyc
112
+ src/sagellm_core/worker/model_runner/model_runner.pyc
113
+ src/sagellm_core/worker/model_runner/__pycache__/__init__.cpython-311.pyc
114
+ src/sagellm_core/worker/model_runner/__pycache__/model_runner.cpython-311.pyc
115
+ tests/test_ci_smoke.py
116
+ tests/test_config.py
117
+ tests/test_e2e_llm_integration.py
118
+ tests/test_engine.py
119
+ tests/test_engine_behavior_parity.py
120
+ tests/test_engine_contract_simplified.py
121
+ tests/test_engine_server.py
122
+ tests/test_llm_engine_contract.py
123
+ tests/test_llm_engine_error_handling.py
124
+ tests/test_model_loader.py
125
+ tests/test_observability.py
126
+ tests/test_pd_separation.py
127
+ tests/test_sampling.py
128
+ tests/test_streaming_pd.py
129
+ tests/test_task0_10_workload.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ sage-engine = sagellm_core.engine_server:main
@@ -1,9 +1,9 @@
1
1
  pydantic>=2.0.0
2
2
  pyyaml>=6.0.0
3
- isagellm-protocol<0.4.0,>=0.3.0.2
4
- isagellm-backend<0.4.0,>=0.3.0.5
5
- isagellm-comm<0.4.0,>=0.3.0.1
6
- isagellm-kv-cache<0.4.0,>=0.3.0.1
3
+ isagellm-protocol<0.5.0,>=0.3.0.2
4
+ isagellm-backend<0.5.0,>=0.4.0.0
5
+ isagellm-comm<0.5.0,>=0.4.0.0
6
+ isagellm-kv-cache<0.5.0,>=0.3.0.1
7
7
  fastapi>=0.100.0
8
8
  uvicorn>=0.22.0
9
9
 
@@ -0,0 +1,192 @@
1
+ """sageLLM Core 运行时。
2
+
3
+ 本包提供 sageLLM 的核心运行时组件:
4
+ - LLMEngine: 统一的硬件无关推理引擎(vLLM v1 风格)
5
+ - EngineCore: 协调 Scheduler 和 Executor
6
+ - Scheduler: Continuous Batching 调度器
7
+ - Executor: 管理 Worker 执行
8
+ - Worker/ModelRunner: 模型前向传播
9
+ - 配置 schema 与校验
10
+ - Engine 工厂函数
11
+ - 插件系统
12
+ - Demo Runner
13
+ - 分布式 Runtime(PD 分离 MVP)
14
+ - PD 分离执行器
15
+
16
+ Architecture (vLLM v1 style):
17
+ LLMEngine (hardware-agnostic)
18
+ ├── EngineCore (coordinates Scheduler and Executor)
19
+ │ └── Scheduler (Continuous Batching)
20
+ └── Executor
21
+ └── Worker
22
+ └── ModelRunner
23
+ ├── uses BackendProvider (from sagellm-backend)
24
+ └── uses CommBackend (from sagellm-comm)
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ __version__ = "0.4.0.0"
30
+
31
+ # ============================================================================
32
+ # New Architecture (vLLM v1 style) - Hardware Agnostic
33
+ # ============================================================================
34
+ from sagellm_core.llm_engine import LLMEngine, LLMEngineConfig
35
+ from sagellm_core.engine_core import EngineCore
36
+ from sagellm_core.engine_core.engine_core import EngineCoreConfig
37
+ from sagellm_core.engine_core.scheduler import (
38
+ ContinuousBatchingScheduler,
39
+ SchedulerConfig,
40
+ SchedulerOutput,
41
+ )
42
+ from sagellm_core.executor import ExecutorBase, UniprocExecutor
43
+ from sagellm_core.executor.executor_base import ExecutorConfig
44
+ from sagellm_core.worker import Worker
45
+ from sagellm_core.worker.model_runner import ModelRunner
46
+
47
+ # ============================================================================
48
+ # Legacy Architecture (still supported, being refactored)
49
+ # ============================================================================
50
+ from sagellm_core.engine import BaseEngine, EngineInstanceConfig
51
+ from sagellm_core.config import (
52
+ BackendConfig,
53
+ DemoConfig,
54
+ EngineConfig,
55
+ OutputConfig,
56
+ WorkloadConfig,
57
+ WorkloadSegment,
58
+ load_config,
59
+ )
60
+ from sagellm_core.demo import main as demo_main
61
+ from sagellm_core.engine_factory import EngineFactory
62
+
63
+ # Only EmbeddingEngine remains from legacy engines
64
+ from sagellm_core.engines import (
65
+ EmbeddingEngine,
66
+ EmbeddingEngineConfig,
67
+ )
68
+ from sagellm_core.factory import create_backend, create_engine
69
+ from sagellm_core.health import HealthStatus
70
+ from sagellm_core.plugins import PluginResolutionError, list_entry_points, resolve_kind
71
+ from sagellm_core.runner import DemoRunner, RunnerContext
72
+
73
+ # PD 分离 MVP 模块
74
+ from sagellm_core.runtime import DistributedConfig, DistributedRuntime, RuntimeState
75
+ from sagellm_core.pd_executor import PDExecutionContext, PDSeparatedExecutor
76
+
77
+ # Engine HTTP Server
78
+ from sagellm_core.engine_server import app as engine_server_app
79
+ from sagellm_core.engine_server import main as serve_engine
80
+
81
+ # ============================================================================
82
+ # Phase 2: New Modules (P2 Priority)
83
+ # ============================================================================
84
+ # Model loading utilities
85
+ from sagellm_core.model import ModelLoader, load_model
86
+
87
+ # Input processing
88
+ from sagellm_core.inputs import InputProcessor, ProcessedInput, TokenizerWrapper
89
+
90
+ # Sampling utilities
91
+ from sagellm_core.sampling import SamplingParams, Sampler, GreedySampler
92
+
93
+ # Distributed strategies
94
+ from sagellm_core.distributed import DistributedStrategy, TensorParallelStrategy
95
+
96
+ # Observability
97
+ from sagellm_core.observability import MetricsCollector, EngineMetrics, setup_logger
98
+
99
+ # PyTorchEngine has been removed (use LLMEngine); these names are kept only as None placeholders
100
+ PyTorchEngine = None
101
+ create_pytorch_engine = None
102
+
103
+ # Optional PyTorchEngine import (deprecated, use LLMEngine)
104
+ # try:
105
+ # from sagellm_core.engines.pytorch_engine import (
106
+ # PyTorchEngine,
107
+ # create_pytorch_engine,
108
+ # )
109
+ # except ImportError:
110
+ # pass # torch or transformers not available
111
+
112
+ # =========================================================================
113
+ # DEPRECATED: Old hardware-specific engines have been removed
114
+ # Use LLMEngine instead:
115
+ # from sagellm_core import LLMEngine, LLMEngineConfig
116
+ # engine = LLMEngine(LLMEngineConfig(model="..."))
117
+ #
118
+ # The following engines no longer exist:
119
+ # - CPUEngine → use LLMEngine(backend="cpu")
120
+ # - HFCudaEngine → use LLMEngine(backend="cuda")
121
+ # - AscendEngine → use LLMEngine(backend="ascend")
122
+ # - PyTorchEngine → use LLMEngine
123
+ #
124
+ # EmbeddingEngine is still available for embedding-only use cases.
125
+ # =========================================================================
126
+
127
+ # Version is defined at the top of the file (line 29)
128
+
129
+ __all__ = [
130
+ # Version
131
+ "__version__",
132
+ # =========================================================================
133
+ # New Architecture (vLLM v1 style) - RECOMMENDED
134
+ # =========================================================================
135
+ # LLMEngine - Unified hardware-agnostic engine
136
+ "LLMEngine",
137
+ "LLMEngineConfig",
138
+ # EngineCore - Coordinates Scheduler and Executor
139
+ "EngineCore",
140
+ "EngineCoreConfig",
141
+ # Scheduler - Continuous Batching
142
+ "ContinuousBatchingScheduler",
143
+ "SchedulerConfig",
144
+ "SchedulerOutput",
145
+ # Executor - Manages Workers
146
+ "ExecutorBase",
147
+ "ExecutorConfig",
148
+ "UniprocExecutor",
149
+ # Worker - Model execution
150
+ "Worker",
151
+ "ModelRunner",
152
+ # =========================================================================
153
+ # Configuration (for YAML/config files)
154
+ # =========================================================================
155
+ "BackendConfig",
156
+ "DemoConfig",
157
+ "EngineConfig",
158
+ "OutputConfig",
159
+ "WorkloadConfig",
160
+ "WorkloadSegment",
161
+ "load_config",
162
+ # Engine abstraction
163
+ "BaseEngine",
164
+ "EngineInstanceConfig", # For runtime engine instantiation
165
+ "HealthStatus",
166
+ # Engine implementations
167
+ # DEPRECATED: Old engines removed, use LLMEngine instead
168
+ # Only EmbeddingEngine remains for embedding-only use cases
169
+ "EmbeddingEngine",
170
+ "EmbeddingEngineConfig",
171
+ # Factory functions
172
+ "create_backend",
173
+ "create_engine",
174
+ "EngineFactory",
175
+ # Plugin system
176
+ "PluginResolutionError",
177
+ "list_entry_points",
178
+ "resolve_kind",
179
+ # Demo runner
180
+ "demo_main",
181
+ "DemoRunner",
182
+ "RunnerContext",
183
+ # PD Separation MVP
184
+ "DistributedConfig",
185
+ "DistributedRuntime",
186
+ "RuntimeState",
187
+ "PDExecutionContext",
188
+ "PDSeparatedExecutor",
189
+ # Engine HTTP Server
190
+ "engine_server_app",
191
+ "serve_engine",
192
+ ]
@@ -0,0 +1,15 @@
1
+ """Distributed inference strategies for sageLLM."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from sagellm_core.distributed.strategies import (
6
+ DistributedStrategy,
7
+ TensorParallelStrategy,
8
+ PipelineParallelStrategy,
9
+ )
10
+
11
+ __all__ = [
12
+ "DistributedStrategy",
13
+ "TensorParallelStrategy",
14
+ "PipelineParallelStrategy",
15
+ ]
@@ -0,0 +1,12 @@
1
+ """EngineCore - Coordinates Scheduler and Executor.
2
+
3
+ The EngineCore is responsible for:
4
+ 1. Managing request queues
5
+ 2. Coordinating with Scheduler for batch formation
6
+ 3. Dispatching batches to Executor
7
+ 4. Collecting results
8
+ """
9
+
10
+ from sagellm_core.engine_core.engine_core import EngineCore
11
+
12
+ __all__ = ["EngineCore"]
@@ -0,0 +1,19 @@
1
+ """Scheduler - Request scheduling for Continuous Batching.
2
+
3
+ The Scheduler is responsible for:
4
+ 1. Selecting which requests to run in the next step
5
+ 2. Managing prefill vs decode scheduling
6
+ 3. Preemption decisions
7
+ """
8
+
9
+ from sagellm_core.engine_core.scheduler.scheduler import (
10
+ ContinuousBatchingScheduler,
11
+ SchedulerConfig,
12
+ SchedulerOutput,
13
+ )
14
+
15
+ __all__ = [
16
+ "ContinuousBatchingScheduler",
17
+ "SchedulerConfig",
18
+ "SchedulerOutput",
19
+ ]
@@ -0,0 +1,29 @@
1
+ """Engine implementations for sageLLM Core.
2
+
3
+ DEPRECATED: This module contains legacy engine implementations.
4
+ Use LLMEngine from sagellm_core instead:
5
+
6
+ from sagellm_core import LLMEngine, LLMEngineConfig
7
+
8
+ config = LLMEngineConfig(
9
+ model_path="Qwen/Qwen2-7B",
10
+ backend_type="cuda", # or "cpu", "ascend", "auto"
11
+ )
12
+ engine = LLMEngine(config)
13
+ await engine.start()
14
+ response = await engine.generate("Hello!")
15
+
16
+ Remaining engines:
17
+ - EmbeddingEngine: Embedding model inference (not yet migrated to LLMEngine)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ # Only EmbeddingEngine remains - others have been migrated to LLMEngine
23
+ from sagellm_core.engines.embedding import EmbeddingEngine, EmbeddingEngineConfig
24
+
25
+ __all__ = [
26
+ # Embedding engine (still needed for embedding-only models)
27
+ "EmbeddingEngine",
28
+ "EmbeddingEngineConfig",
29
+ ]