isagellm-core 0.2.2.7__tar.gz → 0.3.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {isagellm_core-0.2.2.7/src/isagellm_core.egg-info → isagellm_core-0.3.0.2}/PKG-INFO +7 -4
  2. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/pyproject.toml +9 -4
  3. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2/src/isagellm_core.egg-info}/PKG-INFO +7 -4
  4. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/isagellm_core.egg-info/SOURCES.txt +11 -0
  5. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/isagellm_core.egg-info/entry_points.txt +2 -0
  6. isagellm_core-0.3.0.2/src/isagellm_core.egg-info/requires.txt +16 -0
  7. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__init__.py +24 -1
  8. isagellm_core-0.3.0.2/src/sagellm_core/__init__.pyc +0 -0
  9. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__main__.pyc +0 -0
  10. isagellm_core-0.3.0.2/src/sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
  11. isagellm_core-0.3.0.2/src/sagellm_core/__pycache__/base_engine.cpython-311.pyc +0 -0
  12. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/config.cpython-311.pyc +0 -0
  13. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/demo.cpython-311.pyc +0 -0
  14. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/engine.cpython-311.pyc +0 -0
  15. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/engine_factory.cpython-311.pyc +0 -0
  16. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/factory.cpython-311.pyc +0 -0
  17. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/health.cpython-311.pyc +0 -0
  18. isagellm_core-0.3.0.2/src/sagellm_core/__pycache__/mock_engine.cpython-311.pyc +0 -0
  19. isagellm_core-0.3.0.2/src/sagellm_core/__pycache__/pd_executor.cpython-311.pyc +0 -0
  20. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/plugins.cpython-311.pyc +0 -0
  21. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/runner.cpython-311.pyc +0 -0
  22. isagellm_core-0.3.0.2/src/sagellm_core/__pycache__/runtime.cpython-311.pyc +0 -0
  23. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/__pycache__/workload.cpython-311.pyc +0 -0
  24. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/config.pyc +0 -0
  25. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/demo.pyc +0 -0
  26. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engine.pyc +0 -0
  27. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engine_factory.pyc +0 -0
  28. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/__init__.py +14 -2
  29. isagellm_core-0.3.0.2/src/sagellm_core/engines/__init__.pyc +0 -0
  30. isagellm_core-0.3.0.2/src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
  31. isagellm_core-0.3.0.2/src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc +0 -0
  32. isagellm_core-0.3.0.2/src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc +0 -0
  33. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/__pycache__/embedding.cpython-311.pyc +0 -0
  34. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc +0 -0
  35. isagellm_core-0.3.0.2/src/sagellm_core/engines/__pycache__/mock.cpython-311.pyc +0 -0
  36. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc +0 -0
  37. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc +0 -0
  38. isagellm_core-0.3.0.2/src/sagellm_core/engines/ascend.pyc +0 -0
  39. isagellm_core-0.3.0.2/src/sagellm_core/engines/cpu.pyc +0 -0
  40. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/embedding.pyc +0 -0
  41. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/hf_cuda.pyc +0 -0
  42. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/pytorch.pyc +0 -0
  43. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/engines/pytorch_engine.pyc +0 -0
  44. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/factory.pyc +0 -0
  45. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/health.pyc +0 -0
  46. isagellm_core-0.3.0.2/src/sagellm_core/pd_executor.pyc +0 -0
  47. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/plugins.pyc +0 -0
  48. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/runner.pyc +0 -0
  49. isagellm_core-0.3.0.2/src/sagellm_core/runtime.pyc +0 -0
  50. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/workload.pyc +0 -0
  51. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_engine.py +22 -0
  52. isagellm_core-0.3.0.2/tests/test_pd_separation.py +207 -0
  53. isagellm_core-0.3.0.2/tests/test_streaming_pd.py +186 -0
  54. isagellm_core-0.2.2.7/src/isagellm_core.egg-info/requires.txt +0 -13
  55. isagellm_core-0.2.2.7/src/sagellm_core/__init__.pyc +0 -0
  56. isagellm_core-0.2.2.7/src/sagellm_core/__pycache__/__init__.cpython-311.pyc +0 -0
  57. isagellm_core-0.2.2.7/src/sagellm_core/engines/__init__.pyc +0 -0
  58. isagellm_core-0.2.2.7/src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc +0 -0
  59. isagellm_core-0.2.2.7/src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc +0 -0
  60. isagellm_core-0.2.2.7/src/sagellm_core/engines/cpu.pyc +0 -0
  61. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/MANIFEST.in +0 -0
  62. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/README.md +0 -0
  63. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/setup.cfg +0 -0
  64. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/setup.py +0 -0
  65. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/isagellm_core.egg-info/dependency_links.txt +0 -0
  66. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/isagellm_core.egg-info/top_level.txt +0 -0
  67. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/src/sagellm_core/py.typed +0 -0
  68. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_ci_smoke.py +0 -0
  69. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_config.py +0 -0
  70. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_cpu_engine_contract.py +0 -0
  71. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_cpu_engine_error_handling.py +0 -0
  72. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_e2e_cpu_integration.py +0 -0
  73. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_engine_behavior_parity.py +0 -0
  74. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_engine_contract.py +0 -0
  75. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_engine_contract_simplified.py +0 -0
  76. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_pytorch_engine.py +0 -0
  77. {isagellm_core-0.2.2.7 → isagellm_core-0.3.0.2}/tests/test_task0_10_workload.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isagellm-core
3
- Version: 0.2.2.7
4
- Summary: sageLLM core runtime (config/observability/registries), plugin-ready
3
+ Version: 0.3.0.2
4
+ Summary: sageLLM core runtime with PD separation (MVP)
5
5
  Author: IntelliStream Team
6
6
  License: Proprietary - IntelliStream
7
7
  Classifier: Development Status :: 3 - Alpha
@@ -13,8 +13,10 @@ Requires-Python: ==3.11.*
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: pydantic>=2.0.0
15
15
  Requires-Dist: pyyaml>=6.0.0
16
- Requires-Dist: isagellm-protocol<0.2.0,>=0.1.0
17
- Requires-Dist: isagellm-backend<0.3.0,>=0.2.0.0
16
+ Requires-Dist: isagellm-protocol<0.4.0,>=0.3.0.0
17
+ Requires-Dist: isagellm-backend<0.4.0,>=0.3.0.0
18
+ Requires-Dist: isagellm-comm<0.4.0,>=0.3.0.0
19
+ Requires-Dist: isagellm-kv-cache<0.4.0,>=0.3.0.0
18
20
  Provides-Extra: dev
19
21
  Requires-Dist: pytest>=7.0.0; extra == "dev"
20
22
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
@@ -23,6 +25,7 @@ Requires-Dist: ruff>=0.8.0; extra == "dev"
23
25
  Requires-Dist: mypy>=1.0.0; extra == "dev"
24
26
  Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
25
27
  Requires-Dist: pre-commit>=3.0.0; extra == "dev"
28
+ Requires-Dist: isage-pypi-publisher>=0.2.0; extra == "dev"
26
29
 
27
30
  # sagellm-core
28
31
 
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "isagellm-core"
7
- version = "0.2.2.7"
8
- description = "sageLLM core runtime (config/observability/registries), plugin-ready"
7
+ version = "0.3.0.2"
8
+ description = "sageLLM core runtime with PD separation (MVP)"
9
9
  readme = "README.md"
10
10
  requires-python = "==3.11.*"
11
11
  authors = [{ name = "IntelliStream Team" }]
@@ -20,8 +20,10 @@ classifiers = [
20
20
  dependencies = [
21
21
  "pydantic>=2.0.0",
22
22
  "pyyaml>=6.0.0",
23
- "isagellm-protocol>=0.1.0,<0.2.0",
24
- "isagellm-backend>=0.2.0.0,<0.3.0",
23
+ "isagellm-protocol>=0.3.0.0,<0.4.0",
24
+ "isagellm-backend>=0.3.0.0,<0.4.0",
25
+ "isagellm-comm>=0.3.0.0,<0.4.0",
26
+ "isagellm-kv-cache>=0.3.0.0,<0.4.0",
25
27
  ]
26
28
 
27
29
  [project.optional-dependencies]
@@ -33,6 +35,7 @@ dev = [
33
35
  "mypy>=1.0.0",
34
36
  "types-PyYAML>=6.0.0",
35
37
  "pre-commit>=3.0.0",
38
+ "isage-pypi-publisher>=0.2.0",
36
39
  ]
37
40
 
38
41
  [tool.setuptools]
@@ -51,7 +54,9 @@ line-length = 100
51
54
 
52
55
  [project.entry-points."sagellm.engines"]
53
56
  cpu = "sagellm_core.engines.cpu:create_cpu_engine"
57
+ hf-cuda = "sagellm_core.engines.hf_cuda:create_hf_cuda_engine"
54
58
  pytorch = "sagellm_core.engines.pytorch:create_pytorch_engine"
59
+ ascend = "sagellm_core.engines.ascend:create_ascend_engine"
55
60
 
56
61
  [tool.mypy]
57
62
  python_version = "3.10"
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: isagellm-core
3
- Version: 0.2.2.7
4
- Summary: sageLLM core runtime (config/observability/registries), plugin-ready
3
+ Version: 0.3.0.2
4
+ Summary: sageLLM core runtime with PD separation (MVP)
5
5
  Author: IntelliStream Team
6
6
  License: Proprietary - IntelliStream
7
7
  Classifier: Development Status :: 3 - Alpha
@@ -13,8 +13,10 @@ Requires-Python: ==3.11.*
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: pydantic>=2.0.0
15
15
  Requires-Dist: pyyaml>=6.0.0
16
- Requires-Dist: isagellm-protocol<0.2.0,>=0.1.0
17
- Requires-Dist: isagellm-backend<0.3.0,>=0.2.0.0
16
+ Requires-Dist: isagellm-protocol<0.4.0,>=0.3.0.0
17
+ Requires-Dist: isagellm-backend<0.4.0,>=0.3.0.0
18
+ Requires-Dist: isagellm-comm<0.4.0,>=0.3.0.0
19
+ Requires-Dist: isagellm-kv-cache<0.4.0,>=0.3.0.0
18
20
  Provides-Extra: dev
19
21
  Requires-Dist: pytest>=7.0.0; extra == "dev"
20
22
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
@@ -23,6 +25,7 @@ Requires-Dist: ruff>=0.8.0; extra == "dev"
23
25
  Requires-Dist: mypy>=1.0.0; extra == "dev"
24
26
  Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
25
27
  Requires-Dist: pre-commit>=3.0.0; extra == "dev"
28
+ Requires-Dist: isage-pypi-publisher>=0.2.0; extra == "dev"
26
29
 
27
30
  # sagellm-core
28
31
 
@@ -17,31 +17,40 @@ src/sagellm_core/engine.pyc
17
17
  src/sagellm_core/engine_factory.pyc
18
18
  src/sagellm_core/factory.pyc
19
19
  src/sagellm_core/health.pyc
20
+ src/sagellm_core/pd_executor.pyc
20
21
  src/sagellm_core/plugins.pyc
21
22
  src/sagellm_core/py.typed
22
23
  src/sagellm_core/runner.pyc
24
+ src/sagellm_core/runtime.pyc
23
25
  src/sagellm_core/workload.pyc
24
26
  src/sagellm_core/__pycache__/__init__.cpython-311.pyc
27
+ src/sagellm_core/__pycache__/base_engine.cpython-311.pyc
25
28
  src/sagellm_core/__pycache__/config.cpython-311.pyc
26
29
  src/sagellm_core/__pycache__/demo.cpython-311.pyc
27
30
  src/sagellm_core/__pycache__/engine.cpython-311.pyc
28
31
  src/sagellm_core/__pycache__/engine_factory.cpython-311.pyc
29
32
  src/sagellm_core/__pycache__/factory.cpython-311.pyc
30
33
  src/sagellm_core/__pycache__/health.cpython-311.pyc
34
+ src/sagellm_core/__pycache__/mock_engine.cpython-311.pyc
35
+ src/sagellm_core/__pycache__/pd_executor.cpython-311.pyc
31
36
  src/sagellm_core/__pycache__/plugins.cpython-311.pyc
32
37
  src/sagellm_core/__pycache__/runner.cpython-311.pyc
38
+ src/sagellm_core/__pycache__/runtime.cpython-311.pyc
33
39
  src/sagellm_core/__pycache__/workload.cpython-311.pyc
34
40
  src/sagellm_core/engines/__init__.py
35
41
  src/sagellm_core/engines/__init__.pyc
42
+ src/sagellm_core/engines/ascend.pyc
36
43
  src/sagellm_core/engines/cpu.pyc
37
44
  src/sagellm_core/engines/embedding.pyc
38
45
  src/sagellm_core/engines/hf_cuda.pyc
39
46
  src/sagellm_core/engines/pytorch.pyc
40
47
  src/sagellm_core/engines/pytorch_engine.pyc
41
48
  src/sagellm_core/engines/__pycache__/__init__.cpython-311.pyc
49
+ src/sagellm_core/engines/__pycache__/ascend.cpython-311.pyc
42
50
  src/sagellm_core/engines/__pycache__/cpu.cpython-311.pyc
43
51
  src/sagellm_core/engines/__pycache__/embedding.cpython-311.pyc
44
52
  src/sagellm_core/engines/__pycache__/hf_cuda.cpython-311.pyc
53
+ src/sagellm_core/engines/__pycache__/mock.cpython-311.pyc
45
54
  src/sagellm_core/engines/__pycache__/pytorch.cpython-311.pyc
46
55
  src/sagellm_core/engines/__pycache__/pytorch_engine.cpython-311.pyc
47
56
  tests/test_ci_smoke.py
@@ -53,5 +62,7 @@ tests/test_engine.py
53
62
  tests/test_engine_behavior_parity.py
54
63
  tests/test_engine_contract.py
55
64
  tests/test_engine_contract_simplified.py
65
+ tests/test_pd_separation.py
56
66
  tests/test_pytorch_engine.py
67
+ tests/test_streaming_pd.py
57
68
  tests/test_task0_10_workload.py
@@ -1,3 +1,5 @@
1
1
  [sagellm.engines]
2
+ ascend = sagellm_core.engines.ascend:create_ascend_engine
2
3
  cpu = sagellm_core.engines.cpu:create_cpu_engine
4
+ hf-cuda = sagellm_core.engines.hf_cuda:create_hf_cuda_engine
3
5
  pytorch = sagellm_core.engines.pytorch:create_pytorch_engine
@@ -0,0 +1,16 @@
1
+ pydantic>=2.0.0
2
+ pyyaml>=6.0.0
3
+ isagellm-protocol<0.4.0,>=0.3.0.0
4
+ isagellm-backend<0.4.0,>=0.3.0.0
5
+ isagellm-comm<0.4.0,>=0.3.0.0
6
+ isagellm-kv-cache<0.4.0,>=0.3.0.0
7
+
8
+ [dev]
9
+ pytest>=7.0.0
10
+ pytest-cov>=4.0.0
11
+ pytest-timeout>=2.0.0
12
+ ruff>=0.8.0
13
+ mypy>=1.0.0
14
+ types-PyYAML>=6.0.0
15
+ pre-commit>=3.0.0
16
+ isage-pypi-publisher>=0.2.0
@@ -6,6 +6,8 @@
6
6
  - Engine 工厂函数
7
7
  - 插件系统
8
8
  - Demo Runner
9
+ - 分布式 Runtime(PD 分离 MVP)
10
+ - PD 分离执行器
9
11
  """
10
12
 
11
13
  from __future__ import annotations
@@ -23,17 +25,24 @@ from sagellm_core.config import (
23
25
  from sagellm_core.demo import main as demo_main
24
26
  from sagellm_core.engine_factory import EngineFactory
25
27
  from sagellm_core.engines import (
28
+ AscendEngine,
29
+ AscendEngineConfig,
26
30
  CPUEngine,
27
31
  EmbeddingEngine,
28
32
  EmbeddingEngineConfig,
29
33
  HFCudaEngine,
30
34
  HFCudaEngineConfig,
35
+ create_ascend_engine,
31
36
  )
32
37
  from sagellm_core.factory import create_backend, create_engine
33
38
  from sagellm_core.health import HealthStatus
34
39
  from sagellm_core.plugins import PluginResolutionError, list_entry_points, resolve_kind
35
40
  from sagellm_core.runner import DemoRunner, RunnerContext
36
41
 
42
+ # PD 分离 MVP 模块
43
+ from sagellm_core.runtime import DistributedConfig, DistributedRuntime, RuntimeState
44
+ from sagellm_core.pd_executor import PDExecutionContext, PDSeparatedExecutor
45
+
37
46
  # PyTorch engine (optional, loaded lazily)
38
47
  PyTorchEngine = None
39
48
  PyTorchEngineConfig = None
@@ -61,6 +70,11 @@ try:
61
70
  except Exception:
62
71
  pass # torch or CUDA not available
63
72
 
73
+ try:
74
+ EngineFactory.register(AscendEngine)
75
+ except Exception:
76
+ pass # torch_npu or Ascend deps not available
77
+
64
78
  try:
65
79
  EngineFactory.register(EmbeddingEngine)
66
80
  except Exception:
@@ -72,7 +86,7 @@ try:
72
86
  except Exception:
73
87
  pass # torch or transformers not available
74
88
 
75
- __version__ = "0.2.2.7"
89
+ __version__ = "0.3.0.0"
76
90
 
77
91
  __all__ = [
78
92
  # Version
@@ -90,6 +104,9 @@ __all__ = [
90
104
  "EngineInstanceConfig", # For runtime engine instantiation
91
105
  "HealthStatus",
92
106
  # Engine implementations
107
+ "AscendEngine",
108
+ "AscendEngineConfig",
109
+ "create_ascend_engine",
93
110
  "CPUEngine",
94
111
  "HFCudaEngine",
95
112
  "HFCudaEngineConfig",
@@ -111,4 +128,10 @@ __all__ = [
111
128
  "demo_main",
112
129
  "DemoRunner",
113
130
  "RunnerContext",
131
+ # PD Separation MVP
132
+ "DistributedConfig",
133
+ "DistributedRuntime",
134
+ "RuntimeState",
135
+ "PDExecutionContext",
136
+ "PDSeparatedExecutor",
114
137
  ]
@@ -3,27 +3,39 @@
3
3
  This module provides built-in engine implementations:
4
4
  - CPUEngine: CPU-only inference with HuggingFace Transformers
5
5
  - HFCudaEngine: CUDA inference with HuggingFace Transformers
6
+ - AscendEngine: Huawei Ascend NPU (PyTorch-NPU)
6
7
  - PyTorchEngine: Unified PyTorch engine (auto-detects CUDA/NPU/CPU)
7
8
  - EmbeddingEngine: Embedding model inference
8
9
  """
9
10
 
10
11
  from __future__ import annotations
11
12
 
12
- from sagellm_core.engines.cpu import CPUEngine
13
+ from sagellm_core.engines.ascend import AscendEngine, AscendEngineConfig, create_ascend_engine
14
+ from sagellm_core.engines.cpu import CPUEngine, create_cpu_engine
13
15
  from sagellm_core.engines.embedding import EmbeddingEngine, EmbeddingEngineConfig
14
- from sagellm_core.engines.hf_cuda import HFCudaEngine, HFCudaEngineInstanceConfig
16
+ from sagellm_core.engines.hf_cuda import (
17
+ HFCudaEngine,
18
+ HFCudaEngineInstanceConfig,
19
+ create_hf_cuda_engine,
20
+ )
15
21
  from sagellm_core.engines.pytorch_engine import PyTorchEngine, create_pytorch_engine
16
22
 
17
23
  # Export aliases for backward compatibility
18
24
  HFCudaEngineConfig = HFCudaEngineInstanceConfig
19
25
 
20
26
  __all__ = [
27
+ # Ascend engine
28
+ "AscendEngine",
29
+ "AscendEngineConfig",
30
+ "create_ascend_engine",
21
31
  # CPU engine
22
32
  "CPUEngine",
33
+ "create_cpu_engine",
23
34
  # CUDA engine
24
35
  "HFCudaEngine",
25
36
  "HFCudaEngineConfig",
26
37
  "HFCudaEngineInstanceConfig",
38
+ "create_hf_cuda_engine",
27
39
  # PyTorch engine (unified)
28
40
  "PyTorchEngine",
29
41
  "create_pytorch_engine",
@@ -43,6 +43,28 @@ class TestCPUEngine(BaseEngine):
43
43
  async def health_check(self) -> bool:
44
44
  return self._is_running
45
45
 
46
+ async def prefill(self, request: Request) -> dict:
47
+ """Minimal prefill implementation for testing."""
48
+ if not self._is_running:
49
+ raise RuntimeError("not running")
50
+ return {
51
+ "kv_handle": {"test": "handle"},
52
+ "num_tokens": len(request.prompt.split()) if request.prompt else 0,
53
+ "first_token_id": 1,
54
+ }
55
+
56
+ async def decode(self, request: Request, kv_handle=None, max_new_tokens=None) -> dict:
57
+ """Minimal decode implementation for testing."""
58
+ if not self._is_running:
59
+ raise RuntimeError("not running")
60
+ num_tokens = max_new_tokens or request.max_tokens
61
+ return {
62
+ "output_tokens": [1, 2, 3][:num_tokens],
63
+ "output_text": "test output",
64
+ "finish_reason": "stop",
65
+ "num_tokens": min(3, num_tokens),
66
+ }
67
+
46
68
  async def execute(self, request: Request) -> Response:
47
69
  if not self._is_running:
48
70
  raise RuntimeError("not running")
@@ -0,0 +1,207 @@
1
+ """测试 PD 分离功能
2
+
3
+ 验证 CPUEngine 的 prefill() 和 decode() 方法是否正常工作。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import logging
10
+
11
+ import pytest
12
+
13
+ from sagellm_protocol.types import Request
14
+ from sagellm_core import PDSeparatedExecutor, DistributedRuntime, create_backend, create_engine
15
+ from sagellm_core.config import EngineConfig
16
+
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ @pytest.mark.asyncio
22
+ async def test_cpu_engine_prefill():
23
+ """测试 CPUEngine 的 prefill 方法"""
24
+ # 创建引擎 - 使用轻量级模型(约 50MB,快速下载)
25
+ backend = create_backend({"kind": "cpu"})
26
+ config = EngineConfig(
27
+ kind="cpu",
28
+ model="sshleifer/tiny-gpt2", # 轻量级测试模型
29
+ device="cpu",
30
+ )
31
+ engine = create_engine(config, backend)
32
+
33
+ try:
34
+ await engine.start()
35
+
36
+ # 创建请求
37
+ request = Request(
38
+ request_id="req-prefill-001",
39
+ trace_id="trace-001",
40
+ model="sshleifer/tiny-gpt2",
41
+ prompt="Hello, how are you?",
42
+ max_tokens=10,
43
+ stream=False,
44
+ )
45
+
46
+ # 执行 Prefill
47
+ result = await engine.prefill(request)
48
+
49
+ # 验证结果
50
+ assert "kv_handle" in result
51
+ assert "num_tokens" in result
52
+ assert result["num_tokens"] > 0
53
+ assert "first_token_id" in result
54
+
55
+ logger.info(f"✓ Prefill completed: {result['num_tokens']} tokens processed")
56
+
57
+ finally:
58
+ await engine.stop()
59
+
60
+
61
+ @pytest.mark.asyncio
62
+ async def test_cpu_engine_decode():
63
+ """测试 CPUEngine 的 decode 方法"""
64
+ backend = create_backend({"kind": "cpu"})
65
+ config = EngineConfig(
66
+ kind="cpu",
67
+ model="sshleifer/tiny-gpt2",
68
+ device="cpu",
69
+ )
70
+ engine = create_engine(config, backend)
71
+
72
+ try:
73
+ await engine.start()
74
+
75
+ # 先执行 Prefill
76
+ request = Request(
77
+ request_id="req-decode-001",
78
+ trace_id="trace-001",
79
+ model="sshleifer/tiny-gpt2",
80
+ prompt="Hello!",
81
+ max_tokens=10,
82
+ stream=False,
83
+ )
84
+
85
+ prefill_result = await engine.prefill(request)
86
+ kv_handle = prefill_result["kv_handle"]
87
+
88
+ # 执行 Decode
89
+ decode_result = await engine.decode(request, kv_handle=kv_handle, max_new_tokens=10)
90
+
91
+ # 验证结果
92
+ assert "output_tokens" in decode_result
93
+ assert "output_text" in decode_result
94
+ assert "finish_reason" in decode_result
95
+ assert len(decode_result["output_tokens"]) > 0
96
+
97
+ logger.info(
98
+ f"✓ Decode completed: {len(decode_result['output_tokens'])} tokens generated, "
99
+ f"text='{decode_result['output_text']}'"
100
+ )
101
+
102
+ finally:
103
+ await engine.stop()
104
+
105
+
106
+ @pytest.mark.asyncio
107
+ async def test_pd_executor_hybrid():
108
+ """测试 PDSeparatedExecutor 的 Hybrid 模式"""
109
+ # 初始化 Runtime
110
+ runtime = DistributedRuntime()
111
+ await runtime.initialize()
112
+
113
+ # 创建引擎
114
+ backend = create_backend({"kind": "cpu"})
115
+ config = EngineConfig(
116
+ kind="cpu",
117
+ model="sshleifer/tiny-gpt2",
118
+ device="cpu",
119
+ )
120
+ engine = create_engine(config, backend)
121
+
122
+ try:
123
+ await engine.start()
124
+
125
+ # 创建 PD Executor
126
+ executor = PDSeparatedExecutor(engine=engine, runtime=runtime)
127
+
128
+ # 创建 Hybrid 请求
129
+ request = Request(
130
+ request_id="req-hybrid-001",
131
+ trace_id="trace-001",
132
+ model="sshleifer/tiny-gpt2",
133
+ prompt="Hello!",
134
+ max_tokens=10,
135
+ stream=False,
136
+ phase="decode", # hybrid mode
137
+ )
138
+
139
+ # 执行
140
+ response = await executor.execute(request)
141
+
142
+ # 验证结果
143
+ assert response.request_id == "req-hybrid-001"
144
+ assert response.metrics.prefill_ms > 0
145
+ assert response.metrics.decode_ms > 0
146
+ assert len(response.output_text) > 0
147
+
148
+ logger.info("✓ Hybrid execution completed:")
149
+ logger.info(f" - Prefill: {response.metrics.prefill_ms:.2f} ms")
150
+ logger.info(f" - Decode: {response.metrics.decode_ms:.2f} ms")
151
+ logger.info(f" - Output: '{response.output_text}'")
152
+
153
+ finally:
154
+ await engine.stop()
155
+ await runtime.shutdown()
156
+
157
+
158
+ @pytest.mark.asyncio
159
+ async def test_pd_executor_prefill_only():
160
+ """测试 PDSeparatedExecutor 的 Prefill-Only 模式"""
161
+ runtime = DistributedRuntime()
162
+ await runtime.initialize()
163
+
164
+ backend = create_backend({"kind": "cpu"})
165
+ config = EngineConfig(
166
+ kind="cpu",
167
+ model="sshleifer/tiny-gpt2",
168
+ device="cpu",
169
+ )
170
+ engine = create_engine(config, backend)
171
+
172
+ try:
173
+ await engine.start()
174
+ executor = PDSeparatedExecutor(engine=engine, runtime=runtime)
175
+
176
+ # Prefill-Only 请求
177
+ request = Request(
178
+ request_id="req-prefill-only-001",
179
+ trace_id="trace-001",
180
+ model="sshleifer/tiny-gpt2",
181
+ prompt="Hello!",
182
+ max_tokens=1, # 最小值(但实际只做 prefill)
183
+ stream=False,
184
+ phase="prefill",
185
+ )
186
+
187
+ response = await executor.execute(request)
188
+
189
+ # 验证:只有 Prefill,没有 Decode(或 Decode 很少)
190
+ assert response.metrics.prefill_ms > 0
191
+ # decode_ms 可能为 0(prefill-only)或很小(生成了 1 token)
192
+
193
+ logger.info("✓ Prefill-Only execution completed:")
194
+ logger.info(f" - Prefill: {response.metrics.prefill_ms:.2f} ms")
195
+
196
+ finally:
197
+ await engine.stop()
198
+ await runtime.shutdown()
199
+
200
+
201
+ if __name__ == "__main__":
202
+ # 运行测试
203
+ asyncio.run(test_cpu_engine_prefill())
204
+ asyncio.run(test_cpu_engine_decode())
205
+ asyncio.run(test_pd_executor_hybrid())
206
+ asyncio.run(test_pd_executor_prefill_only())
207
+ print("\n✅ All PD separation tests passed!")
@@ -0,0 +1,186 @@
1
+ """Tests for Streaming PD Separation.
2
+
3
+ 测试流式 Prefill-Decode 分离功能。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import pytest
9
+ from sagellm_protocol.types import Request
10
+
11
+ from sagellm_core.engines.cpu import CPUEngine, CPUEngineConfig
12
+ from sagellm_core.pd_executor import PDSeparatedExecutor
13
+ from sagellm_core.runtime import DistributedRuntime
14
+
15
+
16
+ @pytest.mark.asyncio
17
+ async def test_streaming_pd_separation():
18
+ """测试流式 PD 分离:验证事件顺序和 TTFT 在首 token"""
19
+ config = CPUEngineConfig(
20
+ engine_id="cpu-engine-stream-001",
21
+ model_path="sshleifer/tiny-gpt2",
22
+ torch_dtype="float32",
23
+ trust_remote_code=True,
24
+ )
25
+ engine = CPUEngine(config)
26
+ runtime = DistributedRuntime()
27
+
28
+ try:
29
+ await engine.start()
30
+ await runtime.initialize()
31
+
32
+ executor = PDSeparatedExecutor(engine=engine, runtime=runtime)
33
+
34
+ request = Request(
35
+ request_id="test-stream-001",
36
+ trace_id="trace-stream-001",
37
+ model="sshleifer/tiny-gpt2",
38
+ prompt="Once upon a time",
39
+ max_tokens=5,
40
+ stream=True,
41
+ )
42
+
43
+ events = []
44
+ async for event in executor.stream(request):
45
+ events.append(event)
46
+
47
+ # Verify Event Sequence
48
+ # ═════════════════════════════════════════════════════════════════
49
+ assert len(events) >= 3, "Should have at least start, delta, end"
50
+
51
+ # 1. Start Event
52
+ assert events[0].event == "start"
53
+ assert events[0].request_id == "test-stream-001"
54
+ assert events[0].trace_id == "trace-stream-001"
55
+
56
+ # 2. Delta Events (至少一个)
57
+ delta_events = [e for e in events if e.event == "delta"]
58
+ assert len(delta_events) > 0, "Should have at least one delta event"
59
+
60
+ # 每个 delta 应包含 chunk 和 chunk_tokens
61
+ for delta in delta_events:
62
+ assert hasattr(delta, "chunk"), "Delta should have chunk"
63
+ assert hasattr(delta, "chunk_tokens"), "Delta should have chunk_tokens"
64
+
65
+ # 3. End Event
66
+ assert events[-1].event == "end"
67
+ assert events[-1].finish_reason in ["stop", "length", "error"]
68
+ assert events[-1].metrics is not None, "End event should contain final metrics"
69
+
70
+ # Verify PD Metrics
71
+ # ═════════════════════════════════════════════════════════════════
72
+ final_metrics = events[-1].metrics
73
+ assert final_metrics.ttft_ms > 0, "TTFT should be positive"
74
+ assert final_metrics.prefill_ms > 0, "Prefill time should be positive"
75
+ assert final_metrics.decode_ms > 0, "Decode time should be positive"
76
+
77
+ # TBT 应该是 decode 平均时间
78
+ assert final_metrics.tbt_ms > 0, "TBT should be positive"
79
+
80
+ finally:
81
+ await engine.stop()
82
+ await runtime.shutdown()
83
+
84
+
85
+ @pytest.mark.asyncio
86
+ async def test_streaming_pd_event_content():
87
+ """测试流式事件内容:delta text 和 tokens"""
88
+ config = CPUEngineConfig(
89
+ engine_id="cpu-engine-stream-002",
90
+ model_path="sshleifer/tiny-gpt2",
91
+ torch_dtype="float32",
92
+ trust_remote_code=True,
93
+ )
94
+ engine = CPUEngine(config)
95
+ runtime = DistributedRuntime()
96
+
97
+ try:
98
+ await engine.start()
99
+ await runtime.initialize()
100
+
101
+ executor = PDSeparatedExecutor(engine=engine, runtime=runtime)
102
+
103
+ request = Request(
104
+ request_id="test-stream-002",
105
+ trace_id="trace-stream-002",
106
+ model="sshleifer/tiny-gpt2",
107
+ prompt="Hello world",
108
+ max_tokens=3,
109
+ stream=True,
110
+ )
111
+
112
+ events = []
113
+ async for event in executor.stream(request):
114
+ events.append(event)
115
+
116
+ # ═════════════════════════════════════════════════════════════════
117
+ # Verify Delta Content
118
+ # ═════════════════════════════════════════════════════════════════
119
+ delta_events = [e for e in events if e.event == "delta"]
120
+
121
+ for delta in delta_events:
122
+ # 每个 delta 应包含 chunk 和 chunk_tokens
123
+ assert hasattr(delta, "chunk"), "Delta should have chunk"
124
+ assert hasattr(delta, "chunk_tokens"), "Delta should have chunk_tokens"
125
+
126
+ # ═════════════════════════════════════════════════════════════════
127
+ # Verify End Content
128
+ # ═════════════════════════════════════════════════════════════════
129
+ end_event = events[-1]
130
+ assert end_event.output_text, "End event should contain full output text"
131
+ assert end_event.output_tokens, "End event should contain output tokens"
132
+
133
+ finally:
134
+ await engine.stop()
135
+ await runtime.shutdown()
136
+
137
+
138
+ @pytest.mark.asyncio
139
+ async def test_streaming_pd_metrics_consistency():
140
+ """测试流式 PD 指标一致性:首 delta 和 end 的 TTFT 应相同"""
141
+ config = CPUEngineConfig(
142
+ engine_id="cpu-engine-stream-003",
143
+ model_path="sshleifer/tiny-gpt2",
144
+ torch_dtype="float32",
145
+ trust_remote_code=True,
146
+ )
147
+ engine = CPUEngine(config)
148
+ runtime = DistributedRuntime()
149
+
150
+ try:
151
+ await engine.start()
152
+ await runtime.initialize()
153
+
154
+ executor = PDSeparatedExecutor(engine=engine, runtime=runtime)
155
+
156
+ request = Request(
157
+ request_id="test-stream-003",
158
+ trace_id="trace-stream-003",
159
+ model="sshleifer/tiny-gpt2",
160
+ prompt="Testing metrics",
161
+ max_tokens=4,
162
+ stream=True,
163
+ )
164
+
165
+ events = []
166
+ async for event in executor.stream(request):
167
+ events.append(event)
168
+
169
+ # ═════════════════════════════════════════════════════════════════
170
+ # Extract Metrics
171
+ # ═════════════════════════════════════════════════════════════════
172
+ end_metrics = events[-1].metrics
173
+
174
+ # Verify TTFT Consistency
175
+ # ═════════════════════════════════════════════════════════════════
176
+ assert end_metrics is not None
177
+
178
+ # TTFT 应相同(首 token 延迟不变)
179
+ assert end_metrics.ttft_ms > 0, "TTFT should be positive"
180
+
181
+ # Prefill 时间应相同
182
+ assert end_metrics.prefill_ms > 0, "Prefill time should be consistent"
183
+
184
+ finally:
185
+ await engine.stop()
186
+ await runtime.shutdown()
@@ -1,13 +0,0 @@
1
- pydantic>=2.0.0
2
- pyyaml>=6.0.0
3
- isagellm-protocol<0.2.0,>=0.1.0
4
- isagellm-backend<0.3.0,>=0.2.0.0
5
-
6
- [dev]
7
- pytest>=7.0.0
8
- pytest-cov>=4.0.0
9
- pytest-timeout>=2.0.0
10
- ruff>=0.8.0
11
- mypy>=1.0.0
12
- types-PyYAML>=6.0.0
13
- pre-commit>=3.0.0