massgen 0.0.3__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +142 -8
- massgen/adapters/__init__.py +29 -0
- massgen/adapters/ag2_adapter.py +483 -0
- massgen/adapters/base.py +183 -0
- massgen/adapters/tests/__init__.py +0 -0
- massgen/adapters/tests/test_ag2_adapter.py +439 -0
- massgen/adapters/tests/test_agent_adapter.py +128 -0
- massgen/adapters/utils/__init__.py +2 -0
- massgen/adapters/utils/ag2_utils.py +236 -0
- massgen/adapters/utils/tests/__init__.py +0 -0
- massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
- massgen/agent_config.py +329 -55
- massgen/api_params_handler/__init__.py +10 -0
- massgen/api_params_handler/_api_params_handler_base.py +99 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
- massgen/api_params_handler/_claude_api_params_handler.py +113 -0
- massgen/api_params_handler/_response_api_params_handler.py +130 -0
- massgen/backend/__init__.py +39 -4
- massgen/backend/azure_openai.py +385 -0
- massgen/backend/base.py +341 -69
- massgen/backend/base_with_mcp.py +1102 -0
- massgen/backend/capabilities.py +386 -0
- massgen/backend/chat_completions.py +577 -130
- massgen/backend/claude.py +1033 -537
- massgen/backend/claude_code.py +1203 -0
- massgen/backend/cli_base.py +209 -0
- massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
- massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
- massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
- massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
- massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
- massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
- massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
- massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
- massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
- massgen/backend/docs/inference_backend.md +257 -0
- massgen/backend/docs/permissions_and_context_files.md +1085 -0
- massgen/backend/external.py +126 -0
- massgen/backend/gemini.py +1850 -241
- massgen/backend/grok.py +40 -156
- massgen/backend/inference.py +156 -0
- massgen/backend/lmstudio.py +171 -0
- massgen/backend/response.py +1095 -322
- massgen/chat_agent.py +131 -113
- massgen/cli.py +1560 -275
- massgen/config_builder.py +2396 -0
- massgen/configs/BACKEND_CONFIGURATION.md +458 -0
- massgen/configs/README.md +559 -216
- massgen/configs/ag2/ag2_case_study.yaml +27 -0
- massgen/configs/ag2/ag2_coder.yaml +34 -0
- massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
- massgen/configs/ag2/ag2_gemini.yaml +27 -0
- massgen/configs/ag2/ag2_groupchat.yaml +108 -0
- massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
- massgen/configs/ag2/ag2_single_agent.yaml +21 -0
- massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
- massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
- massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
- massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
- massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
- massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
- massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
- massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
- massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
- massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
- massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
- massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
- massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
- massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
- massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
- massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
- massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
- massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
- massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
- massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
- massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
- massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
- massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
- massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
- massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
- massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
- massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
- massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
- massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
- massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
- massgen/configs/debug/skip_coordination_test.yaml +27 -0
- massgen/configs/debug/test_sdk_migration.yaml +17 -0
- massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
- massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
- massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
- massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
- massgen/configs/providers/claude/claude.yaml +14 -0
- massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
- massgen/configs/providers/local/lmstudio.yaml +11 -0
- massgen/configs/providers/openai/gpt5.yaml +46 -0
- massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
- massgen/configs/providers/others/grok_single_agent.yaml +19 -0
- massgen/configs/providers/others/zai_coding_team.yaml +108 -0
- massgen/configs/providers/others/zai_glm45.yaml +12 -0
- massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
- massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
- massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
- massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
- massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
- massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
- massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
- massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
- massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
- massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
- massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
- massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
- massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
- massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
- massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
- massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
- massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
- massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
- massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
- massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
- massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
- massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
- massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
- massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
- massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
- massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
- massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
- massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
- massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
- massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
- massgen/coordination_tracker.py +708 -0
- massgen/docker/README.md +462 -0
- massgen/filesystem_manager/__init__.py +21 -0
- massgen/filesystem_manager/_base.py +9 -0
- massgen/filesystem_manager/_code_execution_server.py +545 -0
- massgen/filesystem_manager/_docker_manager.py +477 -0
- massgen/filesystem_manager/_file_operation_tracker.py +248 -0
- massgen/filesystem_manager/_filesystem_manager.py +813 -0
- massgen/filesystem_manager/_path_permission_manager.py +1261 -0
- massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
- massgen/formatter/__init__.py +10 -0
- massgen/formatter/_chat_completions_formatter.py +284 -0
- massgen/formatter/_claude_formatter.py +235 -0
- massgen/formatter/_formatter_base.py +156 -0
- massgen/formatter/_response_formatter.py +263 -0
- massgen/frontend/__init__.py +1 -2
- massgen/frontend/coordination_ui.py +471 -286
- massgen/frontend/displays/base_display.py +56 -11
- massgen/frontend/displays/create_coordination_table.py +1956 -0
- massgen/frontend/displays/rich_terminal_display.py +1259 -619
- massgen/frontend/displays/simple_display.py +9 -4
- massgen/frontend/displays/terminal_display.py +27 -68
- massgen/logger_config.py +681 -0
- massgen/mcp_tools/README.md +232 -0
- massgen/mcp_tools/__init__.py +105 -0
- massgen/mcp_tools/backend_utils.py +1035 -0
- massgen/mcp_tools/circuit_breaker.py +195 -0
- massgen/mcp_tools/client.py +894 -0
- massgen/mcp_tools/config_validator.py +138 -0
- massgen/mcp_tools/docs/circuit_breaker.md +646 -0
- massgen/mcp_tools/docs/client.md +950 -0
- massgen/mcp_tools/docs/config_validator.md +478 -0
- massgen/mcp_tools/docs/exceptions.md +1165 -0
- massgen/mcp_tools/docs/security.md +854 -0
- massgen/mcp_tools/exceptions.py +338 -0
- massgen/mcp_tools/hooks.py +212 -0
- massgen/mcp_tools/security.py +780 -0
- massgen/message_templates.py +342 -64
- massgen/orchestrator.py +1515 -241
- massgen/stream_chunk/__init__.py +35 -0
- massgen/stream_chunk/base.py +92 -0
- massgen/stream_chunk/multimodal.py +237 -0
- massgen/stream_chunk/text.py +162 -0
- massgen/tests/mcp_test_server.py +150 -0
- massgen/tests/multi_turn_conversation_design.md +0 -8
- massgen/tests/test_azure_openai_backend.py +156 -0
- massgen/tests/test_backend_capabilities.py +262 -0
- massgen/tests/test_backend_event_loop_all.py +179 -0
- massgen/tests/test_chat_completions_refactor.py +142 -0
- massgen/tests/test_claude_backend.py +15 -28
- massgen/tests/test_claude_code.py +268 -0
- massgen/tests/test_claude_code_context_sharing.py +233 -0
- massgen/tests/test_claude_code_orchestrator.py +175 -0
- massgen/tests/test_cli_backends.py +180 -0
- massgen/tests/test_code_execution.py +679 -0
- massgen/tests/test_external_agent_backend.py +134 -0
- massgen/tests/test_final_presentation_fallback.py +237 -0
- massgen/tests/test_gemini_planning_mode.py +351 -0
- massgen/tests/test_grok_backend.py +7 -10
- massgen/tests/test_http_mcp_server.py +42 -0
- massgen/tests/test_integration_simple.py +198 -0
- massgen/tests/test_mcp_blocking.py +125 -0
- massgen/tests/test_message_context_building.py +29 -47
- massgen/tests/test_orchestrator_final_presentation.py +48 -0
- massgen/tests/test_path_permission_manager.py +2087 -0
- massgen/tests/test_rich_terminal_display.py +14 -13
- massgen/tests/test_timeout.py +133 -0
- massgen/tests/test_v3_3agents.py +11 -12
- massgen/tests/test_v3_simple.py +8 -13
- massgen/tests/test_v3_three_agents.py +11 -18
- massgen/tests/test_v3_two_agents.py +8 -13
- massgen/token_manager/__init__.py +7 -0
- massgen/token_manager/token_manager.py +400 -0
- massgen/utils.py +52 -16
- massgen/v1/agent.py +45 -91
- massgen/v1/agents.py +18 -53
- massgen/v1/backends/gemini.py +50 -153
- massgen/v1/backends/grok.py +21 -54
- massgen/v1/backends/oai.py +39 -111
- massgen/v1/cli.py +36 -93
- massgen/v1/config.py +8 -12
- massgen/v1/logging.py +43 -127
- massgen/v1/main.py +18 -32
- massgen/v1/orchestrator.py +68 -209
- massgen/v1/streaming_display.py +62 -163
- massgen/v1/tools.py +8 -12
- massgen/v1/types.py +9 -23
- massgen/v1/utils.py +5 -23
- massgen-0.1.0.dist-info/METADATA +1245 -0
- massgen-0.1.0.dist-info/RECORD +273 -0
- massgen-0.1.0.dist-info/entry_points.txt +2 -0
- massgen/frontend/logging/__init__.py +0 -9
- massgen/frontend/logging/realtime_logger.py +0 -197
- massgen-0.0.3.dist-info/METADATA +0 -568
- massgen-0.0.3.dist-info/RECORD +0 -76
- massgen-0.0.3.dist-info/entry_points.txt +0 -2
- /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
# Inference Backend Implementation Guide
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The inference backend (`massgen/backend/inference.py`) provides unified OpenAI-compatible integration with both vLLM and SGLang servers for high-performance local model deployment within the MassGen framework.
|
|
6
|
+
|
|
7
|
+
## vLLM Implementation
|
|
8
|
+
|
|
9
|
+
### Overview
|
|
10
|
+
|
|
11
|
+
The vLLM backend option shares the same inference backend class, but is configured to connect to vLLM servers that provide OpenAI-compatible APIs.
|
|
12
|
+
|
|
13
|
+
### Key Features
|
|
14
|
+
|
|
15
|
+
* **OpenAI-Compatible**: Full compatibility with OpenAI Chat Completions API.
|
|
16
|
+
* **Local Deployment**: Run models locally with full control.
|
|
17
|
+
* **vLLM-Specific Features**: Supports `top_k`, `repetition_penalty`, and `enable_thinking`.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Configuration
|
|
22
|
+
|
|
23
|
+
### Example Configuration (`three_agents_vllm.yaml`)
|
|
24
|
+
|
|
25
|
+
```yaml
|
|
26
|
+
agents:
|
|
27
|
+
- id: "gpt-oss"
|
|
28
|
+
backend:
|
|
29
|
+
type: "chatcompletion"
|
|
30
|
+
model: "gpt-oss-120b"
|
|
31
|
+
base_url: "https://api.cerebras.ai/v1"
|
|
32
|
+
- id: "qwen"
|
|
33
|
+
backend:
|
|
34
|
+
type: "vllm"
|
|
35
|
+
model: "Qwen/Qwen3-4B"
|
|
36
|
+
base_url: "http://localhost:8000/v1" # Change this to your vLLM server
|
|
37
|
+
- id: "glm"
|
|
38
|
+
backend:
|
|
39
|
+
type: "chatcompletion"
|
|
40
|
+
model: "glm-4.5"
|
|
41
|
+
base_url: "https://api.z.ai/api/paas/v4"
|
|
42
|
+
ui:
|
|
43
|
+
display_type: "rich_terminal"
|
|
44
|
+
logging_enabled: true
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
### Example Backend Configuration with vLLM Parameters
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
backend:
|
|
53
|
+
type: "vllm"
|
|
54
|
+
model: "Qwen/Qwen3-4B"
|
|
55
|
+
base_url: "http://localhost:8000/v1"
|
|
56
|
+
top_k: 50
|
|
57
|
+
repetition_penalty: 1.2
|
|
58
|
+
enable_thinking: true
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Base URL Configuration
|
|
64
|
+
|
|
65
|
+
The `base_url` should be specified in your config YAML file under the backend configuration. Here are example configurations:
|
|
66
|
+
|
|
67
|
+
```yaml
|
|
68
|
+
backend:
|
|
69
|
+
type: "vllm"
|
|
70
|
+
model: "Qwen/Qwen3-4B"
|
|
71
|
+
base_url: "http://localhost:8000/v1" # Local server (default)
|
|
72
|
+
# OR for remote/tunneled servers:
|
|
73
|
+
# base_url: "http://your-remote-server:8000/v1" # replace with the server url
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Environment Variables
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# vLLM API key (default "EMPTY" for local servers)
|
|
82
|
+
export VLLM_API_KEY="EMPTY"
|
|
83
|
+
```
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## vLLM Server Startup
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# Basic vLLM server
|
|
90
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
91
|
+
--model Qwen/Qwen3-4B \
|
|
92
|
+
--host 0.0.0.0 \
|
|
93
|
+
--port 8000
|
|
94
|
+
|
|
95
|
+
# Advanced vLLM server with Additional Parameters
|
|
96
|
+
python -m vllm.entrypoints.openai.api_server \
|
|
97
|
+
--model Qwen/Qwen3-4B \
|
|
98
|
+
--host 0.0.0.0 \
|
|
99
|
+
--port 8000 \
|
|
100
|
+
--tensor-parallel-size 1 \
|
|
101
|
+
--gpu-memory-utilization 0.9
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Usage
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Run with your vLLM configuration
|
|
109
|
+
uv run python -m massgen.cli --config massgen/configs/basic/multi/three_agents_vllm.yaml "your prompt"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Parameter Handling
|
|
113
|
+
|
|
114
|
+
### How to Add vLLM Parameters
|
|
115
|
+
|
|
116
|
+
Simply include vLLM-specific parameters in your backend configuration YAML—they will be automatically processed and passed to the vLLM server.
|
|
117
|
+
|
|
118
|
+
If you need specific parameters that aren't automatically handled, you can wrap them in an `extra_body` configuration in your YAML file, for example `extra_body: {chat_template_kwargs: {enable_thinking: true}}`.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Backend Architecture
|
|
123
|
+
|
|
124
|
+
The vLLM backend extends `ChatCompletionsBackend` and implements:
|
|
125
|
+
|
|
126
|
+
* Custom provider naming (returns `"vLLM"`).
|
|
127
|
+
* vLLM-specific API key handling (defaults to `"EMPTY"`).
|
|
128
|
+
* Specialized parameter processing in `_build_vllm_extra_body()`.
|
|
129
|
+
* Management of extra body parameters for vLLM-specific features.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
For more details, refer to the [vLLM Official Documentation](https://docs.vllm.ai/en/stable/).
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## SGLang Implementation
|
|
138
|
+
|
|
139
|
+
### Overview
|
|
140
|
+
|
|
141
|
+
The SGLang backend option shares the same inference backend class, but is configured to connect to SGLang servers that provide OpenAI-compatible APIs.
|
|
142
|
+
|
|
143
|
+
### Key Features
|
|
144
|
+
|
|
145
|
+
* **OpenAI-Compatible**: Uses SGLang's OpenAI-compatible endpoint at `/v1`.
|
|
146
|
+
* **Tool Call Parser**: Supports Qwen-family tool call parsing (e.g. `--tool-call-parser qwen25`).
|
|
147
|
+
* **Thinking Mode**: Forward compatible with `chat_template_kwargs.enable_thinking` and `separate_reasoning` in `extra_body`.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Configuration
|
|
152
|
+
|
|
153
|
+
### Example Configuration (`two_qwen_vllm_sglang.yaml`)
|
|
154
|
+
|
|
155
|
+
```yaml
|
|
156
|
+
agents:
|
|
157
|
+
- id: "qwen1"
|
|
158
|
+
backend:
|
|
159
|
+
type: "vllm"
|
|
160
|
+
model: "Qwen/Qwen3-4B"
|
|
161
|
+
base_url: "http://localhost:8000/v1"
|
|
162
|
+
chat_template_kwargs:
|
|
163
|
+
enable_thinking: True
|
|
164
|
+
top_k: 50
|
|
165
|
+
- id: "qwen2"
|
|
166
|
+
backend:
|
|
167
|
+
type: "sglang"
|
|
168
|
+
model: "Qwen/Qwen3-4B"
|
|
169
|
+
base_url: "http://localhost:30000/v1"
|
|
170
|
+
extra_body:
|
|
171
|
+
chat_template_kwargs:
|
|
172
|
+
enable_thinking: True
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
### Example Backend Configuration with SGLang Parameters
|
|
178
|
+
|
|
179
|
+
```yaml
|
|
180
|
+
backend:
|
|
181
|
+
type: "sglang"
|
|
182
|
+
model: "Qwen/Qwen3-4B"
|
|
183
|
+
base_url: "http://localhost:30000/v1"
|
|
184
|
+
extra_body:
|
|
185
|
+
chat_template_kwargs:
|
|
186
|
+
enable_thinking: true
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Base URL Configuration
|
|
192
|
+
|
|
193
|
+
The `base_url` should be specified in your config YAML file under the backend configuration. Here are example configurations:
|
|
194
|
+
|
|
195
|
+
```yaml
|
|
196
|
+
backend:
|
|
197
|
+
type: "sglang"
|
|
198
|
+
model: "Qwen/Qwen3-4B"
|
|
199
|
+
base_url: "http://localhost:30000/v1" # Local server (default)
|
|
200
|
+
# OR for remote/tunneled servers:
|
|
201
|
+
# base_url: "http://your-remote-server:30000/v1" # replace with the server url
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Environment Variables
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# SGLang API key (default "EMPTY" for local servers)
|
|
210
|
+
export SGLANG_API_KEY="EMPTY"
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## SGLang Server Startup
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# SGLang server
|
|
219
|
+
python -m sglang.launch_server \
|
|
220
|
+
--model-path Qwen/Qwen3-4B \
|
|
221
|
+
--tool-call-parser qwen25 \
|
|
222
|
+
--tensor-parallel-size 1 \
|
|
223
|
+
--log-level debug \
|
|
224
|
+
--log-requests \
|
|
225
|
+
--log-requests-level 1 \
|
|
226
|
+
--show-time-cost \
|
|
227
|
+
--port 30000
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Usage
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# Run with your SGLang configuration
|
|
234
|
+
uv run python -m massgen.cli --config massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml "your prompt"
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Parameter Handling
|
|
238
|
+
|
|
239
|
+
### How to Add SGLang Parameters
|
|
240
|
+
|
|
241
|
+
Simply include SGLang-specific parameters in your backend configuration YAML—they will be automatically processed and passed to the SGLang server.
|
|
242
|
+
|
|
243
|
+
If you need specific parameters that aren't automatically handled, you can wrap them in an `extra_body` configuration in your YAML file, as shown in "Example Backend Configuration with SGLang Parameters" above.
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## Backend Architecture
|
|
248
|
+
|
|
249
|
+
The SGLang backend extends the unified `InferenceBackend` and implements:
|
|
250
|
+
|
|
251
|
+
* Custom provider naming (returns `"SGLang"`).
|
|
252
|
+
* SGLang-specific API key handling (defaults to `"EMPTY"`).
|
|
253
|
+
* Specialized parameter processing in `_build_extra_body()` for SGLang-specific features like `chat_template_kwargs` and `separate_reasoning`.
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
For more details, refer to the [SGLang Official Documentation](https://docs.sglang.ai/).
|