northserve 2.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. northserve-2.0.6/CHANGELOG.md +107 -0
  2. northserve-2.0.6/MANIFEST.in +26 -0
  3. northserve-2.0.6/PKG-INFO +449 -0
  4. northserve-2.0.6/README.md +408 -0
  5. northserve-2.0.6/benchmark/backend_request_func.py +241 -0
  6. northserve-2.0.6/benchmark/benchmark_serving.py +643 -0
  7. northserve-2.0.6/benchmark/benchmark_serving_throughput.sh +48 -0
  8. northserve-2.0.6/benchmark/feishu.py +58 -0
  9. northserve-2.0.6/benchmark/report_to_feishu.py +100 -0
  10. northserve-2.0.6/configs/FILENAME.md +14 -0
  11. northserve-2.0.6/configs/bp-sglang_openai_debug.yaml +7 -0
  12. northserve-2.0.6/configs/bp-sglang_openai_generation.yaml +17 -0
  13. northserve-2.0.6/configs/bp-sglang_openai_sleep.yaml +5 -0
  14. northserve-2.0.6/configs/bp-vllm-wp-2_openai_generation.yaml +14 -0
  15. northserve-2.0.6/configs/bp-vllm-wp_openai_generation.yaml +19 -0
  16. northserve-2.0.6/configs/bp-vllm_openai_generation.yaml +13 -0
  17. northserve-2.0.6/configs/install.sh +16 -0
  18. northserve-2.0.6/configs/nla-sglang_openai_generation.yaml +16 -0
  19. northserve-2.0.6/configs/nla-sglang_openai_multinode.yaml +15 -0
  20. northserve-2.0.6/configs/north-llm-api_service.yaml +9 -0
  21. northserve-2.0.6/configs/sglang_openai_generation.yaml +15 -0
  22. northserve-2.0.6/configs/sglang_openai_minilb.yaml +11 -0
  23. northserve-2.0.6/configs/sglang_openai_multinode.yaml +22 -0
  24. northserve-2.0.6/configs/sglang_openai_multinodeserving.yaml +21 -0
  25. northserve-2.0.6/configs/sglang_openai_pddec.yaml +37 -0
  26. northserve-2.0.6/configs/sglang_openai_pdpre.yaml +37 -0
  27. northserve-2.0.6/configs/sglang_openai_sleep.yaml +3 -0
  28. northserve-2.0.6/configs/sglang_router.yaml +13 -0
  29. northserve-2.0.6/configs/sglang_weaver_generation.yaml +14 -0
  30. northserve-2.0.6/configs/vllm-native_openai_generation.yaml +12 -0
  31. northserve-2.0.6/configs/vllm_anthropic_generation.yaml +10 -0
  32. northserve-2.0.6/configs/vllm_anthropic_multinode.yaml +12 -0
  33. northserve-2.0.6/configs/vllm_openai_generation.yaml +11 -0
  34. northserve-2.0.6/configs/vllm_openai_multinode.yaml +13 -0
  35. northserve-2.0.6/northserve/__init__.py +13 -0
  36. northserve-2.0.6/northserve/__main__.py +8 -0
  37. northserve-2.0.6/northserve/cli.py +83 -0
  38. northserve-2.0.6/northserve/clients/__init__.py +3 -0
  39. northserve-2.0.6/northserve/clients/infrawave.py +338 -0
  40. northserve-2.0.6/northserve/clients/kubernetes.py +254 -0
  41. northserve-2.0.6/northserve/commands/__init__.py +3 -0
  42. northserve-2.0.6/northserve/commands/benchmark.py +73 -0
  43. northserve-2.0.6/northserve/commands/launch.py +184 -0
  44. northserve-2.0.6/northserve/commands/list.py +31 -0
  45. northserve-2.0.6/northserve/commands/north_llm_api.py +235 -0
  46. northserve-2.0.6/northserve/commands/stop.py +53 -0
  47. northserve-2.0.6/northserve/constants.py +71 -0
  48. northserve-2.0.6/northserve/core/__init__.py +3 -0
  49. northserve-2.0.6/northserve/core/benchmark_engine.py +156 -0
  50. northserve-2.0.6/northserve/core/config_builder.py +260 -0
  51. northserve-2.0.6/northserve/core/job_manager.py +423 -0
  52. northserve-2.0.6/northserve/core/template_renderer.py +244 -0
  53. northserve-2.0.6/northserve/models/__init__.py +3 -0
  54. northserve-2.0.6/northserve/models/deployment.py +198 -0
  55. northserve-2.0.6/northserve/models/enums.py +64 -0
  56. northserve-2.0.6/northserve/utils/__init__.py +3 -0
  57. northserve-2.0.6/northserve/utils/helpers.py +212 -0
  58. northserve-2.0.6/northserve/utils/logger.py +107 -0
  59. northserve-2.0.6/northserve/utils/updater.py +79 -0
  60. northserve-2.0.6/northserve/utils/validator.py +224 -0
  61. northserve-2.0.6/northserve.egg-info/PKG-INFO +449 -0
  62. northserve-2.0.6/northserve.egg-info/SOURCES.txt +114 -0
  63. northserve-2.0.6/northserve.egg-info/dependency_links.txt +1 -0
  64. northserve-2.0.6/northserve.egg-info/entry_points.txt +2 -0
  65. northserve-2.0.6/northserve.egg-info/requires.txt +17 -0
  66. northserve-2.0.6/northserve.egg-info/top_level.txt +1 -0
  67. northserve-2.0.6/pyproject.toml +100 -0
  68. northserve-2.0.6/requirements-dev.txt +19 -0
  69. northserve-2.0.6/requirements.txt +9 -0
  70. northserve-2.0.6/setup.cfg +4 -0
  71. northserve-2.0.6/setup.py +92 -0
  72. northserve-2.0.6/yaml_templates/deployment.yaml.jinja +349 -0
  73. northserve-2.0.6/yaml_templates/deployment_multi_node.jinja +552 -0
  74. northserve-2.0.6/yaml_templates/deployment_pd_decode.jinja +257 -0
  75. northserve-2.0.6/yaml_templates/deployment_pd_minilb.jinja +267 -0
  76. northserve-2.0.6/yaml_templates/deployment_pd_prefill.jinja +257 -0
  77. northserve-2.0.6/yaml_templates/ingress.yaml +26 -0
  78. northserve-2.0.6/yaml_templates/sa.yaml +34 -0
  79. northserve-2.0.6/yaml_templates/service.yaml +15 -0
@@ -0,0 +1,107 @@
1
+ # Changelog
2
+
3
+ All notable changes to NorthServing will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [2.0.2] - 2026-01-15
9
+
10
+ ### Changed
11
+ - **BREAKING**: Configuration now uses environment variables instead of config file
12
+ - Use `INFRAWAVES_USERNAME` and `INFRAWAVES_PASSWORD` environment variables
13
+ - Removed `~/.config/northjob/userinfo.conf` config file support
14
+ - See [ENV_CONFIG.md](ENV_CONFIG.md) for migration guide
15
+
16
+ ### Added
17
+ - Environment variable configuration support
18
+ - `migrate_config.sh` script to help migrate from old config file
19
+ - `test_env_config.py` script to verify configuration
20
+ - Comprehensive environment configuration documentation (ENV_CONFIG.md)
21
+
22
+ ### Improved
23
+ - Better error messages when credentials are not configured
24
+ - Simplified credential management for Docker/Kubernetes deployments
25
+ - More secure credential handling (no files to protect)
26
+
27
+ ### Migration Guide
28
+ See [ENV_CONFIG.md](ENV_CONFIG.md) for detailed migration instructions.
29
+
30
+ Quick migration:
31
+ ```bash
32
+ # Set environment variables
33
+ export INFRAWAVES_USERNAME='your_username'
34
+ export INFRAWAVES_PASSWORD='your_password'
35
+
36
+ # Add to shell config for persistence (note: this stores the password in plain text in ~/.bashrc)
37
+ echo 'export INFRAWAVES_USERNAME="your_username"' >> ~/.bashrc
38
+ echo 'export INFRAWAVES_PASSWORD="your_password"' >> ~/.bashrc
39
+ source ~/.bashrc
40
+ ```
41
+
42
+ Or use the migration script:
43
+ ```bash
44
+ ./migrate_config.sh
45
+ ```
46
+
47
+ ## [2.0.1] - 2026-01-14
48
+
49
+ ### Fixed
50
+ - Fixed static file packaging in wheel distribution
51
+ - YAML templates, configs, and benchmark files now correctly included in package
52
+ - Updated path resolution for installed packages
53
+
54
+ ### Changed
55
+ - Improved `pyproject.toml` configuration for package data
56
+ - Simplified build process by removing `prepare_package.sh`
57
+ - Updated `build_wheel.py` to work with new packaging approach
58
+
59
+ ## [2.0.0] - 2026-01-13
60
+
61
+ ### Added
62
+ - Complete Python refactoring from shell scripts
63
+ - Modern CLI using Click framework
64
+ - Comprehensive test suite with pytest
65
+ - Modular architecture:
66
+ - `commands/`: CLI command implementations
67
+ - `core/`: Core business logic
68
+ - `clients/`: API clients (Infrawave, Kubernetes)
69
+ - `models/`: Data models
70
+ - `utils/`: Utility functions
71
+ - Type hints and documentation
72
+ - Packaging support with wheel distribution
73
+ - Build automation with `build_wheel.py`
74
+
75
+ ### Changed
76
+ - **BREAKING**: All commands now use Python instead of shell scripts
77
+ - Configuration management using YAML and Jinja2 templates
78
+ - Improved error handling and logging
79
+ - Better code organization and maintainability
80
+
81
+ ### Removed
82
+ - Shell script implementations (moved to `tools/` for reference)
83
+ - Legacy configuration format
84
+
85
+ ### Migration from 1.x
86
+ The 1.x shell-based version is preserved in the `tools/` directory.
87
+ All functionality has been ported to Python with improved design.
88
+
89
+ To upgrade:
90
+ 1. Install the new Python package: `pip install -e .`
91
+ 2. Update your configuration (see Configuration section in README.md)
92
+ 3. Use the `northserve` command instead of `./northserve`
93
+
94
+ ## [1.x] - Legacy Shell Version
95
+
96
+ The original shell-based implementation is available in the `tools/` directory:
97
+ - `northserve.sh`: Main entry point
98
+ - `tools/launch.sh`: Launch deployments
99
+ - `tools/stop.sh`: Stop deployments
100
+ - `tools/list.sh`: List deployments
101
+ - And other helper scripts
102
+
103
+ This version is deprecated but preserved for reference.
104
+
105
+ ---
106
+
107
+ For the complete change history of the 1.x shell version, refer to the git history.
@@ -0,0 +1,26 @@
1
+ # Include documentation
2
+ include README.md
3
+ include CHANGELOG.md
4
+ include MIGRATION.md
5
+ include BUILD_GUIDE.md
6
+ include PACKAGING.md
7
+ include FIXES_SUMMARY.md
8
+ include requirements.txt
9
+ include requirements-dev.txt
10
+
11
+ # Include all data directories
12
+ graft configs
13
+ graft yaml_templates
14
+ graft benchmark
15
+
16
+ # Exclude unnecessary files
17
+ global-exclude *.pyc
18
+ global-exclude *.pyo
19
+ global-exclude __pycache__
20
+ global-exclude .DS_Store
21
+ global-exclude *.swp
22
+ global-exclude *.swo
23
+
24
+ # Exclude test and development files
25
+ prune tests
26
+ prune tools
@@ -0,0 +1,449 @@
1
+ Metadata-Version: 2.4
2
+ Name: northserve
3
+ Version: 2.0.6
4
+ Summary: A one-click LLM serving deployment tool for Kubernetes
5
+ Home-page: https://github.com/china-qijizhifeng/NorthServing
6
+ Author: NorthServing Team
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/china-qijizhifeng/NorthServing
9
+ Project-URL: Documentation, https://github.com/china-qijizhifeng/NorthServing#readme
10
+ Project-URL: Repository, https://github.com/china-qijizhifeng/NorthServing
11
+ Project-URL: Issues, https://github.com/china-qijizhifeng/NorthServing/issues
12
+ Keywords: llm,serving,kubernetes,volcano,deployment
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: click>=8.1.0
24
+ Requires-Dist: jinja2>=3.1.0
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: requests>=2.31.0
27
+ Requires-Dist: colorama>=0.4.6
28
+ Requires-Dist: packaging>=23.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
31
+ Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
33
+ Requires-Dist: black>=23.0.0; extra == "dev"
34
+ Requires-Dist: flake8>=6.0.0; extra == "dev"
35
+ Requires-Dist: mypy>=1.5.0; extra == "dev"
36
+ Requires-Dist: isort>=5.12.0; extra == "dev"
37
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
38
+ Requires-Dist: types-requests>=2.31.0; extra == "dev"
39
+ Dynamic: home-page
40
+ Dynamic: requires-python
41
+
42
+ # NorthServing
43
+
44
+ A one-click LLM serving deployment tool for Kubernetes with Volcano job scheduling.
45
+
46
+ ## Overview
47
+
48
+ NorthServing (北服) is a Python-based tool that simplifies the deployment and management of Large Language Model (LLM) serving infrastructure on Kubernetes. It provides a unified command-line interface for deploying models using various backends (vLLM, SGLang, etc.) with support for multi-node, multi-GPU configurations.
49
+
50
+ ## Features
51
+
52
+ - 🚀 **One-Click Deployment**: Launch LLM serving with a single command
53
+ - 🔄 **Multiple Backends**: Support for vLLM, SGLang, and other inference engines
54
+ - 📊 **Performance Benchmarking**: Built-in benchmarking tools with Feishu reporting
55
+ - 🌐 **Multi-Cluster Support**: Deploy across different Kubernetes clusters
56
+ - ⚡ **Advanced Configurations**:
57
+ - PD (Prefill-Decode) separation mode
58
+ - Multi-node deployments with Ray
59
+ - Tensor/Pipeline parallelism
60
+ - Custom resource scheduling
61
+ - 🧪 **Well-Tested**: Comprehensive test suite with >80% coverage
62
+
63
+ ## Installation
64
+
65
+ ### Prerequisites
66
+
67
+ - Python >= 3.8
68
+ - Kubernetes cluster with Volcano scheduler
69
+ - kubectl configured
70
+ - Access to Infrawave API (for job management)
71
+
72
+ ### Install from PyPI
73
+
74
+ ```bash
75
+ # Install from internal PyPI server
76
+ pip install northserve -i http://10.51.6.7:31624/simple/ --trusted-host 10.51.6.7 --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
77
+
78
+ # Or configure pip once (recommended)
79
+ mkdir -p ~/.pip
80
+ cat > ~/.pip/pip.conf << 'EOF'
81
+ [global]
82
+ index-url = http://10.51.6.7:31624/simple/
83
+ trusted-host = 10.51.6.7
84
+ EOF
85
+
86
+ # Then install normally
87
+ pip install northserve
88
+ ```
89
+
90
+ ### Install from Source
91
+
92
+ ```bash
93
+ git clone https://github.com/china-qijizhifeng/NorthServing.git
94
+ cd NorthServing
95
+
96
+ # Install dependencies from Tsinghua mirror
97
+ pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
98
+
99
+ # Install in development mode
100
+ pip install -e .
101
+ ```
102
+
103
+ For a detailed installation guide, see [INSTALL.md](INSTALL.md).
104
+
105
+ ### Configuration
106
+
107
+ Set up your credentials using environment variables:
108
+
109
+ ```bash
110
+ # Add to your ~/.bashrc or ~/.zshrc
111
+ export INFRAWAVES_USERNAME='your_username'
112
+ export INFRAWAVES_PASSWORD='your_password'
113
+
114
+ # Apply changes
115
+ source ~/.bashrc # or source ~/.zshrc
116
+ ```
117
+
118
+ ## Quick Start
119
+
120
+ ### Launch a Model
121
+
122
+ ```bash
123
+ northserve launch \
124
+ --model-name qwen2-72b-instruct \
125
+ --model-path /gpfs/models/huggingface.co/Qwen/Qwen2-72B-Instruct/ \
126
+ --replicas 1 \
127
+ --gpus-per-pod 8 \
128
+ --profile generation
129
+ ```
130
+
131
+ ### List Running Models
132
+
133
+ ```bash
134
+ northserve list
135
+ ```
136
+
137
+ ### Stop a Model
138
+
139
+ ```bash
140
+ northserve stop --model-name qwen2-72b-instruct
141
+ ```
142
+
143
+ ## Command Reference
144
+
145
+ ### `northserve launch`
146
+
147
+ Launch a new LLM serving deployment.
148
+
149
+ **Required Options:**
150
+ - `--model-name`: Model name for identification
151
+ - `--model-path`: Path to model weights (optional for some backends)
152
+
153
+ **Common Options:**
154
+ - `--backend`: Inference backend (default: vllm)
155
+ - `vllm`: vLLM inference engine
156
+ - `sglang`: SGLang inference engine
157
+ - `bp-vllm`: BP-optimized vLLM
158
+ - `crossing`: Crossing inference engine
159
+ - `--protocol`: API protocol (default: openai)
160
+ - `openai`: OpenAI-compatible API
161
+ - `anthropic`: Anthropic-compatible API
162
+ - `--replicas`: Number of replicas (default: 1)
163
+ - `--gpus-per-pod`: GPUs per pod (default: 1)
164
+ - `--pods-per-job`: Pods per job for multi-node (default: 1)
165
+ - `--gpu-type`: GPU type - `gpu`, `h20`, `4090d`
166
+ - `--namespace`: Kubernetes namespace (default: qiji)
167
+ - `--priority-class-name`: Priority class (default: low-priority-job)
168
+
169
+ **Advanced Options:**
170
+ - `--extra-cmds`: Additional command-line arguments for the engine
171
+ - `--extra-envs`: Extra environment variables (KEY=value KEY2=value2)
172
+ - `--tensor-parallel-size`: Tensor parallelism (defaults to gpus-per-pod)
173
+ - `--pipeline-parallel-size`: Pipeline parallelism (default: 1)
174
+ - `--prefill-nodes`: Prefill nodes for PD separation (SGLang only)
175
+ - `--decode-nodes`: Decode nodes for PD separation (SGLang only)
176
+ - `--use-host-network`: Use host network
177
+ - `--standalone`: Create standalone service with NodePort
178
+ - `-y, --yes`: Skip confirmation prompts
179
+
180
+ **Examples:**
181
+
182
+ Simple deployment:
183
+ ```bash
184
+ northserve launch --model-name llama2-7b --model-path /gpfs/models/llama2-7b --gpus-per-pod 1
185
+ ```
186
+
187
+ Multi-GPU deployment:
188
+ ```bash
189
+ northserve launch \
190
+ --model-name qwen2-72b \
191
+ --model-path /gpfs/models/qwen2-72b \
192
+ --gpus-per-pod 8 \
193
+ --replicas 2
194
+ ```
195
+
196
+ With custom arguments:
197
+ ```bash
198
+ northserve launch \
199
+ --model-name mistral-large \
200
+ --model-path /gpfs/models/mistral-large \
201
+ --gpus-per-pod 8 \
202
+ --extra-cmds "--max-num-batched-tokens=16384 --max-model-len=16384 --enforce-eager"
203
+ ```
204
+
205
+ PD separation mode (SGLang):
206
+ ```bash
207
+ northserve launch \
208
+ --model-name qwen2-72b \
209
+ --model-path /gpfs/models/qwen2-72b \
210
+ --backend sglang \
211
+ --gpus-per-pod 8 \
212
+ --prefill-nodes 2 \
213
+ --decode-nodes 4 \
214
+ --minilb-replicas 4
215
+ ```
216
+
217
+ ### `northserve stop`
218
+
219
+ Stop a running deployment.
220
+
221
+ **Options:**
222
+ - `--model-name`: Model name to stop (required)
223
+ - `--backend`: Backend type (default: vllm)
224
+ - `--namespace`: Kubernetes namespace (default: qiji)
225
+ - `--standalone`: Stop standalone service
226
+ - `-y, --yes`: Skip confirmation
227
+
228
+ **Example:**
229
+ ```bash
230
+ northserve stop --model-name qwen2-72b-instruct
231
+ ```
232
+
233
+ ### `northserve list`
234
+
235
+ List all deployed models and their status.
236
+
237
+ **Example:**
238
+ ```bash
239
+ northserve list
240
+ ```
241
+
242
+ ### `northserve benchmark`
243
+
244
+ Performance benchmarking commands.
245
+
246
+ #### `northserve benchmark launch`
247
+
248
+ Launch a benchmark test on a running deployment.
249
+
250
+ **Options:**
251
+ - `--model-name`: Model name to benchmark (required)
252
+ - `--model-path`: Path to model weights (required)
253
+ - `--backend`: Backend type
254
+ - `--namespace`: Kubernetes namespace
255
+
256
+ **Example:**
257
+ ```bash
258
+ northserve benchmark launch \
259
+ --model-name qwen2-72b \
260
+ --model-path /gpfs/models/qwen2-72b \
261
+ --backend vllm
262
+ ```
263
+
264
+ #### `northserve benchmark report`
265
+
266
+ Report benchmark results to Feishu.
267
+
268
+ **Options:**
269
+ - `--log-path`: Path to benchmark logs (required)
270
+ - `--config-file`: Path to Feishu config file (required)
271
+
272
+ **Example:**
273
+ ```bash
274
+ northserve benchmark report \
275
+ --log-path ~/.northserve/logs/qwen2-72b-vllm-server-0 \
276
+ --config-file ~/.northserve/feishu.json
277
+ ```
278
+
279
+ ### `northserve launch_north_llm_api`
280
+
281
+ Launch the North LLM API service.
282
+
283
+ **Options:**
284
+ - `--version`: Version to deploy (default: v0.2.3)
285
+ - `--replicas`: Number of replicas (default: 1)
286
+ - `--namespace`: Kubernetes namespace (default: qiji)
287
+
288
+ **Example:**
289
+ ```bash
290
+ northserve launch_north_llm_api --version v0.2.3 --replicas 2
291
+ ```
292
+
293
+ ### `northserve stop_north_llm_api`
294
+
295
+ Stop the North LLM API service.
296
+
297
+ **Example:**
298
+ ```bash
299
+ northserve stop_north_llm_api --version v0.2.3
300
+ ```
301
+
302
+ ## Architecture
303
+
304
+ NorthServing follows a modular architecture:
305
+
306
+ ```
307
+ ┌─────────────────────────────────────┐
308
+ │ CLI Interface (Click) │
309
+ └──────────────┬──────────────────────┘
310
+
311
+ ┌──────────┴──────────┐
312
+ │ │
313
+ ┌───▼────┐ ┌─────▼──────┐
314
+ │Commands│ │Core Logic │
315
+ └───┬────┘ └─────┬──────┘
316
+ │ │
317
+ │ ┌───────────────┴────────────────┐
318
+ │ │ │
319
+ │ ┌──▼──────────┐ ┌─────────▼────────┐
320
+ │ │Job Manager │ │Config Builder │
321
+ │ └──┬──────────┘ └─────────┬────────┘
322
+ │ │ │
323
+ │ ┌──▼────────────┐ ┌─────────▼────────┐
324
+ └─┤API Clients │ │Template Renderer │
325
+ └───────────────┘ └──────────────────┘
326
+ ```
327
+
328
+ ### Key Components
329
+
330
+ - **CLI Layer**: Click-based command-line interface
331
+ - **Commands**: Individual command implementations (launch, stop, list, etc.)
332
+ - **Core Logic**:
333
+ - `JobManager`: Orchestrates deployment lifecycle
334
+ - `ConfigBuilder`: Builds deployment configurations
335
+ - `TemplateRenderer`: Jinja2 template rendering
336
+ - `BenchmarkEngine`: Performance testing
337
+ - **API Clients**:
338
+ - `InfrawaveClient`: Infrawave API integration
339
+ - `KubernetesClient`: Direct kubectl operations
340
+ - **Models**: Type-safe data models with validation
341
+ - **Utils**: Validators, logger, helpers
342
+
343
+ ## Development
344
+
345
+ ### Running Tests
346
+
347
+ ```bash
348
+ # Install dev dependencies
349
+ pip install -r requirements-dev.txt
350
+
351
+ # Run all tests
352
+ pytest
353
+
354
+ # Run with coverage
355
+ pytest --cov=northserve --cov-report=html
356
+
357
+ # Run specific test file
358
+ pytest tests/test_core/test_config_builder.py
359
+ ```
360
+
361
+ ### Code Quality
362
+
363
+ ```bash
364
+ # Format code
365
+ black northserve tests
366
+
367
+ # Sort imports
368
+ isort northserve tests
369
+
370
+ # Lint
371
+ flake8 northserve tests
372
+
373
+ # Type checking
374
+ mypy northserve
375
+ ```
376
+
377
+ ## Migration from Shell Version
378
+
379
+ The Python version maintains backward compatibility with the shell-based version:
380
+
381
+ - **Same Command Interface**: All commands work the same way
382
+ - **Same Configuration Files**: YAML configs and templates unchanged
383
+ - **Same Output**: Identical deployment behavior
384
+
385
+ To use the new version, simply install it and use `northserve` instead of the old shell script.
386
+
387
+ ## Troubleshooting
388
+
389
+ ### Common Issues
390
+
391
+ **"Config file not found"**
392
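+ - Config file support (`~/.config/northjob/userinfo.conf`) was removed in 2.0.2; ensure the `INFRAWAVES_USERNAME` and `INFRAWAVES_PASSWORD` environment variables are set with valid credentials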
393
+
394
+ **"Failed to create job"**
395
+ - Check Infrawave API connectivity
396
+ - Verify your credentials are correct
397
+ - Ensure you have permissions for the namespace
398
+
399
+ **"Template not found"**
400
+ - Make sure you installed from the repository root
401
+ - YAML templates should be in `yaml_templates/` directory
402
+
403
+ **"Invalid backend"**
404
+ - Use one of: vllm, sglang, bp-vllm, crossing
405
+ - Note: `nla-vllm` is deprecated; use `bp-vllm` instead
406
+
407
+ ### Debug Mode
408
+
409
+ Enable debug logging:
410
+
411
+ ```bash
412
+ export NORTHSERVE_LOG_LEVEL=DEBUG
413
+ northserve launch ...
414
+ ```
415
+
416
+ Skip auto-update checks:
417
+
418
+ ```bash
419
+ export NORTHSERVE_SKIP_UPDATE=1
420
+ northserve launch ...
421
+ ```
422
+
423
+ ## Contributing
424
+
425
+ Contributions are welcome! Please:
426
+
427
+ 1. Fork the repository
428
+ 2. Create a feature branch
429
+ 3. Make your changes with tests
430
+ 4. Run the test suite
431
+ 5. Submit a pull request
432
+
433
+ ## License
434
+
435
+ NorthServing is released under the MIT License. See the LICENSE file for details.
436
+
437
+ ## Support
438
+
439
+ For issues and questions:
440
+ - GitHub Issues: https://github.com/china-qijizhifeng/NorthServing/issues
441
+ - Documentation: See this README and inline help (`northserve --help`)
442
+
443
+ ## Why NorthServing?
444
+
445
+ - ✅ **Training-Inference Unified Scheduling**: Uses Volcano jobs compatible with training workloads
446
+ - ✅ **Multi-Backend Support**: Unified interface for different inference engines
447
+ - ✅ **Cross-Cluster Deployment**: Deploy to multiple clusters with unified ingress
448
+ - ✅ **Production Ready**: Mature codebase with comprehensive testing
449
+ - ✅ **Easy Automation**: Command-line interface perfect for CI/CD pipelines