sglang 0.4.10.post2__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. sglang/__init__.py +8 -3
  2. sglang/bench_one_batch.py +119 -17
  3. sglang/lang/chat_template.py +18 -0
  4. sglang/srt/bench_utils.py +137 -0
  5. sglang/srt/configs/model_config.py +42 -7
  6. sglang/srt/conversation.py +9 -5
  7. sglang/srt/disaggregation/base/conn.py +5 -2
  8. sglang/srt/disaggregation/decode.py +14 -4
  9. sglang/srt/disaggregation/decode_schedule_batch_mixin.py +3 -0
  10. sglang/srt/disaggregation/mooncake/conn.py +286 -160
  11. sglang/srt/disaggregation/mooncake/transfer_engine.py +29 -0
  12. sglang/srt/disaggregation/prefill.py +2 -0
  13. sglang/srt/distributed/parallel_state.py +15 -11
  14. sglang/srt/entrypoints/context.py +227 -0
  15. sglang/srt/entrypoints/engine.py +15 -9
  16. sglang/srt/entrypoints/harmony_utils.py +372 -0
  17. sglang/srt/entrypoints/http_server.py +74 -4
  18. sglang/srt/entrypoints/openai/protocol.py +218 -1
  19. sglang/srt/entrypoints/openai/serving_chat.py +41 -11
  20. sglang/srt/entrypoints/openai/serving_responses.py +1273 -0
  21. sglang/srt/entrypoints/openai/tool_server.py +175 -0
  22. sglang/srt/entrypoints/tool.py +87 -0
  23. sglang/srt/eplb/expert_location.py +5 -1
  24. sglang/srt/function_call/ebnf_composer.py +1 -0
  25. sglang/srt/function_call/function_call_parser.py +2 -0
  26. sglang/srt/function_call/glm4_moe_detector.py +1 -1
  27. sglang/srt/function_call/gpt_oss_detector.py +331 -0
  28. sglang/srt/function_call/kimik2_detector.py +3 -3
  29. sglang/srt/function_call/qwen3_coder_detector.py +219 -9
  30. sglang/srt/hf_transformers_utils.py +30 -3
  31. sglang/srt/jinja_template_utils.py +14 -1
  32. sglang/srt/layers/attention/aiter_backend.py +375 -115
  33. sglang/srt/layers/attention/ascend_backend.py +3 -0
  34. sglang/srt/layers/attention/dual_chunk_flashattention_backend.py +1700 -0
  35. sglang/srt/layers/attention/flashattention_backend.py +18 -0
  36. sglang/srt/layers/attention/flashinfer_backend.py +52 -13
  37. sglang/srt/layers/attention/hybrid_attn_backend.py +1 -1
  38. sglang/srt/layers/attention/triton_backend.py +85 -14
  39. sglang/srt/layers/attention/triton_ops/decode_attention.py +17 -0
  40. sglang/srt/layers/attention/triton_ops/extend_attention.py +143 -98
  41. sglang/srt/layers/attention/trtllm_mha_backend.py +332 -0
  42. sglang/srt/layers/attention/trtllm_mla_backend.py +119 -22
  43. sglang/srt/layers/attention/vision.py +22 -6
  44. sglang/srt/layers/attention/wave_backend.py +627 -0
  45. sglang/srt/layers/attention/wave_ops/decode_attention.py +186 -0
  46. sglang/srt/layers/attention/wave_ops/extend_attention.py +149 -0
  47. sglang/srt/layers/attention/wave_ops/prefill_attention.py +79 -0
  48. sglang/srt/layers/communicator.py +29 -14
  49. sglang/srt/layers/dp_attention.py +12 -0
  50. sglang/srt/layers/flashinfer_comm_fusion.py +4 -4
  51. sglang/srt/layers/linear.py +3 -7
  52. sglang/srt/layers/moe/cutlass_moe.py +12 -3
  53. sglang/srt/layers/moe/cutlass_w4a8_moe.py +4 -5
  54. sglang/srt/layers/moe/ep_moe/kernels.py +43 -0
  55. sglang/srt/layers/moe/ep_moe/layer.py +135 -73
  56. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  57. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=384,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  58. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +101 -12
  59. sglang/srt/layers/moe/fused_moe_triton/layer.py +412 -33
  60. sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +188 -3
  61. sglang/srt/layers/moe/token_dispatcher/deepep.py +61 -24
  62. sglang/srt/layers/moe/topk.py +16 -4
  63. sglang/srt/layers/moe/utils.py +16 -0
  64. sglang/srt/layers/quantization/__init__.py +27 -3
  65. sglang/srt/layers/quantization/fp4.py +557 -0
  66. sglang/srt/layers/quantization/fp8.py +3 -6
  67. sglang/srt/layers/quantization/fp8_kernel.py +277 -0
  68. sglang/srt/layers/quantization/fp8_utils.py +51 -10
  69. sglang/srt/layers/quantization/modelopt_quant.py +258 -68
  70. sglang/srt/layers/quantization/mxfp4.py +654 -0
  71. sglang/srt/layers/quantization/mxfp4_tensor.py +133 -0
  72. sglang/srt/layers/quantization/quark/schemes/__init__.py +6 -0
  73. sglang/srt/layers/quantization/quark/schemes/quark_scheme.py +55 -0
  74. sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py +118 -0
  75. sglang/srt/layers/quantization/quark/utils.py +107 -0
  76. sglang/srt/layers/quantization/unquant.py +60 -6
  77. sglang/srt/layers/quantization/w4afp8.py +21 -12
  78. sglang/srt/layers/quantization/w8a8_int8.py +48 -34
  79. sglang/srt/layers/rotary_embedding.py +506 -3
  80. sglang/srt/layers/utils.py +9 -0
  81. sglang/srt/layers/vocab_parallel_embedding.py +8 -3
  82. sglang/srt/lora/backend/base_backend.py +3 -23
  83. sglang/srt/lora/layers.py +60 -114
  84. sglang/srt/lora/lora.py +17 -62
  85. sglang/srt/lora/lora_manager.py +82 -62
  86. sglang/srt/lora/lora_registry.py +23 -11
  87. sglang/srt/lora/mem_pool.py +63 -68
  88. sglang/srt/lora/triton_ops/qkv_lora_b.py +1 -1
  89. sglang/srt/lora/utils.py +25 -58
  90. sglang/srt/managers/cache_controller.py +75 -58
  91. sglang/srt/managers/detokenizer_manager.py +1 -1
  92. sglang/srt/managers/io_struct.py +20 -8
  93. sglang/srt/managers/mm_utils.py +6 -13
  94. sglang/srt/managers/multimodal_processor.py +1 -1
  95. sglang/srt/managers/schedule_batch.py +61 -25
  96. sglang/srt/managers/schedule_policy.py +6 -6
  97. sglang/srt/managers/scheduler.py +41 -19
  98. sglang/srt/managers/scheduler_output_processor_mixin.py +1 -2
  99. sglang/srt/managers/scheduler_profiler_mixin.py +28 -8
  100. sglang/srt/managers/scheduler_recv_skipper.py +37 -0
  101. sglang/srt/managers/scheduler_update_weights_mixin.py +6 -0
  102. sglang/srt/managers/template_manager.py +35 -1
  103. sglang/srt/managers/tokenizer_manager.py +47 -30
  104. sglang/srt/managers/tp_worker.py +3 -0
  105. sglang/srt/managers/tp_worker_overlap_thread.py +3 -0
  106. sglang/srt/mem_cache/allocator.py +61 -87
  107. sglang/srt/mem_cache/hicache_storage.py +1 -1
  108. sglang/srt/mem_cache/hiradix_cache.py +80 -22
  109. sglang/srt/mem_cache/lora_radix_cache.py +421 -0
  110. sglang/srt/mem_cache/memory_pool_host.py +34 -36
  111. sglang/srt/mem_cache/multimodal_cache.py +33 -13
  112. sglang/srt/mem_cache/radix_cache.py +2 -5
  113. sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py +2 -2
  114. sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py +443 -0
  115. sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +139 -67
  116. sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +6 -9
  117. sglang/srt/model_executor/cuda_graph_runner.py +29 -9
  118. sglang/srt/model_executor/forward_batch_info.py +61 -19
  119. sglang/srt/model_executor/model_runner.py +148 -37
  120. sglang/srt/model_loader/loader.py +18 -6
  121. sglang/srt/model_loader/weight_utils.py +10 -0
  122. sglang/srt/models/bailing_moe.py +425 -0
  123. sglang/srt/models/deepseek_v2.py +137 -59
  124. sglang/srt/models/ernie4.py +426 -0
  125. sglang/srt/models/ernie4_eagle.py +203 -0
  126. sglang/srt/models/gemma2.py +0 -34
  127. sglang/srt/models/gemma3n_mm.py +38 -0
  128. sglang/srt/models/glm4.py +6 -0
  129. sglang/srt/models/glm4_moe.py +28 -16
  130. sglang/srt/models/glm4v.py +589 -0
  131. sglang/srt/models/glm4v_moe.py +400 -0
  132. sglang/srt/models/gpt_oss.py +1251 -0
  133. sglang/srt/models/granite.py +0 -25
  134. sglang/srt/models/llama.py +0 -25
  135. sglang/srt/models/llama4.py +1 -1
  136. sglang/srt/models/qwen2.py +6 -0
  137. sglang/srt/models/qwen2_5_vl.py +7 -3
  138. sglang/srt/models/qwen2_audio.py +10 -9
  139. sglang/srt/models/qwen2_moe.py +6 -0
  140. sglang/srt/models/qwen3.py +0 -24
  141. sglang/srt/models/qwen3_moe.py +32 -6
  142. sglang/srt/models/registry.py +1 -1
  143. sglang/srt/models/step3_vl.py +9 -0
  144. sglang/srt/models/torch_native_llama.py +0 -24
  145. sglang/srt/models/transformers.py +2 -5
  146. sglang/srt/multimodal/processors/base_processor.py +23 -13
  147. sglang/srt/multimodal/processors/glm4v.py +132 -0
  148. sglang/srt/multimodal/processors/qwen_audio.py +4 -2
  149. sglang/srt/multimodal/processors/step3_vl.py +3 -1
  150. sglang/srt/reasoning_parser.py +332 -37
  151. sglang/srt/server_args.py +186 -75
  152. sglang/srt/speculative/eagle_worker.py +16 -0
  153. sglang/srt/two_batch_overlap.py +169 -9
  154. sglang/srt/utils.py +41 -5
  155. sglang/srt/weight_sync/tensor_bucket.py +106 -0
  156. sglang/test/attention/test_trtllm_mla_backend.py +186 -36
  157. sglang/test/doc_patch.py +59 -0
  158. sglang/test/few_shot_gsm8k.py +1 -1
  159. sglang/test/few_shot_gsm8k_engine.py +1 -1
  160. sglang/test/run_eval.py +4 -1
  161. sglang/test/runners.py +2 -2
  162. sglang/test/simple_eval_common.py +6 -0
  163. sglang/test/simple_eval_gpqa.py +2 -0
  164. sglang/test/test_fp4_moe.py +118 -36
  165. sglang/test/test_utils.py +1 -1
  166. sglang/utils.py +1 -1
  167. sglang/version.py +1 -1
  168. {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc1.dist-info}/METADATA +36 -38
  169. {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc1.dist-info}/RECORD +174 -141
  170. sglang/srt/lora/backend/flashinfer_backend.py +0 -131
  171. /sglang/{api.py → lang/api.py} +0 -0
  172. /sglang/{lang/backend → srt/layers/quantization/quark}/__init__.py +0 -0
  173. {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc1.dist-info}/WHEEL +0 -0
  174. {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
  175. {sglang-0.4.10.post2.dist-info → sglang-0.5.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,443 @@
1
+ import argparse
2
+ import atexit
3
+ import json
4
+ import logging
5
+ import threading
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+ import requests
10
+ from fastapi import FastAPI, HTTPException, Request, status
11
+ from requests.adapters import HTTPAdapter
12
+ from urllib3.util.retry import Retry
13
+
14
+ from sglang.srt.mem_cache.storage.hf3fs.storage_hf3fs import Hf3fsMetadataInterface
15
+
16
# --- Configuration ---
# Root logger setup: INFO and above, each record prefixed with a timestamp
# and its level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
20
+
21
+
22
+ # --- Data Models ---
23
class RankMetadata:
    """Holds all metadata for a single rank.

    Tracks which page indices are still free and which key is stored in which
    page. Every public method takes ``self.lock`` for its whole duration.
    """

    def __init__(self, num_pages: int):
        self.lock = threading.RLock()
        self.num_pages = num_pages
        # Used as a stack: pages are handed out from the end of the list.
        self.free_pages: List[int] = list(range(num_pages))
        self.key_to_index: Dict[str, int] = {}
        # Todo: Support multi files for HF3FS

    def _release_pages(self, page_indices: List[int]) -> None:
        """Append pages to the free list, skipping any that are already free.

        The caller must hold ``self.lock``. Membership is checked against a set
        built once per call, so releasing a batch costs O(free + batch) instead
        of one linear scan of the free list per released page.
        """
        if not page_indices:
            return
        already_free = set(self.free_pages)
        for page_index in page_indices:
            if page_index not in already_free:
                already_free.add(page_index)
                self.free_pages.append(page_index)

    def exists_keys(self, keys: List[str]) -> List[bool]:
        """Return, for each key, whether it has a confirmed page."""
        with self.lock:
            return [key in self.key_to_index for key in keys]

    def reserve_and_allocate_page_indices(
        self, keys: List[Tuple[str, str]]
    ) -> List[Tuple[bool, int]]:
        """Reserve and allocate page indices for keys.

        Args:
            keys: ``(key, prefix_key)`` pairs. ``prefix_key`` is not used yet.

        Returns:
            One ``(already_exists, page_index)`` tuple per input pair, in input
            order. A key that is already confirmed yields ``(True, its_page)``.
            A new key yields ``(False, page)`` with a page taken from the free
            list, or ``(False, -1)`` when no free page is left. A newly
            reserved page is not recorded in ``key_to_index`` until
            ``confirm_write`` is called.
        """
        with self.lock:
            results: List[Tuple[bool, int]] = []
            # Todo: Implementing data eviction logic after HiCache supports prefix information pass-through
            for key, _prefix_key in keys:
                if key in self.key_to_index:
                    results.append((True, self.key_to_index[key]))
                elif self.free_pages:
                    results.append((False, self.free_pages.pop()))
                else:
                    results.append((False, -1))
            return results

    def confirm_write(
        self,
        written_keys_to_confirm: List[Tuple[str, int]],
        pages_to_release: List[int],
    ) -> None:
        """Confirm write operations and release pages.

        Args:
            written_keys_to_confirm: ``(key, page_index)`` pairs to record in
                ``key_to_index``.
            pages_to_release: page indices to put back on the free list; pages
                that are already free are ignored.
        """
        with self.lock:
            for key, page_index in written_keys_to_confirm:
                self.key_to_index[key] = page_index
            self._release_pages(pages_to_release)

    def delete_keys(self, keys: List[str]) -> int:
        """Delete keys, free their pages, and return the number of keys deleted.

        Keys that are not present are skipped and not counted.
        """
        with self.lock:
            released = [
                self.key_to_index.pop(key)
                for key in keys
                if key in self.key_to_index
            ]
            self._release_pages(released)
            return len(released)

    def clear_all(self) -> None:
        """Forget every key and mark all ``num_pages`` pages as free."""
        with self.lock:
            self.free_pages = list(range(self.num_pages))
            self.key_to_index.clear()

    def get_page_indices(self, keys: List[str]) -> List[Optional[int]]:
        """Return the page index for each key, or ``None`` for unknown keys."""
        with self.lock:
            return [self.key_to_index.get(key) for key in keys]
98
+
99
+
100
class GlobalMetadataState:
    """Manages the state for all ranks and persistence.

    ``ranks`` maps a rank id to its ``RankMetadata`` and is guarded by
    ``global_lock``. When ``persistence_path`` is set, the state is written to
    that file as JSON, both periodically and once more on shutdown.
    """

    def __init__(self, persistence_path: Optional[str], save_interval: int):
        self.global_lock = threading.RLock()
        self.ranks: Dict[int, RankMetadata] = {}
        self.persistence_path = Path(persistence_path) if persistence_path else None
        self.save_interval = save_interval
        self.save_timer: Optional[threading.Timer] = None
        self.is_shutting_down = False
        # Serializes file writes: the periodic save (timer thread) and the
        # final save in shutdown() share the same temporary file.
        self._save_lock = threading.Lock()

    def load_from_disk(self):
        """Populate ``ranks`` from the persistence file, if there is one.

        A missing file or disabled persistence leaves the state untouched. A
        file that cannot be parsed, or that has an unexpected structure, is
        logged and the server starts with no ranks. I/O errors while opening
        the file are not swallowed.
        """
        if not self.persistence_path or not self.persistence_path.exists():
            logging.info("Persistence file not found. Starting with a clean state.")
            return

        logging.info(f"Loading state from {self.persistence_path}")
        try:
            with open(self.persistence_path, "r") as f:
                persisted_data = json.load(f)

            if not isinstance(persisted_data, dict):
                raise TypeError(
                    f"expected a JSON object at top level, got {type(persisted_data).__name__}"
                )

            with self.global_lock:
                for rank_id_str, data in persisted_data.items():
                    # JSON object keys are always strings; rank ids are ints.
                    rank_id = int(rank_id_str)
                    num_pages = data["num_pages"]
                    rank_meta = RankMetadata(num_pages)
                    rank_meta.free_pages = list(data["free_pages"])
                    # Stored as a list of [key, page_index] pairs.
                    rank_meta.key_to_index = dict(data["key_to_index"])
                    self.ranks[rank_id] = rank_meta
            logging.info(
                f"Successfully loaded metadata for {len(self.ranks)} ranks."
            )
        # ValueError covers json.JSONDecodeError, a non-integer rank id and
        # malformed key/index pairs.
        except (ValueError, KeyError, TypeError) as e:
            logging.error(
                f"Failed to load or parse persistence file: {e}. Starting fresh.",
                exc_info=True,
            )
            with self.global_lock:
                self.ranks.clear()

    def save_to_disk(self):
        """Write a snapshot of all ranks to the persistence file.

        The snapshot is copied while the locks are held, written to a sibling
        ``.tmp`` file, and then moved over the target so readers never see a
        partially written file. Failures are logged, not raised.
        """
        if not self.persistence_path:
            return

        logging.info("Persisting metadata to disk...")
        with self.global_lock:
            serializable_state = {}
            for rank_id, rank_meta in self.ranks.items():
                with rank_meta.lock:
                    serializable_state[rank_id] = {
                        "num_pages": rank_meta.num_pages,
                        # Copy: serialization happens after the lock is
                        # released, so the live list must not be shared.
                        "free_pages": list(rank_meta.free_pages),
                        "key_to_index": list(rank_meta.key_to_index.items()),
                    }

        try:
            with self._save_lock:
                temp_path = self.persistence_path.with_suffix(".tmp")
                with open(temp_path, "w") as f:
                    json.dump(serializable_state, f, indent=4)
                # replace() overwrites an existing target on every platform;
                # rename() fails on Windows when the target exists.
                temp_path.replace(self.persistence_path)
            logging.info(f"Metadata successfully persisted to {self.persistence_path}")
        except Exception as e:
            logging.error(f"Failed to save metadata to disk: {e}", exc_info=True)

    def schedule_save(self):
        """Save now and re-arm a timer that calls this method again.

        Does nothing once shutdown has started or when persistence is off.
        """
        if self.is_shutting_down or not self.persistence_path:
            return
        self.save_to_disk()
        timer = threading.Timer(self.save_interval, self.schedule_save)
        # Daemon thread: a pending non-daemon timer is joined at interpreter
        # exit before atexit handlers run, which would delay or block exit.
        timer.daemon = True
        self.save_timer = timer
        timer.start()

    def shutdown(self):
        """Stop the periodic save and persist the state one last time."""
        logging.info("Shutting down metadata server...")
        self.is_shutting_down = True
        if self.save_timer:
            self.save_timer.cancel()
        self.save_to_disk()
        logging.info("Shutdown complete.")
177
+
178
+
179
+ # --- Global MetadataServer implementation ---
180
class Hf3fsMetadataServer:
    """FastAPI application exposing per-rank HF3FS page metadata over HTTP."""

    def __init__(self, persistence_path: Optional[str] = None, save_interval: int = 60):
        self.state = GlobalMetadataState(persistence_path, save_interval)
        self.app = FastAPI()
        self._setup_routes()

    def _setup_routes(self):
        """Register one POST route per metadata operation."""
        routes = (
            ("/{rank}/initialize", self.initialize),
            ("/{rank}/exists", self.exists),
            (
                "/{rank}/reserve_and_allocate_page_indices",
                self.reserve_and_allocate_page_indices,
            ),
            ("/{rank}/confirm_write", self.confirm_write),
            ("/{rank}/delete_keys", self.delete_keys),
            ("/{rank}/clear", self.clear),
            ("/{rank}/get_page_indices", self.get_page_indices),
        )
        for path, handler in routes:
            self.app.post(path)(handler)

    def get_rank_metadata(self, rank: int) -> RankMetadata:
        """Return the metadata of ``rank``; raise HTTP 404 if it was never initialized."""
        with self.state.global_lock:
            if rank in self.state.ranks:
                return self.state.ranks[rank]
            raise HTTPException(
                status_code=404,
                detail=f"Rank {rank} not initialized. Please call /{{rank}}/initialize first.",
            )

    async def initialize(self, rank: int, request: Request):
        """Create the metadata for ``rank`` unless it already exists.

        Reads ``num_pages`` from the JSON body. An existing rank is left
        untouched; a differing ``num_pages`` is only logged as a warning.
        """
        payload = await request.json()
        num_pages = payload["num_pages"]
        with self.state.global_lock:
            if rank not in self.state.ranks:
                logging.info(f"Initializing new Rank {rank} with {num_pages} pages.")
                self.state.ranks[rank] = RankMetadata(num_pages)
            else:
                logging.info(
                    f"Rank {rank} already exists. Initialization request ignored."
                )
                if self.state.ranks[rank].num_pages != num_pages:
                    logging.warning(
                        f"Rank {rank} initialized with different num_pages. Existing: {self.state.ranks[rank].num_pages}, New: {num_pages}"
                    )
        return {"message": f"Rank {rank} is ready."}

    async def exists(self, rank: int, request: Request):
        """Report, for each key in the body's ``keys``, whether it is known."""
        payload = await request.json()
        keys = payload["keys"]
        rank_meta = self.get_rank_metadata(rank)
        return {"exists": rank_meta.exists_keys(keys)}

    async def reserve_and_allocate_page_indices(self, rank: int, request: Request):
        """Look up or reserve a page for each entry of the body's ``keys``."""
        payload = await request.json()
        rank_meta = self.get_rank_metadata(rank)
        return {
            "indices": rank_meta.reserve_and_allocate_page_indices(payload["keys"])
        }

    async def confirm_write(self, rank: int, request: Request):
        """Record written keys and release pages; both body lists are optional."""
        payload = await request.json()
        rank_meta = self.get_rank_metadata(rank)
        success_written_keys = payload.get("written_keys_to_confirm", [])
        released_pages = payload.get("pages_to_release", [])

        rank_meta.confirm_write(success_written_keys, released_pages)

        message = f"Rank {rank}: Write confirmed for {len(success_written_keys)} keys. {len(released_pages)} pages released."
        return {"message": message}

    async def delete_keys(self, rank: int, request: Request):
        """Delete the body's ``keys`` and report how many were removed."""
        payload = await request.json()
        rank_meta = self.get_rank_metadata(rank)
        count = rank_meta.delete_keys(payload["keys"])
        return {"message": f"Rank {rank}: {count} keys deleted."}

    async def clear(self, rank: int):
        """Reset the metadata of ``rank``."""
        self.get_rank_metadata(rank).clear_all()
        return {"message": f"Rank {rank}: Metadata cleared."}

    async def get_page_indices(self, rank: int, request: Request):
        """Return the page index of each key in the body's ``keys``."""
        payload = await request.json()
        rank_meta = self.get_rank_metadata(rank)
        return {"indices": rank_meta.get_page_indices(payload["keys"])}

    def run(self, host: str = "0.0.0.0", port: int = 18000):
        """Load persisted state, start periodic saving if enabled, and serve with uvicorn."""
        state = self.state
        state.load_from_disk()
        if state.persistence_path:
            state.schedule_save()
            atexit.register(state.shutdown)

        import uvicorn

        logging.info(f"Starting metadata server on http://{host}:{port}")
        if not state.persistence_path:
            logging.info("Persistence is DISABLED.")
        else:
            logging.info(
                f"Persistence is ENABLED. Saving to '{self.state.persistence_path}' every {self.state.save_interval} seconds."
            )

        uvicorn.run(self.app, host=host, port=port)
296
+
297
+
298
+ # --- Client implementation ---
299
class Hf3fsGlobalMetadataClient(Hf3fsMetadataInterface):
    """Global http metadata client for HF3FS.

    Every operation is a JSON POST to ``{base_url}/{rank}/<operation>``.
    """

    def __init__(
        self,
        base_url: str,
        max_retries: int = 3,
        timeout: Optional[float] = None,
    ):
        """Create the client and its retrying HTTP session.

        Args:
            base_url: server address; a trailing slash is stripped.
            max_retries: total number of retries per request.
            timeout: per-request timeout in seconds passed to ``requests``;
                ``None`` (the default) waits indefinitely.
        """
        self.base_url = base_url.rstrip("/")
        self._timeout = timeout
        self._session = requests.Session()

        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=0.3,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET", "POST"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        # Mount for both schemes so an https:// base_url gets the same retry
        # policy instead of silently falling back to the default adapter.
        for scheme in ("http://", "https://"):
            self._session.mount(scheme, adapter)

    def _post(self, endpoint: str, json_data: dict) -> dict:
        """POST ``json_data`` to ``endpoint`` and return the decoded JSON reply.

        Raises:
            RuntimeError: on any ``requests`` failure, including a non-2xx
                status; the original exception is chained as the cause.
        """
        try:
            response = self._session.post(
                f"{self.base_url}/{endpoint}",
                json=json_data,
                timeout=self._timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to POST to {endpoint} after retries: {e}")
            raise RuntimeError(f"Failed to connect to metadata server: {e}") from e

    def initialize(self, rank: int, num_pages: int) -> None:
        """Ask the server to set up ``rank`` with ``num_pages`` pages."""
        self._post(f"{rank}/initialize", {"num_pages": num_pages})

    def reserve_and_allocate_page_indices(
        self, rank: int, keys: List[Tuple[str, str]]
    ) -> List[Tuple[bool, int]]:
        """Look up or reserve a page per key; returns the reply's ``indices`` as tuples."""
        response = self._post(
            f"{rank}/reserve_and_allocate_page_indices", {"keys": keys}
        )
        # JSON has no tuples: each entry arrives as a two-element list.
        return [tuple(item) for item in response.get("indices")]

    def confirm_write(
        self,
        rank: int,
        written_keys_to_confirm: List[Tuple[str, int]],
        pages_to_release: List[int],
    ) -> None:
        """Send the written keys to confirm and the pages to release."""
        self._post(
            f"{rank}/confirm_write",
            {
                "written_keys_to_confirm": written_keys_to_confirm,
                "pages_to_release": pages_to_release,
            },
        )

    def delete_keys(self, rank: int, keys: List[str]) -> None:
        """Ask the server to delete ``keys`` for ``rank``."""
        self._post(f"{rank}/delete_keys", {"keys": keys})

    def exists(self, rank: int, keys: List[str]) -> List[bool]:
        """Return the reply's ``exists`` list (empty if the field is absent)."""
        response = self._post(f"{rank}/exists", {"keys": keys})
        return response.get("exists", [])

    def clear(self, rank: int) -> None:
        """Ask the server to clear all metadata of ``rank``."""
        self._post(f"{rank}/clear", {})

    def get_page_indices(self, rank: int, keys: List[str]) -> List[Optional[int]]:
        """Return the reply's ``indices`` field for ``keys``."""
        response = self._post(f"{rank}/get_page_indices", {"keys": keys})
        return response.get("indices")
362
+
363
+
364
class Hf3fsLocalMetadataClient(Hf3fsMetadataInterface):
    """In-process metadata client backed by one ``RankMetadata`` object.

    No metadata server is involved. The ``rank`` argument of every method is
    accepted for interface compatibility and otherwise ignored.
    """

    def __init__(self):
        # Created by initialize(); None until then.
        self.rank_metadata = None

    def initialize(self, rank: int, num_pages: int) -> None:
        """Replace the held metadata with a fresh one of ``num_pages`` pages."""
        self.rank_metadata = RankMetadata(num_pages)

    def reserve_and_allocate_page_indices(
        self, rank: int, keys: List[Tuple[str, str]]
    ) -> List[Tuple[bool, int]]:
        """Forward to ``RankMetadata.reserve_and_allocate_page_indices``."""
        meta = self.rank_metadata
        return meta.reserve_and_allocate_page_indices(keys)

    def confirm_write(
        self,
        rank: int,
        written_keys_to_confirm: List[Tuple[str, int]],
        pages_to_release: List[int],
    ) -> None:
        """Forward to ``RankMetadata.confirm_write``."""
        meta = self.rank_metadata
        meta.confirm_write(written_keys_to_confirm, pages_to_release)

    def delete_keys(self, rank: int, keys: List[str]) -> None:
        """Forward to ``RankMetadata.delete_keys``; the count is discarded."""
        meta = self.rank_metadata
        meta.delete_keys(keys)

    def exists(self, rank: int, keys: List[str]) -> List[bool]:
        """Forward to ``RankMetadata.exists_keys``."""
        meta = self.rank_metadata
        return meta.exists_keys(keys)

    def clear(self, rank: int) -> None:
        """Forward to ``RankMetadata.clear_all``."""
        meta = self.rank_metadata
        meta.clear_all()

    def get_page_indices(self, rank: int, keys: List[str]) -> List[Optional[int]]:
        """Forward to ``RankMetadata.get_page_indices``."""
        meta = self.rank_metadata
        return meta.get_page_indices(keys)
403
+
404
+
405
def run_metadata_server(
    host: str = "0.0.0.0",
    port: int = 18000,
    persistence_path: Optional[str] = None,
    save_interval: int = 60,
):
    """Build an ``Hf3fsMetadataServer`` and run it on ``host``:``port``.

    The server instance is stored in the module-level ``server`` variable.
    """
    global server
    server = Hf3fsMetadataServer(persistence_path, save_interval)
    server.run(host, port)
418
+
419
+
420
+ # --- Main Execution ---
421
if __name__ == "__main__":
    # Command-line entry point: parse the options, then start the server.
    parser = argparse.ArgumentParser(description="HF3FS Metadata Server")
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        type=str,
        help="Host to bind the server to.",
    )
    parser.add_argument(
        "--port",
        default=18000,
        type=int,
        help="Port to run the server on.",
    )
    parser.add_argument(
        "--persistence-path",
        default=None,
        type=str,
        help="Path to the file for persisting metadata. If not provided, persistence is disabled.",
    )
    parser.add_argument(
        "--save-interval",
        default=60,
        type=int,
        help="Interval in seconds for periodically saving metadata to disk.",
    )
    args = parser.parse_args()

    run_metadata_server(
        host=args.host,
        port=args.port,
        persistence_path=args.persistence_path,
        save_interval=args.save_interval,
    )