sglang 0.4.3__py3-none-any.whl → 0.4.3.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. sglang/lang/backend/openai.py +5 -0
  2. sglang/lang/chat_template.py +22 -7
  3. sglang/lang/ir.py +1 -0
  4. sglang/srt/configs/__init__.py +6 -3
  5. sglang/srt/configs/model_config.py +2 -0
  6. sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
  7. sglang/srt/entrypoints/engine.py +17 -2
  8. sglang/srt/hf_transformers_utils.py +2 -3
  9. sglang/srt/layers/attention/flashinfer_backend.py +101 -30
  10. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  11. sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  12. sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  13. sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  14. sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  15. sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  16. sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  17. sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  18. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  19. sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  20. sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  21. sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  22. sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  23. sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  24. sglang/srt/managers/image_processor.py +217 -122
  25. sglang/srt/managers/schedule_batch.py +1 -0
  26. sglang/srt/model_executor/forward_batch_info.py +4 -1
  27. sglang/srt/model_executor/model_runner.py +1 -0
  28. sglang/srt/models/deepseek_nextn.py +295 -0
  29. sglang/srt/models/deepseek_v2.py +9 -3
  30. sglang/srt/models/llava.py +2 -1
  31. sglang/srt/models/qwen2_5_vl.py +722 -0
  32. sglang/srt/models/qwen2_vl.py +2 -1
  33. sglang/srt/openai_api/adapter.py +17 -3
  34. sglang/srt/server_args.py +6 -3
  35. sglang/srt/speculative/eagle_worker.py +7 -2
  36. sglang/srt/speculative/spec_info.py +11 -1
  37. sglang/utils.py +99 -19
  38. sglang/version.py +1 -1
  39. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/METADATA +3 -3
  40. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/RECORD +43 -27
  41. sglang/srt/configs/qwen2vl.py +0 -130
  42. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/LICENSE +0 -0
  43. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/WHEEL +0 -0
  44. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/top_level.txt +0 -0
@@ -115,6 +115,9 @@ class Engine:
         sampling_params: Optional[Union[List[Dict], Dict]] = None,
         # The token ids for text; one can either specify text or input_ids.
         input_ids: Optional[Union[List[List[int]], List[int]]] = None,
+        # The image input. It can be a file name, a url, or base64 encoded string.
+        # See also python/sglang/srt/utils.py:load_image.
+        image_data: Optional[Union[List[str], str]] = None,
         return_logprob: Optional[Union[List[bool], bool]] = False,
         logprob_start_len: Optional[Union[List[int], int]] = None,
         top_logprobs_num: Optional[Union[List[int], int]] = None,
@@ -126,14 +129,20 @@ class Engine:
         The arguments of this function is the same as `sglang/srt/managers/io_struct.py::GenerateReqInput`.
         Please refer to `GenerateReqInput` for the documentation.
         """
+        modalities_list = []
+        if image_data is not None:
+            modalities_list.append("image")
+
         obj = GenerateReqInput(
             text=prompt,
             input_ids=input_ids,
             sampling_params=sampling_params,
+            image_data=image_data,
             return_logprob=return_logprob,
             logprob_start_len=logprob_start_len,
             top_logprobs_num=top_logprobs_num,
             lora_path=lora_path,
+            modalities=modalities_list,
             custom_logit_processor=custom_logit_processor,
             stream=stream,
         )
@@ -162,6 +171,9 @@ class Engine:
         sampling_params: Optional[Union[List[Dict], Dict]] = None,
         # The token ids for text; one can either specify text or input_ids.
         input_ids: Optional[Union[List[List[int]], List[int]]] = None,
+        # The image input. It can be a file name, a url, or base64 encoded string.
+        # See also python/sglang/srt/utils.py:load_image.
+        image_data: Optional[Union[List[str], str]] = None,
         return_logprob: Optional[Union[List[bool], bool]] = False,
         logprob_start_len: Optional[Union[List[int], int]] = None,
         top_logprobs_num: Optional[Union[List[int], int]] = None,
@@ -177,6 +189,7 @@ class Engine:
             text=prompt,
             input_ids=input_ids,
             sampling_params=sampling_params,
+            image_data=image_data,
             return_logprob=return_logprob,
             logprob_start_len=logprob_start_len,
             top_logprobs_num=top_logprobs_num,
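
With the hunks above, both Engine.generate and its async variant accept image input directly. A minimal usage sketch (the model path, prompt, and sampling parameters are illustrative assumptions, not part of this diff):

    import sglang as sgl

    # Hypothetical example: any vision-language model served by sglang.
    llm = sgl.Engine(model_path="Qwen/Qwen2.5-VL-7B-Instruct")
    out = llm.generate(
        prompt="Describe this image.",
        # New in this release: a file name, a URL, or a base64-encoded string
        # (see python/sglang/srt/utils.py:load_image).
        image_data="https://example.com/cat.png",
        sampling_params={"temperature": 0, "max_new_tokens": 64},
    )
    print(out["text"])
    llm.shutdown()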
@@ -317,7 +330,7 @@ def _set_envs_and_config(server_args: ServerArgs):
     if server_args.attention_backend == "flashinfer":
         assert_pkg_version(
             "flashinfer_python",
-            "0.2.1.post1",
+            "0.2.1.post2",
             "Please uninstall the old version and "
             "reinstall the latest version by following the instructions "
             "at https://docs.flashinfer.ai/installation.html.",
@@ -425,7 +438,9 @@ def _launch_subprocesses(server_args: ServerArgs) -> Tuple[TokenizerManager, Dic
     # Launch tokenizer process
     tokenizer_manager = TokenizerManager(server_args, port_args)
     if server_args.chat_template:
-        load_chat_template_for_openai_api(tokenizer_manager, server_args.chat_template)
+        load_chat_template_for_openai_api(
+            tokenizer_manager, server_args.chat_template, server_args.model_path
+        )

     # Wait for the model to finish loading
     scheduler_infos = []
@@ -30,16 +30,15 @@ from transformers import (
 )
 from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES

-from sglang.srt.configs import ChatGLMConfig, DbrxConfig, ExaoneConfig, Qwen2VLConfig
+from sglang.srt.configs import ChatGLMConfig, DbrxConfig, ExaoneConfig, Qwen2_5_VLConfig

 _CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
     ChatGLMConfig.model_type: ChatGLMConfig,
     DbrxConfig.model_type: DbrxConfig,
     ExaoneConfig.model_type: ExaoneConfig,
-    Qwen2VLConfig.model_type: Qwen2VLConfig,
+    Qwen2_5_VLConfig.model_type: Qwen2_5_VLConfig,
 }

-
 for name, cls in _CONFIG_REGISTRY.items():
     with contextlib.suppress(ValueError):
         AutoConfig.register(name, cls)
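
The registry swap above relies on the standard transformers registration pattern. A minimal sketch with a hypothetical config class (AutoConfig.register raises ValueError for an already-registered model_type, which is why each call is wrapped in contextlib.suppress):

    import contextlib
    from transformers import AutoConfig, PretrainedConfig

    class MyVLConfig(PretrainedConfig):  # hypothetical stand-in for Qwen2_5_VLConfig
        model_type = "my_vl_model"

    # Tolerate re-registration, as the loop in the diff does.
    with contextlib.suppress(ValueError):
        AutoConfig.register(MyVLConfig.model_type, MyVLConfig)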
@@ -54,7 +54,9 @@ class DecodeMetadata:

 @dataclass
 class PrefillMetadata:
-    prefill_wrappers: List[BatchPrefillWithPagedKVCacheWrapper]
+    prefill_wrappers: List[
+        Union[BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper]
+    ]
     use_ragged: bool
     extend_no_prefix: bool

@@ -160,16 +162,36 @@ class FlashInferAttnBackend(AttentionBackend):
         self.decode_wrappers = []
         for _ in range(self.num_wrappers):
             if not skip_prefill:
-                self.prefill_wrappers_paged.append(
-                    BatchPrefillWithPagedKVCacheWrapper(
-                        self.workspace_buffer,
-                        "NHD",
-                        backend="fa2",
+                if (
+                    self.enable_flashinfer_mla
+                    and not global_server_args_dict["disable_radix_cache"]
+                ):
+                    # use mla paged prefill
+                    self.prefill_wrappers_paged.append(
+                        BatchMLAPagedAttentionWrapper(
+                            self.workspace_buffer,
+                            backend="fa2",
+                        )
+                    )
+                    self.prefill_wrappers_verify.append(
+                        BatchMLAPagedAttentionWrapper(
+                            self.workspace_buffer,
+                            backend="fa2",
+                        )
+                    )
+                else:
+                    self.prefill_wrappers_paged.append(
+                        BatchPrefillWithPagedKVCacheWrapper(
+                            self.workspace_buffer,
+                            "NHD",
+                            backend="fa2",
+                        )
+                    )
+                    self.prefill_wrappers_verify.append(
+                        BatchPrefillWithPagedKVCacheWrapper(
+                            self.workspace_buffer, "NHD"
+                        )
                     )
-                )
-                self.prefill_wrappers_verify.append(
-                    BatchPrefillWithPagedKVCacheWrapper(self.workspace_buffer, "NHD")
-                )
             if self.enable_flashinfer_mla:
                 self.decode_wrappers.append(
                     BatchMLAPagedAttentionWrapper(self.workspace_buffer, backend="fa2")
@@ -237,7 +259,10 @@ class FlashInferAttnBackend(AttentionBackend):
         else:
             prefix_lens = forward_batch.extend_prefix_lens

-        if self.is_multimodal:
+        if self.is_multimodal or (
+            self.enable_flashinfer_mla
+            and not global_server_args_dict["disable_radix_cache"]
+        ):
             use_ragged = False
             extend_no_prefix = False
         else:
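
Together with the wrapper construction earlier, this condition fixes the MLA prefill dispatch: with the radix cache on, requests can carry a cached KV prefix, so the prefix-aware paged kernel is selected and the ragged path is disabled. A sketch of our reading (this helper is not in the diff):

    def mla_prefill_mode(enable_flashinfer_mla: bool, disable_radix_cache: bool) -> str:
        # Our reading of the conditions in this release; not a function in the code.
        if not enable_flashinfer_mla:
            return "standard paged prefill"
        return "mla ragged prefill" if disable_radix_cache else "mla paged prefill"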
@@ -419,23 +444,43 @@ class FlashInferAttnBackend(AttentionBackend):

         logits_soft_cap = layer.logit_cap

-        o1, _ = self.prefill_wrapper_ragged.forward_return_lse(
-            q.view(-1, layer.tp_q_head_num, layer.head_dim),
-            k.view(-1, layer.tp_k_head_num, layer.head_dim),
-            v.view(-1, layer.tp_v_head_num, layer.v_head_dim),
-            causal=True,
-            sm_scale=layer.scaling,
-            logits_soft_cap=logits_soft_cap,
-        )
-
-        o = o1
+        if global_server_args_dict["disable_radix_cache"]:
+            # use mla ragged prefill
+            o, _ = self.prefill_wrapper_ragged.forward_return_lse(
+                q.view(-1, layer.tp_q_head_num, layer.head_dim),
+                k.view(-1, layer.tp_k_head_num, layer.head_dim),
+                v.view(-1, layer.tp_v_head_num, layer.v_head_dim),
+                causal=True,
+                sm_scale=layer.scaling,
+                logits_soft_cap=logits_soft_cap,
+            )

-        if save_kv_cache:
-            forward_batch.token_to_kv_pool.set_kv_buffer(
-                layer,
-                cache_loc,
-                k,
-                v,
+            if save_kv_cache:
+                forward_batch.token_to_kv_pool.set_kv_buffer(
+                    layer,
+                    cache_loc,
+                    k,
+                    v,
+                )
+        else:
+            # use mla paged prefill
+            prefill_wrapper_paged = self.forward_metadata.prefill_wrappers[
+                self._get_wrapper_idx(layer)
+            ]
+            if k is not None:
+                assert v is not None
+                if save_kv_cache:
+                    forward_batch.token_to_kv_pool.set_kv_buffer(
+                        layer, cache_loc, k, v
+                    )
+            qall = q.view(-1, layer.tp_q_head_num, layer.head_dim)
+            k_buf = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
+
+            o = prefill_wrapper_paged.run(
+                qall[:, :, : layer.v_head_dim],
+                qall[:, :, layer.v_head_dim :],
+                k_buf[:, :, : layer.v_head_dim],
+                k_buf[:, :, layer.v_head_dim :],
             )

         return o.view(-1, layer.tp_q_head_num * layer.v_head_dim)
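
The slices at layer.v_head_dim split each MLA head into its compressed-KV ("nope") part and its rotary part. A shape sketch assuming DeepSeek-V2/V3 MLA dimensions (kv_lora_rank 512 + qk_rope_head_dim 64 = head_dim 576), which this code path targets:

    import math
    import torch

    head_dim, v_head_dim = 576, 512
    qall = torch.randn(8, 16, head_dim)  # (tokens, q_heads, head_dim)
    q_nope, q_rope = qall[:, :, :v_head_dim], qall[:, :, v_head_dim:]
    assert q_nope.shape[-1] == 512 and q_rope.shape[-1] == 64
    # The softmax scale uses the pre-absorption qk head dim
    # (qk_nope_head_dim 128 + qk_rope_head_dim 64 = 192), matching plan() below.
    print(1 / math.sqrt(192))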
@@ -800,7 +845,9 @@ class FlashInferIndicesUpdaterPrefill:
         seq_lens: torch.Tensor,
         seq_lens_sum: int,
         prefix_lens: torch.Tensor,
-        prefill_wrappers: List[BatchPrefillWithPagedKVCacheWrapper],
+        prefill_wrappers: List[
+            Union[BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper]
+        ],
         use_ragged: bool,
         encoder_lens: Optional[torch.Tensor],
         spec_info: Optional[SpecInfo],
@@ -814,7 +861,9 @@ class FlashInferIndicesUpdaterPrefill:
         seq_lens: torch.Tensor,
         seq_lens_sum: int,
         prefix_lens: torch.Tensor,
-        prefill_wrappers: List[BatchPrefillWithPagedKVCacheWrapper],
+        prefill_wrappers: List[
+            Union[BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper]
+        ],
         use_ragged: bool,
         encoder_lens: Optional[torch.Tensor],
         spec_info: Optional[SpecInfo],
@@ -923,7 +972,9 @@ class FlashInferIndicesUpdaterPrefill:
     def call_begin_forward(
         self,
         wrapper_ragged: BatchPrefillWithRaggedKVCacheWrapper,
-        wrapper_paged: BatchPrefillWithPagedKVCacheWrapper,
+        wrapper_paged: Union[
+            BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper
+        ],
         req_pool_indices: torch.Tensor,
         paged_kernel_lens: torch.Tensor,
         paged_kernel_lens_sum: int,
@@ -1004,6 +1055,26 @@ class FlashInferIndicesUpdaterPrefill:
                 custom_mask=custom_mask,
                 non_blocking=True,
             )
+        elif (
+            global_config.enable_flashinfer_mla
+            and not global_server_args_dict["disable_radix_cache"]
+        ):
+            # mla paged prefill
+            kv_len_arr = kv_indptr[1:] - kv_indptr[:-1]
+            wrapper_paged.plan(
+                qo_indptr,
+                kv_indptr,
+                kv_indices,
+                kv_len_arr,
+                self.num_qo_heads,
+                512,
+                64,
+                1,
+                True,
+                1 / math.sqrt(192),
+                self.data_type,
+                self.data_type,
+            )


 class FlashInferMultiStepDraftBackend:
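
For readability, the positional arguments of the plan() call above, annotated with the parameter names from flashinfer's BatchMLAPagedAttentionWrapper.plan (the annotation is ours; verify against the pinned flashinfer 0.2.1.post2):

    wrapper_paged.plan(
        qo_indptr,           # query/output indptr over the batch
        kv_indptr,           # paged-KV indptr
        kv_indices,          # paged-KV page indices
        kv_len_arr,          # per-request KV lengths (kv_indptr differences)
        self.num_qo_heads,   # number of query heads
        512,                 # head_dim_ckv: compressed-KV dim (kv_lora_rank)
        64,                  # head_dim_kpe: rotary key dim
        1,                   # page_size
        True,                # causal
        1 / math.sqrt(192),  # sm_scale for the original 192-dim qk heads
        self.data_type,      # q data type
        self.data_type,      # kv data type
    )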
@@ -0,0 +1,146 @@
+{
+    "1": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "8": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "16": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "24": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "32": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "48": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "64": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "96": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "128": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "256": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "512": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
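
Each of these JSON files maps a candidate token count M to Triton kernel meta-parameters tuned for the NVIDIA L20Y. A minimal sketch of the lookup convention, assuming the nearest-key selection used by the fused MoE/GEMM config loaders in sglang (the helper name is ours):

    import json

    def pick_config(configs: dict, m: int) -> dict:
        # Keys are candidate token counts; fall back to the numerically closest.
        return configs[min(configs, key=lambda k: abs(int(k) - m))]

    with open(
        "E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json"
    ) as f:
        cfgs = json.load(f)

    print(pick_config(cfgs, 48))   # exact key -> the "48" entry
    print(pick_config(cfgs, 200))  # no exact key -> nearest ("256")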
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 2
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 5
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 2
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2
+    }
+}