sglang 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. sglang/api.py +13 -1
  2. sglang/bench_latency.py +10 -5
  3. sglang/bench_serving.py +50 -26
  4. sglang/check_env.py +15 -0
  5. sglang/global_config.py +1 -1
  6. sglang/lang/backend/runtime_endpoint.py +60 -49
  7. sglang/lang/chat_template.py +10 -5
  8. sglang/lang/compiler.py +4 -0
  9. sglang/lang/interpreter.py +5 -2
  10. sglang/lang/ir.py +22 -4
  11. sglang/launch_server.py +8 -1
  12. sglang/srt/constrained/jump_forward.py +13 -2
  13. sglang/srt/conversation.py +50 -1
  14. sglang/srt/hf_transformers_utils.py +22 -23
  15. sglang/srt/layers/activation.py +24 -2
  16. sglang/srt/layers/decode_attention.py +338 -50
  17. sglang/srt/layers/extend_attention.py +3 -1
  18. sglang/srt/layers/fused_moe/__init__.py +1 -0
  19. sglang/srt/layers/{fused_moe.py → fused_moe/fused_moe.py} +165 -108
  20. sglang/srt/layers/fused_moe/layer.py +587 -0
  21. sglang/srt/layers/layernorm.py +3 -0
  22. sglang/srt/layers/logits_processor.py +64 -27
  23. sglang/srt/layers/radix_attention.py +41 -18
  24. sglang/srt/layers/sampler.py +154 -0
  25. sglang/srt/managers/controller_multi.py +2 -8
  26. sglang/srt/managers/controller_single.py +7 -10
  27. sglang/srt/managers/detokenizer_manager.py +20 -9
  28. sglang/srt/managers/io_struct.py +44 -11
  29. sglang/srt/managers/policy_scheduler.py +5 -2
  30. sglang/srt/managers/schedule_batch.py +59 -179
  31. sglang/srt/managers/tokenizer_manager.py +193 -84
  32. sglang/srt/managers/tp_worker.py +131 -50
  33. sglang/srt/mem_cache/memory_pool.py +82 -8
  34. sglang/srt/mm_utils.py +79 -7
  35. sglang/srt/model_executor/cuda_graph_runner.py +97 -28
  36. sglang/srt/model_executor/forward_batch_info.py +188 -82
  37. sglang/srt/model_executor/model_runner.py +269 -87
  38. sglang/srt/models/chatglm.py +6 -14
  39. sglang/srt/models/commandr.py +6 -2
  40. sglang/srt/models/dbrx.py +5 -1
  41. sglang/srt/models/deepseek.py +7 -3
  42. sglang/srt/models/deepseek_v2.py +12 -7
  43. sglang/srt/models/gemma.py +6 -2
  44. sglang/srt/models/gemma2.py +22 -8
  45. sglang/srt/models/gpt_bigcode.py +5 -1
  46. sglang/srt/models/grok.py +66 -398
  47. sglang/srt/models/internlm2.py +5 -1
  48. sglang/srt/models/llama2.py +7 -3
  49. sglang/srt/models/llama_classification.py +2 -2
  50. sglang/srt/models/llama_embedding.py +4 -0
  51. sglang/srt/models/llava.py +176 -59
  52. sglang/srt/models/minicpm.py +7 -3
  53. sglang/srt/models/mixtral.py +61 -255
  54. sglang/srt/models/mixtral_quant.py +6 -5
  55. sglang/srt/models/qwen.py +7 -4
  56. sglang/srt/models/qwen2.py +15 -5
  57. sglang/srt/models/qwen2_moe.py +7 -16
  58. sglang/srt/models/stablelm.py +6 -2
  59. sglang/srt/openai_api/adapter.py +149 -58
  60. sglang/srt/sampling/sampling_batch_info.py +209 -0
  61. sglang/srt/{sampling_params.py → sampling/sampling_params.py} +18 -4
  62. sglang/srt/server.py +107 -71
  63. sglang/srt/server_args.py +49 -15
  64. sglang/srt/utils.py +27 -18
  65. sglang/test/runners.py +38 -38
  66. sglang/test/simple_eval_common.py +9 -10
  67. sglang/test/simple_eval_gpqa.py +2 -1
  68. sglang/test/simple_eval_humaneval.py +2 -2
  69. sglang/test/simple_eval_math.py +2 -1
  70. sglang/test/simple_eval_mmlu.py +2 -1
  71. sglang/test/test_activation.py +55 -0
  72. sglang/test/test_programs.py +32 -5
  73. sglang/test/test_utils.py +37 -50
  74. sglang/version.py +1 -1
  75. {sglang-0.2.12.dist-info → sglang-0.2.14.dist-info}/METADATA +102 -27
  76. sglang-0.2.14.dist-info/RECORD +114 -0
  77. {sglang-0.2.12.dist-info → sglang-0.2.14.dist-info}/WHEEL +1 -1
  78. sglang/launch_server_llavavid.py +0 -29
  79. sglang/srt/model_loader/model_loader.py +0 -292
  80. sglang/srt/model_loader/utils.py +0 -275
  81. sglang-0.2.12.dist-info/RECORD +0 -112
  82. {sglang-0.2.12.dist-info → sglang-0.2.14.dist-info}/LICENSE +0 -0
  83. {sglang-0.2.12.dist-info → sglang-0.2.14.dist-info}/top_level.txt +0 -0
sglang/srt/managers/tokenizer_manager.py

@@ -21,7 +21,7 @@ import dataclasses
 import logging
 import multiprocessing as mp
 import os
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import transformers
@@ -46,9 +46,11 @@ from sglang.srt.managers.io_struct import (
     GenerateReqInput,
     TokenizedEmbeddingReqInput,
     TokenizedGenerateReqInput,
+    UpdateWeightReqInput,
+    UpdateWeightReqOutput,
 )
 from sglang.srt.mm_utils import expand2square, process_anyres_image
-from sglang.srt.sampling_params import SamplingParams
+from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import is_generation_model, is_multimodal_model, load_image
 from sglang.utils import get_exception_traceback
@@ -60,12 +62,16 @@ logger = logging.getLogger(__name__)
 
 @dataclasses.dataclass
 class ReqState:
+    """Store the state a request."""
+
     out_list: List
     finished: bool
     event: asyncio.Event
 
 
 class TokenizerManager:
+    """TokenizerManager is a process that tokenizes the text."""
+
     def __init__(
         self,
         server_args: ServerArgs,
@@ -74,6 +80,7 @@ class TokenizerManager:
     ):
         self.server_args = server_args
 
+        # Init inter-process communication
         context = zmq.asyncio.Context(2)
         self.recv_from_detokenizer = context.socket(zmq.PULL)
         self.recv_from_detokenizer.bind(f"tcp://127.0.0.1:{port_args.tokenizer_port}")
@@ -81,6 +88,7 @@
         self.send_to_router = context.socket(zmq.PUSH)
         self.send_to_router.connect(f"tcp://127.0.0.1:{port_args.controller_port}")
 
+        # Read model args
         self.model_path = server_args.model_path
         self.served_model_name = server_args.served_model_name
         self.hf_config = get_config(
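The two sockets above form the tokenizer manager's inter-process pipeline: it pulls detokenized results and pushes tokenized work to the controller. A minimal, self-contained sketch of the same zmq.asyncio PULL/PUSH pattern (requires pyzmq; the address and payload are illustrative, not sglang's actual ports or dataclasses):

```python
import asyncio

import zmq
import zmq.asyncio


async def main():
    context = zmq.asyncio.Context(2)

    receiver = context.socket(zmq.PULL)    # plays the role of recv_from_detokenizer
    receiver.bind("tcp://127.0.0.1:5555")

    sender = context.socket(zmq.PUSH)      # plays the role of send_to_router
    sender.connect("tcp://127.0.0.1:5555")

    # send_pyobj/recv_pyobj pickle arbitrary Python objects, which is how
    # the request/output dataclasses travel between sglang's processes.
    await sender.send_pyobj({"rid": "req-0", "text": "hello"})
    print(await receiver.recv_pyobj())


asyncio.run(main())
```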
@@ -88,13 +96,17 @@
             trust_remote_code=server_args.trust_remote_code,
             model_overide_args=model_overide_args,
         )
-        self.is_generation = is_generation_model(self.hf_config.architectures)
+
+        self.is_generation = is_generation_model(
+            self.hf_config.architectures, self.server_args.is_embedding
+        )
 
         if server_args.context_length is not None:
             self.context_len = server_args.context_length
         else:
             self.context_len = get_context_length(self.hf_config)
 
+        # Create tokenizer
         if server_args.skip_tokenizer_init:
             self.tokenizer = self.processor = None
         else:
@@ -118,27 +130,13 @@
                 trust_remote_code=server_args.trust_remote_code,
             )
 
+        # Store states
         self.to_create_loop = True
         self.rid_to_state: Dict[str, ReqState] = {}
 
-    async def get_pixel_values(self, image_data):
-        aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None)
-        grid_pinpoints = (
-            self.hf_config.image_grid_pinpoints if aspect_ratio == "anyres" else None
-        )
-        if self.executor is not None:
-            loop = asyncio.get_event_loop()
-            return await loop.run_in_executor(
-                self.executor,
-                get_pixel_values,
-                image_data,
-                aspect_ratio,
-                grid_pinpoints,
-            )
-        else:
-            return get_pixel_values(
-                image_data, aspect_ratio, grid_pinpoints, self.processor
-            )
+        # for update model weights
+        self.model_update_lock = asyncio.Lock()
+        self.model_update_result = None
 
     async def generate_request(
         self, obj: Union[GenerateReqInput, EmbeddingReqInput], request=None
@@ -146,6 +144,9 @@
         if self.to_create_loop:
             self.create_handle_loop()
 
+        while self.model_update_lock.locked():
+            await asyncio.sleep(0.001)
+
         obj.post_init()
         is_single = obj.is_single
 
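This busy-wait pairs with the new `model_update_lock`: while `update_weights` (added further down) holds the lock, incoming generation requests poll every millisecond until the update finishes. A runnable sketch of the admission-control idea, with sglang's classes reduced to a stub:

```python
import asyncio


class WeightUpdateGate:
    """Stub: new work waits while an update holds the lock."""

    def __init__(self):
        self.model_update_lock = asyncio.Lock()

    async def generate(self, rid: str) -> str:
        # Mirrors generate_request: spin until no update is in flight.
        while self.model_update_lock.locked():
            await asyncio.sleep(0.001)
        return f"{rid}: ok"

    async def update_weights(self) -> str:
        async with self.model_update_lock:
            await asyncio.sleep(0.01)  # stand-in for the real reload
        return "weights updated"


async def main():
    gate = WeightUpdateGate()
    print(await asyncio.gather(gate.update_weights(), gate.generate("r0")))


asyncio.run(main())
```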
@@ -153,9 +154,6 @@
             async for response in self._handle_single_request(obj, request):
                 yield response
         else:
-            if hasattr(obj, "stream") and obj.stream:
-                raise ValueError("Do not support stream for batch mode.")
-
             async for response in self._handle_batch_request(obj, request):
                 yield response
 
@@ -163,8 +161,8 @@
         self,
         obj: Union[GenerateReqInput, EmbeddingReqInput],
         request,
-        index=None,
-        is_cache_for_prefill=False,
+        index: Optional[int] = None,
+        is_cache_for_prefill: Optional[bool] = False,
     ):
         if not is_cache_for_prefill:  # The normal case with a single prompt
             not_use_index = index is None
@@ -185,7 +183,7 @@
 
             if self.is_generation:
                 pixel_values, image_hash, image_size = await self._get_pixel_values(
-                    obj.image_data if not_use_index else obj.image_data[index]
+                    obj.image_data
                 )
                 return_logprob = (
                     obj.return_logprob if not_use_index else obj.return_logprob[index]
@@ -195,6 +193,9 @@
                     if not_use_index
                     else obj.logprob_start_len[index]
                 )
+                if return_logprob and logprob_start_len == -1:
+                    logprob_start_len = len(input_ids) - 1
+
                 top_logprobs_num = (
                     obj.top_logprobs_num
                     if not_use_index
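The `-1` sentinel now means "start logprobs at the last prompt token". A tiny worked example of the defaulting rule (token values are made up):

```python
# Hypothetical 5-token prompt; -1 asks for the default start position.
input_ids = [101, 2009, 2003, 1037, 3231]
return_logprob, logprob_start_len = True, -1

if return_logprob and logprob_start_len == -1:
    logprob_start_len = len(input_ids) - 1

print(logprob_start_len)  # 4 -> logprobs begin at the final prompt token
```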
@@ -245,6 +246,8 @@
             top_logprobs_num = obj.top_logprobs_num[0]
 
         if self.is_generation:
+            if return_logprob and logprob_start_len == -1:
+                logprob_start_len = len(input_ids) - 1
             tokenized_obj = TokenizedGenerateReqInput(
                 rid,
                 input_text,
@@ -289,7 +292,7 @@
         parallel_sample_num = obj.parallel_sample_num
 
         if parallel_sample_num != 1:
-            # Send prefill requests to cache the common input
+            # Send prefill requests to cache the common prefix
             parallel_sample_num += 1
             input_id_result = [] if obj.input_ids is None else None
             for i in range(batch_size):
@@ -306,6 +309,7 @@
             parallel_sample_num = 1
 
         # First send out all requests
+        generators = []
         for i in range(batch_size):
             for j in range(parallel_sample_num):
                 if j == 0 and parallel_sample_num != 1:
@@ -334,6 +338,8 @@
                 sampling_params = self._get_sampling_params(obj.sampling_params[index])
 
                 if self.is_generation:
+                    if obj.return_logprob[index] and obj.logprob_start_len[index] == -1:
+                        obj.logprob_start_len[index] = len(input_ids) - 1
                     pixel_values, image_hash, image_size = await self._get_pixel_values(
                         obj.image_data[index]
                     )
@@ -364,42 +370,47 @@
                 state = ReqState([], False, event)
                 self.rid_to_state[rid] = state
 
-        # Then wait for all responses
-        output_list = []
-        for i in range(batch_size):
-            for j in range(parallel_sample_num):
-                if j == 0 and parallel_sample_num != 1:
-                    continue
-                index = i * parallel_sample_num + j
-                if parallel_sample_num != 1:
-                    index += batch_size - 1 - i
-                rid = obj.rid[index]
-                state = self.rid_to_state[rid]
-
-                while True:
-                    try:
-                        await asyncio.wait_for(state.event.wait(), timeout=4)
-                        break
-                    except asyncio.TimeoutError:
-                        if request is not None and await request.is_disconnected():
-                            for rid in obj.rid:
-                                self.abort_request(rid)
-                            raise ValueError(f"Abort request {rid}")
-                        continue
-                if self.is_generation:
-                    output_list.append(
-                        self.convert_logprob_style(
-                            state.out_list[-1],
-                            obj.return_logprob[index],
-                            obj.top_logprobs_num[index],
-                            obj.return_text_in_logprobs,
-                        )
+                generators.append(
+                    self._wait_for_response(
+                        event,
+                        state,
+                        obj,
+                        rid,
+                        request,
+                        index=index,
+                        response_index=len(generators),
                     )
-                else:
-                    output_list.append(state.out_list[-1])
-                assert state.finished
-                del self.rid_to_state[rid]
-        yield output_list
+                )
+
+        # Then process the responses based on streaming option
+        is_stream = hasattr(obj, "stream") and obj.stream
+
+        tasks = [asyncio.create_task(gen.__anext__()) for gen in generators]
+        output_list = [None] * len(tasks)
+
+        while tasks:
+            done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+
+            for task in done:
+                cur_index = tasks.index(task)
+
+                try:
+                    result = task.result()
+
+                    if is_stream:
+                        yield result
+                    else:
+                        output_list[result["index"]] = result
+
+                    tasks[cur_index] = asyncio.create_task(
+                        generators[cur_index].__anext__()
+                    )
+                except StopAsyncIteration:
+                    del generators[cur_index]
+                    del tasks[cur_index]
+
+        if not is_stream:
+            yield output_list
 
     def _validate_input_length(self, input_ids: List[int]):
         if len(input_ids) >= self.context_len:
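The rewritten batch path keeps one async generator per request and multiplexes them with `asyncio.wait(..., return_when=FIRST_COMPLETED)`: in streaming mode each chunk is yielded as soon as any request produces one; otherwise results are slotted into `output_list` by their `"index"` key. A self-contained sketch of this fan-in pattern, with `worker` standing in for `_wait_for_response`:

```python
import asyncio
import random


async def worker(index: int, n: int):
    # Stand-in for _wait_for_response: yields several chunks per request.
    for step in range(n):
        await asyncio.sleep(random.random() * 0.05)
        yield {"index": index, "step": step}


async def fan_in(generators):
    # One pending task per generator; whichever finishes first is yielded.
    tasks = [asyncio.create_task(gen.__anext__()) for gen in generators]
    while tasks:
        done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            cur = tasks.index(task)
            try:
                yield task.result()
                # Re-arm the generator that just produced a chunk.
                tasks[cur] = asyncio.create_task(generators[cur].__anext__())
            except StopAsyncIteration:
                del generators[cur]
                del tasks[cur]


async def main():
    gens = [worker(i, 3) for i in range(2)]
    async for chunk in fan_in(gens):
        print(chunk)


asyncio.run(main())
```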
@@ -416,12 +427,10 @@
         return sampling_params
 
     async def _get_pixel_values(self, image_data):
-        if isinstance(image_data, list) and len(image_data) > 0:
-            return await self.get_pixel_values(image_data[0])
-        elif isinstance(image_data, str):
-            return await self.get_pixel_values(image_data)
-        else:
+        if image_data is None:
             return None, None, None
+        else:
+            return await self._get_pixel_values_internal(image_data)
 
     async def _wait_for_response(
         self,
@@ -430,33 +439,38 @@
         obj: Union[GenerateReqInput, EmbeddingReqInput],
         rid: str,
         request,
+        index: int = None,
+        response_index: int = 0,
     ):
         while True:
             try:
                 await asyncio.wait_for(event.wait(), timeout=4)
             except asyncio.TimeoutError:
                 if request is not None and await request.is_disconnected():
-                    self.abort_request(rid)
+                    for rid in [obj.rid] if obj.is_single else obj.rid:
+                        self.abort_request(rid)
                     raise ValueError(f"Abort request {rid}")
                 continue
 
             if self.is_generation:
                 out = self.convert_logprob_style(
                     state.out_list[-1],
-                    obj.return_logprob,
-                    obj.top_logprobs_num,
+                    obj.return_logprob if index is None else obj.return_logprob[index],
+                    (
+                        obj.top_logprobs_num
+                        if index is None
+                        else obj.top_logprobs_num[index]
+                    ),
                     obj.return_text_in_logprobs,
                 )
             else:  # isinstance(obj, EmbeddingReqInput)
                 out = state.out_list[-1]
 
+            out["index"] = response_index
+
             # Log requests
             if self.server_args.log_requests and state.finished:
-                if obj.text is None:
-                    in_obj = {"input_ids": obj.input_ids}
-                else:
-                    in_obj = {"text": obj.text}
-                logger.info(f"in={in_obj}, out={out}")
+                logger.info(f"in={obj}, out={out}")
 
             state.out_list = []
             if state.finished:
@@ -500,6 +514,30 @@
             req = AbortReq(rid)
             self.send_to_router.send_pyobj(req)
 
+    async def update_weights(self, obj: UpdateWeightReqInput, request):
+        if self.to_create_loop:
+            self.create_handle_loop()
+
+        # default the load format to the server_args
+        if obj.load_format is None:
+            obj.load_format = self.server_args.load_format
+
+        if not self.model_update_lock.locked():
+            async with self.model_update_lock:
+                # wait for the previous generation requests to finish
+                while len(self.rid_to_state) > 0:
+                    await asyncio.sleep(0)
+                self.send_to_router.send_pyobj(obj)
+                self.model_update_result = asyncio.Future()
+                result = await self.model_update_result
+                if result.success:
+                    self.server_args.model_path = obj.model_path
+                    self.server_args.load_format = obj.load_format
+                    self.model_path = obj.model_path
+                return result.success, result.message
+        else:
+            return False, "Another update is in progress. Please try again later."
+
     def create_abort_task(self, obj: GenerateReqInput):
         # Abort the request if the client is disconnected.
         async def abort_request():
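`update_weights` parks on an `asyncio.Future` that `handle_loop` (below) resolves when the `UpdateWeightReqOutput` comes back from the workers. A minimal sketch of that future-based request/reply handshake; the ZMQ transport is replaced by a sleeping coroutine, and the class bodies are illustrative, not sglang's:

```python
import asyncio
from dataclasses import dataclass


@dataclass
class UpdateWeightReqOutput:
    success: bool
    message: str


class Manager:
    def __init__(self):
        self.model_update_result = None  # Future while an update is in flight

    async def update_weights(self):
        # Mirrors the diff: create a Future, then park until the receive
        # loop resolves it with the worker's reply.
        self.model_update_result = asyncio.Future()
        result = await self.model_update_result
        return result.success, result.message

    async def handle_loop(self):
        # Stand-in for recv_from_detokenizer.recv_pyobj(): pretend the
        # worker replies after 10 ms.
        await asyncio.sleep(0.01)
        self.model_update_result.set_result(
            UpdateWeightReqOutput(True, "weights reloaded")
        )


async def main():
    m = Manager()
    asyncio.create_task(m.handle_loop())
    print(await m.update_weights())


asyncio.run(main())
```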
@@ -507,7 +545,7 @@
             if obj.is_single:
                 self.abort_request(obj.rid)
             else:
-                for rid in obj.rids:
+                for rid in obj.rid:
                     self.abort_request(rid)
 
         background_tasks = BackgroundTasks()
@@ -515,18 +553,29 @@
         return background_tasks
 
     def create_handle_loop(self):
+        if not self.to_create_loop:
+            return
+
         self.to_create_loop = False
         loop = asyncio.get_event_loop()
         loop.create_task(self.handle_loop())
 
     async def handle_loop(self):
+        """The event loop that handles requests"""
+
         while True:
-            recv_obj: Union[BatchStrOut, BatchEmbeddingOut, BatchTokenIDOut] = (
-                await self.recv_from_detokenizer.recv_pyobj()
-            )
+            recv_obj: Union[
+                BatchStrOut, BatchEmbeddingOut, BatchTokenIDOut, UpdateWeightReqOutput
+            ] = await self.recv_from_detokenizer.recv_pyobj()
+
+            if isinstance(recv_obj, UpdateWeightReqOutput):
+                self.model_update_result.set_result(recv_obj)
+                continue
+
             assert isinstance(
                 recv_obj, (BatchStrOut, BatchEmbeddingOut, BatchTokenIDOut)
             ), f"Unexpected obj received: {type(recv_obj)}"
+
             for i, rid in enumerate(recv_obj.rids):
                 state = self.rid_to_state.get(rid, None)
                 if state is None:
@@ -610,11 +659,69 @@
             )
         return top_logprobs
 
+    async def _get_pixel_values_internal(self, image_data, aspect_ratio=None):
+        aspect_ratio = (
+            getattr(self.hf_config, "image_aspect_ratio", None)
+            if aspect_ratio is None
+            else aspect_ratio
+        )
+        grid_pinpoints = (
+            self.hf_config.image_grid_pinpoints
+            if hasattr(self.hf_config, "image_grid_pinpoints")
+            and "anyres" in aspect_ratio
+            else None
+        )
+
+        if isinstance(image_data, list) and len(image_data) > 0:
+            pixel_values, image_hash, image_size = [], [], []
+            if len(image_data) > 1:
+                aspect_ratio = "pad"  # LLaVA OneVision Handling: more than one image --> interleaved image mode or video mode. We do not use anyres
+                for img_data in image_data:
+                    pixel_v, image_h, image_s = await self._process_single_image(
+                        img_data, aspect_ratio, grid_pinpoints
+                    )
+                    pixel_values.append(pixel_v)
+                    image_hash.append(image_h)
+                    image_size.append(image_s)
+                pixel_values = np.stack(pixel_values, axis=0)
+            else:
+                pixel_values, image_hash, image_size = await self._process_single_image(
+                    image_data[0], aspect_ratio, grid_pinpoints
+                )
+                image_hash = [image_hash]
+                image_size = [image_size]
+        elif isinstance(image_data, str):
+            pixel_values, image_hash, image_size = await self._process_single_image(
+                image_data, aspect_ratio, grid_pinpoints
+            )
+            image_hash = [image_hash]
+            image_size = [image_size]
+        else:
+            pixel_values, image_hash, image_size = None, None, None
+
+        return pixel_values, image_hash, image_size
+
+    async def _process_single_image(self, image_data, aspect_ratio, grid_pinpoints):
+        if self.executor is not None:
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(
+                self.executor,
+                _process_single_image_task,
+                image_data,
+                aspect_ratio,
+                grid_pinpoints,
+            )
+        else:
+            return _process_single_image_task(
+                image_data, aspect_ratio, grid_pinpoints, self.processor
+            )
+
 
 global global_processor
 
 
 def init_global_processor(server_args: ServerArgs):
+    """Init the global processor for multi modal models."""
     global global_processor
     transformers.logging.set_verbosity_error()
     global_processor = get_processor(
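`_process_single_image` pushes the CPU-bound preprocessing into `self.executor` via `loop.run_in_executor`, so the event loop keeps serving other requests, and falls back to inline execution when no executor is configured. A generic sketch of that offload pattern; `decode` here is a placeholder for the real image transforms:

```python
import asyncio
import concurrent.futures
import hashlib


def decode(data: bytes) -> str:
    # Placeholder for the CPU-bound work in _process_single_image_task.
    return hashlib.sha256(data).hexdigest()


async def process(executor, data: bytes) -> str:
    if executor is not None:
        # Run in the pool so the event loop stays free for other requests.
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(executor, decode, data)
    else:
        # No executor configured: do the work inline.
        return decode(data)


async def main():
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
        print(await process(pool, b"image bytes"))
        print(await process(None, b"image bytes"))


asyncio.run(main())
```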
@@ -624,7 +731,7 @@ def init_global_processor(server_args: ServerArgs):
     )
 
 
-def get_pixel_values(
+def _process_single_image_task(
     image_data, image_aspect_ratio=None, image_grid_pinpoints=None, processor=None
 ):
     try:
@@ -644,8 +751,10 @@ def get_pixel_values(
                 image,
                 tuple(int(x * 255) for x in processor.image_processor.image_mean),
             )
-            pixel_values = processor.image_processor(image)["pixel_values"][0]
-        elif image_aspect_ratio == "anyres":
+            pixel_values = processor.image_processor(image.convert("RGB"))[
+                "pixel_values"
+            ][0]
+        elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
             pixel_values = process_anyres_image(
                 image, processor.image_processor, image_grid_pinpoints
             )
@@ -654,4 +763,4 @@
         pixel_values = pixel_values.astype(np.float16)
         return pixel_values, image_hash, image.size
     except Exception:
-        print("Exception in TokenizerManager:\n" + get_exception_traceback())
+        logger.error("Exception in TokenizerManager:\n" + get_exception_traceback())