liger-kernel-nightly 0.5.5.dev20250331042257__py3-none-any.whl → 0.5.5.dev20250331170510__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

@@ -8,7 +8,6 @@ import torch
8
8
  from transformers.models.llava.modeling_llava import _CONFIG_FOR_DOC
9
9
  from transformers.models.llava.modeling_llava import LLAVA_INPUTS_DOCSTRING
10
10
  from transformers.models.llava.modeling_llava import LlavaCausalLMOutputWithPast
11
- from transformers.models.llava.modeling_llava import logger
12
11
  from transformers.utils import add_start_docstrings_to_model_forward
13
12
  from transformers.utils import is_torchdynamo_compiling
14
13
  from transformers.utils import replace_return_docstrings
@@ -34,8 +33,6 @@ def lce_forward_deprecated(
34
33
  output_attentions: Optional[bool] = None,
35
34
  output_hidden_states: Optional[bool] = None,
36
35
  return_dict: Optional[bool] = None,
37
- cache_position: Optional[torch.LongTensor] = None,
38
- num_logits_to_keep: int = 0,
39
36
  ) -> Union[Tuple, LlavaCausalLMOutputWithPast]:
40
37
  r"""
41
38
  Args:
@@ -96,39 +93,32 @@ def lce_forward_deprecated(
96
93
  "You cannot specify both pixel_values and inputs_embeds at the same time, and must specify either one"
97
94
  )
98
95
 
99
- legacy_processing = False
100
96
  if inputs_embeds is None:
97
+ # 1. Extract the input embeddings
101
98
  inputs_embeds = self.get_input_embeddings()(input_ids)
102
99
 
103
- # if the number of image tokens is more than image embeddings seq length, then prob we expanded it in processing
104
- # not very reliable, but we don't expect one to actually pass 500+ images for one prompt
105
- # In case we're in decoding stage, legacy behavior is checked by presence of pixel values even if use_cache=True
106
- legacy_processing = (
107
- (input_ids == self.config.image_token_index).sum(1).max() < self.config.image_seq_length
108
- ) or (input_ids.shape[-1] == 1 and pixel_values is not None)
109
-
110
- image_features = None
111
- if pixel_values is not None:
112
- image_features = self.get_image_features(
113
- pixel_values=pixel_values,
114
- vision_feature_layer=vision_feature_layer,
115
- vision_feature_select_strategy=vision_feature_select_strategy,
116
- )
117
-
118
- if legacy_processing and image_features is not None:
119
- logger.warning_once(
120
- "Expanding inputs for image tokens in LLaVa should be done in processing. "
121
- "Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
122
- "with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
123
- "Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
124
- )
125
- # prefill stage vs decoding stage (legacy behavior copied)
126
- if input_ids.shape[1] != 1:
100
+ # 2. Merge text and images
101
+ if pixel_values is not None and input_ids.shape[1] != 1:
102
+ image_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
103
+ # This is not memory efficient at all: output_hidden_states=True will save all the hidden states.
104
+ selected_image_feature = image_outputs.hidden_states[vision_feature_layer]
105
+
106
+ if vision_feature_select_strategy == "default":
107
+ selected_image_feature = selected_image_feature[:, 1:]
108
+ elif vision_feature_select_strategy == "full":
109
+ selected_image_feature = selected_image_feature
110
+ else:
111
+ raise ValueError(f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}")
112
+
113
+ image_features = self.multi_modal_projector(selected_image_feature)
114
+ inputs_embeds = inputs_embeds.to(image_features.dtype)
127
115
  inputs_embeds, attention_mask, labels, position_ids = self._merge_input_ids_with_image_features(
128
116
  image_features, inputs_embeds, input_ids, attention_mask, labels
129
117
  )
130
- cache_position = torch.arange(attention_mask.shape[1], device=attention_mask.device)
131
- else:
118
+
119
+ # In case input_ids.shape[1] == 1 & pixel_values != None & past_key_values != None, we are in the case of
120
+ # generation with cache
121
+ elif past_key_values is not None and pixel_values is not None and input_ids.shape[1] == 1:
132
122
  # Retrieve the first layer to inspect the logits and mask out the hidden states
133
123
  # that are set to 0
134
124
  first_layer_past_key_value = past_key_values[0][0][:, :, :, 0]
@@ -158,7 +148,6 @@ def lce_forward_deprecated(
158
148
 
159
149
  attention_mask = torch.cat((extended_attention_mask, attention_mask[:, -target_length:]), dim=1)
160
150
  position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1
161
- cache_position = torch.arange(attention_mask.shape[1], device=attention_mask.device)[-target_length:]
162
151
 
163
152
  # TODO: @raushan retain only the new behavior after v4.47
164
153
  elif image_features is not None:
@@ -184,8 +173,6 @@ def lce_forward_deprecated(
184
173
  output_attentions=output_attentions,
185
174
  output_hidden_states=output_hidden_states,
186
175
  return_dict=return_dict,
187
- cache_position=cache_position,
188
- num_logits_to_keep=num_logits_to_keep,
189
176
  )
190
177
  hidden_states = outputs[0]
191
178
 
@@ -220,7 +207,6 @@ def lce_forward_deprecated(
220
207
  past_key_values=outputs.past_key_values,
221
208
  hidden_states=outputs.hidden_states,
222
209
  attentions=outputs.attentions,
223
- image_hidden_states=image_features if pixel_values is not None else None,
224
210
  )
225
211
 
226
212
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.5.dev20250331042257
3
+ Version: 0.5.5.dev20250331170510
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -57,7 +57,7 @@ liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
57
57
  liger_kernel/transformers/model/gemma.py,sha256=7cBTljzh-8_ACBhYl6NUfj5_ux92YRlmnAU5gfDAQAI,9312
58
58
  liger_kernel/transformers/model/gemma2.py,sha256=X0FOIhvFlTrmWI7Ws06wUkutgHW3lWtLOnnHp1NgZ3A,10403
59
59
  liger_kernel/transformers/model/llama.py,sha256=d9rBaK8e8RSMCFHdgom9ZHuXOlnh6U_o-GkAFGRNGOY,9989
60
- liger_kernel/transformers/model/llava.py,sha256=pGjos4_MpJcMP-3vLGDWhJeWbBtuqUIMM1JUC36DS08,18467
60
+ liger_kernel/transformers/model/llava.py,sha256=b0pEagjUbu2-eS9xegjyfl1DwIXLwZcNpff55ibaMbA,17601
61
61
  liger_kernel/transformers/model/loss_utils.py,sha256=Z-fUrf-cUDUjUIH7Tl9OL2hT8nmtx7ES3kg8syuWKy4,1476
62
62
  liger_kernel/transformers/model/mistral.py,sha256=o7tyl1sPWPfZwwrBLRlryHlSI8I55viuJoMI5Bh5Nww,5014
63
63
  liger_kernel/transformers/model/mixtral.py,sha256=T0ITv2-PkR8VErVOVUizoS4EzjmARyR7GFh0tXDB_i4,11089
@@ -72,9 +72,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
72
72
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
73
73
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
74
74
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
75
- liger_kernel_nightly-0.5.5.dev20250331042257.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
76
- liger_kernel_nightly-0.5.5.dev20250331042257.dist-info/METADATA,sha256=0ENI79A41vXw0onIGtzINqohZzFkj9MqLbWEvDhmTNU,22959
77
- liger_kernel_nightly-0.5.5.dev20250331042257.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
78
- liger_kernel_nightly-0.5.5.dev20250331042257.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
79
- liger_kernel_nightly-0.5.5.dev20250331042257.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
80
- liger_kernel_nightly-0.5.5.dev20250331042257.dist-info/RECORD,,
75
+ liger_kernel_nightly-0.5.5.dev20250331170510.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
76
+ liger_kernel_nightly-0.5.5.dev20250331170510.dist-info/METADATA,sha256=KEjXLNI8PYfmvipid4KUVeM0XE5oKXd5Pl7ikrZbAqU,22959
77
+ liger_kernel_nightly-0.5.5.dev20250331170510.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
78
+ liger_kernel_nightly-0.5.5.dev20250331170510.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
79
+ liger_kernel_nightly-0.5.5.dev20250331170510.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
80
+ liger_kernel_nightly-0.5.5.dev20250331170510.dist-info/RECORD,,