liger-kernel-nightly 0.5.3.dev20250221230243__py3-none-any.whl → 0.5.3.dev20250224175624__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

@@ -57,13 +57,14 @@ def _layer_norm_forward_kernel(
57
57
  B_row = tl.load(B_ptr + col_offsets, mask=mask, other=0)
58
58
 
59
59
  mean = tl.sum(X_row, axis=0) / n_cols
60
- var = tl.sum((X_row - mean) * (X_row - mean), axis=0) / n_cols
60
+ Xmm = tl.where(mask, X_row - mean, 0)
61
+ var = tl.sum(Xmm * Xmm, axis=0) / n_cols
61
62
  rstd = rsqrt(var + eps)
62
63
 
63
64
  tl.store(Mean_ptr, mean)
64
65
  tl.store(RSTD_ptr, rstd)
65
66
 
66
- Y_row = (X_row - mean) * rstd * W_row + B_row
67
+ Y_row = Xmm * rstd * W_row + B_row
67
68
 
68
69
  tl.store(Y_ptr + col_offsets, Y_row, mask=mask)
69
70
 
@@ -147,9 +148,11 @@ def layer_norm_forward(X, W, B, eps):
147
148
  Y = torch.empty((n_rows, n_cols), dtype=X.dtype, device=X.device)
148
149
  Mean = torch.empty(n_rows, dtype=X.dtype, device=X.device)
149
150
  RSTD = torch.empty(n_rows, dtype=X.dtype, device=X.device)
150
- assert X.shape[1] == W.shape[0], (
151
- f"Incompatible hidden size dimension between input tensor with shape[1] = {X.shape[1]} and weight tensor with shape[0] = {W.shape[0]}"
152
- )
151
+ if X.shape[1] != W.shape[0]:
152
+ raise ValueError(
153
+ f"Incompatible dimensions: input feature size (X.shape[1]={X.shape[1]}) "
154
+ f"must match weight size (W.shape[0]={W.shape[0]})"
155
+ )
153
156
 
154
157
  _layer_norm_forward_kernel[(n_rows,)](
155
158
  Y,
@@ -190,11 +193,21 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
190
193
 
191
194
  BLOCK_SIZE, num_warps = calculate_settings(n_cols)
192
195
  if n_cols > BLOCK_SIZE:
193
- raise RuntimeError("This layer norm doesn't support feature dim >= 64KB.")
196
+ raise RuntimeError(
197
+ f"Feature dimension {n_cols} exceeds maximum supported size of {BLOCK_SIZE}. Consider using a smaller feature dimension."
198
+ )
194
199
 
195
200
  rows_per_program = math.ceil(n_rows / sm_count)
196
201
  grid = (sm_count,)
197
- triton_dtype = tl.float32 if X.dtype == torch.float32 else tl.bfloat16
202
+ triton_dtype = (
203
+ tl.float32
204
+ if X.dtype == torch.float32
205
+ else tl.bfloat16
206
+ if X.dtype == torch.bfloat16
207
+ else tl.float16
208
+ if X.dtype == torch.float16
209
+ else tl.float32 # fallback to float32 for other types
210
+ )
198
211
  _layer_norm_backward_kernel[grid](
199
212
  X,
200
213
  W,
liger_kernel/utils.py CHANGED
@@ -13,3 +13,50 @@ def infer_device():
13
13
  return "hip"
14
14
  else:
15
15
  return "cpu"
16
+
17
+
18
+ def transformers_version_dispatch(
19
+ required_version: str,
20
+ before_fn,
21
+ after_fn,
22
+ before_args: tuple = (),
23
+ after_args: tuple = (),
24
+ before_kwargs: dict = None,
25
+ after_kwargs: dict = None,
26
+ ):
27
+ """
28
+ Dispatches to different functions based on package version comparison.
29
+
30
+ Args:
31
+ required_version: Version to compare against (e.g. "4.48.0")
32
+ before_fn: Function to call if package_version < required_version
33
+ after_fn: Function to call if package_version >= required_version
34
+ before_args: Positional arguments for before_fn
35
+ after_args: Positional arguments for after_fn
36
+ before_kwargs: Keyword arguments for before_fn
37
+ after_kwargs: Keyword arguments for after_fn
38
+
39
+ Returns:
40
+ Result from either before_fn or after_fn
41
+
42
+ Example:
43
+ >>> rotary_emb = transformers_version_dispatch(
44
+ ... "4.48.0",
45
+ ... LlamaRotaryEmbedding,
46
+ ... LlamaRotaryEmbedding,
47
+ ... before_args=(head_dim,),
48
+ ... after_args=(LlamaConfig(head_dim=head_dim),),
49
+ ... before_kwargs={'device': device},
50
+ ... after_kwargs={'device': device}
51
+ ... )
52
+ """
53
+ from packaging import version
54
+ from transformers import __version__ as transformers_version
55
+
56
+ before_kwargs = before_kwargs or {}
57
+ after_kwargs = after_kwargs or {}
58
+
59
+ if version.parse(transformers_version) < version.parse(required_version):
60
+ return before_fn(*before_args, **before_kwargs)
61
+ else:
62
+ return after_fn(*after_args, **after_kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.3.dev20250221230243
3
+ Version: 0.5.3.dev20250224175624
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -1,6 +1,6 @@
1
1
  liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
3
- liger_kernel/utils.py,sha256=Wh9TkveQY4snwiyKWAvWXUpVQKX1ARX2tL0T6qzEoIQ,305
3
+ liger_kernel/utils.py,sha256=FtVUkCGBT1UNasTl6HMNycWwiwHayK6tx-ZDdA-sNX4,1884
4
4
  liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
5
5
  liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
6
6
  liger_kernel/chunked_loss/cpo_loss.py,sha256=OdBR8WYdHTKpLI_c9DcuwqKSWPeAAeTyREz46Vu_cAY,3682
@@ -23,7 +23,7 @@ liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,412
23
23
  liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
24
24
  liger_kernel/ops/jsd.py,sha256=0jNeRxpcNI5ckxCdoCNyO5GEedLIuzx3lz6KAiksc4o,6109
25
25
  liger_kernel/ops/kl_div.py,sha256=MnfuYqqQESON1X2Swy064x1urKtMFdgeSWd60VttBXI,8420
26
- liger_kernel/ops/layer_norm.py,sha256=o5X_N0XNX0t-1AV3dyv43G0KJSyclUxcpNXzHNh35ks,7640
26
+ liger_kernel/ops/layer_norm.py,sha256=6roQjioyg-9O2qLPV8nL4U0-5UH80tdzOMTWwjvDnn8,7961
27
27
  liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
28
28
  liger_kernel/ops/rms_norm.py,sha256=PWLJcdIKU5e-8BuYFHd9Cqlq6wmr6fUXKi9zQD4LetU,11727
29
29
  liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
@@ -65,9 +65,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
65
65
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
66
66
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
67
67
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
68
- liger_kernel_nightly-0.5.3.dev20250221230243.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
69
- liger_kernel_nightly-0.5.3.dev20250221230243.dist-info/METADATA,sha256=WdkWsUQstDqFp6VlaycZn_D5hm4tuHc_4NA6cAo8Gl4,22093
70
- liger_kernel_nightly-0.5.3.dev20250221230243.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
71
- liger_kernel_nightly-0.5.3.dev20250221230243.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
72
- liger_kernel_nightly-0.5.3.dev20250221230243.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
73
- liger_kernel_nightly-0.5.3.dev20250221230243.dist-info/RECORD,,
68
+ liger_kernel_nightly-0.5.3.dev20250224175624.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
69
+ liger_kernel_nightly-0.5.3.dev20250224175624.dist-info/METADATA,sha256=xQ6yDpRjcC7Egp-O_XmndQ_XzHjTXWyg_ykJgnP3dGI,22093
70
+ liger_kernel_nightly-0.5.3.dev20250224175624.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
71
+ liger_kernel_nightly-0.5.3.dev20250224175624.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
72
+ liger_kernel_nightly-0.5.3.dev20250224175624.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
73
+ liger_kernel_nightly-0.5.3.dev20250224175624.dist-info/RECORD,,