lucid-dl 2.12.0__py3-none-any.whl → 2.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lucid/__init__.py CHANGED
@@ -25,7 +25,7 @@ import json
 import math
 import numpy as np

-from lucid._tensor import Tensor
+from lucid._tensor import *
 from lucid._func import *
 from lucid._util import *

@@ -308,7 +308,7 @@ def register_model(func: _ModuleReturnFunc) -> _ModuleReturnFunc:


 def _conv_view_limit_mb() -> int:
-    from lucid._kernel import conv as _conv_kernel
+    from lucid.nn._kernel import conv as _conv_kernel

     return _conv_kernel.get_conv_view_limit_mb()
lucid/_tensor/__init__.py CHANGED
@@ -1 +1,11 @@
-from lucid._tensor.tensor import Tensor
+from lucid._tensor.tensor import (
+    Tensor,
+    LongTensor,
+    IntTensor,
+    ShortTensor,
+    CharTensor,
+    HalfTensor,
+    FloatTensor,
+    DoubleTensor,
+    BoolTensor,
+)
lucid/_tensor/base.py CHANGED
@@ -108,6 +108,8 @@ class _TensorBase:

     def broadcast_to(self, shape: _ShapeLike) -> Self: ...

+    def expand(self, *sizes: int | _ShapeLike) -> Self: ...
+
     def chunk(self, chunks: int, axis: int = 0) -> tuple[Self, ...]: ...

     def swapaxes(self, axis1: int, axis2: int) -> Self: ...
lucid/_tensor/tensor.py CHANGED
@@ -1,4 +1,15 @@
-from typing import Callable, Iterator, Optional, Self, SupportsIndex, Any, overload
+from typing import (
+    Callable,
+    Iterator,
+    Optional,
+    Self,
+    SupportsIndex,
+    Any,
+    overload,
+    Generic,
+    TypeVar,
+    ClassVar,
+)
 from types import NoneType
 from collections import deque

@@ -22,15 +33,32 @@ from lucid._backend.core import BackwardOperation, Operation, noop
 from lucid._backend.metal import mx, parse_mlx_indexing, check_metal_availability


+__all__ = [
+    "Tensor",
+    "FloatTensor",
+    "DoubleTensor",
+    "HalfTensor",
+    "CharTensor",
+    "ShortTensor",
+    "IntTensor",
+    "LongTensor",
+    "BoolTensor",
+]
+
+
+DType = TypeVar("DType", bound=Numeric | bool)
+
 _HookType = Callable[["Tensor", _NumPyArray | _MLXArray], None]

 _dtype_map = {int: types.Int64, float: types.Float64, complex: types.Complex64}


-class Tensor(_TensorBase, _TensorInplace):
+class Tensor(Generic[DType], _TensorBase, _TensorInplace):
+    _fixed_dtype: ClassVar[Numeric | None] = None
+
     def __init__(
         self,
-        data: _ArrayOrScalar | _MLXArray,
+        data: _ArrayOrScalar,
         requires_grad: bool = False,
         keep_grad: bool = False,
         dtype: _BuiltinNumeric | Numeric | None = None,
@@ -39,6 +67,9 @@ class Tensor(_TensorBase, _TensorInplace):
         self._is_free = False
         self._is_bool_tensor = False

+        if self._fixed_dtype is not None:
+            dtype = self._fixed_dtype
+
         if dtype is bool:
             self._is_bool_tensor = True
             dtype = None
@@ -285,6 +316,12 @@ class Tensor(_TensorBase, _TensorInplace):
             dtype = device_or_dtype
         return self.astype(dtype)

+    def cpu(self) -> Self:
+        return self.to(device="cpu")
+
+    def gpu(self) -> Self:
+        return self.to(device="gpu")
+
     def is_cpu(self) -> bool:
         return self.device == "cpu"

@@ -480,3 +517,155 @@

     def bool(self) -> Self:
         return self.astype(bool)
+
+
+class LongTensor(Tensor[types.Int64]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Int64
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Int64,
+            device=device,
+        )
+
+
+class IntTensor(Tensor[types.Int32]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Int32
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Int32,
+            device=device,
+        )
+
+
+class ShortTensor(Tensor[types.Int16]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Int16
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Int16,
+            device=device,
+        )
+
+
+class CharTensor(Tensor[types.Int8]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Int8
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Int8,
+            device=device,
+        )
+
+
+class HalfTensor(Tensor[types.Float16]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Float16
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Float16,
+            device=device,
+        )
+
+
+class FloatTensor(Tensor[types.Float32]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Float32
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Float32,
+            device=device,
+        )
+
+
+class DoubleTensor(Tensor[types.Float64]):
+    _fixed_dtype: ClassVar[Numeric | None] = types.Float64
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=types.Float64,
+            device=device,
+        )
+
+
+class BoolTensor(Tensor[bool]):
+    _fixed_dtype: ClassVar[Numeric | None] = None
+
+    def __init__(
+        self,
+        data: _ArrayOrScalar,
+        requires_grad: bool = False,
+        keep_grad: bool = False,
+        device: _DeviceType = "cpu",
+    ) -> None:
+        super().__init__(
+            data=data,
+            requires_grad=requires_grad,
+            keep_grad=keep_grad,
+            dtype=bool,
+            device=device,
+        )
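The typed subclasses above pin `dtype` through the class-level `_fixed_dtype`, and `cpu()`/`gpu()` are thin wrappers over `to()`. A minimal usage sketch, assuming these names reach the top level through the wildcard imports shown in `lucid/__init__.py` and `lucid/_tensor/__init__.py`:

```python
import lucid

x = lucid.FloatTensor([1.0, 2.0, 3.0])   # dtype pinned to Float32 via _fixed_dtype
i = lucid.LongTensor([0, 1, 2])          # dtype pinned to Int64
b = lucid.BoolTensor([True, False, True])

y = x.cpu()  # shorthand for x.to(device="cpu")
# x.gpu() is shorthand for x.to(device="gpu") and requires the Metal (mlx) backend.
```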
lucid/_util/__init__.py CHANGED
@@ -9,11 +9,11 @@ from lucid._util import func
 # fmt: off
 __all__ = [
     "reshape", "squeeze", "unsqueeze", "expand_dims", "ravel", "stack", "hstack",
-    "vstack", "concatenate", "pad", "repeat", "tile", "flatten", "meshgrid",
-    "split", "tril", "triu", "broadcast_to", "chunk", "masked_fill", "roll",
-    "unbind", "sort", "nonzero", "unique", "topk", "argsort", "histogramdd",
-    "histogram", "histogram2d", "where", "nonzero", "argmin", "argmax",
-    "diagonal",
+    "vstack", "concatenate", "pad", "repeat", "tile", "flatten", "meshgrid",
+    "split", "tril", "triu", "broadcast_to", "expand", "chunk", "masked_fill",
+    "roll", "unbind", "sort", "nonzero", "unique", "topk", "argsort",
+    "histogramdd", "histogram", "histogram2d", "where", "nonzero", "argmin",
+    "argmax", "diagonal",
 ]
 # fmt: on

@@ -106,6 +106,14 @@ def broadcast_to(a: Tensor, /, shape: _ShapeLike) -> Tensor:
     return func.broadcast_to(shape)(a)


+def expand(a: Tensor, /, *sizes: int | _ShapeLike) -> Tensor:
+    if len(sizes) == 1 and isinstance(sizes[0], (tuple, list)):
+        shape = sizes[0]
+    else:
+        shape = sizes
+    return func.expand(shape)(a)
+
+
 def chunk(a: Tensor, /, chunks: int, axis: int = 0) -> tuple[Tensor, ...]:
     return func.chunk(chunks, axis)(a)

@@ -257,6 +265,7 @@ Tensor.split = split
 Tensor.tril = tril
 Tensor.triu = triu
 Tensor.broadcast_to = broadcast_to
+Tensor.expand = expand
 Tensor.chunk = chunk
 Tensor.masked_fill = masked_fill
 Tensor.roll = roll
lucid/_util/func.py CHANGED
@@ -605,6 +605,79 @@ class broadcast_to(Operation):
         return self.result.grad.reshape(self.original_shape)


+class expand(Operation):
+    def __init__(self, shape: _ShapeLike) -> None:
+        super().__init__()
+        self.shape = shape
+
+    def _resolve_shape(self, input_shape: tuple[int, ...]) -> tuple[int, ...]:
+        shape = tuple(int(dim) for dim in self.shape)
+        if len(shape) == 0:
+            raise ValueError("expand() expects at least one dimension.")
+
+        if len(shape) < len(input_shape):
+            raise ValueError(
+                "expand() cannot shrink the number of dimensions from "
+                f"{len(input_shape)} to {len(shape)}."
+            )
+
+        ndim_diff = len(shape) - len(input_shape)
+        padded_input = (1,) * ndim_diff + input_shape
+
+        resolved: list[int] = []
+        for axis, (target_dim, input_dim) in enumerate(zip(shape, padded_input)):
+            if target_dim == -1:
+                if axis < ndim_diff:
+                    raise ValueError(
+                        "expand() cannot use -1 in a leading, "
+                        "non-existing dimension."
+                    )
+                target_dim = input_dim
+
+            elif target_dim < -1:
+                raise ValueError("expand() size must be >= -1.")
+
+            if input_dim == target_dim:
+                resolved.append(target_dim)
+            elif input_dim == 1 and target_dim >= 0:
+                resolved.append(target_dim)
+            else:
+                raise ValueError(
+                    "expand() cannot expand dimension "
+                    f"{axis} from {input_dim} to {target_dim}."
+                )
+
+        return tuple(resolved)
+
+    @unary_func_op()
+    def cpu(self, a: Tensor) -> _FuncOpReturnType:
+        self.original_shape = a.shape
+        self.expanded_shape = self._resolve_shape(a.shape)
+
+        self.result = Tensor(np.broadcast_to(a.data, self.expanded_shape))
+        return self.result, self.__grad__
+
+    @unary_func_op(device="gpu")
+    def gpu(self, a: Tensor) -> _FuncOpReturnType:
+        self.original_shape = a.shape
+        self.expanded_shape = self._resolve_shape(a.shape)
+
+        self.result = Tensor(mx.broadcast_to(a.data, self.expanded_shape))
+        return self.result, self.__grad__
+
+    def __grad__(self) -> _GradType:
+        input_shape = self.original_shape
+        ndim_diff = len(self.expanded_shape) - len(input_shape)
+        if ndim_diff > 0:
+            input_shape = (1,) * ndim_diff + input_shape
+
+        for axis, (in_dim, out_dim) in enumerate(zip(input_shape, self.expanded_shape)):
+            if in_dim == 1 and out_dim > 1:
+                self.result.grad = self.result.grad.sum(axis=axis, keepdims=True)
+
+        return self.result.grad.reshape(self.original_shape)
+
+
 class chunk(Operation):
     def __init__(self, chunks: int, axis: int) -> None:
         super().__init__()
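The new `expand` operation follows the same broadcast rules as `broadcast_to` above: only size-1 axes may grow, `-1` keeps an existing size, and the backward pass sums gradients over the expanded axes before reshaping them to the input shape. A hedged usage sketch, assuming the usual lucid autograd entry points (`sum`, `backward`):

```python
import lucid

a = lucid.Tensor([[1.0], [2.0]], requires_grad=True)  # shape (2, 1)

b = a.expand(2, 3)         # shape (2, 3); a.expand((2, 3)) and a.expand(-1, 3) also work
c = lucid.expand(a, 2, 3)  # functional form registered in lucid._util

b.sum().backward()
print(a.grad)              # the expanded axis is summed in backward: [[3.], [3.]]
```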
lucid/models/__init__.py CHANGED
@@ -2,3 +2,4 @@ from .imgclf import *
 from .imggen import *
 from .objdet import *
 from .seq2seq import *
+from .seqclf import *
lucid/models/seqclf/__init__.py ADDED
@@ -0,0 +1 @@
+from .bert import *
lucid/models/seqclf/bert.py ADDED
@@ -0,0 +1,31 @@
+import lucid
+import lucid.nn as nn
+import lucid.nn.functional as F
+
+from lucid._tensor import Tensor
+
+
+class _BertEmbeddings(nn.Module):
+    def __init__(
+        self,
+        vocab_size: int,
+        hidden_size: int,
+        pad_token_id: int,
+        max_position_embeddings: int,
+        type_vocab_size: int,
+        layer_norm_eps: float,
+        hidden_dropout_prob: float,
+    ) -> None:
+        super().__init__()
+        self.word_embeddings = nn.Embedding(vocab_size, hidden_size, pad_token_id)
+        self.position_embeddings = nn.Embedding(max_position_embeddings, hidden_size)
+        self.token_type_embeddings = nn.Embedding(type_vocab_size)
+
+        self.layernorm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)
+        self.dropout = nn.Dropout(hidden_dropout_prob)
+
+        self.position_ids: nn.Buffer
+        self.register_buffer(
+            "position_ids", nn.Buffer(lucid.arange(max_position_embeddings))
+        )
+        # TODO: Implement `lucid.Tensor.expand`
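The trailing TODO points at the new `Tensor.expand`; a hypothetical sketch of how the registered `position_ids` buffer could be broadcast per batch once the module gains a `forward` (the `batch_size`/`seq_len` values below are illustrative, not from this release):

```python
import lucid

batch_size, seq_len, max_position_embeddings = 4, 7, 512

# 1-D position buffer, as registered above via nn.Buffer(lucid.arange(...))
position_ids = lucid.arange(max_position_embeddings)                # shape (512,)
position_ids = position_ids[:seq_len].expand(batch_size, seq_len)   # shape (4, 7)
print(position_ids.shape)
```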
lucid/nn/_kernel/embedding.py CHANGED
@@ -1,4 +1,4 @@
-import functools
+from functools import partial
 from types import ModuleType

 import numpy as np
@@ -7,49 +7,44 @@ from lucid._backend.core import Operation, func_op, _FuncOpReturnType, _GradType
 from lucid._backend.metal import mx
 from lucid._tensor import Tensor

-from lucid.types import _DeviceType, _TensorData
-
-
-def _as_int_array(arr, lib_: ModuleType) -> _TensorData:
-    if lib_ is np:
-        return arr.astype(np.int64)
-    return arr.astype(mx.int32)
-

 class embedding_kernel(Operation):
-    def __init__(self) -> None:
+    def __init__(self, padding_idx: int = -1) -> None:
         super().__init__()
+        self.padding_idx = int(padding_idx)
         self._indices = None
         self._num_embeddings = None

     def clear(self) -> None:
         super().clear()
+        self.padding_idx = -1
         self._indices = None
         self._num_embeddings = None

     @func_op(n_in=2, n_ret=1)
     def cpu(self, indices: Tensor, weight: Tensor) -> _FuncOpReturnType:
-        return self._forward(indices, weight, lib_=np, device="cpu")
+        return self._forward(indices, weight, lib_=np)

     @func_op(n_in=2, n_ret=1, device="gpu")
     def gpu(self, indices: Tensor, weight: Tensor) -> _FuncOpReturnType:
-        return self._forward(indices, weight, lib_=mx, device="gpu")
+        return self._forward(indices, weight, lib_=mx)

     def _forward(
-        self, indices: Tensor, weight: Tensor, lib_: ModuleType, device: _DeviceType
+        self, indices: Tensor, weight: Tensor, lib_: ModuleType
     ) -> _FuncOpReturnType:
-        idx = _as_int_array(indices.data, lib_)
+        idx = indices.data
         out = weight.data[idx]

         self._indices = idx
         self._num_embeddings = int(weight.shape[0])

-        self.result = Tensor(out, device=device)
-        return self.result, functools.partial(self.__grad__, lib_=lib_)
+        self.result = Tensor(out)
+        return self.result, partial(self.__grad__, lib_=lib_)

     def __grad__(self, lib_: ModuleType) -> _GradType:
         if self.result is None or self.result.grad is None:
             raise RuntimeError("embedding backward called before forward.")
+
         if self._indices is None or self._num_embeddings is None:
             raise RuntimeError("embedding cached data missing.")

@@ -58,15 +53,23 @@ class embedding_kernel(Operation):
         grad_flat = grad_out.reshape(idx.shape[0], -1)

         if lib_ is np:
+            if self.padding_idx >= 0:
+                keep = idx != self.padding_idx
+                idx = idx[keep]
+                grad_flat = grad_flat[keep]
+
             grad_w = np.zeros(
                 (self._num_embeddings, grad_flat.shape[1]), dtype=grad_out.dtype
             )
             np.add.at(grad_w, idx, grad_flat)
+
         else:
             grad_w = mx.zeros(
                 (self._num_embeddings, grad_flat.shape[1]), dtype=grad_out.dtype
             )
             for i in range(idx.shape[0]):
+                if self.padding_idx >= 0 and int(idx[i]) == self.padding_idx:
+                    continue
                 grad_w = grad_w.at[idx[i]].add(grad_flat[i])

         return None, grad_w
lucid/nn/functional/_util.py CHANGED
@@ -1,3 +1,5 @@
+import numpy as np
+
 import lucid
 import lucid.nn.functional

@@ -5,6 +7,7 @@ from lucid._tensor import Tensor
 from lucid.types import _Scalar, Numeric

 from lucid.nn._kernel.embedding import embedding_kernel
+from lucid._backend.metal import mx


 def _interpolate_bilinear(
@@ -131,17 +134,46 @@ def embedding(
     max_norm: float | None = None,
     norm_type: float = 2.0,
 ) -> Tensor:
+    num_embeddings = int(weight.shape[0])
+    if padding_idx is None:
+        pad = -1
+    else:
+        pad = int(padding_idx)
+        if pad < 0:
+            pad += num_embeddings
+        if pad < 0 or pad >= num_embeddings:
+            raise IndexError("padding_idx out of range.")
+
     indices = input_.astype(lucid.Int)
-    op = embedding_kernel()
-    output = op(indices, weight)
-    if padding_idx is not None:
-        mask = input_.data == padding_idx
-        output *= 1 - mask[..., None]
+    idx_data = indices.data
+
+    if (idx_data < 0).any() or (idx_data >= num_embeddings).any():
+        raise IndexError("embedding indices out of range.")

     if max_norm is not None:
-        norm = (output**norm_type).sum(axis=-1, keepdims=True) ** (1 / norm_type)
-        scaling = max_norm / (norm + (norm == 0))
-        output *= scaling
+        lib_ = np if weight.is_cpu() else mx
+        flat = idx_data.reshape(-1)
+
+        w = weight.data[flat]
+        if norm_type <= 0:
+            raise ValueError("norm_type must be positive.")
+
+        norms = (lib_.abs(w) ** norm_type).sum(axis=1) ** (1.0 / norm_type)
+        scale = lib_.minimum(1.0, max_norm / (norms + (norms == 0)))
+
+        if pad >= 0:
+            mask = flat == pad
+            mask_f = mask.astype(scale.dtype)
+            scale = scale * (1 - mask_f) + mask_f
+
+        weight.data[flat] = w * scale[:, None]
+
+    op = embedding_kernel(padding_idx=pad)
+    output = op(indices, weight)
+
+    if pad >= 0:
+        mask = input_.data == pad
+        output *= 1 - mask[..., None]

     return output
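With this rewrite, `max_norm` renormalizes the referenced embedding rows in place (leaving the padding row unchanged) before the kernel gathers them, and out-of-range indices now raise `IndexError`. A hedged call sketch, assuming `lucid.random.uniform` and `lucid.Int` as used elsewhere in this diff:

```python
import lucid
import lucid.nn.functional as F

weight = lucid.random.uniform(-1.0, 1.0, (10, 4))
ids = lucid.Tensor([[1, 2, 0], [3, 0, 0]], dtype=lucid.Int)

out = F.embedding(ids, weight, padding_idx=0, max_norm=1.0)
# Referenced non-padding rows of `weight` are rescaled in place to L2 norm <= 1.0,
# padded positions of `out` are zeroed, and the kernel drops their gradient rows.
print(out.shape)  # (2, 3, 4)
```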
lucid/nn/modules/attention.py CHANGED
@@ -1,3 +1,5 @@
+import math
+
 import lucid
 import lucid.nn as nn
 import lucid.nn.functional as F
@@ -41,6 +43,7 @@ class ScaledDotProductAttention(nn.Module):
     "num_heads",
     "dropout",
     "bias",
+    "use_separate_proj_weight",
     "add_bias_kv",
     "add_zero_attn",
 )
@@ -51,6 +54,7 @@ class MultiHeadAttention(nn.Module):
         num_heads: int,
         dropout: float = 0.0,
         bias: bool = True,
+        use_separate_proj_weight: bool = True,
         add_bias_kv: bool = False,
         add_zero_attn: bool = False,
         kdim: int | None = None,
@@ -60,6 +64,7 @@ class MultiHeadAttention(nn.Module):
         self.embed_dim = embed_dim
         self.num_heads = num_heads
         self.dropout = dropout
+        self.use_separate_proj_weight = use_separate_proj_weight
         self.add_bias_kv = add_bias_kv
         self.add_zero_attn = add_zero_attn

@@ -70,9 +75,30 @@ class MultiHeadAttention(nn.Module):
         kdim = kdim if kdim is not None else embed_dim
         vdim = vdim if vdim is not None else embed_dim

-        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
-        self.k_proj = nn.Linear(kdim, embed_dim, bias=bias)
-        self.v_proj = nn.Linear(vdim, embed_dim, bias=bias)
+        if use_separate_proj_weight:
+            self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+            self.k_proj = nn.Linear(kdim, embed_dim, bias=bias)
+            self.v_proj = nn.Linear(vdim, embed_dim, bias=bias)
+
+            self.register_parameter("in_proj_weight", None)
+            self.register_parameter("in_proj_bias", None)
+        else:
+            if kdim != embed_dim or vdim != embed_dim:
+                raise ValueError(
+                    "in_proj_weight requires kdim and vdim to equal embed_dim."
+                )
+
+            weight_ = lucid.empty(3 * embed_dim, embed_dim)
+            self.in_proj_weight = nn.Parameter(weight_)
+            if bias:
+                bias_ = lucid.empty(3 * embed_dim)
+                self.in_proj_bias = nn.Parameter(bias_)
+            else:
+                self.register_parameter("in_proj_bias", None)
+
+            self.q_proj = None
+            self.k_proj = None
+            self.v_proj = None

         self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)

@@ -84,6 +110,17 @@ class MultiHeadAttention(nn.Module):
         self.bias_v = None

         self.scale: _Scalar = self.head_dim**-0.5
+        self._reset_parameters()
+
+    def _reset_parameters(self) -> None:
+        if self.in_proj_weight is None:
+            return
+
+        nn.init.kaiming_uniform(self.in_proj_weight)
+        if self.in_proj_bias is not None:
+            fan_in, _ = nn.init._dist._calculate_fan_in_and_fan_out(self.in_proj_weight)
+            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
+            nn.init.uniform(self.in_proj_bias, -bound, bound)

     def forward(
         self,
@@ -97,9 +134,24 @@ class MultiHeadAttention(nn.Module):
         N, q_len = query.shape[:2]
         k_len, v_len = key.shape[1], value.shape[1]

-        q = self.q_proj(query)
-        k = self.k_proj(key)
-        v = self.v_proj(value)
+        if self.in_proj_weight is None:
+            q = self.q_proj(query)
+            k = self.k_proj(key)
+            v = self.v_proj(value)
+        else:
+            if query is key and key is value:
+                qkv = F.linear(query, self.in_proj_weight, self.in_proj_bias)
+                q, k, v = lucid.chunk(qkv, 3, axis=-1)
+            else:
+                w_q, w_k, w_v = lucid.chunk(self.in_proj_weight, 3, axis=0)
+                if self.in_proj_bias is not None:
+                    b_q, b_k, b_v = lucid.chunk(self.in_proj_bias, 3, axis=0)
+                else:
+                    b_q = b_k = b_v = None
+
+                q = F.linear(query, w_q, b_q)
+                k = F.linear(key, w_k, b_k)
+                v = F.linear(value, w_v, b_v)

         q = q.reshape(N, self.num_heads, q_len, self.head_dim)
         k = k.reshape(N, self.num_heads, k_len, self.head_dim)
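With `use_separate_proj_weight=False`, the three projections collapse into a packed `in_proj_weight` of shape `(3 * embed_dim, embed_dim)` plus an optional `in_proj_bias`, and self-attention takes the single fused `F.linear` path in `forward`. A hedged construction sketch, assuming `MultiHeadAttention` is re-exported from `lucid.nn` as usual:

```python
import lucid.nn as nn

mha = nn.MultiHeadAttention(embed_dim=64, num_heads=8, use_separate_proj_weight=False)

print(mha.in_proj_weight.shape)  # (192, 64): packed q/k/v projection weight
print(mha.in_proj_bias.shape)    # (192,)
print(mha.q_proj, mha.k_proj, mha.v_proj)  # None None None

# In forward, `query is key is value` takes the fused F.linear path and the
# result is split with lucid.chunk(qkv, 3, axis=-1).
```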
lucid/nn/modules/rnn.py CHANGED
@@ -284,21 +284,16 @@ class RNNBase(nn.Module):
     ) -> None:
         super().__init__()
         self.is_lstm = False
-        cell_kwargs = {}
         nonlinearity = "tanh"

         if mode == "RNN_TANH":
-            cell_cls = RNNCell
-            cell_kwargs: dict[str, object] = {"nonlinearity": nonlinearity}
+            pass
         elif mode == "RNN_RELU":
             nonlinearity = "relu"
-            cell_cls = RNNCell
-            cell_kwargs = {"nonlinearity": nonlinearity}
         elif mode == "LSTM":
-            cell_cls = LSTMCell
             self.is_lstm = True
         elif mode == "GRU":
-            cell_cls = GRUCell
+            pass
         else:
             raise ValueError(
                 f"Invalid mode '{mode}'. Supported modes are 'RNN_TANH', "
@@ -315,18 +310,120 @@ class RNNBase(nn.Module):
         self.batch_first = batch_first
         self.dropout = float(dropout)

-        layers: list[nn.Module] = []
         for layer in range(num_layers):
             layer_input_size = input_size if layer == 0 else hidden_size
-            layers.append(
-                cell_cls(
-                    input_size=layer_input_size,
-                    hidden_size=hidden_size,
-                    bias=bias,
-                    **cell_kwargs,
+            sqrt_k = 1.0 / (hidden_size**0.5)
+
+            if mode in ("RNN_TANH", "RNN_RELU"):
+                w_ih = nn.Parameter(
+                    lucid.random.uniform(
+                        -sqrt_k, sqrt_k, (hidden_size, layer_input_size)
+                    )
                 )
-            )
-        self.layers = nn.ModuleList(layers)
+                w_hh = nn.Parameter(
+                    lucid.random.uniform(-sqrt_k, sqrt_k, (hidden_size, hidden_size))
+                )
+            elif mode == "LSTM":
+                w_ih = nn.Parameter(
+                    lucid.random.uniform(
+                        -sqrt_k, sqrt_k, (4 * hidden_size, layer_input_size)
+                    )
+                )
+                w_hh = nn.Parameter(
+                    lucid.random.uniform(
+                        -sqrt_k, sqrt_k, (4 * hidden_size, hidden_size)
+                    )
+                )
+            else:
+                w_ih = nn.Parameter(
+                    lucid.random.uniform(
+                        -sqrt_k, sqrt_k, (3 * hidden_size, layer_input_size)
+                    )
+                )
+                w_hh = nn.Parameter(
+                    lucid.random.uniform(
+                        -sqrt_k, sqrt_k, (3 * hidden_size, hidden_size)
+                    )
+                )
+
+            self.register_parameter(f"weight_ih_l{layer}", w_ih)
+            self.register_parameter(f"weight_hh_l{layer}", w_hh)
+
+            if bias:
+                b_ih = nn.Parameter(
+                    lucid.random.uniform(-sqrt_k, sqrt_k, w_ih.shape[0])
+                )
+                b_hh = nn.Parameter(
+                    lucid.random.uniform(-sqrt_k, sqrt_k, w_hh.shape[0])
+                )
+                self.register_parameter(f"bias_ih_l{layer}", b_ih)
+                self.register_parameter(f"bias_hh_l{layer}", b_hh)
+            else:
+                self.register_parameter(f"bias_ih_l{layer}", None)
+                self.register_parameter(f"bias_hh_l{layer}", None)
+
+    def _rnn_cell(
+        self,
+        input_: Tensor,
+        hx: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor | None,
+        b_hh: Tensor | None,
+    ) -> Tensor:
+        hy = F.linear(input_, w_ih, b_ih)
+        hy += F.linear(hx, w_hh, b_hh)
+
+        if self.mode == "RNN_TANH":
+            return F.tanh(hy)
+
+        return F.relu(hy)
+
+    def _lstm_cell(
+        self,
+        input_: Tensor,
+        hx: Tensor,
+        cx: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor | None,
+        b_hh: Tensor | None,
+    ) -> tuple[Tensor, Tensor]:
+        gates = F.linear(input_, w_ih, b_ih)
+        gates += F.linear(hx, w_hh, b_hh)
+
+        i_t, f_t, g_t, o_t = lucid.split(gates, 4, axis=1)
+        i_t = F.sigmoid(i_t)
+        f_t = F.sigmoid(f_t)
+        g_t = F.tanh(g_t)
+        o_t = F.sigmoid(o_t)
+
+        c_t = f_t * cx + i_t * g_t
+        h_t = o_t * F.tanh(c_t)
+
+        return h_t, c_t
+
+    def _gru_cell(
+        self,
+        input_: Tensor,
+        hx: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor | None,
+        b_hh: Tensor | None,
+    ) -> Tensor:
+        input_gates = F.linear(input_, w_ih, b_ih)
+        hidden_gates = F.linear(hx, w_hh, b_hh)
+
+        i_r, i_z, i_n = lucid.split(input_gates, 3, axis=1)
+        h_r, h_z, h_n = lucid.split(hidden_gates, 3, axis=1)
+
+        r_t = F.sigmoid(i_r + h_r)
+        z_t = F.sigmoid(i_z + h_z)
+        n_t = F.tanh(i_n + r_t * h_n)
+        h_t = (1 - z_t) * n_t + z_t * hx
+
+        return h_t

     def _init_hidden(
         self, batch_size: int, dtype: Numeric, device: _DeviceType
@@ -441,7 +538,12 @@ class RNNBase(nn.Module):
         h_n_list: list[Tensor] = []
         c_n_list: list[Tensor] | None = [] if self.is_lstm else None

-        for layer_idx, cell in enumerate(self.layers):
+        for layer_idx in range(self.num_layers):
+            w_ih = getattr(self, f"weight_ih_l{layer_idx}")
+            w_hh = getattr(self, f"weight_hh_l{layer_idx}")
+            b_ih = getattr(self, f"bias_ih_l{layer_idx}")
+            b_hh = getattr(self, f"bias_hh_l{layer_idx}")
+
             if self.is_lstm:
                 h_t = hx_h[layer_idx]
                 c_t = hx_c[layer_idx]
@@ -481,9 +583,13 @@ class RNNBase(nn.Module):
                     offset += bs

                     if self.is_lstm:
-                        h_t, c_t = cell(step_input, (h_t, c_t))
+                        h_t, c_t = self._lstm_cell(
+                            step_input, h_t, c_t, w_ih, w_hh, b_ih, b_hh
+                        )
+                    elif self.mode == "GRU":
+                        h_t = self._gru_cell(step_input, h_t, w_ih, w_hh, b_ih, b_hh)
                     else:
-                        h_t = cell(step_input, h_t)
+                        h_t = self._rnn_cell(step_input, h_t, w_ih, w_hh, b_ih, b_hh)

                     outputs.append(h_t)
                     prev_bs = bs
@@ -512,11 +618,17 @@ class RNNBase(nn.Module):

             else:
                 for t in range(seq_len):
+                    step_input = layer_input[t]
                     if self.is_lstm:
-                        h_t, c_t = cell(layer_input[t], (h_t, c_t))
+                        h_t, c_t = self._lstm_cell(
+                            step_input, h_t, c_t, w_ih, w_hh, b_ih, b_hh
+                        )
+                        outputs.append(h_t.unsqueeze(axis=0))
+                    elif self.mode == "GRU":
+                        h_t = self._gru_cell(step_input, h_t, w_ih, w_hh, b_ih, b_hh)
                         outputs.append(h_t.unsqueeze(axis=0))
                     else:
-                        h_t = cell(layer_input[t], h_t)
+                        h_t = self._rnn_cell(step_input, h_t, w_ih, w_hh, b_ih, b_hh)
                         outputs.append(h_t.unsqueeze(axis=0))

             layer_output = lucid.concatenate(tuple(outputs), axis=0)
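The per-layer cells are now inlined as `_rnn_cell`, `_lstm_cell`, and `_gru_cell`, with parameters registered flat as `weight_ih_l{k}`, `weight_hh_l{k}`, `bias_ih_l{k}`, `bias_hh_l{k}`. For reference, the gate math in `_gru_cell` is the standard GRU update; a self-contained NumPy restatement (illustrative only, independent of lucid):

```python
import numpy as np


def _sigmoid(z: np.ndarray) -> np.ndarray:
    return 1.0 / (1.0 + np.exp(-z))


def gru_cell_reference(x, h, w_ih, w_hh, b_ih, b_hh):
    """Plain-NumPy restatement of the gate math in RNNBase._gru_cell above."""
    input_gates = x @ w_ih.T + b_ih    # stacked (reset, update, new) along features
    hidden_gates = h @ w_hh.T + b_hh

    i_r, i_z, i_n = np.split(input_gates, 3, axis=1)
    h_r, h_z, h_n = np.split(hidden_gates, 3, axis=1)

    r_t = _sigmoid(i_r + h_r)
    z_t = _sigmoid(i_z + h_z)
    n_t = np.tanh(i_n + r_t * h_n)
    return (1 - z_t) * n_t + z_t * h
```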
lucid/nn/modules/sparse.py CHANGED
@@ -21,7 +21,17 @@ class Embedding(nn.Module):
         super().__init__()
         self.num_embeddings = num_embeddings
         self.embedding_dim = embedding_dim
-        self.padding_idx = padding_idx
+
+        if padding_idx is None:
+            self.padding_idx = None
+        else:
+            pad = int(padding_idx)
+            if pad < 0:
+                pad += num_embeddings
+            if pad < 0 or pad >= num_embeddings:
+                raise IndexError("padding_idx out of range.")
+            self.padding_idx = pad
+
         self.max_norm = max_norm
         self.norm_type = norm_type

@@ -32,6 +42,9 @@ class Embedding(nn.Module):
         else:
             self.weight = nn.Parameter(_weight)

+        if self.padding_idx is not None:
+            self.weight.data[self.padding_idx] = 0
+
     def forward(self, input_: Tensor) -> Tensor:
         return F.embedding(
             input_, self.weight, self.padding_idx, self.max_norm, self.norm_type
@@ -39,3 +52,5 @@

     def reset_parameters(self) -> None:
         self.weight.data = lucid.random.uniform(-0.1, 0.1, self.weight.shape)
+        if self.padding_idx is not None:
+            self.weight.data[self.padding_idx] = 0
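`padding_idx` is now normalized at construction (negative values wrap, out-of-range values raise `IndexError`) and its embedding row is zeroed both here and in `reset_parameters`. A hedged sketch, assuming the keyword names match the attributes shown above:

```python
import lucid.nn as nn

emb = nn.Embedding(num_embeddings=10, embedding_dim=4, padding_idx=-1)

print(emb.padding_idx)     # 9: a negative index wraps to num_embeddings + padding_idx
print(emb.weight.data[9])  # all zeros: the padding row is zeroed at construction

# nn.Embedding(10, 4, padding_idx=10) raises IndexError("padding_idx out of range.")
```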
lucid_dl-2.12.0.dist-info/METADATA → lucid_dl-2.12.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lucid-dl
-Version: 2.12.0
+Version: 2.12.1
 Summary: Lumerico's Comprehensive Interface for Deep Learning
 Home-page: https://github.com/ChanLumerico/lucid
 Author: ChanLumerico
@@ -48,6 +48,10 @@ Whether you're a student, educator, or an advanced researcher seeking to demysti

 ### 🔥 What's New

+- New Tensor utility function added: `lucid.Tensor.expand`
+
+- Added Type-Generic Tensors: `lucid.LongTensor`, `lucid.DoubleTensor`, etc.
+
 - Added new visual tool: `lucid.visual.build_tensor_mermaid_chart` which builds a Mermaid chart of a given tensor's computational graph

 - Added additional `nn.Module` hooks for richer introspection during training:
lucid_dl-2.12.0.dist-info/RECORD → lucid_dl-2.12.1.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-lucid/__init__.py,sha256=EwNZkKALNS54RQw_D_1BtqbBo3kFMi4uSveSs6i-bdM,9161
+lucid/__init__.py,sha256=p5SmXNcVcuTeiCmgpKRU6ttNp-Nut58VblUy6B2a5HU,9159
 lucid/error.py,sha256=qnTiVuZm3c5-DIt-OOyobZ7RUm7E1K4NR0j998LG1ug,709
 lucid/port.py,sha256=Kt1YaSWef_eKF4KRj-UFhirvFC5urEESfYQ_BSlBZGE,3811
 lucid/types.py,sha256=Zdz2r4ledouEG-6Gi6yEza5vSLyyTzZJn7AcRKbxy8o,6906
@@ -12,11 +12,11 @@ lucid/_func/ufunc.py,sha256=AnCSykuYC0fNNoZso-TM60Rlq_4c54uMYOG3BHdvy20,30261
 lucid/_fusion/__init__.py,sha256=SVzLiFzs4m1mMOpefKDLFkYqV0zV5FGwFd9hEbUZtSo,68
 lucid/_fusion/base.py,sha256=d6nWuPjYxkie9Xrtbj3JVusnIN61PIoSFFSthJNm9os,3821
 lucid/_fusion/func.py,sha256=9tXzB-QNrx_AvNJiPto807faXKlzjuMG4o9gRgI5usc,1659
-lucid/_tensor/__init__.py,sha256=wFWAMhTnQwThNiBEIT4fcw4ryIm8A4AoR-m9KDhklOQ,40
-lucid/_tensor/base.py,sha256=pEeL4O-R_xFbugdBG8mWipP_pEhtIhWPSgAgHc51ZcQ,5262
-lucid/_tensor/tensor.py,sha256=BLqpVqxP657W8j-oq7la9IVzH5RZnvQDKEMsU1s5rjM,14821
-lucid/_util/__init__.py,sha256=NgOleItHJGVLdJlKHKfpzuSl3vofzJpNsZByHAYJmKs,6838
-lucid/_util/func.py,sha256=ZODVlGdAejMTJnwEls7yMCI5WJ9Thkb3RIq1WwQCS4E,44239
+lucid/_tensor/__init__.py,sha256=prEYQ-GevlGoO7JblW_1dpwarRpCoWzrOYMjWAjSg2Q,180
+lucid/_tensor/base.py,sha256=KWYTFIuc8_ZOeVPUZh-ogT3C9Ey6wA6WLkwepE_CGhk,5323
+lucid/_tensor/tensor.py,sha256=gb9h30Kh6mdKg3eH0XFNLGlhUgyhKF-1lNr_wNcKw1k,19174
+lucid/_util/__init__.py,sha256=_p_qhtPKWzV-kwBpCqSsgXrWSbWWw3I7PMMhgFqiLYg,7085
+lucid/_util/func.py,sha256=DDA6vUYc9b8FeX1pvfI1jW8BpGOm7Wr8E_9btGrYYs0,46921
 lucid/autograd/__init__.py,sha256=hDoK_B2chRFVhoxsT4vxRKangzBEMWqF8gj2hdoTenk,6775
 lucid/data/__init__.py,sha256=qrDIQsnix5ZUEa0yrtomaaWbNJyJ3xEr2gdhRvg70_8,118
 lucid/data/_base.py,sha256=RM8xpBl8qFhm19n7eER_jOsRaxkL3rbOkwUvn6VetSE,5921
@@ -29,7 +29,7 @@ lucid/einops/__init__.py,sha256=9Dlmfw6PsIU9b_a89Zre4yV2rztRHPCL4QpsUnXJwjM,802
 lucid/einops/_func.py,sha256=XXsX9lse_0turKoFnOTtLdY6hBUi0gq_8K81G7nr80I,21026
 lucid/linalg/__init__.py,sha256=N-LrlC3qSsOMt6Ad1-PP3Qc3QH6EWNf5P50GBvwb9aQ,1118
 lucid/linalg/_func.py,sha256=Iyeut5nHwQmO8N326kQUaTjgoKVoBaxt_gy_3NXXD60,16378
-lucid/models/__init__.py,sha256=wegfOBvwJTFFee8eVt90zJoLsbbEpdT5G2y-mpO5xcE,89
+lucid/models/__init__.py,sha256=0BoSSrffJnK3Vqz3yVtvUReuB5uFWTYG8NosK2dz97U,111
 lucid/models/utils.py,sha256=2g8FLcMLRgVxgGEaYuwJyFxeXu-A_a4_MVr0K-TNh74,5195
 lucid/models/imgclf/__init__.py,sha256=kQH-nNu8_TPJ7Av151WSpcY4GJ06gGAd6Ozs3m3KMcE,590
 lucid/models/imgclf/alex.py,sha256=fZsPdCjWUseCrxBwKj-i5fPSDYLgBpfm0SJe07YKRuE,1472
@@ -76,6 +76,8 @@ lucid/models/objdet/yolo/yolo_v3.py,sha256=B5U42Npwfg8nSgU9E261zf0cbQS9RVYrX1ADD
 lucid/models/objdet/yolo/yolo_v4.py,sha256=RFbBumreXmy6s8IYZvUuhW0893ss8sx_8Vgi6KbBKWo,21467
 lucid/models/seq2seq/__init__.py,sha256=wjsrhj4H_AcqwwbebAN8b68QBA8L6p1_12dkG2995-w,27
 lucid/models/seq2seq/transformer.py,sha256=y5rerCs1s6jXTsVvbgscWScKpQKuSu1fezsBe7PNTRA,3513
+lucid/models/seqclf/__init__.py,sha256=qpzGjlHlqe7oQO4KBiz2XtchpoI9u1PUlaPAIh6EY0w,20
+lucid/models/seqclf/bert.py,sha256=wlnZsNci9dMd3yYCY3QoJqgA0s7gnSO2XcfR99l1JaA,990
 lucid/nn/__init__.py,sha256=nyy6px1CxfchWUh68xCiQSxD7Gk65vamhWK8ztRvH68,184
 lucid/nn/fused.py,sha256=75fcXuo6fHSO-JtjuKhowhHSDr4qc5871WR63sUzH0g,5492
 lucid/nn/module.py,sha256=_EWtGkAuWWCPZ5f3t5pJOOzpi14gQBpP7JW2S8o4_GE,26855
@@ -84,7 +86,7 @@ lucid/nn/_kernel/__init__.py,sha256=n1bnYdeb_bNDBKASWGywTRa0Ne9hMAkal3AuVZJgovI,
 lucid/nn/_kernel/activation.py,sha256=mfe48Aw3_Hv0hZEVC7DxDw19XK9XSLfdCOvo2JcZz_o,5662
 lucid/nn/_kernel/attention.py,sha256=1k0gboLObMNVow2v3TwliXC_2v8uKf2o8jHYFuyQqcg,3699
 lucid/nn/_kernel/conv.py,sha256=TiY3EkUAmwFCI1aA8YVMoZJHIRrqmJAXZEPh1C7lons,16412
-lucid/nn/_kernel/embedding.py,sha256=uf_G0aKphxBEOtfR9DHXmSyqkDCwc6tJEBNMKt4CNOU,2390
+lucid/nn/_kernel/embedding.py,sha256=w90-SSr_DYzcI-zLkvye8P2o9C103imPPe4HBRPKUSg,2480
 lucid/nn/_kernel/loss.py,sha256=UD0B5DZ3R98OPZUigHsctL0eAJch2rKQpn1uaI3fzGg,13935
 lucid/nn/_kernel/norm.py,sha256=261WtixerLxFISIroQw8l_zZ3X0b4c_eDy8QHHA-i4M,11992
 lucid/nn/_kernel/pool.py,sha256=IQh5hfKU4PUvnGS1ayorUmytB_vCSxcbAwBYlFKw0iI,10697
@@ -98,12 +100,12 @@ lucid/nn/functional/_loss.py,sha256=b6KT8SrKe5lgAqlAmQnT00Hk7tvd-UcBPNryGYtTPWQ,
 lucid/nn/functional/_norm.py,sha256=yunKJttd3WTxXvzKuugL2LgHLmp-9dMxhHgQ9myLUzA,5041
 lucid/nn/functional/_pool.py,sha256=u6ykwqTZ38b9QPwUqFXpnPhOx2cc_9x9AfH0k26Y9pQ,4085
 lucid/nn/functional/_spatial.py,sha256=lazoSvVMFcauBWRbMOqmkgixA5bDes6scGHVWCgVmHE,3911
-lucid/nn/functional/_util.py,sha256=I2MvzuqPqZBC7Xo_rOq5d1R-f1Hqf7CtTKE06nudY60,5060
+lucid/nn/functional/_util.py,sha256=gfsoGo7JgCHtPkcQqqisO8MfyyK57Pzy7Oeny8k2KKo,5936
 lucid/nn/init/__init__.py,sha256=YFi-HD2TEglweJ-gyX3n4UVZYzd70gcUi1dBu6hnOAY,1533
 lucid/nn/init/_dist.py,sha256=Tj9SKl43ZrJdv99X5qXUowdcts4f4D3tUk7RBmX5uCg,2462
 lucid/nn/modules/__init__.py,sha256=mol5Gfy-3ab5hBYZRxX0vjiI0w5VyKtBxVwj_vrOAZs,285
 lucid/nn/modules/activation.py,sha256=CpiKpzgZHoCp8UO5taCJ9BuwFz5mYUs0o1_TQcEwQbQ,2823
-lucid/nn/modules/attention.py,sha256=pZi7IGsNFu2xCmeLMuyWgveMyi2QXtaKRKQ70yAeE0c,4407
+lucid/nn/modules/attention.py,sha256=XdOrGsS0zTPM8isP7MXoelGuuosNXO9HgD53wGPMBdM,6465
 lucid/nn/modules/conv.py,sha256=KbtInQgKSw3U_qXiqy7x53DZM9YAMUq7sFas1nV7NxY,13932
 lucid/nn/modules/drop.py,sha256=8127XhAbwk0nHWKVcGYqnnzsfmYn-WZ8iR6DXW_al5g,2127
 lucid/nn/modules/einops.py,sha256=3NGbfcBq9PZ9Vlbai53eBGGY4ckeWGXTCdPD73zuuNE,512
@@ -111,8 +113,8 @@ lucid/nn/modules/linear.py,sha256=87cuFWYct9JlmtVC3jGR-8eouxxzANaVA6cd7p9r2Ho,28
 lucid/nn/modules/loss.py,sha256=pjEMIruhtpTHhHFsNThS9LFz-aI_DAXLqMV8KRXydEg,3431
 lucid/nn/modules/norm.py,sha256=bYsKOg58kxzhMhbyvHrDDgVzN_p3D9HBTdYWpDtDeHQ,6842
 lucid/nn/modules/pool.py,sha256=ymVnS2NZjh08Tw0VeOfkB6AVrMeLmCKvgxkmEO3KUuw,5044
-lucid/nn/modules/rnn.py,sha256=L2rqFRcdr0U33YFeVvthDwDFIE98PrO-OjFiX9IzlIs,21098
-lucid/nn/modules/sparse.py,sha256=EpjiviED2nI55wUjh1twFwa4Lvlrzw0TR6lpCDGeSbo,1147
+lucid/nn/modules/rnn.py,sha256=y_dfvs-2PabKzug9jxMn8o4ir0KfMg64nOS2UKScBZY,24855
+lucid/nn/modules/sparse.py,sha256=C6Kz6Vhe9ko0Ym6JJHOH3HKdNFZJ_xTt2KSRqITXGl8,1620
 lucid/nn/modules/transformer.py,sha256=z56emF_eX18pxRELjfmmsY-7Bn9h2yjIdxCaxs6YDwA,11246
 lucid/nn/modules/vision.py,sha256=8xYasT7TNj4NXwMwwJIw1nbV1paeWEFg_ZohXn9kZBg,1579
 lucid/nn/utils/__init__.py,sha256=ynHrPi9SPdRRXhGjghG42FRBcEiVN8Hb_04XHBZqy_o,46
@@ -136,8 +138,8 @@ lucid/visual/__init__.py,sha256=tRgyNHzKWA8cp-a_GV586Bs0yJUN5ZTmKgnUhscutHQ,23
 lucid/visual/mermaid.py,sha256=m0X0kkdLuCxEzKmXSy3zplUaa3Gov8RRonKyHiEvfHE,32738
 lucid/weights/__init__.py,sha256=z1AikA3rOEeckWGkYWlcZkxNlJo9Xwa39PL6ly3hWnc,8801
 lucid/weights/__init__.pyi,sha256=lFonYC3cUx2Idolf3AEPnjFcyqcn3UDU84oJlZafqLY,3013
-lucid_dl-2.12.0.dist-info/licenses/LICENSE,sha256=vxRFYnVD1IeYtsvw-KmoElfqrjxKHv1h9YTvsG54loQ,1065
-lucid_dl-2.12.0.dist-info/METADATA,sha256=Y7doYNmgXQugwLzkYsJBv4Jzw1g9ZMsIxXYofaCmdAc,11679
-lucid_dl-2.12.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-lucid_dl-2.12.0.dist-info/top_level.txt,sha256=uzP_qBx9iNWIHKJRlElYcBLYVqMpdm9Q1Ma63QPYbFc,6
-lucid_dl-2.12.0.dist-info/RECORD,,
+lucid_dl-2.12.1.dist-info/licenses/LICENSE,sha256=vxRFYnVD1IeYtsvw-KmoElfqrjxKHv1h9YTvsG54loQ,1065
+lucid_dl-2.12.1.dist-info/METADATA,sha256=QsDRFeh22Zlxi7RNDDb9aSM75Ly0lZVBtpX82v34kl8,11817
+lucid_dl-2.12.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+lucid_dl-2.12.1.dist-info/top_level.txt,sha256=uzP_qBx9iNWIHKJRlElYcBLYVqMpdm9Q1Ma63QPYbFc,6
+lucid_dl-2.12.1.dist-info/RECORD,,