rxnn-0.2.1-py3-none-any.whl → rxnn-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the registry.
rxnn/memory/stm.py
CHANGED
@@ -5,7 +5,7 @@ class ShortTermMemory(nn.Module):
     """Short-term memory module for the Attention-based Memory System"""
 
     def __init__(self, num_layers: int, embed_dim: int, stm_size: int, init_type: str = 'normal',
-                 is_trainable: bool = False,
+                 is_trainable: bool = False, *args, **kwargs):
         super(ShortTermMemory, self).__init__(*args, **kwargs)
         self.num_layers = num_layers
         self.embed_dim = embed_dim
@@ -14,10 +14,6 @@ class ShortTermMemory(nn.Module):
         self.is_trainable = is_trainable
         assert init_type in ['normal', 'standard', 'uniform', 'ones', 'zeros'], \
             'STM init type must be one of "normal", "standard", "uniform", "ones", "zeros"'
-
-        # Legacy init - temporary option to load old models with not-batched STM (they will be loaded, updated and then the option will be removed)
-        self.legacy_init = legacy_init
-
         self.init_type = init_type
         stm = self._init_tensor()
         if self.is_trainable:
@@ -27,8 +23,7 @@ class ShortTermMemory(nn.Module):
 
     def _init_tensor(self, init_type: str = None):
         init_type = init_type or self.init_type
-        stm_shape = (self.num_layers, self.stm_size, self.embed_dim) \
-            if self.legacy_init else (self.num_layers, self.batch_size, self.stm_size, self.embed_dim)
+        stm_shape = (self.num_layers, self.batch_size, self.stm_size, self.embed_dim)
         if init_type == 'normal':
             return torch.normal(0, 0.02, stm_shape)
         elif init_type == 'standard':
@@ -40,12 +35,8 @@ class ShortTermMemory(nn.Module):
         else:
             return torch.zeros(*stm_shape)
 
-    def reset_legacy_(self):
-        self.legacy_init = False
-        self.memory = self._init_tensor()
-
     def forward(self, layer: int) -> torch.Tensor:
-        return self.memory[layer]
+        return self.memory[layer]
 
     def update_layer(self, layer: int, new_stm: torch.Tensor):
         self.memory[layer] = new_stm
rxnn-0.2.1.dist-info/RECORD → rxnn-0.2.2.dist-info/RECORD
CHANGED
@@ -7,7 +7,7 @@ rxnn/experimental/moe.py,sha256=jHZ1QhpWiVQOswVpFmuH7b2IUOPf0Uuf-I2Ddwsd7Us,6140
 rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/memory/attention.py,sha256=p-r8DK3iVhNn-JAESVzIXDCG8gk1R_-x5xHclZ5jgb0,1813
 rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
-rxnn/memory/stm.py,sha256=
+rxnn/memory/stm.py,sha256=eSMK5KdupWNf56FcDYprHnjA51EeYBzSKza7tiZxKSc,3618
 rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/rxt/models.py,sha256=zNrf6mn-s2vJyauHwNgYm_e-gFI1clmXp_JyCKGQD3E,12083
 rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,7 +32,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=2dpUQv88ekZa_CMSPLrXvB6X684wxUE2bDVznsi5ACs,17429
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.
-rxnn-0.2.
-rxnn-0.2.
-rxnn-0.2.
+rxnn-0.2.2.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.2.dist-info/METADATA,sha256=GlH7tyaDt27dzlp7G3CafWLAic8S5dTd-eiYKzDNQlA,25959
+rxnn-0.2.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.2.dist-info/RECORD,,
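
For reference, each RECORD row has the form path,sha256=<digest>,<size-in-bytes>, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing '=' padding stripped (PEP 376 / PEP 427). A small sketch for re-checking an entry against an unpacked wheel:

import base64
import hashlib

def record_digest(path: str) -> str:
    # Hash the file and encode the raw digest the way wheel RECORDs do:
    # urlsafe base64 with the trailing '=' padding removed.
    with open(path, 'rb') as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

# Against the unpacked 0.2.2 wheel, record_digest('rxnn/memory/stm.py')
# should match the RECORD entry above: eSMK5KdupWNf56FcDYprHnjA51EeYBzSKza7tiZxKSc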