openarchx-0.1.0-py3-none-any.whl

Files changed (43)
  1. openarchx/__init__.py +11 -0
  2. openarchx/core/tensor.py +179 -0
  3. openarchx/cuda/__init__.py +27 -0
  4. openarchx/cuda/cuda_ops.py +296 -0
  5. openarchx/layers/activations.py +63 -0
  6. openarchx/layers/base.py +40 -0
  7. openarchx/layers/cnn.py +145 -0
  8. openarchx/layers/transformer.py +131 -0
  9. openarchx/nn/__init__.py +26 -0
  10. openarchx/nn/activations.py +127 -0
  11. openarchx/nn/containers.py +174 -0
  12. openarchx/nn/dropout.py +121 -0
  13. openarchx/nn/layers.py +338 -0
  14. openarchx/nn/losses.py +156 -0
  15. openarchx/nn/module.py +18 -0
  16. openarchx/nn/padding.py +120 -0
  17. openarchx/nn/pooling.py +318 -0
  18. openarchx/nn/rnn.py +226 -0
  19. openarchx/nn/transformers.py +187 -0
  20. openarchx/optimizers/adam.py +49 -0
  21. openarchx/optimizers/adaptive.py +63 -0
  22. openarchx/optimizers/base.py +24 -0
  23. openarchx/optimizers/modern.py +98 -0
  24. openarchx/optimizers/optx.py +91 -0
  25. openarchx/optimizers/sgd.py +63 -0
  26. openarchx/quantum/circuit.py +92 -0
  27. openarchx/quantum/gates.py +126 -0
  28. openarchx/utils/__init__.py +50 -0
  29. openarchx/utils/data.py +229 -0
  30. openarchx/utils/huggingface.py +288 -0
  31. openarchx/utils/losses.py +21 -0
  32. openarchx/utils/model_io.py +553 -0
  33. openarchx/utils/pytorch.py +420 -0
  34. openarchx/utils/tensorflow.py +467 -0
  35. openarchx/utils/transforms.py +259 -0
  36. openarchx-0.1.0.dist-info/METADATA +180 -0
  37. openarchx-0.1.0.dist-info/RECORD +43 -0
  38. openarchx-0.1.0.dist-info/WHEEL +5 -0
  39. openarchx-0.1.0.dist-info/licenses/LICENSE +21 -0
  40. openarchx-0.1.0.dist-info/top_level.txt +2 -0
  41. tests/__init__.py +1 -0
  42. tests/test_cuda_ops.py +205 -0
  43. tests/test_integrations.py +236 -0
openarchx/layers/cnn.py
@@ -0,0 +1,145 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from ..nn.module import Module
+
+ class Conv2d(Module):
+     def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
+         super().__init__()
+         self.in_channels = in_channels
+         self.out_channels = out_channels
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.stride = stride if isinstance(stride, tuple) else (stride, stride)
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+
+         # Initialize weights using He initialization
+         scale = np.sqrt(2.0 / (in_channels * self.kernel_size[0] * self.kernel_size[1]))
+         self.weight = Tensor(
+             np.random.normal(0, scale,
+                              (out_channels, in_channels, *self.kernel_size)),
+             requires_grad=True
+         )
+         self.bias = Tensor(np.zeros(out_channels), requires_grad=True)
+
+     def _extract_patches(self, x, k_h, k_w, stride_h, stride_w):
+         """Extract patches from input tensor efficiently"""
+         batch_size, channels, height, width = x.shape
+
+         # Calculate output dimensions
+         out_h = (height - k_h) // stride_h + 1
+         out_w = (width - k_w) // stride_w + 1
+
+         # Initialize patches array
+         patches = np.zeros((batch_size, out_h * out_w, channels * k_h * k_w))
+
+         # Extract patches
+         patch_idx = 0
+         for i in range(0, height - k_h + 1, stride_h):
+             for j in range(0, width - k_w + 1, stride_w):
+                 # Extract patch for all batches and channels
+                 patch = x[:, :, i:i+k_h, j:j+k_w]
+                 # Reshape patch to (batch_size, channels * k_h * k_w)
+                 patches[:, patch_idx, :] = patch.reshape(batch_size, -1)
+                 patch_idx += 1
+
+         return patches, out_h, out_w
+
+     def forward(self, x):
+         batch_size, C, H, W = x.data.shape
+         pad_h, pad_w = self.padding
+         stride_h, stride_w = self.stride
+         k_h, k_w = self.kernel_size
+
+         # Add padding if needed
+         if pad_h > 0 or pad_w > 0:
+             x_padded = np.pad(x.data, ((0,0), (0,0), (pad_h,pad_h), (pad_w,pad_w)), mode='constant')
+         else:
+             x_padded = x.data
+
+         # Extract patches
+         patches, H_out, W_out = self._extract_patches(x_padded, k_h, k_w, stride_h, stride_w)
+
+         # Reshape weights to [out_channels, in_channels * k_h * k_w]
+         w_reshaped = self.weight.data.reshape(self.out_channels, -1)
+
+         # Compute convolution using matrix multiplication
+         out = patches @ w_reshaped.T  # [batch_size, H_out * W_out, out_channels]
+         out = out.transpose(0, 2, 1).reshape(batch_size, self.out_channels, H_out, W_out)
+
+         # Add bias
+         out += self.bias.data.reshape(1, -1, 1, 1)
+
+         return Tensor(out, requires_grad=True)
+
+ class MaxPool2d(Module):
+     def __init__(self, kernel_size, stride=None):
+         super().__init__()
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.stride = self.kernel_size if stride is None else (stride if isinstance(stride, tuple) else (stride, stride))
+
+     def forward(self, x):
+         batch_size, C, H, W = x.data.shape
+         k_h, k_w = self.kernel_size
+         stride_h, stride_w = self.stride
+
+         # Calculate output dimensions
+         H_out = (H - k_h) // stride_h + 1
+         W_out = (W - k_w) // stride_w + 1
+
+         # Initialize output array
+         out = np.zeros((batch_size, C, H_out, W_out))
+
+         # Perform max pooling
+         for b in range(batch_size):
+             for c in range(C):
+                 for h in range(H_out):
+                     for w in range(W_out):
+                         h_start = h * stride_h
+                         w_start = w * stride_w
+                         h_end = h_start + k_h
+                         w_end = w_start + k_w
+
+                         pool_region = x.data[b, c, h_start:h_end, w_start:w_end]
+                         out[b, c, h, w] = np.max(pool_region)
+
+         return Tensor(out, requires_grad=True)
+
+ class BatchNorm2d(Module):
+     def __init__(self, num_features, eps=1e-5, momentum=0.1):
+         super().__init__()
+         self.num_features = num_features
+         self.eps = eps
+         self.momentum = momentum
+
+         # Parameters
+         self.gamma = Tensor(np.ones(num_features), requires_grad=True)
+         self.beta = Tensor(np.zeros(num_features), requires_grad=True)
+
+         # Running estimates
+         self.running_mean = np.zeros(num_features)
+         self.running_var = np.ones(num_features)
+
+         # Training mode flag
+         self.training = True
+
+     def forward(self, x):
+         if self.training:
+             # Calculate batch statistics
+             batch_mean = x.data.mean(axis=(0,2,3), keepdims=True)
+             batch_var = x.data.var(axis=(0,2,3), keepdims=True)
+
+             # Update running statistics
+             self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * batch_mean.squeeze()
+             self.running_var = (1 - self.momentum) * self.running_var + self.momentum * batch_var.squeeze()
+
+             # Normalize
+             x_normalized = (x.data - batch_mean) / np.sqrt(batch_var + self.eps)
+         else:
+             # Use running statistics
+             x_normalized = (x.data - self.running_mean.reshape(1,-1,1,1)) / \
+                            np.sqrt(self.running_var.reshape(1,-1,1,1) + self.eps)
+
+         # Apply scale and shift
+         out = self.gamma.data.reshape(1,-1,1,1) * x_normalized + \
+               self.beta.data.reshape(1,-1,1,1)
+
+         return Tensor(out, requires_grad=True)
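As a quick shape sanity check for these layers, here is a minimal usage sketch. It assumes the wheel above is installed as `openarchx` and that `Tensor` simply wraps a NumPy array in its `.data` attribute, as the code suggests; the layer classes are the ones defined in this file.

import numpy as np
from openarchx.core.tensor import Tensor
from openarchx.layers.cnn import Conv2d, BatchNorm2d, MaxPool2d

# Dummy batch: 2 RGB images of size 16x16
x = Tensor(np.random.randn(2, 3, 16, 16))

conv = Conv2d(in_channels=3, out_channels=8, kernel_size=3, padding=1)
bn = BatchNorm2d(8)
pool = MaxPool2d(kernel_size=2)

# padding=1 with a 3x3 kernel and stride 1 preserves the 16x16 spatial size,
# and the 2x2 max pool then halves it, so the expected shape is (2, 8, 8, 8)
out = pool.forward(bn.forward(conv.forward(x)))
print(out.data.shape)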
openarchx/layers/transformer.py
@@ -0,0 +1,131 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from ..nn.module import Module
+ from .base import Linear
+
+ def get_positional_encoding(seq_length, d_model):
+     """Generate positional encodings for transformer input"""
+     position = np.arange(seq_length)[:, np.newaxis]
+     div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
+
+     pos_encoding = np.zeros((seq_length, d_model))
+     pos_encoding[:, 0::2] = np.sin(position * div_term)
+     pos_encoding[:, 1::2] = np.cos(position * div_term)
+
+     return Tensor(pos_encoding[np.newaxis, :, :])  # Add batch dimension
+
+ class PositionalEncoding(Module):
+     def __init__(self, d_model, max_seq_length=5000):
+         super().__init__()
+         self.pos_encoding = get_positional_encoding(max_seq_length, d_model)
+
+     def forward(self, x):
+         return x + self.pos_encoding[:, :x.data.shape[1], :]
+
+ class MultiHeadAttention(Module):
+     def __init__(self, embed_dim, num_heads):
+         super().__init__()
+         assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
+
+         self.embed_dim = embed_dim
+         self.num_heads = num_heads
+         self.head_dim = embed_dim // num_heads
+
+         self.q_proj = Linear(embed_dim, embed_dim)
+         self.k_proj = Linear(embed_dim, embed_dim)
+         self.v_proj = Linear(embed_dim, embed_dim)
+         self.out_proj = Linear(embed_dim, embed_dim)
+
+     def split_heads(self, x, batch_size):
+         # [batch_size, seq_len, embed_dim] -> [batch_size, seq_len, num_heads, head_dim]
+         new_shape = (batch_size, -1, self.num_heads, self.head_dim)
+         x = x.reshape(*new_shape)
+         # [batch_size, seq_len, num_heads, head_dim] -> [batch_size, num_heads, seq_len, head_dim]
+         return x.transpose(0, 2, 1, 3)
+
+     def merge_heads(self, x, batch_size, seq_len):
+         # [batch_size, num_heads, seq_len, head_dim] -> [batch_size, seq_len, num_heads, head_dim]
+         x = x.transpose(0, 2, 1, 3)
+         # [batch_size, seq_len, num_heads, head_dim] -> [batch_size, seq_len, embed_dim]
+         return x.reshape(batch_size, seq_len, self.embed_dim)
+
+     def forward(self, query, key, value, mask=None):
+         batch_size = query.data.shape[0]
+         q_len, k_len = query.data.shape[1], key.data.shape[1]
+
+         # Linear projections and split heads
+         q = self.split_heads(self.q_proj.forward(query), batch_size)  # [batch, heads, q_len, head_dim]
+         k = self.split_heads(self.k_proj.forward(key), batch_size)    # [batch, heads, k_len, head_dim]
+         v = self.split_heads(self.v_proj.forward(value), batch_size)  # [batch, heads, v_len, head_dim]
+
+         # Scaled dot-product attention
+         # [batch, heads, q_len, head_dim] @ [batch, heads, head_dim, k_len]
+         scores = (q @ k.transpose(0, 1, 3, 2)) / np.sqrt(self.head_dim)
+
+         if mask is not None:
+             scores.data = scores.data + mask.data * -1e9
+
+         # Apply softmax and attention
+         attn = self._softmax(scores)  # [batch, heads, q_len, k_len]
+         out = attn @ v                # [batch, heads, q_len, head_dim]
+
+         # Merge heads and project
+         out = self.merge_heads(out, batch_size, q_len)  # [batch, q_len, embed_dim]
+         return self.out_proj.forward(out)
+
+     def _softmax(self, x):
+         exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True)))
+         return exp_x / exp_x.sum(axis=-1, keepdims=True)
+
+ class LayerNorm(Module):
+     def __init__(self, normalized_shape, eps=1e-5):
+         super().__init__()
+         self.eps = eps
+         self.gamma = Tensor(np.ones(normalized_shape), requires_grad=True)
+         self.beta = Tensor(np.zeros(normalized_shape), requires_grad=True)
+
+     def forward(self, x):
+         mean = x.mean(axis=-1, keepdims=True)
+         var = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
+         return self.gamma * (x - mean) / (var + self.eps).sqrt() + self.beta
+
+     def parameters(self):
+         return [self.gamma, self.beta]
+
+ class TransformerEncoderLayer(Module):
+     def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
+         super().__init__()
+         self.self_attn = MultiHeadAttention(d_model, nhead)
+         self.pos_encoding = PositionalEncoding(d_model)
+         self.linear1 = Linear(d_model, dim_feedforward)
+         self.linear2 = Linear(dim_feedforward, d_model)
+         self.norm1 = LayerNorm(d_model)
+         self.norm2 = LayerNorm(d_model)
+         self.dropout = dropout
+
+     def forward(self, src, src_mask=None):
+         # Add positional encoding
+         src = self.pos_encoding.forward(src)
+
+         # Multi-head self-attention
+         attn_output = self.self_attn.forward(src, src, src, mask=src_mask)
+         attn_output = self._dropout(attn_output)
+         out1 = self.norm1.forward(src + attn_output)
+
+         # Position-wise feed-forward network
+         ff_output = self.linear1.forward(out1)
+         ff_output = self._relu(ff_output)
+         ff_output = self._dropout(ff_output)
+         ff_output = self.linear2.forward(ff_output)
+         ff_output = self._dropout(ff_output)
+
+         return self.norm2.forward(out1 + ff_output)
+
+     def _dropout(self, x):
+         if self.dropout > 0:
+             mask = np.random.binomial(1, 1-self.dropout, x.data.shape)
+             return Tensor(x.data * mask / (1-self.dropout))
+         return x
+
+     def _relu(self, x):
+         return Tensor(np.maximum(0, x.data))
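A brief, hedged usage sketch of the encoder layer. It assumes the installed wheel, that `Linear` in `layers/base.py` follows the (in_features, out_features) signature implied by its use above, and that the `Tensor` class supports the slicing, `reshape`, `transpose`, `@`, and `sqrt` operations these modules call.

import numpy as np
from openarchx.core.tensor import Tensor
from openarchx.layers.transformer import TransformerEncoderLayer

# Dummy batch: 4 sequences of length 10 with d_model = 32
src = Tensor(np.random.randn(4, 10, 32))

layer = TransformerEncoderLayer(d_model=32, nhead=4, dim_feedforward=64, dropout=0.1)

# forward() adds positional encodings, applies multi-head self-attention with a
# residual connection and LayerNorm, then the position-wise feed-forward block
out = layer.forward(src)
print(out.data.shape)  # expected: (4, 10, 32)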
openarchx/nn/__init__.py
@@ -0,0 +1,26 @@
+ # Core module
+ from .module import Module
+
+ # Activation functions
+ from .activations import (
+     ReLU, LeakyReLU, Sigmoid, Tanh, GELU, SiLU,
+     ELU, SELU, Softmax, LogSoftmax
+ )
+
+ # Core layers
+ from .layers import (
+     Linear, Conv1d, Conv2d, LayerNorm,
+     Embedding
+ )
+
+ # Pooling layers
+ from .pooling import (
+     MaxPool1d, MaxPool2d, AvgPool1d, AvgPool2d,
+     AdaptiveAvgPool2d, AdaptiveMaxPool2d
+ )
+
+ # Transformer components
+ from ..layers.transformer import PositionalEncoding
+
+ # Container modules
+ from .containers import Sequential, ModuleList
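For reference, a tiny hypothetical sketch of the public surface these re-exports provide, assuming the wheel is installed as `openarchx`:

from openarchx.nn import Sequential, Linear, ReLU

# A small MLP assembled from the re-exported names; Sequential.forward
# chains the sub-modules in order
model = Sequential(
    Linear(16, 32),
    ReLU(),
    Linear(32, 4),
)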
openarchx/nn/activations.py
@@ -0,0 +1,127 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from .module import Module
+
+ class ReLU(Module):
+     def forward(self, x):
+         return Tensor(np.maximum(0, x.data), requires_grad=True)
+
+ class LeakyReLU(Module):
+     def __init__(self, negative_slope=0.01):
+         super().__init__()
+         self.negative_slope = negative_slope
+
+     def forward(self, x):
+         return Tensor(np.where(x.data > 0, x.data, self.negative_slope * x.data), requires_grad=True)
+
+ class PReLU(Module):
+     def __init__(self, num_parameters=1, init=0.25):
+         super().__init__()
+         self.weight = Tensor(np.full(num_parameters, init), requires_grad=True)
+
+     def forward(self, x):
+         return Tensor(np.where(x.data > 0, x.data, self.weight.data * x.data), requires_grad=True)
+
+ class ELU(Module):
+     def __init__(self, alpha=1.0):
+         super().__init__()
+         self.alpha = alpha
+
+     def forward(self, x):
+         return Tensor(np.where(x.data > 0, x.data, self.alpha * (np.exp(x.data) - 1)), requires_grad=True)
+
+ class GELU(Module):
+     def forward(self, x):
+         return Tensor(0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3))), requires_grad=True)
+
+ class Sigmoid(Module):
+     def forward(self, x):
+         return Tensor(1 / (1 + np.exp(-x.data)), requires_grad=True)
+
+ class Tanh(Module):
+     def forward(self, x):
+         return Tensor(np.tanh(x.data), requires_grad=True)
+
+ class Softmax(Module):
+     def __init__(self, dim=-1):
+         super().__init__()
+         self.dim = dim
+
+     def forward(self, x):
+         exp_x = np.exp(x.data - np.max(x.data, axis=self.dim, keepdims=True))
+         return Tensor(exp_x / np.sum(exp_x, axis=self.dim, keepdims=True), requires_grad=True)
+
+ class LogSoftmax(Module):
+     def __init__(self, dim=-1):
+         super().__init__()
+         self.dim = dim
+
+     def forward(self, x):
+         # Numerically stable log-softmax: shift by the max, then subtract the
+         # log-sum-exp (avoids log(0) when the naive softmax underflows)
+         shifted = x.data - np.max(x.data, axis=self.dim, keepdims=True)
+         log_sum_exp = np.log(np.sum(np.exp(shifted), axis=self.dim, keepdims=True))
+         return Tensor(shifted - log_sum_exp, requires_grad=True)
+
+ class SELU(Module):
+     def __init__(self):
+         super().__init__()
+         self.alpha = 1.6732632423543772848170429916717
+         self.scale = 1.0507009873554804934193349852946
+
+     def forward(self, x):
+         return Tensor(self.scale * np.where(x.data > 0, x.data,
+                       self.alpha * (np.exp(x.data) - 1)), requires_grad=True)
+
+ class Hardtanh(Module):
+     def __init__(self, min_val=-1.0, max_val=1.0):
+         super().__init__()
+         self.min_val = min_val
+         self.max_val = max_val
+
+     def forward(self, x):
+         return Tensor(np.clip(x.data, self.min_val, self.max_val), requires_grad=True)
+
+ class SiLU(Module):  # Also known as Swish
+     def forward(self, x):
+         return Tensor(x.data * (1 / (1 + np.exp(-x.data))), requires_grad=True)
+
+ class Mish(Module):
+     def forward(self, x):
+         return Tensor(x.data * np.tanh(np.log(1 + np.exp(x.data))), requires_grad=True)
+
+ class ActX(Module):
+     """
+     Advanced activation function that combines multiple activation types with learnable parameters.
+     ActX(x) = α * GELU(x) + β * SiLU(x) + γ * tanh(λx)
+     where α, β, γ, and λ are learnable parameters
+     """
+     def __init__(self, num_parameters=1, init_alpha=0.5, init_beta=0.5, init_gamma=0.25, init_lambda=1.0):
+         super().__init__()
+         self.num_parameters = num_parameters
+
+         # Initialize learnable parameters
+         self.alpha = Tensor(np.full(num_parameters, init_alpha), requires_grad=True)
+         self.beta = Tensor(np.full(num_parameters, init_beta), requires_grad=True)
+         self.gamma = Tensor(np.full(num_parameters, init_gamma), requires_grad=True)
+         self.lambda_param = Tensor(np.full(num_parameters, init_lambda), requires_grad=True)
+
+     def forward(self, x):
+         # GELU component
+         gelu = 0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3)))
+
+         # SiLU (Swish) component
+         silu = x.data * (1 / (1 + np.exp(-x.data)))
+
+         # Tanh component with learnable frequency
+         tanh = np.tanh(self.lambda_param.data.reshape(-1, 1, 1) * x.data)
+
+         # Combine components with learnable weights
+         alpha = self.alpha.data.reshape(-1, 1, 1)
+         beta = self.beta.data.reshape(-1, 1, 1)
+         gamma = self.gamma.data.reshape(-1, 1, 1)
+
+         result = alpha * gelu + beta * silu + gamma * tanh
+
+         return Tensor(result, requires_grad=True)
+
+     def parameters(self):
+         return [self.alpha, self.beta, self.gamma, self.lambda_param]
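`ActX` is the one non-standard activation here, so a short sketch of what its docstring formula implies in practice. This assumes the installed wheel and a 4-D, NumPy-backed input so that the `reshape(-1, 1, 1)` broadcasting lines up with the channel axis.

import numpy as np
from openarchx.core.tensor import Tensor
from openarchx.nn.activations import ActX

# ActX(x) = alpha * GELU(x) + beta * SiLU(x) + gamma * tanh(lambda * x),
# with one learnable (alpha, beta, gamma, lambda) set per channel
x = Tensor(np.random.randn(2, 8, 4, 4))
act = ActX(num_parameters=8)

y = act.forward(x)
print(y.data.shape)           # (2, 8, 4, 4)
print(len(act.parameters()))  # 4 learnable parameter tensors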
openarchx/nn/containers.py
@@ -0,0 +1,174 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from .module import Module
+ from collections import OrderedDict
+
+ class Sequential(Module):
+     def __init__(self, *args):
+         super().__init__()
+         self.modules = []
+         for arg in args:
+             if isinstance(arg, (list, tuple)):
+                 self.modules.extend(arg)
+             elif isinstance(arg, dict):
+                 self.modules.extend(arg.values())
+             else:
+                 self.modules.append(arg)
+
+     def forward(self, x):
+         for module in self.modules:
+             x = module(x)
+         return x
+
+     def parameters(self):
+         params = []
+         for module in self.modules:
+             params.extend(module.parameters())
+         return params
+
+ class ModuleList(Module):
+     def __init__(self, modules=None):
+         super().__init__()
+         self.modules = []
+         if modules is not None:
+             self.extend(modules)
+
+     def __getitem__(self, idx):
+         return self.modules[idx]
+
+     def __setitem__(self, idx, module):
+         self.modules[idx] = module
+
+     def __len__(self):
+         return len(self.modules)
+
+     def append(self, module):
+         self.modules.append(module)
+
+     def extend(self, modules):
+         if isinstance(modules, (list, tuple)):
+             self.modules.extend(modules)
+         else:
+             self.modules.extend(list(modules))
+
+     def parameters(self):
+         params = []
+         for module in self.modules:
+             params.extend(module.parameters())
+         return params
+
+ class ModuleDict(Module):
+     def __init__(self, modules=None):
+         super().__init__()
+         self.modules = OrderedDict()
+         if modules is not None:
+             self.update(modules)
+
+     def __getitem__(self, key):
+         return self.modules[key]
+
+     def __setitem__(self, key, module):
+         self.modules[key] = module
+
+     def __delitem__(self, key):
+         del self.modules[key]
+
+     def __len__(self):
+         return len(self.modules)
+
+     def __iter__(self):
+         return iter(self.modules)
+
+     def keys(self):
+         return self.modules.keys()
+
+     def items(self):
+         return self.modules.items()
+
+     def values(self):
+         return self.modules.values()
+
+     def update(self, modules):
+         if isinstance(modules, dict):
+             self.modules.update(modules)
+         else:
+             for key, module in modules:
+                 self.modules[key] = module
+
+     def parameters(self):
+         params = []
+         for module in self.modules.values():
+             params.extend(module.parameters())
+         return params
+
+ class ParameterList(Module):
+     def __init__(self, parameters=None):
+         super().__init__()
+         self.parameters_list = []
+         if parameters is not None:
+             self.extend(parameters)
+
+     def __getitem__(self, idx):
+         return self.parameters_list[idx]
+
+     def __setitem__(self, idx, parameter):
+         self.parameters_list[idx] = parameter
+
+     def __len__(self):
+         return len(self.parameters_list)
+
+     def append(self, parameter):
+         if not isinstance(parameter, Tensor):
+             parameter = Tensor(parameter, requires_grad=True)
+         self.parameters_list.append(parameter)
+
+     def extend(self, parameters):
+         for param in parameters:
+             self.append(param)
+
+     def parameters(self):
+         return self.parameters_list
+
+ class ParameterDict(Module):
+     def __init__(self, parameters=None):
+         super().__init__()
+         self.parameters_dict = OrderedDict()
+         if parameters is not None:
+             self.update(parameters)
+
+     def __getitem__(self, key):
+         return self.parameters_dict[key]
+
+     def __setitem__(self, key, parameter):
+         if not isinstance(parameter, Tensor):
+             parameter = Tensor(parameter, requires_grad=True)
+         self.parameters_dict[key] = parameter
+
+     def __delitem__(self, key):
+         del self.parameters_dict[key]
+
+     def __len__(self):
+         return len(self.parameters_dict)
+
+     def __iter__(self):
+         return iter(self.parameters_dict)
+
+     def keys(self):
+         return self.parameters_dict.keys()
+
+     def items(self):
+         return self.parameters_dict.items()
+
+     def values(self):
+         return self.parameters_dict.values()
+
+     def update(self, parameters):
+         if isinstance(parameters, dict):
+             for key, param in parameters.items():
+                 self[key] = param
+         else:
+             for key, param in parameters:
+                 self[key] = param
+
+     def parameters(self):
+         return list(self.parameters_dict.values())
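The containers mirror the familiar PyTorch-style API. A hedged sketch of how they compose, assuming the installed wheel, that `Linear` (with the usual (in_features, out_features) signature) and `ReLU` are the classes from `openarchx/nn/layers.py` and `openarchx/nn/activations.py` above, and that the base `Module` provides a default `parameters()`:

import numpy as np
from openarchx.nn.containers import Sequential, ModuleList, ParameterDict
from openarchx.nn.layers import Linear
from openarchx.nn.activations import ReLU

# Sequential flattens lists/tuples/dicts of modules into a single pipeline
encoder = Sequential([Linear(8, 16), ReLU()], Linear(16, 2))

# ModuleList keeps an indexable list of sub-modules and aggregates their parameters
blocks = ModuleList([Linear(8, 8) for _ in range(3)])

# ParameterDict wraps raw arrays into trainable Tensors on assignment
extra = ParameterDict({"bias_scale": np.ones(2)})

print(len(encoder.parameters()), len(blocks.parameters()), len(extra.parameters()))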