openarchx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openarchx/__init__.py +11 -0
- openarchx/core/tensor.py +179 -0
- openarchx/cuda/__init__.py +27 -0
- openarchx/cuda/cuda_ops.py +296 -0
- openarchx/layers/activations.py +63 -0
- openarchx/layers/base.py +40 -0
- openarchx/layers/cnn.py +145 -0
- openarchx/layers/transformer.py +131 -0
- openarchx/nn/__init__.py +26 -0
- openarchx/nn/activations.py +127 -0
- openarchx/nn/containers.py +174 -0
- openarchx/nn/dropout.py +121 -0
- openarchx/nn/layers.py +338 -0
- openarchx/nn/losses.py +156 -0
- openarchx/nn/module.py +18 -0
- openarchx/nn/padding.py +120 -0
- openarchx/nn/pooling.py +318 -0
- openarchx/nn/rnn.py +226 -0
- openarchx/nn/transformers.py +187 -0
- openarchx/optimizers/adam.py +49 -0
- openarchx/optimizers/adaptive.py +63 -0
- openarchx/optimizers/base.py +24 -0
- openarchx/optimizers/modern.py +98 -0
- openarchx/optimizers/optx.py +91 -0
- openarchx/optimizers/sgd.py +63 -0
- openarchx/quantum/circuit.py +92 -0
- openarchx/quantum/gates.py +126 -0
- openarchx/utils/__init__.py +50 -0
- openarchx/utils/data.py +229 -0
- openarchx/utils/huggingface.py +288 -0
- openarchx/utils/losses.py +21 -0
- openarchx/utils/model_io.py +553 -0
- openarchx/utils/pytorch.py +420 -0
- openarchx/utils/tensorflow.py +467 -0
- openarchx/utils/transforms.py +259 -0
- openarchx-0.1.0.dist-info/METADATA +180 -0
- openarchx-0.1.0.dist-info/RECORD +43 -0
- openarchx-0.1.0.dist-info/WHEEL +5 -0
- openarchx-0.1.0.dist-info/licenses/LICENSE +21 -0
- openarchx-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_cuda_ops.py +205 -0
- tests/test_integrations.py +236 -0
openarchx/quantum/gates.py
ADDED
@@ -0,0 +1,126 @@
import numpy as np
from ..core.tensor import Tensor

# Basic quantum gates as numpy arrays
class QuantumGates:
    @staticmethod
    def I():
        return np.array([[1, 0], [0, 1]], dtype=complex)

    @staticmethod
    def X():
        return np.array([[0, 1], [1, 0]], dtype=complex)

    @staticmethod
    def Y():
        return np.array([[0, -1j], [1j, 0]], dtype=complex)

    @staticmethod
    def Z():
        return np.array([[1, 0], [0, -1]], dtype=complex)

    @staticmethod
    def H():
        return np.array([[1, 1], [1, -1]], dtype=complex) / np.sqrt(2)

    @staticmethod
    def CNOT():
        return np.array([[1, 0, 0, 0],
                         [0, 1, 0, 0],
                         [0, 0, 0, 1],
                         [0, 0, 1, 0]], dtype=complex)

    @staticmethod
    def RX(theta):
        return np.array([[np.cos(theta/2), -1j*np.sin(theta/2)],
                         [-1j*np.sin(theta/2), np.cos(theta/2)]], dtype=complex)

    @staticmethod
    def RY(theta):
        return np.array([[np.cos(theta/2), -np.sin(theta/2)],
                         [np.sin(theta/2), np.cos(theta/2)]], dtype=complex)

    @staticmethod
    def RZ(theta):
        return np.array([[np.exp(-1j*theta/2), 0],
                         [0, np.exp(1j*theta/2)]], dtype=complex)

class QuantumRegister:
    def __init__(self, num_qubits):
        self.num_qubits = num_qubits
        # Initialize to |0>^⊗n state
        self.state = np.zeros(2**num_qubits, dtype=complex)
        self.state[0] = 1.0

    def apply_gate(self, gate, target_qubit):
        """Apply a single-qubit gate to the specified target qubit"""
        n = self.num_qubits

        # Create the full operator using tensor products
        op = np.array([[1]])
        for i in range(n):
            if i == target_qubit:
                op = np.kron(op, gate)
            else:
                op = np.kron(op, QuantumGates.I())

        self.state = op @ self.state

    def apply_controlled_gate(self, gate, control_qubit, target_qubit):
        """Apply a controlled gate with specified control and target qubits"""
        if abs(control_qubit - target_qubit) == 1:
            # Adjacent qubits: the full 4x4 controlled gate (e.g.
            # QuantumGates.CNOT()) can be placed directly in the tensor
            # product. A while loop is used so the qubit consumed by the
            # two-qubit gate is actually skipped (an `i += 1` inside a
            # for loop is overwritten on the next iteration).
            n = self.num_qubits
            op = np.array([[1]])
            i = 0
            while i < n:
                if i == min(control_qubit, target_qubit):
                    op = np.kron(op, gate)
                    i += 2  # the gate spans this qubit and the next one
                else:
                    op = np.kron(op, QuantumGates.I())
                    i += 1
        else:
            # Non-adjacent qubits need swap operations.
            # This is a simplified implementation; note the helper expects
            # the 2x2 target gate, while the adjacent branch above expects
            # the full 4x4 controlled gate.
            op = self._create_non_adjacent_controlled_op(gate, control_qubit, target_qubit)

        self.state = op @ self.state

    def measure(self, qubit=None):
        """Measure the specified qubit or the whole register"""
        if qubit is None:
            # Measure all qubits
            probs = np.abs(self.state) ** 2
            result = np.random.choice(len(self.state), p=probs)
            self.state = np.zeros_like(self.state)
            self.state[result] = 1.0
            return bin(result)[2:].zfill(self.num_qubits)
        else:
            # Measure single qubit
            # Project and renormalize the state
            raise NotImplementedError("Single qubit measurement not implemented yet")

    def _create_non_adjacent_controlled_op(self, gate, control, target):
        """Helper method to create controlled operations between non-adjacent qubits"""
        # This is a simplified implementation
        n = self.num_qubits
        dim = 2**n
        op = np.eye(dim, dtype=complex)

        # Create the controlled operation
        ctrl_mask = 1 << control
        target_mask = 1 << target

        for i in range(dim):
            if i & ctrl_mask:  # If control qubit is 1
                # Apply gate to target qubit
                i_target_0 = i & ~target_mask  # Target bit set to 0
                i_target_1 = i | target_mask   # Target bit set to 1

                if i & target_mask == 0:  # Target is 0
                    op[i, i] = gate[0, 0]
                    op[i, i_target_1] = gate[0, 1]
                else:  # Target is 1
                    op[i, i_target_0] = gate[1, 0]
                    op[i, i] = gate[1, 1]

        return op
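Taken together, QuantumGates supplies the matrices and QuantumRegister applies them by building the full 2^n-dimensional operator with Kronecker products. A minimal usage sketch based only on the classes above (the printed amplitudes follow from the math; treat the snippet as illustrative rather than documented API):

import numpy as np
from openarchx.quantum.gates import QuantumGates, QuantumRegister

# Prepare a Bell state: H on qubit 0, then CNOT with control 0, target 1.
reg = QuantumRegister(2)
reg.apply_gate(QuantumGates.H(), 0)                   # (|00> + |10>)/sqrt(2)
reg.apply_controlled_gate(QuantumGates.CNOT(), 0, 1)  # (|00> + |11>)/sqrt(2)

print(np.round(reg.state, 3))  # [0.707 0.    0.    0.707]
print(reg.measure())           # '00' or '11', each with probability ~0.5

Note that the Kronecker ordering makes qubit 0 the most significant bit of the state index, which is also the leftmost character of the string returned by measure().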
openarchx/utils/__init__.py
ADDED
@@ -0,0 +1,50 @@
from .data import Dataset, DataLoader, DatasetFactory

# Optional integrations - handle gracefully if not available
try:
    from .pytorch import (get_pytorch_model_adapter, convert_to_pytorch_dataset,
                          convert_from_pytorch_dataset, convert_pytorch_model)
except ImportError:
    # PyTorch integration not available
    pass

try:
    from .tensorflow import (get_tensorflow_model_adapter, convert_to_tensorflow_dataset,
                             convert_from_tensorflow_dataset, convert_tensorflow_model)
except ImportError:
    # TensorFlow integration not available
    pass

try:
    from .huggingface import (get_huggingface_model, get_huggingface_dataset,
                              get_huggingface_tokenizer)
except ImportError:
    # Hugging Face integration not available
    pass

# Model I/O utilities
from .model_io import (save_model, load_model, convert_from_pytorch,
                       convert_from_tensorflow, convert_to_pytorch,
                       convert_to_tensorflow, register_model, get_model_class,
                       list_registered_models)

__all__ = [
    # Data utilities
    'Dataset', 'DataLoader', 'DatasetFactory',

    # PyTorch integration
    'get_pytorch_model_adapter', 'convert_to_pytorch_dataset',
    'convert_from_pytorch_dataset', 'convert_pytorch_model',

    # TensorFlow integration
    'get_tensorflow_model_adapter', 'convert_to_tensorflow_dataset',
    'convert_from_tensorflow_dataset', 'convert_tensorflow_model',

    # Hugging Face integration
    'get_huggingface_model', 'get_huggingface_dataset', 'get_huggingface_tokenizer',

    # Model I/O utilities
    'save_model', 'load_model', 'convert_from_pytorch', 'convert_from_tensorflow',
    'convert_to_pytorch', 'convert_to_tensorflow', 'register_model',
    'get_model_class', 'list_registered_models'
]
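Because the optional imports are wrapped in try/except ImportError, openarchx.utils imports cleanly even when PyTorch, TensorFlow, or the Hugging Face libraries are absent. A hedged sketch of how a caller might probe for an integration (note that __all__ still lists the optional names, so `from openarchx.utils import *` can raise AttributeError when a backend is missing; that is a consequence of this file's design, not something it guards against):

import openarchx.utils as oxu

# Core data utilities are always present.
ds = oxu.Dataset(data=[[0.0, 1.0], [2.0, 3.0]], targets=[0, 1])

# Optional integrations exist only if the backing library imported successfully.
if hasattr(oxu, "convert_pytorch_model"):
    print("PyTorch integration is available")
else:
    print("PyTorch not installed; core OpenArchX features are unaffected")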
openarchx/utils/data.py
ADDED
@@ -0,0 +1,229 @@
import numpy as np
from ..core.tensor import Tensor
import importlib.util

class Dataset:
    """Base dataset class for OpenArchX framework."""

    def __init__(self, data=None, targets=None):
        self.data = data
        self.targets = targets
        self.length = len(data) if data is not None else 0

    def __getitem__(self, index):
        if self.data is None or self.targets is None:
            raise NotImplementedError("Dataset must implement __getitem__ or provide data and targets")
        return self.data[index], self.targets[index]

    def __len__(self):
        return self.length


class DataLoader:
    """DataLoader for iterating over a dataset in batches with optional shuffling."""

    def __init__(self, dataset, batch_size=32, shuffle=False, drop_last=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.indices = np.arange(len(dataset))

    def __iter__(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

        for start_idx in range(0, len(self.dataset), self.batch_size):
            batch_indices = self.indices[start_idx:start_idx + self.batch_size]

            if len(batch_indices) < self.batch_size and self.drop_last:
                continue

            batch = [self.dataset[i] for i in batch_indices]

            # Transpose the batch to get separate data and target batches
            batch_data, batch_targets = zip(*batch)

            # Convert to arrays
            batch_data = np.array(batch_data)
            batch_targets = np.array(batch_targets)

            # Convert to Tensors
            yield Tensor(batch_data), Tensor(batch_targets)

    def __len__(self):
        if self.drop_last:
            return len(self.dataset) // self.batch_size
        return (len(self.dataset) + self.batch_size - 1) // self.batch_size


# ===== ADAPTERS FOR EXTERNAL FRAMEWORKS =====
# These are optional conversion utilities that don't affect core functionality

class TorchDatasetAdapter(Dataset):
    """Adapter for PyTorch datasets to OpenArchX framework."""

    def __init__(self, torch_dataset):
        """
        Args:
            torch_dataset: A PyTorch Dataset object
        """
        super().__init__()
        self.torch_dataset = torch_dataset
        self.length = len(torch_dataset)

    def __getitem__(self, index):
        data, target = self.torch_dataset[index]

        # Convert torch tensors to numpy arrays if needed
        if hasattr(data, 'numpy'):
            data = data.numpy()
        if hasattr(target, 'numpy'):
            target = target.numpy()

        return data, target

    def __len__(self):
        return self.length

class TFDatasetAdapter(Dataset):
    """Adapter for TensorFlow datasets to OpenArchX framework."""

    def __init__(self, tf_dataset, x_key='x', y_key='y'):
        """
        Args:
            tf_dataset: A TensorFlow Dataset object
            x_key: Key for input features in TF dataset elements
            y_key: Key for target values in TF dataset elements
        """
        super().__init__()
        self.tf_dataset = tf_dataset
        self.x_key = x_key
        self.y_key = y_key

        # Count samples (might be expensive for large datasets)
        try:
            self.length = tf_dataset.cardinality().numpy()
            if self.length < 0:  # If cardinality is unknown
                self.length = sum(1 for _ in tf_dataset)
        except Exception:
            # Fallback method
            self.length = sum(1 for _ in tf_dataset)

        # Create iterator
        self.iterator = iter(tf_dataset)

    def __getitem__(self, index):
        # TensorFlow datasets don't support random indexing, so this
        # implementation iterates to the requested index.
        # Warning: this is inefficient for large indices or repeated access;
        # prefer batching through the DataLoader.

        # Reset iterator if needed
        if index == 0 or not hasattr(self, 'current_index') or index < self.current_index:
            self.iterator = iter(self.tf_dataset)
            self.current_index = 0

        # Iterate until we reach the desired index
        while self.current_index < index:
            next(self.iterator)
            self.current_index += 1

        # Get the item at the current index
        item = next(self.iterator)
        self.current_index += 1

        # Extract features and target
        if isinstance(item, dict):
            data = item[self.x_key].numpy()
            target = item[self.y_key].numpy()
        else:
            data = item[0].numpy()
            target = item[1].numpy()

        return data, target

    def __len__(self):
        return self.length

class HuggingFaceDatasetAdapter(Dataset):
    """Adapter for Hugging Face datasets to OpenArchX framework."""

    def __init__(self, hf_dataset, input_cols=None, target_col=None, transform=None):
        """
        Args:
            hf_dataset: A Hugging Face Dataset object
            input_cols: Columns to use as input features
            target_col: Column to use as target
            transform: Optional transform to apply to inputs
        """
        super().__init__()
        self.hf_dataset = hf_dataset
        self.input_cols = input_cols
        self.target_col = target_col
        self.transform = transform
        self.length = len(hf_dataset)

    def __getitem__(self, index):
        item = self.hf_dataset[index]

        # Extract inputs
        if self.input_cols:
            if isinstance(self.input_cols, list):
                data = np.array([item[col] for col in self.input_cols])
            else:
                data = np.array(item[self.input_cols])
        else:
            # Default: use all columns except target as input
            data = np.array([v for k, v in item.items() if k != self.target_col])

        # Extract target
        if self.target_col:
            target = np.array(item[self.target_col])
        else:
            # Default behavior if no target specified
            target = np.zeros(1)  # For unsupervised learning

        # Apply transform if specified
        if self.transform:
            data = self.transform(data)

        return data, target

    def __len__(self):
        return self.length


class DatasetFactory:
    """Factory for creating datasets from various sources."""

    @staticmethod
    def from_numpy(data, targets):
        """Create a dataset from NumPy arrays."""
        return Dataset(data, targets)

    @staticmethod
    def from_torch(torch_dataset):
        """Create a dataset from a PyTorch dataset."""
        # Check if PyTorch is available
        if importlib.util.find_spec("torch") is None:
            raise ImportError("PyTorch is required for this adapter. Please install it with 'pip install torch'")
        return TorchDatasetAdapter(torch_dataset)

    @staticmethod
    def from_tensorflow(tf_dataset, x_key='x', y_key='y'):
        """Create a dataset from a TensorFlow dataset."""
        # Check if TensorFlow is available
        if importlib.util.find_spec("tensorflow") is None:
            raise ImportError("TensorFlow is required for this adapter. Please install it with 'pip install tensorflow'")
        return TFDatasetAdapter(tf_dataset, x_key, y_key)

    @staticmethod
    def from_huggingface(hf_dataset, input_cols=None, target_col=None, transform=None):
        """Create a dataset from a Hugging Face dataset."""
        # Check if datasets is available
        if importlib.util.find_spec("datasets") is None:
            raise ImportError("Hugging Face datasets is required for this adapter. Please install it with 'pip install datasets'")
        return HuggingFaceDatasetAdapter(hf_dataset, input_cols, target_col, transform)
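A short sketch of the core pipeline above: wrap NumPy arrays via DatasetFactory.from_numpy and batch them with DataLoader. The toy shapes below are illustrative; Tensor itself is defined in openarchx/core/tensor.py, which is not shown in this excerpt:

import numpy as np
from openarchx.utils.data import DataLoader, DatasetFactory

# Toy data: 100 samples with 4 features each, integer class targets.
X = np.random.randn(100, 4).astype(np.float32)
y = np.random.randint(0, 2, size=100)

dataset = DatasetFactory.from_numpy(X, y)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

print(len(loader))  # 4, i.e. ceil(100 / 32); would be 3 with drop_last=True
for batch_data, batch_targets in loader:
    # Each batch arrives as a pair of OpenArchX Tensors wrapping
    # (B, 4) feature arrays and (B,) target arrays.
    pass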
openarchx/utils/huggingface.py
ADDED
@@ -0,0 +1,288 @@
"""
Hugging Face Integration Utilities for OpenArchX.

This module provides conversion and adapter utilities for using Hugging Face models
and datasets with OpenArchX. These utilities are completely optional and do not
affect OpenArchX's core functionality, which remains independent from external libraries.
"""

import numpy as np
import importlib.util
from ..core.tensor import Tensor


class HuggingFaceModelAdapter:
    """Adapter for using Hugging Face models with OpenArchX."""

    def __init__(self, model_name_or_path, task="text-classification", **model_kwargs):
        """
        Initialize a Hugging Face model adapter.

        Args:
            model_name_or_path: Name or path of the pretrained model.
            task: The task the model should perform (e.g., "text-classification", "token-classification").
            **model_kwargs: Additional arguments to pass to the model constructor.
        """
        # Check if transformers is installed
        if importlib.util.find_spec("transformers") is None:
            raise ImportError("Hugging Face transformers library is required. Install with 'pip install transformers'")

        from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer

        self.task = task
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

        # Load appropriate model for the task
        if task == "text-classification" or task == "sequence-classification":
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, **model_kwargs)
        elif task == "feature-extraction" or task == "embeddings":
            self.model = AutoModel.from_pretrained(model_name_or_path, **model_kwargs)
        else:
            # For other tasks, try generic loading
            from transformers import AutoModelForPreTraining
            self.model = AutoModelForPreTraining.from_pretrained(model_name_or_path, **model_kwargs)

    def __call__(self, texts, **kwargs):
        """
        Process text through the model.

        Args:
            texts: Input text or list of texts.
            **kwargs: Additional arguments to pass to the tokenizer.

        Returns:
            OpenArchX Tensor containing the model output.
        """
        # Prepare inputs
        if isinstance(texts, str):
            texts = [texts]

        # Tokenize
        inputs = self.tokenizer(texts, return_tensors="pt", padding=True, truncation=True, **kwargs)

        # Get model output
        with torch_no_grad():
            outputs = self.model(**inputs)

        # Process outputs based on task
        if self.task == "text-classification" or self.task == "sequence-classification":
            # Get logits
            result = outputs.logits.detach().cpu().numpy()
        elif self.task == "feature-extraction" or self.task == "embeddings":
            # Get last hidden state (embeddings)
            result = outputs.last_hidden_state.detach().cpu().numpy()
        else:
            # Default to returning all outputs as a dict of numpy arrays
            result = {k: v.detach().cpu().numpy() for k, v in outputs.items() if hasattr(v, 'detach')}

        # Convert to Tensor
        if isinstance(result, dict):
            return {k: Tensor(v) for k, v in result.items()}
        return Tensor(result)


class HuggingFaceDatasetLoader:
    """Loader for Hugging Face datasets."""

    def __init__(self, dataset_name=None, dataset_path=None, split="train", **dataset_kwargs):
        """
        Initialize a Hugging Face dataset loader.

        Args:
            dataset_name: Name of the dataset in the Hugging Face Hub.
            dataset_path: Path to a local dataset.
            split: Dataset split to use ("train", "validation", "test").
            **dataset_kwargs: Additional arguments to pass to the dataset loader.
        """
        # Check if datasets is installed
        if importlib.util.find_spec("datasets") is None:
            raise ImportError("Hugging Face datasets library is required. Install with 'pip install datasets'")

        from datasets import load_dataset

        if dataset_name is None and dataset_path is None:
            raise ValueError("Either dataset_name or dataset_path must be provided")

        # Load dataset
        if dataset_path is not None:
            self.dataset = load_dataset(dataset_path, split=split, **dataset_kwargs)
        else:
            self.dataset = load_dataset(dataset_name, split=split, **dataset_kwargs)

    def to_openarchx_dataset(self, input_cols=None, target_col=None, transform=None):
        """
        Convert the Hugging Face dataset to an OpenArchX Dataset.

        Args:
            input_cols: Column(s) to use as input features.
            target_col: Column to use as target.
            transform: Transform to apply to the input features.

        Returns:
            An OpenArchX Dataset.
        """
        from .data import HuggingFaceDatasetAdapter
        return HuggingFaceDatasetAdapter(self.dataset, input_cols, target_col, transform)


def torch_no_grad():
    """Context manager to disable gradient calculation in PyTorch."""
    try:
        import torch
        return torch.no_grad()
    except ImportError:
        # Fallback for when torch is not available
        from contextlib import contextmanager

        @contextmanager
        def dummy_context():
            yield

        return dummy_context()


class HuggingFaceTokenizerAdapter:
    """Adapter for using Hugging Face tokenizers with OpenArchX."""

    def __init__(self, tokenizer_name_or_path, **tokenizer_kwargs):
        """
        Initialize a Hugging Face tokenizer adapter.

        Args:
            tokenizer_name_or_path: Name or path of the pretrained tokenizer.
            **tokenizer_kwargs: Additional arguments to pass to the tokenizer constructor.
        """
        # Check if transformers is installed
        if importlib.util.find_spec("transformers") is None:
            raise ImportError("Hugging Face transformers library is required. Install with 'pip install transformers'")

        from transformers import AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, **tokenizer_kwargs)

    def __call__(self, texts, return_tensors="np", **kwargs):
        """
        Tokenize texts.

        Args:
            texts: Input text or list of texts.
            return_tensors: Output tensor format ("np" for numpy, "pt" for PyTorch, "tf" for TensorFlow).
            **kwargs: Additional arguments to pass to the tokenizer.

        Returns:
            Tokenized inputs, converted to OpenArchX Tensors if return_tensors="np".
        """
        # Call the tokenizer
        outputs = self.tokenizer(texts, return_tensors=return_tensors, **kwargs)

        # Convert to OpenArchX Tensors if requested
        if return_tensors == "np":
            return {k: Tensor(v) if isinstance(v, np.ndarray) else v for k, v in outputs.items()}
        return outputs


class ModelWeightsExtractor:
    """Utility to extract weights from a Hugging Face model into native OpenArchX format."""

    @staticmethod
    def extract_transformer_weights(hf_model, layer_mapping=None):
        """
        Extract weights from a Hugging Face transformer model into a format
        usable by OpenArchX native models.

        Args:
            hf_model: A Hugging Face transformer model
            layer_mapping: Optional dictionary mapping HF layer names to OpenArchX layer names

        Returns:
            Dictionary of weights that can be loaded into a native OpenArchX model
        """
        # Default layer mapping if none provided
        if layer_mapping is None:
            layer_mapping = {
                "embeddings": "embeddings",
                "attention": "attention",
                "intermediate": "intermediate",
                "output": "output",
                "layernorm": "layer_norm",
            }

        # Get state dict from HF model
        state_dict = hf_model.state_dict()

        # Convert to numpy arrays
        numpy_weights = {k: v.detach().cpu().numpy() for k, v in state_dict.items()}

        # Transform to OpenArchX format based on mapping
        openarchx_weights = {}

        for hf_name, param in numpy_weights.items():
            # Map the parameter name to OpenArchX format
            openarchx_name = hf_name
            for hf_pattern, ox_pattern in layer_mapping.items():
                if hf_pattern in hf_name:
                    openarchx_name = hf_name.replace(hf_pattern, ox_pattern)
                    break

            openarchx_weights[openarchx_name] = Tensor(param)

        return openarchx_weights


# Convenience functions

def get_huggingface_model(model_name, task="text-classification", **kwargs):
    """
    Helper function to get a Hugging Face model adapter.

    Args:
        model_name: Name or path of the pretrained model.
        task: The task the model should perform.
        **kwargs: Additional arguments to pass to the model constructor.

    Returns:
        A HuggingFaceModelAdapter instance.
    """
    return HuggingFaceModelAdapter(model_name, task=task, **kwargs)


def get_huggingface_dataset(dataset_name=None, dataset_path=None, **kwargs):
    """
    Helper function to get a Hugging Face dataset loader.

    Args:
        dataset_name: Name of the dataset in the Hugging Face Hub.
        dataset_path: Path to a local dataset.
        **kwargs: Additional arguments to pass to the dataset loader.

    Returns:
        A HuggingFaceDatasetLoader instance.
    """
    return HuggingFaceDatasetLoader(dataset_name=dataset_name, dataset_path=dataset_path, **kwargs)


def get_huggingface_tokenizer(tokenizer_name, **kwargs):
    """
    Helper function to get a Hugging Face tokenizer adapter.

    Args:
        tokenizer_name: Name or path of the pretrained tokenizer.
        **kwargs: Additional arguments to pass to the tokenizer constructor.

    Returns:
        A HuggingFaceTokenizerAdapter instance.
    """
    return HuggingFaceTokenizerAdapter(tokenizer_name, **kwargs)


def extract_model_weights(hf_model, layer_mapping=None):
    """
    Extract weights from a Hugging Face model for use in a native OpenArchX model.

    Args:
        hf_model: A Hugging Face model
        layer_mapping: Optional mapping between HF and OpenArchX layer names

    Returns:
        Dictionary of weights compatible with OpenArchX models
    """
    return ModelWeightsExtractor.extract_transformer_weights(hf_model, layer_mapping)
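A hedged end-to-end sketch of the convenience functions above. It requires transformers (with a PyTorch backend) to be installed; the model and tokenizer names are illustrative examples from the Hugging Face Hub, not anything this package pins:

from openarchx.utils.huggingface import get_huggingface_model, get_huggingface_tokenizer

# Classification: the adapter tokenizes, runs the model under no_grad,
# and hands back the logits wrapped in an OpenArchX Tensor.
model = get_huggingface_model(
    "distilbert-base-uncased-finetuned-sst-2-english",  # example Hub model
    task="text-classification",
)
logits = model("OpenArchX keeps its core free of heavy dependencies.")

# Tokenization only: with return_tensors="np" (the default), numpy outputs
# such as input_ids and attention_mask come back as OpenArchX Tensors.
tokenizer = get_huggingface_tokenizer("distilbert-base-uncased")
encoded = tokenizer(["hello world"], padding=True)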