sawnergy 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sawnergy might be problematic.
- sawnergy/embedding/SGNS_pml.py +276 -41
- sawnergy/embedding/SGNS_torch.py +145 -11
- sawnergy/embedding/__init__.py +24 -0
- sawnergy/embedding/embedder.py +106 -50
- sawnergy/embedding/visualizer.py +247 -0
- sawnergy/logging_util.py +1 -1
- sawnergy/rin/rin_builder.py +1 -1
- sawnergy/visual/visualizer.py +6 -6
- sawnergy/visual/visualizer_util.py +3 -0
- {sawnergy-1.0.5.dist-info → sawnergy-1.0.7.dist-info}/METADATA +48 -24
- sawnergy-1.0.7.dist-info/RECORD +23 -0
- sawnergy-1.0.5.dist-info/RECORD +0 -22
- {sawnergy-1.0.5.dist-info → sawnergy-1.0.7.dist-info}/WHEEL +0 -0
- {sawnergy-1.0.5.dist-info → sawnergy-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {sawnergy-1.0.5.dist-info → sawnergy-1.0.7.dist-info}/licenses/NOTICE +0 -0
- {sawnergy-1.0.5.dist-info → sawnergy-1.0.7.dist-info}/top_level.txt +0 -0
sawnergy/embedding/SGNS_pml.py
CHANGED
@@ -3,11 +3,11 @@ from __future__ import annotations
 # third party
 import numpy as np
 from pureml.machinery import Tensor
-from pureml.layers import Embedding
-from pureml.losses import BCE
+from pureml.layers import Embedding, Affine
+from pureml.losses import BCE, CCE
 from pureml.general_math import sum as t_sum
 from pureml.optimizers import Optim, LRScheduler
-from pureml.training_utils import TensorDataset, DataLoader
+from pureml.training_utils import TensorDataset, DataLoader, one_hot
 from pureml.base import NN
 
 # built-in
@@ -34,45 +34,73 @@ class SGNS_PureML(NN):
                  seed: int | None = None,
                  optim: Type[Optim],
                  optim_kwargs: dict,
-                 lr_sched: Type[LRScheduler],
-                 lr_sched_kwargs: dict
+                 lr_sched: Type[LRScheduler] | None = None,
+                 lr_sched_kwargs: dict | None = None,
+                 device: str | None = None):
         """
         Args:
             V: Vocabulary size (number of nodes).
             D: Embedding dimensionality.
             seed: Optional RNG seed for negative sampling.
-            optim:
-            optim_kwargs: Keyword arguments
-            lr_sched:
-            lr_sched_kwargs: Keyword arguments
+            optim: Optimizer class to instantiate.
+            optim_kwargs: Keyword arguments for the optimizer (required).
+            lr_sched: Optional learning-rate scheduler class.
+            lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
+            device: Target device string (e.g. "cuda"); accepted for API parity, ignored by PureML.
         """
+
+        if optim_kwargs is None:
+            raise ValueError("optim_kwargs must be provided")
+        if lr_sched is not None and lr_sched_kwargs is None:
+            raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
         self.V, self.D = int(V), int(D)
-        self.in_emb = Embedding(V, D)
-        self.out_emb = Embedding(V, D)
 
+        # embeddings
+        self.in_emb = Embedding(self.V, self.D)
+        self.out_emb = Embedding(self.V, self.D)
+
+        # seed + RNG for negative sampling
         self.seed = None if seed is None else int(seed)
         self._rng = np.random.default_rng(self.seed)
+        if self.seed is not None:
+            # optional: also set global NumPy seed for any non-RNG paths
+            np.random.seed(self.seed)
 
-
-        self.
-        _logger.info("SGNS_PureML init: V=%d D=%d seed=%s", self.V, self.D, self.seed)
+        # API compatibility: PureML is CPU-only
+        self.device = "cpu"
 
-
-
-
-
+        # optimizer / scheduler
+        self.optim: Optim = optim(self.parameters, **optim_kwargs)
+        self.lr_sched: LRScheduler | None = (
+            lr_sched(optim=self.optim, **lr_sched_kwargs) if lr_sched is not None else None
+        )
+
+        _logger.info(
+            "SGNS_PureML init: V=%d D=%d device=%s seed=%s",
+            self.V, self.D, self.device, self.seed
+        )
+
+    def _sample_neg(self, B: int, K: int, dist: np.ndarray) -> np.ndarray:
         return self._rng.choice(self.V, size=(B, K), replace=True, p=dist)
 
-    def predict(self, center: Tensor, pos: Tensor, neg: Tensor) -> Tensor:
-        """Compute positive/negative logits for SGNS.
-        c = self.in_emb(center)
-        pos_e = self.out_emb(pos)
-        neg_e = self.out_emb(neg)
-        pos_logits = t_sum(c * pos_e, axis=-1)
-        neg_logits = t_sum(c[:, None, :] * neg_e, axis=-1)
-        # ^^^
-        # (B,1,D) * (B,K,D) → (B,K,D) → sum D → (B,K)
+    def predict(self, center: Tensor, pos: Tensor, neg: Tensor) -> tuple[Tensor, Tensor]:
+        """Compute positive/negative logits for SGNS.
 
+        Shapes:
+            center: (B,)
+            pos: (B,)
+            neg: (B, K)
+        Returns:
+            pos_logits: (B,)
+            neg_logits: (B, K)
+        """
+        c = self.in_emb(center)      # (B, D)
+        pos_e = self.out_emb(pos)    # (B, D)
+        neg_e = self.out_emb(neg)    # (B, K, D)
+
+        pos_logits = t_sum(c * pos_e, axis=-1)              # (B,)
+        neg_logits = t_sum(c[:, None, :] * neg_e, axis=-1)  # (B, K)
         return pos_logits, neg_logits
 
     def fit(self,
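Note on the reworked predict() above: the negative logits rely on NumPy-style broadcasting of a (B, 1, D) center block against (B, K, D) negatives. A standalone NumPy sketch of the same shape algebra (illustrative only, independent of PureML's Tensor):

import numpy as np

B, K, D = 4, 5, 8                       # batch, negatives per pair, embedding dim
c = np.random.randn(B, D)               # center embeddings        (B, D)
pos_e = np.random.randn(B, D)           # positive context embeds  (B, D)
neg_e = np.random.randn(B, K, D)        # negative sample embeds   (B, K, D)

pos_logits = np.sum(c * pos_e, axis=-1)              # (B,)
neg_logits = np.sum(c[:, None, :] * neg_e, axis=-1)  # (B, 1, D) * (B, K, D) -> (B, K)

assert pos_logits.shape == (B,) and neg_logits.shape == (B, K)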
@@ -89,47 +117,254 @@
             "SGNS_PureML fit: epochs=%d batch=%d negatives=%d shuffle=%s",
             num_epochs, batch_size, num_negative_samples, shuffle_data
         )
-        data = TensorDataset(centers, contexts)
 
+        if noise_dist.ndim != 1 or noise_dist.size != self.V:
+            raise ValueError(f"noise_dist must be 1-D with length {self.V}; got {noise_dist.shape}")
+        dist = np.asarray(noise_dist, dtype=np.float64)
+        if np.any(dist < 0):
+            raise ValueError("noise_dist has negative entries")
+        s = dist.sum()
+        if not np.isfinite(s) or s <= 0:
+            raise ValueError("noise_dist must have positive finite sum")
+        if abs(s - 1.0) > 1e-6:
+            dist = dist / s
+
+        data = TensorDataset(centers, contexts)
         for epoch in range(1, num_epochs + 1):
             epoch_loss = 0.0
             batches = 0
+
             for cen, pos in DataLoader(data, batch_size=batch_size, shuffle=shuffle_data):
-
+                B = cen.data.shape[0] if isinstance(cen, Tensor) else len(cen)
 
+                neg_idx_np = self._sample_neg(B, num_negative_samples, dist)
+                neg = Tensor(neg_idx_np, requires_grad=False)
                 x_pos_logits, x_neg_logits = self(cen, pos, neg)
 
-                y_pos = Tensor(np.ones_like(x_pos_logits.
-                y_neg = Tensor(np.zeros_like(x_neg_logits.
+                y_pos = Tensor(np.ones_like(x_pos_logits.numpy(copy=False)), requires_grad=False)
+                y_neg = Tensor(np.zeros_like(x_neg_logits.numpy(copy=False)), requires_grad=False)
 
-
+                K = int(neg.data.shape[1])
+                loss = (
+                    BCE(y_pos, x_pos_logits, from_logits=True)
+                    + K*BCE(y_neg, x_neg_logits, from_logits=True)
+                )
 
                 self.optim.zero_grad()
                 loss.backward()
                 self.optim.step()
-
-                if lr_step_per_batch:
+
+                if lr_step_per_batch and self.lr_sched is not None:
                     self.lr_sched.step()
 
-                loss_value = float(np.asarray(loss.data)
+                loss_value = float(np.asarray(loss.data))
                 epoch_loss += loss_value
                 batches += 1
                 _logger.debug("Epoch %d batch %d loss=%.6f", epoch, batches, loss_value)
 
-            if not lr_step_per_batch:
+            if (not lr_step_per_batch) and (self.lr_sched is not None):
                 self.lr_sched.step()
 
             mean_loss = epoch_loss / max(batches, 1)
             _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
 
     @property
-    def
-
-        W
-
+    def in_embeddings(self) -> np.ndarray:
+        W: Tensor = self.in_emb.parameters[0]  # (V, D)
+        if W.shape != (self.V, self.D):
+            raise RuntimeError(
+                "Wrong embedding matrix shape: "
+                "self.in_emb.parameters[0].shape != (V, D)"
+            )
+        return W.numpy(copy=True, readonly=True)
+
+    @property
+    def out_embeddings(self) -> np.ndarray:
+        W: Tensor = self.out_emb.parameters[0]  # (V, D)
+        if W.shape != (self.V, self.D):
+            raise RuntimeError(
+                "Wrong embedding matrix shape: "
+                "self.out_emb.parameters[0].shape != (V, D)"
+            )
+        return W.numpy(copy=True, readonly=True)
+
+    @property
+    def avg_embeddings(self) -> np.ndarray:
+        return 0.5 * (self.in_embeddings + self.out_embeddings)
+
+class SG_PureML(NN):
+    """Plain Skip-Gram (full softmax) in PureML.
+
+    Trains two affine layers to emulate the classic Skip-Gram objective with a
+    **full** softmax over the vocabulary (no negative sampling):
+
+        x = one_hot(center, V)      # (B, V)
+        y = x @ W_in + b_in         # (B, D)
+        logits = y @ W_out + b_out  # (B, V)
+        loss = CCE(one_hot(context, V), logits, from_logits=True)
+
+    The learnable “input” embeddings are the rows of `W_in` (shape `(V, D)`), and
+    the “output” embeddings are the rows of `W_outᵀ` (also `(V, D)`).
+    """
+
+    def __init__(self,
+                 V: int,
+                 D: int,
+                 *,
+                 seed: int | None = None,
+                 optim: Type[Optim],
+                 optim_kwargs: dict,
+                 lr_sched: Type[LRScheduler] | None = None,
+                 lr_sched_kwargs: dict | None = None,
+                 device: str | None = None):
+        """Initialize the plain Skip-Gram model (full softmax).
+
+        Args:
+            V: Vocabulary size (number of nodes/tokens).
+            D: Embedding dimensionality.
+            seed: Optional RNG seed (kept for API parity; not used in layer init).
+            optim: Optimizer class to instantiate (e.g., `Adam`, `SGD`).
+            optim_kwargs: Keyword arguments passed to the optimizer constructor.
+            lr_sched: Optional learning-rate scheduler class.
+            lr_sched_kwargs: Keyword arguments for the scheduler
+                (required if `lr_sched` is provided).
+            device: Device string (e.g., `"cuda"`). Accepted for parity, ignored
+                by PureML (CPU-only).
+
+        Notes:
+            The encoder/decoder are implemented as:
+              • `in_emb = Affine(V, D)` (acts on a one-hot center index)
+              • `out_emb = Affine(D, V)`
+            so the forward pass produces vocabulary-sized logits.
+        """
+        if optim_kwargs is None:
+            raise ValueError("optim_kwargs must be provided")
+        if lr_sched is not None and lr_sched_kwargs is None:
+            raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
+        self.V, self.D = int(V), int(D)
+
+        # input/output “embedding” projections
+        self.in_emb = Affine(self.V, self.D)
+        self.out_emb = Affine(self.D, self.V)
+
+        self.seed = None if seed is None else int(seed)
+
+        # API compatibility: PureML is CPU-only
+        self.device = "cpu"
+
+        # optimizer / scheduler
+        self.optim: Optim = optim(self.parameters, **optim_kwargs)
+        self.lr_sched: LRScheduler | None = (
+            lr_sched(optim=self.optim, **lr_sched_kwargs) if lr_sched is not None else None
+        )
+
+        _logger.info(
+            "SG_PureML init: V=%d D=%d device=%s seed=%s",
+            self.V, self.D, self.device, self.seed
+        )
+
+    def predict(self, center: Tensor) -> Tensor:
+        """Return vocabulary logits for each center index.
+
+        Args:
+            center: Tensor of center indices with shape `(B,)` and integer dtype.
+
+        Returns:
+            Tensor: Logits over the vocabulary with shape `(B, V)`.
+        """
+        c = one_hot(dims=self.V, label=center)  # (B, V)
+        y = self.in_emb(c)                      # (B, D)
+        z = self.out_emb(y)                     # (B, V)
+        return z
+
+    def fit(self,
+            centers: np.ndarray,
+            contexts: np.ndarray,
+            num_epochs: int,
+            batch_size: int,
+            shuffle_data: bool,
+            lr_step_per_batch: bool,
+            **_ignore):
+        """Train Skip-Gram with full softmax on center/context pairs.
+
+        Args:
+            centers: Array of center indices, shape `(N,)`, dtype integer in `[0, V)`.
+            contexts: Array of context (target) indices, shape `(N,)`, dtype integer.
+            num_epochs: Number of passes over the dataset.
+            batch_size: Mini-batch size.
+            shuffle_data: Whether to shuffle pairs each epoch.
+            lr_step_per_batch: If True, call `lr_sched.step()` after every batch
+                (when a scheduler is provided). If False, step once per epoch.
+            **_ignore: Ignored kwargs for API compatibility with SGNS.
+
+        Optimization:
+            Uses `CCE(one_hot(context), logits, from_logits=True)` where
+            `logits = predict(center)`. Scheduler stepping obeys `lr_step_per_batch`.
+        """
+        _logger.info(
+            "SG_PureML fit: epochs=%d batch=%d shuffle=%s",
+            num_epochs, batch_size, shuffle_data
+        )
+        data = TensorDataset(centers, contexts)
+
+        for epoch in range(1, num_epochs + 1):
+            epoch_loss = 0.0
+            batches = 0
+
+            for cen, ctx in DataLoader(data, batch_size=batch_size, shuffle=shuffle_data):
+                logits = self(cen)                       # (B, V)
+                y = one_hot(self.V, label=ctx)           # (B, V)
+                loss = CCE(y, logits, from_logits=True)  # scalar
+
+                self.optim.zero_grad()
+                loss.backward()
+                self.optim.step()
+
+                if lr_step_per_batch and self.lr_sched is not None:
+                    self.lr_sched.step()
+
+                loss_value = float(np.asarray(loss.data))
+                epoch_loss += loss_value
+                batches += 1
+                _logger.debug("Epoch %d batch %d loss=%.6f", epoch, batches, loss_value)
+
+            if (not lr_step_per_batch) and (self.lr_sched is not None):
+                self.lr_sched.step()
+
+            mean_loss = epoch_loss / max(batches, 1)
+            _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
+
+    @property
+    def in_embeddings(self) -> np.ndarray:
+        """Input embeddings matrix `W_in` as `(V, D)` (copy, read-only)."""
+        W = self.in_emb.parameters[0]  # (V, D)
+        if W.shape != (self.V, self.D):
+            raise RuntimeError(
+                "Wrong embedding matrix shape: "
+                "self.in_emb.parameters[0].shape != (V, D)"
+            )
+        return W.numpy(copy=True, readonly=True)  # (V, D)
+
+    @property
+    def out_embeddings(self) -> np.ndarray:
+        """Output embeddings matrix `W_outᵀ` as `(V, D)` (copy, read-only).
+        (`out_emb.parameters[0]` is `(D, V)`, so we transpose.)"""
+        W = self.out_emb.parameters[0]  # (D, V)
+        if W.shape != (self.D, self.V):
+            raise RuntimeError(
+                "Wrong embedding matrix shape: "
+                "self.out_emb.parameters[0].shape != (D, V)"
+            )
+        return W.numpy(copy=True, readonly=True).T  # (V, D)
+
+    @property
+    def avg_embeddings(self) -> np.ndarray:
+        """Elementwise average of input/output embeddings, shape `(V, D)`."""
+        return 0.5 * (self.in_embeddings + self.out_embeddings)  # (V, D)
 
 
-__all__ = ["SGNS_PureML"]
+__all__ = ["SGNS_PureML", "SG_PureML"]
 
 if __name__ == "__main__":
     pass
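The new SG_PureML class documents a full-softmax Skip-Gram objective. A minimal NumPy sketch of that forward pass and loss, reusing the W_in/W_out names from the docstring (illustrative only, not PureML internals):

import numpy as np

V, D, B = 10, 4, 3
rng = np.random.default_rng(0)
W_in, b_in = rng.normal(size=(V, D)), np.zeros(D)
W_out, b_out = rng.normal(size=(D, V)), np.zeros(V)

center = rng.integers(0, V, size=B)
context = rng.integers(0, V, size=B)

x = np.eye(V)[center]                   # one-hot centers     (B, V)
y = x @ W_in + b_in                     # "input" embeddings  (B, D)
logits = y @ W_out + b_out              # vocabulary logits   (B, V)

# categorical cross-entropy against the true context index, computed from logits
logits -= logits.max(axis=1, keepdims=True)            # numerical stability
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
loss = -log_probs[np.arange(B), context].mean()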
sawnergy/embedding/SGNS_torch.py
CHANGED
@@ -42,31 +42,31 @@ class SGNS_Torch:
             optim: Optimizer class to instantiate.
             optim_kwargs: Keyword arguments for the optimizer.
             lr_sched: Optional learning-rate scheduler class.
-            lr_sched_kwargs: Keyword arguments for the scheduler.
+            lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
             device: Target device string (e.g. ``"cuda"``). Defaults to CUDA if available, else CPU.
         """
         if optim_kwargs is None:
             raise ValueError("optim_kwargs must be provided")
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
         self.V, self.D = int(V), int(D)
-
-        self.
-
+        # two embeddings as in/out matrices
+        self.in_emb = nn.Embedding(self.V, self.D)
+        self.out_emb = nn.Embedding(self.V, self.D)
 
+        resolved_device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device(resolved_device)
         if seed is not None:
             torch.manual_seed(int(seed))
             np.random.seed(int(seed))
             if self.device.type == "cuda":
                 torch.cuda.manual_seed_all(int(seed))
 
-        # two embeddings as in/out matrices
-        self.in_emb = nn.Embedding(self.V, self.D)
-        self.out_emb = nn.Embedding(self.V, self.D)
-
         self.to(self.device)
-
+        _logger.info("SGNS_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
         params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
+        # optimizer / scheduler
         self.opt = optim(params=params, **optim_kwargs)
         self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None
 
@@ -112,6 +112,15 @@ class SGNS_Torch:
         idx = np.arange(N)
 
         noise_probs = torch.as_tensor(noise_dist, dtype=torch.float32, device=self.device)
+        # require normalized, non-negative distribution
+        if (not torch.isfinite(noise_probs).all()
+                or (noise_probs < 0).any()
+                or abs(float(noise_probs.sum().item()) - 1.0) > 1e-6):
+            raise ValueError(
+                "noise_dist must be non-negative, finite, and sum to 1.0 "
+                f"(got sum={float(noise_probs.sum().item()):.6f}, "
+                f"min={float(noise_probs.min().item()):.6f})"
+            )
 
         for epoch in range(1, int(num_epochs) + 1):
             epoch_loss = 0.0
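Unlike the PureML backend, which renormalizes noise_dist, this check rejects unnormalized input. A sketch of preparing a valid distribution from raw node counts (the 0.75 smoothing exponent is the conventional word2vec choice, not something this package requires):

import numpy as np

counts = np.array([120, 30, 5, 0, 45], dtype=np.float64)   # per-node frequencies (toy data)
noise_dist = counts ** 0.75                                 # optional unigram smoothing
noise_dist /= noise_dist.sum()                              # must sum to 1 before calling fit

assert noise_dist.min() >= 0 and abs(noise_dist.sum() - 1.0) <= 1e-6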
@@ -140,7 +149,7 @@ class SGNS_Torch:
                 y_neg = torch.zeros_like(neg_logits)
                 loss_neg = bce(neg_logits, y_neg)
 
-                loss = loss_pos + loss_neg
+                loss = loss_pos + K*loss_neg
 
                 self.opt.zero_grad(set_to_none=True)
                 loss.backward()
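Assuming `bce` here is a mean-reduced BCE-with-logits criterion (its construction is outside this hunk), `loss_pos` averages over B positive pairs while `loss_neg` averages over B·K negatives, so the added factor of K restores the usual one-positive-to-K-negatives weighting per center. A small sketch of the arithmetic:

import torch
import torch.nn as nn

B, K = 4, 5
bce = nn.BCEWithLogitsLoss(reduction="mean")
pos_logits = torch.randn(B)
neg_logits = torch.randn(B, K)

loss_pos = bce(pos_logits, torch.ones_like(pos_logits))    # mean over B terms
loss_neg = bce(neg_logits, torch.zeros_like(neg_logits))   # mean over B*K terms
loss = loss_pos + K * loss_neg                              # ~ one positive vs K negatives per center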
@@ -163,6 +172,131 @@ class SGNS_Torch:
     def embeddings(self) -> np.ndarray:
         """Return the input embedding matrix as a NumPy array."""
         return self.in_emb.weight.detach().cpu().numpy()
+
+    @property
+    def in_embeddings(self) -> np.ndarray:
+        return self.in_emb.weight.detach().cpu().numpy()
+
+    @property
+    def out_embeddings(self) -> np.ndarray:
+        return self.out_emb.weight.detach().cpu().numpy()
+
+    @property
+    def avg_embeddings(self) -> np.ndarray:
+        return 0.5 * (self.in_embeddings + self.out_embeddings)
+
+    # tiny helper for device move
+    def to(self, device):
+        self.in_emb.to(device)
+        self.out_emb.to(device)
+        return self
+
+class SG_Torch:
+
+    def __init__(self,
+                 V: int,
+                 D: int,
+                 *,
+                 seed: int | None = None,
+                 optim: Type[Optimizer],
+                 optim_kwargs: dict,
+                 lr_sched: Type[LRScheduler] | None = None,
+                 lr_sched_kwargs: dict | None = None,
+                 device: str | None = None):
+
+        if optim_kwargs is None:
+            raise ValueError("optim_kwargs must be provided")
+        if lr_sched is not None and lr_sched_kwargs is None:
+            raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
+        self.V, self.D = int(V), int(D)
+
+        self.in_emb = nn.Linear(self.V, self.D)
+        self.out_emb = nn.Linear(self.D, self.V)
+
+        resolved_device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device(resolved_device)
+        if seed is not None:
+            torch.manual_seed(int(seed))
+            np.random.seed(int(seed))
+            if self.device.type == "cuda":
+                torch.cuda.manual_seed_all(int(seed))
+        self.to(self.device)
+        _logger.info("SG_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
+
+        params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
+
+        # optimizer / scheduler
+        self.opt = optim(params=params, **optim_kwargs)
+        self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None
+
+    def predict(self, center: torch.Tensor) -> torch.Tensor:
+        center = center.to(self.device, dtype=torch.long)
+        c = nn.functional.one_hot(center, num_classes=self.V).to(dtype=torch.float32, device=self.device)
+        y = self.in_emb(c)
+        z = self.out_emb(y)
+        return z
+
+    __call__ = predict
+
+    def fit(self,
+            centers: np.ndarray,
+            contexts: np.ndarray,
+            num_epochs: int,
+            batch_size: int,
+            shuffle_data: bool,
+            lr_step_per_batch: bool,
+            **_ignore):
+        cce = nn.CrossEntropyLoss(reduction="mean")
+
+        N = centers.shape[0]
+        idx = np.arange(N)
+
+        for epoch in range(1, int(num_epochs) + 1):
+            epoch_loss = 0.0
+            batches = 0
+            if shuffle_data:
+                np.random.shuffle(idx)
+
+            for s in range(0, N, int(batch_size)):
+                take = idx[s:s+int(batch_size)]
+                if take.size == 0:
+                    continue
+
+                cen = torch.as_tensor(centers[take], dtype=torch.long, device=self.device)
+                ctx = torch.as_tensor(contexts[take], dtype=torch.long, device=self.device)
+
+                logits = self(cen)
+                loss = cce(logits, ctx)
+
+                self.opt.zero_grad(set_to_none=True)
+                loss.backward()
+                self.opt.step()
+
+                if lr_step_per_batch and self.lr_sched is not None:
+                    self.lr_sched.step()
+
+                epoch_loss += float(loss.detach().cpu().item())
+                batches += 1
+                _logger.debug("Epoch %d batch %d loss=%.6f", epoch, batches, loss.item())
+
+            if not lr_step_per_batch and self.lr_sched is not None:
+                self.lr_sched.step()
+
+            mean_loss = epoch_loss / max(batches, 1)
+            _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
+
+    @property
+    def in_embeddings(self) -> np.ndarray:
+        return self.in_emb.weight.detach().T.cpu().numpy()
+
+    @property
+    def out_embeddings(self) -> np.ndarray:
+        return self.out_emb.weight.detach().cpu().numpy()
+
+    @property
+    def avg_embeddings(self) -> np.ndarray:
+        return 0.5 * (self.in_embeddings + self.out_embeddings)
 
     # tiny helper for device move
     def to(self, device):
@@ -171,7 +305,7 @@ class SGNS_Torch:
         return self
 
 
-__all__ = ["SGNS_Torch"]
+__all__ = ["SGNS_Torch", "SG_Torch"]
 
 if __name__ == "__main__":
     pass
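SG_Torch passes one-hot vectors through nn.Linear, so a Linear(V, D) applied to one_hot(i) selects column i of its (D, V) weight; that is why in_embeddings transposes the weight to recover a (V, D) matrix. A quick standalone check (a sketch, not package code; bias omitted for exact equality):

import torch
import torch.nn as nn

V, D = 6, 3
lin = nn.Linear(V, D, bias=False)                 # weight shape: (D, V)
i = torch.tensor([2])
one_hot = nn.functional.one_hot(i, num_classes=V).float()

out = lin(one_hot)                                # (1, D)
lookup = lin.weight.detach().T[i]                 # row i of the (V, D) transpose
assert torch.allclose(out.detach(), lookup)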
sawnergy/embedding/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from .embedder import Embedder
+from .visualizer import Visualizer
 
 def __getattr__(name: str):
     """Lazily expose optional backends."""
@@ -14,6 +15,16 @@ def __getattr__(name: str):
             ) from exc
         return SGNS_Torch
 
+    if name == "SG_Torch":
+        try:
+            from .SGNS_torch import SG_Torch
+        except Exception as exc:
+            raise ImportError(
+                "PyTorch backend requested but torch is not installed. "
+                "Install PyTorch via `pip install torch` (see https://pytorch.org/get-started)."
+            ) from exc
+        return SG_Torch
+
     if name == "SGNS_PureML":
         try:
             from .SGNS_pml import SGNS_PureML
@@ -24,11 +35,24 @@ def __getattr__(name: str):
                 "Install PureML first via `pip install ym-pure-ml` "
             ) from exc
 
+    if name == "SG_PureML":
+        try:
+            from .SGNS_pml import SG_PureML
+            return SG_PureML
+        except Exception as exc:
+            raise ImportError(
+                "PureML is not installed. "
+                "Install PureML first via `pip install ym-pure-ml` "
+            ) from exc
+
     raise AttributeError(name)
 
 
 __all__ = [
     "Embedder",
+    "Visualizer",
     "SGNS_PureML",
     "SGNS_Torch",
+    "SG_PureML",
+    "SG_Torch"
 ]
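The module-level __getattr__ extended above follows the PEP 562 lazy-attribute pattern: optional backends are imported only on first access. A hedged usage sketch (class names are those exported by this diff; whether the access succeeds depends on which optional dependencies are installed):

# Importing the package does not pull in torch or PureML yet.
from sawnergy import embedding

Embedder = embedding.Embedder          # eagerly exported
try:
    SG_Torch = embedding.SG_Torch      # triggers the lazy `from .SGNS_torch import SG_Torch`
except ImportError as exc:
    print(f"torch backend unavailable: {exc}")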