sawnergy 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sawnergy might be problematic.
- sawnergy/embedding/SGNS_pml.py +36 -38
- sawnergy/embedding/SGNS_torch.py +82 -29
- sawnergy/embedding/embedder.py +325 -245
- sawnergy/embedding/visualizer.py +9 -5
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/METADATA +39 -40
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/RECORD +10 -10
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/WHEEL +0 -0
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/licenses/NOTICE +0 -0
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/top_level.txt +0 -0
sawnergy/embedding/SGNS_pml.py
CHANGED
@@ -6,7 +6,7 @@ from pureml.machinery import Tensor
 from pureml.layers import Embedding, Affine
 from pureml.losses import BCE, CCE
 from pureml.general_math import sum as t_sum
-from pureml.optimizers import Optim, LRScheduler
+from pureml.optimizers import Optim, LRScheduler, SGD
 from pureml.training_utils import TensorDataset, DataLoader, one_hot
 from pureml.base import NN
 
@@ -32,8 +32,8 @@ class SGNS_PureML(NN):
                  D: int,
                  *,
                  seed: int | None = None,
-                 optim: Type[Optim],
-                 optim_kwargs: dict,
+                 optim: Type[Optim] = SGD,
+                 optim_kwargs: dict | None = None,
                  lr_sched: Type[LRScheduler] | None = None,
                  lr_sched_kwargs: dict | None = None,
                  device: str | None = None):
@@ -42,15 +42,15 @@ class SGNS_PureML(NN):
             V: Vocabulary size (number of nodes).
             D: Embedding dimensionality.
             seed: Optional RNG seed for negative sampling.
-            optim: Optimizer class to instantiate.
-            optim_kwargs: Keyword arguments for the optimizer
+            optim: Optimizer class to instantiate. Defaults to plain SGD.
+            optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
             lr_sched: Optional learning-rate scheduler class.
             lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
             device: Target device string (e.g. "cuda"); accepted for API parity, ignored by PureML.
         """
 
-
-
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
+
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
@@ -147,7 +147,7 @@ class SGNS_PureML(NN):
         K = int(neg.data.shape[1])
         loss = (
             BCE(y_pos, x_pos_logits, from_logits=True)
-            + K*BCE(y_neg, x_neg_logits, from_logits=True)
+            + Tensor(K)*BCE(y_neg, x_neg_logits, from_logits=True)
         )
 
         self.optim.zero_grad()
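The @@ -147 hunk above is a behavioral fix: the negative-sample count K is now wrapped as `Tensor(K)` before scaling the negative BCE term, presumably so the product stays inside pureml's Tensor operations instead of relying on plain int-by-Tensor multiplication. For context, this loss corresponds to the standard SGNS objective, sketched here in its usual form (Mikolov-style notation, not identifiers from the package; v_c is the input embedding of the center word, u the output embeddings):

    \mathcal{L}(c, o) = -\log\sigma\left(u_o^{\top} v_c\right) - \sum_{k=1}^{K} \log\sigma\left(-u_{n_k}^{\top} v_c\right)

If `BCE(y_neg, x_neg_logits, from_logits=True)` averages over the K negative columns, multiplying by K restores the summed second term, which is why the scale factor appears at all.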
@@ -176,7 +176,9 @@ class SGNS_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.in_emb.parameters[0].shape != (V, D)"
             )
-
+        arr = W.numpy(copy=True, readonly=True)  # (V, D)
+        _logger.debug("In emb shape: %s", arr.shape)
+        return arr
 
     @property
     def out_embeddings(self) -> np.ndarray:
@@ -186,7 +188,9 @@ class SGNS_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.out_emb.parameters[0].shape != (V, D)"
             )
-
+        arr = W.numpy(copy=True, readonly=True)  # (V, D)
+        _logger.debug("Out emb shape: %s", arr.shape)
+        return arr
 
     @property
     def avg_embeddings(self) -> np.ndarray:
@@ -208,37 +212,29 @@ class SG_PureML(NN):
     """
 
     def __init__(self,
-
-
-
-
-
-
-
-
-
+                 V: int,
+                 D: int,
+                 *,
+                 seed: int | None = None,
+                 optim: Type[Optim] = SGD,
+                 optim_kwargs: dict | None = None,
+                 lr_sched: Type[LRScheduler] | None = None,
+                 lr_sched_kwargs: dict | None = None,
+                 device: str | None = None):
         """Initialize the plain Skip-Gram model (full softmax).
 
         Args:
             V: Vocabulary size (number of nodes/tokens).
             D: Embedding dimensionality.
             seed: Optional RNG seed (kept for API parity; not used in layer init).
-            optim: Optimizer class to instantiate
-            optim_kwargs: Keyword arguments
+            optim: Optimizer class to instantiate. Defaults to plain SGD.
+            optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
             lr_sched: Optional learning-rate scheduler class.
-            lr_sched_kwargs: Keyword arguments for the scheduler
-
-            device: Device string (e.g., `"cuda"`). Accepted for parity, ignored
-                by PureML (CPU-only).
-
-        Notes:
-            The encoder/decoder are implemented as:
-            • `in_emb = Affine(V, D)` (acts on a one-hot center index)
-            • `out_emb = Affine(D, V)`
-            so forward pass produces vocabulary-sized logits.
+            lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
+            device: Device string (e.g., "cuda"). Accepted for parity, ignored by PureML (CPU-only).
         """
-
-
+
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
@@ -249,9 +245,7 @@ class SG_PureML(NN):
         self.out_emb = Affine(self.D, self.V)
 
         self.seed = None if seed is None else int(seed)
-
-        # API compatibility: PureML is CPU-only
-        self.device = "cpu"
+        self.device = "cpu"  # API parity
 
         # optimizer / scheduler
         self.optim: Optim = optim(self.parameters, **optim_kwargs)
@@ -344,7 +338,9 @@ class SG_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.in_emb.parameters[0].shape != (V, D)"
             )
-
+        arr = W.numpy(copy=True, readonly=True)  # (V, D)
+        _logger.debug("In emb shape: %s", arr.shape)
+        return arr
 
     @property
     def out_embeddings(self) -> np.ndarray:
@@ -356,7 +352,9 @@ class SG_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.out_emb.parameters[0].shape != (D, V)"
             )
-
+        arr = W.numpy(copy=True, readonly=True).T  # (V, D)
+        _logger.debug("Out emb shape: %s", arr.shape)
+        return arr
 
     @property
     def avg_embeddings(self) -> np.ndarray:
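Taken together, the PureML changes make the optimizer optional in both constructors. A minimal usage sketch (class and module names as they appear in this diff; the argument values are illustrative):

    from sawnergy.embedding.SGNS_pml import SGNS_PureML, SG_PureML

    # Since 1.0.8 both models construct without an explicit optimizer:
    # optim defaults to pureml's SGD and optim_kwargs to {"lr": 0.1}.
    sgns = SGNS_PureML(V=1000, D=64, seed=42)
    sg = SG_PureML(V=1000, D=64, seed=42)

One edge case of the `optim_kwargs = optim_kwargs or {"lr": 0.1}` idiom: an explicitly passed empty dict is falsy, so it is silently replaced by the default; testing `optim_kwargs is None` instead would preserve it.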
sawnergy/embedding/SGNS_torch.py
CHANGED
@@ -10,6 +10,7 @@ from torch.optim.lr_scheduler import LRScheduler
 # built-in
 import logging
 from typing import Type
+import warnings
 
 # *----------------------------------------------------*
 # GLOBALS
@@ -22,31 +23,64 @@ _logger = logging.getLogger(__name__)
 # *----------------------------------------------------*
 
 class SGNS_Torch:
-    """PyTorch implementation of Skip-Gram with Negative Sampling.
+    """PyTorch implementation of Skip-Gram with Negative Sampling.
+
+    DEPRECATED (temporary): This class currently produces noisy embeddings in
+    practice and is deprecated until further notice. The issue likely stems from
+    weight initialization, although the root cause has not yet been determined.
+
+    Prefer one of the following alternatives:
+      • Plain PyTorch Skip-Gram (full softmax): `SG_Torch`
+      • PureML-based implementations: `SGNS_PureML` or `SG_PureML` (if available)
+
+    This API may change or be removed once the root cause is resolved.
+    """
 
     def __init__(self,
-
-
-
+                 V: int,
+                 D: int,
+                 *,
                  seed: int | None = None,
-                 optim: Type[Optimizer],
-                 optim_kwargs: dict,
+                 optim: Type[Optimizer] = torch.optim.SGD,
+                 optim_kwargs: dict | None = None,
                  lr_sched: Type[LRScheduler] | None = None,
                  lr_sched_kwargs: dict | None = None,
                  device: str | None = None):
-        """
+        """Initialize SGNS (negative sampling) in PyTorch.
+
+        DEPRECATION WARNING:
+            This implementation is temporarily deprecated for producing noisy
+            embeddings. The issue likely stems from weight initialization, though
+            the exact root cause has not been conclusively determined. Please use
+            `SG_Torch` (plain Skip-Gram with full softmax) or the PureML-based
+            `SGNS_PureML` / `SG_PureML` models instead.
+
         Args:
             V: Vocabulary size (number of nodes).
             D: Embedding dimensionality.
             seed: Optional RNG seed for PyTorch.
-            optim: Optimizer class to instantiate.
-            optim_kwargs: Keyword arguments for the optimizer.
+            optim: Optimizer class to instantiate. Defaults to plain SGD.
+            optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
             lr_sched: Optional learning-rate scheduler class.
             lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
-            device: Target device string (e.g.
+            device: Target device string (e.g. "cuda"). Defaults to CUDA if available, else CPU.
         """
-
-
+
+        # --- runtime deprecation notice ---
+        warnings.warn(
+            "SGNS_Torch is temporarily deprecated: it currently produces noisy "
+            "embeddings (likely due to weight initialization). Use SG_Torch "
+            "(plain Skip-Gram, full softmax) or the PureML-based SG/SGNS classes.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        _logger.warning(
+            "DEPRECATED: SGNS_Torch currently produces noisy embeddings "
+            "(likely weight initialization). Prefer SG_Torch or PureML SG/SGNS."
+        )
+        # ----------------------------------
+
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
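One practical note on the new runtime notice: Python ignores `DeprecationWarning` by default unless the warning is attributed to code running directly under `__main__`, so callers constructing the class from an imported module may only ever see the `_logger.warning(...)` line. A minimal sketch of opting in from application code (standard-library behavior, nothing sawnergy-specific):

    import warnings

    # Surface DeprecationWarning raised from library code; by default it is
    # hidden unless the warning is attributed to code in __main__.
    warnings.simplefilter("default", DeprecationWarning)

The same effect is available from the command line via `python -W default::DeprecationWarning app.py`.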
@@ -167,19 +201,18 @@ class SGNS_Torch:
 
         mean_loss = epoch_loss / max(batches, 1)
         _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
-
-    @property
-    def embeddings(self) -> np.ndarray:
-        """Return the input embedding matrix as a NumPy array."""
-        return self.in_emb.weight.detach().cpu().numpy()
 
     @property
     def in_embeddings(self) -> np.ndarray:
-
+        W = self.in_emb.weight.detach().cpu().numpy()
+        _logger.debug("In emb shape: %s", W.shape)
+        return W
 
     @property
     def out_embeddings(self) -> np.ndarray:
-
+        W = self.out_emb.weight.detach().cpu().numpy()
+        _logger.debug("Out emb shape: %s", W.shape)
+        return W
 
     @property
     def avg_embeddings(self) -> np.ndarray:
@@ -192,20 +225,37 @@ class SGNS_Torch:
         return self
 
 class SG_Torch:
+    """PyTorch implementation of Skip-Gram."""
 
     def __init__(self,
-
-
-
+                 V: int,
+                 D: int,
+                 *,
                  seed: int | None = None,
-                 optim: Type[Optimizer],
-                 optim_kwargs: dict,
+                 optim: Type[Optimizer] = torch.optim.SGD,
+                 optim_kwargs: dict | None = None,
                  lr_sched: Type[LRScheduler] | None = None,
                  lr_sched_kwargs: dict | None = None,
                  device: str | None = None):
+        """Initialize the plain Skip-Gram (full softmax) model in PyTorch.
 
-
-
+        Args:
+            V: Vocabulary size (number of nodes/tokens).
+            D: Embedding dimensionality.
+            seed: Optional RNG seed for reproducibility.
+            optim: Optimizer class to instantiate. Defaults to :class:`torch.optim.SGD`.
+            optim_kwargs: Keyword args for the optimizer. Defaults to ``{"lr": 0.1}``.
+            lr_sched: Optional learning-rate scheduler class.
+            lr_sched_kwargs: Keyword args for the scheduler (required if ``lr_sched`` is provided).
+            device: Target device string (e.g., ``"cuda"``). Defaults to CUDA if available, else CPU.
+
+        Notes:
+            The encoder/decoder are linear layers acting on one-hot centers:
+              • ``in_emb = nn.Linear(V, D)``
+              • ``out_emb = nn.Linear(D, V)``
+            Forward pass produces vocabulary-sized logits and is trained with CrossEntropyLoss.
+        """
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
@@ -225,7 +275,6 @@ class SG_Torch:
         _logger.info("SG_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
 
         params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
-
         # optimizer / scheduler
         self.opt = optim(params=params, **optim_kwargs)
         self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None
@@ -288,11 +337,15 @@ class SG_Torch:
 
     @property
     def in_embeddings(self) -> np.ndarray:
-
+        W = self.in_emb.weight.detach().T.cpu().numpy()
+        _logger.debug("In emb shape: %s", W.shape)
+        return W
 
     @property
     def out_embeddings(self) -> np.ndarray:
-
+        W = self.out_emb.weight.detach().cpu().numpy()
+        _logger.debug("Out emb shape: %s", W.shape)
+        return W
 
     @property
     def avg_embeddings(self) -> np.ndarray:
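The SG_Torch notes describe full-softmax Skip-Gram as two linear maps over a one-hot center. A self-contained sketch of that architecture in plain PyTorch (shapes and layer names mirror the docstring; the batch and training step are illustrative, not code from the package):

    import torch
    import torch.nn as nn

    V, D = 1000, 64                                  # vocab size, embedding dim
    in_emb = nn.Linear(V, D)                         # one-hot center -> D-dim embedding
    out_emb = nn.Linear(D, V)                        # embedding -> vocab-sized logits
    loss_fn = nn.CrossEntropyLoss()

    centers = torch.randint(0, V, (32,))             # batch of center indices
    contexts = torch.randint(0, V, (32,))            # batch of context targets
    x = nn.functional.one_hot(centers, V).float()    # (32, V) one-hot inputs

    logits = out_emb(in_emb(x))                      # (32, V) full-softmax logits
    loss = loss_fn(logits, contexts)                 # CE over the whole vocabulary
    loss.backward()

Because `nn.Linear(V, D).weight` is stored as `(out_features, in_features) = (D, V)`, recovering a `(V, D)` embedding matrix requires a transpose, which matches the `self.in_emb.weight.detach().T.cpu().numpy()` line in the final hunk.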