sawnergy-1.0.7-py3-none-any.whl → sawnergy-1.0.8-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of sawnergy might be problematic.

@@ -6,7 +6,7 @@ from pureml.machinery import Tensor
  from pureml.layers import Embedding, Affine
  from pureml.losses import BCE, CCE
  from pureml.general_math import sum as t_sum
- from pureml.optimizers import Optim, LRScheduler
+ from pureml.optimizers import Optim, LRScheduler, SGD
  from pureml.training_utils import TensorDataset, DataLoader, one_hot
  from pureml.base import NN

@@ -32,8 +32,8 @@ class SGNS_PureML(NN):
  D: int,
  *,
  seed: int | None = None,
- optim: Type[Optim],
- optim_kwargs: dict,
+ optim: Type[Optim] = SGD,
+ optim_kwargs: dict | None = None,
  lr_sched: Type[LRScheduler] | None = None,
  lr_sched_kwargs: dict | None = None,
  device: str | None = None):
@@ -42,15 +42,15 @@ class SGNS_PureML(NN):
  V: Vocabulary size (number of nodes).
  D: Embedding dimensionality.
  seed: Optional RNG seed for negative sampling.
- optim: Optimizer class to instantiate.
- optim_kwargs: Keyword arguments for the optimizer (required).
+ optim: Optimizer class to instantiate. Defaults to plain SGD.
+ optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
  lr_sched: Optional learning-rate scheduler class.
  lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
  device: Target device string (e.g. "cuda"); accepted for API parity, ignored by PureML.
  """

- if optim_kwargs is None:
- raise ValueError("optim_kwargs must be provided")
+ optim_kwargs = optim_kwargs or {"lr": 0.1}
+
  if lr_sched is not None and lr_sched_kwargs is None:
  raise ValueError("lr_sched_kwargs required when lr_sched is provided")

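With these defaults in place, SGNS_PureML no longer requires an explicit optimizer. A minimal before/after sketch of the call site, assuming the class is importable from sawnergy's PureML backend (the module path is not shown in this diff; only the class, parameter, and optimizer names come from the hunks above):

    # hypothetical import path; SGNS_PureML, SGD and the keyword names come from the diff
    from sawnergy.embedding_pureml import SGNS_PureML
    from pureml.optimizers import SGD

    # 1.0.7 style: optimizer class and kwargs were mandatory
    old_style = SGNS_PureML(V=1000, D=64, optim=SGD, optim_kwargs={"lr": 0.05})

    # 1.0.8 style: defaults to SGD with {"lr": 0.1}
    new_style = SGNS_PureML(V=1000, D=64)

One side effect of `optim_kwargs = optim_kwargs or {"lr": 0.1}`: an explicitly passed empty dict is falsy, so `optim_kwargs={}` is silently promoted to the default learning rate rather than deferring to the optimizer's own defaults.
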
@@ -147,7 +147,7 @@ class SGNS_PureML(NN):
  K = int(neg.data.shape[1])
  loss = (
  BCE(y_pos, x_pos_logits, from_logits=True)
- + K*BCE(y_neg, x_neg_logits, from_logits=True)
+ + Tensor(K)*BCE(y_neg, x_neg_logits, from_logits=True)
  )

  self.optim.zero_grad()
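
The only change in this hunk is that the negative-term weight K (a Python int) is now wrapped in Tensor(K) before multiplying the BCE term. The diff does not say why, but a common reason in small autograd libraries is that `int * Tensor` relies on a `__rmul__` dispatch that may not exist or may not build a graph node. A toy illustration of that failure mode with a hypothetical minimal tensor class (not PureML's actual Tensor):

    class MiniTensor:
        """Toy stand-in for an autograd tensor that defines __mul__ but not __rmul__."""
        def __init__(self, value):
            self.value = value
        def __mul__(self, other):
            other_value = other.value if isinstance(other, MiniTensor) else other
            return MiniTensor(self.value * other_value)

    neg_loss = MiniTensor(0.7)
    ok = MiniTensor(5) * neg_loss      # dispatches to MiniTensor.__mul__, like Tensor(K) * BCE(...)
    try:
        bad = 5 * neg_loss             # int.__mul__ returns NotImplemented and no __rmul__ exists
    except TypeError as exc:
        print("plain int on the left fails:", exc)
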
@@ -176,7 +176,9 @@ class SGNS_PureML(NN):
  "Wrong embedding matrix shape: "
  "self.in_emb.parameters[0].shape != (V, D)"
  )
- return W.numpy(copy=True, readonly=True)
+ arr = W.numpy(copy=True, readonly=True) # (V, D)
+ _logger.debug("In emb shape: %s", arr.shape)
+ return arr

  @property
  def out_embeddings(self) -> np.ndarray:
@@ -186,7 +188,9 @@ class SGNS_PureML(NN):
  "Wrong embedding matrix shape: "
  "self.out_emb.parameters[0].shape != (V, D)"
  )
- return W.numpy(copy=True, readonly=True)
+ arr = W.numpy(copy=True, readonly=True) # (V, D)
+ _logger.debug("Out emb shape: %s", arr.shape)
+ return arr

  @property
  def avg_embeddings(self) -> np.ndarray:
@@ -208,37 +212,29 @@ class SG_PureML(NN):
  """

  def __init__(self,
- V: int,
- D: int,
- *,
- seed: int | None = None,
- optim: Type[Optim],
- optim_kwargs: dict,
- lr_sched: Type[LRScheduler] | None = None,
- lr_sched_kwargs: dict | None = None,
- device: str | None = None):
+ V: int,
+ D: int,
+ *,
+ seed: int | None = None,
+ optim: Type[Optim] = SGD,
+ optim_kwargs: dict | None = None,
+ lr_sched: Type[LRScheduler] | None = None,
+ lr_sched_kwargs: dict | None = None,
+ device: str | None = None):
  """Initialize the plain Skip-Gram model (full softmax).

  Args:
  V: Vocabulary size (number of nodes/tokens).
  D: Embedding dimensionality.
  seed: Optional RNG seed (kept for API parity; not used in layer init).
- optim: Optimizer class to instantiate (e.g., `Adam`, `SGD`).
- optim_kwargs: Keyword arguments passed to the optimizer constructor.
+ optim: Optimizer class to instantiate. Defaults to plain SGD.
+ optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
  lr_sched: Optional learning-rate scheduler class.
- lr_sched_kwargs: Keyword arguments for the scheduler
- (required if `lr_sched` is provided).
- device: Device string (e.g., `"cuda"`). Accepted for parity, ignored
- by PureML (CPU-only).
-
- Notes:
- The encoder/decoder are implemented as:
- • `in_emb = Affine(V, D)` (acts on a one-hot center index)
- • `out_emb = Affine(D, V)`
- so forward pass produces vocabulary-sized logits.
+ lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
+ device: Device string (e.g., "cuda"). Accepted for parity, ignored by PureML (CPU-only).
  """
- if optim_kwargs is None:
- raise ValueError("optim_kwargs must be provided")
+
+ optim_kwargs = optim_kwargs or {"lr": 0.1}
  if lr_sched is not None and lr_sched_kwargs is None:
  raise ValueError("lr_sched_kwargs required when lr_sched is provided")

@@ -249,9 +245,7 @@ class SG_PureML(NN):
  self.out_emb = Affine(self.D, self.V)

  self.seed = None if seed is None else int(seed)
-
- # API compatibility: PureML is CPU-only
- self.device = "cpu"
+ self.device = "cpu" # API parity

  # optimizer / scheduler
  self.optim: Optim = optim(self.parameters, **optim_kwargs)
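
Taken together, the SG_PureML changes mirror the SGNS_PureML ones: the optimizer becomes optional and the CPU-only device note is compressed into an inline comment. A hedged usage sketch under the same assumption that the class lives in sawnergy's PureML backend (import path not shown in the diff):

    from sawnergy.embedding_pureml import SG_PureML   # hypothetical path; class name from the diff

    sg = SG_PureML(V=500, D=32)      # 1.0.8 defaults: optim=SGD, optim_kwargs={"lr": 0.1}
    print(sg.device)                 # always "cpu"; PureML ignores the device argument

    # the scheduler validation is unchanged: a scheduler class without kwargs still raises
    try:
        SG_PureML(V=500, D=32, lr_sched=object, lr_sched_kwargs=None)  # object is just a placeholder class
    except ValueError as exc:
        print(exc)                   # "lr_sched_kwargs required when lr_sched is provided"
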
@@ -344,7 +338,9 @@ class SG_PureML(NN):
  "Wrong embedding matrix shape: "
  "self.in_emb.parameters[0].shape != (V, D)"
  )
- return W.numpy(copy=True, readonly=True) # (V, D)
+ arr = W.numpy(copy=True, readonly=True) # (V, D)
+ _logger.debug("In emb shape: %s", arr.shape)
+ return arr

  @property
  def out_embeddings(self) -> np.ndarray:
@@ -356,7 +352,9 @@ class SG_PureML(NN):
  "Wrong embedding matrix shape: "
  "self.out_emb.parameters[0].shape != (D, V)"
  )
- return W.numpy(copy=True, readonly=True).T # (V, D)
+ arr = W.numpy(copy=True, readonly=True).T # (V, D)
+ _logger.debug("Out emb shape: %s", arr.shape)
+ return arr

  @property
  def avg_embeddings(self) -> np.ndarray:
@@ -10,6 +10,7 @@ from torch.optim.lr_scheduler import LRScheduler
  # built-in
  import logging
  from typing import Type
+ import warnings

  # *----------------------------------------------------*
  # GLOBALS
@@ -22,31 +23,64 @@ _logger = logging.getLogger(__name__)
  # *----------------------------------------------------*

  class SGNS_Torch:
- """PyTorch implementation of Skip-Gram with Negative Sampling."""
+ """PyTorch implementation of Skip-Gram with Negative Sampling.
+
+ DEPRECATED (temporary): This class currently produces noisy embeddings in
+ practice and is deprecated until further notice. The issue likely stems from
+ weight initialization, although the root cause has not yet been determined.
+
+ Prefer one of the following alternatives:
+ • Plain PyTorch Skip-Gram (full softmax): `SG_Torch`
+ • PureML-based implementations: `SGNS_PureML` or `SG_PureML` (if available)
+
+ This API may change or be removed once the root cause is resolved.
+ """

  def __init__(self,
- V: int,
- D: int,
- *,
+ V: int,
+ D: int,
+ *,
  seed: int | None = None,
- optim: Type[Optimizer],
- optim_kwargs: dict,
+ optim: Type[Optimizer] = torch.optim.SGD,
+ optim_kwargs: dict | None = None,
  lr_sched: Type[LRScheduler] | None = None,
  lr_sched_kwargs: dict | None = None,
  device: str | None = None):
- """
+ """Initialize SGNS (negative sampling) in PyTorch.
+
+ DEPRECATION WARNING:
+ This implementation is temporarily deprecated for producing noisy
+ embeddings. The issue likely stems from weight initialization, though
+ the exact root cause has not been conclusively determined. Please use
+ `SG_Torch` (plain Skip-Gram with full softmax) or the PureML-based
+ `SGNS_PureML` / `SG_PureML` models instead.
+
  Args:
  V: Vocabulary size (number of nodes).
  D: Embedding dimensionality.
  seed: Optional RNG seed for PyTorch.
- optim: Optimizer class to instantiate.
- optim_kwargs: Keyword arguments for the optimizer.
+ optim: Optimizer class to instantiate. Defaults to plain SGD.
+ optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
  lr_sched: Optional learning-rate scheduler class.
  lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
- device: Target device string (e.g. ``"cuda"``). Defaults to CUDA if available, else CPU.
+ device: Target device string (e.g. "cuda"). Defaults to CUDA if available, else CPU.
  """
- if optim_kwargs is None:
- raise ValueError("optim_kwargs must be provided")
+
+ # --- runtime deprecation notice ---
+ warnings.warn(
+ "SGNS_Torch is temporarily deprecated: it currently produces noisy "
+ "embeddings (likely due to weight initialization). Use SG_Torch "
+ "(plain Skip-Gram, full softmax) or the PureML-based SG/SGNS classes.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ _logger.warning(
+ "DEPRECATED: SGNS_Torch currently produces noisy embeddings "
+ "(likely weight initialization). Prefer SG_Torch or PureML SG/SGNS."
+ )
+ # ----------------------------------
+
+ optim_kwargs = optim_kwargs or {"lr": 0.1}
  if lr_sched is not None and lr_sched_kwargs is None:
  raise ValueError("lr_sched_kwargs required when lr_sched is provided")

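The hunks from here on belong to the PyTorch backend (the file path is not shown in this diff view). The deprecation is advertised both in the docstring and at runtime via warnings.warn with DeprecationWarning, which Python often filters out by default. A small sketch of how a caller could surface and check the warning (the import path is an assumption; the class name and warning category come from the hunk above):

    import warnings
    from sawnergy.embedding_torch import SGNS_Torch   # hypothetical path; class name from the diff

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")               # make sure the DeprecationWarning is recorded
        model = SGNS_Torch(V=1000, D=64)              # 1.0.8 defaults: torch.optim.SGD, {"lr": 0.1}

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
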
@@ -167,19 +201,18 @@ class SGNS_Torch:

  mean_loss = epoch_loss / max(batches, 1)
  _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
-
- @property
- def embeddings(self) -> np.ndarray:
- """Return the input embedding matrix as a NumPy array."""
- return self.in_emb.weight.detach().cpu().numpy()

  @property
  def in_embeddings(self) -> np.ndarray:
- return self.in_emb.weight.detach().cpu().numpy()
+ W = self.in_emb.weight.detach().cpu().numpy()
+ _logger.debug("In emb shape: %s", W.shape)
+ return W

  @property
  def out_embeddings(self) -> np.ndarray:
- return self.out_emb.weight.detach().cpu().numpy()
+ W = self.out_emb.weight.detach().cpu().numpy()
+ _logger.debug("Out emb shape: %s", W.shape)
+ return W

  @property
  def avg_embeddings(self) -> np.ndarray:
@@ -192,20 +225,37 @@ class SGNS_Torch:
  return self

  class SG_Torch:
+ """PyTorch implementation of Skip-Gram."""

  def __init__(self,
- V: int,
- D: int,
- *,
+ V: int,
+ D: int,
+ *,
  seed: int | None = None,
- optim: Type[Optimizer],
- optim_kwargs: dict,
+ optim: Type[Optimizer] = torch.optim.SGD,
+ optim_kwargs: dict | None = None,
  lr_sched: Type[LRScheduler] | None = None,
  lr_sched_kwargs: dict | None = None,
  device: str | None = None):
+ """Initialize the plain Skip-Gram (full softmax) model in PyTorch.

- if optim_kwargs is None:
- raise ValueError("optim_kwargs must be provided")
+ Args:
+ V: Vocabulary size (number of nodes/tokens).
+ D: Embedding dimensionality.
+ seed: Optional RNG seed for reproducibility.
+ optim: Optimizer class to instantiate. Defaults to :class:`torch.optim.SGD`.
+ optim_kwargs: Keyword args for the optimizer. Defaults to ``{"lr": 0.1}``.
+ lr_sched: Optional learning-rate scheduler class.
+ lr_sched_kwargs: Keyword args for the scheduler (required if ``lr_sched`` is provided).
+ device: Target device string (e.g., ``"cuda"``). Defaults to CUDA if available, else CPU.
+
+ Notes:
+ The encoder/decoder are linear layers acting on one-hot centers:
+ • ``in_emb = nn.Linear(V, D)``
+ • ``out_emb = nn.Linear(D, V)``
+ Forward pass produces vocabulary-sized logits and is trained with CrossEntropyLoss.
+ """
+ optim_kwargs = optim_kwargs or {"lr": 0.1}
  if lr_sched is not None and lr_sched_kwargs is None:
  raise ValueError("lr_sched_kwargs required when lr_sched is provided")

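The new SG_Torch docstring documents the encoder/decoder as Linear layers over one-hot centers. A standalone sketch of that shape flow, reconstructed from the Notes above rather than copied from the package (the actual forward/fit code is not part of this diff):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    V, D, batch = 1000, 64, 8
    in_emb = nn.Linear(V, D)                               # encoder: one-hot center -> D-dim embedding
    out_emb = nn.Linear(D, V)                              # decoder: embedding -> vocabulary logits

    centers = torch.randint(0, V, (batch,))
    one_hot = F.one_hot(centers, num_classes=V).float()    # (batch, V)
    logits = out_emb(in_emb(one_hot))                      # (batch, V) vocabulary-sized logits

    contexts = torch.randint(0, V, (batch,))               # dummy context targets for illustration
    loss = nn.CrossEntropyLoss()(logits, contexts)
    print(logits.shape, float(loss))

This also explains the transposed in_embeddings property below: with nn.Linear(V, D), the weight has shape (D, V), so .T recovers the (V, D) embedding matrix.
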
@@ -225,7 +275,6 @@ class SG_Torch:
  _logger.info("SG_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)

  params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
-
  # optimizer / scheduler
  self.opt = optim(params=params, **optim_kwargs)
  self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None
@@ -288,11 +337,15 @@ class SG_Torch:

  @property
  def in_embeddings(self) -> np.ndarray:
- return self.in_emb.weight.detach().T.cpu().numpy()
+ W = self.in_emb.weight.detach().T.cpu().numpy()
+ _logger.debug("In emb shape: %s", W.shape)
+ return W

  @property
  def out_embeddings(self) -> np.ndarray:
- return self.out_emb.weight.detach().cpu().numpy()
+ W = self.out_emb.weight.detach().cpu().numpy()
+ _logger.debug("Out emb shape: %s", W.shape)
+ return W

  @property
  def avg_embeddings(self) -> np.ndarray: