sawnergy 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sawnergy might be problematic.
- sawnergy/embedding/SGNS_pml.py +36 -38
- sawnergy/embedding/SGNS_torch.py +82 -29
- sawnergy/embedding/embedder.py +325 -245
- sawnergy/embedding/visualizer.py +9 -5
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/METADATA +39 -40
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/RECORD +10 -10
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/WHEEL +0 -0
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/licenses/NOTICE +0 -0
- {sawnergy-1.0.7.dist-info → sawnergy-1.0.8.dist-info}/top_level.txt +0 -0
sawnergy/embedding/SGNS_pml.py
CHANGED
@@ -6,7 +6,7 @@ from pureml.machinery import Tensor
 from pureml.layers import Embedding, Affine
 from pureml.losses import BCE, CCE
 from pureml.general_math import sum as t_sum
-from pureml.optimizers import Optim, LRScheduler
+from pureml.optimizers import Optim, LRScheduler, SGD
 from pureml.training_utils import TensorDataset, DataLoader, one_hot
 from pureml.base import NN
 
@@ -32,8 +32,8 @@ class SGNS_PureML(NN):
                  D: int,
                  *,
                  seed: int | None = None,
-                 optim: Type[Optim],
-                 optim_kwargs: dict,
+                 optim: Type[Optim] = SGD,
+                 optim_kwargs: dict | None = None,
                  lr_sched: Type[LRScheduler] | None = None,
                  lr_sched_kwargs: dict | None = None,
                  device: str | None = None):
@@ -42,15 +42,15 @@ class SGNS_PureML(NN):
             V: Vocabulary size (number of nodes).
             D: Embedding dimensionality.
             seed: Optional RNG seed for negative sampling.
-            optim: Optimizer class to instantiate.
-            optim_kwargs: Keyword arguments for the optimizer
+            optim: Optimizer class to instantiate. Defaults to plain SGD.
+            optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
             lr_sched: Optional learning-rate scheduler class.
             lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
             device: Target device string (e.g. "cuda"); accepted for API parity, ignored by PureML.
         """
 
-
-
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
+
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
@@ -147,7 +147,7 @@ class SGNS_PureML(NN):
         K = int(neg.data.shape[1])
         loss = (
             BCE(y_pos, x_pos_logits, from_logits=True)
-            + K*BCE(y_neg, x_neg_logits, from_logits=True)
+            + Tensor(K)*BCE(y_neg, x_neg_logits, from_logits=True)
         )
 
         self.optim.zero_grad()
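The @@ -147 hunk above is a behavioral fix: the negative-sample count K is now wrapped as `Tensor(K)` before scaling the negative BCE term, presumably so the product stays inside pureml's Tensor operations instead of relying on plain int-by-Tensor multiplication. For context, this loss corresponds to the standard SGNS objective, sketched here in its usual form (Mikolov-style notation, not identifiers from the package; v_c is the input embedding of the center word, u the output embeddings):

    \mathcal{L}(c, o) = -\log\sigma\left(u_o^{\top} v_c\right) - \sum_{k=1}^{K} \log\sigma\left(-u_{n_k}^{\top} v_c\right)

If `BCE(y_neg, x_neg_logits, from_logits=True)` averages over the K negative columns, multiplying by K restores the summed second term, which is why the scale factor appears at all.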
@@ -176,7 +176,9 @@ class SGNS_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.in_emb.parameters[0].shape != (V, D)"
             )
-
+        arr = W.numpy(copy=True, readonly=True)  # (V, D)
+        _logger.debug("In emb shape: %s", arr.shape)
+        return arr
 
     @property
     def out_embeddings(self) -> np.ndarray:
@@ -186,7 +188,9 @@ class SGNS_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.out_emb.parameters[0].shape != (V, D)"
             )
-
+        arr = W.numpy(copy=True, readonly=True)  # (V, D)
+        _logger.debug("Out emb shape: %s", arr.shape)
+        return arr
 
     @property
     def avg_embeddings(self) -> np.ndarray:
@@ -208,37 +212,29 @@ class SG_PureML(NN):
     """
 
     def __init__(self,
-
-
-
-
-
-
-
-
-
+                 V: int,
+                 D: int,
+                 *,
+                 seed: int | None = None,
+                 optim: Type[Optim] = SGD,
+                 optim_kwargs: dict | None = None,
+                 lr_sched: Type[LRScheduler] | None = None,
+                 lr_sched_kwargs: dict | None = None,
+                 device: str | None = None):
         """Initialize the plain Skip-Gram model (full softmax).
 
         Args:
             V: Vocabulary size (number of nodes/tokens).
             D: Embedding dimensionality.
             seed: Optional RNG seed (kept for API parity; not used in layer init).
-            optim: Optimizer class to instantiate
-            optim_kwargs: Keyword arguments
+            optim: Optimizer class to instantiate. Defaults to plain SGD.
+            optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
             lr_sched: Optional learning-rate scheduler class.
-            lr_sched_kwargs: Keyword arguments for the scheduler
-
-            device: Device string (e.g., `"cuda"`). Accepted for parity, ignored
-                by PureML (CPU-only).
-
-        Notes:
-            The encoder/decoder are implemented as:
-            • `in_emb = Affine(V, D)` (acts on a one-hot center index)
-            • `out_emb = Affine(D, V)`
-            so forward pass produces vocabulary-sized logits.
+            lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
+            device: Device string (e.g., "cuda"). Accepted for parity, ignored by PureML (CPU-only).
         """
-
-
+
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
@@ -249,9 +245,7 @@ class SG_PureML(NN):
         self.out_emb = Affine(self.D, self.V)
 
         self.seed = None if seed is None else int(seed)
-
-        # API compatibility: PureML is CPU-only
-        self.device = "cpu"
+        self.device = "cpu"  # API parity
 
         # optimizer / scheduler
         self.optim: Optim = optim(self.parameters, **optim_kwargs)
@@ -344,7 +338,9 @@ class SG_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.in_emb.parameters[0].shape != (V, D)"
             )
-
+        arr = W.numpy(copy=True, readonly=True)  # (V, D)
+        _logger.debug("In emb shape: %s", arr.shape)
+        return arr
 
     @property
     def out_embeddings(self) -> np.ndarray:
@@ -356,7 +352,9 @@ class SG_PureML(NN):
                 "Wrong embedding matrix shape: "
                 "self.out_emb.parameters[0].shape != (D, V)"
             )
-
+        arr = W.numpy(copy=True, readonly=True).T  # (V, D)
+        _logger.debug("Out emb shape: %s", arr.shape)
+        return arr
 
     @property
     def avg_embeddings(self) -> np.ndarray:
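Taken together, the PureML changes make the optimizer optional in both constructors. A minimal usage sketch (class and module names as they appear in this diff; the argument values are illustrative):

    from sawnergy.embedding.SGNS_pml import SGNS_PureML, SG_PureML

    # Since 1.0.8 both models construct without an explicit optimizer:
    # optim defaults to pureml's SGD and optim_kwargs to {"lr": 0.1}.
    sgns = SGNS_PureML(V=1000, D=64, seed=42)
    sg = SG_PureML(V=1000, D=64, seed=42)

One edge case of the `optim_kwargs = optim_kwargs or {"lr": 0.1}` idiom: an explicitly passed empty dict is falsy, so it is silently replaced by the default; testing `optim_kwargs is None` instead would preserve it.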
sawnergy/embedding/SGNS_torch.py
CHANGED
@@ -10,6 +10,7 @@ from torch.optim.lr_scheduler import LRScheduler
 # built-in
 import logging
 from typing import Type
+import warnings
 
 # *----------------------------------------------------*
 # GLOBALS
@@ -22,31 +23,64 @@ _logger = logging.getLogger(__name__)
 # *----------------------------------------------------*
 
 class SGNS_Torch:
-    """PyTorch implementation of Skip-Gram with Negative Sampling.
+    """PyTorch implementation of Skip-Gram with Negative Sampling.
+
+    DEPRECATED (temporary): This class currently produces noisy embeddings in
+    practice and is deprecated until further notice. The issue likely stems from
+    weight initialization, although the root cause has not yet been determined.
+
+    Prefer one of the following alternatives:
+      • Plain PyTorch Skip-Gram (full softmax): `SG_Torch`
+      • PureML-based implementations: `SGNS_PureML` or `SG_PureML` (if available)
+
+    This API may change or be removed once the root cause is resolved.
+    """
 
     def __init__(self,
-
-
-
+                 V: int,
+                 D: int,
+                 *,
                  seed: int | None = None,
-                 optim: Type[Optimizer],
-                 optim_kwargs: dict,
+                 optim: Type[Optimizer] = torch.optim.SGD,
+                 optim_kwargs: dict | None = None,
                  lr_sched: Type[LRScheduler] | None = None,
                  lr_sched_kwargs: dict | None = None,
                  device: str | None = None):
-        """
+        """Initialize SGNS (negative sampling) in PyTorch.
+
+        DEPRECATION WARNING:
+            This implementation is temporarily deprecated for producing noisy
+            embeddings. The issue likely stems from weight initialization, though
+            the exact root cause has not been conclusively determined. Please use
+            `SG_Torch` (plain Skip-Gram with full softmax) or the PureML-based
+            `SGNS_PureML` / `SG_PureML` models instead.
+
         Args:
             V: Vocabulary size (number of nodes).
             D: Embedding dimensionality.
             seed: Optional RNG seed for PyTorch.
-            optim: Optimizer class to instantiate.
-            optim_kwargs: Keyword arguments for the optimizer.
+            optim: Optimizer class to instantiate. Defaults to plain SGD.
+            optim_kwargs: Keyword arguments for the optimizer. Defaults to {"lr": 0.1}.
             lr_sched: Optional learning-rate scheduler class.
             lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
-            device: Target device string (e.g.
+            device: Target device string (e.g. "cuda"). Defaults to CUDA if available, else CPU.
         """
-
-
+
+        # --- runtime deprecation notice ---
+        warnings.warn(
+            "SGNS_Torch is temporarily deprecated: it currently produces noisy "
+            "embeddings (likely due to weight initialization). Use SG_Torch "
+            "(plain Skip-Gram, full softmax) or the PureML-based SG/SGNS classes.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        _logger.warning(
+            "DEPRECATED: SGNS_Torch currently produces noisy embeddings "
+            "(likely weight initialization). Prefer SG_Torch or PureML SG/SGNS."
+        )
+        # ----------------------------------
+
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
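One practical note on the new runtime notice: Python ignores `DeprecationWarning` by default unless the warning is attributed to code running directly under `__main__`, so callers constructing the class from an imported module may only ever see the `_logger.warning(...)` line. A minimal sketch of opting in from application code (standard-library behavior, nothing sawnergy-specific):

    import warnings

    # Surface DeprecationWarning raised from library code; by default it is
    # hidden unless the warning is attributed to code in __main__.
    warnings.simplefilter("default", DeprecationWarning)

The same effect is available from the command line via `python -W default::DeprecationWarning app.py`.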
@@ -167,19 +201,18 @@ class SGNS_Torch:
 
         mean_loss = epoch_loss / max(batches, 1)
         _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
-
-    @property
-    def embeddings(self) -> np.ndarray:
-        """Return the input embedding matrix as a NumPy array."""
-        return self.in_emb.weight.detach().cpu().numpy()
 
     @property
     def in_embeddings(self) -> np.ndarray:
-
+        W = self.in_emb.weight.detach().cpu().numpy()
+        _logger.debug("In emb shape: %s", W.shape)
+        return W
 
     @property
     def out_embeddings(self) -> np.ndarray:
-
+        W = self.out_emb.weight.detach().cpu().numpy()
+        _logger.debug("Out emb shape: %s", W.shape)
+        return W
 
     @property
     def avg_embeddings(self) -> np.ndarray:
@@ -192,20 +225,37 @@ class SGNS_Torch:
         return self
 
 class SG_Torch:
+    """PyTorch implementation of Skip-Gram."""
 
     def __init__(self,
-
-
-
+                 V: int,
+                 D: int,
+                 *,
                  seed: int | None = None,
-                 optim: Type[Optimizer],
-                 optim_kwargs: dict,
+                 optim: Type[Optimizer] = torch.optim.SGD,
+                 optim_kwargs: dict | None = None,
                  lr_sched: Type[LRScheduler] | None = None,
                  lr_sched_kwargs: dict | None = None,
                  device: str | None = None):
+        """Initialize the plain Skip-Gram (full softmax) model in PyTorch.
 
-
-
+        Args:
+            V: Vocabulary size (number of nodes/tokens).
+            D: Embedding dimensionality.
+            seed: Optional RNG seed for reproducibility.
+            optim: Optimizer class to instantiate. Defaults to :class:`torch.optim.SGD`.
+            optim_kwargs: Keyword args for the optimizer. Defaults to ``{"lr": 0.1}``.
+            lr_sched: Optional learning-rate scheduler class.
+            lr_sched_kwargs: Keyword args for the scheduler (required if ``lr_sched`` is provided).
+            device: Target device string (e.g., ``"cuda"``). Defaults to CUDA if available, else CPU.
+
+        Notes:
+            The encoder/decoder are linear layers acting on one-hot centers:
+              • ``in_emb = nn.Linear(V, D)``
+              • ``out_emb = nn.Linear(D, V)``
+            Forward pass produces vocabulary-sized logits and is trained with CrossEntropyLoss.
+        """
+        optim_kwargs = optim_kwargs or {"lr": 0.1}
         if lr_sched is not None and lr_sched_kwargs is None:
             raise ValueError("lr_sched_kwargs required when lr_sched is provided")
 
@@ -225,7 +275,6 @@ class SG_Torch:
         _logger.info("SG_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
 
         params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
-
         # optimizer / scheduler
         self.opt = optim(params=params, **optim_kwargs)
         self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None
@@ -288,11 +337,15 @@ class SG_Torch:
 
     @property
     def in_embeddings(self) -> np.ndarray:
-
+        W = self.in_emb.weight.detach().T.cpu().numpy()
+        _logger.debug("In emb shape: %s", W.shape)
+        return W
 
     @property
     def out_embeddings(self) -> np.ndarray:
-
+        W = self.out_emb.weight.detach().cpu().numpy()
+        _logger.debug("Out emb shape: %s", W.shape)
+        return W
 
     @property
     def avg_embeddings(self) -> np.ndarray:
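The SG_Torch notes describe full-softmax Skip-Gram as two linear maps over a one-hot center. A self-contained sketch of that architecture in plain PyTorch (shapes and layer names mirror the docstring; the batch and training step are illustrative, not code from the package):

    import torch
    import torch.nn as nn

    V, D = 1000, 64                                  # vocab size, embedding dim
    in_emb = nn.Linear(V, D)                         # one-hot center -> D-dim embedding
    out_emb = nn.Linear(D, V)                        # embedding -> vocab-sized logits
    loss_fn = nn.CrossEntropyLoss()

    centers = torch.randint(0, V, (32,))             # batch of center indices
    contexts = torch.randint(0, V, (32,))            # batch of context targets
    x = nn.functional.one_hot(centers, V).float()    # (32, V) one-hot inputs

    logits = out_emb(in_emb(x))                      # (32, V) full-softmax logits
    loss = loss_fn(logits, contexts)                 # CE over the whole vocabulary
    loss.backward()

Because `nn.Linear(V, D).weight` is stored as `(out_features, in_features) = (D, V)`, recovering a `(V, D)` embedding matrix requires a transpose, which matches the `self.in_emb.weight.detach().T.cpu().numpy()` line in the final hunk.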