sawnergy 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -3,11 +3,11 @@ from __future__ import annotations
  # third party
  import numpy as np
  from pureml.machinery import Tensor
- from pureml.layers import Embedding
- from pureml.losses import BCE
+ from pureml.layers import Embedding, Affine
+ from pureml.losses import BCE, CCE
  from pureml.general_math import sum as t_sum
  from pureml.optimizers import Optim, LRScheduler
- from pureml.training_utils import TensorDataset, DataLoader
+ from pureml.training_utils import TensorDataset, DataLoader, one_hot
  from pureml.base import NN

  # built-in
@@ -34,45 +34,73 @@ class SGNS_PureML(NN):
  seed: int | None = None,
  optim: Type[Optim],
  optim_kwargs: dict,
- lr_sched: Type[LRScheduler],
- lr_sched_kwargs: dict):
+ lr_sched: Type[LRScheduler] | None = None,
+ lr_sched_kwargs: dict | None = None,
+ device: str | None = None):
  """
  Args:
  V: Vocabulary size (number of nodes).
  D: Embedding dimensionality.
  seed: Optional RNG seed for negative sampling.
- optim: PureML optimizer class.
- optim_kwargs: Keyword arguments forwarded to the optimizer.
- lr_sched: PureML learning-rate scheduler class.
- lr_sched_kwargs: Keyword arguments forwarded to the scheduler.
+ optim: Optimizer class to instantiate.
+ optim_kwargs: Keyword arguments for the optimizer (required).
+ lr_sched: Optional learning-rate scheduler class.
+ lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
+ device: Target device string (e.g. "cuda"); accepted for API parity, ignored by PureML.
  """
+
+ if optim_kwargs is None:
+ raise ValueError("optim_kwargs must be provided")
+ if lr_sched is not None and lr_sched_kwargs is None:
+ raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
  self.V, self.D = int(V), int(D)
- self.in_emb = Embedding(V, D)
- self.out_emb = Embedding(V, D)

+ # embeddings
+ self.in_emb = Embedding(self.V, self.D)
+ self.out_emb = Embedding(self.V, self.D)
+
+ # seed + RNG for negative sampling
  self.seed = None if seed is None else int(seed)
  self._rng = np.random.default_rng(self.seed)
+ if self.seed is not None:
+ # optional: also set global NumPy seed for any non-RNG paths
+ np.random.seed(self.seed)

- self.optim: Optim = optim(self.parameters, **optim_kwargs)
- self.lr_sched: LRScheduler = lr_sched(**lr_sched_kwargs)
- _logger.info("SGNS_PureML init: V=%d D=%d seed=%s", self.V, self.D, self.seed)
+ # API compatibility: PureML is CPU-only
+ self.device = "cpu"

- def _sample_neg(self, B: int, K: int, dist: np.ndarray):
- """Draw negative samples according to the provided unigram distribution."""
- if dist.ndim != 1 or dist.size != self.V:
- raise ValueError(f"noise_dist must be 1-D with length {self.V}; got {dist.shape}")
+ # optimizer / scheduler
+ self.optim: Optim = optim(self.parameters, **optim_kwargs)
+ self.lr_sched: LRScheduler | None = (
+ lr_sched(optim=self.optim, **lr_sched_kwargs) if lr_sched is not None else None
+ )
+
+ _logger.info(
+ "SGNS_PureML init: V=%d D=%d device=%s seed=%s",
+ self.V, self.D, self.device, self.seed
+ )
+
+ def _sample_neg(self, B: int, K: int, dist: np.ndarray) -> np.ndarray:
  return self._rng.choice(self.V, size=(B, K), replace=True, p=dist)

- def predict(self, center: Tensor, pos: Tensor, neg: Tensor) -> Tensor:
- """Compute positive/negative logits for SGNS."""
- c = self.in_emb(center)
- pos_e = self.out_emb(pos)
- neg_e = self.out_emb(neg)
- pos_logits = t_sum(c * pos_e, axis=-1)
- neg_logits = t_sum(c[:, None, :] * neg_e, axis=-1)
- # ^^^
- # (B,1,D) * (B,K,D) → (B,K,D) → sum D → (B,K)
+ def predict(self, center: Tensor, pos: Tensor, neg: Tensor) -> tuple[Tensor, Tensor]:
+ """Compute positive/negative logits for SGNS.

+ Shapes:
+ center: (B,)
+ pos: (B,)
+ neg: (B, K)
+ Returns:
+ pos_logits: (B,)
+ neg_logits: (B, K)
+ """
+ c = self.in_emb(center) # (B, D)
+ pos_e = self.out_emb(pos) # (B, D)
+ neg_e = self.out_emb(neg) # (B, K, D)
+
+ pos_logits = t_sum(c * pos_e, axis=-1) # (B,)
+ neg_logits = t_sum(c[:, None, :] * neg_e, axis=-1) # (B, K)
  return pos_logits, neg_logits

  def fit(self,
@@ -89,47 +117,254 @@ class SGNS_PureML(NN):
  "SGNS_PureML fit: epochs=%d batch=%d negatives=%d shuffle=%s",
  num_epochs, batch_size, num_negative_samples, shuffle_data
  )
- data = TensorDataset(centers, contexts)

+ if noise_dist.ndim != 1 or noise_dist.size != self.V:
+ raise ValueError(f"noise_dist must be 1-D with length {self.V}; got {noise_dist.shape}")
+ dist = np.asarray(noise_dist, dtype=np.float64)
+ if np.any(dist < 0):
+ raise ValueError("noise_dist has negative entries")
+ s = dist.sum()
+ if not np.isfinite(s) or s <= 0:
+ raise ValueError("noise_dist must have positive finite sum")
+ if abs(s - 1.0) > 1e-6:
+ dist = dist / s
+
+ data = TensorDataset(centers, contexts)
  for epoch in range(1, num_epochs + 1):
  epoch_loss = 0.0
  batches = 0
+
  for cen, pos in DataLoader(data, batch_size=batch_size, shuffle=shuffle_data):
- neg = self._sample_neg(batch_size, num_negative_samples, noise_dist)
+ B = cen.data.shape[0] if isinstance(cen, Tensor) else len(cen)

+ neg_idx_np = self._sample_neg(B, num_negative_samples, dist)
+ neg = Tensor(neg_idx_np, requires_grad=False)
  x_pos_logits, x_neg_logits = self(cen, pos, neg)

- y_pos = Tensor(np.ones_like(x_pos_logits.data))
- y_neg = Tensor(np.zeros_like(x_neg_logits.data))
+ y_pos = Tensor(np.ones_like(x_pos_logits.numpy(copy=False)), requires_grad=False)
+ y_neg = Tensor(np.zeros_like(x_neg_logits.numpy(copy=False)), requires_grad=False)

- loss = BCE(y_pos, x_pos_logits, from_logits=True) + BCE(y_neg, x_neg_logits, from_logits=True)
+ K = int(neg.data.shape[1])
+ loss = (
+ BCE(y_pos, x_pos_logits, from_logits=True)
+ + K*BCE(y_neg, x_neg_logits, from_logits=True)
+ )

  self.optim.zero_grad()
  loss.backward()
  self.optim.step()
-
- if lr_step_per_batch:
+
+ if lr_step_per_batch and self.lr_sched is not None:
  self.lr_sched.step()

- loss_value = float(np.asarray(loss.data).mean())
+ loss_value = float(np.asarray(loss.data))
  epoch_loss += loss_value
  batches += 1
  _logger.debug("Epoch %d batch %d loss=%.6f", epoch, batches, loss_value)

- if not lr_step_per_batch:
+ if (not lr_step_per_batch) and (self.lr_sched is not None):
  self.lr_sched.step()

  mean_loss = epoch_loss / max(batches, 1)
  _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)

  @property
- def embeddings(self) -> np.ndarray:
- """Return the input embedding matrix as a NumPy array."""
- W: Tensor = self.in_emb.parameters[0]
- return np.asarray(W.data)
+ def in_embeddings(self) -> np.ndarray:
+ W: Tensor = self.in_emb.parameters[0] # (V, D)
+ if W.shape != (self.V, self.D):
+ raise RuntimeError(
+ "Wrong embedding matrix shape: "
+ "self.in_emb.parameters[0].shape != (V, D)"
+ )
+ return W.numpy(copy=True, readonly=True)
+
+ @property
+ def out_embeddings(self) -> np.ndarray:
+ W: Tensor = self.out_emb.parameters[0] # (V, D)
+ if W.shape != (self.V, self.D):
+ raise RuntimeError(
+ "Wrong embedding matrix shape: "
+ "self.out_emb.parameters[0].shape != (V, D)"
+ )
+ return W.numpy(copy=True, readonly=True)
+
+ @property
+ def avg_embeddings(self) -> np.ndarray:
+ return 0.5 * (self.in_embeddings + self.out_embeddings)
+
+ class SG_PureML(NN):
+ """Plain Skip-Gram (full softmax) in PureML.
+
+ Trains two affine layers to emulate the classic Skip-Gram objective with a
+ **full** softmax over the vocabulary (no negative sampling):
+
+ x = one_hot(center, V) # (B, V)
+ y = x @ W_in + b_in # (B, D)
+ logits = y @ W_out + b_out # (B, V)
+ loss = CCE(one_hot(context, V), logits, from_logits=True)
+
+ The learnable “input” embeddings are the rows of `W_in` (shape `(V, D)`), and
+ the “output” embeddings are the rows of `W_outᵀ` (also `(V, D)`).
+ """
+
+ def __init__(self,
+ V: int,
+ D: int,
+ *,
+ seed: int | None = None,
+ optim: Type[Optim],
+ optim_kwargs: dict,
+ lr_sched: Type[LRScheduler] | None = None,
+ lr_sched_kwargs: dict | None = None,
+ device: str | None = None):
+ """Initialize the plain Skip-Gram model (full softmax).
+
+ Args:
+ V: Vocabulary size (number of nodes/tokens).
+ D: Embedding dimensionality.
+ seed: Optional RNG seed (kept for API parity; not used in layer init).
+ optim: Optimizer class to instantiate (e.g., `Adam`, `SGD`).
+ optim_kwargs: Keyword arguments passed to the optimizer constructor.
+ lr_sched: Optional learning-rate scheduler class.
+ lr_sched_kwargs: Keyword arguments for the scheduler
+ (required if `lr_sched` is provided).
+ device: Device string (e.g., `"cuda"`). Accepted for parity, ignored
+ by PureML (CPU-only).
+
+ Notes:
+ The encoder/decoder are implemented as:
+ • `in_emb = Affine(V, D)` (acts on a one-hot center index)
+ • `out_emb = Affine(D, V)`
+ so forward pass produces vocabulary-sized logits.
+ """
+ if optim_kwargs is None:
+ raise ValueError("optim_kwargs must be provided")
+ if lr_sched is not None and lr_sched_kwargs is None:
+ raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
+ self.V, self.D = int(V), int(D)
+
+ # input/output “embedding” projections
+ self.in_emb = Affine(self.V, self.D)
+ self.out_emb = Affine(self.D, self.V)
+
+ self.seed = None if seed is None else int(seed)
+
+ # API compatibility: PureML is CPU-only
+ self.device = "cpu"
+
+ # optimizer / scheduler
+ self.optim: Optim = optim(self.parameters, **optim_kwargs)
+ self.lr_sched: LRScheduler | None = (
+ lr_sched(optim=self.optim, **lr_sched_kwargs) if lr_sched is not None else None
+ )
+
+ _logger.info(
+ "SG_PureML init: V=%d D=%d device=%s seed=%s",
+ self.V, self.D, self.device, self.seed
+ )
+
+ def predict(self, center: Tensor) -> Tensor:
+ """Return vocabulary logits for each center index.
+
+ Args:
+ center: Tensor of center indices with shape `(B,)` and integer dtype.
+
+ Returns:
+ Tensor: Logits over the vocabulary with shape `(B, V)`.
+ """
+ c = one_hot(dims=self.V, label=center) # (B, V)
+ y = self.in_emb(c) # (B, D)
+ z = self.out_emb(y) # (B, V)
+ return z
+
+ def fit(self,
+ centers: np.ndarray,
+ contexts: np.ndarray,
+ num_epochs: int,
+ batch_size: int,
+ shuffle_data: bool,
+ lr_step_per_batch: bool,
+ **_ignore):
+ """Train Skip-Gram with full softmax on center/context pairs.
+
+ Args:
+ centers: Array of center indices, shape `(N,)`, dtype integer in `[0, V)`.
+ contexts: Array of context (target) indices, shape `(N,)`, dtype integer.
+ num_epochs: Number of passes over the dataset.
+ batch_size: Mini-batch size.
+ shuffle_data: Whether to shuffle pairs each epoch.
+ lr_step_per_batch: If True, call `lr_sched.step()` after every batch
+ (when a scheduler is provided). If False, step once per epoch.
+ **_ignore: Ignored kwargs for API compatibility with SGNS.
+
+ Optimization:
+ Uses `CCE(one_hot(context), logits, from_logits=True)` where
+ `logits = predict(center)`. Scheduler stepping obeys `lr_step_per_batch`.
+ """
+ _logger.info(
+ "SG_PureML fit: epochs=%d batch=%d shuffle=%s",
+ num_epochs, batch_size, shuffle_data
+ )
+ data = TensorDataset(centers, contexts)
+
+ for epoch in range(1, num_epochs + 1):
+ epoch_loss = 0.0
+ batches = 0
+
+ for cen, ctx in DataLoader(data, batch_size=batch_size, shuffle=shuffle_data):
+ logits = self(cen) # (B, V)
+ y = one_hot(self.V, label=ctx) # (B, V)
+ loss = CCE(y, logits, from_logits=True) # scalar
+
+ self.optim.zero_grad()
+ loss.backward()
+ self.optim.step()
+
+ if lr_step_per_batch and self.lr_sched is not None:
+ self.lr_sched.step()
+
+ loss_value = float(np.asarray(loss.data))
+ epoch_loss += loss_value
+ batches += 1
+ _logger.debug("Epoch %d batch %d loss=%.6f", epoch, batches, loss_value)
+
+ if (not lr_step_per_batch) and (self.lr_sched is not None):
+ self.lr_sched.step()
+
+ mean_loss = epoch_loss / max(batches, 1)
+ _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
+
+ @property
+ def in_embeddings(self) -> np.ndarray:
+ """Input embeddings matrix `W_in` as `(V, D)` (copy, read-only)."""
+ W = self.in_emb.parameters[0] # (V, D)
+ if W.shape != (self.V, self.D):
+ raise RuntimeError(
+ "Wrong embedding matrix shape: "
+ "self.in_emb.parameters[0].shape != (V, D)"
+ )
+ return W.numpy(copy=True, readonly=True) # (V, D)
+
+ @property
+ def out_embeddings(self) -> np.ndarray:
+ """Output embeddings matrix `W_outᵀ` as `(V, D)` (copy, read-only).
+ (`out_emb.parameters[0]` is `(D, V)`, so we transpose.)"""
+ W = self.out_emb.parameters[0] # (D, V)
+ if W.shape != (self.D, self.V):
+ raise RuntimeError(
+ "Wrong embedding matrix shape: "
+ "self.out_emb.parameters[0].shape != (D, V)"
+ )
+ return W.numpy(copy=True, readonly=True).T # (V, D)
+
+ @property
+ def avg_embeddings(self) -> np.ndarray:
+ """Elementwise average of input/output embeddings, shape `(V, D)`."""
+ return 0.5 * (self.in_embeddings + self.out_embeddings) # (V, D)


- __all__ = ["SGNS_PureML"]
+ __all__ = ["SGNS_PureML", "SG_PureML"]

  if __name__ == "__main__":
  pass
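
For orientation, here is a minimal usage sketch of the updated PureML backend. The import path `sawnergy.embedding`, the concrete `SGD` optimizer class, and the smoothed-unigram noise distribution are illustrative assumptions, not part of this diff; only the constructor and `fit()` keyword names come from the code above.

    # Sketch only: module path and the SGD class are assumptions.
    import numpy as np
    from pureml.optimizers import SGD            # assumed concrete Optim subclass
    from sawnergy.embedding import SGNS_PureML   # hypothetical import path

    V, D = 1_000, 64
    centers = np.random.randint(0, V, size=10_000)   # center-node indices
    contexts = np.random.randint(0, V, size=10_000)  # co-occurring context indices

    # Smoothed unigram noise distribution; fit() renormalizes if the sum drifts from 1.
    noise = np.bincount(contexts, minlength=V).astype(np.float64) ** 0.75
    noise /= noise.sum()

    model = SGNS_PureML(V, D, seed=0, optim=SGD, optim_kwargs={"lr": 0.05})
    model.fit(centers, contexts,
              num_epochs=5, batch_size=256, num_negative_samples=5,
              shuffle_data=True, lr_step_per_batch=False, noise_dist=noise)
    vectors = model.avg_embeddings   # (V, D) NumPy array
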
@@ -42,31 +42,31 @@ class SGNS_Torch:
  optim: Optimizer class to instantiate.
  optim_kwargs: Keyword arguments for the optimizer.
  lr_sched: Optional learning-rate scheduler class.
- lr_sched_kwargs: Keyword arguments for the scheduler.
+ lr_sched_kwargs: Keyword arguments for the scheduler (required if lr_sched is provided).
  device: Target device string (e.g. ``"cuda"``). Defaults to CUDA if available, else CPU.
  """
  if optim_kwargs is None:
  raise ValueError("optim_kwargs must be provided")
  if lr_sched is not None and lr_sched_kwargs is None:
  raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
  self.V, self.D = int(V), int(D)
- resolved_device = device if device is not None else ("cuda" if torch.cuda.is_available() else "cpu")
- self.device = torch.device(resolved_device)
- _logger.info("SGNS_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
+ # two embeddings as in/out matrices
+ self.in_emb = nn.Embedding(self.V, self.D)
+ self.out_emb = nn.Embedding(self.V, self.D)

+ resolved_device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+ self.device = torch.device(resolved_device)
  if seed is not None:
  torch.manual_seed(int(seed))
  np.random.seed(int(seed))
  if self.device.type == "cuda":
  torch.cuda.manual_seed_all(int(seed))

- # two embeddings as in/out matrices
- self.in_emb = nn.Embedding(self.V, self.D)
- self.out_emb = nn.Embedding(self.V, self.D)
-
  self.to(self.device)
-
+ _logger.info("SGNS_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
  params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
+ # optimizer / scheduler
  self.opt = optim(params=params, **optim_kwargs)
  self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None

@@ -112,6 +112,15 @@ class SGNS_Torch:
  idx = np.arange(N)

  noise_probs = torch.as_tensor(noise_dist, dtype=torch.float32, device=self.device)
+ # require normalized, non-negative distribution
+ if (not torch.isfinite(noise_probs).all()
+ or (noise_probs < 0).any()
+ or abs(float(noise_probs.sum().item()) - 1.0) > 1e-6):
+ raise ValueError(
+ "noise_dist must be non-negative, finite, and sum to 1.0 "
+ f"(got sum={float(noise_probs.sum().item()):.6f}, "
+ f"min={float(noise_probs.min().item()):.6f})"
+ )

  for epoch in range(1, int(num_epochs) + 1):
  epoch_loss = 0.0
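
Unlike the PureML `fit` above, which rescales an unnormalized `noise_dist`, the PyTorch backend now rejects it outright. A small helper sketch for producing an input that passes the new check; the count-based construction and the helper name are assumptions, not part of this release:

    import numpy as np

    def make_noise_dist(context_counts: np.ndarray, power: float = 0.75) -> np.ndarray:
        # hypothetical helper: smoothed unigram distribution that is
        # non-negative, finite, and sums to 1 within the 1e-6 tolerance
        p = np.asarray(context_counts, dtype=np.float64) ** power
        return p / p.sum()
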
@@ -140,7 +149,7 @@ class SGNS_Torch:
  y_neg = torch.zeros_like(neg_logits)
  loss_neg = bce(neg_logits, y_neg)

- loss = loss_pos + loss_neg
+ loss = loss_pos + K*loss_neg

  self.opt.zero_grad(set_to_none=True)
  loss.backward()
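
The `K*loss_neg` change rebalances the two BCE terms: assuming `bce` is a mean-reduced `BCEWithLogitsLoss` (its construction is outside this hunk), the negative term is averaged over all B·K logits, so multiplying by K restores the per-example sum over the K negatives used in the standard SGNS objective. A quick numerical check of that identity:

    import torch

    B, K = 8, 5
    neg_logits = torch.randn(B, K)
    zeros = torch.zeros_like(neg_logits)

    bce = torch.nn.BCEWithLogitsLoss(reduction="mean")       # assumed reduction
    per_pair = torch.nn.functional.binary_cross_entropy_with_logits(
        neg_logits, zeros, reduction="none")                 # (B, K)

    # K * mean over B*K logits == batch mean of the summed K-negative term
    assert torch.allclose(K * bce(neg_logits, zeros), per_pair.sum(dim=1).mean())
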
@@ -163,6 +172,131 @@ class SGNS_Torch:
  def embeddings(self) -> np.ndarray:
  """Return the input embedding matrix as a NumPy array."""
  return self.in_emb.weight.detach().cpu().numpy()
+
+ @property
+ def in_embeddings(self) -> np.ndarray:
+ return self.in_emb.weight.detach().cpu().numpy()
+
+ @property
+ def out_embeddings(self) -> np.ndarray:
+ return self.out_emb.weight.detach().cpu().numpy()
+
+ @property
+ def avg_embeddings(self) -> np.ndarray:
+ return 0.5 * (self.in_embeddings + self.out_embeddings)
+
+ # tiny helper for device move
+ def to(self, device):
+ self.in_emb.to(device)
+ self.out_emb.to(device)
+ return self
+
+ class SG_Torch:
+
+ def __init__(self,
+ V: int,
+ D: int,
+ *,
+ seed: int | None = None,
+ optim: Type[Optimizer],
+ optim_kwargs: dict,
+ lr_sched: Type[LRScheduler] | None = None,
+ lr_sched_kwargs: dict | None = None,
+ device: str | None = None):
+
+ if optim_kwargs is None:
+ raise ValueError("optim_kwargs must be provided")
+ if lr_sched is not None and lr_sched_kwargs is None:
+ raise ValueError("lr_sched_kwargs required when lr_sched is provided")
+
+ self.V, self.D = int(V), int(D)
+
+ self.in_emb = nn.Linear(self.V, self.D)
+ self.out_emb = nn.Linear(self.D, self.V)
+
+ resolved_device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+ self.device = torch.device(resolved_device)
+ if seed is not None:
+ torch.manual_seed(int(seed))
+ np.random.seed(int(seed))
+ if self.device.type == "cuda":
+ torch.cuda.manual_seed_all(int(seed))
+ self.to(self.device)
+ _logger.info("SG_Torch init: V=%d D=%d device=%s seed=%s", self.V, self.D, self.device, seed)
+
+ params = list(self.in_emb.parameters()) + list(self.out_emb.parameters())
+
+ # optimizer / scheduler
+ self.opt = optim(params=params, **optim_kwargs)
+ self.lr_sched = lr_sched(self.opt, **lr_sched_kwargs) if lr_sched is not None else None
+
+ def predict(self, center: torch.Tensor) -> torch.Tensor:
+ center = center.to(self.device, dtype=torch.long)
+ c = nn.functional.one_hot(center, num_classes=self.V).to(dtype=torch.float32, device=self.device)
+ y = self.in_emb(c)
+ z = self.out_emb(y)
+ return z
+
+ __call__ = predict
+
+ def fit(self,
+ centers: np.ndarray,
+ contexts: np.ndarray,
+ num_epochs: int,
+ batch_size: int,
+ shuffle_data: bool,
+ lr_step_per_batch: bool,
+ **_ignore):
+ cce = nn.CrossEntropyLoss(reduction="mean")
+
+ N = centers.shape[0]
+ idx = np.arange(N)
+
+ for epoch in range(1, int(num_epochs) + 1):
+ epoch_loss = 0.0
+ batches = 0
+ if shuffle_data:
+ np.random.shuffle(idx)
+
+ for s in range(0, N, int(batch_size)):
+ take = idx[s:s+int(batch_size)]
+ if take.size == 0:
+ continue
+
+ cen = torch.as_tensor(centers[take], dtype=torch.long, device=self.device)
+ ctx = torch.as_tensor(contexts[take], dtype=torch.long, device=self.device)
+
+ logits = self(cen)
+ loss = cce(logits, ctx)
+
+ self.opt.zero_grad(set_to_none=True)
+ loss.backward()
+ self.opt.step()
+
+ if lr_step_per_batch and self.lr_sched is not None:
+ self.lr_sched.step()
+
+ epoch_loss += float(loss.detach().cpu().item())
+ batches += 1
+ _logger.debug("Epoch %d batch %d loss=%.6f", epoch, batches, loss.item())
+
+ if not lr_step_per_batch and self.lr_sched is not None:
+ self.lr_sched.step()
+
+ mean_loss = epoch_loss / max(batches, 1)
+ _logger.info("Epoch %d/%d mean_loss=%.6f", epoch, num_epochs, mean_loss)
+
+ @property
+ def in_embeddings(self) -> np.ndarray:
+ return self.in_emb.weight.detach().T.cpu().numpy()
+
+ @property
+ def out_embeddings(self) -> np.ndarray:
+ return self.out_emb.weight.detach().cpu().numpy()
+
+ @property
+ def avg_embeddings(self) -> np.ndarray:
+ return 0.5 * (self.in_embeddings + self.out_embeddings)

  # tiny helper for device move
  def to(self, device):
@@ -171,7 +305,7 @@ class SGNS_Torch:
  return self


- __all__ = ["SGNS_Torch"]
+ __all__ = ["SGNS_Torch", "SG_Torch"]

  if __name__ == "__main__":
  pass
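
A corresponding sketch for the new full-softmax PyTorch class follows; the `sawnergy.embedding` import path is an assumption, while `torch.optim.Adam` and the `fit()` keywords match the code above. Note that `predict()` one-hot-encodes the batch into a (B, V) float tensor, so memory grows with the vocabulary size.

    import numpy as np
    import torch
    from sawnergy.embedding import SG_Torch   # hypothetical import path

    V, D = 500, 32
    centers = np.random.randint(0, V, size=5_000)
    contexts = np.random.randint(0, V, size=5_000)

    model = SG_Torch(V, D, seed=0,
                     optim=torch.optim.Adam, optim_kwargs={"lr": 1e-3})
    model.fit(centers, contexts, num_epochs=3, batch_size=128,
              shuffle_data=True, lr_step_per_batch=False)
    vectors = model.avg_embeddings   # (V, D), averaged input/output embeddings
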
@@ -1,6 +1,7 @@
  from __future__ import annotations

  from .embedder import Embedder
+ from .visualizer import Visualizer

  def __getattr__(name: str):
  """Lazily expose optional backends."""
@@ -14,6 +15,16 @@ def __getattr__(name: str):
  ) from exc
  return SGNS_Torch

+ if name == "SG_Torch":
+ try:
+ from .SGNS_torch import SG_Torch
+ except Exception as exc:
+ raise ImportError(
+ "PyTorch backend requested but torch is not installed. "
+ "Install PyTorch via `pip install torch` (see https://pytorch.org/get-started)."
+ ) from exc
+ return SG_Torch
+
  if name == "SGNS_PureML":
  try:
  from .SGNS_pml import SGNS_PureML
@@ -24,11 +35,24 @@ def __getattr__(name: str):
  "Install PureML first via `pip install ym-pure-ml` "
  ) from exc

+ if name == "SG_PureML":
+ try:
+ from .SGNS_pml import SG_PureML
+ return SG_PureML
+ except Exception as exc:
+ raise ImportError(
+ "PureML is not installed. "
+ "Install PureML first via `pip install ym-pure-ml` "
+ ) from exc
+
  raise AttributeError(name)


  __all__ = [
  "Embedder",
+ "Visualizer",
  "SGNS_PureML",
  "SGNS_Torch",
+ "SG_PureML",
+ "SG_Torch"
  ]
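
The backends remain lazy: the module-level `__getattr__` (PEP 562) only imports torch or PureML when the corresponding class is first requested, so the base package stays importable without either dependency. A usage sketch, again assuming the subpackage is importable as `sawnergy.embedding`:

    from sawnergy.embedding import Embedder, Visualizer    # eager, always available

    try:
        from sawnergy.embedding import SG_Torch             # torch is imported only here
    except ImportError:
        SG_Torch = None                                      # torch not installed
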