npm - @dniskav/neuron - Versions diffs - 0.3.0 → 0.3.1 - Mend

@dniskav/neuron 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/index.mjs CHANGED Viewed

@@ -4736,6 +4736,749 @@ var TCN = class {
   }
 };
+// src/Word2Vec.ts
+var Word2Vec = class {
+  // Creates an untrained model.
+  //   embeddingDim       — length of every word vector (default 50)
+  //   options.windowSize — context words taken on each side of the center (default 2)
+  //   options.model      — "skipgram" trains skip-gram; any other value trains CBOW
+  //   options.minCount   — minimum corpus frequency for a word to be kept (default 1)
+  constructor(embeddingDim = 50, options = {}) {
+    this._trained = false;
+    this.embeddingDim = embeddingDim;
+    this._windowSize = options.windowSize ?? 2;
+    this._model = options.model ?? "skipgram";
+    this._minCount = options.minCount ?? 1;
+    this.embeddings = [];
+    this._W2 = [];
+    this.vocab = /* @__PURE__ */ new Map();
+    this._indexToWord = [];
+    this.vocabSize = 0;
+  }
+  // ── buildVocab ─────────────────────────────────────────────────────────────
+  // Counts word frequencies over `sentences` (an iterable of token iterables),
+  // keeps the words seen at least `minCount` times, and numbers them in
+  // first-seen order. Both weight matrices are then re-initialised with uniform
+  // noise, so calling this discards any previous training.
+  // Throws if no word survives the minCount filter.
+  buildVocab(sentences) {
+    const counts = /* @__PURE__ */ new Map();
+    for (const sentence of sentences) {
+      for (const word of sentence) {
+        counts.set(word, (counts.get(word) ?? 0) + 1);
+      }
+    }
+    this.vocab = /* @__PURE__ */ new Map();
+    this._indexToWord = [];
+    for (const [word, count] of counts) {
+      if (count >= this._minCount) {
+        this.vocab.set(word, this._indexToWord.length);
+        this._indexToWord.push(word);
+      }
+    }
+    this.vocabSize = this._indexToWord.length;
+    if (this.vocabSize === 0) {
+      throw new Error("Word2Vec.buildVocab: vocabulary is empty after applying minCount filter");
+    }
+    // Uniform values in (-scale, scale); scale shrinks with the fan-in of each matrix.
+    const uniformMatrix = (rows, cols, scale) => Array.from(
+      { length: rows },
+      () => Array.from({ length: cols }, () => (Math.random() * 2 - 1) * scale)
+    );
+    this.embeddings = uniformMatrix(this.vocabSize, this.embeddingDim, Math.sqrt(1 / this.embeddingDim));
+    this._W2 = uniformMatrix(this.embeddingDim, this.vocabSize, Math.sqrt(1 / this.vocabSize));
+    this._trained = false;
+  }
+  // ── tokenize ───────────────────────────────────────────────────────────────
+  // Lowercases `text`, replaces every character other than a-z, 0-9, whitespace,
+  // apostrophe and hyphen with a space, splits on whitespace and drops empty
+  // tokens. Returns an array of strings usable with buildVocab / train.
+  static tokenize(text) {
+    return text.toLowerCase().replace(/[^a-z0-9\s'-]/g, " ").split(/\s+/).filter((t) => t.length > 0);
+  }
+  // ── train ──────────────────────────────────────────────────────────────────
+  // Runs `epochs` passes of per-example SGD (learning rate `lr`) over the corpus
+  // and returns one mean cross-entropy loss per epoch. Builds the vocabulary
+  // from `sentences` first when none exists yet. Words missing from the
+  // vocabulary are dropped before windows are formed.
+  //
+  // A full-vocabulary softmax is used, so each step costs O(vocabSize).
+  train(sentences, lr = 0.025, epochs = 5) {
+    if (this.vocabSize === 0) this.buildVocab(sentences);
+    // The vocabulary does not change while training, so translate words to
+    // indices once instead of once per epoch.
+    const corpus = Array.from(
+      sentences,
+      (sentence) => sentence.map((w) => this.vocab.get(w)).filter((idx) => idx !== void 0)
+    );
+    const lossHistory = [];
+    for (let epoch = 0; epoch < epochs; epoch++) {
+      let totalLoss = 0;
+      let nPairs = 0;
+      for (const indices of corpus) {
+        for (let t = 0; t < indices.length; t++) {
+          const centerIdx = indices[t];
+          const contextIndices = [];
+          for (let offset = -this._windowSize; offset <= this._windowSize; offset++) {
+            if (offset === 0) continue;
+            const pos = t + offset;
+            if (pos >= 0 && pos < indices.length) {
+              contextIndices.push(indices[pos]);
+            }
+          }
+          if (contextIndices.length === 0) continue;
+          if (this._model === "skipgram") {
+            // Skip-gram: one update per (center, context) pair.
+            for (const contextIdx of contextIndices) {
+              totalLoss += this._skipgramStep(centerIdx, contextIdx, lr);
+              nPairs++;
+            }
+          } else {
+            // CBOW: one update per center word, using the whole window.
+            totalLoss += this._cbowStep(centerIdx, contextIndices, lr);
+            nPairs++;
+          }
+        }
+      }
+      lossHistory.push(nPairs > 0 ? totalLoss / nPairs : 0);
+    }
+    this._trained = true;
+    return lossHistory;
+  }
+  // ── getEmbedding ───────────────────────────────────────────────────────────
+  // Returns the embedding row for `word` (the live array, not a copy).
+  // Throws if the word is not in the vocabulary.
+  getEmbedding(word) {
+    const idx = this.vocab.get(word);
+    if (idx === void 0) throw new Error(`Word2Vec: unknown word "${word}"`);
+    return this.embeddings[idx];
+  }
+  // ── similarity ─────────────────────────────────────────────────────────────
+  // Cosine similarity between the embeddings of two known words.
+  similarity(word1, word2) {
+    return this._cosine(this.getEmbedding(word1), this.getEmbedding(word2));
+  }
+  // ── mostSimilar ────────────────────────────────────────────────────────────
+  // Up to `topK` entries { word, score }, best first, never including `word`.
+  mostSimilar(word, topK = 10) {
+    return this._nearestByVector(this.getEmbedding(word), topK, /* @__PURE__ */ new Set([word]));
+  }
+  // ── analogy ───────────────────────────────────────────────────────────────
+  // Ranks words by cosine similarity to vec(positive1) - vec(negative) + vec(positive2),
+  // e.g. analogy('king', 'man', 'woman'). The three input words are excluded
+  // from the returned list.
+  analogy(positive1, negative, positive2, topK = 5) {
+    const vPos1 = this.getEmbedding(positive1);
+    const vNeg = this.getEmbedding(negative);
+    const vPos2 = this.getEmbedding(positive2);
+    const target = vPos1.map((v, i) => v - vNeg[i] + vPos2[i]);
+    return this._nearestByVector(target, topK, /* @__PURE__ */ new Set([positive1, negative, positive2]));
+  }
+  // ── Private: skip-gram step ───────────────────────────────────────────────
+  // One forward/backward pass predicting `targetIdx` from `centerIdx`.
+  // Returns the cross-entropy loss measured before the update.
+  _skipgramStep(centerIdx, targetIdx, lr) {
+    const h = this.embeddings[centerIdx];
+    const probs = _softmax(this._hiddenToScores(h));
+    const loss = -Math.log(probs[targetIdx] + 1e-12);
+    // Softmax + cross-entropy gradient w.r.t. the scores: p - onehot(target).
+    const err = probs.map((p, j) => j === targetIdx ? p - 1 : p);
+    const dh = this._outputBackward(h, err, lr);
+    for (let d = 0; d < this.embeddingDim; d++) {
+      this.embeddings[centerIdx][d] -= lr * dh[d];
+    }
+    return loss;
+  }
+  // ── Private: CBOW step ────────────────────────────────────────────────────
+  // One forward/backward pass predicting `centerIdx` from the mean of the
+  // context embeddings. The hidden gradient is shared equally (÷ k) among the
+  // context rows. Returns the cross-entropy loss measured before the update.
+  _cbowStep(centerIdx, contextIndices, lr) {
+    const k = contextIndices.length;
+    const h = new Array(this.embeddingDim).fill(0);
+    for (const ci of contextIndices) {
+      for (let d = 0; d < this.embeddingDim; d++) {
+        h[d] += this.embeddings[ci][d];
+      }
+    }
+    for (let d = 0; d < this.embeddingDim; d++) h[d] /= k;
+    const probs = _softmax(this._hiddenToScores(h));
+    const loss = -Math.log(probs[centerIdx] + 1e-12);
+    const err = probs.map((p, j) => j === centerIdx ? p - 1 : p);
+    const dh = this._outputBackward(h, err, lr);
+    for (const ci of contextIndices) {
+      for (let d = 0; d < this.embeddingDim; d++) {
+        this.embeddings[ci][d] -= lr * dh[d] / k;
+      }
+    }
+    return loss;
+  }
+  // Backward pass through the output matrix, shared by both model variants.
+  // Applies the SGD update to W2 in place and returns dL/dh. Each weight is
+  // read for dh BEFORE it is overwritten: the forward pass used the old
+  // weights, so the gradient flowing back to the embeddings must use them too.
+  _outputBackward(h, err, lr) {
+    const dh = new Array(this.embeddingDim).fill(0);
+    for (let d = 0; d < this.embeddingDim; d++) {
+      const row = this._W2[d];
+      for (let j = 0; j < this.vocabSize; j++) {
+        dh[d] += row[j] * err[j];
+        row[j] -= lr * h[d] * err[j];
+      }
+    }
+    return dh;
+  }
+  // Computes scores = h · W2  →  [vocabSize]
+  _hiddenToScores(h) {
+    const scores = new Array(this.vocabSize).fill(0);
+    for (let d = 0; d < this.embeddingDim; d++) {
+      for (let j = 0; j < this.vocabSize; j++) {
+        scores[j] += h[d] * this._W2[d][j];
+      }
+    }
+    return scores;
+  }
+  // Scores every vocabulary word not in `exclude` by cosine similarity to `v`
+  // and returns the `topK` best as { word, score }, highest score first.
+  _nearestByVector(v, topK, exclude) {
+    const results = [];
+    for (let i = 0; i < this.vocabSize; i++) {
+      const w = this._indexToWord[i];
+      if (exclude.has(w)) continue;
+      results.push({ word: w, score: this._cosine(v, this.embeddings[i]) });
+    }
+    results.sort((a, b) => b.score - a.score);
+    return results.slice(0, topK);
+  }
+  // Cosine similarity (v1 · v2) / (‖v1‖ · ‖v2‖); returns 0 when the product of
+  // the norms is below 1e-12 so that zero vectors do not produce NaN.
+  _cosine(v1, v2) {
+    let dot = 0, n1 = 0, n2 = 0;
+    for (let i = 0; i < v1.length; i++) {
+      dot += v1[i] * v2[i];
+      n1 += v1[i] * v1[i];
+      n2 += v2[i] * v2[i];
+    }
+    const denom = Math.sqrt(n1) * Math.sqrt(n2);
+    return denom < 1e-12 ? 0 : dot / denom;
+  }
+};
+// Numerically stable softmax over an array of scores.
+// The maximum is subtracted before exponentiating so large scores cannot
+// overflow. It is found with a loop rather than Math.max(...scores): spreading
+// a vocabulary-sized array into call arguments can exceed the engine's
+// argument limit and throw a RangeError.
+// Returns a new array of the same length; an empty input yields [].
+function _softmax(scores) {
+  let max = -Infinity;
+  for (let i = 0; i < scores.length; i++) {
+    if (scores[i] > max) max = scores[i];
+  }
+  const exps = scores.map((s) => Math.exp(s - max));
+  const sum = exps.reduce((a, b) => a + b, 0);
+  return exps.map((e) => e / sum);
+}
+// src/TSNE.ts
+var TSNE = class {
+  // options.nComponents — output dimensionality (default 2)
+  // options.perplexity  — target perplexity of each conditional distribution (default 30)
+  // options.lr          — gradient-descent step size (default 200)
+  // options.nIter       — number of descent iterations (default 1000)
+  // options.seed        — when given, initialisation uses a seeded PRNG instead of Math.random
+  constructor(options = {}) {
+    // KL divergence tracked during the last fit() call.
+    this._klDivergence = 0;
+    // P matrix stored for kl() reporting.
+    this._P = [];
+    this._nComponents = options.nComponents ?? 2;
+    this._perplexity = options.perplexity ?? 30;
+    this._lr = options.lr ?? 200;
+    this._nIter = options.nIter ?? 1e3;
+    this._seed = options.seed;
+    this.embedding = [];
+  }
+  // ── fit ────────────────────────────────────────────────────────────────────
+  // Runs t-SNE on X (shape [n][d]) and stores the layout in this.embedding
+  // ([n][nComponents]) and the final KL(P ‖ Q) for kl().
+  // Throws when n < 2 or when perplexity >= n.
+  fit(X) {
+    const n = X.length;
+    if (n < 2) throw new Error("TSNE.fit: need at least 2 data points");
+    if (this._perplexity >= n) {
+      throw new Error(
+        `TSNE.fit: perplexity (${this._perplexity}) must be less than n (${n})`
+      );
+    }
+    const dims = this._nComponents;
+    const rng = this._seed !== void 0 ? _mulberry32(this._seed) : Math.random;
+    // High-dimensional affinities: per-point Gaussians, then symmetrised.
+    const P = _symmetrize(this._computePcond(_pairwiseDistSq(X, n), n), n);
+    this._P = P;
+    // Initial layout: Gaussian noise (Box-Muller) scaled by 0.01.
+    let Y = Array.from({ length: n }, () => {
+      return Array.from({ length: dims }, () => {
+        const u1 = Math.max(rng(), 1e-12);
+        const u2 = rng();
+        return Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2) * 0.01;
+      });
+    });
+    let Yprev = Y.map((row) => [...row]);
+    const EXAGGERATION_ITERS = 50;
+    const EXAGGERATION_FACTOR = 4;
+    const MOMENTUM_SWITCH = 20;
+    for (let iter = 0; iter < this._nIter; iter++) {
+      const momentum = iter < MOMENTUM_SWITCH ? 0.5 : 0.8;
+      // Early exaggeration: P is multiplied by 4 for the first 50 iterations.
+      const pScale = iter < EXAGGERATION_ITERS ? EXAGGERATION_FACTOR : 1;
+      const { Q, invDist } = _computeQ(Y, n, dims);
+      const grad = Array.from({ length: n }, () => new Array(dims).fill(0));
+      for (let i = 0; i < n; i++) {
+        for (let j = 0; j < n; j++) {
+          if (i === j) continue;
+          // dC/dy_i = 4 · Σ_j (p_ij − q_ij) · (1 + ‖y_i − y_j‖²)⁻¹ · (y_i − y_j)
+          const c = 4 * (pScale * P[i][j] - Q[i][j]) * invDist[i][j];
+          for (let d = 0; d < dims; d++) {
+            grad[i][d] += c * (Y[i][d] - Y[j][d]);
+          }
+        }
+      }
+      // Gradient step plus momentum on the previous displacement.
+      const Ynext = Y.map(
+        (row, i) => row.map(
+          (y, d) => y - this._lr * grad[i][d] + momentum * (y - Yprev[i][d])
+        )
+      );
+      Yprev = Y;
+      Y = Ynext;
+    }
+    this.embedding = Y;
+    const { Q: Qfinal } = _computeQ(Y, n, dims);
+    let kl = 0;
+    for (let i = 0; i < n; i++) {
+      for (let j = 0; j < n; j++) {
+        if (i === j) continue;
+        const p = P[i][j];
+        if (p > 1e-12) {
+          kl += p * Math.log(p / (Qfinal[i][j] + 1e-12));
+        }
+      }
+    }
+    this._klDivergence = kl;
+  }
+  // ── fitTransform ───────────────────────────────────────────────────────────
+  // Convenience: fit() then return this.embedding.
+  fitTransform(X) {
+    this.fit(X);
+    return this.embedding;
+  }
+  // ── kl ─────────────────────────────────────────────────────────────────────
+  // Returns KL(P ‖ Q) computed at the end of the last fit() call (0 before any fit).
+  kl() {
+    return this._klDivergence;
+  }
+  // ── Private: search for σi² ───────────────────────────────────────────────
+  // For each point i, searches for the Gaussian variance σi² at which the
+  // Shannon entropy (in bits) of P(·|i) equals log₂(perplexity), using at most
+  // 50 attempts: σ² doubles until an upper bound is found, then is bisected.
+  // `distSq` is the [n][n] matrix of squared distances.
+  // Returns the [n][n] conditional probabilities with a zero diagonal.
+  _computePcond(distSq, n) {
+    const targetEntropy = Math.log2(this._perplexity);
+    const Pcond = Array.from({ length: n }, () => new Array(n).fill(0));
+    for (let i = 0; i < n; i++) {
+      const dists = distSq[i];
+      let sigmaLo = 0;
+      let sigmaHi = 1e10;
+      let sigma2 = 1;
+      for (let attempt = 0; attempt < 50; attempt++) {
+        let sumExp = 0;
+        const exps = new Array(n).fill(0);
+        for (let j = 0; j < n; j++) {
+          if (j === i) continue;
+          const e = Math.exp(-dists[j] / (2 * sigma2));
+          exps[j] = e;
+          sumExp += e;
+        }
+        if (sumExp < 1e-12) {
+          // Every kernel value underflowed, so σ² is far too small for these
+          // distances. Giving up here would leave row i all zeros; instead
+          // widen σ² exactly as in the "entropy too low" branch and retry.
+          sigmaLo = sigma2;
+          sigma2 = sigmaHi < 1e9 ? (sigma2 + sigmaHi) / 2 : sigma2 * 2;
+          continue;
+        }
+        let H = 0;
+        for (let j = 0; j < n; j++) {
+          if (j === i) continue;
+          const p = exps[j] / sumExp;
+          Pcond[i][j] = p;
+          if (p > 1e-12) H -= p * Math.log2(p);
+        }
+        const delta = H - targetEntropy;
+        if (Math.abs(delta) < 1e-5) break;
+        if (delta > 0) {
+          // Entropy too high → distribution too flat → shrink σ².
+          sigmaHi = sigma2;
+          sigma2 = (sigmaLo + sigma2) / 2;
+        } else {
+          // Entropy too low → grow σ² (double until an upper bound exists).
+          sigmaLo = sigma2;
+          sigma2 = sigmaHi < 1e9 ? (sigma2 + sigmaHi) / 2 : sigma2 * 2;
+        }
+      }
+    }
+    return Pcond;
+  }
+};
+// Builds the symmetric n×n table of squared Euclidean distances between the
+// first n rows of X. The diagonal stays 0; each unordered pair is computed
+// once and mirrored. The feature count is taken from the first row of a pair.
+function _pairwiseDistSq(X, n) {
+  const table = [];
+  for (let r = 0; r < n; r++) table.push(new Array(n).fill(0));
+  for (let a = 0; a < n; a++) {
+    for (let b = a + 1; b < n; b++) {
+      let acc = 0;
+      for (let k = 0; k < X[a].length; k++) {
+        const delta = X[a][k] - X[b][k];
+        acc += delta * delta;
+      }
+      table[a][b] = acc;
+      table[b][a] = acc;
+    }
+  }
+  return table;
+}
+// Converts conditional probabilities into the symmetric joint distribution
+//   P[i][j] = (Pcond[i][j] + Pcond[j][i]) / (2n)
+// and returns it as a fresh n×n matrix.
+function _symmetrize(Pcond, n) {
+  const side = { length: n };
+  return Array.from(
+    side,
+    (_, row) => Array.from(
+      side,
+      (_2, col) => (Pcond[row][col] + Pcond[col][row]) / (2 * n)
+    )
+  );
+}
+// Low-dimensional affinities under the Student-t kernel.
+// For every pair i ≠ j the unnormalised weight is 1 / (1 + ‖y_i − y_j‖²).
+// Returns { Q, invDist }: `invDist` holds the raw weights (zero diagonal) and
+// `Q` the same weights divided by their total over all ordered pairs. The
+// total is floored at 1e-12 to avoid dividing by zero.
+function _computeQ(Y, n, nComponents) {
+  const kernel = [];
+  for (let r = 0; r < n; r++) kernel.push(new Array(n).fill(0));
+  let total = 0;
+  for (let a = 0; a < n; a++) {
+    for (let b = a + 1; b < n; b++) {
+      let sq = 0;
+      for (let c = 0; c < nComponents; c++) {
+        const gap = Y[a][c] - Y[b][c];
+        sq += gap * gap;
+      }
+      const weight = 1 / (1 + sq);
+      kernel[a][b] = weight;
+      kernel[b][a] = weight;
+      // Each unordered pair contributes twice: (a, b) and (b, a).
+      total += 2 * weight;
+    }
+  }
+  const Z = total < 1e-12 ? 1e-12 : total;
+  const Q = kernel.map((row) => row.map((v) => v / Z));
+  return { Q, invDist: kernel };
+}
+// Seeded pseudo-random generator (mulberry32). `seed` is coerced to an
+// unsigned 32-bit integer; the returned function yields a number in [0, 1) on
+// every call and produces the same sequence for the same seed.
+function _mulberry32(seed) {
+  const INCREMENT = 0x6d2b79f5;
+  const TWO_POW_32 = 0x100000000;
+  let state = seed >>> 0;
+  return function() {
+    state = (state + INCREMENT) >>> 0;
+    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
+    const folded = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
+    return ((folded ^ (folded >>> 14)) >>> 0) / TWO_POW_32;
+  };
+}
+// src/PositionalEncoding.ts
+var PositionalEncoding = class _PositionalEncoding {
+  // Sinusoidal encoding for one token at position `pos`.
+  // Returns an array of length `dModel` in which dimension d uses the
+  // frequency 1 / 10000^(2i/dModel) with i = floor(d / 2); even dimensions
+  // take the sine and odd dimensions the cosine. When dModel is odd the last
+  // dimension is therefore a sine with no cosine partner.
+  static encode(pos, dModel) {
+    const pe = new Array(dModel);
+    for (let d = 0; d < dModel; d++) {
+      const i = Math.floor(d / 2);
+      const freq = Math.pow(1e4, 2 * i / dModel);
+      pe[d] = d % 2 === 0 ? Math.sin(pos / freq) : Math.cos(pos / freq);
+    }
+    return pe;
+  }
+  // Encodings for positions 0 … seqLen-1. Returns shape [seqLen][dModel].
+  // The result depends only on (seqLen, dModel), so callers may cache it.
+  static encodeSequence(seqLen, dModel) {
+    return Array.from(
+      { length: seqLen },
+      (_, pos) => _PositionalEncoding.encode(pos, dModel)
+    );
+  }
+  // Returns a new matrix in which row `pos` of `embeddings` has the encoding
+  // for position `pos` added element-wise; the input is not modified.
+  //
+  // `embeddings` shape: [seqLen][dModel]. dModel is taken from the first row.
+  // An empty matrix yields [].
+  //
+  // `seqLen` is accepted for backward compatibility but does not change the
+  // result: every row is encoded by its own index. (Previously a `seqLen`
+  // smaller than embeddings.length, or an empty matrix, raised a TypeError.)
+  static apply(embeddings, seqLen) {
+    if (embeddings.length === 0) return [];
+    const dModel = embeddings[0].length;
+    return embeddings.map((emb, pos) => {
+      const pe = _PositionalEncoding.encode(pos, dModel);
+      return emb.map((val, d) => val + pe[d]);
+    });
+  }
+};
+var LearnedPositionalEncoding = class {
+  // Allocates a trainable [maxSeqLen][dModel] table initialised with uniform
+  // noise in (-√(1/dModel), √(1/dModel)).
+  constructor(maxSeqLen, dModel) {
+    this.maxSeqLen = maxSeqLen;
+    this.dModel = dModel;
+    const limit = Math.sqrt(1 / dModel);
+    this.weights = Array.from(
+      { length: maxSeqLen },
+      () => Array.from({ length: dModel }, () => (Math.random() * 2 - 1) * limit)
+    );
+  }
+  // Returns a copy of the learned encoding for position `pos`, so callers
+  // cannot mutate the weight table through it.
+  // Throws when `pos` is at or beyond maxSeqLen, or when it does not address a
+  // row of the table at all (e.g. a negative or fractional position).
+  getEncoding(pos) {
+    if (pos >= this.maxSeqLen) {
+      throw new Error(
+        `Position ${pos} exceeds maxSeqLen=${this.maxSeqLen}. Learned encodings cannot generalize beyond their training length.`
+      );
+    }
+    const row = this.weights[pos];
+    if (row === void 0) {
+      throw new Error(
+        `Position ${pos} is not a valid position; expected an integer from 0 to ${this.maxSeqLen - 1}.`
+      );
+    }
+    return [...row];
+  }
+  // Adds the learned encodings to `embeddings` and returns a new matrix.
+  // Shape: [seqLen][dModel] → [seqLen][dModel]. `seqLen` defaults to
+  // embeddings.length.
+  // Throws when either `seqLen` or the number of rows actually present
+  // exceeds maxSeqLen, since every row needs a weight row of its own.
+  apply(embeddings, seqLen) {
+    const len = seqLen ?? embeddings.length;
+    const needed = Math.max(len, embeddings.length);
+    if (needed > this.maxSeqLen) {
+      throw new Error(
+        `Sequence length ${needed} exceeds maxSeqLen=${this.maxSeqLen}.`
+      );
+    }
+    return embeddings.map(
+      (emb, pos) => emb.map((val, d) => val + this.weights[pos][d])
+    );
+  }
+  // Applies one plain SGD step to the position table.
+  //
+  // `dWeights` has the same shape as `weights`: [maxSeqLen][dModel], each
+  // entry being dL/dW[pos][d], the loss gradient for that weight. Descending
+  // the loss means moving AGAINST the gradient, hence the subtraction.
+  // NOTE(review): the previous code added lr * dWeights; confirm no caller was
+  // compensating by passing negated gradients.
+  update(dWeights, lr) {
+    for (let pos = 0; pos < this.maxSeqLen; pos++) {
+      for (let d = 0; d < this.dModel; d++) {
+        this.weights[pos][d] -= lr * dWeights[pos][d];
+      }
+    }
+  }
+};
+// src/ContrastiveLearning.ts
+var Augmenter = class _Augmenter {
+  // Returns a copy of `x` with independent zero-mean Gaussian noise of
+  // standard deviation `sigma` added to every element.
+  //
+  // Samples come from the Box-Muller transform
+  //   z = √(-2·ln(u₁)) · cos(2π·u₂)
+  // with u₁ clamped to at least 1e-10 so that ln(u₁) stays finite.
+  static addNoise(x, sigma = 0.05) {
+    const TWO_PI = 2 * Math.PI;
+    return x.map((value) => {
+      const radiusSource = Math.max(1e-10, Math.random());
+      const angleSource = Math.random();
+      const radius = Math.sqrt(-2 * Math.log(radiusSource));
+      const gaussian = radius * Math.cos(TWO_PI * angleSource);
+      return value + sigma * gaussian;
+    });
+  }
+  // Returns a copy of `x` in which each element is independently replaced by
+  // 0 with probability `rate`.
+  static dropoutFeatures(x, rate = 0.1) {
+    return x.map((value) => {
+      if (Math.random() < rate) {
+        return 0;
+      }
+      return value;
+    });
+  }
+  // Noise first, then feature dropout, so dropped features are exactly 0.
+  static augment(x, noiseStd = 0.05, dropRate = 0.1) {
+    const noisy = _Augmenter.addNoise(x, noiseStd);
+    return _Augmenter.dropoutFeatures(noisy, dropRate);
+  }
+  // Builds a positive pair [x, augment(x)] using the default augmentation
+  // strengths. The first entry is `x` itself, not a copy.
+  static makePair(x) {
+    const view = _Augmenter.augment(x);
+    return [x, view];
+  }
+};
+var ContrastiveLearning = class _ContrastiveLearning {
+  // Builds an encoder and a projection head, both as NetworkN instances with
+  // ReLU activations on every layer.
+  //
+  // inputSize:     length of each input vector.
+  // encoderHidden: layer sizes after the input; the last one is the encoder's
+  //   output size, e.g. inputSize=64, encoderHidden=[256, 128] → NetworkN([64, 256, 128]).
+  //   Must contain at least one element, otherwise an Error is thrown.
+  // projectionDim: output size of the projection head (the z space). The
+  //   head's hidden layer has max(projectionDim, floor(encoderOut / 2)) units.
+  // options.temperature:    τ in the NT-Xent loss (default 0.5).
+  // options.encoderOptions: extra NetworkN options for the encoder; these are
+  //   spread last, so they can override the default activations.
+  constructor(inputSize, encoderHidden, projectionDim, options = {}) {
+    if (encoderHidden.length === 0) {
+      throw new Error("encoderHidden must have at least one element.");
+    }
+    this.temperature = options.temperature ?? 0.5;
+    const encoderStructure = [inputSize, ...encoderHidden];
+    const encoderActivations = encoderHidden.map(() => relu);
+    this.encoder = new NetworkN(encoderStructure, {
+      activations: encoderActivations,
+      ...options.encoderOptions
+    });
+    const encoderOut = encoderHidden[encoderHidden.length - 1];
+    const projHidden = Math.max(projectionDim, Math.floor(encoderOut / 2));
+    this.projectionHead = new NetworkN(
+      [encoderOut, projHidden, projectionDim],
+      { activations: [relu, relu] }
+    );
+  }
+  // ── Inference ─────────────────────────────────────────────────────────────
+  //
+  // Returns h, the encoder output for `x`, without applying the projection
+  // head. This is the representation intended for downstream tasks.
+  encode(x) {
+    return this.encoder.predict(x);
+  }
+  // ── Training path: encode then project ───────────────────────────────────
+  //
+  // Returns z, the projection-head output for `x`, which is what the NT-Xent
+  // loss compares. Use encode() rather than this for downstream tasks.
+  project(x) {
+    const h = this.encoder.predict(x);
+    return this.projectionHead.predict(h);
+  }
+  // ── Cosine similarity ─────────────────────────────────────────────────────
+  //
+  // sim(a, b) = aᵀb / (‖a‖ · ‖b‖), iterating over the length of `a`.
+  // Returns 0 when the product of the norms is below 1e-10, so zero vectors
+  // do not produce NaN.
+  static cosineSimilarity(a, b) {
+    let dot = 0, normA = 0, normB = 0;
+    for (let d = 0; d < a.length; d++) {
+      dot += a[d] * b[d];
+      normA += a[d] * a[d];
+      normB += b[d] * b[d];
+    }
+    const denom = Math.sqrt(normA) * Math.sqrt(normB);
+    return denom < 1e-10 ? 0 : dot / denom;
+  }
+  // ── NT-Xent loss (no weight update) ──────────────────────────────────────
+  //
+  // Forward-only: projects every view in `pairs` (an array of [x, xAug]) and
+  // returns the mean NT-Xent loss. No weights are changed.
+  computeLoss(pairs) {
+    const { projections, N } = this._forwardProjections(pairs);
+    return this._ntXentLoss(projections, N);
+  }
+  // ── Training step ─────────────────────────────────────────────────────────
+  //
+  // Updates every weight and bias of the encoder, then of the projection
+  // head, using central finite differences of computeLoss(pairs) with
+  // ε = 1e-4 and step size `lr`. Parameters are updated one at a time, so
+  // each estimate already sees the updates applied before it.
+  //
+  // This costs two full forward passes per parameter: simple and free of
+  // hand-written backprop, but slow on anything beyond small networks.
+  //
+  // Returns the NT-Xent loss measured before any update.
+  trainStep(pairs, lr) {
+    const loss = this.computeLoss(pairs);
+    const eps = 1e-4;
+    this._finiteDifferenceUpdate(this.encoder, pairs, lr, eps);
+    this._finiteDifferenceUpdate(this.projectionHead, pairs, lr, eps);
+    return loss;
+  }
+  // ── Private: finite-difference descent over one network ──────────────────
+  //
+  // Visits each neuron's weights in order and then its bias. For each
+  // parameter p: evaluates the loss at p+ε and p−ε, estimates
+  // ∂L/∂p ≈ (L₊ − L₋) / 2ε, and sets p to p₀ − lr·∂L/∂p.
+  // The original value p₀ is saved and written back explicitly rather than
+  // undone arithmetically, so probing adds no rounding drift of its own.
+  _finiteDifferenceUpdate(network, pairs, lr, eps) {
+    const step = (holder, key) => {
+      const original = holder[key];
+      holder[key] = original + eps;
+      const lossPlus = this.computeLoss(pairs);
+      holder[key] = original - eps;
+      const lossMinus = this.computeLoss(pairs);
+      const grad = (lossPlus - lossMinus) / (2 * eps);
+      holder[key] = original + lr * -grad;
+    };
+    for (const layer of network.layers) {
+      for (const neuron of layer.neurons) {
+        for (let j = 0; j < neuron.weights.length; j++) {
+          step(neuron.weights, j);
+        }
+        step(neuron, "bias");
+      }
+    }
+  }
+  // ── Private: forward all pairs through the projection head ───────────────
+  //
+  // Returns { projections, N } where N = pairs.length and projections is a
+  // flat array of 2N vectors laid out as
+  //   [ z_0, z_0', z_1, z_1', ..., z_{N-1}, z_{N-1}' ]
+  // (even index 2i → first view of pair i, odd index 2i+1 → second view).
+  _forwardProjections(pairs) {
+    const N = pairs.length;
+    const projections = [];
+    for (const [x, xAug] of pairs) {
+      projections.push(this.project(x));
+      projections.push(this.project(xAug));
+    }
+    return { projections, N };
+  }
+  // ── Private: NT-Xent loss over a set of 2N projections ───────────────────
+  //
+  // projections[2i] and projections[2i+1] are positives of each other.
+  // For anchor i the loss is −log( exp(s_i,pos) / Σ_{k≠i} exp(s_i,k) ) with
+  // s = cosine similarity / τ; 1e-10 is added to the denominator. The result
+  // is the mean over all 2N anchors.
+  _ntXentLoss(projections, N) {
+    const total = 2 * N;
+    const tau = this.temperature;
+    const sim = Array.from(
+      { length: total },
+      (_, i) => Array.from(
+        { length: total },
+        (_2, j) => _ContrastiveLearning.cosineSimilarity(projections[i], projections[j]) / tau
+      )
+    );
+    let totalLoss = 0;
+    for (let i = 0; i < total; i++) {
+      // Partners sit next to each other: even ↔ following odd index.
+      const posIdx = i % 2 === 0 ? i + 1 : i - 1;
+      const numerator = Math.exp(sim[i][posIdx]);
+      let denominator = 0;
+      for (let k = 0; k < total; k++) {
+        if (k !== i) {
+          denominator += Math.exp(sim[i][k]);
+        }
+      }
+      totalLoss += -Math.log(numerator / (denominator + 1e-10));
+    }
+    return totalLoss / total;
+  }
+};
 // src/GAN.ts
 var GAN = class {
   constructor(latentDim, generatorHidden, outputDim, discriminatorHidden, options) {
@@ -5546,12 +6289,14 @@ function _sampleNormal() {
 export {
   Adam,
   AttentionHead,
+  Augmenter,
   Autoencoder,
   BatchNorm,
   BiasVector,
   CausalConv1D,
   ClipOptimizer,
   ClippedOptimizerFactory,
+  ContrastiveLearning,
   Conv1D,
   Conv2D,
   DataAugmentation,
@@ -5570,6 +6315,7 @@ export {
   LSTMLayer,
   Layer,
   LayerNorm,
+  LearnedPositionalEncoding,
   LinearRegression,
   LogisticRegression,
   LossPlotter,
@@ -5586,18 +6332,21 @@ export {
   NeuronN,
   PCA,
   Perceptron,
+  PositionalEncoding,
   RNN,
   SGD,
   SOM,
   Seq2Seq,
   SoftmaxRegression,
   TCN,
+  TSNE,
   Trainer,
   TransformerBlock,
   VAE,
   Value,
   WeightInspector,
   WeightMatrix,
+  Word2Vec,
   accuracy,
   auc,
   classificationReport,