PyPI - mcp-plesk-dev-docs - Versions diffs - 0.4.2__tar.gz → 0.5.0__tar.gz - Mend

mcp-plesk-dev-docs 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

{mcp_plesk_dev_docs-0.4.2/mcp_plesk_dev_docs.egg-info → mcp_plesk_dev_docs-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-plesk-dev-docs
-Version: 0.4.2
+Version: 0.5.0
 Summary: A unified MCP server that indexes and retrieves Plesk documentation using vector embeddings and semantic search with reranking
 Author-email: Gilson Siqueira <gilson@example.com>
 License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: torch>=2.4.0
 Requires-Dist: markdownify>=0.14.1
 Requires-Dist: tantivy>=0.22.0
 Requires-Dist: lance-namespace==0.6.1
+Requires-Dist: tq-search
 Provides-Extra: dev
 Requires-Dist: pytest>=8.0.0; extra == "dev"
 Requires-Dist: requests>=2.32.0; extra == "dev"
@@ -49,6 +50,8 @@ Dynamic: license-file
 # mcp-plesk-dev-docs
 [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue?style=flat-square)](https://www.python.org/downloads/)
+[![PyPI](https://img.shields.io/pypi/v/mcp-plesk-dev-docs?style=flat-square)](https://pypi.org/project/mcp-plesk-dev-docs/)
+[![MCP Registry](https://img.shields.io/badge/MCP%20Registry-listed-green?style=flat-square)](https://registry.modelcontextprotocol.io/v0.1/servers/io.github.barateza%2Fmcp-plesk-dev-docs/versions/0.4.3)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](LICENSE)
 [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-green?style=flat-square)](https://modelcontextprotocol.io/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black)

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0}/README.md RENAMED Viewed

@@ -1,6 +1,8 @@
 # mcp-plesk-dev-docs
 [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue?style=flat-square)](https://www.python.org/downloads/)
+[![PyPI](https://img.shields.io/pypi/v/mcp-plesk-dev-docs?style=flat-square)](https://pypi.org/project/mcp-plesk-dev-docs/)
+[![MCP Registry](https://img.shields.io/badge/MCP%20Registry-listed-green?style=flat-square)](https://registry.modelcontextprotocol.io/v0.1/servers/io.github.barateza%2Fmcp-plesk-dev-docs/versions/0.4.3)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](LICENSE)
 [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-green?style=flat-square)](https://modelcontextprotocol.io/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black)
@@ -170,4 +172,4 @@ Portions of this repository were developed under contract for Plesk Internationa
 *Built to make Plesk extension development faster.*
-<!-- mcp-name: io.github.barateza/mcp-plesk-dev-docs -->
+<!-- mcp-name: io.github.barateza/mcp-plesk-dev-docs -->

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0/mcp_plesk_dev_docs.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-plesk-dev-docs
-Version: 0.4.2
+Version: 0.5.0
 Summary: A unified MCP server that indexes and retrieves Plesk documentation using vector embeddings and semantic search with reranking
 Author-email: Gilson Siqueira <gilson@example.com>
 License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: torch>=2.4.0
 Requires-Dist: markdownify>=0.14.1
 Requires-Dist: tantivy>=0.22.0
 Requires-Dist: lance-namespace==0.6.1
+Requires-Dist: tq-search
 Provides-Extra: dev
 Requires-Dist: pytest>=8.0.0; extra == "dev"
 Requires-Dist: requests>=2.32.0; extra == "dev"
@@ -49,6 +50,8 @@ Dynamic: license-file
 # mcp-plesk-dev-docs
 [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue?style=flat-square)](https://www.python.org/downloads/)
+[![PyPI](https://img.shields.io/pypi/v/mcp-plesk-dev-docs?style=flat-square)](https://pypi.org/project/mcp-plesk-dev-docs/)
+[![MCP Registry](https://img.shields.io/badge/MCP%20Registry-listed-green?style=flat-square)](https://registry.modelcontextprotocol.io/v0.1/servers/io.github.barateza%2Fmcp-plesk-dev-docs/versions/0.4.3)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](LICENSE)
 [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-green?style=flat-square)](https://modelcontextprotocol.io/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black)

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0}/mcp_plesk_dev_docs.egg-info/SOURCES.txt RENAMED Viewed

@@ -27,10 +27,6 @@ plesk_unified/settings.py
 plesk_unified/summary_cache.py
 plesk_unified/tq_index.py
 plesk_unified/types.py
-plesk_unified/turboquant/__init__.py
-plesk_unified/turboquant/compressors.py
-plesk_unified/turboquant/lloyd_max.py
-plesk_unified/turboquant/turboquant.py
 tests/test_ai_client.py
 tests/test_async_tools.py
 tests/test_benchmark_engines.py

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0}/mcp_plesk_dev_docs.egg-info/requires.txt RENAMED Viewed

@@ -11,6 +11,7 @@ torch>=2.4.0
 markdownify>=0.14.1
 tantivy>=0.22.0
 lance-namespace==0.6.1
+tq-search
 [dev]
 pytest>=8.0.0

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0}/plesk_unified/tq_index.py RENAMED Viewed

@@ -5,7 +5,7 @@ from __future__ import annotations
 import numpy as np
 import torch
-from plesk_unified.turboquant import TurboQuantProd
+from tq_search import TurboQuantProd
 class TurboQuantIndex:
@@ -52,6 +52,13 @@ class TurboQuantIndex:
         if self.compressed_db is None:
             return []
+        # 1. Lazily move the compressed database to the target device once
+        first_val = next(iter(self.compressed_db.values()))
+        if str(first_val.device) != self.device:
+            self.compressed_db = {
+                k: v.to(self.device) for k, v in self.compressed_db.items()
+            }
         selected_indices: list[int]
         if category:
             selected_indices = self._category_to_indices.get(category, [])
@@ -60,25 +67,30 @@ class TurboQuantIndex:
         else:
             selected_indices = list(range(len(self._meta)))
-        # 1. L2-Normalize the query
+        # 2. L2-Normalize the query
         norm = np.linalg.norm(query_vec)
         query_normalized = query_vec / max(norm, 1e-12)
-        # 2. Prepare query as a batched tensor (1, dim)
+        # 3. Prepare query as a batched tensor (1, dim) directly on target device
         q = torch.from_numpy(query_normalized).to(self.device).unsqueeze(0)
-        # 3. Slice candidates and move them to the target device.
-        selected_tensor = torch.as_tensor(selected_indices, dtype=torch.long)
-        db_on_device = {
-            k: v.index_select(0, selected_tensor).to(self.device)
-            for k, v in self.compressed_db.items()
-        }
+        # 4. Slice candidates only if we are actually filtering a subset
+        if category and len(selected_indices) < len(self._meta):
+            selected_tensor = torch.as_tensor(
+                selected_indices, dtype=torch.long, device=self.device
+            )
+            db_on_device = {
+                k: v.index_select(0, selected_tensor)
+                for k, v in self.compressed_db.items()
+            }
+        else:
+            db_on_device = self.compressed_db
-        # 4. Perform a SINGLE batched inner product calculation
+        # 5. Perform a SINGLE batched inner product calculation
         with torch.no_grad():
             scores = self.quantizer.inner_product(q, db_on_device).squeeze(0)
-        # 5. Sort and return
+        # 6. Sort and return
         scores_np = scores.cpu().numpy()
         idx = np.argsort(-scores_np)[:top_k]

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "mcp-plesk-dev-docs"
-version = "0.4.2"
+version = "0.5.0"
 description = "A unified MCP server that indexes and retrieves Plesk documentation using vector embeddings and semantic search with reranking"
 readme = "README.md"
 license = "MIT"
@@ -38,8 +38,11 @@ dependencies = [
     "markdownify>=0.14.1",
     "tantivy>=0.22.0",
     "lance-namespace==0.6.1",
+    "tq-search",
 ]
 [project.urls]
 Homepage = "https://github.com/barateza/mcp-plesk-dev-docs"
 Documentation = "https://github.com/barateza/mcp-plesk-dev-docs#readme"
@@ -47,7 +50,8 @@ Repository = "https://github.com/barateza/mcp-plesk-dev-docs.git"
 "Bug Tracker" = "https://github.com/barateza/mcp-plesk-dev-docs/issues"
 [tool.setuptools]
-packages = ["plesk_unified", "plesk_unified.turboquant"]
+packages = ["plesk_unified"]
 [project.scripts]
 # Console script to run the MCP server
@@ -78,7 +82,9 @@ url = "https://download.pytorch.org/whl/cu124"
 explicit = true
 [tool.uv.sources]
+tq-search = { path = "/Users/gilsonsiqueira/tq-search", editable = true }
 torch = [
   { index = "pytorch-cu124", marker = "sys_platform == 'win32'" },
 ]
 torchvision = [

{mcp_plesk_dev_docs-0.4.2 → mcp_plesk_dev_docs-0.5.0}/tests/test_turboquant_regression.py RENAMED Viewed

@@ -4,7 +4,7 @@ import numpy as np
 import torch
 from plesk_unified import tq_index
-from plesk_unified.turboquant import LloydMaxCodebook, TurboQuantMSE, TurboQuantProd
+from tq_search import LloydMaxCodebook, TurboQuantMSE, TurboQuantProd
 def test_turboquant_package_exports():

mcp_plesk_dev_docs-0.4.2/plesk_unified/turboquant/__init__.py DELETED Viewed

@@ -1,21 +0,0 @@
-"""TurboQuant helpers used by the unified retrieval path.
-Base implementation: https://github.com/tonbistudio/turboquant-pytorch
-"""
-from __future__ import annotations
-from .compressors import TurboQuantCompressorMSE, TurboQuantCompressorV2
-from .lloyd_max import LloydMaxCodebook, compute_expected_distortion, solve_lloyd_max
-from .turboquant import TurboQuantKVCache, TurboQuantMSE, TurboQuantProd
-__all__ = [
-    "TurboQuantCompressorMSE",
-    "TurboQuantCompressorV2",
-    "TurboQuantKVCache",
-    "TurboQuantMSE",
-    "TurboQuantProd",
-    "LloydMaxCodebook",
-    "compute_expected_distortion",
-    "solve_lloyd_max",
-]

mcp_plesk_dev_docs-0.4.2/plesk_unified/turboquant/compressors.py DELETED Viewed

@@ -1,190 +0,0 @@
-"""TurboQuant KV cache helpers."""
-from __future__ import annotations
-import math
-import torch
-# ---------------------------------------------------------------------------
-# Closed-form Gaussian integration helpers (replaces scipy.integrate.quad)
-# ---------------------------------------------------------------------------
-def _gauss_pdf(x: float, sigma: float) -> float:
-    return math.exp(-0.5 * (x / sigma) ** 2) / (sigma * math.sqrt(2.0 * math.pi))
-def _gauss_cdf(x: float, sigma: float) -> float:
-    return 0.5 * (1.0 + math.erf(x / (sigma * math.sqrt(2.0))))
-def _int_pdf(a: float, b: float, sigma: float) -> float:
-    return _gauss_cdf(b, sigma) - _gauss_cdf(a, sigma)
-def _int_x_pdf(a: float, b: float, sigma: float) -> float:
-    return sigma * sigma * (_gauss_pdf(a, sigma) - _gauss_pdf(b, sigma))
-class TurboQuantCompressorV2:
-    """Compressed key store with direct inner-product scoring."""
-    def __init__(self, head_dim: int, bits: int, seed: int, device: str = "cpu"):
-        self.head_dim = head_dim
-        self.bits = bits
-        self.mse_bits = max(bits - 1, 1)
-        self.device = device
-        gen = torch.Generator(device="cpu")
-        gen.manual_seed(seed)
-        G = torch.randn(head_dim, head_dim, generator=gen)
-        Q, R = torch.linalg.qr(G)
-        diag_sign = torch.sign(torch.diag(R))
-        diag_sign[diag_sign == 0] = 1.0
-        self.Pi = (Q * diag_sign.unsqueeze(0)).to(device)
-        self.centroids = self._solve_codebook(head_dim, self.mse_bits).to(device)
-        gen2 = torch.Generator(device="cpu")
-        gen2.manual_seed(seed + 10000)
-        self.S = torch.randn(head_dim, head_dim, generator=gen2).to(device)
-        self.PiT = self.Pi.T.contiguous()
-    def _solve_codebook(self, d: int, bits: int) -> torch.Tensor:
-        n_levels = 2**bits
-        sigma = 1.0 / math.sqrt(d)
-        lo, hi = -3.5 * sigma, 3.5 * sigma
-        centroids = [lo + (hi - lo) * (i + 0.5) / n_levels for i in range(n_levels)]
-        for _ in range(200):
-            boundaries = [
-                (centroids[i] + centroids[i + 1]) / 2.0 for i in range(n_levels - 1)
-            ]
-            edges = [lo * 3] + boundaries + [hi * 3]
-            new_centroids = []
-            for i in range(n_levels):
-                a, b = edges[i], edges[i + 1]
-                num = _int_x_pdf(a, b, sigma)
-                den = _int_pdf(a, b, sigma)
-                new_centroids.append(num / den if den > 1e-15 else centroids[i])
-            if (
-                max(abs(new_centroids[i] - centroids[i]) for i in range(n_levels))
-                < 1e-10
-            ):
-                break
-            centroids = new_centroids
-        return torch.tensor(centroids, dtype=torch.float32)
-    @torch.no_grad()
-    def compress(self, states: torch.Tensor) -> dict:
-        B, H, S, D = states.shape
-        flat = states.reshape(-1, D).float()
-        vec_norms = torch.norm(flat, dim=-1, keepdim=True)
-        flat_norm = flat / (vec_norms + 1e-8)
-        rotated = flat_norm @ self.Pi.T
-        diffs = rotated.unsqueeze(-1) - self.centroids
-        indices = diffs.abs().argmin(dim=-1).to(torch.uint8)
-        reconstructed_rotated = self.centroids[indices.long()]
-        k_mse = (reconstructed_rotated @ self.Pi) * vec_norms
-        residual = flat - k_mse
-        residual_norm = torch.norm(residual, dim=-1)
-        projected = residual @ self.S.T
-        signs = (projected >= 0).to(torch.int8) * 2 - 1
-        return {
-            "k_mse": k_mse.to(torch.float16).reshape(B, H, S, D),
-            "qjl_signs": signs.reshape(B, H, S, D),
-            "residual_norm": residual_norm.to(torch.float16).reshape(B, H, S),
-            "shape": (B, H, S, D),
-        }
-    @torch.no_grad()
-    def asymmetric_attention_scores(
-        self, queries: torch.Tensor, compressed: dict
-    ) -> torch.Tensor:
-        k_mse = compressed["k_mse"].float()
-        signs = compressed["qjl_signs"].float()
-        r_norm = compressed["residual_norm"].float()
-        term1 = torch.matmul(queries.float(), k_mse.transpose(-2, -1))
-        q_projected = torch.matmul(queries.float(), self.S.T)
-        qjl_ip = torch.matmul(q_projected, signs.transpose(-2, -1))
-        m = self.S.shape[0]
-        correction_scale = math.sqrt(math.pi / 2) / m
-        term2 = correction_scale * qjl_ip * r_norm.unsqueeze(-2)
-        return term1 + term2
-class TurboQuantCompressorMSE:
-    """MSE-only compressor for values."""
-    def __init__(self, head_dim: int, bits: int, seed: int, device: str = "cpu"):
-        self.head_dim = head_dim
-        self.bits = bits
-        self.device = device
-        gen = torch.Generator(device="cpu")
-        gen.manual_seed(seed)
-        G = torch.randn(head_dim, head_dim, generator=gen)
-        Q, R = torch.linalg.qr(G)
-        diag_sign = torch.sign(torch.diag(R))
-        diag_sign[diag_sign == 0] = 1.0
-        self.Pi = (Q * diag_sign.unsqueeze(0)).to(device)
-        self.centroids = self._solve_codebook(head_dim, bits).to(device)
-    def _solve_codebook(self, d, bits):
-        n_levels = 2**bits
-        sigma = 1.0 / math.sqrt(d)
-        lo, hi = -3.5 * sigma, 3.5 * sigma
-        centroids = [lo + (hi - lo) * (i + 0.5) / n_levels for i in range(n_levels)]
-        for _ in range(200):
-            boundaries = [
-                (centroids[i] + centroids[i + 1]) / 2.0 for i in range(n_levels - 1)
-            ]
-            edges = [lo * 3] + boundaries + [hi * 3]
-            new_c = []
-            for i in range(n_levels):
-                a, b = edges[i], edges[i + 1]
-                num = _int_x_pdf(a, b, sigma)
-                den = _int_pdf(a, b, sigma)
-                new_c.append(num / den if den > 1e-15 else centroids[i])
-            if max(abs(new_c[i] - centroids[i]) for i in range(n_levels)) < 1e-10:
-                break
-            centroids = new_c
-        return torch.tensor(centroids, dtype=torch.float32)
-    @torch.no_grad()
-    def compress(self, states: torch.Tensor) -> dict:
-        B, H, S, D = states.shape
-        flat = states.reshape(-1, D).float()
-        vec_norms = torch.norm(flat, dim=-1, keepdim=True)
-        flat_norm = flat / (vec_norms + 1e-8)
-        rotated = flat_norm @ self.Pi.T
-        diffs = rotated.unsqueeze(-1) - self.centroids
-        indices = diffs.abs().argmin(dim=-1).to(torch.uint8)
-        return {
-            "indices": indices,
-            "vec_norms": vec_norms.squeeze(-1).to(torch.float16),
-            "shape": (B, H, S, D),
-        }
-    @torch.no_grad()
-    def decompress(self, compressed: dict) -> torch.Tensor:
-        B, H, S, D = compressed["shape"]
-        indices = compressed["indices"].long()
-        reconstructed = self.centroids[indices] @ self.Pi
-        vec_norms = compressed["vec_norms"].float().unsqueeze(-1)
-        return (reconstructed * vec_norms).reshape(B, H, S, D)

mcp_plesk_dev_docs-0.4.2/plesk_unified/turboquant/lloyd_max.py DELETED Viewed

@@ -1,190 +0,0 @@
-# ruff: noqa
-"""Lloyd-Max scalar quantizer for rotated unit vectors.
-The coordinate distribution is approximately Beta-shaped on [-1, 1] after
-random rotation. For d >= 64, a Gaussian N(0, 1/d) is a good approximation.
-"""
-from __future__ import annotations
-import math
-import torch
-# ---------------------------------------------------------------------------
-# Pure-Python Gaussian integration helpers (replaces scipy.integrate.quad)
-# ---------------------------------------------------------------------------
-def _gauss_pdf(x: float, sigma: float) -> float:
-    """N(0, σ²) probability density at x."""
-    return math.exp(-0.5 * (x / sigma) ** 2) / (sigma * math.sqrt(2.0 * math.pi))
-def _gauss_cdf(x: float, sigma: float) -> float:
-    """N(0, σ²) cumulative distribution at x."""
-    return 0.5 * (1.0 + math.erf(x / (sigma * math.sqrt(2.0))))
-def _int_pdf(a: float, b: float, sigma: float) -> float:
-    """∫[a,b] N(0,σ²)(x) dx — closed form via erf."""
-    return _gauss_cdf(b, sigma) - _gauss_cdf(a, sigma)
-def _int_x_pdf(a: float, b: float, sigma: float) -> float:
-    """∫[a,b] x·N(0,σ²)(x) dx = σ²·[f(a) − f(b)]."""
-    return sigma * sigma * (_gauss_pdf(a, sigma) - _gauss_pdf(b, sigma))
-def _int_sq_pdf(a: float, b: float, sigma: float, c: float) -> float:
-    """∫[a,b] (x−c)²·N(0,σ²)(x) dx — closed form."""
-    fa, fb = _gauss_pdf(a, sigma), _gauss_pdf(b, sigma)
-    cdf_diff = _gauss_cdf(b, sigma) - _gauss_cdf(a, sigma)
-    sig2 = sigma * sigma
-    return (
-        sig2 * (a * fa - b * fb)
-        - 2.0 * c * sig2 * (fa - fb)
-        + (sig2 + c * c) * cdf_diff
-    )
-def _quad(f, a: float, b: float, n: int = 200) -> float:
-    """Composite Simpson's rule numerical integration over [a, b].
-    Used only for the ``use_exact=True`` (Beta-PDF) path; the Gaussian path
-    uses closed-form helpers above.
-    """
-    if n % 2 != 0:
-        n += 1
-    h = (b - a) / n
-    s = f(a) + f(b)
-    for i in range(1, n):
-        s += (4 if i % 2 else 2) * f(a + i * h)
-    return h / 3.0 * s
-def beta_pdf(x: float, d: int) -> float:
-    """PDF of a single coordinate after random rotation of a d-dim unit vector."""
-    if abs(x) >= 1.0:
-        return 0.0
-    coeff = math.gamma(d / 2) / (math.sqrt(math.pi) * math.gamma((d - 1) / 2))
-    return coeff * (1 - x * x) ** ((d - 3) / 2)
-def gaussian_approx_pdf(x: float, d: int) -> float:
-    """Gaussian approximation N(0, 1/d) -- accurate for d >= 64."""
-    sigma2 = 1.0 / d
-    return (1.0 / math.sqrt(2 * math.pi * sigma2)) * math.exp(-x * x / (2 * sigma2))
-def solve_lloyd_max(
-    d: int, bits: int, use_exact: bool = False, max_iter: int = 200, tol: float = 1e-10
-):
-    """
-    Solve Lloyd-Max optimal quantizer for the coordinate distribution.
-    Args:
-        d: vector dimension
-        bits: number of quantization bits
-        use_exact: if True, use exact Beta PDF; if False, use Gaussian approx
-        max_iter: maximum Lloyd-Max iterations
-        tol: convergence tolerance
-    Returns:
-        centroids: sorted tensor of 2^bits optimal centroids
-        boundaries: sorted tensor of 2^bits - 1 boundaries between centroids
-    """
-    n_levels = 2**bits
-    sigma = 1.0 / math.sqrt(d)
-    lo, hi = -3.5 * sigma, 3.5 * sigma
-    centroids = [lo + (hi - lo) * (i + 0.5) / n_levels for i in range(n_levels)]
-    for _ in range(max_iter):
-        boundaries = [
-            (centroids[i] + centroids[i + 1]) / 2.0 for i in range(n_levels - 1)
-        ]
-        edges = [lo * 3] + boundaries + [hi * 3]
-        new_centroids = []
-        for i in range(n_levels):
-            a, b = edges[i], edges[i + 1]
-            if use_exact:
-                numerator = _quad(lambda x: x * beta_pdf(x, d), a, b)
-                denominator = _quad(lambda x: beta_pdf(x, d), a, b)
-            else:
-                numerator = _int_x_pdf(a, b, sigma)
-                denominator = _int_pdf(a, b, sigma)
-            if denominator > 1e-15:
-                new_centroids.append(numerator / denominator)
-            else:
-                new_centroids.append(centroids[i])
-        max_shift = max(abs(new_centroids[i] - centroids[i]) for i in range(n_levels))
-        centroids = new_centroids
-        if max_shift < tol:
-            break
-    boundaries = [(centroids[i] + centroids[i + 1]) / 2.0 for i in range(n_levels - 1)]
-    return (
-        torch.tensor(centroids, dtype=torch.float32),
-        torch.tensor(boundaries, dtype=torch.float32),
-    )
-def compute_expected_distortion(
-    d: int,
-    bits: int,
-    centroids: torch.Tensor,
-    boundaries: torch.Tensor,
-    use_exact: bool = False,
-) -> float:
-    """Compute the expected MSE distortion per coordinate for the given quantizer."""
-    sigma = 1.0 / math.sqrt(d)
-    n_levels = len(centroids)
-    edges = [-3.5 * sigma * 3] + boundaries.tolist() + [3.5 * sigma * 3]
-    total_distortion = 0.0
-    for i in range(n_levels):
-        a, b = edges[i], edges[i + 1]
-        c = centroids[i].item()
-        if use_exact:
-            dist = _quad(lambda x, _c=c: (x - _c) ** 2 * beta_pdf(x, d), a, b)
-        else:
-            dist = _int_sq_pdf(a, b, sigma, c)
-        total_distortion += dist
-    return total_distortion
-class LloydMaxCodebook:
-    """Precomputed Lloyd-Max codebook for a given dimension and bit-width."""
-    def __init__(self, d: int, bits: int, use_exact: bool = False):
-        self.d = d
-        self.bits = bits
-        self.n_levels = 2**bits
-        self.centroids, self.boundaries = solve_lloyd_max(d, bits, use_exact)
-        self.distortion = compute_expected_distortion(
-            d, bits, self.centroids, self.boundaries, use_exact
-        )
-    def quantize(self, x: torch.Tensor) -> torch.Tensor:
-        """Quantize values to nearest centroid indices."""
-        diffs = x.unsqueeze(-1) - self.centroids.to(x.device)
-        return diffs.abs().argmin(dim=-1)
-    def dequantize(self, indices: torch.Tensor) -> torch.Tensor:
-        """Map indices back to centroid values."""
-        return self.centroids.to(indices.device)[indices]
-    def __repr__(self):
-        return (
-            f"LloydMaxCodebook(d={self.d}, bits={self.bits}, "
-            f"levels={self.n_levels}, distortion_per_coord={self.distortion:.6f})"
-        )

mcp_plesk_dev_docs-0.4.2/plesk_unified/turboquant/turboquant.py DELETED Viewed

@@ -1,249 +0,0 @@
-"""TurboQuant: two-stage vector quantization."""
-from __future__ import annotations
-import math
-from typing import Optional, Tuple, cast
-import torch
-from torch import nn
-from .lloyd_max import LloydMaxCodebook
-def generate_rotation_matrix(
-    d: int, seed: Optional[int] = None, device: str = "cpu"
-) -> torch.Tensor:
-    """Generate a random orthogonal rotation matrix via QR decomposition."""
-    gen = torch.Generator(device="cpu")
-    if seed is not None:
-        gen.manual_seed(seed)
-    G = torch.randn(d, d, generator=gen)
-    Q, R = torch.linalg.qr(G)
-    diag_sign = torch.sign(torch.diag(R))
-    diag_sign[diag_sign == 0] = 1.0
-    Q = Q * diag_sign.unsqueeze(0)
-    return Q.to(device)
-def generate_qjl_matrix(
-    d: int, m: Optional[int] = None, seed: Optional[int] = None, device: str = "cpu"
-) -> torch.Tensor:
-    """
-    Generate the random projection matrix S for QJL.
-    S has i.i.d. N(0,1) entries, shape (m, d).
-    Default m = d (same dimensionality).
-    """
-    if m is None:
-        m = d
-    gen = torch.Generator(device="cpu")
-    if seed is not None:
-        gen.manual_seed(seed)
-    S = torch.randn(m, d, generator=gen)
-    return S.to(device)
-class TurboQuantMSE(nn.Module):
-    """Stage 1: MSE-optimal quantizer."""
-    def __init__(self, d: int, bits: int, seed: int = 42, device: str = "cpu"):
-        super().__init__()
-        self.d = d
-        self.bits = bits
-        self.device = device
-        self.register_buffer(
-            "Pi", generate_rotation_matrix(d, seed=seed, device=device)
-        )
-        self.codebook = LloydMaxCodebook(d, bits)
-        self.register_buffer("centroids", self.codebook.centroids.to(device))
-        self.register_buffer("boundaries", self.codebook.boundaries.to(device))
-    def rotate(self, x: torch.Tensor) -> torch.Tensor:
-        Pi = cast("torch.Tensor", self.Pi)
-        return x @ Pi.T
-    def unrotate(self, y: torch.Tensor) -> torch.Tensor:
-        Pi = cast("torch.Tensor", self.Pi)
-        return y @ Pi
-    def quantize(self, x: torch.Tensor) -> torch.Tensor:
-        centroids = cast("torch.Tensor", self.centroids)
-        y = self.rotate(x)
-        diffs = y.unsqueeze(-1) - centroids
-        indices = diffs.abs().argmin(dim=-1)
-        return indices
-    def dequantize(self, indices: torch.Tensor) -> torch.Tensor:
-        centroids = cast("torch.Tensor", self.centroids)
-        y_hat = centroids[indices]
-        return self.unrotate(y_hat)
-    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-        indices = self.quantize(x)
-        x_hat = self.dequantize(indices)
-        return x_hat, indices
-class TurboQuantProd(nn.Module):
-    """Stage 1 + Stage 2: Unbiased inner product quantizer."""
-    def __init__(
-        self,
-        d: int,
-        bits: int,
-        qjl_dim: Optional[int] = None,
-        seed: int = 42,
-        device: str = "cpu",
-    ):
-        super().__init__()
-        self.d = d
-        self.bits = bits
-        self.mse_bits = max(bits - 1, 1)
-        self.qjl_dim = qjl_dim or d
-        self.device = device
-        self.mse = TurboQuantMSE(d, self.mse_bits, seed=seed, device=device)
-        self.register_buffer(
-            "S", generate_qjl_matrix(d, m=self.qjl_dim, seed=seed + 1, device=device)
-        )
-    def quantize(self, x: torch.Tensor) -> dict:
-        x_hat, mse_indices = self.mse(x)
-        residual = x - x_hat
-        residual_norm = torch.norm(residual, dim=-1, keepdim=True)
-        S = cast("torch.Tensor", self.S)
-        projected = residual @ S.T
-        qjl_signs = torch.sign(projected)
-        qjl_signs[qjl_signs == 0] = 1.0
-        return {
-            "mse_indices": mse_indices,
-            "qjl_signs": qjl_signs,
-            "residual_norm": residual_norm.squeeze(-1),
-        }
-    def dequantize(self, compressed: dict) -> torch.Tensor:
-        return self.mse.dequantize(compressed["mse_indices"])
-    def inner_product(self, y: torch.Tensor, compressed: dict) -> torch.Tensor:
-        x_mse = self.mse.dequantize(compressed["mse_indices"])
-        term1 = (y * x_mse).sum(dim=-1)
-        S = cast("torch.Tensor", self.S)
-        y_projected = y @ S.T
-        qjl_ip = (y_projected * compressed["qjl_signs"]).sum(dim=-1)
-        m = self.qjl_dim
-        correction_scale = math.sqrt(math.pi / 2) / m
-        term2 = compressed["residual_norm"] * correction_scale * qjl_ip
-        return term1 + term2
-    def forward(self, x: torch.Tensor) -> dict:
-        return self.quantize(x)
-class TurboQuantKVCache:
-    """KV cache wrapper that uses TurboQuant to compress keys and values."""
-    def __init__(
-        self,
-        d_key: int,
-        d_value: int,
-        bits: int = 3,
-        seed: int = 42,
-        device: str = "cpu",
-    ):
-        self.d_key = d_key
-        self.d_value = d_value
-        self.bits = bits
-        self.device = device
-        self.key_quantizer = TurboQuantProd(d_key, bits, seed=seed, device=device)
-        self.value_quantizer = TurboQuantMSE(
-            d_value, bits, seed=seed + 100, device=device
-        )
-        self.key_cache = []
-        self.value_cache = []
-    def append(self, keys: torch.Tensor, values: torch.Tensor):
-        orig_shape = keys.shape
-        flat_keys = keys.reshape(-1, self.d_key)
-        flat_values = values.reshape(-1, self.d_value)
-        compressed_keys = self.key_quantizer.quantize(flat_keys)
-        value_indices = self.value_quantizer.quantize(flat_values)
-        self.key_cache.append(
-            {
-                "mse_indices": compressed_keys["mse_indices"],
-                "qjl_signs": compressed_keys["qjl_signs"],
-                "residual_norm": compressed_keys["residual_norm"],
-                "shape": orig_shape,
-            }
-        )
-        self.value_cache.append(
-            {
-                "indices": value_indices,
-                "shape": values.shape,
-            }
-        )
-    def attention_scores(self, queries: torch.Tensor) -> torch.Tensor:
-        scores = []
-        for cached in self.key_cache:
-            s = self.key_quantizer.inner_product(queries, cached)
-            scores.append(s)
-        return torch.cat(scores, dim=-1) if scores else torch.tensor([])
-    def get_values(self) -> torch.Tensor:
-        values = []
-        for cached in self.value_cache:
-            v = self.value_quantizer.dequantize(cached["indices"])
-            values.append(v)
-        return torch.cat(values, dim=0) if values else torch.tensor([])
-    def memory_usage_bits(self) -> dict:
-        n_keys = (
-            sum(c["mse_indices"].numel() for c in self.key_cache)
-            if self.key_cache
-            else 0
-        )
-        n_qjl = (
-            sum(c["qjl_signs"].numel() for c in self.key_cache) if self.key_cache else 0
-        )
-        n_norms = (
-            sum(c["residual_norm"].numel() for c in self.key_cache)
-            if self.key_cache
-            else 0
-        )
-        n_values = (
-            sum(c["indices"].numel() for c in self.value_cache)
-            if self.value_cache
-            else 0
-        )
-        key_bits = n_keys * self.key_quantizer.mse_bits + n_qjl * 1 + n_norms * 16
-        value_bits = n_values * self.bits
-        fp16_equivalent = (n_keys + n_values) * 16
-        return {
-            "key_bits": key_bits,
-            "value_bits": value_bits,
-            "total_bits": key_bits + value_bits,
-            "fp16_bits": fp16_equivalent,
-            "compression_ratio": (
-                fp16_equivalent / (key_bits + value_bits)
-                if (key_bits + value_bits) > 0
-                else 0
-            ),
-        }
-    def __len__(self):
-        return (
-            sum(c["mse_indices"].shape[0] for c in self.key_cache)
-            if self.key_cache
-            else 0
-        )