PyPI - memstack-skill-loader - Versions diffs - 3.5.0__py3-none-any.whl - Mend

memstack-skill-loader 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

memstack_skill_loader/__init__.py +1 -0
memstack_skill_loader/__main__.py +18 -0
memstack_skill_loader/compression.py +345 -0
memstack_skill_loader/config.py +114 -0
memstack_skill_loader/dashboard.html +829 -0
memstack_skill_loader/dashboard.py +360 -0
memstack_skill_loader/indexer.py +240 -0
memstack_skill_loader/license.py +409 -0
memstack_skill_loader/search.py +164 -0
memstack_skill_loader/server.py +883 -0
memstack_skill_loader/stats.py +428 -0
memstack_skill_loader/tfidf_search.py +142 -0
memstack_skill_loader/version_check.py +93 -0
memstack_skill_loader-3.5.0.dist-info/METADATA +10 -0
memstack_skill_loader-3.5.0.dist-info/RECORD +18 -0
memstack_skill_loader-3.5.0.dist-info/WHEEL +5 -0
memstack_skill_loader-3.5.0.dist-info/entry_points.txt +2 -0
memstack_skill_loader-3.5.0.dist-info/top_level.txt +1 -0

memstack_skill_loader/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """MemStack Skill Loader — MCP server for semantic skill search."""

memstack_skill_loader/__main__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""Entry point for python -m memstack_skill_loader."""
+import asyncio
+import sys
+from .server import run
def main():
    """Dispatch the ``python -m memstack_skill_loader`` command line.

    When the first argument is ``dashboard`` the dashboard is started;
    otherwise (including when no argument is given) ``run()`` from the
    server module is executed under ``asyncio.run``.
    """
    if sys.argv[1:2] != ["dashboard"]:
        asyncio.run(run())
        return
    # Imported here so the dashboard module is only loaded when requested.
    from .dashboard import start_dashboard
    start_dashboard()


if __name__ == "__main__":
    main()

memstack_skill_loader/compression.py ADDED Viewed

@@ -0,0 +1,345 @@
+"""Skill-aware compression for MemStack MCP Skill Loader.
+Compresses skill content before serving to reduce token consumption.
+Compression is tiered: free tier gets basic stripping, Pro tier gets
+advanced section-aware compression.
+All compression preserves:
+- Code blocks (fenced with triple backticks)
+- Checklists and action items
+- Decision tables (content, not formatting padding)
+- URLs and links
+- Conditional logic
+"""
+import hashlib
+import re
+from collections import OrderedDict
+# ---------------------------------------------------------------------------
+# Cache
+# ---------------------------------------------------------------------------
# Maximum number of compressed results kept in the cache.
_MAX_CACHE = 200
# Maps (slug, tier, content fingerprint) -> compressed content.
_cache: OrderedDict[tuple[str, str, str], str] = OrderedDict()


def _cache_key(slug: str, tier: str, content: str) -> tuple[str, str, str]:
    """Build the cache key for one slug / tier / content combination.

    The content is represented by the first 8 hex digits of its MD5 digest.
    The digest is a fingerprint only (``usedforsecurity=False``).
    """
    digest = hashlib.md5(content.encode(), usedforsecurity=False)
    fingerprint = digest.hexdigest()[:8]
    return slug, tier, fingerprint


def clear_cache() -> None:
    """Empty the compression cache (called on reindex)."""
    _cache.clear()
+# ---------------------------------------------------------------------------
+# Token estimation
+# ---------------------------------------------------------------------------
def estimate_tokens(text: str) -> int:
    """Approximate the token count of *text* at about 4 characters per token.

    The result is never below 1, even for an empty string.
    """
    approx = len(text) // 4
    return approx if approx > 1 else 1
+# ---------------------------------------------------------------------------
+# Compression helpers — operate on text OUTSIDE code blocks only
+# ---------------------------------------------------------------------------
# A fenced span: an opening triple backtick through the nearest closing one.
_CODE_BLOCK_RE = re.compile(r"(```[\s\S]*?```)", re.DOTALL)


def _split_code_blocks(content: str) -> list[tuple[str, bool]]:
    """Partition *content* into ordered ``(text, is_code_block)`` pieces.

    Fenced spans are flagged ``True`` and the text between them ``False``.
    Empty gaps are omitted, so concatenating the pieces reproduces *content*.
    A fence that is never closed is not matched and stays part of the text.
    """
    segments: list[tuple[str, bool]] = []
    cursor = 0
    for fence in _CODE_BLOCK_RE.finditer(content):
        begin, finish = fence.span()
        if begin > cursor:
            segments.append((content[cursor:begin], False))
        segments.append((fence.group(0), True))
        cursor = finish
    tail = content[cursor:]
    if tail:
        segments.append((tail, False))
    return segments
def _apply_outside_code(content: str, fn) -> str:
    """Run *fn* over each non-code segment of *content* and rejoin the pieces.

    Fenced code blocks are passed through untouched. *fn* is called once per
    text segment, not once for the whole document.
    """
    return "".join(
        segment if fenced else fn(segment)
        for segment, fenced in _split_code_blocks(content)
    )
+# ---------------------------------------------------------------------------
+# Free tier transforms
+# ---------------------------------------------------------------------------
+# Patterns for "when to use" sections at the top of a skill
# A level-1/2 heading named "When to use" / "Trigger" / "Use when" /
# "Description", plus everything up to the next level-1/2 heading or the end
# of the searched text.
_WHEN_TO_USE_RE = re.compile(
    r"^(##?\s*(?:When\s+to\s+[Uu]se|Trigger|Use\s+[Ww]hen|Description)\b.*?)(?=^##?\s|\Z)",
    re.MULTILINE | re.DOTALL,
)
# A single line that repeats a frontmatter field, optionally bold-prefixed.
# Only spaces/tabs are allowed around the colon: with `\s*` a label that has
# an empty value ("Version:" then newline) also consumed the whole next line.
_FRONTMATTER_ECHO_RE = re.compile(
    r"^\*?\*?(?:Name|Version|License|Pro since|Description)[ \t]*[:][ \t]*.*$",
    re.MULTILINE | re.IGNORECASE,
)
# Markdown image pointing at an http(s) URL, plus trailing whitespace.
_BADGE_RE = re.compile(r"!\[.*?\]\(https?://.*?\)\s*", re.MULTILINE)
# Horizontal rule made of three or more dashes.
_HR_RE = re.compile(r"^-{3,}\s*$", re.MULTILINE)
# Opening/closing <p align="center"> tags.
_ALIGN_HTML_RE = re.compile(r"</?p\s+align\s*=\s*[\"']center[\"']\s*/?>", re.IGNORECASE)
# Three or more consecutive newlines.
_MULTI_BLANK_RE = re.compile(r"\n{3,}")
# Spaces/tabs at the end of a line.
_TRAILING_WS_RE = re.compile(r"[ \t]+$", re.MULTILINE)
_EMOJI_HEADING_RE = re.compile(
    r"^(#{1,6}\s+)"  # heading prefix
    r"[\U0001F300-\U0001FAFF\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D]+"  # emoji cluster
    r"\s*",  # trailing space after emoji
    re.MULTILINE,
)
# A pipe followed by two or more spaces/tabs.
_TABLE_PADDING_RE = re.compile(r"\|[ \t]{2,}")
def _strip_when_to_use(text: str) -> str:
    """Drop a 'When to use' / trigger section found near the top of *text*.

    Only the first 40 lines are searched, so a section whose heading starts
    later is left alone.

    NOTE(review): the match is bounded by that 40-line window, so a section
    that starts inside it but continues past line 40 is only removed up to
    the end of the window.
    """
    # The window is a prefix of *text*, so match offsets apply to *text* too.
    window = "\n".join(text.split("\n", 40)[:40])
    found = _WHEN_TO_USE_RE.search(window)
    if found is None:
        return text
    begin, finish = found.span()
    return text[:begin] + text[finish:]
def _strip_frontmatter_echo(text: str) -> str:
    """Blank out lines matched by ``_FRONTMATTER_ECHO_RE`` (field echoes such as ``Name: ...``)."""
    return re.sub(_FRONTMATTER_ECHO_RE, "", text)
def _strip_badges(text: str) -> str:
    """Remove markdown images with http(s) URLs, along with trailing whitespace."""
    return re.sub(_BADGE_RE, "", text)
def _strip_hrs(text: str) -> str:
    """Replace each dash-only horizontal rule with a single newline."""
    return re.sub(_HR_RE, "\n", text)
def _strip_align_html(text: str) -> str:
    """Remove ``<p align="center">`` style tags, keeping the content between them."""
    return re.sub(_ALIGN_HTML_RE, "", text)
def _normalize_whitespace(text: str) -> str:
    """Strip trailing spaces/tabs, then collapse runs of 3+ newlines to two."""
    without_trailing = _TRAILING_WS_RE.sub("", text)
    return _MULTI_BLANK_RE.sub("\n\n", without_trailing)
def _strip_emoji_headings(text: str) -> str:
    """Drop a leading emoji cluster from markdown headings, keeping the ``#`` prefix."""
    return _EMOJI_HEADING_RE.sub(lambda heading: heading.group(1), text)
def _compact_tables(text: str) -> str:
    """Shrink two or more spaces/tabs after a pipe down to a single space."""
    return re.sub(_TABLE_PADDING_RE, "| ", text)
+# ---------------------------------------------------------------------------
+# Pro tier transforms
+# ---------------------------------------------------------------------------
_CHECKLIST_SUB_RE = re.compile(
    r"^(- \[[ x]\] .+)\n"  # main checklist item
    r"((?:  - .+\n)+)",  # one or more sub-items (2-space indented)
    re.MULTILINE,
)


def _compact_checklists(text: str) -> str:
    """Flatten simple sub-items into their parent checklist item.

    A checklist line followed by 2-space-indented ``- `` sub-items becomes
    ``<item> (<sub 1>; <sub 2>; ...)``. If any sub-item is longer than
    80 characters the whole group is left unchanged.
    """
    def _flatten(m: re.Match) -> str:
        main = m.group(1).rstrip()
        sub_texts = []
        for raw in m.group(2).strip().split("\n"):
            # Remove only the single bullet marker. lstrip("- ") would strip
            # every leading '-' and ' ', mangling items such as "--verbose".
            item = raw.strip().removeprefix("-").strip()
            if len(item) > 80:
                return m.group(0)  # too complex, leave as-is
            sub_texts.append(item)
        return main + " (" + "; ".join(sub_texts) + ")\n"

    return _CHECKLIST_SUB_RE.sub(_flatten, text)
def _summarize_long_sections(text: str) -> str:
    """For skills longer than 150 lines, compress non-critical sections.

    A section runs from a level 1-3 heading to the next one. Sections longer
    than 10 lines are reduced to their heading plus their first non-empty,
    non-heading line, unless the heading contains a protected keyword
    (checklist, pitfall, setup, example, ...) or the section contains a
    fenced code block.

    Lines starting with ``#`` inside a fenced code block (shell or Python
    comments, for example) are not treated as headings. Otherwise a long run
    of code between two comments would be collapsed to one line.
    """
    lines = text.split("\n")
    if len(lines) <= 150:
        return text

    # Identify section headings, tracking fence state so that comment lines
    # inside code blocks are never mistaken for headings.
    sections: list[tuple[int, str]] = []
    in_fence = False
    for i, line in enumerate(lines):
        if not in_fence and re.match(r"^#{1,3}\s+", line):
            sections.append((i, line))
        # Fences pair up in order of appearance, so an odd number of
        # triple-backtick markers on a line flips the state.
        if line.count("```") % 2:
            in_fence = not in_fence
    if not sections:
        return text

    # Headings containing any of these keywords are never compressed.
    protected = {"checklist", "pitfall", "gotcha", "common mistake", "warning",
                 "important", "critical", "prerequisite", "setup", "install",
                 "implementation", "step", "example", "code"}

    result_lines = list(lines)
    # Process sections in reverse so earlier indices stay valid after splicing.
    for idx in range(len(sections) - 1, -1, -1):
        start = sections[idx][0]
        heading = sections[idx][1].lower()
        end = sections[idx + 1][0] if idx + 1 < len(sections) else len(lines)
        if end - start <= 10:
            continue
        if any(kw in heading for kw in protected):
            continue
        # Sections containing code blocks are kept whole.
        if "```" in "\n".join(lines[start:end]):
            continue
        # Compress: keep the heading plus the first non-empty, non-heading line.
        first_sentence = ""
        for candidate in lines[start + 1:end]:
            stripped = candidate.strip()
            if stripped and not stripped.startswith("#"):
                first_sentence = stripped
                break
        if first_sentence:
            result_lines[start:end] = [lines[start], first_sentence, ""]
        # else: nothing usable to summarize with, leave the section as-is
    return "\n".join(result_lines)
def _trim_redundant_examples(text: str) -> str:
    """If 3+ consecutive code blocks exist for the same concept, keep the first two.

    Code blocks count as consecutive while the text between them is at most
    50 characters after stripping. From the third block of such a run on,
    each block is replaced by a short note pointing at the full skill.
    """
    parts = _split_code_blocks(text)
    # Count code blocks directly. A segment-count threshold misses documents
    # that start or end on a code block (3 blocks can be as few as 5 segments).
    code_block_count = sum(1 for _, is_code in parts if is_code)
    if code_block_count < 3:
        return text

    result = []
    consecutive_code = 0
    for txt, is_code in parts:
        if is_code:
            consecutive_code += 1
            if consecutive_code <= 2:
                result.append(txt)
            else:
                result.append("\n*[Additional example omitted — use `get_skill(name, full=true)` for all examples]*\n")
            continue
        # Longer prose between blocks starts a new group; short or empty
        # text keeps the current run going.
        if len(txt.strip()) > 50:
            consecutive_code = 0
        result.append(txt)
    return "".join(result)
+# ---------------------------------------------------------------------------
+# Main entry point
+# ---------------------------------------------------------------------------
# Below this many estimated tokens a compressed result is considered too
# aggressive and the original content is returned instead.
_MIN_COMPRESSED_TOKENS = 50


def compress_skill(content: str, tier: str = "free") -> str:
    """Compress skill content based on tier.

    Args:
        content: Raw skill markdown content (frontmatter already stripped).
        tier: "free" or "pro". Pro applies the free transforms plus checklist
            flattening, long-section summarizing and example trimming.

    Returns:
        Compressed content string, or the original content unchanged when
        compression would shrink it below ``_MIN_COMPRESSED_TOKENS``.
    """
    original_content = content
    original_tokens = estimate_tokens(content)

    # --- Free tier transforms (always applied) ---
    # The "when to use" strip targets the top of the skill only. Running it
    # on every non-code segment would restart its 40-line window after each
    # code block and could remove e.g. a "## Description" section deep in
    # the document, so it is applied to the leading prose segment alone.
    segments = _split_code_blocks(content)
    if segments and not segments[0][1]:
        lead = _strip_when_to_use(segments[0][0])
        content = lead + "".join(seg for seg, _ in segments[1:])

    # Remaining text transforms run on all text outside code blocks.
    for transform in (
        _strip_frontmatter_echo,
        _strip_badges,
        _strip_hrs,
        _strip_align_html,
        _strip_emoji_headings,
        _compact_tables,
    ):
        content = _apply_outside_code(content, transform)

    # --- Pro tier transforms ---
    if tier == "pro":
        content = _apply_outside_code(content, _compact_checklists)
        content = _summarize_long_sections(content)
        content = _trim_redundant_examples(content)

    # Normalize whitespace last (applies everywhere outside code)
    content = _apply_outside_code(content, _normalize_whitespace)
    # Final trim of leading/trailing whitespace
    content = content.strip()

    # Safety: if compression was too aggressive, return original uncompressed
    compressed_tokens = estimate_tokens(content)
    if compressed_tokens < _MIN_COMPRESSED_TOKENS and original_tokens >= _MIN_COMPRESSED_TOKENS:
        return original_content
    return content
def get_or_compress(skill: dict, tier: str = "free") -> tuple[str, int, int]:
    """Get compressed skill content, using the cache if available.

    Args:
        skill: Skill dict with a 'content' key and a 'slug' key ('name' is
            used as the cache identity when 'slug' is absent).
        tier: "free" or "pro".

    Returns:
        Tuple of (compressed_content, tokens_before, tokens_after).

    Raises:
        KeyError: If 'content' is missing, or both 'slug' and 'name' are.
    """
    raw = skill["content"]
    tokens_before = estimate_tokens(raw)
    # Look up 'name' only when 'slug' is absent. dict.get(key, default)
    # evaluates its default eagerly, which raised KeyError for skills that
    # have a slug but no name.
    identity = skill["slug"] if "slug" in skill else skill["name"]
    key = _cache_key(identity, tier, raw)

    if key in _cache:
        # Mark as most recently used.
        _cache.move_to_end(key)
        compressed = _cache[key]
        return compressed, tokens_before, estimate_tokens(compressed)

    compressed = compress_skill(raw, tier=tier)
    # Add to cache with LRU eviction (oldest entry is dropped first).
    _cache[key] = compressed
    if len(_cache) > _MAX_CACHE:
        _cache.popitem(last=False)
    tokens_after = estimate_tokens(compressed)
    return compressed, tokens_before, tokens_after

memstack_skill_loader/config.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""Config loading and validation for MemStack Skill Loader."""
+import json
+import os
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
@dataclass
class SkillSource:
    # One configured location that skills are loaded from.
    type: str  # source kind; load_config falls back to "local" when unset
    path: str  # filesystem path of the source, stored as a string
    pattern: str = "**/SKILL.md"  # presumably a glob selecting skill files — confirm against the indexer
    label: str = "Unknown"  # name for the source (e.g. "MemStack Pro")
@dataclass
class Config:
    # Loader settings; every field has a default so Config() is usable as-is.
    skill_sources: list[SkillSource] = field(default_factory=list)
    embedding_model: str = "all-MiniLM-L6-v2"
    default_top_k: int = 3
    vector_db_path: str = "./vectors"
    auto_reindex_on_start: bool = False
    # Base directory for relative paths; defaults to the current working directory.
    _config_dir: Path = field(default_factory=Path.cwd, repr=False)

    @property
    def resolved_vector_db_path(self) -> Path:
        """Absolute vector DB path; relative values are anchored at ``_config_dir``."""
        candidate = Path(self.vector_db_path).expanduser()
        anchored = candidate if candidate.is_absolute() else self._config_dir / candidate
        return anchored.resolve()

    @property
    def pro_skills_dir(self) -> Path:
        """Return the pro-skills directory — customer download first, bundled fallback."""
        downloaded = Path.home() / ".memstack" / "pro-skills"
        marker = downloaded / ".complete"
        # The download only counts once its ".complete" marker is present.
        if downloaded.exists() and marker.exists():
            return downloaded
        package_dir = Path(__file__).resolve().parent
        return package_dir.parent.parent / "pro-skills"

    def with_pro_skills(self) -> "Config":
        """Return a copy of this config with the pro-skills source appended.

        Returns ``self`` unchanged when the pro-skills directory does not exist.
        """
        pro_dir = self.pro_skills_dir
        if not pro_dir.exists():
            return self
        extended_sources = self.skill_sources + [
            SkillSource(
                type="local",
                path=str(pro_dir),
                pattern="**/SKILL.md",
                label="MemStack Pro",
            )
        ]
        return Config(
            skill_sources=extended_sources,
            embedding_model=self.embedding_model,
            default_top_k=self.default_top_k,
            vector_db_path=self.vector_db_path,
            auto_reindex_on_start=self.auto_reindex_on_start,
            _config_dir=self._config_dir,
        )
def load_config(config_path: Path | None = None) -> Config:
    """Load config from a JSON file, falling back to defaults when unusable.

    Args:
        config_path: Path of the JSON config. Defaults to ``config.json``
            three directory levels above this module.

    Returns:
        The parsed Config. A default ``Config()`` is returned when the file
        is missing, is not valid JSON, or its top level is not a JSON object.
        Malformed ``skill_sources`` entries are skipped with a warning on
        stderr rather than raising.
    """
    if config_path is None:
        config_path = Path(__file__).resolve().parent.parent.parent / "config.json"
    if not config_path.exists():
        print(f"Config not found at {config_path}, using defaults", file=sys.stderr)
        return Config()
    try:
        with open(config_path, encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON in {config_path}: {e}", file=sys.stderr)
        return Config()
    # Valid JSON can still be a list/string/number; everything below needs a dict.
    if not isinstance(data, dict):
        print(f"Config in {config_path} is not a JSON object, using defaults", file=sys.stderr)
        return Config()

    raw_sources = data.get("skill_sources", [])
    if not isinstance(raw_sources, list):
        print(f"Warning: 'skill_sources' is not a list, ignoring: {raw_sources!r}", file=sys.stderr)
        raw_sources = []

    sources = []
    # When set, MEMSTACK_SKILLS_DIR replaces the path of every configured source.
    env_skills_dir = os.environ.get("MEMSTACK_SKILLS_DIR")
    for s in raw_sources:
        if not isinstance(s, dict) or "path" not in s:
            print(f"Warning: skill source missing 'path', skipping: {s}", file=sys.stderr)
            continue
        skill_path = env_skills_dir if env_skills_dir else str(Path(s["path"]).expanduser())
        sources.append(SkillSource(
            type=s.get("type", "local"),
            path=skill_path,
            pattern=s.get("pattern", "**/SKILL.md"),
            label=s.get("label", "Unknown"),
        ))

    # For sources whose path does not exist, fall back to a sibling
    # "memstack/skills" checkout next to the config's parent directory.
    for source in sources:
        p = Path(source.path).expanduser()
        if not p.exists():
            sibling = config_path.parent.resolve().parent / "memstack" / "skills"
            if sibling.exists():
                source.path = str(sibling)
                print(f"Auto-detected skills at {sibling}", file=sys.stderr)

    config = Config(
        skill_sources=sources,
        embedding_model=data.get("embedding_model", "all-MiniLM-L6-v2"),
        default_top_k=data.get("default_top_k", 3),
        vector_db_path=data.get("vector_db_path", "./vectors"),
        auto_reindex_on_start=data.get("auto_reindex_on_start", False),
        _config_dir=config_path.parent.resolve(),
    )
    # Auto-detect pro-skills if license key is set and directory exists
    if os.environ.get("MEMSTACK_PRO_LICENSE_KEY"):
        config = config.with_pro_skills()
    return config