memstack-skill-loader 3.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memstack_skill_loader-3.5.0/MANIFEST.in +1 -0
- memstack_skill_loader-3.5.0/PKG-INFO +10 -0
- memstack_skill_loader-3.5.0/README.md +138 -0
- memstack_skill_loader-3.5.0/pyproject.toml +25 -0
- memstack_skill_loader-3.5.0/setup.cfg +4 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/__init__.py +1 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/__main__.py +18 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/compression.py +345 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/config.py +114 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/dashboard.html +829 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/dashboard.py +360 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/indexer.py +240 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/license.py +409 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/search.py +164 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/server.py +883 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/stats.py +428 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/tfidf_search.py +142 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader/version_check.py +93 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader.egg-info/PKG-INFO +10 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader.egg-info/SOURCES.txt +22 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader.egg-info/dependency_links.txt +1 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader.egg-info/entry_points.txt +2 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader.egg-info/requires.txt +5 -0
- memstack_skill_loader-3.5.0/src/memstack_skill_loader.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
prune pro-skills
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: memstack-skill-loader
|
|
3
|
+
Version: 3.5.0
|
|
4
|
+
Summary: MCP server that vector-indexes MemStack Pro skills for on-demand loading
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: mcp>=1.0.0
|
|
7
|
+
Requires-Dist: lancedb>=0.6.0
|
|
8
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
9
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
10
|
+
Requires-Dist: httpx>=0.24.0
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# MemStack™ Skill Loader
|
|
2
|
+
|
|
3
|
+
**114 skills for Claude Code** — 85 free + 29 Pro exclusive. Skills are vector-indexed so Claude Code loads only the skill it needs, saving your context window.
|
|
4
|
+
|
|
5
|
+
## Quick Start (5 minutes)
|
|
6
|
+
|
|
7
|
+
1. **Clone both repos side-by-side:**
|
|
8
|
+
```bash
|
|
9
|
+
git clone https://github.com/cwinvestments/memstack.git
|
|
10
|
+
git clone https://github.com/cwinvestments/memstack-skill-loader.git
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
2. **Install the skill loader:**
|
|
14
|
+
```bash
|
|
15
|
+
cd memstack-skill-loader
|
|
16
|
+
pip install -e . --break-system-packages
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
3. **Register with Claude Code:**
|
|
20
|
+
```bash
|
|
21
|
+
claude mcp add --scope user memstack-skills -- python -m memstack_skill_loader
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
4. **Restart Claude Code**, then type `list skills` to verify.
|
|
25
|
+
|
|
26
|
+
> The skill loader auto-detects the `memstack` repo if cloned as a sibling directory.
|
|
27
|
+
> To use a different location, set `MEMSTACK_SKILLS_DIR=/path/to/memstack/skills`.
|
|
28
|
+
|
|
29
|
+
> See [QUICKSTART.md](QUICKSTART.md) for detailed setup, [QUICK-REFERENCE.md](QUICK-REFERENCE.md) for the full skill catalog, and [TROUBLESHOOTING.md](TROUBLESHOOTING.md) if you hit issues.
|
|
30
|
+
|
|
31
|
+
## How It Works
|
|
32
|
+
|
|
33
|
+
The skill loader is an MCP server that vector-indexes all 114 MemStack™ skills so Claude Code can call `find_skill("deploy to Railway")` and load **only** the relevant skill on demand — instead of every skill consuming the context window.
|
|
34
|
+
|
|
35
|
+
- **No API keys required** — everything runs locally
|
|
36
|
+
- **Pro skills auto-detected** — set your license key and they appear automatically
|
|
37
|
+
- **Auto-reindex on start** — skills stay current without manual rebuilds
|
|
38
|
+
|
|
39
|
+
### Environment Variable Override
|
|
40
|
+
|
|
41
|
+
Set `MEMSTACK_SKILLS_DIR` to override the skills path in `config.json` (the override replaces the `path` of every entry in `skill_sources`):
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
export MEMSTACK_SKILLS_DIR=/path/to/your/memstack/skills
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Stack
|
|
48
|
+
|
|
49
|
+
- Python 3.12+
|
|
50
|
+
- [MCP SDK](https://pypi.org/project/mcp/) (stdio transport)
|
|
51
|
+
- [LanceDB](https://lancedb.com/) (vector storage, zero external dependencies)
|
|
52
|
+
- [sentence-transformers](https://www.sbert.net/) with `all-MiniLM-L6-v2` (384-dim local embeddings)
|
|
53
|
+
|
|
54
|
+
## Available Tools
|
|
55
|
+
|
|
56
|
+
### `find_skill`
|
|
57
|
+
Search skills by describing what you need. Returns the most relevant skill(s) with full instructions.
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
find_skill(query="deploy to Railway", top_k=3)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### `list_skills`
|
|
64
|
+
Browse the full skill catalog with names and descriptions.
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
list_skills()
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### `get_skill`
|
|
71
|
+
Load a specific skill by exact name.
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
get_skill(name="railway deploy")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### `reindex_skills`
|
|
78
|
+
Rebuild the vector index after adding or modifying skills.
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
reindex_skills()
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Configuration
|
|
85
|
+
|
|
86
|
+
The `config.json` file controls where skills are loaded from:
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"skill_sources": [
|
|
91
|
+
{
|
|
92
|
+
"type": "local",
|
|
93
|
+
"path": "C:\\Projects\\memstack\\skills",
|
|
94
|
+
"pattern": "**/SKILL.md",
|
|
95
|
+
"label": "MemStack"
|
|
96
|
+
}
|
|
97
|
+
],
|
|
98
|
+
"embedding_model": "all-MiniLM-L6-v2",
|
|
99
|
+
"default_top_k": 3,
|
|
100
|
+
"vector_db_path": "./vectors",
|
|
101
|
+
"auto_reindex_on_start": true
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Pro skills are **auto-detected** when `MEMSTACK_PRO_LICENSE_KEY` is set — no need to add them to `config.json`.
|
|
106
|
+
|
|
107
|
+
Add entries to `skill_sources` to index skills from multiple directories:
|
|
108
|
+
|
|
109
|
+
```json
|
|
110
|
+
{
|
|
111
|
+
"skill_sources": [
|
|
112
|
+
{
|
|
113
|
+
"type": "local",
|
|
114
|
+
"path": "C:\\Projects\\memstack\\skills",
|
|
115
|
+
"pattern": "**/SKILL.md",
|
|
116
|
+
"label": "MemStack"
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"type": "local",
|
|
120
|
+
"path": "/home/user/custom-skills",
|
|
121
|
+
"pattern": "*.md",
|
|
122
|
+
"label": "My Custom Skills"
|
|
123
|
+
}
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
The `pattern` field controls how skills are discovered:
|
|
129
|
+
- `**/SKILL.md` — Subdirectory structure (e.g., `category/skill-name/SKILL.md`)
|
|
130
|
+
- `*.md` — Flat directory (each `.md` file is a skill)
|
|
131
|
+
|
|
132
|
+
## Release Notes
|
|
133
|
+
|
|
134
|
+
**[v3.4.0](https://github.com/cwinvestments/memstack-skill-loader/releases/tag/v3.4.0)** — 100 Skills Milestone (18 new Pro skills, auto-detection, display name fixes)
|
|
135
|
+
|
|
136
|
+
## License
|
|
137
|
+
|
|
138
|
+
Proprietary — Part of MemStack™ Pro by CW Affiliate Investments LLC.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "memstack-skill-loader"
|
|
7
|
+
version = "3.5.0"
|
|
8
|
+
description = "MCP server that vector-indexes MemStack Pro skills for on-demand loading"
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"mcp>=1.0.0",
|
|
12
|
+
"lancedb>=0.6.0",
|
|
13
|
+
"sentence-transformers>=2.2.0",
|
|
14
|
+
"pyarrow>=14.0.0",
|
|
15
|
+
"httpx>=0.24.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
memstack-index = "memstack_skill_loader.indexer:main"
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
where = ["src"]
|
|
23
|
+
|
|
24
|
+
[tool.setuptools.package-data]
|
|
25
|
+
memstack_skill_loader = ["dashboard.html"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""MemStack Skill Loader — MCP server for semantic skill search."""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Entry point for python -m memstack_skill_loader."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from .server import run
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def main():
    """Dispatch on the first command-line argument.

    ``dashboard`` as the first argument starts the dashboard; any other
    invocation runs the server coroutine to completion.
    """
    # Slicing avoids an explicit length check: a missing argument yields [].
    if sys.argv[1:2] != ["dashboard"]:
        asyncio.run(run())
        return

    # Imported inside the branch so the dashboard module is only loaded
    # when it is actually requested.
    from .dashboard import start_dashboard

    start_dashboard()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
"""Skill-aware compression for MemStack MCP Skill Loader.
|
|
2
|
+
|
|
3
|
+
Compresses skill content before serving to reduce token consumption.
|
|
4
|
+
Compression is tiered: free tier gets basic stripping, Pro tier gets
|
|
5
|
+
advanced section-aware compression.
|
|
6
|
+
|
|
7
|
+
All compression preserves:
|
|
8
|
+
- Code blocks (fenced with triple backticks)
|
|
9
|
+
- Checklists and action items
|
|
10
|
+
- Decision tables (content, not formatting padding)
|
|
11
|
+
- URLs and links
|
|
12
|
+
- Conditional logic
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import hashlib
|
|
16
|
+
import re
|
|
17
|
+
from collections import OrderedDict
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# Cache
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
_MAX_CACHE = 200
|
|
24
|
+
_cache: OrderedDict[tuple[str, str, str], str] = OrderedDict()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _cache_key(slug: str, tier: str, content: str) -> tuple[str, str, str]:
|
|
28
|
+
h = hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()[:8]
|
|
29
|
+
return (slug, tier, h)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def clear_cache() -> None:
    """Drop every cached compression result (intended to be called on reindex)."""
    _cache.clear()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Token estimation
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
def estimate_tokens(text: str) -> int:
    """Approximate the token count as one token per four characters.

    The result is never smaller than 1, even for empty input.
    """
    approx = len(text) // 4
    return approx if approx > 1 else 1
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# Compression helpers — operate on text OUTSIDE code blocks only
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
_CODE_BLOCK_RE = re.compile(r"(```[\s\S]*?```)", re.DOTALL)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _split_code_blocks(content: str) -> list[tuple[str, bool]]:
|
|
54
|
+
"""Split content into (text, is_code_block) segments."""
|
|
55
|
+
parts: list[tuple[str, bool]] = []
|
|
56
|
+
last = 0
|
|
57
|
+
for m in _CODE_BLOCK_RE.finditer(content):
|
|
58
|
+
if m.start() > last:
|
|
59
|
+
parts.append((content[last:m.start()], False))
|
|
60
|
+
parts.append((m.group(0), True))
|
|
61
|
+
last = m.end()
|
|
62
|
+
if last < len(content):
|
|
63
|
+
parts.append((content[last:], False))
|
|
64
|
+
return parts
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _apply_outside_code(content: str, fn) -> str:
    """Run ``fn`` over every non-code segment and stitch the pieces back together.

    Fenced code blocks are passed through untouched.
    """
    return "".join(
        segment if fenced else fn(segment)
        for segment, fenced in _split_code_blocks(content)
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
# Free tier transforms
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
# Patterns for "when to use" sections at the top of a skill
|
|
84
|
+
_WHEN_TO_USE_RE = re.compile(
|
|
85
|
+
r"^(##?\s*(?:When\s+to\s+[Uu]se|Trigger|Use\s+[Ww]hen|Description)\b.*?)(?=^##?\s|\Z)",
|
|
86
|
+
re.MULTILINE | re.DOTALL,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
_FRONTMATTER_ECHO_RE = re.compile(
|
|
90
|
+
r"^\*?\*?(?:Name|Version|License|Pro since|Description)\s*[:]\s*.*$",
|
|
91
|
+
re.MULTILINE | re.IGNORECASE,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
_BADGE_RE = re.compile(r"!\[.*?\]\(https?://.*?\)\s*", re.MULTILINE)
|
|
95
|
+
_HR_RE = re.compile(r"^-{3,}\s*$", re.MULTILINE)
|
|
96
|
+
_ALIGN_HTML_RE = re.compile(r"</?p\s+align\s*=\s*[\"']center[\"']\s*/?>", re.IGNORECASE)
|
|
97
|
+
_MULTI_BLANK_RE = re.compile(r"\n{3,}")
|
|
98
|
+
_TRAILING_WS_RE = re.compile(r"[ \t]+$", re.MULTILINE)
|
|
99
|
+
_EMOJI_HEADING_RE = re.compile(
|
|
100
|
+
r"^(#{1,6}\s+)" # heading prefix
|
|
101
|
+
r"[\U0001F300-\U0001FAFF\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D]+" # emoji cluster
|
|
102
|
+
r"\s*", # trailing space after emoji
|
|
103
|
+
re.MULTILINE,
|
|
104
|
+
)
|
|
105
|
+
_TABLE_PADDING_RE = re.compile(r"\|[ \t]{2,}")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _strip_when_to_use(text: str) -> str:
|
|
109
|
+
"""Remove 'When to use' / trigger sections from the top of the skill.
|
|
110
|
+
|
|
111
|
+
Only removes if it appears before any implementation content
|
|
112
|
+
(within the first 40 lines).
|
|
113
|
+
"""
|
|
114
|
+
lines = text.split("\n")
|
|
115
|
+
# Only look in the first 40 lines for the section
|
|
116
|
+
head = "\n".join(lines[:40])
|
|
117
|
+
m = _WHEN_TO_USE_RE.search(head)
|
|
118
|
+
if m:
|
|
119
|
+
# Remove the matched section from the full text
|
|
120
|
+
return text[:m.start()] + text[m.end():]
|
|
121
|
+
return text
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _strip_frontmatter_echo(text: str) -> str:
|
|
125
|
+
return _FRONTMATTER_ECHO_RE.sub("", text)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _strip_badges(text: str) -> str:
|
|
129
|
+
return _BADGE_RE.sub("", text)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _strip_hrs(text: str) -> str:
|
|
133
|
+
return _HR_RE.sub("\n", text)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _strip_align_html(text: str) -> str:
|
|
137
|
+
return _ALIGN_HTML_RE.sub("", text)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _normalize_whitespace(text: str) -> str:
|
|
141
|
+
text = _TRAILING_WS_RE.sub("", text)
|
|
142
|
+
text = _MULTI_BLANK_RE.sub("\n\n", text)
|
|
143
|
+
return text
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _strip_emoji_headings(text: str) -> str:
|
|
147
|
+
"""Remove leading emoji from markdown headings."""
|
|
148
|
+
return _EMOJI_HEADING_RE.sub(r"\1", text)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _compact_tables(text: str) -> str:
|
|
152
|
+
"""Reduce excessive padding inside markdown tables."""
|
|
153
|
+
return _TABLE_PADDING_RE.sub("| ", text)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
# Pro tier transforms
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
_CHECKLIST_SUB_RE = re.compile(
|
|
161
|
+
r"^(- \[[ x]\] .+)\n" # main checklist item
|
|
162
|
+
r"((?: - .+\n)+)", # one or more sub-items (2-space indented)
|
|
163
|
+
re.MULTILINE,
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _compact_checklists(text: str) -> str:
|
|
168
|
+
"""Flatten simple sub-items into the parent checklist item."""
|
|
169
|
+
def _flatten(m: re.Match) -> str:
|
|
170
|
+
main = m.group(1).rstrip()
|
|
171
|
+
subs = m.group(2).strip().split("\n")
|
|
172
|
+
# Only flatten if sub-items are simple (single line each, <80 chars)
|
|
173
|
+
sub_texts = []
|
|
174
|
+
for s in subs:
|
|
175
|
+
s = s.strip().lstrip("- ").strip()
|
|
176
|
+
if len(s) > 80:
|
|
177
|
+
return m.group(0) # too complex, leave as-is
|
|
178
|
+
sub_texts.append(s)
|
|
179
|
+
return main + " (" + "; ".join(sub_texts) + ")\n"
|
|
180
|
+
return _CHECKLIST_SUB_RE.sub(_flatten, text)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _summarize_long_sections(text: str) -> str:
|
|
184
|
+
"""For skills >150 lines, compress non-critical sections.
|
|
185
|
+
|
|
186
|
+
Sections that are not checklists, code blocks, or 'pitfalls/gotchas'
|
|
187
|
+
and are >10 lines get compressed to their heading + first sentence.
|
|
188
|
+
"""
|
|
189
|
+
lines = text.split("\n")
|
|
190
|
+
if len(lines) <= 150:
|
|
191
|
+
return text
|
|
192
|
+
|
|
193
|
+
# Identify sections
|
|
194
|
+
sections: list[tuple[int, str]] = []
|
|
195
|
+
for i, line in enumerate(lines):
|
|
196
|
+
if re.match(r"^#{1,3}\s+", line):
|
|
197
|
+
sections.append((i, line))
|
|
198
|
+
|
|
199
|
+
if not sections:
|
|
200
|
+
return text
|
|
201
|
+
|
|
202
|
+
# Protected section keywords
|
|
203
|
+
protected = {"checklist", "pitfall", "gotcha", "common mistake", "warning",
|
|
204
|
+
"important", "critical", "prerequisite", "setup", "install",
|
|
205
|
+
"implementation", "step", "example", "code"}
|
|
206
|
+
|
|
207
|
+
result_lines = list(lines)
|
|
208
|
+
# Process sections in reverse to preserve indices
|
|
209
|
+
for idx in range(len(sections) - 1, -1, -1):
|
|
210
|
+
start = sections[idx][0]
|
|
211
|
+
heading = sections[idx][1].lower()
|
|
212
|
+
end = sections[idx + 1][0] if idx + 1 < len(sections) else len(lines)
|
|
213
|
+
|
|
214
|
+
section_len = end - start
|
|
215
|
+
if section_len <= 10:
|
|
216
|
+
continue
|
|
217
|
+
|
|
218
|
+
# Check if section is protected
|
|
219
|
+
if any(kw in heading for kw in protected):
|
|
220
|
+
continue
|
|
221
|
+
|
|
222
|
+
# Check if section contains code blocks — protect it
|
|
223
|
+
section_text = "\n".join(lines[start:end])
|
|
224
|
+
if "```" in section_text:
|
|
225
|
+
continue
|
|
226
|
+
|
|
227
|
+
# Compress: keep heading + first non-empty sentence
|
|
228
|
+
first_sentence = ""
|
|
229
|
+
for line in lines[start + 1:end]:
|
|
230
|
+
stripped = line.strip()
|
|
231
|
+
if stripped and not stripped.startswith("#"):
|
|
232
|
+
first_sentence = stripped
|
|
233
|
+
break
|
|
234
|
+
|
|
235
|
+
if first_sentence:
|
|
236
|
+
result_lines[start:end] = [lines[start], first_sentence, ""]
|
|
237
|
+
# else leave as-is
|
|
238
|
+
|
|
239
|
+
return "\n".join(result_lines)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _trim_redundant_examples(text: str) -> str:
    """Keep only the first two code blocks of each run of 3+ consecutive blocks.

    Code blocks belong to the same run when the text between them is blank or
    at most 50 characters long (after stripping). Every block beyond the
    second in a run is replaced by a short "omitted" note.

    Args:
        text: Full skill markdown.

    Returns:
        The text with redundant examples replaced; unchanged when it holds
        fewer than three code blocks.
    """
    parts = _split_code_blocks(text)
    # Count the code blocks themselves. The number of segments is unreliable
    # because empty text segments are dropped, so a document that starts or
    # ends with a code block has fewer segments than "3 blocks + text".
    code_block_count = sum(1 for _, is_code in parts if is_code)
    if code_block_count < 3:
        return text

    result = []
    consecutive_code = 0
    for txt, is_code in parts:
        if is_code:
            consecutive_code += 1
            if consecutive_code <= 2:
                result.append(txt)
            else:
                result.append("\n*[Additional example omitted — use `get_skill(name, full=true)` for all examples]*\n")
        else:
            # Substantial prose (more than 50 characters) starts a new run;
            # blank or short text keeps the neighbouring blocks grouped.
            if len(txt.strip()) > 50:
                consecutive_code = 0
            result.append(txt)

    return "".join(result)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# ---------------------------------------------------------------------------
|
|
268
|
+
# Main entry point
|
|
269
|
+
# ---------------------------------------------------------------------------
|
|
270
|
+
|
|
271
|
+
# Results estimated below this many tokens are treated as over-compressed.
_MIN_COMPRESSED_TOKENS = 50


def compress_skill(content: str, tier: str = "free") -> str:
    """Compress skill content based on tier.

    Free-tier transforms always run; Pro-tier transforms run additionally when
    ``tier == "pro"``. Text transforms are applied outside fenced code blocks.

    Args:
        content: Raw skill markdown content (frontmatter already stripped).
        tier: "free" or "pro".

    Returns:
        Compressed content string, or the original content when compression
        would shrink it below ``_MIN_COMPRESSED_TOKENS`` estimated tokens.
    """
    original_content = content
    original_tokens = estimate_tokens(content)

    # --- Free tier transforms (always applied) ---
    # The "when to use" section is only meant to be removed from the top of
    # the skill, so restrict it to the leading prose segment. Running it on
    # every prose segment would also delete sections such as "## Description"
    # that appear after a code block further down the document.
    segments = _split_code_blocks(content)
    if segments and not segments[0][1]:
        leading = segments[0][0]
        content = _strip_when_to_use(leading) + content[len(leading):]

    free_transforms = (
        _strip_frontmatter_echo,
        _strip_badges,
        _strip_hrs,
        _strip_align_html,
        _strip_emoji_headings,
        _compact_tables,
    )
    for transform in free_transforms:
        content = _apply_outside_code(content, transform)

    # --- Pro tier transforms ---
    if tier == "pro":
        content = _apply_outside_code(content, _compact_checklists)
        content = _summarize_long_sections(content)
        content = _trim_redundant_examples(content)

    # Normalize whitespace last so gaps left by the removals above collapse.
    content = _apply_outside_code(content, _normalize_whitespace)
    content = content.strip()

    # Safety: if compression was too aggressive, return the original.
    compressed_tokens = estimate_tokens(content)
    if compressed_tokens < _MIN_COMPRESSED_TOKENS <= original_tokens:
        return original_content

    return content
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def get_or_compress(skill: dict, tier: str = "free") -> tuple[str, int, int]:
    """Get compressed skill content, using the cache if available.

    Args:
        skill: Skill dict with a 'content' key and a 'slug' key; 'name' is
            used for the cache key only when 'slug' is absent.
        tier: "free" or "pro".

    Returns:
        Tuple of (compressed_content, tokens_before, tokens_after).

    Raises:
        KeyError: If 'content' is missing, or both 'slug' and 'name' are.
    """
    raw = skill["content"]
    tokens_before = estimate_tokens(raw)

    # Look up 'name' lazily: skill.get("slug", skill["name"]) evaluates the
    # fallback eagerly and raises KeyError for dicts that only carry 'slug'.
    slug = skill["slug"] if "slug" in skill else skill["name"]
    key = _cache_key(slug, tier, raw)

    cached = _cache.get(key)
    if cached is not None:
        # Mark as most recently used.
        _cache.move_to_end(key)
        return cached, tokens_before, estimate_tokens(cached)

    compressed = compress_skill(raw, tier=tier)

    # Add to the cache, evicting the least recently used entry when full.
    _cache[key] = compressed
    if len(_cache) > _MAX_CACHE:
        _cache.popitem(last=False)

    return compressed, tokens_before, estimate_tokens(compressed)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Config loading and validation for MemStack Skill Loader."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SkillSource:
    """One configured location that skills are loaded from."""

    # Source kind; entries built in this module use "local".
    type: str
    # Directory that holds the skill files.
    path: str
    # Glob pattern used to discover skill files.
    pattern: str = "**/SKILL.md"
    # Label for this source.
    label: str = "Unknown"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Config:
    """Settings for the skill loader: skill sources, embedding model, vector DB."""

    skill_sources: list[SkillSource] = field(default_factory=list)
    embedding_model: str = "all-MiniLM-L6-v2"
    default_top_k: int = 3
    vector_db_path: str = "./vectors"
    auto_reindex_on_start: bool = False
    # Base directory for a relative vector_db_path; defaults to the cwd.
    _config_dir: Path = field(default_factory=Path.cwd, repr=False)

    @property
    def resolved_vector_db_path(self) -> Path:
        """Absolute vector DB path; relative values are anchored at ``_config_dir``."""
        location = Path(self.vector_db_path).expanduser()
        anchored = location if location.is_absolute() else self._config_dir / location
        return anchored.resolve()

    @property
    def pro_skills_dir(self) -> Path:
        """Locate the pro-skills directory: customer download first, bundled fallback."""
        downloaded = Path.home() / ".memstack" / "pro-skills"
        # The download only counts when its ".complete" marker file exists.
        if downloaded.exists() and (downloaded / ".complete").exists():
            return downloaded
        # Fallback: a "pro-skills" directory three levels above this file.
        return Path(__file__).resolve().parent.parent.parent / "pro-skills"

    def with_pro_skills(self) -> "Config":
        """Return a copy of this config with the pro-skills source appended.

        Returns ``self`` unchanged when the pro-skills directory does not exist.
        """
        pro_dir = self.pro_skills_dir
        if not pro_dir.exists():
            return self

        extra_source = SkillSource(
            type="local",
            path=str(pro_dir),
            pattern="**/SKILL.md",
            label="MemStack Pro",
        )
        return Config(
            skill_sources=[*self.skill_sources, extra_source],
            embedding_model=self.embedding_model,
            default_top_k=self.default_top_k,
            vector_db_path=self.vector_db_path,
            auto_reindex_on_start=self.auto_reindex_on_start,
            _config_dir=self._config_dir,
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def load_config(config_path: Path | None = None) -> Config:
    """Load config from a JSON file, falling back to defaults on any problem.

    Defaults are returned (with a message on stderr) when the file is missing,
    cannot be read, is not valid JSON, or does not contain a JSON object.
    Malformed ``skill_sources`` entries are skipped with a warning.

    Args:
        config_path: Path to the config file. Defaults to ``config.json``
            three directory levels above this module.

    Returns:
        The loaded ``Config``. When ``MEMSTACK_PRO_LICENSE_KEY`` is set, the
        pro-skills source is appended via ``Config.with_pro_skills``.
    """
    if config_path is None:
        config_path = Path(__file__).resolve().parent.parent.parent / "config.json"

    if not config_path.exists():
        print(f"Config not found at {config_path}, using defaults", file=sys.stderr)
        return Config()

    try:
        with open(config_path, encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON in {config_path}: {e}", file=sys.stderr)
        return Config()
    except (OSError, UnicodeDecodeError) as e:
        # Unreadable or mis-encoded files get the same fallback as bad JSON
        # instead of crashing at startup.
        print(f"Could not read {config_path}: {e}", file=sys.stderr)
        return Config()

    if not isinstance(data, dict):
        # Valid JSON that is not an object (e.g. a list) has no settings.
        print(f"Config in {config_path} is not a JSON object, using defaults", file=sys.stderr)
        return Config()

    raw_sources = data.get("skill_sources", [])
    if not isinstance(raw_sources, list):
        print(f"Warning: 'skill_sources' is not a list, ignoring: {raw_sources}", file=sys.stderr)
        raw_sources = []

    sources = []
    env_skills_dir = os.environ.get("MEMSTACK_SKILLS_DIR")
    for s in raw_sources:
        if not isinstance(s, dict) or "path" not in s:
            print(f"Warning: skill source missing 'path', skipping: {s}", file=sys.stderr)
            continue
        # NOTE(review): the env override replaces the path of *every* source,
        # so multiple sources collapse onto one directory — confirm intended.
        skill_path = env_skills_dir if env_skills_dir else str(Path(s["path"]).expanduser())
        sources.append(SkillSource(
            type=s.get("type", "local"),
            path=skill_path,
            pattern=s.get("pattern", "**/SKILL.md"),
            label=s.get("label", "Unknown"),
        ))

    # A source whose path is missing falls back to a "memstack/skills"
    # checkout next to the directory that holds the config file.
    config_dir = config_path.parent.resolve()
    sibling = config_dir.parent / "memstack" / "skills"
    for source in sources:
        if Path(source.path).expanduser().exists():
            continue
        if sibling.exists():
            source.path = str(sibling)
            print(f"Auto-detected skills at {sibling}", file=sys.stderr)

    config = Config(
        skill_sources=sources,
        embedding_model=data.get("embedding_model", "all-MiniLM-L6-v2"),
        default_top_k=data.get("default_top_k", 3),
        vector_db_path=data.get("vector_db_path", "./vectors"),
        auto_reindex_on_start=data.get("auto_reindex_on_start", False),
        _config_dir=config_dir,
    )

    # Auto-detect pro-skills if a license key is set and the directory exists.
    if os.environ.get("MEMSTACK_PRO_LICENSE_KEY"):
        config = config.with_pro_skills()

    return config
|