memstack-skill-loader 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,360 @@
1
+ """Local HTTP dashboard server for MemStack usage analytics."""
2
+
3
+ import json
4
+ import os
5
+ import pickle
6
+ import re
7
+ import sys
8
+ import webbrowser
9
+ from http.server import BaseHTTPRequestHandler, HTTPServer
10
+ from pathlib import Path
11
+
12
+ from .config import load_config
13
+ from .license import is_pro_exclusive
14
+ import sqlite3
15
+ from .stats import DB_PATH, get_dashboard_data, get_project_details, get_skill_fire_counts
16
+
17
+ _HTML_PATH = Path(__file__).parent / "dashboard.html"
18
+
19
+
20
def _load_ignore_set() -> frozenset[str]:
    """Return the lower-cased entries of ``.memstack-ignore`` in the CWD.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. A missing or unreadable file yields an empty set.
    """
    ignore_file = Path.cwd() / ".memstack-ignore"
    if not ignore_file.exists():
        return frozenset()

    try:
        raw_text = ignore_file.read_text(encoding="utf-8")
    except Exception:
        # Best effort: an unreadable ignore file means "nothing disabled".
        return frozenset()

    entries = set()
    for raw_line in raw_text.splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith("#"):
            entries.add(entry.lower())
    return frozenset(entries)
34
+
35
+
36
# Category label -> skill slugs that belong to it. Kept grouped so a new slug
# is added next to its siblings; flattened into _CATEGORY_MAP below.
_CATEGORY_SLUGS = (
    ("Automation", (
        "automation", "cron-scheduler", "n8n-workflow",
        "api-integration", "webhook-receiver",
    )),
    ("Business", ("business", "quill", "scan", "governor")),
    ("Content", ("content", "content-pipeline", "humanize")),
    ("Deployment", (
        "deployment", "railway-deploy", "docker-deploy", "vps-deploy",
    )),
    ("Development", (
        "development", "forge", "shard", "state", "work", "verify",
        "project", "familiar",
    )),
    ("Core", (
        "compress", "diary", "echo", "grimoire", "sight",
        "token-optimization",
    )),
    ("Marketing", ("marketing", "seo-geo")),
    ("Product", ("product",)),
    ("Security", ("security", "advanced-security", "env-manager-pro")),
)

# Flat slug -> category lookup used by _derive_category.
_CATEGORY_MAP = {
    slug: category
    for category, slugs in _CATEGORY_SLUGS
    for slug in slugs
}


def _derive_category(slug: str) -> str:
    """Return the category label for *slug*, or ``"Other"`` if unmapped."""
    try:
        return _CATEGORY_MAP[slug]
    except KeyError:
        return "Other"
57
+
58
+
59
def _strip_non_ascii(text: str) -> str:
    """Drop every character outside the 7-bit ASCII range, then trim.

    Used to remove emoji and other Unicode decoration from skill names.
    """
    ascii_only = "".join(ch for ch in text if ord(ch) < 0x80)
    return ascii_only.strip()
62
+
63
+
64
def _empty_skills_payload(error: str) -> dict:
    """Return the zero-skill catalog payload carrying *error* as the reason."""
    return {
        "project": os.path.basename(os.getcwd()),
        "skills": [],
        "total": 0,
        "free_count": 0,
        "pro_count": 0,
        "disabled_count": 0,
        "error": error,
    }


def _get_skills_data() -> dict:
    """Build the full skill catalog with fire counts and status.

    Reads the pickled TF-IDF index from the configured vector DB directory,
    then annotates every skill with its category, Pro flag, enabled state
    (from ``.memstack-ignore``) and fire count.

    Returns:
        A JSON-serializable dict. On success it holds ``project``,
        ``skills``, ``total``, the free/pro/disabled counts and the fixed
        ``display_*`` counts. If the index is missing, unreadable or not a
        dict, it holds an empty catalog plus an ``error`` message.
    """
    config = load_config()
    pkl_path = config.resolved_vector_db_path / "tfidf_index.pkl"

    if not pkl_path.exists():
        return _empty_skills_payload("No skill index found. Run reindex_skills first.")

    try:
        # NOTE(security): pickle.load executes code embedded in the file. This
        # is only safe while the index directory is written exclusively by
        # this package's own indexer.
        with open(pkl_path, "rb") as f:
            index = pickle.load(f)
    except Exception:
        return _empty_skills_payload("Failed to load skill index.")

    # A file that unpickles to something other than the expected dict is as
    # unusable as one that fails to load; report it instead of raising.
    if not isinstance(index, dict):
        return _empty_skills_payload("Failed to load skill index.")

    skills_raw = index.get("skills", [])
    fire_counts = get_skill_fire_counts()
    ignored = _load_ignore_set()

    skills = []
    free_count = 0
    pro_count = 0
    disabled_count = 0

    for s in skills_raw:
        name_raw = s.get("name", "")
        slug = s.get("slug", name_raw)
        is_pro = is_pro_exclusive(slug)
        name_ascii = _strip_non_ascii(name_raw).lower()
        # A skill is disabled when any of its spellings is in the ignore
        # file: the slug, the raw display name, or the name without emoji.
        enabled = (slug.lower() not in ignored
                   and name_raw.lower() not in ignored
                   and name_ascii not in ignored)

        if is_pro:
            pro_count += 1
        else:
            free_count += 1
        if not enabled:
            disabled_count += 1

        skills.append({
            "name": name_raw,
            "slug": slug,
            "description": s.get("description", ""),
            "source_label": s.get("source_label", ""),
            "category": _derive_category(slug),
            "is_pro": is_pro,
            "enabled": enabled,
            # Fire counts are keyed by display name.
            "fire_count": fire_counts.get(name_raw, 0),
        })

    return {
        "project": os.path.basename(os.getcwd()),
        "skills": skills,
        "total": len(skills),
        "free_count": free_count,
        "pro_count": pro_count,
        "disabled_count": disabled_count,
        # Display counts — use fixed values since index may not have all Pro stubs
        "display_free_count": 85,
        "display_pro_count": 29,
        "display_total": 114,
    }
143
+
144
+
145
def _write_ignore_lines(ignore_path: Path, lines: list[str]) -> None:
    """Atomically replace *ignore_path* with *lines*, one entry per line."""
    ignore_path.parent.mkdir(parents=True, exist_ok=True)
    tmp = ignore_path.with_suffix(".tmp")
    tmp.write_text("\n".join(lines) + "\n", encoding="utf-8")
    # Rename over the target so readers never see a half-written file.
    tmp.replace(ignore_path)


def _toggle_skill(skill_name: str, action: str) -> dict:
    """Enable or disable a skill by updating ``.memstack-ignore`` in the CWD.

    Args:
        skill_name: Display name or slug of the skill.
        action: ``"enable"`` or ``"disable"``.

    Returns:
        ``{"success": True, "skill", "enabled", "disabled_count"}`` on
        success, or ``{"success": False, "error"}`` when the request is
        invalid or the ignore file cannot be read or written.
    """
    ignore_path = Path.cwd() / ".memstack-ignore"

    # Validate action
    if action not in ("enable", "disable"):
        return {"success": False, "error": "Invalid action. Use 'enable' or 'disable'."}

    # The ignore file is line-oriented; an embedded line break would smuggle
    # extra entries into it.
    if "\n" in skill_name or "\r" in skill_name:
        return {"success": False, "error": "Skill name must be a single line."}

    # Validate skill exists in index (best effort: skipped when the index is
    # missing or cannot be interpreted).
    config = load_config()
    pkl_path = config.resolved_vector_db_path / "tfidf_index.pkl"
    known = None
    if pkl_path.exists():
        try:
            # NOTE(security): pickle.load executes code embedded in the file;
            # safe only while the index is produced by this package.
            with open(pkl_path, "rb") as f:
                index = pickle.load(f)
            indexed = index.get("skills", [])
            known = {s.get("name", "").lower() for s in indexed}
            known |= {s.get("slug", "").lower() for s in indexed}
        except Exception:
            known = None
    if known is not None and skill_name.lower() not in known:
        return {"success": False, "error": f"Skill '{skill_name}' not found in index."}

    # Read existing ignore file. A file that exists but cannot be read must
    # abort the operation: continuing with an empty list would overwrite the
    # user's entries on the next write.
    try:
        existing_lines = ignore_path.read_text(encoding="utf-8").splitlines()
        file_present = True
    except FileNotFoundError:
        existing_lines = []
        file_present = False
    except (OSError, UnicodeDecodeError) as exc:
        return {"success": False, "error": f"Could not read .memstack-ignore: {exc}"}

    # Spellings under which this skill may appear in the file. The empty
    # string is excluded so an all-emoji name cannot match blank lines.
    targets = {skill_name.strip().lower(), _strip_non_ascii(skill_name).lower()} - {""}
    if not targets:
        return {"success": False, "error": "Missing skill name."}

    if action == "disable":
        # Check if already disabled
        active = {
            line.strip().lower()
            for line in existing_lines
            if line.strip() and not line.strip().startswith("#")
        }
        if targets & active:
            return {"success": True, "skill": skill_name, "enabled": False,
                    "disabled_count": len(active)}
        # Strip emoji/unicode — write only ASCII to .memstack-ignore, unless
        # nothing would be left, in which case keep the raw name so the
        # entry still matches the skill.
        entry = _strip_non_ascii(skill_name) or skill_name.strip()
        try:
            _write_ignore_lines(ignore_path, existing_lines + [entry])
        except OSError as exc:
            return {"success": False, "error": f"Could not update .memstack-ignore: {exc}"}
        return {"success": True, "skill": skill_name, "enabled": False,
                "disabled_count": len(_load_ignore_set())}

    # enable
    if not file_present:
        return {"success": True, "skill": skill_name, "enabled": True,
                "disabled_count": 0}
    new_lines = [
        line for line in existing_lines
        if line.strip().lower() not in targets
    ]
    if len(new_lines) == len(existing_lines):
        # Nothing matched: the skill was not disabled through this file.
        return {"success": True, "skill": skill_name, "enabled": True,
                "disabled_count": len(_load_ignore_set())}
    try:
        if any(l.strip() and not l.strip().startswith("#") for l in new_lines):
            _write_ignore_lines(ignore_path, new_lines)
        else:
            # No active entries remain, so remove the file entirely.
            ignore_path.unlink()
    except OSError as exc:
        # Do not claim success if the entry is still on disk.
        return {"success": False, "error": f"Could not update .memstack-ignore: {exc}"}
    return {"success": True, "skill": skill_name, "enabled": True,
            "disabled_count": len(_load_ignore_set())}
222
+
223
+
224
def _get_category_skills() -> dict:
    """Return skill fire counts grouped by category.

    Returns:
        ``{category: [{"name": skill_name, "fires": count}, ...]}`` with each
        list ordered by descending fire count. Rows without a category are
        grouped under ``"uncategorized"``. An empty dict is returned when the
        stats database is missing or cannot be queried.
    """
    if not DB_PATH.exists():
        return {}
    try:
        conn = sqlite3.connect(str(DB_PATH), timeout=5)
        try:
            rows = conn.execute(
                """
                SELECT COALESCE(category, 'uncategorized') as cat, skill_name, COUNT(*) as fires
                FROM skill_fires
                GROUP BY cat, skill_name
                ORDER BY cat, fires DESC
                """,
            ).fetchall()
        finally:
            # Close even when the query fails (e.g. missing table) so the
            # long-running dashboard does not accumulate open handles.
            conn.close()
    except sqlite3.Error:
        return {}

    result: dict[str, list[dict]] = {}
    for cat, name, fires in rows:
        result.setdefault(cat, []).append({"name": name, "fires": fires})
    return result
246
+
247
+
248
def _get_diary_entries() -> list[dict]:
    """Collect markdown diary entries from the known diary directories.

    Looks in ``~/.memstack/diary`` and then ``<cwd>/memory/sessions``. Within
    each directory files are listed in reverse name order. Files that cannot
    be read are skipped.

    Returns:
        One dict per file with ``date`` (first ten characters of the file
        stem), ``filename``, ``title`` (first line without ``#`` markers,
        falling back to the stem), ``content`` and ``source`` (the directory).
    """
    search_roots = (
        Path.home() / ".memstack" / "diary",
        Path.cwd() / "memory" / "sessions",
    )

    collected = []
    for root in search_roots:
        if not root.exists():
            continue
        newest_first = sorted(root.glob("*.md"), reverse=True)
        for md_file in newest_first:
            try:
                text = md_file.read_text(encoding="utf-8", errors="replace")
                stem = md_file.stem
                # Filenames look like 2026-04-04-multi-agent-session.md, so
                # the leading ten characters are the date; slicing a shorter
                # stem simply yields the whole stem.
                heading = text.split("\n", 1)[0].strip("#").strip()
                collected.append({
                    "date": stem[:10],
                    "filename": md_file.name,
                    "title": heading or stem,
                    "content": text,
                    "source": str(root),
                })
            except Exception:
                continue
    return collected
276
+
277
+
278
class _Handler(BaseHTTPRequestHandler):
    """Request handler for the local dashboard.

    ``GET /`` serves the dashboard page, ``GET /api/...`` serves JSON data
    and ``POST /api/skills/toggle`` enables or disables a skill.

    The server exposes local data (diary contents, usage stats) and a
    state-changing endpoint, so cross-origin access is limited to pages that
    are themselves served from this machine.
    """

    # Hosts accepted in an ``Origin`` header.
    _LOOPBACK_HOSTS = frozenset({"localhost", "127.0.0.1", "::1"})

    # JSON GET endpoints. The lambdas defer name lookup to request time,
    # matching the original handler, which resolved these functions per call.
    _JSON_ROUTES = {
        "/api/stats": lambda: get_dashboard_data(),
        "/api/skills": lambda: _get_skills_data(),
        "/api/diary": lambda: _get_diary_entries(),
        "/api/projects": lambda: get_project_details(),
        "/api/category-skills": lambda: _get_category_skills(),
    }

    def log_message(self, format, *args):
        pass  # suppress default logging

    def do_GET(self):
        """Serve the dashboard page or one of the JSON endpoints."""
        # Ignore any query string (e.g. cache-busting ``?t=...``).
        route = self.path.partition("?")[0]
        if route == "/":
            self._respond(200, "text/html", _HTML_PATH.read_bytes())
            return
        producer = self._JSON_ROUTES.get(route)
        if producer is None:
            self.send_error(404)
            return
        self._send_json(200, producer())

    def do_POST(self):
        """Handle ``POST /api/skills/toggle`` with a JSON object body.

        The body must be ``{"skill": <str>, "action": "enable"|"disable"}``.
        Malformed requests get a 400 JSON error; requests from a non-local
        web origin get a 403.
        """
        if self.path.partition("?")[0] != "/api/skills/toggle":
            self.send_error(404)
            return

        # A cross-site page can submit this POST without a CORS preflight,
        # so the origin has to be checked before any state is changed.
        if not self._origin_allowed():
            self._send_json(403, {"success": False,
                                  "error": "Cross-origin requests are not allowed."})
            return

        try:
            content_length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            content_length = -1
        if content_length < 0:
            # A negative length would make rfile.read() wait for EOF.
            self._send_json(400, {"success": False, "error": "Invalid Content-Length header."})
            return
        if content_length == 0:
            self._send_json(400, {"success": False, "error": "Request body required."})
            return

        raw = self.rfile.read(content_length)
        try:
            data = json.loads(raw)
        except ValueError:  # covers JSONDecodeError and undecodable bytes
            self._send_json(400, {"success": False, "error": "Invalid JSON body."})
            return

        if not isinstance(data, dict):
            self._send_json(400, {"success": False, "error": "JSON body must be an object."})
            return

        skill = data.get("skill", "")
        action = data.get("action", "")
        if not isinstance(skill, str) or not isinstance(action, str):
            self._send_json(400, {"success": False,
                                  "error": "'skill' and 'action' must be strings."})
            return
        skill = skill.strip()
        action = action.strip()

        if not skill:
            self._send_json(400, {"success": False, "error": "Missing 'skill' field."})
            return

        if action not in ("enable", "disable"):
            self._send_json(400, {"success": False,
                                  "error": "Invalid action. Use 'enable' or 'disable'."})
            return

        result = _toggle_skill(skill, action)
        self._send_json(200 if result.get("success") else 400, result)

    def _origin_allowed(self) -> bool:
        """Return True unless the request names a non-loopback ``Origin``.

        Requests without an ``Origin`` header (same-origin navigation,
        command-line clients) are allowed. An opaque ``null`` origin is
        rejected because sandboxed cross-site frames send it too.
        """
        origin = self.headers.get("Origin")
        if not origin:
            return True
        from urllib.parse import urlsplit
        try:
            host = urlsplit(origin).hostname
        except ValueError:
            return False
        return host in self._LOOPBACK_HOSTS

    def _send_json(self, status: int, payload) -> None:
        """Serialize *payload* as JSON and send it with *status*."""
        self._respond(status, "application/json", json.dumps(payload).encode())

    def _respond(self, status: int, content_type: str, body: bytes):
        """Send a complete response with *body* and explicit length."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        # Grant cross-origin read access only to local origins, never "*",
        # so arbitrary websites cannot read the dashboard's data.
        origin = self.headers.get("Origin")
        if origin and self._origin_allowed():
            self.send_header("Access-Control-Allow-Origin", origin)
            self.send_header("Vary", "Origin")
        self.end_headers()
        self.wfile.write(body)
346
+
347
+
348
def start_dashboard(port: int = 3333, open_browser: bool = True):
    """Run the dashboard HTTP server on 127.0.0.1 until interrupted.

    Args:
        port: TCP port to bind on the loopback interface.
        open_browser: When true, open the dashboard URL in the default
            browser before entering the serve loop.

    The call blocks. Ctrl-C stops the server, and the listening socket is
    closed on the way out.
    """
    server = HTTPServer(("127.0.0.1", port), _Handler)
    address = f"http://localhost:{port}"
    print(f"MemStack\u2122 Dashboard running at {address}", file=sys.stderr)

    if open_browser:
        webbrowser.open(address)

    # Leaving the ``with`` block calls server_close(), on any exit path.
    with server:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("\nDashboard stopped.", file=sys.stderr)
@@ -0,0 +1,240 @@
1
+ """Skill indexer — reads SKILL.md files and builds LanceDB vector index."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import lancedb
7
+
8
+ from .config import Config, load_config
9
+
10
+
11
def _parse_frontmatter(content: str) -> tuple[dict, str]:
    """Extract simple ``key: value`` frontmatter and the body from markdown.

    This is a minimal line-based parser, not a YAML parser: each frontmatter
    line is split at its first colon and surrounding quotes are removed from
    the value.

    Args:
        content: Full text of a markdown file.

    Returns:
        ``(metadata, body)``. When *content* does not start with ``---`` or
        has no closing fence, returns ``({}, content)`` unchanged.
    """
    if not content.startswith("---"):
        return {}, content

    # The closing fence has to start a line. Searching for a bare "---"
    # would stop at dashes inside a value (e.g. a description containing
    # " --- ") and cut the frontmatter short.
    end = content.find("\n---", 3)
    if end == -1:
        return {}, content

    frontmatter_text = content[3:end].strip()
    body = content[end + 4:].strip()  # skip the newline plus the fence

    metadata = {}
    for line in frontmatter_text.split("\n"):
        key, sep, value = line.strip().partition(":")
        if sep:
            metadata[key.strip()] = value.strip().strip('"').strip("'")

    return metadata, body
32
+
33
+
34
def _display_name_from_filename(dirname: str) -> str:
    """Title-case a directory name, e.g. ``railway-deploy`` -> ``Railway Deploy``.

    Hyphens and underscores are both treated as word separators.
    """
    separators_to_spaces = str.maketrans("-_", "  ")
    return dirname.translate(separators_to_spaces).title()
37
+
38
+
39
def _display_name_from_meta_name(meta_name: str) -> str:
    """Title-case a hyphenated machine name, dropping a leading ``memstack``.

    Example: ``memstack-codebase-index`` -> ``Codebase Index``.
    """
    head, *tail = meta_name.split("-")
    words = tail if head == "memstack" else [head, *tail]
    return " ".join(words).title()
45
+
46
+
47
def _display_name_from_h1(body: str) -> str:
    """Return the first H1 heading of *body* without its subtitle.

    A subtitle is whatever follows the first `` — `` (em-dash); if the
    heading has no em-dash, whatever follows the first `` - ``. Returns an
    empty string when *body* contains no ``# `` heading.
    """
    for raw in body.split("\n"):
        stripped = raw.strip()
        if not stripped.startswith("# "):
            continue
        title = stripped[2:].strip()
        # The em-dash takes precedence over the plain hyphen,
        # e.g. "Codebase Index — Scanning...".
        for separator in (" — ", " - "):
            if separator in title:
                return title.split(separator)[0].strip()
        return title
    return ""
60
+
61
+
62
def discover_skills(config: Config) -> list[dict]:
    """Walk skill sources and parse all SKILL.md files.

    For every entry in ``config.skill_sources`` the source path is expanded
    and globbed with the source's pattern. Missing sources and unreadable
    files are reported on stderr and skipped, so one bad file does not abort
    the whole discovery.

    Args:
        config: Loader configuration providing ``skill_sources``; each source
            is read for ``path``, ``pattern`` and ``label``.

    Returns:
        One dict per skill file with ``name``, ``slug`` (parent directory
        name), ``description``, ``filename``, ``filepath`` (resolved),
        ``source_label`` and ``content``.
    """
    skills = []

    for source in config.skill_sources:
        source_path = Path(source.path).expanduser()
        if not source_path.exists():
            print(f"Warning: skill source not found: {source_path}", file=sys.stderr)
            continue

        pattern = source.pattern
        for skill_file in sorted(source_path.glob(pattern)):
            # A broad pattern can match directories; only files hold skills.
            if not skill_file.is_file():
                continue
            try:
                content = skill_file.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                print(f"Warning: could not read skill file {skill_file}: {exc}",
                      file=sys.stderr)
                continue
            metadata, body = _parse_frontmatter(content)

            # Primary: H1 heading (human-readable)
            display_name = _display_name_from_h1(body)
            # Fallback: frontmatter name (machine ID → title case)
            if not display_name:
                meta_name = metadata.get("name", "")
                if meta_name:
                    display_name = _display_name_from_meta_name(meta_name)
            # Last resort: directory name
            if not display_name:
                display_name = _display_name_from_filename(skill_file.parent.name)

            description = metadata.get("description", "")
            if not description:
                # Fall back to the first non-empty body line that is not
                # emphasis or a bullet, with heading markers removed.
                for line in body.split("\n"):
                    line = line.strip().lstrip("#").strip()
                    if line and not line.startswith("*"):
                        description = line
                        break

            skills.append({
                "name": display_name,
                "slug": skill_file.parent.name,
                "description": description,
                "filename": skill_file.name,
                "filepath": str(skill_file.resolve()),
                "source_label": source.label,
                "content": content,
            })

    return skills
107
+
108
+
109
def _build_tfidf_index(skills: list[dict], texts: list[str], config: Config) -> None:
    """Build a TF-IDF index and save as pickle for fast search.

    Pickle is used here because sklearn's TfidfVectorizer and sparse matrices
    cannot be serialized with JSON. The pickle is generated and consumed by
    this same codebase (indexer writes, tfidf_search reads).

    Args:
        skills: Skill dicts as produced by ``discover_skills``.
        texts: One text per skill, in the same order, to fit the vectorizer.
        config: Loader configuration; the index is written to
            ``tfidf_index.pkl`` under ``config.resolved_vector_db_path``.
    """
    import pickle
    from sklearn.feature_extraction.text import TfidfVectorizer

    vectorizer = TfidfVectorizer(
        stop_words="english",
        ngram_range=(1, 2),
        max_features=5000,
        sublinear_tf=True,
    )
    matrix = vectorizer.fit_transform(texts)

    # Store skill metadata (without vectors) alongside the TF-IDF index
    meta_keys = (
        "name", "slug", "description", "filename",
        "filepath", "source_label", "content",
    )
    skill_meta = [{key: s[key] for key in meta_keys} for s in skills]

    pkl_path = Path(config.resolved_vector_db_path) / "tfidf_index.pkl"
    # Write to a sibling temp file and rename it into place, so a concurrent
    # reader never loads a partially written pickle.
    tmp_path = pkl_path.with_name(pkl_path.name + ".tmp")
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump({"vectorizer": vectorizer, "matrix": matrix, "skills": skill_meta}, f)
        tmp_path.replace(pkl_path)
    finally:
        # After a successful rename the temp file is gone; this only cleans
        # up when the dump or the rename failed.
        tmp_path.unlink(missing_ok=True)

    # TF-IDF index saved silently
146
+
147
+
148
def build_index(config: Config | None = None) -> int:
    """Build or rebuild the LanceDB ``skills`` table and the TF-IDF index.

    Args:
        config: Loader configuration; loaded with ``load_config()`` when
            omitted.

    Returns:
        The number of skills indexed, or 0 when no skills were discovered.
    """
    if config is None:
        config = load_config()

    skills = discover_skills(config)
    if not skills:
        print("No skills found to index.", file=sys.stderr)
        return 0

    import os
    import time
    import logging
    import warnings

    # Suppress all noisy output before importing ML libraries
    quiet_env = {
        "HF_HUB_DISABLE_TELEMETRY": "1",
        "HF_HUB_DISABLE_IMPLICIT_TOKEN": "1",
        "TOKENIZERS_PARALLELISM": "false",
        "SAFETENSORS_FAST_GPU": "0",
        "TQDM_DISABLE": "1",
    }
    for var_name, var_value in quiet_env.items():
        os.environ[var_name] = var_value
    warnings.filterwarnings("ignore")
    for noisy_logger in ("transformers", "sentence_transformers",
                         "huggingface_hub", "safetensors"):
        logging.getLogger(noisy_logger).setLevel(logging.ERROR)

    print("Loading embedding model (first run may take 30-60 seconds)...", file=sys.stderr)

    # Suppress safetensors LOAD REPORT, tqdm progress bars, and HF Hub warnings
    # by redirecting at the OS file-descriptor level (C extensions bypass Python
    # sys.stdout/stderr). Import is inside the block because it also emits warnings.
    devnull_fd = os.open(os.devnull, os.O_WRONLY)
    stdout_backup = os.dup(1)
    stderr_backup = os.dup(2)
    os.dup2(devnull_fd, 1)
    os.dup2(devnull_fd, 2)
    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer(config.embedding_model)
    finally:
        # Restore the real stdout/stderr before releasing the descriptors.
        os.dup2(stdout_backup, 1)
        os.dup2(stderr_backup, 2)
        os.close(stdout_backup)
        os.close(stderr_backup)
        os.close(devnull_fd)

    print(f"Indexing {len(skills)} skills...", file=sys.stderr)
    started = time.time()

    # Each skill is embedded from its display name plus description.
    texts = [f"{s['name']} {s['description']}" for s in skills]
    vectors = model.encode(texts, show_progress_bar=False)

    records = [
        {
            "name": skill["name"],
            "description": skill["description"],
            "filename": skill["filename"],
            "filepath": skill["filepath"],
            "source_label": skill["source_label"],
            "content": skill["content"],
            "vector": vector.tolist(),
        }
        for skill, vector in zip(skills, vectors)
    ]

    db = lancedb.connect(str(config.resolved_vector_db_path))

    # Rebuild from scratch; a missing table on the first run is expected.
    try:
        db.drop_table("skills")
    except Exception:
        pass

    db.create_table("skills", data=records)
    elapsed = time.time() - started
    print(f"Done! {len(records)} skills indexed in {elapsed:.1f}s", file=sys.stderr)

    # Also build TF-IDF index for fast search (no PyTorch needed at search time)
    _build_tfidf_index(skills, texts, config)

    return len(records)
229
+
230
+
231
def main():
    """CLI entry point: rebuild the index, exiting with status 1 if it is empty."""
    indexed = build_index(load_config())
    if indexed == 0:
        sys.exit(1)


if __name__ == "__main__":
    main()