memstack-skill-loader 3.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memstack_skill_loader/__init__.py +1 -0
- memstack_skill_loader/__main__.py +18 -0
- memstack_skill_loader/compression.py +345 -0
- memstack_skill_loader/config.py +114 -0
- memstack_skill_loader/dashboard.html +829 -0
- memstack_skill_loader/dashboard.py +360 -0
- memstack_skill_loader/indexer.py +240 -0
- memstack_skill_loader/license.py +409 -0
- memstack_skill_loader/search.py +164 -0
- memstack_skill_loader/server.py +883 -0
- memstack_skill_loader/stats.py +428 -0
- memstack_skill_loader/tfidf_search.py +142 -0
- memstack_skill_loader/version_check.py +93 -0
- memstack_skill_loader-3.5.0.dist-info/METADATA +10 -0
- memstack_skill_loader-3.5.0.dist-info/RECORD +18 -0
- memstack_skill_loader-3.5.0.dist-info/WHEEL +5 -0
- memstack_skill_loader-3.5.0.dist-info/entry_points.txt +2 -0
- memstack_skill_loader-3.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
"""Local usage analytics — SQLite-backed skill fire tracking and dashboard data."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import sys
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
DB_PATH = Path.home() / ".memstack" / "stats.db"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _get_conn() -> sqlite3.Connection:
    """Open the stats database, creating the file and schema when missing.

    Ensures the parent directory of ``DB_PATH`` exists, connects with a
    5-second busy timeout, switches the journal mode to WAL and creates the
    ``skill_fires``, ``daily_summary`` and ``compression_stats`` tables if
    they do not exist yet.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        OSError: If the database directory cannot be created.
        sqlite3.Error: If connecting or initializing the schema fails.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(DB_PATH), timeout=5)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS skill_fires (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                skill_name TEXT NOT NULL,
                category TEXT,
                project TEXT,
                tool TEXT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
            """
        )
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS daily_summary (
                date TEXT PRIMARY KEY,
                total_fires INTEGER DEFAULT 0,
                unique_skills INTEGER DEFAULT 0,
                unique_projects INTEGER DEFAULT 0
            )
            """
        )
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS compression_stats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                skill_name TEXT NOT NULL,
                tokens_before INTEGER NOT NULL,
                tokens_after INTEGER NOT NULL,
                tier TEXT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
            """
        )
        conn.commit()
    except Exception:
        # Callers only close the connection they receive; if setup fails
        # nobody else holds this handle, so release it before re-raising.
        conn.close()
        raise
    return conn
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def log_skill_fire(
    skill_name: str,
    category: str | None = None,
    project: str | None = None,
    tool: str | None = None,
) -> None:
    """Record a skill activation and refresh today's ``daily_summary`` row.

    Best-effort: any failure is reported on stderr and never raised, so
    analytics cannot break the caller.

    Args:
        skill_name: Name of the skill that fired.
        category: Optional category stored with the fire.
        project: Optional project stored with the fire.
        tool: Optional tool stored with the fire.
    """
    try:
        conn = _get_conn()
        try:
            # Local calendar day; used as the daily_summary primary key.
            today = datetime.now().strftime("%Y-%m-%d")

            conn.execute(
                "INSERT INTO skill_fires (skill_name, category, project, tool) VALUES (?, ?, ?, ?)",
                (skill_name, category, project, tool),
            )

            # Recompute today's aggregates from the raw rows.
            # skill_fires.timestamp defaults to CURRENT_TIMESTAMP, which SQLite
            # stores in UTC, while ``today`` is a local date. Convert with the
            # 'localtime' modifier so both sides describe the same day.
            row = conn.execute(
                """
                SELECT COUNT(*), COUNT(DISTINCT skill_name), COUNT(DISTINCT project)
                FROM skill_fires WHERE date(timestamp, 'localtime') = ?
                """,
                (today,),
            ).fetchone()

            # Upsert daily_summary for today
            conn.execute(
                """
                INSERT INTO daily_summary (date, total_fires, unique_skills, unique_projects)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(date) DO UPDATE SET
                    total_fires = excluded.total_fires,
                    unique_skills = excluded.unique_skills,
                    unique_projects = excluded.unique_projects
                """,
                (today, row[0], row[1], row[2]),
            )
            conn.commit()
        finally:
            conn.close()
    except Exception as exc:
        # Deliberate catch-all: stats logging must never propagate errors.
        print(f"[memstack-stats] log_skill_fire failed: {exc}", file=sys.stderr)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def log_compression(
    skill_name: str,
    tokens_before: int,
    tokens_after: int,
    tier: str = "free",
) -> None:
    """Store one compression event in the ``compression_stats`` table.

    Failures are printed to stderr and not raised.

    Args:
        skill_name: Skill whose content was compressed.
        tokens_before: Token count before compression.
        tokens_after: Token count after compression.
        tier: Tier label stored with the event.
    """
    insert_sql = (
        "INSERT INTO compression_stats (skill_name, tokens_before, tokens_after, tier) "
        "VALUES (?, ?, ?, ?)"
    )
    values = (skill_name, tokens_before, tokens_after, tier)
    try:
        db = _get_conn()
        try:
            db.execute(insert_sql, values)
            db.commit()
        finally:
            db.close()
    except Exception as exc:
        print(f"[memstack-stats] log_compression failed: {exc}", file=sys.stderr)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def get_compression_stats() -> dict:
    """Return aggregate compression stats for the dashboard.

    Returns:
        A dict with ``total_compressions``, ``total_tokens_before``,
        ``total_tokens_after``, ``total_tokens_saved``,
        ``avg_compression_pct`` (percentage rounded to one decimal) and
        ``top_skills`` (up to 10 skills ordered by tokens saved, each with
        ``name``, ``compressions``, ``tokens_saved`` and ``avg_pct``).
        On any error the same keys are returned with zero/empty values, so
        consumers can read every key unconditionally.
    """
    # Same shape as the success payload; used when the query fails.
    empty = {
        "total_compressions": 0,
        "total_tokens_before": 0,
        "total_tokens_after": 0,
        "total_tokens_saved": 0,
        "avg_compression_pct": 0.0,
        "top_skills": [],
    }
    try:
        conn = _get_conn()
        try:
            row = conn.execute(
                "SELECT COUNT(*), COALESCE(SUM(tokens_before), 0), "
                "COALESCE(SUM(tokens_after), 0) FROM compression_stats"
            ).fetchone()
            total_events, total_before, total_after = row
            total_saved = total_before - total_after
            # Guard the division: an empty table yields total_before == 0.
            ratio = round(total_saved / total_before * 100, 1) if total_before > 0 else 0.0

            # Per-skill breakdown (top 10 by tokens saved)
            per_skill = conn.execute(
                "SELECT skill_name, COUNT(*), SUM(tokens_before), SUM(tokens_after) "
                "FROM compression_stats GROUP BY skill_name "
                "ORDER BY SUM(tokens_before) - SUM(tokens_after) DESC LIMIT 10"
            ).fetchall()

            return {
                "total_compressions": total_events,
                "total_tokens_before": total_before,
                "total_tokens_after": total_after,
                "total_tokens_saved": total_saved,
                "avg_compression_pct": ratio,
                "top_skills": [
                    {
                        "name": name,
                        "compressions": count,
                        "tokens_saved": before - after,
                        "avg_pct": round((before - after) / before * 100, 1) if before > 0 else 0.0,
                    }
                    for name, count, before, after in per_skill
                ],
            }
        finally:
            conn.close()
    except Exception as exc:
        print(f"[memstack-stats] get_compression_stats failed: {exc}", file=sys.stderr)
        return empty
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_dashboard_data() -> dict:
    """Return a dict of dashboard metrics, or zeroed defaults when unavailable.

    Defaults are returned when the database file does not exist yet or when
    any query fails (the error is printed to stderr).

    Returns:
        A dict with the keys ``total_fires``, ``unique_skills_used``,
        ``total_sessions`` (number of distinct days with at least one fire),
        ``fires_today``, ``top_skills``, ``category_breakdown``,
        ``daily_trend`` (14 entries, zero-filled), ``project_breakdown``,
        ``context_saved_estimate``, ``dollar_savings`` and
        ``most_active_project`` (``None`` when there is no recent activity).
    """
    defaults = {
        "total_fires": 0,
        "unique_skills_used": 0,
        "total_sessions": 0,
        "fires_today": 0,
        "top_skills": [],
        "category_breakdown": [],
        "daily_trend": [],
        "project_breakdown": [],
        "context_saved_estimate": 0,
        "dollar_savings": 0.0,
        "most_active_project": None,
    }
    try:
        if not DB_PATH.exists():
            return defaults

        conn = _get_conn()
        try:
            # Sample the clock once so every window below is derived from the
            # same instant, even if the call runs across midnight.
            now = datetime.now()
            today = now.strftime("%Y-%m-%d")
            # ``>=`` against today-7 covers today plus the 7 preceding days.
            seven_days_ago = (now - timedelta(days=7)).strftime("%Y-%m-%d")
            # today-13 .. today inclusive is exactly 14 calendar days.
            fourteen_days_ago = now - timedelta(days=13)

            # NOTE: timestamps default to CURRENT_TIMESTAMP, which SQLite
            # stores in UTC. The boundaries above are local dates, so every
            # date() call converts with 'localtime' before comparing.

            # All-time totals
            total_fires = conn.execute("SELECT COUNT(*) FROM skill_fires").fetchone()[0]
            unique_skills = conn.execute(
                "SELECT COUNT(DISTINCT skill_name) FROM skill_fires"
            ).fetchone()[0]
            total_sessions = conn.execute(
                "SELECT COUNT(DISTINCT date(timestamp, 'localtime')) FROM skill_fires"
            ).fetchone()[0]

            # Today
            fires_today = conn.execute(
                "SELECT COUNT(*) FROM skill_fires WHERE date(timestamp, 'localtime') = ?",
                (today,),
            ).fetchone()[0]

            # Top skills — recent window
            top_skills = conn.execute(
                """
                SELECT skill_name, COUNT(*) as cnt
                FROM skill_fires WHERE date(timestamp, 'localtime') >= ?
                GROUP BY skill_name ORDER BY cnt DESC LIMIT 10
                """,
                (seven_days_ago,),
            ).fetchall()

            # Category breakdown — recent window
            category_breakdown = conn.execute(
                """
                SELECT COALESCE(category, 'uncategorized') as cat, COUNT(*) as cnt
                FROM skill_fires WHERE date(timestamp, 'localtime') >= ?
                GROUP BY cat ORDER BY cnt DESC
                """,
                (seven_days_ago,),
            ).fetchall()

            # Daily trend — last 14 days (include zero-count days)
            daily_counts = dict(
                conn.execute(
                    """
                    SELECT date(timestamp, 'localtime'), COUNT(*)
                    FROM skill_fires WHERE date(timestamp, 'localtime') >= ?
                    GROUP BY date(timestamp, 'localtime')
                    """,
                    (fourteen_days_ago.strftime("%Y-%m-%d"),),
                ).fetchall()
            )
            daily_trend = []
            for offset in range(14):
                day = (fourteen_days_ago + timedelta(days=offset)).strftime("%Y-%m-%d")
                daily_trend.append({"date": day, "count": daily_counts.get(day, 0)})

            # Project breakdown — all time, top 10
            project_breakdown = conn.execute(
                """
                SELECT COALESCE(project, 'unknown') as proj, COUNT(*) as cnt
                FROM skill_fires
                GROUP BY proj ORDER BY cnt DESC LIMIT 10
                """
            ).fetchall()

            # Most active project in the recent window
            most_active_project = conn.execute(
                """
                SELECT COALESCE(project, 'unknown') as proj, COUNT(*) as cnt
                FROM skill_fires WHERE date(timestamp, 'localtime') >= ?
                GROUP BY proj ORDER BY cnt DESC LIMIT 1
                """,
                (seven_days_ago,),
            ).fetchone()

            # Rough estimate with hard-coded factors: 1700 tokens counted per
            # fire, priced at 15 per million tokens.
            tokens_saved = total_fires * 1700
            dollar_savings = round((tokens_saved / 1_000_000) * 15, 2)

            return {
                "total_fires": total_fires,
                "unique_skills_used": unique_skills,
                "total_sessions": total_sessions,
                "fires_today": fires_today,
                "top_skills": [{"name": r[0], "count": r[1]} for r in top_skills],
                "category_breakdown": [
                    {"category": r[0], "count": r[1]} for r in category_breakdown
                ],
                "daily_trend": daily_trend,
                "project_breakdown": [
                    {"project": r[0], "count": r[1]} for r in project_breakdown
                ],
                "context_saved_estimate": tokens_saved,
                "dollar_savings": dollar_savings,
                "most_active_project": {
                    "name": most_active_project[0],
                    "fires": most_active_project[1],
                } if most_active_project else None,
            }
        finally:
            conn.close()
    except Exception as exc:
        print(f"[memstack-stats] get_dashboard_data failed: {exc}", file=sys.stderr)
        return defaults
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def get_project_details() -> list[dict]:
    """List every project with its fire count, last activity and top skills.

    Projects are ordered by fire count, highest first; rows without a
    project are grouped under ``'unknown'``. Each entry carries up to three
    most-fired skills, ignoring skill names that begin with an underscore.
    Returns an empty list when the database file is absent or a query fails
    (the failure is printed to stderr).
    """
    projects_sql = """
        SELECT COALESCE(project, 'unknown') as proj,
               COUNT(*) as cnt,
               MAX(timestamp) as last_active
        FROM skill_fires
        GROUP BY proj ORDER BY cnt DESC
    """
    top_skills_sql = """
        SELECT skill_name, COUNT(*) as c
        FROM skill_fires WHERE COALESCE(project, 'unknown') = ?
          AND skill_name NOT LIKE '@_%' ESCAPE '@'
        GROUP BY skill_name ORDER BY c DESC LIMIT 3
    """
    try:
        if not DB_PATH.exists():
            return []
        db = _get_conn()
        try:
            details = []
            for name, fire_count, last_seen in db.execute(projects_sql).fetchall():
                # One follow-up query per project for its three busiest skills.
                skill_rows = db.execute(top_skills_sql, (name,)).fetchall()
                entry = {
                    "name": name,
                    "total_fires": fire_count,
                    "last_active": last_seen,
                    "top_skills": [
                        {"name": skill, "count": hits} for skill, hits in skill_rows
                    ],
                }
                details.append(entry)
            return details
        finally:
            db.close()
    except Exception as exc:
        print(f"[memstack-stats] get_project_details failed: {exc}", file=sys.stderr)
        return []
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def backfill_categories(category_map: dict[str, str]) -> int:
    """Backfill NULL categories in existing skill_fires using a slug->category map.

    Handles emoji-prefixed display names (e.g. "🛡️ OWASP Top 10") by stripping
    non-ASCII chars before converting to slug form for matching. Names that
    start with ``__`` are looked up verbatim. When there is no exact hit, the
    first map key that contains, or is contained in, the slug is used.

    Args:
        category_map: Mapping from skill slug to category name.

    Returns:
        The number of rows updated. ``0`` is returned when an error occurs,
        because the connection is closed without committing in that case.
    """
    import re

    def _to_slug(name: str) -> str:
        """Convert a display name to a slug for category map lookup."""
        # Strip emoji and non-ASCII characters
        clean = re.sub(r'[^\x00-\x7F]+', '', name).strip()
        # Convert to lowercase slug
        return clean.lower().replace(" ", "-").replace("_", "-")

    updated = 0
    try:
        conn = _get_conn()
        try:
            rows = conn.execute(
                "SELECT DISTINCT skill_name FROM skill_fires WHERE category IS NULL"
            ).fetchall()

            for (skill_name,) in rows:
                # For internal names like __list__, use the raw name as slug
                slug = skill_name if skill_name.startswith("__") else _to_slug(skill_name)
                category = category_map.get(slug)

                # Substring fallback. An empty slug (e.g. an all-emoji name)
                # or an empty map key is a substring of everything, so skip
                # those instead of assigning an arbitrary category.
                if not category and slug:
                    for map_slug, map_cat in category_map.items():
                        if map_slug and (map_slug in slug or slug in map_slug):
                            category = map_cat
                            break

                if category:
                    cursor = conn.execute(
                        "UPDATE skill_fires SET category = ? WHERE skill_name = ? AND category IS NULL",
                        (category, skill_name),
                    )
                    # Count affected rows, not distinct skill names.
                    updated += cursor.rowcount

            conn.commit()
        finally:
            conn.close()
    except Exception as exc:
        print(f"[memstack-stats] backfill_categories failed: {exc}", file=sys.stderr)
        # Nothing was committed, so do not report partial progress.
        updated = 0
    return updated
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def get_recent_activity(limit: int = 10) -> list[dict]:
    """Fetch the newest skill fires for the activity feed.

    Skill names beginning with an underscore are left out. Each entry has
    the keys ``timestamp``, ``skill_name``, ``category`` and ``project``.
    An empty list is returned if the lookup fails (the error goes to stderr).

    Args:
        limit: Maximum number of entries to return.
    """
    columns = ("timestamp", "skill_name", "category", "project")
    query = (
        "SELECT timestamp, skill_name, category, project "
        "FROM skill_fires "
        "WHERE skill_name NOT LIKE '@_%' ESCAPE '@' "
        "ORDER BY timestamp DESC LIMIT ?"
    )
    try:
        db = _get_conn()
        try:
            records = db.execute(query, (limit,)).fetchall()
            # Pair each selected column with its value, in SELECT order.
            return [dict(zip(columns, record)) for record in records]
        finally:
            db.close()
    except Exception as exc:
        print(f"[memstack-stats] get_recent_activity failed: {exc}", file=sys.stderr)
        return []
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def get_skill_fire_counts() -> dict[str, int]:
    """Map each skill name to how many times it has fired.

    Skill names beginning with an underscore are excluded. An empty dict is
    returned if the lookup fails (the error goes to stderr).
    """
    query = (
        "SELECT skill_name, COUNT(*) FROM skill_fires "
        "WHERE skill_name NOT LIKE '@_%' ESCAPE '@' "
        "GROUP BY skill_name"
    )
    try:
        db = _get_conn()
        try:
            counts = {}
            for skill, fires in db.execute(query).fetchall():
                counts[skill] = fires
            return counts
        finally:
            db.close()
    except Exception as exc:
        print(f"[memstack-stats] get_skill_fire_counts failed: {exc}", file=sys.stderr)
        return {}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Lightweight TF-IDF search against pre-built skill index.
|
|
2
|
+
|
|
3
|
+
This module replaces the heavy sentence-transformers/PyTorch search
|
|
4
|
+
with scikit-learn TF-IDF, cutting cold start from ~10s to ~2s.
|
|
5
|
+
The TF-IDF index is built at index time by indexer.py and loaded
|
|
6
|
+
from a pickle file at search time.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pickle
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
14
|
+
|
|
15
|
+
from .config import Config, load_config
|
|
16
|
+
|
|
17
|
+
# Lazy-loaded singletons
|
|
18
|
+
_index = None # dict with 'vectorizer', 'matrix', 'skills'
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_index(config: Config) -> dict | None:
    """Load the pre-built TF-IDF index from pickle, caching it module-wide.

    Args:
        config: Configuration whose ``resolved_vector_db_path`` directory is
            expected to contain ``tfidf_index.pkl``.

    Returns:
        The cached or freshly loaded index, or ``None`` when the file is
        missing or cannot be loaded. Failures are printed to stderr and are
        not cached, so a later call retries the load.
    """
    global _index
    if _index is not None:
        return _index

    pkl_path = config.resolved_vector_db_path / "tfidf_index.pkl"
    if not pkl_path.exists():
        print(f"TF-IDF index not found at {pkl_path}. Run index_skills.py first.", file=sys.stderr)
        return None

    try:
        # SECURITY: unpickling can execute arbitrary code. This is only safe
        # while the index file is produced locally by a trusted process;
        # never point the vector DB path at files from an untrusted source.
        with open(pkl_path, "rb") as f:
            loaded = pickle.load(f)
        # Validate before caching: a payload without a 'skills' collection
        # must not become the cached index for all later calls.
        skill_count = len(loaded["skills"])
    except Exception as e:
        print(f"Error loading TF-IDF index: {e}", file=sys.stderr)
        return None

    _index = loaded
    print(f"Loaded TF-IDF index: {skill_count} skills", file=sys.stderr)
    return _index
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def search_skills(query: str, config: Config, top_k: int = 3) -> list[dict]:
    """Search skills using TF-IDF cosine similarity.

    Args:
        query: Free-text search query.
        config: Configuration used to locate the TF-IDF index.
        top_k: Maximum number of results. Zero or negative values yield an
            empty list.

    Returns:
        Up to ``top_k`` result dicts ordered by descending score. Each has
        ``name``, ``slug``, ``description``, ``source_label``, ``content``,
        ``filepath`` and ``score`` (rounded to 4 decimals). Skills whose
        rounded score is not positive are omitted. Returns an empty list
        when the index is unavailable.
    """
    # A negative top_k would otherwise slice as [:-k] and return almost
    # every skill instead of none.
    if top_k <= 0:
        return []

    index = _get_index(config)
    if index is None:
        return []

    vectorizer = index["vectorizer"]
    matrix = index["matrix"]
    skills = index["skills"]

    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, matrix)[0]

    # Get top-k indices sorted by score descending
    ranked = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)[:top_k]

    results = []
    for idx in ranked:
        score = round(float(similarities[idx]), 4)
        if score <= 0:
            continue
        skill = skills[idx]
        results.append({
            "name": skill["name"],
            "slug": skill["slug"],
            "description": skill["description"],
            "source_label": skill["source_label"],
            "content": skill["content"],
            "filepath": skill["filepath"],
            "score": score,
        })

    return results
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def list_all_skills(config: Config) -> list[dict]:
    """List a summary of every indexed skill, sorted by name.

    Each summary holds ``name``, ``slug``, ``description`` and
    ``source_label``. An empty list is returned when no index is available.
    """
    index = _get_index(config)
    if index is None:
        return []

    summary_fields = ("name", "slug", "description", "source_label")
    summaries = [
        {field: entry[field] for field in summary_fields}
        for entry in index["skills"]
    ]
    summaries.sort(key=lambda item: item["name"])
    return summaries
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _skill_record(skill: dict) -> dict:
    """Copy the fields returned for a name match out of an indexed skill."""
    fields = ("name", "slug", "description", "source_label", "content", "filepath")
    return {field: skill[field] for field in fields}


def get_skill_by_name(name: str, config: Config) -> dict | None:
    """Find a skill by name: exact match -> substring -> TF-IDF fallback.

    Args:
        name: Skill name or slug to look up; surrounding whitespace and case
            are ignored.
        config: Configuration used to locate the TF-IDF index.

    Returns:
        A dict with ``name``, ``slug``, ``description``, ``source_label``,
        ``content`` and ``filepath`` (plus ``score`` when the TF-IDF fallback
        produced the match), or ``None`` when the index is unavailable, the
        name is blank, or nothing matches.
    """
    index = _get_index(config)
    if index is None:
        return None

    skills = index["skills"]
    name_lower = name.strip().lower()

    # A blank name is a substring of every skill name, so stage 2 would
    # return whichever skill happens to be first. Treat it as "not found".
    if not name_lower:
        return None

    # Stage 1: exact match (by name or slug)
    for skill in skills:
        if skill["name"].lower() == name_lower or skill["slug"] == name_lower:
            return _skill_record(skill)

    # Stage 2: substring fuzzy match (first hit in index order wins)
    for skill in skills:
        skill_lower = skill["name"].lower()
        # Skip blank skill names for the same reason as the blank query.
        if skill_lower and (name_lower in skill_lower or skill_lower in name_lower):
            return _skill_record(skill)

    # Stage 3: TF-IDF search fallback; accept only reasonably close hits
    results = search_skills(name, config, top_k=1)
    if results and results[0]["score"] > 0.3:
        return results[0]

    return None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def reset_cache() -> None:
    """Drop the in-memory index so the next lookup loads it from disk again."""
    global _index
    _index = None
    return None
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Check for MemStack updates via GitHub releases API."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
GITHUB_API_URL = "https://api.github.com/repos/cwinvestments/memstack/releases/latest"
|
|
10
|
+
CACHE_FILE = Path.home() / ".memstack" / "version-check.json"
|
|
11
|
+
CACHE_TTL = 86400 # 24 hours in seconds
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _read_local_version() -> str | None:
    """Read the nearest ``VERSION`` file at or above this module's directory.

    Starts at the directory containing this file and walks up through every
    parent directory; the first regular file named ``VERSION`` wins.

    Returns:
        The stripped file content, or ``None`` when no such file exists or
        the one found cannot be read.
    """
    # The skill loader lives inside memstack, so walk up to find VERSION
    loader_dir = Path(__file__).resolve().parent
    for parent in (loader_dir, *loader_dir.parents):
        version_file = parent / "VERSION"
        # is_file() rather than exists(): a directory that happens to be
        # named VERSION must not end the search with a read error.
        if version_file.is_file():
            try:
                return version_file.read_text(encoding="utf-8").strip()
            except OSError:
                return None
    return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _read_cache() -> dict | None:
    """Read the cached version-check result if it is still valid.

    Returns:
        The cached dict when ``CACHE_FILE`` holds a JSON object whose
        ``checked_at`` timestamp is less than ``CACHE_TTL`` seconds old.
        ``None`` when the file is missing, unreadable, malformed, expired or
        stamped in the future.
    """
    try:
        if not CACHE_FILE.exists():
            return None
        # ValueError covers both invalid JSON and undecodable bytes.
        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        return None

    # A hand-edited or corrupted file may hold valid JSON that is not an
    # object; treat it as a cache miss so the next fetch rewrites it.
    if not isinstance(data, dict):
        return None

    checked_at = data.get("checked_at", 0)
    if not isinstance(checked_at, (int, float)):
        return None

    age = time.time() - checked_at
    # A negative age means the stamp lies in the future (e.g. the clock was
    # changed); do not let such an entry stay valid indefinitely.
    if 0 <= age < CACHE_TTL:
        return data
    return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _write_cache(remote_version: str) -> None:
    """Persist the fetched remote version with the current time.

    Writes a small JSON document to ``CACHE_FILE``, creating its directory
    first. Filesystem errors are ignored because the cache is optional.
    """
    try:
        CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        payload = {"remote_version": remote_version, "checked_at": time.time()}
        serialized = json.dumps(payload, indent=2)
        CACHE_FILE.write_text(serialized, encoding="utf-8")
    except OSError:
        pass
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _normalize_version(version: str) -> str:
    """Trim whitespace and one leading "v" so "v1.2.3" compares equal to "1.2.3"."""
    cleaned = str(version).strip()
    if cleaned[:1] in ("v", "V"):
        cleaned = cleaned[1:]
    return cleaned


def _report_if_outdated(local_version: str, remote_version: str) -> None:
    """Print the update notice to stderr when the two versions differ."""
    # NOTE: this is an inequality check, not an ordering; a local version
    # that is ahead of the latest release is also reported.
    if _normalize_version(remote_version) != _normalize_version(local_version):
        print(
            f"[memstack] Update available: {remote_version} (you have {local_version}). "
            f"Run 'git pull' in your MemStack\u2122 directory to update.",
            file=sys.stderr,
        )


def check_for_updates() -> None:
    """Check GitHub for a different MemStack version and report it on stderr.

    Does nothing when the local version cannot be determined. A cached
    result that is still valid is used instead of a network request;
    otherwise the latest release tag is fetched from ``GITHUB_API_URL`` with
    a 5-second timeout and cached. Every error is swallowed so that this
    check can never block skill loading.
    """
    try:
        local_version = _read_local_version()
        if not local_version:
            return

        # Check cache first
        cached = _read_cache()
        if cached:
            remote_version = cached.get("remote_version", "")
            if remote_version:
                _report_if_outdated(local_version, remote_version)
            return

        # Fetch from GitHub API (imported lazily so that the dependency is
        # only loaded when a request is actually made)
        import httpx
        response = httpx.get(GITHUB_API_URL, timeout=5.0, follow_redirects=True)
        if response.status_code != 200:
            return

        data = response.json()
        remote_version = data.get("tag_name", "")
        if not remote_version:
            return

        _write_cache(remote_version)
        _report_if_outdated(local_version, remote_version)
    except Exception:
        # Silently skip on any error — never block skill loading
        pass
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: memstack-skill-loader
|
|
3
|
+
Version: 3.5.0
|
|
4
|
+
Summary: MCP server that vector-indexes MemStack Pro skills for on-demand loading
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: mcp>=1.0.0
|
|
7
|
+
Requires-Dist: lancedb>=0.6.0
|
|
8
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
9
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
10
|
+
Requires-Dist: httpx>=0.24.0
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
memstack_skill_loader/__init__.py,sha256=zIctJ-xsmRIAzE7HmG1Z4kDvbjiB8rxQT0LOmb1Fga0,71
|
|
2
|
+
memstack_skill_loader/__main__.py,sha256=f4T6TuX1EPI4bHi-LOEWavdax2uDeG9J6mkoE-d8uzE,346
|
|
3
|
+
memstack_skill_loader/compression.py,sha256=5NQMYa9wj6dyAA6OU1z6Udma96KZj6bFJmYrr0kGcDw,11761
|
|
4
|
+
memstack_skill_loader/config.py,sha256=vQZr4DOM4cEL3oXm1xB1lqldNpXwEMcKyUtuiMXuqV4,4137
|
|
5
|
+
memstack_skill_loader/dashboard.html,sha256=QGetBqAWZEmQc0CSLEpHlbZc9nLlwtr_-HpEcG4yyjc,34547
|
|
6
|
+
memstack_skill_loader/dashboard.py,sha256=StM1ibJ6L0YLRMynwgmIWNl4uO-Z4CmAz2Hh4OkzRXI,13707
|
|
7
|
+
memstack_skill_loader/indexer.py,sha256=hvKvJ8NTX-JpSEZ4G-6-qOCVMowj8jmmfeKVtlQhqpM,8276
|
|
8
|
+
memstack_skill_loader/license.py,sha256=aVGKO30hEUuO7SJYbjhP7fmvX8R6V6smMMgWa7r9sn4,15789
|
|
9
|
+
memstack_skill_loader/search.py,sha256=rKrYNqblgkl00XRcCIBg-PobCNCfiEiWWk_OmX7CGlg,4995
|
|
10
|
+
memstack_skill_loader/server.py,sha256=-QoIBFrohMjak1DHjIv_EMOUaYd_4bAoaVeqZ6DgrSQ,36471
|
|
11
|
+
memstack_skill_loader/stats.py,sha256=ynx75NyrUSzWq336e3_p38MTAWiDISVUC14vq69QKho,15651
|
|
12
|
+
memstack_skill_loader/tfidf_search.py,sha256=QtNuQpdL261EIIoyqizv7psBVsH52uqDn7oHln8gfRQ,4473
|
|
13
|
+
memstack_skill_loader/version_check.py,sha256=zuR0w3r_FNMeWoiUFinX_xRrClof92l2RjE9rI91VtI,3183
|
|
14
|
+
memstack_skill_loader-3.5.0.dist-info/METADATA,sha256=NtRQ7EYvnYKJncLPejA8HlfnAZeArdogjiKaqiR6UAc,341
|
|
15
|
+
memstack_skill_loader-3.5.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
16
|
+
memstack_skill_loader-3.5.0.dist-info/entry_points.txt,sha256=pbqO6c8Gtff3boho-pDlAOpt4s6JdCNtV0xGiqIQkuA,70
|
|
17
|
+
memstack_skill_loader-3.5.0.dist-info/top_level.txt,sha256=tPNsHqnSWoQ8AYfLRRJR3fpHOWSSjoQzAE5FjJIiKPw,22
|
|
18
|
+
memstack_skill_loader-3.5.0.dist-info/RECORD,,
|