sad-mcp 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,34 @@
1
- import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
1
+ import { readFileSync, writeFileSync, mkdirSync, existsSync, rmSync } from "fs";
2
2
  import { join } from "path";
3
3
  import { homedir } from "os";
4
+ // Bump this when extraction logic changes to auto-invalidate student caches
5
+ const CACHE_VERSION = 2;
4
6
  const TEXT_CACHE_DIR = join(homedir(), ".sad-mcp", "text-cache");
5
7
  const TEXT_CACHE_INDEX = join(TEXT_CACHE_DIR, "index.json");
8
+ const CACHE_VERSION_FILE = join(TEXT_CACHE_DIR, "version");
6
9
/**
 * Make sure the text-cache directory exists on disk.
 *
 * Does nothing when TEXT_CACHE_DIR is already present; otherwise creates it
 * together with any missing parent directories.
 */
function ensureDir() {
    const alreadyPresent = existsSync(TEXT_CACHE_DIR);
    if (alreadyPresent) {
        return;
    }
    mkdirSync(TEXT_CACHE_DIR, { recursive: true });
}
14
/**
 * Discard the on-disk text cache when it was written by older extraction logic.
 *
 * Reads the integer stored in CACHE_VERSION_FILE. When that value is at least
 * CACHE_VERSION the cache is left untouched (so a cache stamped by a newer
 * release is also kept). In every other case — version file missing,
 * unreadable, non-numeric, or lower — TEXT_CACHE_DIR is removed recursively,
 * recreated via ensureDir(), and stamped with the current CACHE_VERSION.
 *
 * This function is invoked at module load, so filesystem failures while
 * resetting the cache are reported on stderr instead of being thrown; a
 * failed reset is simply retried on the next start because the version
 * stamp is only written after the wipe succeeds.
 */
function checkCacheVersion() {
    let stored = Number.NaN;
    try {
        stored = Number.parseInt(readFileSync(CACHE_VERSION_FILE, "utf-8").trim(), 10);
    }
    catch {
        // Missing or unreadable version file — treat the cache as outdated.
    }
    // NaN never satisfies >=, so a corrupt version file also triggers a wipe.
    if (stored >= CACHE_VERSION) {
        return;
    }
    try {
        // force: true makes this a no-op when the directory does not exist yet.
        rmSync(TEXT_CACHE_DIR, { recursive: true, force: true });
        ensureDir();
        writeFileSync(CACHE_VERSION_FILE, String(CACHE_VERSION));
    }
    catch (err) {
        // Best-effort: a cache reset problem must not abort module import.
        console.error(`[sad-mcp] Failed to reset text cache at ${TEXT_CACHE_DIR}:`, err);
    }
}
30
+ // Run on import — wipes cache if version changed
31
+ checkCacheVersion();
11
32
  function loadIndex() {
12
33
  try {
13
34
  return JSON.parse(readFileSync(TEXT_CACHE_INDEX, "utf-8"));
package/dist/tools.js CHANGED
@@ -32,13 +32,27 @@ async function ensureTextCache() {
32
32
  }
33
33
/**
 * Find the lines of `text` that match `query`, best matches first.
 *
 * Matching is case-insensitive and substring-based:
 *  - A line containing the whole (trimmed) query as a phrase scores
 *    `distinctWords + 1`, which is always higher than any partial match.
 *  - For multi-word queries, a line containing at least 60% of the distinct
 *    query words scores the number of distinct words it contains.
 *
 * Repeated words in the query are counted once, so "the the cat" cannot reach
 * the threshold on a line that only contains "the". A blank or nullish query
 * yields no matches instead of matching every line.
 *
 * @param {string} text - Full document text; split into lines on "\n".
 * @param {string} query - Search phrase; surrounding whitespace is ignored.
 * @returns {{ line: string, lineNumber: number, score: number }[]} Matches
 *   sorted by descending score; equal scores keep document order. `line` is
 *   trimmed and `lineNumber` is 1-based.
 */
function searchInText(text, query) {
    const phrase = String(query ?? "").trim().toLowerCase();
    if (phrase === "") {
        return [];
    }
    const tokens = phrase.split(/\s+/);
    const words = [...new Set(tokens)];
    // Partial matching only makes sense when the user typed several words.
    const isMultiWord = tokens.length >= 2;
    // 60% of the distinct words, in integer arithmetic to avoid float drift.
    const threshold = Math.ceil((words.length * 3) / 5);
    const matches = [];
    const lines = String(text ?? "").split("\n");
    for (let i = 0; i < lines.length; i++) {
        const lineLower = lines[i].toLowerCase();
        // Exact phrase match gets the highest score.
        if (lineLower.includes(phrase)) {
            matches.push({ line: lines[i].trim(), lineNumber: i + 1, score: words.length + 1 });
            continue;
        }
        if (!isMultiWord) {
            continue;
        }
        const wordHits = words.filter((w) => lineLower.includes(w)).length;
        if (wordHits >= threshold) {
            matches.push({ line: lines[i].trim(), lineNumber: i + 1, score: wordHits });
        }
    }
    // Array.prototype.sort is stable, so ties stay in document order.
    matches.sort((a, b) => b.score - a.score);
    return matches;
}
44
58
  export function registerToolHandlers(server) {
@@ -46,7 +60,7 @@ export function registerToolHandlers(server) {
46
60
  tools: [
47
61
  {
48
62
  name: "search_materials",
49
- description: "Search across all course materials for a topic. Returns a SHORT summary list of matching files (name, category, match count). To read the actual content, use get_material on the most relevant file(s) from the results.",
63
+ description: "Search across all course materials for a topic. Returns a SHORT summary list of matching files (name, category, match count). To read the actual content, use get_material on the most relevant file(s) from the results. Note: course materials are in Hebrew. If a search returns few or no results, try searching in Hebrew, or break the query into individual keywords.",
50
64
  inputSchema: {
51
65
  type: "object",
52
66
  properties: {
@@ -64,13 +78,17 @@ export function registerToolHandlers(server) {
64
78
  },
65
79
  {
66
80
  name: "get_material",
67
- description: "Get the full text content of a specific course material file. Use this AFTER search_materials to read the content of a relevant file.",
81
+ description: "Get the text content of a specific course material file. Returns one page (~5000 chars) at a time. Use the `page` parameter to read further into long files. Always check if there are more pages when looking for specific content.",
68
82
  inputSchema: {
69
83
  type: "object",
70
84
  properties: {
71
85
  name: {
72
86
  type: "string",
73
- description: "The file name (or partial name) to retrieve. Matched against file names from search_materials or list_materials results.",
87
+ description: "The file name or path (or partial match) to retrieve. Use the path from search_materials results for exact matching (e.g., 'מבחנים-לסטודנטים/2024-א-א/מבחן.pdf').",
88
+ },
89
+ page: {
90
+ type: "number",
91
+ description: "Page number (1-indexed). Each page is ~5000 characters. Defaults to 1.",
74
92
  },
75
93
  user_question: {
76
94
  type: "string",
@@ -142,6 +160,7 @@ export function registerToolHandlers(server) {
142
160
  if (matches.length > 0 || nameMatch) {
143
161
  results.push({
144
162
  fileName: file.name,
163
+ path: file.path,
145
164
  category: categorizeFile(file),
146
165
  matchCount: nameMatch ? matches.length + 100 : matches.length, // Boost file-name matches
147
166
  preview: matches.length > 0
@@ -158,6 +177,7 @@ export function registerToolHandlers(server) {
158
177
  if (file.name.toLowerCase().includes(queryLower) || file.path.toLowerCase().includes(queryLower)) {
159
178
  results.push({
160
179
  fileName: file.name,
180
+ path: file.path,
161
181
  category: categorizeFile(file),
162
182
  matchCount: 100,
163
183
  preview: `(file name matches "${query}" — use get_material to read)`,
@@ -168,7 +188,7 @@ export function registerToolHandlers(server) {
168
188
  results.sort((a, b) => b.matchCount - a.matchCount);
169
189
  const responseText = results.length === 0
170
190
  ? `No results found for "${query}" in course materials.`
171
- : `Found "${query}" in ${results.length} file(s). Use get_material to read the most relevant one(s):\n\n${results.map((r) => `- ${r.fileName} [${r.category}] (${r.matchCount} matches) — "${r.preview}"`).join("\n")}`;
191
+ : `Found "${query}" in ${results.length} file(s). Use get_material with the file path to read the most relevant one(s):\n\n${results.map((r) => `- ${r.path} [${r.category}] (${r.matchCount} matches) — "${r.preview}"`).join("\n")}`;
172
192
  trackToolCall(name, toolArgs, { resultCount: results.length, success: results.length > 0, responseChars: responseText.length }, Date.now() - startTime);
173
193
  return { content: [{ type: "text", text: responseText }] };
174
194
  }
@@ -181,18 +201,18 @@ export function registerToolHandlers(server) {
181
201
  }
182
202
  await ensureTextCache();
183
203
  const queryLower = queryName.toLowerCase();
184
- // First: check text cache
204
+ // First: check text cache (match against both name and path)
185
205
  let bestMatch = null;
186
206
  for (const [, entry] of textCache) {
187
- if (entry.file.name.toLowerCase().includes(queryLower)) {
207
+ if (entry.file.name.toLowerCase().includes(queryLower) || entry.file.path.toLowerCase().includes(queryLower)) {
188
208
  bestMatch = entry;
189
209
  break;
190
210
  }
191
211
  }
192
- // Fallback: search all files by name and attempt fresh extraction
212
+ // Fallback: search all files by name/path and attempt fresh extraction
193
213
  if (!bestMatch) {
194
214
  const allFiles = await listAllFiles();
195
- const matchedFile = allFiles.find(f => f.name.toLowerCase().includes(queryLower));
215
+ const matchedFile = allFiles.find(f => f.name.toLowerCase().includes(queryLower) || f.path.toLowerCase().includes(queryLower));
196
216
  if (matchedFile && isExtractable(matchedFile)) {
197
217
  try {
198
218
  const buffer = await downloadFile(matchedFile);
@@ -215,12 +235,19 @@ export function registerToolHandlers(server) {
215
235
  trackToolCall(name, toolArgs, { success: false, responseChars: notFoundText.length }, Date.now() - startTime);
216
236
  return { content: [{ type: "text", text: notFoundText }] };
217
237
  }
218
- // Truncate very large files
219
- const maxLen = 30000;
220
- const responseText = bestMatch.text.length > maxLen
221
- ? bestMatch.text.substring(0, maxLen) + "\n...[truncated]"
222
- : bestMatch.text;
223
- const fullResponse = `📄 ${bestMatch.file.name} [${categorizeFile(bestMatch.file)}]\n\n${responseText}`;
238
+ // Pagination
239
+ const PAGE_SIZE = 5000;
240
+ const page = Math.max(1, args.page || 1);
241
+ const totalChars = bestMatch.text.length;
242
+ const totalPages = Math.ceil(totalChars / PAGE_SIZE);
243
+ const start = (page - 1) * PAGE_SIZE;
244
+ const end = Math.min(start + PAGE_SIZE, totalChars);
245
+ const pageText = bestMatch.text.substring(start, end);
246
+ const header = `📄 ${bestMatch.file.name} [${categorizeFile(bestMatch.file)}] — Page ${page}/${totalPages} (${totalChars} chars total)`;
247
+ const footer = page < totalPages
248
+ ? `\n\n[More content available — call get_material with page: ${page + 1} to continue reading]`
249
+ : "";
250
+ const fullResponse = `${header}\n\n${pageText}${footer}`;
224
251
  trackToolCall(name, toolArgs, { success: true, responseChars: fullResponse.length }, Date.now() - startTime);
225
252
  return { content: [{ type: "text", text: fullResponse }] };
226
253
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sad-mcp",
3
- "version": "0.1.13",
3
+ "version": "0.1.15",
4
4
  "description": "MCP server for Software Analysis and Design course materials at BGU",
5
5
  "type": "module",
6
6
  "bin": {