scythe-context-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ import { keywordTerms } from "./keywordSearch.js";
2
/** Text that replaces the cut-off tail of a snippet. */
const truncationMarker = "... [truncated]";
/** The same marker on its own line, used when part of the snippet text is kept. */
const truncationMarkerWithBreak = "\n".concat(truncationMarker);
4
/**
 * Builds a short list of literal terms for a search result.
 *
 * Query terms (from `keywordTerms`) come first, followed by the pieces of the
 * result path that are at least three characters long. Duplicates are
 * dropped and the list is capped at eight entries.
 *
 * @param {string} query - The search query.
 * @param {{ path: string }} result - A search result carrying its file path.
 * @returns {string[]} At most eight unique terms.
 */
export function grepKeywords(query, result) {
  const pathSegments = result.path.split(/[\/._-]+/g);
  const terms = new Set(keywordTerms(query));
  for (const segment of pathSegments) {
    if (segment.length >= 3) {
      terms.add(segment);
    }
  }
  return [...terms].slice(0, 8);
}
10
/**
 * Describes in one phrase why a result was returned.
 *
 * A keyword match type takes precedence (optionally combined with semantic);
 * otherwise the numeric `distance`, then the numeric `score`, is reported.
 *
 * @param {{ matchTypes?: string[], distance?: number, score?: number }} result
 * @returns {string} Human-readable explanation of the match.
 */
export function matchReason(result) {
  const types = result.matchTypes || [];
  if (types.includes("keyword")) {
    return types.includes("semantic")
      ? "semantic similarity plus keyword/path match"
      : "keyword/path match";
  }
  if (typeof result.distance === "number") {
    return `semantic similarity distance ${result.distance.toFixed(4)}`;
  }
  return typeof result.score === "number"
    ? `ranked score ${result.score.toFixed(4)}`
    : "ranked match";
}
26
/**
 * Decorates search results and enforces an optional total snippet budget.
 *
 * Each result gains `grepKeywords` and `matchReason`. When
 * `options.maxContextChars` is set, snippets are consumed in order against
 * that budget; a snippet that does not fit is shortened (ending with the
 * truncation marker), reduced to the marker alone, or emptied, and the result
 * is flagged with `snippetTruncated: true`.
 *
 * @param {string} query - The search query, forwarded to `grepKeywords`.
 * @param {object[]} results - Raw results; `snippet` may be missing or non-string.
 * @param {{ maxContextChars?: number | null }} [options]
 * @returns {{ results: object[], summary: { maxContextChars: number | null, usedContextChars: number, truncatedResults: number } }}
 */
export function formatSearchResults(query, results, options = {}) {
  const maxContextChars = options.maxContextChars ?? null;
  const summary = { maxContextChars, usedContextChars: 0, truncatedResults: 0 };

  // Shortens `text` so it fits what is left of the budget.
  const cutToBudget = (text, budget) => {
    const keep = budget - truncationMarkerWithBreak.length;
    if (keep >= 0) {
      return `${text.slice(0, keep).trimEnd()}${truncationMarkerWithBreak}`;
    }
    return budget >= truncationMarker.length ? truncationMarker : "";
  };

  const formatted = results.map((result) => {
    let snippet = result.snippet;
    let snippetTruncated = false;
    if (typeof snippet === "string") {
      if (maxContextChars !== null) {
        const budget = Math.max(0, maxContextChars - summary.usedContextChars);
        if (snippet.length > budget) {
          snippet = cutToBudget(snippet, budget);
          snippetTruncated = true;
          summary.truncatedResults += 1;
        }
      }
      summary.usedContextChars += snippet.length;
    }
    const decorated = {
      ...result,
      snippet,
      grepKeywords: grepKeywords(query, result),
      matchReason: matchReason(result),
    };
    if (snippetTruncated) {
      decorated.snippetTruncated = snippetTruncated;
    }
    return decorated;
  });

  return { results: formatted, summary };
}
@@ -0,0 +1,123 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import fg from "fast-glob";
4
+ import ignore from "ignore";
5
+ import { isProbablyBinary } from "./binary.js";
6
/** Glob patterns that are always excluded from a project scan. */
const BUILT_IN_IGNORES = [
  // Version control, tool state, dependencies and build output.
  ".git/**",
  ".scythe-context/**",
  ".repo-beacon/**",
  "node_modules/**",
  "dist/**",
  "build/**",
  ".next/**",
  ".turbo/**",
  "coverage/**",

  // Package-manager lock files.
  "package-lock.json",
  "npm-shrinkwrap.json",
  "yarn.lock",
  "pnpm-lock.yaml",
  "bun.lock",
  "bun.lockb",

  // Images and documents.
  "*.png",
  "*.jpg",
  "*.jpeg",
  "*.gif",
  "*.webp",
  "*.ico",
  "*.pdf",

  // Archives.
  "*.zip",
  "*.gz",
  "*.tar",
  "*.7z",

  // Fonts.
  "*.woff",
  "*.woff2",
  "*.ttf",
];
37
/**
 * Creates an `ignore` matcher seeded with the project's root `.gitignore`.
 *
 * A missing `.gitignore` yields an empty matcher; any other read failure is
 * rethrown.
 *
 * @param {string} projectPath - Directory expected to contain `.gitignore`.
 * @returns {Promise<object>} The matcher returned by `ignore()`.
 */
async function loadGitignore(projectPath) {
  const rules = ignore();
  try {
    rules.add(await fs.readFile(path.join(projectPath, ".gitignore"), "utf8"));
  } catch (error) {
    const isMissingFile = error.code === "ENOENT";
    if (!isMissingFile) {
      throw error;
    }
  }
  return rules;
}
50
/**
 * Rewrites a path that uses the platform separator so it uses "/" instead.
 *
 * @param {string} value - Path using `path.sep` as separator.
 * @returns {string} The same path with forward slashes.
 */
function toPosixPath(value) {
  const segments = value.split(path.sep);
  return segments.join("/");
}
53
/**
 * Reads up to the first 8192 bytes of a file and asks `isProbablyBinary`
 * whether they look binary. The file handle is always closed.
 *
 * @param {string} absolutePath - File to inspect.
 * @returns {Promise<boolean>} The verdict of `isProbablyBinary` on the header bytes.
 */
async function hasBinaryHeader(absolutePath) {
  const file = await fs.open(absolutePath, "r");
  try {
    const header = Buffer.alloc(8192);
    const readResult = await file.read(header, 0, header.length, 0);
    // Only the bytes actually read are inspected, never the zero padding.
    const filled = header.subarray(0, readResult.bytesRead);
    return isProbablyBinary(filled);
  } finally {
    await file.close();
  }
}
64
/**
 * Walks a project directory and classifies every file as indexable or skipped.
 *
 * Files are listed with fast-glob (dotfiles included, symlinked directories
 * not followed, built-in ignores applied), sorted, and then filtered by the
 * root `.gitignore`, a containment check, the size limit and a binary-header
 * probe. Per-file stat/read failures are recorded as `read_error` rather than
 * aborting the scan.
 *
 * @param {string} projectPath - Project root; resolved to an absolute path.
 * @param {{ maxFileBytes: number }} limits - Files larger than `maxFileBytes` are skipped.
 * @returns {Promise<{ projectPath: string, files: object[], skipped: object[] }>}
 *   `files` entries carry `relativePath`, `absolutePath`, `size`, `mtimeMs`;
 *   `skipped` entries carry `relativePath`, `reason` and optional `size`/`detail`.
 */
export async function scanProject(projectPath, limits) {
  const resolvedProjectPath = path.resolve(projectPath);
  const gitignore = await loadGitignore(resolvedProjectPath);
  const entries = await fg("**/*", {
    cwd: resolvedProjectPath,
    dot: true,
    onlyFiles: true,
    followSymbolicLinks: false,
    ignore: BUILT_IN_IGNORES,
    unique: true,
  });
  const files = [];
  const skipped = [];
  for (const entry of entries.sort()) {
    const relativePath = toPosixPath(entry);
    if (gitignore.ignores(relativePath)) {
      skipped.push({ relativePath, reason: "ignored" });
      continue;
    }
    const absolutePath = path.resolve(resolvedProjectPath, relativePath);
    // Containment is decided from the relative path rather than a string
    // prefix: a prefix test against `root + path.sep` rejects everything when
    // the root is itself a filesystem root ("/" + "/" never prefixes "/x").
    const fromRoot = path.relative(resolvedProjectPath, absolutePath);
    const isOutsideProject =
      fromRoot === "" ||
      fromRoot === ".." ||
      fromRoot.startsWith(`..${path.sep}`) ||
      path.isAbsolute(fromRoot);
    if (isOutsideProject) {
      skipped.push({ relativePath, reason: "outside_project" });
      continue;
    }
    try {
      const stat = await fs.stat(absolutePath);
      if (!stat.isFile()) {
        skipped.push({ relativePath, reason: "not_file" });
        continue;
      }
      if (stat.size > limits.maxFileBytes) {
        skipped.push({
          relativePath,
          reason: "too_large",
          size: stat.size,
          detail: `size ${stat.size} exceeds maxFileBytes ${limits.maxFileBytes}`,
        });
        continue;
      }
      if (await hasBinaryHeader(absolutePath)) {
        skipped.push({ relativePath, reason: "binary", size: stat.size });
        continue;
      }
      files.push({
        relativePath,
        absolutePath,
        size: stat.size,
        mtimeMs: stat.mtimeMs,
      });
    } catch (error) {
      // Best effort: one unreadable file must not abort the whole scan.
      skipped.push({
        relativePath,
        reason: "read_error",
        detail: error instanceof Error ? error.message : String(error),
      });
    }
  }
  return { projectPath: resolvedProjectPath, files, skipped };
}
@@ -0,0 +1,48 @@
1
+ import Database from "better-sqlite3";
2
+ import { vectorTableName } from "../storage/schema.js";
3
+ import { loadSqliteVec, vectorToFloat32Buffer } from "../storage/sqliteVec.js";
4
/**
 * Strips trailing whitespace and shortens text to at most `maxChars` characters.
 *
 * Shortened text ends with "..." whenever the limit leaves room for it. For
 * limits below three characters the text is cut hard instead, so the result
 * never exceeds `maxChars`.
 *
 * @param {string} text - Raw chunk text.
 * @param {number} maxChars - Maximum length of the returned snippet.
 * @returns {string} The trimmed, possibly shortened snippet.
 */
function compactSnippet(text, maxChars) {
  const ellipsis = "...";
  const normalized = text.trimEnd();
  if (normalized.length <= maxChars) {
    return normalized;
  }
  if (maxChars < ellipsis.length) {
    // No room for the ellipsis: appending it would overshoot the limit.
    return normalized.slice(0, Math.max(0, maxChars));
  }
  return `${normalized.slice(0, maxChars - ellipsis.length)}${ellipsis}`;
}
10
/**
 * Runs a nearest-neighbour query against the vector table of an index database.
 *
 * The database is opened read-only and always closed again. Rows come back
 * ordered by ascending distance, each with a snippet shortened to
 * `maxSnippetChars`.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path to the SQLite index.
 * @param {number} options.dimensions - Vector dimensionality; selects the vector table.
 * @param {number[]} options.queryVector - Query embedding.
 * @param {number} options.maxResults - Positive integer number of neighbours.
 * @param {number} options.maxSnippetChars - Positive integer snippet length limit.
 * @returns {{ path: string, startLine: number, endLine: number, distance: number, snippet: string, matchType: string }[]}
 * @throws {Error} When `maxResults` or `maxSnippetChars` is not a positive integer.
 */
export function searchByVector(options) {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
  if (!isPositiveInteger(options.maxResults)) {
    throw new Error("maxResults must be a positive integer");
  }
  if (!isPositiveInteger(options.maxSnippetChars)) {
    throw new Error("maxSnippetChars must be a positive integer");
  }

  const db = new Database(options.dbPath, { readonly: true });
  try {
    loadSqliteVec(db);
    const statement = db.prepare(`
      select files.path,
      chunks.start_line as startLine,
      chunks.end_line as endLine,
      chunks.text,
      vec.distance as distance
      from ${vectorTableName(options.dimensions)} vec
      join embeddings on embeddings.id = vec.rowid
      join chunks on chunks.id = embeddings.chunk_id
      join files on files.id = chunks.file_id
      where vec.embedding match ?
      and vec.k = ?
      order by vec.distance
      `);
    const queryBuffer = vectorToFloat32Buffer(options.queryVector, options.dimensions);
    const rows = statement.all(queryBuffer, options.maxResults);
    const hits = [];
    for (const row of rows) {
      hits.push({
        path: row.path,
        startLine: row.startLine,
        endLine: row.endLine,
        distance: row.distance,
        snippet: compactSnippet(row.text, options.maxSnippetChars),
        matchType: "semantic",
      });
    }
    return hits;
  } finally {
    db.close();
  }
}
@@ -0,0 +1,121 @@
1
+ const symbolPatterns = [
2
+ { kind: "function", regex: /^(\s*export\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 2, exportedGroup: 1 },
3
+ { kind: "class", regex: /^(\s*export\s+)?(?:abstract\s+)?class\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 2, exportedGroup: 1 },
4
+ { kind: "interface", regex: /^(\s*export\s+)?interface\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 2, exportedGroup: 1 },
5
+ { kind: "type", regex: /^(\s*export\s+)?type\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 2, exportedGroup: 1 },
6
+ { kind: "enum", regex: /^(\s*export\s+)?enum\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 2, exportedGroup: 1 },
7
+ { kind: "const", regex: /^(\s*export\s+)?const\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 2, exportedGroup: 1 },
8
+ { kind: "variable", regex: /^\s*(?:let|var)\s+([A-Za-z_$][\w$]*)\b/, nameGroup: 1 },
9
+ { kind: "function", regex: /^\s*def\s+([A-Za-z_]\w*)\b/, nameGroup: 1 },
10
+ { kind: "class", regex: /^\s*class\s+([A-Za-z_]\w*)\b/, nameGroup: 1 },
11
+ { kind: "function", regex: /^\s*func\s+(?:\([^)]*\)\s*)?([A-Za-z_]\w*)\b/, nameGroup: 1 },
12
+ { kind: "type", regex: /^\s*type\s+([A-Za-z_]\w*)\b/, nameGroup: 1 },
13
+ { kind: "function", regex: /^\s*(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_]\w*)\b/, nameGroup: 1, exportedGroup: 0 },
14
+ { kind: "struct", regex: /^\s*(?:pub\s+)?struct\s+([A-Za-z_]\w*)\b/, nameGroup: 1, exportedGroup: 0 },
15
+ { kind: "enum", regex: /^\s*(?:pub\s+)?enum\s+([A-Za-z_]\w*)\b/, nameGroup: 1, exportedGroup: 0 },
16
+ { kind: "trait", regex: /^\s*(?:pub\s+)?trait\s+([A-Za-z_]\w*)\b/, nameGroup: 1, exportedGroup: 0 },
17
+ { kind: "module", regex: /^\s*(?:pub\s+)?mod\s+([A-Za-z_]\w*)\b/, nameGroup: 1, exportedGroup: 0 },
18
+ ];
19
/**
 * Per-language patterns that capture an imported module specifier in group 1.
 * All of them carry the `g` flag, so callers must reset `lastIndex` before reuse.
 */

/** `import ... from "x"`, bare `import "x"`, `export ... from "x"` and `require("x")`. */
const javascriptImportPatterns = [
  /\bimport\s+(?:[^'"`"]+\s+from\s+)?["'`]([^"'`]+)["'`]/g,
  /\bexport\s+[^"'`]+\s+from\s+["'`]([^"'`]+)["'`]/g,
  /\brequire\(\s*["'`]([^"'`]+)["'`]\s*\)/g,
];

/** `from pkg.mod import ...` and `import pkg.mod` at the start of a line. */
const pythonImportPatterns = [
  /\bfrom\s+([A-Za-z_][\w.]+)\s+import\b/g,
  /^\s*import\s+([A-Za-z_][\w.]+)/g,
];

/** `use path;` and `mod name;` at the start of a line. */
const rustImportPatterns = [
  /^\s*use\s+([^;]+);/g,
  /^\s*mod\s+([A-Za-z_]\w*)\s*;/g,
];

/** `import "pkg"` and a line that starts with a quoted string. */
const goImportPatterns = [
  /^\s*import\s+["`]([^"`]+)["`]/g,
  /^\s*["`]([^"`]+)["`]/g,
];
33
/**
 * Picks the import patterns to use for a file, based on its extension.
 *
 * `.py`, `.rs` and `.go` get their own pattern sets; JavaScript/TypeScript
 * extensions and every unrecognised extension use the JavaScript patterns.
 *
 * @param {string} relativePath - Path of the file being analysed.
 * @returns {RegExp[]} The pattern list for that file type.
 */
function dependencyPatternsForPath(relativePath) {
  const extension = /\.([A-Za-z]+)$/.exec(relativePath)?.[1];
  switch (extension) {
    case "py":
      return pythonImportPatterns;
    case "rs":
      return rustImportPatterns;
    case "go":
      return goImportPatterns;
    default:
      // Covers js/jsx/mjs/cjs/ts/tsx/mts/cts as well as the fallback.
      return javascriptImportPatterns;
  }
}
44
/**
 * Turns a source line into a compact signature: trimmed, inner whitespace
 * runs collapsed to single spaces, and capped at 240 characters.
 *
 * @param {string} line - Raw source line.
 * @returns {string} The normalized signature.
 */
function normalizeSignature(line) {
  const words = line.trim().split(/\s+/);
  return words.join(" ").slice(0, 240);
}
47
/**
 * Appends `item` to `items` unless an element with the same key is already present.
 *
 * @param {Array} items - List that is mutated in place.
 * @param {*} item - Candidate element.
 * @param {(value: *) => *} key - Derives the identity used for comparison (`===`).
 * @returns {void}
 */
function addUnique(items, item, key) {
  const alreadyPresent = items.some((existing) => key(existing) === key(item));
  if (alreadyPresent) {
    return;
  }
  items.push(item);
}
52
/**
 * Collects the import specifiers that the given patterns find on one line.
 *
 * Every pattern is rewound and matched repeatedly; capture group 1 (trimmed)
 * is the specifier. A specifier is reported once per line even when several
 * patterns or occurrences produce it.
 *
 * @param {RegExp[]} patterns - Global regexes with the specifier in group 1.
 * @param {string} line - Source line to scan.
 * @param {number} lineNumber - 1-based line number stored on each dependency.
 * @returns {{ specifier: string, line: number }[]} Dependencies in discovery order.
 */
function extractDependenciesFromLine(patterns, line, lineNumber) {
  const found = [];
  const seenSpecifiers = new Set();
  for (const pattern of patterns) {
    // Patterns are shared module-level regexes; start each scan from the beginning.
    pattern.lastIndex = 0;
    for (let match = pattern.exec(line); match; match = pattern.exec(line)) {
      const specifier = match[1]?.trim();
      if (!specifier || seenSpecifiers.has(specifier)) {
        continue;
      }
      seenSpecifiers.add(specifier);
      found.push({ specifier, line: lineNumber });
    }
  }
  return found;
}
66
/**
 * Extracts declared symbols and import dependencies from a file's text.
 *
 * The content is processed line by line. For each line, the first matching
 * entry of `symbolPatterns` (if any) yields one symbol, and the
 * extension-specific import patterns yield zero or more dependencies.
 *
 * @param {string} relativePath - File path; only used to pick the import patterns.
 * @param {string} content - Full file text (LF or CRLF line endings).
 * @returns {{
 *   symbols: { name: string, kind: string, line: number, signature: string, exported: boolean }[],
 *   dependencies: { specifier: string, line: number }[]
 * }} Symbols and dependencies in line order, with 1-based line numbers.
 */
export function extractFileGraph(relativePath, content) {
  const symbols = [];
  const dependencies = [];
  // Keys already emitted; a Set keeps the duplicate check O(1) instead of
  // rescanning the whole accumulated list for every line.
  const seenDependencyKeys = new Set();
  const dependencyPatterns = dependencyPatternsForPath(relativePath);
  const lines = content.split(/\r?\n/);

  for (const [index, line] of lines.entries()) {
    const lineNumber = index + 1;

    // At most one symbol per line: stop at the first pattern that yields a name.
    // Line numbers are unique, so symbols can never collide on name:kind:line.
    for (const pattern of symbolPatterns) {
      const match = pattern.regex.exec(line);
      const name = match?.[pattern.nameGroup ?? 1];
      if (!name) {
        continue;
      }
      symbols.push({
        name,
        kind: pattern.kind,
        line: lineNumber,
        signature: normalizeSignature(line),
        // exportedGroup 0 means "look for the `pub` keyword on the line";
        // otherwise the symbol is exported when that capture group matched.
        exported:
          pattern.exportedGroup === 0
            ? /\bpub\b/.test(line)
            : Boolean(match[pattern.exportedGroup ?? -1]),
      });
      break;
    }

    for (const dependency of extractDependenciesFromLine(dependencyPatterns, line, lineNumber)) {
      const key = `${dependency.specifier}:${dependency.line}`;
      if (seenDependencyKeys.has(key)) {
        continue;
      }
      seenDependencyKeys.add(key);
      dependencies.push(dependency);
    }
  }

  return { symbols, dependencies };
}
95
/**
 * Resolves a relative import specifier to one of the known project paths.
 *
 * The specifier is resolved against the importing file's directory, then a
 * fixed list of candidates is probed: the path itself, the path with a
 * JavaScript runtime extension swapped for TypeScript extensions, the path
 * with common extensions appended, and `index.*` files inside it.
 *
 * @param {string} fromPath - POSIX-style project-relative path of the importing file.
 * @param {string} specifier - Import specifier; only ones starting with "." are resolved.
 * @param {{ has(path: string): boolean }} activePaths - Known project-relative paths.
 * @returns {string | null} The first candidate present in `activePaths`, else null.
 */
export function resolveDependencyPath(fromPath, specifier, activePaths) {
  if (!specifier.startsWith(".")) {
    return null;
  }
  const fromDir = fromPath.includes("/") ? fromPath.slice(0, fromPath.lastIndexOf("/")) : "";
  const rawPathname = new URL(specifier, `file:///${fromDir ? `${fromDir}/` : ""}`).pathname;

  // URL percent-encodes spaces and non-ASCII characters, so the raw pathname
  // of "my dir/b.js" is "my%20dir/b.js" and would never match a real path.
  // Probe the decoded form first and keep the raw form as a fallback for
  // paths that literally contain percent sequences.
  const pathnames = [rawPathname];
  try {
    const decodedPathname = decodeURIComponent(rawPathname);
    if (decodedPathname !== rawPathname) {
      pathnames.unshift(decodedPathname);
    }
  } catch {
    // A stray "%" that is not a valid escape: only the raw form is usable.
  }

  for (const pathname of pathnames) {
    const normalized = pathname.replace(/^\/+/, "");
    const withoutRuntimeExtension = normalized.replace(/\.(?:js|jsx|mjs|cjs)$/, "");
    const candidates = [
      normalized,
      withoutRuntimeExtension,
      `${withoutRuntimeExtension}.ts`,
      `${withoutRuntimeExtension}.tsx`,
      `${withoutRuntimeExtension}.mts`,
      `${withoutRuntimeExtension}.cts`,
      `${normalized}.ts`,
      `${normalized}.tsx`,
      `${normalized}.js`,
      `${normalized}.jsx`,
      `${normalized}.mjs`,
      `${normalized}.cjs`,
      `${normalized}.py`,
      `${normalized}/index.ts`,
      `${normalized}/index.tsx`,
      `${normalized}/index.js`,
      `${normalized}/index.jsx`,
    ];
    const resolved = candidates.find((candidate) => activePaths.has(candidate));
    if (resolved !== undefined) {
      return resolved;
    }
  }
  return null;
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,149 @@
1
/**
 * Error raised for failed or malformed Gemini embedding calls.
 *
 * Carries the HTTP status (when there was a response), a short redacted
 * excerpt of the response body, and whether retrying may help.
 */
export class GeminiEmbeddingError extends Error {
  /** HTTP status of the failed response, if any. */
  status;
  /** Shortened excerpt of the response body, if any. */
  bodySnippet;
  /** True when the failure is considered transient. */
  retryable;
  /**
   * @param {string} message - Human-readable description.
   * @param {object} [options]
   * @param {number} [options.status] - HTTP status code.
   * @param {string} [options.bodySnippet] - Response body excerpt.
   * @param {boolean} [options.retryable=false] - Whether a retry may succeed.
   * @param {unknown} [options.cause] - Underlying error, exposed as `error.cause`.
   */
  constructor(message, options = {}) {
    // Forward the cause only when one was given so that errors created
    // without it do not gain a `cause: undefined` property.
    super(message, options.cause === undefined ? undefined : { cause: options.cause });
    this.name = "GeminiEmbeddingError";
    this.status = options.status;
    this.bodySnippet = options.bodySnippet;
    this.retryable = options.retryable ?? false;
  }
}
13
/**
 * Removes every "/" at the end of a string.
 *
 * @param {string} value - Text such as a URL.
 * @returns {string} The text without trailing slashes.
 */
function trimTrailingSlash(value) {
  const trailingSlashes = /\/+$/;
  return value.replace(trailingSlashes, "");
}
16
/**
 * Canonicalizes a Gemini API base URL.
 *
 * Appends `v1beta` unless the last path segment is already `v1` or `v1beta`,
 * drops the query string and fragment, and removes trailing slashes.
 *
 * @param {string} baseUrl - Absolute base URL.
 * @returns {string} The normalized URL string.
 * @throws {TypeError} When `baseUrl` cannot be parsed as a URL.
 */
export function normalizeGeminiBaseUrl(baseUrl) {
  const parsed = new URL(baseUrl);
  const segments = parsed.pathname.split("/").filter((segment) => segment !== "");
  const endsWithVersion = ["v1", "v1beta"].includes(segments.at(-1));
  const versionedSegments = endsWithVersion ? segments : [...segments, "v1beta"];
  parsed.pathname = `/${versionedSegments.join("/")}`;
  parsed.search = "";
  parsed.hash = "";
  return trimTrailingSlash(parsed.toString());
}
28
/**
 * Returns the model name in `models/<name>` form, adding the prefix if missing.
 *
 * @param {string} model - Model name with or without the `models/` prefix.
 * @returns {string} The prefixed resource name.
 */
function modelResource(model) {
  if (model.startsWith("models/")) {
    return model;
  }
  return `models/${model}`;
}
31
/**
 * Builds the endpoint URL `<normalized base>/<model resource>:<method>`.
 *
 * @param {string} baseUrl - Base URL, normalized with `normalizeGeminiBaseUrl`.
 * @param {string} model - Model name, with or without the `models/` prefix.
 * @param {string} method - RPC method name placed after the colon.
 * @returns {URL} The endpoint as a URL object.
 */
export function buildGeminiEndpoint(baseUrl, model, method) {
  const normalizedBase = normalizeGeminiBaseUrl(baseUrl);
  const resource = modelResource(model);
  const endpoint = `${normalizedBase}/${resource}:${method}`;
  return new URL(endpoint);
}
34
/**
 * Wraps the input text in the prompt prefix sent to the embedding model.
 *
 * Queries use the code-retrieval task prefix; everything else is sent as a
 * titled document, with "none" standing in for a missing or empty title.
 *
 * @param {{ kind?: string, text: string, title?: string }} input
 * @returns {string} The text to embed.
 */
function formatEmbeddingText(input) {
  const isQuery = input.kind === "query";
  return isQuery
    ? `task: code retrieval | query: ${input.text}`
    : `title: ${input.title || "none"} | text: ${input.text}`;
}
40
/**
 * Tells whether an HTTP status is treated as transient.
 *
 * @param {number} status - HTTP status code.
 * @returns {boolean} True for 408, 409, 425, 429 and any status of 500 or above.
 */
function isRetryableStatus(status) {
  if (status >= 500) {
    return true;
  }
  switch (status) {
    case 408:
    case 409:
    case 425:
    case 429:
      return true;
    default:
      return false;
  }
}
43
/**
 * Produces a short, single-line, secret-free excerpt of a response body.
 *
 * Whitespace runs are collapsed, every occurrence of `secret` is replaced
 * with "[REDACTED]", and the result is cut to 1000 characters plus "...".
 *
 * @param {string} text - Raw response body.
 * @param {string} [secret] - Value to mask; skipped when empty or missing.
 * @returns {string} The sanitized excerpt.
 */
function safeSnippet(text, secret) {
  let snippet = text.replace(/\s+/g, " ").trim();
  if (secret) {
    // Redact before cutting so a secret straddling the cut cannot leak partially.
    snippet = snippet.split(secret).join("[REDACTED]");
  }
  if (snippet.length <= 1000) {
    return snippet;
  }
  return `${snippet.slice(0, 1000)}...`;
}
48
/**
 * Embedding provider that calls the Gemini `embedContent` and
 * `batchEmbedContents` endpoints over HTTP.
 *
 * Configuration fields read by this class: `apiKey`, `baseUrl`, `model`,
 * `outputDimensionality`, `authMode` ("bearer", "query", or anything else for
 * a custom header), `apiKeyQueryParam` and `apiKeyHeader`.
 */
export class GeminiEmbeddingProvider {
  config;
  /**
   * @param {object} config - Provider configuration (see class description).
   */
  constructor(config) {
    this.config = config;
  }
  /**
   * Embeds a single input.
   *
   * @param {{ kind?: string, text: string, title?: string }} input
   * @returns {Promise<{ vector: number[], model: string, dimensions: number }>}
   * @throws {GeminiEmbeddingError} On configuration, HTTP or response-shape failures.
   */
  async embed(input) {
    const response = await this.post("embedContent", this.buildEmbedRequest(input));
    const vector = this.extractSingleVector(response);
    return {
      vector,
      model: this.config.model,
      dimensions: vector.length,
    };
  }
  /**
   * Embeds several inputs with one request. An empty list returns `[]`
   * without calling the API.
   *
   * @param {{ kind?: string, text: string, title?: string }[]} inputs
   * @returns {Promise<{ vector: number[], model: string, dimensions: number }[]>}
   *   One entry per input, in input order.
   * @throws {GeminiEmbeddingError} On configuration, HTTP or response-shape
   *   failures, including a vector count that differs from the input count.
   */
  async embedBatch(inputs) {
    if (inputs.length === 0) {
      return [];
    }
    const response = await this.post("batchEmbedContents", {
      requests: inputs.map((input) => this.buildEmbedRequest(input)),
    });
    const vectors = this.extractVectors(response);
    if (vectors.length !== inputs.length) {
      throw new GeminiEmbeddingError(`Gemini returned ${vectors.length} embeddings for ${inputs.length} inputs`);
    }
    return vectors.map((vector) => ({
      vector,
      model: this.config.model,
      dimensions: vector.length,
    }));
  }
  /**
   * Builds the request item shared by the single and batch endpoints.
   *
   * @param {{ kind?: string, text: string, title?: string }} input
   * @returns {object} Body with `model`, `content` and, when configured, `output_dimensionality`.
   */
  buildEmbedRequest(input) {
    return {
      model: modelResource(this.config.model),
      content: { parts: [{ text: formatEmbeddingText(input) }] },
      ...(this.config.outputDimensionality
        ? { output_dimensionality: this.config.outputDimensionality }
        : {}),
    };
  }
  /**
   * Sends a JSON POST to the given model method and returns the parsed body.
   *
   * The API key travels as a bearer token, a query parameter, or a custom
   * header depending on `config.authMode`.
   *
   * @param {string} method - Endpoint method, e.g. "embedContent".
   * @param {object} body - JSON-serializable request body.
   * @returns {Promise<any>} The parsed JSON response.
   * @throws {GeminiEmbeddingError} When the API key is missing, the base URL
   *   is invalid, the response status is not OK, or the body is not JSON.
   */
  async post(method, body) {
    if (!this.config.apiKey) {
      throw new GeminiEmbeddingError("GEMINI_API_KEY is required for embedding calls");
    }
    let url;
    try {
      url = buildGeminiEndpoint(this.config.baseUrl, this.config.model, method);
    } catch (error) {
      throw new GeminiEmbeddingError(
        `Invalid GEMINI_BASE_URL: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error },
      );
    }
    const headers = {
      "content-type": "application/json",
    };
    if (this.config.authMode === "bearer") {
      headers.authorization = `Bearer ${this.config.apiKey}`;
    } else if (this.config.authMode === "query") {
      url.searchParams.set(this.config.apiKeyQueryParam, this.config.apiKey);
    } else {
      headers[this.config.apiKeyHeader] = this.config.apiKey;
    }
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
    });
    const text = await response.text();
    if (!response.ok) {
      throw new GeminiEmbeddingError(`Gemini embedding request failed with HTTP ${response.status}`, {
        status: response.status,
        // The key is masked in case the server echoes the request back.
        bodySnippet: safeSnippet(text, this.config.apiKey),
        retryable: isRetryableStatus(response.status),
      });
    }
    try {
      return JSON.parse(text);
    } catch {
      throw new GeminiEmbeddingError("Gemini embedding response was not valid JSON", {
        bodySnippet: safeSnippet(text, this.config.apiKey),
      });
    }
  }
  /**
   * Pulls the vector out of a single-embedding response, accepting either
   * `embedding.values` or the first entry of `embeddings`.
   *
   * @param {any} response - Parsed response body (may be null).
   * @returns {number[]} The non-empty embedding vector.
   * @throws {GeminiEmbeddingError} When no non-empty vector is present.
   */
  extractSingleVector(response) {
    // A syntactically valid body can still be `null`, hence the optional chaining.
    const vector = response?.embedding?.values || response?.embeddings?.[0]?.values;
    if (!vector || vector.length === 0) {
      throw new GeminiEmbeddingError("Gemini response did not include an embedding vector");
    }
    return vector;
  }
  /**
   * Pulls all vectors out of a batch response.
   *
   * @param {any} response - Parsed response body (may be null).
   * @returns {number[][]} One vector per entry of `response.embeddings`;
   *   empty when that field is missing or not an array.
   * @throws {GeminiEmbeddingError} When any entry has no values.
   */
  extractVectors(response) {
    const embeddings = Array.isArray(response?.embeddings) ? response.embeddings : [];
    const vectors = embeddings.map((embedding) => embedding?.values || []);
    if (vectors.some((vector) => vector.length === 0)) {
      throw new GeminiEmbeddingError("Gemini batch response included an empty embedding vector");
    }
    return vectors;
  }
}
@@ -0,0 +1 @@
1
+ export {};