prism-mcp-server 8.0.2 → 9.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,104 +0,0 @@
1
- /**
2
- * Embedding Generation Utility (v0.5.0 — Model Migration)
3
- *
4
- * ═══════════════════════════════════════════════════════════════════
5
- * REVIEWER NOTE: This module wraps Google's gemini-embedding-001 model
6
- * to generate 768-dimensional vector embeddings for text.
7
- *
8
- * MIGRATION (2026-03): text-embedding-004 was deprecated Jan 14, 2026.
9
- * Replaced with gemini-embedding-001 (GA on v1 API). The new model
10
- * supports Matryoshka Representation Learning (MRL) for flexible
11
- * output dimensions (768, 1536, 3072). We use 768 for compatibility
12
- * with existing pgvector columns.
13
- *
14
- * USAGE — Called in two places:
15
- * 1. sessionSaveLedgerHandler — embeds summary+decisions at save time
16
- * (fire-and-forget, non-blocking)
17
- * 2. sessionSearchMemoryHandler — embeds the user's search query
18
- * to find semantically similar past sessions
19
- *
20
- * WHY GEMINI: We already have @google/generative-ai as a dependency
21
- * and GOOGLE_API_KEY configured for the research paper analysis tool.
22
- * Using a separate embedding service (OpenAI, Cohere) would add
23
- * another API key dependency and increase configuration complexity.
24
- *
25
- * COST: Gemini's gemini-embedding-001 is free tier for <1500 req/min.
26
- * At typical usage (~10-50 ledger saves/day), we'll never approach
27
- * this limit.
28
- *
29
- * TRUNCATION GUARD: gemini-embedding-001 supports up to 2048 tokens
30
- * per input. We implement a hard character limit (default 8000 chars)
31
- * to guarantee the API call never crashes. This is applied before
32
- * sending to the API, not after.
33
- * ═══════════════════════════════════════════════════════════════════
34
- */
35
- import { GoogleGenerativeAI, TaskType } from "@google/generative-ai";
36
- import { GOOGLE_API_KEY } from "../config.js";
37
- import { debugLog } from "./logger.js";
38
- // ─── Constants ────────────────────────────────────────────────
39
// Hard ceiling on how many characters (UTF-16 code units) are forwarded to
// the embedding API. The module header cites a 2048-token input limit for
// gemini-embedding-001; at roughly 4 characters per token, 8000 stays under
// it. Longer input is cut down silently rather than rejected, on the basis
// that the leading content is enough for similarity search.
// NOTE(review): the ~4 chars/token ratio is an estimate — confirm it holds
// for token-dense scripts.
const MAX_EMBEDDING_CHARS = 8_000;
45
- // ─── Embedding Client ─────────────────────────────────────────
46
/**
 * Shortens `text` to at most `maxChars` UTF-16 code units without leaving a
 * broken surrogate pair at the end.
 *
 * The cut prefers a nearby word boundary, but only when a space exists in the
 * trailing look-back window. Snapping to an arbitrarily early space would
 * discard almost all content for text that rarely uses spaces (CJK, long
 * URLs, encoded blobs).
 *
 * @param {string} text - Input text.
 * @param {number} maxChars - Maximum number of UTF-16 code units to keep.
 * @returns {string} `text` unchanged when short enough, else a safe prefix.
 */
function truncateForEmbedding(text, maxChars) {
    // How far back from the cut point a space may be to count as "nearby".
    const WORD_BOUNDARY_LOOKBACK = 256;
    if (text.length <= maxChars) {
        return text;
    }
    let clipped = text.substring(0, maxChars);
    const lastSpace = clipped.lastIndexOf(" ");
    if (lastSpace > 0 && lastSpace >= maxChars - WORD_BOUNDARY_LOOKBACK) {
        clipped = clipped.substring(0, lastSpace);
    }
    // substring() counts code units, so the cut may land between the two
    // halves of a surrogate pair; drop a dangling high surrogate.
    const tail = clipped.charCodeAt(clipped.length - 1);
    if (tail >= 0xd800 && tail <= 0xdbff) {
        clipped = clipped.slice(0, -1);
    }
    return clipped;
}
/**
 * Generates a 768-dimensional embedding vector for the given text using the
 * gemini-embedding-001 model.
 *
 * Over-long input is truncated BEFORE the API call (see
 * truncateForEmbedding); the truncation is logged through debugLog and is
 * never fatal.
 *
 * @param {string} text - The text to embed (summary + decisions, search query, etc.)
 * @returns {Promise<number[]>} Array of 768 numbers returned by the API.
 * @throws {Error} If GOOGLE_API_KEY is not configured, the text is empty or
 *   whitespace-only, the API call fails, or the response does not contain
 *   exactly 768 values.
 * @throws {TypeError} If `text` is not a string.
 */
export async function generateEmbedding(text) {
    const EMBEDDING_DIMENSIONS = 768;
    if (!GOOGLE_API_KEY) {
        throw new Error("Cannot generate embeddings: GOOGLE_API_KEY is not configured. " +
            "Set this environment variable to enable semantic search.");
    }
    if (typeof text !== "string") {
        throw new TypeError(`Cannot generate embedding: expected text to be a string, got ${typeof text}`);
    }
    // Truncation guard — keep the request under the API input limit.
    if (text.length > MAX_EMBEDDING_CHARS) {
        debugLog(`[embedding] Input text truncated from ${text.length} to ~${MAX_EMBEDDING_CHARS} chars (word-safe)`);
    }
    const inputText = truncateForEmbedding(text, MAX_EMBEDDING_CHARS);
    // Skip empty or whitespace-only text
    if (!inputText.trim()) {
        throw new Error("Cannot generate embedding for empty text");
    }
    const genAI = new GoogleGenerativeAI(GOOGLE_API_KEY);
    // The request options pin the v1beta endpoint, which the original author
    // notes is required for gemini-embedding-001.
    const model = genAI.getGenerativeModel({ model: "gemini-embedding-001" }, { apiVersion: "v1beta" });
    debugLog(`[embedding] Generating ${EMBEDDING_DIMENSIONS}-dim embedding for ${inputText.length} chars`);
    const request = {
        content: {
            role: "user",
            parts: [{ text: inputText }],
        },
        taskType: TaskType.SEMANTIC_SIMILARITY,
        // Ask the model for the reduced output size that matches the
        // existing 768-wide vector columns mentioned in the module header.
        outputDimensionality: EMBEDDING_DIMENSIONS,
    };
    const result = await model.embedContent(request);
    // Null-safe access so a malformed response surfaces as the dimension
    // error below instead of an opaque TypeError.
    const values = result?.embedding?.values;
    if (!Array.isArray(values) || values.length !== EMBEDDING_DIMENSIONS) {
        throw new Error(`Embedding dimension mismatch: expected ${EMBEDDING_DIMENSIONS}, got ${values?.length ?? 'unknown'}`);
    }
    return values;
}
@@ -1,88 +0,0 @@
1
- /**
2
- * Google Gemini AI Client
3
- *
4
- * This module integrates with Google's Gemini AI models for research
5
- * paper analysis. It uses the @google/generative-ai SDK (Google AI Studio).
6
- *
7
- * Current model: gemini-2.0-flash (fast, high-quality, good for long documents)
8
- *
9
- * The main function (analyzePaperWithGemini) takes a paper's full text and
10
- * generates a detailed analysis based on the requested type:
11
- * - "summary" → research question, methodology, findings, conclusions
12
- * - "critique" → methodology assessment, validity, limitations
13
- * - "literature review" → how it fits in the broader research landscape
14
- * - "key findings" → most significant results and implications
15
- * - "comprehensive" → all of the above combined (default)
16
- *
17
- * Requires: GOOGLE_API_KEY environment variable
18
- *
19
- * Note: This module also exports an MCP client factory (createMcpClient)
20
- * which can be used for testing or inter-server communication.
21
- */
22
- import { GoogleGenerativeAI } from "@google/generative-ai";
23
- import { Readable } from "stream";
24
- import { Client } from "@modelcontextprotocol/sdk/client/index.js";
25
- import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
26
// Module-wide Gemini SDK client, constructed once at import time from the
// GOOGLE_API_KEY environment variable. The value is passed through as-is;
// no check for a missing key happens here.
export const googleGenAi = new GoogleGenerativeAI(process.env["GOOGLE_API_KEY"]);
27
/**
 * Wraps an in-memory buffer in a Readable stream that yields the buffer and
 * then ends.
 *
 * @param buffer - The data to expose as a stream.
 * @returns A Readable that has already been fed `buffer` followed by the
 *   end-of-stream marker.
 */
export function bufferToStream(buffer) {
    const readable = new Readable();
    // Queue the payload first, then `null`, which signals end-of-stream.
    for (const chunk of [buffer, null]) {
        readable.push(chunk);
    }
    return readable;
}
33
/**
 * Creates an MCP client connected over stdio to a server launched with
 * `node index.js`.
 *
 * NOTE(review): earlier documentation called the target "the Brave Search
 * server"; the code only shows `node index.js`, resolved relative to the
 * current working directory — confirm which server that is.
 *
 * @returns {Promise<{client: Client, transport: StdioClientTransport}>} The
 *   connected client and its transport; the caller is responsible for
 *   closing them.
 * @throws Rethrows whatever `client.connect` rejects with, after a
 *   best-effort close of the transport.
 */
export async function createMcpClient() {
    const transport = new StdioClientTransport({
        command: "node",
        args: ["index.js"], // Server entry point
    });
    const client = new Client({ name: "gemini-mcp-client", version: "1.0.0" });
    try {
        await client.connect(transport);
    }
    catch (error) {
        // Do not leave a half-opened transport behind when the connection
        // cannot be established. Cleanup is best-effort: the connect error
        // is the one the caller needs to see.
        try {
            await transport.close();
        }
        catch {
            // Ignored on purpose — the original failure is rethrown below.
        }
        throw error;
    }
    return { client, transport };
}
45
/**
 * Analyzes research paper content with the gemini-2.0-flash model.
 *
 * @param paperContent - The text content of the research paper.
 * @param analysisType - The type of analysis to perform: "summary",
 *   "critique", "literature review" or "key findings" (case-insensitive).
 *   Any other value, including an omitted one, selects the comprehensive
 *   analysis.
 * @param additionalContext - Optional extra context or specific questions;
 *   included in the prompt only when truthy.
 * @returns The model's analysis text.
 * @throws {Error} "Failed to analyze paper: …" when prompt generation fails;
 *   the underlying error is attached as `cause`.
 */
export async function analyzePaperWithGemini(paperContent, analysisType, additionalContext) {
    // The documented default is "comprehensive"; normalising here keeps an
    // omitted or non-string type from crashing on toLowerCase() or leaking
    // "undefined" into the prompt text.
    const requestedType = String(analysisType ?? "comprehensive");
    try {
        const model = googleGenAi.getGenerativeModel({ model: "gemini-2.0-flash" });
        // Closing instruction for each recognised analysis type. A Map is
        // used so arbitrary caller strings never hit inherited object keys.
        const instructionsByType = new Map([
            ["summary", "Provide a comprehensive summary including the research question, methodology, key findings, and conclusions."],
            ["critique", "Provide a critical evaluation of the research methodology, validity of findings, limitations, and suggestions for improvement."],
            ["literature review", "Analyze how this paper fits into the broader research landscape, identifying key related works and research gaps."],
            ["key findings", "Extract and explain the most significant findings and their implications."],
        ]);
        const fallbackInstruction = "Perform a comprehensive analysis including summary, methodology assessment, key findings, limitations, and significance.";
        const sections = [
            `I need you to perform a detailed ${requestedType} analysis of the following research paper.\n\n`,
        ];
        if (additionalContext) {
            sections.push(`Additional context: ${additionalContext}\n\n`);
        }
        sections.push(`Research paper content:\n${paperContent}\n\n`);
        sections.push(instructionsByType.get(requestedType.toLowerCase()) ?? fallbackInstruction);
        // Generate content using Gemini
        const result = await model.generateContent(sections.join(""));
        const response = await result.response;
        return response.text();
    }
    catch (error) {
        console.error("Error analyzing paper with Gemini:", error);
        // Keep the original error reachable for callers that need the stack.
        throw new Error(`Failed to analyze paper: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
}
@@ -1,10 +0,0 @@
1
- import { importHistory } from './universalImporter.js';
2
/**
 * Manual smoke run: feeds two sample history files from /tmp through
 * importHistory in dry-run, verbose mode, one after the other.
 */
async function test() {
    const scenarios = [
        {
            banner: '--- TEST 1: Claude JSONL (Dry Run) ---',
            path: '/tmp/sample_claude_history.jsonl',
            options: { format: 'claude-jsonl', dryRun: true, verbose: true },
        },
        {
            banner: '\n--- TEST 2: Gemini JSON (Dry Run) ---',
            path: '/tmp/sample_gemini_history.json',
            options: { format: 'gemini-json', dryRun: true, verbose: true, projectId: 'gemini-test' },
        },
    ];
    // Sequential on purpose: each banner must precede its own import output.
    for (const { banner, path, options } of scenarios) {
        console.log(banner);
        await importHistory(path, options);
    }
}
// Any failure is reported on stderr rather than left as an unhandled rejection.
test().catch(console.error);
@@ -1,170 +0,0 @@
1
- /**
2
- * Rename Detection Heuristic Engine
3
- *
4
- * Isolated module that detects probable test assertion renames by computing
5
- * similarity scores between removed and added tests. This module is ONLY
6
- * invoked when rename detection is explicitly enabled via --rename-detection
7
- * flag or PRISM_RENAME_DETECTION=true env var.
8
- *
9
- * When this module is NOT invoked, the strict-by-ID deterministic behavior
10
- * of v7.3.2 is preserved byte-for-byte.
11
- *
12
- * Algorithm: Greedy bipartite matching on composite similarity scores.
13
- * - Field overlap via Jaccard coefficient over non-ID field values
14
- * - Description similarity via normalized Levenshtein distance
15
- * - Greedy highest-score-first, one-to-one matching (no test matched twice)
16
- *
17
- * @module renameDetector
18
- */
19
- // ─── Constants ────────────────────────────────────────────────────────────────
20
/** Lower bound for the similarity threshold (lower values would produce excessive false positives). */
export const MIN_THRESHOLD = 0.5;
/** Upper bound for the similarity threshold (higher values would mean nothing ever matches). */
export const MAX_THRESHOLD = 0.95;
/** Similarity threshold applied when the caller does not supply one. */
export const DEFAULT_THRESHOLD = 0.7;
26
- // ─── Similarity Functions ─────────────────────────────────────────────────────
27
/**
 * Compute the normalized Levenshtein similarity between two strings.
 *
 * Strings are compared code point by code point, so a character outside the
 * Basic Multilingual Plane (most emoji) counts as ONE symbol rather than as
 * two UTF-16 code units.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} 1.0 when the strings are identical, 0.0 when either is
 *   empty or every symbol differs, otherwise
 *   `1 - editDistance / max(lengthA, lengthB)`.
 */
export function levenshteinSimilarity(a, b) {
    if (a === b)
        return 1.0;
    // Array.from iterates a string by code point, keeping surrogate pairs whole.
    const source = Array.from(a);
    const target = Array.from(b);
    if (source.length === 0 || target.length === 0)
        return 0.0;
    const longest = Math.max(source.length, target.length);
    // Wagner-Fischer with two rolling rows: `previous` holds the distances
    // for the prior source prefix, `current` the row being filled.
    let previous = Array.from({ length: target.length + 1 }, (_, j) => j);
    let current = new Array(target.length + 1);
    for (let i = 1; i <= source.length; i++) {
        current[0] = i;
        for (let j = 1; j <= target.length; j++) {
            const substitutionCost = source[i - 1] === target[j - 1] ? 0 : 1;
            current[j] = Math.min(current[j - 1] + 1, // insertion
            previous[j] + 1, // deletion
            previous[j - 1] + substitutionCost);
        }
        // Swap row references instead of copying every cell.
        [previous, current] = [current, previous];
    }
    const distance = previous[target.length];
    return 1.0 - distance / longest;
}
58
/**
 * Field-overlap score for two assertion objects, ignoring the `id` key.
 *
 * The union of both objects' own enumerable keys (minus `id`) is taken; a key
 * counts as agreeing when the JSON serialisations of the two values are
 * equal. The score is the agreeing share of that union.
 *
 * @param a - First assertion object.
 * @param b - Second assertion object.
 * @returns 0.0 (no field agrees) to 1.0 (every field agrees); 1.0 when
 *   neither object has any non-id key.
 */
export function fieldJaccardSimilarity(a, b) {
    const fields = new Set();
    for (const source of [a, b]) {
        for (const key of Object.keys(source)) {
            if (key !== 'id') {
                fields.add(key);
            }
        }
    }
    // Nothing to compare on either side — treated as trivially identical.
    if (fields.size === 0) {
        return 1.0;
    }
    const agreeing = [...fields].filter((field) => JSON.stringify(a[field]) === JSON.stringify(b[field]));
    return agreeing.length / fields.size;
}
78
/**
 * Blend the two similarity signals for a removed/added assertion pair.
 *
 * The result is 40% field overlap (fieldJaccardSimilarity over the whole
 * objects) plus 60% description similarity (levenshteinSimilarity over the
 * `description` values, with a falsy description treated as the empty
 * string). The description carries the larger weight because a rename
 * usually keeps the test's intent while restructuring everything else.
 *
 * @param removed - Assertion that disappeared.
 * @param added - Assertion that appeared.
 * @returns The weighted score.
 */
export function compositeSimilarity(removed, added) {
    const describe = (assertion) => assertion.description || '';
    const fieldScore = fieldJaccardSimilarity(removed, added);
    const descriptionScore = levenshteinSimilarity(describe(removed), describe(added));
    return 0.4 * fieldScore + 0.6 * descriptionScore;
}
91
/**
 * List the field names whose values differ between two assertions.
 *
 * Every own enumerable key of either object except `id` is considered; a
 * field is reported when the JSON serialisations of its two values differ.
 *
 * @param removed - Assertion from the removed side.
 * @param added - Assertion from the added side.
 * @returns The differing keys, sorted with the default array sort.
 */
function computeChangedKeys(removed, added) {
    const isComparable = (key) => key !== 'id';
    const candidates = new Set([
        ...Object.keys(removed).filter(isComparable),
        ...Object.keys(added).filter(isComparable),
    ]);
    return [...candidates]
        .filter((key) => JSON.stringify(removed[key]) !== JSON.stringify(added[key]))
        .sort();
}
108
- // ─── Core Detection ───────────────────────────────────────────────────────────
109
/**
 * Clamp and validate the threshold value.
 *
 * The input is coerced to a number and limited to
 * [MIN_THRESHOLD, MAX_THRESHOLD]. A value that is not a number (NaN, for
 * example from an unparseable setting) falls back to DEFAULT_THRESHOLD:
 * letting NaN through would make every `similarity >= threshold` comparison
 * false and silently disable matching.
 *
 * @param {number} threshold - Requested similarity threshold.
 * @returns {number} A usable threshold within the allowed range.
 */
export function clampThreshold(threshold) {
    const numeric = Number(threshold);
    if (Number.isNaN(numeric)) {
        return DEFAULT_THRESHOLD;
    }
    return Math.max(MIN_THRESHOLD, Math.min(MAX_THRESHOLD, numeric));
}
115
/**
 * Detect probable renames between removed and added test assertion sets.
 *
 * Uses greedy bipartite matching: score every removed/added pair, keep the
 * pairs at or above the threshold, sort them by descending score, and assign
 * one-to-one matches from the top. Scoring is O(n*m) pairs (n=|removed|,
 * m=|added|) and the candidate sort adds a logarithmic factor — acceptable
 * for test suites, which are typically <100 assertions.
 *
 * @param added     Tests present locally but not in stored harness
 * @param removed   Tests present in stored harness but not locally
 * @param threshold Minimum similarity to consider a rename; passed through
 *                  clampThreshold before use
 * @returns `{ renamed, residualAdded, residualRemoved }` — detected renames
 *          (each with the rounded similarity and the changed keys), ordered
 *          by the removed test's id, plus the tests left unmatched
 */
export function detectRenames(added, removed, threshold = DEFAULT_THRESHOLD) {
    const effectiveThreshold = clampThreshold(threshold);
    // Edge case: nothing to match
    if (added.length === 0 || removed.length === 0) {
        return { renamed: [], residualAdded: [...added], residualRemoved: [...removed] };
    }
    // Step 1: Score every pair and keep the ones that clear the threshold
    const candidates = [];
    for (const [removedIdx, removedTest] of removed.entries()) {
        for (const [addedIdx, addedTest] of added.entries()) {
            const similarity = compositeSimilarity(removedTest, addedTest);
            if (similarity >= effectiveThreshold) {
                candidates.push({ removedIdx, addedIdx, similarity });
            }
        }
    }
    // Step 2: Sort descending by similarity (greedy — highest first)
    candidates.sort((x, y) => y.similarity - x.similarity);
    // Step 3: Greedy one-to-one matching
    const matchedRemoved = new Set();
    const matchedAdded = new Set();
    const renamed = [];
    for (const { removedIdx, addedIdx, similarity } of candidates) {
        if (matchedRemoved.has(removedIdx) || matchedAdded.has(addedIdx)) {
            continue; // One side is already taken by a better-scoring pair
        }
        matchedRemoved.add(removedIdx);
        matchedAdded.add(addedIdx);
        const removedTest = removed[removedIdx];
        const addedTest = added[addedIdx];
        renamed.push({
            removed: removedTest,
            added: addedTest,
            similarity: Math.round(similarity * 1000) / 1000, // 3 decimal places
            changed_keys: computeChangedKeys(removedTest, addedTest),
        });
    }
    // Step 4: Build residuals — unmatched tests stay in added/removed
    const residualAdded = added.filter((_, i) => !matchedAdded.has(i));
    const residualRemoved = removed.filter((_, i) => !matchedRemoved.has(i));
    // Order renames by the removed test's id for stable output. Ids are
    // coerced to strings first so a numeric or missing id cannot crash the
    // comparator (localeCompare exists only on strings).
    const sortKey = (entry) => String(entry.removed.id ?? '');
    renamed.sort((x, y) => sortKey(x).localeCompare(sortKey(y)));
    return { renamed, residualAdded, residualRemoved };
}