prism-mcp-server 5.1.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,295 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * ═══════════════════════════════════════════════════════════════════
4
+ * Universal History Importer — Strategy Pattern Orchestrator
5
+ * ═══════════════════════════════════════════════════════════════════
6
+ *
7
+ * REVIEWER NOTE — Architecture:
8
+ * This module serves two purposes:
9
+ * 1. LIBRARY: The `universalImporter()` function is importable for
10
+ * programmatic use (e.g., tests, future MCP tool integration).
11
+ * 2. CLI: The `runCLI()` function parses argv and invokes the library.
12
+ *
13
+ * The CLI entry point is guarded by an `isMain` check so importing
14
+ * this module in tests doesn't trigger `process.exit()`.
15
+ *
16
+ * CONVERSATION GROUPING (v5.2.1):
17
+ * Individual turns are grouped into logical conversations using a
18
+ * time-gap heuristic (default: 30 minutes of silence = new conversation).
19
+ * Each conversation is stored as ONE summary ledger entry, not per-turn.
20
+ * This prevents a 100MB import from creating 50,000 individual rows.
21
+ *
22
+ * DEDUPLICATION:
23
+ * Each conversation gets a deterministic ID based on adapter + start
24
+ * timestamp. Before writing, we check if that conversation_id already
25
+ * exists for user_id "universal-migration-tool". Re-running the same
26
+ * import is a no-op.
27
+ *
28
+ * CONCURRENCY:
29
+ * Uses `p-limit(5)` to cap parallel database writes. Without this,
30
+ * ingesting many conversations would saturate SQLite's write lock or
31
+ * exhaust Supabase connection pool limits.
32
+ *
33
+ * ADAPTER RESOLUTION:
34
+ * Priority: explicit --format= flag > filename-based canHandle() > content sniffing.
35
+ * Filename detection is described in each adapter's canHandle docs; sniffFormat() is the fallback.
36
+ * If every stage fails, the import aborts and --format= must be supplied.
37
+ * ═══════════════════════════════════════════════════════════════════
38
+ */
39
import { realpathSync } from "node:fs";
import { pathToFileURL } from "node:url";
import pLimit from "p-limit";
import { getStorage } from "../storage/index.js";
import { claudeAdapter } from "./migration/claudeAdapter.js";
import { geminiAdapter } from "./migration/geminiAdapter.js";
import { openaiAdapter } from "./migration/openaiAdapter.js";
import { sniffFormat } from "./migration/utils.js";
45
+ // ── Adapter Registry ──────────────────────────────────────────────
46
+ // Order matters for auto-detection: adapters.find() returns the first match, and Claude (.jsonl) is unambiguous,
47
+ // so it's checked first. Gemini/OpenAI both use .json, and are
48
+ // disambiguated by filename conventions (see canHandle docs).
49
+ // If filename detection fails, content-sniffing (sniffFormat) is used as a fallback.
50
+ const adapters = [claudeAdapter, geminiAdapter, openaiAdapter];
51
+ // ── Conversation Grouping Constants ──────────────────────────────
52
+ // A gap of more than 30 minutes between consecutive turns signals a new conversation (the comparison is strictly greater-than).
53
+ // This matches typical coding session patterns: developers take breaks,
54
+ // switch tasks, or come back the next day.
55
+ const CONVERSATION_GAP_MS = 30 * 60 * 1000; // 30 minutes, in milliseconds
56
/**
 * Build a human-readable summary from a group of conversation turns.
 *
 * Output format:
 *   [Imported] <first user message: first 300 chars, newlines → spaces, trimmed>
 *
 *   Conversation: X turns (Y user, Z assistant)
 *   Time range: <start> → <end>
 *   Tools used: tool1, tool2, ...      (only when at least one turn lists tools)
 *   Files: f1, f2, ... (+N more)       (only when at least one turn lists files;
 *                                       at most 10 are shown)
 *
 * @param {Array<object>} turns - Turns in conversation order. Each turn is read
 *   for `role`, `content`, `timestamp`, `tools` and `files_changed`.
 * @returns {string} The multi-line summary text.
 */
function buildConversationSummary(turns) {
    const MAX_TOPIC_CHARS = 300;
    const MAX_FILES_SHOWN = 10;

    // Single pass for the role counts and the first user turn.
    let firstUserTurn;
    let userTurnCount = 0;
    let assistantTurnCount = 0;
    for (const turn of turns) {
        if (turn.role === 'user') {
            userTurnCount += 1;
            firstUserTurn ??= turn;
        }
        else if (turn.role === 'assistant') {
            assistantTurnCount += 1;
        }
    }

    // Topic: first user message, truncated for readability. A user turn whose
    // content is not a string (missing, structured parts, ...) must not crash
    // the import, so it is treated like "no user message".
    const topic = typeof firstUserTurn?.content === 'string'
        ? firstUserTurn.content.substring(0, MAX_TOPIC_CHARS).replace(/\n/g, ' ').trim()
        : 'No user message';

    // An empty group or a turn without a timestamp yields "unknown" instead of throwing.
    const startTime = turns[0]?.timestamp ?? 'unknown';
    const endTime = turns[turns.length - 1]?.timestamp ?? 'unknown';

    // Unique tools / files across the whole conversation, in first-seen order.
    const allTools = [...new Set(turns.flatMap((t) => t.tools || []).filter(Boolean))];
    const allFiles = [...new Set(turns.flatMap((t) => t.files_changed || []).filter(Boolean))];

    const toolsSummary = allTools.length > 0 ? `\nTools used: ${allTools.join(', ')}` : '';
    const hiddenFiles = allFiles.length - MAX_FILES_SHOWN;
    const filesSummary = allFiles.length > 0
        ? `\nFiles: ${allFiles.slice(0, MAX_FILES_SHOWN).join(', ')}${hiddenFiles > 0 ? ` (+${hiddenFiles} more)` : ''}`
        : '';

    return `[Imported] ${topic}\n\n` +
        `Conversation: ${turns.length} turns (${userTurnCount} user, ${assistantTurnCount} assistant)\n` +
        `Time range: ${startTime} → ${endTime}` +
        toolsSummary +
        filesSummary;
}
89
/**
 * Generate a deterministic conversation_id from adapter name + start timestamp.
 * The same inputs always produce the same ID, which is what the importer's
 * duplicate check relies on.
 *
 * @param {string} adapterId - Adapter identifier used as a human-readable prefix.
 * @param {string|number|Date} firstTimestamp - Timestamp of the conversation's
 *   first turn; anything `new Date()` accepts.
 * @returns {string} `import-<adapterId>-<epoch ms>`. When the timestamp cannot
 *   be parsed, a sanitized copy of the raw value replaces the epoch.
 */
function makeConversationId(adapterId, firstTimestamp) {
    // Use epoch ms for uniqueness, but keep the human-readable prefix.
    const epoch = new Date(firstTimestamp).getTime();
    if (!Number.isNaN(epoch)) {
        return `import-${adapterId}-${epoch}`;
    }
    // An unparseable timestamp would otherwise give every such conversation the
    // same "...-NaN" id, so they would all be treated as duplicates of each
    // other. Fall back to the raw value, restricted to characters that are safe
    // to embed in an id / filter expression.
    const rawSuffix = String(firstTimestamp).replace(/[^\w.-]+/g, '_');
    return `import-${adapterId}-${rawSuffix}`;
}
98
/**
 * Core migration function — importable for programmatic use.
 *
 * Pipeline:
 *   1. Resolve an adapter: explicit `format` → filename-based canHandle() →
 *      content sniffing via sniffFormat().
 *   2. The adapter streams individual turns via the onTurn callback.
 *   3. Turns are buffered; a gap of more than CONVERSATION_GAP_MS between
 *      consecutive turns closes the current conversation.
 *   4. Each conversation is summarized into ONE ledger entry.
 *   5. A conversation whose deterministic conversation_id already exists for
 *      user_id "universal-migration-tool" is skipped.
 *
 * @param {object} options
 * @param {string} options.path - File to import.
 * @param {string} [options.format] - Adapter id to force (matched against each
 *   adapter's `id`). An unknown value is reported and auto-detection is used.
 * @param {string} [options.project] - Target project; falls back to the first
 *   turn's `project`, then to "default".
 * @param {boolean} [options.dryRun] - Group and count without writing to storage.
 * @param {boolean} [options.verbose] - Log every conversation and full error objects.
 * @returns {Promise<{successCount: number, failCount: number, skipCount: number, conversationCount: number}>}
 *   successCount / failCount / skipCount are measured in turns;
 *   conversationCount counts every flushed conversation, including skipped and
 *   failed ones.
 * @throws {Error} When no adapter can be determined; errors raised while
 *   parsing are logged and rethrown.
 */
export async function universalImporter(options) {
    const { path: filePathArg, format: formatArg, project: projectArg, dryRun, verbose } = options;
    // ── Adapter Resolution (Three-Stage Pipeline) ───────────────────
    let adapter;
    if (formatArg) {
        // Stage 1: Explicit format flag
        adapter = adapters.find((a) => a.id === formatArg);
        if (!adapter) {
            // Don't ignore a typo silently: say that the flag was not honoured.
            const known = adapters.map((a) => a.id).join(", ");
            console.warn(`⚠️ Unknown format "${formatArg}" (known: ${known}). Falling back to auto-detection.`);
        }
    }
    if (!adapter) {
        // Stage 2: Filename-based auto-detection (first adapter that accepts the path)
        adapter = adapters.find((a) => a.canHandle(filePathArg));
    }
    if (!adapter) {
        // Stage 3: Content-sniffing fallback
        const sniffed = sniffFormat(filePathArg);
        if (sniffed) {
            adapter = adapters.find((a) => a.id === sniffed);
            if (adapter) {
                console.log(`🔍 Auto-detected format: ${sniffed} (via content sniffing)`);
            }
        }
    }
    if (!adapter) {
        throw new Error(`Could not determine adapter for file: ${filePathArg}. Use --format to specify.`);
    }
    console.log(`🚀 Starting migration from ${adapter.id} to Prism...`);
    if (dryRun) {
        console.log("⚠️ DRY RUN MODE - storage writes disabled.");
    }
    // ── Storage + Concurrency ──────────────────────────────────────
    const storage = await getStorage();
    // NOTE(review): every flush below is awaited before the next one starts, so
    // this limiter looks like it never has more than one write in flight —
    // confirm whether adapter.parse() awaits the onTurn callback.
    const limit = pLimit(5);
    let successCount = 0;
    let skipCount = 0;
    let failCount = 0;
    let conversationCount = 0;
    // ── Conversation Grouping Buffer ───────────────────────────────
    // Accumulates turns until a time gap > CONVERSATION_GAP_MS is detected,
    // then flushes the buffer as one conversation summary.
    let conversationBuffer = [];
    let lastTurnTime = null;
    /**
     * Flush the current conversation buffer as a single ledger entry.
     * Called when a time gap is detected and once more at end-of-file.
     */
    async function flushConversation() {
        if (conversationBuffer.length === 0) {
            return;
        }
        const turns = conversationBuffer;
        conversationBuffer = []; // Reset immediately so new turns start a fresh group
        const targetProject = projectArg || turns[0].project || "default";
        const conversationId = makeConversationId(adapter.id, turns[0].timestamp);
        const summary = buildConversationSummary(turns);
        // Collect metadata from all turns
        const allTools = [...new Set(turns.flatMap((t) => t.tools || []).filter(Boolean))];
        const allFiles = [...new Set(turns.flatMap((t) => t.files_changed || []).filter(Boolean))];
        // NOTE(review): assumes timestamp is an ISO-8601 string ("YYYY-MM-DDT...") — confirm against adapters.
        const sessionDate = turns[0].timestamp.split("T")[0]; // YYYY-MM-DD
        conversationCount++;
        if (verbose) {
            const turnCount = turns.length;
            console.log(`📦 Conversation #${conversationCount}: ${turnCount} turns (${sessionDate}) → ${conversationId}`);
        }
        if (dryRun) {
            successCount += turns.length;
            return;
        }
        try {
            // ── Deduplication Check ──────────────────────────────────────
            // Look for an existing entry with the same deterministic
            // conversation_id and the migration user_id; if one exists this
            // conversation is skipped.
            const existing = await storage.getLedgerEntries({
                conversation_id: `eq.${conversationId}`,
                user_id: 'eq.universal-migration-tool',
                limit: '1',
            });
            if (existing.length > 0) {
                skipCount += turns.length;
                if (verbose) {
                    console.log(`⏭️ Skipping duplicate: ${conversationId}`);
                }
                return;
            }
            // ── Store Single Summary Entry ──────────────────────────────
            await limit(() => storage.saveLedger({
                project: targetProject,
                conversation_id: conversationId,
                user_id: "universal-migration-tool",
                role: "global",
                summary,
                created_at: turns[0].timestamp,
                session_date: sessionDate,
                todos: [],
                files_changed: allFiles,
                keywords: allTools,
            }));
            successCount += turns.length;
        }
        catch (err) {
            // One bad conversation must not abort the import, but it must not
            // disappear silently either: always report it, with the full error
            // object only in verbose mode.
            failCount += turns.length;
            if (verbose) {
                console.error(`Failed to ingest conversation ${conversationId}:`, err);
            }
            else {
                const reason = err instanceof Error ? err.message : String(err);
                console.error(`Failed to ingest conversation ${conversationId}: ${reason}`);
            }
        }
    }
    try {
        // ── Streaming Parse + Conversation Windowing ──────────────────
        // Only the turns of the conversation currently being assembled are
        // held in the buffer.
        await adapter.parse(filePathArg, async (turn) => {
            const turnTime = new Date(turn.timestamp);
            // Detect conversation boundary: time gap > CONVERSATION_GAP_MS
            if (lastTurnTime && (turnTime.getTime() - lastTurnTime.getTime()) > CONVERSATION_GAP_MS) {
                await flushConversation();
            }
            conversationBuffer.push(turn);
            lastTurnTime = turnTime;
        });
        // ── Final Flush ──────────────────────────────────────────────
        // Flush the last conversation (no trailing time gap to trigger it)
        await flushConversation();
        console.log("\n✅ Migration complete!");
        console.log(` Conversations: ${conversationCount}`);
        console.log(` Turns processed: ${successCount}`);
        if (skipCount > 0) {
            console.log(` Skipped (dup): ${skipCount}`);
        }
        if (failCount > 0) {
            console.log(` Failed: ${failCount}`);
        }
        return { successCount, failCount, skipCount, conversationCount };
    }
    catch (err) {
        console.error("\n❌ Fatal error during migration:", err);
        throw err;
    }
    finally {
        // ── Cleanup ────────────────────────────────────────────────────
        // Close the storage handle whenever the backend exposes close().
        // NOTE(review): this runs for library callers too, not only for the
        // standalone CLI — confirm that closing is safe when the handle
        // returned by getStorage() is shared with a running server.
        if (typeof storage.close === 'function') {
            await storage.close();
        }
    }
}
253
// ═══════════════════════════════════════════════════════════════════
// CLI Entry Point — only runs when invoked directly via `node`
// ═══════════════════════════════════════════════════════════════════
/**
 * Parse process.argv and run the importer.
 *
 * The first argument that does not start with "-" is the input file. Without
 * one, the usage text is printed and the process exits with code 0. Any error
 * from universalImporter() is reported on stderr and the process exits with
 * code 1.
 *
 * @returns {Promise<void>}
 */
async function runCLI() {
    const args = process.argv.slice(2);
    // Read `--name=value`, keeping everything after the FIRST "=" so values
    // that contain "=" themselves are not truncated.
    const readOption = (name) => {
        const prefix = `--${name}=`;
        const match = args.find((a) => a.startsWith(prefix));
        return match === undefined ? undefined : match.slice(prefix.length);
    };
    const filePathArg = args.find((a) => !a.startsWith("-"));
    const formatArg = readOption("format");
    const projectArg = readOption("project");
    const dryRun = args.includes("--dry-run") || args.includes("-d");
    const verbose = args.includes("--verbose") || args.includes("-v");
    if (!filePathArg) {
        console.log(`
Prism Universal History Importer
Usage: node universalImporter.js <file> [options]

Options:
--format=<claude|gemini|openai> Force a specific format adapter
--project=<name> Override target project name (default: "default")
--dry-run, -d Process and validate without saving to storage
--verbose, -v Print detailed turn information during processing
`);
        process.exit(0);
    }
    try {
        await universalImporter({
            path: filePathArg,
            format: formatArg,
            project: projectArg,
            dryRun,
            verbose
        });
    }
    catch (err) {
        // Errors raised before the importer's own try block (adapter
        // resolution, storage initialisation) are not logged anywhere else,
        // so always print at least the message before exiting non-zero.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`❌ ${reason}`);
        process.exit(1);
    }
}
289
// ── Main Guard ─────────────────────────────────────────────────────
// Only invoke the CLI when this file is the process entry point.
// Importing this module from tests or other code won't trigger the CLI.
//
// The entry script path is resolved through symlinks before it is compared
// with this module's URL. A substring test on process.argv[1] misses launches
// through a bin symlink (whose name need not contain "universalImporter") and
// also fires for any other entry script whose path happens to contain it.
/**
 * @returns {boolean} true when this module is the script Node was started with.
 */
function isMainModule() {
    const entryPath = process.argv[1];
    if (!entryPath) {
        return false; // e.g. REPL or `node -e`
    }
    try {
        return import.meta.url === pathToFileURL(realpathSync(entryPath)).href;
    }
    catch {
        // Entry path cannot be resolved on disk: not a direct invocation of this file.
        return false;
    }
}
const isMain = isMainModule();
if (isMain) {
    runCLI();
}
package/package.json CHANGED
@@ -1,13 +1,14 @@
1
1
  {
2
2
  "name": "prism-mcp-server",
3
- "version": "5.1.0",
3
+ "version": "5.2.0",
4
4
  "mcpName": "io.github.dcostenco/prism-mcp",
5
5
  "description": "The Mind Palace for AI Agents — persistent memory (SQLite/Supabase), behavioral learning & IDE rules sync, multimodal VLM image captioning, pluggable LLM providers (OpenAI/Anthropic/Gemini/Ollama), OpenTelemetry distributed tracing, GDPR export, multi-agent Hivemind sync, time travel, visual Mind Palace dashboard. Zero-config local mode.",
6
6
  "module": "index.ts",
7
7
  "type": "module",
8
8
  "main": "dist/server.js",
9
9
  "bin": {
10
- "prism-mcp-server": "dist/server.js"
10
+ "prism-mcp-server": "dist/server.js",
11
+ "prism-import": "dist/utils/universalImporter.js"
11
12
  },
12
13
  "files": [
13
14
  "dist"
@@ -19,7 +20,8 @@
19
20
  "test:watch": "vitest",
20
21
  "test:load": "vitest run tests/load/",
21
22
  "test:ci": "vitest run --reporter=junit --outputFile=test-results.xml",
22
- "test:mcp": "node ./test_cross_mcp.js"
23
+ "test:mcp": "node ./test_cross_mcp.js",
24
+ "import": "node dist/utils/universalImporter.js"
23
25
  },
24
26
  "keywords": [
25
27
  "mcp",
@@ -95,6 +97,8 @@
95
97
  "dotenv": "^16.5.0",
96
98
  "fflate": "^0.8.2",
97
99
  "openai": "^6.32.0",
98
- "quickjs-emscripten": "^0.32.0"
100
+ "p-limit": "^7.3.0",
101
+ "quickjs-emscripten": "^0.32.0",
102
+ "stream-json": "^2.0.0"
99
103
  }
100
104
  }