npm - prism-mcp-server - Versions diffs - 4.6.1 → 5.2.0 - Mend

prism-mcp-server 4.6.1 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +408 -1306
package/dist/dashboard/server.js +391 -22
package/dist/dashboard/ui.js +363 -17
package/dist/server.js +15 -2
package/dist/storage/sqlite.js +277 -6
package/dist/storage/supabase.js +58 -0
package/dist/storage/supabaseMigrations.js +104 -1
package/dist/tools/compactionHandler.js +17 -7
package/dist/tools/index.js +2 -2
package/dist/tools/sessionMemoryDefinitions.js +70 -0
package/dist/tools/sessionMemoryHandlers.js +167 -9
package/dist/utils/migration/claudeAdapter.js +131 -0
package/dist/utils/migration/geminiAdapter.js +87 -0
package/dist/utils/migration/openaiAdapter.js +88 -0
package/dist/utils/migration/types.js +18 -0
package/dist/utils/migration/utils.js +99 -0
package/dist/utils/testUniversalImporter.js +10 -0
package/dist/utils/turboquant.js +730 -0
package/dist/utils/universalImporter.js +295 -0
package/package.json +8 -4

package/dist/tools/sessionMemoryHandlers.js CHANGED Viewed

@@ -37,7 +37,9 @@ isKnowledgeSetRetentionArgs, // v3.1: TTL retention policy type guard
 // v4.0: Active Behavioral Memory type guards
 isSessionSaveExperienceArgs, isKnowledgeVoteArgs,
 // v4.2: Sync Rules type guard
-isKnowledgeSyncRulesArgs, } from "./sessionMemoryDefinitions.js";
+isKnowledgeSyncRulesArgs,
+// v5.1: Deep Storage Mode type guard
+isDeepStoragePurgeArgs, } from "./sessionMemoryDefinitions.js";
 // v4.2: File system access for knowledge_sync_rules
 import { readFile, writeFile, mkdir } from "node:fs/promises";
 import { existsSync } from "node:fs";
@@ -105,9 +107,26 @@ export async function sessionSaveLedgerHandler(args) {
         if (entryId) {
             getLLMProvider().generateEmbedding(embeddingText)
                 .then(async (embedding) => {
-                await storage.patchLedger(entryId, {
+                // Build atomic patch — float32 + TurboQuant in ONE DB update
+                const patchData = {
                     embedding: JSON.stringify(embedding),
-                });
+                };
+                // TurboQuant: compress alongside float32 (non-fatal)
+                try {
+                    const { getDefaultCompressor, serialize } = await import("../utils/turboquant.js");
+                    const compressor = getDefaultCompressor();
+                    const compressed = compressor.compress(embedding);
+                    const buf = serialize(compressed);
+                    patchData.embedding_compressed = buf.toString("base64");
+                    patchData.embedding_format = `turbo${compressor.bits}`;
+                    patchData.embedding_turbo_radius = compressed.radius;
+                    debugLog(`[session_save_ledger] TurboQuant compressed: ${buf.length} bytes (${(3072 / buf.length).toFixed(1)}× ratio)`);
+                }
+                catch (turboErr) {
+                    console.error(`[session_save_ledger] TurboQuant compression failed (non-fatal): ${turboErr.message}`);
+                }
+                // Single atomic DB update for all embedding data
+                await storage.patchLedger(entryId, patchData);
                 debugLog(`[session_save_ledger] Embedding saved for entry ${entryId}`);
             })
                 .catch((err) => {
@@ -810,9 +829,12 @@ export async function sessionSearchMemoryHandler(args) {
     const { query, project, limit = 5, similarity_threshold = 0.7,
     // Phase 1: enable_trace defaults to false for full backward compatibility.
     // When true, a MemoryTrace JSON block is appended as content[1].
-    enable_trace = false, } = args;
+    enable_trace = false,
+    // v5.2: Context-Weighted Retrieval — biases search toward active work context
+    context_boost = false, } = args;
     debugLog(`[session_search_memory] Semantic search: query="${query}", ` +
-        `project=${project || "all"}, limit=${limit}, threshold=${similarity_threshold}`);
+        `project=${project || "all"}, limit=${limit}, threshold=${similarity_threshold}` +
+        `${context_boost ? ", context_boost=ON" : ""}`);
     // Phase 1: Start total latency timer BEFORE any work (embedding + storage)
     const totalStart = performance.now();
     // Step 1: Generate embedding for the search query
@@ -831,8 +853,43 @@ export async function sessionSearchMemoryHandler(args) {
     // Phase 1: Start embedding latency timer — isolates Gemini API call time.
     // This is the most variable component: 50ms on a good day, 2000ms under load.
     const embeddingStart = performance.now();
+    // ── v5.2: Context-Weighted Retrieval ───────────────────────────
+    // When context_boost is enabled, prepend active project context to the
+    // search query before embedding generation. This naturally biases the
+    // embedding vector toward memories from the same project/branch/context.
+    // Elegant: no scoring heuristics needed — semantics do the work.
+    let effectiveQuery = query;
+    if (context_boost && project) {
+        try {
+            const storage = await getStorage();
+            const ctx = await storage.loadContext(project, "quick", PRISM_USER_ID);
+            const contextParts = [];
+            if (ctx && typeof ctx === "object") {
+                const ctxObj = ctx;
+                if (ctxObj.active_branch)
+                    contextParts.push(`branch: ${ctxObj.active_branch}`);
+                if (ctxObj.key_context)
+                    contextParts.push(`context: ${String(ctxObj.key_context).substring(0, 200)}`);
+                const keywords = ctxObj.keywords;
+                if (keywords?.length)
+                    contextParts.push(`keywords: ${keywords.slice(0, 5).join(", ")}`);
+            }
+            if (contextParts.length > 0) {
+                effectiveQuery = `[${contextParts.join("; ")}] ${query}`;
+                debugLog(`[session_search_memory] Context boost applied: "${effectiveQuery.substring(0, 100)}..."`);
+            }
+        }
+        catch {
+            // Context load failed — proceed with unmodified query (graceful degradation)
+            debugLog("[session_search_memory] Context boost failed (non-fatal) — using original query");
+        }
+    }
+    else if (context_boost && !project) {
+        // User enabled context_boost but didn't specify a project — can't boost without context
+        debugLog("[session_search_memory] context_boost ignored — requires a project parameter to load context");
+    }
     try {
-        queryEmbedding = await getLLMProvider().generateEmbedding(query);
+        queryEmbedding = await getLLMProvider().generateEmbedding(effectiveQuery);
     }
     catch (err) {
         return {
@@ -893,14 +950,40 @@ export async function sessionSearchMemoryHandler(args) {
             }
             return { content: contentBlocks, isError: false };
         }
-        // Format results with similarity scores
+        // ── v5.2: Dynamic Importance Decay (Ebbinghaus Curve) ──────
+        // Compute effective_importance at retrieval time:
+        //   effective = base_importance * 0.95^days_since_accessed
+        // This avoids background workers — decay is a pure function of time.
+        // Also fire-and-forget update last_accessed_at on all returned results.
+        const now = new Date();
+        const resultIds = results.map((r) => r.id).filter(Boolean);
+        // Fire-and-forget: update last_accessed_at for all returned results
+        if (resultIds.length > 0) {
+            const nowISO = now.toISOString();
+            for (const id of resultIds) {
+                storage.patchLedger(id, { last_accessed_at: nowISO }).catch(() => { });
+            }
+        }
+        // Format results with similarity scores + effective importance
         const formatted = results.map((r, i) => {
             const score = typeof r.similarity === "number"
                 ? `${(r.similarity * 100).toFixed(1)}%`
                 : "N/A";
+            // Dynamic importance decay: effective = base * 0.95^days
+            const baseImportance = r.importance ?? 0;
+            let effectiveImportance = baseImportance;
+            if (baseImportance > 0) {
+                const lastAccess = r.last_accessed_at || r.created_at || now.toISOString();
+                const daysSince = Math.max(0, (now.getTime() - new Date(lastAccess).getTime()) / 86400000);
+                effectiveImportance = Math.round(baseImportance * Math.pow(0.95, daysSince) * 100) / 100;
+            }
+            const importanceStr = baseImportance > 0
+                ? `  Importance: ${effectiveImportance}${effectiveImportance !== baseImportance ? ` (base: ${baseImportance}, decayed)` : ""}\n`
+                : "";
             return `[${i + 1}] ${score} similar — ${r.session_date || "unknown date"}\n` +
                 `  Project: ${r.project}\n` +
                 `  Summary: ${r.summary}\n` +
+                importanceStr +
                 (r.decisions?.length ? `  Decisions: ${r.decisions.join("; ")}\n` : "") +
                 (r.files_changed?.length ? `  Files: ${r.files_changed.join(", ")}\n` : "");
         }).join("\n");
@@ -1022,9 +1105,24 @@ export async function backfillEmbeddingsHandler(args) {
                 continue;
             }
             const embedding = await getLLMProvider().generateEmbedding(textToEmbed);
-            await storage.patchLedger(e.id, {
+            // Build atomic patch — float32 + TurboQuant in ONE DB update
+            const patchData = {
                 embedding: JSON.stringify(embedding),
-            });
+            };
+            // TurboQuant: compress alongside repair (non-fatal)
+            try {
+                const { getDefaultCompressor, serialize } = await import("../utils/turboquant.js");
+                const compressor = getDefaultCompressor();
+                const compressed = compressor.compress(embedding);
+                const buf = serialize(compressed);
+                patchData.embedding_compressed = buf.toString("base64");
+                patchData.embedding_format = `turbo${compressor.bits}`;
+                patchData.embedding_turbo_radius = compressed.radius;
+            }
+            catch (turboErr) {
+                debugLog(`[backfill] TurboQuant compression failed for ${e.id} (non-fatal): ${turboErr.message}`);
+            }
+            await storage.patchLedger(e.id, patchData);
             repaired++;
             debugLog(`[backfill] ✅ Repaired ${e.id} (${e.project})`);
         }
@@ -2066,3 +2164,63 @@ export async function sessionExportMemoryHandler(args) {
         };
     }
 }
+// ─── v5.1: Deep Storage Mode (The Purge) ──────────────────────
+//
+// REVIEWER NOTE: This handler is the storage optimization payoff of v5.0.
+// After TurboQuant backfill, old entries have BOTH float32 (3KB) and
+// compressed (400B) representations. This tool NULLs out the float32
+// column for entries old enough that Tier-1 native search value is minimal.
+//
+// HANDLER PATTERN:
+//   1. Validate args via isDeepStoragePurgeArgs (imported from definitions)
+//   2. Apply defaults (older_than_days=30, dry_run=false)
+//   3. Delegate to storage.purgeHighPrecisionEmbeddings()
+//   4. Format response with human-readable byte counts
+//
+// NO SERVER REF NEEDED: Unlike sessionSaveHandoffHandler, this tool
+// doesn't modify any subscribed resource — no notification needed.
+/**
+ * Handler for the `deep_storage_purge` tool.
+ *
+ * Validates the arguments, applies defaults (30 days, real run), delegates the
+ * work to `storage.purgeHighPrecisionEmbeddings()` and renders the outcome as a
+ * single markdown text block.
+ *
+ * @param {object} args - Raw tool arguments; must satisfy `isDeepStoragePurgeArgs`.
+ *   Reads `project`, `older_than_days` and `dry_run`.
+ * @returns {Promise<{content: Array<{type: string, text: string}>, isError: boolean}>}
+ *   A dry-run report or a completion report, depending on `dry_run`.
+ * @throws {Error} When `args` fails validation. Errors raised by the storage
+ *   layer are not caught here and propagate to the caller.
+ */
+export async function deepStoragePurgeHandler(args) {
+    if (!isDeepStoragePurgeArgs(args)) {
+        throw new Error("Invalid arguments for deep_storage_purge");
+    }
+    // `??` (not `||`) so that only null/undefined fall back to the defaults.
+    const thresholdDays = args.older_than_days ?? 30;
+    const isDryRun = args.dry_run ?? false;
+    const modeLabel = isDryRun ? "DRY RUN" : "EXECUTING";
+    debugLog(`[deep_storage_purge] ${modeLabel}: ` +
+        `olderThanDays=${thresholdDays}, project=${args.project || "all"}`);
+    const store = await getStorage();
+    const outcome = await store.purgeHighPrecisionEmbeddings({
+        project: args.project,
+        olderThanDays: thresholdDays,
+        dryRun: isDryRun,
+        userId: PRISM_USER_ID,
+    });
+    // Pieces shared by both report variants.
+    const megabytes = (outcome.reclaimedBytes / (1024 * 1024)).toFixed(2);
+    const sizeLabel = `**${outcome.reclaimedBytes.toLocaleString()} bytes** (~${megabytes} MB)`;
+    const scopeLine = args.project ? `Project: \`${args.project}\`\n` : `Scope: all projects\n`;
+    const thresholdLine = `Age threshold: entries older than ${thresholdDays} days\n\n`;
+    const reportParts = isDryRun
+        ? [
+            `🔍 **Deep Storage Purge — DRY RUN**\n\n`,
+            `Eligible entries: **${outcome.eligible}**\n`,
+            `Estimated space to reclaim: ${sizeLabel}\n\n`,
+            scopeLine,
+            thresholdLine,
+            `To execute the purge, call again with \`dry_run: false\`.`,
+        ]
+        : [
+            `✅ **Deep Storage Purge Complete**\n\n`,
+            `Purged entries: **${outcome.purged}**\n`,
+            `Reclaimed space: ${sizeLabel}\n\n`,
+            scopeLine,
+            thresholdLine,
+            `💡 Tier-2 (TurboQuant) and Tier-3 (FTS5) search remain fully functional.\n`,
+            `Tier-1 (native sqlite-vec) search will skip these entries — this is expected.`,
+        ];
+    return {
+        content: [{ type: "text", text: reportParts.join("") }],
+        isError: false,
+    };
+}

package/dist/utils/migration/claudeAdapter.js ADDED Viewed

@@ -0,0 +1,131 @@
+/**
+ * ═══════════════════════════════════════════════════════════════════
+ * Claude Code JSONL Adapter
+ * ═══════════════════════════════════════════════════════════════════
+ *
+ * REVIEWER NOTE — Claude Code Streaming Deduplication:
+ *   Claude Code does NOT write one clean JSON line per turn. It writes
+ *   to the JSONL file DURING streaming. This means you see multiple
+ *   JSON lines for the exact same `message.id` as the response streams in.
+ *
+ *   If we processed every `type: assistant` line blindly, we'd ingest
+ *   highly fragmented or duplicate entries. The solution is to aggregate
+ *   by `message.id` and only flush the LATEST version of each assistant
+ *   message when a user message arrives (or at end-of-file).
+ *
+ * STREAMING STRATEGY:
+ *   Uses Node's readline interface for true line-by-line processing.
+ *   Memory usage is O(pending_assistant_chunks), not O(file_size).
+ *   For a typical session, pending chunks rarely exceed 2-3 entries.
+ *
+ * SOURCE FORMAT (simplified):
+ *   { type: "assistant", message: { id: "msg_xxx", content: [...] }, timestamp: "..." }
+ *   { type: "user", content: "...", timestamp: "..." }
+ * ═══════════════════════════════════════════════════════════════════
+ */
+import fs from 'node:fs';
+import readline from 'node:readline';
+import { normalizeContent } from './utils.js';
+/**
+ * Migration adapter for Claude Code `.jsonl` session logs.
+ *
+ * `parse` streams the file line by line and reports each conversation turn to
+ * `onTurn`. Assistant entries that carry an ID are buffered and de-duplicated
+ * (last line per ID wins); everything else is emitted in file order.
+ */
+export const claudeAdapter = {
+    id: 'claude',
+    /**
+     * Filename-based detection: any path ending in `.jsonl` is claimed.
+     *
+     * @param {string} filePath - Path of the candidate file.
+     * @returns {boolean} True when the path ends with `.jsonl`.
+     */
+    canHandle(filePath) {
+        return filePath.endsWith('.jsonl');
+    },
+    /**
+     * Streams a JSONL file and emits one normalized turn per message.
+     *
+     * @param {string} filePath - Path of the JSONL file to read.
+     * @param {(turn: object) => (void | Promise<void>)} onTurn - Called (and
+     *   awaited) for every normalized turn, in order.
+     * @returns {Promise<void>} Resolves once the file is read and all buffered
+     *   assistant turns have been flushed.
+     * @throws Propagates stream errors and any error thrown by `onTurn`.
+     *   Lines that are not valid JSON objects are skipped, not thrown.
+     */
+    async parse(filePath, onTurn) {
+        const fileStream = fs.createReadStream(filePath);
+        const rl = readline.createInterface({
+            input: fileStream,
+            crlfDelay: Infinity, // Handle both \n and \r\n line endings
+        });
+        // ── Deduplication Buffer ──────────────────────────────────────
+        // Keyed by message ID. A later line with the same ID overwrites the
+        // earlier one, so only the latest version of each message is emitted.
+        const pendingAssistantChunks = new Map();
+        // Emits one assistant turn. `tools` is always [] here: this adapter
+        // does not extract tool usage from the log.
+        const emitAssistant = (messageId, msg) => onTurn({
+            role: 'assistant',
+            content: msg.content,
+            timestamp: msg.timestamp,
+            sessionId: 'claude-migration',
+            project: 'default',
+            todos: [],
+            files_changed: [],
+            messageId,
+            tools: msg.tools,
+        });
+        // Emits every buffered assistant turn (insertion order), then empties
+        // the buffer. If `onTurn` throws, the error propagates and the buffer
+        // is left untouched rather than being silently re-emitted later.
+        const flushPending = async () => {
+            for (const [id, msg] of pendingAssistantChunks) {
+                await emitAssistant(id, msg);
+            }
+            pendingAssistantChunks.clear();
+        };
+        for await (const line of rl) {
+            if (!line.trim()) {
+                continue; // Skip blank lines
+            }
+            // ── Malformed Line Handling ──────────────────────────────────
+            // Only JSON parsing is guarded. Keeping `onTurn` outside this try
+            // ensures callback failures are not mistaken for malformed input.
+            let entry;
+            try {
+                entry = JSON.parse(line);
+            }
+            catch {
+                continue;
+            }
+            if (entry === null || typeof entry !== 'object') {
+                continue; // Valid JSON but not a log record (e.g. `null`, `42`)
+            }
+            // ── Role Detection ─────────────────────────────────────────
+            // Either `entry.type` or `entry.message.role` may mark an assistant
+            // entry; anything else is treated as a user turn.
+            const role = entry.type === 'assistant' || entry.message?.role === 'assistant' ? 'assistant' : 'user';
+            // ── Content Extraction ─────────────────────────────────────
+            // Content lives at `entry.content` or `entry.message.content`.
+            const content = normalizeContent(entry.content || entry.message?.content || "");
+            const timestamp = entry.timestamp || new Date().toISOString();
+            // ── Message ID for Deduplication ───────────────────────────
+            const messageId = entry.id || entry.message?.id || entry.requestId;
+            if (role === 'assistant') {
+                if (messageId) {
+                    // Buffer instead of emitting: a later line with the same
+                    // ID replaces this one.
+                    pendingAssistantChunks.set(messageId, { content, tools: [], timestamp });
+                    continue;
+                }
+                // No ID means no way to de-duplicate. Emit right away (after
+                // earlier buffered turns, to keep file order) instead of
+                // dropping the message.
+                await flushPending();
+                await emitAssistant(messageId, { content, tools: [], timestamp });
+                continue;
+            }
+            // ── User Message: Flush Pending Assistants ──────────────────
+            // A user message ends the previous assistant turn.
+            await flushPending();
+            await onTurn({
+                role: 'user',
+                content,
+                timestamp,
+                sessionId: 'claude-migration',
+                project: 'default',
+                todos: [],
+                files_changed: [],
+                messageId,
+            });
+        }
+        // ── Final Flush ──────────────────────────────────────────────────
+        // The file may end with assistant messages and no trailing user turn.
+        await flushPending();
+    },
+};

package/dist/utils/migration/geminiAdapter.js ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * ═══════════════════════════════════════════════════════════════════
+ * Gemini History JSON Adapter
+ * ═══════════════════════════════════════════════════════════════════
+ *
+ * REVIEWER NOTE — Streaming Large JSON Arrays:
+ *   Gemini exports history as a single JSON array (not JSONL).
+ *   A naive `JSON.parse(fs.readFileSync(...))` would load the entire
+ *   file into memory — OOM for 100MB+ exports.
+ *
+ *   We use `stream-json/StreamArray` to parse array elements one at a
+ *   time in streaming fashion. Memory usage is O(1) per entry.
+ *
+ * ROLE MAPPING:
+ *   Gemini uses "model" for assistant responses (not "assistant").
+ *   We normalize this to "assistant" for Prism's unified schema.
+ *
+ * TIMESTAMP FALLBACK:
+ *   Gemini SDK history arrays often lack per-turn timestamps.
+ *   We fall back to `createTime` (if present) or current time.
+ *   The orchestrator may override timestamps via ensureChronology.
+ *
+ * SOURCE FORMAT (simplified):
+ *   [
+ *     { role: "user", parts: [{ text: "..." }] },
+ *     { role: "model", parts: [{ text: "..." }] }
+ *   ]
+ * ═══════════════════════════════════════════════════════════════════
+ */
+import fs from 'node:fs';
+import { chain } from 'stream-chain';
+import StreamArray from 'stream-json/streamers/stream-array.js';
+import { normalizeContent } from './utils.js';
+/**
+ * Migration adapter for Gemini history exports (a single JSON array).
+ *
+ * `parse` streams the array element by element and reports each entry to
+ * `onTurn` as a normalized turn.
+ */
+export const geminiAdapter = {
+    id: 'gemini',
+    /**
+     * Filename-based detection for Gemini files.
+     *
+     * Claims every `.json` path (case-insensitive) unless the path mentions
+     * "openai" or "chatgpt", which is left to the OpenAI adapter.
+     *
+     * @param {string} filePath - Path of the candidate file.
+     * @returns {boolean} True when this adapter should handle the file.
+     */
+    canHandle(filePath) {
+        const lowered = filePath.toLowerCase();
+        if (!lowered.endsWith('.json')) {
+            return false;
+        }
+        const looksLikeOpenAI = lowered.includes('openai') || lowered.includes('chatgpt');
+        return !looksLikeOpenAI;
+    },
+    /**
+     * Streams a JSON array file and emits one normalized turn per element.
+     *
+     * @param {string} filePath - Path of the JSON file to read.
+     * @param {(turn: object) => (void | Promise<void>)} onTurn - Called (and
+     *   awaited) for every array element, in order.
+     * @returns {Promise<void>} Resolves when the whole array has been consumed.
+     */
+    async parse(filePath, onTurn) {
+        // File stream → JSON parser + array streamer. Each item produced by
+        // the chain is `{ key, value }`; only `value` (the element) is used.
+        const elements = chain([
+            fs.createReadStream(filePath),
+            StreamArray.withParser(),
+        ]);
+        for await (const item of elements) {
+            const entry = item.value;
+            // 'model' (Gemini's name) and 'assistant' both map to 'assistant';
+            // every other role value is treated as 'user'.
+            const isAssistantTurn = entry.role === 'model' || entry.role === 'assistant';
+            // Prefer `parts`, fall back to `content`, then to an empty string.
+            const rawContent = entry.parts || entry.content || "";
+            // Priority: entry.timestamp > entry.createTime > current time.
+            // Entries without either field all receive "now" as their timestamp.
+            const when = entry.timestamp || entry.createTime || new Date().toISOString();
+            await onTurn({
+                role: isAssistantTurn ? 'assistant' : 'user',
+                content: normalizeContent(rawContent),
+                timestamp: when,
+                sessionId: 'gemini-migration',
+                project: 'default',
+                todos: [],
+                files_changed: [],
+            });
+        }
+    },
+};

package/dist/utils/migration/openaiAdapter.js ADDED Viewed

@@ -0,0 +1,88 @@
+/**
+ * ═══════════════════════════════════════════════════════════════════
+ * OpenAI / ChatGPT History JSON Adapter
+ * ═══════════════════════════════════════════════════════════════════
+ *
+ * REVIEWER NOTE — Tool Call Normalization:
+ *   OpenAI's chat completion format includes structured `tool_calls`
+ *   arrays on assistant messages. These contain function names, arguments,
+ *   and call IDs. Since Prism's ledger stores content as plain text,
+ *   we inline tool calls as readable markers: `[Tool Use: function_name]`.
+ *
+ *   The original tool names are also preserved in `NormalizedTurn.tools[]`
+ *   for keyword indexing in the Mind Palace.
+ *
+ * TIMESTAMP HANDLING:
+ *   OpenAI uses Unix epoch seconds in `created_at` (not milliseconds).
+ *   We convert: `new Date(created_at * 1000).toISOString()`.
+ *   Standard ISO timestamps in `entry.timestamp` take priority.
+ *
+ * SOURCE FORMAT (simplified):
+ *   [
+ *     { role: "user", content: "..." },
+ *     { role: "assistant", content: "...", tool_calls: [{ function: { name: "..." } }] }
+ *   ]
+ * ═══════════════════════════════════════════════════════════════════
+ */
+import fs from 'node:fs';
+import { chain } from 'stream-chain';
+import StreamArray from 'stream-json/streamers/stream-array.js';
+import { normalizeContent } from './utils.js';
+/**
+ * Migration adapter for OpenAI / ChatGPT history exports (a single JSON array).
+ *
+ * `parse` streams the array element by element, skips system/tool messages,
+ * inlines tool calls as `[Tool Use: name]` markers and reports each remaining
+ * entry to `onTurn` as a normalized turn.
+ */
+export const openaiAdapter = {
+    id: 'openai',
+    /**
+     * Filename-based detection for OpenAI/ChatGPT files.
+     *
+     * Matches "openai" or "chatgpt" anywhere in the path (case-insensitive).
+     * Files without those markers are never claimed by this adapter.
+     *
+     * @param {string} filePath - Path of the candidate file.
+     * @returns {boolean} True when the path mentions "openai" or "chatgpt".
+     */
+    canHandle(filePath) {
+        const lower = filePath.toLowerCase();
+        return lower.includes('openai') || lower.includes('chatgpt');
+    },
+    /**
+     * Streams a JSON array file and emits one normalized turn per message.
+     *
+     * @param {string} filePath - Path of the JSON file to read.
+     * @param {(turn: object) => (void | Promise<void>)} onTurn - Called (and
+     *   awaited) for every user/assistant message, in order.
+     * @returns {Promise<void>} Resolves when the whole array has been consumed.
+     */
+    async parse(filePath, onTurn) {
+        // ── Streaming Pipeline ────────────────────────────────────────
+        // File stream → JSON parser + array streamer; elements arrive one at
+        // a time as `{ key, value }`.
+        const pipeline = chain([
+            fs.createReadStream(filePath),
+            StreamArray.withParser(),
+        ]);
+        // Roles that carry no conversational turn and are not migrated.
+        const skippedRoles = new Set(['system', 'tool']);
+        for await (const { value: entry } of pipeline) {
+            if (entry === null || typeof entry !== 'object') {
+                continue; // Not a message object — nothing to migrate
+            }
+            // ── Role Normalization ──────────────────────────────────────
+            // System and tool messages are skipped outright; previously they
+            // fell through and were imported as if the user had written them.
+            if (skippedRoles.has(entry.role)) {
+                continue;
+            }
+            const role = entry.role === 'assistant' ? 'assistant' : 'user';
+            let content = normalizeContent(entry.content || "");
+            // ── Tool Call Inlining ──────────────────────────────────────
+            // Structured tool_calls become readable markers appended to the
+            // content. `toolNames` stays undefined when there are no tool
+            // calls, matching the shape emitted for plain messages.
+            let toolNames;
+            if (Array.isArray(entry.tool_calls)) {
+                const markers = entry.tool_calls
+                    .map((tc) => `[Tool Use: ${tc?.function?.name || tc?.id || 'unknown'}]`)
+                    .join("\n");
+                content = `${content}\n${markers}`.trim();
+                // Keep only real function names for keyword indexing.
+                toolNames = entry.tool_calls
+                    .map((tc) => tc?.function?.name)
+                    .filter((name) => typeof name === 'string' && name.length > 0);
+            }
+            // ── Timestamp Fallback Chain ────────────────────────────────
+            // Priority: entry.timestamp (ISO) > entry.created_at (Unix epoch
+            // SECONDS, hence × 1000) > now(). A created_at that does not yield
+            // a valid date falls back to now() instead of letting
+            // toISOString() throw and abort the whole import.
+            let timestamp = entry.timestamp;
+            if (!timestamp) {
+                const created = new Date(Number(entry.created_at) * 1000);
+                const createdIsUsable = Boolean(entry.created_at) && !Number.isNaN(created.getTime());
+                timestamp = createdIsUsable ? created.toISOString() : new Date().toISOString();
+            }
+            await onTurn({
+                role,
+                content,
+                timestamp,
+                sessionId: 'openai-migration',
+                project: 'default',
+                todos: [],
+                files_changed: [],
+                tools: toolNames,
+            });
+        }
+    },
+};

package/dist/utils/migration/types.js ADDED Viewed

@@ -0,0 +1,18 @@
+/**
+ * ═══════════════════════════════════════════════════════════════════
+ * Migration Types — Strategy Pattern Interfaces
+ * ═══════════════════════════════════════════════════════════════════
+ *
+ * REVIEWER NOTE:
+ *   This file defines the core contracts for the Universal Migration
+ *   Utility. Each LLM format (Claude, Gemini, OpenAI) implements the
+ *   MigrationAdapter interface. All turns are normalized into the
+ *   NormalizedTurn schema before being mapped to Prism's LedgerEntry.
+ *
+ * DESIGN DECISION:
+ *   NormalizedTurn is intentionally NOT a subset of LedgerEntry.
+ *   The orchestrator (universalImporter.ts) performs the final mapping.
+ *   This keeps adapters decoupled from storage internals.
+ * ═══════════════════════════════════════════════════════════════════
+ */
+// No runtime exports: the bare `export {}` only marks this file as an ES module.
+// NOTE(review): presumably the compiled remnant of a types-only TypeScript source — confirm.
+export {};

package/dist/utils/migration/utils.js ADDED Viewed

@@ -0,0 +1,99 @@
+/**
+ * ═══════════════════════════════════════════════════════════════════
+ * Migration Utilities — Shared Normalization Helpers
+ * ═══════════════════════════════════════════════════════════════════
+ *
+ * REVIEWER NOTE:
+ *   These utilities handle the messiest part of cross-format migration:
+ *   normalizing wildly different content representations into plain strings.
+ *
+ *   Claude uses: `content: [{ type: 'text', text: '...' }]` (array of blocks)
+ *   Gemini uses: `parts: [{ text: '...' }]` (array of parts)
+ *   OpenAI uses: `content: '...'` (plain string, usually)
+ *
+ *   The `normalizeContent` function handles all three shapes.
+ * ═══════════════════════════════════════════════════════════════════
+ */
+import fs from 'node:fs';
+/**
+ * Documentation for `normalizeContent` (defined at the bottom of this file).
+ *
+ * Normalizes content from various LLM formats into a plain string.
+ *
+ * Handles four cases:
+ *   1. Plain string → returned as-is
+ *   2. Array of objects with `.text` → concatenated
+ *   3. Array of strings → joined
+ *   4. Anything else → empty string (safe fallback)
+ *
+ * REVIEWER NOTE:
+ *   Gemini's `functionCall` parts (which have `.functionCall` but no `.text`)
+ *   are intentionally dropped here. Note that the Gemini adapter
+ *   (geminiAdapter.js) does not extract them separately either, so Gemini
+ *   tool calls are not carried over by the importer. Returning "" for unknown
+ *   part types is the correct behavior — it avoids injecting [object Object] strings.
+ */
+/**
+ * Content-sniffs the first 4 KB of a file to guess which LLM export format it is.
+ *
+ * REVIEWER NOTE:
+ *   Best-effort heuristic that supplements filename-based detection. Only the
+ *   first 4096 bytes are read, so probing stays cheap on large files.
+ *
+ * Detection order (first match wins):
+ *   1. Claude  → content starts with `{` after leading whitespace (treated as JSONL)
+ *   2. Gemini  → `"parts"` or `"role":"model"`
+ *   3. OpenAI  → `"tool_calls"`, `"created_at"` or `"role":"system"`
+ *   4. Claude  → `"message":{` or `"type":"assistant"` inside a JSON array
+ *
+ * @param {string} filePath - Path of the file to inspect.
+ * @returns {'claude' | 'gemini' | 'openai' | null} The adapter ID, or null when
+ *   the file is empty or no marker matched.
+ * @throws Propagates node:fs errors when the file cannot be opened or read.
+ */
+export function sniffFormat(filePath) {
+    const SNIFF_BYTES = 4096;
+    const buf = Buffer.alloc(SNIFF_BYTES);
+    let bytesRead;
+    // `fs` comes from the module-level import: `require` does not exist in an
+    // ES module, so the previous in-function require() threw on every call.
+    const fd = fs.openSync(filePath, 'r');
+    try {
+        bytesRead = fs.readSync(fd, buf, 0, SNIFF_BYTES, 0);
+    }
+    finally {
+        // Always release the descriptor, even when the read fails.
+        fs.closeSync(fd);
+    }
+    if (bytesRead === 0) {
+        return null;
+    }
+    const head = buf.toString('utf8', 0, bytesRead);
+    const hasAny = (...markers) => markers.some((marker) => head.includes(marker));
+    // ── JSONL detection (Claude) ────────────────────────────────────
+    // A file whose first non-whitespace character is `{` is not a JSON array,
+    // so it is treated as JSONL. Subsequent lines are not inspected.
+    if (head.trimStart().startsWith('{')) {
+        return 'claude';
+    }
+    // ── JSON array content inspection ──────────────────────────────
+    // Markers are matched with and without a space after the colon.
+    if (hasAny('"parts"', '"role":"model"', '"role": "model"')) {
+        return 'gemini';
+    }
+    if (hasAny('"tool_calls"', '"created_at"', '"role":"system"', '"role": "system"')) {
+        return 'openai';
+    }
+    // Claude markers in JSON-array form (less common but possible)
+    if (hasAny('"message":{', '"message": {', '"type":"assistant"', '"type": "assistant"')) {
+        return 'claude';
+    }
+    return null;
+}
+/**
+ * Normalizes message content from the supported LLM formats into a plain string.
+ *
+ *   - string                          → returned unchanged
+ *   - array of strings / `{ text }`   → string pieces concatenated with no separator
+ *   - anything else                   → ""
+ *
+ * Array elements that are not strings and have no string `text` property
+ * (null, numbers, Gemini `functionCall` parts, …) contribute "". This keeps
+ * "[object Object]" and "undefined" out of the stored text.
+ *
+ * @param {unknown} content - Raw `content` / `parts` value from an export entry.
+ * @returns {string} The extracted text; never null or undefined.
+ */
+export function normalizeContent(content) {
+    if (typeof content === 'string') {
+        return content;
+    }
+    if (!Array.isArray(content)) {
+        return "";
+    }
+    return content
+        .map((part) => {
+        if (typeof part === 'string') {
+            return part;
+        }
+        // Covers Claude's `{ type: 'text', text }` and Gemini's `{ text }`.
+        // Optional chaining guards against null/undefined elements, and the
+        // typeof check rejects non-string `text` values.
+        const text = part?.text;
+        return typeof text === 'string' ? text : "";
+    })
+        .join("");
+}