prism-mcp-server 5.1.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +379 -1385
- package/dist/dashboard/server.js +151 -15
- package/dist/dashboard/ui.js +165 -1
- package/dist/storage/sqlite.js +30 -0
- package/dist/storage/supabaseMigrations.js +19 -1
- package/dist/tools/compactionHandler.js +17 -7
- package/dist/tools/sessionMemoryDefinitions.js +7 -0
- package/dist/tools/sessionMemoryHandlers.js +68 -4
- package/dist/utils/migration/claudeAdapter.js +131 -0
- package/dist/utils/migration/geminiAdapter.js +87 -0
- package/dist/utils/migration/openaiAdapter.js +88 -0
- package/dist/utils/migration/types.js +18 -0
- package/dist/utils/migration/utils.js +99 -0
- package/dist/utils/testUniversalImporter.js +10 -0
- package/dist/utils/universalImporter.js +295 -0
- package/package.json +8 -4
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
3
|
+
* Claude Code JSONL Adapter
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
5
|
+
*
|
|
6
|
+
* REVIEWER NOTE — Claude Code Streaming Deduplication:
|
|
7
|
+
* Claude Code does NOT write one clean JSON line per turn. It writes
|
|
8
|
+
* to the JSONL file DURING streaming. This means you see multiple
|
|
9
|
+
* JSON lines for the exact same `message.id` as the response streams in.
|
|
10
|
+
*
|
|
11
|
+
* If we processed every `type: assistant` line blindly, we'd ingest
|
|
12
|
+
* highly fragmented or duplicate entries. The solution is to aggregate
|
|
13
|
+
* by `message.id` and only flush the LATEST version of each assistant
|
|
14
|
+
* message when a user message arrives (or at end-of-file).
|
|
15
|
+
*
|
|
16
|
+
* STREAMING STRATEGY:
|
|
17
|
+
* Uses Node's readline interface for true line-by-line processing.
|
|
18
|
+
* Memory usage is O(pending_assistant_chunks), not O(file_size).
|
|
19
|
+
* For a typical session, pending chunks rarely exceed 2-3 entries.
|
|
20
|
+
*
|
|
21
|
+
* SOURCE FORMAT (simplified):
|
|
22
|
+
* { type: "assistant", message: { id: "msg_xxx", content: [...] }, timestamp: "..." }
|
|
23
|
+
* { type: "user", content: "...", timestamp: "..." }
|
|
24
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
25
|
+
*/
|
|
26
|
+
import fs from 'node:fs';
|
|
27
|
+
import readline from 'node:readline';
|
|
28
|
+
import { normalizeContent } from './utils.js';
|
|
29
|
+
export const claudeAdapter = {
    id: 'claude',
    /**
     * Claude Code uses .jsonl (JSON Lines) format exclusively.
     * This is a reliable heuristic — no other major LLM uses .jsonl for exports.
     */
    canHandle(filePath) {
        return filePath.endsWith('.jsonl');
    },
    /**
     * Streams a Claude Code JSONL file line-by-line and emits normalized turns
     * via `onTurn`.
     *
     * Claude Code writes to the JSONL file DURING streaming, so the same
     * `message.id` can appear on multiple lines. Assistant lines are therefore
     * buffered by id (last write wins — the final chunk carries the complete
     * content) and flushed when a user message arrives or at end-of-file.
     * Memory usage is O(pending_assistant_chunks), not O(file_size).
     *
     * @param {string} filePath - Path to the .jsonl export.
     * @param {(turn: object) => Promise<void>} onTurn - Async sink for each normalized turn.
     */
    async parse(filePath, onTurn) {
        const fileStream = fs.createReadStream(filePath);
        const rl = readline.createInterface({
            input: fileStream,
            crlfDelay: Infinity, // Handle both \n and \r\n line endings
        });
        // ── Deduplication Buffer ──────────────────────────────────────
        // Accumulates assistant chunks by message.id. When a user message
        // arrives, we flush all pending assistant messages (keeping only
        // the latest content for each ID) and then emit the user message.
        const pendingAssistantChunks = new Map();
        // Emits the latest buffered version of every pending assistant
        // message, then empties the buffer. Shared by the user-message
        // flush and the end-of-file flush so the two paths cannot drift.
        const flushPendingAssistants = async () => {
            for (const [id, msg] of pendingAssistantChunks) {
                await onTurn({
                    role: 'assistant',
                    content: msg.content,
                    timestamp: msg.timestamp,
                    sessionId: 'claude-migration',
                    project: 'default',
                    todos: [],
                    files_changed: [],
                    messageId: id,
                    tools: msg.tools,
                });
            }
            pendingAssistantChunks.clear();
        };
        for await (const line of rl) {
            if (!line.trim())
                continue; // Skip blank lines
            try {
                const entry = JSON.parse(line);
                // ── Role Detection ─────────────────────────────────────────
                // Claude Code logs have two role indicators:
                // 1. `entry.type` (top-level) — "assistant" or "user"
                // 2. `entry.message.role` (nested) — "assistant" or "user"
                // We check both for robustness.
                const role = entry.type === 'assistant' || entry.message?.role === 'assistant' ? 'assistant' : 'user';
                // ── Content Extraction ─────────────────────────────────────
                // Content can be at `entry.content` or nested at `entry.message.content`.
                // Both may be strings or arrays of content blocks.
                const content = normalizeContent(entry.content || entry.message?.content || "");
                const timestamp = entry.timestamp || new Date().toISOString();
                // ── Message ID for Deduplication ───────────────────────────
                // Claude logs may use `entry.id`, `entry.message.id`, or `entry.requestId`.
                // Any of these can serve as the deduplication key.
                const messageId = entry.id || entry.message?.id || entry.requestId;
                if (role === 'assistant') {
                    if (messageId) {
                        // ── Streaming Chunk Aggregation ────────────────────
                        // For assistant messages with an ID we DON'T emit
                        // immediately. We overwrite the buffer entry — the last
                        // chunk for a given ID contains the complete content
                        // (Claude rewrites the full message in the final
                        // streaming chunk).
                        pendingAssistantChunks.set(messageId, { content, tools: [], timestamp });
                    }
                    else {
                        // No id → cannot deduplicate, so emit directly.
                        // (Previously these entries were silently dropped.)
                        await onTurn({
                            role: 'assistant',
                            content,
                            timestamp,
                            sessionId: 'claude-migration',
                            project: 'default',
                            todos: [],
                            files_changed: [],
                            messageId,
                            tools: [],
                        });
                    }
                    continue;
                }
                // ── User Message: Flush Pending Assistants ──────────────────
                // A user message signals the end of the previous assistant turn.
                // Flush all pending assistant chunks before emitting the user turn.
                if (role === 'user') {
                    await flushPendingAssistants();
                    await onTurn({
                        role: 'user',
                        content,
                        timestamp,
                        sessionId: 'claude-migration',
                        project: 'default',
                        todos: [],
                        files_changed: [],
                        messageId,
                    });
                }
            }
            catch {
                // ── Malformed Line Handling ──────────────────────────────────
                // Skip lines that fail JSON parsing. This is expected for
                // corrupted exports or partial writes during Claude crashes.
            }
        }
        // ── Final Flush ──────────────────────────────────────────────────
        // If the file ends with assistant messages (no trailing user message),
        // we must flush any remaining pending chunks.
        await flushPendingAssistants();
    },
};
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
3
|
+
* Gemini History JSON Adapter
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
5
|
+
*
|
|
6
|
+
* REVIEWER NOTE — Streaming Large JSON Arrays:
|
|
7
|
+
* Gemini exports history as a single JSON array (not JSONL).
|
|
8
|
+
* A naive `JSON.parse(fs.readFileSync(...))` would load the entire
|
|
9
|
+
* file into memory — OOM for 100MB+ exports.
|
|
10
|
+
*
|
|
11
|
+
* We use `stream-json/StreamArray` to parse array elements one at a
|
|
12
|
+
* time in streaming fashion. Memory usage is O(1) per entry.
|
|
13
|
+
*
|
|
14
|
+
* ROLE MAPPING:
|
|
15
|
+
* Gemini uses "model" for assistant responses (not "assistant").
|
|
16
|
+
* We normalize this to "assistant" for Prism's unified schema.
|
|
17
|
+
*
|
|
18
|
+
* TIMESTAMP FALLBACK:
|
|
19
|
+
* Gemini SDK history arrays often lack per-turn timestamps.
|
|
20
|
+
* We fall back to `createTime` (if present) or current time.
|
|
21
|
+
* The orchestrator may override timestamps via ensureChronology.
|
|
22
|
+
*
|
|
23
|
+
* SOURCE FORMAT (simplified):
|
|
24
|
+
* [
|
|
25
|
+
* { role: "user", parts: [{ text: "..." }] },
|
|
26
|
+
* { role: "model", parts: [{ text: "..." }] }
|
|
27
|
+
* ]
|
|
28
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
29
|
+
*/
|
|
30
|
+
import fs from 'node:fs';
|
|
31
|
+
import { chain } from 'stream-chain';
|
|
32
|
+
import StreamArray from 'stream-json/streamers/stream-array.js';
|
|
33
|
+
import { normalizeContent } from './utils.js';
|
|
34
|
+
export const geminiAdapter = {
    id: 'gemini',
    /**
     * Auto-detection heuristic for Gemini files.
     *
     * Both Gemini and OpenAI export .json, so we disambiguate by filename
     * convention: paths mentioning "openai" or "chatgpt" are deferred to the
     * OpenAI adapter; any other .json defaults to Gemini. Users should prefer
     * an explicit --format= flag — this heuristic is a convenience fallback.
     */
    canHandle(filePath) {
        const lower = filePath.toLowerCase();
        if (!lower.endsWith('.json'))
            return false;
        return !(lower.includes('openai') || lower.includes('chatgpt'));
    },
    /**
     * Streams a Gemini history JSON array and emits normalized turns.
     *
     * A naive JSON.parse of the whole file would OOM on 100MB+ exports, so
     * `StreamArray.withParser()` parses array elements one at a time —
     * memory stays O(1) per entry. Each streamed item has the shape
     * { key: arrayIndex, value: parsedElement }.
     *
     * @param {string} filePath - Path to the Gemini .json export.
     * @param {(turn: object) => Promise<void>} onTurn - Async sink for each normalized turn.
     */
    async parse(filePath, onTurn) {
        const pipeline = chain([
            fs.createReadStream(filePath),
            StreamArray.withParser(),
        ]);
        for await (const item of pipeline) {
            const entry = item.value;
            // Gemini labels AI responses 'model'; some exports use 'assistant'.
            // Both map to 'assistant' in the normalized schema; everything
            // else is treated as a user turn.
            const isAssistant = entry.role === 'model' || entry.role === 'assistant';
            // Content lives in `parts` ([{ text: '...' }]) for standard
            // exports, with `entry.content` as a non-standard fallback.
            const text = normalizeContent(entry.parts || entry.content || "");
            // Timestamp priority: entry.timestamp > entry.createTime > now().
            // NOTE(review): the now() fallback stamps every untimestamped turn
            // identically, which can confuse session_date splitting downstream
            // — a known, accepted tradeoff for the initial implementation.
            const when = entry.timestamp || entry.createTime || new Date().toISOString();
            await onTurn({
                role: isAssistant ? 'assistant' : 'user',
                content: text,
                timestamp: when,
                sessionId: 'gemini-migration',
                project: 'default',
                todos: [],
                files_changed: [],
            });
        }
    },
};
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
3
|
+
* OpenAI / ChatGPT History JSON Adapter
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
5
|
+
*
|
|
6
|
+
* REVIEWER NOTE — Tool Call Normalization:
|
|
7
|
+
* OpenAI's chat completion format includes structured `tool_calls`
|
|
8
|
+
* arrays on assistant messages. These contain function names, arguments,
|
|
9
|
+
* and call IDs. Since Prism's ledger stores content as plain text,
|
|
10
|
+
* we inline tool calls as readable markers: `[Tool Use: function_name]`.
|
|
11
|
+
*
|
|
12
|
+
* The original tool names are also preserved in `NormalizedTurn.tools[]`
|
|
13
|
+
* for keyword indexing in the Mind Palace.
|
|
14
|
+
*
|
|
15
|
+
* TIMESTAMP HANDLING:
|
|
16
|
+
* OpenAI uses Unix epoch seconds in `created_at` (not milliseconds).
|
|
17
|
+
* We convert: `new Date(created_at * 1000).toISOString()`.
|
|
18
|
+
* Standard ISO timestamps in `entry.timestamp` take priority.
|
|
19
|
+
*
|
|
20
|
+
* SOURCE FORMAT (simplified):
|
|
21
|
+
* [
|
|
22
|
+
* { role: "user", content: "..." },
|
|
23
|
+
* { role: "assistant", content: "...", tool_calls: [{ function: { name: "..." } }] }
|
|
24
|
+
* ]
|
|
25
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
26
|
+
*/
|
|
27
|
+
import fs from 'node:fs';
|
|
28
|
+
import { chain } from 'stream-chain';
|
|
29
|
+
import StreamArray from 'stream-json/streamers/stream-array.js';
|
|
30
|
+
import { normalizeContent } from './utils.js';
|
|
31
|
+
export const openaiAdapter = {
    id: 'openai',
    /**
     * Auto-detection heuristic for OpenAI/ChatGPT files.
     *
     * Matches files with "openai" or "chatgpt" anywhere in the path.
     * This is intentionally broad — ChatGPT export filenames vary widely.
     * For ambiguous files (e.g., `history.json`), users MUST use --format=openai.
     */
    canHandle(filePath) {
        const lower = filePath.toLowerCase();
        return lower.includes('openai') || lower.includes('chatgpt');
    },
    /**
     * Streams an OpenAI/ChatGPT history JSON array and emits normalized turns.
     *
     * Structured `tool_calls` on assistant messages are inlined into the text
     * as `[Tool Use: function_name]` markers (Prism's ledger stores plain
     * text), and the raw function names are preserved in `tools[]` for
     * keyword indexing.
     *
     * @param {string} filePath - Path to the OpenAI/ChatGPT .json export.
     * @param {(turn: object) => Promise<void>} onTurn - Async sink for each normalized turn.
     */
    async parse(filePath, onTurn) {
        // ── Streaming Pipeline ────────────────────────────────────────
        // Same OOM-safe pattern as geminiAdapter. See that file for details.
        const pipeline = chain([
            fs.createReadStream(filePath),
            StreamArray.withParser(),
        ]);
        for await (const { value: entry } of pipeline) {
            // ── Role Filtering ──────────────────────────────────────────
            // OpenAI also has 'system' and 'tool' roles. Only 'user' and
            // 'assistant' turns are meaningful for migration, so skip the
            // rest. (Previously non-assistant roles were misclassified as
            // 'user' turns, polluting the imported conversation.)
            if (entry.role !== 'user' && entry.role !== 'assistant')
                continue;
            const role = entry.role;
            let content = normalizeContent(entry.content || "");
            // ── Tool Call Inlining ──────────────────────────────────────
            // Convert structured tool_calls into human-readable content markers.
            // This preserves the semantic intent while keeping storage as plain text.
            if (entry.tool_calls) {
                const tools = entry.tool_calls
                    .map((tc) => `[Tool Use: ${tc.function?.name || tc.id}]`)
                    .join("\n");
                content = `${content}\n${tools}`.trim();
            }
            // ── Timestamp Fallback Chain ────────────────────────────────
            // Priority: entry.timestamp (ISO) > entry.created_at (Unix epoch) > now()
            // REVIEWER NOTE: OpenAI's `created_at` is in SECONDS, not milliseconds.
            // Multiplying by 1000 is critical — without it, dates land in 1970.
            const timestamp = entry.timestamp
                || (entry.created_at ? new Date(entry.created_at * 1000).toISOString() : new Date().toISOString());
            await onTurn({
                role,
                content,
                timestamp,
                sessionId: 'openai-migration',
                project: 'default',
                todos: [],
                files_changed: [],
                // ── Keyword Indexing ────────────────────────────────────────
                // Extract tool function names for Prism's keyword search index.
                // Optional chaining yields `undefined` for the whole array when
                // there are no tool_calls; the explicit filter drops entries
                // whose function name is missing (map alone would keep them).
                tools: entry.tool_calls
                    ?.map((tc) => tc.function?.name)
                    .filter((name) => name != null),
            });
        }
    },
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * ═══════════════════════════════════════════════════════════════════
 * Migration Types — Strategy Pattern Interfaces
 * ═══════════════════════════════════════════════════════════════════
 *
 * REVIEWER NOTE:
 * This file defines the core contracts for the Universal Migration
 * Utility. Each LLM format (Claude, Gemini, OpenAI) implements the
 * MigrationAdapter interface. All turns are normalized into the
 * NormalizedTurn schema before being mapped to Prism's LedgerEntry.
 *
 * DESIGN DECISION:
 * NormalizedTurn is intentionally NOT a subset of LedgerEntry.
 * The orchestrator (universalImporter.ts) performs the final mapping.
 * This keeps adapters decoupled from storage internals.
 *
 * NOTE: This is compiled TypeScript output — the interfaces are
 * type-only and erased at build time, so the module body is empty.
 * The bare `export {}` marks the file as an ES module.
 * ═══════════════════════════════════════════════════════════════════
 */
export {};
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
3
|
+
* Migration Utilities — Shared Normalization Helpers
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
5
|
+
*
|
|
6
|
+
* REVIEWER NOTE:
|
|
7
|
+
* These utilities handle the messiest part of cross-format migration:
|
|
8
|
+
* normalizing wildly different content representations into plain strings.
|
|
9
|
+
*
|
|
10
|
+
* Claude uses: `content: [{ type: 'text', text: '...' }]` (array of blocks)
|
|
11
|
+
* Gemini uses: `parts: [{ text: '...' }]` (array of parts)
|
|
12
|
+
* OpenAI uses: `content: '...'` (plain string, usually)
|
|
13
|
+
*
|
|
14
|
+
* The `normalizeContent` function handles all three shapes.
|
|
15
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
16
|
+
*/
|
|
17
|
+
// Imported with a proper ESM declaration: the previous implementation called
// `require('node:fs')` inside sniffFormat, which throws a ReferenceError in an
// ES module (`require` is not defined) the first time the function runs.
import fs from 'node:fs';
/**
 * Content-sniffs the first ~4KB of a file to detect its LLM format.
 *
 * REVIEWER NOTE:
 * This is a best-effort heuristic that supplements filename-based detection.
 * It reads only the first 4KB to stay fast and memory-safe on large files.
 * Returns the adapter ID ('claude', 'gemini', 'openai') or null if ambiguous.
 *
 * Detection markers:
 *   Claude → JSONL format (newline-delimited), or `"message":{"id":` / `"type":"assistant"`
 *   Gemini → `"parts":` array or `"role":"model"`
 *   OpenAI → `"tool_calls":` or `"created_at":` (Unix epoch) or `"role":"system"`
 *
 * @param {string} filePath - Path of the file to sniff.
 * @returns {'claude'|'gemini'|'openai'|null} Detected adapter id, or null.
 */
export function sniffFormat(filePath) {
    const fd = fs.openSync(filePath, 'r');
    const buf = Buffer.alloc(4096);
    let bytesRead;
    try {
        bytesRead = fs.readSync(fd, buf, 0, 4096, 0);
    }
    finally {
        // Always release the descriptor, even if the read throws.
        fs.closeSync(fd);
    }
    if (bytesRead === 0)
        return null;
    const head = buf.toString('utf8', 0, bytesRead);
    // ── JSONL detection (Claude) ────────────────────────────────────
    // If the file starts with `{` (not `[`), it's JSONL, not a JSON array.
    // Claude Code is the only major LLM that uses JSONL for exports.
    const trimmed = head.trimStart();
    if (trimmed.startsWith('{') && !trimmed.startsWith('[')) {
        return 'claude';
    }
    // ── JSON array content inspection ──────────────────────────────
    // For JSON arrays, inspect the content for format-specific markers.
    // Gemini markers: "parts" array or "role":"model"
    if (head.includes('"parts"') || head.includes('"role":"model"') || head.includes('"role": "model"')) {
        return 'gemini';
    }
    // OpenAI markers: "tool_calls", "created_at" (Unix epoch), or "role":"system"
    if (head.includes('"tool_calls"') || head.includes('"created_at"') ||
        head.includes('"role":"system"') || head.includes('"role": "system"')) {
        return 'openai';
    }
    // Claude markers in JSON form (less common but possible)
    if (head.includes('"message":{') || head.includes('"message": {') ||
        head.includes('"type":"assistant"') || head.includes('"type": "assistant"')) {
        return 'claude';
    }
    return null;
}
/**
 * Normalizes content from various LLM formats into a plain string.
 *
 * Handles these shapes:
 *   1. Plain string → returned as-is
 *   2. Array of objects with `.text` → concatenated
 *   3. Array of strings → joined
 *   4. Anything else → empty string (safe fallback)
 *
 * REVIEWER NOTE:
 * Gemini's `functionCall` parts (which have `.functionCall` but no `.text`)
 * are intentionally dropped here. They are handled separately by the
 * Gemini adapter via tool-call extraction. Returning "" for unknown part
 * types is the correct behavior — it avoids injecting [object Object] strings.
 *
 * @param {string|Array<string|{text?: string, type?: string}>|*} content - Raw content in any supported shape.
 * @returns {string} Plain-text content; "" for unrecognized input.
 */
export function normalizeContent(content) {
    if (typeof content === 'string')
        return content;
    if (Array.isArray(content)) {
        return content
            .map((part) => {
            if (typeof part === 'string')
                return part;
            // Handle Claude's `{ type: 'text', text: '...' }` and Gemini's `{ text: '...' }`
            if (part.text)
                return part.text;
            // Explicit type-check for safety (redundant with above, but clear for reviewers)
            if (part.type === 'text')
                return part.text;
            return "";
        })
            .join("");
    }
    return "";
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { importHistory } from './universalImporter.js';
// Fixture paths for the manual dry-run smoke tests below.
const CLAUDE_SAMPLE = '/tmp/sample_claude_history.jsonl';
const GEMINI_SAMPLE = '/tmp/sample_gemini_history.json';
/**
 * Runs two dry-run imports (Claude JSONL, then Gemini JSON) against the
 * sample fixtures. Dry-run mode means nothing is written to storage —
 * output is verbose console logging only.
 */
async function test() {
    console.log('--- TEST 1: Claude JSONL (Dry Run) ---');
    await importHistory(CLAUDE_SAMPLE, {
        format: 'claude-jsonl',
        dryRun: true,
        verbose: true,
    });
    console.log('\n--- TEST 2: Gemini JSON (Dry Run) ---');
    await importHistory(GEMINI_SAMPLE, {
        format: 'gemini-json',
        dryRun: true,
        verbose: true,
        projectId: 'gemini-test',
    });
}
test().catch(console.error);
|