@gswangg/duncan-cc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.ts ADDED
@@ -0,0 +1,257 @@
1
+ /**
2
+ * CC Session JSONL Parser
3
+ *
4
+ * Parses Claude Code session files into structured entries.
5
+ * Separates transcript messages from metadata entries.
6
+ *
7
+ * Equivalent to CC's mu() + G26() separation logic.
8
+ */
9
+
10
+ // ============================================================================
11
+ // Types
12
+ // ============================================================================
13
+
14
/**
 * A transcript entry read from a Claude Code session JSONL file.
 *
 * Only `uuid`, `parentUuid`, `type`, `timestamp` and `message` are required;
 * everything else is optional, and unknown keys are accepted through the
 * index signature at the bottom.
 */
export interface CCMessage {
  uuid: string;
  /** `null` when the entry has no parent. */
  parentUuid: string | null;
  session_id?: string;
  /** Entry kind; these are the five values accepted by `isTranscriptMessage`. */
  type: "user" | "assistant" | "system" | "progress" | "attachment";
  timestamp: string;

  // Boolean markers.
  isSidechain?: boolean;
  isMeta?: boolean;
  isVisibleInTranscriptOnly?: boolean;
  isCompactSummary?: boolean;
  /** Checked by `isApiErrorMessage` together with `message.model`. */
  isApiErrorMessage?: boolean;
  apiError?: string;
  requestId?: string;

  // Tool-use linkage (both snake_case and camelCase spellings are declared).
  parent_tool_use_id?: string | null;
  parentToolUseID?: string | null;
  toolUseID?: string | null;
  toolUseResult?: string;
  sourceToolAssistantUUID?: string;

  imagePasteIds?: string[];
  permissionMode?: string;
  origin?: { kind: string };
  gitBranch?: string;
  teamName?: string;
  cwd?: string;
  /** On system entries: e.g. "compact_boundary" or "local_command". */
  subtype?: string;
  content?: string;
  compactMetadata?: {
    preservedSegment?: {
      headUuid: string;
      tailUuid: string;
      anchorUuid: string;
    };
  };
  attachment?: any;

  /** The message payload: role plus string or block-array content. */
  message: {
    role: string;
    content: string | any[];
    model?: string;
    usage?: any;
    id?: string;
    type?: string;
    stop_reason?: string | null;
    stop_sequence?: string | null;
  };

  /** Any other fields on the entry are passed through untouched. */
  [key: string]: any;
}
61
+
62
/** A `summary` metadata line: summary text attached to a leaf message uuid. */
export interface SummaryEntry {
  type: "summary";
  /** Uuid of the leaf message this summary belongs to. */
  leafUuid: string;
  summary: string;
}
67
+
68
/**
 * A `content-replacement` metadata line carrying a list of replacement
 * records, each tied to a tool-use id.
 */
export interface ContentReplacementEntry {
  type: "content-replacement";
  sessionId?: string;
  agentId?: string;
  replacements: Array<{
    kind: string;
    toolUseId: string;
    replacement: string;
  }>;
}
78
+
79
/** Generic shape of a non-transcript line: a `type` tag plus arbitrary fields. */
export interface MetadataEntry {
  type: string;
  sessionId?: string;
  /** Type-specific payload fields. */
  [key: string]: any;
}
84
+
85
/**
 * Output of `parseSession`: transcript messages plus metadata entries,
 * bucketed by entry type.
 */
export interface ParsedSession {
  /** Transcript entries keyed by their `uuid`. */
  messages: Map<string, CCMessage>;
  /** `summary` entries: leafUuid → summary text. */
  summaries: Map<string, string>;
  /** `custom-title` entries: sessionId → title. */
  customTitles: Map<string, string>;
  /** `tag` entries: sessionId → tag. */
  tags: Map<string, string>;
  /** `agent-name` entries: sessionId → name. */
  agentNames: Map<string, string>;
  /** `agent-color` entries: sessionId → color. */
  agentColors: Map<string, string>;
  /** `agent-setting` entries: sessionId → setting. */
  agentSettings: Map<string, string>;
  /** `mode` entries: sessionId → mode. */
  modes: Map<string, string>;
  /**
   * `content-replacement` entries, accumulated per key. The key is the
   * entry's agentId when present, otherwise its sessionId.
   */
  contentReplacements: Map<string, ContentReplacementEntry["replacements"]>;
  /** Every `marble-origami-commit` entry, in file order. */
  contextCollapseCommits: any[];
  /** The last `marble-origami-snapshot` entry seen, or `null` if none. */
  contextCollapseSnapshot: any | null;
}
98
+
99
+ // ============================================================================
100
+ // Entry type checks — mirrors CC's mi(), of(), Ns6()
101
+ // ============================================================================
102
+
103
/**
 * Transcript message check — CC's mi().
 *
 * Returns true when `entry.type` is one of the five transcript kinds
 * ("user", "assistant", "attachment", "system", "progress").
 *
 * Safe on any parsed JSON value: `null`, `undefined` and primitives yield
 * `false` rather than throwing, matching `isCompactBoundary`.
 */
export function isTranscriptMessage(entry: any): entry is CCMessage {
  // Optional chaining: a JSONL line may legally parse to `null`.
  const type = entry?.type;
  return (
    type === "user" ||
    type === "assistant" ||
    type === "attachment" ||
    type === "system" ||
    type === "progress"
  );
}
113
+
114
/**
 * Compact boundary check — CC's of().
 *
 * True only for a system entry whose subtype is "compact_boundary".
 * Nullish input yields false.
 */
export function isCompactBoundary(entry: any): boolean {
  if (entry?.type !== "system") {
    return false;
  }
  return entry.subtype === "compact_boundary";
}
118
+
119
/** Progress `data.type` values treated as ephemeral — CC's Ns6(). */
const EPHEMERAL_PROGRESS_TYPES = new Set<string>();
EPHEMERAL_PROGRESS_TYPES.add("bash_progress");
EPHEMERAL_PROGRESS_TYPES.add("powershell_progress");
EPHEMERAL_PROGRESS_TYPES.add("mcp_progress");

/**
 * Reports whether `type` names an ephemeral progress kind.
 * Non-string input is rejected at runtime and yields false.
 */
export function isEphemeralProgress(type: string): boolean {
  if (typeof type !== "string") {
    return false;
  }
  return EPHEMERAL_PROGRESS_TYPES.has(type);
}
129
+
130
/** Model id found on API-error assistant entries. */
const INTERNAL_ERROR_MODEL = "internal_error";

/**
 * API error message check — CC's Lt1().
 *
 * True when the entry is an assistant message flagged
 * `isApiErrorMessage === true` whose `message.model` equals
 * "internal_error".
 *
 * Nullish or primitive input yields false instead of throwing, matching
 * `isCompactBoundary`.
 */
export function isApiErrorMessage(entry: any): boolean {
  return (
    entry?.type === "assistant" &&
    entry.isApiErrorMessage === true &&
    entry.message?.model === INTERNAL_ERROR_MODEL
  );
}
140
+
141
/**
 * Local command system message check — CC's gp1().
 *
 * True only for a system entry whose subtype is "local_command".
 * Nullish or primitive input yields false instead of throwing, matching
 * `isCompactBoundary`.
 */
export function isLocalCommand(entry: any): boolean {
  return entry?.type === "system" && entry.subtype === "local_command";
}
145
+
146
+ // ============================================================================
147
+ // JSONL Parser — mirrors CC's mu()
148
+ // ============================================================================
149
+
150
/**
 * Parses JSONL text (or a UTF-8 buffer) into an array of values, one per
 * non-empty line.
 *
 * Lines are split on "\n" and trimmed, so surrounding whitespace and a
 * trailing "\r" are ignored. Blank lines and lines that are not valid JSON
 * are skipped silently. Scanning uses `indexOf` rather than `split`, so no
 * intermediate array of lines is built.
 *
 * @param content Raw file content.
 * @returns Parsed values in file order.
 */
export function parseJsonl(content: string | Buffer): any[] {
  const source = typeof content === "string" ? content : content.toString("utf-8");
  const size = source.length;
  const parsed: any[] = [];

  for (let start = 0; start < size; ) {
    const newlineAt = source.indexOf("\n", start);
    const stop = newlineAt === -1 ? size : newlineAt;
    const candidate = source.substring(start, stop).trim();
    start = stop + 1;

    if (candidate.length === 0) {
      continue;
    }

    try {
      parsed.push(JSON.parse(candidate));
    } catch {
      // Best effort: a malformed line is dropped, the rest is kept.
    }
  }

  return parsed;
}
170
+
171
+ // ============================================================================
172
+ // Session Parser — mirrors CC's G26() separation logic
173
+ // ============================================================================
174
+
175
/**
 * Parses a session JSONL file and separates transcript messages from
 * metadata entries.
 *
 * Transcript entries (see `isTranscriptMessage`) are stored by `uuid`, except:
 *  - progress entries whose `data.type` is ephemeral are dropped;
 *  - kept progress entries have a non-empty `data.normalizedMessages`
 *    emptied in place to save memory.
 *
 * Metadata entries are bucketed by `type`. For the per-session maps a later
 * entry with the same key overwrites an earlier one. `content-replacement`
 * entries accumulate under `agentId`, falling back to `sessionId`.
 *
 * Lines that parse to something other than an object (`null`, numbers,
 * strings) are ignored. A `content-replacement` entry whose `replacements`
 * is not an array is skipped instead of throwing.
 *
 * @param content Raw session file content.
 * @returns The separated session data.
 */
export function parseSession(content: string | Buffer): ParsedSession {
  const messages = new Map<string, CCMessage>();
  const summaries = new Map<string, string>();
  const customTitles = new Map<string, string>();
  const tags = new Map<string, string>();
  const agentNames = new Map<string, string>();
  const agentColors = new Map<string, string>();
  const agentSettings = new Map<string, string>();
  const modes = new Map<string, string>();
  const contentReplacements = new Map<string, ContentReplacementEntry["replacements"]>();
  const contextCollapseCommits: any[] = [];
  let contextCollapseSnapshot: any | null = null;

  for (const entry of parseJsonl(content)) {
    // A valid JSONL line can hold `null` or a primitive; none is a session entry.
    if (entry === null || typeof entry !== "object") continue;

    if (isTranscriptMessage(entry)) {
      if (entry.type === "progress" && entry.data && typeof entry.data === "object") {
        const data = entry.data;

        // Skip ephemeral progress messages.
        if ("type" in data && isEphemeralProgress(data.type as string)) continue;

        // Strip normalizedMessages from progress data (save memory).
        if (
          "normalizedMessages" in data &&
          Array.isArray(data.normalizedMessages) &&
          data.normalizedMessages.length > 0
        ) {
          data.normalizedMessages = [];
        }
      }

      messages.set(entry.uuid, entry as CCMessage);
      continue;
    }

    switch (entry.type) {
      case "summary":
        if (entry.leafUuid) summaries.set(entry.leafUuid, entry.summary);
        break;
      case "custom-title":
        if (entry.sessionId) customTitles.set(entry.sessionId, entry.customTitle);
        break;
      case "tag":
        if (entry.sessionId) tags.set(entry.sessionId, entry.tag);
        break;
      case "agent-name":
        if (entry.sessionId) agentNames.set(entry.sessionId, entry.agentName);
        break;
      case "agent-color":
        if (entry.sessionId) agentColors.set(entry.sessionId, entry.agentColor);
        break;
      case "agent-setting":
        if (entry.sessionId) agentSettings.set(entry.sessionId, entry.agentSetting);
        break;
      case "mode":
        if (entry.sessionId) modes.set(entry.sessionId, entry.mode);
        break;
      case "content-replacement": {
        const key = entry.agentId || entry.sessionId;
        // Guard the payload: a missing or non-array `replacements` would throw.
        if (key && Array.isArray(entry.replacements)) {
          const bucket = contentReplacements.get(key) ?? [];
          // Element-wise push: spreading a very large array into push()
          // can exceed the engine's argument limit.
          for (const replacement of entry.replacements) {
            bucket.push(replacement);
          }
          contentReplacements.set(key, bucket);
        }
        break;
      }
      case "marble-origami-commit":
        contextCollapseCommits.push(entry);
        break;
      case "marble-origami-snapshot":
        // Only the most recent snapshot is kept.
        contextCollapseSnapshot = entry;
        break;
      default:
        // Unknown entry types are ignored.
        break;
    }
  }

  return {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    modes,
    contentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
  };
}
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Full Pipeline Integration
3
+ *
4
+ * Wires all layers together:
5
+ * parse → relink → walk → slice → strip → normalize →
6
+ * content-replace → microcompact → userContext → API format
7
+ *
8
+ * Produces the final messages array ready for an API call.
9
+ */
10
+
11
+ import { readFileSync } from "node:fs";
12
+ import { parseSession, type ParsedSession } from "./parser.js";
13
+ import { buildRawChain, sliceFromBoundary, stripInternalFields, getCompactionWindows, type CompactionWindow } from "./tree.js";
14
+ import { normalizeMessages } from "./normalize.js";
15
+ import { applyContentReplacements, microcompact } from "./content-replacements.js";
16
+ import { injectUserContext, buildSystemPromptString, extractToolNames, type SystemPromptOptions } from "./system-prompt.js";
17
+ import type { CCMessage } from "./parser.js";
18
+
19
+ // ============================================================================
20
+ // API Format Conversion
21
+ // ============================================================================
22
+
23
/**
 * A message in API format: only a role and its content.
 *
 * Exported because it appears in the public signatures of `toApiMessages`
 * and `PipelineResult`; without the export, consumers cannot name the type.
 */
export interface ApiMessage {
  role: "user" | "assistant";
  /** Either plain text or an array of content blocks. */
  content: string | any[];
}
27
+
28
/**
 * Reduces a CC message to API format, keeping only `role` and `content`.
 *
 * The role is "assistant" for assistant entries and "user" for every other
 * entry type. Array content is shallow-copied, so the result does not share
 * the array with the source message (the blocks themselves are shared).
 */
function toApiMessage(msg: CCMessage): ApiMessage {
  const role = msg.type === "assistant" ? "assistant" : "user";
  const original = msg.message.content;
  const content = Array.isArray(original) ? Array.from(original) : original;
  return { role, content };
}
40
+
41
/**
 * Converts a list of CC messages to API format, preserving order.
 * Returns a new array; the input is not modified.
 */
export function toApiMessages(messages: CCMessage[]): ApiMessage[] {
  return messages.map((message) => toApiMessage(message));
}
47
+
48
+ // ============================================================================
49
+ // Pipeline Options
50
+ // ============================================================================
51
+
52
/** Switches and overrides for the session pipeline. Every field is optional. */
export interface PipelineOptions {
  /**
   * Working directory the session was run from. When omitted, the pipeline
   * uses a `cwd` found on the session's messages, else `process.cwd()`.
   */
  cwd?: string;
  /** Apply content replacements. Enabled unless explicitly `false`. */
  applyReplacements?: boolean;
  /** Apply microcompact. Enabled unless explicitly `false`. */
  applyMicrocompact?: boolean;
  /** Microcompact gap threshold in minutes (default: 30). */
  microcompactGapMinutes?: number;
  /** Microcompact: number of recent turns to keep (default: 2). */
  microcompactKeepTurns?: number;
  /** Inject userContext (CLAUDE.md + date). Enabled unless explicitly `false`. */
  injectContext?: boolean;
  /** When truthy, no system prompt is built and an empty string is returned. */
  skipSystemPrompt?: boolean;
  /** CC project directory (~/.claude/projects/<hash>/) for memory loading. */
  projectDir?: string | null;
}
70
+
71
+ // ============================================================================
72
+ // Pipeline Result
73
+ // ============================================================================
74
+
75
/** What the pipeline returns for one session (or one compaction window). */
export interface PipelineResult {
  /** Messages ready for the API (role + content only). */
  messages: ApiMessage[];
  /** System prompt string; empty when prompt building was skipped. */
  systemPrompt: string;
  /** Model info extracted from the session, if any was found. */
  modelInfo?: { provider: string; modelId: string };
  /**
   * Message count measured before normalization: the raw chain length for
   * `processSession`, the window's message count for `processSessionWindows`.
   */
  rawMessageCount: number;
  /**
   * Working directory used by the pipeline: `opts.cwd`, else a `cwd` found
   * on the session's messages, else `process.cwd()`.
   */
  sessionCwd: string;
}
87
+
88
+ // ============================================================================
89
+ // Full Pipeline
90
+ // ============================================================================
91
+
92
/**
 * Reads a session file synchronously as UTF-8 and runs the full pipeline on it.
 *
 * @param sessionFile Path to the session JSONL file.
 * @param opts Pipeline switches; defaults apply when omitted.
 * @returns API-ready messages plus the system prompt.
 */
export function processSessionFile(sessionFile: string, opts: PipelineOptions = {}): PipelineResult {
  const rawText = readFileSync(sessionFile, "utf-8");
  const result = processSessionContent(rawText, sessionFile, opts);
  return result;
}
100
+
101
/**
 * Runs the full pipeline on session content that is already in memory.
 *
 * @param content Raw JSONL text of the session.
 * @param sessionFile Optional path of the originating file, forwarded to
 *   `processSession`.
 * @param opts Pipeline switches; defaults apply when omitted.
 */
export function processSessionContent(
  content: string,
  sessionFile?: string,
  opts: PipelineOptions = {},
): PipelineResult {
  return processSession(parseSession(content), sessionFile, opts);
}
112
+
113
/**
 * Runs the full pipeline on an already-parsed session.
 *
 * Stages, in order: build the raw chain, slice from the last boundary, strip
 * internal fields, normalize, apply content replacements, microcompact,
 * inject userContext, build the system prompt, convert to API format. The
 * replacement, microcompact and context stages run unless their option is
 * explicitly `false`.
 *
 * @param parsed Output of `parseSession`.
 * @param sessionFile Optional session file path, forwarded to
 *   `applyContentReplacements`.
 * @param opts Pipeline switches; defaults apply when omitted.
 * @returns API-ready messages and system prompt. An empty chain yields an
 *   empty result with `rawMessageCount` 0.
 */
export function processSession(
  parsed: ParsedSession,
  sessionFile?: string,
  opts: PipelineOptions = {},
): PipelineResult {
  // Stage 1: relink + tree walk.
  const chain = buildRawChain(parsed);

  if (chain.length === 0) {
    const emptyResult: PipelineResult = {
      messages: [],
      systemPrompt: "",
      rawMessageCount: 0,
      sessionCwd: opts.cwd ?? process.cwd(),
    };
    return emptyResult;
  }

  // Session-wide facts are read from the full chain, before any slicing.
  const sessionCwd = opts.cwd ?? extractCwd(chain) ?? process.cwd();
  const modelInfo = extractModelInfo(chain);

  // Stages 2-4: slice from the last boundary, strip internal fields, normalize.
  let messages = sliceFromBoundary(chain);
  messages = stripInternalFields(messages);
  messages = normalizeMessages(messages);

  // Stage 5: content replacements.
  const wantReplacements = opts.applyReplacements !== false;
  if (wantReplacements) {
    messages = applyContentReplacements(messages, parsed, sessionFile);
  }

  // Stage 6: microcompact.
  const wantMicrocompact = opts.applyMicrocompact !== false;
  if (wantMicrocompact) {
    const gapMinutes = opts.microcompactGapMinutes ?? 30;
    const keepTurns = opts.microcompactKeepTurns ?? 2;
    messages = microcompact(messages, gapMinutes, keepTurns);
  }

  // Stage 7: userContext injection.
  const wantContext = opts.injectContext !== false;
  if (wantContext) {
    messages = injectUserContext(messages, sessionCwd);
  }

  // Stage 8: system prompt (full parity with CC's U2).
  const toolNames = extractToolNames(messages);
  let systemPrompt = "";
  if (!opts.skipSystemPrompt) {
    systemPrompt = buildSystemPromptString({
      cwd: sessionCwd,
      modelId: modelInfo?.modelId,
      toolNames,
      projectDir: opts.projectDir ?? null,
    });
  }

  // Stage 9: API format.
  const apiMessages = toApiMessages(messages);

  return {
    messages: apiMessages,
    systemPrompt,
    modelInfo,
    rawMessageCount: chain.length,
    sessionCwd,
  };
}
188
+
189
+ // ============================================================================
190
+ // Compaction Window Pipeline
191
+ // ============================================================================
192
+
193
/** A pipeline result for one compaction window. */
export interface WindowPipelineResult extends PipelineResult {
  /** The window's `windowIndex`, as reported by `getCompactionWindows`. */
  windowIndex: number;
}
196
+
197
/**
 * Processes a session file into compaction windows, each independently
 * queryable.
 *
 * The file is read and parsed once. Each window returned by
 * `getCompactionWindows` then goes through the same stages as
 * `processSession`: strip internal fields, normalize, content replacements,
 * microcompact, userContext injection, system prompt, API conversion.
 *
 * The system prompt is built with the same inputs as in `processSession`
 * (tool names found in the window's messages, and `opts.projectDir`), so
 * `projectDir` is honored here too.
 *
 * @param sessionFile Path to the session JSONL file.
 * @param opts Pipeline switches; defaults apply when omitted.
 * @returns One result per window; empty when the session has no chain.
 */
export function processSessionWindows(
  sessionFile: string,
  opts: PipelineOptions = {},
): WindowPipelineResult[] {
  const content = readFileSync(sessionFile, "utf-8");
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);

  if (chain.length === 0) return [];

  const windows = getCompactionWindows(chain);
  // The working directory is session-wide, so resolve it once from the full chain.
  const sessionCwd = opts.cwd ?? extractCwd(chain) ?? process.cwd();

  // `win`, not `window`, to avoid shadowing the global of that name.
  return windows.map((win) => {
    let messages = stripInternalFields(win.messages);
    messages = normalizeMessages(messages);

    if (opts.applyReplacements !== false) {
      messages = applyContentReplacements(messages, parsed, sessionFile);
    }

    if (opts.applyMicrocompact !== false) {
      messages = microcompact(
        messages,
        opts.microcompactGapMinutes ?? 30,
        opts.microcompactKeepTurns ?? 2,
      );
    }

    if (opts.injectContext !== false) {
      messages = injectUserContext(messages, sessionCwd);
    }

    const modelInfo = win.modelInfo;
    const systemPrompt = opts.skipSystemPrompt
      ? ""
      : buildSystemPromptString({
          cwd: sessionCwd,
          modelId: modelInfo?.modelId,
          modelName: modelInfo?.modelId,
          // Same prompt inputs as processSession.
          toolNames: extractToolNames(messages),
          projectDir: opts.projectDir ?? null,
        });

    return {
      windowIndex: win.windowIndex,
      messages: toApiMessages(messages),
      systemPrompt,
      modelInfo,
      rawMessageCount: win.messages.length,
      sessionCwd,
    };
  });
}
252
+
253
+ // ============================================================================
254
+ // Helpers
255
+ // ============================================================================
256
+
257
/**
 * Returns the `cwd` of the first message in the chain that has a non-empty
 * one, or `undefined` when no message does.
 */
function extractCwd(chain: CCMessage[]): string | undefined {
  const carrier = chain.find((entry) => Boolean(entry.cwd));
  return carrier === undefined ? undefined : carrier.cwd;
}
264
+
265
/**
 * Scans the chain from the end and returns model info for the last assistant
 * message that has a non-empty `message.model`. The provider is always
 * reported as "anthropic". Returns `undefined` when no such message exists.
 */
function extractModelInfo(chain: CCMessage[]): { provider: string; modelId: string } | undefined {
  let index = chain.length;
  while (index-- > 0) {
    const candidate = chain[index];
    if (candidate.type !== "assistant") continue;

    const modelId = candidate.message.model;
    if (modelId) {
      return { provider: "anthropic", modelId };
    }
  }
  return undefined;
}