@phenx-inc/ctlsurf 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,341 @@
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+ import os from 'os'
4
+
5
+ /**
6
+ * Transcript Tailer
7
+ *
8
+ * Primary chat-log source for agents that write native session transcripts
9
+ * (Claude Code, Codex CLI). Instead of scraping the TUI screen — which hard-
10
+ * wraps paragraphs at terminal width and leaks chrome — we tail the agent's
11
+ * own JSONL transcript and forward exact user/assistant messages.
12
+ *
13
+ * The ConversationBridge screen-scraper remains the fallback for agents
14
+ * without a known transcript format (plain shell, unknown CLIs).
15
+ *
16
+ * Formats:
17
+ * - Claude Code: ~/.claude/projects/<cwd-slug>/<sessionId>.jsonl
18
+ * One JSON object per line: { type: 'user'|'assistant', message, timestamp, cwd, ... }
19
+ * - Codex CLI: ~/.codex/sessions/YYYY/MM/DD/rollout-<ts>-<id>.jsonl
20
+ * First line is { type: 'session_meta', payload: { cwd, ... } }; chat content
21
+ * arrives as { type: 'event_msg', payload: { type: 'user_message'|'agent_message', message } }
22
+ */
23
+
24
/** A single chat-log record handed to a {@link ChatLogSink}. */
export interface ChatLogEntry {
  /** Timestamp string for the message. */
  ts: string
  /** Which side of the conversation the message belongs to. */
  type: 'user_input' | 'terminal_output'
  /** Message text. */
  content: string
}
29
+
30
/** Callback that receives each chat-log entry as it is produced. */
export type ChatLogSink = (entry: ChatLogEntry) => void
31
+
32
/** Identifiers of the agents whose transcript format is known. */
export type TranscriptAgentId = 'claude' | 'codex'
33
+
34
/**
 * Type guard telling whether `agentId` is one of the agents with a known
 * transcript format.
 *
 * @param agentId Arbitrary agent identifier.
 * @returns `true` for `'claude'` and `'codex'`, `false` for anything else.
 */
export function supportsTranscriptLogging(agentId: string): agentId is TranscriptAgentId {
  switch (agentId) {
    case 'claude':
    case 'codex':
      return true
    default:
      return false
  }
}
37
+
38
/** Construction options for the transcript tailer. */
export interface TranscriptTailerOptions {
  /** Agent whose transcript format should be tailed. */
  agentId: TranscriptAgentId
  /** Working directory of the session whose messages are wanted. */
  cwd: string
  /** Receives every forwarded chat-log entry. */
  sink: ChatLogSink
  /** Override for the Claude transcript root (for tests). */
  claudeProjectsDir?: string
  /** Override for the Codex transcript root (for tests). */
  codexSessionsDir?: string
}
46
+
47
/** Delay between polls of the transcript files, in milliseconds. */
const POLL_INTERVAL_MS = 1000
/** Slack, in milliseconds, subtracted from the start time when judging file mtimes and entry timestamps as recent. */
const DISCOVERY_SLACK_MS = 10 * 1000
/** Size in bytes of the buffer used for each read while draining a file. */
const READ_CHUNK_BYTES = 65_536
/** Longest entry content forwarded before truncation, in string length units. */
const MAX_ENTRY_CHARS = 20 * 1000
51
+
52
/** Per-file read state kept between polls. */
interface FileTail {
  /** Byte position in the file up to which data has been consumed. */
  offset: number
  /** Text after the last newline seen so far, i.e. a line that is not complete yet. */
  remainder: string
  /** Codex file whose session_meta cwd belongs to a different project. */
  excluded: boolean
}
58
+
59
/**
 * Tails an agent's JSONL transcript files and forwards user/assistant
 * messages to a sink.
 *
 * Work is driven by a polling timer: every poll first discovers recently
 * modified transcript files for the configured agent and cwd, then reads
 * whatever was appended to each tracked file since the previous poll.
 */
export class TranscriptTailer {
  private readonly agentId: TranscriptAgentId
  private readonly cwd: string
  private readonly sink: ChatLogSink
  private readonly claudeProjectsDir: string
  private readonly codexSessionsDir: string

  /** Tracked transcript files, keyed by file path. */
  private files = new Map<string, FileTail>()
  /** Handle of the polling interval; null while the tailer is stopped. */
  private pollTimer: ReturnType<typeof setInterval> | null = null
  /** Time of the most recent start(); anchors the "recent" checks. */
  private sinceMs = 0

  /**
   * @param options Agent, cwd and sink to use, plus optional transcript-root
   *   overrides. Roots default to `~/.claude/projects` and `~/.codex/sessions`.
   */
  constructor(options: TranscriptTailerOptions) {
    this.agentId = options.agentId
    this.cwd = stripTrailingSep(options.cwd)
    this.sink = options.sink
    this.claudeProjectsDir = options.claudeProjectsDir || path.join(os.homedir(), '.claude', 'projects')
    this.codexSessionsDir = options.codexSessionsDir || path.join(os.homedir(), '.codex', 'sessions')
  }

  /** Begin polling. Does nothing when polling is already active. */
  start(): void {
    if (this.pollTimer) return
    this.sinceMs = Date.now()
    this.files.clear()
    this.pollTimer = setInterval(() => this.poll(), POLL_INTERVAL_MS)
    console.log(`[transcripts] Tailing ${this.agentId} transcripts for ${this.cwd}`)
  }

  /** Stop polling after one last drain. Does nothing when already stopped. */
  stop(): void {
    if (!this.pollTimer) return
    clearInterval(this.pollTimer)
    this.pollTimer = null
    // Final drain so messages written just before exit aren't lost.
    this.poll()
    this.files.clear()
    console.log('[transcripts] Stopped')
  }

  /** One polling pass: discover new files, then drain every non-excluded one. */
  private poll(): void {
    try {
      this.discover()
      for (const [filePath, tail] of this.files) {
        if (!tail.excluded) this.drainFile(filePath, tail)
      }
    } catch (err) {
      console.error('[transcripts] Poll error:', err)
    }
  }

  // ─── Discovery ──────────────────────────────────

  /**
   * Track every transcript file with recent activity, not just the first
   * match: /clear (Claude) or /new (Codex) starts a new session file in the
   * middle of one PTY run, and tailing all active candidates handles the
   * switch without special cases. Old idle files never match (stale mtime).
   */
  private discover(): void {
    const dirs = this.agentId === 'claude' ? [this.claudeProjectsDirForCwd()] : this.codexDateDirs()

    for (const dir of dirs) {
      let names: string[]
      try {
        names = fs.readdirSync(dir)
      } catch {
        continue // Directory may not exist until the agent writes its first message
      }
      for (const name of names) {
        if (!name.endsWith('.jsonl')) continue
        if (this.agentId === 'codex' && !name.startsWith('rollout-')) continue
        const filePath = path.join(dir, name)
        if (this.files.has(filePath)) continue
        try {
          const stat = fs.statSync(filePath)
          if (stat.mtimeMs >= this.sinceMs - DISCOVERY_SLACK_MS) {
            this.files.set(filePath, { offset: 0, remainder: '', excluded: false })
          }
        } catch { /* ignore — file may have vanished */ }
      }
    }
  }

  /** Directory holding the Claude Code transcripts for this tailer's cwd. */
  private claudeProjectsDirForCwd(): string {
    // Claude Code slugs the cwd by replacing every non-alphanumeric char with '-'
    // e.g. /Users/me/Code/quick_apps/app → -Users-me-Code-quick-apps-app
    const slug = this.cwd.replace(/[^a-zA-Z0-9]/g, '-')
    return path.join(this.claudeProjectsDir, slug)
  }

  /** Codex session directories (YYYY/MM/DD) to scan, without duplicates. */
  private codexDateDirs(): string[] {
    // Codex groups sessions by local date; check the start date and today to
    // cover sessions running across midnight.
    const dirs = new Set<string>()
    for (const ms of [this.sinceMs, Date.now()]) {
      const d = new Date(ms)
      const yyyy = String(d.getFullYear())
      const mm = String(d.getMonth() + 1).padStart(2, '0')
      const dd = String(d.getDate()).padStart(2, '0')
      dirs.add(path.join(this.codexSessionsDir, yyyy, mm, dd))
    }
    return [...dirs]
  }

  // ─── Tailing ────────────────────────────────────

  /**
   * Read everything appended to `filePath` since `tail.offset` and hand each
   * complete line to handleLine. An unfinished trailing line is kept in
   * `tail.remainder` until its newline arrives.
   *
   * Bytes are consumed only up to the last complete UTF-8 sequence of each
   * read; a character cut by the buffer boundary is re-read with the next
   * chunk instead of being decoded as replacement characters.
   */
  private drainFile(filePath: string, tail: FileTail): void {
    let size: number
    try {
      size = fs.statSync(filePath).size
    } catch {
      return
    }
    if (size <= tail.offset) return

    let fd: number
    try {
      fd = fs.openSync(filePath, 'r')
    } catch {
      return
    }
    try {
      const buf = Buffer.alloc(READ_CHUNK_BYTES)
      while (tail.offset < size && !tail.excluded) {
        const bytesRead = fs.readSync(fd, buf, 0, READ_CHUNK_BYTES, tail.offset)
        if (bytesRead <= 0) break

        const usable = TranscriptTailer.completeUtf8Length(buf, bytesRead)
        // Only a partial character is available so far; retry on a later poll.
        if (usable <= 0) break
        tail.offset += usable
        tail.remainder += buf.toString('utf-8', 0, usable)

        const lines = tail.remainder.split('\n')
        tail.remainder = lines.pop() || ''
        for (const line of lines) {
          this.handleLine(line, tail)
          if (tail.excluded) break
        }
      }
    } catch (err) {
      console.error(`[transcripts] Read error for ${filePath}:`, err)
    } finally {
      try { fs.closeSync(fd) } catch { /* ignore */ }
    }
  }

  /**
   * Number of leading bytes of `buf[0..length)` that can be decoded without
   * cutting a UTF-8 sequence in half. Returns `length` unless the data ends
   * inside a multi-byte sequence, in which case the index of that sequence's
   * lead byte is returned.
   */
  private static completeUtf8Length(buf: Buffer, length: number): number {
    let i = length - 1
    // Step back over at most three continuation bytes (10xxxxxx).
    for (let steps = 0; i >= 0 && steps < 3 && (buf.readUInt8(i) & 0xc0) === 0x80; steps++) i--
    if (i < 0) return length

    const lead = buf.readUInt8(i)
    let needed: number
    if ((lead & 0xe0) === 0xc0) needed = 2
    else if ((lead & 0xf0) === 0xe0) needed = 3
    else if ((lead & 0xf8) === 0xf0) needed = 4
    else return length // ASCII or malformed input: leave it to the decoder

    return length - i >= needed ? length : i
  }

  /**
   * Parse one transcript line and forward it to the sink when it is a chat
   * message recent enough to belong to this run.
   */
  private handleLine(line: string, tail: FileTail): void {
    const trimmed = line.trim()
    if (!trimmed) return

    let obj: any
    try {
      obj = JSON.parse(trimmed)
    } catch {
      return // tolerate partial/corrupt lines
    }

    const entry = this.agentId === 'claude' ? this.parseClaudeLine(obj) : this.parseCodexLine(obj, tail)
    if (!entry) return

    // Skip history replayed into resumed/forked session files.
    const ms = Date.parse(entry.ts)
    if (Number.isFinite(ms) && ms < this.sinceMs - DISCOVERY_SLACK_MS) return

    // A failing sink must not abort the drain: the file offset has already
    // moved past this chunk, so the lines after this one would be lost.
    try {
      this.sink({ ...entry, content: capLength(entry.content) })
    } catch (err) {
      console.error('[transcripts] Sink error:', err)
    }
  }

  // ─── Claude Code format ─────────────────────────

  /**
   * Convert a parsed Claude Code transcript line into a chat-log entry.
   *
   * @returns The entry, or null for meta lines, non user/assistant lines,
   *   lines from another cwd, sidechain lines and lines without usable text.
   */
  private parseClaudeLine(obj: any): ChatLogEntry | null {
    if (!obj || typeof obj !== 'object') return null
    if (obj.isMeta) return null
    if (obj.type !== 'user' && obj.type !== 'assistant') return null
    // Belt-and-braces: the slug dir already scopes to this cwd, but lines
    // carry the cwd too (subagent/sidechain lines can differ).
    if (typeof obj.cwd === 'string' && stripTrailingSep(obj.cwd) !== this.cwd) return null
    if (obj.isSidechain) return null

    const message = obj.message
    if (!message) return null

    const text = extractClaudeText(message.content, obj.type === 'user')
    if (!text) return null

    return {
      ts: typeof obj.timestamp === 'string' ? obj.timestamp : new Date().toISOString(),
      type: obj.type === 'user' ? 'user_input' : 'terminal_output',
      content: text,
    }
  }

  // ─── Codex CLI format ───────────────────────────

  /**
   * Convert a parsed Codex CLI transcript line into a chat-log entry.
   *
   * A `session_meta` line whose cwd differs from this tailer's cwd marks the
   * whole file as excluded via `tail.excluded`.
   *
   * @returns The entry for `user_message` / `agent_message` events with
   *   non-noise text, otherwise null.
   */
  private parseCodexLine(obj: any, tail: FileTail): ChatLogEntry | null {
    if (!obj || typeof obj !== 'object') return null
    const payload = obj.payload

    if (obj.type === 'session_meta') {
      const metaCwd = payload?.cwd
      if (typeof metaCwd === 'string' && stripTrailingSep(metaCwd) !== this.cwd) {
        tail.excluded = true // another project's session sharing the date dir
      }
      return null
    }

    if (obj.type !== 'event_msg' || !payload || typeof payload !== 'object') return null

    let type: ChatLogEntry['type']
    if (payload.type === 'user_message') {
      type = 'user_input'
    } else if (payload.type === 'agent_message') {
      type = 'terminal_output'
    } else {
      return null
    }

    const text = typeof payload.message === 'string' ? payload.message.trim() : ''
    if (!text || isCodexNoise(text)) return null

    return {
      ts: typeof obj.timestamp === 'string' ? obj.timestamp : new Date().toISOString(),
      type,
      content: text,
    }
  }
}
280
+
281
+ // ─── Text extraction & filters ────────────────────
282
+
283
/**
 * Lower-case prefixes marking injected or meta user content in Claude Code
 * transcripts, as opposed to text the user actually typed.
 */
const CLAUDE_NOISE_PREFIXES = [
  // Slash-command and local-command wrappers
  '<local-command-caveat>',
  '<command-name>',
  '<command-message>',
  '<local-command-stdout>',
  // Bash-mode input and output
  '<bash-input>',
  '<bash-stdout>',
  '<bash-stderr>',
  // Injected notices
  '<system-reminder>',
  '<task-notification>',
  'caveat: the messages below',
  '[request interrupted',
]
297
+
298
/** Lower-case prefixes marking Codex CLI message text that should not be logged. */
const CODEX_NOISE_PREFIXES = [
  '<environment_context>',
  '<user_instructions>',
  '<permissions instructions>',
  '<turn_aborted>',
]
304
+
305
/**
 * Whether `text` starts with one of the Claude noise prefixes, ignoring
 * leading whitespace and letter case.
 */
function isClaudeNoise(text: string): boolean {
  const candidate = text.trimStart().toLowerCase()
  for (const prefix of CLAUDE_NOISE_PREFIXES) {
    if (candidate.startsWith(prefix)) return true
  }
  return false
}
309
+
310
/**
 * Whether `text` starts with one of the Codex noise prefixes, ignoring
 * leading whitespace and letter case.
 */
function isCodexNoise(text: string): boolean {
  const candidate = text.trimStart().toLowerCase()
  for (const prefix of CODEX_NOISE_PREFIXES) {
    if (candidate.startsWith(prefix)) return true
  }
  return false
}
314
+
315
/**
 * Pull the loggable text out of a Claude message `content` value.
 *
 * @param content Either a plain string or an array of content blocks; any
 *   other value yields an empty result.
 * @param isUser Whether the line has the user role.
 * @returns Trimmed text with noise removed; text blocks are joined by a blank
 *   line. Empty string when nothing remains.
 */
function extractClaudeText(content: unknown, isUser: boolean): string {
  if (typeof content === 'string') {
    const trimmed = content.trim()
    if (trimmed === '' || isClaudeNoise(trimmed)) return ''
    return trimmed
  }
  if (!Array.isArray(content)) return ''

  // Tool results come back as user-role lines; they're not typed input.
  if (isUser) {
    for (const block of content) {
      if (block?.type === 'tool_result') return ''
    }
  }

  const kept = content
    .filter((block: any) => block?.type === 'text' && typeof block.text === 'string')
    .map((block: any) => block.text.trim() as string)
    .filter((text: string) => text !== '' && !isClaudeNoise(text))
  return kept.join('\n\n')
}
333
+
334
/**
 * Remove trailing `/` and `\` separators from a path.
 *
 * A path made only of separators (`/`, `//`, `\\`) is reduced to its first
 * separator rather than to the empty string, so a root path stays a root
 * path. Strings of length 0 or 1 are returned unchanged.
 *
 * @param p Path to normalise.
 * @returns `p` without trailing separators.
 */
function stripTrailingSep(p: string): string {
  if (p.length <= 1) return p
  const stripped = p.replace(/[/\\]+$/, '')
  // Everything was a separator: keep one so the result still denotes the root.
  return stripped === '' ? p.charAt(0) : stripped
}
337
+
338
/**
 * Limit `str` to MAX_ENTRY_CHARS UTF-16 code units, appending a marker that
 * states the original length when truncation happens.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * kept code unit would be a high surrogate, it is dropped too, so the result
 * contains no lone surrogate introduced by truncation.
 *
 * @param str Text to cap.
 * @returns `str` unchanged when short enough, otherwise the truncated text
 *   followed by `… [truncated, N total chars]`.
 */
function capLength(str: string): string {
  if (str.length <= MAX_ENTRY_CHARS) return str
  let end = MAX_ENTRY_CHARS
  const lastKept = str.charCodeAt(end - 1)
  // High surrogate (U+D800–U+DBFF) at the cut: its partner would be sliced off.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1
  return str.slice(0, end) + `… [truncated, ${str.length} total chars]`
}