@arcreflex/agent-transcripts 0.1.4 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +5 -0
- package/CLAUDE.md +10 -0
- package/README.md +44 -5
- package/package.json +1 -1
- package/src/adapters/claude-code.ts +97 -1
- package/src/cli.ts +45 -75
- package/src/convert.ts +126 -0
- package/src/parse.ts +12 -106
- package/src/render.ts +20 -85
- package/src/sync.ts +109 -103
- package/src/types.ts +11 -2
- package/src/utils/naming.ts +30 -143
- package/src/utils/provenance.ts +228 -0
- package/test/snapshots.test.ts +39 -33
package/.github/workflows/publish.yml
CHANGED
|
@@ -9,10 +9,15 @@ jobs:
|
|
|
9
9
|
publish:
|
|
10
10
|
runs-on: ubuntu-latest
|
|
11
11
|
permissions:
|
|
12
|
+
contents: read
|
|
12
13
|
id-token: write
|
|
13
14
|
steps:
|
|
14
15
|
- uses: actions/checkout@v4
|
|
15
16
|
|
|
17
|
+
- uses: actions/setup-node@v4
|
|
18
|
+
with:
|
|
19
|
+
node-version: "24"
|
|
20
|
+
|
|
16
21
|
- uses: oven-sh/setup-bun@v2
|
|
17
22
|
|
|
18
23
|
- run: bun install
|
package/CLAUDE.md
ADDED
package/README.md
CHANGED
|
@@ -13,12 +13,16 @@ CLI tool that transforms AI coding agent session files into readable transcripts
|
|
|
13
13
|
```
|
|
14
14
|
src/
|
|
15
15
|
cli.ts # CLI entry point, subcommand routing
|
|
16
|
-
parse.ts # Source → intermediate
|
|
17
|
-
render.ts # Intermediate
|
|
16
|
+
parse.ts # Source → intermediate format
|
|
17
|
+
render.ts # Intermediate format → markdown
|
|
18
|
+
convert.ts # Full pipeline with provenance tracking
|
|
18
19
|
sync.ts # Batch sync sessions → markdown
|
|
19
20
|
types.ts # Core types (Transcript, Message, Adapter)
|
|
20
21
|
adapters/ # Source format adapters (currently: claude-code)
|
|
21
|
-
utils/
|
|
22
|
+
utils/
|
|
23
|
+
naming.ts # Deterministic output file naming
|
|
24
|
+
provenance.ts # Source tracking via transcripts.json + YAML front matter
|
|
25
|
+
summary.ts # Tool call summary extraction
|
|
22
26
|
test/
|
|
23
27
|
fixtures/ # Snapshot test inputs/outputs
|
|
24
28
|
snapshots.test.ts
|
|
@@ -32,19 +36,54 @@ bun run test # snapshot tests
|
|
|
32
36
|
bun run format # auto-format
|
|
33
37
|
```
|
|
34
38
|
|
|
39
|
+
## CLI Usage
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Subcommands (convert is default if omitted)
|
|
43
|
+
agent-transcripts convert <file> # Parse and render to stdout
|
|
44
|
+
agent-transcripts convert <file> -o <dir> # Parse and render to directory
|
|
45
|
+
agent-transcripts sync <dir> -o <out> # Batch sync sessions
|
|
46
|
+
|
|
47
|
+
# Use "-" for stdin
|
|
48
|
+
cat session.jsonl | agent-transcripts -
|
|
49
|
+
```
|
|
50
|
+
|
|
35
51
|
## Architecture
|
|
36
52
|
|
|
37
|
-
Two-stage pipeline: Parse (source →
|
|
53
|
+
Two-stage pipeline: Parse (source → intermediate) → Render (intermediate → markdown).
|
|
38
54
|
|
|
39
55
|
- Adapters handle source formats (see `src/adapters/index.ts` for registry)
|
|
40
56
|
- Auto-detection: paths containing `.claude/` → claude-code adapter
|
|
41
57
|
- Branching conversations preserved via `parentMessageRef` on messages
|
|
58
|
+
- Provenance tracking via `transcripts.json` index + YAML front matter
|
|
59
|
+
- Deterministic naming: `{datetime}-{sessionId}.md`
|
|
60
|
+
- Sync uses sessions-index.json for discovery (claude-code), skipping subagent files
|
|
61
|
+
- Sync uses mtime via index to skip unchanged sources
|
|
62
|
+
|
|
63
|
+
### transcripts.json
|
|
64
|
+
|
|
65
|
+
The index file tracks the relationship between source files and outputs:
|
|
66
|
+
|
|
67
|
+
```typescript
|
|
68
|
+
interface TranscriptsIndex {
|
|
69
|
+
version: 1;
|
|
70
|
+
entries: {
|
|
71
|
+
[outputFilename: string]: {
|
|
72
|
+
source: string; // absolute path to source
|
|
73
|
+
sourceMtime: number; // ms since epoch
|
|
74
|
+
sessionId: string; // full session ID from filename
|
|
75
|
+
segmentIndex?: number; // for multi-transcript sources (1-indexed)
|
|
76
|
+
syncedAt: string; // ISO timestamp
|
|
77
|
+
};
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
```
|
|
42
81
|
|
|
43
82
|
## Key Types
|
|
44
83
|
|
|
45
84
|
- `Transcript`: source info, warnings, messages array
|
|
46
85
|
- `Message`: union of UserMessage | AssistantMessage | SystemMessage | ToolCallGroup | ErrorMessage
|
|
47
|
-
- `Adapter`: name,
|
|
86
|
+
- `Adapter`: name, discover function, parse function
|
|
48
87
|
|
|
49
88
|
## Adding an Adapter
|
|
50
89
|
|
package/package.json
CHANGED
package/src/adapters/claude-code.ts
CHANGED
|
@@ -4,8 +4,12 @@
|
|
|
4
4
|
* Parses session files from ~/.claude/projects/{project}/sessions/{session}.jsonl
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
+
import { Glob } from "bun";
|
|
8
|
+
import { basename, join, relative } from "path";
|
|
9
|
+
import { stat } from "fs/promises";
|
|
7
10
|
import type {
|
|
8
11
|
Adapter,
|
|
12
|
+
DiscoveredSession,
|
|
9
13
|
Transcript,
|
|
10
14
|
Message,
|
|
11
15
|
Warning,
|
|
@@ -13,6 +17,21 @@ import type {
|
|
|
13
17
|
} from "../types.ts";
|
|
14
18
|
import { extractToolSummary } from "../utils/summary.ts";
|
|
15
19
|
|
|
20
|
+
/**
|
|
21
|
+
* Claude Code sessions-index.json structure.
|
|
22
|
+
*/
|
|
23
|
+
interface SessionsIndex {
|
|
24
|
+
version: number;
|
|
25
|
+
entries: SessionIndexEntry[];
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
interface SessionIndexEntry {
|
|
29
|
+
sessionId: string;
|
|
30
|
+
fullPath: string;
|
|
31
|
+
fileMtime: number;
|
|
32
|
+
isSidechain: boolean;
|
|
33
|
+
}
|
|
34
|
+
|
|
16
35
|
// Claude Code JSONL record types
|
|
17
36
|
interface ClaudeRecord {
|
|
18
37
|
type: string;
|
|
@@ -333,9 +352,86 @@ function transformConversation(
|
|
|
333
352
|
};
|
|
334
353
|
}
|
|
335
354
|
|
|
355
|
+
/**
|
|
356
|
+
* Discover sessions from sessions-index.json.
|
|
357
|
+
* Returns undefined if index doesn't exist or is invalid.
|
|
358
|
+
*/
|
|
359
|
+
async function discoverFromIndex(
|
|
360
|
+
source: string,
|
|
361
|
+
): Promise<DiscoveredSession[] | undefined> {
|
|
362
|
+
const indexPath = join(source, "sessions-index.json");
|
|
363
|
+
|
|
364
|
+
try {
|
|
365
|
+
const content = await Bun.file(indexPath).text();
|
|
366
|
+
const index: SessionsIndex = JSON.parse(content);
|
|
367
|
+
|
|
368
|
+
if (index.version !== 1 || !Array.isArray(index.entries)) {
|
|
369
|
+
return undefined;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
const sessions: DiscoveredSession[] = [];
|
|
373
|
+
|
|
374
|
+
for (const entry of index.entries) {
|
|
375
|
+
// Skip sidechains (subagents)
|
|
376
|
+
if (entry.isSidechain) continue;
|
|
377
|
+
|
|
378
|
+
// Verify the file exists and get current mtime
|
|
379
|
+
try {
|
|
380
|
+
const fileStat = await stat(entry.fullPath);
|
|
381
|
+
sessions.push({
|
|
382
|
+
path: entry.fullPath,
|
|
383
|
+
relativePath:
|
|
384
|
+
relative(source, entry.fullPath) || basename(entry.fullPath),
|
|
385
|
+
mtime: fileStat.mtime.getTime(),
|
|
386
|
+
});
|
|
387
|
+
} catch {
|
|
388
|
+
// Skip files that no longer exist
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
return sessions;
|
|
393
|
+
} catch {
|
|
394
|
+
// Index doesn't exist or is invalid
|
|
395
|
+
return undefined;
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
/**
|
|
400
|
+
* Discover sessions via glob pattern fallback.
|
|
401
|
+
*/
|
|
402
|
+
async function discoverByGlob(source: string): Promise<DiscoveredSession[]> {
|
|
403
|
+
const sessions: DiscoveredSession[] = [];
|
|
404
|
+
const glob = new Glob("**/*.jsonl");
|
|
405
|
+
|
|
406
|
+
for await (const file of glob.scan({ cwd: source, absolute: false })) {
|
|
407
|
+
// Skip files in subagents directories
|
|
408
|
+
if (file.includes("/subagents/")) continue;
|
|
409
|
+
|
|
410
|
+
const fullPath = join(source, file);
|
|
411
|
+
|
|
412
|
+
try {
|
|
413
|
+
const fileStat = await stat(fullPath);
|
|
414
|
+
sessions.push({
|
|
415
|
+
path: fullPath,
|
|
416
|
+
relativePath: file,
|
|
417
|
+
mtime: fileStat.mtime.getTime(),
|
|
418
|
+
});
|
|
419
|
+
} catch {
|
|
420
|
+
// Skip files we can't stat
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
return sessions;
|
|
425
|
+
}
|
|
426
|
+
|
|
336
427
|
export const claudeCodeAdapter: Adapter = {
|
|
337
428
|
name: "claude-code",
|
|
338
|
-
|
|
429
|
+
|
|
430
|
+
async discover(source: string): Promise<DiscoveredSession[]> {
|
|
431
|
+
// Try index-based discovery first, fall back to glob
|
|
432
|
+
const fromIndex = await discoverFromIndex(source);
|
|
433
|
+
return fromIndex ?? (await discoverByGlob(source));
|
|
434
|
+
},
|
|
339
435
|
|
|
340
436
|
parse(content: string, sourcePath: string): Transcript[] {
|
|
341
437
|
const { records, warnings } = parseJsonl(content);
|
package/src/cli.ts
CHANGED
|
@@ -12,25 +12,23 @@ import {
|
|
|
12
12
|
positional,
|
|
13
13
|
flag,
|
|
14
14
|
} from "cmd-ts";
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
15
|
+
import { parseToTranscripts } from "./parse.ts";
|
|
16
|
+
import { renderTranscript } from "./render.ts";
|
|
17
17
|
import { sync } from "./sync.ts";
|
|
18
|
-
|
|
19
|
-
// Read OpenRouter API key from environment for LLM-based slug generation
|
|
20
|
-
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
|
|
18
|
+
import { convertToDirectory } from "./convert.ts";
|
|
21
19
|
|
|
22
20
|
// Shared options
|
|
23
21
|
const inputArg = positional({
|
|
24
|
-
type:
|
|
22
|
+
type: string,
|
|
25
23
|
displayName: "file",
|
|
26
|
-
description: "Input file (
|
|
24
|
+
description: "Input file (use - for stdin)",
|
|
27
25
|
});
|
|
28
26
|
|
|
29
27
|
const outputOpt = option({
|
|
30
28
|
type: optional(string),
|
|
31
29
|
long: "output",
|
|
32
30
|
short: "o",
|
|
33
|
-
description: "Output
|
|
31
|
+
description: "Output directory (prints to stdout if not specified)",
|
|
34
32
|
});
|
|
35
33
|
|
|
36
34
|
const adapterOpt = option({
|
|
@@ -46,46 +44,6 @@ const headOpt = option({
|
|
|
46
44
|
description: "Render branch ending at this message ID (default: latest)",
|
|
47
45
|
});
|
|
48
46
|
|
|
49
|
-
// Parse subcommand
|
|
50
|
-
const parseCmd = command({
|
|
51
|
-
name: "parse",
|
|
52
|
-
description: "Parse source format to intermediate JSON",
|
|
53
|
-
args: {
|
|
54
|
-
input: inputArg,
|
|
55
|
-
output: outputOpt,
|
|
56
|
-
adapter: adapterOpt,
|
|
57
|
-
},
|
|
58
|
-
async handler({ input, output, adapter }) {
|
|
59
|
-
const naming = OPENROUTER_API_KEY
|
|
60
|
-
? { apiKey: OPENROUTER_API_KEY }
|
|
61
|
-
: undefined;
|
|
62
|
-
|
|
63
|
-
if (output) {
|
|
64
|
-
await parse({ input, output, adapter, naming });
|
|
65
|
-
} else {
|
|
66
|
-
// Print JSONL to stdout (one transcript per line)
|
|
67
|
-
const { transcripts } = await parseToTranscripts({ input, adapter });
|
|
68
|
-
for (const transcript of transcripts) {
|
|
69
|
-
console.log(JSON.stringify(transcript));
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
},
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
// Render subcommand
|
|
76
|
-
const renderCmd = command({
|
|
77
|
-
name: "render",
|
|
78
|
-
description: "Render intermediate JSON to markdown",
|
|
79
|
-
args: {
|
|
80
|
-
input: inputArg,
|
|
81
|
-
output: outputOpt,
|
|
82
|
-
head: headOpt,
|
|
83
|
-
},
|
|
84
|
-
async handler({ input, output, head }) {
|
|
85
|
-
await render({ input, output, head });
|
|
86
|
-
},
|
|
87
|
-
});
|
|
88
|
-
|
|
89
47
|
// Sync subcommand
|
|
90
48
|
const syncCmd = command({
|
|
91
49
|
name: "sync",
|
|
@@ -100,7 +58,7 @@ const syncCmd = command({
|
|
|
100
58
|
type: string,
|
|
101
59
|
long: "output",
|
|
102
60
|
short: "o",
|
|
103
|
-
description: "Output directory
|
|
61
|
+
description: "Output directory for transcripts",
|
|
104
62
|
}),
|
|
105
63
|
force: flag({
|
|
106
64
|
long: "force",
|
|
@@ -118,10 +76,17 @@ const syncCmd = command({
|
|
|
118
76
|
},
|
|
119
77
|
});
|
|
120
78
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
79
|
+
/**
|
|
80
|
+
* Check if output looks like a directory (no extension) vs a specific file.
|
|
81
|
+
*/
|
|
82
|
+
function isDirectoryOutput(output: string): boolean {
|
|
83
|
+
return !output.match(/\.\w+$/);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Convert subcommand: full pipeline (parse → render) - the default
|
|
87
|
+
const convertCmd = command({
|
|
88
|
+
name: "convert",
|
|
89
|
+
description: "Full pipeline: parse source and render to markdown (default)",
|
|
125
90
|
args: {
|
|
126
91
|
input: inputArg,
|
|
127
92
|
output: outputOpt,
|
|
@@ -129,19 +94,22 @@ const defaultCmd = command({
|
|
|
129
94
|
head: headOpt,
|
|
130
95
|
},
|
|
131
96
|
async handler({ input, output, adapter, head }) {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
97
|
+
if (output && isDirectoryOutput(output)) {
|
|
98
|
+
// Directory output: use provenance tracking
|
|
99
|
+
await convertToDirectory({
|
|
100
|
+
input,
|
|
101
|
+
outputDir: output,
|
|
102
|
+
adapter,
|
|
103
|
+
head,
|
|
104
|
+
});
|
|
105
|
+
} else if (output) {
|
|
106
|
+
// Explicit file output: not supported anymore (use directory)
|
|
107
|
+
console.error(
|
|
108
|
+
"Error: Explicit file output not supported. Use a directory path instead.",
|
|
109
|
+
);
|
|
110
|
+
process.exit(1);
|
|
143
111
|
} else {
|
|
144
|
-
//
|
|
112
|
+
// No output: stream to stdout
|
|
145
113
|
const { transcripts } = await parseToTranscripts({ input, adapter });
|
|
146
114
|
for (let i = 0; i < transcripts.length; i++) {
|
|
147
115
|
if (i > 0) console.log(); // blank line between transcripts
|
|
@@ -151,25 +119,27 @@ const defaultCmd = command({
|
|
|
151
119
|
},
|
|
152
120
|
});
|
|
153
121
|
|
|
122
|
+
const SUBCOMMANDS = ["convert", "sync"] as const;
|
|
123
|
+
|
|
154
124
|
// Main CLI with subcommands
|
|
155
125
|
const cli = subcommands({
|
|
156
126
|
name: "agent-transcripts",
|
|
157
127
|
description: "Transform agent session files to readable transcripts",
|
|
158
128
|
cmds: {
|
|
159
|
-
|
|
160
|
-
render: renderCmd,
|
|
129
|
+
convert: convertCmd,
|
|
161
130
|
sync: syncCmd,
|
|
162
131
|
},
|
|
163
|
-
// Default command when no subcommand is specified
|
|
164
132
|
});
|
|
165
133
|
|
|
166
134
|
// Run CLI
|
|
167
135
|
const args = process.argv.slice(2);
|
|
168
136
|
|
|
169
|
-
//
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
137
|
+
// If first arg isn't a subcommand (and isn't a help flag), prepend "convert" as the default
|
|
138
|
+
const isSubcommand =
|
|
139
|
+
args.length > 0 &&
|
|
140
|
+
SUBCOMMANDS.includes(args[0] as (typeof SUBCOMMANDS)[number]);
|
|
141
|
+
const isHelpFlag =
|
|
142
|
+
args.length === 0 || args[0] === "--help" || args[0] === "-h";
|
|
143
|
+
const effectiveArgs = isSubcommand || isHelpFlag ? args : ["convert", ...args];
|
|
144
|
+
|
|
145
|
+
run(cli, effectiveArgs);
|
package/src/convert.ts
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Convert command: full pipeline with provenance tracking.
|
|
3
|
+
*
|
|
4
|
+
* When output is a directory, uses provenance tracking via transcripts.json
|
|
5
|
+
* index to manage output files.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { join } from "path";
|
|
9
|
+
import { mkdir } from "fs/promises";
|
|
10
|
+
import { parseToTranscripts } from "./parse.ts";
|
|
11
|
+
import { renderTranscript } from "./render.ts";
|
|
12
|
+
import { generateOutputName, extractSessionId } from "./utils/naming.ts";
|
|
13
|
+
import {
|
|
14
|
+
loadIndex,
|
|
15
|
+
saveIndex,
|
|
16
|
+
removeEntriesForSource,
|
|
17
|
+
restoreEntries,
|
|
18
|
+
deleteOutputFiles,
|
|
19
|
+
setEntry,
|
|
20
|
+
normalizeSourcePath,
|
|
21
|
+
} from "./utils/provenance.ts";
|
|
22
|
+
|
|
23
|
+
export interface ConvertToDirectoryOptions {
|
|
24
|
+
input: string;
|
|
25
|
+
outputDir: string;
|
|
26
|
+
adapter?: string;
|
|
27
|
+
head?: string;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Convert source file to markdown in output directory.
|
|
32
|
+
* Uses provenance tracking to replace existing outputs.
|
|
33
|
+
*/
|
|
34
|
+
export async function convertToDirectory(
|
|
35
|
+
options: ConvertToDirectoryOptions,
|
|
36
|
+
): Promise<void> {
|
|
37
|
+
const { input, outputDir, adapter, head } = options;
|
|
38
|
+
|
|
39
|
+
// Ensure output directory exists
|
|
40
|
+
await mkdir(outputDir, { recursive: true });
|
|
41
|
+
|
|
42
|
+
// Parse input to transcripts
|
|
43
|
+
const { transcripts, inputPath } = await parseToTranscripts({
|
|
44
|
+
input,
|
|
45
|
+
adapter,
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
// Normalize source path for consistent index keys
|
|
49
|
+
const sourcePath = normalizeSourcePath(inputPath);
|
|
50
|
+
|
|
51
|
+
// Load index and handle existing outputs
|
|
52
|
+
const index = await loadIndex(outputDir);
|
|
53
|
+
|
|
54
|
+
// Remove old entries (save for restoration on error)
|
|
55
|
+
const removedEntries =
|
|
56
|
+
sourcePath !== "<stdin>" ? removeEntriesForSource(index, sourcePath) : [];
|
|
57
|
+
|
|
58
|
+
// Get source mtime for index entry
|
|
59
|
+
let sourceMtime = Date.now();
|
|
60
|
+
if (sourcePath !== "<stdin>") {
|
|
61
|
+
try {
|
|
62
|
+
const stat = await Bun.file(sourcePath).stat();
|
|
63
|
+
if (stat) {
|
|
64
|
+
sourceMtime = stat.mtime.getTime();
|
|
65
|
+
}
|
|
66
|
+
} catch {
|
|
67
|
+
// Use current time as fallback
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const sessionId = extractSessionId(inputPath);
|
|
72
|
+
const newOutputs: string[] = [];
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
// Generate fresh outputs
|
|
76
|
+
for (let i = 0; i < transcripts.length; i++) {
|
|
77
|
+
const transcript = transcripts[i];
|
|
78
|
+
const segmentIndex = transcripts.length > 1 ? i + 1 : undefined;
|
|
79
|
+
|
|
80
|
+
// Generate deterministic name
|
|
81
|
+
const baseName = generateOutputName(transcript, inputPath);
|
|
82
|
+
const suffix = segmentIndex ? `_${segmentIndex}` : "";
|
|
83
|
+
const relativePath = `${baseName}${suffix}.md`;
|
|
84
|
+
const outputPath = join(outputDir, relativePath);
|
|
85
|
+
|
|
86
|
+
// Render with provenance front matter
|
|
87
|
+
const markdown = renderTranscript(transcript, {
|
|
88
|
+
head,
|
|
89
|
+
sourcePath: sourcePath !== "<stdin>" ? sourcePath : undefined,
|
|
90
|
+
});
|
|
91
|
+
await Bun.write(outputPath, markdown);
|
|
92
|
+
newOutputs.push(relativePath);
|
|
93
|
+
|
|
94
|
+
// Update index (only for non-stdin sources)
|
|
95
|
+
if (sourcePath !== "<stdin>") {
|
|
96
|
+
setEntry(index, relativePath, {
|
|
97
|
+
source: sourcePath,
|
|
98
|
+
sourceMtime,
|
|
99
|
+
sessionId,
|
|
100
|
+
segmentIndex,
|
|
101
|
+
syncedAt: new Date().toISOString(),
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
console.error(`Wrote: ${outputPath}`);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Success: delete old output files (after new ones are written)
|
|
109
|
+
const oldFilenames = removedEntries.map((e) => e.filename);
|
|
110
|
+
const toDelete = oldFilenames.filter((f) => !newOutputs.includes(f));
|
|
111
|
+
if (toDelete.length > 0) {
|
|
112
|
+
await deleteOutputFiles(outputDir, toDelete);
|
|
113
|
+
}
|
|
114
|
+
} catch (error) {
|
|
115
|
+
// Clean up any newly written files before restoring old entries
|
|
116
|
+
if (newOutputs.length > 0) {
|
|
117
|
+
await deleteOutputFiles(outputDir, newOutputs);
|
|
118
|
+
}
|
|
119
|
+
// Restore old entries on error to preserve provenance
|
|
120
|
+
restoreEntries(index, removedEntries);
|
|
121
|
+
throw error;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Save index
|
|
125
|
+
await saveIndex(outputDir, index);
|
|
126
|
+
}
|
package/src/parse.ts
CHANGED
|
@@ -1,32 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Parse
|
|
2
|
+
* Parse: source format → intermediate transcript format
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { dirname, join } from "path";
|
|
6
|
-
import { mkdir } from "fs/promises";
|
|
7
5
|
import type { Transcript } from "./types.ts";
|
|
8
6
|
import { detectAdapter, getAdapter, listAdapters } from "./adapters/index.ts";
|
|
9
|
-
import { generateOutputName, type NamingOptions } from "./utils/naming.ts";
|
|
10
7
|
|
|
11
8
|
export interface ParseOptions {
|
|
12
|
-
input
|
|
13
|
-
output?: string; // output path/dir
|
|
9
|
+
input: string; // file path, or "-" for stdin
|
|
14
10
|
adapter?: string; // explicit adapter name
|
|
15
|
-
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export interface ParseResult {
|
|
14
|
+
transcripts: Transcript[];
|
|
15
|
+
inputPath: string;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
18
|
/**
|
|
19
19
|
* Read input content from file or stdin.
|
|
20
20
|
*/
|
|
21
21
|
async function readInput(
|
|
22
|
-
input
|
|
22
|
+
input: string,
|
|
23
23
|
): Promise<{ content: string; path: string }> {
|
|
24
|
-
if (input) {
|
|
24
|
+
if (input !== "-") {
|
|
25
25
|
const content = await Bun.file(input).text();
|
|
26
26
|
return { content, path: input };
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
// Read from stdin
|
|
29
|
+
// Read from stdin (when input is "-")
|
|
30
30
|
const chunks: string[] = [];
|
|
31
31
|
const reader = Bun.stdin.stream().getReader();
|
|
32
32
|
|
|
@@ -40,73 +40,7 @@ async function readInput(
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
|
-
*
|
|
44
|
-
*/
|
|
45
|
-
async function getOutputPaths(
|
|
46
|
-
transcripts: Transcript[],
|
|
47
|
-
inputPath: string,
|
|
48
|
-
outputOption?: string,
|
|
49
|
-
namingOptions?: NamingOptions,
|
|
50
|
-
): Promise<string[]> {
|
|
51
|
-
// Determine output directory
|
|
52
|
-
let outputDir: string;
|
|
53
|
-
let explicitBaseName: string | undefined;
|
|
54
|
-
|
|
55
|
-
if (outputOption) {
|
|
56
|
-
// If output looks like a file (has extension), use its directory and name
|
|
57
|
-
if (outputOption.match(/\.\w+$/)) {
|
|
58
|
-
outputDir = dirname(outputOption);
|
|
59
|
-
explicitBaseName = outputOption
|
|
60
|
-
.split("/")
|
|
61
|
-
.pop()!
|
|
62
|
-
.replace(/\.\w+$/, "");
|
|
63
|
-
} else {
|
|
64
|
-
outputDir = outputOption;
|
|
65
|
-
}
|
|
66
|
-
} else {
|
|
67
|
-
outputDir = process.cwd();
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
// Generate paths with descriptive names
|
|
71
|
-
const paths: string[] = [];
|
|
72
|
-
|
|
73
|
-
for (let i = 0; i < transcripts.length; i++) {
|
|
74
|
-
let baseName: string;
|
|
75
|
-
|
|
76
|
-
if (explicitBaseName) {
|
|
77
|
-
// User provided explicit filename
|
|
78
|
-
baseName = explicitBaseName;
|
|
79
|
-
} else {
|
|
80
|
-
// Generate descriptive name
|
|
81
|
-
baseName = await generateOutputName(
|
|
82
|
-
transcripts[i],
|
|
83
|
-
inputPath,
|
|
84
|
-
namingOptions || {},
|
|
85
|
-
);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// Add suffix for multiple transcripts
|
|
89
|
-
if (transcripts.length > 1) {
|
|
90
|
-
baseName = `${baseName}_${i + 1}`;
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
paths.push(join(outputDir, `${baseName}.json`));
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
return paths;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
export interface ParseResult {
|
|
100
|
-
transcripts: Transcript[];
|
|
101
|
-
inputPath: string;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
export interface ParseAndWriteResult extends ParseResult {
|
|
105
|
-
outputPaths: string[];
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
/**
|
|
109
|
-
* Parse source file(s) to transcripts (no file I/O beyond reading input).
|
|
43
|
+
* Parse source file(s) to transcripts.
|
|
110
44
|
*/
|
|
111
45
|
export async function parseToTranscripts(
|
|
112
46
|
options: ParseOptions,
|
|
@@ -115,7 +49,7 @@ export async function parseToTranscripts(
|
|
|
115
49
|
|
|
116
50
|
// Determine adapter
|
|
117
51
|
let adapterName = options.adapter;
|
|
118
|
-
if (!adapterName && options.input) {
|
|
52
|
+
if (!adapterName && options.input !== "-") {
|
|
119
53
|
adapterName = detectAdapter(options.input);
|
|
120
54
|
}
|
|
121
55
|
|
|
@@ -135,31 +69,3 @@ export async function parseToTranscripts(
|
|
|
135
69
|
const transcripts = adapter.parse(content, inputPath);
|
|
136
70
|
return { transcripts, inputPath };
|
|
137
71
|
}
|
|
138
|
-
|
|
139
|
-
/**
|
|
140
|
-
* Parse source file(s) to intermediate JSON and write to files.
|
|
141
|
-
*/
|
|
142
|
-
export async function parse(
|
|
143
|
-
options: ParseOptions,
|
|
144
|
-
): Promise<ParseAndWriteResult> {
|
|
145
|
-
const { transcripts, inputPath } = await parseToTranscripts(options);
|
|
146
|
-
|
|
147
|
-
// Write output files
|
|
148
|
-
const outputPaths = await getOutputPaths(
|
|
149
|
-
transcripts,
|
|
150
|
-
inputPath,
|
|
151
|
-
options.output,
|
|
152
|
-
options.naming,
|
|
153
|
-
);
|
|
154
|
-
|
|
155
|
-
for (let i = 0; i < transcripts.length; i++) {
|
|
156
|
-
const json = JSON.stringify(transcripts[i], null, 2);
|
|
157
|
-
// Ensure directory exists
|
|
158
|
-
const dir = dirname(outputPaths[i]);
|
|
159
|
-
await mkdir(dir, { recursive: true });
|
|
160
|
-
await Bun.write(outputPaths[i], json);
|
|
161
|
-
console.error(`Wrote: ${outputPaths[i]}`);
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
return { transcripts, inputPath, outputPaths };
|
|
165
|
-
}
|