@jackwener/opencli 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,184 @@
1
/**
 * Reddit post reader with threaded comment tree.
 *
 * Replaces the original flat read.yaml with recursive comment traversal:
 * - Top-K comments by score at each level
 * - Configurable depth and replies-per-level
 * - Indented output showing conversation threads
 */
import { cli, Strategy } from '../../registry.js';

cli({
  site: 'reddit',
  name: 'read',
  description: 'Read a Reddit post and its comments',
  domain: 'reddit.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'post_id', required: true, help: 'Post ID (e.g. 1abc123) or full URL' },
    { name: 'sort', default: 'best', help: 'Comment sort: best, top, new, controversial, old, qa' },
    { name: 'limit', type: 'int', default: 25, help: 'Number of top-level comments' },
    { name: 'depth', type: 'int', default: 2, help: 'Max reply depth (1=no replies, 2=one level of replies, etc.)' },
    { name: 'replies', type: 'int', default: 5, help: 'Max replies shown per comment at each level (sorted by score)' },
    { name: 'max_length', type: 'int', default: 2000, help: 'Max characters per comment body (min 100)' },
  ],
  columns: ['type', 'author', 'score', 'text'],
  // Navigates to reddit.com so the in-page fetch below carries session
  // cookies, then evaluates an async IIFE that calls Reddit's JSON API and
  // flattens the comment tree into rows for the `columns` above.
  func: async (page, kwargs) => {
    // Clamp user-supplied numbers to sane minimums (limit/depth/replies >= 1,
    // max_length >= 100 per the arg help text).
    const sort = kwargs.sort ?? 'best';
    const limit = Math.max(1, kwargs.limit ?? 25);
    const maxDepth = Math.max(1, kwargs.depth ?? 2);
    const maxReplies = Math.max(1, kwargs.replies ?? 5);
    const maxLength = Math.max(100, kwargs.max_length ?? 2000);
    await page.goto('https://www.reddit.com');
    await page.wait(2);
    // NOTE: the template literal below is evaluated inside the page context.
    // Scalars are interpolated into the source string (post_id/sort via
    // JSON.stringify so quoting is safe); `\\n` and `\\/` produce `\n`/`\/`
    // in the evaluated code.
    const data = await page.evaluate(`
      (async function() {
        var postId = ${JSON.stringify(kwargs.post_id)};
        var urlMatch = postId.match(/comments\\/([a-z0-9]+)/);
        if (urlMatch) postId = urlMatch[1];

        var sort = ${JSON.stringify(sort)};
        var limit = ${limit};
        var maxDepth = ${maxDepth};
        var maxReplies = ${maxReplies};
        var maxLength = ${maxLength};

        // Request more from API than top-level limit to get inline replies
        // depth param tells Reddit how deep to inline replies vs "more" stubs
        var apiLimit = Math.max(limit * 3, 100);
        var res = await fetch(
          '/comments/' + postId + '.json?sort=' + sort + '&limit=' + apiLimit + '&depth=' + (maxDepth + 1) + '&raw_json=1',
          { credentials: 'include' }
        );
        if (!res.ok) return { error: 'Reddit API returned HTTP ' + res.status };

        var data;
        try { data = await res.json(); } catch(e) { return { error: 'Failed to parse response' }; }
        if (!Array.isArray(data) || data.length < 2) return { error: 'Unexpected response format' };

        var results = [];

        // Post
        var post = data[0] && data[0].data && data[0].data.children && data[0].data.children[0] && data[0].data.children[0].data;
        if (post) {
          var body = post.selftext || '';
          if (body.length > maxLength) body = body.slice(0, maxLength) + '\\n... [truncated]';
          results.push({
            type: 'POST',
            author: post.author || '[deleted]',
            score: post.score || 0,
            text: post.title + (body ? '\\n\\n' + body : '') + (post.url && !post.is_self ? '\\n' + post.url : ''),
          });
        }

        // Recursive comment walker
        // depth 0 = top-level comments; maxDepth is exclusive,
        // so --depth 1 means top-level only, --depth 2 means one reply level, etc.
        function walkComment(node, depth) {
          if (!node || node.kind !== 't1') return;
          var d = node.data;
          var body = d.body || '';
          if (body.length > maxLength) body = body.slice(0, maxLength) + '...';

          // Indent prefix: apply to every line so multiline bodies stay aligned
          var indent = '';
          for (var i = 0; i < depth; i++) indent += ' ';
          var prefix = depth === 0 ? '' : indent + '> ';
          var indentedBody = depth === 0
            ? body
            : body.split('\\n').map(function(line) { return prefix + line; }).join('\\n');

          results.push({
            type: depth === 0 ? 'L0' : 'L' + depth,
            author: d.author || '[deleted]',
            score: d.score || 0,
            text: indentedBody,
          });

          // Count all available replies (for accurate "more" count)
          var t1Children = [];
          var moreCount = 0;
          if (d.replies && d.replies.data && d.replies.data.children) {
            var children = d.replies.data.children;
            for (var i = 0; i < children.length; i++) {
              if (children[i].kind === 't1') {
                t1Children.push(children[i]);
              } else if (children[i].kind === 'more') {
                moreCount += children[i].data.count || 0;
              }
            }
          }

          // At depth cutoff: don't recurse, but show all replies as hidden
          if (depth + 1 >= maxDepth) {
            var totalHidden = t1Children.length + moreCount;
            if (totalHidden > 0) {
              var cutoffIndent = '';
              for (var j = 0; j <= depth; j++) cutoffIndent += ' ';
              results.push({
                type: 'L' + (depth + 1),
                author: '',
                score: '',
                text: cutoffIndent + '[+' + totalHidden + ' more replies]',
              });
            }
            return;
          }

          // Sort by score descending, take top N
          t1Children.sort(function(a, b) { return (b.data.score || 0) - (a.data.score || 0); });
          var toProcess = Math.min(t1Children.length, maxReplies);
          for (var i = 0; i < toProcess; i++) {
            walkComment(t1Children[i], depth + 1);
          }

          // Show hidden count (skipped replies + "more" stubs)
          var hidden = t1Children.length - toProcess + moreCount;
          if (hidden > 0) {
            var moreIndent = '';
            for (var j = 0; j <= depth; j++) moreIndent += ' ';
            results.push({
              type: 'L' + (depth + 1),
              author: '',
              score: '',
              text: moreIndent + '[+' + hidden + ' more replies]',
            });
          }
        }

        // Walk top-level comments
        var topLevel = data[1].data.children || [];
        var t1TopLevel = [];
        for (var i = 0; i < topLevel.length; i++) {
          if (topLevel[i].kind === 't1') t1TopLevel.push(topLevel[i]);
        }

        // Top-level are already sorted by Reddit (sort param), take top N
        for (var i = 0; i < Math.min(t1TopLevel.length, limit); i++) {
          walkComment(t1TopLevel[i], 0);
        }

        // Count remaining
        var moreTopLevel = topLevel.filter(function(c) { return c.kind === 'more'; })
          .reduce(function(sum, c) { return sum + (c.data.count || 0); }, 0);
        var hiddenTopLevel = Math.max(0, t1TopLevel.length - limit) + moreTopLevel;
        if (hiddenTopLevel > 0) {
          results.push({
            type: '',
            author: '',
            score: '',
            text: '[+' + hiddenTopLevel + ' more top-level comments]',
          });
        }

        return results;
      })()
    `);
    // The IIFE returns either an array of rows or an { error } object;
    // anything else means the evaluate itself failed.
    if (!data || typeof data !== 'object')
      throw new Error('Failed to fetch post data');
    if (!Array.isArray(data) && data.error)
      throw new Error(data.error);
    if (!Array.isArray(data))
      throw new Error('Unexpected response');
    return data;
  },
});
@@ -0,0 +1,44 @@
1
/**
 * Transcript grouping: sentence merging, speaker detection, and chapter support.
 * Ported and simplified from Defuddle's YouTube extractor.
 *
 * Raw segments (2-3 second fragments) are grouped into readable paragraphs:
 * - Sentence boundaries: merge until sentence-ending punctuation (.!?)
 * - Speaker turns: detect ">>" markers from YouTube auto-captions
 * - Chapters: optional chapter headings inserted at appropriate timestamps
 */
/** One raw caption fragment as delivered by the transcript source. */
export interface RawSegment {
    /** Fragment start time in seconds. */
    start: number;
    /** Fragment end time (presumably seconds, same unit as start — unused by grouping). */
    end: number;
    /** Fragment text, possibly with a leading ">>" speaker marker. */
    text: string;
}
/** A merged, display-ready block of transcript text. */
export interface GroupedSegment {
    /** Start time (seconds) of the first fragment in the group. */
    start: number;
    text: string;
    /** True when this group begins a new speaker turn. */
    speakerChange: boolean;
    /** Zero-based speaker index; absent when no ">>" markers were detected. */
    speaker?: number;
}
/** A chapter heading to interleave into the formatted output. */
export interface Chapter {
    title: string;
    /** Chapter start time in seconds. */
    start: number;
}
/**
 * Group raw transcript segments into readable blocks.
 * If speaker markers (>>) are present, groups by speaker turn.
 * Otherwise, groups by sentence boundaries.
 */
export declare function groupTranscriptSegments(segments: {
    start: number;
    text: string;
}[]): GroupedSegment[];
/**
 * Format grouped segments + chapters into a final text output.
 */
export declare function formatGroupedTranscript(segments: GroupedSegment[], chapters?: Chapter[]): {
    rows: Array<{
        timestamp: string;
        speaker: string;
        text: string;
    }>;
    plainText: string;
};
@@ -0,0 +1,226 @@
1
/**
 * Transcript grouping: sentence merging, speaker detection, and chapter support.
 * Ported and simplified from Defuddle's YouTube extractor.
 *
 * Raw segments (2-3 second fragments) are grouped into readable paragraphs:
 * - Sentence boundaries: merge until sentence-ending punctuation (.!?)
 * - Speaker turns: detect ">>" markers from YouTube auto-captions
 * - Chapters: optional chapter headings inserted at appropriate timestamps
 */
// Sentence-ending punctuation, Western plus CJK fullwidth (。!?.), optionally
// followed by closing quotes/parens and trailing whitespace.
const SENTENCE_END = /[.!?\u3002\uFF01\uFF1F\uFF0E]["'\u2019\u201D)]*\s*$/;
// Question-ending punctuation (? and fullwidth ?), same closing-quote tail.
const QUESTION_END = /[?\uFF1F]["'\u2019\u201D)]*\s*$/;
// Silence gap (seconds) that always forces a new group.
const TRANSCRIPT_GROUP_GAP_SECONDS = 20;
// Caps applied when merging sentence groups within one speaker turn.
const TURN_MERGE_MAX_WORDS = 80;
const TURN_MERGE_MAX_SPAN_SECONDS = 45;
// "Yeah." / "Okay." style utterances kept as standalone lines.
const SHORT_UTTERANCE_MAX_WORDS = 3;
// A turn's opening group must reach this size before anything merges into it.
const FIRST_GROUP_MERGE_MIN_WORDS = 8;

/** Count whitespace-delimited words in a string (0 for empty/blank input). */
function countWords(text) {
    const tokens = text.match(/\S+/g);
    return tokens === null ? 0 : tokens.length;
}
21
/**
 * Group raw transcript segments into readable blocks.
 * If speaker markers (>>) are present, groups by speaker turn.
 * Otherwise, groups by sentence boundaries.
 */
export function groupTranscriptSegments(segments) {
    if (segments.length === 0) {
        return [];
    }
    const speakerMarked = segments.some((seg) => seg.text.startsWith('>>'));
    if (speakerMarked) {
        return groupBySpeaker(segments);
    }
    return groupBySentence(segments);
}
32
/**
 * Format grouped segments + chapters into a final text output.
 * Chapter headings are interleaved before the first segment at or after
 * their start time; chapters past the final segment are not emitted.
 */
export function formatGroupedTranscript(segments, chapters = []) {
    const pendingChapters = [...chapters].sort((a, b) => a.start - b.start);
    let nextChapter = 0;
    const rows = [];
    const lines = [];
    for (const seg of segments) {
        // Emit every chapter heading whose start time we have reached.
        while (nextChapter < pendingChapters.length && pendingChapters[nextChapter].start <= seg.start) {
            const { title, start } = pendingChapters[nextChapter];
            rows.push({ timestamp: fmtTime(start), speaker: '', text: `[Chapter] ${title}` });
            if (lines.length > 0) {
                lines.push('');
            }
            lines.push(`### ${title}`, '');
            nextChapter += 1;
        }
        const stamp = fmtTime(seg.start);
        const speakerLabel = seg.speaker === undefined ? '' : `Speaker ${seg.speaker + 1}`;
        rows.push({ timestamp: stamp, speaker: speakerLabel, text: seg.text });
        // Blank line between speaker turns in the plain-text rendering.
        if (seg.speakerChange && lines.length > 0) {
            lines.push('');
        }
        lines.push(`${stamp} ${seg.text}`);
    }
    return { rows, plainText: lines.join('\n') };
}
61
/**
 * Render a non-negative time in seconds as "m:ss", or "h:mm:ss" once the
 * duration reaches an hour. Fractional seconds are floored.
 */
function fmtTime(sec) {
    const whole = Math.floor(sec);
    const hours = Math.floor(whole / 3600);
    const minutes = Math.floor(whole / 60) % 60;
    const seconds = String(whole % 60).padStart(2, '0');
    return hours > 0
        ? `${hours}:${String(minutes).padStart(2, '0')}:${seconds}`
        : `${minutes}:${seconds}`;
}
70
// ── Sentence grouping ─────────────────────────────────────────────────────
// Max time span (seconds) for a single group when no sentence boundaries are
// found. Prevents unbounded merging for languages without punctuation
// (Chinese, etc.).
const MAX_GROUP_SPAN_SECONDS = 30;

/**
 * Merge consecutive fragments into sentence-sized groups. A group closes
 * when a fragment ends a sentence, when the silence gap since the previous
 * fragment exceeds TRANSCRIPT_GROUP_GAP_SECONDS, or when the group's time
 * span exceeds MAX_GROUP_SPAN_SECONDS.
 */
function groupBySentence(segments) {
    const groups = [];
    let pending = '';
    let pendingStart = 0;
    let prevStart = 0;
    const emit = () => {
        // Only non-blank buffers produce a group (and only then is the
        // buffer cleared, matching accumulate-then-flush semantics).
        if (pending.trim()) {
            groups.push({ start: pendingStart, text: pending.trim(), speakerChange: false });
            pending = '';
        }
    };
    for (const seg of segments) {
        // A long silence between fragments always closes the open group.
        if (pending && seg.start - prevStart > TRANSCRIPT_GROUP_GAP_SECONDS) {
            emit();
        }
        // Time-based cutoff for text that never hits a sentence boundary.
        if (pending && seg.start - pendingStart > MAX_GROUP_SPAN_SECONDS) {
            emit();
        }
        if (!pending) {
            pendingStart = seg.start;
        }
        pending = pending ? `${pending} ${seg.text}` : seg.text;
        prevStart = seg.start;
        // Close the group when this fragment ends a sentence.
        if (SENTENCE_END.test(seg.text)) {
            emit();
        }
    }
    emit();
    return groups;
}
104
// ── Speaker grouping ──────────────────────────────────────────────────────
/**
 * Group segments into speaker turns using ">>" markers.
 * A ">>" only starts a new turn when the previous fragment finished a
 * sentence; mid-sentence markers are treated as caption noise.
 * NOTE(review): speaker indices alternate mod 2, so this assumes a
 * two-speaker conversation — a third speaker folds back onto index 0.
 */
function groupBySpeaker(segments) {
    const turns = [];
    let currentTurn = null;
    let speakerIndex = -1;
    let prevSegText = '';
    for (const seg of segments) {
        const isSpeakerChange = /^>>/.test(seg.text);
        // Strip the ">>" marker and any leading "- " bullet from display text.
        const cleanText = seg.text.replace(/^>>\s*/, '').replace(/^-\s+/, '');
        const prevEndsWithComma = /,\s*$/.test(prevSegText);
        // A boundary counts only if the previous fragment ended a sentence
        // (or there was none) and did not end with a comma (continuing clause).
        const prevEndedSentence = (SENTENCE_END.test(prevSegText) || !prevSegText) && !prevEndsWithComma;
        const isRealSpeakerChange = isSpeakerChange && prevEndedSentence;
        if (isRealSpeakerChange) {
            if (currentTurn)
                turns.push(currentTurn);
            speakerIndex = (speakerIndex + 1) % 2;
            currentTurn = {
                start: seg.start,
                segments: [{ start: seg.start, text: cleanText }],
                speakerChange: true,
                speaker: speakerIndex,
            };
        }
        else {
            if (!currentTurn) {
                // Segments before the first ">>" form an anonymous turn
                // with no speaker attribution.
                currentTurn = { start: seg.start, segments: [], speakerChange: false };
            }
            currentTurn.segments.push({ start: seg.start, text: cleanText });
        }
        prevSegText = cleanText;
    }
    if (currentTurn)
        turns.push(currentTurn);
    // Break off short backchannel affirmations into their own turns.
    splitAffirmativeTurns(turns);
    const groups = [];
    for (const turn of turns) {
        // Anonymous turns get plain sentence grouping; attributed turns also
        // merge small sentence groups into larger paragraphs.
        const sentenceGroups = turn.speaker === undefined
            ? groupBySentence(turn.segments)
            : mergeSentenceGroupsWithinTurn(groupBySentence(turn.segments));
        for (let i = 0; i < sentenceGroups.length; i++) {
            groups.push({
                ...sentenceGroups[i],
                speakerChange: i === 0 && turn.speakerChange,
                speaker: turn.speaker,
            });
        }
    }
    return groups;
}
153
/**
 * Split turns that open with a short affirmation ("Yeah." / "Right.")
 * followed by a substantial statement: the affirmation stays with the
 * current speaker and the remainder is reattributed to the other speaker
 * (two-speaker assumption). Mutates `turns` in place.
 */
function splitAffirmativeTurns(turns) {
    const affirmativePattern = /^(mhm|yeah|yes|yep|right|okay|ok|absolutely|sure|exactly|uh-huh|mm-hmm)[.!,]?\s+/i;
    for (let i = 0; i < turns.length; i++) {
        const turn = turns[i];
        // Only attributed, non-empty turns are candidates.
        if (turn.speaker === undefined || turn.segments.length === 0)
            continue;
        const firstSeg = turn.segments[0];
        const match = affirmativePattern.exec(firstSeg.text);
        if (!match)
            continue;
        // "Yeah, ..." — a trailing comma means the affirmative flows into the
        // following clause; keep the turn intact.
        if (/,\s*$/.test(match[0]))
            continue;
        const remainder = firstSeg.text.slice(match[0].length).trim();
        const restSegments = turn.segments.slice(1);
        const restWords = countWords(remainder) + restSegments.reduce((sum, s) => sum + countWords(s.text), 0);
        // Only split when the remainder is substantial (>= 30 words).
        if (restWords < 30)
            continue;
        const affirmativeText = match[0].trimEnd();
        const newRestSegments = remainder
            ? [{ start: firstSeg.start, text: remainder }, ...restSegments]
            : restSegments;
        // Replace the turn with (1) the bare affirmation for the original
        // speaker and (2) the remainder as a new turn for the other speaker.
        turns.splice(i, 1, {
            start: turn.start,
            segments: [{ start: firstSeg.start, text: affirmativeText }],
            speakerChange: turn.speakerChange,
            speaker: turn.speaker,
        }, {
            start: newRestSegments[0].start,
            segments: newRestSegments,
            speakerChange: true,
            speaker: turn.speaker === 0 ? 1 : 0,
        });
        // Skip past the turn we just inserted.
        i++;
    }
}
188
/**
 * Collapse adjacent sentence groups inside a single speaker turn into
 * larger paragraphs, subject to the limits in shouldMergeSentenceGroups().
 */
function mergeSentenceGroupsWithinTurn(groups) {
    if (groups.length <= 1) {
        return groups;
    }
    const merged = [];
    let acc = { ...groups[0] };
    // Tracks whether `acc` still contains the turn's opening group.
    let accIsFirstInTurn = true;
    for (const next of groups.slice(1)) {
        if (shouldMergeSentenceGroups(acc, next, accIsFirstInTurn)) {
            acc.text = `${acc.text} ${next.text}`;
        } else {
            merged.push(acc);
            acc = { ...next };
            accIsFirstInTurn = false;
        }
    }
    merged.push(acc);
    return merged;
}
207
/**
 * Decide whether two adjacent sentence groups in the same turn may merge.
 * Declines when either side is a short standalone utterance or a question,
 * when the opening group is still too small, or when word/time caps would
 * be exceeded.
 */
function shouldMergeSentenceGroups(current, next, currentIsFirstInTurn) {
    const currentCount = countWords(current.text);
    const nextCount = countWords(next.text);
    // Keep short reactions ("Yeah.") on their own line.
    if (isShortStandaloneUtterance(current.text, currentCount))
        return false;
    if (isShortStandaloneUtterance(next.text, nextCount))
        return false;
    // Don't absorb text into a tiny opening group.
    if (currentIsFirstInTurn && currentCount < FIRST_GROUP_MERGE_MIN_WORDS)
        return false;
    // Questions stay separate on either side of the boundary.
    if (QUESTION_END.test(current.text))
        return false;
    if (QUESTION_END.test(next.text))
        return false;
    if (currentCount + nextCount > TURN_MERGE_MAX_WORDS)
        return false;
    if (next.start - current.start > TURN_MERGE_MAX_SPAN_SECONDS)
        return false;
    return true;
}
223
/**
 * True for a short fragment (at most SHORT_UTTERANCE_MAX_WORDS words) that
 * ends with sentence punctuation — e.g. "Yeah.", "Exactly!".
 * `words` is an optional precomputed word count.
 */
function isShortStandaloneUtterance(text, words) {
    const count = words ?? countWords(text);
    if (count === 0 || count > SHORT_UTTERANCE_MAX_WORDS) {
        return false;
    }
    return SENTENCE_END.test(text);
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,99 @@
1
// Unit tests for transcript grouping and formatting (run under vitest).
import { describe, it, expect } from 'vitest';
import { groupTranscriptSegments, formatGroupedTranscript } from './transcript-group.js';
describe('groupTranscriptSegments', () => {
    it('groups segments by sentence boundaries', () => {
        const segments = [
            { start: 0, text: 'Hello there.' },
            { start: 2, text: 'How are you doing today?' },
            { start: 5, text: 'I am' },
            { start: 6, text: 'doing well.' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result).toHaveLength(3);
        expect(result[0].text).toBe('Hello there.');
        expect(result[1].text).toBe('How are you doing today?');
        expect(result[2].text).toBe('I am doing well.');
    });
    it('flushes on large time gaps', () => {
        const segments = [
            { start: 0, text: 'First part' },
            { start: 2, text: 'still first' },
            { start: 25, text: 'second part after gap' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result).toHaveLength(2);
        expect(result[0].text).toBe('First part still first');
        expect(result[1].text).toBe('second part after gap');
    });
    it('respects 30s max group span for unpunctuated text', () => {
        // Simulate CJK captions without punctuation
        const segments = Array.from({ length: 20 }, (_, i) => ({
            start: i * 2,
            text: `segment${i}`,
        }));
        const result = groupTranscriptSegments(segments);
        // 20 segments * 2s = 40s total, should be split into at least 2 groups
        expect(result.length).toBeGreaterThanOrEqual(2);
        // No single group should span more than ~30s
        for (const g of result) {
            const words = g.text.split(' ');
            // With 2s per segment and 30s max, each group should have at most ~16 segments
            expect(words.length).toBeLessThanOrEqual(16);
        }
    });
    it('detects speaker changes via >> markers', () => {
        const segments = [
            { start: 0, text: '>> How are you?' },
            { start: 3, text: '>> I am fine.' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result.some(g => g.speakerChange)).toBe(true);
        expect(result.some(g => g.speaker !== undefined)).toBe(true);
    });
    it('recognizes CJK sentence-ending punctuation', () => {
        const segments = [
            { start: 0, text: '你好世界。' },
            { start: 2, text: '这是测试' },
            { start: 4, text: '内容。' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result).toHaveLength(2);
        expect(result[0].text).toBe('你好世界。');
        expect(result[1].text).toBe('这是测试 内容。');
    });
    it('returns empty array for empty input', () => {
        expect(groupTranscriptSegments([])).toEqual([]);
    });
});
describe('formatGroupedTranscript', () => {
    it('formats timestamps correctly', () => {
        const segments = [
            { start: 65, text: 'One minute five.', speakerChange: false },
            { start: 3661, text: 'One hour one minute.', speakerChange: false },
        ];
        const { rows } = formatGroupedTranscript(segments);
        expect(rows[0].timestamp).toBe('1:05');
        expect(rows[1].timestamp).toBe('1:01:01');
    });
    it('inserts chapter headings at correct positions', () => {
        const segments = [
            { start: 0, text: 'Intro text.', speakerChange: false },
            { start: 60, text: 'Chapter content.', speakerChange: false },
        ];
        const chapters = [{ title: 'Introduction', start: 0 }, { title: 'Main', start: 50 }];
        const { rows } = formatGroupedTranscript(segments, chapters);
        expect(rows[0].text).toBe('[Chapter] Introduction');
        expect(rows[1].text).toBe('Intro text.');
        expect(rows[2].text).toBe('[Chapter] Main');
        expect(rows[3].text).toBe('Chapter content.');
    });
    it('labels speakers', () => {
        const segments = [
            { start: 0, text: 'Hello.', speakerChange: true, speaker: 0 },
            { start: 5, text: 'Hi there.', speakerChange: true, speaker: 1 },
        ];
        const { rows } = formatGroupedTranscript(segments);
        expect(rows[0].speaker).toBe('Speaker 1');
        expect(rows[1].speaker).toBe('Speaker 2');
    });
});
@@ -0,0 +1 @@
1
+ export {};