@jackwener/opencli 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Transcript grouping: sentence merging, speaker detection, and chapter support.
3
+ * Ported and simplified from Defuddle's YouTube extractor.
4
+ *
5
+ * Raw segments (2-3 second fragments) are grouped into readable paragraphs:
6
+ * - Sentence boundaries: merge until sentence-ending punctuation (.!?)
7
+ * - Speaker turns: detect ">>" markers from YouTube auto-captions
8
+ * - Chapters: optional chapter headings inserted at appropriate timestamps
9
+ */
10
// Include CJK sentence-ending punctuation: 。!? (fullwidth: .!?)
const SENTENCE_END = /[.!?\u3002\uFF01\uFF1F\uFF0E]["'\u2019\u201D)]*\s*$/;
// Question mark (ASCII or fullwidth), optionally followed by closing quotes/paren.
// Used to keep questions visually separated from their answers when merging.
const QUESTION_END = /[?\uFF1F]["'\u2019\u201D)]*\s*$/;
// Silence gap (seconds) between segments that always starts a new group.
const TRANSCRIPT_GROUP_GAP_SECONDS = 20;
// Upper bound on total words when merging sentence groups within one speaker turn.
const TURN_MERGE_MAX_WORDS = 80;
// Upper bound on time span (seconds) covered by a merged group within one turn.
const TURN_MERGE_MAX_SPAN_SECONDS = 45;
// Sentences with at most this many words are treated as standalone utterances
// (e.g. "Yeah.") and never merged into neighbors.
const SHORT_UTTERANCE_MAX_WORDS = 3;
// A turn's first sentence group must reach this many words before subsequent
// groups may be merged into it.
const FIRST_GROUP_MERGE_MIN_WORDS = 8;
18
/**
 * Count whitespace-separated words in a string.
 * Empty or whitespace-only input yields 0.
 */
function countWords(text) {
    const words = text.match(/\S+/g);
    return words ? words.length : 0;
}
21
/**
 * Group raw transcript segments into readable blocks.
 * Segments carrying YouTube's ">>" speaker markers are grouped by speaker
 * turn; otherwise segments are merged at sentence boundaries.
 */
export function groupTranscriptSegments(segments) {
    if (!segments.length) {
        return [];
    }
    // A single ">>" marker anywhere switches the whole transcript to
    // speaker-turn grouping.
    for (const segment of segments) {
        if (segment.text.startsWith('>>')) {
            return groupBySpeaker(segments);
        }
    }
    return groupBySentence(segments);
}
32
/**
 * Format grouped segments plus optional chapters into the final output:
 * tabular rows ({ timestamp, speaker, text }) and a plain-text transcript
 * with "### Chapter" headings and blank lines between speaker turns.
 */
export function formatGroupedTranscript(segments, chapters = []) {
    const rows = [];
    const textParts = [];
    const pending = [...chapters].sort((a, b) => a.start - b.start);
    let nextChapter = 0;
    for (const segment of segments) {
        // Emit every chapter heading whose start time has now been reached.
        while (nextChapter < pending.length && pending[nextChapter].start <= segment.start) {
            const { title, start } = pending[nextChapter];
            rows.push({ timestamp: fmtTime(start), speaker: '', text: `[Chapter] ${title}` });
            if (textParts.length > 0) {
                textParts.push('');
            }
            textParts.push(`### ${title}`, '');
            nextChapter += 1;
        }
        const timestamp = fmtTime(segment.start);
        const speaker = segment.speaker === undefined ? '' : `Speaker ${segment.speaker + 1}`;
        rows.push({ timestamp, speaker, text: segment.text });
        // Blank line before a new speaker's paragraph (except at the very top).
        if (segment.speakerChange && textParts.length > 0) {
            textParts.push('');
        }
        textParts.push(`${timestamp} ${segment.text}`);
    }
    return { rows, plainText: textParts.join('\n') };
}
61
/**
 * Format a time in seconds as "M:SS", or "H:MM:SS" once it reaches an hour.
 * Fractional seconds are floored.
 */
function fmtTime(sec) {
    const pad = (n) => String(n).padStart(2, '0');
    const total = Math.floor(sec);
    const hours = Math.floor(total / 3600);
    const minutes = Math.floor(total / 60) % 60;
    const seconds = total % 60;
    return hours > 0
        ? `${hours}:${pad(minutes)}:${pad(seconds)}`
        : `${minutes}:${pad(seconds)}`;
}
70
// ── Sentence grouping ─────────────────────────────────────────────────────
// Max time span (seconds) for a single group when no sentence boundaries are found.
// Prevents unbounded merging for languages without punctuation (Chinese, etc.).
const MAX_GROUP_SPAN_SECONDS = 30;
/**
 * Merge consecutive segments into sentence-sized groups. A group is closed
 * when a segment ends with sentence punctuation, when a long silence gap
 * occurs, or when the group would exceed MAX_GROUP_SPAN_SECONDS.
 */
function groupBySentence(segments) {
    const groups = [];
    let buffer = '';
    let bufferStart = 0;
    let lastStart = 0;
    const flush = () => {
        const trimmed = buffer.trim();
        if (trimmed) {
            groups.push({ start: bufferStart, text: trimmed, speakerChange: false });
            buffer = '';
        }
    };
    for (const seg of segments) {
        // Flush on a long silence gap, or when the group's time span grows
        // too large (guards against unpunctuated languages never flushing).
        const gapExceeded = buffer !== '' && seg.start - lastStart > TRANSCRIPT_GROUP_GAP_SECONDS;
        const spanExceeded = buffer !== '' && seg.start - bufferStart > MAX_GROUP_SPAN_SECONDS;
        if (gapExceeded || spanExceeded) {
            flush();
        }
        if (buffer === '') {
            bufferStart = seg.start;
            buffer = seg.text;
        }
        else {
            buffer = `${buffer} ${seg.text}`;
        }
        lastStart = seg.start;
        if (SENTENCE_END.test(seg.text)) {
            flush();
        }
    }
    flush();
    return groups;
}
104
// ── Speaker grouping ──────────────────────────────────────────────────────
/**
 * Group segments into speaker turns using YouTube's ">>" markers.
 * Only two alternating speaker slots (0/1) are modeled. Each turn's segments
 * are then sentence-grouped, and attributed turns additionally get adjacent
 * sentence groups merged into larger paragraphs.
 */
function groupBySpeaker(segments) {
    const turns = [];
    let currentTurn = null;
    let speakerIndex = -1;
    let prevSegText = '';
    for (const seg of segments) {
        const isSpeakerChange = /^>>/.test(seg.text);
        // Strip the ">>" marker and any leading "- " dash from caption text.
        const cleanText = seg.text.replace(/^>>\s*/, '').replace(/^-\s+/, '');
        // A ">>" marker while the previous segment ended mid-sentence (comma,
        // or no sentence-ending punctuation) is not treated as a real turn.
        const prevEndsWithComma = /,\s*$/.test(prevSegText);
        const prevEndedSentence = (SENTENCE_END.test(prevSegText) || !prevSegText) && !prevEndsWithComma;
        const isRealSpeakerChange = isSpeakerChange && prevEndedSentence;
        if (isRealSpeakerChange) {
            if (currentTurn)
                turns.push(currentTurn);
            // Alternate between two speaker slots.
            speakerIndex = (speakerIndex + 1) % 2;
            currentTurn = {
                start: seg.start,
                segments: [{ start: seg.start, text: cleanText }],
                speakerChange: true,
                speaker: speakerIndex,
            };
        }
        else {
            if (!currentTurn) {
                // Segments before the first ">>": anonymous turn (no speaker).
                currentTurn = { start: seg.start, segments: [], speakerChange: false };
            }
            currentTurn.segments.push({ start: seg.start, text: cleanText });
        }
        prevSegText = cleanText;
    }
    if (currentTurn)
        turns.push(currentTurn);
    // Split off leading affirmations ("Yeah.", "Okay.") glued onto long turns.
    splitAffirmativeTurns(turns);
    const groups = [];
    for (const turn of turns) {
        // Anonymous turns: plain sentence grouping. Attributed turns: also
        // merge adjacent sentence groups into paragraphs.
        const sentenceGroups = turn.speaker === undefined
            ? groupBySentence(turn.segments)
            : mergeSentenceGroupsWithinTurn(groupBySentence(turn.segments));
        for (let i = 0; i < sentenceGroups.length; i++) {
            groups.push({
                ...sentenceGroups[i],
                // Only the turn's first group carries the speaker-change flag.
                speakerChange: i === 0 && turn.speakerChange,
                speaker: turn.speaker,
            });
        }
    }
    return groups;
}
153
/**
 * Split a turn that opens with a short affirmation ("Yeah", "Okay", ...)
 * followed by substantial content (>= 30 words): the affirmation stays on the
 * original speaker and the remainder becomes a new turn attributed to the
 * other speaker slot. Mutates `turns` in place via splice.
 */
function splitAffirmativeTurns(turns) {
    const affirmativePattern = /^(mhm|yeah|yes|yep|right|okay|ok|absolutely|sure|exactly|uh-huh|mm-hmm)[.!,]?\s+/i;
    for (let i = 0; i < turns.length; i++) {
        const turn = turns[i];
        // Only attributed, non-empty turns are candidates.
        if (turn.speaker === undefined || turn.segments.length === 0)
            continue;
        const firstSeg = turn.segments[0];
        const match = affirmativePattern.exec(firstSeg.text);
        if (!match)
            continue;
        // "Yeah, ..." with a trailing comma reads as a continuation — skip.
        if (/,\s*$/.test(match[0]))
            continue;
        const remainder = firstSeg.text.slice(match[0].length).trim();
        const restSegments = turn.segments.slice(1);
        const restWords = countWords(remainder) + restSegments.reduce((sum, s) => sum + countWords(s.text), 0);
        // Only split when what follows the affirmation is substantial.
        if (restWords < 30)
            continue;
        const affirmativeText = match[0].trimEnd();
        const newRestSegments = remainder
            ? [{ start: firstSeg.start, text: remainder }, ...restSegments]
            : restSegments;
        turns.splice(i, 1, {
            start: turn.start,
            segments: [{ start: firstSeg.start, text: affirmativeText }],
            speakerChange: turn.speakerChange,
            speaker: turn.speaker,
        }, {
            start: newRestSegments[0].start,
            segments: newRestSegments,
            speakerChange: true,
            // Attribute the remainder to the other of the two speaker slots.
            speaker: turn.speaker === 0 ? 1 : 0,
        });
        // Skip over the newly inserted second half.
        i++;
    }
}
188
/**
 * Within one speaker turn, merge adjacent sentence groups into larger
 * paragraphs, subject to the limits enforced by shouldMergeSentenceGroups.
 */
function mergeSentenceGroupsWithinTurn(groups) {
    if (groups.length < 2)
        return groups;
    const merged = [];
    let acc = { ...groups[0] };
    let accIsFirstInTurn = true;
    for (const next of groups.slice(1)) {
        if (shouldMergeSentenceGroups(acc, next, accIsFirstInTurn)) {
            // Absorb the next group's text; keep the accumulator's start time.
            acc.text += ` ${next.text}`;
        }
        else {
            merged.push(acc);
            acc = { ...next };
            accIsFirstInTurn = false;
        }
    }
    merged.push(acc);
    return merged;
}
207
/**
 * Decide whether two adjacent sentence groups in the same speaker turn
 * should be merged into one paragraph.
 */
function shouldMergeSentenceGroups(current, next, currentIsFirstInTurn) {
    const currentWords = countWords(current.text);
    const nextWords = countWords(next.text);
    // Short standalone utterances ("Yes.", "I see.") keep their own line.
    if (isShortStandaloneUtterance(current.text, currentWords))
        return false;
    if (isShortStandaloneUtterance(next.text, nextWords))
        return false;
    // Don't grow a turn's opening group until it has enough words itself.
    if (currentIsFirstInTurn && currentWords < FIRST_GROUP_MERGE_MIN_WORDS)
        return false;
    // Questions stay visually separate from their answers.
    if (QUESTION_END.test(current.text) || QUESTION_END.test(next.text))
        return false;
    // Cap paragraph size by word count, then by time span.
    if (currentWords + nextWords > TURN_MERGE_MAX_WORDS)
        return false;
    return next.start - current.start <= TURN_MERGE_MAX_SPAN_SECONDS;
}
223
/**
 * True when text is a complete, very short sentence (at most
 * SHORT_UTTERANCE_MAX_WORDS words, ending in sentence punctuation).
 * `words` is an optional precomputed word count.
 */
function isShortStandaloneUtterance(text, words) {
    const count = words ?? countWords(text);
    if (count === 0 || count > SHORT_UTTERANCE_MAX_WORDS)
        return false;
    return SENTENCE_END.test(text);
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,99 @@
1
// Unit tests (vitest) for transcript grouping and formatting.
import { describe, it, expect } from 'vitest';
import { groupTranscriptSegments, formatGroupedTranscript } from './transcript-group.js';
describe('groupTranscriptSegments', () => {
    it('groups segments by sentence boundaries', () => {
        // "I am" lacks terminal punctuation, so it merges with "doing well."
        const segments = [
            { start: 0, text: 'Hello there.' },
            { start: 2, text: 'How are you doing today?' },
            { start: 5, text: 'I am' },
            { start: 6, text: 'doing well.' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result).toHaveLength(3);
        expect(result[0].text).toBe('Hello there.');
        expect(result[1].text).toBe('How are you doing today?');
        expect(result[2].text).toBe('I am doing well.');
    });
    it('flushes on large time gaps', () => {
        // 23s silence between t=2 and t=25 exceeds the 20s gap threshold.
        const segments = [
            { start: 0, text: 'First part' },
            { start: 2, text: 'still first' },
            { start: 25, text: 'second part after gap' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result).toHaveLength(2);
        expect(result[0].text).toBe('First part still first');
        expect(result[1].text).toBe('second part after gap');
    });
    it('respects 30s max group span for unpunctuated text', () => {
        // Simulate CJK captions without punctuation
        const segments = Array.from({ length: 20 }, (_, i) => ({
            start: i * 2,
            text: `segment${i}`,
        }));
        const result = groupTranscriptSegments(segments);
        // 20 segments * 2s = 40s total, should be split into at least 2 groups
        expect(result.length).toBeGreaterThanOrEqual(2);
        // No single group should span more than ~30s
        for (const g of result) {
            const words = g.text.split(' ');
            // With 2s per segment and 30s max, each group should have at most ~16 segments
            expect(words.length).toBeLessThanOrEqual(16);
        }
    });
    it('detects speaker changes via >> markers', () => {
        const segments = [
            { start: 0, text: '>> How are you?' },
            { start: 3, text: '>> I am fine.' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result.some(g => g.speakerChange)).toBe(true);
        expect(result.some(g => g.speaker !== undefined)).toBe(true);
    });
    it('recognizes CJK sentence-ending punctuation', () => {
        // The fullwidth "。" must terminate a group just like ".".
        const segments = [
            { start: 0, text: '你好世界。' },
            { start: 2, text: '这是测试' },
            { start: 4, text: '内容。' },
        ];
        const result = groupTranscriptSegments(segments);
        expect(result).toHaveLength(2);
        expect(result[0].text).toBe('你好世界。');
        expect(result[1].text).toBe('这是测试 内容。');
    });
    it('returns empty array for empty input', () => {
        expect(groupTranscriptSegments([])).toEqual([]);
    });
});
describe('formatGroupedTranscript', () => {
    it('formats timestamps correctly', () => {
        // Under one hour: "M:SS"; one hour or more: "H:MM:SS".
        const segments = [
            { start: 65, text: 'One minute five.', speakerChange: false },
            { start: 3661, text: 'One hour one minute.', speakerChange: false },
        ];
        const { rows } = formatGroupedTranscript(segments);
        expect(rows[0].timestamp).toBe('1:05');
        expect(rows[1].timestamp).toBe('1:01:01');
    });
    it('inserts chapter headings at correct positions', () => {
        // The "Main" chapter at t=50 must appear before the t=60 segment.
        const segments = [
            { start: 0, text: 'Intro text.', speakerChange: false },
            { start: 60, text: 'Chapter content.', speakerChange: false },
        ];
        const chapters = [{ title: 'Introduction', start: 0 }, { title: 'Main', start: 50 }];
        const { rows } = formatGroupedTranscript(segments, chapters);
        expect(rows[0].text).toBe('[Chapter] Introduction');
        expect(rows[1].text).toBe('Intro text.');
        expect(rows[2].text).toBe('[Chapter] Main');
        expect(rows[3].text).toBe('Chapter content.');
    });
    it('labels speakers', () => {
        // Speaker indices are 0-based internally, 1-based in the output label.
        const segments = [
            { start: 0, text: 'Hello.', speakerChange: true, speaker: 0 },
            { start: 5, text: 'Hi there.', speakerChange: true, speaker: 1 },
        ];
        const { rows } = formatGroupedTranscript(segments);
        expect(rows[0].speaker).toBe('Speaker 1');
        expect(rows[1].speaker).toBe('Speaker 2');
    });
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,264 @@
1
+ /**
2
+ * YouTube transcript — uses InnerTube player API with Android client context.
3
+ *
4
+ * The Web client's caption URLs require a PoToken (proof of origin) generated
5
+ * by BotGuard at runtime. The Android client returns caption URLs that work
6
+ * without PoToken — same approach used by youtube-transcript-api (Python).
7
+ *
8
+ * Modes:
9
+ * --mode grouped (default): sentences merged, speaker detection, chapters
10
+ * --mode raw: every caption segment as-is with precise timestamps
11
+ */
12
+ import { cli, Strategy } from '../../registry.js';
13
+ import { parseVideoId } from './utils.js';
14
+ import { groupTranscriptSegments, formatGroupedTranscript, } from './transcript-group.js';
15
cli({
    site: 'youtube',
    name: 'transcript',
    description: 'Get YouTube video transcript/subtitles',
    domain: 'www.youtube.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'url', required: true, help: 'YouTube video URL or video ID' },
        { name: 'lang', required: false, help: 'Language code (e.g. en, zh-Hans). Omit to auto-select' },
        { name: 'mode', required: false, default: 'grouped', help: 'Output mode: grouped (readable paragraphs) or raw (every segment)' },
    ],
    // columns intentionally omitted — raw and grouped modes return different schemas,
    // so we let the renderer auto-detect columns from the data keys.
    //
    // Pipeline: navigate to the video → resolve a caption track URL via the
    // Android InnerTube player API → fetch and parse the caption XML →
    // (grouped mode) fetch chapters → format rows.
    func: async (page, kwargs) => {
        const videoId = parseVideoId(kwargs.url);
        const videoUrl = `https://www.youtube.com/watch?v=${videoId}`;
        await page.goto(videoUrl);
        // Fixed 3s wait so page scripts (ytcfg, used below) can initialize —
        // NOTE(review): assumed sufficient; confirm on slow connections.
        await page.wait(3);
        const lang = kwargs.lang || '';
        const mode = kwargs.mode || 'grouped';
        // Step 1: Get caption track URL via Android InnerTube API
        const captionData = await page.evaluate(`
      (async () => {
        const cfg = window.ytcfg?.data_ || {};
        const apiKey = cfg.INNERTUBE_API_KEY;
        if (!apiKey) return { error: 'INNERTUBE_API_KEY not found on page' };

        const resp = await fetch('/youtubei/v1/player?key=' + apiKey + '&prettyPrint=false', {
          method: 'POST',
          credentials: 'include',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            context: { client: { clientName: 'ANDROID', clientVersion: '20.10.38' } },
            videoId: ${JSON.stringify(videoId)}
          })
        });

        if (!resp.ok) return { error: 'InnerTube player API returned HTTP ' + resp.status };
        const data = await resp.json();

        const renderer = data.captions?.playerCaptionsTracklistRenderer;
        if (!renderer?.captionTracks?.length) {
          return { error: 'No captions available for this video' };
        }

        const tracks = renderer.captionTracks;
        const available = tracks.map(t => t.languageCode + (t.kind === 'asr' ? ' (auto)' : ''));

        const langPref = ${JSON.stringify(lang)};
        let track = null;
        if (langPref) {
          track = tracks.find(t => t.languageCode === langPref)
            || tracks.find(t => t.languageCode.startsWith(langPref));
        }
        if (!track) {
          track = tracks.find(t => t.kind !== 'asr') || tracks[0];
        }

        return {
          captionUrl: track.baseUrl,
          language: track.languageCode,
          kind: track.kind || 'manual',
          available,
          requestedLang: langPref || null,
          langMatched: !!(langPref && track.languageCode === langPref),
          langPrefixMatched: !!(langPref && track.languageCode !== langPref && track.languageCode.startsWith(langPref))
        };
      })()
    `);
        // page.evaluate may hand back a string or null on failure — treat both as fatal.
        if (!captionData || typeof captionData === 'string') {
            throw new Error(`Failed to get caption info: ${typeof captionData === 'string' ? captionData : 'null response'}`);
        }
        if (captionData.error) {
            throw new Error(`${captionData.error}${captionData.available ? ' (available: ' + captionData.available.join(', ') + ')' : ''}`);
        }
        // Warn if --lang was specified but not matched
        if (captionData.requestedLang && !captionData.langMatched && !captionData.langPrefixMatched) {
            console.error(`Warning: --lang "${captionData.requestedLang}" not found. Using "${captionData.language}" instead. Available: ${captionData.available.join(', ')}`);
        }
        // Step 2: Fetch caption XML and parse segments
        // The in-page parser handles both srv3 ("<p t=...>") and the legacy
        // "<text start=...>" caption formats with a hand-rolled scanner.
        const segments = await page.evaluate(`
      (async () => {
        const resp = await fetch(${JSON.stringify(captionData.captionUrl)});
        const xml = await resp.text();

        if (!xml?.length) {
          return { error: 'Caption URL returned empty response' };
        }

        function getAttr(tag, name) {
          const needle = name + '="';
          const idx = tag.indexOf(needle);
          if (idx === -1) return '';
          const valStart = idx + needle.length;
          const valEnd = tag.indexOf('"', valStart);
          if (valEnd === -1) return '';
          return tag.substring(valStart, valEnd);
        }

        function decodeEntities(s) {
          return s
            .replaceAll('&amp;', '&')
            .replaceAll('&lt;', '<')
            .replaceAll('&gt;', '>')
            .replaceAll('&quot;', '"')
            .replaceAll('&#39;', "'");
        }

        const isFormat3 = xml.includes('<p t="');
        const marker = isFormat3 ? '<p ' : '<text ';
        const endMarker = isFormat3 ? '</p>' : '</text>';
        const results = [];
        let pos = 0;

        while (true) {
          const tagStart = xml.indexOf(marker, pos);
          if (tagStart === -1) break;
          let contentStart = xml.indexOf('>', tagStart);
          if (contentStart === -1) break;
          contentStart += 1;
          const tagEnd = xml.indexOf(endMarker, contentStart);
          if (tagEnd === -1) break;

          const attrStr = xml.substring(tagStart + marker.length, contentStart - 1);
          const content = xml.substring(contentStart, tagEnd);

          let startSec, durSec;
          if (isFormat3) {
            startSec = (parseFloat(getAttr(attrStr, 't')) || 0) / 1000;
            durSec = (parseFloat(getAttr(attrStr, 'd')) || 0) / 1000;
          } else {
            startSec = parseFloat(getAttr(attrStr, 'start')) || 0;
            durSec = parseFloat(getAttr(attrStr, 'dur')) || 0;
          }

          // Strip inner tags (e.g. <s> in srv3 format) and decode entities
          const text = decodeEntities(content.replace(/<[^>]+>/g, '')).split('\\\\n').join(' ').trim();
          if (text) {
            results.push({ start: startSec, end: startSec + durSec, text });
          }

          pos = tagEnd + endMarker.length;
        }

        if (results.length === 0) {
          return { error: 'Parsed 0 segments from caption XML' };
        }

        return results;
      })()
    `);
        // Success returns an array; any object here is an { error } result.
        if (!Array.isArray(segments)) {
            throw new Error(segments?.error || 'Failed to parse caption segments');
        }
        if (segments.length === 0) {
            throw new Error('No caption segments found');
        }
        // Step 3: Fetch chapters (for grouped mode)
        let chapters = [];
        if (mode === 'grouped') {
            try {
                const chapterData = await page.evaluate(`
          (async () => {
            const cfg = window.ytcfg?.data_ || {};
            const apiKey = cfg.INNERTUBE_API_KEY;
            if (!apiKey) return [];

            const resp = await fetch('/youtubei/v1/next?key=' + apiKey + '&prettyPrint=false', {
              method: 'POST',
              credentials: 'include',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({
                context: { client: { clientName: 'WEB', clientVersion: '2.20240101.00.00' } },
                videoId: ${JSON.stringify(videoId)}
              })
            });
            if (!resp.ok) return [];
            const data = await resp.json();

            const chapters = [];

            // Try chapterRenderer from player bar
            const panels = data.playerOverlays?.playerOverlayRenderer
              ?.decoratedPlayerBarRenderer?.decoratedPlayerBarRenderer
              ?.playerBar?.multiMarkersPlayerBarRenderer?.markersMap;

            if (Array.isArray(panels)) {
              for (const panel of panels) {
                const markers = panel.value?.chapters;
                if (!Array.isArray(markers)) continue;
                for (const marker of markers) {
                  const ch = marker.chapterRenderer;
                  if (!ch) continue;
                  const title = ch.title?.simpleText || '';
                  const startMs = ch.timeRangeStartMillis;
                  if (title && typeof startMs === 'number') {
                    chapters.push({ title, start: startMs / 1000 });
                  }
                }
              }
            }
            if (chapters.length > 0) return chapters;

            // Fallback: macroMarkersListItemRenderer from engagement panels
            const engPanels = data.engagementPanels;
            if (!Array.isArray(engPanels)) return [];
            for (const ep of engPanels) {
              const content = ep.engagementPanelSectionListRenderer?.content;
              const items = content?.macroMarkersListRenderer?.contents;
              if (!Array.isArray(items)) continue;
              for (const item of items) {
                const renderer = item.macroMarkersListItemRenderer;
                if (!renderer) continue;
                const t = renderer.title?.simpleText || '';
                const ts = renderer.timeDescription?.simpleText || '';
                if (!t || !ts) continue;
                const parts = ts.split(':').map(Number);
                let secs = null;
                if (parts.length === 3 && parts.every(n => !isNaN(n))) secs = parts[0]*3600 + parts[1]*60 + parts[2];
                else if (parts.length === 2 && parts.every(n => !isNaN(n))) secs = parts[0]*60 + parts[1];
                if (secs !== null) chapters.push({ title: t, start: secs });
              }
            }
            return chapters;
          })()
        `);
                if (Array.isArray(chapterData)) {
                    chapters = chapterData;
                }
            }
            catch {
                // Chapters are optional — proceed without them
            }
        }
        // Step 4: Format output based on mode
        if (mode === 'raw') {
            // Precise timestamps in seconds with decimals, matching bilibili/subtitle format
            return segments.map((seg, i) => ({
                index: i + 1,
                start: Number(seg.start).toFixed(2) + 's',
                end: Number(seg.end).toFixed(2) + 's',
                text: seg.text,
            }));
        }
        // Grouped mode: merge sentences, detect speakers, insert chapters
        const grouped = groupTranscriptSegments(segments.map(s => ({ start: s.start, text: s.text })));
        const { rows } = formatGroupedTranscript(grouped, chapters);
        return rows;
    },
});
@@ -0,0 +1,8 @@
1
/**
 * Shared YouTube utilities — URL parsing, video ID extraction, etc.
 */
/**
 * Extract a YouTube video ID from a URL or bare video ID string.
 * Supports: watch?v=, youtu.be/, /shorts/, /embed/, /live/, /v/
 *
 * Inputs that are not URLs, or URLs matching none of the patterns above,
 * are returned unchanged.
 */
export declare function parseVideoId(input: string): string;
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Shared YouTube utilities — URL parsing, video ID extraction, etc.
3
+ */
4
/**
 * Extract a YouTube video ID from a URL or bare video ID string.
 * Supports: watch?v=, youtu.be/, /shorts/, /embed/, /live/, /v/
 *
 * Inputs that are not URLs, or URLs matching none of the patterns above,
 * are returned unchanged so callers get a best-effort identifier.
 */
export function parseVideoId(input) {
    // Bare video IDs (no scheme) pass through untouched.
    if (!input.startsWith('http'))
        return input;
    try {
        const parsed = new URL(input);
        // watch?v=<id> — require a non-empty value; previously a URL like
        // "watch?v=" produced an empty-string video ID. Now it falls through
        // to the "return input unchanged" fallback instead.
        const v = parsed.searchParams.get('v');
        if (v) {
            return v;
        }
        if (parsed.hostname === 'youtu.be') {
            // Short link: ID is the first path component.
            return parsed.pathname.slice(1).split('/')[0];
        }
        // Handle /shorts/xxx, /embed/xxx, /live/xxx, /v/xxx
        const pathMatch = parsed.pathname.match(/^\/(shorts|embed|live|v)\/([^/?]+)/);
        if (pathMatch)
            return pathMatch[2];
    }
    catch {
        // Not a valid URL — treat entire input as video ID
    }
    return input;
}
@@ -0,0 +1 @@
1
+ export {};