specky-sdd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +446 -0
- package/dist/constants.d.ts +68 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +120 -0
- package/dist/constants.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +95 -0
- package/dist/index.js.map +1 -0
- package/dist/schemas/common.d.ts +8 -0
- package/dist/schemas/common.d.ts.map +1 -0
- package/dist/schemas/common.js +18 -0
- package/dist/schemas/common.js.map +1 -0
- package/dist/schemas/pipeline.d.ts +296 -0
- package/dist/schemas/pipeline.d.ts.map +1 -0
- package/dist/schemas/pipeline.js +132 -0
- package/dist/schemas/pipeline.js.map +1 -0
- package/dist/schemas/transcript.d.ts +59 -0
- package/dist/schemas/transcript.d.ts.map +1 -0
- package/dist/schemas/transcript.js +61 -0
- package/dist/schemas/transcript.js.map +1 -0
- package/dist/schemas/utility.d.ts +92 -0
- package/dist/schemas/utility.d.ts.map +1 -0
- package/dist/schemas/utility.js +82 -0
- package/dist/schemas/utility.js.map +1 -0
- package/dist/services/codebase-scanner.d.ts +24 -0
- package/dist/services/codebase-scanner.d.ts.map +1 -0
- package/dist/services/codebase-scanner.js +185 -0
- package/dist/services/codebase-scanner.js.map +1 -0
- package/dist/services/ears-validator.d.ts +29 -0
- package/dist/services/ears-validator.d.ts.map +1 -0
- package/dist/services/ears-validator.js +163 -0
- package/dist/services/ears-validator.js.map +1 -0
- package/dist/services/file-manager.d.ts +56 -0
- package/dist/services/file-manager.d.ts.map +1 -0
- package/dist/services/file-manager.js +203 -0
- package/dist/services/file-manager.js.map +1 -0
- package/dist/services/state-machine.d.ts +46 -0
- package/dist/services/state-machine.d.ts.map +1 -0
- package/dist/services/state-machine.js +167 -0
- package/dist/services/state-machine.js.map +1 -0
- package/dist/services/template-engine.d.ts +37 -0
- package/dist/services/template-engine.d.ts.map +1 -0
- package/dist/services/template-engine.js +111 -0
- package/dist/services/template-engine.js.map +1 -0
- package/dist/services/transcript-parser.d.ts +61 -0
- package/dist/services/transcript-parser.d.ts.map +1 -0
- package/dist/services/transcript-parser.js +810 -0
- package/dist/services/transcript-parser.js.map +1 -0
- package/dist/tools/analysis.d.ts +10 -0
- package/dist/tools/analysis.d.ts.map +1 -0
- package/dist/tools/analysis.js +95 -0
- package/dist/tools/analysis.js.map +1 -0
- package/dist/tools/pipeline.d.ts +11 -0
- package/dist/tools/pipeline.d.ts.map +1 -0
- package/dist/tools/pipeline.js +583 -0
- package/dist/tools/pipeline.js.map +1 -0
- package/dist/tools/transcript.d.ts +14 -0
- package/dist/tools/transcript.d.ts.map +1 -0
- package/dist/tools/transcript.js +813 -0
- package/dist/tools/transcript.js.map +1 -0
- package/dist/tools/utility.d.ts +10 -0
- package/dist/tools/utility.d.ts.map +1 -0
- package/dist/tools/utility.js +239 -0
- package/dist/tools/utility.js.map +1 -0
- package/dist/types.d.ts +161 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/package.json +53 -0
- package/templates/analysis.md +54 -0
- package/templates/bugfix.md +45 -0
- package/templates/constitution.md +56 -0
- package/templates/design.md +47 -0
- package/templates/specification.md +49 -0
- package/templates/sync-report.md +43 -0
- package/templates/tasks.md +38 -0
|
@@ -0,0 +1,810 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TranscriptParser — Parses VTT, SRT, TXT, and MD meeting transcripts.
|
|
3
|
+
* Extracts speakers, topics, decisions, action items, and raw requirements.
|
|
4
|
+
*/
|
|
5
|
+
export class TranscriptParser {
|
|
6
|
+
fileManager;
|
|
7
|
+
constructor(fileManager) {
|
|
8
|
+
this.fileManager = fileManager;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Parse a transcript file and extract structured data.
|
|
12
|
+
*/
|
|
13
|
+
async parseFile(filePath) {
|
|
14
|
+
const content = await this.fileManager.readProjectFile(filePath);
|
|
15
|
+
const format = this.detectFormat(filePath, content);
|
|
16
|
+
return this.parse(content, format, filePath);
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Parse raw transcript text.
|
|
20
|
+
*/
|
|
21
|
+
parse(content, format = "txt", source = "inline") {
|
|
22
|
+
let segments;
|
|
23
|
+
switch (format) {
|
|
24
|
+
case "vtt":
|
|
25
|
+
segments = this.parseVTT(content);
|
|
26
|
+
break;
|
|
27
|
+
case "srt":
|
|
28
|
+
segments = this.parseSRT(content);
|
|
29
|
+
break;
|
|
30
|
+
case "md":
|
|
31
|
+
segments = this.parseMD(content);
|
|
32
|
+
break;
|
|
33
|
+
default:
|
|
34
|
+
segments = this.parsePlainText(content);
|
|
35
|
+
}
|
|
36
|
+
// Extract structured data from segments
|
|
37
|
+
const participants = this.extractParticipants(segments);
|
|
38
|
+
const fullText = segments.map((s) => s.text).join(" ");
|
|
39
|
+
const topics = this.extractTopics(segments);
|
|
40
|
+
const decisions = this.extractDecisions(segments);
|
|
41
|
+
const actionItems = this.extractActionItems(segments);
|
|
42
|
+
const requirementsRaw = this.extractRequirements(segments);
|
|
43
|
+
const constraints = this.extractConstraints(segments);
|
|
44
|
+
const openQuestions = this.extractQuestions(segments);
|
|
45
|
+
const duration = this.estimateDuration(segments);
|
|
46
|
+
// Derive title: from meta tag, or first topic, or first segment
|
|
47
|
+
const metaTitle = segments.find((s) => s.speaker === "__META_TITLE__");
|
|
48
|
+
const title = metaTitle
|
|
49
|
+
? metaTitle.text
|
|
50
|
+
: topics.length > 0
|
|
51
|
+
? topics[0].name
|
|
52
|
+
: segments.length > 0
|
|
53
|
+
? segments[0].text.slice(0, 80)
|
|
54
|
+
: "Meeting Transcript";
|
|
55
|
+
return {
|
|
56
|
+
title,
|
|
57
|
+
participants,
|
|
58
|
+
duration_estimate: duration,
|
|
59
|
+
segments,
|
|
60
|
+
topics,
|
|
61
|
+
decisions,
|
|
62
|
+
action_items: actionItems,
|
|
63
|
+
requirements_raw: requirementsRaw,
|
|
64
|
+
constraints_mentioned: constraints,
|
|
65
|
+
open_questions: openQuestions,
|
|
66
|
+
full_text: fullText,
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Convert a TranscriptAnalysis into clean Markdown.
|
|
71
|
+
*/
|
|
72
|
+
toMarkdown(analysis) {
|
|
73
|
+
const lines = [
|
|
74
|
+
`# Meeting Transcript: ${analysis.title}`,
|
|
75
|
+
"",
|
|
76
|
+
`**Participants:** ${analysis.participants.join(", ")}`,
|
|
77
|
+
`**Duration:** ${analysis.duration_estimate}`,
|
|
78
|
+
"",
|
|
79
|
+
];
|
|
80
|
+
if (analysis.topics.length > 0) {
|
|
81
|
+
lines.push("## Topics Discussed", "");
|
|
82
|
+
for (const topic of analysis.topics) {
|
|
83
|
+
lines.push(`### ${topic.name}`, "");
|
|
84
|
+
lines.push(topic.summary, "");
|
|
85
|
+
if (topic.key_points.length > 0) {
|
|
86
|
+
lines.push("**Key Points:**");
|
|
87
|
+
for (const point of topic.key_points) {
|
|
88
|
+
lines.push(`- ${point}`);
|
|
89
|
+
}
|
|
90
|
+
lines.push("");
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
if (analysis.decisions.length > 0) {
|
|
95
|
+
lines.push("## Decisions", "");
|
|
96
|
+
for (const d of analysis.decisions) {
|
|
97
|
+
lines.push(`- ${d}`);
|
|
98
|
+
}
|
|
99
|
+
lines.push("");
|
|
100
|
+
}
|
|
101
|
+
if (analysis.action_items.length > 0) {
|
|
102
|
+
lines.push("## Action Items", "");
|
|
103
|
+
for (const a of analysis.action_items) {
|
|
104
|
+
lines.push(`- [ ] ${a}`);
|
|
105
|
+
}
|
|
106
|
+
lines.push("");
|
|
107
|
+
}
|
|
108
|
+
if (analysis.requirements_raw.length > 0) {
|
|
109
|
+
lines.push("## Requirements Identified", "");
|
|
110
|
+
for (const r of analysis.requirements_raw) {
|
|
111
|
+
lines.push(`- ${r}`);
|
|
112
|
+
}
|
|
113
|
+
lines.push("");
|
|
114
|
+
}
|
|
115
|
+
if (analysis.constraints_mentioned.length > 0) {
|
|
116
|
+
lines.push("## Constraints Mentioned", "");
|
|
117
|
+
for (const c of analysis.constraints_mentioned) {
|
|
118
|
+
lines.push(`- ${c}`);
|
|
119
|
+
}
|
|
120
|
+
lines.push("");
|
|
121
|
+
}
|
|
122
|
+
if (analysis.open_questions.length > 0) {
|
|
123
|
+
lines.push("## Open Questions", "");
|
|
124
|
+
for (const q of analysis.open_questions) {
|
|
125
|
+
lines.push(`- ${q}`);
|
|
126
|
+
}
|
|
127
|
+
lines.push("");
|
|
128
|
+
}
|
|
129
|
+
lines.push("## Full Transcript", "");
|
|
130
|
+
for (const seg of analysis.segments) {
|
|
131
|
+
if (seg.speaker) {
|
|
132
|
+
lines.push(`**${seg.speaker}:** ${seg.text}`, "");
|
|
133
|
+
}
|
|
134
|
+
else {
|
|
135
|
+
lines.push(`${seg.text}`, "");
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
return lines.join("\n");
|
|
139
|
+
}
|
|
140
|
+
// ─── Format Parsers ───
|
|
141
|
+
parseVTT(content) {
|
|
142
|
+
const segments = [];
|
|
143
|
+
// Remove WEBVTT header and NOTE blocks
|
|
144
|
+
const cleaned = content
|
|
145
|
+
.replace(/^WEBVTT.*$/m, "")
|
|
146
|
+
.replace(/^NOTE[\s\S]*?(?=\n\n)/gm, "")
|
|
147
|
+
.trim();
|
|
148
|
+
// VTT blocks: optional id, timestamp line, text lines
|
|
149
|
+
const blocks = cleaned.split(/\n\n+/).filter((b) => b.trim());
|
|
150
|
+
for (const block of blocks) {
|
|
151
|
+
const lines = block.trim().split("\n");
|
|
152
|
+
// Find timestamp line
|
|
153
|
+
const tsLineIdx = lines.findIndex((l) => /\d{2}:\d{2}[.:]\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}[.:]\d{2}\.\d{3}/.test(l));
|
|
154
|
+
if (tsLineIdx === -1)
|
|
155
|
+
continue;
|
|
156
|
+
const timestamp = lines[tsLineIdx].trim();
|
|
157
|
+
const textLines = lines.slice(tsLineIdx + 1);
|
|
158
|
+
const rawText = textLines.join(" ").trim();
|
|
159
|
+
// Extract speaker from <v SpeakerName> tag or "SpeakerName:" prefix
|
|
160
|
+
const vTagMatch = rawText.match(/^<v\s+([^>]+)>(.*?)(?:<\/v>)?$/s);
|
|
161
|
+
const colonMatch = rawText.match(/^([A-Z][a-zA-ZÀ-ú\s.]+?):\s*(.+)$/s);
|
|
162
|
+
let speaker = "";
|
|
163
|
+
let text = rawText;
|
|
164
|
+
if (vTagMatch) {
|
|
165
|
+
speaker = vTagMatch[1].trim();
|
|
166
|
+
text = vTagMatch[2].trim();
|
|
167
|
+
}
|
|
168
|
+
else if (colonMatch) {
|
|
169
|
+
speaker = colonMatch[1].trim();
|
|
170
|
+
text = colonMatch[2].trim();
|
|
171
|
+
}
|
|
172
|
+
// Clean HTML tags
|
|
173
|
+
text = text.replace(/<[^>]+>/g, "").trim();
|
|
174
|
+
if (text) {
|
|
175
|
+
segments.push({ speaker, text, timestamp });
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
return segments;
|
|
179
|
+
}
|
|
180
|
+
parseSRT(content) {
|
|
181
|
+
const segments = [];
|
|
182
|
+
const blocks = content.trim().split(/\n\n+/);
|
|
183
|
+
for (const block of blocks) {
|
|
184
|
+
const lines = block.trim().split("\n");
|
|
185
|
+
if (lines.length < 3)
|
|
186
|
+
continue;
|
|
187
|
+
// SRT: index, timestamp, text
|
|
188
|
+
const timestamp = lines[1].trim();
|
|
189
|
+
const rawText = lines.slice(2).join(" ").trim();
|
|
190
|
+
const colonMatch = rawText.match(/^([A-Z][a-zA-ZÀ-ú\s.]+?):\s*(.+)$/s);
|
|
191
|
+
let speaker = "";
|
|
192
|
+
let text = rawText;
|
|
193
|
+
if (colonMatch) {
|
|
194
|
+
speaker = colonMatch[1].trim();
|
|
195
|
+
text = colonMatch[2].trim();
|
|
196
|
+
}
|
|
197
|
+
text = text.replace(/<[^>]+>/g, "").trim();
|
|
198
|
+
if (text) {
|
|
199
|
+
segments.push({ speaker, text, timestamp });
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
return segments;
|
|
203
|
+
}
|
|
204
|
+
parseMD(content) {
|
|
205
|
+
const segments = [];
|
|
206
|
+
const lines = content.split("\n");
|
|
207
|
+
// Detect Copilot Studio / Power Automate agent format:
|
|
208
|
+
// Has YAML frontmatter with type: "meeting-transcription"
|
|
209
|
+
// OR has numbered sections like ## 1. Executive Summary, ## 4. Complete Transcription
|
|
210
|
+
// OR has ## Executive Summary + ## Action Items
|
|
211
|
+
const hasFrontmatter = /^---\s*\n[\s\S]*?\n---/m.test(content);
|
|
212
|
+
const hasNumberedSections = /^##\s+\d+\.\s+(Executive Summary|Complete Transcription|Action Items)/im.test(content);
|
|
213
|
+
const hasSimpleSections = /^##\s+(Executive Summary|Transcription|Action Items)/im.test(content);
|
|
214
|
+
const isPowerAutomateFormat = hasFrontmatter || hasNumberedSections || hasSimpleSections;
|
|
215
|
+
if (isPowerAutomateFormat) {
|
|
216
|
+
return this.parsePowerAutomateMD(content);
|
|
217
|
+
}
|
|
218
|
+
for (const line of lines) {
|
|
219
|
+
const trimmed = line.trim();
|
|
220
|
+
if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith("---"))
|
|
221
|
+
continue;
|
|
222
|
+
// Markdown bold speaker: **Speaker:** text
|
|
223
|
+
const boldMatch = trimmed.match(/^\*\*([^*]+?):?\*\*:?\s*(.+)$/);
|
|
224
|
+
// Plain speaker: Speaker: text
|
|
225
|
+
const plainMatch = trimmed.match(/^([A-Z][a-zA-ZÀ-ú\s.]+?):\s*(.+)$/);
|
|
226
|
+
// Bullet points
|
|
227
|
+
const bulletMatch = trimmed.match(/^[-*]\s+(.+)$/);
|
|
228
|
+
if (boldMatch) {
|
|
229
|
+
segments.push({ speaker: boldMatch[1].trim(), text: boldMatch[2].trim() });
|
|
230
|
+
}
|
|
231
|
+
else if (plainMatch) {
|
|
232
|
+
segments.push({ speaker: plainMatch[1].trim(), text: plainMatch[2].trim() });
|
|
233
|
+
}
|
|
234
|
+
else if (bulletMatch) {
|
|
235
|
+
segments.push({ speaker: "", text: bulletMatch[1].trim() });
|
|
236
|
+
}
|
|
237
|
+
else {
|
|
238
|
+
segments.push({ speaker: "", text: trimmed });
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
return segments;
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Parse Markdown generated by Copilot Studio / Power Automate transcription agent.
|
|
245
|
+
*
|
|
246
|
+
* Handles the full format with:
|
|
247
|
+
* - YAML frontmatter (title, date, author, version, language, tags, etc.)
|
|
248
|
+
* - Numbered sections: ## 1. Executive Summary, ## 2. Meeting Details,
|
|
249
|
+
* ## 3. Main Topics, ## 4. Complete Transcription, ## 5. Action Items
|
|
250
|
+
* - Change Log table
|
|
251
|
+
* - Meeting Details table
|
|
252
|
+
* - File naming: {MeetingTitle}_v{version}_{YYYY-MM-DD}.md
|
|
253
|
+
*
|
|
254
|
+
* Also handles simpler formats without frontmatter or numbering.
|
|
255
|
+
*/
|
|
256
|
+
parsePowerAutomateMD(content) {
|
|
257
|
+
const segments = [];
|
|
258
|
+
// ── Step 1: Extract YAML frontmatter ──
|
|
259
|
+
const frontmatterMatch = content.match(/^---\s*\n([\s\S]*?)\n---/m);
|
|
260
|
+
if (frontmatterMatch) {
|
|
261
|
+
const yaml = frontmatterMatch[1];
|
|
262
|
+
const yamlFields = this.parseSimpleYaml(yaml);
|
|
263
|
+
if (yamlFields["title"]) {
|
|
264
|
+
segments.push({ speaker: "__META_TITLE__", text: yamlFields["title"] });
|
|
265
|
+
}
|
|
266
|
+
if (yamlFields["date"]) {
|
|
267
|
+
segments.push({ speaker: "__META_DATE__", text: yamlFields["date"] });
|
|
268
|
+
}
|
|
269
|
+
if (yamlFields["language"]) {
|
|
270
|
+
segments.push({ speaker: "__META_LANGUAGE__", text: yamlFields["language"] });
|
|
271
|
+
}
|
|
272
|
+
if (yamlFields["author"]) {
|
|
273
|
+
segments.push({ speaker: "__META_AUTHOR__", text: yamlFields["author"] });
|
|
274
|
+
}
|
|
275
|
+
if (yamlFields["version"]) {
|
|
276
|
+
segments.push({ speaker: "__META_VERSION__", text: yamlFields["version"] });
|
|
277
|
+
}
|
|
278
|
+
if (yamlFields["description"]) {
|
|
279
|
+
segments.push({ speaker: "__META_DESCRIPTION__", text: yamlFields["description"] });
|
|
280
|
+
}
|
|
281
|
+
if (yamlFields["tags"]) {
|
|
282
|
+
segments.push({ speaker: "__META_TAGS__", text: yamlFields["tags"] });
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
// ── Step 2: Parse sections ──
|
|
286
|
+
// Remove frontmatter from content for section parsing
|
|
287
|
+
const body = frontmatterMatch
|
|
288
|
+
? content.slice(frontmatterMatch[0].length).trim()
|
|
289
|
+
: content.trim();
|
|
290
|
+
const lines = body.split("\n");
|
|
291
|
+
let currentSection = "pre";
|
|
292
|
+
let h1Title = "";
|
|
293
|
+
for (const line of lines) {
|
|
294
|
+
const trimmed = line.trim();
|
|
295
|
+
if (!trimmed)
|
|
296
|
+
continue;
|
|
297
|
+
if (trimmed === "---")
|
|
298
|
+
continue;
|
|
299
|
+
// H1 — meeting title
|
|
300
|
+
const h1Match = trimmed.match(/^#\s+(.+)$/);
|
|
301
|
+
if (h1Match) {
|
|
302
|
+
h1Title = h1Match[1].trim();
|
|
303
|
+
// If no title from frontmatter, use H1
|
|
304
|
+
if (!segments.some((s) => s.speaker === "__META_TITLE__")) {
|
|
305
|
+
segments.push({ speaker: "__META_TITLE__", text: h1Title });
|
|
306
|
+
}
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
// > blockquote — meeting purpose statement
|
|
310
|
+
if (trimmed.startsWith("> ") && currentSection === "pre") {
|
|
311
|
+
segments.push({ speaker: "__SUMMARY__", text: trimmed.slice(2).trim() });
|
|
312
|
+
continue;
|
|
313
|
+
}
|
|
314
|
+
// H2 sections — detect by name (with or without numbering)
|
|
315
|
+
const h2Match = trimmed.match(/^##\s+(?:\d+\.\s*)?(.+)$/);
|
|
316
|
+
if (h2Match) {
|
|
317
|
+
const sectionName = h2Match[1].toLowerCase().trim();
|
|
318
|
+
if (sectionName.includes("executive summary") || sectionName.includes("resumo")) {
|
|
319
|
+
currentSection = "summary";
|
|
320
|
+
}
|
|
321
|
+
else if (sectionName.includes("meeting details") || sectionName.includes("detalhes")) {
|
|
322
|
+
currentSection = "details";
|
|
323
|
+
}
|
|
324
|
+
else if (sectionName.includes("main topics") || sectionName.includes("tópicos") || sectionName.includes("topicos")) {
|
|
325
|
+
currentSection = "topics";
|
|
326
|
+
}
|
|
327
|
+
else if (sectionName.includes("complete transcription") || sectionName.includes("transcription") || sectionName.includes("transcript") || sectionName.includes("transcrição")) {
|
|
328
|
+
currentSection = "transcription";
|
|
329
|
+
}
|
|
330
|
+
else if (sectionName.includes("action item") || sectionName.includes("ações") || sectionName.includes("acoes")) {
|
|
331
|
+
currentSection = "actions";
|
|
332
|
+
}
|
|
333
|
+
else if (sectionName.includes("reference") || sectionName.includes("referência")) {
|
|
334
|
+
currentSection = "references";
|
|
335
|
+
}
|
|
336
|
+
else if (sectionName.includes("change log") || sectionName.includes("changelog")) {
|
|
337
|
+
currentSection = "changelog";
|
|
338
|
+
}
|
|
339
|
+
else if (sectionName.includes("table of contents") || sectionName.includes("índice")) {
|
|
340
|
+
currentSection = "toc";
|
|
341
|
+
}
|
|
342
|
+
else {
|
|
343
|
+
currentSection = sectionName;
|
|
344
|
+
}
|
|
345
|
+
continue;
|
|
346
|
+
}
|
|
347
|
+
// H3 subsections
|
|
348
|
+
if (trimmed.startsWith("### "))
|
|
349
|
+
continue;
|
|
350
|
+
// Skip table of contents and changelog
|
|
351
|
+
if (currentSection === "toc" || currentSection === "changelog")
|
|
352
|
+
continue;
|
|
353
|
+
// ── Section-specific parsing ──
|
|
354
|
+
// Executive Summary — bullet points and paragraphs
|
|
355
|
+
if (currentSection === "summary") {
|
|
356
|
+
const bulletMatch = trimmed.match(/^[-*]\s+(.+)$/);
|
|
357
|
+
if (bulletMatch) {
|
|
358
|
+
segments.push({ speaker: "__SUMMARY__", text: bulletMatch[1].trim() });
|
|
359
|
+
}
|
|
360
|
+
else if (trimmed.length > 5 && !trimmed.startsWith("|")) {
|
|
361
|
+
segments.push({ speaker: "__SUMMARY__", text: trimmed });
|
|
362
|
+
}
|
|
363
|
+
continue;
|
|
364
|
+
}
|
|
365
|
+
// Meeting Details — parse table rows for participants
|
|
366
|
+
if (currentSection === "details") {
|
|
367
|
+
// Table row: | Key | Value |
|
|
368
|
+
const tableRowMatch = trimmed.match(/^\|\s*\*?\*?([^|*]+)\*?\*?\s*\|\s*([^|]+)\s*\|/);
|
|
369
|
+
if (tableRowMatch) {
|
|
370
|
+
const key = tableRowMatch[1].toLowerCase().trim();
|
|
371
|
+
const value = tableRowMatch[2].trim();
|
|
372
|
+
if (key.includes("participant") || key.includes("attendee") || key.includes("participante")) {
|
|
373
|
+
segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
|
|
374
|
+
}
|
|
375
|
+
else if (key.includes("date") || key.includes("data") || key.includes("fecha")) {
|
|
376
|
+
if (!segments.some((s) => s.speaker === "__META_DATE__")) {
|
|
377
|
+
segments.push({ speaker: "__META_DATE__", text: value });
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
else if (key.includes("organizer") || key.includes("organizador")) {
|
|
381
|
+
// Organizer is also a participant
|
|
382
|
+
const existing = segments.find((s) => s.speaker === "__META_PARTICIPANTS__");
|
|
383
|
+
if (existing) {
|
|
384
|
+
existing.text = `${value}, ${existing.text}`;
|
|
385
|
+
}
|
|
386
|
+
else {
|
|
387
|
+
segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
// Also handle **Key:** Value format in details section
|
|
392
|
+
const boldMeta = trimmed.match(/^\*\*([^*]+?):?\*\*:?\s*(.+)$/);
|
|
393
|
+
if (boldMeta) {
|
|
394
|
+
const key = boldMeta[1].toLowerCase().trim();
|
|
395
|
+
const value = boldMeta[2].trim();
|
|
396
|
+
if (key.includes("participant") || key.includes("participante")) {
|
|
397
|
+
segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
continue;
|
|
401
|
+
}
|
|
402
|
+
// Main Topics — numbered or bulleted list
|
|
403
|
+
if (currentSection === "topics") {
|
|
404
|
+
const bulletMatch = trimmed.match(/^[-*]\s+(.+)$/);
|
|
405
|
+
const numberedMatch = trimmed.match(/^\d+\.\s+(.+)$/);
|
|
406
|
+
if (bulletMatch) {
|
|
407
|
+
segments.push({ speaker: "__TOPIC__", text: bulletMatch[1].trim() });
|
|
408
|
+
}
|
|
409
|
+
else if (numberedMatch) {
|
|
410
|
+
segments.push({ speaker: "__TOPIC__", text: numberedMatch[1].trim() });
|
|
411
|
+
}
|
|
412
|
+
else if (trimmed.length > 5) {
|
|
413
|
+
segments.push({ speaker: "__TOPIC__", text: trimmed });
|
|
414
|
+
}
|
|
415
|
+
continue;
|
|
416
|
+
}
|
|
417
|
+
// Complete Transcription — speaker-attributed text
|
|
418
|
+
if (currentSection === "transcription") {
|
|
419
|
+
// **Speaker Name:** text (bold speaker — colon may be inside or outside **)
|
|
420
|
+
const boldSpeaker = trimmed.match(/^\*\*([^*]+?):?\*\*:?\s*(.+)$/);
|
|
421
|
+
// Speaker Name: text (plain)
|
|
422
|
+
const plainSpeaker = trimmed.match(/^([A-Z][a-zA-ZÀ-ú\s.]+?):\s*(.+)$/);
|
|
423
|
+
// [HH:MM:SS] **Speaker:** text (with timestamp)
|
|
424
|
+
const timestampSpeaker = trimmed.match(/^\[?(\d{1,2}:\d{2}(?::\d{2})?)\]?\s*\*?\*?([^*:]+)\*?\*?:?\s*(.+)$/);
|
|
425
|
+
if (timestampSpeaker) {
|
|
426
|
+
segments.push({
|
|
427
|
+
speaker: timestampSpeaker[2].trim(),
|
|
428
|
+
text: timestampSpeaker[3].trim(),
|
|
429
|
+
timestamp: timestampSpeaker[1],
|
|
430
|
+
});
|
|
431
|
+
}
|
|
432
|
+
else if (boldSpeaker) {
|
|
433
|
+
segments.push({ speaker: boldSpeaker[1].trim(), text: boldSpeaker[2].trim() });
|
|
434
|
+
}
|
|
435
|
+
else if (plainSpeaker) {
|
|
436
|
+
segments.push({ speaker: plainSpeaker[1].trim(), text: plainSpeaker[2].trim() });
|
|
437
|
+
}
|
|
438
|
+
else if (trimmed.length > 5) {
|
|
439
|
+
segments.push({ speaker: "", text: trimmed });
|
|
440
|
+
}
|
|
441
|
+
continue;
|
|
442
|
+
}
|
|
443
|
+
// Action Items
|
|
444
|
+
if (currentSection === "actions") {
|
|
445
|
+
// - [ ] Owner: task description
|
|
446
|
+
// - [ ] task description
|
|
447
|
+
// - task description
|
|
448
|
+
const checkboxMatch = trimmed.match(/^[-*]\s+\[.\]\s*(.+)$/);
|
|
449
|
+
const bulletMatch = trimmed.match(/^[-*]\s+(.+)$/);
|
|
450
|
+
const numberedMatch = trimmed.match(/^\d+\.\s+(.+)$/);
|
|
451
|
+
if (checkboxMatch) {
|
|
452
|
+
segments.push({ speaker: "__ACTION__", text: checkboxMatch[1].trim() });
|
|
453
|
+
}
|
|
454
|
+
else if (bulletMatch) {
|
|
455
|
+
segments.push({ speaker: "__ACTION__", text: bulletMatch[1].trim() });
|
|
456
|
+
}
|
|
457
|
+
else if (numberedMatch) {
|
|
458
|
+
segments.push({ speaker: "__ACTION__", text: numberedMatch[1].trim() });
|
|
459
|
+
}
|
|
460
|
+
continue;
|
|
461
|
+
}
|
|
462
|
+
// References section
|
|
463
|
+
if (currentSection === "references") {
|
|
464
|
+
if (trimmed.length > 5 && !trimmed.startsWith("|")) {
|
|
465
|
+
segments.push({ speaker: "__REFERENCE__", text: trimmed });
|
|
466
|
+
}
|
|
467
|
+
continue;
|
|
468
|
+
}
|
|
469
|
+
// Pre-section content (between H1 and first H2)
|
|
470
|
+
if (currentSection === "pre") {
|
|
471
|
+
const boldMeta = trimmed.match(/^\*\*([^*]+?):?\*\*:?\s*(.+)$/);
|
|
472
|
+
if (boldMeta) {
|
|
473
|
+
const key = boldMeta[1].toLowerCase().trim();
|
|
474
|
+
const value = boldMeta[2].trim();
|
|
475
|
+
if (key.includes("date") || key.includes("data")) {
|
|
476
|
+
segments.push({ speaker: "__META_DATE__", text: value });
|
|
477
|
+
}
|
|
478
|
+
else if (key.includes("participant") || key.includes("participante")) {
|
|
479
|
+
segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
continue;
|
|
483
|
+
}
|
|
484
|
+
// Any other section — include as general content
|
|
485
|
+
const boldMatch = trimmed.match(/^\*\*([^*]+?):?\*\*:?\s*(.+)$/);
|
|
486
|
+
const bulletMatch = trimmed.match(/^[-*]\s+(.+)$/);
|
|
487
|
+
if (boldMatch) {
|
|
488
|
+
segments.push({ speaker: boldMatch[1].trim(), text: boldMatch[2].trim() });
|
|
489
|
+
}
|
|
490
|
+
else if (bulletMatch) {
|
|
491
|
+
segments.push({ speaker: "", text: bulletMatch[1].trim() });
|
|
492
|
+
}
|
|
493
|
+
else if (trimmed.length > 5) {
|
|
494
|
+
segments.push({ speaker: "", text: trimmed });
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
return segments;
|
|
498
|
+
}
|
|
499
|
+
/**
|
|
500
|
+
* Simple YAML frontmatter parser — extracts key: "value" pairs.
|
|
501
|
+
* Handles quoted strings, arrays (as comma-separated), and bare values.
|
|
502
|
+
*/
|
|
503
|
+
parseSimpleYaml(yaml) {
|
|
504
|
+
const fields = {};
|
|
505
|
+
const lines = yaml.split("\n");
|
|
506
|
+
for (const line of lines) {
|
|
507
|
+
const trimmed = line.trim();
|
|
508
|
+
if (!trimmed || trimmed.startsWith("#"))
|
|
509
|
+
continue;
|
|
510
|
+
// key: "value" or key: value or key: [array]
|
|
511
|
+
const match = trimmed.match(/^(\w[\w-]*)\s*:\s*(.+)$/);
|
|
512
|
+
if (match) {
|
|
513
|
+
const key = match[1].trim();
|
|
514
|
+
let value = match[2].trim();
|
|
515
|
+
// Remove quotes
|
|
516
|
+
value = value.replace(/^["']|["']$/g, "");
|
|
517
|
+
// Handle YAML arrays: ["a", "b"] → "a, b"
|
|
518
|
+
if (value.startsWith("[") && value.endsWith("]")) {
|
|
519
|
+
value = value
|
|
520
|
+
.slice(1, -1)
|
|
521
|
+
.split(",")
|
|
522
|
+
.map((v) => v.trim().replace(/^["']|["']$/g, ""))
|
|
523
|
+
.join(", ");
|
|
524
|
+
}
|
|
525
|
+
fields[key] = value;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
return fields;
|
|
529
|
+
}
|
|
530
|
+
parsePlainText(content) {
|
|
531
|
+
const segments = [];
|
|
532
|
+
const lines = content.split("\n");
|
|
533
|
+
for (const line of lines) {
|
|
534
|
+
const trimmed = line.trim();
|
|
535
|
+
if (!trimmed)
|
|
536
|
+
continue;
|
|
537
|
+
const colonMatch = trimmed.match(/^([A-Z][a-zA-ZÀ-ú\s.]+?):\s*(.+)$/);
|
|
538
|
+
if (colonMatch) {
|
|
539
|
+
segments.push({ speaker: colonMatch[1].trim(), text: colonMatch[2].trim() });
|
|
540
|
+
}
|
|
541
|
+
else {
|
|
542
|
+
segments.push({ speaker: "", text: trimmed });
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
return segments;
|
|
546
|
+
}
|
|
547
|
+
// ─── Extraction Methods ───
|
|
548
|
+
extractParticipants(segments) {
|
|
549
|
+
const speakers = new Set();
|
|
550
|
+
for (const seg of segments) {
|
|
551
|
+
// Power Automate metadata: __META_PARTICIPANTS__ contains comma-separated names
|
|
552
|
+
if (seg.speaker === "__META_PARTICIPANTS__") {
|
|
553
|
+
for (const name of seg.text.split(/[,;]+/)) {
|
|
554
|
+
const trimmed = name.trim();
|
|
555
|
+
if (trimmed)
|
|
556
|
+
speakers.add(trimmed);
|
|
557
|
+
}
|
|
558
|
+
continue;
|
|
559
|
+
}
|
|
560
|
+
// Regular speakers (skip meta markers, strip trailing colon)
|
|
561
|
+
if (seg.speaker && !seg.speaker.startsWith("__")) {
|
|
562
|
+
speakers.add(seg.speaker.replace(/:$/, "").trim());
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
return [...speakers].sort();
|
|
566
|
+
}
|
|
567
|
+
/** Filter out internal meta segments used for Power Automate format parsing */
|
|
568
|
+
contentSegments(segments) {
|
|
569
|
+
return segments.filter((s) => !s.speaker?.startsWith("__"));
|
|
570
|
+
}
|
|
571
|
+
extractTopics(segments) {
|
|
572
|
+
const topics = [];
|
|
573
|
+
const topicKeywords = new Map();
|
|
574
|
+
const realSegments = this.contentSegments(segments);
|
|
575
|
+
// If explicit topics were parsed from "Main Topics" section, use those first
|
|
576
|
+
const explicitTopics = segments.filter((s) => s.speaker === "__TOPIC__");
|
|
577
|
+
if (explicitTopics.length > 0) {
|
|
578
|
+
for (const topic of explicitTopics) {
|
|
579
|
+
topics.push({
|
|
580
|
+
name: topic.text.slice(0, 80),
|
|
581
|
+
summary: topic.text,
|
|
582
|
+
speakers: [],
|
|
583
|
+
key_points: [topic.text],
|
|
584
|
+
});
|
|
585
|
+
}
|
|
586
|
+
// Still scan content segments for additional detail per topic
|
|
587
|
+
return topics;
|
|
588
|
+
}
|
|
589
|
+
// Include executive summary as context for topic extraction
|
|
590
|
+
const summarySegments = segments.filter((s) => s.speaker === "__SUMMARY__");
|
|
591
|
+
if (summarySegments.length > 0) {
|
|
592
|
+
for (const summary of summarySegments) {
|
|
593
|
+
realSegments.unshift({ speaker: "", text: summary.text });
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
// Group segments into topic clusters based on content similarity
|
|
597
|
+
let currentTopic = "General Discussion";
|
|
598
|
+
let currentSegments = [];
|
|
599
|
+
for (const seg of realSegments) {
|
|
600
|
+
const text = seg.text.toLowerCase();
|
|
601
|
+
// Detect topic transitions
|
|
602
|
+
const topicSignals = [
|
|
603
|
+
{ pattern: /\b(login|auth|authentica|sso|oauth|azure ad|entra)\b/i, topic: "Authentication & Authorization" },
|
|
604
|
+
{ pattern: /\b(api|endpoint|rest|graphql|grpc|webhook)\b/i, topic: "API Design" },
|
|
605
|
+
{ pattern: /\b(database|db|sql|postgres|mongo|cosmos|storage)\b/i, topic: "Data Storage" },
|
|
606
|
+
{ pattern: /\b(deploy|ci\/cd|pipeline|github actions|azure devops|kubernetes|docker|container)\b/i, topic: "Deployment & Infrastructure" },
|
|
607
|
+
{ pattern: /\b(security|encrypt|ssl|tls|compliance|gdpr|lgpd|hipaa)\b/i, topic: "Security & Compliance" },
|
|
608
|
+
{ pattern: /\b(performance|latenc|speed|cache|redis|cdn|scale|concurrent)\b/i, topic: "Performance & Scalability" },
|
|
609
|
+
{ pattern: /\b(ui|ux|frontend|react|angular|vue|design|layout|component)\b/i, topic: "User Interface" },
|
|
610
|
+
{ pattern: /\b(test|testing|unit test|integration|e2e|qa|quality)\b/i, topic: "Testing & Quality" },
|
|
611
|
+
{ pattern: /\b(monitor|observ|log|metric|alert|grafana|datadog|app insights)\b/i, topic: "Monitoring & Observability" },
|
|
612
|
+
{ pattern: /\b(user|persona|stakeholder|customer|role|permission)\b/i, topic: "Users & Personas" },
|
|
613
|
+
{ pattern: /\b(budget|cost|pricing|timeline|deadline|sprint|milestone)\b/i, topic: "Project Constraints" },
|
|
614
|
+
{ pattern: /\b(integration|third.party|external|partner|vendor)\b/i, topic: "Integrations" },
|
|
615
|
+
{ pattern: /\b(notification|email|sms|push|alert|message)\b/i, topic: "Notifications" },
|
|
616
|
+
{ pattern: /\b(report|dashboard|analytics|chart|graph|insight)\b/i, topic: "Reporting & Analytics" },
|
|
617
|
+
];
|
|
618
|
+
let detected = false;
|
|
619
|
+
for (const signal of topicSignals) {
|
|
620
|
+
if (signal.pattern.test(seg.text)) {
|
|
621
|
+
if (signal.topic !== currentTopic) {
|
|
622
|
+
if (currentSegments.length > 0) {
|
|
623
|
+
topicKeywords.set(currentTopic, [...currentSegments]);
|
|
624
|
+
}
|
|
625
|
+
currentTopic = signal.topic;
|
|
626
|
+
currentSegments = [];
|
|
627
|
+
}
|
|
628
|
+
detected = true;
|
|
629
|
+
break;
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
currentSegments.push(seg);
|
|
633
|
+
}
|
|
634
|
+
if (currentSegments.length > 0) {
|
|
635
|
+
topicKeywords.set(currentTopic, [...currentSegments]);
|
|
636
|
+
}
|
|
637
|
+
// Build topic objects
|
|
638
|
+
for (const [name, segs] of topicKeywords) {
|
|
639
|
+
const speakers = [...new Set(segs.filter((s) => s.speaker).map((s) => s.speaker))];
|
|
640
|
+
const keyPoints = segs
|
|
641
|
+
.map((s) => s.text)
|
|
642
|
+
.filter((t) => t.length > 20)
|
|
643
|
+
.slice(0, 5);
|
|
644
|
+
topics.push({
|
|
645
|
+
name,
|
|
646
|
+
summary: keyPoints.slice(0, 2).join(" ").slice(0, 300),
|
|
647
|
+
speakers,
|
|
648
|
+
key_points: keyPoints,
|
|
649
|
+
});
|
|
650
|
+
}
|
|
651
|
+
return topics;
|
|
652
|
+
}
|
|
653
|
+
extractDecisions(segments) {
|
|
654
|
+
const realSegments = this.contentSegments(segments);
|
|
655
|
+
const decisions = [];
|
|
656
|
+
const decisionPatterns = [
|
|
657
|
+
/\b(decid|decided|decision|let's go with|we'll use|vamos com|decidimos|ficou decidido|a decisão é|definimos que)\b/i,
|
|
658
|
+
/\b(agreed|agreement|consensus|concordamos|aprovado|approved)\b/i,
|
|
659
|
+
/\b(will be|vai ser|será|chosen|escolhido|selected|selecionado)\b/i,
|
|
660
|
+
];
|
|
661
|
+
for (const seg of segments) {
|
|
662
|
+
for (const pattern of decisionPatterns) {
|
|
663
|
+
if (pattern.test(seg.text)) {
|
|
664
|
+
const clean = seg.speaker
|
|
665
|
+
? `[${seg.speaker}] ${seg.text}`
|
|
666
|
+
: seg.text;
|
|
667
|
+
decisions.push(clean);
|
|
668
|
+
break;
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
return [...new Set(decisions)];
|
|
673
|
+
}
|
|
674
|
+
extractActionItems(segments) {
|
|
675
|
+
const actions = [];
|
|
676
|
+
// First: collect explicit action items from Power Automate format
|
|
677
|
+
for (const seg of segments) {
|
|
678
|
+
if (seg.speaker === "__ACTION__") {
|
|
679
|
+
actions.push(seg.text);
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
// Then: detect action items from regular speech
|
|
683
|
+
const actionPatterns = [
|
|
684
|
+
/\b(action item|todo|to.do|task|precisa|needs? to|should|must|vai fazer|tem que|have to|assigned to)\b/i,
|
|
685
|
+
/\b(follow.up|next step|próximo passo|ação|responsável|owner)\b/i,
|
|
686
|
+
/\b(deadline|prazo|until|até|by (monday|tuesday|wednesday|thursday|friday|next week))\b/i,
|
|
687
|
+
];
|
|
688
|
+
for (const seg of segments) {
|
|
689
|
+
if (seg.speaker?.startsWith("__"))
|
|
690
|
+
continue; // Skip meta segments
|
|
691
|
+
for (const pattern of actionPatterns) {
|
|
692
|
+
if (pattern.test(seg.text)) {
|
|
693
|
+
const clean = seg.speaker
|
|
694
|
+
? `[${seg.speaker}] ${seg.text}`
|
|
695
|
+
: seg.text;
|
|
696
|
+
actions.push(clean);
|
|
697
|
+
break;
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
return [...new Set(actions)];
|
|
702
|
+
}
|
|
703
|
+
extractRequirements(segments) {
|
|
704
|
+
const reqs = [];
|
|
705
|
+
const reqPatterns = [
|
|
706
|
+
/\b(must|should|shall|needs? to|has to|required|requirement|precisa|deve|necessário|obrigatório)\b/i,
|
|
707
|
+
/\b(the system|the app|the platform|the server|the api|o sistema|a aplicação|a plataforma)\b/i,
|
|
708
|
+
/\b(feature|functionality|capability|funcionalidade|recurso|capacidade)\b/i,
|
|
709
|
+
/\b(support|suportar|handle|tratar|manage|gerenciar|enable|habilitar|allow|permitir)\b/i,
|
|
710
|
+
];
|
|
711
|
+
for (const seg of this.contentSegments(segments)) {
|
|
712
|
+
let matchCount = 0;
|
|
713
|
+
for (const pattern of reqPatterns) {
|
|
714
|
+
if (pattern.test(seg.text)) {
|
|
715
|
+
matchCount++;
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
// Require at least 1 pattern match and minimum text length
|
|
719
|
+
if (matchCount >= 1 && seg.text.length > 15) {
|
|
720
|
+
reqs.push(seg.text);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
return [...new Set(reqs)];
|
|
724
|
+
}
|
|
725
|
+
extractConstraints(segments) {
|
|
726
|
+
const constraints = [];
|
|
727
|
+
const constraintPatterns = [
|
|
728
|
+
/\b(constraint|limitation|restrict|budget|timeline|deadline|cannot|can't|won't|não pode|restrição|limitação|prazo|orçamento)\b/i,
|
|
729
|
+
/\b(must use|has to be|needs to be|only|mandator|obrigatório|somente|apenas)\b/i,
|
|
730
|
+
/\b(compliance|regulation|policy|lei|regulamento|norma|lgpd|gdpr|hipaa|sox|pci)\b/i,
|
|
731
|
+
];
|
|
732
|
+
for (const seg of this.contentSegments(segments)) {
|
|
733
|
+
for (const pattern of constraintPatterns) {
|
|
734
|
+
if (pattern.test(seg.text) && seg.text.length > 15) {
|
|
735
|
+
constraints.push(seg.text);
|
|
736
|
+
break;
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
return [...new Set(constraints)];
|
|
741
|
+
}
|
|
742
|
+
extractQuestions(segments) {
|
|
743
|
+
const questions = [];
|
|
744
|
+
for (const seg of this.contentSegments(segments)) {
|
|
745
|
+
// Detect questions by "?" or question-word patterns
|
|
746
|
+
if (seg.text.includes("?") ||
|
|
747
|
+
/^(how|what|when|where|why|who|which|como|qual|quando|onde|por que|quem)\b/i.test(seg.text)) {
|
|
748
|
+
const clean = seg.speaker
|
|
749
|
+
? `[${seg.speaker}] ${seg.text}`
|
|
750
|
+
: seg.text;
|
|
751
|
+
questions.push(clean);
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
return [...new Set(questions)].slice(0, 15);
|
|
755
|
+
}
|
|
756
|
+
estimateDuration(segments) {
|
|
757
|
+
// Try to calculate from timestamps
|
|
758
|
+
if (segments.length >= 2) {
|
|
759
|
+
const first = segments[0].timestamp;
|
|
760
|
+
const last = segments[segments.length - 1].timestamp;
|
|
761
|
+
if (first && last) {
|
|
762
|
+
const start = this.parseTimestamp(first);
|
|
763
|
+
const end = this.parseTimestamp(last);
|
|
764
|
+
if (start !== null && end !== null) {
|
|
765
|
+
const diffSec = end - start;
|
|
766
|
+
const mins = Math.round(diffSec / 60);
|
|
767
|
+
if (mins > 0) {
|
|
768
|
+
return `~${mins} minutes`;
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
// Estimate from text volume (~150 words per minute speaking)
|
|
774
|
+
const totalWords = segments.reduce((sum, s) => sum + s.text.split(/\s+/).length, 0);
|
|
775
|
+
const estimatedMins = Math.max(1, Math.round(totalWords / 150));
|
|
776
|
+
return `~${estimatedMins} minutes (estimated from text volume)`;
|
|
777
|
+
}
|
|
778
|
+
parseTimestamp(ts) {
|
|
779
|
+
// Handles "HH:MM:SS.mmm" or "MM:SS.mmm" or "HH:MM:SS.mmm --> ..."
|
|
780
|
+
const clean = ts.split("-->")[0].trim();
|
|
781
|
+
const parts = clean.split(/[:.]/);
|
|
782
|
+
if (parts.length >= 3) {
|
|
783
|
+
if (parts.length === 4) {
|
|
784
|
+
// HH:MM:SS.mmm
|
|
785
|
+
return (parseInt(parts[0], 10) * 3600 +
|
|
786
|
+
parseInt(parts[1], 10) * 60 +
|
|
787
|
+
parseInt(parts[2], 10));
|
|
788
|
+
}
|
|
789
|
+
// MM:SS.mmm
|
|
790
|
+
return parseInt(parts[0], 10) * 60 + parseInt(parts[1], 10);
|
|
791
|
+
}
|
|
792
|
+
return null;
|
|
793
|
+
}
|
|
794
|
+
detectFormat(filePath, content) {
|
|
795
|
+
const lower = filePath.toLowerCase();
|
|
796
|
+
if (lower.endsWith(".vtt"))
|
|
797
|
+
return "vtt";
|
|
798
|
+
if (lower.endsWith(".srt"))
|
|
799
|
+
return "srt";
|
|
800
|
+
if (lower.endsWith(".md"))
|
|
801
|
+
return "md";
|
|
802
|
+
// Auto-detect from content
|
|
803
|
+
if (content.trimStart().startsWith("WEBVTT"))
|
|
804
|
+
return "vtt";
|
|
805
|
+
if (/^\d+\n\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}/m.test(content))
|
|
806
|
+
return "srt";
|
|
807
|
+
return "txt";
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
//# sourceMappingURL=transcript-parser.js.map
|