specky-sdd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +446 -0
  3. package/dist/constants.d.ts +68 -0
  4. package/dist/constants.d.ts.map +1 -0
  5. package/dist/constants.js +120 -0
  6. package/dist/constants.js.map +1 -0
  7. package/dist/index.d.ts +10 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +95 -0
  10. package/dist/index.js.map +1 -0
  11. package/dist/schemas/common.d.ts +8 -0
  12. package/dist/schemas/common.d.ts.map +1 -0
  13. package/dist/schemas/common.js +18 -0
  14. package/dist/schemas/common.js.map +1 -0
  15. package/dist/schemas/pipeline.d.ts +296 -0
  16. package/dist/schemas/pipeline.d.ts.map +1 -0
  17. package/dist/schemas/pipeline.js +132 -0
  18. package/dist/schemas/pipeline.js.map +1 -0
  19. package/dist/schemas/transcript.d.ts +59 -0
  20. package/dist/schemas/transcript.d.ts.map +1 -0
  21. package/dist/schemas/transcript.js +61 -0
  22. package/dist/schemas/transcript.js.map +1 -0
  23. package/dist/schemas/utility.d.ts +92 -0
  24. package/dist/schemas/utility.d.ts.map +1 -0
  25. package/dist/schemas/utility.js +82 -0
  26. package/dist/schemas/utility.js.map +1 -0
  27. package/dist/services/codebase-scanner.d.ts +24 -0
  28. package/dist/services/codebase-scanner.d.ts.map +1 -0
  29. package/dist/services/codebase-scanner.js +185 -0
  30. package/dist/services/codebase-scanner.js.map +1 -0
  31. package/dist/services/ears-validator.d.ts +29 -0
  32. package/dist/services/ears-validator.d.ts.map +1 -0
  33. package/dist/services/ears-validator.js +163 -0
  34. package/dist/services/ears-validator.js.map +1 -0
  35. package/dist/services/file-manager.d.ts +56 -0
  36. package/dist/services/file-manager.d.ts.map +1 -0
  37. package/dist/services/file-manager.js +203 -0
  38. package/dist/services/file-manager.js.map +1 -0
  39. package/dist/services/state-machine.d.ts +46 -0
  40. package/dist/services/state-machine.d.ts.map +1 -0
  41. package/dist/services/state-machine.js +167 -0
  42. package/dist/services/state-machine.js.map +1 -0
  43. package/dist/services/template-engine.d.ts +37 -0
  44. package/dist/services/template-engine.d.ts.map +1 -0
  45. package/dist/services/template-engine.js +111 -0
  46. package/dist/services/template-engine.js.map +1 -0
  47. package/dist/services/transcript-parser.d.ts +61 -0
  48. package/dist/services/transcript-parser.d.ts.map +1 -0
  49. package/dist/services/transcript-parser.js +810 -0
  50. package/dist/services/transcript-parser.js.map +1 -0
  51. package/dist/tools/analysis.d.ts +10 -0
  52. package/dist/tools/analysis.d.ts.map +1 -0
  53. package/dist/tools/analysis.js +95 -0
  54. package/dist/tools/analysis.js.map +1 -0
  55. package/dist/tools/pipeline.d.ts +11 -0
  56. package/dist/tools/pipeline.d.ts.map +1 -0
  57. package/dist/tools/pipeline.js +583 -0
  58. package/dist/tools/pipeline.js.map +1 -0
  59. package/dist/tools/transcript.d.ts +14 -0
  60. package/dist/tools/transcript.d.ts.map +1 -0
  61. package/dist/tools/transcript.js +813 -0
  62. package/dist/tools/transcript.js.map +1 -0
  63. package/dist/tools/utility.d.ts +10 -0
  64. package/dist/tools/utility.d.ts.map +1 -0
  65. package/dist/tools/utility.js +239 -0
  66. package/dist/tools/utility.js.map +1 -0
  67. package/dist/types.d.ts +161 -0
  68. package/dist/types.d.ts.map +1 -0
  69. package/dist/types.js +6 -0
  70. package/dist/types.js.map +1 -0
  71. package/package.json +53 -0
  72. package/templates/analysis.md +54 -0
  73. package/templates/bugfix.md +45 -0
  74. package/templates/constitution.md +56 -0
  75. package/templates/design.md +47 -0
  76. package/templates/specification.md +49 -0
  77. package/templates/sync-report.md +43 -0
  78. package/templates/tasks.md +38 -0
@@ -0,0 +1,810 @@
1
/**
 * TranscriptParser — parses VTT, SRT, TXT, and MD meeting transcripts and
 * extracts speakers, topics, decisions, action items, and raw requirements.
 *
 * A "segment" is `{ speaker, text, timestamp? }`. Segments whose `speaker`
 * starts with "__" (e.g. "__META_TITLE__", "__TOPIC__", "__ACTION__",
 * "__SUMMARY__") are internal markers emitted by the Markdown parser; the
 * extraction methods below treat them as metadata rather than speech.
 */
export class TranscriptParser {
    /** "Capitalised Name: text" speaker prefix (heuristic). */
    static #SPEAKER_PREFIX = /^([A-Z][a-zA-ZÀ-ú\s.]+?):\s*(.+)$/s;
    /** "**Name:** text" or "**Name**: text". */
    static #BOLD_PREFIX = /^\*\*([^*]+?):?\*\*:?\s*(.+)$/;
    static #BULLET = /^[-*]\s+(.+)$/;
    static #NUMBERED = /^\d+\.\s+(.+)$/;
    /**
     * YAML front matter. Anchored to the start of the document (no `m` flag)
     * so that a pair of `---` horizontal rules further down is not mistaken
     * for front matter.
     */
    static #FRONTMATTER = /^\s*---[ \t\r]*\n([\s\S]*?)\n---[ \t\r]*(?:\n|$)/;
    /** Cue timing line: "[HH:]MM:SS.mmm --> [HH:]MM:SS.mmm". */
    static #CUE_TIMING = /(?:\d{2,}:)?\d{2}:\d{2}[.,]\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}[.,]\d{3}/;
    /** Front-matter keys copied into meta segments, in emission order. */
    static #FRONTMATTER_MARKERS = [
        ["title", "__META_TITLE__"],
        ["date", "__META_DATE__"],
        ["language", "__META_LANGUAGE__"],
        ["author", "__META_AUTHOR__"],
        ["version", "__META_VERSION__"],
        ["description", "__META_DESCRIPTION__"],
        ["tags", "__META_TAGS__"],
    ];
    /**
     * Keyword → topic table; the first matching entry wins.
     * Truncated stems carry `\w*` so that e.g. "authentication" matches
     * (a bare stem followed by `\b` can never match the longer word).
     */
    static #TOPIC_SIGNALS = [
        { pattern: /\b(login|auth|authentica\w*|sso|oauth|azure ad|entra)\b/i, topic: "Authentication & Authorization" },
        { pattern: /\b(api|endpoint|rest|graphql|grpc|webhook)\b/i, topic: "API Design" },
        { pattern: /\b(database|db|sql|postgres|mongo|cosmos|storage)\b/i, topic: "Data Storage" },
        { pattern: /\b(deploy|ci\/cd|pipeline|github actions|azure devops|kubernetes|docker|container)\b/i, topic: "Deployment & Infrastructure" },
        { pattern: /\b(security|encrypt\w*|ssl|tls|compliance|gdpr|lgpd|hipaa)\b/i, topic: "Security & Compliance" },
        { pattern: /\b(performance|latenc\w*|speed|cache|redis|cdn|scale|concurrent)\b/i, topic: "Performance & Scalability" },
        { pattern: /\b(ui|ux|frontend|react|angular|vue|design|layout|component)\b/i, topic: "User Interface" },
        { pattern: /\b(test|testing|unit test|integration|e2e|qa|quality)\b/i, topic: "Testing & Quality" },
        { pattern: /\b(monitor|observ\w*|log|metric|alert|grafana|datadog|app insights)\b/i, topic: "Monitoring & Observability" },
        { pattern: /\b(user|persona|stakeholder|customer|role|permission)\b/i, topic: "Users & Personas" },
        { pattern: /\b(budget|cost|pricing|timeline|deadline|sprint|milestone)\b/i, topic: "Project Constraints" },
        { pattern: /\b(integration|third.party|external|partner|vendor)\b/i, topic: "Integrations" },
        { pattern: /\b(notification|email|sms|push|alert|message)\b/i, topic: "Notifications" },
        { pattern: /\b(report|dashboard|analytics|chart|graph|insight)\b/i, topic: "Reporting & Analytics" },
    ];
    static #DECISION_PATTERNS = [
        /\b(decid\w*|decided|decision|let's go with|we'll use|vamos com|decidimos|ficou decidido|a decisão é|definimos que)\b/i,
        /\b(agreed|agreement|consensus|concordamos|aprovado|approved)\b/i,
        /\b(will be|vai ser|será|chosen|escolhido|selected|selecionado)\b/i,
    ];
    static #ACTION_PATTERNS = [
        /\b(action item|todo|to.do|task|precisa|needs? to|should|must|vai fazer|tem que|have to|assigned to)\b/i,
        /\b(follow.up|next step|próximo passo|ação|responsável|owner)\b/i,
        /\b(deadline|prazo|until|até|by (monday|tuesday|wednesday|thursday|friday|next week))\b/i,
    ];
    static #REQUIREMENT_PATTERNS = [
        /\b(must|should|shall|needs? to|has to|required|requirement|precisa|deve|necessário|obrigatório)\b/i,
        /\b(the system|the app|the platform|the server|the api|o sistema|a aplicação|a plataforma)\b/i,
        /\b(feature|functionality|capability|funcionalidade|recurso|capacidade)\b/i,
        /\b(support|suportar|handle|tratar|manage|gerenciar|enable|habilitar|allow|permitir)\b/i,
    ];
    static #CONSTRAINT_PATTERNS = [
        /\b(constraint|limitation|restrict\w*|budget|timeline|deadline|cannot|can't|won't|não pode|restrição|limitação|prazo|orçamento)\b/i,
        /\b(must use|has to be|needs to be|only|mandator\w*|obrigatório|somente|apenas)\b/i,
        /\b(compliance|regulation|policy|lei|regulamento|norma|lgpd|gdpr|hipaa|sox|pci)\b/i,
    ];

    fileManager;

    /**
     * @param fileManager Object exposing `readProjectFile(path)`; only used by `parseFile`.
     */
    constructor(fileManager) {
        this.fileManager = fileManager;
    }

    /**
     * Read a transcript through the file manager, detect its format and parse it.
     * @param {string} filePath Path passed verbatim to `fileManager.readProjectFile`.
     * @returns {Promise<object>} The analysis object produced by `parse`.
     */
    async parseFile(filePath) {
        const content = await this.fileManager.readProjectFile(filePath);
        const format = this.detectFormat(filePath, content);
        return this.parse(content, format, filePath);
    }

    /**
     * Parse raw transcript text.
     * @param {string} content Transcript text.
     * @param {string} [format="txt"] One of "vtt", "srt", "md"; anything else is parsed as plain text.
     * @param {string} [source="inline"] Origin label; accepted for interface compatibility, currently unused.
     * @returns {object} `{ title, participants, duration_estimate, segments, topics, decisions,
     *   action_items, requirements_raw, constraints_mentioned, open_questions, full_text }`.
     */
    parse(content, format = "txt", source = "inline") {
        // Strip a BOM and normalise CRLF/CR once: every format parser splits on
        // "\n", so Windows line endings would otherwise yield no blocks at all.
        const text = String(content ?? "")
            .replace(/^\uFEFF/, "")
            .replace(/\r\n?/g, "\n");
        let segments;
        switch (format) {
            case "vtt":
                segments = this.parseVTT(text);
                break;
            case "srt":
                segments = this.parseSRT(text);
                break;
            case "md":
                segments = this.parseMD(text);
                break;
            default:
                segments = this.parsePlainText(text);
        }
        const participants = this.extractParticipants(segments);
        const fullText = segments.map((s) => s.text).join(" ");
        const topics = this.extractTopics(segments);
        const decisions = this.extractDecisions(segments);
        const actionItems = this.extractActionItems(segments);
        const requirementsRaw = this.extractRequirements(segments);
        const constraints = this.extractConstraints(segments);
        const openQuestions = this.extractQuestions(segments);
        const duration = this.estimateDuration(segments);
        // Title preference: explicit meta title, then first topic, then first segment.
        const metaTitle = segments.find((s) => s.speaker === "__META_TITLE__");
        let title = "Meeting Transcript";
        if (metaTitle) {
            title = metaTitle.text;
        }
        else if (topics.length > 0) {
            title = topics[0].name;
        }
        else if (segments.length > 0) {
            title = segments[0].text.slice(0, 80);
        }
        return {
            title,
            participants,
            duration_estimate: duration,
            segments,
            topics,
            decisions,
            action_items: actionItems,
            requirements_raw: requirementsRaw,
            constraints_mentioned: constraints,
            open_questions: openQuestions,
            full_text: fullText,
        };
    }

    /**
     * Render an analysis object (as returned by `parse`) as Markdown.
     * @param {object} analysis
     * @returns {string}
     */
    toMarkdown(analysis) {
        const lines = [
            `# Meeting Transcript: ${analysis.title}`,
            "",
            `**Participants:** ${analysis.participants.join(", ")}`,
            `**Duration:** ${analysis.duration_estimate}`,
            "",
        ];
        // Emits "heading, blank, one line per item, blank"; skipped when empty.
        const pushList = (heading, items, prefix = "- ") => {
            if (items.length === 0)
                return;
            lines.push(heading, "");
            for (const item of items) {
                lines.push(`${prefix}${item}`);
            }
            lines.push("");
        };
        if (analysis.topics.length > 0) {
            lines.push("## Topics Discussed", "");
            for (const topic of analysis.topics) {
                lines.push(`### ${topic.name}`, "");
                lines.push(topic.summary, "");
                if (topic.key_points.length > 0) {
                    lines.push("**Key Points:**");
                    for (const point of topic.key_points) {
                        lines.push(`- ${point}`);
                    }
                    lines.push("");
                }
            }
        }
        pushList("## Decisions", analysis.decisions);
        pushList("## Action Items", analysis.action_items, "- [ ] ");
        pushList("## Requirements Identified", analysis.requirements_raw);
        pushList("## Constraints Mentioned", analysis.constraints_mentioned);
        pushList("## Open Questions", analysis.open_questions);
        lines.push("## Full Transcript", "");
        for (const seg of analysis.segments) {
            lines.push(seg.speaker ? `**${seg.speaker}:** ${seg.text}` : `${seg.text}`, "");
        }
        return lines.join("\n");
    }

    // ─── Format Parsers ───

    /**
     * Parse WebVTT text into segments. Blocks without a cue-timing line
     * (header, stray notes) are skipped.
     */
    parseVTT(content) {
        const segments = [];
        // Remove WEBVTT header and NOTE blocks
        const cleaned = content
            .replace(/^WEBVTT.*$/m, "")
            .replace(/^NOTE[\s\S]*?(?=\n\n)/gm, "")
            .trim();
        // VTT blocks: optional id, timestamp line, text lines
        for (const block of cleaned.split(/\n\n+/)) {
            if (!block.trim())
                continue;
            const lines = block.trim().split("\n");
            const tsLineIdx = lines.findIndex((l) => TranscriptParser.#CUE_TIMING.test(l));
            if (tsLineIdx === -1)
                continue;
            const timestamp = lines[tsLineIdx].trim();
            const rawText = lines.slice(tsLineIdx + 1).join(" ").trim();
            // Speaker comes from a <v Name> voice tag, else from a "Name:" prefix.
            const vTagMatch = rawText.match(/^<v\s+([^>]+)>(.*?)(?:<\/v>)?$/s);
            const speakerMatch = vTagMatch ?? rawText.match(TranscriptParser.#SPEAKER_PREFIX);
            const speaker = speakerMatch ? speakerMatch[1].trim() : "";
            // Drop any remaining inline tags (<b>, <i>, <c.class>, …).
            const text = (speakerMatch ? speakerMatch[2].trim() : rawText)
                .replace(/<[^>]+>/g, "")
                .trim();
            if (text) {
                segments.push({ speaker, text, timestamp });
            }
        }
        return segments;
    }

    /**
     * Parse SubRip text into segments. Each block is expected to be
     * "index / timing line / one or more text lines"; shorter blocks are skipped.
     */
    parseSRT(content) {
        const segments = [];
        for (const block of content.trim().split(/\n\n+/)) {
            const lines = block.trim().split("\n");
            if (lines.length < 3)
                continue;
            const timestamp = lines[1].trim();
            const rawText = lines.slice(2).join(" ").trim();
            const speakerMatch = rawText.match(TranscriptParser.#SPEAKER_PREFIX);
            const speaker = speakerMatch ? speakerMatch[1].trim() : "";
            const text = (speakerMatch ? speakerMatch[2].trim() : rawText)
                .replace(/<[^>]+>/g, "")
                .trim();
            if (text) {
                segments.push({ speaker, text, timestamp });
            }
        }
        return segments;
    }

    /**
     * Parse Markdown. Documents that look like Copilot Studio / Power Automate
     * output (leading YAML front matter, or "Executive Summary" /
     * "Transcription" / "Action Items" H2 sections, numbered or not) are
     * delegated to `parsePowerAutomateMD`; everything else is read line by line.
     */
    parseMD(content) {
        const hasFrontmatter = TranscriptParser.#FRONTMATTER.test(content);
        const hasNumberedSections = /^##\s+\d+\.\s+(Executive Summary|Complete Transcription|Action Items)/im.test(content);
        const hasSimpleSections = /^##\s+(Executive Summary|Transcription|Action Items)/im.test(content);
        if (hasFrontmatter || hasNumberedSections || hasSimpleSections) {
            return this.parsePowerAutomateMD(content);
        }
        const segments = [];
        for (const line of content.split("\n")) {
            const trimmed = line.trim();
            // Headings and horizontal rules carry no transcript text.
            if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith("---"))
                continue;
            const speakerMatch = trimmed.match(TranscriptParser.#BOLD_PREFIX)
                ?? trimmed.match(TranscriptParser.#SPEAKER_PREFIX);
            if (speakerMatch) {
                segments.push({ speaker: speakerMatch[1].trim(), text: speakerMatch[2].trim() });
                continue;
            }
            const bulletMatch = trimmed.match(TranscriptParser.#BULLET);
            segments.push({ speaker: "", text: bulletMatch ? bulletMatch[1].trim() : trimmed });
        }
        return segments;
    }

    /** Map a lower-cased H2 heading to the section id used by `parsePowerAutomateMD`. */
    static #sectionFor(sectionName) {
        const has = (...needles) => needles.some((n) => sectionName.includes(n));
        if (has("executive summary", "resumo"))
            return "summary";
        if (has("meeting details", "detalhes"))
            return "details";
        if (has("main topics", "tópicos", "topicos"))
            return "topics";
        if (has("complete transcription", "transcription", "transcript", "transcrição"))
            return "transcription";
        if (has("action item", "ações", "acoes"))
            return "actions";
        if (has("reference", "referência"))
            return "references";
        if (has("change log", "changelog"))
            return "changelog";
        if (has("table of contents", "índice"))
            return "toc";
        return sectionName;
    }

    /**
     * Parse Markdown generated by a Copilot Studio / Power Automate
     * transcription agent.
     *
     * Handles:
     *   - YAML front matter (title, date, author, version, language, tags, …)
     *   - H2 sections, numbered or not: Executive Summary, Meeting Details,
     *     Main Topics, Complete Transcription, Action Items, References
     *   - Table of Contents and Change Log sections (skipped)
     *   - "| Key | Value |" rows and "**Key:** value" lines in Meeting Details
     *
     * Metadata is emitted as marker segments ("__META_*__", "__SUMMARY__",
     * "__TOPIC__", "__ACTION__", "__REFERENCE__") ahead of / between the
     * spoken segments.
     */
    parsePowerAutomateMD(content) {
        const segments = [];
        const hasMarker = (marker) => segments.some((s) => s.speaker === marker);
        // ── Step 1: YAML front matter ──
        const frontmatterMatch = content.match(TranscriptParser.#FRONTMATTER);
        if (frontmatterMatch) {
            const yamlFields = this.parseSimpleYaml(frontmatterMatch[1]);
            for (const [key, marker] of TranscriptParser.#FRONTMATTER_MARKERS) {
                if (yamlFields[key]) {
                    segments.push({ speaker: marker, text: yamlFields[key] });
                }
            }
        }
        // ── Step 2: sections ──
        // The front-matter regex is anchored at index 0, so slicing by the match
        // length removes exactly the front matter.
        const body = frontmatterMatch
            ? content.slice(frontmatterMatch[0].length).trim()
            : content.trim();
        let currentSection = "pre";
        for (const line of body.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed || trimmed === "---")
                continue;
            // H1 — meeting title (front-matter title takes precedence)
            const h1Match = trimmed.match(/^#\s+(.+)$/);
            if (h1Match) {
                if (!hasMarker("__META_TITLE__")) {
                    segments.push({ speaker: "__META_TITLE__", text: h1Match[1].trim() });
                }
                continue;
            }
            // Blockquote before the first H2 — meeting purpose statement
            if (trimmed.startsWith("> ") && currentSection === "pre") {
                segments.push({ speaker: "__SUMMARY__", text: trimmed.slice(2).trim() });
                continue;
            }
            // H2 — switch section (with or without "N." numbering)
            const h2Match = trimmed.match(/^##\s+(?:\d+\.\s*)?(.+)$/);
            if (h2Match) {
                currentSection = TranscriptParser.#sectionFor(h2Match[1].toLowerCase().trim());
                continue;
            }
            // H3 subsections carry no content of their own
            if (trimmed.startsWith("### "))
                continue;
            if (currentSection === "toc" || currentSection === "changelog")
                continue;
            const bulletMatch = trimmed.match(TranscriptParser.#BULLET);
            const numberedMatch = trimmed.match(TranscriptParser.#NUMBERED);
            const boldMatch = trimmed.match(TranscriptParser.#BOLD_PREFIX);
            switch (currentSection) {
                case "summary": {
                    if (bulletMatch) {
                        segments.push({ speaker: "__SUMMARY__", text: bulletMatch[1].trim() });
                    }
                    else if (trimmed.length > 5 && !trimmed.startsWith("|")) {
                        segments.push({ speaker: "__SUMMARY__", text: trimmed });
                    }
                    break;
                }
                case "details": {
                    // Table row: | Key | Value |
                    const tableRowMatch = trimmed.match(/^\|\s*\*?\*?([^|*]+)\*?\*?\s*\|\s*([^|]+)\s*\|/);
                    if (tableRowMatch) {
                        const key = tableRowMatch[1].toLowerCase().trim();
                        const value = tableRowMatch[2].trim();
                        if (key.includes("participant") || key.includes("attendee") || key.includes("participante")) {
                            segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
                        }
                        else if (key.includes("date") || key.includes("data") || key.includes("fecha")) {
                            if (!hasMarker("__META_DATE__")) {
                                segments.push({ speaker: "__META_DATE__", text: value });
                            }
                        }
                        else if (key.includes("organizer") || key.includes("organizador")) {
                            // The organizer is also a participant: prepend to an
                            // existing participants entry or create one.
                            const existing = segments.find((s) => s.speaker === "__META_PARTICIPANTS__");
                            if (existing) {
                                existing.text = `${value}, ${existing.text}`;
                            }
                            else {
                                segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
                            }
                        }
                    }
                    // "**Key:** Value" lines are honoured as well
                    if (boldMatch) {
                        const key = boldMatch[1].toLowerCase().trim();
                        if (key.includes("participant") || key.includes("participante")) {
                            segments.push({ speaker: "__META_PARTICIPANTS__", text: boldMatch[2].trim() });
                        }
                    }
                    break;
                }
                case "topics": {
                    const listed = bulletMatch ?? numberedMatch;
                    if (listed) {
                        segments.push({ speaker: "__TOPIC__", text: listed[1].trim() });
                    }
                    else if (trimmed.length > 5) {
                        segments.push({ speaker: "__TOPIC__", text: trimmed });
                    }
                    break;
                }
                case "transcription": {
                    // "[HH:MM:SS] **Speaker:** text" — peel the timestamp off first,
                    // then look for a speaker in the remainder. Matching both in a
                    // single regex left "**" in the text when the colon sits inside
                    // the bold markers, and chopped the last character off lines
                    // that have a timestamp but no speaker.
                    const stamped = trimmed.match(/^\[?(\d{1,2}:\d{2}(?::\d{2})?)\]?\s*(.+)$/);
                    if (stamped) {
                        const rest = stamped[2];
                        const who = rest.match(/^\*{0,2}([^*:]+)(?::\*{0,2}:?|\*{0,2}:)\s*(.+)$/);
                        segments.push({
                            speaker: who ? who[1].trim() : "",
                            text: (who ? who[2] : rest).trim(),
                            timestamp: stamped[1],
                        });
                        break;
                    }
                    const speakerMatch = boldMatch ?? trimmed.match(TranscriptParser.#SPEAKER_PREFIX);
                    if (speakerMatch) {
                        segments.push({ speaker: speakerMatch[1].trim(), text: speakerMatch[2].trim() });
                    }
                    else if (trimmed.length > 5) {
                        segments.push({ speaker: "", text: trimmed });
                    }
                    break;
                }
                case "actions": {
                    // "- [ ] Owner: task", "- [ ] task", "- task" or "1. task"
                    const checkboxMatch = trimmed.match(/^[-*]\s+\[.\]\s*(.+)$/);
                    const item = checkboxMatch ?? bulletMatch ?? numberedMatch;
                    if (item) {
                        segments.push({ speaker: "__ACTION__", text: item[1].trim() });
                    }
                    break;
                }
                case "references": {
                    if (trimmed.length > 5 && !trimmed.startsWith("|")) {
                        segments.push({ speaker: "__REFERENCE__", text: trimmed });
                    }
                    break;
                }
                case "pre": {
                    // Between the H1 and the first H2 only "**Key:** value" metadata is read.
                    if (boldMatch) {
                        const key = boldMatch[1].toLowerCase().trim();
                        const value = boldMatch[2].trim();
                        if (key.includes("date") || key.includes("data")) {
                            segments.push({ speaker: "__META_DATE__", text: value });
                        }
                        else if (key.includes("participant") || key.includes("participante")) {
                            segments.push({ speaker: "__META_PARTICIPANTS__", text: value });
                        }
                    }
                    break;
                }
                default: {
                    // Unrecognised section — keep its content as ordinary segments
                    if (boldMatch) {
                        segments.push({ speaker: boldMatch[1].trim(), text: boldMatch[2].trim() });
                    }
                    else if (bulletMatch) {
                        segments.push({ speaker: "", text: bulletMatch[1].trim() });
                    }
                    else if (trimmed.length > 5) {
                        segments.push({ speaker: "", text: trimmed });
                    }
                }
            }
        }
        return segments;
    }

    /**
     * Minimal YAML front-matter reader: one `key: value` pair per line.
     * Surrounding quotes are removed and inline arrays (`["a", "b"]`) are
     * flattened to "a, b". Nested structures and multi-line values are not
     * supported; lines that do not look like `key: value` are ignored.
     * @param {string} yaml
     * @returns {Object<string, string>}
     */
    parseSimpleYaml(yaml) {
        const fields = {};
        const stripQuotes = (v) => v.replace(/^["']|["']$/g, "");
        for (const line of yaml.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed || trimmed.startsWith("#"))
                continue;
            const match = trimmed.match(/^(\w[\w-]*)\s*:\s*(.+)$/);
            if (!match)
                continue;
            let value = stripQuotes(match[2].trim());
            if (value.startsWith("[") && value.endsWith("]")) {
                value = value
                    .slice(1, -1)
                    .split(",")
                    .map((v) => stripQuotes(v.trim()))
                    .join(", ");
            }
            fields[match[1].trim()] = value;
        }
        return fields;
    }

    /** Parse plain text: one segment per non-empty line, with an optional "Name:" prefix. */
    parsePlainText(content) {
        const segments = [];
        for (const line of content.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            const speakerMatch = trimmed.match(TranscriptParser.#SPEAKER_PREFIX);
            segments.push(speakerMatch
                ? { speaker: speakerMatch[1].trim(), text: speakerMatch[2].trim() }
                : { speaker: "", text: trimmed });
        }
        return segments;
    }

    // ─── Extraction Methods ───

    /**
     * Collect the sorted, de-duplicated list of participant names from
     * speaker labels and from "__META_PARTICIPANTS__" segments (comma- or
     * semicolon-separated names).
     */
    extractParticipants(segments) {
        const speakers = new Set();
        for (const seg of segments) {
            if (seg.speaker === "__META_PARTICIPANTS__") {
                for (const name of seg.text.split(/[,;]+/)) {
                    const trimmed = name.trim();
                    if (trimmed)
                        speakers.add(trimmed);
                }
                continue;
            }
            // Regular speakers (skip meta markers, strip trailing colon)
            if (seg.speaker && !seg.speaker.startsWith("__")) {
                speakers.add(seg.speaker.replace(/:$/, "").trim());
            }
        }
        return [...speakers].sort();
    }

    /** Filter out internal "__…__" marker segments, leaving actual transcript content. */
    contentSegments(segments) {
        return segments.filter((s) => !s.speaker?.startsWith("__"));
    }

    /**
     * Derive topics. Explicit "__TOPIC__" segments win outright; otherwise
     * content is clustered by keyword signals, with "__SUMMARY__" text placed
     * in front as extra context.
     * @returns {Array<{name: string, summary: string, speakers: string[], key_points: string[]}>}
     */
    extractTopics(segments) {
        const explicitTopics = segments.filter((s) => s.speaker === "__TOPIC__");
        if (explicitTopics.length > 0) {
            return explicitTopics.map((topic) => ({
                name: topic.text.slice(0, 80),
                summary: topic.text,
                speakers: [],
                key_points: [topic.text],
            }));
        }
        // Summary lines first, in document order, then the spoken content.
        const summaries = segments
            .filter((s) => s.speaker === "__SUMMARY__")
            .map((s) => ({ speaker: "", text: s.text }));
        const ordered = [...summaries, ...this.contentSegments(segments)];
        const clusters = new Map();
        let currentTopic = "General Discussion";
        let cluster = [];
        // Append to an existing cluster so a topic the meeting returns to keeps
        // its earlier segments instead of being overwritten.
        const flush = () => {
            if (cluster.length > 0) {
                const existing = clusters.get(currentTopic);
                if (existing) {
                    existing.push(...cluster);
                }
                else {
                    clusters.set(currentTopic, cluster);
                }
            }
            cluster = [];
        };
        for (const seg of ordered) {
            const signal = TranscriptParser.#TOPIC_SIGNALS.find(({ pattern }) => pattern.test(seg.text));
            if (signal && signal.topic !== currentTopic) {
                flush();
                currentTopic = signal.topic;
            }
            cluster.push(seg);
        }
        flush();
        return [...clusters].map(([name, segs]) => {
            const speakers = [...new Set(segs.filter((s) => s.speaker).map((s) => s.speaker))];
            const keyPoints = segs
                .map((s) => s.text)
                .filter((t) => t.length > 20)
                .slice(0, 5);
            return {
                name,
                summary: keyPoints.slice(0, 2).join(" ").slice(0, 300),
                speakers,
                key_points: keyPoints,
            };
        });
    }

    /**
     * Return unique content segments that contain decision language,
     * formatted as "[Speaker] text" when a speaker is known. Marker segments
     * are excluded so internal labels never appear in the output.
     */
    extractDecisions(segments) {
        const decisions = [];
        for (const seg of this.contentSegments(segments)) {
            if (TranscriptParser.#DECISION_PATTERNS.some((p) => p.test(seg.text))) {
                decisions.push(seg.speaker ? `[${seg.speaker}] ${seg.text}` : seg.text);
            }
        }
        return [...new Set(decisions)];
    }

    /**
     * Return unique action items: explicit "__ACTION__" entries first, then
     * content segments containing task/ownership/deadline language.
     */
    extractActionItems(segments) {
        const actions = segments
            .filter((s) => s.speaker === "__ACTION__")
            .map((s) => s.text);
        for (const seg of segments) {
            if (seg.speaker?.startsWith("__"))
                continue; // Skip meta segments
            if (TranscriptParser.#ACTION_PATTERNS.some((p) => p.test(seg.text))) {
                actions.push(seg.speaker ? `[${seg.speaker}] ${seg.text}` : seg.text);
            }
        }
        return [...new Set(actions)];
    }

    /** Return unique content texts (longer than 15 chars) that contain requirement language. */
    extractRequirements(segments) {
        const reqs = this.contentSegments(segments)
            .filter((seg) => seg.text.length > 15
            && TranscriptParser.#REQUIREMENT_PATTERNS.some((p) => p.test(seg.text)))
            .map((seg) => seg.text);
        return [...new Set(reqs)];
    }

    /** Return unique content texts (longer than 15 chars) that mention constraints or regulations. */
    extractConstraints(segments) {
        const constraints = this.contentSegments(segments)
            .filter((seg) => seg.text.length > 15
            && TranscriptParser.#CONSTRAINT_PATTERNS.some((p) => p.test(seg.text)))
            .map((seg) => seg.text);
        return [...new Set(constraints)];
    }

    /**
     * Return up to 15 unique questions: content segments containing "?" or
     * starting with an English/Portuguese question word.
     */
    extractQuestions(segments) {
        const questions = [];
        for (const seg of this.contentSegments(segments)) {
            if (seg.text.includes("?") ||
                /^(how|what|when|where|why|who|which|como|qual|quando|onde|por que|quem)\b/i.test(seg.text)) {
                questions.push(seg.speaker ? `[${seg.speaker}] ${seg.text}` : seg.text);
            }
        }
        return [...new Set(questions)].slice(0, 15);
    }

    /**
     * Estimate meeting length. Uses the span between the first and last
     * timestamped segments when that yields at least one minute; otherwise
     * falls back to word count at ~150 words per minute.
     * @returns {string} e.g. "~12 minutes" or "~3 minutes (estimated from text volume)".
     */
    estimateDuration(segments) {
        // Only timestamped segments matter here; marker segments (which come
        // first in the Power Automate format) carry no timestamp.
        const stamped = segments.filter((s) => s.timestamp);
        if (stamped.length >= 2) {
            const start = this.parseTimestamp(stamped[0].timestamp);
            const end = this.parseTimestamp(stamped[stamped.length - 1].timestamp);
            if (start !== null && end !== null) {
                const mins = Math.round((end - start) / 60);
                if (mins > 0) {
                    return `~${mins} minutes`;
                }
            }
        }
        const totalWords = segments.reduce((sum, s) => sum + s.text.split(/\s+/).length, 0);
        const estimatedMins = Math.max(1, Math.round(totalWords / 150));
        return `~${estimatedMins} minutes (estimated from text volume)`;
    }

    /**
     * Convert a timestamp to whole seconds.
     *
     * Accepts "HH:MM:SS", "HH:MM:SS.mmm", "HH:MM:SS,mmm" (SRT) and
     * "MM:SS.mmm", optionally followed by " --> end" (only the start is read).
     * A bare "NN:NN" is ambiguous (MM:SS vs HH:MM) and yields null.
     * @param {string} ts
     * @returns {number|null} Seconds, or null when the value cannot be interpreted.
     */
    parseTimestamp(ts) {
        const start = ts.split("-->")[0].trim();
        // Separate the fraction first so "," (SRT) and "." (VTT) behave alike and
        // the number of ":" fields alone tells HH:MM:SS from MM:SS.
        const [clock, fraction] = start.split(/[.,]/);
        const fields = clock.split(":").map((n) => Number.parseInt(n, 10));
        if (fields.some((n) => Number.isNaN(n)))
            return null;
        if (fields.length === 3) {
            return fields[0] * 3600 + fields[1] * 60 + fields[2];
        }
        if (fields.length === 2 && fraction !== undefined) {
            return fields[0] * 60 + fields[1];
        }
        return null;
    }

    /**
     * Pick a parser format from the file extension, falling back to content
     * sniffing ("WEBVTT" header, or an SRT "index + timing" pair).
     * @returns {"vtt"|"srt"|"md"|"txt"}
     */
    detectFormat(filePath, content) {
        const lower = filePath.toLowerCase();
        if (lower.endsWith(".vtt"))
            return "vtt";
        if (lower.endsWith(".srt"))
            return "srt";
        if (lower.endsWith(".md"))
            return "md";
        if (content.trimStart().startsWith("WEBVTT"))
            return "vtt";
        // `\r?` — the raw file content may still carry CRLF line endings here.
        if (/^\d+\r?\n\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}/m.test(content))
            return "srt";
        return "txt";
    }
}
810
+ //# sourceMappingURL=transcript-parser.js.map