wogiflow 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.workflow/agents/reviewer.md +81 -0
- package/.workflow/agents/security.md +94 -0
- package/.workflow/agents/story-writer.md +58 -0
- package/.workflow/bridges/base-bridge.js +395 -0
- package/.workflow/bridges/claude-bridge.js +434 -0
- package/.workflow/bridges/index.js +130 -0
- package/.workflow/lib/assumption-detector.js +481 -0
- package/.workflow/lib/config-substitution.js +371 -0
- package/.workflow/lib/failure-categories.js +478 -0
- package/.workflow/state/app-map.md.template +15 -0
- package/.workflow/state/architecture.md.template +24 -0
- package/.workflow/state/component-index.json.template +5 -0
- package/.workflow/state/decisions.md.template +15 -0
- package/.workflow/state/feedback-patterns.md.template +9 -0
- package/.workflow/state/knowledge-sync.json.template +6 -0
- package/.workflow/state/progress.md.template +14 -0
- package/.workflow/state/ready.json.template +7 -0
- package/.workflow/state/request-log.md.template +14 -0
- package/.workflow/state/session-state.json.template +11 -0
- package/.workflow/state/stack.md.template +33 -0
- package/.workflow/state/testing.md.template +36 -0
- package/.workflow/templates/claude-md.hbs +257 -0
- package/.workflow/templates/correction-report.md +67 -0
- package/.workflow/templates/gemini-md.hbs +52 -0
- package/README.md +1802 -0
- package/bin/flow +205 -0
- package/lib/index.js +33 -0
- package/lib/installer.js +467 -0
- package/lib/release-channel.js +269 -0
- package/lib/skill-registry.js +526 -0
- package/lib/upgrader.js +401 -0
- package/lib/utils.js +305 -0
- package/package.json +64 -0
- package/scripts/flow +985 -0
- package/scripts/flow-adaptive-learning.js +1259 -0
- package/scripts/flow-aggregate.js +488 -0
- package/scripts/flow-archive +133 -0
- package/scripts/flow-auto-context.js +1015 -0
- package/scripts/flow-auto-learn.js +615 -0
- package/scripts/flow-bridge.js +223 -0
- package/scripts/flow-browser-suggest.js +316 -0
- package/scripts/flow-bug.js +247 -0
- package/scripts/flow-cascade.js +711 -0
- package/scripts/flow-changelog +85 -0
- package/scripts/flow-checkpoint.js +483 -0
- package/scripts/flow-cli.js +403 -0
- package/scripts/flow-code-intelligence.js +760 -0
- package/scripts/flow-complexity.js +502 -0
- package/scripts/flow-config-set.js +152 -0
- package/scripts/flow-constants.js +157 -0
- package/scripts/flow-context +152 -0
- package/scripts/flow-context-init.js +482 -0
- package/scripts/flow-context-monitor.js +384 -0
- package/scripts/flow-context-scoring.js +886 -0
- package/scripts/flow-correct.js +458 -0
- package/scripts/flow-damage-control.js +985 -0
- package/scripts/flow-deps +101 -0
- package/scripts/flow-diff.js +700 -0
- package/scripts/flow-done +151 -0
- package/scripts/flow-done.js +489 -0
- package/scripts/flow-durable-session.js +1541 -0
- package/scripts/flow-entropy-monitor.js +345 -0
- package/scripts/flow-export-profile +349 -0
- package/scripts/flow-export-scanner.js +1046 -0
- package/scripts/flow-figma-confirm.js +400 -0
- package/scripts/flow-figma-extract.js +496 -0
- package/scripts/flow-figma-generate.js +683 -0
- package/scripts/flow-figma-index.js +909 -0
- package/scripts/flow-figma-match.js +617 -0
- package/scripts/flow-figma-mcp-server.js +518 -0
- package/scripts/flow-figma-pipeline.js +414 -0
- package/scripts/flow-file-ops.js +301 -0
- package/scripts/flow-gate-confidence.js +825 -0
- package/scripts/flow-guided-edit.js +659 -0
- package/scripts/flow-health +185 -0
- package/scripts/flow-health.js +413 -0
- package/scripts/flow-hooks.js +556 -0
- package/scripts/flow-http-client.js +249 -0
- package/scripts/flow-hybrid-detect.js +167 -0
- package/scripts/flow-hybrid-interactive.js +591 -0
- package/scripts/flow-hybrid-test.js +152 -0
- package/scripts/flow-import-profile +439 -0
- package/scripts/flow-init +253 -0
- package/scripts/flow-instruction-richness.js +827 -0
- package/scripts/flow-jira-integration.js +579 -0
- package/scripts/flow-knowledge-router.js +522 -0
- package/scripts/flow-knowledge-sync.js +589 -0
- package/scripts/flow-linear-integration.js +631 -0
- package/scripts/flow-links.js +774 -0
- package/scripts/flow-log-manager.js +559 -0
- package/scripts/flow-loop-enforcer.js +1246 -0
- package/scripts/flow-loop-retry-learning.js +630 -0
- package/scripts/flow-lsp.js +923 -0
- package/scripts/flow-map-index +348 -0
- package/scripts/flow-map-sync +201 -0
- package/scripts/flow-memory-blocks.js +668 -0
- package/scripts/flow-memory-compactor.js +350 -0
- package/scripts/flow-memory-db.js +1110 -0
- package/scripts/flow-memory-sync.js +484 -0
- package/scripts/flow-metrics.js +353 -0
- package/scripts/flow-migrate-ids.js +370 -0
- package/scripts/flow-model-adapter.js +802 -0
- package/scripts/flow-model-router.js +884 -0
- package/scripts/flow-models.js +1231 -0
- package/scripts/flow-morning.js +517 -0
- package/scripts/flow-multi-approach.js +660 -0
- package/scripts/flow-new-feature +86 -0
- package/scripts/flow-onboard +1042 -0
- package/scripts/flow-orchestrate-llm.js +459 -0
- package/scripts/flow-orchestrate.js +3592 -0
- package/scripts/flow-output.js +123 -0
- package/scripts/flow-parallel-detector.js +399 -0
- package/scripts/flow-parallel-dispatch.js +987 -0
- package/scripts/flow-parallel.js +428 -0
- package/scripts/flow-pattern-enforcer.js +600 -0
- package/scripts/flow-prd-manager.js +282 -0
- package/scripts/flow-progress.js +323 -0
- package/scripts/flow-project-analyzer.js +975 -0
- package/scripts/flow-prompt-composer.js +487 -0
- package/scripts/flow-providers.js +1381 -0
- package/scripts/flow-queue.js +308 -0
- package/scripts/flow-ready +82 -0
- package/scripts/flow-ready.js +189 -0
- package/scripts/flow-regression.js +396 -0
- package/scripts/flow-response-parser.js +450 -0
- package/scripts/flow-resume.js +284 -0
- package/scripts/flow-rules-sync.js +439 -0
- package/scripts/flow-run-trace.js +718 -0
- package/scripts/flow-safety.js +587 -0
- package/scripts/flow-search +104 -0
- package/scripts/flow-security.js +481 -0
- package/scripts/flow-session-end +106 -0
- package/scripts/flow-session-end.js +437 -0
- package/scripts/flow-session-state.js +671 -0
- package/scripts/flow-setup-hooks +216 -0
- package/scripts/flow-setup-hooks.js +377 -0
- package/scripts/flow-skill-create.js +329 -0
- package/scripts/flow-skill-creator.js +572 -0
- package/scripts/flow-skill-generator.js +1046 -0
- package/scripts/flow-skill-learn.js +880 -0
- package/scripts/flow-skill-matcher.js +578 -0
- package/scripts/flow-spec-generator.js +820 -0
- package/scripts/flow-stack-wizard.js +895 -0
- package/scripts/flow-standup +162 -0
- package/scripts/flow-start +74 -0
- package/scripts/flow-start.js +235 -0
- package/scripts/flow-status +110 -0
- package/scripts/flow-status.js +301 -0
- package/scripts/flow-step-browser.js +83 -0
- package/scripts/flow-step-changelog.js +217 -0
- package/scripts/flow-step-comments.js +306 -0
- package/scripts/flow-step-complexity.js +234 -0
- package/scripts/flow-step-coverage.js +218 -0
- package/scripts/flow-step-knowledge.js +193 -0
- package/scripts/flow-step-pr-tests.js +364 -0
- package/scripts/flow-step-regression.js +89 -0
- package/scripts/flow-step-review.js +516 -0
- package/scripts/flow-step-security.js +162 -0
- package/scripts/flow-step-silent-failures.js +290 -0
- package/scripts/flow-step-simplifier.js +346 -0
- package/scripts/flow-story +105 -0
- package/scripts/flow-story.js +500 -0
- package/scripts/flow-suspend.js +252 -0
- package/scripts/flow-sync-daemon.js +654 -0
- package/scripts/flow-task-analyzer.js +606 -0
- package/scripts/flow-team-dashboard.js +748 -0
- package/scripts/flow-team-sync.js +752 -0
- package/scripts/flow-team.js +977 -0
- package/scripts/flow-tech-options.js +528 -0
- package/scripts/flow-templates.js +812 -0
- package/scripts/flow-tiered-learning.js +728 -0
- package/scripts/flow-trace +204 -0
- package/scripts/flow-transcript-chunking.js +1106 -0
- package/scripts/flow-transcript-digest.js +7918 -0
- package/scripts/flow-transcript-language.js +465 -0
- package/scripts/flow-transcript-parsing.js +1085 -0
- package/scripts/flow-transcript-stories.js +2194 -0
- package/scripts/flow-update-map +224 -0
- package/scripts/flow-utils.js +2242 -0
- package/scripts/flow-verification.js +644 -0
- package/scripts/flow-verify.js +1177 -0
- package/scripts/flow-voice-input.js +638 -0
- package/scripts/flow-watch +168 -0
- package/scripts/flow-workflow-steps.js +521 -0
- package/scripts/flow-workflow.js +1029 -0
- package/scripts/flow-worktree.js +489 -0
- package/scripts/hooks/adapters/base-adapter.js +102 -0
- package/scripts/hooks/adapters/claude-code.js +359 -0
- package/scripts/hooks/adapters/index.js +79 -0
- package/scripts/hooks/core/component-check.js +341 -0
- package/scripts/hooks/core/index.js +35 -0
- package/scripts/hooks/core/loop-check.js +241 -0
- package/scripts/hooks/core/session-context.js +294 -0
- package/scripts/hooks/core/task-gate.js +177 -0
- package/scripts/hooks/core/validation.js +230 -0
- package/scripts/hooks/entry/claude-code/post-tool-use.js +65 -0
- package/scripts/hooks/entry/claude-code/pre-tool-use.js +89 -0
- package/scripts/hooks/entry/claude-code/session-end.js +87 -0
- package/scripts/hooks/entry/claude-code/session-start.js +46 -0
- package/scripts/hooks/entry/claude-code/stop.js +43 -0
- package/scripts/postinstall.js +139 -0
- package/templates/browser-test-flow.json +56 -0
- package/templates/bug-report.md +43 -0
- package/templates/component-detail.md +42 -0
- package/templates/component.stories.tsx +49 -0
- package/templates/context/constraints.md +83 -0
- package/templates/context/conventions.md +177 -0
- package/templates/context/stack.md +60 -0
- package/templates/correction-report.md +90 -0
- package/templates/feature-proposal.md +35 -0
- package/templates/hybrid/_base.md +254 -0
- package/templates/hybrid/_patterns.md +45 -0
- package/templates/hybrid/create-component.md +127 -0
- package/templates/hybrid/create-file.md +56 -0
- package/templates/hybrid/create-hook.md +145 -0
- package/templates/hybrid/create-service.md +70 -0
- package/templates/hybrid/fix-bug.md +33 -0
- package/templates/hybrid/modify-file.md +55 -0
- package/templates/story.md +68 -0
- package/templates/task.json +56 -0
- package/templates/trace.md +69 -0
|
@@ -0,0 +1,1085 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow - Transcript Parsing Module
|
|
5
|
+
*
|
|
6
|
+
* Parses various subtitle and meeting formats:
|
|
7
|
+
* - VTT (WebVTT) format
|
|
8
|
+
* - SRT (SubRip) format
|
|
9
|
+
* - Zoom chat and VTT exports
|
|
10
|
+
* - Microsoft Teams chat, VTT, and JSON exports
|
|
11
|
+
*
|
|
12
|
+
* Extracted from flow-transcript-digest.js for modularity.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
// ==========================================================================
|
|
16
|
+
// E4-S3: VTT/SRT Format Parsing Functions
|
|
17
|
+
// ==========================================================================
|
|
18
|
+
|
|
19
|
+
/**
 * VTT timestamp patterns.
 *
 * FULL matches "HH:MM:SS.mmm --> HH:MM:SS.mmm"; the hours field accepts two
 * or more digits because WebVTT allows recordings longer than 99 hours.
 * Capture groups 1-4 are the start (h, m, s, ms) and 5-8 the end.
 * SHORT matches the hour-less form "MM:SS.mmm --> MM:SS.mmm"; groups 1-3 are
 * the start (m, s, ms) and 4-6 the end.
 * VOICE_TAG matches an opening "<v Speaker Name>" tag and captures the name.
 * None of the patterns is anchored, so text may follow the match on the line.
 */
const VTT_TIMESTAMP_FULL = /(\d{2,}):(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d{2,}):(\d{2}):(\d{2})\.(\d{3})/;
const VTT_TIMESTAMP_SHORT = /(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d{2}):(\d{2})\.(\d{3})/;
const VTT_VOICE_TAG = /<v\s+([^>]+)>/;

/**
 * SRT timestamp pattern: "HH:MM:SS,mmm --> HH:MM:SS,mmm" (comma before the
 * milliseconds). Capture groups 1-4 are the start, 5-8 the end.
 */
const SRT_TIMESTAMP = /(\d{2}):(\d{2}):(\d{2}),(\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2}),(\d{3})/;

/**
 * Common speaker patterns found at the start of a caption line.
 * COLON matches "Speaker Name: " (capitalised first letter, then letters and
 * whitespace only); BRACKET matches "[Speaker Name] ".
 * Group 1 is the speaker name in both.
 */
const SPEAKER_COLON_PATTERN = /^([A-Z][a-zA-Z\s]+):\s*/;
const SPEAKER_BRACKET_PATTERN = /^\[([^\]]+)\]\s*/;
|
|
36
|
+
|
|
37
|
+
/**
 * Convert timestamp components to a total number of milliseconds.
 *
 * Each component is parsed as a base-10 integer, so zero-padded strings such
 * as "08" taken from a regex capture are handled explicitly rather than
 * relying on parseInt's radix inference.
 *
 * @param {string|number} hours - Hours component
 * @param {string|number} minutes - Minutes component
 * @param {string|number} seconds - Seconds component
 * @param {string|number} ms - Milliseconds component
 * @returns {number} Total milliseconds
 */
function timestampToMs(hours, minutes, seconds, ms) {
  const toInt = (value) => Number.parseInt(value, 10);
  const wholeSeconds = toInt(hours) * 3600 + toInt(minutes) * 60 + toInt(seconds);
  return wholeSeconds * 1000 + toInt(ms);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Render a millisecond offset as a zero-padded clock string.
 *
 * @param {number} ms - Offset in milliseconds
 * @param {boolean} [short=false] - When true and the offset is under one
 *   hour, the hours field is omitted ("MM:SS" instead of "HH:MM:SS")
 * @returns {string} "HH:MM:SS", or "MM:SS" in the short form
 */
function msToTimestamp(ms, short = false) {
  const pad2 = (value) => String(value).padStart(2, '0');

  const hours = Math.floor(ms / 3600000);
  const minutes = Math.floor((ms % 3600000) / 60000);
  const seconds = Math.floor((ms % 60000) / 1000);

  const fields = [pad2(minutes), pad2(seconds)];
  const omitHours = short && hours === 0;
  if (!omitHours) {
    fields.unshift(pad2(hours));
  }
  return fields.join(':');
}
|
|
57
|
+
|
|
58
|
+
/**
 * Clean VTT/SRT text by removing markup tags, decoding the common HTML
 * entities, and collapsing whitespace.
 *
 * @param {string} text - Raw caption text
 * @returns {string} Plain text with single spaces and no leading/trailing whitespace
 */
function cleanSubtitleText(text) {
  return text
    // Remove formatting tags: any tag whose name starts with b, i, u or c,
    // then voice, language and ruby annotations.
    .replace(/<\/?[biuc][^>]*>/gi, '')
    .replace(/<\/?v[^>]*>/gi, '')
    .replace(/<\/?lang[^>]*>/gi, '')
    .replace(/<\/?ruby>/gi, '')
    .replace(/<\/?rt>/gi, '')
    // Decode entities. The ampersand entity is decoded last so that an
    // escaped entity such as "&amp;lt;" yields the literal text "&lt;"
    // instead of being decoded twice into "<".
    .replace(/&nbsp;/gi, ' ')
    .replace(/&lt;/gi, '<')
    .replace(/&gt;/gi, '>')
    .replace(/&quot;/gi, '"')
    .replace(/&#39;/gi, "'")
    .replace(/&amp;/gi, '&')
    // Normalize whitespace
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
82
|
+
|
|
83
|
+
/**
 * Pull the speaker name out of a VTT voice tag ("<v Name>") if the line
 * contains one.
 *
 * @param {string} line - A single cue text line
 * @returns {{ speaker: string|null, text: string }} The trimmed speaker name
 *   and the line with the first voice tag removed and trimmed; when no tag is
 *   present, speaker is null and text is the line unchanged
 */
function extractVTTSpeaker(line) {
  const tagMatch = line.match(VTT_VOICE_TAG);
  if (!tagMatch) {
    return { speaker: null, text: line };
  }

  return {
    speaker: tagMatch[1].trim(),
    text: line.replace(VTT_VOICE_TAG, '').trim()
  };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Detect a leading speaker label in caption text.
 *
 * The patterns are tried in order: "Speaker Name: text" first, then
 * "[Speaker Name] text". The first one that matches wins.
 *
 * @param {string} text - Caption text to inspect
 * @returns {{ speaker: string|null, text: string }} The trimmed speaker and
 *   the trimmed remainder after the label; when nothing matches, speaker is
 *   null and text is returned unchanged
 */
function extractSpeaker(text) {
  const labelPatterns = [SPEAKER_COLON_PATTERN, SPEAKER_BRACKET_PATTERN];

  for (const pattern of labelPatterns) {
    const found = text.match(pattern);
    if (found) {
      const remainder = text.substring(found[0].length);
      return { speaker: found[1].trim(), text: remainder.trim() };
    }
  }

  return { speaker: null, text };
}
|
|
120
|
+
|
|
121
|
+
/**
 * Parse VTT file content.
 *
 * Line handling, in order:
 *  - a blank line always closes the open cue (a cue that collected no text
 *    is discarded), so the identifier or header of the following block can
 *    never leak into an empty cue;
 *  - a timestamp line opens a new cue (closing any cue still open);
 *  - while a cue is open, every other line is cue text, including text that
 *    happens to start with "WEBVTT" or "NOTE";
 *  - outside a cue, "Key: value" lines with an alphabetic key are stored as
 *    lower-cased metadata, NOTE blocks are skipped up to the next blank line,
 *    and anything else (e.g. cue identifiers) is ignored.
 *
 * @param {string} content - VTT file content
 * @returns {{ metadata: object, cues: array, format: string, error?: string, partial?: boolean }}
 *   On an unexpected exception the cues parsed so far are returned together
 *   with `error`, and `partial` is true when at least one cue was parsed.
 */
function parseVTT(content) {
  // Input validation
  if (!content || typeof content !== 'string') {
    return { metadata: {}, cues: [], format: 'vtt', error: 'Invalid input: content must be a non-empty string' };
  }

  const lines = content.split('\n');
  const cues = [];
  const metadata = {};
  let currentCue = null;
  let cueIndex = 0;

  // Emit the open cue if it collected any text, then mark no cue as open.
  const closeCue = () => {
    if (currentCue && currentCue.textLines.length > 0) {
      cues.push(finalizeCue(currentCue));
    }
    currentCue = null;
  };

  try {
    for (let i = 0; i < lines.length; i++) {
      // trim() also drops the "\r" left behind by CRLF line endings
      const line = lines[i].trim();

      if (line === '') {
        closeCue();
        continue;
      }

      // Skip the WEBVTT header (only meaningful outside a cue)
      if (!currentCue && line.startsWith('WEBVTT')) {
        continue;
      }

      // Timestamp line: try the full form first, then the hour-less form
      const fullMatch = line.match(VTT_TIMESTAMP_FULL);
      const stamp = fullMatch || line.match(VTT_TIMESTAMP_SHORT);
      if (stamp) {
        closeCue();
        cueIndex++;
        currentCue = {
          index: cueIndex,
          startMs: fullMatch
            ? timestampToMs(stamp[1], stamp[2], stamp[3], stamp[4])
            : timestampToMs(0, stamp[1], stamp[2], stamp[3]),
          endMs: fullMatch
            ? timestampToMs(stamp[5], stamp[6], stamp[7], stamp[8])
            : timestampToMs(0, stamp[4], stamp[5], stamp[6]),
          // Cue settings are whatever follows the timestamp on the same line;
          // offset by the match index because the patterns are not anchored.
          settings: line.substring(stamp.index + stamp[0].length).trim(),
          textLines: [],
          rawLines: []
        };
        continue;
      }

      // Text content of the open cue
      if (currentCue) {
        currentCue.rawLines.push(line);
        const { speaker, text } = extractVTTSpeaker(line);
        if (speaker && !currentCue.speaker) {
          // The first voice tag seen decides the cue's speaker
          currentCue.speaker = speaker;
        }
        currentCue.textLines.push(cleanSubtitleText(text));
        continue;
      }

      // Outside a cue: header metadata (Kind, Language, etc.)
      if (line.includes(':')) {
        const [key, ...valueParts] = line.split(':');
        if (/^[A-Za-z]+$/.test(key)) {
          metadata[key.toLowerCase()] = valueParts.join(':').trim();
          continue;
        }
      }

      // Outside a cue: NOTE comment block, skipped up to the next blank line
      if (/^NOTE(\s|$)/.test(line)) {
        while (i < lines.length && lines[i].trim() !== '') {
          i++;
        }
      }

      // Any other line outside a cue (such as a cue identifier) is ignored.
    }

    // Don't forget last cue
    closeCue();

    return { metadata, cues, format: 'vtt' };
  } catch (err) {
    // Return partial results on error
    return {
      metadata,
      cues,
      format: 'vtt',
      error: `Parse error: ${err.message}`,
      partial: cues.length > 0
    };
  }
}
|
|
245
|
+
|
|
246
|
+
/**
 * Parse SRT file content.
 *
 * A blank line always closes the open cue; a cue that collected no text is
 * discarded. This keeps the numeric counter of the following block from being
 * absorbed as caption text when a cue has a timestamp but no text. A purely
 * numeric line is treated as a cue counter only while no cue is open, so
 * numeric caption text inside a cue is preserved.
 *
 * @param {string} content - SRT file content
 * @returns {{ metadata: object, cues: array, format: string, error?: string, partial?: boolean }}
 *   On an unexpected exception the cues parsed so far are returned together
 *   with `error`, and `partial` is true when at least one cue was parsed.
 */
function parseSRT(content) {
  // Input validation
  if (!content || typeof content !== 'string') {
    return { metadata: {}, cues: [], format: 'srt', error: 'Invalid input: content must be a non-empty string' };
  }

  const lines = content.split('\n');
  const cues = [];
  let currentCue = null;

  // Emit the open cue if it collected any text, then mark no cue as open.
  const closeCue = () => {
    if (currentCue && currentCue.textLines.length > 0) {
      cues.push(finalizeCue(currentCue));
    }
    currentCue = null;
  };

  try {
    for (const rawLine of lines) {
      // trim() also drops the "\r" left behind by CRLF line endings
      const line = rawLine.trim();

      if (line === '') {
        closeCue();
        continue;
      }

      // Cue counter line preceding the timestamp
      if (!currentCue && /^\d+$/.test(line)) {
        continue;
      }

      // Timestamp line opens a new cue
      const timestampMatch = line.match(SRT_TIMESTAMP);
      if (timestampMatch) {
        closeCue();
        currentCue = {
          // Cues are numbered by output position, not by the counter in the file
          index: cues.length + 1,
          startMs: timestampToMs(timestampMatch[1], timestampMatch[2], timestampMatch[3], timestampMatch[4]),
          endMs: timestampToMs(timestampMatch[5], timestampMatch[6], timestampMatch[7], timestampMatch[8]),
          settings: '',
          textLines: [],
          rawLines: []
        };
        continue;
      }

      // Text content
      if (currentCue) {
        currentCue.rawLines.push(line);
        const { speaker, text } = extractSpeaker(cleanSubtitleText(line));
        if (speaker && !currentCue.speaker) {
          // The first speaker label seen decides the cue's speaker
          currentCue.speaker = speaker;
        }
        currentCue.textLines.push(text);
      }
    }

    // Don't forget last cue
    closeCue();

    return { metadata: {}, cues, format: 'srt' };
  } catch (err) {
    // Return partial results on error
    return {
      metadata: {},
      cues,
      format: 'srt',
      error: `Parse error: ${err.message}`,
      partial: cues.length > 0
    };
  }
}
|
|
331
|
+
|
|
332
|
+
/**
 * Turn an in-progress cue (as accumulated by the parsers) into its final,
 * caller-facing shape with derived fields filled in.
 *
 * @param {object} cue - Working cue with index, startMs, endMs, textLines,
 *   rawLines and optionally speaker and settings
 * @returns {object} Cue with formatted start/end times, duration in
 *   milliseconds, space-joined text, newline-joined raw text, and
 *   speaker/settings defaulted to null / ''
 */
function finalizeCue(cue) {
  const { index, startMs, endMs, speaker, textLines, rawLines, settings } = cue;

  return {
    index,
    startTime: msToTimestamp(startMs),
    endTime: msToTimestamp(endMs),
    startMs,
    endMs,
    duration: endMs - startMs,
    speaker: speaker || null,
    text: textLines.join(' ').trim(),
    rawText: rawLines.join('\n'),
    settings: settings || ''
  };
}
|
|
349
|
+
|
|
350
|
+
/**
 * Merge consecutive cues from the same speaker.
 *
 * A cue is folded into the previous group when both have the same `speaker`
 * value (two null speakers count as the same) and the gap between the group's
 * end and the cue's start is below the threshold. Input cues are copied, not
 * mutated.
 *
 * @param {Array<object>} cues - Cues in chronological order
 * @param {object} [options]
 * @param {number} [options.mergeThreshold=2000] - Maximum gap in milliseconds
 *   (exclusive). An explicit 0 is honoured, so only overlapping cues merge.
 * @returns {Array<object>} Merged cues; each carries `cueCount`, the number of
 *   source cues it was built from, and `text` joined with single spaces
 */
function mergeCues(cues, options = {}) {
  const requested = options.mergeThreshold;
  // Fall back to 2 seconds for a missing/falsy value, but keep an explicit 0.
  const mergeThreshold = requested === 0 ? 0 : (requested || 2000);
  const merged = [];
  let current = null;

  // Start a new group from a copy of the cue.
  const startGroup = (cue) => ({
    ...cue,
    textParts: [cue.text],
    cueCount: 1
  });

  // Finish the open group: join its text and drop the working array.
  const flushGroup = () => {
    const { textParts, ...group } = current;
    group.text = textParts.join(' ');
    merged.push(group);
  };

  for (const cue of cues) {
    if (current !== null) {
      const gap = cue.startMs - current.endMs;
      if (cue.speaker === current.speaker && gap < mergeThreshold) {
        current.textParts.push(cue.text);
        current.endMs = cue.endMs;
        current.endTime = cue.endTime;
        current.duration = current.endMs - current.startMs;
        current.cueCount++;
        continue;
      }
      flushGroup();
    }
    current = startGroup(cue);
  }

  if (current !== null) {
    flushGroup();
  }

  return merged;
}
|
|
397
|
+
|
|
398
|
+
/**
 * Auto-detect and parse subtitle file.
 *
 * VTT is tried first (a leading "WEBVTT" header or a positive isVTTFormat
 * detection), then SRT. Non-string input is rejected with an error object
 * instead of throwing, matching how parseVTT and parseSRT report bad input.
 *
 * @param {string} content - Subtitle file content
 * @returns {object} The parseVTT/parseSRT result, or
 *   `{ error, format: 'unknown' }` when the input is not a string or the
 *   format cannot be detected
 */
function parseSubtitle(content) {
  if (typeof content !== 'string') {
    return { error: 'Invalid input: content must be a string', format: 'unknown' };
  }

  // Check for VTT
  if (content.trim().startsWith('WEBVTT') || isVTTFormat(content).detected) {
    return parseVTT(content);
  }

  // Check for SRT
  if (isSRTFormat(content).detected) {
    return parseSRT(content);
  }

  return { error: 'Unable to detect subtitle format', format: 'unknown' };
}
|
|
414
|
+
|
|
415
|
+
/**
 * Render cues as plain text, one cue per line.
 *
 * @param {Array<object>} cues - Cues with `text` and, when the matching
 *   option is enabled, `startMs` / `speaker`
 * @param {object} [options]
 * @param {boolean} [options.timestamps] - Prefix each line with "[time] "
 *   (alias: `withTimestamps`); the short clock form is used
 * @param {boolean} [options.speakers] - Prefix each line with "Speaker: "
 *   when the cue has a speaker (alias: `withSpeakers`)
 * @returns {string} Lines joined with "\n"
 */
function formatCuesAsText(cues, options = {}) {
  const showTimestamps = options.timestamps || options.withTimestamps;
  const showSpeakers = options.speakers || options.withSpeakers;
  const rendered = [];

  for (const cue of cues) {
    const stamp = showTimestamps ? `[${msToTimestamp(cue.startMs, true)}] ` : '';
    const who = showSpeakers && cue.speaker ? `${cue.speaker}: ` : '';
    rendered.push(stamp + who + cue.text);
  }

  return rendered.join('\n');
}
|
|
438
|
+
|
|
439
|
+
/**
 * Summarise a parsed subtitle result.
 *
 * @param {object} parsed - Result object carrying `cues` and `format`
 * @returns {object|null} null when `parsed.error` is set; otherwise cue and
 *   speaker counts, the list of distinct speakers, total duration (taken from
 *   the last cue's end time), total word count, and average words per cue
 *   rounded to one decimal place
 */
function getSubtitleStats(parsed) {
  if (parsed.error) return null;

  const { cues } = parsed;
  const cueCount = cues.length;

  const distinctSpeakers = new Set();
  for (const cue of cues) {
    if (cue.speaker) distinctSpeakers.add(cue.speaker);
  }

  // The end of the final cue stands in for the overall duration.
  const totalDurationMs = cueCount > 0 ? cues[cueCount - 1].endMs : 0;

  let totalWords = 0;
  for (const cue of cues) {
    totalWords += countWords(cue.text);
  }

  return {
    format: parsed.format,
    cueCount,
    speakerCount: distinctSpeakers.size,
    speakers: Array.from(distinctSpeakers),
    totalDurationMs,
    totalDuration: msToTimestamp(totalDurationMs),
    totalWords,
    avgWordsPerCue: cueCount > 0 ? Math.round(totalWords / cueCount * 10) / 10 : 0
  };
}
|
|
461
|
+
|
|
462
|
+
// ==========================================================================
|
|
463
|
+
// E4-S4: Zoom/Teams Export Parsing Functions
|
|
464
|
+
// ==========================================================================
|
|
465
|
+
|
|
466
|
+
/**
 * Regular expressions for the Zoom export formats.
 *
 *  - chatHeader:       chat entry header, "<H:MM:SS><tab(s)>From <sender> to
 *                      <recipient>:" — groups: time, sender, recipient
 *  - vttSpeaker:       "Name: text" inside a VTT cue — groups: name, text
 *  - timestampLine:    any chat line that begins with a time followed by a tab
 *  - systemMessage:    "<someone> joined|left the meeting"
 *  - recordingMessage: "Recording started|stopped"
 */
const ZOOM_PATTERNS = {
  chatHeader: /^(\d{1,2}:\d{2}:\d{2})\t+From\s+(.+?)\s+to\s+(.+?):$/,

  vttSpeaker: /^([A-Z][a-zA-Z\s'-]+):\s*(.+)$/,

  timestampLine: /^(\d{1,2}:\d{2}:\d{2})\t/,

  systemMessage: /^(.+?)\s+(joined|left)\s+the\s+meeting\.?$/i,

  recordingMessage: /^Recording\s+(started|stopped)\.?$/i
};
|
|
481
|
+
|
|
482
|
+
/**
 * Regular expressions for the Microsoft Teams export formats.
 *
 *  - chatLine:       "[M/D/YYYY, H:MM:SS AM] Name: message" — groups: date,
 *                    time (AM/PM optional), sender, message
 *  - chatLineNoDate: "[H:MM:SS AM] Name: message" — groups: time, sender,
 *                    message
 *  - systemEvent:    "<someone> joined|left the meeting"
 *  - reaction:       "[...] <someone> reacted ..."
 *  - screenShare:    "[...] <someone> started|stopped sharing"
 */
const TEAMS_PATTERNS = {
  chatLine: /^\[(\d{1,2}\/\d{1,2}\/\d{4}),?\s*(\d{1,2}:\d{2}:\d{2}\s*(?:AM|PM)?)\]\s*([^:]+):\s*(.*)$/i,

  chatLineNoDate: /^\[(\d{1,2}:\d{2}:\d{2}\s*(?:AM|PM)?)\]\s*([^:]+):\s*(.*)$/i,

  systemEvent: /^(.+?)\s+(joined|left)\s+the\s+meeting\.?$/i,

  reaction: /^\[.+\]\s*(.+?)\s+reacted\s+/i,

  screenShare: /^\[.+\]\s*(.+?)\s+(started|stopped)\s+sharing/i
};
|
|
497
|
+
|
|
498
|
+
/**
 * Decide whether a piece of text is a meeting system message rather than
 * something a participant said: join/leave notices, recording start/stop,
 * reactions, or screen-sharing notices.
 *
 * @param {string} text - Message text to test
 * @returns {boolean} true when the trimmed text matches any Zoom or Teams
 *   system pattern; false for empty or missing text
 */
function isSystemMessage(text) {
  if (!text) return false;

  const candidate = text.trim();
  const systemPatterns = [
    ZOOM_PATTERNS.systemMessage,
    ZOOM_PATTERNS.recordingMessage,
    TEAMS_PATTERNS.systemEvent,
    TEAMS_PATTERNS.reaction,
    TEAMS_PATTERNS.screenShare
  ];

  return systemPatterns.some((pattern) => pattern.test(candidate));
}
|
|
512
|
+
|
|
513
|
+
/**
 * Parse a clock-time string to milliseconds since 00:00:00.
 * Supports: "HH:MM:SS", "H:MM:SS", "9:00:15 AM" (AM/PM case-insensitive).
 *
 * A single pattern covers both the 24-hour and the AM/PM form, since the
 * AM/PM suffix is optional. Surrounding whitespace is ignored.
 *
 * @param {string} timeStr - Time string
 * @returns {number} Milliseconds, or 0 when the input is missing, not a
 *   string, or not in a supported format
 */
function parseTimeToMs(timeStr) {
  if (typeof timeStr !== 'string') return 0;

  const match = timeStr.trim().match(/^(\d{1,2}):(\d{2}):(\d{2})\s*(AM|PM)?$/i);
  if (!match) return 0;

  let hours = Number.parseInt(match[1], 10);
  const minutes = Number.parseInt(match[2], 10);
  const seconds = Number.parseInt(match[3], 10);
  const meridiem = match[4] ? match[4].toUpperCase() : null;

  // 12-hour clock: 12 AM is midnight, 12 PM is noon, other PM hours shift by 12.
  if (meridiem === 'PM' && hours !== 12) hours += 12;
  if (meridiem === 'AM' && hours === 12) hours = 0;

  return (hours * 3600 + minutes * 60 + seconds) * 1000;
}
|
|
547
|
+
|
|
548
|
+
/**
 * Parse Zoom chat format.
 *
 * Each entry starts with a header line ("<time><tab>From <sender> to
 * <recipient>:") followed by one or more tab-indented lines of message text,
 * which are joined with single spaces. Lines that are neither a header nor a
 * tab-indented continuation are ignored. Both LF and CRLF line endings are
 * accepted; a trailing "\r" would otherwise stop the end-anchored header
 * pattern from matching.
 *
 * @param {string} content - Zoom chat export content
 * @param {object} [options]
 * @param {boolean} [options.includeSystem=false] - Keep entries whose text is
 *   a system message (join/leave, recording, ...)
 * @returns {{ format: string, metadata: { entryCount: number, participants: string[] }, entries: array, error?: string }}
 *   `error` is present only when the input is not a string
 */
function parseZoomChat(content, options = {}) {
  const includeSystem = options.includeSystem || false;
  const entries = [];

  if (typeof content !== 'string') {
    return {
      format: 'zoom_chat',
      metadata: { entryCount: 0, participants: [] },
      entries,
      error: 'Invalid input: content must be a string'
    };
  }

  // Keep an entry only if it has text and passes the system-message filter.
  const commit = (entry) => {
    if (!entry || !entry.text.trim()) return;
    if (includeSystem || !isSystemMessage(entry.text)) {
      entries.push(entry);
    }
  };

  let currentEntry = null;

  for (const line of content.split(/\r?\n/)) {
    // Chat header: "00:00:01<tab>From John Smith to Everyone:"
    const headerMatch = line.match(ZOOM_PATTERNS.chatHeader);
    if (headerMatch) {
      commit(currentEntry);
      currentEntry = {
        // Position among the entries kept so far
        index: entries.length + 1,
        timestamp: headerMatch[1],
        timestampMs: parseTimeToMs(headerMatch[1]),
        speaker: headerMatch[2].trim(),
        recipient: headerMatch[3].trim(),
        text: '',
        type: 'message',
        source: 'zoom_chat'
      };
      continue;
    }

    // Continuation line (starts with tab): message text of the open entry
    if (currentEntry && line.startsWith('\t')) {
      const text = line.replace(/^\t+/, '').trim();
      if (text) {
        currentEntry.text += (currentEntry.text ? ' ' : '') + text;
      }
    }
  }

  // Don't forget last entry
  commit(currentEntry);

  return {
    format: 'zoom_chat',
    metadata: {
      entryCount: entries.length,
      participants: [...new Set(entries.map(e => e.speaker))]
    },
    entries
  };
}
|
|
615
|
+
|
|
616
|
+
/**
 * Parse a Zoom VTT transcript, where the speaker name may be embedded in the
 * cue text rather than supplied by the VTT parser.
 *
 * The content is first run through `parseVTT`. For cues without a speaker,
 * `ZOOM_PATTERNS.vttSpeaker` is applied to the text; on a match, group 1
 * becomes the speaker and group 2 the remaining text.
 *
 * @param {string} content - Raw VTT text.
 * @param {Object} [options]
 * @param {boolean} [options.includeSystem=false] - Keep cues flagged by `isSystemMessage`.
 * @returns {{format: string, metadata: Object, entries: Object[]}}
 *   Result with `format` 'zoom_vtt'; `metadata` extends the VTT metadata with
 *   `entryCount` and `participants`.
 */
function parseZoomVTT(content, options = {}) {
  const parsedVtt = parseVTT(content);
  const keepSystem = options.includeSystem || false;

  const entries = [];
  for (const cue of parsedVtt.cues) {
    const taggedSpeaker = cue.speaker;
    const rawText = cue.text;

    // Only look for an inline "Name: text" prefix when no speaker was tagged.
    const inline = taggedSpeaker ? null : rawText.match(ZOOM_PATTERNS.vttSpeaker);
    const speaker = inline ? inline[1].trim() : taggedSpeaker;
    const text = inline ? inline[2].trim() : rawText;

    if (keepSystem || !isSystemMessage(text)) {
      entries.push({
        index: entries.length + 1,
        timestamp: cue.startTime,
        timestampMs: cue.startMs,
        endTimestampMs: cue.endMs,
        speaker,
        text,
        type: 'message',
        source: 'zoom_vtt'
      });
    }
  }

  // Unique speakers in order of first appearance; entries without one are skipped.
  const speakers = new Set();
  for (const entry of entries) {
    if (entry.speaker) {
      speakers.add(entry.speaker);
    }
  }

  return {
    format: 'zoom_vtt',
    metadata: {
      ...parsedVtt.metadata,
      entryCount: entries.length,
      participants: Array.from(speakers)
    },
    entries
  };
}
|
|
666
|
+
|
|
667
|
+
/**
 * Parse a Teams chat export into structured message entries.
 *
 * Each non-blank line is tried against two layouts:
 *   - dated:   "[1/10/2026, 9:00:15 AM] John Smith: message" (`TEAMS_PATTERNS.chatLine`)
 *   - undated: "[9:00:15 AM] John Smith: message"            (`TEAMS_PATTERNS.chatLineNoDate`)
 * Lines matching neither are ignored. Only dated lines produce a `date` field.
 *
 * Unless `includeSystem` is set, system messages and reaction lines are
 * dropped for BOTH layouts (reactions used to be filtered for dated lines only).
 *
 * @param {string} content - Raw chat text.
 * @param {Object} [options]
 * @param {boolean} [options.includeSystem=false] - Keep system messages and reactions.
 * @returns {{format: string, metadata: {entryCount: number, participants: string[]}, entries: Object[]}}
 *   Parsed result with `format` set to 'teams_chat'.
 */
function parseTeamsChat(content, options = {}) {
  const includeSystem = options.includeSystem || false;
  const entries = [];

  for (const line of content.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) {
      continue;
    }

    // Prefer the dated layout; fall back to the undated one.
    const dated = trimmed.match(TEAMS_PATTERNS.chatLine);
    const undated = dated ? null : trimmed.match(TEAMS_PATTERNS.chatLineNoDate);
    if (!dated && !undated) {
      continue;
    }

    // The dated layout has one extra leading capture group (the date).
    const [rawTime, rawSpeaker, rawText] = dated ? dated.slice(2, 5) : undated.slice(1, 4);
    const text = rawText.trim();

    if (!includeSystem && (isSystemMessage(text) || TEAMS_PATTERNS.reaction.test(trimmed))) {
      continue;
    }

    const timestamp = rawTime.trim();
    entries.push({
      index: entries.length + 1,
      ...(dated ? { date: dated[1] } : {}),
      timestamp,
      timestampMs: parseTimeToMs(timestamp),
      speaker: rawSpeaker.trim(),
      text,
      type: 'message',
      source: 'teams_chat'
    });
  }

  return {
    format: 'teams_chat',
    metadata: {
      entryCount: entries.length,
      participants: [...new Set(entries.map((e) => e.speaker))]
    },
    entries
  };
}
|
|
737
|
+
|
|
738
|
+
/**
 * Parse a Teams VTT transcript.
 *
 * Speaker attribution is left entirely to `parseVTT`; this function only
 * filters system messages and reshapes each cue into a meeting entry.
 *
 * @param {string} content - Raw VTT text.
 * @param {Object} [options]
 * @param {boolean} [options.includeSystem=false] - Keep cues flagged by `isSystemMessage`.
 * @returns {{format: string, metadata: Object, entries: Object[]}}
 *   Result with `format` 'teams_vtt'; `metadata` extends the VTT metadata with
 *   `entryCount` and `participants`.
 */
function parseTeamsVTT(content, options = {}) {
  const parsedVtt = parseVTT(content);
  const keepSystem = options.includeSystem || false;

  const keptCues = Array.from(parsedVtt.cues).filter(
    (cue) => keepSystem || !isSystemMessage(cue.text)
  );

  // Indexes are 1-based positions within the filtered list.
  const entries = keptCues.map((cue, position) => ({
    index: position + 1,
    timestamp: cue.startTime,
    timestampMs: cue.startMs,
    endTimestampMs: cue.endMs,
    speaker: cue.speaker,
    text: cue.text,
    type: 'message',
    source: 'teams_vtt'
  }));

  const namedSpeakers = entries.map((entry) => entry.speaker).filter(Boolean);

  return {
    format: 'teams_vtt',
    metadata: {
      ...parsedVtt.metadata,
      entryCount: entries.length,
      participants: Array.from(new Set(namedSpeakers))
    },
    entries
  };
}
|
|
775
|
+
|
|
776
|
+
/**
 * Parse a Teams JSON transcript export.
 *
 * Accepted shapes: a top-level array of items, or an object holding the array
 * under `transcripts`, `messages` or `entries`. For each item the speaker is
 * read from `speakerName`/`speaker`/`from`/`author`, the text from
 * `text`/`content`/`message`, and the timestamp from
 * `timestamp`/`time`/`createdDateTime`.
 *
 * Never throws on malformed input: invalid JSON or a missing array yields an
 * `{ error, format: 'unknown' }` object, and items that are not objects are skipped.
 *
 * @param {string} content - JSON text.
 * @param {Object} [options]
 * @param {boolean} [options.includeSystem=false] - Keep items flagged by `isSystemMessage`.
 * @returns {{format: string, metadata: Object, entries: Object[]}|{error: string, format: string}}
 */
function parseTeamsJSON(content, options = {}) {
  let data;
  try {
    data = JSON.parse(content);
  } catch (err) {
    return { error: 'Invalid JSON format', format: 'unknown' };
  }

  const includeSystem = options.includeSystem || false;

  // Test for a bare array FIRST: on an array, `data.entries` resolves to
  // Array.prototype.entries (a truthy function), which would otherwise win
  // the `||` chain and make every top-level-array export look invalid.
  let transcripts;
  if (Array.isArray(data)) {
    transcripts = data;
  } else if (data !== null && typeof data === 'object') {
    transcripts = data.transcripts || data.messages || data.entries;
  }
  if (!Array.isArray(transcripts)) {
    return { error: 'No transcript array found in JSON', format: 'unknown' };
  }

  const entries = [];
  for (const item of transcripts) {
    // Skip holes / primitives rather than crashing on property access.
    if (item === null || typeof item !== 'object') {
      continue;
    }

    const speaker = item.speakerName || item.speaker || item.from || item.author;
    const rawText = item.text || item.content || item.message || '';
    // Non-string payloads (e.g. numbers) are stringified so `.trim()` is safe.
    const text = typeof rawText === 'string' ? rawText : String(rawText);
    const timestamp = item.timestamp || item.time || item.createdDateTime;

    if (!includeSystem && isSystemMessage(text)) {
      continue;
    }

    // Unparseable timestamps become 0 rather than NaN.
    const parsedMs = timestamp ? new Date(timestamp).getTime() : 0;

    entries.push({
      index: entries.length + 1,
      timestamp: timestamp,
      timestampMs: Number.isNaN(parsedMs) ? 0 : parsedMs,
      speaker: speaker,
      text: text.trim(),
      type: 'message',
      source: 'teams_json'
    });
  }

  return {
    format: 'teams_json',
    metadata: {
      // Undefined for top-level-array exports, which carry no meeting id.
      meetingId: data.meetingId,
      entryCount: entries.length,
      participants: [...new Set(entries.filter((e) => e.speaker).map((e) => e.speaker))]
    },
    entries
  };
}
|
|
826
|
+
|
|
827
|
+
/**
 * Classify meeting-transcript text by format.
 *
 * Checks run in this order and the first hit wins:
 *   1. Valid JSON starting with `{` or `[`            -> 'teams_json'
 *   2. VTT (a "WEBVTT" header or a "hh:mm:ss.mmm --> hh:mm:ss.mmm" cue line):
 *        - contains a `<v Name>` voice tag            -> 'teams_vtt'
 *        - contains a line starting with "Name: "     -> 'zoom_vtt'
 *        - otherwise                                  -> 'generic_vtt'
 *   3. "hh:mm:ss<TAB>From X to Y:" line               -> 'zoom_chat'
 *   4. "[m/d/yyyy, h:mm:ss AM]" line                  -> 'teams_chat'
 *   5. "[h:mm:ss AM] Name:" line                      -> 'teams_chat'
 *
 * @param {string} content - Raw transcript text.
 * @returns {string} One of the labels above, or 'unknown' when nothing
 *   matches or `content` is not a string.
 */
function detectMeetingType(content) {
  // Non-text input (null, undefined, Buffer, ...) cannot be classified;
  // report it as unknown instead of throwing on `.trim()`.
  if (typeof content !== 'string') {
    return 'unknown';
  }

  const trimmed = content.trim();

  // JSON: the leading character is only a hint, so confirm by parsing.
  if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
    try {
      JSON.parse(trimmed);
      return 'teams_json';
    } catch (err) {
      // Not JSON after all (e.g. a "[9:00:15 AM] ..." chat line); keep probing.
    }
  }

  // VTT family
  if (trimmed.startsWith('WEBVTT') || /\d{2}:\d{2}:\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}\.\d{3}/.test(trimmed)) {
    // Voice tags (Teams style)
    if (/<v\s+[^>]+>/.test(trimmed)) {
      return 'teams_vtt';
    }
    // "Name: text" at the start of a line (Zoom style)
    if (/\n[A-Z][a-zA-Z\s'-]+:\s/.test(trimmed)) {
      return 'zoom_vtt';
    }
    return 'generic_vtt';
  }

  // Zoom chat
  if (/^\d{1,2}:\d{2}:\d{2}\t+From\s+.+\s+to\s+.+:/m.test(trimmed)) {
    return 'zoom_chat';
  }

  // Teams chat with a date inside the brackets
  if (/^\[\d{1,2}\/\d{1,2}\/\d{4},?\s*\d{1,2}:\d{2}:\d{2}\s*(?:AM|PM)?\]/im.test(trimmed)) {
    return 'teams_chat';
  }

  // Teams chat with a time-only bracket
  if (/^\[\d{1,2}:\d{2}:\d{2}\s*(?:AM|PM)?\]\s*[^:]+:/im.test(trimmed)) {
    return 'teams_chat';
  }

  return 'unknown';
}
|
|
873
|
+
|
|
874
|
+
/**
 * Parse Zoom content, choosing between the chat and VTT parsers.
 *
 * The format is `options.format` when provided, otherwise the result of
 * `detectMeetingType(content)`. Any format other than 'zoom_chat',
 * 'zoom_vtt' or 'generic_vtt' falls back to a simple heuristic: content
 * containing "-->" is treated as VTT, everything else as chat.
 *
 * @param {string} content - Raw transcript or chat text.
 * @param {Object} [options] - Forwarded unchanged to the selected parser.
 * @param {string} [options.format] - Skip detection and use this format label.
 * @returns {Object} Whatever the selected parser returns.
 */
function parseZoom(content, options = {}) {
  const format = options.format || detectMeetingType(content);

  if (format === 'zoom_chat') {
    return parseZoomChat(content, options);
  }
  if (format === 'zoom_vtt' || format === 'generic_vtt') {
    return parseZoomVTT(content, options);
  }

  // Unrecognised label: a cue arrow means VTT, otherwise assume chat.
  return content.includes('-->')
    ? parseZoomVTT(content, options)
    : parseZoomChat(content, options);
}
|
|
894
|
+
|
|
895
|
+
/**
 * Parse Teams content, choosing between the JSON, chat and VTT parsers.
 *
 * The format is `options.format` when provided, otherwise the result of
 * `detectMeetingType(content)`. Formats other than 'teams_json',
 * 'teams_chat' and 'teams_vtt' fall back to sniffing the content: a leading
 * `{` or `[` means JSON, a "-->" anywhere means VTT, anything else is chat.
 *
 * @param {string} content - Raw transcript or chat text.
 * @param {Object} [options] - Forwarded unchanged to the selected parser.
 * @param {string} [options.format] - Skip detection and use this format label.
 * @returns {Object} Whatever the selected parser returns.
 */
function parseTeams(content, options = {}) {
  const format = options.format || detectMeetingType(content);

  const parsersByFormat = new Map([
    ['teams_json', parseTeamsJSON],
    ['teams_chat', parseTeamsChat],
    ['teams_vtt', parseTeamsVTT]
  ]);

  const direct = parsersByFormat.get(format);
  if (direct) {
    return direct(content, options);
  }

  // Unrecognised label: sniff the content instead.
  const head = content.trim();
  if (head.startsWith('{') || head.startsWith('[')) {
    return parseTeamsJSON(content, options);
  }
  return content.includes('-->')
    ? parseTeamsVTT(content, options)
    : parseTeamsChat(content, options);
}
|
|
919
|
+
|
|
920
|
+
/**
 * Parse a meeting transcript of unknown origin.
 *
 * Runs `detectMeetingType` on the content and hands it to `parseZoom` for
 * labels starting with "zoom", to `parseTeams` for labels starting with
 * "teams", and to `parseZoomVTT` for 'generic_vtt' (that parser also extracts
 * speakers from plain VTT text).
 *
 * @param {string} content - Raw transcript or chat text.
 * @param {Object} [options] - Forwarded unchanged to the selected parser.
 * @returns {Object} The selected parser's result, or
 *   `{ error: 'Unable to detect meeting format', format: 'unknown' }`.
 */
function parseMeeting(content, options = {}) {
  const detected = detectMeetingType(content);

  let parser = null;
  if (detected.startsWith('zoom')) {
    parser = parseZoom;
  } else if (detected.startsWith('teams')) {
    parser = parseTeams;
  } else if (detected === 'generic_vtt') {
    parser = parseZoomVTT;
  }

  return parser
    ? parser(content, options)
    : { error: 'Unable to detect meeting format', format: 'unknown' };
}
|
|
941
|
+
|
|
942
|
+
/**
 * Merge consecutive entries spoken by the same speaker into one entry.
 *
 * Two neighbouring entries are merged when their speakers are strictly equal
 * and the gap between the end of the running entry (its `endTimestampMs`, or
 * `timestampMs` when absent) and the start of the next is below the threshold.
 * A merged entry keeps the first entry's fields, joins the texts with a single
 * space and takes the end time of the last merged entry. Input entries are
 * copied, not mutated.
 *
 * @param {Object[]} entries - Entries in chronological order.
 * @param {Object} [options]
 * @param {number} [options.mergeThreshold=30000] - Maximum gap in milliseconds
 *   (exclusive). An explicit `0` disables merging.
 * @returns {Object[]} New array of merged entries.
 */
function mergeMeetingEntries(entries, options = {}) {
  // `??` instead of `||`: an explicit 0 must not be replaced by the default.
  const mergeThreshold = options.mergeThreshold ?? 30000; // 30 seconds default for meetings
  const merged = [];
  let current = null;

  // Turn the working copy (which carries `textParts`) into a final entry.
  const commit = ({ textParts, ...rest }) => {
    merged.push({ ...rest, text: textParts.join(' ') });
  };

  for (const entry of entries) {
    if (current === null) {
      current = { ...entry, textParts: [entry.text] };
      continue;
    }

    const gap = entry.timestampMs - (current.endTimestampMs || current.timestampMs);
    const sameSpeaker = entry.speaker === current.speaker;

    if (sameSpeaker && gap < mergeThreshold) {
      current.textParts.push(entry.text);
      current.endTimestampMs = entry.endTimestampMs || entry.timestampMs;
    } else {
      commit(current);
      current = { ...entry, textParts: [entry.text] };
    }
  }

  if (current) {
    commit(current);
  }

  return merged;
}
|
|
978
|
+
|
|
979
|
+
/**
 * Render meeting entries as plain text, one entry per line.
 *
 * Each line is "[timestamp] Speaker: text". The timestamp part appears only
 * when `options.timestamps` is truthy and the entry has a timestamp; a
 * trailing fractional part (".123") is removed for display. The speaker part
 * is omitted when `options.speakers` is `false` or the entry has no speaker.
 *
 * @param {Object[]} entries - Entries with `text` and optional `timestamp` / `speaker`.
 * @param {Object} [options]
 * @param {boolean} [options.timestamps] - Prefix lines with the entry timestamp.
 * @param {boolean} [options.speakers=true] - Set to `false` to omit speaker names.
 * @returns {string} Lines joined with '\n' (empty string for no entries).
 */
function formatMeetingAsText(entries, options = {}) {
  const lines = [];

  for (const entry of entries) {
    let line = '';

    if (options.timestamps && entry.timestamp) {
      // Timestamps taken from JSON exports may be numbers; coerce before
      // using string methods so they don't throw.
      const displayTime = String(entry.timestamp).replace(/\.\d+$/, ''); // Remove ms
      line += `[${displayTime}] `;
    }

    if (options.speakers !== false && entry.speaker) {
      line += `${entry.speaker}: `;
    }

    line += entry.text;
    lines.push(line);
  }

  return lines.join('\n');
}
|
|
1003
|
+
|
|
1004
|
+
/**
 * Compute summary statistics for a parsed meeting transcript.
 *
 * @param {Object} parsed - Result object from one of the meeting parsers.
 * @returns {Object|null} `null` when `parsed` is missing or carries an
 *   `error`; otherwise an object with:
 *   - `format`: copied from `parsed.format`
 *   - `entryCount`, `participantCount`, `participants` (from `parsed.metadata`)
 *   - `durationMs`: end of the last entry minus start of the first entry
 *   - `duration`: `durationMs` formatted by `msToTimestamp`
 *   - `totalWords`, `avgWordsPerEntry` (rounded to one decimal)
 *   - `speakerCounts`: number of entries per speaker
 */
function getMeetingStats(parsed) {
  // Treat a missing result like a failed parse instead of throwing.
  if (!parsed || parsed.error) return null;

  const entries = parsed.entries || [];
  const participants = parsed.metadata?.participants || [];

  // Calculate duration
  let durationMs = 0;
  if (entries.length > 0) {
    const lastEntry = entries[entries.length - 1];
    const firstMs = entries[0].timestampMs || 0;
    const lastMs = lastEntry.endTimestampMs || lastEntry.timestampMs || 0;
    durationMs = lastMs - firstMs;
  }

  // Word count
  const totalWords = entries.reduce((sum, e) => sum + countWords(e.text), 0);

  // Messages per speaker. Tallied in a Map so that names colliding with
  // Object.prototype members (e.g. "constructor") are counted correctly;
  // Object.fromEntries then defines each name as an own property.
  const tally = new Map();
  for (const entry of entries) {
    if (entry.speaker) {
      const key = String(entry.speaker);
      tally.set(key, (tally.get(key) || 0) + 1);
    }
  }

  return {
    format: parsed.format,
    entryCount: entries.length,
    participantCount: participants.length,
    participants: participants,
    durationMs: durationMs,
    duration: msToTimestamp(durationMs),
    totalWords: totalWords,
    avgWordsPerEntry: entries.length > 0 ? Math.round(totalWords / entries.length * 10) / 10 : 0,
    speakerCounts: Object.fromEntries(tally)
  };
}
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
module.exports = {
|
|
1048
|
+
// VTT/SRT Constants
|
|
1049
|
+
VTT_TIMESTAMP_FULL,
|
|
1050
|
+
VTT_TIMESTAMP_SHORT,
|
|
1051
|
+
VTT_VOICE_TAG,
|
|
1052
|
+
SRT_TIMESTAMP,
|
|
1053
|
+
SPEAKER_COLON_PATTERN,
|
|
1054
|
+
SPEAKER_BRACKET_PATTERN,
|
|
1055
|
+
// VTT/SRT Functions
|
|
1056
|
+
timestampToMs,
|
|
1057
|
+
msToTimestamp,
|
|
1058
|
+
cleanSubtitleText,
|
|
1059
|
+
extractVTTSpeaker,
|
|
1060
|
+
extractSpeaker,
|
|
1061
|
+
parseVTT,
|
|
1062
|
+
parseSRT,
|
|
1063
|
+
mergeCues,
|
|
1064
|
+
parseSubtitle,
|
|
1065
|
+
formatCuesAsText,
|
|
1066
|
+
getSubtitleStats,
|
|
1067
|
+
// Zoom/Teams Constants
|
|
1068
|
+
ZOOM_PATTERNS,
|
|
1069
|
+
TEAMS_PATTERNS,
|
|
1070
|
+
// Zoom/Teams Functions
|
|
1071
|
+
isSystemMessage,
|
|
1072
|
+
parseTimeToMs,
|
|
1073
|
+
parseZoomChat,
|
|
1074
|
+
parseZoomVTT,
|
|
1075
|
+
parseTeamsChat,
|
|
1076
|
+
parseTeamsVTT,
|
|
1077
|
+
parseTeamsJSON,
|
|
1078
|
+
detectMeetingType,
|
|
1079
|
+
parseZoom,
|
|
1080
|
+
parseTeams,
|
|
1081
|
+
parseMeeting,
|
|
1082
|
+
mergeMeetingEntries,
|
|
1083
|
+
formatMeetingAsText,
|
|
1084
|
+
getMeetingStats
|
|
1085
|
+
};
|