markupr 2.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/review-feedback.md +47 -0
- package/.eslintrc.json +35 -0
- package/.github/CODEOWNERS +16 -0
- package/.github/FUNDING.yml +1 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +56 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +54 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +89 -0
- package/.github/dependabot.yml +70 -0
- package/.github/workflows/ci.yml +184 -0
- package/.github/workflows/deploy-landing.yml +134 -0
- package/.github/workflows/nightly.yml +288 -0
- package/.github/workflows/release.yml +318 -0
- package/CHANGELOG.md +127 -0
- package/CLAUDE.md +137 -0
- package/CODE_OF_CONDUCT.md +9 -0
- package/CONTRIBUTING.md +390 -0
- package/LICENSE +21 -0
- package/PRODUCT_VISION.md +277 -0
- package/README.md +517 -0
- package/SECURITY.md +51 -0
- package/SIGNING_INSTRUCTIONS.md +284 -0
- package/assets/DMG_BACKGROUND_INSTRUCTIONS.md +130 -0
- package/assets/svg-source/dmg-background.svg +70 -0
- package/assets/svg-source/icon.svg +20 -0
- package/assets/svg-source/tray-icon-processing.svg +7 -0
- package/assets/svg-source/tray-icon-recording.svg +7 -0
- package/assets/svg-source/tray-icon.svg +6 -0
- package/assets/tray-complete.png +0 -0
- package/assets/tray-complete@2x.png +0 -0
- package/assets/tray-completeTemplate.png +0 -0
- package/assets/tray-completeTemplate@2x.png +0 -0
- package/assets/tray-error.png +0 -0
- package/assets/tray-error@2x.png +0 -0
- package/assets/tray-errorTemplate.png +0 -0
- package/assets/tray-errorTemplate@2x.png +0 -0
- package/assets/tray-icon-processing.png +0 -0
- package/assets/tray-icon-processing@2x.png +0 -0
- package/assets/tray-icon-processingTemplate.png +0 -0
- package/assets/tray-icon-processingTemplate@2x.png +0 -0
- package/assets/tray-icon-recording.png +0 -0
- package/assets/tray-icon-recording@2x.png +0 -0
- package/assets/tray-icon-recordingTemplate.png +0 -0
- package/assets/tray-icon-recordingTemplate@2x.png +0 -0
- package/assets/tray-icon.png +0 -0
- package/assets/tray-icon@2x.png +0 -0
- package/assets/tray-iconTemplate.png +0 -0
- package/assets/tray-iconTemplate@2x.png +0 -0
- package/assets/tray-idle.png +0 -0
- package/assets/tray-idle@2x.png +0 -0
- package/assets/tray-idleTemplate.png +0 -0
- package/assets/tray-idleTemplate@2x.png +0 -0
- package/assets/tray-processing-0.png +0 -0
- package/assets/tray-processing-0@2x.png +0 -0
- package/assets/tray-processing-0Template.png +0 -0
- package/assets/tray-processing-0Template@2x.png +0 -0
- package/assets/tray-processing-1.png +0 -0
- package/assets/tray-processing-1@2x.png +0 -0
- package/assets/tray-processing-1Template.png +0 -0
- package/assets/tray-processing-1Template@2x.png +0 -0
- package/assets/tray-processing-2.png +0 -0
- package/assets/tray-processing-2@2x.png +0 -0
- package/assets/tray-processing-2Template.png +0 -0
- package/assets/tray-processing-2Template@2x.png +0 -0
- package/assets/tray-processing-3.png +0 -0
- package/assets/tray-processing-3@2x.png +0 -0
- package/assets/tray-processing-3Template.png +0 -0
- package/assets/tray-processing-3Template@2x.png +0 -0
- package/assets/tray-processing.png +0 -0
- package/assets/tray-processing@2x.png +0 -0
- package/assets/tray-processingTemplate.png +0 -0
- package/assets/tray-processingTemplate@2x.png +0 -0
- package/assets/tray-recording.png +0 -0
- package/assets/tray-recording@2x.png +0 -0
- package/assets/tray-recordingTemplate.png +0 -0
- package/assets/tray-recordingTemplate@2x.png +0 -0
- package/build/DMG_BACKGROUND_SPEC.md +50 -0
- package/build/dmg-background.png +0 -0
- package/build/dmg-background@2x.png +0 -0
- package/build/entitlements.mac.inherit.plist +27 -0
- package/build/entitlements.mac.plist +41 -0
- package/build/favicon-16.png +0 -0
- package/build/favicon-180.png +0 -0
- package/build/favicon-192.png +0 -0
- package/build/favicon-32.png +0 -0
- package/build/favicon-48.png +0 -0
- package/build/favicon-512.png +0 -0
- package/build/favicon-64.png +0 -0
- package/build/icon-128.png +0 -0
- package/build/icon-16.png +0 -0
- package/build/icon-24.png +0 -0
- package/build/icon-256.png +0 -0
- package/build/icon-32.png +0 -0
- package/build/icon-48.png +0 -0
- package/build/icon-64.png +0 -0
- package/build/icon.icns +0 -0
- package/build/icon.ico +0 -0
- package/build/icon.iconset/icon_128x128.png +0 -0
- package/build/icon.iconset/icon_128x128@2x.png +0 -0
- package/build/icon.iconset/icon_16x16.png +0 -0
- package/build/icon.iconset/icon_16x16@2x.png +0 -0
- package/build/icon.iconset/icon_256x256.png +0 -0
- package/build/icon.iconset/icon_256x256@2x.png +0 -0
- package/build/icon.iconset/icon_32x32.png +0 -0
- package/build/icon.iconset/icon_32x32@2x.png +0 -0
- package/build/icon.iconset/icon_512x512.png +0 -0
- package/build/icon.iconset/icon_512x512@2x.png +0 -0
- package/build/icon.png +0 -0
- package/build/installer-header.bmp +0 -0
- package/build/installer-header.png +0 -0
- package/build/installer-sidebar.bmp +0 -0
- package/build/installer-sidebar.png +0 -0
- package/build/installer.nsh +45 -0
- package/build/overlay-processing.png +0 -0
- package/build/overlay-recording.png +0 -0
- package/build/toolbar-record.png +0 -0
- package/build/toolbar-screenshot.png +0 -0
- package/build/toolbar-settings.png +0 -0
- package/build/toolbar-stop.png +0 -0
- package/dist/main/index.mjs +12612 -0
- package/dist/preload/index.mjs +907 -0
- package/dist/renderer/assets/index-CCmUjl9K.js +19495 -0
- package/dist/renderer/assets/index-CUqz_Gs6.css +2270 -0
- package/dist/renderer/index.html +27 -0
- package/docs/AI_AGENT_QUICKSTART.md +42 -0
- package/docs/AI_PIPELINE_DESIGN.md +595 -0
- package/docs/API.md +514 -0
- package/docs/ARCHITECTURE.md +460 -0
- package/docs/CONFIGURATION.md +336 -0
- package/docs/DEVELOPMENT.md +508 -0
- package/docs/EXPORT_FORMATS.md +451 -0
- package/docs/GETTING_STARTED.md +236 -0
- package/docs/KEYBOARD_SHORTCUTS.md +334 -0
- package/docs/TROUBLESHOOTING.md +418 -0
- package/docs/landing/index.html +672 -0
- package/docs/landing/script.js +342 -0
- package/docs/landing/styles.css +1543 -0
- package/electron-builder.yml +140 -0
- package/electron.vite.config.ts +63 -0
- package/package.json +108 -0
- package/railway.json +12 -0
- package/scripts/build.mjs +51 -0
- package/scripts/generate-icons.mjs +314 -0
- package/scripts/generate-installer-images.cjs +253 -0
- package/scripts/generate-tray-icons.mjs +258 -0
- package/scripts/notarize.cjs +180 -0
- package/scripts/one-click-clean-test.sh +147 -0
- package/scripts/postinstall.mjs +36 -0
- package/scripts/setup-markupr.sh +55 -0
- package/setup +17 -0
- package/site/index.html +1835 -0
- package/site/package.json +11 -0
- package/site/railway.json +12 -0
- package/site/server.js +31 -0
- package/src/main/AutoUpdater.ts +392 -0
- package/src/main/CrashRecovery.ts +655 -0
- package/src/main/ErrorHandler.ts +703 -0
- package/src/main/HotkeyManager.ts +399 -0
- package/src/main/MenuManager.ts +529 -0
- package/src/main/PermissionManager.ts +420 -0
- package/src/main/SessionController.ts +1465 -0
- package/src/main/TrayManager.ts +540 -0
- package/src/main/ai/AIPipelineManager.ts +199 -0
- package/src/main/ai/ClaudeAnalyzer.ts +339 -0
- package/src/main/ai/ImageOptimizer.ts +176 -0
- package/src/main/ai/StructuredMarkdownBuilder.ts +379 -0
- package/src/main/ai/index.ts +16 -0
- package/src/main/ai/types.ts +258 -0
- package/src/main/analysis/ClarificationGenerator.ts +385 -0
- package/src/main/analysis/FeedbackAnalyzer.ts +531 -0
- package/src/main/analysis/index.ts +19 -0
- package/src/main/audio/AudioCapture.ts +978 -0
- package/src/main/audio/audioUtils.ts +100 -0
- package/src/main/audio/index.ts +20 -0
- package/src/main/capture/index.ts +1 -0
- package/src/main/index.ts +1693 -0
- package/src/main/ipc/captureHandlers.ts +272 -0
- package/src/main/ipc/index.ts +45 -0
- package/src/main/ipc/outputHandlers.ts +302 -0
- package/src/main/ipc/sessionHandlers.ts +56 -0
- package/src/main/ipc/settingsHandlers.ts +471 -0
- package/src/main/ipc/types.ts +56 -0
- package/src/main/ipc/windowHandlers.ts +277 -0
- package/src/main/output/ClipboardService.ts +369 -0
- package/src/main/output/ExportService.ts +539 -0
- package/src/main/output/FileManager.ts +416 -0
- package/src/main/output/MarkdownGenerator.ts +791 -0
- package/src/main/output/MarkdownPatcher.ts +299 -0
- package/src/main/output/index.ts +186 -0
- package/src/main/output/sessionAdapter.ts +207 -0
- package/src/main/output/templates/html-template.ts +553 -0
- package/src/main/pipeline/FrameExtractor.ts +330 -0
- package/src/main/pipeline/PostProcessor.ts +399 -0
- package/src/main/pipeline/TranscriptAnalyzer.ts +226 -0
- package/src/main/pipeline/index.ts +36 -0
- package/src/main/platform/WindowsTaskbar.ts +600 -0
- package/src/main/platform/index.ts +16 -0
- package/src/main/settings/SettingsManager.ts +730 -0
- package/src/main/settings/index.ts +19 -0
- package/src/main/transcription/ModelDownloadManager.ts +494 -0
- package/src/main/transcription/TierManager.ts +219 -0
- package/src/main/transcription/TranscriptionRecoveryService.ts +340 -0
- package/src/main/transcription/WhisperService.ts +748 -0
- package/src/main/transcription/index.ts +56 -0
- package/src/main/transcription/types.ts +135 -0
- package/src/main/windows/PopoverManager.ts +284 -0
- package/src/main/windows/TaskbarIntegration.ts +452 -0
- package/src/main/windows/index.ts +23 -0
- package/src/preload/index.ts +1047 -0
- package/src/renderer/App.tsx +515 -0
- package/src/renderer/AppWrapper.tsx +28 -0
- package/src/renderer/assets/logo-dark.svg +7 -0
- package/src/renderer/assets/logo.svg +7 -0
- package/src/renderer/audio/AudioCaptureRenderer.ts +454 -0
- package/src/renderer/capture/ScreenRecordingRenderer.ts +492 -0
- package/src/renderer/components/AnnotationOverlay.tsx +836 -0
- package/src/renderer/components/AudioWaveform.tsx +811 -0
- package/src/renderer/components/ClarificationQuestions.tsx +656 -0
- package/src/renderer/components/CountdownTimer.tsx +495 -0
- package/src/renderer/components/CrashRecoveryDialog.tsx +632 -0
- package/src/renderer/components/DonateButton.tsx +127 -0
- package/src/renderer/components/ErrorBoundary.tsx +308 -0
- package/src/renderer/components/ExportDialog.tsx +872 -0
- package/src/renderer/components/HotkeyHint.tsx +261 -0
- package/src/renderer/components/KeyboardShortcuts.tsx +787 -0
- package/src/renderer/components/ModelDownloadDialog.tsx +844 -0
- package/src/renderer/components/Onboarding.tsx +1830 -0
- package/src/renderer/components/ProcessingOverlay.tsx +157 -0
- package/src/renderer/components/RecordingOverlay.tsx +423 -0
- package/src/renderer/components/SessionHistory.tsx +1746 -0
- package/src/renderer/components/SessionReview.tsx +1321 -0
- package/src/renderer/components/SettingsPanel.tsx +217 -0
- package/src/renderer/components/Skeleton.tsx +347 -0
- package/src/renderer/components/StatusIndicator.tsx +86 -0
- package/src/renderer/components/ThemeProvider.tsx +429 -0
- package/src/renderer/components/Tooltip.tsx +370 -0
- package/src/renderer/components/TranscriptionPreview.tsx +183 -0
- package/src/renderer/components/TranscriptionTierSelector.tsx +640 -0
- package/src/renderer/components/UpdateNotification.tsx +377 -0
- package/src/renderer/components/WindowSelector.tsx +947 -0
- package/src/renderer/components/index.ts +99 -0
- package/src/renderer/components/primitives/ApiKeyInput.tsx +98 -0
- package/src/renderer/components/primitives/ColorPicker.tsx +65 -0
- package/src/renderer/components/primitives/DangerButton.tsx +45 -0
- package/src/renderer/components/primitives/DirectoryPicker.tsx +41 -0
- package/src/renderer/components/primitives/Dropdown.tsx +34 -0
- package/src/renderer/components/primitives/KeyRecorder.tsx +117 -0
- package/src/renderer/components/primitives/SettingsSection.tsx +32 -0
- package/src/renderer/components/primitives/Slider.tsx +43 -0
- package/src/renderer/components/primitives/Toggle.tsx +36 -0
- package/src/renderer/components/primitives/index.ts +10 -0
- package/src/renderer/components/settings/AdvancedTab.tsx +174 -0
- package/src/renderer/components/settings/AppearanceTab.tsx +77 -0
- package/src/renderer/components/settings/GeneralTab.tsx +40 -0
- package/src/renderer/components/settings/HotkeysTab.tsx +79 -0
- package/src/renderer/components/settings/RecordingTab.tsx +84 -0
- package/src/renderer/components/settings/index.ts +9 -0
- package/src/renderer/components/settings/settingsStyles.ts +673 -0
- package/src/renderer/components/settings/tabConfig.tsx +85 -0
- package/src/renderer/components/settings/useSettingsPanel.ts +447 -0
- package/src/renderer/contexts/ProcessingContext.tsx +227 -0
- package/src/renderer/contexts/RecordingContext.tsx +683 -0
- package/src/renderer/contexts/UIContext.tsx +326 -0
- package/src/renderer/contexts/index.ts +24 -0
- package/src/renderer/donateMessages.ts +69 -0
- package/src/renderer/hooks/index.ts +75 -0
- package/src/renderer/hooks/useAnimation.tsx +544 -0
- package/src/renderer/hooks/useTheme.ts +313 -0
- package/src/renderer/index.html +26 -0
- package/src/renderer/main.tsx +52 -0
- package/src/renderer/styles/animations.css +1093 -0
- package/src/renderer/styles/app-shell.css +662 -0
- package/src/renderer/styles/globals.css +515 -0
- package/src/renderer/styles/theme.ts +578 -0
- package/src/renderer/types/electron.d.ts +385 -0
- package/src/shared/hotkeys.ts +283 -0
- package/src/shared/types.ts +809 -0
- package/tests/clipboard.test.ts +228 -0
- package/tests/e2e/criticalPaths.test.ts +594 -0
- package/tests/feedbackAnalyzer.test.ts +303 -0
- package/tests/integration/sessionFlow.test.ts +583 -0
- package/tests/markdownGenerator.test.ts +418 -0
- package/tests/output.test.ts +96 -0
- package/tests/setup.ts +486 -0
- package/tests/unit/appIntegration.test.ts +676 -0
- package/tests/unit/appViewState.test.ts +281 -0
- package/tests/unit/audioIpcChannels.test.ts +17 -0
- package/tests/unit/exportService.test.ts +492 -0
- package/tests/unit/hotkeys.test.ts +92 -0
- package/tests/unit/navigationPreload.test.ts +94 -0
- package/tests/unit/onboardingFlow.test.ts +345 -0
- package/tests/unit/permissionManager.test.ts +175 -0
- package/tests/unit/permissionManagerExpanded.test.ts +296 -0
- package/tests/unit/screenRecordingRenderer.test.ts +368 -0
- package/tests/unit/sessionController.test.ts +515 -0
- package/tests/unit/tierManager.test.ts +61 -0
- package/tests/unit/tierManagerExpanded.test.ts +142 -0
- package/tests/unit/transcriptAnalyzer.test.ts +64 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +46 -0
|
@@ -0,0 +1,748 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WhisperService.ts - Local Whisper Transcription (Tier 2)
|
|
3
|
+
*
|
|
4
|
+
* Uses whisper.cpp via whisper-node for on-device transcription.
|
|
5
|
+
* No API key or internet required.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Batch processing of audio chunks
|
|
9
|
+
* - Configurable model and language
|
|
10
|
+
* - Memory-efficient buffer management
|
|
11
|
+
* - Event-based result delivery
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { EventEmitter } from 'events';
|
|
15
|
+
import { basename, join } from 'path';
|
|
16
|
+
import { app } from 'electron';
|
|
17
|
+
import { existsSync } from 'fs';
|
|
18
|
+
import { readFile, unlink } from 'fs/promises';
|
|
19
|
+
import { execFile } from 'child_process';
|
|
20
|
+
import { promisify } from 'util';
|
|
21
|
+
import { tmpdir } from 'os';
|
|
22
|
+
import { randomUUID } from 'crypto';
|
|
23
|
+
import * as os from 'os';
|
|
24
|
+
import type { WhisperTranscriptResult, WhisperConfig, ErrorCallback } from './types';
|
|
25
|
+
|
|
26
|
+
const execFileAsync = promisify(execFile); // Promise-returning execFile, so child processes can be awaited
|
|
27
|
+
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// Types
|
|
30
|
+
// ============================================================================
|
|
31
|
+
|
|
32
|
+
type TranscriptCallback = (result: WhisperTranscriptResult) => void; // listener invoked with a transcript result

// Shape this service expects from the dynamically imported 'whisper-node' module.
interface WhisperModule {
  // Transcribes raw samples and resolves to timed text segments.
  whisper: (
    samples: Float32Array,
    options: {
      // Path of the model file to run.
      modelPath: string;
      // Language code forwarded from the service configuration.
      language?: string;
      // Number of worker threads to use.
      threads?: number;
      // Forwarded from the service's translateToEnglish setting.
      translate?: boolean;
    }
  ) => Promise<Array<{ text: string; start: number; end: number }>>;
}
|
|
46
|
+
|
|
47
|
+
// ============================================================================
|
|
48
|
+
// Constants
|
|
49
|
+
// ============================================================================
|
|
50
|
+
|
|
51
|
+
const DEFAULT_CONFIG: WhisperConfig = {
  // Left empty here; the constructor substitutes the default model path.
  modelPath: '',
  language: 'en',
  // Half of the reported CPU cores, but never fewer than one thread.
  threads: Math.max(1, Math.floor(os.cpus().length / 2)),
  translateToEnglish: false,
};
|
|
57
|
+
|
|
58
|
+
// Audio buffer configuration
|
|
59
|
+
const CHUNK_DURATION_MS = 3000; // Interval between periodic buffer processing passes (3 s)
const MAX_BUFFER_DURATION_MS = 30000; // Buffered audio beyond 30 s triggers a forced processing pass
const MAX_BUFFER_SIZE_BYTES = 500 * 1024; // 500KB cap as per audit
const SAMPLE_RATE = 16000; // 16 kHz mono
const FILE_CHUNK_DURATION_SEC = 30; // File transcription works on 30-second chunks
const FILE_CHUNK_SAMPLES = SAMPLE_RATE * FILE_CHUNK_DURATION_SEC; // Samples per file chunk

// Memory requirement per model, keyed by model file name, in bytes.
const MODEL_MEMORY_REQUIREMENTS_BYTES: Record<string, number> = {
  'ggml-tiny.bin': 450 * 1024 * 1024,
  'ggml-base.bin': 800 * 1024 * 1024,
  'ggml-small.bin': 1400 * 1024 * 1024,
  'ggml-medium.bin': 2800 * 1024 * 1024,
  'ggml-large-v3.bin': 5200 * 1024 * 1024,
};
|
|
72
|
+
|
|
73
|
+
// ============================================================================
|
|
74
|
+
// WhisperService Class
|
|
75
|
+
// ============================================================================
|
|
76
|
+
|
|
77
|
+
export class WhisperService extends EventEmitter {
|
|
78
|
+
private config: WhisperConfig; // Effective configuration (defaults merged with caller overrides)
private isInitialized = false; // Set once initialize() succeeds; cleared by setModelPath()
private isProcessing = false; // NOTE(review): presumably guards against overlapping processing passes — confirm where it is used
private whisperModule: WhisperModule | null = null; // Populated by the dynamic import in initialize()

// Pending audio awaiting batch processing
private audioBuffer: Float32Array[] = [];
private bufferStartTime = 0; // Set to Date.now() when start() runs
private totalBufferDuration = 0; // Sum of the durationMs values passed to addAudio()
private totalBufferBytes = 0; // Sum of byteLength of the buffered chunks

// Handle of the periodic processing timer; null while no timer is running
private processingInterval: ReturnType<typeof setInterval> | null = null;

// Registered listeners
private transcriptCallbacks: TranscriptCallback[] = [];
private errorCallbacks: ErrorCallback[] = [];
|
|
95
|
+
|
|
96
|
+
constructor(config?: Partial<WhisperConfig>) {
  super();

  // Defaults first, caller-supplied overrides second.
  this.config = { ...DEFAULT_CONFIG, ...config };

  // A configured (non-empty) model path wins; nothing more to do.
  if (this.config.modelPath) {
    return;
  }

  // Otherwise point at the default model location.
  this.config.modelPath = this.getDefaultModelPath();
}
|
|
105
|
+
|
|
106
|
+
// ============================================================================
|
|
107
|
+
// Public API
|
|
108
|
+
// ============================================================================
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Check if Whisper model is available
|
|
112
|
+
*/
|
|
113
|
+
isModelAvailable(): boolean {
  // True when the configured model path exists on disk.
  const { modelPath } = this.config;
  return existsSync(modelPath);
}
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Get the path where models should be stored
|
|
119
|
+
*/
|
|
120
|
+
getModelsDirectory(): string {
  try {
    // Preferred location: a 'whisper-models' folder inside Electron's userData directory.
    return join(app.getPath('userData'), 'whisper-models');
  } catch {
    // app.getPath() threw (presumably because Electron's app object is not
    // usable in this context — TODO confirm). Fall back to a dot-directory
    // under the user's home. If neither HOME nor USERPROFILE is set, use the
    // platform temp directory instead of a hard-coded POSIX '/tmp', which is
    // not a valid temp location on Windows.
    const baseDir = process.env.HOME || process.env.USERPROFILE || tmpdir();
    return join(baseDir, '.markupr', 'whisper-models');
  }
}
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Get the default model path (whisper-medium)
|
|
131
|
+
*/
|
|
132
|
+
getDefaultModelPath(): string {
  // The default model is the medium ggml file inside the models directory.
  const modelsDir = this.getModelsDirectory();
  return join(modelsDir, 'ggml-medium.bin');
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Set the model path
|
|
138
|
+
*/
|
|
139
|
+
setModelPath(modelPath: string): void {
  // Clearing the flag makes the next initialize() call run again with the new model.
  this.isInitialized = false;
  this.config.modelPath = modelPath;
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Check if system has enough memory for Whisper
|
|
146
|
+
* Requirement is model-aware (tiny/base/small/medium/large).
|
|
147
|
+
*/
|
|
148
|
+
hasEnoughMemory(): boolean {
  // Compare the OS-reported free memory against the requirement for the current model.
  return os.freemem() >= this.getRequiredMemoryBytes();
}
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Get current memory info
|
|
156
|
+
*/
|
|
157
|
+
getMemoryInfo(): { freeMemoryMB: number; requiredMemoryMB: number; sufficient: boolean } {
  // Converts a byte count to whole megabytes (rounded to nearest).
  const toMegabytes = (bytes: number): number => Math.round(bytes / 1024 / 1024);

  const freeBytes = os.freemem();
  const requiredBytes = this.getRequiredMemoryBytes();

  return {
    freeMemoryMB: toMegabytes(freeBytes),
    requiredMemoryMB: toMegabytes(requiredBytes),
    // The comparison uses the exact byte counts, not the rounded MB figures.
    sufficient: freeBytes >= requiredBytes,
  };
}
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* Initialize the Whisper model
|
|
169
|
+
* Call this once before starting transcription
|
|
170
|
+
*/
|
|
171
|
+
async initialize(): Promise<void> {
  // Already initialized for the current model: nothing to do.
  if (this.isInitialized) {
    return;
  }

  // Precondition failures below are thrown directly and are NOT reported
  // to the registered error callbacks.
  if (!this.isModelAvailable()) {
    throw new Error(`Whisper model not found at ${this.config.modelPath}. Please download the model first.`);
  }

  if (!this.hasEnoughMemory()) {
    const memInfo = this.getMemoryInfo();
    throw new Error(
      `Insufficient memory for Whisper. Need ~${memInfo.requiredMemoryMB}MB free, only ${memInfo.freeMemoryMB}MB available.`
    );
  }

  this.log('Initializing Whisper model...');

  try {
    // Dynamically import whisper-node to avoid startup crashes if not installed
    // @ts-expect-error - whisper-node may not have types
    this.whisperModule = await import('whisper-node');

    // Verify the module loaded correctly
    if (!this.whisperModule || typeof this.whisperModule.whisper !== 'function') {
      throw new Error('whisper-node module loaded but whisper function not found');
    }

    // Do a test transcription with tiny audio to pre-load the model
    this.log('Pre-loading model with test transcription...');
    const testBuffer = new Float32Array(1600); // 16kHz * 0.1s = 100ms of silence

    await this.whisperModule.whisper(testBuffer, {
      modelPath: this.config.modelPath,
      language: this.config.language,
      threads: this.config.threads,
    });

    this.isInitialized = true;
    this.log('Whisper model initialized successfully');
  } catch (error: unknown) {
    // A caught value is not guaranteed to be an Error (strings or plain
    // objects can be thrown), so narrow before reading `.message`; otherwise
    // the wrapped message would end in "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    const initError = new Error(`Failed to initialize Whisper: ${reason}`);

    // Notify listeners first, then propagate to the caller.
    this.errorCallbacks.forEach((cb) => cb(initError));
    throw initError;
  }
}
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* Check if service is initialized and ready
|
|
220
|
+
*/
|
|
221
|
+
isReady(): boolean {
  // Ready requires both a successful initialize() and a loaded module.
  if (!this.isInitialized) {
    return false;
  }
  return this.whisperModule !== null;
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Start accepting audio for transcription
|
|
227
|
+
*/
|
|
228
|
+
async start(): Promise<void> {
  // Lazily initialize; initialize() throws if the model cannot be loaded.
  if (!this.isInitialized) {
    await this.initialize();
  }

  // If start() is called again without an intervening stop(), the previous
  // timer handle would be overwritten and its interval would keep firing
  // forever. Clear it before creating a new one.
  if (this.processingInterval) {
    clearInterval(this.processingInterval);
    this.processingInterval = null;
  }

  // Reset buffer state
  this.audioBuffer = [];
  this.bufferStartTime = Date.now();
  this.totalBufferDuration = 0;
  this.totalBufferBytes = 0;

  // Start periodic processing
  this.processingInterval = setInterval(() => {
    this.processBufferedAudio();
  }, CHUNK_DURATION_MS);

  this.log('Whisper transcription started');
}
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Stop transcription and process remaining audio
|
|
249
|
+
*/
|
|
250
|
+
async stop(): Promise<void> {
  // Stop the periodic timer first so no new pass is scheduled while flushing.
  if (this.processingInterval) {
    clearInterval(this.processingInterval);
    this.processingInterval = null;
  }

  try {
    // Process any remaining audio
    await this.processBufferedAudio(true);
  } finally {
    // Always release buffered audio and reset the counters, even when the
    // final flush fails; the failure still propagates to the caller.
    this.audioBuffer = [];
    this.totalBufferDuration = 0;
    this.totalBufferBytes = 0;
  }

  this.log('Whisper transcription stopped');
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Add audio data to the buffer
|
|
270
|
+
* @param samples Float32Array of audio samples at 16kHz mono
|
|
271
|
+
* @param durationMs Duration of this chunk in milliseconds
|
|
272
|
+
*/
|
|
273
|
+
addAudio(samples: Float32Array, durationMs: number): void {
  const incomingBytes = samples.byteLength;
  const wouldExceedCap = this.totalBufferBytes + incomingBytes > MAX_BUFFER_SIZE_BYTES;

  // Enforce buffer size cap (500KB as per audit CRIT-006)
  if (wouldExceedCap) {
    this.log('Audio buffer full, force-processing before adding new audio');
    // Not awaited: the new chunk is appended immediately after this call returns.
    this.processBufferedAudio(true);
  }

  // Append the chunk and update the running totals.
  this.audioBuffer.push(samples);
  this.totalBufferBytes += incomingBytes;
  this.totalBufferDuration += durationMs;

  // Too much audio buffered by duration: force a processing pass.
  if (this.totalBufferDuration >= MAX_BUFFER_DURATION_MS) {
    this.processBufferedAudio(true);
  }
}
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Register callback for transcript results
|
|
294
|
+
*/
|
|
295
|
+
onTranscript(callback: TranscriptCallback): () => void {
  this.transcriptCallbacks.push(callback);

  // The returned function removes every registration of this callback.
  const unsubscribe = (): void => {
    this.transcriptCallbacks = this.transcriptCallbacks.filter((registered) => registered !== callback);
  };
  return unsubscribe;
}
|
|
301
|
+
|
|
302
|
+
/**
|
|
303
|
+
* Register callback for errors
|
|
304
|
+
*/
|
|
305
|
+
onError(callback: ErrorCallback): () => void {
  this.errorCallbacks.push(callback);

  // The returned function removes every registration of this callback.
  const unsubscribe = (): void => {
    this.errorCallbacks = this.errorCallbacks.filter((registered) => registered !== callback);
  };
  return unsubscribe;
}
|
|
311
|
+
|
|
312
|
+
/**
|
|
313
|
+
* Get current configuration
|
|
314
|
+
*/
|
|
315
|
+
getConfig(): WhisperConfig {
  // Shallow copy, so reassigning fields on the result does not touch the service's own config.
  const snapshot = { ...this.config };
  return snapshot;
}
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* Transcribe a complete Float32 buffer in one pass.
|
|
321
|
+
* Useful for post-session retry workflows when live streaming failed.
|
|
322
|
+
*/
|
|
323
|
+
async transcribeSamples(
  samples: Float32Array,
  startTimeSec: number
): Promise<WhisperTranscriptResult[]> {
  // Lazily initialize on first use.
  if (!this.isInitialized) {
    await this.initialize();
  }

  const whisperModule = this.whisperModule;
  if (!whisperModule) {
    throw new Error('Whisper module not loaded');
  }

  const segments = await whisperModule.whisper(samples, {
    modelPath: this.config.modelPath,
    language: this.config.language,
    threads: this.config.threads,
    translate: this.config.translateToEnglish,
  });

  // No output at all: nothing to report.
  if (!segments || segments.length === 0) {
    return [];
  }

  // Drop segments whose text is empty after trimming, then shift the
  // segment-relative times by the caller-supplied start offset.
  return segments
    .filter((segment) => segment.text.trim().length > 0)
    .map((segment) => ({
      text: segment.text.trim(),
      startTime: startTimeSec + segment.start,
      endTime: startTimeSec + segment.end,
      // Fixed placeholder value; no per-segment confidence is read from the result.
      confidence: 0.9,
    }));
}
|
|
355
|
+
|
|
356
|
+
/**
|
|
357
|
+
* Transcribe an audio file from disk.
|
|
358
|
+
* Loads the file, converts to Float32Array at 16kHz mono, and transcribes.
|
|
359
|
+
* For large files, processes in chunks to manage memory.
|
|
360
|
+
*
|
|
361
|
+
* @param audioPath - Path to the audio file (webm, wav, ogg, m4a)
|
|
362
|
+
* @param onProgress - Optional progress callback (0-100)
|
|
363
|
+
* @returns Array of transcript results with timestamps
|
|
364
|
+
*/
|
|
365
|
+
async transcribeFile(
  audioPath: string,
  onProgress?: (percent: number) => void
): Promise<WhisperTranscriptResult[]> {
  // Lazily initialize on first use.
  if (!this.isInitialized) {
    await this.initialize();
  }

  if (!existsSync(audioPath)) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  this.log(`Transcribing file: ${audioPath}`);
  onProgress?.(0);

  // Decode the whole file into 16 kHz mono float samples.
  const samples = await this.loadAudioAsSamples(audioPath);
  const sampleCount = samples.length;

  if (sampleCount === 0) {
    this.log('Audio file produced no samples');
    onProgress?.(100);
    return [];
  }

  const totalChunks = Math.ceil(sampleCount / FILE_CHUNK_SAMPLES);
  const results: WhisperTranscriptResult[] = [];

  this.log(`Processing ${totalChunks} chunk(s) (${(sampleCount / SAMPLE_RATE).toFixed(1)}s total)`);

  let chunksDone = 0;
  while (chunksDone < totalChunks) {
    const firstSample = chunksDone * FILE_CHUNK_SAMPLES;
    const lastSample = Math.min(firstSample + FILE_CHUNK_SAMPLES, sampleCount);

    // subarray() is a view on the same memory; the chunk's start offset in
    // seconds is passed so segment times are relative to the whole file.
    const chunkResults = await this.transcribeSamples(
      samples.subarray(firstSample, lastSample),
      firstSample / SAMPLE_RATE
    );
    for (const segment of chunkResults) {
      results.push(segment);
    }

    chunksDone += 1;
    onProgress?.(Math.round((chunksDone / totalChunks) * 100));

    // Yield to the event loop between chunks (not after the last one).
    if (chunksDone < totalChunks) {
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
  }

  this.log(`Transcription complete: ${results.length} segment(s)`);
  return results;
}
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* Check if ffmpeg is available on the system
|
|
419
|
+
*/
|
|
420
|
+
async isFfmpegAvailable(): Promise<boolean> {
  // Probe by running `ffmpeg -version`; any failure to run it counts as "not available".
  let available = true;
  try {
    await execFileAsync('ffmpeg', ['-version']);
  } catch {
    available = false;
  }
  return available;
}
|
|
428
|
+
|
|
429
|
+
// ============================================================================
|
|
430
|
+
// Private Methods
|
|
431
|
+
// ============================================================================
|
|
432
|
+
|
|
433
|
+
/**
 * Decode an audio file into Float32Array samples at 16kHz mono.
 *
 * Dispatch is based purely on the (case-insensitive) text after the last
 * `.` in the path: `wav` is parsed directly, anything else is handed to the
 * ffmpeg-based converter.
 *
 * @param audioPath - Path of the audio file to load.
 * @returns The decoded samples.
 */
private async loadAudioAsSamples(audioPath: string): Promise<Float32Array> {
  const pathSegments = audioPath.toLowerCase().split('.');
  const extension = pathSegments[pathSegments.length - 1] ?? '';
  const isWav = extension === 'wav';

  // Non-WAV formats (webm, ogg, m4a, etc.) go through ffmpeg.
  return isWav ? this.parseWavFile(audioPath) : this.convertWithFfmpeg(audioPath);
}
|
|
447
|
+
|
|
448
|
+
/**
 * Parse a WAV file and extract PCM data as Float32Array at 16kHz mono.
 *
 * Walks the RIFF chunk list, reads the format description from the `fmt `
 * chunk and hands the payload of the first `data` chunk to
 * `extractWavSamples`.
 *
 * WAVE_FORMAT_EXTENSIBLE headers (format tag 0xFFFE, commonly written by
 * encoders such as ffmpeg for sample formats wider than 16 bits) are resolved
 * to their underlying format code, so float32 / int16 files using the
 * extensible header are treated the same as their plain-header equivalents.
 *
 * @param wavPath - Path of the WAV file to read.
 * @returns Decoded samples.
 * @throws Error if the RIFF/WAVE signature is missing, the `fmt ` chunk is
 *   truncated, a `data` chunk appears before `fmt `, or no `data` chunk exists.
 */
private async parseWavFile(wavPath: string): Promise<Float32Array> {
  const CHUNK_HEADER_BYTES = 8; // 4-byte id + 4-byte little-endian size
  const MIN_FMT_BYTES = 16; // size of the basic PCM format description
  const WAVE_FORMAT_EXTENSIBLE = 0xfffe;
  const SUBFORMAT_OFFSET = 24; // SubFormat GUID position inside an extensible fmt body

  const buffer = await readFile(wavPath);

  // Validate RIFF/WAVE header
  const riff = buffer.toString('ascii', 0, 4);
  const wave = buffer.toString('ascii', 8, 12);
  if (riff !== 'RIFF' || wave !== 'WAVE') {
    throw new Error(`Invalid WAV file: missing RIFF/WAVE header in ${wavPath}`);
  }

  let offset = 12;
  let audioFormat = 0;
  let numChannels = 0;
  let sampleRate = 0;
  let bitsPerSample = 0;
  let fmtFound = false;

  // `<=` so that a chunk header occupying exactly the last 8 bytes
  // (e.g. an empty data chunk) is still visited.
  while (offset + CHUNK_HEADER_BYTES <= buffer.length) {
    const chunkId = buffer.toString('ascii', offset, offset + 4);
    const chunkSize = buffer.readUInt32LE(offset + 4);
    const bodyStart = offset + CHUNK_HEADER_BYTES;

    if (chunkId === 'fmt ') {
      // Only trust bytes that are both declared by the chunk and present in the file.
      const availableBytes = Math.min(chunkSize, buffer.length - bodyStart);
      if (availableBytes < MIN_FMT_BYTES) {
        throw new Error(`Invalid WAV file: truncated fmt chunk in ${wavPath}`);
      }

      audioFormat = buffer.readUInt16LE(bodyStart);
      numChannels = buffer.readUInt16LE(bodyStart + 2);
      sampleRate = buffer.readUInt32LE(bodyStart + 4);
      bitsPerSample = buffer.readUInt16LE(bodyStart + 14);

      // For extensible headers the real format code is the first two bytes
      // of the SubFormat GUID (1 = integer PCM, 3 = IEEE float).
      if (audioFormat === WAVE_FORMAT_EXTENSIBLE && availableBytes >= SUBFORMAT_OFFSET + 2) {
        audioFormat = buffer.readUInt16LE(bodyStart + SUBFORMAT_OFFSET);
      }

      fmtFound = true;
    }

    if (chunkId === 'data') {
      if (!fmtFound) {
        throw new Error('WAV file has data chunk before fmt chunk');
      }

      // Clamp to the file size: the declared size may exceed what is on disk.
      const dataEnd = Math.min(bodyStart + chunkSize, buffer.length);
      const dataSlice = buffer.subarray(bodyStart, dataEnd);

      return this.extractWavSamples(dataSlice, audioFormat, numChannels, sampleRate, bitsPerSample);
    }

    // Chunks are word-aligned: an odd-sized body is followed by one pad byte.
    offset = bodyStart + chunkSize + (chunkSize % 2);
  }

  throw new Error(`Invalid WAV file: no data chunk found in ${wavPath}`);
}
|
|
503
|
+
|
|
504
|
+
/**
 * Extract samples from WAV data chunk, converting to Float32Array at 16kHz mono.
 *
 * Supported encodings:
 * - integer PCM (format=1): 8-bit unsigned, 16/24/32-bit signed little-endian
 * - IEEE float PCM (format=3): 32-bit and 64-bit little-endian
 *
 * Integer samples are scaled into the [-1, 1) range. Interleaved channels are
 * averaged to mono and the result is resampled when the file's rate differs
 * from `SAMPLE_RATE`.
 *
 * @param data - Raw bytes of the WAV `data` chunk.
 * @param audioFormat - WAV format code (1 = integer PCM, 3 = IEEE float).
 * @param numChannels - Number of interleaved channels.
 * @param sampleRate - Sample rate declared by the file, in Hz.
 * @param bitsPerSample - Bits per single-channel sample.
 * @returns Mono samples at `SAMPLE_RATE`.
 * @throws Error if the header values are invalid or the encoding is unsupported.
 */
private extractWavSamples(
  data: Buffer,
  audioFormat: number,
  numChannels: number,
  sampleRate: number,
  bitsPerSample: number
): Float32Array {
  // Reject nonsensical header values up front; otherwise they surface later
  // as an opaque "Invalid typed array length" RangeError.
  if (!Number.isInteger(numChannels) || numChannels < 1) {
    throw new Error(`Invalid WAV header: numChannels=${numChannels}`);
  }
  if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
    throw new Error(`Invalid WAV header: sampleRate=${sampleRate}`);
  }

  // Pick a per-sample decoder once, outside the hot loop.
  let reader: ((byteOffset: number) => number) | undefined;
  if (audioFormat === 3) {
    if (bitsPerSample === 32) {
      reader = (byteOffset) => data.readFloatLE(byteOffset);
    } else if (bitsPerSample === 64) {
      reader = (byteOffset) => data.readDoubleLE(byteOffset);
    }
  } else if (audioFormat === 1) {
    if (bitsPerSample === 8) {
      // 8-bit WAV PCM is unsigned with a midpoint of 128.
      reader = (byteOffset) => (data.readUInt8(byteOffset) - 128) / 128;
    } else if (bitsPerSample === 16) {
      reader = (byteOffset) => data.readInt16LE(byteOffset) / 32768.0;
    } else if (bitsPerSample === 24) {
      reader = (byteOffset) => data.readIntLE(byteOffset, 3) / 8388608.0;
    } else if (bitsPerSample === 32) {
      reader = (byteOffset) => data.readInt32LE(byteOffset) / 2147483648.0;
    }
  }

  if (reader === undefined) {
    throw new Error(
      `Unsupported WAV format: audioFormat=${audioFormat}, bitsPerSample=${bitsPerSample}. ` +
        `Expected integer PCM (format=1, bits=8/16/24/32) or IEEE float PCM (format=3, bits=32/64).`
    );
  }
  const readSample = reader;

  // Trailing bytes that do not form a whole sample are ignored.
  const bytesPerSample = bitsPerSample / 8;
  const totalSamples = Math.floor(data.length / bytesPerSample);
  const interleaved = new Float32Array(totalSamples);
  for (let i = 0; i < totalSamples; i++) {
    interleaved[i] = readSample(i * bytesPerSample);
  }

  const monoFloat32 = this.mixToMono(interleaved, numChannels);

  // Resample to 16kHz if needed
  if (sampleRate !== SAMPLE_RATE) {
    return this.resample(monoFloat32, sampleRate, SAMPLE_RATE);
  }

  return monoFloat32;
}
|
|
546
|
+
|
|
547
|
+
/**
 * Collapse interleaved multi-channel audio into a single channel.
 *
 * Each output sample is the arithmetic mean of the `numChannels` consecutive
 * input samples that make up one frame. Trailing samples that do not form a
 * complete frame are dropped.
 *
 * @param samples - Interleaved samples (frame-major, channel-minor).
 * @param numChannels - Number of channels per frame.
 * @returns The input array itself when `numChannels` is 1, otherwise a new
 *   array with one averaged sample per frame.
 */
private mixToMono(samples: Float32Array, numChannels: number): Float32Array {
  // Already mono: hand back the caller's array untouched.
  if (numChannels === 1) {
    return samples;
  }

  const frameCount = Math.floor(samples.length / numChannels);
  const mixed = new Float32Array(frameCount);

  for (let frame = 0; frame < frameCount; frame++) {
    const base = frame * numChannels;
    let total = 0;
    for (let channel = 0; channel < numChannels; channel++) {
      total += samples[base + channel];
    }
    mixed[frame] = total / numChannels;
  }

  return mixed;
}
|
|
566
|
+
|
|
567
|
+
/**
 * Change the sample rate of a signal using linear interpolation.
 *
 * Every output sample is a weighted blend of the two input samples that
 * surround its fractional source position. The upper neighbour is clamped to
 * the last input sample.
 *
 * @param samples - Input samples at `fromRate`.
 * @param fromRate - Sample rate of the input, in Hz.
 * @param toRate - Desired sample rate of the output, in Hz.
 * @returns The input array itself when the rates match, otherwise a new
 *   array of `floor(samples.length / (fromRate / toRate))` samples.
 */
private resample(samples: Float32Array, fromRate: number, toRate: number): Float32Array {
  if (fromRate === toRate) {
    return samples;
  }

  // Distance, in input samples, between two consecutive output samples.
  const step = fromRate / toRate;
  const lastIndex = samples.length - 1;
  const resampled = new Float32Array(Math.floor(samples.length / step));

  for (let outIdx = 0; outIdx < resampled.length; outIdx++) {
    const position = outIdx * step;
    const lower = Math.floor(position);
    const upper = Math.min(lower + 1, lastIndex);
    const weight = position - lower;
    resampled[outIdx] = samples[lower] * (1 - weight) + samples[upper] * weight;
  }

  return resampled;
}
|
|
589
|
+
|
|
590
|
+
/**
 * Decode a non-WAV audio file to 16kHz mono Float32 samples using ffmpeg.
 *
 * ffmpeg writes headerless little-endian float32 PCM (`-f f32le`) to a
 * uniquely named temp file, which is then read back and decoded directly.
 * Raw output has no container header, so decoding does not depend on which
 * WAV header variant ffmpeg would choose to write for 32-bit float audio.
 *
 * The temp file is removed in all cases (best effort).
 *
 * @param audioPath - Path of the source audio file (webm, ogg, m4a, ...).
 * @returns Mono samples at `SAMPLE_RATE`.
 * @throws Error if ffmpeg is not available, or if conversion or reading the
 *   converted output fails (wrapped with a "Failed to convert" prefix).
 */
private async convertWithFfmpeg(audioPath: string): Promise<Float32Array> {
  const ffmpegAvailable = await this.isFfmpegAvailable();
  if (!ffmpegAvailable) {
    throw new Error(
      'ffmpeg is not available on this system. ' +
        'ffmpeg is required to transcribe non-WAV audio files (webm, ogg, m4a). ' +
        'Install ffmpeg via: brew install ffmpeg (macOS) or apt install ffmpeg (Linux).'
    );
  }

  const BYTES_PER_SAMPLE = 4; // float32
  const tempFileName = `markupr-transcode-${randomUUID()}.f32le`;
  const tempPath = join(tmpdir(), tempFileName);

  try {
    this.log(`Converting ${audioPath} to raw PCM via ffmpeg...`);

    await execFileAsync('ffmpeg', [
      '-nostdin', // never wait on interactive input
      '-hide_banner',
      '-loglevel', 'error', // keep stderr small and limited to real failures
      '-i', audioPath,
      '-vn', // ignore any video stream in the container
      '-ar', String(SAMPLE_RATE),
      '-ac', '1',
      '-f', 'f32le',
      '-acodec', 'pcm_f32le',
      '-y',
      tempPath,
    ]);

    this.log('ffmpeg conversion complete, reading samples...');

    const raw = await readFile(tempPath);
    const sampleCount = Math.floor(raw.length / BYTES_PER_SAMPLE);
    const samples = new Float32Array(sampleCount);
    // Decode explicitly as little-endian rather than aliasing the Buffer's
    // memory, which would depend on host endianness and byte alignment.
    for (let i = 0; i < sampleCount; i++) {
      samples[i] = raw.readFloatLE(i * BYTES_PER_SAMPLE);
    }
    return samples;
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to convert audio file with ffmpeg: ${msg}`);
  } finally {
    // Clean up temp file
    try {
      await unlink(tempPath);
    } catch {
      // Ignore cleanup errors - temp dir will be cleaned eventually
    }
  }
}
|
|
634
|
+
|
|
635
|
+
/**
 * Process buffered audio through Whisper.
 *
 * Does nothing when a run is already in progress, when the buffer is empty,
 * when the Whisper module is not loaded, or (unless `force` is set) when less
 * than `CHUNK_DURATION_MS` of audio has been buffered.
 *
 * Otherwise the buffered chunks are concatenated, the buffer state is reset
 * so new audio can accumulate while Whisper runs, and every non-empty
 * segment is delivered to the transcript callbacks and emitted as a
 * `'transcript'` event. Failures are logged, passed to the error callbacks
 * and emitted as an `'error'` event; they are not rethrown from here.
 *
 * @param force - When `true`, process whatever is buffered even if it is
 *   shorter than `CHUNK_DURATION_MS`.
 */
private async processBufferedAudio(force: boolean = false): Promise<void> {
  // Skip if already processing or buffer is too small (unless forced)
  if (this.isProcessing) {
    return;
  }

  if (!force && this.totalBufferDuration < CHUNK_DURATION_MS) {
    return;
  }

  if (this.audioBuffer.length === 0) {
    return;
  }

  if (!this.whisperModule) {
    this.logError('Cannot process: Whisper module not loaded');
    return;
  }

  this.isProcessing = true;
  // Captured before the buffer is reset below; used as the time base for
  // the segments produced from this batch.
  const processStartTime = this.bufferStartTime;

  try {
    // Concatenate all buffered audio into a single array
    const totalSamples = this.audioBuffer.reduce((sum, arr) => sum + arr.length, 0);
    const combinedAudio = new Float32Array(totalSamples);

    let offset = 0;
    for (const chunk of this.audioBuffer) {
      combinedAudio.set(chunk, offset);
      offset += chunk.length;
    }

    // Clear buffer before processing (to accept new audio while processing)
    const processedDuration = this.totalBufferDuration;
    this.audioBuffer = [];
    this.totalBufferDuration = 0;
    this.totalBufferBytes = 0;
    this.bufferStartTime = Date.now();

    // Run Whisper transcription
    this.log(`Processing ${Math.round(processedDuration)}ms of audio...`);

    const result = await this.whisperModule.whisper(combinedAudio, {
      modelPath: this.config.modelPath,
      language: this.config.language,
      threads: this.config.threads,
      translate: this.config.translateToEnglish,
    });

    // Parse result and emit transcript
    if (result && result.length > 0) {
      for (const segment of result) {
        const transcriptResult: WhisperTranscriptResult = {
          text: segment.text.trim(),
          startTime: processStartTime / 1000 + segment.start,
          endTime: processStartTime / 1000 + segment.end,
          confidence: 0.9, // Whisper doesn't provide confidence, use default
        };

        // Segments that are empty after trimming are dropped silently.
        if (transcriptResult.text) {
          this.transcriptCallbacks.forEach((cb) => cb(transcriptResult));
          this.emit('transcript', transcriptResult);

          const preview =
            transcriptResult.text.length > 50
              ? `${transcriptResult.text.substring(0, 50)}...`
              : transcriptResult.text;
          this.log(`Transcript: "${preview}"`);
        }
      }
    }
  } catch (error) {
    // Narrow instead of casting: a non-Error throw would otherwise produce
    // "Whisper transcription failed: undefined".
    const reason = error instanceof Error ? error.message : String(error);
    const transcriptionError = new Error(`Whisper transcription failed: ${reason}`);

    // Log before notifying anyone so the failure is recorded even if a
    // callback or the emit below throws.
    // NOTE(review): if `emit` is Node's EventEmitter.emit, emitting 'error'
    // with no registered listener throws - confirm against the class definition.
    this.logError('Transcription error', error);
    this.errorCallbacks.forEach((cb) => cb(transcriptionError));
    this.emit('error', transcriptionError);
  } finally {
    this.isProcessing = false;
  }
}
|
|
719
|
+
|
|
720
|
+
/**
 * Write an informational message to the console.
 *
 * The message is prefixed with `[WhisperService <ISO-8601 timestamp>]`.
 *
 * @param message - Text to log.
 */
private log(message: string): void {
  console.log(`[WhisperService ${new Date().toISOString()}] ${message}`);
}
|
|
727
|
+
|
|
728
|
+
/**
 * Error log helper.
 *
 * Writes `[WhisperService <ISO-8601 timestamp>] ERROR: <message>` to
 * `console.error`. When an `error` value is supplied, its description is
 * appended after ` - `; when it is omitted, nothing is appended (so callers
 * that only have a message do not get a trailing " - undefined").
 *
 * @param message - Description of what failed.
 * @param error - Optional underlying error; `Error` instances contribute
 *   their `message`, anything else is stringified.
 */
private logError(message: string, error?: unknown): void {
  const timestamp = new Date().toISOString();
  const prefix = `[WhisperService ${timestamp}] ERROR: ${message}`;

  if (error === undefined) {
    console.error(prefix);
    return;
  }

  const errorStr = error instanceof Error ? error.message : String(error);
  console.error(`${prefix} - ${errorStr}`);
}
|
|
736
|
+
|
|
737
|
+
/**
 * Look up the memory requirement for the configured model.
 *
 * The lookup key is the file name portion of `config.modelPath`. Models
 * without an entry in `MODEL_MEMORY_REQUIREMENTS_BYTES` fall back to the
 * value registered for `ggml-small.bin`.
 *
 * @returns The table entry for the configured model, or the fallback entry.
 */
private getRequiredMemoryBytes(): number {
  const requirements = MODEL_MEMORY_REQUIREMENTS_BYTES;
  const modelFileName = basename(this.config.modelPath);
  const required = requirements[modelFileName];
  return required ?? requirements['ggml-small.bin'];
}
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
// ============================================================================
|
|
744
|
+
// Singleton Export
|
|
745
|
+
// ============================================================================
|
|
746
|
+
|
|
747
|
+
// Module-level WhisperService instance, constructed with no arguments when
// this module is first evaluated.
export const whisperService = new WhisperService();
|
|
748
|
+
export default WhisperService;
|