markupr 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/.claude/commands/review-feedback.md +47 -0
  2. package/.eslintrc.json +35 -0
  3. package/.github/CODEOWNERS +16 -0
  4. package/.github/FUNDING.yml +1 -0
  5. package/.github/ISSUE_TEMPLATE/bug_report.md +56 -0
  6. package/.github/ISSUE_TEMPLATE/feature_request.md +54 -0
  7. package/.github/PULL_REQUEST_TEMPLATE.md +89 -0
  8. package/.github/dependabot.yml +70 -0
  9. package/.github/workflows/ci.yml +184 -0
  10. package/.github/workflows/deploy-landing.yml +134 -0
  11. package/.github/workflows/nightly.yml +288 -0
  12. package/.github/workflows/release.yml +318 -0
  13. package/CHANGELOG.md +127 -0
  14. package/CLAUDE.md +137 -0
  15. package/CODE_OF_CONDUCT.md +9 -0
  16. package/CONTRIBUTING.md +390 -0
  17. package/LICENSE +21 -0
  18. package/PRODUCT_VISION.md +277 -0
  19. package/README.md +517 -0
  20. package/SECURITY.md +51 -0
  21. package/SIGNING_INSTRUCTIONS.md +284 -0
  22. package/assets/DMG_BACKGROUND_INSTRUCTIONS.md +130 -0
  23. package/assets/svg-source/dmg-background.svg +70 -0
  24. package/assets/svg-source/icon.svg +20 -0
  25. package/assets/svg-source/tray-icon-processing.svg +7 -0
  26. package/assets/svg-source/tray-icon-recording.svg +7 -0
  27. package/assets/svg-source/tray-icon.svg +6 -0
  28. package/assets/tray-complete.png +0 -0
  29. package/assets/tray-complete@2x.png +0 -0
  30. package/assets/tray-completeTemplate.png +0 -0
  31. package/assets/tray-completeTemplate@2x.png +0 -0
  32. package/assets/tray-error.png +0 -0
  33. package/assets/tray-error@2x.png +0 -0
  34. package/assets/tray-errorTemplate.png +0 -0
  35. package/assets/tray-errorTemplate@2x.png +0 -0
  36. package/assets/tray-icon-processing.png +0 -0
  37. package/assets/tray-icon-processing@2x.png +0 -0
  38. package/assets/tray-icon-processingTemplate.png +0 -0
  39. package/assets/tray-icon-processingTemplate@2x.png +0 -0
  40. package/assets/tray-icon-recording.png +0 -0
  41. package/assets/tray-icon-recording@2x.png +0 -0
  42. package/assets/tray-icon-recordingTemplate.png +0 -0
  43. package/assets/tray-icon-recordingTemplate@2x.png +0 -0
  44. package/assets/tray-icon.png +0 -0
  45. package/assets/tray-icon@2x.png +0 -0
  46. package/assets/tray-iconTemplate.png +0 -0
  47. package/assets/tray-iconTemplate@2x.png +0 -0
  48. package/assets/tray-idle.png +0 -0
  49. package/assets/tray-idle@2x.png +0 -0
  50. package/assets/tray-idleTemplate.png +0 -0
  51. package/assets/tray-idleTemplate@2x.png +0 -0
  52. package/assets/tray-processing-0.png +0 -0
  53. package/assets/tray-processing-0@2x.png +0 -0
  54. package/assets/tray-processing-0Template.png +0 -0
  55. package/assets/tray-processing-0Template@2x.png +0 -0
  56. package/assets/tray-processing-1.png +0 -0
  57. package/assets/tray-processing-1@2x.png +0 -0
  58. package/assets/tray-processing-1Template.png +0 -0
  59. package/assets/tray-processing-1Template@2x.png +0 -0
  60. package/assets/tray-processing-2.png +0 -0
  61. package/assets/tray-processing-2@2x.png +0 -0
  62. package/assets/tray-processing-2Template.png +0 -0
  63. package/assets/tray-processing-2Template@2x.png +0 -0
  64. package/assets/tray-processing-3.png +0 -0
  65. package/assets/tray-processing-3@2x.png +0 -0
  66. package/assets/tray-processing-3Template.png +0 -0
  67. package/assets/tray-processing-3Template@2x.png +0 -0
  68. package/assets/tray-processing.png +0 -0
  69. package/assets/tray-processing@2x.png +0 -0
  70. package/assets/tray-processingTemplate.png +0 -0
  71. package/assets/tray-processingTemplate@2x.png +0 -0
  72. package/assets/tray-recording.png +0 -0
  73. package/assets/tray-recording@2x.png +0 -0
  74. package/assets/tray-recordingTemplate.png +0 -0
  75. package/assets/tray-recordingTemplate@2x.png +0 -0
  76. package/build/DMG_BACKGROUND_SPEC.md +50 -0
  77. package/build/dmg-background.png +0 -0
  78. package/build/dmg-background@2x.png +0 -0
  79. package/build/entitlements.mac.inherit.plist +27 -0
  80. package/build/entitlements.mac.plist +41 -0
  81. package/build/favicon-16.png +0 -0
  82. package/build/favicon-180.png +0 -0
  83. package/build/favicon-192.png +0 -0
  84. package/build/favicon-32.png +0 -0
  85. package/build/favicon-48.png +0 -0
  86. package/build/favicon-512.png +0 -0
  87. package/build/favicon-64.png +0 -0
  88. package/build/icon-128.png +0 -0
  89. package/build/icon-16.png +0 -0
  90. package/build/icon-24.png +0 -0
  91. package/build/icon-256.png +0 -0
  92. package/build/icon-32.png +0 -0
  93. package/build/icon-48.png +0 -0
  94. package/build/icon-64.png +0 -0
  95. package/build/icon.icns +0 -0
  96. package/build/icon.ico +0 -0
  97. package/build/icon.iconset/icon_128x128.png +0 -0
  98. package/build/icon.iconset/icon_128x128@2x.png +0 -0
  99. package/build/icon.iconset/icon_16x16.png +0 -0
  100. package/build/icon.iconset/icon_16x16@2x.png +0 -0
  101. package/build/icon.iconset/icon_256x256.png +0 -0
  102. package/build/icon.iconset/icon_256x256@2x.png +0 -0
  103. package/build/icon.iconset/icon_32x32.png +0 -0
  104. package/build/icon.iconset/icon_32x32@2x.png +0 -0
  105. package/build/icon.iconset/icon_512x512.png +0 -0
  106. package/build/icon.iconset/icon_512x512@2x.png +0 -0
  107. package/build/icon.png +0 -0
  108. package/build/installer-header.bmp +0 -0
  109. package/build/installer-header.png +0 -0
  110. package/build/installer-sidebar.bmp +0 -0
  111. package/build/installer-sidebar.png +0 -0
  112. package/build/installer.nsh +45 -0
  113. package/build/overlay-processing.png +0 -0
  114. package/build/overlay-recording.png +0 -0
  115. package/build/toolbar-record.png +0 -0
  116. package/build/toolbar-screenshot.png +0 -0
  117. package/build/toolbar-settings.png +0 -0
  118. package/build/toolbar-stop.png +0 -0
  119. package/dist/main/index.mjs +12612 -0
  120. package/dist/preload/index.mjs +907 -0
  121. package/dist/renderer/assets/index-CCmUjl9K.js +19495 -0
  122. package/dist/renderer/assets/index-CUqz_Gs6.css +2270 -0
  123. package/dist/renderer/index.html +27 -0
  124. package/docs/AI_AGENT_QUICKSTART.md +42 -0
  125. package/docs/AI_PIPELINE_DESIGN.md +595 -0
  126. package/docs/API.md +514 -0
  127. package/docs/ARCHITECTURE.md +460 -0
  128. package/docs/CONFIGURATION.md +336 -0
  129. package/docs/DEVELOPMENT.md +508 -0
  130. package/docs/EXPORT_FORMATS.md +451 -0
  131. package/docs/GETTING_STARTED.md +236 -0
  132. package/docs/KEYBOARD_SHORTCUTS.md +334 -0
  133. package/docs/TROUBLESHOOTING.md +418 -0
  134. package/docs/landing/index.html +672 -0
  135. package/docs/landing/script.js +342 -0
  136. package/docs/landing/styles.css +1543 -0
  137. package/electron-builder.yml +140 -0
  138. package/electron.vite.config.ts +63 -0
  139. package/package.json +108 -0
  140. package/railway.json +12 -0
  141. package/scripts/build.mjs +51 -0
  142. package/scripts/generate-icons.mjs +314 -0
  143. package/scripts/generate-installer-images.cjs +253 -0
  144. package/scripts/generate-tray-icons.mjs +258 -0
  145. package/scripts/notarize.cjs +180 -0
  146. package/scripts/one-click-clean-test.sh +147 -0
  147. package/scripts/postinstall.mjs +36 -0
  148. package/scripts/setup-markupr.sh +55 -0
  149. package/setup +17 -0
  150. package/site/index.html +1835 -0
  151. package/site/package.json +11 -0
  152. package/site/railway.json +12 -0
  153. package/site/server.js +31 -0
  154. package/src/main/AutoUpdater.ts +392 -0
  155. package/src/main/CrashRecovery.ts +655 -0
  156. package/src/main/ErrorHandler.ts +703 -0
  157. package/src/main/HotkeyManager.ts +399 -0
  158. package/src/main/MenuManager.ts +529 -0
  159. package/src/main/PermissionManager.ts +420 -0
  160. package/src/main/SessionController.ts +1465 -0
  161. package/src/main/TrayManager.ts +540 -0
  162. package/src/main/ai/AIPipelineManager.ts +199 -0
  163. package/src/main/ai/ClaudeAnalyzer.ts +339 -0
  164. package/src/main/ai/ImageOptimizer.ts +176 -0
  165. package/src/main/ai/StructuredMarkdownBuilder.ts +379 -0
  166. package/src/main/ai/index.ts +16 -0
  167. package/src/main/ai/types.ts +258 -0
  168. package/src/main/analysis/ClarificationGenerator.ts +385 -0
  169. package/src/main/analysis/FeedbackAnalyzer.ts +531 -0
  170. package/src/main/analysis/index.ts +19 -0
  171. package/src/main/audio/AudioCapture.ts +978 -0
  172. package/src/main/audio/audioUtils.ts +100 -0
  173. package/src/main/audio/index.ts +20 -0
  174. package/src/main/capture/index.ts +1 -0
  175. package/src/main/index.ts +1693 -0
  176. package/src/main/ipc/captureHandlers.ts +272 -0
  177. package/src/main/ipc/index.ts +45 -0
  178. package/src/main/ipc/outputHandlers.ts +302 -0
  179. package/src/main/ipc/sessionHandlers.ts +56 -0
  180. package/src/main/ipc/settingsHandlers.ts +471 -0
  181. package/src/main/ipc/types.ts +56 -0
  182. package/src/main/ipc/windowHandlers.ts +277 -0
  183. package/src/main/output/ClipboardService.ts +369 -0
  184. package/src/main/output/ExportService.ts +539 -0
  185. package/src/main/output/FileManager.ts +416 -0
  186. package/src/main/output/MarkdownGenerator.ts +791 -0
  187. package/src/main/output/MarkdownPatcher.ts +299 -0
  188. package/src/main/output/index.ts +186 -0
  189. package/src/main/output/sessionAdapter.ts +207 -0
  190. package/src/main/output/templates/html-template.ts +553 -0
  191. package/src/main/pipeline/FrameExtractor.ts +330 -0
  192. package/src/main/pipeline/PostProcessor.ts +399 -0
  193. package/src/main/pipeline/TranscriptAnalyzer.ts +226 -0
  194. package/src/main/pipeline/index.ts +36 -0
  195. package/src/main/platform/WindowsTaskbar.ts +600 -0
  196. package/src/main/platform/index.ts +16 -0
  197. package/src/main/settings/SettingsManager.ts +730 -0
  198. package/src/main/settings/index.ts +19 -0
  199. package/src/main/transcription/ModelDownloadManager.ts +494 -0
  200. package/src/main/transcription/TierManager.ts +219 -0
  201. package/src/main/transcription/TranscriptionRecoveryService.ts +340 -0
  202. package/src/main/transcription/WhisperService.ts +748 -0
  203. package/src/main/transcription/index.ts +56 -0
  204. package/src/main/transcription/types.ts +135 -0
  205. package/src/main/windows/PopoverManager.ts +284 -0
  206. package/src/main/windows/TaskbarIntegration.ts +452 -0
  207. package/src/main/windows/index.ts +23 -0
  208. package/src/preload/index.ts +1047 -0
  209. package/src/renderer/App.tsx +515 -0
  210. package/src/renderer/AppWrapper.tsx +28 -0
  211. package/src/renderer/assets/logo-dark.svg +7 -0
  212. package/src/renderer/assets/logo.svg +7 -0
  213. package/src/renderer/audio/AudioCaptureRenderer.ts +454 -0
  214. package/src/renderer/capture/ScreenRecordingRenderer.ts +492 -0
  215. package/src/renderer/components/AnnotationOverlay.tsx +836 -0
  216. package/src/renderer/components/AudioWaveform.tsx +811 -0
  217. package/src/renderer/components/ClarificationQuestions.tsx +656 -0
  218. package/src/renderer/components/CountdownTimer.tsx +495 -0
  219. package/src/renderer/components/CrashRecoveryDialog.tsx +632 -0
  220. package/src/renderer/components/DonateButton.tsx +127 -0
  221. package/src/renderer/components/ErrorBoundary.tsx +308 -0
  222. package/src/renderer/components/ExportDialog.tsx +872 -0
  223. package/src/renderer/components/HotkeyHint.tsx +261 -0
  224. package/src/renderer/components/KeyboardShortcuts.tsx +787 -0
  225. package/src/renderer/components/ModelDownloadDialog.tsx +844 -0
  226. package/src/renderer/components/Onboarding.tsx +1830 -0
  227. package/src/renderer/components/ProcessingOverlay.tsx +157 -0
  228. package/src/renderer/components/RecordingOverlay.tsx +423 -0
  229. package/src/renderer/components/SessionHistory.tsx +1746 -0
  230. package/src/renderer/components/SessionReview.tsx +1321 -0
  231. package/src/renderer/components/SettingsPanel.tsx +217 -0
  232. package/src/renderer/components/Skeleton.tsx +347 -0
  233. package/src/renderer/components/StatusIndicator.tsx +86 -0
  234. package/src/renderer/components/ThemeProvider.tsx +429 -0
  235. package/src/renderer/components/Tooltip.tsx +370 -0
  236. package/src/renderer/components/TranscriptionPreview.tsx +183 -0
  237. package/src/renderer/components/TranscriptionTierSelector.tsx +640 -0
  238. package/src/renderer/components/UpdateNotification.tsx +377 -0
  239. package/src/renderer/components/WindowSelector.tsx +947 -0
  240. package/src/renderer/components/index.ts +99 -0
  241. package/src/renderer/components/primitives/ApiKeyInput.tsx +98 -0
  242. package/src/renderer/components/primitives/ColorPicker.tsx +65 -0
  243. package/src/renderer/components/primitives/DangerButton.tsx +45 -0
  244. package/src/renderer/components/primitives/DirectoryPicker.tsx +41 -0
  245. package/src/renderer/components/primitives/Dropdown.tsx +34 -0
  246. package/src/renderer/components/primitives/KeyRecorder.tsx +117 -0
  247. package/src/renderer/components/primitives/SettingsSection.tsx +32 -0
  248. package/src/renderer/components/primitives/Slider.tsx +43 -0
  249. package/src/renderer/components/primitives/Toggle.tsx +36 -0
  250. package/src/renderer/components/primitives/index.ts +10 -0
  251. package/src/renderer/components/settings/AdvancedTab.tsx +174 -0
  252. package/src/renderer/components/settings/AppearanceTab.tsx +77 -0
  253. package/src/renderer/components/settings/GeneralTab.tsx +40 -0
  254. package/src/renderer/components/settings/HotkeysTab.tsx +79 -0
  255. package/src/renderer/components/settings/RecordingTab.tsx +84 -0
  256. package/src/renderer/components/settings/index.ts +9 -0
  257. package/src/renderer/components/settings/settingsStyles.ts +673 -0
  258. package/src/renderer/components/settings/tabConfig.tsx +85 -0
  259. package/src/renderer/components/settings/useSettingsPanel.ts +447 -0
  260. package/src/renderer/contexts/ProcessingContext.tsx +227 -0
  261. package/src/renderer/contexts/RecordingContext.tsx +683 -0
  262. package/src/renderer/contexts/UIContext.tsx +326 -0
  263. package/src/renderer/contexts/index.ts +24 -0
  264. package/src/renderer/donateMessages.ts +69 -0
  265. package/src/renderer/hooks/index.ts +75 -0
  266. package/src/renderer/hooks/useAnimation.tsx +544 -0
  267. package/src/renderer/hooks/useTheme.ts +313 -0
  268. package/src/renderer/index.html +26 -0
  269. package/src/renderer/main.tsx +52 -0
  270. package/src/renderer/styles/animations.css +1093 -0
  271. package/src/renderer/styles/app-shell.css +662 -0
  272. package/src/renderer/styles/globals.css +515 -0
  273. package/src/renderer/styles/theme.ts +578 -0
  274. package/src/renderer/types/electron.d.ts +385 -0
  275. package/src/shared/hotkeys.ts +283 -0
  276. package/src/shared/types.ts +809 -0
  277. package/tests/clipboard.test.ts +228 -0
  278. package/tests/e2e/criticalPaths.test.ts +594 -0
  279. package/tests/feedbackAnalyzer.test.ts +303 -0
  280. package/tests/integration/sessionFlow.test.ts +583 -0
  281. package/tests/markdownGenerator.test.ts +418 -0
  282. package/tests/output.test.ts +96 -0
  283. package/tests/setup.ts +486 -0
  284. package/tests/unit/appIntegration.test.ts +676 -0
  285. package/tests/unit/appViewState.test.ts +281 -0
  286. package/tests/unit/audioIpcChannels.test.ts +17 -0
  287. package/tests/unit/exportService.test.ts +492 -0
  288. package/tests/unit/hotkeys.test.ts +92 -0
  289. package/tests/unit/navigationPreload.test.ts +94 -0
  290. package/tests/unit/onboardingFlow.test.ts +345 -0
  291. package/tests/unit/permissionManager.test.ts +175 -0
  292. package/tests/unit/permissionManagerExpanded.test.ts +296 -0
  293. package/tests/unit/screenRecordingRenderer.test.ts +368 -0
  294. package/tests/unit/sessionController.test.ts +515 -0
  295. package/tests/unit/tierManager.test.ts +61 -0
  296. package/tests/unit/tierManagerExpanded.test.ts +142 -0
  297. package/tests/unit/transcriptAnalyzer.test.ts +64 -0
  298. package/tsconfig.json +25 -0
  299. package/vitest.config.ts +46 -0
@@ -0,0 +1,748 @@
1
+ /**
2
+ * WhisperService.ts - Local Whisper Transcription (Tier 2)
3
+ *
4
+ * Uses whisper.cpp via whisper-node for on-device transcription.
5
+ * No API key or internet required.
6
+ *
7
+ * Features:
8
+ * - Batch processing of audio chunks
9
+ * - Configurable model and language
10
+ * - Memory-efficient buffer management
11
+ * - Event-based result delivery
12
+ */
13
+
14
+ import { EventEmitter } from 'events';
15
+ import { basename, join } from 'path';
16
+ import { app } from 'electron';
17
+ import { existsSync } from 'fs';
18
+ import { readFile, unlink } from 'fs/promises';
19
+ import { execFile } from 'child_process';
20
+ import { promisify } from 'util';
21
+ import { tmpdir } from 'os';
22
+ import { randomUUID } from 'crypto';
23
+ import * as os from 'os';
24
+ import type { WhisperTranscriptResult, WhisperConfig, ErrorCallback } from './types';
25
+
26
+ const execFileAsync = promisify(execFile); // Promise-returning form of execFile so external commands (e.g. the ffmpeg probe) can be awaited
27
+
28
+ // ============================================================================
29
+ // Types
30
+ // ============================================================================
31
+
32
/** Listener invoked with a transcript result produced by the service. */
type TranscriptCallback = (result: WhisperTranscriptResult) => void;

/** Options passed as the second argument of the whisper-node `whisper` call. */
interface WhisperRunOptions {
  modelPath: string;
  language?: string;
  threads?: number;
  translate?: boolean;
}

/**
 * One recognised segment returned by whisper-node.
 * `start`/`end` are offsets relative to the submitted samples; callers add
 * them to a seconds-based start time.
 */
interface WhisperSegment {
  text: string;
  start: number;
  end: number;
}

// Whisper-node module type (loaded dynamically)
interface WhisperModule {
  whisper: (samples: Float32Array, options: WhisperRunOptions) => Promise<WhisperSegment[]>;
}
46
+
47
+ // ============================================================================
48
+ // Constants
49
+ // ============================================================================
50
+
51
// Use half of the logical CPU cores reported by the OS, but never fewer than one thread.
const DEFAULT_THREAD_COUNT = Math.max(1, Math.floor(os.cpus().length / 2));

/**
 * Baseline configuration. Caller-supplied overrides are spread on top of it,
 * and an empty `modelPath` is replaced with the default model location by the
 * WhisperService constructor.
 */
const DEFAULT_CONFIG: WhisperConfig = {
  modelPath: '', // Set dynamically
  language: 'en',
  threads: DEFAULT_THREAD_COUNT,
  translateToEnglish: false,
};
57
+
58
// Live-buffer tuning.
// Note: at 16 kHz Float32 (4 bytes per sample) the 500KB byte cap equals 8 s of
// audio, so it is reached well before the 30 s duration limit.
const SAMPLE_RATE = 16000; // 16kHz mono
const CHUNK_DURATION_MS = 3 * 1000; // Process 3 seconds at a time
const MAX_BUFFER_DURATION_MS = 30 * 1000; // Max 30 seconds before force-processing
const MAX_BUFFER_SIZE_BYTES = 500 * 1024; // 500KB cap as per audit

// File transcription is done in fixed 30-second windows.
const FILE_CHUNK_DURATION_SEC = 30;
const FILE_CHUNK_SAMPLES = FILE_CHUNK_DURATION_SEC * SAMPLE_RATE;

// Memory requirement per model file name, listed in MiB and expanded to bytes.
const MODEL_MEMORY_REQUIREMENTS_BYTES: Record<string, number> = Object.fromEntries(
  (
    [
      ['ggml-tiny.bin', 450],
      ['ggml-base.bin', 800],
      ['ggml-small.bin', 1400],
      ['ggml-medium.bin', 2800],
      ['ggml-large-v3.bin', 5200],
    ] as Array<[string, number]>
  ).map(([fileName, mebibytes]): [string, number] => [fileName, mebibytes * 1024 * 1024])
);
72
+
73
+ // ============================================================================
74
+ // WhisperService Class
75
+ // ============================================================================
76
+
77
+ export class WhisperService extends EventEmitter {
78
// Effective configuration: defaults overlaid with constructor overrides.
private config: WhisperConfig;
// Set once initialize() has completed its warm-up transcription.
private isInitialized = false;
// Processing flag; its use is outside the methods that reference the fields below.
private isProcessing = false;
// Dynamically imported whisper-node module, or null until it has been loaded.
private whisperModule: WhisperModule | null = null;

// Pending audio chunks plus running totals used for the size/duration caps.
private audioBuffer: Float32Array[] = [];
private bufferStartTime = 0;
private totalBufferDuration = 0;
private totalBufferBytes = 0;

// Handle of the periodic processing timer, or null while no timer is stored.
private processingInterval: ReturnType<typeof setInterval> | null = null;

// Registered listeners
private transcriptCallbacks: TranscriptCallback[] = [];
private errorCallbacks: ErrorCallback[] = [];
96
/**
 * Create a service whose configuration is DEFAULT_CONFIG overlaid with the
 * supplied overrides.
 *
 * @param config Optional partial configuration; omitted keys keep their defaults.
 */
constructor(config?: Partial<WhisperConfig>) {
  super();

  const merged: WhisperConfig = { ...DEFAULT_CONFIG, ...config };
  this.config = merged;

  // A falsy model path means the caller did not choose one: use the default file.
  const hasExplicitModel = Boolean(this.config.modelPath);
  if (!hasExplicitModel) {
    this.config.modelPath = this.getDefaultModelPath();
  }
}
105
+
106
+ // ============================================================================
107
+ // Public API
108
+ // ============================================================================
109
+
110
/**
 * Report whether the configured model file currently exists on disk.
 *
 * @returns true when `config.modelPath` points at an existing path
 */
isModelAvailable(): boolean {
  const { modelPath } = this.config;
  return existsSync(modelPath);
}
116
+
117
/**
 * Get the path where models should be stored.
 *
 * Normally this is `<userData>/whisper-models` as reported by Electron. When
 * `app.getPath` throws, the method falls back to `<home>/.markupr/whisper-models`.
 * The home directory comes from HOME / USERPROFILE, then `os.homedir()`, and
 * finally the OS temp directory, so the fallback stays valid on every platform
 * (the former hard-coded `/tmp` is not a temp location on Windows).
 *
 * @returns Absolute directory path; the directory is not created here.
 */
getModelsDirectory(): string {
  try {
    return join(app.getPath('userData'), 'whisper-models');
  } catch {
    let baseDir = process.env.HOME || process.env.USERPROFILE || '';

    if (!baseDir) {
      try {
        baseDir = os.homedir();
      } catch {
        // os.homedir() can throw when no home directory can be determined.
        baseDir = '';
      }
    }

    return join(baseDir || tmpdir(), '.markupr', 'whisper-models');
  }
}
128
+
129
/**
 * Resolve the default model file: `ggml-medium.bin` inside the models directory.
 *
 * @returns Path to the default model file (which may not exist yet)
 */
getDefaultModelPath(): string {
  const modelsDir = this.getModelsDirectory();
  return join(modelsDir, 'ggml-medium.bin');
}
135
+
136
/**
 * Point the service at a different model file.
 *
 * @param modelPath Path of the model file to use from now on
 */
setModelPath(modelPath: string): void {
  // Drop the initialized flag so the next initialize() runs again with the new model.
  this.isInitialized = false;
  this.config.modelPath = modelPath;
}
143
+
144
/**
 * Check whether the OS currently reports enough free memory for Whisper.
 * The threshold comes from getRequiredMemoryBytes().
 *
 * @returns true when free memory is at least the required amount
 */
hasEnoughMemory(): boolean {
  return os.freemem() >= this.getRequiredMemoryBytes();
}
153
+
154
/**
 * Snapshot of free versus required memory.
 *
 * @returns Free and required memory rounded to whole megabytes, plus whether
 *          the (unrounded) free amount meets the requirement
 */
getMemoryInfo(): { freeMemoryMB: number; requiredMemoryMB: number; sufficient: boolean } {
  const toMegabytes = (bytes: number): number => Math.round(bytes / 1024 / 1024);

  const availableBytes = os.freemem();
  const neededBytes = this.getRequiredMemoryBytes();

  return {
    freeMemoryMB: toMegabytes(availableBytes),
    requiredMemoryMB: toMegabytes(neededBytes),
    sufficient: availableBytes >= neededBytes,
  };
}
166
+
167
/**
 * Initialize the Whisper model.
 * Call this once before starting transcription; further calls are no-ops
 * while the service is already initialized.
 *
 * Steps: verify the model file exists, verify free memory, dynamically import
 * whisper-node, then run a 100 ms silent transcription to pre-load the model.
 *
 * @throws Error when the model file is missing or memory is insufficient
 *         (error listeners are NOT notified for these two checks).
 * @throws Error prefixed with "Failed to initialize Whisper:" when loading or
 *         the warm-up run fails; error listeners are notified before it is thrown.
 */
async initialize(): Promise<void> {
  if (this.isInitialized) {
    return;
  }

  if (!this.isModelAvailable()) {
    throw new Error(`Whisper model not found at ${this.config.modelPath}. Please download the model first.`);
  }

  if (!this.hasEnoughMemory()) {
    const memInfo = this.getMemoryInfo();
    throw new Error(
      `Insufficient memory for Whisper. Need ~${memInfo.requiredMemoryMB}MB free, only ${memInfo.freeMemoryMB}MB available.`
    );
  }

  this.log('Initializing Whisper model...');

  try {
    // Dynamically import whisper-node to avoid startup crashes if not installed
    // @ts-expect-error - whisper-node may not have types
    this.whisperModule = await import('whisper-node');

    // Verify the module loaded correctly
    if (!this.whisperModule || typeof this.whisperModule.whisper !== 'function') {
      throw new Error('whisper-node module loaded but whisper function not found');
    }

    // Do a test transcription with tiny audio to pre-load the model
    this.log('Pre-loading model with test transcription...');
    const testBuffer = new Float32Array(1600); // 16kHz * 0.1s = 100ms of silence

    await this.whisperModule.whisper(testBuffer, {
      modelPath: this.config.modelPath,
      language: this.config.language,
      threads: this.config.threads,
    });

    this.isInitialized = true;
    this.log('Whisper model initialized successfully');
  } catch (error: unknown) {
    // Do not keep a module reference that failed validation or warm-up.
    this.whisperModule = null;

    // Anything can be thrown; only Error instances carry a usable `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    const initError = new Error(`Failed to initialize Whisper: ${reason}`);
    this.errorCallbacks.forEach((cb) => cb(initError));
    throw initError;
  }
}
217
+
218
/**
 * Tell whether transcription can run right now.
 *
 * @returns true only when initialization completed and the whisper module is loaded
 */
isReady(): boolean {
  if (!this.isInitialized) {
    return false;
  }
  return this.whisperModule !== null;
}
224
+
225
/**
 * Start accepting audio for transcription.
 *
 * Initializes the model if needed, resets the audio buffer and its counters,
 * and schedules processBufferedAudio() every CHUNK_DURATION_MS. Calling
 * start() again restarts the timer instead of stacking a second one.
 *
 * @throws whatever initialize() throws when the model cannot be prepared
 */
async start(): Promise<void> {
  if (!this.isInitialized) {
    await this.initialize();
  }

  // Only one interval handle is stored, so a timer left over from an earlier
  // start() would otherwise keep firing and could never be cleared by stop().
  if (this.processingInterval) {
    clearInterval(this.processingInterval);
    this.processingInterval = null;
  }

  // Reset buffer state
  this.audioBuffer = [];
  this.bufferStartTime = Date.now();
  this.totalBufferDuration = 0;
  this.totalBufferBytes = 0;

  // Start periodic processing (result intentionally not awaited inside the timer)
  this.processingInterval = setInterval(() => {
    void this.processBufferedAudio();
  }, CHUNK_DURATION_MS);

  this.log('Whisper transcription started');
}
246
+
247
/**
 * Stop transcription and process remaining audio.
 *
 * Cancels the periodic timer, awaits one forced processing pass over whatever
 * is still buffered, and then clears the buffer and its counters. The reset
 * happens even when that final pass rejects, so a failed flush cannot leave
 * stale audio or counters behind.
 *
 * @throws whatever the final processBufferedAudio(true) call rejects with
 */
async stop(): Promise<void> {
  // Clear processing interval
  if (this.processingInterval) {
    clearInterval(this.processingInterval);
    this.processingInterval = null;
  }

  try {
    // Process any remaining audio
    await this.processBufferedAudio(true);
  } finally {
    // Clear buffer
    this.audioBuffer = [];
    this.totalBufferDuration = 0;
    this.totalBufferBytes = 0;
  }

  this.log('Whisper transcription stopped');
}
267
+
268
/**
 * Queue an audio chunk for transcription.
 *
 * A forced processing pass is requested (without awaiting it) before the chunk
 * is stored if storing it would exceed MAX_BUFFER_SIZE_BYTES, and again after
 * storing it if the buffered duration has reached MAX_BUFFER_DURATION_MS.
 *
 * @param samples Float32Array of audio samples at 16kHz mono
 * @param durationMs Duration of this chunk in milliseconds
 */
addAudio(samples: Float32Array, durationMs: number): void {
  const incomingBytes = samples.byteLength;

  // Enforce buffer size cap (500KB as per audit CRIT-006)
  const wouldExceedCap = this.totalBufferBytes + incomingBytes > MAX_BUFFER_SIZE_BYTES;
  if (wouldExceedCap) {
    this.log('Audio buffer full, force-processing before adding new audio');
    this.processBufferedAudio(true);
  }

  this.audioBuffer.push(samples);
  this.totalBufferBytes += incomingBytes;
  this.totalBufferDuration += durationMs;

  // Below the duration limit nothing more needs to happen.
  if (this.totalBufferDuration < MAX_BUFFER_DURATION_MS) {
    return;
  }
  this.processBufferedAudio(true);
}
291
+
292
/**
 * Subscribe to transcript results.
 *
 * @param callback Listener to add to the transcript listener list
 * @returns Function that removes every registration of `callback`
 */
onTranscript(callback: TranscriptCallback): () => void {
  this.transcriptCallbacks.push(callback);

  const unsubscribe = (): void => {
    // Replace the list rather than mutating it in place.
    this.transcriptCallbacks = this.transcriptCallbacks.filter(
      (registered) => registered !== callback
    );
  };
  return unsubscribe;
}
301
+
302
/**
 * Subscribe to error notifications.
 *
 * @param callback Listener to add to the error listener list
 * @returns Function that removes every registration of `callback`
 */
onError(callback: ErrorCallback): () => void {
  this.errorCallbacks.push(callback);

  const unsubscribe = (): void => {
    // Replace the list rather than mutating it in place.
    this.errorCallbacks = this.errorCallbacks.filter((registered) => registered !== callback);
  };
  return unsubscribe;
}
311
+
312
/**
 * Get current configuration.
 *
 * @returns A shallow copy, so callers cannot mutate the service's own config object
 */
getConfig(): WhisperConfig {
  const snapshot: WhisperConfig = { ...this.config };
  return snapshot;
}
318
+
319
/**
 * Transcribe a complete Float32 buffer in one pass.
 * Useful for post-session retry workflows when live streaming failed.
 *
 * @param samples Audio samples handed to whisper as-is
 * @param startTimeSec Offset added to every segment's start and end time
 * @returns Segments with non-empty trimmed text; confidence is a fixed 0.9
 * @throws Error 'Whisper module not loaded' when no module is available after initialization
 */
async transcribeSamples(
  samples: Float32Array,
  startTimeSec: number
): Promise<WhisperTranscriptResult[]> {
  if (!this.isInitialized) {
    await this.initialize();
  }

  const whisperModule = this.whisperModule;
  if (!whisperModule) {
    throw new Error('Whisper module not loaded');
  }

  const { modelPath, language, threads, translateToEnglish } = this.config;
  const segments = await whisperModule.whisper(samples, {
    modelPath,
    language,
    threads,
    translate: translateToEnglish,
  });

  if (!segments || segments.length === 0) {
    return [];
  }

  const transcripts: WhisperTranscriptResult[] = [];
  for (const segment of segments) {
    const text = segment.text.trim();
    // Whitespace-only segments carry no content and are dropped.
    if (text.length === 0) {
      continue;
    }
    transcripts.push({
      text,
      startTime: startTimeSec + segment.start,
      endTime: startTimeSec + segment.end,
      confidence: 0.9,
    });
  }
  return transcripts;
}
355
+
356
/**
 * Transcribe an audio file from disk.
 * The file is loaded as 16kHz mono Float32 samples and transcribed in
 * consecutive windows of FILE_CHUNK_SAMPLES samples (30 seconds each).
 *
 * @param audioPath - Path to the audio file (webm, wav, ogg, m4a)
 * @param onProgress - Optional progress callback (0-100); called with 0 before
 *                     loading and after every finished window
 * @returns Array of transcript results with timestamps relative to the file start
 * @throws Error when `audioPath` does not exist
 */
async transcribeFile(
  audioPath: string,
  onProgress?: (percent: number) => void
): Promise<WhisperTranscriptResult[]> {
  if (!this.isInitialized) {
    await this.initialize();
  }

  if (!existsSync(audioPath)) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  this.log(`Transcribing file: ${audioPath}`);
  onProgress?.(0);

  // Get Float32Array samples from the file
  const samples = await this.loadAudioAsSamples(audioPath);
  const sampleCount = samples.length;

  if (sampleCount === 0) {
    this.log('Audio file produced no samples');
    onProgress?.(100);
    return [];
  }

  const totalChunks = Math.ceil(sampleCount / FILE_CHUNK_SAMPLES);
  const totalSeconds = (sampleCount / SAMPLE_RATE).toFixed(1);
  this.log(`Processing ${totalChunks} chunk(s) (${totalSeconds}s total)`);

  const results: WhisperTranscriptResult[] = [];
  let finishedChunks = 0;

  // Walk the samples window by window; subarray() creates views, not copies.
  for (let windowStart = 0; windowStart < sampleCount; windowStart += FILE_CHUNK_SAMPLES) {
    const windowEnd = Math.min(windowStart + FILE_CHUNK_SAMPLES, sampleCount);
    const window = samples.subarray(windowStart, windowEnd);

    const windowResults = await this.transcribeSamples(window, windowStart / SAMPLE_RATE);
    results.push(...windowResults);

    finishedChunks += 1;
    onProgress?.(Math.round((finishedChunks / totalChunks) * 100));

    // Yield between chunks to avoid blocking the event loop
    if (finishedChunks < totalChunks) {
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
  }

  this.log(`Transcription complete: ${results.length} segment(s)`);
  return results;
}
416
+
417
/**
 * Probe for ffmpeg by running `ffmpeg -version`.
 *
 * @returns true when the command runs successfully, false on any failure
 */
async isFfmpegAvailable(): Promise<boolean> {
  let available = true;
  try {
    await execFileAsync('ffmpeg', ['-version']);
  } catch {
    // Any failure to run the command counts as "not available".
    available = false;
  }
  return available;
}
428
+
429
+ // ============================================================================
430
+ // Private Methods
431
+ // ============================================================================
432
+
433
/**
 * Load an audio file and return Float32Array samples at 16kHz mono.
 * Files whose text after the last '.' is "wav" (case-insensitive) are parsed
 * directly; everything else is converted via ffmpeg.
 *
 * @param audioPath Path of the audio file to load
 */
private async loadAudioAsSamples(audioPath: string): Promise<Float32Array> {
  const dotSeparated = audioPath.toLowerCase().split('.');
  const extension = dotSeparated[dotSeparated.length - 1] ?? '';

  // For non-WAV formats (webm, ogg, m4a, etc.), convert via ffmpeg
  if (extension !== 'wav') {
    return this.convertWithFfmpeg(audioPath);
  }

  return this.parseWavFile(audioPath);
}
447
+
448
/**
 * Parse a WAV file and extract PCM data as Float32Array at 16kHz mono.
 *
 * Walks the RIFF chunk list, reads the format fields from the 'fmt ' chunk and
 * hands the bytes of the first 'data' chunk to extractWavSamples(). A data
 * chunk whose declared size runs past the end of the file is clamped to the
 * bytes actually present. For WAVE_FORMAT_EXTENSIBLE files the real sample
 * format is taken from the SubFormat field, so PCM / float data wrapped in the
 * extensible header is still recognised.
 *
 * @param wavPath Path of the WAV file to read
 * @returns Samples produced by extractWavSamples() (empty data chunks included)
 * @throws Error when the RIFF/WAVE header is missing, the fmt chunk is
 *         truncated, the data chunk precedes the fmt chunk, or no data chunk exists
 */
private async parseWavFile(wavPath: string): Promise<Float32Array> {
  const CHUNK_HEADER_BYTES = 8; // 4-byte id + 4-byte little-endian size
  const MIN_FMT_BYTES = 16; // classic PCM format block
  const EXTENSIBLE_FMT_BYTES = 40; // format block including the SubFormat GUID
  const WAVE_FORMAT_EXTENSIBLE = 0xfffe;
  const SUBFORMAT_OFFSET = 24; // SubFormat GUID position inside the fmt body

  const buffer = await readFile(wavPath);

  // Validate RIFF/WAVE header
  const riff = buffer.toString('ascii', 0, 4);
  const wave = buffer.toString('ascii', 8, 12);
  if (riff !== 'RIFF' || wave !== 'WAVE') {
    throw new Error(`Invalid WAV file: missing RIFF/WAVE header in ${wavPath}`);
  }

  let offset = 12;
  let audioFormat = 0;
  let numChannels = 0;
  let sampleRate = 0;
  let bitsPerSample = 0;
  let fmtFound = false;

  // `<=` so that a chunk header sitting in the final 8 bytes (for example a
  // zero-length data chunk in a header-only file) is still examined.
  while (offset + CHUNK_HEADER_BYTES <= buffer.length) {
    const chunkId = buffer.toString('ascii', offset, offset + 4);
    const chunkSize = buffer.readUInt32LE(offset + 4);
    const bodyStart = offset + CHUNK_HEADER_BYTES;

    if (chunkId === 'fmt ') {
      // Fail with a descriptive error instead of an out-of-range read.
      if (chunkSize < MIN_FMT_BYTES || bodyStart + MIN_FMT_BYTES > buffer.length) {
        throw new Error(`Invalid WAV file: truncated fmt chunk in ${wavPath}`);
      }

      audioFormat = buffer.readUInt16LE(bodyStart);
      numChannels = buffer.readUInt16LE(bodyStart + 2);
      sampleRate = buffer.readUInt32LE(bodyStart + 4);
      bitsPerSample = buffer.readUInt16LE(bodyStart + 14);

      // Extensible headers store the actual format code in the first two bytes
      // of the SubFormat GUID.
      if (
        audioFormat === WAVE_FORMAT_EXTENSIBLE &&
        chunkSize >= EXTENSIBLE_FMT_BYTES &&
        bodyStart + SUBFORMAT_OFFSET + 2 <= buffer.length
      ) {
        audioFormat = buffer.readUInt16LE(bodyStart + SUBFORMAT_OFFSET);
      }

      fmtFound = true;
    } else if (chunkId === 'data') {
      if (!fmtFound) {
        throw new Error('WAV file has data chunk before fmt chunk');
      }

      const dataEnd = Math.min(bodyStart + chunkSize, buffer.length);
      const dataSlice = buffer.subarray(bodyStart, dataEnd);

      return this.extractWavSamples(dataSlice, audioFormat, numChannels, sampleRate, bitsPerSample);
    }

    // Chunks are word-aligned: odd-sized chunks are followed by one pad byte.
    offset = bodyStart + chunkSize + (chunkSize % 2);
  }

  throw new Error(`Invalid WAV file: no data chunk found in ${wavPath}`);
}
503
+
504
/**
 * Turn the raw bytes of a WAV `data` chunk into Float32 samples at 16kHz mono.
 *
 * Two encodings are accepted: IEEE float32 (format 3, 32 bits) and signed
 * PCM int16 (format 1, 16 bits; scaled by 1/32768). The interleaved samples
 * are then mixed down to one channel and, when the source rate differs from
 * SAMPLE_RATE, resampled.
 *
 * @param data Bytes of the data chunk (little-endian, interleaved channels).
 * @param audioFormat Format code from the fmt chunk.
 * @param numChannels Channel count from the fmt chunk.
 * @param sampleRate Sample rate from the fmt chunk.
 * @param bitsPerSample Bits per sample from the fmt chunk.
 * @returns The decoded samples.
 * @throws Error for any other format/bit-depth combination.
 */
private extractWavSamples(
  data: Buffer,
  audioFormat: number,
  numChannels: number,
  sampleRate: number,
  bitsPerSample: number
): Float32Array {
  const isFloat32 = audioFormat === 3 && bitsPerSample === 32;
  const isInt16 = audioFormat === 1 && bitsPerSample === 16;

  if (!isFloat32 && !isInt16) {
    throw new Error(
      `Unsupported WAV format: audioFormat=${audioFormat}, bitsPerSample=${bitsPerSample}. ` +
        `Expected PCM float32 (format=3, bits=32) or PCM int16 (format=1, bits=16).`
    );
  }

  // Trailing bytes that do not form a whole sample are ignored.
  const bytesPerSample = isFloat32 ? 4 : 2;
  const sampleCount = Math.floor(data.length / bytesPerSample);
  const interleaved = new Float32Array(sampleCount);

  for (let index = 0, pos = 0; index < sampleCount; index++, pos += bytesPerSample) {
    interleaved[index] = isFloat32 ? data.readFloatLE(pos) : data.readInt16LE(pos) / 32768.0;
  }

  const mono = this.mixToMono(interleaved, numChannels);

  // Resample to 16kHz if needed
  return sampleRate !== SAMPLE_RATE ? this.resample(mono, sampleRate, SAMPLE_RATE) : mono;
}
546
+
547
/**
 * Collapse interleaved multi-channel audio into a single channel.
 *
 * Each output sample is the arithmetic mean of one frame (one sample per
 * channel). Single-channel input is returned as-is, without copying; a
 * trailing partial frame is dropped.
 *
 * @param samples Interleaved samples.
 * @param numChannels Number of interleaved channels.
 * @returns The mono signal.
 */
private mixToMono(samples: Float32Array, numChannels: number): Float32Array {
  if (numChannels === 1) {
    return samples;
  }

  const frameCount = Math.floor(samples.length / numChannels);
  const downmixed = new Float32Array(frameCount);

  for (let frame = 0, base = 0; frame < frameCount; frame++, base += numChannels) {
    let total = 0;
    for (let ch = 0; ch < numChannels; ch++) {
      total += samples[base + ch];
    }
    downmixed[frame] = total / numChannels;
  }

  return downmixed;
}
566
+
567
/**
 * Change the sample rate of a signal using linear interpolation.
 *
 * Every output sample is a weighted blend of the two input samples that
 * surround its position in the source; no filtering is applied. When both
 * rates are equal the input array itself is returned.
 *
 * @param samples Input signal.
 * @param fromRate Sample rate of the input.
 * @param toRate Desired sample rate.
 * @returns The resampled signal, `floor(samples.length * toRate / fromRate)` long.
 */
private resample(samples: Float32Array, fromRate: number, toRate: number): Float32Array {
  if (fromRate === toRate) {
    return samples;
  }

  const step = fromRate / toRate;
  const resampled = new Float32Array(Math.floor(samples.length / step));
  const lastIndex = samples.length - 1;

  for (let out = 0; out < resampled.length; out++) {
    const position = out * step;
    const left = Math.floor(position);
    // Clamp so the final output sample never reads past the input.
    const right = Math.min(left + 1, lastIndex);
    const weight = position - left;
    resampled[out] = samples[left] * (1 - weight) + samples[right] * weight;
  }

  return resampled;
}
589
+
590
/**
 * Convert a non-WAV audio file to a temporary 16kHz mono WAV using ffmpeg,
 * then parse the resulting WAV into Float32 samples.
 *
 * The intermediate file is encoded as 16-bit PCM. ffmpeg stores that with the
 * plain WAV header (format tag 1), whereas 32-bit float output is written with
 * a WAVE_FORMAT_EXTENSIBLE header (tag 0xFFFE); 16-bit keeps the header in the
 * simplest form, and the int16 decoding path yields Float32 samples anyway.
 *
 * The temporary file is removed on success and on failure.
 *
 * @param audioPath Path of the source audio file (webm, ogg, m4a, ...).
 * @returns The decoded samples.
 * @throws Error when ffmpeg is not available, or when conversion or parsing
 *         fails (wrapped as "Failed to convert audio file with ffmpeg: ...").
 */
private async convertWithFfmpeg(audioPath: string): Promise<Float32Array> {
  const ffmpegAvailable = await this.isFfmpegAvailable();
  if (!ffmpegAvailable) {
    throw new Error(
      'ffmpeg is not available on this system. ' +
        'ffmpeg is required to transcribe non-WAV audio files (webm, ogg, m4a). ' +
        'Install ffmpeg via: brew install ffmpeg (macOS) or apt install ffmpeg (Linux).'
    );
  }

  // A random name avoids collisions between concurrent conversions.
  const tempFileName = `markupr-transcode-${randomUUID()}.wav`;
  const tempPath = join(tmpdir(), tempFileName);

  try {
    this.log(`Converting ${audioPath} to WAV via ffmpeg...`);

    await execFileAsync('ffmpeg', [
      // Run non-interactively and keep stderr limited to real errors so the
      // captured output stays small and the failure message stays readable.
      '-nostdin',
      '-hide_banner',
      '-loglevel', 'error',
      '-i', audioPath,
      '-ar', String(SAMPLE_RATE),
      '-ac', '1',
      '-f', 'wav',
      '-acodec', 'pcm_s16le',
      '-y',
      tempPath,
    ]);

    this.log('ffmpeg conversion complete, parsing WAV...');
    return await this.parseWavFile(tempPath);
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to convert audio file with ffmpeg: ${msg}`);
  } finally {
    // Clean up temp file
    try {
      await unlink(tempPath);
    } catch {
      // Ignore cleanup errors - temp dir will be cleaned eventually
    }
  }
}
634
+
635
/**
 * Run the currently buffered audio through Whisper and publish the result.
 *
 * Nothing happens when a run is already in progress, when less than
 * CHUNK_DURATION_MS of audio is buffered (unless `force` is set), when the
 * buffer is empty, or when the Whisper module is not loaded.
 *
 * The buffered chunks are concatenated and the buffer is reset *before* the
 * transcription starts, so audio arriving meanwhile begins a fresh buffer.
 * Each non-empty segment is handed to every transcript callback and emitted
 * as a `'transcript'` event. Failures are reported through the error
 * callbacks, an `'error'` event and the error log; they are not rethrown.
 *
 * @param force Process the buffer even if it is shorter than one chunk.
 */
private async processBufferedAudio(force: boolean = false): Promise<void> {
  // Skip if already processing or buffer is too small (unless forced)
  if (this.isProcessing) {
    return;
  }

  if (!force && this.totalBufferDuration < CHUNK_DURATION_MS) {
    return;
  }

  if (this.audioBuffer.length === 0) {
    return;
  }

  if (!this.whisperModule) {
    this.logError('Cannot process: Whisper module not loaded');
    return;
  }

  this.isProcessing = true;
  // Captured before the buffer is reset; segment times are offset from it.
  const processStartTime = this.bufferStartTime;

  try {
    // Concatenate all buffered audio into a single array
    const totalSamples = this.audioBuffer.reduce((sum, arr) => sum + arr.length, 0);
    const combinedAudio = new Float32Array(totalSamples);

    let offset = 0;
    for (const chunk of this.audioBuffer) {
      combinedAudio.set(chunk, offset);
      offset += chunk.length;
    }

    // Clear buffer before processing (to accept new audio while processing)
    const processedDuration = this.totalBufferDuration;
    this.audioBuffer = [];
    this.totalBufferDuration = 0;
    this.totalBufferBytes = 0;
    this.bufferStartTime = Date.now();

    // Run Whisper transcription
    this.log(`Processing ${Math.round(processedDuration)}ms of audio...`);

    const result = await this.whisperModule.whisper(combinedAudio, {
      modelPath: this.config.modelPath,
      language: this.config.language,
      threads: this.config.threads,
      translate: this.config.translateToEnglish,
    });

    // Parse result and emit transcript
    if (result && result.length > 0) {
      for (const segment of result) {
        const transcriptResult: WhisperTranscriptResult = {
          text: segment.text.trim(),
          // NOTE(review): assumes bufferStartTime is in milliseconds and
          // segment.start/end are in seconds - confirm against the module.
          startTime: processStartTime / 1000 + segment.start,
          endTime: processStartTime / 1000 + segment.end,
          confidence: 0.9, // Whisper doesn't provide confidence, use default
        };

        // Segments that are empty after trimming are dropped silently.
        if (transcriptResult.text) {
          this.transcriptCallbacks.forEach((cb) => cb(transcriptResult));
          this.emit('transcript', transcriptResult);

          const preview =
            transcriptResult.text.length > 50
              ? `${transcriptResult.text.substring(0, 50)}...`
              : transcriptResult.text;
          this.log(`Transcript: "${preview}"`);
        }
      }
    }
  } catch (error) {
    // Narrow instead of casting: a non-Error throwable must not produce
    // "failed: undefined", and null/undefined must not throw in here.
    const reason = error instanceof Error ? error.message : String(error);
    const transcriptionError = new Error(`Whisper transcription failed: ${reason}`);
    this.errorCallbacks.forEach((cb) => cb(transcriptionError));
    // NOTE(review): if this class is a Node EventEmitter, emitting 'error'
    // with no listener attached throws - confirm a listener always exists.
    this.emit('error', transcriptionError);
    this.logError('Transcription error', error);
  } finally {
    this.isProcessing = false;
  }
}
719
+
720
/**
 * Write an informational message to the console, prefixed with the service
 * name and the current ISO-8601 timestamp.
 *
 * @param message Text to log.
 */
private log(message: string): void {
  console.log(`[WhisperService ${new Date().toISOString()}] ${message}`);
}
727
+
728
/**
 * Write an error message to the console, prefixed with the service name and
 * the current ISO-8601 timestamp.
 *
 * @param message Description of what failed.
 * @param error Optional cause. An `Error` contributes its message, any other
 *              value its string form. When omitted, no " - ..." suffix is
 *              printed (previously the line ended in " - undefined").
 */
private logError(message: string, error?: unknown): void {
  const timestamp = new Date().toISOString();

  let detail = '';
  if (error !== undefined) {
    const errorStr = error instanceof Error ? error.message : String(error);
    detail = ` - ${errorStr}`;
  }

  console.error(`[WhisperService ${timestamp}] ERROR: ${message}${detail}`);
}
736
+
737
/**
 * Look up the memory requirement for the configured model.
 *
 * The table is keyed by the model's file name (the basename of
 * `config.modelPath`). File names missing from the table fall back to the
 * entry for `ggml-small.bin`.
 */
private getRequiredMemoryBytes(): number {
  const requirements = MODEL_MEMORY_REQUIREMENTS_BYTES;
  const configured = requirements[basename(this.config.modelPath)];
  return configured ?? requirements['ggml-small.bin'];
}
741
+ }
742
+
743
+ // ============================================================================
744
+ // Singleton Export
745
+ // ============================================================================
746
+
747
/** Module-level instance, created when the module is first loaded. */
const whisperService = new WhisperService();

// The instance is a named export; the class itself is the default export.
export { whisperService, WhisperService as default };