listener-ai 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/dist/cli.js +111 -3
- package/dist/geminiService.js +163 -81
- package/dist/main.js +89 -20
- package/dist/outputService.js +132 -2
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -41,6 +41,11 @@ listener config set notionDatabaseId <your-id>
|
|
|
41
41
|
```bash
|
|
42
42
|
listener recording.mp3 # Transcribe to default output dir
|
|
43
43
|
listener recording.m4a --output ./ # Transcribe to current directory
|
|
44
|
+
listener transcript recording.wav # Print transcript to stdout (no summary)
|
|
45
|
+
listener transcript recording.wav -o out.txt
|
|
46
|
+
# Write transcript to a file
|
|
47
|
+
listener transcript recording.wav --prompt "Translate to English while transcribing"
|
|
48
|
+
# Override the default transcription instruction
|
|
44
49
|
listener config list # Show all config values (secrets masked)
|
|
45
50
|
listener config get <key> # Print one config value
|
|
46
51
|
listener config set <key> <value> # Set a config value
|
package/dist/cli.js
CHANGED
|
@@ -67,7 +67,9 @@ const VERSION = (() => {
|
|
|
67
67
|
return 'unknown';
|
|
68
68
|
}
|
|
69
69
|
})();
|
|
70
|
-
const USAGE_TEXT = 'Usage: listener <file> [--output <dir>] Transcribe an audio file\n' +
|
|
70
|
+
const USAGE_TEXT = 'Usage: listener <file> [--output <dir>] Transcribe an audio file into a meeting note\n' +
|
|
71
|
+
' listener transcript <file> [--output <path>] [--prompt <text>]\n' +
|
|
72
|
+
' Transcribe to plain text only (no summary)\n' +
|
|
71
73
|
' listener list [--limit <n>] List past transcriptions\n' +
|
|
72
74
|
' listener show <ref> Print summary to stdout\n' +
|
|
73
75
|
' listener export <ref> [<path>] [--json] [--transcript]\n' +
|
|
@@ -85,7 +87,10 @@ const USAGE_TEXT = 'Usage: listener <file> [--output <dir>] Transcribe an aud
|
|
|
85
87
|
'<ref> is a number from `listener list` or a folder name.\n' +
|
|
86
88
|
'\n' +
|
|
87
89
|
'Options:\n' +
|
|
88
|
-
' --output <
|
|
90
|
+
' --output, -o <path>\n' +
|
|
91
|
+
' Parent directory for the output folder (transcribe);\n' +
|
|
92
|
+
' destination file or directory (transcript)\n' +
|
|
93
|
+
' --prompt <text> Override the default transcription instruction (transcript)\n' +
|
|
89
94
|
' --limit <n> Max results (0 = all, default: 20)\n' +
|
|
90
95
|
' --json Export as JSON instead of markdown\n' +
|
|
91
96
|
' --transcript Include transcript body (export: append; search: widen scope)\n' +
|
|
@@ -426,6 +431,8 @@ async function handleExport(args) {
|
|
|
426
431
|
/* ignore */
|
|
427
432
|
}
|
|
428
433
|
}
|
|
434
|
+
const liveNotes = (0, outputService_1.parseLiveNotesField)(meta.liveNotes);
|
|
435
|
+
const highlights = (0, outputService_1.parseHighlightsField)(meta.highlights);
|
|
429
436
|
const obj = {
|
|
430
437
|
title: meta.title || '',
|
|
431
438
|
transcribedAt: meta.transcribedAt || '',
|
|
@@ -433,6 +440,8 @@ async function handleExport(args) {
|
|
|
433
440
|
keyPoints: meta.keyPoints || [],
|
|
434
441
|
actionItems: meta.actionItems || [],
|
|
435
442
|
customFields,
|
|
443
|
+
...(liveNotes ? { liveNotes } : {}),
|
|
444
|
+
...(highlights ? { highlights } : {}),
|
|
436
445
|
};
|
|
437
446
|
if (includeTranscript) {
|
|
438
447
|
obj.transcript = meta.transcript || '';
|
|
@@ -662,6 +671,101 @@ async function handleAsk(args) {
|
|
|
662
671
|
}
|
|
663
672
|
}
|
|
664
673
|
}
|
|
674
|
+
/**
 * Handle `listener transcript <file> [--output <path>] [--prompt <text>]`.
 *
 * Validates the arguments, transcribes the audio file in transcript-only mode
 * (no summary step) and then either writes the transcript to a file or prints
 * it to stdout. Progress and errors go to stderr so stdout carries only the
 * transcript (or the path of the written file).
 *
 * Any usage or validation problem is reported on stderr and ends the process
 * with exit code 1.
 *
 * @param {string[]} args - CLI arguments that follow the `transcript` subcommand.
 * @returns {Promise<void>} Resolves once the transcript has been written out.
 */
async function handleTranscript(args) {
    // Print each message on its own stderr line, then terminate with status 1.
    const fail = (...messages) => {
        for (const message of messages) {
            process.stderr.write(`${message}\n`);
        }
        process.exit(1);
    };
    // statSync throws for missing paths; treat that as "not a directory".
    const isDirectory = (candidate) => {
        try {
            return fs.statSync(candidate).isDirectory();
        }
        catch {
            return false;
        }
    };
    let filePath;
    let outputArg;
    let promptText;
    for (let i = 0; i < args.length; i++) {
        const a = args[i];
        if (a === '--output' || a === '-o' || a === '--prompt') {
            // A value-taking flag in last position used to be reported as an
            // "Unknown option"; say what is actually wrong instead.
            if (i + 1 >= args.length) {
                fail(`Error: Option ${a} requires a value`);
            }
            const value = args[++i];
            if (a === '--prompt') {
                promptText = value;
            }
            else {
                outputArg = value;
            }
            continue;
        }
        if (a.startsWith('-')) {
            fail(`Error: Unknown option: ${a}`);
        }
        if (filePath) {
            fail(`Error: Unexpected argument: ${a}`);
        }
        filePath = a;
    }
    if (!filePath) {
        fail('Error: No audio file specified. Usage: listener transcript <file> [--output <path>] [--prompt <text>]');
    }
    filePath = path.resolve(filePath);
    if (!fs.existsSync(filePath)) {
        fail(`Error: File not found: ${filePath}`);
    }
    // Keep the extension exactly as typed for basename stripping below;
    // only the lower-cased copy is used for the format check.
    const rawExt = path.extname(filePath);
    const ext = rawExt.toLowerCase();
    if (!SUPPORTED_EXTENSIONS.has(ext)) {
        fail(`Error: Unsupported file type: ${ext}`, `Supported formats: ${[...SUPPORTED_EXTENSIONS].join(', ')}`);
    }
    const dataPath = (0, dataPath_1.getDataPath)();
    const config = new configService_1.ConfigService(dataPath);
    const apiKey = config.getGeminiApiKey();
    if (!apiKey) {
        fail('Error: Gemini API key not found.', 'Set GEMINI_API_KEY env var or configure via the Listener.AI app.');
    }
    // Resolve --output before the expensive transcription so we fail fast on a
    // bad path. Existing directory => <dir>/<basename>.transcript.md.
    // Anything else => the path itself, treated as a file.
    let outputPath;
    if (outputArg) {
        const resolved = path.resolve(outputArg);
        if (isDirectory(resolved)) {
            // path.basename matches its suffix case-sensitively, so strip the
            // extension as written (e.g. ".WAV"), not the lower-cased copy.
            outputPath = path.join(resolved, `${path.basename(filePath, rawExt)}.transcript.md`);
        }
        else {
            outputPath = resolved;
            const parent = path.dirname(outputPath);
            if (!fs.existsSync(parent)) {
                fail(`Error: Output directory does not exist: ${parent}`);
            }
            if (!isDirectory(parent)) {
                fail(`Error: Output parent is not a directory: ${parent}`);
            }
        }
        // Writing the transcript over the recording would destroy the input.
        if (outputPath === filePath) {
            fail(`Error: Output path would overwrite the input file: ${outputPath}`);
        }
    }
    const gemini = new geminiService_1.GeminiService({
        apiKey,
        dataPath,
        knownWords: config.getKnownWords(),
        proModel: config.getGeminiModel(),
        flashModel: config.getGeminiFlashModel(),
    });
    process.stderr.write(`Processing: ${filePath}\n`);
    const result = await gemini.transcribeAudio(filePath, (_percent, message) => {
        process.stderr.write(` ${message}\n`);
    }, undefined, undefined, { transcriptOnly: true, transcriptionPrompt: promptText });
    if (outputPath) {
        fs.writeFileSync(outputPath, result.transcript, 'utf-8');
        process.stderr.write('Done.\n');
        process.stdout.write(`${outputPath}\n`);
    }
    else {
        // Wait for the OS to drain the write before returning, so multi-MB
        // transcripts piped to a slow consumer are not truncated on process exit.
        const out = result.transcript.endsWith('\n') ? result.transcript : `${result.transcript}\n`;
        await new Promise((resolve) => process.stdout.write(out, () => resolve()));
    }
}
|
|
665
769
|
async function main() {
|
|
666
770
|
const args = process.argv.slice(2);
|
|
667
771
|
if (args.includes('--version') || args.includes('-V')) {
|
|
@@ -702,11 +806,15 @@ async function main() {
|
|
|
702
806
|
await handleAsk(args.slice(1));
|
|
703
807
|
return;
|
|
704
808
|
}
|
|
809
|
+
if (args[0] === 'transcript') {
|
|
810
|
+
await handleTranscript(args.slice(1));
|
|
811
|
+
return;
|
|
812
|
+
}
|
|
705
813
|
// Parse arguments
|
|
706
814
|
let filePath;
|
|
707
815
|
let outputDir;
|
|
708
816
|
for (let i = 0; i < args.length; i++) {
|
|
709
|
-
if (args[i] === '--output' && i + 1 < args.length) {
|
|
817
|
+
if ((args[i] === '--output' || args[i] === '-o') && i + 1 < args.length) {
|
|
710
818
|
outputDir = args[++i];
|
|
711
819
|
}
|
|
712
820
|
else if (args[i].startsWith('-')) {
|
package/dist/geminiService.js
CHANGED
|
@@ -36,9 +36,104 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
36
36
|
exports.GeminiService = void 0;
|
|
37
37
|
const fs = __importStar(require("fs"));
|
|
38
38
|
const path = __importStar(require("path"));
|
|
39
|
+
const child_process_1 = require("child_process");
|
|
40
|
+
const util_1 = require("util");
|
|
39
41
|
const genai_1 = require("@google/genai");
|
|
40
42
|
const audioFormats_1 = require("./audioFormats");
|
|
43
|
+
const outputService_1 = require("./outputService");
|
|
41
44
|
const ffmpegManager_1 = require("./services/ffmpegManager");
|
|
45
|
+
const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
|
|
46
|
+
/**
 * Build the extra prompt section that asks the model to enrich each
 * user-flagged moment with a subtitle and categorized bullets, returned as a
 * `highlights` array in the JSON response.
 *
 * @param {Array<{offsetMs: number, text: string}>} notes - Notes to enrich.
 * @returns {string} The prompt section, or '' when `notes` is empty so the
 *   caller can leave the summary prompt unchanged.
 */
function buildHighlightsPromptBlock(notes) {
    if (notes.length === 0) {
        return '';
    }
    // One bullet per flagged moment; userText is JSON-encoded so quotes and
    // newlines in the note cannot break the list formatting.
    const flaggedMoments = [];
    for (const note of notes) {
        const timestamp = (0, outputService_1.formatOffsetTimestamp)(note.offsetMs);
        flaggedMoments.push(`- offsetMs=${note.offsetMs}, timestamp=${timestamp}, userText=${JSON.stringify(note.text)}`);
    }
    const sections = [
        `In addition, the user flagged the following moments during the meeting. For each note, produce a structured analysis tied to that moment in the transcript:`,
        ``,
        flaggedMoments.join('\n'),
        ``,
        `For every flagged moment above, write one entry in a JSON array named "highlights". Each entry must include:`,
        `- "offsetMs": the exact integer from the input`,
        `- "userText": the user's typed text, copied verbatim`,
        `- "subtitle": a short topic label in Korean (3-7 words) summarising what was being discussed at that timestamp`,
        `- "bullets": 2-5 short Korean bullet strings categorising the discussion at that point. Prefix each bullet with one of these categories when applicable, omitting categories that don't fit: "결정 사항:", "주요 인사이트:", "실행 항목:", "식별된 리스크:". If none of the categories fit, just write the bullet without a prefix.`,
        ``,
        `Use the transcript as the ground truth -- if the user's typed text doesn't clearly match anything in the transcript, fall back to the meeting content nearest the given timestamp. Return the highlights array as an additional key alongside the other fields in the JSON.`,
    ];
    return sections.join('\n');
}
|
|
67
|
+
/**
 * Attach the model's highlight enrichment to the user's live notes.
 *
 * The result has exactly one entry per live note, in the original order, so
 * the user's offset and text always round-trip even when the model returned
 * nothing usable for that note.
 *
 * @param {Array<{offsetMs: number, text: string}>|undefined|null} liveNotes -
 *   Notes the user flagged during the recording.
 * @param {unknown} raw - The `highlights` value parsed from the model response;
 *   anything that is not an array of objects is ignored.
 * @returns {Array<{offsetMs: number, userText: string, subtitle: (string|undefined), bullets: (string[]|undefined)}>|undefined}
 *   Merged highlights, or `undefined` when there are no live notes.
 */
function mergeHighlights(liveNotes, raw) {
    if (!liveNotes || liveNotes.length === 0)
        return undefined;
    // Normalise an offset to a finite number so both sides of the join use the
    // same key. Only real numbers and non-empty numeric strings qualify;
    // null, '' and booleans would otherwise coerce to 0/1 and attach
    // enrichment to the wrong note.
    const toOffsetKey = (value) => {
        if (typeof value === 'number') {
            return Number.isFinite(value) ? value : undefined;
        }
        if (typeof value === 'string' && value.trim().length > 0) {
            const parsed = Number(value);
            return Number.isFinite(parsed) ? parsed : undefined;
        }
        return undefined;
    };
    // Index the model's highlights by offset. Malformed entries are skipped,
    // which simply means "no enrichment" for the matching note.
    const byOffset = new Map();
    if (Array.isArray(raw)) {
        for (const item of raw) {
            if (!item || typeof item !== 'object')
                continue;
            const key = toOffsetKey(item.offsetMs);
            if (key === undefined)
                continue;
            const subtitle = typeof item.subtitle === 'string' && item.subtitle.trim().length > 0
                ? item.subtitle.trim()
                : undefined;
            const bullets = Array.isArray(item.bullets)
                ? item.bullets.map((b) => (typeof b === 'string' ? b.trim() : '')).filter((b) => b.length > 0)
                : [];
            // For duplicate offsets the later entry wins field by field, but an
            // empty field never erases enrichment an earlier entry provided.
            const previous = byOffset.get(key);
            byOffset.set(key, {
                subtitle: subtitle ?? previous?.subtitle,
                bullets: bullets.length > 0 ? bullets : previous?.bullets,
            });
        }
    }
    return liveNotes.map((note) => {
        const key = toOffsetKey(note.offsetMs);
        const enrichment = key === undefined ? undefined : byOffset.get(key);
        return {
            // The note's own values are passed through untouched.
            offsetMs: note.offsetMs,
            userText: note.text,
            subtitle: enrichment?.subtitle,
            bullets: enrichment?.bullets,
        };
    });
}
|
|
106
|
+
/**
 * Wrap a bare transcript in the same result shape the full pipeline returns,
 * with every summary-related field left empty.
 *
 * @param {string} transcript - The transcribed text.
 * @returns {{transcript: string, summary: string, keyPoints: Array, actionItems: Array, emoji: string}}
 *   A new result object; the arrays are fresh on every call.
 */
function transcriptOnlyResult(transcript) {
    const result = { transcript };
    result.summary = '';
    result.keyPoints = [];
    result.actionItems = [];
    result.emoji = '';
    return result;
}
|
|
115
|
+
// Default instruction sent with the audio when the caller supplies no custom
// transcription prompt. Kept as one entry per output line so the blank lines
// that separate the sections stay visible.
const DEFAULT_TRANSCRIPT_PROMPT = [
    'Please transcribe this audio recording with proper speaker identification.',
    '',
    'Format requirements:',
    '1. IDENTIFY different speakers and label them as 참가자1, 참가자2, etc.',
    "2. Each speaker's turn MUST start on a NEW LINE",
    '3. Format: 참가자X: [what they said]',
    '4. Add a blank line between different speakers',
    '',
    'Example format:',
    '참가자1: 안녕하세요, 오늘 회의를 시작하겠습니다.',
    '',
    '참가자2: 네, 준비됐습니다.',
    '',
    '참가자1: 첫 번째 안건은...',
    '',
    'IMPORTANT:',
    '- You MUST identify and differentiate between speakers',
    '- Each speaker turn MUST start on a new line',
    '- Add blank line between different speakers',
    '- DO NOT include timestamps',
    '- Keep the transcription in the original spoken language',
    '- Return ONLY the transcription text, no JSON formatting',
].join('\n');
|
|
42
137
|
class GeminiService {
|
|
43
138
|
// Get FFmpeg path for this service
|
|
44
139
|
async getFFmpegPath() {
|
|
@@ -63,7 +158,7 @@ class GeminiService {
|
|
|
63
158
|
const wordList = this.knownWords.map((w) => `- ${w}`).join('\n');
|
|
64
159
|
return `The following proper nouns, names, and terms may appear in the audio. Transcribe them exactly as spelled:\n${wordList}\n\n`;
|
|
65
160
|
}
|
|
66
|
-
async transcribeAudio(audioFilePath, progressCallback, summaryPrompt) {
|
|
161
|
+
async transcribeAudio(audioFilePath, progressCallback, summaryPrompt, liveNotes, options = {}) {
|
|
67
162
|
// Integration-test escape hatch: avoid the real Gemini call so tests can
|
|
68
163
|
// exercise the surrounding pipeline (CLI parsing, IPC, ffmpeg, save) for
|
|
69
164
|
// free and offline. Gated on NODE_ENV=test so a stray LISTENER_TEST_MODE
|
|
@@ -71,6 +166,9 @@ class GeminiService {
|
|
|
71
166
|
if (process.env.LISTENER_TEST_MODE && process.env.NODE_ENV === 'test') {
|
|
72
167
|
if (progressCallback)
|
|
73
168
|
progressCallback(100, 'Stubbed transcription');
|
|
169
|
+
if (options.transcriptOnly) {
|
|
170
|
+
return transcriptOnlyResult('Stubbed transcript.');
|
|
171
|
+
}
|
|
74
172
|
return {
|
|
75
173
|
transcript: 'Stubbed transcript.',
|
|
76
174
|
summary: 'Stubbed summary.',
|
|
@@ -84,20 +182,20 @@ class GeminiService {
|
|
|
84
182
|
// Check file size
|
|
85
183
|
const stats = fs.statSync(audioFilePath);
|
|
86
184
|
const fileSizeInMB = stats.size / (1024 * 1024);
|
|
87
|
-
console.
|
|
185
|
+
console.error(`Audio file size: ${fileSizeInMB.toFixed(2)} MB`);
|
|
88
186
|
if (progressCallback) {
|
|
89
187
|
progressCallback(15, `Processing ${fileSizeInMB.toFixed(1)} MB audio file...`);
|
|
90
188
|
}
|
|
91
189
|
// Get audio duration using ffmpeg
|
|
92
190
|
const duration = await this.getAudioDuration(audioFilePath);
|
|
93
|
-
console.
|
|
191
|
+
console.error(`Audio duration: ${duration} seconds`);
|
|
94
192
|
// If duration is 0, log a warning but continue processing
|
|
95
193
|
if (duration === 0) {
|
|
96
194
|
console.warn('WARNING: Could not determine audio duration. Will process as single file without segmentation.');
|
|
97
195
|
}
|
|
98
196
|
// Always use the two-step approach for consistency
|
|
99
|
-
console.
|
|
100
|
-
return await this.transcribeWithTwoSteps(audioFilePath, duration, progressCallback, summaryPrompt);
|
|
197
|
+
console.error('Using two-step transcription approach...');
|
|
198
|
+
return await this.transcribeWithTwoSteps(audioFilePath, duration, progressCallback, summaryPrompt, liveNotes, options);
|
|
101
199
|
}
|
|
102
200
|
catch (error) {
|
|
103
201
|
console.error('Error transcribing audio:', error);
|
|
@@ -116,21 +214,24 @@ class GeminiService {
|
|
|
116
214
|
throw new Error(`Failed to transcribe audio: ${error instanceof Error ? error.message : String(error)}`);
|
|
117
215
|
}
|
|
118
216
|
}
|
|
119
|
-
// Get audio duration using
|
|
217
|
+
// Get audio duration using ffmpeg
|
|
120
218
|
async getAudioDuration(audioFilePath) {
|
|
121
|
-
const { exec } = require('child_process');
|
|
122
|
-
const { promisify } = require('util');
|
|
123
|
-
const execAsync = promisify(exec);
|
|
124
219
|
try {
|
|
125
220
|
const ffmpegPath = await this.getFFmpegPath();
|
|
126
221
|
// Use ffmpeg with -f null to get file info including duration
|
|
127
222
|
// This will output file info to stderr which we can parse
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
223
|
+
console.error('Running ffmpeg for duration:', ffmpegPath, audioFilePath);
|
|
224
|
+
const { stderr } = await execFileAsync(ffmpegPath, [
|
|
225
|
+
'-i',
|
|
226
|
+
audioFilePath,
|
|
227
|
+
'-f',
|
|
228
|
+
'null',
|
|
229
|
+
'-',
|
|
230
|
+
]).catch((error) => {
|
|
231
|
+
const execError = error;
|
|
131
232
|
// FFmpeg exits with non-zero code when output is null, but still provides info in stderr
|
|
132
233
|
// This is expected behavior, so we return the error object which contains stdout/stderr
|
|
133
|
-
return { stdout:
|
|
234
|
+
return { stdout: execError.stdout || '', stderr: execError.stderr || '' };
|
|
134
235
|
});
|
|
135
236
|
// Extract duration from stderr (where ffmpeg outputs file info)
|
|
136
237
|
const durationMatch = stderr?.match(/Duration: (\d{2}):(\d{2}):(\d{2}\.\d{2})/);
|
|
@@ -139,7 +240,7 @@ class GeminiService {
|
|
|
139
240
|
const minutes = Number.parseInt(durationMatch[2]);
|
|
140
241
|
const seconds = Number.parseFloat(durationMatch[3]);
|
|
141
242
|
const totalSeconds = hours * 3600 + minutes * 60 + seconds;
|
|
142
|
-
console.
|
|
243
|
+
console.error(`FFmpeg extracted duration: ${totalSeconds} seconds`);
|
|
143
244
|
return totalSeconds;
|
|
144
245
|
}
|
|
145
246
|
// Alternative regex pattern for different duration formats
|
|
@@ -149,7 +250,7 @@ class GeminiService {
|
|
|
149
250
|
const minutes = Number.parseInt(altDurationMatch[2]);
|
|
150
251
|
const seconds = Number.parseInt(altDurationMatch[3]);
|
|
151
252
|
const totalSeconds = hours * 3600 + minutes * 60 + seconds;
|
|
152
|
-
console.
|
|
253
|
+
console.error(`FFmpeg extracted duration (alt format): ${totalSeconds} seconds`);
|
|
153
254
|
return totalSeconds;
|
|
154
255
|
}
|
|
155
256
|
// Default to 0 if we can't determine duration
|
|
@@ -164,9 +265,6 @@ class GeminiService {
|
|
|
164
265
|
}
|
|
165
266
|
// Split audio file into segments
|
|
166
267
|
async splitAudioIntoSegments(audioFilePath, segmentDuration = 300) {
|
|
167
|
-
const { exec } = require('child_process');
|
|
168
|
-
const { promisify } = require('util');
|
|
169
|
-
const execAsync = promisify(exec);
|
|
170
268
|
const outputDir = path.dirname(audioFilePath);
|
|
171
269
|
const baseName = path.basename(audioFilePath, path.extname(audioFilePath));
|
|
172
270
|
const ext = path.extname(audioFilePath);
|
|
@@ -175,14 +273,24 @@ class GeminiService {
|
|
|
175
273
|
const ffmpegPath = await this.getFFmpegPath();
|
|
176
274
|
try {
|
|
177
275
|
// Split audio into segments
|
|
178
|
-
await
|
|
276
|
+
await execFileAsync(ffmpegPath, [
|
|
277
|
+
'-i',
|
|
278
|
+
audioFilePath,
|
|
279
|
+
'-f',
|
|
280
|
+
'segment',
|
|
281
|
+
'-segment_time',
|
|
282
|
+
String(segmentDuration),
|
|
283
|
+
'-c',
|
|
284
|
+
'copy',
|
|
285
|
+
segmentPath,
|
|
286
|
+
]);
|
|
179
287
|
// Find all created segment files
|
|
180
288
|
const segmentFiles = fs
|
|
181
289
|
.readdirSync(outputDir)
|
|
182
290
|
.filter((file) => file.startsWith(`${baseName}_segment_`) && file.endsWith(ext))
|
|
183
291
|
.map((file) => path.join(outputDir, file))
|
|
184
292
|
.sort();
|
|
185
|
-
console.
|
|
293
|
+
console.error(`Split audio into ${segmentFiles.length} segments`);
|
|
186
294
|
return segmentFiles;
|
|
187
295
|
}
|
|
188
296
|
catch (error) {
|
|
@@ -228,25 +336,31 @@ class GeminiService {
|
|
|
228
336
|
}
|
|
229
337
|
}
|
|
230
338
|
// Two-step transcription approach for all audio files
|
|
231
|
-
async transcribeWithTwoSteps(audioFilePath, duration, progressCallback, customSummaryPrompt) {
|
|
339
|
+
async transcribeWithTwoSteps(audioFilePath, duration, progressCallback, customSummaryPrompt, liveNotes, options = {}) {
|
|
232
340
|
try {
|
|
233
341
|
let fullTranscript = '';
|
|
234
342
|
// Step 1: Get transcript
|
|
235
343
|
if (duration > 300) {
|
|
236
344
|
// Use segmented approach for long audio
|
|
237
|
-
console.
|
|
238
|
-
fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback);
|
|
345
|
+
console.error('Audio is longer than 5 minutes, using segmented transcription...');
|
|
346
|
+
fullTranscript = await this.getSegmentedTranscript(audioFilePath, duration, progressCallback, options.transcriptionPrompt);
|
|
239
347
|
}
|
|
240
348
|
else {
|
|
241
349
|
// Get transcript for short audio
|
|
242
|
-
console.
|
|
243
|
-
fullTranscript = await this.getShortAudioTranscript(audioFilePath, progressCallback);
|
|
350
|
+
console.error('Transcribing short audio...');
|
|
351
|
+
fullTranscript = await this.getShortAudioTranscript(audioFilePath, progressCallback, options.transcriptionPrompt);
|
|
352
|
+
}
|
|
353
|
+
if (options.transcriptOnly) {
|
|
354
|
+
if (progressCallback) {
|
|
355
|
+
progressCallback(100, 'Transcript ready');
|
|
356
|
+
}
|
|
357
|
+
return transcriptOnlyResult(fullTranscript);
|
|
244
358
|
}
|
|
245
359
|
// Step 2: Generate summary, key points, action items from transcript
|
|
246
360
|
if (progressCallback) {
|
|
247
361
|
progressCallback(85, 'Generating summary and key points...');
|
|
248
362
|
}
|
|
249
|
-
const
|
|
363
|
+
const basePrompt = customSummaryPrompt ||
|
|
250
364
|
`Based on this meeting transcript, provide:
|
|
251
365
|
|
|
252
366
|
1. A concise meeting title in Korean (10-20 characters that captures the main topic)
|
|
@@ -263,6 +377,9 @@ Return as JSON:
|
|
|
263
377
|
"actionItems": ["action 1", "action 2"],
|
|
264
378
|
"emoji": "📝"
|
|
265
379
|
}`;
|
|
380
|
+
const enrichableNotes = (liveNotes ?? []).filter((n) => (n.text ?? '').trim().length > 0);
|
|
381
|
+
const highlightsBlock = buildHighlightsPromptBlock(enrichableNotes);
|
|
382
|
+
const summaryPrompt = highlightsBlock ? `${basePrompt}\n\n${highlightsBlock}` : basePrompt;
|
|
266
383
|
const summaryResult = await this.ai.models.generateContent({
|
|
267
384
|
model: this.proModel,
|
|
268
385
|
contents: [{ role: 'user', parts: [{ text: summaryPrompt }, { text: fullTranscript }] }],
|
|
@@ -286,11 +403,14 @@ Return as JSON:
|
|
|
286
403
|
'keyPoints',
|
|
287
404
|
'actionItems',
|
|
288
405
|
'emoji',
|
|
406
|
+
'highlights',
|
|
289
407
|
]);
|
|
290
408
|
const customFields = {};
|
|
409
|
+
let rawHighlights;
|
|
291
410
|
try {
|
|
292
411
|
const parsed = JSON.parse(summaryText);
|
|
293
412
|
summaryData = parsed;
|
|
413
|
+
rawHighlights = parsed.highlights;
|
|
294
414
|
// Extract custom fields (any keys not in the known set)
|
|
295
415
|
for (const [key, value] of Object.entries(parsed)) {
|
|
296
416
|
if (!KNOWN_KEYS.has(key)) {
|
|
@@ -306,6 +426,7 @@ Return as JSON:
|
|
|
306
426
|
summaryData.summary = summaryMatch[1].replace(/\\n/g, '\n');
|
|
307
427
|
}
|
|
308
428
|
}
|
|
429
|
+
const highlights = mergeHighlights(liveNotes, rawHighlights);
|
|
309
430
|
if (progressCallback) {
|
|
310
431
|
progressCallback(95, 'Finalizing results...');
|
|
311
432
|
}
|
|
@@ -317,6 +438,7 @@ Return as JSON:
|
|
|
317
438
|
emoji: summaryData.emoji,
|
|
318
439
|
suggestedTitle: summaryData.suggestedTitle,
|
|
319
440
|
customFields: Object.keys(customFields).length > 0 ? customFields : undefined,
|
|
441
|
+
highlights,
|
|
320
442
|
};
|
|
321
443
|
}
|
|
322
444
|
catch (error) {
|
|
@@ -325,7 +447,7 @@ Return as JSON:
|
|
|
325
447
|
}
|
|
326
448
|
}
|
|
327
449
|
// Get transcript for short audio files
|
|
328
|
-
async getShortAudioTranscript(audioFilePath, progressCallback) {
|
|
450
|
+
async getShortAudioTranscript(audioFilePath, progressCallback, customPrompt) {
|
|
329
451
|
try {
|
|
330
452
|
const stats = fs.statSync(audioFilePath);
|
|
331
453
|
const fileSizeInMB = stats.size / (1024 * 1024);
|
|
@@ -335,7 +457,7 @@ Return as JSON:
|
|
|
335
457
|
// Use Files API for files over 20MB
|
|
336
458
|
let fileUri = null;
|
|
337
459
|
if (fileSizeInMB > 20) {
|
|
338
|
-
console.
|
|
460
|
+
console.error('File is over 20MB, using Files API for upload...');
|
|
339
461
|
if (progressCallback) {
|
|
340
462
|
progressCallback(25, 'Uploading large file to Gemini...');
|
|
341
463
|
}
|
|
@@ -349,7 +471,7 @@ Return as JSON:
|
|
|
349
471
|
let file = await this.ai.files.get({ name: uploadResult.name || '' });
|
|
350
472
|
let retries = 0;
|
|
351
473
|
while (file.state === 'PROCESSING' && retries < 30) {
|
|
352
|
-
console.
|
|
474
|
+
console.error(`Waiting for file to be processed... (attempt ${retries + 1}/30)`);
|
|
353
475
|
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
354
476
|
file = await this.ai.files.get({ name: uploadResult.name || '' });
|
|
355
477
|
retries++;
|
|
@@ -361,28 +483,7 @@ Return as JSON:
|
|
|
361
483
|
if (progressCallback) {
|
|
362
484
|
progressCallback(50, 'Transcribing audio...');
|
|
363
485
|
}
|
|
364
|
-
const transcriptPrompt = `${this.buildGlossaryBlock()}
|
|
365
|
-
|
|
366
|
-
Format requirements:
|
|
367
|
-
1. IDENTIFY different speakers and label them as 참가자1, 참가자2, etc.
|
|
368
|
-
2. Each speaker's turn MUST start on a NEW LINE
|
|
369
|
-
3. Format: 참가자X: [what they said]
|
|
370
|
-
4. Add a blank line between different speakers
|
|
371
|
-
|
|
372
|
-
Example format:
|
|
373
|
-
참가자1: 안녕하세요, 오늘 회의를 시작하겠습니다.
|
|
374
|
-
|
|
375
|
-
참가자2: 네, 준비됐습니다.
|
|
376
|
-
|
|
377
|
-
참가자1: 첫 번째 안건은...
|
|
378
|
-
|
|
379
|
-
IMPORTANT:
|
|
380
|
-
- You MUST identify and differentiate between speakers
|
|
381
|
-
- Each speaker turn MUST start on a new line
|
|
382
|
-
- Add blank line between different speakers
|
|
383
|
-
- DO NOT include timestamps
|
|
384
|
-
- Keep the transcription in the original spoken language
|
|
385
|
-
- Return ONLY the transcription text, no JSON formatting`;
|
|
486
|
+
const transcriptPrompt = `${this.buildGlossaryBlock()}${customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT}`;
|
|
386
487
|
let result;
|
|
387
488
|
if (fileUri) {
|
|
388
489
|
const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(audioFilePath));
|
|
@@ -453,38 +554,19 @@ IMPORTANT:
|
|
|
453
554
|
return `[Segment ${segmentIndex + 1}: ${this.formatTime(segmentStartTime)} ~ ${this.formatTime(segmentEndTime)}]\n\n`;
|
|
454
555
|
}
|
|
455
556
|
// Create prompt for segment transcription
|
|
456
|
-
createSegmentPrompt(segmentIndex, totalSegments) {
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
1. IDENTIFY different speakers and label them as 참가자1, 참가자2, etc.
|
|
461
|
-
2. Each speaker's turn MUST start on a NEW LINE
|
|
462
|
-
3. Format: 참가자X: [what they said]
|
|
463
|
-
4. Add a blank line between different speakers
|
|
464
|
-
|
|
465
|
-
Example format:
|
|
466
|
-
참가자1: 안녕하세요, 오늘 회의를 시작하겠습니다.
|
|
467
|
-
|
|
468
|
-
참가자2: 네, 준비됐습니다.
|
|
469
|
-
|
|
470
|
-
참가자1: 첫 번째 안건은...
|
|
471
|
-
|
|
472
|
-
IMPORTANT:
|
|
473
|
-
- You MUST identify and differentiate between speakers
|
|
474
|
-
- Each speaker turn MUST start on a new line
|
|
475
|
-
- Add blank line between different speakers
|
|
476
|
-
- DO NOT include timestamps
|
|
477
|
-
- Keep the transcription in the original spoken language
|
|
478
|
-
- Return ONLY the transcription text, no JSON formatting`;
|
|
557
|
+
createSegmentPrompt(segmentIndex, totalSegments, customPrompt) {
|
|
558
|
+
const positional = `[Audio segment ${segmentIndex + 1} of ${totalSegments}]\n\n`;
|
|
559
|
+
const body = customPrompt ?? DEFAULT_TRANSCRIPT_PROMPT;
|
|
560
|
+
return `${this.buildGlossaryBlock()}${positional}${body}`;
|
|
479
561
|
}
|
|
480
562
|
// Transcribe a single segment with retry logic
|
|
481
|
-
async transcribeSingleSegment(segmentFile, segmentIndex, totalSegments, segmentStartTime, segmentEndTime) {
|
|
563
|
+
async transcribeSingleSegment(segmentFile, segmentIndex, totalSegments, segmentStartTime, segmentEndTime, customPrompt) {
|
|
482
564
|
const maxRetries = 3;
|
|
483
565
|
let lastError = null;
|
|
484
|
-
const segmentPrompt = this.createSegmentPrompt(segmentIndex, totalSegments);
|
|
566
|
+
const segmentPrompt = this.createSegmentPrompt(segmentIndex, totalSegments, customPrompt);
|
|
485
567
|
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
486
568
|
try {
|
|
487
|
-
console.
|
|
569
|
+
console.error(`Starting transcription for segment ${segmentIndex + 1}/${totalSegments} (attempt ${attempt}/${maxRetries})...`);
|
|
488
570
|
const audioData = fs.readFileSync(segmentFile);
|
|
489
571
|
const base64Audio = audioData.toString('base64');
|
|
490
572
|
const mimeType = (0, audioFormats_1.mimeTypeForExtension)(path.extname(segmentFile));
|
|
@@ -510,7 +592,7 @@ IMPORTANT:
|
|
|
510
592
|
},
|
|
511
593
|
});
|
|
512
594
|
const transcript = result.text || '';
|
|
513
|
-
console.
|
|
595
|
+
console.error(`Completed transcription for segment ${segmentIndex + 1}/${totalSegments}`);
|
|
514
596
|
// Add segment time range header
|
|
515
597
|
const segmentHeader = this.createSegmentHeader(segmentIndex, segmentStartTime, segmentEndTime);
|
|
516
598
|
return {
|
|
@@ -524,7 +606,7 @@ IMPORTANT:
|
|
|
524
606
|
if (attempt < maxRetries) {
|
|
525
607
|
// Wait before retry with exponential backoff
|
|
526
608
|
const retryDelay = Math.min(1000 * 2 ** (attempt - 1), 10000); // Max 10 seconds
|
|
527
|
-
console.
|
|
609
|
+
console.error(`Retrying segment ${segmentIndex + 1} in ${retryDelay}ms...`);
|
|
528
610
|
await new Promise((resolve) => setTimeout(resolve, retryDelay));
|
|
529
611
|
}
|
|
530
612
|
}
|
|
@@ -537,7 +619,7 @@ IMPORTANT:
|
|
|
537
619
|
};
|
|
538
620
|
}
|
|
539
621
|
// Get segmented transcript (renamed from transcribeAudioSegmented)
|
|
540
|
-
async getSegmentedTranscript(audioFilePath, duration, progressCallback) {
|
|
622
|
+
async getSegmentedTranscript(audioFilePath, duration, progressCallback, customPrompt) {
|
|
541
623
|
try {
|
|
542
624
|
// Split audio into 5-minute segments
|
|
543
625
|
const segmentFiles = await this.splitAudioIntoSegments(audioFilePath, 300);
|
|
@@ -548,7 +630,7 @@ IMPORTANT:
|
|
|
548
630
|
const transcriptionPromises = segmentFiles.map(async (segmentFile, i) => {
|
|
549
631
|
const segmentStartTime = i * 300; // 5 minutes in seconds
|
|
550
632
|
const segmentEndTime = Math.min(segmentStartTime + 300, duration);
|
|
551
|
-
return this.transcribeSingleSegment(segmentFile, i, segmentFiles.length, segmentStartTime, segmentEndTime);
|
|
633
|
+
return this.transcribeSingleSegment(segmentFile, i, segmentFiles.length, segmentStartTime, segmentEndTime, customPrompt);
|
|
552
634
|
});
|
|
553
635
|
// Track progress of concurrent transcriptions
|
|
554
636
|
let completedCount = 0;
|
package/dist/main.js
CHANGED
|
@@ -424,7 +424,7 @@ electron_1.app.whenReady().then(() => {
|
|
|
424
424
|
},
|
|
425
425
|
},
|
|
426
426
|
{
|
|
427
|
-
label:
|
|
427
|
+
label: autoUpdaterService_1.autoUpdaterService.getManualUpdateLabel(),
|
|
428
428
|
click: () => {
|
|
429
429
|
autoUpdaterService_1.autoUpdaterService.checkForUpdatesManually();
|
|
430
430
|
},
|
|
@@ -439,7 +439,7 @@ electron_1.app.whenReady().then(() => {
|
|
|
439
439
|
{ role: 'about' },
|
|
440
440
|
{ type: 'separator' },
|
|
441
441
|
{
|
|
442
|
-
label:
|
|
442
|
+
label: autoUpdaterService_1.autoUpdaterService.getManualUpdateLabel(),
|
|
443
443
|
click: () => {
|
|
444
444
|
autoUpdaterService_1.autoUpdaterService.checkForUpdatesManually();
|
|
445
445
|
},
|
|
@@ -1001,7 +1001,7 @@ electron_1.ipcMain.on('recording-chunk', (_event, data) => {
|
|
|
1001
1001
|
console.error('Invalid chunk payload:', error);
|
|
1002
1002
|
}
|
|
1003
1003
|
});
|
|
1004
|
-
electron_1.ipcMain.handle('stop-recording', async () => {
|
|
1004
|
+
electron_1.ipcMain.handle('stop-recording', async (_, opts) => {
|
|
1005
1005
|
try {
|
|
1006
1006
|
finalizeRecordingSession();
|
|
1007
1007
|
const result = await audioRecorder.stopRecording();
|
|
@@ -1014,6 +1014,18 @@ electron_1.ipcMain.handle('stop-recording', async () => {
|
|
|
1014
1014
|
// Silently skip if ffmpeg isn't available — file still plays, transcription
|
|
1015
1015
|
// pipeline is unaffected.
|
|
1016
1016
|
await remuxRecordingHeader(result.filePath);
|
|
1017
|
+
// Persist live notes alongside the audio so they survive even if the
|
|
1018
|
+
// user transcribes later (auto-mode off). transcribe-audio falls back
|
|
1019
|
+
// to this when its own arg is missing.
|
|
1020
|
+
const liveNotes = sanitizeLiveNotes(opts?.liveNotes);
|
|
1021
|
+
if (liveNotes && liveNotes.length > 0) {
|
|
1022
|
+
try {
|
|
1023
|
+
await metadataService_1.metadataService.saveMetadata(result.filePath, { liveNotes });
|
|
1024
|
+
}
|
|
1025
|
+
catch (err) {
|
|
1026
|
+
console.error('Failed to persist live notes to metadata:', err);
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1017
1029
|
}
|
|
1018
1030
|
}
|
|
1019
1031
|
return result;
|
|
@@ -1108,6 +1120,31 @@ function isContainedTranscriptionPath(folderPath) {
|
|
|
1108
1120
|
const resolved = path.resolve(folderPath);
|
|
1109
1121
|
return resolved === root || resolved.startsWith(root + path.sep);
|
|
1110
1122
|
}
|
|
1123
|
+
// Validate + normalize the renderer's live-notes payload before it touches disk.
|
|
1124
|
+
// Renderer state is untrusted (compromised content scripts, future agent flows)
|
|
1125
|
+
// so this enforces shape + caps text length to keep summary.md/Notion sane.
|
|
1126
|
+
const LIVE_NOTE_MAX_TEXT = 2000;
|
|
1127
|
+
const LIVE_NOTE_MAX_COUNT = 500;
|
|
1128
|
+
function sanitizeLiveNotes(raw) {
|
|
1129
|
+
if (!Array.isArray(raw))
|
|
1130
|
+
return undefined;
|
|
1131
|
+
const out = [];
|
|
1132
|
+
for (const item of raw) {
|
|
1133
|
+
if (!item || typeof item !== 'object')
|
|
1134
|
+
continue;
|
|
1135
|
+
const offsetMs = Number(item.offsetMs);
|
|
1136
|
+
const text = item.text;
|
|
1137
|
+
if (!Number.isFinite(offsetMs))
|
|
1138
|
+
continue;
|
|
1139
|
+
out.push({
|
|
1140
|
+
offsetMs: Math.max(0, Math.floor(offsetMs)),
|
|
1141
|
+
text: typeof text === 'string' ? text.slice(0, LIVE_NOTE_MAX_TEXT) : '',
|
|
1142
|
+
});
|
|
1143
|
+
if (out.length >= LIVE_NOTE_MAX_COUNT)
|
|
1144
|
+
break;
|
|
1145
|
+
}
|
|
1146
|
+
return out.length > 0 ? out : undefined;
|
|
1147
|
+
}
|
|
1111
1148
|
// Tell the renderer the config has changed out-of-band so it can re-read and
|
|
1112
1149
|
// re-render its UI state (toggle checkboxes etc.). Used by the agent flow.
|
|
1113
1150
|
function broadcastConfigChanged() {
|
|
@@ -1182,9 +1219,24 @@ electron_1.ipcMain.handle('get-meeting-status', async () => {
|
|
|
1182
1219
|
};
|
|
1183
1220
|
});
|
|
1184
1221
|
// Transcription handler
|
|
1185
|
-
electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
|
|
1222
|
+
electron_1.ipcMain.handle('transcribe-audio', async (_, filePath, liveNotesRaw) => {
|
|
1186
1223
|
try {
|
|
1187
1224
|
console.log('Transcription requested for:', filePath);
|
|
1225
|
+
let liveNotes = sanitizeLiveNotes(liveNotesRaw);
|
|
1226
|
+
if (!liveNotes || liveNotes.length === 0) {
|
|
1227
|
+
// Fall back to whatever stop-recording persisted -- covers the
|
|
1228
|
+
// record-now-transcribe-later flow when auto-mode is off.
|
|
1229
|
+
try {
|
|
1230
|
+
const existing = await metadataService_1.metadataService.getMetadata(filePath);
|
|
1231
|
+
const fromMetadata = sanitizeLiveNotes(existing?.liveNotes);
|
|
1232
|
+
if (fromMetadata && fromMetadata.length > 0) {
|
|
1233
|
+
liveNotes = fromMetadata;
|
|
1234
|
+
}
|
|
1235
|
+
}
|
|
1236
|
+
catch (err) {
|
|
1237
|
+
console.warn('Failed to read live notes from metadata:', err);
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1188
1240
|
// Send progress update
|
|
1189
1241
|
if (mainWindow) {
|
|
1190
1242
|
mainWindow.webContents.send('transcription-progress', {
|
|
@@ -1216,9 +1268,14 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
|
|
|
1216
1268
|
}
|
|
1217
1269
|
};
|
|
1218
1270
|
const summaryPrompt = configService.getSummaryPrompt();
|
|
1219
|
-
const result = await geminiService.transcribeAudio(filePath, progressCallback, summaryPrompt);
|
|
1271
|
+
const result = await geminiService.transcribeAudio(filePath, progressCallback, summaryPrompt, liveNotes);
|
|
1220
1272
|
console.log('Transcription completed successfully');
|
|
1221
1273
|
console.log('Saving metadata for:', filePath);
|
|
1274
|
+
// Attach renderer-captured notes so downstream consumers (Notion upload,
|
|
1275
|
+
// re-render in the modal) can read them off the result object.
|
|
1276
|
+
if (liveNotes && liveNotes.length > 0) {
|
|
1277
|
+
result.liveNotes = liveNotes;
|
|
1278
|
+
}
|
|
1222
1279
|
// Save transcription files (summary.md + transcript.md)
|
|
1223
1280
|
const title = result.suggestedTitle || path.basename(filePath, path.extname(filePath));
|
|
1224
1281
|
let transcriptionPath;
|
|
@@ -1228,6 +1285,7 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
|
|
|
1228
1285
|
result,
|
|
1229
1286
|
audioFilePath: filePath,
|
|
1230
1287
|
dataPath: electron_1.app.getPath('userData'),
|
|
1288
|
+
liveNotes,
|
|
1231
1289
|
});
|
|
1232
1290
|
console.log('Transcription saved to:', transcriptionPath);
|
|
1233
1291
|
}
|
|
@@ -1242,6 +1300,7 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
|
|
|
1242
1300
|
suggestedTitle: result.suggestedTitle,
|
|
1243
1301
|
transcriptionPath,
|
|
1244
1302
|
customFields: result.customFields,
|
|
1303
|
+
liveNotes,
|
|
1245
1304
|
transcribedAt: new Date().toISOString(),
|
|
1246
1305
|
});
|
|
1247
1306
|
}
|
|
@@ -1255,6 +1314,7 @@ electron_1.ipcMain.handle('transcribe-audio', async (_, filePath) => {
|
|
|
1255
1314
|
keyPoints: result.keyPoints,
|
|
1256
1315
|
actionItems: result.actionItems,
|
|
1257
1316
|
customFields: result.customFields,
|
|
1317
|
+
liveNotes,
|
|
1258
1318
|
transcribedAt: new Date().toISOString(),
|
|
1259
1319
|
});
|
|
1260
1320
|
}
|
|
@@ -1413,6 +1473,8 @@ electron_1.ipcMain.handle('get-metadata', async (_, filePath) => {
|
|
|
1413
1473
|
actionItems: transcription.actionItems,
|
|
1414
1474
|
customFields: transcription.customFields ?? metadata.customFields,
|
|
1415
1475
|
emoji: transcription.emoji,
|
|
1476
|
+
liveNotes: transcription.liveNotes ?? metadata.liveNotes,
|
|
1477
|
+
highlights: transcription.highlights,
|
|
1416
1478
|
notionPageUrl: transcription.notionPageUrl,
|
|
1417
1479
|
slackSentAt: transcription.slackSentAt,
|
|
1418
1480
|
slackError: transcription.slackError,
|
|
@@ -1512,33 +1574,40 @@ electron_1.ipcMain.handle('get-recordings', async () => {
|
|
|
1512
1574
|
}
|
|
1513
1575
|
// Read all files in the recordings directory
|
|
1514
1576
|
const files = fs.readdirSync(recordingsDir);
|
|
1515
|
-
// Filter for audio files and get their stats
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1577
|
+
// Filter for audio files and get their stats. Skip per-file races so one
|
|
1578
|
+
// deleted recording does not make the whole list fail.
|
|
1579
|
+
const recordings = [];
|
|
1580
|
+
for (const file of files) {
|
|
1519
1581
|
if (file.includes('_segment_'))
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
.map((file) => {
|
|
1582
|
+
continue;
|
|
1583
|
+
if (!(0, audioFormats_1.isSupportedAudioExtension)(path.extname(file)))
|
|
1584
|
+
continue;
|
|
1524
1585
|
const filePath = path.join(recordingsDir, file);
|
|
1525
|
-
|
|
1586
|
+
let stats;
|
|
1587
|
+
try {
|
|
1588
|
+
stats = fs.statSync(filePath);
|
|
1589
|
+
}
|
|
1590
|
+
catch (err) {
|
|
1591
|
+
if (err.code === 'ENOENT')
|
|
1592
|
+
continue;
|
|
1593
|
+
throw err;
|
|
1594
|
+
}
|
|
1526
1595
|
// Extract title from filename (format: title_timestamp.ext)
|
|
1527
1596
|
const nameWithoutExt = path.basename(file, path.extname(file));
|
|
1528
1597
|
const parts = nameWithoutExt.split('_');
|
|
1529
1598
|
const timestamp = parts.pop(); // Remove timestamp
|
|
1530
1599
|
const title = parts.join('_') || 'Untitled';
|
|
1531
|
-
|
|
1600
|
+
recordings.push({
|
|
1532
1601
|
filename: file,
|
|
1533
1602
|
path: filePath,
|
|
1534
|
-
title
|
|
1535
|
-
timestamp
|
|
1603
|
+
title,
|
|
1604
|
+
timestamp,
|
|
1536
1605
|
size: stats.size,
|
|
1537
1606
|
createdAt: stats.birthtime,
|
|
1538
1607
|
modifiedAt: stats.mtime,
|
|
1539
|
-
};
|
|
1540
|
-
}
|
|
1541
|
-
|
|
1608
|
+
});
|
|
1609
|
+
}
|
|
1610
|
+
recordings.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); // Sort by newest first
|
|
1542
1611
|
return { success: true, recordings };
|
|
1543
1612
|
}
|
|
1544
1613
|
catch (error) {
|
package/dist/outputService.js
CHANGED
|
@@ -33,12 +33,15 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.formatOffsetTimestamp = formatOffsetTimestamp;
|
|
36
37
|
exports.sanitizeForPath = sanitizeForPath;
|
|
37
38
|
exports.formatTimestamp = formatTimestamp;
|
|
38
39
|
exports.camelToLabel = camelToLabel;
|
|
39
40
|
exports.formatSummary = formatSummary;
|
|
40
41
|
exports.formatTranscript = formatTranscript;
|
|
41
42
|
exports.parseFrontmatter = parseFrontmatter;
|
|
43
|
+
exports.parseLiveNotesField = parseLiveNotesField;
|
|
44
|
+
exports.parseHighlightsField = parseHighlightsField;
|
|
42
45
|
exports.getTranscriptionsDir = getTranscriptionsDir;
|
|
43
46
|
exports.saveTranscription = saveTranscription;
|
|
44
47
|
exports.listTranscriptions = listTranscriptions;
|
|
@@ -46,6 +49,22 @@ exports.readTranscription = readTranscription;
|
|
|
46
49
|
exports.updateTranscriptionStatus = updateTranscriptionStatus;
|
|
47
50
|
const fs = __importStar(require("fs"));
|
|
48
51
|
const path = __importStar(require("path"));
|
|
52
|
+
/**
|
|
53
|
+
* Format a millisecond offset as `mm:ss` (or `hh:mm:ss` when the offset crosses
|
|
54
|
+
* one hour) — used by every Highlights-rendering sink (summary.md, Notion,
|
|
55
|
+
* modal, CLI) and by the Gemini prompt so the LLM sees the same coordinate
|
|
56
|
+
* system as the saved output.
|
|
57
|
+
*/
|
|
58
|
+
function formatOffsetTimestamp(offsetMs) {
|
|
59
|
+
const totalSeconds = Math.max(0, Math.floor(offsetMs / 1000));
|
|
60
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
61
|
+
const minutes = Math.floor((totalSeconds % 3600) / 60);
|
|
62
|
+
const seconds = totalSeconds % 60;
|
|
63
|
+
const pad = (n) => String(n).padStart(2, '0');
|
|
64
|
+
return hours > 0
|
|
65
|
+
? `${pad(hours)}:${pad(minutes)}:${pad(seconds)}`
|
|
66
|
+
: `${pad(minutes)}:${pad(seconds)}`;
|
|
67
|
+
}
|
|
49
68
|
function sanitizeForPath(name) {
|
|
50
69
|
return name
|
|
51
70
|
.replace(/[\/\\:*?"<>|]/g, '_')
|
|
@@ -69,7 +88,7 @@ function camelToLabel(key) {
|
|
|
69
88
|
.replace(/^./, (s) => s.toUpperCase())
|
|
70
89
|
.trim();
|
|
71
90
|
}
|
|
72
|
-
function formatSummary(result, title, mergedFrom) {
|
|
91
|
+
function formatSummary(result, title, mergedFrom, liveNotes, highlights) {
|
|
73
92
|
const lines = [];
|
|
74
93
|
lines.push(`# ${title}\n`);
|
|
75
94
|
if (mergedFrom?.length) {
|
|
@@ -98,6 +117,37 @@ function formatSummary(result, title, mergedFrom) {
|
|
|
98
117
|
}
|
|
99
118
|
lines.push('');
|
|
100
119
|
}
|
|
120
|
+
// Prefer the AI-enriched "highlights" view when present -- it carries the
|
|
121
|
+
// same user notes plus per-moment subtitle/bullets. Fall back to the bare
|
|
122
|
+
// bullet list of liveNotes when Gemini didn't produce highlights.
|
|
123
|
+
const enriched = highlights && highlights.length > 0 ? highlights : null;
|
|
124
|
+
if (enriched || liveNotes?.length) {
|
|
125
|
+
lines.push('## 🗒️ Highlights\n');
|
|
126
|
+
if (enriched) {
|
|
127
|
+
for (const h of enriched) {
|
|
128
|
+
const ts = formatOffsetTimestamp(h.offsetMs);
|
|
129
|
+
const title = (h.userText ?? '').trim();
|
|
130
|
+
lines.push(title ? `### [${ts}] ${title}` : `### [${ts}] 🏴`);
|
|
131
|
+
if (h.subtitle?.trim()) {
|
|
132
|
+
lines.push(`*${h.subtitle.trim()}*`);
|
|
133
|
+
}
|
|
134
|
+
if (h.bullets?.length) {
|
|
135
|
+
lines.push('');
|
|
136
|
+
for (const b of h.bullets)
|
|
137
|
+
lines.push(`- ${b}`);
|
|
138
|
+
}
|
|
139
|
+
lines.push('');
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
else {
|
|
143
|
+
for (const note of liveNotes) {
|
|
144
|
+
const ts = formatOffsetTimestamp(note.offsetMs);
|
|
145
|
+
const text = note.text?.trim();
|
|
146
|
+
lines.push(text ? `- [${ts}] ${text}` : `- [${ts}] 🏴`);
|
|
147
|
+
}
|
|
148
|
+
lines.push('');
|
|
149
|
+
}
|
|
150
|
+
}
|
|
101
151
|
if (result.customFields) {
|
|
102
152
|
for (const [key, value] of Object.entries(result.customFields)) {
|
|
103
153
|
const label = camelToLabel(key);
|
|
@@ -157,6 +207,12 @@ function buildFrontmatter(meta) {
|
|
|
157
207
|
for (const src of meta.mergedFrom)
|
|
158
208
|
lines.push(` - ${yamlQuote(src)}`);
|
|
159
209
|
}
|
|
210
|
+
if (meta.liveNotes?.length) {
|
|
211
|
+
lines.push(`liveNotes: ${yamlQuote(JSON.stringify(meta.liveNotes))}`);
|
|
212
|
+
}
|
|
213
|
+
if (meta.highlights?.length) {
|
|
214
|
+
lines.push(`highlights: ${yamlQuote(JSON.stringify(meta.highlights))}`);
|
|
215
|
+
}
|
|
160
216
|
if (meta.notionPageUrl) {
|
|
161
217
|
lines.push(`notionPageUrl: ${yamlQuote(meta.notionPageUrl)}`);
|
|
162
218
|
}
|
|
@@ -222,6 +278,66 @@ function parseFrontmatter(content) {
|
|
|
222
278
|
}
|
|
223
279
|
return { meta, body };
|
|
224
280
|
}
|
|
281
|
+
function parseLiveNotesField(raw) {
|
|
282
|
+
if (!raw)
|
|
283
|
+
return undefined;
|
|
284
|
+
try {
|
|
285
|
+
const parsed = typeof raw === 'string' ? JSON.parse(raw) : raw;
|
|
286
|
+
if (!Array.isArray(parsed))
|
|
287
|
+
return undefined;
|
|
288
|
+
const notes = [];
|
|
289
|
+
for (const item of parsed) {
|
|
290
|
+
if (!item || typeof item !== 'object')
|
|
291
|
+
continue;
|
|
292
|
+
const offsetMs = Number(item.offsetMs);
|
|
293
|
+
const text = item.text;
|
|
294
|
+
if (!Number.isFinite(offsetMs))
|
|
295
|
+
continue;
|
|
296
|
+
notes.push({
|
|
297
|
+
offsetMs: Math.max(0, Math.floor(offsetMs)),
|
|
298
|
+
text: typeof text === 'string' ? text : '',
|
|
299
|
+
});
|
|
300
|
+
}
|
|
301
|
+
return notes.length > 0 ? notes : undefined;
|
|
302
|
+
}
|
|
303
|
+
catch (e) {
|
|
304
|
+
console.warn('Failed to parse liveNotes from frontmatter:', e);
|
|
305
|
+
return undefined;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
function parseHighlightsField(raw) {
|
|
309
|
+
if (!raw)
|
|
310
|
+
return undefined;
|
|
311
|
+
try {
|
|
312
|
+
const parsed = typeof raw === 'string' ? JSON.parse(raw) : raw;
|
|
313
|
+
if (!Array.isArray(parsed))
|
|
314
|
+
return undefined;
|
|
315
|
+
const entries = [];
|
|
316
|
+
for (const item of parsed) {
|
|
317
|
+
if (!item || typeof item !== 'object')
|
|
318
|
+
continue;
|
|
319
|
+
const offsetMs = Number(item.offsetMs);
|
|
320
|
+
if (!Number.isFinite(offsetMs))
|
|
321
|
+
continue;
|
|
322
|
+
const userText = item.userText;
|
|
323
|
+
const subtitle = item.subtitle;
|
|
324
|
+
const bullets = item.bullets;
|
|
325
|
+
entries.push({
|
|
326
|
+
offsetMs: Math.max(0, Math.floor(offsetMs)),
|
|
327
|
+
userText: typeof userText === 'string' ? userText : '',
|
|
328
|
+
subtitle: typeof subtitle === 'string' && subtitle.trim().length > 0 ? subtitle : undefined,
|
|
329
|
+
bullets: Array.isArray(bullets)
|
|
330
|
+
? bullets.map((b) => (typeof b === 'string' ? b : '')).filter((b) => b.length > 0)
|
|
331
|
+
: undefined,
|
|
332
|
+
});
|
|
333
|
+
}
|
|
334
|
+
return entries.length > 0 ? entries : undefined;
|
|
335
|
+
}
|
|
336
|
+
catch (e) {
|
|
337
|
+
console.warn('Failed to parse highlights from frontmatter:', e);
|
|
338
|
+
return undefined;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
225
341
|
function yamlUnquote(value) {
|
|
226
342
|
if (value.startsWith('"') && value.endsWith('"')) {
|
|
227
343
|
return value
|
|
@@ -236,6 +352,9 @@ function yamlUnquote(value) {
|
|
|
236
352
|
function getTranscriptionsDir(dataPath) {
|
|
237
353
|
return path.join(dataPath, 'transcriptions');
|
|
238
354
|
}
|
|
355
|
+
function getResultHighlights(result) {
|
|
356
|
+
return result.highlights && result.highlights.length > 0 ? result.highlights : undefined;
|
|
357
|
+
}
|
|
239
358
|
/**
|
|
240
359
|
* Save transcription as summary.md + transcript.md in a timestamped folder.
|
|
241
360
|
* Returns the created folder path.
|
|
@@ -246,6 +365,7 @@ function saveTranscription(opts) {
|
|
|
246
365
|
const folderPath = path.join(parentDir, folderName);
|
|
247
366
|
fs.mkdirSync(folderPath, { recursive: true });
|
|
248
367
|
const transcribedAt = new Date().toISOString();
|
|
368
|
+
const highlights = getResultHighlights(opts.result);
|
|
249
369
|
// summary.md with frontmatter (stores all raw data for machine reading)
|
|
250
370
|
const frontmatter = buildFrontmatter({
|
|
251
371
|
title: opts.title,
|
|
@@ -259,8 +379,10 @@ function saveTranscription(opts) {
|
|
|
259
379
|
transcribedAt,
|
|
260
380
|
emoji: opts.result.emoji,
|
|
261
381
|
mergedFrom: opts.mergedFrom,
|
|
382
|
+
liveNotes: opts.liveNotes,
|
|
383
|
+
highlights,
|
|
262
384
|
});
|
|
263
|
-
const summaryBody = formatSummary(opts.result, opts.title, opts.mergedFrom);
|
|
385
|
+
const summaryBody = formatSummary(opts.result, opts.title, opts.mergedFrom, opts.liveNotes, highlights);
|
|
264
386
|
fs.writeFileSync(path.join(folderPath, 'summary.md'), `${frontmatter}\n\n${summaryBody}`, 'utf-8');
|
|
265
387
|
// transcript.md
|
|
266
388
|
fs.writeFileSync(path.join(folderPath, 'transcript.md'), formatTranscript(opts.result, opts.title), 'utf-8');
|
|
@@ -355,6 +477,8 @@ async function readTranscription(folderPath) {
|
|
|
355
477
|
console.warn('Failed to parse customFields from frontmatter:', e);
|
|
356
478
|
}
|
|
357
479
|
}
|
|
480
|
+
const liveNotes = parseLiveNotesField(meta.liveNotes);
|
|
481
|
+
const highlights = parseHighlightsField(meta.highlights);
|
|
358
482
|
return {
|
|
359
483
|
title: meta.title || path.basename(folderPath),
|
|
360
484
|
suggestedTitle: meta.suggestedTitle,
|
|
@@ -367,6 +491,8 @@ async function readTranscription(folderPath) {
|
|
|
367
491
|
transcribedAt: meta.transcribedAt,
|
|
368
492
|
emoji: meta.emoji,
|
|
369
493
|
mergedFrom: meta.mergedFrom,
|
|
494
|
+
liveNotes,
|
|
495
|
+
highlights,
|
|
370
496
|
notionPageUrl: meta.notionPageUrl,
|
|
371
497
|
slackSentAt: meta.slackSentAt,
|
|
372
498
|
slackError: meta.slackError,
|
|
@@ -397,6 +523,8 @@ async function updateTranscriptionStatus(folderPath, updates) {
|
|
|
397
523
|
customFields = undefined;
|
|
398
524
|
}
|
|
399
525
|
}
|
|
526
|
+
const liveNotes = parseLiveNotesField(meta.liveNotes);
|
|
527
|
+
const highlights = parseHighlightsField(meta.highlights);
|
|
400
528
|
// Spread first so any unknown frontmatter keys (added by future writers, or
|
|
401
529
|
// by hand-edits) survive the round-trip; named fields override with proper
|
|
402
530
|
// typing and defaults.
|
|
@@ -407,6 +535,8 @@ async function updateTranscriptionStatus(folderPath, updates) {
|
|
|
407
535
|
transcript: meta.transcript || '',
|
|
408
536
|
transcribedAt: meta.transcribedAt || new Date().toISOString(),
|
|
409
537
|
customFields,
|
|
538
|
+
liveNotes,
|
|
539
|
+
highlights,
|
|
410
540
|
};
|
|
411
541
|
applyStatusUpdate(merged, 'notionPageUrl', updates.notionPageUrl);
|
|
412
542
|
applyStatusUpdate(merged, 'slackSentAt', updates.slackSentAt);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "listener-ai",
|
|
3
|
-
"version": "2.5.1",
|
|
3
|
+
"version": "2.6.0",
|
|
4
4
|
"description": "A lightweight desktop application for recording and transcribing meetings with AI-powered notes.",
|
|
5
5
|
"main": "dist/main.js",
|
|
6
6
|
"bin": {
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"lint:fix": "oxlint --fix src renderer scripts",
|
|
30
30
|
"format": "oxfmt 'src/**/*.ts' 'renderer/**/*.ts' 'renderer/**/*.tsx' 'scripts/**/*.js' 'scripts/**/*.ts'",
|
|
31
31
|
"format:check": "oxfmt --check 'src/**/*.ts' 'renderer/**/*.ts' 'renderer/**/*.tsx' 'scripts/**/*.js' 'scripts/**/*.ts'",
|
|
32
|
-
"test": "tsc && node --test dist
|
|
32
|
+
"test": "tsc && node --test \"dist/**/*.test.js\"",
|
|
33
33
|
"dist": "pnpm run build && electron-builder",
|
|
34
34
|
"dist:mac": "pnpm run build && electron-builder --mac",
|
|
35
35
|
"dist:mac-x64": "pnpm run build && electron-builder --mac --x64",
|