video-context-mcp-server 0.32.0-beta → 0.33.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/dist/generated/version.d.ts +1 -1
- package/dist/generated/version.js +1 -1
- package/dist/index.js +3 -3
- package/dist/index.js.map +1 -1
- package/dist/services/audio/assemblyAiClient.d.ts +2 -9
- package/dist/services/audio/assemblyAiClient.d.ts.map +1 -1
- package/dist/services/audio/assemblyAiClient.js +71 -8
- package/dist/services/audio/assemblyAiClient.js.map +1 -1
- package/dist/services/audio/deepgramClient.d.ts +2 -9
- package/dist/services/audio/deepgramClient.d.ts.map +1 -1
- package/dist/services/audio/deepgramClient.js +77 -10
- package/dist/services/audio/deepgramClient.js.map +1 -1
- package/dist/services/audio/groqAudioClient.d.ts +2 -4
- package/dist/services/audio/groqAudioClient.d.ts.map +1 -1
- package/dist/services/audio/groqAudioClient.js +28 -10
- package/dist/services/audio/groqAudioClient.js.map +1 -1
- package/dist/services/geminiClient.d.ts +3 -2
- package/dist/services/geminiClient.d.ts.map +1 -1
- package/dist/services/geminiClient.js +7 -2
- package/dist/services/geminiClient.js.map +1 -1
- package/dist/tools/extractFrames.d.ts.map +1 -1
- package/dist/tools/extractFrames.js +58 -24
- package/dist/tools/extractFrames.js.map +1 -1
- package/dist/tools/orchestrator.d.ts.map +1 -1
- package/dist/tools/orchestrator.js +6 -1
- package/dist/tools/orchestrator.js.map +1 -1
- package/dist/tools/schemas.d.ts +8 -8
- package/dist/tools/summarizeVideo.d.ts.map +1 -1
- package/dist/tools/summarizeVideo.js +25 -2
- package/dist/tools/summarizeVideo.js.map +1 -1
- package/dist/tools/transcribeVideo.d.ts.map +1 -1
- package/dist/tools/transcribeVideo.js +52 -13
- package/dist/tools/transcribeVideo.js.map +1 -1
- package/dist/types/transcript.d.ts +43 -0
- package/dist/types/transcript.d.ts.map +1 -0
- package/dist/types/transcript.js +9 -0
- package/dist/types/transcript.js.map +1 -0
- package/dist/utils/artifactCache.d.ts +229 -0
- package/dist/utils/artifactCache.d.ts.map +1 -0
- package/dist/utils/artifactCache.js +559 -0
- package/dist/utils/artifactCache.js.map +1 -0
- package/dist/utils/audioUtils.d.ts +30 -1
- package/dist/utils/audioUtils.d.ts.map +1 -1
- package/dist/utils/audioUtils.js +111 -13
- package/dist/utils/audioUtils.js.map +1 -1
- package/dist/utils/videoUtils.d.ts.map +1 -1
- package/dist/utils/videoUtils.js +9 -5
- package/dist/utils/videoUtils.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@ An MCP server that gives coding assistants (GitHub Copilot, Cursor, Claude Code)
|
|
|
11
11
|
- 🖼️ **Frame Extraction** — Extract frames at specific timestamps or intervals
|
|
12
12
|
- 🔍 **Timestamp Search** — Find the exact moment when something happens in a video
|
|
13
13
|
- 📊 **Video Metadata** — Get duration, resolution, fps, codec, and other technical details
|
|
14
|
-
- 🎙️ **Audio Transcription** — Transcribe speech using Deepgram, AssemblyAI, Groq/Whisper, or Gemini
|
|
14
|
+
- 🎙️ **Audio Transcription** — Transcribe speech with paragraph-level timestamps (`[MM:SS]`) using Deepgram, AssemblyAI, Groq/Whisper, or Gemini
|
|
15
15
|
- 🔊 **Speaker Diarization** — Identify who said what (Deepgram and AssemblyAI)
|
|
16
16
|
- 🔊 **Audio-Enhanced Analysis** — Auto-transcribes audio and injects transcripts into AI prompts for richer results (GLM/Kimi only)
|
|
17
17
|
- 🔄 **Multi-Backend Support** — Choose between GLM-4.6V, Kimi K2.5, or Gemini
|
|
@@ -295,12 +295,12 @@ Set `AUDIO_MCP_DEFAULT_PROVIDER` to change the default.
|
|
|
295
295
|
|
|
296
296
|
### Caching
|
|
297
297
|
|
|
298
|
-
Downloaded videos are cached in a persistent directory
|
|
298
|
+
Downloaded videos, extracted frames, audio tracks, and transcripts are cached together in a persistent per-video directory. Subsequent tool calls that reference the same video reuse cached artifacts instead of re-running ffmpeg, re-downloading, or re-calling audio provider APIs.
|
|
299
299
|
|
|
300
|
-
| Variable | Description
|
|
301
|
-
| ----------------------------- |
|
|
302
|
-
| `VIDEO_MCP_CACHE_TTL_MINUTES` | How long
|
|
303
|
-
| `VIDEO_MCP_CACHE_MAX_ENTRIES` | Max entries in the
|
|
300
|
+
| Variable | Description | Default |
|
|
301
|
+
| ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ------- |
|
|
302
|
+
| `VIDEO_MCP_CACHE_TTL_MINUTES` | How long video artifacts (video file, frames, audio, transcripts) are cached across tool calls (minutes). Set `0` to disable. | `30` |
|
|
303
|
+
| `VIDEO_MCP_CACHE_MAX_ENTRIES` | Max entries in the artifact cache. LRU eviction at the video level. Set `0` for unbounded. | `100` |
|
|
304
304
|
|
|
305
305
|
#### Cache Storage Location
|
|
306
306
|
|
|
@@ -316,8 +316,8 @@ The cache is stored in a `video-mcp-cache` folder within your system's temporary
|
|
|
316
316
|
|
|
317
317
|
The server automatically manages the cache by:
|
|
318
318
|
|
|
319
|
-
1. **Startup Sweep:** Removing any
|
|
320
|
-
2. **LRU Eviction:**
|
|
319
|
+
1. **Startup Sweep:** Removing any cache entries older than the TTL at server startup.
|
|
320
|
+
2. **LRU Eviction:** Evicting the least-recently-used video entry (all its artifacts together) when the `VIDEO_MCP_CACHE_MAX_ENTRIES` limit is reached.
|
|
321
321
|
3. **Manual deletion:** You can safely delete the entire `video-mcp-cache` folder at any time to reclaim space.
|
|
322
322
|
|
|
323
323
|
### Video Summarization
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "0.32.0-beta";
|
|
1
|
+
export declare const VERSION = "0.33.0-beta";
|
|
2
2
|
//# sourceMappingURL=version.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -11,7 +11,7 @@ import { transcribeVideoTool } from './tools/transcribeVideo.js';
|
|
|
11
11
|
import { setLoggerServer } from './utils/logger.js';
|
|
12
12
|
import { initLicenseValidation } from './utils/license.js';
|
|
13
13
|
import { sweepStaleTempDirs } from './utils/tempFiles.js';
|
|
14
|
-
import {
|
|
14
|
+
import { sweepExpiredCacheDirs } from './utils/artifactCache.js';
|
|
15
15
|
import { triggerUpdateCheck, waitForUpdateCheck, wrapWithUpdateNotice, } from './utils/updateCheck.js';
|
|
16
16
|
import { VERSION } from './generated/version.js';
|
|
17
17
|
import { analyzeVideoSchema, summarizeVideoSchema, extractFramesSchema, searchTimestampSchema, getVideoInfoSchema, transcribeVideoSchema, } from './tools/schemas.js';
|
|
@@ -33,8 +33,8 @@ async function main() {
|
|
|
33
33
|
setLoggerServer(server);
|
|
34
34
|
// Clean up orphaned temp directories from previous runs that were killed mid-call
|
|
35
35
|
sweepStaleTempDirs().catch(() => { }); // fire-and-forget
|
|
36
|
-
// Remove expired
|
|
37
|
-
|
|
36
|
+
// Remove expired per-video artifact directories from the persistent cache
|
|
37
|
+
sweepExpiredCacheDirs().catch(() => { }); // fire-and-forget
|
|
38
38
|
// Validate the pro license (offline Ed25519 + online heartbeat if key is set)
|
|
39
39
|
await initLicenseValidation();
|
|
40
40
|
/**
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAEhF,uBAAuB;AACvB,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAA;AAC9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AACnD,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAA;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAA;AACzD,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAEhF,uBAAuB;AACvB,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAA;AAC9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AACnD,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAA;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAA;AACzD,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAA;AAChE,OAAO,EACL,kBAAkB,EAClB,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAA;AAEhD,OAAO,EACL,kBAAkB,EAClB,oBAAoB,EACpB,mBAAmB,EACnB,qBAAqB,EACrB,kBAAkB,EAClB,qBAAqB,GACtB,MAAM,oBAAoB,CAAA;AAE3B;;;GAGG;AAEH,KAAK,UAAU,IAAI;IACjB,0CAA0C;IAC1C,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,OAAO,EAAE,EAAE,EAAE,wCAAwC;SACtD;KACF,CACF,CAAA;IAED,8DAA8D;IAC9D,eAAe,CAAC,MAAM,CAAC,CAAA;IAEvB,kFAAkF;IAClF,kBAAkB,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA,CAAC,kBAAkB;IAEvD,0EAA0E;IAC1E,qBAAqB,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA,CAAC,kBAAkB;IAE1D,8EAA8E;IAC9E,MAAM,qBAAqB,EAAE,CAAA;IAE7B;;;;OAIG;IACH,MAAM,eAAe,GACnB,CAAI,OAA+C,EAAE,EAAE,CACvD,KAAK,EAAE,MAAS,EAA2B,EAAE;QAC3C,kBAAkB,EAAE,CAAA;QACpB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,CAAA;QACpC,MAAM,kBAAkB,EAAE,CAAA;QAC1B,OAAO,oBAAoB,CAAC,MAAM,CAAC,CAAA;IACrC,CAAC,CAAA;IAEH,oCAAoC;IAEpC,4DAA4D;IAC5D,MAAM,CAAC,YAAY,CACjB,eAAe,EACf;QACE,KAAK,EAAE,eAAe;QACtB,WAAW,EACT,mGAAmG;QACrG,WAAW,EAAE,kBAAkB;KAChC,EACD,eAAe,CAAC,gBAAgB,CAAC,CAClC,CAAA;IAED,8DAA8D;IAC9D,MAAM,CAAC,YAAY,CACjB,iBAAiB,EACjB;QACE,KAAK,EAAE,iBAAiB;QACxB,WAAW,EACT,8MAA8M;QAChN,WAAW,EAAE,oBAAoB;KAClC,EACD,eAAe,CAAC,kBAAkB,CAAC,CACpC,CAAA;IAED,qDAAqD;IACrD,MAAM,CAAC,YAAY,CACjB,gBAAgB,EAChB;QACE,KAAK,EAAE,gBAAgB;QACvB,WA
AW,EACT,iKAAiK;QACnK,WAAW,EAAE,mBAAmB;KACjC,EACD,eAAe,CAAC,iBAAiB,CAAC,CACnC,CAAA;IAED,kEAAkE;IAClE,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,0LAA0L;QAC5L,WAAW,EAAE,qBAAqB;KACnC,EACD,eAAe,CAAC,mBAAmB,CAAC,CACrC,CAAA;IAED,8CAA8C;IAC9C,MAAM,CAAC,YAAY,CACjB,gBAAgB,EAChB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EACT,sLAAsL;QACxL,WAAW,EAAE,kBAAkB;KAChC,EACD,eAAe,CAAC,gBAAgB,CAAC,CAClC,CAAA;IAED,2DAA2D;IAC3D,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,kMAAkM;QACpM,WAAW,EAAE,qBAAqB;KACnC,EACD,eAAe,CAAC,mBAAmB,CAAC,CACrC,CAAA;IAED,yCAAyC;IACzC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAA;IAC5C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;IAE/B,+DAA+D;AACjE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAA;IAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;AACjB,CAAC,CAAC,CAAA"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { TranscriptData } from '../../types/transcript.js';
|
|
1
2
|
/**
|
|
2
3
|
* AssemblyAI Audio Client
|
|
3
4
|
* Transcribes audio files using AssemblyAI's Universal (best) model
|
|
@@ -7,14 +8,6 @@ export interface AssemblyAiTranscribeOptions {
|
|
|
7
8
|
language?: string;
|
|
8
9
|
diarize?: boolean;
|
|
9
10
|
}
|
|
10
|
-
export interface TranscriptSegment {
|
|
11
|
-
speaker?: string;
|
|
12
|
-
text: string;
|
|
13
|
-
}
|
|
14
|
-
export interface TranscriptResult {
|
|
15
|
-
text: string;
|
|
16
|
-
segments?: TranscriptSegment[];
|
|
17
|
-
}
|
|
18
11
|
export declare class AssemblyAiClient {
|
|
19
12
|
private client;
|
|
20
13
|
constructor(apiKey: string);
|
|
@@ -23,6 +16,6 @@ export declare class AssemblyAiClient {
|
|
|
23
16
|
* @param audioPath - Local path to the audio file (.m4a, .mp3, etc.)
|
|
24
17
|
* @param options - Transcription options
|
|
25
18
|
*/
|
|
26
|
-
transcribe(audioPath: string, options?: AssemblyAiTranscribeOptions): Promise<TranscriptResult>;
|
|
19
|
+
transcribe(audioPath: string, options?: AssemblyAiTranscribeOptions): Promise<TranscriptData>;
|
|
27
20
|
}
|
|
28
21
|
//# sourceMappingURL=assemblyAiClient.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assemblyAiClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/assemblyAiClient.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"assemblyAiClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/assemblyAiClient.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,cAAc,EAGf,MAAM,2BAA2B,CAAA;AAElC;;;;GAIG;AAEH,MAAM,WAAW,2BAA2B;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAY;gBAEd,MAAM,EAAE,MAAM;IAI1B;;;;OAIG;IACG,UAAU,CACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,2BAAgC,GACxC,OAAO,CAAC,cAAc,CAAC;CAmG3B"}
|
|
@@ -23,18 +23,81 @@ export class AssemblyAiClient {
|
|
|
23
23
|
if (!transcript.text) {
|
|
24
24
|
throw new Error('AssemblyAI returned an empty transcript');
|
|
25
25
|
}
|
|
26
|
-
|
|
26
|
+
let paragraphs;
|
|
27
|
+
let text;
|
|
28
|
+
// AssemblyAI timestamps are in milliseconds — divide by 1000 to get seconds
|
|
27
29
|
if (options.diarize &&
|
|
28
30
|
transcript.utterances &&
|
|
29
31
|
transcript.utterances.length > 0) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
// Diarized: use utterances which include speaker label + start/end
|
|
33
|
+
paragraphs = transcript.utterances.map((u) => {
|
|
34
|
+
const words = (u.words ?? []).map((w) => ({
|
|
35
|
+
word: w.text,
|
|
36
|
+
start: w.start / 1000,
|
|
37
|
+
end: w.end / 1000,
|
|
38
|
+
confidence: w.confidence,
|
|
39
|
+
speaker: `Speaker ${u.speaker}`,
|
|
40
|
+
}));
|
|
41
|
+
return {
|
|
42
|
+
text: u.text,
|
|
43
|
+
start: u.start / 1000,
|
|
44
|
+
end: u.end / 1000,
|
|
45
|
+
speaker: `Speaker ${u.speaker}`,
|
|
46
|
+
words,
|
|
47
|
+
};
|
|
48
|
+
});
|
|
49
|
+
text = paragraphs.map((p) => `${p.speaker}: ${p.text}`).join('\n');
|
|
36
50
|
}
|
|
37
|
-
|
|
51
|
+
else {
|
|
52
|
+
// Non-diarized: get paragraph-level groupings via the paragraphs endpoint
|
|
53
|
+
try {
|
|
54
|
+
const paragraphsResponse = await this.client.transcripts.paragraphs(transcript.id);
|
|
55
|
+
const aaiParagraphs = paragraphsResponse.paragraphs ?? [];
|
|
56
|
+
if (aaiParagraphs.length === 0)
|
|
57
|
+
throw new Error('No paragraphs');
|
|
58
|
+
paragraphs = aaiParagraphs.map((p) => {
|
|
59
|
+
const words = (p.words ?? []).map((w) => ({
|
|
60
|
+
word: w.text,
|
|
61
|
+
start: w.start / 1000,
|
|
62
|
+
end: w.end / 1000,
|
|
63
|
+
confidence: w.confidence,
|
|
64
|
+
}));
|
|
65
|
+
return {
|
|
66
|
+
text: p.text,
|
|
67
|
+
start: p.start / 1000,
|
|
68
|
+
end: p.end / 1000,
|
|
69
|
+
words,
|
|
70
|
+
};
|
|
71
|
+
});
|
|
72
|
+
text = paragraphs.map((p) => p.text).join('\n\n');
|
|
73
|
+
}
|
|
74
|
+
catch {
|
|
75
|
+
// Fallback: use flat words from the transcript
|
|
76
|
+
const transcriptWords = transcript.words ?? [];
|
|
77
|
+
const allWords = transcriptWords.map((w) => ({
|
|
78
|
+
word: w.text,
|
|
79
|
+
start: w.start / 1000,
|
|
80
|
+
end: w.end / 1000,
|
|
81
|
+
confidence: w.confidence,
|
|
82
|
+
}));
|
|
83
|
+
paragraphs = [
|
|
84
|
+
{
|
|
85
|
+
text: transcript.text,
|
|
86
|
+
start: allWords[0]?.start ?? null,
|
|
87
|
+
end: allWords[allWords.length - 1]?.end ?? null,
|
|
88
|
+
words: allWords,
|
|
89
|
+
},
|
|
90
|
+
];
|
|
91
|
+
text = transcript.text;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
return {
|
|
95
|
+
text,
|
|
96
|
+
paragraphs,
|
|
97
|
+
provider: 'assemblyai',
|
|
98
|
+
language: options.language,
|
|
99
|
+
diarize: options.diarize,
|
|
100
|
+
};
|
|
38
101
|
}
|
|
39
102
|
}
|
|
40
103
|
//# sourceMappingURL=assemblyAiClient.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assemblyAiClient.js","sourceRoot":"","sources":["../../../src/services/audio/assemblyAiClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;
|
|
1
|
+
{"version":3,"file":"assemblyAiClient.js","sourceRoot":"","sources":["../../../src/services/audio/assemblyAiClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAkBvC,MAAM,OAAO,gBAAgB;IACnB,MAAM,CAAY;IAE1B,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU,CACd,SAAiB,EACjB,UAAuC,EAAE;QAEzC,MAAM,MAAM,GAA2D;YACrE,KAAK,EAAE,SAAS;YAChB,aAAa,EAAE,CAAC,iBAAiB,CAAC;YAClC,aAAa,EAAE,OAAO,CAAC,QAAQ;YAC/B,cAAc,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;SACzC,CAAA;QAED,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC,CAAA;QAEnE,IAAI,UAAU,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,mCAAmC,UAAU,CAAC,KAAK,EAAE,CAAC,CAAA;QACxE,CAAC;QAED,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YACrB,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAA;QAC5D,CAAC;QAED,IAAI,UAAiC,CAAA;QACrC,IAAI,IAAY,CAAA;QAEhB,4EAA4E;QAC5E,IACE,OAAO,CAAC,OAAO;YACf,UAAU,CAAC,UAAU;YACrB,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAChC,CAAC;YACD,mEAAmE;YACnE,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC3C,MAAM,KAAK,GAAqB,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC1D,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;oBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;oBACjB,UAAU,EAAE,CAAC,CAAC,UAAU;oBACxB,OAAO,EAAE,WAAW,CAAC,CAAC,OAAO,EAAE;iBAChC,CAAC,CAAC,CAAA;gBACH,OAAO;oBACL,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;oBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;oBACjB,OAAO,EAAE,WAAW,CAAC,CAAC,OAAO,EAAE;oBAC/B,KAAK;iBACN,CAAA;YACH,CAAC,CAAC,CAAA;YACF,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpE,CAAC;aAAM,CAAC;YACN,0EAA0E;YAC1E,IAAI,CAAC;gBACH,MAAM,kBAAkB,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,UAAU,CACjE,UAAU,CAAC,EAAE,CACd,CAAA;gBACD,MAAM,aAAa,GAAG,kBAAkB,CAAC,UAAU,IAAI,EAAE,CAAA;gBAEzD,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;oBAAE,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAA;g
BAEhE,UAAU,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;oBACnC,MAAM,KAAK,GAAqB,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;wBAC1D,IAAI,EAAE,CAAC,CAAC,IAAI;wBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;wBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;wBACjB,UAAU,EAAE,CAAC,CAAC,UAAU;qBACzB,CAAC,CAAC,CAAA;oBACH,OAAO;wBACL,IAAI,EAAE,CAAC,CAAC,IAAI;wBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;wBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;wBACjB,KAAK;qBACN,CAAA;gBACH,CAAC,CAAC,CAAA;gBACF,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACnD,CAAC;YAAC,MAAM,CAAC;gBACP,+CAA+C;gBAC/C,MAAM,eAAe,GAAG,UAAU,CAAC,KAAK,IAAI,EAAE,CAAA;gBAC9C,MAAM,QAAQ,GAAqB,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC7D,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;oBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;oBACjB,UAAU,EAAE,CAAC,CAAC,UAAU;iBACzB,CAAC,CAAC,CAAA;gBACH,UAAU,GAAG;oBACX;wBACE,IAAI,EAAE,UAAU,CAAC,IAAI;wBACrB,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,IAAI;wBACjC,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,IAAI;wBAC/C,KAAK,EAAE,QAAQ;qBAChB;iBACF,CAAA;gBACD,IAAI,GAAG,UAAU,CAAC,IAAI,CAAA;YACxB,CAAC;QACH,CAAC;QAED,OAAO;YACL,IAAI;YACJ,UAAU;YACV,QAAQ,EAAE,YAAY;YACtB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,OAAO,EAAE,OAAO,CAAC,OAAO;SACzB,CAAA;IACH,CAAC;CACF"}
|
|
@@ -1,15 +1,8 @@
|
|
|
1
|
+
import type { TranscriptData } from '../../types/transcript.js';
|
|
1
2
|
export interface DeepgramTranscribeOptions {
|
|
2
3
|
language?: string;
|
|
3
4
|
diarize?: boolean;
|
|
4
5
|
}
|
|
5
|
-
export interface TranscriptSegment {
|
|
6
|
-
speaker?: string;
|
|
7
|
-
text: string;
|
|
8
|
-
}
|
|
9
|
-
export interface TranscriptResult {
|
|
10
|
-
text: string;
|
|
11
|
-
segments?: TranscriptSegment[];
|
|
12
|
-
}
|
|
13
6
|
export declare class DeepgramClient {
|
|
14
7
|
private apiKey;
|
|
15
8
|
constructor(apiKey: string);
|
|
@@ -18,6 +11,6 @@ export declare class DeepgramClient {
|
|
|
18
11
|
* @param audioPath - Local path to the audio file (.m4a, .mp3, etc.)
|
|
19
12
|
* @param options - Transcription options
|
|
20
13
|
*/
|
|
21
|
-
transcribe(audioPath: string, options?: DeepgramTranscribeOptions): Promise<TranscriptResult>;
|
|
14
|
+
transcribe(audioPath: string, options?: DeepgramTranscribeOptions): Promise<TranscriptData>;
|
|
22
15
|
}
|
|
23
16
|
//# sourceMappingURL=deepgramClient.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"deepgramClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/deepgramClient.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"deepgramClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/deepgramClient.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,cAAc,EAGf,MAAM,2BAA2B,CAAA;AASlC,MAAM,WAAW,yBAAyB;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAAQ;gBAEV,MAAM,EAAE,MAAM;IAI1B;;;;OAIG;IACG,UAAU,CACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,cAAc,CAAC;CA6G3B"}
|
|
@@ -25,6 +25,7 @@ export class DeepgramClient {
|
|
|
25
25
|
punctuate: true,
|
|
26
26
|
smart_format: true,
|
|
27
27
|
utterances: options.diarize ?? false,
|
|
28
|
+
paragraphs: true,
|
|
28
29
|
});
|
|
29
30
|
if (error) {
|
|
30
31
|
throw new Error(`Deepgram transcription error: ${error.message}`);
|
|
@@ -34,17 +35,83 @@ export class DeepgramClient {
|
|
|
34
35
|
if (!alternative) {
|
|
35
36
|
throw new Error('Deepgram returned no transcription results');
|
|
36
37
|
}
|
|
37
|
-
//
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
38
|
+
// Build flat word array with timing from alternative.words
|
|
39
|
+
const allWords = (alternative.words ?? []).map((w) => ({
|
|
40
|
+
word: w.punctuated_word ?? w.word ?? '',
|
|
41
|
+
start: w.start,
|
|
42
|
+
end: w.end,
|
|
43
|
+
confidence: w.confidence,
|
|
44
|
+
speaker: w.speaker != null ? `Speaker ${w.speaker}` : undefined,
|
|
45
|
+
}));
|
|
46
|
+
/** Returns words whose time range falls within [start, end]. */
|
|
47
|
+
function wordsForRange(start, end) {
|
|
48
|
+
return allWords.filter((w) => w.start >= start - 0.01 && w.end <= end + 0.01);
|
|
46
49
|
}
|
|
47
|
-
|
|
50
|
+
let paragraphs;
|
|
51
|
+
let text;
|
|
52
|
+
if (options.diarize) {
|
|
53
|
+
// Diarized: use utterances which include speaker label + start/end
|
|
54
|
+
const utterances = result?.results?.utterances ?? [];
|
|
55
|
+
if (utterances.length > 0) {
|
|
56
|
+
paragraphs = utterances.map((u) => ({
|
|
57
|
+
text: u.transcript,
|
|
58
|
+
start: u.start,
|
|
59
|
+
end: u.end,
|
|
60
|
+
speaker: `Speaker ${u.speaker}`,
|
|
61
|
+
words: wordsForRange(u.start, u.end),
|
|
62
|
+
}));
|
|
63
|
+
text = paragraphs.map((p) => `${p.speaker}: ${p.text}`).join('\n');
|
|
64
|
+
}
|
|
65
|
+
else {
|
|
66
|
+
// Fallback: single paragraph from flat transcript
|
|
67
|
+
paragraphs = [
|
|
68
|
+
{
|
|
69
|
+
text: alternative.transcript,
|
|
70
|
+
start: allWords[0]?.start ?? null,
|
|
71
|
+
end: allWords[allWords.length - 1]?.end ?? null,
|
|
72
|
+
words: allWords,
|
|
73
|
+
},
|
|
74
|
+
];
|
|
75
|
+
text = alternative.transcript;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
// Non-diarized: use Deepgram paragraphs API response when available
|
|
80
|
+
const dgParagraphs = alternative.paragraphs?.paragraphs ?? [];
|
|
81
|
+
if (dgParagraphs.length > 0) {
|
|
82
|
+
paragraphs = dgParagraphs.map((p) => {
|
|
83
|
+
const paragraphText = p.sentences?.map((s) => s.text).join(' ') ?? '';
|
|
84
|
+
return {
|
|
85
|
+
text: paragraphText,
|
|
86
|
+
start: p.start,
|
|
87
|
+
end: p.end,
|
|
88
|
+
words: wordsForRange(p.start, p.end),
|
|
89
|
+
};
|
|
90
|
+
});
|
|
91
|
+
text =
|
|
92
|
+
alternative.paragraphs?.transcript ??
|
|
93
|
+
paragraphs.map((p) => p.text).join('\n\n');
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
// Fallback: single paragraph from flat transcript
|
|
97
|
+
paragraphs = [
|
|
98
|
+
{
|
|
99
|
+
text: alternative.transcript,
|
|
100
|
+
start: allWords[0]?.start ?? null,
|
|
101
|
+
end: allWords[allWords.length - 1]?.end ?? null,
|
|
102
|
+
words: allWords,
|
|
103
|
+
},
|
|
104
|
+
];
|
|
105
|
+
text = alternative.transcript;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
return {
|
|
109
|
+
text,
|
|
110
|
+
paragraphs,
|
|
111
|
+
provider: 'deepgram',
|
|
112
|
+
language: options.language,
|
|
113
|
+
diarize: options.diarize,
|
|
114
|
+
};
|
|
48
115
|
}
|
|
49
116
|
}
|
|
50
117
|
//# sourceMappingURL=deepgramClient.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"deepgramClient.js","sourceRoot":"","sources":["../../../src/services/audio/deepgramClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AAC5C,OAAO,KAAK,EAAE,MAAM,aAAa,CAAA;
|
|
1
|
+
{"version":3,"file":"deepgramClient.js","sourceRoot":"","sources":["../../../src/services/audio/deepgramClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AAC5C,OAAO,KAAK,EAAE,MAAM,aAAa,CAAA;AAOjC;;;GAGG;AAEH,MAAM,cAAc,GAAG,QAAQ,CAAA;AAO/B,MAAM,OAAO,cAAc;IACjB,MAAM,CAAQ;IAEtB,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU,CACd,SAAiB,EACjB,UAAqC,EAAE;QAEvC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACxC,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAA;QAEhD,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,cAAc,CACtE,WAAW,EACX;YACE,KAAK,EAAE,cAAc;YACrB,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;YAClC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;YACjC,SAAS,EAAE,IAAI;YACf,YAAY,EAAE,IAAI;YAClB,UAAU,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;YACpC,UAAU,EAAE,IAAI;SACjB,CACF,CAAA;QAED,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,iCAAiC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;QACnE,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAA;QAC9C,MAAM,WAAW,GAAG,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAA;QAE9C,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAA;QAC/D,CAAC;QAED,2DAA2D;QAC3D,MAAM,QAAQ,GAAqB,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACvE,IAAI,EAAE,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,IAAI,IAAI,EAAE;YACvC,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,UAAU;YACxB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC,CAAC,CAAA;QAEH,gEAAgE;QAChE,SAAS,aAAa,CAAC,KAAa,EAAE,GAAW;YAC/C,OAAO,QAAQ,CAAC,MAAM,CACpB,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,GAAG,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,GAAG,IAAI,CACtD,CAAA;QACH,CAAC;QAED,IAAI,UAAiC,CAAA;QACrC,IAAI,IAAY,CAAA;QAEhB,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,mEAAmE;YACnE,MAAM,UAAU,GAAG,MAAM,EAAE,OAAO,EAAE,UAAU,IAAI,EAAE,CAAA;YACpD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,UAAU,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAA
C,EAAE,EAAE,CAAC,CAAC;oBAClC,IAAI,EAAE,CAAC,CAAC,UAAU;oBAClB,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,GAAG,EAAE,CAAC,CAAC,GAAG;oBACV,OAAO,EAAE,WAAW,CAAC,CAAC,OAAO,EAAE;oBAC/B,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,CAAC;iBACrC,CAAC,CAAC,CAAA;gBACH,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACpE,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,UAAU,GAAG;oBACX;wBACE,IAAI,EAAE,WAAW,CAAC,UAAU;wBAC5B,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,IAAI;wBACjC,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,IAAI;wBAC/C,KAAK,EAAE,QAAQ;qBAChB;iBACF,CAAA;gBACD,IAAI,GAAG,WAAW,CAAC,UAAU,CAAA;YAC/B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,oEAAoE;YACpE,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,EAAE,UAAU,IAAI,EAAE,CAAA;YAC7D,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;oBAClC,MAAM,aAAa,GAAG,CAAC,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;oBACrE,OAAO;wBACL,IAAI,EAAE,aAAa;wBACnB,KAAK,EAAE,CAAC,CAAC,KAAK;wBACd,GAAG,EAAE,CAAC,CAAC,GAAG;wBACV,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,CAAC;qBACrC,CAAA;gBACH,CAAC,CAAC,CAAA;gBACF,IAAI;oBACF,WAAW,CAAC,UAAU,EAAE,UAAU;wBAClC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAC9C,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,UAAU,GAAG;oBACX;wBACE,IAAI,EAAE,WAAW,CAAC,UAAU;wBAC5B,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,IAAI;wBACjC,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,IAAI;wBAC/C,KAAK,EAAE,QAAQ;qBAChB;iBACF,CAAA;gBACD,IAAI,GAAG,WAAW,CAAC,UAAU,CAAA;YAC/B,CAAC;QACH,CAAC;QAED,OAAO;YACL,IAAI;YACJ,UAAU;YACV,QAAQ,EAAE,UAAU;YACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,OAAO,EAAE,OAAO,CAAC,OAAO;SACzB,CAAA;IACH,CAAC;CACF"}
|
|
@@ -1,10 +1,8 @@
|
|
|
1
|
+
import type { TranscriptData } from '../../types/transcript.js';
|
|
1
2
|
export interface GroqTranscribeOptions {
|
|
2
3
|
language?: string;
|
|
3
4
|
translate?: boolean;
|
|
4
5
|
}
|
|
5
|
-
export interface TranscriptResult {
|
|
6
|
-
text: string;
|
|
7
|
-
}
|
|
8
6
|
export declare class GroqAudioClient {
|
|
9
7
|
private client;
|
|
10
8
|
constructor(apiKey: string);
|
|
@@ -13,6 +11,6 @@ export declare class GroqAudioClient {
|
|
|
13
11
|
* @param audioPath - Local path to the audio file (.m4a, .mp3, etc.)
|
|
14
12
|
* @param options - Transcription options
|
|
15
13
|
*/
|
|
16
|
-
transcribe(audioPath: string, options?: GroqTranscribeOptions): Promise<TranscriptResult>;
|
|
14
|
+
transcribe(audioPath: string, options?: GroqTranscribeOptions): Promise<TranscriptData>;
|
|
17
15
|
}
|
|
18
16
|
//# sourceMappingURL=groqAudioClient.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"groqAudioClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/groqAudioClient.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"groqAudioClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/groqAudioClient.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,cAAc,EAGf,MAAM,2BAA2B,CAAA;AAWlC,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,SAAS,CAAC,EAAE,OAAO,CAAA;CACpB;AAwCD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAQ;gBAEV,MAAM,EAAE,MAAM;IAO1B;;;;OAIG;IACG,UAAU,CACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,qBAA0B,GAClC,OAAO,CAAC,cAAc,CAAC;CAuB3B"}
|
|
@@ -7,6 +7,22 @@ import * as fs from 'fs';
|
|
|
7
7
|
*/
|
|
8
8
|
const GROQ_BASE_URL = 'https://api.groq.com/openai/v1';
|
|
9
9
|
const GROQ_MODEL = 'whisper-large-v3';
|
|
10
|
+
/** Builds a TranscriptData from a verbose transcription response. */
|
|
11
|
+
function buildFromVerbose(response, provider, opts) {
|
|
12
|
+
const segments = response.segments ?? [];
|
|
13
|
+
const paragraphs = segments.map((s) => {
|
|
14
|
+
const words = (s.words ?? []).map((w) => ({
|
|
15
|
+
word: w.word,
|
|
16
|
+
start: w.start,
|
|
17
|
+
end: w.end,
|
|
18
|
+
}));
|
|
19
|
+
return { text: s.text.trim(), start: s.start, end: s.end, words };
|
|
20
|
+
});
|
|
21
|
+
const text = paragraphs.length > 0
|
|
22
|
+
? paragraphs.map((p) => p.text).join(' ')
|
|
23
|
+
: response.text;
|
|
24
|
+
return { text, paragraphs, provider, ...opts };
|
|
25
|
+
}
|
|
10
26
|
export class GroqAudioClient {
|
|
11
27
|
client;
|
|
12
28
|
constructor(apiKey) {
|
|
@@ -21,23 +37,25 @@ export class GroqAudioClient {
|
|
|
21
37
|
* @param options - Transcription options
|
|
22
38
|
*/
|
|
23
39
|
async transcribe(audioPath, options = {}) {
|
|
24
|
-
const fileStream = fs.createReadStream(audioPath);
|
|
25
40
|
if (options.translate) {
|
|
26
|
-
// Use the translations endpoint (always outputs English)
|
|
27
|
-
|
|
41
|
+
// Use the translations endpoint (always outputs English).
|
|
42
|
+
// verbose_json gives us segment timestamps.
|
|
43
|
+
const fileStream = fs.createReadStream(audioPath);
|
|
44
|
+
const response = (await this.client.audio.translations.create({
|
|
28
45
|
file: fileStream,
|
|
29
46
|
model: GROQ_MODEL,
|
|
30
|
-
response_format: '
|
|
31
|
-
});
|
|
32
|
-
return {
|
|
47
|
+
response_format: 'verbose_json',
|
|
48
|
+
}));
|
|
49
|
+
return buildFromVerbose(response, 'groq', { translate: true });
|
|
33
50
|
}
|
|
34
|
-
const
|
|
51
|
+
const fileStream = fs.createReadStream(audioPath);
|
|
52
|
+
const response = (await this.client.audio.transcriptions.create({
|
|
35
53
|
file: fileStream,
|
|
36
54
|
model: GROQ_MODEL,
|
|
37
55
|
language: options.language,
|
|
38
|
-
response_format: '
|
|
39
|
-
});
|
|
40
|
-
return {
|
|
56
|
+
response_format: 'verbose_json',
|
|
57
|
+
}));
|
|
58
|
+
return buildFromVerbose(response, 'groq', { language: options.language });
|
|
41
59
|
}
|
|
42
60
|
}
|
|
43
61
|
//# sourceMappingURL=groqAudioClient.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"groqAudioClient.js","sourceRoot":"","sources":["../../../src/services/audio/groqAudioClient.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAA;AAC3B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAA;
|
|
1
|
+
{"version":3,"file":"groqAudioClient.js","sourceRoot":"","sources":["../../../src/services/audio/groqAudioClient.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAA;AAC3B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAA;AAOxB;;;;GAIG;AAEH,MAAM,aAAa,GAAG,gCAAgC,CAAA;AACtD,MAAM,UAAU,GAAG,kBAAkB,CAAA;AAuBrC,qEAAqE;AACrE,SAAS,gBAAgB,CACvB,QAA8B,EAC9B,QAAgB,EAChB,IAAgD;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,IAAI,EAAE,CAAA;IACxC,MAAM,UAAU,GAA0B,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC3D,MAAM,KAAK,GAAqB,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1D,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;SACX,CAAC,CAAC,CAAA;QACH,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,CAAA;IACnE,CAAC,CAAC,CAAA;IACF,MAAM,IAAI,GACR,UAAU,CAAC,MAAM,GAAG,CAAC;QACnB,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;QACzC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAA;IACnB,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,CAAA;AAChD,CAAC;AAED,MAAM,OAAO,eAAe;IAClB,MAAM,CAAQ;IAEtB,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC;YACvB,MAAM;YACN,OAAO,EAAE,aAAa;SACvB,CAAC,CAAA;IACJ,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU,CACd,SAAiB,EACjB,UAAiC,EAAE;QAEnC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,0DAA0D;YAC1D,4CAA4C;YAC5C,MAAM,UAAU,GAAG,EAAE,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAA;YACjD,MAAM,QAAQ,GAAG,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,CAAC;gBAC5D,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,UAAU;gBACjB,eAAe,EAAE,cAAwB;aAC1C,CAAC,CAAoC,CAAA;YACtC,OAAO,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QAChE,CAAC;QAED,MAAM,UAAU,GAAG,EAAE,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAA;QACjD,MAAM,QAAQ,GAAG,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC;YAC9D,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,UAAU;YACjB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,eAAe,EAAE,cAAwB;SAC1C,CAAC,CAAoC,CAAA;QAEtC,OAAO,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,QAAQ,EA
AE,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAA;IAC3E,CAAC;CACF"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type File as GeminiFile } from '@google/genai';
|
|
2
|
+
import type { TranscriptData } from '../types/transcript.js';
|
|
2
3
|
export interface GeminiClientOptions {
|
|
3
4
|
apiKey: string;
|
|
4
5
|
}
|
|
@@ -42,9 +43,9 @@ export declare class GeminiClient {
|
|
|
42
43
|
* Transcribe audio from an uploaded file object
|
|
43
44
|
* Re-uses the same upload → wait → prompt cycle, with an audio-specific prompt.
|
|
44
45
|
* @param fileObject - The file object returned from uploadVideo() for an audio file
|
|
45
|
-
* @returns
|
|
46
|
+
* @returns Structured transcript data (Gemini provides no programmatic timestamps)
|
|
46
47
|
*/
|
|
47
|
-
transcribeAudio(fileObject: GeminiFile): Promise<
|
|
48
|
+
transcribeAudio(fileObject: GeminiFile): Promise<TranscriptData>;
|
|
48
49
|
/**
|
|
49
50
|
* Analyze multiple images (e.g., extracted frames)
|
|
50
51
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/services/geminiClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAe,KAAK,IAAI,IAAI,UAAU,EAAE,MAAM,eAAe,CAAA;
|
|
1
|
+
{"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/services/geminiClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAe,KAAK,IAAI,IAAI,UAAU,EAAE,MAAM,eAAe,CAAA;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAA;AAS5D,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAA;CACf;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAa;gBAEf,OAAO,EAAE,mBAAmB;IAMxC;;;;;OAKG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAS3E;;;;OAIG;IACG,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBpE;;;;;OAKG;IACG,YAAY,CAChB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC;IAwBlB;;;;OAIG;IACG,cAAc,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAU7D;;;;;OAKG;IACG,eAAe,CACnB,UAAU,EAAE,UAAU,EACtB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,MAAM,CAAC;IAOlB;;;;;OAKG;IACG,eAAe,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,cAAc,CAAC;IAYtE;;OAEG;IACG,aAAa,CACjB,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACjD,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,CAAC;CAyBnB;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,IAAI,YAAY,GAAG,IAAI,CAMxD"}
|
|
@@ -104,12 +104,17 @@ Please be as precise as possible. If the event occurs multiple times, list all o
|
|
|
104
104
|
* Transcribe audio from an uploaded file object
|
|
105
105
|
* Re-uses the same upload → wait → prompt cycle, with an audio-specific prompt.
|
|
106
106
|
* @param fileObject - The file object returned from uploadVideo() for an audio file
|
|
107
|
-
* @returns
|
|
107
|
+
* @returns Structured transcript data (Gemini provides no programmatic timestamps)
|
|
108
108
|
*/
|
|
109
109
|
async transcribeAudio(fileObject) {
|
|
110
110
|
const prompt = 'Please provide a verbatim transcript of all spoken audio in this recording. ' +
|
|
111
111
|
'Format each sentence on its own line. Do not add any commentary or explanations — output only the transcript.';
|
|
112
|
-
|
|
112
|
+
const text = await this.analyzeVideo(fileObject, prompt);
|
|
113
|
+
return {
|
|
114
|
+
text,
|
|
115
|
+
paragraphs: [{ text, start: null, end: null }],
|
|
116
|
+
provider: 'gemini',
|
|
117
|
+
};
|
|
113
118
|
}
|
|
114
119
|
/**
|
|
115
120
|
* Analyze multiple images (e.g., extracted frames)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"geminiClient.js","sourceRoot":"","sources":["../../src/services/geminiClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAA2B,MAAM,eAAe,CAAA;
|
|
1
|
+
{"version":3,"file":"geminiClient.js","sourceRoot":"","sources":["../../src/services/geminiClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAA2B,MAAM,eAAe,CAAA;AAGpE;;;GAGG;AAEH,MAAM,YAAY,GAAG,wBAAwB,CAAA;AAM7C,MAAM,OAAO,YAAY;IACf,MAAM,CAAa;IAE3B,YAAY,OAA4B;QACtC,IAAI,CAAC,MAAM,GAAG,IAAI,WAAW,CAAC;YAC5B,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAA;IACJ,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,QAAiB;QACnD,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;YAChD,IAAI,EAAE,QAAQ;YACd,GAAG,CAAC,QAAQ,IAAI,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,EAAE,CAAC;SAC1C,CAAC,CAAA;QAEF,OAAO,UAAU,CAAA;IACnB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,qBAAqB,CAAC,IAAwB;QAClD,IAAI,CAAC,IAAI;YAAE,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAA;QAChE,MAAM,iBAAiB,GAAG,GAAG,CAAA,CAAC,yBAAyB;QACvD,IAAI,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,CAAA;QAChD,IAAI,QAAQ,GAAG,CAAC,CAAA;QAChB,OAAO,IAAI,CAAC,KAAK,KAAK,YAAY,EAAE,CAAC;YACnC,IAAI,QAAQ,IAAI,iBAAiB,EAAE,CAAC;gBAClC,MAAM,IAAI,KAAK,CACb,0CAA0C,iBAAiB,GAAG,CAAC,YAAY;oBACzE,2DAA2D,CAC9D,CAAA;YACH,CAAC;YACD,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAA;YACzD,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,CAAA;YAC5C,QAAQ,EAAE,CAAA;QACZ,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;QAC3D,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,YAAY,CAChB,UAAsB,EACtB,QAAgB;QAEhB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC;YACxD,KAAK,EAAE,YAAY;YACnB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,KAAK,EAAE;wBACL;4BACE,QAAQ,EAAE;gCACR,OAAO,EAAE,UAAU,CAAC,GAAG;gCACvB,QAAQ,EAAE,UAAU,CAAC,QAAQ;6BAC9B;yBACF;wBACD;4BACE,IAAI,EAAE,QAAQ;yBACf;qBACF;iBACF;aACF;SACF,CAAC,CAAA;QAEF,OAAO,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAA;IAC5B,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,cAAc,CAAC,UAAsB;QACzC,MAAM,MAAM,GAAG;;;;0CAIuB,CAAA;QAEtC,OAAO,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,MAAM,CAAC,CAAA;IAC9C,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,eAAe,CACnB,UAAsB,EACt
B,KAAa;QAEb,MAAM,MAAM,GAAG,yEAAyE,KAAK;sIACqC,CAAA;QAElI,OAAO,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,MAAM,CAAC,CAAA;IAC9C,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,eAAe,CAAC,UAAsB;QAC1C,MAAM,MAAM,GACV,8EAA8E;YAC9E,+GAA+G,CAAA;QACjH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,MAAM,CAAC,CAAA;QACxD,OAAO;YACL,IAAI;YACJ,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC;YAC9C,QAAQ,EAAE,QAAQ;SACnB,CAAA;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CACjB,MAAiD,EACjD,MAAc;QAEd,MAAM,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACxC,UAAU,EAAE;gBACV,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;aACzB;SACF,CAAC,CAAC,CAAA;QAEH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC;YACxD,KAAK,EAAE,YAAY;YACnB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,KAAK,EAAE;wBACL,GAAG,UAAU;wBACb;4BACE,IAAI,EAAE,MAAM;yBACb;qBACF;iBACF;aACF;SACF,CAAC,CAAA;QAEF,OAAO,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAA;IAC5B,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAA;IACzC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;AACrC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractFrames.d.ts","sourceRoot":"","sources":["../../src/tools/extractFrames.ts"],"names":[],"mappings":"AAAA;;;GAGG;
|
|
1
|
+
{"version":3,"file":"extractFrames.d.ts","sourceRoot":"","sources":["../../src/tools/extractFrames.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA0BH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAA;AACxE,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAA;AAElD,KAAK,mBAAmB,GAAG,OAAO,KAAK,EAAE,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAA;AAE1E,eAAO,MAAM,iBAAiB,GAC5B,QAAQ,mBAAmB,KAC1B,OAAO,CAAC,cAAc,CA8HxB,CAAA"}
|