mcp-video-analyzer 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# mcp-video-analyzer
|
|
2
2
|
|
|
3
|
+
<a href="https://glama.ai/mcp/servers/guimatheus92/mcp-video-analyzer">
|
|
4
|
+
<img width="380" height="200" src="https://glama.ai/mcp/servers/guimatheus92/mcp-video-analyzer/badge" alt="mcp-video-analyzer MCP server" />
|
|
5
|
+
</a>
|
|
6
|
+
|
|
7
|
+
Featured in [awesome-mcp-servers](https://github.com/punkpeye/awesome-mcp-servers#-multimedia-process).
|
|
8
|
+
|
|
3
9
|
MCP server for video analysis — extracts transcripts, key frames, and metadata from video URLs. Supports Loom, direct video files (.mp4, .webm), and more.
|
|
4
10
|
|
|
5
11
|
No existing video MCP combines **transcripts + visual frames + metadata** in one tool. This one does.
|
|
@@ -10,4 +10,4 @@ export interface IOcrResult {
|
|
|
10
10
|
*
|
|
11
11
|
* Only includes results with meaningful text (confidence > 50%, text length > 3).
|
|
12
12
|
*/
|
|
13
|
-
export declare function extractTextFromFrames(frames: IFrameResult[]): Promise<IOcrResult[]>;
|
|
13
|
+
export declare function extractTextFromFrames(frames: IFrameResult[], language?: string): Promise<IOcrResult[]>;
|
|
@@ -4,11 +4,11 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Only includes results with meaningful text (confidence > 50%, text length > 3).
|
|
6
6
|
*/
|
|
7
|
-
export async function extractTextFromFrames(frames) {
|
|
7
|
+
export async function extractTextFromFrames(frames, language = 'eng+por') {
|
|
8
8
|
const Tesseract = await loadTesseract();
|
|
9
9
|
if (!Tesseract)
|
|
10
10
|
return [];
|
|
11
|
-
const worker = await Tesseract.createWorker(
|
|
11
|
+
const worker = await Tesseract.createWorker(language);
|
|
12
12
|
try {
|
|
13
13
|
const results = [];
|
|
14
14
|
for (const frame of frames) {
|
package/dist/server.js
CHANGED
|
@@ -12,7 +12,7 @@ import { registerGetTranscript } from './tools/get-transcript.js';
|
|
|
12
12
|
export function createServer() {
|
|
13
13
|
const server = new FastMCP({
|
|
14
14
|
name: 'mcp-video-analyzer',
|
|
15
|
-
version: '0.2.3',
|
|
15
|
+
version: '0.2.4',
|
|
16
16
|
instructions: `Video analysis MCP server. Extracts transcripts, key frames, metadata, comments, OCR text, and annotated timelines from video URLs.
|
|
17
17
|
|
|
18
18
|
AUTOMATIC BEHAVIOR — Do NOT wait for the user to ask:
|
|
@@ -18,6 +18,10 @@ const AnalyzeMomentSchema = z.object({
|
|
|
18
18
|
.default(10)
|
|
19
19
|
.optional()
|
|
20
20
|
.describe('Number of frames to extract in the range (default: 10)'),
|
|
21
|
+
ocrLanguage: z
|
|
22
|
+
.string()
|
|
23
|
+
.optional()
|
|
24
|
+
.describe('Tesseract OCR language codes (default: "eng+por"). Use "+" to combine: "eng+spa", "eng+fra+deu".'),
|
|
21
25
|
});
|
|
22
26
|
export function registerAnalyzeMoment(server) {
|
|
23
27
|
server.addTool({
|
|
@@ -47,6 +51,7 @@ Requires video download capability for frame extraction.`,
|
|
|
47
51
|
execute: async (args, { reportProgress }) => {
|
|
48
52
|
const { url, from, to } = args;
|
|
49
53
|
const count = args.count ?? 10;
|
|
54
|
+
const ocrLanguage = args.ocrLanguage ?? 'eng+por';
|
|
50
55
|
// Validate timestamps
|
|
51
56
|
const fromSeconds = parseTimestamp(from);
|
|
52
57
|
const toSeconds = parseTimestamp(to);
|
|
@@ -103,7 +108,7 @@ Requires video download capability for frame extraction.`,
|
|
|
103
108
|
}
|
|
104
109
|
await reportProgress({ progress: 75, total: 100 });
|
|
105
110
|
// OCR
|
|
106
|
-
const ocrResults = await extractTextFromFrames(frames).catch((e) => {
|
|
111
|
+
const ocrResults = await extractTextFromFrames(frames, ocrLanguage).catch((e) => {
|
|
107
112
|
warnings.push(`OCR failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
108
113
|
return [];
|
|
109
114
|
});
|
|
@@ -62,6 +62,10 @@ const AnalyzeOptionsSchema = z
|
|
|
62
62
|
.default(false)
|
|
63
63
|
.optional()
|
|
64
64
|
.describe('Bypass cache and re-analyze the video'),
|
|
65
|
+
ocrLanguage: z
|
|
66
|
+
.string()
|
|
67
|
+
.optional()
|
|
68
|
+
.describe('Tesseract OCR language codes (default: "eng+por"). Use "+" to combine: "eng+spa", "eng+fra+deu". See Tesseract docs for codes.'),
|
|
65
69
|
})
|
|
66
70
|
.optional();
|
|
67
71
|
const AnalyzeVideoSchema = z.object({
|
|
@@ -104,6 +108,7 @@ Use options.forceRefresh to bypass the cache.`,
|
|
|
104
108
|
const forceRefresh = options?.forceRefresh ?? false;
|
|
105
109
|
const fields = options?.fields;
|
|
106
110
|
const threshold = options?.threshold ?? 0.1;
|
|
111
|
+
const ocrLanguage = options?.ocrLanguage ?? 'eng+por';
|
|
107
112
|
// Resolve detail config
|
|
108
113
|
const config = getDetailConfig(detail);
|
|
109
114
|
const maxFrames = options?.maxFrames ?? config.maxFrames;
|
|
@@ -267,7 +272,7 @@ Use options.forceRefresh to bypass the cache.`,
|
|
|
267
272
|
await reportProgress({ progress: 85, total: 100 });
|
|
268
273
|
// OCR: extract text visible on screen
|
|
269
274
|
if (config.includeOcr) {
|
|
270
|
-
result.ocrResults = await extractTextFromFrames(result.frames).catch((e) => {
|
|
275
|
+
result.ocrResults = await extractTextFromFrames(result.frames, ocrLanguage).catch((e) => {
|
|
271
276
|
warnings.push(`OCR failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
272
277
|
return [];
|
|
273
278
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-video-analyzer",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "MCP server for video analysis — extracts transcripts, key frames, OCR text, and metadata from video URLs. Supports Loom and direct video files.",
|
|
5
5
|
"author": "guimatheus92",
|
|
6
6
|
"license": "MIT",
|