@mixio-pro/kalaasetu-mcp 1.0.1-beta → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -68,8 +68,47 @@ Add to your Cursor settings (`~/.cursor/config.json` or via Settings → MCP):
68
68
  "env": {
69
69
  "GEMINI_API_KEY": "your-gemini-api-key",
70
70
  "FAL_KEY": "your-fal-api-key",
71
- "PERPLEXITY_API_KEY": "your-perplexity-api-key",
72
- "GOOGLE_APPLICATION_CREDENTIALS": "/path/to/your/gcp-credentials.json"
71
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key"
72
+ }
73
+ }
74
+ }
75
+ }
76
+ ```
77
+
78
+ ### OpenCode IDE
79
+
80
+ Add to your OpenCode MCP configuration:
81
+
82
+ ```json
83
+ {
84
+ "mcpServers": {
85
+ "kalaasetu": {
86
+ "command": "npx",
87
+ "args": ["@mixio-pro/kalaasetu-mcp@latest"],
88
+ "environment": {
89
+ "GEMINI_API_KEY": "your-gemini-api-key",
90
+ "FAL_KEY": "your-fal-api-key",
91
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key"
92
+ }
93
+ }
94
+ }
95
+ }
96
+ ```
97
+
98
+ ### Claude Desktop
99
+
100
+ Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
101
+
102
+ ```json
103
+ {
104
+ "mcpServers": {
105
+ "kalaasetu": {
106
+ "command": "npx",
107
+ "args": ["@mixio-pro/kalaasetu-mcp@latest"],
108
+ "env": {
109
+ "GEMINI_API_KEY": "your-gemini-api-key",
110
+ "FAL_KEY": "your-fal-api-key",
111
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key"
73
112
  }
74
113
  }
75
114
  }
@@ -124,10 +163,9 @@ Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/
124
163
 
125
164
  | Variable | Description | Get API Key |
126
165
  |----------|-------------|-------------|
127
- | `GEMINI_API_KEY` | For Gemini image generation, TTS, and video analysis | [Google AI Studio](https://aistudio.google.com/app/apikey) |
166
+ | `GEMINI_API_KEY` | For Gemini image generation, TTS, video analysis, and Veo video generation | [Google AI Studio](https://aistudio.google.com/app/apikey) |
128
167
  | `FAL_KEY` | For Infinitalk and Hunyuan Avatar tools | [FAL AI](https://fal.ai/dashboard/keys) |
129
168
  | `PERPLEXITY_API_KEY` | For image and video search | [Perplexity API](https://www.perplexity.ai/settings/api) |
130
- | `GOOGLE_APPLICATION_CREDENTIALS` | For Vertex AI Image-to-Video (Veo) | [GCP Console](https://console.cloud.google.com/apis/credentials) |
131
169
 
132
170
  ### Setting Environment Variables
133
171
 
@@ -138,13 +176,11 @@ Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/
138
176
  export GEMINI_API_KEY="your-gemini-api-key"
139
177
  export FAL_KEY="your-fal-api-key"
140
178
  export PERPLEXITY_API_KEY="your-perplexity-api-key"
141
- export GOOGLE_APPLICATION_CREDENTIALS="/path/to/your/gcp-credentials.json"
142
179
 
143
180
  # Windows (PowerShell)
144
181
  $env:GEMINI_API_KEY="your-gemini-api-key"
145
182
  $env:FAL_KEY="your-fal-api-key"
146
183
  $env:PERPLEXITY_API_KEY="your-perplexity-api-key"
147
- $env:GOOGLE_APPLICATION_CREDENTIALS="C:\path\to\your\gcp-credentials.json"
148
184
  ```
149
185
 
150
186
  #### For MCP Clients
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.1-beta",
3
+ "version": "1.0.3",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -263,7 +263,7 @@ export const geminiSingleSpeakerTts = {
263
263
  description: "Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
264
264
  parameters: z.object({
265
265
  text: z.string().describe("Text to convert to speech"),
266
- voice_name: z.string().describe("Voice name from supported options (e.g., 'Kore', 'Zephyr', 'Puck', etc.)"),
266
+ voice_name: z.string().describe("Voice name from supported options. Use Kore, Erinome or Despina for the female voices and Enceladus for male."),
267
267
  output_path: z.string().optional().describe("Output WAV file path (optional, defaults to timestamp-based filename)"),
268
268
  }),
269
269
  execute: async (args: { text: string; voice_name: string; output_path?: string }) => {
@@ -276,7 +276,7 @@ export const geminiSingleSpeakerTts = {
276
276
  speechConfig: {
277
277
  voiceConfig: {
278
278
  prebuiltVoiceConfig: {
279
- voiceName: args.voice_name
279
+ voiceName: args.voice_name || 'Despina'
280
280
  },
281
281
  },
282
282
  },
@@ -1,49 +1,23 @@
1
1
  import { z } from "zod";
2
2
  import * as fs from "fs";
3
- import { GoogleAuth } from "google-auth-library";
4
- import { exec } from "child_process";
5
3
  import * as path from "path";
4
+ import { GoogleGenAI } from "@google/genai";
6
5
 
7
6
  async function wait(ms: number): Promise<void> {
8
7
  return new Promise((resolve) => setTimeout(resolve, ms));
9
8
  }
10
9
 
11
- async function fetchAccessToken(): Promise<string> {
12
- try {
13
- const auth = new GoogleAuth({ scopes: ["https://www.googleapis.com/auth/cloud-platform"] });
14
- const client = await auth.getClient();
15
- const token = await client.getAccessToken();
16
- if (!token || !token.token || typeof token.token !== "string") {
17
- throw new Error("No token from GoogleAuth");
18
- }
19
- return token.token;
20
- } catch (e: any) {
21
- // Fallback to gcloud
22
- return await new Promise((resolve, reject) => {
23
- exec("gcloud auth print-access-token", (err, stdout, stderr) => {
24
- if (err) {
25
- reject(new Error(`Failed to fetch an access token (ADC and gcloud): ${stderr || err.message}`));
26
- return;
27
- }
28
- const t = (stdout || "").trim();
29
- if (!t) {
30
- reject(new Error("Failed to fetch an access token: empty token from gcloud"));
31
- return;
32
- }
33
- resolve(t);
34
- });
35
- });
36
- }
37
- }
38
-
39
10
  function fileToBase64(filePath: string): { data: string; mimeType: string } {
40
11
  if (!fs.existsSync(filePath)) {
41
12
  throw new Error(`File not found: ${filePath}`);
42
13
  }
43
14
  const buf = fs.readFileSync(filePath);
44
15
  const data = Buffer.from(buf).toString("base64");
45
- // Default to PNG if not sure, similar to existing code
46
- const mimeType = "image/png";
16
+ // Detect mime type from extension
17
+ const ext = path.extname(filePath).toLowerCase();
18
+ const mimeType = ext === '.jpg' || ext === '.jpeg' ? 'image/jpeg' :
19
+ ext === '.png' ? 'image/png' :
20
+ ext === '.webp' ? 'image/webp' : 'image/png';
47
21
  return { data, mimeType };
48
22
  }
49
23
 
@@ -54,178 +28,134 @@ export const imageToVideo = {
54
28
  prompt: z.string().describe("Text description for the video"),
55
29
  image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
56
30
  aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
57
- duration_seconds: z.string().optional().describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
31
+ duration_seconds: z.number().optional().describe("Video duration in seconds: 4, 6, or 8 (default: 6)"),
58
32
  resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
59
33
  negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
60
34
  person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
61
35
  reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
62
36
  output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
63
- project_id: z.string().optional().describe("GCP Project ID (default: mixio-pro)"),
64
- location_id: z.string().optional().describe("Vertex region (default: us-central1)"),
65
- model_id: z.string().optional().describe("Model ID (default: veo-3.1-fast-generate-preview)"),
37
+ gemini_api_key: z.string().optional().describe("Gemini API key (uses GEMINI_API_KEY env var if not provided)"),
38
+ model_id: z.string().optional().describe("Model ID (default: veo-2.0-generate-001)"),
66
39
  }),
67
40
  execute: async (args: {
68
41
  prompt: string;
69
42
  image_path?: string;
70
43
  aspect_ratio?: string;
71
- duration_seconds?: string;
44
+ duration_seconds?: number;
72
45
  resolution?: string;
73
46
  negative_prompt?: string;
74
47
  person_generation?: string;
75
48
  reference_images?: string[];
76
49
  output_path?: string;
77
- project_id?: string;
78
- location_id?: string;
50
+ gemini_api_key?: string;
79
51
  model_id?: string;
80
52
  }) => {
81
- const projectId = args.project_id || "mixio-pro";
82
- const location = args.location_id || "us-central1";
83
- const modelId = args.model_id || "veo-3.1-fast-generate-preview";
84
-
85
- const token = await fetchAccessToken();
86
-
87
- const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
88
-
89
- let imagePart: any = undefined;
90
- if (args.image_path) {
91
- const { data, mimeType } = fileToBase64(args.image_path);
92
- imagePart = {
93
- image: {
94
- bytesBase64Encoded: data,
95
- mimeType,
96
- },
97
- };
53
+ const apiKey = args.gemini_api_key || process.env.GEMINI_API_KEY;
54
+ if (!apiKey) {
55
+ throw new Error("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass gemini_api_key parameter. Get one at https://aistudio.google.com/app/apikey");
98
56
  }
99
57
 
100
- let referenceImages: any[] | undefined = undefined;
101
- if (args.reference_images) {
102
- let refImages: string[];
103
- if (typeof args.reference_images === 'string') {
104
- const strValue = args.reference_images as string;
105
- if (strValue.startsWith("[") && strValue.endsWith("]")) {
106
- try {
107
- refImages = JSON.parse(strValue);
108
- } catch {
109
- throw new Error("Invalid reference_images format");
110
- }
111
- } else {
112
- refImages = [strValue];
113
- }
114
- } else if (Array.isArray(args.reference_images)) {
115
- refImages = args.reference_images;
116
- } else {
117
- throw new Error("Invalid reference_images: must be array or string");
118
- }
119
-
120
- if (refImages.length > 0) {
121
- referenceImages = refImages.slice(0, 3).map((p) => {
122
- const { data, mimeType } = fileToBase64(p);
123
- return {
124
- image: {
125
- bytesBase64Encoded: data,
126
- mimeType,
127
- },
128
- referenceType: "asset",
129
- };
130
- });
131
- }
58
+ const model = args.model_id || "veo-2.0-generate-001";
59
+
60
+ // Initialize Google GenAI client
61
+ const genai = new GoogleGenAI({ apiKey });
62
+
63
+ // Build config for video generation
64
+ const config: any = {};
65
+
66
+ if (args.duration_seconds !== undefined) {
67
+ config.duration_seconds = args.duration_seconds;
68
+ } else {
69
+ config.duration_seconds = 6; // default
70
+ }
71
+
72
+ if (args.aspect_ratio) {
73
+ config.aspect_ratio = args.aspect_ratio;
132
74
  }
133
75
 
134
- const personGeneration = args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
135
-
136
- const instances: any[] = [
137
- {
76
+ try {
77
+ // Start video generation operation
78
+ console.log(`Starting video generation with model: ${model}`);
79
+ let operation = await genai.models.generateVideos({
80
+ model,
138
81
  prompt: args.prompt,
139
- ...(imagePart || {}),
140
- ...(referenceImages ? { referenceImages } : {}),
141
- },
142
- ];
143
-
144
- const parameters: any = {
145
- aspectRatio: args.aspect_ratio || "9:16",
146
- durationSeconds: parseInt(args.duration_seconds || "6"),
147
- resolution: args.resolution || "720p",
148
- negativePrompt: args.negative_prompt,
149
- generateAudio: false,
150
- personGeneration,
151
- };
152
-
153
- const res = await fetch(url, {
154
- method: "POST",
155
- headers: {
156
- Authorization: `Bearer ${token}`,
157
- "Content-Type": "application/json",
158
- },
159
- body: JSON.stringify({ instances, parameters }),
160
- });
82
+ config,
83
+ });
161
84
 
162
- if (!res.ok) {
163
- const text = await res.text();
164
- throw new Error(`Vertex request failed: ${res.status} ${text}`);
165
- }
85
+ console.log("Operation started, waiting for completion...");
86
+
87
+ // Poll until operation is complete (max 10 minutes)
88
+ let tries = 0;
89
+ const maxTries = 60; // 10 minutes with 10s intervals
90
+
91
+ while (!operation.done && tries < maxTries) {
92
+ await wait(10000); // Wait 10 seconds
93
+ tries++;
94
+ console.log(`Polling attempt ${tries}/${maxTries}...`);
95
+
96
+ operation = await genai.operations.getVideosOperation({
97
+ operation: operation,
98
+ });
99
+ }
166
100
 
167
- const op: any = await res.json();
168
- const name: string = (op as any).name || (op as any).operation || "";
169
- if (!name) {
170
- throw new Error("Vertex did not return an operation name for long-running request");
171
- }
101
+ if (!operation.done) {
102
+ throw new Error("Video generation timed out after 10 minutes");
103
+ }
172
104
 
173
- let current: any = op;
174
- let done = !!(op as any).done;
175
- let tries = 0;
105
+ console.log("Operation completed!");
106
+ console.log("Full Response:", JSON.stringify(operation.response, null, 2));
176
107
 
177
- // Poll using fetchPredictOperation as per Vertex recommendation
178
- const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
179
- while (!done && tries < 60) {
180
- await wait(10000);
181
- const poll = await fetch(fetchUrl, {
182
- method: "POST",
183
- headers: {
184
- Authorization: `Bearer ${token}`,
185
- "Content-Type": "application/json",
186
- },
187
- body: JSON.stringify({ operationName: name }),
188
- });
189
- if (!poll.ok) {
190
- const text = await poll.text();
191
- throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
108
+ // Extract generated videos from response
109
+ const generatedVideos = operation.response?.generatedVideos || [];
110
+
111
+ if (!generatedVideos || generatedVideos.length === 0) {
112
+ const respStr = JSON.stringify(operation.response, null, 2);
113
+ return `Video generation completed but no videos found in response.\n\nFull Response:\n${respStr.slice(0, 2000)}${respStr.length > 2000 ? '\n...(truncated)' : ''}`;
192
114
  }
193
- current = await poll.json();
194
- done = !!(current as any).done || !!(current as any).response;
195
- tries++;
196
- }
197
115
 
198
- const resp = (current as any).response || current;
199
- // Decode from response.videos[].bytesBase64Encoded only
200
- const outputs: string[] = [];
201
- const saveVideo = (base64: string, index: number) => {
202
- if (!base64) return;
203
- const filePath = args.output_path
204
- ? (index === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`))
205
- : `video_output_${Date.now()}${index === 0 ? '' : '_' + index}.mp4`;
206
- const absPath = path.resolve(filePath);
207
- const buf = Buffer.from(base64, 'base64');
208
- fs.writeFileSync(absPath, buf);
209
- outputs.push(absPath);
210
- };
116
+ // Download and save videos
117
+ const outputs: string[] = [];
118
+
119
+ for (let i = 0; i < generatedVideos.length; i++) {
120
+ const generatedVideo = generatedVideos[i];
121
+ const videoUri = generatedVideo?.video?.uri;
122
+
123
+ if (!videoUri) {
124
+ console.warn(`Video ${i} has no URI`);
125
+ continue;
126
+ }
211
127
 
212
- if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
213
- for (let i = 0; i < resp.videos.length; i++) {
214
- const v = resp.videos[i] || {};
215
- if (typeof v.bytesBase64Encoded === 'string') {
216
- saveVideo(v.bytesBase64Encoded, i);
128
+ console.log(`Downloading video ${i + 1}/${generatedVideos.length}...`);
129
+
130
+ // Download video from URI
131
+ const videoUrl = `${videoUri}&key=${apiKey}`;
132
+ const response = await fetch(videoUrl);
133
+
134
+ if (!response.ok) {
135
+ throw new Error(`Failed to download video: ${response.status} ${response.statusText}`);
217
136
  }
137
+
138
+ const buffer = await response.arrayBuffer();
139
+
140
+ // Save video to file
141
+ const filePath = args.output_path
142
+ ? (i === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${i}.mp4`))
143
+ : `video_output_${Date.now()}${i === 0 ? '' : '_' + i}.mp4`;
144
+ const absPath = path.resolve(filePath);
145
+
146
+ fs.writeFileSync(absPath, Buffer.from(buffer));
147
+ outputs.push(absPath);
148
+ console.log(`Saved video to: ${absPath}`);
149
+ }
150
+
151
+ if (outputs.length > 0) {
152
+ return `Video(s) saved successfully:\n${outputs.map((p, i) => `${i + 1}. ${p}`).join('\n')}`;
218
153
  }
219
- }
220
- if (outputs.length > 0) {
221
- return `Video(s) saved: ${outputs.join(', ')}`;
222
- }
223
154
 
224
- // If nothing saved, return a concise summary plus head/tail snippets of JSON
225
- let jsonStr = "";
226
- try { jsonStr = JSON.stringify(resp); } catch {}
227
- const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
228
- const tail50 = jsonStr ? jsonStr.slice(Math.max(0, jsonStr.length - 50)) : "";
229
- return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
155
+ return "Video generation completed but no videos were saved.";
156
+
157
+ } catch (error: any) {
158
+ throw new Error(`Video generation failed: ${error.message || JSON.stringify(error)}`);
159
+ }
230
160
  },
231
161
  };