npm - @goonnguyen/human-mcp - Versions diffs - 2.8.3 → 2.8.4 - Mend

@goonnguyen/human-mcp 2.8.3 → 2.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/index.js +525 -0
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -167915,6 +167915,263 @@ function estimateVideoSize(duration, aspectRatio) {
 // src/tools/hands/processors/image-editor.ts
 init_logger();
 init_image_loader();
+async function editImage(geminiClient, options, config) {
+  const startTime = Date.now();
+  try {
+    const processedInputImage = await processImageForEditing(options.inputImage);
+    const editingPrompt = buildEditingPrompt(options);
+    logger2.info(`Image editing operation: ${options.operation}`);
+    logger2.info(`Editing prompt: "${editingPrompt}"`);
+    const model = geminiClient.getImageGenerationModel();
+    const requestContent = await buildRequestContent(options, processedInputImage, editingPrompt);
+    const response = await model.generateContent(requestContent);
+    const result = response.response;
+    const candidates = result.candidates;
+    logger2.debug(`Gemini API response structure: ${JSON.stringify({
+      hasCandidates: !!candidates,
+      candidatesLength: candidates?.length,
+      firstCandidate: candidates?.[0] ? {
+        hasContent: !!candidates[0].content,
+        hasParts: !!candidates[0].content?.parts,
+        partsLength: candidates[0].content?.parts?.length
+      } : null
+    })}`);
+    if (!candidates || candidates.length === 0) {
+      logger2.error("No candidates in Gemini response. Full response:", JSON.stringify(result, null, 2));
+      throw new Error("No image candidates returned from Gemini API. This may indicate the API doesn't support image editing yet, or the request format is incorrect.");
+    }
+    const candidate = candidates[0];
+    if (!candidate || !candidate.content) {
+      logger2.error("Invalid candidate structure:", JSON.stringify(candidate, null, 2));
+      throw new Error("Invalid response format from Gemini API: missing candidate content");
+    }
+    if (!candidate.content.parts || candidate.content.parts.length === 0) {
+      logger2.error("No parts in candidate content:", JSON.stringify(candidate.content, null, 2));
+      throw new Error("Invalid response format from Gemini API: missing content parts. Note: Gemini image editing may not be available in the current API version.");
+    }
+    let imageData = null;
+    let mimeType = "image/jpeg";
+    logger2.debug(`Searching for image data in ${candidate.content.parts.length} parts`);
+    for (const part of candidate.content.parts) {
+      logger2.debug(`Part type: ${JSON.stringify(Object.keys(part))}`);
+      if ("inlineData" in part && part.inlineData) {
+        imageData = part.inlineData.data;
+        mimeType = part.inlineData.mimeType || "image/jpeg";
+        logger2.info(`Found image data: ${imageData.length} bytes, type: ${mimeType}`);
+        break;
+      }
+    }
+    if (!imageData) {
+      logger2.error("No image data found in response parts:", JSON.stringify(candidate.content.parts, null, 2));
+      throw new Error("No image data found in Gemini response. The API may have returned text instead of an edited image.");
+    }
+    const processingTime = Date.now() - startTime;
+    let resultData;
+    let format;
+    let filePath;
+    let fileName;
+    let fileUrl;
+    let fileSize;
+    const shouldSaveFile = options.saveToFile !== false;
+    const shouldUploadToR2 = options.uploadToR2 === true;
+    if (shouldSaveFile && config) {
+      try {
+        const savedFile = await saveBase64ToFile(imageData, mimeType, config, {
+          prefix: options.filePrefix || `edited-${options.operation}`,
+          directory: options.saveDirectory,
+          uploadToR2: shouldUploadToR2
+        });
+        filePath = savedFile.filePath;
+        fileName = savedFile.fileName;
+        fileUrl = savedFile.url;
+        fileSize = savedFile.size;
+        logger2.info(`Edited image saved to file: ${filePath}`);
+        if (options.outputFormat === "url") {
+          resultData = fileUrl || filePath || `data:${mimeType};base64,${imageData}`;
+          format = fileUrl ? "url" : "file_path";
+        } else {
+          resultData = `data:${mimeType};base64,${imageData}`;
+          format = "base64_data_uri";
+        }
+      } catch (error) {
+        logger2.warn(`Failed to save edited image file: ${error}. Falling back to base64 only.`);
+        resultData = `data:${mimeType};base64,${imageData}`;
+        format = "base64_data_uri";
+      }
+    } else {
+      if (options.outputFormat === "base64") {
+        resultData = `data:${mimeType};base64,${imageData}`;
+        format = "base64_data_uri";
+      } else {
+        resultData = `data:${mimeType};base64,${imageData}`;
+        format = "base64_data_uri";
+        logger2.warn("URL output format requested but file saving disabled. Returning base64 data URI");
+      }
+    }
+    return {
+      editedImageData: resultData,
+      format,
+      operation: options.operation,
+      processingTime,
+      originalSize: estimateImageSize2(processedInputImage.data),
+      editedSize: estimateImageSize2(imageData),
+      filePath,
+      fileName,
+      fileUrl,
+      fileSize,
+      metadata: {
+        prompt: options.prompt,
+        operation: options.operation,
+        strength: options.strength,
+        guidanceScale: options.guidanceScale,
+        seed: options.seed
+      }
+    };
+  } catch (error) {
+    const processingTime = Date.now() - startTime;
+    logger2.error(`Image editing failed after ${processingTime}ms:`, error);
+    if (error instanceof Error) {
+      if (error.message.includes("API key")) {
+        throw new Error("Invalid or missing Google AI API key. Please check your GOOGLE_GEMINI_API_KEY environment variable.");
+      }
+      if (error.message.includes("quota") || error.message.includes("rate limit")) {
+        throw new Error("API quota exceeded or rate limit reached. Please try again later.");
+      }
+      if (error.message.includes("safety") || error.message.includes("policy")) {
+        throw new Error("Image editing blocked due to safety policies. Please modify your request and try again.");
+      }
+      throw new Error(`Image editing failed: ${error.message}`);
+    }
+    throw new Error("Image editing failed due to an unexpected error");
+  }
+}
+async function processImageForEditing(inputImage) {
+  try {
+    const result = await loadImageForProcessing(inputImage, {
+      fetchTimeout: 30000,
+      maxWidth: 1024,
+      maxHeight: 1024,
+      quality: 85
+    });
+    return {
+      data: result.data,
+      mimeType: result.mimeType
+    };
+  } catch (error) {
+    throw new Error(`Failed to process input image: ${error instanceof Error ? error.message : error}`);
+  }
+}
+function buildEditingPrompt(options) {
+  let prompt = options.prompt;
+  switch (options.operation) {
+    case "inpaint":
+      if (options.maskPrompt) {
+        prompt = `Using the provided image, ${prompt}. Focus on ${options.maskPrompt}. Keep all other parts of the image unchanged.`;
+      } else {
+        prompt = `Using the provided image, ${prompt}. Ensure the changes blend naturally with the existing image style, lighting, and perspective.`;
+      }
+      break;
+    case "outpaint":
+      const direction = options.expandDirection || "all directions";
+      prompt = `Expand the provided image ${direction === "all" ? "in all directions" : `to the ${direction}`} and add: ${prompt}. Match the original image's style, lighting, and perspective. Seamlessly blend the new content with the existing image.`;
+      if (options.expansionRatio && options.expansionRatio !== 1.5) {
+        prompt += ` Expand by approximately ${Math.round(options.expansionRatio * 100)}%.`;
+      }
+      break;
+    case "style_transfer":
+      prompt = `Transform the provided image to have this style: ${prompt}. Maintain the original composition, objects, and structure while applying the new artistic style.`;
+      if (options.styleStrength) {
+        const strength = options.styleStrength > 0.8 ? "strongly" : options.styleStrength > 0.5 ? "moderately" : "subtly";
+        prompt += ` Apply the style ${strength}.`;
+      }
+      break;
+    case "object_manipulation":
+      if (options.targetObject) {
+        const action = options.manipulationType || "modify";
+        prompt = `In the provided image, ${action} the ${options.targetObject}: ${prompt}`;
+        if (options.targetPosition) {
+          prompt += `. Position: ${options.targetPosition}`;
+        }
+        prompt += `. Keep all other elements unchanged.`;
+      }
+      break;
+    case "multi_image_compose":
+      prompt = `Combine the provided images: ${prompt}`;
+      if (options.compositionLayout) {
+        prompt += `. Use a ${options.compositionLayout} layout`;
+      }
+      prompt += `. Ensure natural blending and consistent lighting across the composition.`;
+      break;
+  }
+  if (options.quality === "high") {
+    prompt += " Generate a high-quality result with fine details and professional finish.";
+  } else if (options.quality === "draft") {
+    prompt += " Provide a quick draft version.";
+  }
+  if (options.negativePrompt) {
+    prompt += ` Do not include: ${options.negativePrompt}.`;
+  }
+  return prompt;
+}
+async function buildRequestContent(options, processedInputImage, editingPrompt) {
+  const content = [
+    {
+      inlineData: {
+        data: processedInputImage.data,
+        mimeType: processedInputImage.mimeType
+      }
+    },
+    {
+      text: editingPrompt
+    }
+  ];
+  if (options.operation === "style_transfer" && options.styleImage) {
+    try {
+      const processedStyle = await processImageForEditing(options.styleImage);
+      content.push({
+        inlineData: {
+          data: processedStyle.data,
+          mimeType: processedStyle.mimeType
+        }
+      });
+    } catch (error) {
+      logger2.warn(`Failed to process style image: ${error}. Proceeding without style reference.`);
+    }
+  }
+  if (options.operation === "multi_image_compose" && options.secondaryImages) {
+    let imageCount = 1;
+    for (const secondaryImage of options.secondaryImages) {
+      if (imageCount >= 3) {
+        logger2.warn("Gemini supports up to 3 images. Skipping additional images.");
+        break;
+      }
+      try {
+        const processedSecondary = await processImageForEditing(secondaryImage);
+        content.push({
+          inlineData: {
+            data: processedSecondary.data,
+            mimeType: processedSecondary.mimeType
+          }
+        });
+        imageCount++;
+      } catch (error) {
+        logger2.warn(`Failed to process secondary image: ${error}. Skipping this image.`);
+      }
+    }
+  }
+  return content;
+}
+function estimateImageSize2(base64Data) {
+  const dataLength = base64Data.length;
+  const estimatedBytes = dataLength * 3 / 4;
+  if (estimatedBytes < 1e5) {
+    return "512x512";
+  } else if (estimatedBytes < 400000) {
+    return "1024x1024";
+  } else {
+    return "1024x1024+";
+  }
+}
 // src/tools/hands/index.ts
 init_logger();
@@ -168070,6 +168327,177 @@ async function registerHandsTool(server, config) {
       };
     }
   });
+  server.registerTool("gemini_edit_image", {
+    title: "Gemini Image Editing Tool",
+    description: "Edit images using AI with text-based instructions for inpainting, outpainting, style transfer, object manipulation, and composition. No masks required - just describe what you want to change.",
+    inputSchema: {
+      operation: exports_external.enum([
+        "inpaint",
+        "outpaint",
+        "style_transfer",
+        "object_manipulation",
+        "multi_image_compose"
+      ]).describe("Type of image editing operation to perform"),
+      input_image: exports_external.string().describe("Base64 encoded image or file path to the input image"),
+      prompt: exports_external.string().min(1, "Prompt cannot be empty").describe("Text description of the desired edit"),
+      mask_image: exports_external.string().optional().describe("Base64 encoded mask image for inpainting (white = edit area, black = keep)"),
+      mask_prompt: exports_external.string().optional().describe("Text description of the area to mask for editing"),
+      expand_direction: exports_external.enum(["all", "left", "right", "top", "bottom", "horizontal", "vertical"]).optional().describe("Direction to expand the image"),
+      expansion_ratio: exports_external.number().min(0.1).max(3).optional().default(1.5).describe("How much to expand the image (1.0 = no expansion)"),
+      style_image: exports_external.string().optional().describe("Base64 encoded reference image for style transfer"),
+      style_strength: exports_external.number().min(0.1).max(1).optional().default(0.7).describe("Strength of style application"),
+      target_object: exports_external.string().optional().describe("Description of the object to manipulate"),
+      manipulation_type: exports_external.enum(["move", "resize", "remove", "replace", "duplicate"]).optional().describe("Type of object manipulation"),
+      target_position: exports_external.string().optional().describe("New position for the object (e.g., 'center', 'top-left')"),
+      secondary_images: exports_external.array(exports_external.string()).optional().describe("Array of base64 encoded images for composition"),
+      composition_layout: exports_external.enum(["blend", "collage", "overlay", "side_by_side"]).optional().describe("How to combine multiple images"),
+      blend_mode: exports_external.enum(["normal", "multiply", "screen", "overlay", "soft_light"]).optional().describe("Blending mode for image composition"),
+      negative_prompt: exports_external.string().optional().describe("What to avoid in the edited image"),
+      strength: exports_external.number().min(0.1).max(1).optional().default(0.8).describe("Strength of the editing effect"),
+      guidance_scale: exports_external.number().min(1).max(20).optional().default(7.5).describe("How closely to follow the prompt"),
+      seed: exports_external.number().int().min(0).optional().describe("Random seed for reproducible results"),
+      output_format: exports_external.enum(["base64", "url"]).optional().default("base64").describe("Output format for the edited image"),
+      quality: exports_external.enum(["draft", "standard", "high"]).optional().default("standard").describe("Quality level of the editing")
+    }
+  }, async (args) => {
+    try {
+      return await handleImageEditing(geminiClient, args, config);
+    } catch (error) {
+      const mcpError = handleError(error);
+      logger2.error(`Tool gemini_edit_image error:`, mcpError);
+      return {
+        content: [{
+          type: "text",
+          text: `Error: ${mcpError.message}`
+        }],
+        isError: true
+      };
+    }
+  });
+  server.registerTool("gemini_inpaint_image", {
+    title: "Gemini Image Inpainting Tool",
+    description: "Add or modify specific areas of an image using natural language descriptions. No mask required - just describe what to change and where.",
+    inputSchema: {
+      input_image: exports_external.string().describe("Base64 encoded image or file path to the input image"),
+      prompt: exports_external.string().min(1, "Prompt cannot be empty").describe("Text description of what to add or change in the image"),
+      mask_image: exports_external.string().optional().describe("(Optional) Base64 encoded mask image - not used by Gemini but kept for compatibility"),
+      mask_prompt: exports_external.string().optional().describe("Text description of WHERE in the image to make changes (e.g., 'the empty space beside the cat', 'the top-left corner')"),
+      negative_prompt: exports_external.string().optional().describe("What to avoid in the edited area"),
+      strength: exports_external.number().min(0.1).max(1).optional().default(0.8).describe("Strength of the editing effect"),
+      guidance_scale: exports_external.number().min(1).max(20).optional().default(7.5).describe("How closely to follow the prompt"),
+      seed: exports_external.number().int().min(0).optional().describe("Random seed for reproducible results"),
+      output_format: exports_external.enum(["base64", "url"]).optional().default("base64").describe("Output format"),
+      quality: exports_external.enum(["draft", "standard", "high"]).optional().default("standard").describe("Quality level")
+    }
+  }, async (args) => {
+    try {
+      const inpaintArgs = { ...args, operation: "inpaint" };
+      return await handleImageEditing(geminiClient, inpaintArgs, config);
+    } catch (error) {
+      const mcpError = handleError(error);
+      logger2.error(`Tool gemini_inpaint_image error:`, mcpError);
+      return {
+        content: [{
+          type: "text",
+          text: `Error: ${mcpError.message}`
+        }],
+        isError: true
+      };
+    }
+  });
+  server.registerTool("gemini_outpaint_image", {
+    title: "Gemini Image Outpainting Tool",
+    description: "Expand an image beyond its original borders in specified directions",
+    inputSchema: {
+      input_image: exports_external.string().describe("Base64 encoded image or file path to the input image"),
+      prompt: exports_external.string().min(1, "Prompt cannot be empty").describe("Text description of what to add in the expanded areas"),
+      expand_direction: exports_external.enum(["all", "left", "right", "top", "bottom", "horizontal", "vertical"]).optional().default("all").describe("Direction to expand the image"),
+      expansion_ratio: exports_external.number().min(0.1).max(3).optional().default(1.5).describe("How much to expand the image (1.0 = no expansion)"),
+      negative_prompt: exports_external.string().optional().describe("What to avoid in the expanded areas"),
+      strength: exports_external.number().min(0.1).max(1).optional().default(0.8).describe("Strength of the editing effect"),
+      guidance_scale: exports_external.number().min(1).max(20).optional().default(7.5).describe("How closely to follow the prompt"),
+      seed: exports_external.number().int().min(0).optional().describe("Random seed for reproducible results"),
+      output_format: exports_external.enum(["base64", "url"]).optional().default("base64").describe("Output format"),
+      quality: exports_external.enum(["draft", "standard", "high"]).optional().default("standard").describe("Quality level")
+    }
+  }, async (args) => {
+    try {
+      const outpaintArgs = { ...args, operation: "outpaint" };
+      return await handleImageEditing(geminiClient, outpaintArgs, config);
+    } catch (error) {
+      const mcpError = handleError(error);
+      logger2.error(`Tool gemini_outpaint_image error:`, mcpError);
+      return {
+        content: [{
+          type: "text",
+          text: `Error: ${mcpError.message}`
+        }],
+        isError: true
+      };
+    }
+  });
+  server.registerTool("gemini_style_transfer_image", {
+    title: "Gemini Style Transfer Tool",
+    description: "Transfer the style from one image to another using AI",
+    inputSchema: {
+      input_image: exports_external.string().describe("Base64 encoded image or file path to the input image"),
+      prompt: exports_external.string().min(1, "Prompt cannot be empty").describe("Text description of the desired style"),
+      style_image: exports_external.string().optional().describe("Base64 encoded reference image for style transfer"),
+      style_strength: exports_external.number().min(0.1).max(1).optional().default(0.7).describe("Strength of style application"),
+      negative_prompt: exports_external.string().optional().describe("What style elements to avoid"),
+      guidance_scale: exports_external.number().min(1).max(20).optional().default(7.5).describe("How closely to follow the prompt"),
+      seed: exports_external.number().int().min(0).optional().describe("Random seed for reproducible results"),
+      output_format: exports_external.enum(["base64", "url"]).optional().default("base64").describe("Output format"),
+      quality: exports_external.enum(["draft", "standard", "high"]).optional().default("standard").describe("Quality level")
+    }
+  }, async (args) => {
+    try {
+      const styleArgs = { ...args, operation: "style_transfer" };
+      return await handleImageEditing(geminiClient, styleArgs, config);
+    } catch (error) {
+      const mcpError = handleError(error);
+      logger2.error(`Tool gemini_style_transfer_image error:`, mcpError);
+      return {
+        content: [{
+          type: "text",
+          text: `Error: ${mcpError.message}`
+        }],
+        isError: true
+      };
+    }
+  });
+  server.registerTool("gemini_compose_images", {
+    title: "Gemini Image Composition Tool",
+    description: "Combine multiple images into a single composition using AI",
+    inputSchema: {
+      input_image: exports_external.string().describe("Base64 encoded primary image"),
+      secondary_images: exports_external.array(exports_external.string()).describe("Array of base64 encoded secondary images to compose"),
+      prompt: exports_external.string().min(1, "Prompt cannot be empty").describe("Text description of how to compose the images"),
+      composition_layout: exports_external.enum(["blend", "collage", "overlay", "side_by_side"]).optional().default("blend").describe("How to combine the images"),
+      blend_mode: exports_external.enum(["normal", "multiply", "screen", "overlay", "soft_light"]).optional().default("normal").describe("Blending mode for image composition"),
+      negative_prompt: exports_external.string().optional().describe("What to avoid in the composition"),
+      strength: exports_external.number().min(0.1).max(1).optional().default(0.8).describe("Strength of the composition effect"),
+      guidance_scale: exports_external.number().min(1).max(20).optional().default(7.5).describe("How closely to follow the prompt"),
+      seed: exports_external.number().int().min(0).optional().describe("Random seed for reproducible results"),
+      output_format: exports_external.enum(["base64", "url"]).optional().default("base64").describe("Output format"),
+      quality: exports_external.enum(["draft", "standard", "high"]).optional().default("standard").describe("Quality level")
+    }
+  }, async (args) => {
+    try {
+      const composeArgs = { ...args, operation: "multi_image_compose" };
+      return await handleImageEditing(geminiClient, composeArgs, config);
+    } catch (error) {
+      const mcpError = handleError(error);
+      logger2.error(`Tool gemini_compose_images error:`, mcpError);
+      return {
+        content: [{
+          type: "text",
+          text: `Error: ${mcpError.message}`
+        }],
+        isError: true
+      };
+    }
+  });
 }
 async function handleImageGeneration(geminiClient, args, config) {
   const input = ImageGenerationInputSchema.parse(args);
@@ -168236,6 +168664,103 @@ async function handleImageToVideoGeneration(geminiClient, args, config) {
     isError: false
   };
 }
+async function handleImageEditing(geminiClient, args, config) {
+  const input = ImageEditingInputSchema.parse(args);
+  const {
+    operation,
+    input_image,
+    prompt,
+    mask_image,
+    mask_prompt,
+    expand_direction,
+    expansion_ratio,
+    style_image,
+    style_strength,
+    target_object,
+    manipulation_type,
+    target_position,
+    secondary_images,
+    composition_layout,
+    blend_mode,
+    negative_prompt,
+    strength,
+    guidance_scale,
+    seed,
+    output_format,
+    quality
+  } = input;
+  logger2.info(`Editing image with operation: "${operation}" and prompt: "${prompt}"`);
+  const editingOptions = {
+    operation,
+    inputImage: input_image,
+    prompt,
+    maskImage: mask_image,
+    maskPrompt: mask_prompt,
+    expandDirection: expand_direction,
+    expansionRatio: expansion_ratio || 1.5,
+    styleImage: style_image,
+    styleStrength: style_strength || 0.7,
+    targetObject: target_object,
+    manipulationType: manipulation_type,
+    targetPosition: target_position,
+    secondaryImages: secondary_images,
+    compositionLayout: composition_layout,
+    blendMode: blend_mode,
+    negativePrompt: negative_prompt,
+    strength: strength || 0.8,
+    guidanceScale: guidance_scale || 7.5,
+    seed,
+    outputFormat: output_format || "base64",
+    quality: quality || "standard",
+    fetchTimeout: config.server.fetchTimeout,
+    saveToFile: true,
+    uploadToR2: config.cloudflare?.accessKey ? true : false,
+    filePrefix: `edited-${operation}`
+  };
+  const result = await editImage(geminiClient, editingOptions, config);
+  let base64Data;
+  let mimeType;
+  if (result.editedImageData.startsWith("data:")) {
+    const matches = result.editedImageData.match(/data:([^;]+);base64,(.+)/);
+    if (matches && matches[1] && matches[2]) {
+      mimeType = matches[1];
+      base64Data = matches[2];
+    }
+  }
+  const contextText = `✅ Image edited successfully using ${operation} operation
+**Editing Details:**
+- Operation: ${operation}
+- Prompt: "${prompt}"
+- Format: ${result.format}
+- Original Size: ${result.originalSize}
+- Edited Size: ${result.editedSize}
+- Processing Time: ${result.processingTime}ms
+- Quality: ${quality}
+- Timestamp: ${new Date().toISOString()}${result.filePath ? `
+**File Information:**
+- File Path: ${result.filePath}
+- File Name: ${result.fileName}
+- File Size: ${result.fileSize} bytes` : ""}${result.fileUrl ? `
+- Public URL: ${result.fileUrl}` : ""}${result.metadata ? `
+**Operation Metadata:**
+- Strength: ${result.metadata.strength}
+- Guidance Scale: ${result.metadata.guidanceScale}
+- Seed: ${result.metadata.seed || "random"}` : ""}`;
+  const formattedResponse = formatMediaResponse({
+    url: result.fileUrl,
+    filePath: result.filePath,
+    base64: base64Data,
+    mimeType,
+    size: result.fileSize
+  }, config, contextText);
+  return {
+    content: formattedResponse,
+    isError: false
+  };
+}
 // src/tools/mouth/schemas.ts
 var VoiceNames = [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@goonnguyen/human-mcp",
-  "version": "2.8.3",
+  "version": "2.8.4",
   "description": "Human MCP: Bringing Human Capabilities to Coding Agents",
   "type": "module",
   "main": "dist/index.js",