npm - @trieungoctam/vibekit - Versions diffs - 1.0.0 - Mend

@trieungoctam/vibekit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (352) hide show

package/skills/ai/ai-multimodal/scripts/check_setup.py ADDED Viewed

@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+"""
+Validate ai-multimodal skill setup and configuration.
+Checks:
+- API key presence and format
+- Python dependencies
+- Centralized resolver availability
+- Directory structure
+"""
+import os
+import sys
+from pathlib import Path
+# Fix Windows cp1252 encoding: Unicode symbols (✓, ⚠, ✗) can't encode on Windows.
+# Reconfigure stdout to UTF-8 with replacement (Python 3.7+).
+# Only stdout's encoding is inspected; stderr is reconfigured under the same
+# condition. A falsy stdout encoding skips the whole step.
+if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
+    # hasattr guards: only streams that expose reconfigure() are touched.
+    if hasattr(sys.stdout, 'reconfigure'):
+        sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    if hasattr(sys.stderr, 'reconfigure'):
+        sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+# Color codes for terminal output (ANSI SGR escape sequences).
+# Each print_* helper wraps its message in one of these and ends with RESET.
+GREEN = '\033[92m'   # used by print_success
+YELLOW = '\033[93m'  # used by print_warning
+RED = '\033[91m'     # used by print_error
+BLUE = '\033[94m'    # used by print_info and print_header
+RESET = '\033[0m'    # clears all attributes set above
+BOLD = '\033[1m'     # used for headers and the program title
+def print_header(text):
+    """Print section header."""
+    print(f"\n{BOLD}{BLUE}{'='*60}{RESET}")
+    print(f"{BOLD}{BLUE}{text}{RESET}")
+    print(f"{BOLD}{BLUE}{'='*60}{RESET}\n")
+def print_success(text):
+    """Print success message."""
+    print(f"{GREEN}✓ {text}{RESET}")
+def print_warning(text):
+    """Print warning message."""
+    print(f"{YELLOW}⚠ {text}{RESET}")
+def print_error(text):
+    """Print error message."""
+    print(f"{RED}✗ {text}{RESET}")
+def print_info(text):
+    """Print info message."""
+    print(f"{BLUE}ℹ {text}{RESET}")
+def check_dependencies():
+    """Check if required Python packages are installed."""
+    print_header("Checking Python Dependencies")
+    dependencies = {
+        'google.genai': 'google-genai',
+        'dotenv': 'python-dotenv',
+        'PIL': 'pillow'
+    }
+    missing = []
+    for module_name, package_name in dependencies.items():
+        try:
+            __import__(module_name)
+            print_success(f"{package_name} is installed")
+        except ImportError:
+            print_error(f"{package_name} is NOT installed")
+            missing.append(package_name)
+    if missing:
+        print_error("\nMissing dependencies detected!")
+        print_info(f"Install with: pip install {' '.join(missing)}")
+        return False
+    return True
+def check_centralized_resolver():
+    """Check if centralized resolver is available."""
+    print_header("Checking Centralized Resolver")
+    claude_root = Path(__file__).parent.parent.parent.parent
+    resolver_path = claude_root / 'scripts' / 'resolve_env.py'
+    if resolver_path.exists():
+        print_success(f"Centralized resolver found: {resolver_path}")
+        # Try to import it
+        sys.path.insert(0, str(resolver_path.parent))
+        try:
+            from resolve_env import resolve_env
+            print_success("Centralized resolver can be imported")
+            return True
+        except ImportError as e:
+            print_error(f"Centralized resolver exists but cannot be imported: {e}")
+            return False
+    else:
+        print_warning(f"Centralized resolver not found: {resolver_path}")
+        print_info("Skill will use fallback resolution logic")
+        return True  # Not critical, fallback works
+def find_api_key():
+    """Find and validate API key using centralized resolver."""
+    print_header("Checking API Key Configuration")
+    # Try to use centralized resolver
+    claude_root = Path(__file__).parent.parent.parent.parent
+    sys.path.insert(0, str(claude_root / 'scripts'))
+    try:
+        from resolve_env import resolve_env
+        print_info("Using centralized resolver...")
+        api_key = resolve_env('GEMINI_API_KEY', skill='ai-multimodal')
+        if api_key:
+            print_success("API key found via centralized resolver")
+            print_info(f"Key preview: {api_key[:20]}...{api_key[-4:]}")
+            # Show hierarchy
+            print_info("\nTo see where the key was found, run:")
+            print_info("python ~/.claude/scripts/resolve_env.py GEMINI_API_KEY --skill ai-multimodal --verbose")
+            return api_key
+        else:
+            print_error("API key not found in any location")
+            return None
+    except ImportError:
+        print_warning("Centralized resolver not available, using fallback")
+        # Fallback: check environment
+        api_key = os.getenv('GEMINI_API_KEY')
+        if api_key:
+            print_success("API key found in process.env")
+            print_info(f"Key preview: {api_key[:20]}...{api_key[-4:]}")
+            return api_key
+        else:
+            print_error("API key not found")
+            return None
+def validate_api_key_format(api_key):
+    """Basic validation of API key format."""
+    if not api_key:
+        return False
+    # Google AI Studio keys typically start with 'AIza'
+    if api_key.startswith('AIza'):
+        print_success("API key format looks valid (Google AI Studio)")
+        return True
+    elif len(api_key) > 20:
+        print_warning("API key format not recognized (may be Vertex AI or custom)")
+        return True
+    else:
+        print_error("API key format looks invalid (too short)")
+        return False
+def test_api_connection(api_key):
+    """Test API connection with a simple request."""
+    print_header("Testing API Connection")
+    try:
+        from google import genai
+        print_info("Initializing Gemini client...")
+        client = genai.Client(api_key=api_key)
+        print_info("Fetching available models...")
+        # List models to verify API key works
+        models = list(client.models.list())
+        print_success(f"API connection successful! Found {len(models)} available models")
+        # Show some available models
+        print_info("\nSample available models:")
+        for model in models[:5]:
+            print(f"  - {model.name}")
+        return True
+    except ImportError:
+        print_error("google-genai package not installed")
+        return False
+    except Exception as e:
+        print_error(f"API connection failed: {str(e)}")
+        return False
+def check_directory_structure():
+    """Verify skill directory structure."""
+    print_header("Checking Directory Structure")
+    script_dir = Path(__file__).parent
+    skill_dir = script_dir.parent
+    required_files = [
+        ('SKILL.md', skill_dir / 'SKILL.md'),
+        ('.env.example', skill_dir / '.env.example'),
+        ('gemini_batch_process.py', script_dir / 'gemini_batch_process.py'),
+    ]
+    all_exist = True
+    for name, path in required_files:
+        if path.exists():
+            print_success(f"{name} exists")
+        else:
+            print_error(f"{name} NOT found at {path}")
+            all_exist = False
+    return all_exist
+def provide_setup_instructions():
+    """Provide setup instructions if configuration is incomplete."""
+    print_header("Setup Instructions")
+    print_info("To configure the ai-multimodal skill:")
+    print("\n1. Get a Gemini API key:")
+    print("   → Visit: https://aistudio.google.com/apikey")
+    print("\n2. Configure the API key (choose one method):")
+    print(f"\n   Option A: User global config (recommended)")
+    print(f"   $ echo 'GEMINI_API_KEY=your-api-key-here' >> ~/.claude/.env")
+    script_dir = Path(__file__).parent
+    skill_dir = script_dir.parent
+    print(f"\n   Option B: Skill-specific config")
+    print(f"   $ cd {skill_dir}")
+    print(f"   $ cp .env.example .env")
+    print(f"   $ # Edit .env and add your API key")
+    print(f"\n   Option C: Runtime environment (temporary)")
+    print(f"   $ export GEMINI_API_KEY='your-api-key-here'")
+    print("\n3. Verify setup:")
+    print(f"   $ python {Path(__file__)}")
+    print("\n4. Debug if needed:")
+    print(f"   $ python ~/.claude/scripts/resolve_env.py --show-hierarchy --skill ai-multimodal")
+    print(f"   $ python ~/.claude/scripts/resolve_env.py GEMINI_API_KEY --skill ai-multimodal --verbose")
+def main():
+    """Run all setup checks."""
+    print(f"\n{BOLD}AI Multimodal Skill - Setup Checker{RESET}")
+    all_passed = True
+    # Check directory structure
+    if not check_directory_structure():
+        all_passed = False
+    # Check centralized resolver
+    check_centralized_resolver()
+    # Check dependencies
+    if not check_dependencies():
+        all_passed = False
+        provide_setup_instructions()
+        sys.exit(1)
+    # Check API key
+    api_key = find_api_key()
+    if not api_key:
+        print_error("\n❌ GEMINI_API_KEY not found in any location")
+        all_passed = False
+        provide_setup_instructions()
+        sys.exit(1)
+    # Validate API key format
+    if not validate_api_key_format(api_key):
+        all_passed = False
+    # Test API connection
+    if not test_api_connection(api_key):
+        all_passed = False
+    # Final summary
+    print_header("Setup Summary")
+    if all_passed:
+        print_success("✅ All checks passed! The ai-multimodal skill is ready to use.")
+        print_info("\nNext steps:")
+        print("  • Read SKILL.md for usage examples")
+        print("  • Try: python scripts/gemini_batch_process.py --help")
+        print("\nImage generation models:")
+        print("  • gemini-2.5-flash-image    - Nano Banana Flash (DEFAULT - fast)")
+        print("  • imagen-4.0-generate-001   - Imagen 4 (alternative - production)")
+        print("  • gemini-3-pro-image-preview - Nano Banana Pro (4K text, reasoning)")
+        print("\nExample (uses default model):")
+        print("  python scripts/gemini_batch_process.py --task generate \\")
+        print("    --prompt 'A sunset over mountains' --aspect-ratio 16:9 --size 2K")
+    else:
+        print_error("❌ Some checks failed. Please fix the issues above.")
+        sys.exit(1)
+if __name__ == '__main__':
+    main()

package/skills/ai/ai-multimodal/scripts/document_converter.py ADDED Viewed

@@ -0,0 +1,395 @@
+#!/usr/bin/env python3
+"""
+Convert documents to Markdown using Gemini API.
+Supports all document types:
+- PDF documents (native vision processing)
+- Images (JPEG, PNG, WEBP, HEIC)
+- Office documents (DOCX, XLSX, PPTX)
+- HTML, TXT, and other text formats
+Features:
+- Converts to clean markdown format
+- Preserves structure, tables, and formatting
+- Extracts text from images and scanned documents
+- Batch conversion support
+- Saves to docs/assets/document-extraction.md by default
+"""
+import argparse
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+# google-genai is a hard requirement: when it cannot be imported, print install
+# instructions and exit with status 1 at import time.
+try:
+    from google import genai
+    from google.genai import types
+except ImportError:
+    print("Error: google-genai package not installed")
+    print("Install with: pip install google-genai")
+    sys.exit(1)
+# python-dotenv is optional: when it is missing, load_dotenv is set to None and
+# find_api_key() skips the .env files and relies on the process environment only.
+try:
+    from dotenv import load_dotenv
+except ImportError:
+    load_dotenv = None
+def find_api_key() -> Optional[str]:
+    """Find Gemini API key using correct priority order.
+    Priority order (highest to lowest):
+    1. process.env (runtime environment variables)
+    2. .claude/skills/ai-multimodal/.env (skill-specific config)
+    3. .claude/skills/.env (shared skills config)
+    4. .claude/.env (Claude global config)
+    """
+    # Priority 1: Already in process.env (highest)
+    api_key = os.getenv('GEMINI_API_KEY')
+    if api_key:
+        return api_key
+    # Load .env files if dotenv available
+    if load_dotenv:
+        # Determine base paths
+        script_dir = Path(__file__).parent
+        skill_dir = script_dir.parent  # .claude/skills/ai-multimodal
+        skills_dir = skill_dir.parent   # .claude/skills
+        claude_dir = skills_dir.parent  # .claude
+        # Priority 2: Skill-specific .env
+        env_file = skill_dir / '.env'
+        if env_file.exists():
+            load_dotenv(env_file)
+            api_key = os.getenv('GEMINI_API_KEY')
+            if api_key:
+                return api_key
+        # Priority 3: Shared skills .env
+        env_file = skills_dir / '.env'
+        if env_file.exists():
+            load_dotenv(env_file)
+            api_key = os.getenv('GEMINI_API_KEY')
+            if api_key:
+                return api_key
+        # Priority 4: Claude global .env
+        env_file = claude_dir / '.env'
+        if env_file.exists():
+            load_dotenv(env_file)
+            api_key = os.getenv('GEMINI_API_KEY')
+            if api_key:
+                return api_key
+    return None
+def find_project_root() -> Path:
+    """Find project root directory."""
+    script_dir = Path(__file__).parent
+    # Look for .git or .claude directory
+    for parent in [script_dir] + list(script_dir.parents):
+        if (parent / '.git').exists() or (parent / '.claude').exists():
+            return parent
+    return script_dir
+def get_mime_type(file_path: str) -> str:
+    """Determine MIME type from file extension."""
+    ext = Path(file_path).suffix.lower()
+    mime_types = {
+        # Documents
+        '.pdf': 'application/pdf',
+        '.txt': 'text/plain',
+        '.html': 'text/html',
+        '.htm': 'text/html',
+        '.md': 'text/markdown',
+        '.csv': 'text/csv',
+        # Images
+        '.jpg': 'image/jpeg',
+        '.jpeg': 'image/jpeg',
+        '.png': 'image/png',
+        '.webp': 'image/webp',
+        '.heic': 'image/heic',
+        '.heif': 'image/heif',
+        # Office (need to be uploaded as binary)
+        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    }
+    return mime_types.get(ext, 'application/octet-stream')
+def upload_file(client: genai.Client, file_path: str, verbose: bool = False) -> Any:
+    """Upload file to Gemini File API."""
+    if verbose:
+        print(f"Uploading {file_path}...")
+    myfile = client.files.upload(file=file_path)
+    # Wait for processing if needed
+    max_wait = 300  # 5 minutes
+    elapsed = 0
+    while myfile.state.name == 'PROCESSING' and elapsed < max_wait:
+        time.sleep(2)
+        myfile = client.files.get(name=myfile.name)
+        elapsed += 2
+        if verbose and elapsed % 10 == 0:
+            print(f"  Processing... {elapsed}s")
+    if myfile.state.name == 'FAILED':
+        raise ValueError(f"File processing failed: {file_path}")
+    if myfile.state.name == 'PROCESSING':
+        raise TimeoutError(f"Processing timeout after {max_wait}s: {file_path}")
+    if verbose:
+        print(f"  Uploaded: {myfile.name}")
+    return myfile
+def convert_to_markdown(
+    client: genai.Client,
+    file_path: str,
+    model: str = 'gemini-2.5-flash',
+    custom_prompt: Optional[str] = None,
+    verbose: bool = False,
+    max_retries: int = 3
+) -> Dict[str, Any]:
+    """Convert a document to markdown using Gemini."""
+    for attempt in range(max_retries):
+        try:
+            file_path_obj = Path(file_path)
+            file_size = file_path_obj.stat().st_size
+            use_file_api = file_size > 20 * 1024 * 1024  # >20MB
+            # Default prompt for markdown conversion
+            if custom_prompt:
+                prompt = custom_prompt
+            else:
+                prompt = """Convert this document to clean, well-formatted Markdown.
+Requirements:
+- Preserve all content, structure, and formatting
+- Convert tables to markdown table format
+- Maintain heading hierarchy (# ## ### etc)
+- Preserve lists, code blocks, and quotes
+- Extract text from images if present
+- Keep formatting consistent and readable
+Output only the markdown content without any preamble or explanation."""
+            # Upload or inline the file
+            if use_file_api:
+                myfile = upload_file(client, str(file_path), verbose)
+                content = [prompt, myfile]
+            else:
+                with open(file_path, 'rb') as f:
+                    file_bytes = f.read()
+                mime_type = get_mime_type(str(file_path))
+                content = [
+                    prompt,
+                    types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
+                ]
+            # Generate markdown
+            response = client.models.generate_content(
+                model=model,
+                contents=content
+            )
+            markdown_content = response.text if hasattr(response, 'text') else ''
+            return {
+                'file': str(file_path),
+                'status': 'success',
+                'markdown': markdown_content
+            }
+        except Exception as e:
+            if attempt == max_retries - 1:
+                return {
+                    'file': str(file_path),
+                    'status': 'error',
+                    'error': str(e),
+                    'markdown': None
+                }
+            wait_time = 2 ** attempt
+            if verbose:
+                print(f"  Retry {attempt + 1} after {wait_time}s: {e}")
+            time.sleep(wait_time)
+def batch_convert(
+    files: List[str],
+    output_file: Optional[str] = None,
+    auto_name: bool = False,
+    model: str = 'gemini-2.5-flash',
+    custom_prompt: Optional[str] = None,
+    verbose: bool = False
+) -> List[Dict[str, Any]]:
+    """Batch convert multiple files to markdown."""
+    api_key = find_api_key()
+    if not api_key:
+        print("Error: GEMINI_API_KEY not found")
+        print("Set via: export GEMINI_API_KEY='your-key'")
+        print("Or create .env file with: GEMINI_API_KEY=your-key")
+        sys.exit(1)
+    client = genai.Client(api_key=api_key)
+    results = []
+    # Determine output path
+    if not output_file:
+        project_root = find_project_root()
+        output_dir = project_root / 'docs' / 'assets'
+        if auto_name and len(files) == 1:
+            # Auto-generate meaningful filename from input
+            input_path = Path(files[0])
+            base_name = input_path.stem
+            output_file = str(output_dir / f"{base_name}-extraction.md")
+        else:
+            output_file = str(output_dir / 'document-extraction.md')
+    output_path = Path(output_file)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    # Process each file
+    for i, file_path in enumerate(files, 1):
+        if verbose:
+            print(f"\n[{i}/{len(files)}] Converting: {file_path}")
+        result = convert_to_markdown(
+            client=client,
+            file_path=file_path,
+            model=model,
+            custom_prompt=custom_prompt,
+            verbose=verbose
+        )
+        results.append(result)
+        if verbose:
+            status = result.get('status', 'unknown')
+            print(f"  Status: {status}")
+    # Save combined markdown
+    with open(output_path, 'w', encoding='utf-8') as f:
+        f.write("# Document Extraction Results\n\n")
+        f.write(f"Converted {len(files)} document(s) to markdown.\n\n")
+        f.write("---\n\n")
+        for result in results:
+            f.write(f"## {Path(result['file']).name}\n\n")
+            if result['status'] == 'success' and result.get('markdown'):
+                f.write(result['markdown'])
+                f.write("\n\n")
+            elif result['status'] == 'success':
+                f.write("**Note**: Conversion succeeded but no content was returned.\n\n")
+            else:
+                f.write(f"**Error**: {result.get('error', 'Unknown error')}\n\n")
+            f.write("---\n\n")
+    if verbose or True:  # Always show output location
+        print(f"\n{'='*50}")
+        print(f"Converted: {len(results)} file(s)")
+        print(f"Success: {sum(1 for r in results if r['status'] == 'success')}")
+        print(f"Failed: {sum(1 for r in results if r['status'] == 'error')}")
+        print(f"Output saved to: {output_path}")
+    return results
+def main():
+    parser = argparse.ArgumentParser(
+        description='Convert documents to Markdown using Gemini API',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Convert single PDF to markdown (default name)
+  %(prog)s --input document.pdf
+  # Auto-generate meaningful filename
+  %(prog)s --input testpdf.pdf --auto-name
+  # Output: docs/assets/testpdf-extraction.md
+  # Convert multiple files
+  %(prog)s --input doc1.pdf doc2.docx image.png
+  # Specify custom output location
+  %(prog)s --input document.pdf --output ./output.md
+  # Use custom prompt
+  %(prog)s --input document.pdf --prompt "Extract only the tables as markdown"
+  # Batch convert directory
+  %(prog)s --input ./documents/*.pdf --verbose
+Supported formats:
+  - PDF documents (up to 1,000 pages)
+  - Images (JPEG, PNG, WEBP, HEIC)
+  - Office documents (DOCX, XLSX, PPTX)
+  - Text formats (TXT, HTML, Markdown, CSV)
+Default output: <project-root>/docs/assets/document-extraction.md
+        """
+    )
+    parser.add_argument('--input', '-i', nargs='+', required=True,
+                       help='Input file(s) to convert')
+    parser.add_argument('--output', '-o',
+                       help='Output markdown file (default: docs/assets/document-extraction.md)')
+    parser.add_argument('--auto-name', '-a', action='store_true',
+                       help='Auto-generate meaningful output filename from input (e.g., document.pdf -> document-extraction.md)')
+    parser.add_argument('--model', default='gemini-2.5-flash',
+                       help='Gemini model to use (default: gemini-2.5-flash)')
+    parser.add_argument('--prompt', '-p',
+                       help='Custom prompt for conversion')
+    parser.add_argument('--verbose', '-v', action='store_true',
+                       help='Verbose output')
+    args = parser.parse_args()
+    # Validate input files
+    files = []
+    for file_pattern in args.input:
+        file_path = Path(file_pattern)
+        if file_path.exists() and file_path.is_file():
+            files.append(str(file_path))
+        else:
+            # Try glob pattern
+            import glob
+            matched = glob.glob(file_pattern)
+            files.extend([f for f in matched if Path(f).is_file()])
+    if not files:
+        print("Error: No valid input files found")
+        sys.exit(1)
+    # Convert files
+    batch_convert(
+        files=files,
+        output_file=args.output,
+        auto_name=args.auto_name,
+        model=args.model,
+        custom_prompt=args.prompt,
+        verbose=args.verbose
+    )
+if __name__ == '__main__':
+    main()