merleau 0.1.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- merleau-0.3.0/.github/workflows/python-publish.yml +65 -0
- merleau-0.3.0/.gitignore +7 -0
- merleau-0.3.0/CLAUDE.md +64 -0
- {merleau-0.1.1 → merleau-0.3.0}/PKG-INFO +3 -1
- merleau-0.3.0/README.md +127 -0
- merleau-0.3.0/analyze_video.py +56 -0
- merleau-0.3.0/merleau/__init__.py +3 -0
- merleau-0.3.0/merleau/cli.py +215 -0
- merleau-0.3.0/pyproject.toml +19 -0
- merleau-0.3.0/research/positioning_merleau.md +191 -0
- merleau-0.3.0/streamlit_app.py +178 -0
- merleau-0.3.0/uv.lock +1590 -0
- merleau-0.3.0/website/index.html +1048 -0
- merleau-0.1.1/README.md +0 -53
- merleau-0.1.1/merleau.egg-info/PKG-INFO +0 -7
- merleau-0.1.1/merleau.egg-info/SOURCES.txt +0 -7
- merleau-0.1.1/merleau.egg-info/dependency_links.txt +0 -1
- merleau-0.1.1/merleau.egg-info/top_level.txt +0 -1
- merleau-0.1.1/pyproject.toml +0 -9
- merleau-0.1.1/setup.cfg +0 -4
- /merleau-0.1.1/merleau.egg-info/requires.txt → /merleau-0.3.0/requirements.txt +0 -0
merleau-0.3.0/.github/workflows/python-publish.yml
ADDED
@@ -0,0 +1,65 @@
+# This workflow will upload a Python Package to PyPI when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  release-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      - name: Build release distributions
+        run: |
+          # NOTE: put your own distribution build steps here.
+          python -m pip install build
+          python -m build
+
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+  pypi-publish:
+    runs-on: ubuntu-latest
+    needs:
+      - release-build
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+
+    # Dedicated environments with protections for publishing are strongly recommended.
+    # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules
+    environment:
+      name: pypi
+      url: https://pypi.org/p/merleau
+
+    steps:
+      - name: Retrieve release distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+      - name: Publish release distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist/
merleau-0.3.0/.gitignore
ADDED
merleau-0.3.0/CLAUDE.md
ADDED
@@ -0,0 +1,64 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Merleau is a CLI tool for video understanding using Google's Gemini API. Named after Maurice Merleau-Ponty, the phenomenologist philosopher. The CLI command is `ponty`.
+
+See `research/positioning_merleau.md` for market positioning and differentiation strategy.
+
+## Commands
+
+```bash
+# Install dependencies
+uv sync
+uv sync --extra web  # Include Streamlit
+
+# Run the CLI
+uv run ponty video.mp4
+uv run ponty video.mp4 -p "Custom prompt" -m gemini-2.0-flash
+
+# Run the web UI
+uv run streamlit run streamlit_app.py
+
+# Build package
+uv build
+
+# Publish to PyPI
+uv publish --token <token>
+```
+
+## Architecture
+
+```
+merleau/
+├── merleau/
+│   ├── __init__.py       # Package version
+│   └── cli.py            # CLI + core analyze_video() function
+├── streamlit_app.py      # Web UI (run with: streamlit run streamlit_app.py)
+├── website/              # Landing page (GitHub Pages)
+│   └── index.html        # Single-page site
+├── research/             # Market research and positioning
+├── pyproject.toml        # Package config with [project.scripts] entry point
+└── analyze_video.py      # Legacy standalone script
+```
+
+### CLI Flow (merleau/cli.py)
+1. Parse arguments (video path, prompt, model, cost flag)
+2. Load API key from environment or `.env`
+3. Upload video to Gemini Files API
+4. Poll for processing completion
+5. Generate content analysis
+6. Display results and optional cost breakdown
+
+## Key Differentiators
+
+- **Native Gemini video** - Only CLI with true video understanding (not frame extraction)
+- **YouTube URL support** - Direct analysis via Gemini's preview feature
+- **Cost transparency** - Token usage and pricing shown by default
+
+## Configuration
+
+- `.env` - Contains `GEMINI_API_KEY` (required)
+- `pyproject.toml` - Package metadata, dependencies, and CLI entry point
{merleau-0.1.1 → merleau-0.3.0}/PKG-INFO
@@ -1,7 +1,9 @@
 Metadata-Version: 2.4
 Name: merleau
-Version: 0.1.1
+Version: 0.3.0
 Summary: Video analysis using Google's Gemini 2.5 Flash API
 Requires-Python: >=3.10
 Requires-Dist: google-genai
 Requires-Dist: python-dotenv
+Provides-Extra: web
+Requires-Dist: streamlit>=1.30.0; extra == 'web'
merleau-0.3.0/README.md
ADDED
@@ -0,0 +1,127 @@
+# Merleau
+
+> *"The world is not what I think, but what I live through."*
+> — Maurice Merleau-Ponty
+
+A CLI tool for video understanding using Google's Gemini API. Named after [Maurice Merleau-Ponty](https://en.wikipedia.org/wiki/Maurice_Merleau-Ponty), the phenomenologist philosopher whose work on perception inspires how this tool helps you perceive your videos.
+
+**[Website](https://yanndebray.github.io/merleau/)** · **[PyPI](https://pypi.org/project/merleau/)** · **[GitHub](https://github.com/yanndebray/merleau)**
+
+https://github.com/user-attachments/assets/e2c5b476-ddab-49ab-a35c-9ae5e880c25c
+
+## Why Merleau?
+
+Google Gemini is the **only major AI provider** with native video understanding—Claude doesn't support video, and GPT-4o requires frame extraction workarounds. Merleau is the first CLI that actually understands video rather than analyzing frames.
+
+## Features
+
+- **Native Gemini video processing** - Upload and analyze videos directly
+- **YouTube URL support** - Analyze videos directly from YouTube (free preview)
+- **Customizable prompts** - Ask any question about your video
+- **Cost estimation** - Token usage tracking and cost breakdown
+- **Multiple models** - Support for different Gemini models
+- **Web UI** - Streamlit app for browser-based analysis
+
+## Installation
+
+Using [uv](https://docs.astral.sh/uv/) (recommended):
+```bash
+uv sync
+```
+
+Or install from PyPI:
+```bash
+pip install merleau
+```
+
+## Configuration
+
+1. Get a Gemini API key from [Google AI Studio](https://aistudio.google.com/apikey)
+2. Set the API key as an environment variable or create a `.env` file:
+```
+GEMINI_API_KEY=your_api_key_here
+```
+
+## Usage
+
+```bash
+# Basic video analysis
+ponty video.mp4
+
+# Custom prompt
+ponty video.mp4 -p "Summarize the key points in this video"
+
+# Use a different model
+ponty video.mp4 -m gemini-2.0-flash
+
+# Hide cost information
+ponty video.mp4 --no-cost
+```
+
+### Web UI
+
+```bash
+# Install with web dependencies
+pip install merleau[web]
+
+# Run the Streamlit app
+streamlit run streamlit_app.py
+```
+
+### Options
+
+| Option | Description |
+|--------|-------------|
+| `-p, --prompt` | Prompt for the analysis (default: "Explain what happens in this video") |
+| `-m, --model` | Gemini model to use (default: gemini-2.5-flash) |
+| `--no-cost` | Hide usage and cost information |
+
+## Reducing Costs with Compression
+
+Compressing videos before analysis can reduce API costs by ~10-15% without degrading analysis quality. Gemini's token count is affected by video resolution and bitrate.
+
+### Quick Compression with ffmpeg
+
+```bash
+# Basic compression (recommended)
+ffmpeg -i input.mp4 -vcodec libx264 -crf 28 -preset medium -vf "scale=1280:-2" output.mp4
+
+# Aggressive compression (smaller file, lower quality)
+ffmpeg -i input.mp4 -vcodec libx264 -crf 32 -preset medium -vf "scale=640:-2" output.mp4
+
+# Keep audio (for speech analysis)
+ffmpeg -i input.mp4 -vcodec libx264 -crf 28 -preset medium -vf "scale=1280:-2" -acodec aac -b:a 128k output.mp4
+```
+
+### Compression Options Explained
+
+| Option | Description |
+|--------|-------------|
+| `-crf 28` | Quality level (18-28 recommended, higher = smaller file) |
+| `-preset medium` | Encoding speed/quality tradeoff |
+| `-vf "scale=1280:-2"` | Resize to 1280px width, maintain aspect ratio |
+| `-an` | Remove audio (if not needed) |
+| `-acodec aac -b:a 128k` | Compress audio to 128kbps AAC |
+
+### Cost Comparison Example
+
+| Version | File Size | Prompt Tokens | Input Cost |
+|---------|-----------|---------------|------------|
+| Original (1080p) | 52 MB | 14,757 | $0.00221 |
+| Compressed (720p) | 2.6 MB | 13,157 | $0.00197 |
+| **Savings** | **95%** | **10.8%** | **10.8%** |
+
+## Output
+
+The CLI provides:
+- Video content analysis from Gemini
+- Token usage breakdown (prompt, response, total)
+- Estimated cost based on Gemini pricing
+
+## Pricing Reference
+
+Gemini 2.5 Flash (as of 2025):
+- Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens (video)
+- Output: $0.60 per 1M tokens, $3.50 for thinking tokens
+
+A 1-hour video costs approximately **$0.11-0.32** to analyze.
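The input-cost column in the README's cost comparison table follows directly from the $0.15 per 1M prompt-token rate quoted in its pricing reference. A minimal sketch of that arithmetic, using only the token counts quoted in the table (for illustration, not part of the package):

```python
# Reproduces the README's "Cost Comparison Example" arithmetic.
# Token counts come from the table above; $0.15 per 1M tokens is the
# text/image input rate from the "Pricing Reference" section.
RATE_PER_TOKEN = 0.15 / 1_000_000

for label, prompt_tokens in [("Original (1080p)", 14_757), ("Compressed (720p)", 13_157)]:
    print(f"{label}: ${prompt_tokens * RATE_PER_TOKEN:.5f}")  # $0.00221 / $0.00197

savings = 1 - 13_157 / 14_757
print(f"Savings: {savings:.1%}")  # ≈ 10.8%
```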
merleau-0.3.0/analyze_video.py
ADDED
@@ -0,0 +1,56 @@
+import os
+import time
+from dotenv import load_dotenv
+from google import genai
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Initialize the client with the API key
+client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+# Upload the video file
+video_path = "MATLAB_Modernizer.mp4"
+print(f"Uploading video: {video_path}")
+myfile = client.files.upload(file=video_path)
+print(f"Upload complete. File URI: {myfile.uri}")
+
+# Wait for the file to be processed (become ACTIVE)
+print("Waiting for file to be processed...")
+while myfile.state.name == "PROCESSING":
+    print(".", end="", flush=True)
+    time.sleep(2)
+    myfile = client.files.get(name=myfile.name)
+
+if myfile.state.name == "FAILED":
+    raise ValueError(f"File processing failed: {myfile.state.name}")
+
+print(f"\nFile state: {myfile.state.name}")
+
+# Generate content using Gemini 2.5 Flash
+print("\nAnalyzing video with Gemini 2.5 Flash...")
+response = client.models.generate_content(
+    model="gemini-2.5-flash",
+    contents=[myfile, "Explain what happens in this video"]
+)
+print("\n--- Video Analysis ---")
+print(response.text)
+
+# Show usage/cost information
+print("\n--- Usage Information ---")
+if hasattr(response, 'usage_metadata'):
+    usage = response.usage_metadata
+    print(f"Prompt tokens: {usage.prompt_token_count}")
+    print(f"Response tokens: {usage.candidates_token_count}")
+    print(f"Total tokens: {usage.total_token_count}")
+
+    # Gemini 2.5 Flash pricing (as of 2025):
+    # Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens for video
+    # Output: $0.60 per 1M tokens, $3.50 for thinking tokens
+    input_cost = (usage.prompt_token_count / 1_000_000) * 0.15
+    output_cost = (usage.candidates_token_count / 1_000_000) * 0.60
+    total_cost = input_cost + output_cost
+    print(f"\nEstimated cost:")
+    print(f"  Input: ${input_cost:.6f}")
+    print(f"  Output: ${output_cost:.6f}")
+    print(f"  Total: ${total_cost:.6f}")
merleau-0.3.0/merleau/cli.py
ADDED
@@ -0,0 +1,215 @@
+"""Command-line interface for Merleau video analysis."""
+
+import argparse
+import os
+import sys
+import time
+from dataclasses import dataclass
+from typing import Callable, Optional
+
+from dotenv import load_dotenv
+from google import genai
+
+
+@dataclass
+class AnalysisResult:
+    """Result from video analysis."""
+    text: str
+    prompt_tokens: int
+    response_tokens: int
+    total_tokens: int
+    input_cost: float
+    output_cost: float
+    total_cost: float
+
+
+def wait_for_processing(client, file, on_progress: Optional[Callable] = None):
+    """Wait for file to finish processing."""
+    while file.state.name == "PROCESSING":
+        if on_progress:
+            on_progress()
+        else:
+            print(".", end="", flush=True)
+        time.sleep(2)
+        file = client.files.get(name=file.name)
+    if not on_progress:
+        print()
+    return file
+
+
+def calculate_cost(usage):
+    """Calculate cost from usage metadata."""
+    # Gemini 2.5 Flash pricing (as of 2025):
+    # Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens for video
+    # Output: $0.60 per 1M tokens, $3.50 for thinking tokens
+    input_cost = (usage.prompt_token_count / 1_000_000) * 0.15
+    output_cost = (usage.candidates_token_count / 1_000_000) * 0.60
+    return input_cost, output_cost, input_cost + output_cost
+
+
+def print_usage(usage):
+    """Print token usage and cost estimation."""
+    print("\n--- Usage Information ---")
+    print(f"Prompt tokens: {usage.prompt_token_count}")
+    print(f"Response tokens: {usage.candidates_token_count}")
+    print(f"Total tokens: {usage.total_token_count}")
+
+    input_cost, output_cost, total_cost = calculate_cost(usage)
+    print(f"\nEstimated cost:")
+    print(f"  Input: ${input_cost:.6f}")
+    print(f"  Output: ${output_cost:.6f}")
+    print(f"  Total: ${total_cost:.6f}")
+
+
+def analyze_video(
+    video_path: str,
+    prompt: str = "Explain what happens in this video",
+    model: str = "gemini-2.5-flash",
+    api_key: Optional[str] = None,
+    on_upload: Optional[Callable[[str], None]] = None,
+    on_processing: Optional[Callable] = None,
+    on_analyzing: Optional[Callable] = None,
+) -> AnalysisResult:
+    """
+    Analyze a video file using Gemini.
+
+    Args:
+        video_path: Path to the video file
+        prompt: Analysis prompt
+        model: Gemini model to use
+        api_key: Optional API key (falls back to env var)
+        on_upload: Callback when upload completes (receives file URI)
+        on_processing: Callback during processing (called repeatedly)
+        on_analyzing: Callback when analysis starts
+
+    Returns:
+        AnalysisResult with text, tokens, and cost
+
+    Raises:
+        ValueError: If API key not found or file doesn't exist
+        RuntimeError: If file processing fails
+    """
+    load_dotenv()
+
+    api_key = api_key or os.getenv("GEMINI_API_KEY")
+    if not api_key:
+        raise ValueError("GEMINI_API_KEY not found in environment or .env file")
+
+    if not os.path.exists(video_path):
+        raise ValueError(f"Video file not found: {video_path}")
+
+    client = genai.Client(api_key=api_key)
+
+    # Upload video
+    myfile = client.files.upload(file=video_path)
+    if on_upload:
+        on_upload(myfile.uri)
+
+    # Wait for processing
+    myfile = wait_for_processing(client, myfile, on_progress=on_processing)
+
+    if myfile.state.name == "FAILED":
+        raise RuntimeError("File processing failed")
+
+    # Generate analysis
+    if on_analyzing:
+        on_analyzing()
+
+    response = client.models.generate_content(
+        model=model,
+        contents=[myfile, prompt]
+    )
+
+    # Extract usage info
+    usage = response.usage_metadata
+    input_cost, output_cost, total_cost = calculate_cost(usage)
+
+    return AnalysisResult(
+        text=response.text,
+        prompt_tokens=usage.prompt_token_count,
+        response_tokens=usage.candidates_token_count,
+        total_tokens=usage.total_token_count,
+        input_cost=input_cost,
+        output_cost=output_cost,
+        total_cost=total_cost,
+    )
+
+
+def analyze(video_path, prompt, model, show_cost):
+    """Analyze a video file using Gemini (CLI wrapper)."""
+    try:
+        print(f"Uploading video: {video_path}")
+
+        def on_upload(uri):
+            print(f"Upload complete. File URI: {uri}")
+            print("Waiting for file to be processed...", end="")
+
+        def on_processing():
+            print(".", end="", flush=True)
+
+        def on_analyzing():
+            print()
+            print(f"\nAnalyzing video with {model}...")
+
+        result = analyze_video(
+            video_path=video_path,
+            prompt=prompt,
+            model=model,
+            on_upload=on_upload,
+            on_processing=on_processing,
+            on_analyzing=on_analyzing,
+        )
+
+        print("\n--- Video Analysis ---")
+        print(result.text)
+
+        if show_cost:
+            print("\n--- Usage Information ---")
+            print(f"Prompt tokens: {result.prompt_tokens}")
+            print(f"Response tokens: {result.response_tokens}")
+            print(f"Total tokens: {result.total_tokens}")
+            print(f"\nEstimated cost:")
+            print(f"  Input: ${result.input_cost:.6f}")
+            print(f"  Output: ${result.output_cost:.6f}")
+            print(f"  Total: ${result.total_cost:.6f}")
+
+    except ValueError as e:
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    except RuntimeError as e:
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def main():
+    """Main entry point for the CLI."""
+    parser = argparse.ArgumentParser(
+        prog="ponty",
+        description="Analyze videos using Google's Gemini API"
+    )
+    parser.add_argument(
+        "video",
+        help="Path to the video file to analyze"
+    )
+    parser.add_argument(
+        "-p", "--prompt",
+        default="Explain what happens in this video",
+        help="Prompt for the analysis (default: 'Explain what happens in this video')"
+    )
+    parser.add_argument(
+        "-m", "--model",
+        default="gemini-2.5-flash",
+        help="Gemini model to use (default: gemini-2.5-flash)"
+    )
+    parser.add_argument(
+        "--no-cost",
+        action="store_true",
+        help="Hide usage and cost information"
+    )
+
+    args = parser.parse_args()
+    analyze(args.video, args.prompt, args.model, show_cost=not args.no_cost)
+
+
+if __name__ == "__main__":
+    main()
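Because `analyze_video()` in `merleau/cli.py` is a plain function that returns an `AnalysisResult`, it can also be called programmatically, outside the `ponty` argparse wrapper. A minimal usage sketch (the video path and prompt below are placeholders; `GEMINI_API_KEY` is assumed to be set in the environment or a `.env` file):

```python
# Minimal sketch: call the core function from merleau/cli.py directly,
# skipping the CLI wrapper. "demo.mp4" and the prompt are placeholders.
from merleau.cli import analyze_video

result = analyze_video(
    video_path="demo.mp4",
    prompt="List the main topics covered in this video",
)
print(result.text)
print(f"Total tokens: {result.total_tokens}, estimated cost: ${result.total_cost:.6f}")
```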
merleau-0.3.0/pyproject.toml
ADDED
@@ -0,0 +1,19 @@
+[project]
+name = "merleau"
+version = "0.3.0"
+description = "Video analysis using Google's Gemini 2.5 Flash API"
+requires-python = ">=3.10"
+dependencies = [
+    "google-genai",
+    "python-dotenv",
+]
+
+[project.optional-dependencies]
+web = ["streamlit>=1.30.0"]
+
+[project.scripts]
+ponty = "merleau.cli:main"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
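The `[project.scripts]` table is what turns `merleau.cli:main` into the `ponty` command at install time; the console script the installer generates behaves roughly like the stub below (a sketch for illustration only, not a file shipped in the package):

```python
# Rough equivalent of the generated `ponty` console script:
# import the declared entry point and invoke it.
import sys
from merleau.cli import main

if __name__ == "__main__":
    sys.exit(main())
```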