merleau 0.1.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
1
+ # This workflow will upload a Python Package to PyPI when a release is created
2
+ # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3
+
4
+ # This workflow uses actions that are not certified by GitHub.
5
+ # They are provided by a third-party and are governed by
6
+ # separate terms of service, privacy policy, and support
7
+ # documentation.
8
+
9
+ name: Upload Python Package
10
+
11
+ on:
12
+ release:
13
+ types: [published]
14
+
15
+ permissions:
16
+ contents: read
17
+
18
+ jobs:
19
+ release-build:
20
+ runs-on: ubuntu-latest
21
+
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - uses: actions/setup-python@v5
26
+ with:
27
+ python-version: "3.x"
28
+
29
+ - name: Build release distributions
30
+ run: |
31
+ # NOTE: put your own distribution build steps here.
32
+ python -m pip install build
33
+ python -m build
34
+
35
+ - name: Upload distributions
36
+ uses: actions/upload-artifact@v4
37
+ with:
38
+ name: release-dists
39
+ path: dist/
40
+
41
+ pypi-publish:
42
+ runs-on: ubuntu-latest
43
+ needs:
44
+ - release-build
45
+ permissions:
46
+ # IMPORTANT: this permission is mandatory for trusted publishing
47
+ id-token: write
48
+
49
+ # Dedicated environments with protections for publishing are strongly recommended.
50
+ # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules
51
+ environment:
52
+ name: pypi
53
+ url: https://pypi.org/p/merleau
54
+
55
+ steps:
56
+ - name: Retrieve release distributions
57
+ uses: actions/download-artifact@v4
58
+ with:
59
+ name: release-dists
60
+ path: dist/
61
+
62
+ - name: Publish release distributions to PyPI
63
+ uses: pypa/gh-action-pypi-publish@release/v1
64
+ with:
65
+ packages-dir: dist/
@@ -0,0 +1,7 @@
1
+ .env
2
+ __pycache__/
3
+ *.pyc
4
+ .venv/
5
+ dist/
6
+ build/
7
+ *.egg-info/
@@ -0,0 +1,64 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ Merleau is a CLI tool for video understanding using Google's Gemini API. It is named after Maurice Merleau-Ponty, the phenomenologist philosopher. The CLI command is `ponty`.
8
+
9
+ See `research/positioning_merleau.md` for market positioning and differentiation strategy.
10
+
11
+ ## Commands
12
+
13
+ ```bash
14
+ # Install dependencies
15
+ uv sync
16
+ uv sync --extra web # Include Streamlit
17
+
18
+ # Run the CLI
19
+ uv run ponty video.mp4
20
+ uv run ponty video.mp4 -p "Custom prompt" -m gemini-2.0-flash
21
+
22
+ # Run the web UI
23
+ uv run streamlit run streamlit_app.py
24
+
25
+ # Build package
26
+ uv build
27
+
28
+ # Publish to PyPI
29
+ uv publish --token <token>
30
+ ```
31
+
32
+ ## Architecture
33
+
34
+ ```
35
+ merleau/
36
+ ├── merleau/
37
+ │ ├── __init__.py # Package version
38
+ │ └── cli.py # CLI + core analyze_video() function
39
+ ├── streamlit_app.py # Web UI (run with: streamlit run streamlit_app.py)
40
+ ├── website/ # Landing page (GitHub Pages)
41
+ │ └── index.html # Single-page site
42
+ ├── research/ # Market research and positioning
43
+ ├── pyproject.toml # Package config with [project.scripts] entry point
44
+ └── analyze_video.py # Legacy standalone script
45
+ ```
46
+
47
+ ### CLI Flow (merleau/cli.py)
48
+ 1. Parse arguments (video path, prompt, model, cost flag)
49
+ 2. Load API key from environment or `.env`
50
+ 3. Upload video to Gemini Files API
51
+ 4. Poll for processing completion
52
+ 5. Generate content analysis
53
+ 6. Display results and optional cost breakdown
54
+
55
+ ## Key Differentiators
56
+
57
+ - **Native Gemini video** - Only CLI with true video understanding (not frame extraction)
58
+ - **YouTube URL support** - Direct analysis via Gemini's preview feature
59
+ - **Cost transparency** - Token usage and pricing shown by default
60
+
61
+ ## Configuration
62
+
63
+ - `.env` - Contains `GEMINI_API_KEY` (required)
64
+ - `pyproject.toml` - Package metadata, dependencies, and CLI entry point
@@ -1,7 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: merleau
3
- Version: 0.1.1
3
+ Version: 0.3.0
4
4
  Summary: Video analysis using Google's Gemini 2.5 Flash API
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: google-genai
7
7
  Requires-Dist: python-dotenv
8
+ Provides-Extra: web
9
+ Requires-Dist: streamlit>=1.30.0; extra == 'web'
@@ -0,0 +1,127 @@
1
+ # Merleau
2
+
3
+ > *"The world is not what I think, but what I live through."*
4
+ > — Maurice Merleau-Ponty
5
+
6
+ A CLI tool for video understanding using Google's Gemini API. Named after [Maurice Merleau-Ponty](https://en.wikipedia.org/wiki/Maurice_Merleau-Ponty), the phenomenologist philosopher whose work on perception inspires how this tool helps you perceive your videos.
7
+
8
+ **[Website](https://yanndebray.github.io/merleau/)** · **[PyPI](https://pypi.org/project/merleau/)** · **[GitHub](https://github.com/yanndebray/merleau)**
9
+
10
+ https://github.com/user-attachments/assets/e2c5b476-ddab-49ab-a35c-9ae5e880c25c
11
+
12
+ ## Why Merleau?
13
+
14
+ Google Gemini is the **only major AI provider** with native video understanding—Claude doesn't support video, and GPT-4o requires frame extraction workarounds. Merleau is the first CLI that actually understands video rather than analyzing frames.
15
+
16
+ ## Features
17
+
18
+ - **Native Gemini video processing** - Upload and analyze videos directly
19
+ - **YouTube URL support** - Analyze videos directly from YouTube (free preview)
20
+ - **Customizable prompts** - Ask any question about your video
21
+ - **Cost estimation** - Token usage tracking and cost breakdown
22
+ - **Multiple models** - Support for different Gemini models
23
+ - **Web UI** - Streamlit app for browser-based analysis
24
+
25
+ ## Installation
26
+
27
+ From a clone of this repository, using [uv](https://docs.astral.sh/uv/) (recommended):
28
+ ```bash
29
+ uv sync
30
+ ```
31
+
32
+ Or install from PyPI:
33
+ ```bash
34
+ pip install merleau
35
+ ```
36
+
37
+ ## Configuration
38
+
39
+ 1. Get a Gemini API key from [Google AI Studio](https://aistudio.google.com/apikey)
40
+ 2. Set the API key as an environment variable or create a `.env` file:
41
+ ```
42
+ GEMINI_API_KEY=your_api_key_here
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ ```bash
48
+ # Basic video analysis
49
+ ponty video.mp4
50
+
51
+ # Custom prompt
52
+ ponty video.mp4 -p "Summarize the key points in this video"
53
+
54
+ # Use a different model
55
+ ponty video.mp4 -m gemini-2.0-flash
56
+
57
+ # Hide cost information
58
+ ponty video.mp4 --no-cost
59
+ ```
60
+
61
+ ### Web UI
62
+
63
+ ```bash
64
+ # Install with web dependencies
65
+ pip install "merleau[web]"
66
+
67
+ # Run the Streamlit app
68
+ streamlit run streamlit_app.py
69
+ ```
70
+
71
+ ### Options
72
+
73
+ | Option | Description |
74
+ |--------|-------------|
75
+ | `-p, --prompt` | Prompt for the analysis (default: "Explain what happens in this video") |
76
+ | `-m, --model` | Gemini model to use (default: gemini-2.5-flash) |
77
+ | `--no-cost` | Hide usage and cost information |
78
+
79
+ ## Reducing Costs with Compression
80
+
81
+ Compressing videos before analysis can reduce API costs by ~10-15% without degrading analysis quality. Gemini's token count is affected by video resolution and bitrate.
82
+
83
+ ### Quick Compression with ffmpeg
84
+
85
+ ```bash
86
+ # Basic compression (recommended)
87
+ ffmpeg -i input.mp4 -vcodec libx264 -crf 28 -preset medium -vf "scale=1280:-2" output.mp4
88
+
89
+ # Aggressive compression (smaller file, lower quality)
90
+ ffmpeg -i input.mp4 -vcodec libx264 -crf 32 -preset medium -vf "scale=640:-2" output.mp4
91
+
92
+ # Also re-encode audio to 128 kbps AAC (audio is kept by default; add -an to drop it)
93
+ ffmpeg -i input.mp4 -vcodec libx264 -crf 28 -preset medium -vf "scale=1280:-2" -acodec aac -b:a 128k output.mp4
94
+ ```
95
+
96
+ ### Compression Options Explained
97
+
98
+ | Option | Description |
99
+ |--------|-------------|
100
+ | `-crf 28` | Quality level (18-28 recommended, higher = smaller file) |
101
+ | `-preset medium` | Encoding speed/quality tradeoff |
102
+ | `-vf "scale=1280:-2"` | Resize to 1280px width, maintain aspect ratio |
103
+ | `-an` | Remove audio (if not needed) |
104
+ | `-acodec aac -b:a 128k` | Compress audio to 128kbps AAC |
105
+
106
+ ### Cost Comparison Example
107
+
108
+ | Version | File Size | Prompt Tokens | Input Cost |
109
+ |---------|-----------|---------------|------------|
110
+ | Original (1080p) | 52 MB | 14,757 | $0.00221 |
111
+ | Compressed (720p) | 2.6 MB | 13,157 | $0.00197 |
112
+ | **Savings** | **95%** | **10.8%** | **10.8%** |
113
+
114
+ ## Output
115
+
116
+ The CLI provides:
117
+ - Video content analysis from Gemini
118
+ - Token usage breakdown (prompt, response, total)
119
+ - Estimated cost based on Gemini pricing
120
+
121
+ ## Pricing Reference
122
+
123
+ Gemini 2.5 Flash (as of 2025):
124
+ - Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens (video)
125
+ - Output: $0.60 per 1M tokens, $3.50 per 1M thinking tokens
126
+
127
+ A 1-hour video costs approximately **$0.11-0.32** to analyze.
@@ -0,0 +1,56 @@
1
+ import os
2
+ import time
3
+ from dotenv import load_dotenv
4
+ from google import genai
5
+
6
# Legacy standalone script: upload one hard-coded video to Gemini, wait for it
# to be processed, ask for an explanation, and print token usage with an
# estimated cost.

# Load environment variables from .env file
load_dotenv()

# Initialize the client with the API key
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Upload the video file
video_path = "MATLAB_Modernizer.mp4"
print(f"Uploading video: {video_path}")
myfile = client.files.upload(file=video_path)
print(f"Upload complete. File URI: {myfile.uri}")

# Wait for the file to be processed (leave the PROCESSING state)
print("Waiting for file to be processed...")
while myfile.state.name == "PROCESSING":
    print(".", end="", flush=True)
    time.sleep(2)
    # Re-fetch the file to observe its current state.
    myfile = client.files.get(name=myfile.name)

if myfile.state.name == "FAILED":
    raise ValueError(f"File processing failed: {myfile.state.name}")

print(f"\nFile state: {myfile.state.name}")

# Generate content using Gemini 2.5 Flash
print("\nAnalyzing video with Gemini 2.5 Flash...")
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[myfile, "Explain what happens in this video"]
)
print("\n--- Video Analysis ---")
print(response.text)

# Show usage/cost information
print("\n--- Usage Information ---")
# The attribute can exist and still be None, so test the value rather than
# mere presence of the attribute.
usage = getattr(response, "usage_metadata", None)
if usage is not None:
    # Individual counts may be None; treat those as zero so the arithmetic
    # below cannot raise TypeError.
    prompt_tokens = usage.prompt_token_count or 0
    response_tokens = usage.candidates_token_count or 0
    total_tokens = usage.total_token_count or 0
    print(f"Prompt tokens: {prompt_tokens}")
    print(f"Response tokens: {response_tokens}")
    print(f"Total tokens: {total_tokens}")

    # Gemini 2.5 Flash pricing (as of 2025):
    # Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens for video
    # Output: $0.60 per 1M tokens, $3.50 per 1M thinking tokens
    # NOTE: the estimate below applies the flat $0.15 / $0.60 rates only; it
    # does not model the video input rate or thinking tokens.
    input_cost = (prompt_tokens / 1_000_000) * 0.15
    output_cost = (response_tokens / 1_000_000) * 0.60
    total_cost = input_cost + output_cost
    print("\nEstimated cost:")
    print(f" Input: ${input_cost:.6f}")
    print(f" Output: ${output_cost:.6f}")
    print(f" Total: ${total_cost:.6f}")
@@ -0,0 +1,3 @@
1
+ """Merleau - Video analysis using Google's Gemini API."""
2
+
3
+ __version__ = "0.3.0"
@@ -0,0 +1,215 @@
1
+ """Command-line interface for Merleau video analysis."""
2
+
3
+ import argparse
4
+ import os
5
+ import sys
6
+ import time
7
+ from dataclasses import dataclass
8
+ from typing import Callable, Optional
9
+
10
+ from dotenv import load_dotenv
11
+ from google import genai
12
+
13
+
14
@dataclass
class AnalysisResult:
    """Outcome of one video analysis: model text, token usage, and estimated cost."""

    # Text returned by the model.
    text: str

    # Token counts taken from the response's usage metadata.
    prompt_tokens: int
    response_tokens: int
    total_tokens: int

    # Estimated cost in dollars: input side, output side, and their sum.
    input_cost: float
    output_cost: float
    total_cost: float
24
+
25
+
26
def wait_for_processing(
    client,
    file,
    on_progress: Optional[Callable] = None,
    poll_interval: float = 2,
    timeout: Optional[float] = None,
):
    """Poll until ``file`` is no longer in the ``PROCESSING`` state.

    Args:
        client: Object exposing ``files.get(name=...)`` to re-fetch the file.
        file: File handle with ``name`` and ``state.name`` attributes.
        on_progress: Optional callback invoked once per poll. When omitted,
            a dot is printed per poll and a newline is printed once polling
            ends.
        poll_interval: Seconds to sleep between polls (default 2).
        timeout: Optional upper bound, in seconds, on the total wait.
            ``None`` (the default) waits for as long as the file stays in
            ``PROCESSING``.

    Returns:
        The most recently fetched file object (or ``file`` itself if it was
        never in ``PROCESSING``).

    Raises:
        TimeoutError: If ``timeout`` is given and elapses while the file is
            still processing.
    """
    # Monotonic clock so the deadline is immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    while file.state.name == "PROCESSING":
        if deadline is not None and time.monotonic() >= deadline:
            if not on_progress:
                # Terminate the line of dots before reporting the failure.
                print()
            raise TimeoutError(
                f"Timed out after {timeout} seconds waiting for file processing"
            )
        if on_progress:
            on_progress()
        else:
            print(".", end="", flush=True)
        time.sleep(poll_interval)
        file = client.files.get(name=file.name)

    if not on_progress:
        print()
    return file
38
+
39
+
40
def calculate_cost(usage):
    """Estimate the dollar cost of a request from its usage metadata.

    Args:
        usage: Object with ``prompt_token_count`` and
            ``candidates_token_count`` attributes. Either may be ``None``,
            in which case it is counted as zero tokens.

    Returns:
        Tuple ``(input_cost, output_cost, total_cost)`` of floats.
    """
    # Gemini 2.5 Flash pricing (as of 2025):
    # Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens for video
    # Output: $0.60 per 1M tokens, $3.50 per 1M thinking tokens
    # NOTE: only the flat $0.15 / $0.60 rates are applied here; the video
    # input rate and thinking tokens are not modeled.
    input_rate_per_million = 0.15
    output_rate_per_million = 0.60

    # Counts can be None; `or 0` keeps the division from raising TypeError.
    prompt_tokens = usage.prompt_token_count or 0
    response_tokens = usage.candidates_token_count or 0

    input_cost = (prompt_tokens / 1_000_000) * input_rate_per_million
    output_cost = (response_tokens / 1_000_000) * output_rate_per_million
    return input_cost, output_cost, input_cost + output_cost
48
+
49
+
50
def print_usage(usage):
    """Write the token counts in ``usage`` and the estimated cost to stdout."""
    print("\n--- Usage Information ---")
    token_rows = (
        ("Prompt tokens: ", usage.prompt_token_count),
        ("Response tokens: ", usage.candidates_token_count),
        ("Total tokens: ", usage.total_token_count),
    )
    for label, count in token_rows:
        print(f"{label}{count}")

    # Cost is derived only after the token counts have been shown.
    input_cost, output_cost, total_cost = calculate_cost(usage)
    print("\nEstimated cost:")
    cost_rows = zip(
        (" Input: ", " Output: ", " Total: "),
        (input_cost, output_cost, total_cost),
    )
    for label, amount in cost_rows:
        print(f"{label}${amount:.6f}")
62
+
63
+
64
def analyze_video(
    video_path: str,
    prompt: str = "Explain what happens in this video",
    model: str = "gemini-2.5-flash",
    api_key: Optional[str] = None,
    on_upload: Optional[Callable[[str], None]] = None,
    on_processing: Optional[Callable] = None,
    on_analyzing: Optional[Callable] = None,
) -> AnalysisResult:
    """
    Analyze a video file using Gemini.

    Args:
        video_path: Path to the video file
        prompt: Analysis prompt
        model: Gemini model to use
        api_key: Optional API key (falls back to env var)
        on_upload: Callback when upload completes (receives file URI)
        on_processing: Callback during processing (called repeatedly)
        on_analyzing: Callback when analysis starts

    Returns:
        AnalysisResult with text, tokens, and cost. Token counts missing from
        the response are reported as 0 and a missing text as "".

    Raises:
        ValueError: If API key not found or video_path is not an existing file
        RuntimeError: If file processing fails
    """
    # Local import keeps this function self-contained.
    from types import SimpleNamespace

    load_dotenv()

    api_key = api_key or os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY not found in environment or .env file")

    # isfile (not exists) so that a directory is rejected before upload.
    if not os.path.isfile(video_path):
        raise ValueError(f"Video file not found: {video_path}")

    client = genai.Client(api_key=api_key)

    # Upload video
    myfile = client.files.upload(file=video_path)
    if on_upload:
        on_upload(myfile.uri)

    # Wait for processing
    myfile = wait_for_processing(client, myfile, on_progress=on_processing)

    if myfile.state.name == "FAILED":
        raise RuntimeError("File processing failed")

    # Generate analysis
    if on_analyzing:
        on_analyzing()

    response = client.models.generate_content(
        model=model,
        contents=[myfile, prompt]
    )

    # Extract usage info. The metadata object and each count may be None, so
    # normalize to integers before doing arithmetic or building the result.
    usage = response.usage_metadata
    prompt_tokens = getattr(usage, "prompt_token_count", None) or 0
    response_tokens = getattr(usage, "candidates_token_count", None) or 0
    total_tokens = getattr(usage, "total_token_count", None) or 0
    input_cost, output_cost, total_cost = calculate_cost(
        SimpleNamespace(
            prompt_token_count=prompt_tokens,
            candidates_token_count=response_tokens,
        )
    )

    return AnalysisResult(
        # Keep the declared `str` type even if the response carries no text.
        text=response.text or "",
        prompt_tokens=prompt_tokens,
        response_tokens=response_tokens,
        total_tokens=total_tokens,
        input_cost=input_cost,
        output_cost=output_cost,
        total_cost=total_cost,
    )
136
+
137
+
138
def analyze(video_path, prompt, model, show_cost):
    """CLI wrapper: run the analysis, narrate progress, and print the result.

    Prints the analysis text, followed by token usage and estimated cost when
    ``show_cost`` is truthy. A ``ValueError`` or ``RuntimeError`` is reported
    on stderr and the process exits with status 1.
    """

    def report_upload(uri):
        print(f"Upload complete. File URI: {uri}")
        print("Waiting for file to be processed...", end="")

    def report_tick():
        print(".", end="", flush=True)

    def report_analysis_start():
        # Finish the line of progress dots before the next status message.
        print()
        print(f"\nAnalyzing video with {model}...")

    try:
        print(f"Uploading video: {video_path}")

        outcome = analyze_video(
            video_path=video_path,
            prompt=prompt,
            model=model,
            on_upload=report_upload,
            on_processing=report_tick,
            on_analyzing=report_analysis_start,
        )

        print("\n--- Video Analysis ---")
        print(outcome.text)

        if not show_cost:
            return

        print("\n--- Usage Information ---")
        print(f"Prompt tokens: {outcome.prompt_tokens}")
        print(f"Response tokens: {outcome.response_tokens}")
        print(f"Total tokens: {outcome.total_tokens}")
        print("\nEstimated cost:")
        print(f" Input: ${outcome.input_cost:.6f}")
        print(f" Output: ${outcome.output_cost:.6f}")
        print(f" Total: ${outcome.total_cost:.6f}")

    except (ValueError, RuntimeError) as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)
182
+
183
+
184
def main():
    """Parse the ``ponty`` command line and run the analysis."""
    cli = argparse.ArgumentParser(
        prog="ponty",
        description="Analyze videos using Google's Gemini API",
    )

    # Positional argument: the video to analyze.
    cli.add_argument("video", help="Path to the video file to analyze")

    # Optional overrides for the prompt and the model.
    cli.add_argument(
        "-p",
        "--prompt",
        default="Explain what happens in this video",
        help="Prompt for the analysis (default: 'Explain what happens in this video')",
    )
    cli.add_argument(
        "-m",
        "--model",
        default="gemini-2.5-flash",
        help="Gemini model to use (default: gemini-2.5-flash)",
    )

    # Cost output is on by default; this flag turns it off.
    cli.add_argument(
        "--no-cost",
        action="store_true",
        help="Hide usage and cost information",
    )

    options = cli.parse_args()
    analyze(
        options.video,
        options.prompt,
        options.model,
        show_cost=not options.no_cost,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,19 @@
1
+ [project]
2
+ name = "merleau"
3
+ version = "0.3.0"
4
+ description = "Video analysis using Google's Gemini 2.5 Flash API"
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "google-genai",
8
+ "python-dotenv",
9
+ ]
10
+
11
+ [project.optional-dependencies]
12
+ web = ["streamlit>=1.30.0"]
13
+
14
+ [project.scripts]
15
+ ponty = "merleau.cli:main"
16
+
17
+ [build-system]
18
+ requires = ["hatchling"]
19
+ build-backend = "hatchling.build"