merleau 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,11 +13,15 @@ See `research/positioning_merleau.md` for market positioning and differentiation
13
13
  ```bash
14
14
  # Install dependencies
15
15
  uv sync
16
+ uv sync --extra web # Include Streamlit
16
17
 
17
18
  # Run the CLI
18
19
  uv run ponty video.mp4
19
20
  uv run ponty video.mp4 -p "Custom prompt" -m gemini-2.0-flash
20
21
 
22
+ # Run the web UI
23
+ uv run streamlit run streamlit_app.py
24
+
21
25
  # Build package
22
26
  uv build
23
27
 
@@ -31,7 +35,10 @@ uv publish --token <token>
31
35
  merleau/
32
36
  ├── merleau/
33
37
  │ ├── __init__.py # Package version
34
- │ └── cli.py # CLI entry point (ponty command)
38
+ │ └── cli.py # CLI + core analyze_video() function
39
+ ├── streamlit_app.py # Web UI (run with: streamlit run streamlit_app.py)
40
+ ├── website/ # Landing page (GitHub Pages)
41
+ │ └── index.html # Single-page site
35
42
  ├── research/ # Market research and positioning
36
43
  ├── pyproject.toml # Package config with [project.scripts] entry point
37
44
  └── analyze_video.py # Legacy standalone script
@@ -1,7 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: merleau
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Video analysis using Google's Gemini 2.5 Flash API
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: google-genai
7
7
  Requires-Dist: python-dotenv
8
+ Provides-Extra: web
9
+ Requires-Dist: streamlit>=1.30.0; extra == 'web'
@@ -5,6 +5,10 @@
5
5
 
6
6
  A CLI tool for video understanding using Google's Gemini API. Named after [Maurice Merleau-Ponty](https://en.wikipedia.org/wiki/Maurice_Merleau-Ponty), the phenomenologist philosopher whose work on perception inspires how this tool helps you perceive your videos.
7
7
 
8
+ **[Website](https://yanndebray.github.io/merleau/)** · **[PyPI](https://pypi.org/project/merleau/)** · **[GitHub](https://github.com/yanndebray/merleau)**
9
+
10
+ https://github.com/user-attachments/assets/e2c5b476-ddab-49ab-a35c-9ae5e880c25c
11
+
8
12
  ## Why Merleau?
9
13
 
10
14
  Google Gemini is the **only major AI provider** with native video understanding—Claude doesn't support video, and GPT-4o requires frame extraction workarounds. Merleau is the first CLI that actually understands video rather than analyzing frames.
@@ -16,6 +20,7 @@ Google Gemini is the **only major AI provider** with native video understanding
16
20
  - **Customizable prompts** - Ask any question about your video
17
21
  - **Cost estimation** - Token usage tracking and cost breakdown
18
22
  - **Multiple models** - Support for different Gemini models
23
+ - **Web UI** - Streamlit app for browser-based analysis
19
24
 
20
25
  ## Installation
21
26
 
@@ -53,6 +58,16 @@ ponty video.mp4 -m gemini-2.0-flash
53
58
  ponty video.mp4 --no-cost
54
59
  ```
55
60
 
61
+ ### Web UI
62
+
63
+ ```bash
64
+ # Install with web dependencies
65
+ pip install "merleau[web]"
66
+
67
+ # Run the Streamlit app
68
+ streamlit run streamlit_app.py
69
+ ```
70
+
56
71
  ### Options
57
72
 
58
73
  | Option | Description |
@@ -61,6 +76,41 @@ ponty video.mp4 --no-cost
61
76
  | `-m, --model` | Gemini model to use (default: gemini-2.5-flash) |
62
77
  | `--no-cost` | Hide usage and cost information |
63
78
 
79
+ ## Reducing Costs with Compression
80
+
81
+ Compressing videos before analysis can reduce API costs by ~10-15% without degrading analysis quality. Gemini's token count is affected by video resolution and bitrate.
82
+
83
+ ### Quick Compression with ffmpeg
84
+
85
+ ```bash
86
+ # Basic compression (recommended)
87
+ ffmpeg -i input.mp4 -vcodec libx264 -crf 28 -preset medium -vf "scale=1280:-2" output.mp4
88
+
89
+ # Aggressive compression (smaller file, lower quality)
90
+ ffmpeg -i input.mp4 -vcodec libx264 -crf 32 -preset medium -vf "scale=640:-2" output.mp4
91
+
92
+ # Re-encode audio to 128 kbps AAC (keeps speech usable for analysis)
93
+ ffmpeg -i input.mp4 -vcodec libx264 -crf 28 -preset medium -vf "scale=1280:-2" -acodec aac -b:a 128k output.mp4
94
+ ```
95
+
96
+ ### Compression Options Explained
97
+
98
+ | Option | Description |
99
+ |--------|-------------|
100
+ | `-crf 28` | Quality level (18-28 recommended, higher = smaller file) |
101
+ | `-preset medium` | Encoding speed/quality tradeoff |
102
+ | `-vf "scale=1280:-2"` | Resize to 1280px width, maintain aspect ratio |
103
+ | `-an` | Remove audio (if not needed) |
104
+ | `-acodec aac -b:a 128k` | Compress audio to 128kbps AAC |
105
+
106
+ ### Cost Comparison Example
107
+
108
+ | Version | File Size | Prompt Tokens | Input Cost |
109
+ |---------|-----------|---------------|------------|
110
+ | Original (1080p) | 52 MB | 14,757 | $0.00221 |
111
+ | Compressed (720p) | 2.6 MB | 13,157 | $0.00197 |
112
+ | **Savings** | **95%** | **10.8%** | **10.8%** |
113
+
64
114
  ## Output
65
115
 
66
116
  The CLI provides:
@@ -1,3 +1,3 @@
1
1
  """Merleau - Video analysis using Google's Gemini API."""
2
2
 
3
- __version__ = "0.2.0"
3
+ __version__ = "0.3.0"
@@ -0,0 +1,215 @@
1
+ """Command-line interface for Merleau video analysis."""
2
+
3
+ import argparse
4
+ import os
5
+ import sys
6
+ import time
7
+ from dataclasses import dataclass
8
+ from typing import Callable, Optional
9
+
10
+ from dotenv import load_dotenv
11
+ from google import genai
12
+
13
+
14
@dataclass
class AnalysisResult:
    """Outcome of one video analysis request.

    Attributes:
        text: Text returned by the model.
        prompt_tokens: Prompt token count taken from the usage metadata.
        response_tokens: Candidate (response) token count from the usage metadata.
        total_tokens: Total token count from the usage metadata.
        input_cost: Estimated cost attributed to the prompt tokens.
        output_cost: Estimated cost attributed to the response tokens.
        total_cost: Estimated overall cost (input plus output).
    """

    # Generated content
    text: str

    # Token accounting
    prompt_tokens: int
    response_tokens: int
    total_tokens: int

    # Cost estimate
    input_cost: float
    output_cost: float
    total_cost: float
24
+
25
+
26
def wait_for_processing(
    client,
    file,
    on_progress: Optional[Callable] = None,
    poll_interval: float = 2.0,
    timeout: Optional[float] = None,
):
    """Poll until ``file`` is no longer in the ``PROCESSING`` state.

    Args:
        client: Object exposing ``files.get(name=...)`` to re-fetch the file.
        file: File handle with ``name`` and ``state.name`` attributes.
        on_progress: Optional zero-argument callback invoked once per poll.
            When omitted, a dot is printed per poll and a newline at the end.
        poll_interval: Seconds to sleep between polls (default: 2).
        timeout: Maximum number of seconds to keep polling. ``None`` (the
            default) keeps the original behavior of waiting indefinitely.

    Returns:
        The file object as last seen (the input itself if it was never
        ``PROCESSING``).

    Raises:
        TimeoutError: If ``timeout`` is set and elapses while the file is
            still ``PROCESSING``.
    """
    deadline = None if timeout is None else time.monotonic() + timeout

    while file.state.name == "PROCESSING":
        # Bail out before sleeping again so a stuck upload cannot hang forever.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"File still processing after {timeout} seconds: {file.name}"
            )
        if on_progress:
            on_progress()
        else:
            print(".", end="", flush=True)
        time.sleep(poll_interval)
        file = client.files.get(name=file.name)

    # Terminate the line of dots printed by the default progress output.
    if not on_progress:
        print()
    return file
38
+
39
+
40
def calculate_cost(
    usage,
    input_price_per_million: float = 0.15,
    output_price_per_million: float = 0.60,
):
    """Estimate the cost of a request from its usage metadata.

    Args:
        usage: Object with ``prompt_token_count`` and ``candidates_token_count``
            attributes. Either may be ``None``, which is treated as 0 tokens.
        input_price_per_million: Price per 1M prompt tokens.
        output_price_per_million: Price per 1M response tokens.

    Returns:
        Tuple ``(input_cost, output_cost, total_cost)``.
    """
    # Defaults follow Gemini 2.5 Flash pricing (as of 2025):
    #   Input: $0.15 per 1M tokens (text/image), $0.075 per 1M tokens for video
    #   Output: $0.60 per 1M tokens, $3.50 for thinking tokens
    # NOTE(review): a single input rate is applied to every prompt token, so
    # the video and thinking-token rates above are not reflected here.
    prompt_tokens = usage.prompt_token_count or 0
    response_tokens = usage.candidates_token_count or 0

    input_cost = (prompt_tokens / 1_000_000) * input_price_per_million
    output_cost = (response_tokens / 1_000_000) * output_price_per_million
    return input_cost, output_cost, input_cost + output_cost
48
+
49
+
50
def print_usage(usage):
    """Write the token counts from ``usage`` and the estimated cost to stdout."""
    # Token counts go out first, before any cost is computed.
    token_lines = (
        "\n--- Usage Information ---",
        f"Prompt tokens: {usage.prompt_token_count}",
        f"Response tokens: {usage.candidates_token_count}",
        f"Total tokens: {usage.total_token_count}",
    )
    print("\n".join(token_lines))

    cost_in, cost_out, cost_all = calculate_cost(usage)
    cost_lines = (
        "\nEstimated cost:",
        f" Input: ${cost_in:.6f}",
        f" Output: ${cost_out:.6f}",
        f" Total: ${cost_all:.6f}",
    )
    print("\n".join(cost_lines))
62
+
63
+
64
def analyze_video(
    video_path: str,
    prompt: str = "Explain what happens in this video",
    model: str = "gemini-2.5-flash",
    api_key: Optional[str] = None,
    on_upload: Optional[Callable[[str], None]] = None,
    on_processing: Optional[Callable] = None,
    on_analyzing: Optional[Callable] = None,
) -> AnalysisResult:
    """
    Analyze a video file using Gemini.

    Uploads the file, waits for it to finish processing, sends it together
    with ``prompt`` to ``model``, and returns the response text with token
    counts and a cost estimate.

    Args:
        video_path: Path to the video file
        prompt: Analysis prompt
        model: Gemini model to use
        api_key: Optional API key (falls back to the GEMINI_API_KEY env var,
            after loading a .env file)
        on_upload: Callback when upload completes (receives file URI)
        on_processing: Callback during processing (called repeatedly)
        on_analyzing: Callback when analysis starts

    Returns:
        AnalysisResult with text, tokens, and cost

    Raises:
        ValueError: If API key not found or video_path is not an existing file
        RuntimeError: If file processing fails
    """
    load_dotenv()

    api_key = api_key or os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY not found in environment or .env file")

    # isfile rather than exists: a directory path would otherwise pass this
    # check and only fail later, inside the upload call.
    if not os.path.isfile(video_path):
        raise ValueError(f"Video file not found: {video_path}")

    client = genai.Client(api_key=api_key)

    # Upload video
    myfile = client.files.upload(file=video_path)
    if on_upload:
        on_upload(myfile.uri)

    # Wait for processing
    myfile = wait_for_processing(client, myfile, on_progress=on_processing)

    if myfile.state.name == "FAILED":
        raise RuntimeError("File processing failed")

    # Generate analysis
    if on_analyzing:
        on_analyzing()

    response = client.models.generate_content(
        model=model,
        contents=[myfile, prompt],
    )

    # Extract usage info
    usage = response.usage_metadata
    input_cost, output_cost, total_cost = calculate_cost(usage)

    return AnalysisResult(
        text=response.text,
        prompt_tokens=usage.prompt_token_count,
        response_tokens=usage.candidates_token_count,
        total_tokens=usage.total_token_count,
        input_cost=input_cost,
        output_cost=output_cost,
        total_cost=total_cost,
    )
136
+
137
+
138
def analyze(video_path, prompt, model, show_cost):
    """CLI wrapper around ``analyze_video``.

    Prints progress, the analysis text and (when ``show_cost`` is true) the
    token usage and cost estimate to stdout. A ``ValueError`` or
    ``RuntimeError`` is reported on stderr and the process exits with status 1.
    """

    def announce_upload(uri):
        print(f"Upload complete. File URI: {uri}")
        print("Waiting for file to be processed...", end="")

    def tick():
        print(".", end="", flush=True)

    def announce_analysis():
        # Close the line of progress dots before the next message.
        print()
        print(f"\nAnalyzing video with {model}...")

    try:
        print(f"Uploading video: {video_path}")

        result = analyze_video(
            video_path=video_path,
            prompt=prompt,
            model=model,
            on_upload=announce_upload,
            on_processing=tick,
            on_analyzing=announce_analysis,
        )

        print("\n--- Video Analysis ---")
        print(result.text)

        if show_cost:
            print("\n--- Usage Information ---")
            print(f"Prompt tokens: {result.prompt_tokens}")
            print(f"Response tokens: {result.response_tokens}")
            print(f"Total tokens: {result.total_tokens}")
            print("\nEstimated cost:")
            print(f" Input: ${result.input_cost:.6f}")
            print(f" Output: ${result.output_cost:.6f}")
            print(f" Total: ${result.total_cost:.6f}")

    except (ValueError, RuntimeError) as err:
        # Both failure kinds are reported the same way.
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)
182
+
183
+
184
def main():
    """Entry point for the ``ponty`` command: parse arguments, then analyze."""
    cli = argparse.ArgumentParser(
        prog="ponty",
        description="Analyze videos using Google's Gemini API",
    )
    cli.add_argument("video", help="Path to the video file to analyze")
    cli.add_argument(
        "-p",
        "--prompt",
        default="Explain what happens in this video",
        help="Prompt for the analysis (default: 'Explain what happens in this video')",
    )
    cli.add_argument(
        "-m",
        "--model",
        default="gemini-2.5-flash",
        help="Gemini model to use (default: gemini-2.5-flash)",
    )
    cli.add_argument(
        "--no-cost",
        action="store_true",
        help="Hide usage and cost information",
    )

    opts = cli.parse_args()
    # --no-cost is an opt-out flag, so it is inverted for show_cost.
    analyze(opts.video, opts.prompt, opts.model, show_cost=not opts.no_cost)


if __name__ == "__main__":
    main()
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "merleau"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  description = "Video analysis using Google's Gemini 2.5 Flash API"
5
5
  requires-python = ">=3.10"
6
6
  dependencies = [
@@ -8,6 +8,9 @@ dependencies = [
8
8
  "python-dotenv",
9
9
  ]
10
10
 
11
+ [project.optional-dependencies]
12
+ web = ["streamlit>=1.30.0"]
13
+
11
14
  [project.scripts]
12
15
  ponty = "merleau.cli:main"
13
16
 
@@ -0,0 +1,178 @@
1
+ """Streamlit web interface for Merleau video analysis."""
2
+
3
+ import os
4
+ import tempfile
5
+
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+
9
+ from merleau.cli import AnalysisResult, analyze_video
10
+
11
# Page config
st.set_page_config(
    page_title="Merleau - Video Understanding",
    page_icon="👁️",
    layout="centered",
)

# Load environment variables (used below to pre-fill the API key field)
load_dotenv()


# Header
st.title("👁️ Merleau")
st.markdown("*Video understanding from your browser*")

# Sidebar: API key, model choice and project links
with st.sidebar:
    st.header("Settings")

    api_key = st.text_input(
        "Gemini API Key",
        type="password",
        value=os.getenv("GEMINI_API_KEY", ""),
        help="Get your key from [Google AI Studio](https://aistudio.google.com/apikey)",
    )

    model = st.selectbox(
        "Model",
        ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-1.5-pro"],
        index=0,
    )

    st.divider()
    st.markdown("""
    **Links**
    - [GitHub](https://github.com/yanndebray/merleau)
    - [PyPI](https://pypi.org/project/merleau/)
    - [Documentation](https://yanndebray.github.io/merleau/)
    """)

# Main content: one tab to upload a file, one with recording tips
upload_tab, record_tab = st.tabs(["📁 Upload Video", "🎬 Record Screen"])

with upload_tab:
    uploaded_file = st.file_uploader(
        "Choose a video file",
        type=["mp4", "mov", "avi", "mkv", "webm"],
        help="Supported formats: MP4, MOV, AVI, MKV, WebM",
    )

    # Preview the upload as soon as one is present
    if uploaded_file:
        st.video(uploaded_file)

with record_tab:
    st.info("🎥 **Screen Recording** - Use your browser's built-in screen capture, then upload the recording above.")
    st.markdown("""
    **Quick recording options:**
    1. **Windows**: `Win + G` → Record
    2. **Mac**: `Cmd + Shift + 5` → Screen Recording
    3. **Chrome**: Extensions like Loom or Screencastify
    """)

# Prompt input
st.divider()
prompt = st.text_area(
    "What would you like to know about the video?",
    value="Explain what happens in this video",
    height=100,
)

# Analyze button (wide column) next to the cost toggle (narrow column)
button_col, cost_col = st.columns([3, 1])
with button_col:
    analyze_btn = st.button("🔍 Analyze Video", type="primary", use_container_width=True)
with cost_col:
    show_cost = st.checkbox("Show cost", value=True)
87
+
88
# Analysis: runs only on the rerun triggered by the Analyze button
if analyze_btn:
    if not api_key:
        st.error("Please enter your Gemini API key in the sidebar.")
    elif not uploaded_file:
        st.warning("Please upload a video file first.")
    else:
        # Keep the upload's own extension instead of always using ".mp4".
        # NOTE(review): the upload step presumably infers the MIME type from
        # the file extension, so a .webm/.mov saved as .mp4 would be mislabeled.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".mp4"

        # Progress indicators
        progress_text = st.empty()
        progress_bar = st.progress(0)

        tmp_path = None
        try:
            # Save uploaded file to temp location; analyze_video needs a path.
            # The path is recorded before writing so cleanup still happens if
            # the write itself fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp_path = tmp.name
                tmp.write(uploaded_file.getvalue())

            progress_text.text("📤 Uploading video...")
            progress_bar.progress(10)

            def on_upload(uri):
                progress_text.text("⏳ Processing video...")
                progress_bar.progress(30)

            # One-element list: a mutable cell the callback can update.
            processing_dots = [0]

            def on_processing():
                processing_dots[0] += 1
                # Advance 5% per poll, capped at 70% until analysis starts.
                progress = min(30 + (processing_dots[0] * 5), 70)
                progress_bar.progress(progress)

            def on_analyzing():
                progress_text.text("🧠 Analyzing with Gemini...")
                progress_bar.progress(80)

            result: AnalysisResult = analyze_video(
                video_path=tmp_path,
                prompt=prompt,
                model=model,
                api_key=api_key,
                on_upload=on_upload,
                on_processing=on_processing,
                on_analyzing=on_analyzing,
            )

            progress_bar.progress(100)

            # Display results
            st.success("Analysis complete!")

            st.markdown("### 📝 Analysis")
            st.markdown(result.text)

            if show_cost:
                st.divider()
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Prompt Tokens", f"{result.prompt_tokens:,}")
                with col2:
                    st.metric("Response Tokens", f"{result.response_tokens:,}")
                with col3:
                    st.metric("Total Cost", f"${result.total_cost:.4f}")

            # Store in session for history
            if "history" not in st.session_state:
                st.session_state.history = []
            st.session_state.history.append({
                "filename": uploaded_file.name,
                "prompt": prompt,
                "result": result.text,
                "cost": result.total_cost,
            })

        except Exception as e:
            # Top-level UI boundary: show any failure instead of crashing.
            st.error(f"Error: {e}")
        finally:
            # Clear the progress widgets on success and on failure, so a
            # half-filled bar is not left next to an error message.
            progress_text.empty()
            progress_bar.empty()
            # Cleanup temp file
            if tmp_path and os.path.exists(tmp_path):
                os.unlink(tmp_path)
168
+
169
# History section: lists earlier analyses from this session, newest first
if "history" in st.session_state and st.session_state.history:
    history = st.session_state.history
    st.divider()
    with st.expander(f"📜 Analysis History ({len(history)} items)"):
        for i, item in enumerate(reversed(history)):
            st.markdown(f"**{item['filename']}** - ${item['cost']:.4f}")

            # Add the ellipsis only when the prompt was actually truncated.
            prompt_text = item["prompt"]
            if len(prompt_text) > 50:
                preview = f"{prompt_text[:50]}..."
            else:
                preview = prompt_text
            st.caption(f"Prompt: {preview}")

            # Each button needs a unique key because the label repeats.
            if st.button("Show full analysis", key=f"history_{i}"):
                st.markdown(item["result"])
            st.divider()