openadapt-capture 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. openadapt_capture-0.1.0/.env.example +7 -0
  2. openadapt_capture-0.1.0/.github/workflows/publish.yml +30 -0
  3. openadapt_capture-0.1.0/.github/workflows/test.yml +55 -0
  4. openadapt_capture-0.1.0/.gitignore +53 -0
  5. openadapt_capture-0.1.0/PKG-INFO +227 -0
  6. openadapt_capture-0.1.0/README.md +183 -0
  7. openadapt_capture-0.1.0/docs/DESIGN.md +276 -0
  8. openadapt_capture-0.1.0/docs/VISUALIZATION_DESIGN.md +156 -0
  9. openadapt_capture-0.1.0/docs/images/codec_comparison.png +0 -0
  10. openadapt_capture-0.1.0/docs/images/demo.gif +0 -0
  11. openadapt_capture-0.1.0/docs/images/frame_comparison.png +0 -0
  12. openadapt_capture-0.1.0/docs/images/performance_stats.png +0 -0
  13. openadapt_capture-0.1.0/docs/images/viewer.html +602 -0
  14. openadapt_capture-0.1.0/openadapt_capture/__init__.py +140 -0
  15. openadapt_capture-0.1.0/openadapt_capture/audio.py +489 -0
  16. openadapt_capture-0.1.0/openadapt_capture/capture.py +300 -0
  17. openadapt_capture-0.1.0/openadapt_capture/cli.py +289 -0
  18. openadapt_capture-0.1.0/openadapt_capture/comparison.py +276 -0
  19. openadapt_capture-0.1.0/openadapt_capture/config.py +29 -0
  20. openadapt_capture-0.1.0/openadapt_capture/events.py +280 -0
  21. openadapt_capture-0.1.0/openadapt_capture/input.py +494 -0
  22. openadapt_capture-0.1.0/openadapt_capture/processing.py +548 -0
  23. openadapt_capture-0.1.0/openadapt_capture/recorder.py +304 -0
  24. openadapt_capture-0.1.0/openadapt_capture/stats.py +212 -0
  25. openadapt_capture-0.1.0/openadapt_capture/storage.py +617 -0
  26. openadapt_capture-0.1.0/openadapt_capture/video.py +440 -0
  27. openadapt_capture-0.1.0/openadapt_capture/visualize/__init__.py +11 -0
  28. openadapt_capture-0.1.0/openadapt_capture/visualize/demo.py +343 -0
  29. openadapt_capture-0.1.0/openadapt_capture/visualize/html.py +1538 -0
  30. openadapt_capture-0.1.0/openadapt_capture/visualize/overlays.py +469 -0
  31. openadapt_capture-0.1.0/pyproject.toml +91 -0
  32. openadapt_capture-0.1.0/scripts/compare_codecs.py +369 -0
  33. openadapt_capture-0.1.0/scripts/generate_readme_demo.py +167 -0
  34. openadapt_capture-0.1.0/scripts/generate_readme_plots.py +232 -0
  35. openadapt_capture-0.1.0/scripts/generate_real_capture_plot.py +217 -0
  36. openadapt_capture-0.1.0/tests/__init__.py +1 -0
  37. openadapt_capture-0.1.0/tests/test_comparison.py +225 -0
  38. openadapt_capture-0.1.0/tests/test_events.py +273 -0
  39. openadapt_capture-0.1.0/tests/test_highlevel.py +172 -0
  40. openadapt_capture-0.1.0/tests/test_processing.py +254 -0
  41. openadapt_capture-0.1.0/tests/test_processing_comprehensive.py +454 -0
  42. openadapt_capture-0.1.0/tests/test_stats.py +261 -0
  43. openadapt_capture-0.1.0/tests/test_storage.py +275 -0
@@ -0,0 +1,7 @@
1
+ # OpenAI API key (optional)
2
+ # Only needed for fast cloud-based audio transcription via: capture transcribe <dir> --api
3
+ # Without this key, you can still use local Whisper transcription: capture transcribe <dir>
4
+ # Recording works without any API key.
5
+ #
6
+ # Get your key at: https://platform.openai.com/api-keys
7
+ OPENAI_API_KEY=sk-...
@@ -0,0 +1,30 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ id-token: write # Required for trusted publishing
13
+ contents: read # Required for checkout
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v4
20
+ with:
21
+ version: "latest"
22
+
23
+ - name: Set up Python
24
+ run: uv python install 3.12
25
+
26
+ - name: Build package
27
+ run: uv build
28
+
29
+ - name: Publish to PyPI
30
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,55 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v4
21
+ with:
22
+ version: "latest"
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ run: uv python install ${{ matrix.python-version }}
26
+
27
+ - name: Install system dependencies
28
+ run: |
29
+ sudo apt-get update
30
+ sudo apt-get install -y ffmpeg libportaudio2
31
+
32
+ - name: Install dependencies
33
+ run: uv sync --extra dev
34
+
35
+ - name: Run tests
36
+ run: uv run pytest tests/ -v
37
+
38
+ lint:
39
+ runs-on: ubuntu-latest
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+
43
+ - name: Install uv
44
+ uses: astral-sh/setup-uv@v4
45
+ with:
46
+ version: "latest"
47
+
48
+ - name: Set up Python
49
+ run: uv python install 3.12
50
+
51
+ - name: Install dependencies
52
+ run: uv sync --extra dev
53
+
54
+ - name: Run ruff
55
+ run: uv run ruff check openadapt_capture/
@@ -0,0 +1,53 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+
28
+ # Testing
29
+ .pytest_cache/
30
+ .coverage
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+
35
+ # IDEs
36
+ .idea/
37
+ .vscode/
38
+ *.swp
39
+ *.swo
40
+ *~
41
+
42
+ # OS
43
+ .DS_Store
44
+ Thumbs.db
45
+
46
+ # uv
47
+ uv.lock
48
+
49
+ # Environment variables (contains API keys)
50
+ .env
51
+
52
+ # Demo captures (large data files)
53
+ demo_*/
@@ -0,0 +1,227 @@
1
+ Metadata-Version: 2.4
2
+ Name: openadapt-capture
3
+ Version: 0.1.0
4
+ Summary: GUI interaction capture - platform-agnostic event streams with time-aligned media
5
+ Project-URL: Homepage, https://github.com/OpenAdaptAI/openadapt-capture
6
+ Project-URL: Repository, https://github.com/OpenAdaptAI/openadapt-capture
7
+ Author-email: "MLDSAI Inc." <richard@mldsai.com>
8
+ License-Expression: MIT
9
+ Keywords: automation,capture,events,gui,rpa
10
+ Classifier: Development Status :: 2 - Pre-Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: av>=10.0.0
20
+ Requires-Dist: fire>=0.7.1
21
+ Requires-Dist: mss>=6.0.0
22
+ Requires-Dist: openai-whisper>=20250625
23
+ Requires-Dist: openai>=2.11.0
24
+ Requires-Dist: pillow>=9.0.0
25
+ Requires-Dist: pydantic-settings>=2.12.0
26
+ Requires-Dist: pydantic>=2.0.0
27
+ Requires-Dist: pynput>=1.7.0
28
+ Requires-Dist: sounddevice>=0.5.3
29
+ Requires-Dist: soundfile>=0.13.1
30
+ Provides-Extra: all
31
+ Requires-Dist: openadapt-privacy>=0.1.0; extra == 'all'
32
+ Requires-Dist: openai-whisper>=20230314; extra == 'all'
33
+ Provides-Extra: dev
34
+ Requires-Dist: matplotlib>=3.5.0; extra == 'dev'
35
+ Requires-Dist: numpy>=1.21.0; extra == 'dev'
36
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
37
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
38
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
39
+ Provides-Extra: privacy
40
+ Requires-Dist: openadapt-privacy>=0.1.0; extra == 'privacy'
41
+ Provides-Extra: transcribe
42
+ Requires-Dist: openai-whisper>=20230314; extra == 'transcribe'
43
+ Description-Content-Type: text/markdown
44
+
45
+ # openadapt-capture
46
+
47
+ GUI interaction capture - platform-agnostic event streams with time-aligned media.
48
+
49
+ > **Status:** Pre-alpha. See [docs/DESIGN.md](docs/DESIGN.md) for architecture discussion.
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ uv add openadapt-capture
55
+ ```
56
+
57
+ This includes everything needed to capture and replay GUI interactions (mouse, keyboard, screen recording).
58
+
59
+ For audio capture with Whisper transcription (large download):
60
+
61
+ ```bash
62
+ uv add "openadapt-capture[transcribe]"
63
+ ```
64
+
65
+ ## Quick Start
66
+
67
+ ### Capture
68
+
69
+ ```python
70
+ from openadapt_capture import Recorder
71
+
72
+ # Record GUI interactions
73
+ with Recorder("./my_capture", task_description="Demo task") as recorder:
74
+ # Captures mouse, keyboard, and screen until context exits
75
+ input("Press Enter to stop recording...")
76
+
77
+ print(f"Captured {recorder.event_count} events")
78
+ ```
79
+
80
+ ### Replay / Analysis
81
+
82
+ ```python
83
+ from openadapt_capture import Capture
84
+
85
+ # Load and iterate over time-aligned events
86
+ capture = Capture.load("./my_capture")
87
+
88
+ for action in capture.actions():
89
+ # Each action has an associated screenshot
90
+ print(f"{action.timestamp}: {action.type} at ({action.x}, {action.y})")
91
+ screenshot = action.screenshot # PIL Image at time of action
92
+ ```
93
+
94
+ ### Low-Level API
95
+
96
+ ```python
97
+ from openadapt_capture import (
98
+ create_capture, process_events,
99
+ MouseDownEvent, MouseButton,
100
+ )
101
+
102
+ # Create storage (platform and screen size auto-detected)
103
+ capture, storage = create_capture("./my_capture")
104
+
105
+ # Write raw events
106
+ storage.write_event(MouseDownEvent(timestamp=1.0, x=100, y=200, button=MouseButton.LEFT))
107
+
108
+ # Query and process
109
+ raw_events = storage.get_events()
110
+ actions = process_events(raw_events) # Merges clicks, drags, typed text
111
+ ```
112
+
113
+ ## Event Types
114
+
115
+ **Raw events** (captured):
116
+ - `mouse.move`, `mouse.down`, `mouse.up`, `mouse.scroll`
117
+ - `key.down`, `key.up`
118
+ - `screen.frame`, `audio.chunk`
119
+
120
+ **Actions** (processed):
121
+ - `mouse.singleclick`, `mouse.doubleclick`, `mouse.drag`
122
+ - `key.type` (merged keystrokes → text)
123
+
124
+ ## Architecture
125
+
126
+ ```
127
+ capture_directory/
128
+ ├── capture.db # SQLite: events, metadata
129
+ ├── video.mp4 # Screen recording
130
+ └── audio.flac # Audio (optional)
131
+ ```
132
+
133
+ ## Performance Statistics
134
+
135
+ Track event write latency and analyze capture performance:
136
+
137
+ ```python
138
+ from openadapt_capture import Recorder
139
+
140
+ with Recorder("./my_capture") as recorder:
141
+ input("Press Enter to stop...")
142
+
143
+ # Access performance statistics
144
+ summary = recorder.stats.summary()
145
+ print(f"Mean latency: {summary['mean_latency_ms']:.1f}ms")
146
+
147
+ # Generate performance plot
148
+ recorder.stats.plot(output_path="performance.png")
149
+ ```
150
+
151
+ ![Performance Statistics](docs/images/performance_stats.png)
152
+
153
+ ## Frame Extraction Verification
154
+
155
+ Compare extracted video frames against original images to verify lossless capture:
156
+
157
+ ```python
158
+ from openadapt_capture import compare_video_to_images, plot_comparison
159
+
160
+ # Compare frames
161
+ report = compare_video_to_images(
162
+ "capture/video.mp4",
163
+ [(timestamp, image) for timestamp, image in captured_frames],
164
+ )
165
+
166
+ print(f"Mean diff: {report.mean_diff_overall:.2f}")
167
+ print(f"Lossless: {report.is_lossless}")
168
+
169
+ # Visualize comparison
170
+ plot_comparison(report, output_path="comparison.png")
171
+ ```
172
+
173
+ ![Frame Comparison](docs/images/frame_comparison.png)
174
+
175
+ ## Visualization
176
+
177
+ Generate animated demos and interactive viewers from recordings:
178
+
179
+ ### Animated GIF Demo
180
+
181
+ ```python
182
+ from openadapt_capture import Capture, create_demo
183
+
184
+ capture = Capture.load("./my_capture")
185
+ create_demo(capture, output="demo.gif", fps=10, max_duration=15)
186
+ ```
187
+
188
+ ### Interactive HTML Viewer
189
+
190
+ ```python
191
+ from openadapt_capture import Capture, create_html
192
+
193
+ capture = Capture.load("./my_capture")
194
+ create_html(capture, output="viewer.html", include_audio=True)
195
+ ```
196
+
197
+ The HTML viewer includes:
198
+ - Timeline scrubber with event markers
199
+ - Frame-by-frame navigation
200
+ - Synchronized audio playback
201
+ - Event list with details panel
202
+ - Keyboard shortcuts (Space, arrows, Home/End)
203
+
204
+ ### Generate Demo from Command Line
205
+
206
+ ```bash
207
+ uv run python scripts/generate_readme_demo.py --duration 10
208
+ ```
209
+
210
+ ## Optional Extras
211
+
212
+ | Extra | Features |
213
+ |-------|----------|
214
+ | `transcribe` | Whisper transcription of captured audio |
215
+ | `privacy` | PII scrubbing (openadapt-privacy) |
216
+ | `all` | Everything |
217
+
218
+ ## Development
219
+
220
+ ```bash
221
+ uv sync --extra dev
222
+ uv run pytest
223
+ ```
224
+
225
+ ## License
226
+
227
+ MIT
@@ -0,0 +1,183 @@
1
+ # openadapt-capture
2
+
3
+ GUI interaction capture - platform-agnostic event streams with time-aligned media.
4
+
5
+ > **Status:** Pre-alpha. See [docs/DESIGN.md](docs/DESIGN.md) for architecture discussion.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ uv add openadapt-capture
11
+ ```
12
+
13
+ This includes everything needed to capture and replay GUI interactions (mouse, keyboard, screen recording).
14
+
15
+ For audio capture with Whisper transcription (large download):
16
+
17
+ ```bash
18
+ uv add "openadapt-capture[transcribe]"
19
+ ```
20
+
21
+ ## Quick Start
22
+
23
+ ### Capture
24
+
25
+ ```python
26
+ from openadapt_capture import Recorder
27
+
28
+ # Record GUI interactions
29
+ with Recorder("./my_capture", task_description="Demo task") as recorder:
30
+ # Captures mouse, keyboard, and screen until context exits
31
+ input("Press Enter to stop recording...")
32
+
33
+ print(f"Captured {recorder.event_count} events")
34
+ ```
35
+
36
+ ### Replay / Analysis
37
+
38
+ ```python
39
+ from openadapt_capture import Capture
40
+
41
+ # Load and iterate over time-aligned events
42
+ capture = Capture.load("./my_capture")
43
+
44
+ for action in capture.actions():
45
+ # Each action has an associated screenshot
46
+ print(f"{action.timestamp}: {action.type} at ({action.x}, {action.y})")
47
+ screenshot = action.screenshot # PIL Image at time of action
48
+ ```
49
+
50
+ ### Low-Level API
51
+
52
+ ```python
53
+ from openadapt_capture import (
54
+ create_capture, process_events,
55
+ MouseDownEvent, MouseButton,
56
+ )
57
+
58
+ # Create storage (platform and screen size auto-detected)
59
+ capture, storage = create_capture("./my_capture")
60
+
61
+ # Write raw events
62
+ storage.write_event(MouseDownEvent(timestamp=1.0, x=100, y=200, button=MouseButton.LEFT))
63
+
64
+ # Query and process
65
+ raw_events = storage.get_events()
66
+ actions = process_events(raw_events) # Merges clicks, drags, typed text
67
+ ```
68
+
69
+ ## Event Types
70
+
71
+ **Raw events** (captured):
72
+ - `mouse.move`, `mouse.down`, `mouse.up`, `mouse.scroll`
73
+ - `key.down`, `key.up`
74
+ - `screen.frame`, `audio.chunk`
75
+
76
+ **Actions** (processed):
77
+ - `mouse.singleclick`, `mouse.doubleclick`, `mouse.drag`
78
+ - `key.type` (merged keystrokes → text)
79
+
80
+ ## Architecture
81
+
82
+ ```
83
+ capture_directory/
84
+ ├── capture.db # SQLite: events, metadata
85
+ ├── video.mp4 # Screen recording
86
+ └── audio.flac # Audio (optional)
87
+ ```
88
+
89
+ ## Performance Statistics
90
+
91
+ Track event write latency and analyze capture performance:
92
+
93
+ ```python
94
+ from openadapt_capture import Recorder
95
+
96
+ with Recorder("./my_capture") as recorder:
97
+ input("Press Enter to stop...")
98
+
99
+ # Access performance statistics
100
+ summary = recorder.stats.summary()
101
+ print(f"Mean latency: {summary['mean_latency_ms']:.1f}ms")
102
+
103
+ # Generate performance plot
104
+ recorder.stats.plot(output_path="performance.png")
105
+ ```
106
+
107
+ ![Performance Statistics](docs/images/performance_stats.png)
108
+
109
+ ## Frame Extraction Verification
110
+
111
+ Compare extracted video frames against original images to verify lossless capture:
112
+
113
+ ```python
114
+ from openadapt_capture import compare_video_to_images, plot_comparison
115
+
116
+ # Compare frames
117
+ report = compare_video_to_images(
118
+ "capture/video.mp4",
119
+ [(timestamp, image) for timestamp, image in captured_frames],
120
+ )
121
+
122
+ print(f"Mean diff: {report.mean_diff_overall:.2f}")
123
+ print(f"Lossless: {report.is_lossless}")
124
+
125
+ # Visualize comparison
126
+ plot_comparison(report, output_path="comparison.png")
127
+ ```
128
+
129
+ ![Frame Comparison](docs/images/frame_comparison.png)
130
+
131
+ ## Visualization
132
+
133
+ Generate animated demos and interactive viewers from recordings:
134
+
135
+ ### Animated GIF Demo
136
+
137
+ ```python
138
+ from openadapt_capture import Capture, create_demo
139
+
140
+ capture = Capture.load("./my_capture")
141
+ create_demo(capture, output="demo.gif", fps=10, max_duration=15)
142
+ ```
143
+
144
+ ### Interactive HTML Viewer
145
+
146
+ ```python
147
+ from openadapt_capture import Capture, create_html
148
+
149
+ capture = Capture.load("./my_capture")
150
+ create_html(capture, output="viewer.html", include_audio=True)
151
+ ```
152
+
153
+ The HTML viewer includes:
154
+ - Timeline scrubber with event markers
155
+ - Frame-by-frame navigation
156
+ - Synchronized audio playback
157
+ - Event list with details panel
158
+ - Keyboard shortcuts (Space, arrows, Home/End)
159
+
160
+ ### Generate Demo from Command Line
161
+
162
+ ```bash
163
+ uv run python scripts/generate_readme_demo.py --duration 10
164
+ ```
165
+
166
+ ## Optional Extras
167
+
168
+ | Extra | Features |
169
+ |-------|----------|
170
+ | `transcribe` | Whisper transcription of captured audio |
171
+ | `privacy` | PII scrubbing (openadapt-privacy) |
172
+ | `all` | Everything |
173
+
174
+ ## Development
175
+
176
+ ```bash
177
+ uv sync --extra dev
178
+ uv run pytest
179
+ ```
180
+
181
+ ## License
182
+
183
+ MIT