screex 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- screex-0.1.0/LICENSE +21 -0
- screex-0.1.0/MANIFEST.in +3 -0
- screex-0.1.0/PKG-INFO +150 -0
- screex-0.1.0/README.md +126 -0
- screex-0.1.0/pyproject.toml +52 -0
- screex-0.1.0/screex/SKILL.md +44 -0
- screex-0.1.0/screex/__init__.py +1 -0
- screex-0.1.0/screex/cli.py +161 -0
- screex-0.1.0/screex/core/__init__.py +0 -0
- screex-0.1.0/screex/core/analyzer.py +71 -0
- screex-0.1.0/screex/core/index.py +49 -0
- screex-0.1.0/screex/core/manifest.py +63 -0
- screex-0.1.0/screex/core/mapper.py +39 -0
- screex-0.1.0/screex/core/ocr.py +37 -0
- screex-0.1.0/screex/core/segment.py +41 -0
- screex-0.1.0/screex/core/source.py +71 -0
- screex-0.1.0/screex/skill.py +24 -0
- screex-0.1.0/screex.egg-info/PKG-INFO +150 -0
- screex-0.1.0/screex.egg-info/SOURCES.txt +31 -0
- screex-0.1.0/screex.egg-info/dependency_links.txt +1 -0
- screex-0.1.0/screex.egg-info/entry_points.txt +2 -0
- screex-0.1.0/screex.egg-info/requires.txt +6 -0
- screex-0.1.0/screex.egg-info/top_level.txt +1 -0
- screex-0.1.0/setup.cfg +4 -0
- screex-0.1.0/tests/test_analyzer.py +67 -0
- screex-0.1.0/tests/test_cli.py +59 -0
- screex-0.1.0/tests/test_index.py +20 -0
- screex-0.1.0/tests/test_manifest.py +44 -0
- screex-0.1.0/tests/test_mapper.py +27 -0
- screex-0.1.0/tests/test_ocr.py +24 -0
- screex-0.1.0/tests/test_segment.py +28 -0
- screex-0.1.0/tests/test_skill.py +14 -0
- screex-0.1.0/tests/test_source.py +26 -0
screex-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rushikesh Hiray
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
screex-0.1.0/MANIFEST.in
ADDED
screex-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: screex
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Screen-recording understanding: turn a screencast into a queryable index of UI states for transcripts, Q&A, and how-to / bug-report generation.
|
|
5
|
+
Author-email: Rushikesh Hiray <rhiray03@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/blueprintparadise/Screex
|
|
8
|
+
Project-URL: Repository, https://github.com/blueprintparadise/Screex
|
|
9
|
+
Project-URL: Issues, https://github.com/blueprintparadise/Screex/issues
|
|
10
|
+
Keywords: screen-recording,screencast,ocr,llm,claude,claude-skill,agents,video-understanding,ui-understanding
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Multimedia :: Video
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
15
|
+
Requires-Python: >=3.9
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: opencv-python
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: rapidocr-onnxruntime
|
|
21
|
+
Provides-Extra: test
|
|
22
|
+
Requires-Dist: pytest; extra == "test"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# Screex
|
|
26
|
+
|
|
27
|
+
**Screen-recording understanding for agents.** Screex turns a screencast into a queryable
|
|
28
|
+
**index** of UI states — each with the on-screen text (OCR), what text changed since the
|
|
29
|
+
previous state, a thumbnail, and a full-resolution keyframe — so an LLM/agent can produce an
|
|
30
|
+
action transcript, answer questions, or generate a how-to guide / bug report from a recording.
|
|
31
|
+
|
|
32
|
+
- **Training-free & model-agnostic** — no fine-tuned UI model; any LLM can read the index.
|
|
33
|
+
- **`pip install`-only** — OCR via [`rapidocr-onnxruntime`](https://pypi.org/project/rapidocr-onnxruntime/), no system binaries.
|
|
34
|
+
- **Cheap by design** — the on-screen text is plain text (nearly free to read); full-res
|
|
35
|
+
keyframes are consulted only when the text is insufficient.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
### From PyPI
|
|
42
|
+
```bash
|
|
43
|
+
pip install screex
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### From source
|
|
47
|
+
```bash
|
|
48
|
+
git clone https://github.com/blueprintparadise/Screex.git
|
|
49
|
+
cd Screex
|
|
50
|
+
pip install -e . # add ".[test]" to also install pytest
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Both give you a `screex` command (entry point `screex.cli:main`). Requires Python ≥ 3.9.
|
|
54
|
+
First run downloads the small RapidOCR ONNX models automatically.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quickstart (CLI)
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Build the index for a screen recording
|
|
62
|
+
screex index path/to/recording.mp4 --fps 2
|
|
63
|
+
# (or, without installing the package:)
|
|
64
|
+
python -m screex.cli index path/to/recording.mp4 --fps 2
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
This writes:
|
|
68
|
+
```
|
|
69
|
+
path/to/recording.screex/
|
|
70
|
+
index.json # the ScreenIndex (ordered UI states)
|
|
71
|
+
frames/00000.png # full-res keyframe per state
|
|
72
|
+
frames/00000_thumb.png  # thumbnail per state
|
|
73
|
+
...
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### `index` options
|
|
77
|
+
| Flag | Default | Meaning |
|
|
78
|
+
|------|---------|---------|
|
|
79
|
+
| `--fps` | `2` | frames sampled per second (raise for fast-moving recordings) |
|
|
80
|
+
| `--change-threshold` | `0.04` | visual-change fraction (0–1) that starts a new UI state — lower = more states, higher = fewer |
|
|
81
|
+
| `--thumb-width` | `320` | thumbnail width in px |
|
|
82
|
+
| `--out` | `<recording>.screex` | output directory |
|
|
83
|
+
|
|
84
|
+
### What `index.json` contains
|
|
85
|
+
An ordered list of `states`, each with:
|
|
86
|
+
`t_start` / `t_end`, `ocr_text` (on-screen text lines), `text_added` / `text_removed`
|
|
87
|
+
(text that appeared/disappeared vs the previous state — the strongest signal of what the user
|
|
88
|
+
did), and `thumbnail` / `keyframe` paths.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Use as a Claude skill
|
|
93
|
+
|
|
94
|
+
Screex ships a `SKILL.md` that teaches Claude to build the index and turn it into one of three
|
|
95
|
+
views: an **action transcript**, **Q&A** over the recording, or a **how-to / bug report**.
|
|
96
|
+
|
|
97
|
+
1. **Install the package** so `python -m screex.cli` is available in the environment Claude
|
|
98
|
+
uses (`pip install -e .`).
|
|
99
|
+
2. **Install the skill** — the package bundles `SKILL.md`, so one command installs it where
|
|
100
|
+
Claude Code discovers skills:
|
|
101
|
+
```bash
|
|
102
|
+
screex skill --install # ~/.claude/skills/screex/
|
|
103
|
+
screex skill --install --dir <project>/.claude/skills/screex # per-project
|
|
104
|
+
screex skill --path # just print the target path
|
|
105
|
+
```
|
|
106
|
+
3. **Use it** — in Claude Code, just ask in natural language, e.g.:
|
|
107
|
+
- *"Use screex to turn `~/Downloads/bug-repro.mp4` into a bug report."*
|
|
108
|
+
- *"What steps does this screen recording show?"*
|
|
109
|
+
- *"From this demo, write a how-to doc."*
|
|
110
|
+
|
|
111
|
+
Claude runs `screex index`, reads `index.json`, skims the on-screen text across states, and
|
|
112
|
+
escalates to a full-res keyframe only when the text isn't enough — then produces the
|
|
113
|
+
transcript / answer / document.
|
|
114
|
+
|
|
115
|
+
> The skill is model-agnostic: the same `index.json` can be read by any LLM/agent, not only
|
|
116
|
+
> Claude.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## How it works
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
recording → sample frames → segment into UI states → per state: OCR text + text-diff
|
|
124
|
+
→ write thumbnail + full-res keyframe → index.json
|
|
125
|
+
↓
|
|
126
|
+
views (agent-driven): transcript · Q&A · how-to / bug report
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
`screex/core/`:
|
|
130
|
+
- `source` — decode & sample frames (OpenCV)
|
|
131
|
+
- `segment` — group frames into settled UI states by visual change
|
|
132
|
+
- `ocr` — RapidOCR text extraction + text-diff between states
|
|
133
|
+
- `index` — the `ScreenState` / `ScreenIndex` schema (JSON)
|
|
134
|
+
|
|
135
|
+
`screex/cli.py` wires them into the `screex index` command.
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Development
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
pip install -e ".[test]"
|
|
143
|
+
python -m pytest -q
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## License
|
|
149
|
+
|
|
150
|
+
[MIT](LICENSE) © 2026 Rushikesh Hiray
|
screex-0.1.0/README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Screex
|
|
2
|
+
|
|
3
|
+
**Screen-recording understanding for agents.** Screex turns a screencast into a queryable
|
|
4
|
+
**index** of UI states — each with the on-screen text (OCR), what text changed since the
|
|
5
|
+
previous state, a thumbnail, and a full-resolution keyframe — so an LLM/agent can produce an
|
|
6
|
+
action transcript, answer questions, or generate a how-to guide / bug report from a recording.
|
|
7
|
+
|
|
8
|
+
- **Training-free & model-agnostic** — no fine-tuned UI model; any LLM can read the index.
|
|
9
|
+
- **`pip install`-only** — OCR via [`rapidocr-onnxruntime`](https://pypi.org/project/rapidocr-onnxruntime/), no system binaries.
|
|
10
|
+
- **Cheap by design** — the on-screen text is plain text (nearly free to read); full-res
|
|
11
|
+
keyframes are consulted only when the text is insufficient.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
### From PyPI
|
|
18
|
+
```bash
|
|
19
|
+
pip install screex
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### From source
|
|
23
|
+
```bash
|
|
24
|
+
git clone https://github.com/blueprintparadise/Screex.git
|
|
25
|
+
cd Screex
|
|
26
|
+
pip install -e . # add ".[test]" to also install pytest
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Both give you a `screex` command (entry point `screex.cli:main`). Requires Python ≥ 3.9.
|
|
30
|
+
First run downloads the small RapidOCR ONNX models automatically.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Quickstart (CLI)
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Build the index for a screen recording
|
|
38
|
+
screex index path/to/recording.mp4 --fps 2
|
|
39
|
+
# (or, without installing the package:)
|
|
40
|
+
python -m screex.cli index path/to/recording.mp4 --fps 2
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
This writes:
|
|
44
|
+
```
|
|
45
|
+
path/to/recording.screex/
|
|
46
|
+
index.json # the ScreenIndex (ordered UI states)
|
|
47
|
+
frames/00000.png # full-res keyframe per state
|
|
48
|
+
frames/00000_thumb.png  # thumbnail per state
|
|
49
|
+
...
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### `index` options
|
|
53
|
+
| Flag | Default | Meaning |
|
|
54
|
+
|------|---------|---------|
|
|
55
|
+
| `--fps` | `2` | frames sampled per second (raise for fast-moving recordings) |
|
|
56
|
+
| `--change-threshold` | `0.04` | visual-change fraction (0–1) that starts a new UI state — lower = more states, higher = fewer |
|
|
57
|
+
| `--thumb-width` | `320` | thumbnail width in px |
|
|
58
|
+
| `--out` | `<recording>.screex` | output directory |
|
|
59
|
+
|
|
60
|
+
### What `index.json` contains
|
|
61
|
+
An ordered list of `states`, each with:
|
|
62
|
+
`t_start` / `t_end`, `ocr_text` (on-screen text lines), `text_added` / `text_removed`
|
|
63
|
+
(text that appeared/disappeared vs the previous state — the strongest signal of what the user
|
|
64
|
+
did), and `thumbnail` / `keyframe` paths.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Use as a Claude skill
|
|
69
|
+
|
|
70
|
+
Screex ships a `SKILL.md` that teaches Claude to build the index and turn it into one of three
|
|
71
|
+
views: an **action transcript**, **Q&A** over the recording, or a **how-to / bug report**.
|
|
72
|
+
|
|
73
|
+
1. **Install the package** so `python -m screex.cli` is available in the environment Claude
|
|
74
|
+
uses (`pip install -e .`).
|
|
75
|
+
2. **Install the skill** — the package bundles `SKILL.md`, so one command installs it where
|
|
76
|
+
Claude Code discovers skills:
|
|
77
|
+
```bash
|
|
78
|
+
screex skill --install # ~/.claude/skills/screex/
|
|
79
|
+
screex skill --install --dir <project>/.claude/skills/screex # per-project
|
|
80
|
+
screex skill --path # just print the target path
|
|
81
|
+
```
|
|
82
|
+
3. **Use it** — in Claude Code, just ask in natural language, e.g.:
|
|
83
|
+
- *"Use screex to turn `~/Downloads/bug-repro.mp4` into a bug report."*
|
|
84
|
+
- *"What steps does this screen recording show?"*
|
|
85
|
+
- *"From this demo, write a how-to doc."*
|
|
86
|
+
|
|
87
|
+
Claude runs `screex index`, reads `index.json`, skims the on-screen text across states, and
|
|
88
|
+
escalates to a full-res keyframe only when the text isn't enough — then produces the
|
|
89
|
+
transcript / answer / document.
|
|
90
|
+
|
|
91
|
+
> The skill is model-agnostic: the same `index.json` can be read by any LLM/agent, not only
|
|
92
|
+
> Claude.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## How it works
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
recording → sample frames → segment into UI states → per state: OCR text + text-diff
|
|
100
|
+
→ write thumbnail + full-res keyframe → index.json
|
|
101
|
+
↓
|
|
102
|
+
views (agent-driven): transcript · Q&A · how-to / bug report
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
`screex/core/`:
|
|
106
|
+
- `source` — decode & sample frames (OpenCV)
|
|
107
|
+
- `segment` — group frames into settled UI states by visual change
|
|
108
|
+
- `ocr` — RapidOCR text extraction + text-diff between states
|
|
109
|
+
- `index` — the `ScreenState` / `ScreenIndex` schema (JSON)
|
|
110
|
+
|
|
111
|
+
`screex/cli.py` wires them into the `screex index` command.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Development
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
pip install -e ".[test]"
|
|
119
|
+
python -m pytest -q
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## License
|
|
125
|
+
|
|
126
|
+
[MIT](LICENSE) © 2026 Rushikesh Hiray
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "screex"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Screen-recording understanding: turn a screencast into a queryable index of UI states for transcripts, Q&A, and how-to / bug-report generation."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Rushikesh Hiray", email = "rhiray03@gmail.com" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"screen-recording",
|
|
15
|
+
"screencast",
|
|
16
|
+
"ocr",
|
|
17
|
+
"llm",
|
|
18
|
+
"claude",
|
|
19
|
+
"claude-skill",
|
|
20
|
+
"agents",
|
|
21
|
+
"video-understanding",
|
|
22
|
+
"ui-understanding",
|
|
23
|
+
]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Topic :: Multimedia :: Video",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"opencv-python",
|
|
32
|
+
"numpy",
|
|
33
|
+
"rapidocr-onnxruntime",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
test = ["pytest"]
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
screex = "screex.cli:main"
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://github.com/blueprintparadise/Screex"
|
|
44
|
+
Repository = "https://github.com/blueprintparadise/Screex"
|
|
45
|
+
Issues = "https://github.com/blueprintparadise/Screex/issues"
|
|
46
|
+
|
|
47
|
+
[tool.setuptools]
|
|
48
|
+
packages = ["screex", "screex.core"]
|
|
49
|
+
include-package-data = true
|
|
50
|
+
|
|
51
|
+
[tool.setuptools.package-data]
|
|
52
|
+
screex = ["SKILL.md"]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: screex
|
|
3
|
+
description: Use when the user wants Claude to understand a screen recording / screencast / demo / bug-repro video — e.g. "what are the steps in this recording?", "turn this into a how-to doc", "write a bug report from this repro", "what URL did they open?". Screex builds a queryable index of UI states (with on-screen text) and Claude reads it to produce a transcript, answer questions, or generate docs.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Screex — screen-recording understanding
|
|
7
|
+
|
|
8
|
+
## When to use
|
|
9
|
+
The user points you at a screen recording (a screencast, demo, tutorial, or bug repro) and
|
|
10
|
+
wants a step transcript, a how-to doc, a bug report, or answers to questions about it.
|
|
11
|
+
|
|
12
|
+
## Build the index
|
|
13
|
+
Run:
|
|
14
|
+
`python -m screex.cli index <recording> --fps 2`
|
|
15
|
+
(raise `--fps` for fast-moving recordings; lower `--change-threshold` to split states more
|
|
16
|
+
eagerly.) This writes `<recording>.screex/index.json` plus per-state `frames/NNNNN.png`
|
|
17
|
+
(full-res keyframe) and `frames/NNNNN_thumb.png` (thumbnail).
|
|
18
|
+
|
|
19
|
+
## Read the index
|
|
20
|
+
`Read` `index.json`. It is an ordered list of UI `states`, each with `t_start`/`t_end`,
|
|
21
|
+
`ocr_text` (the on-screen text), `text_added` / `text_removed` (what text appeared or
|
|
22
|
+
disappeared vs the previous state — the strongest signal of what the user did), and paths to
|
|
23
|
+
a `thumbnail` and full-res `keyframe`. The on-screen text is plain text — reading it across
|
|
24
|
+
states is cheap.
|
|
25
|
+
|
|
26
|
+
## Produce one of three views
|
|
27
|
+
|
|
28
|
+
- **Action transcript:** walk the states in order; use `text_added`/`text_removed` plus the
|
|
29
|
+
thumbnail to narrate timestamped steps, e.g. "0:04 opened Settings; 0:09 entered an API
|
|
30
|
+
key; 0:14 an 'invalid key' error appeared."
|
|
31
|
+
- **Q&A:** answer the user's question by scanning `ocr_text` across states (cheap). `Read`
|
|
32
|
+
the full-res `keyframe` PNG for a state only when the text is insufficient (small icons,
|
|
33
|
+
layout, colour).
|
|
34
|
+
- **Doc / bug report:** format the transcript into a how-to guide, or a structured
|
|
35
|
+
reproduction report (steps to reproduce, expected vs actual).
|
|
36
|
+
|
|
37
|
+
## Cost discipline
|
|
38
|
+
The `ocr_text` and `text_*` fields are text and nearly free to read. Escalate to a
|
|
39
|
+
`keyframe` image only for the few states where the text doesn't answer the question.
|
|
40
|
+
|
|
41
|
+
## Caveats
|
|
42
|
+
`ocr_text` can contain minor OCR noise (stray glyphs), and a busy recording can produce many
|
|
43
|
+
near-duplicate consecutive states — collapse states whose `ocr_text` is essentially identical
|
|
44
|
+
when you narrate. Tune `--change-threshold` up to merge states, down to split them.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string; reported by the CLI's `--version` flag.
__version__ = "0.1.0"
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from screex import __version__
|
|
7
|
+
from screex.core import source, mapper, analyzer
|
|
8
|
+
from screex.core.manifest import Manifest, FrameRecord
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def analyze(video, fps=5.0, cols=120, sensitivity=0.06, edge=False, out=None, cut_threshold=0.5):
    """Analyze a video into per-frame PNG/ASCII files plus a ``manifest.json``.

    Each frame yielded by ``source.iter_frames`` is converted to grayscale,
    scored against the previous frame (motion score and histogram
    similarity), rendered to ASCII, and written under ``<out_dir>/frames``.
    Frames whose motion score reaches ``sensitivity`` are flagged, grouped
    into events, and classified using ``cut_threshold``.

    Args:
        video: Path to the input video.
        fps: Sampling rate passed to ``source.iter_frames``.
        cols: ASCII grid width passed to ``mapper.frame_to_ascii``.
        sensitivity: Motion-score threshold used to flag event frames.
        edge: Forwarded to ``mapper.frame_to_ascii`` as ``edge``.
        out: Output directory; defaults to ``<video stem>.screex`` next to
            the video.
        cut_threshold: Forwarded to ``analyzer.classify_events``.

    Returns:
        Path of the written ``manifest.json``.

    Raises:
        OSError: If OpenCV reports that a frame PNG could not be written.
    """
    import cv2

    video = Path(video)
    info = source.video_info(str(video))
    out_dir = Path(out) if out else video.parent / f"{video.stem}.screex"
    frames_dir = out_dir / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)

    records = []
    similarities = []
    prev_gray = None
    for idx, t, bgr in source.iter_frames(str(video), fps):
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # The first frame has no predecessor: treat it as "no motion" and
        # "fully similar" so it can never be flagged as an event.
        score = 0.0 if prev_gray is None else analyzer.motion_score(prev_gray, gray)
        sim = 1.0 if prev_gray is None else analyzer.histogram_similarity(prev_gray, gray)
        prev_gray = gray
        similarities.append(sim)

        ascii_text = mapper.frame_to_ascii(gray, cols, edge=edge)
        name = f"{idx:05d}"
        png_rel = f"frames/{name}.png"
        txt_rel = f"frames/{name}.txt"
        png_path = out_dir / png_rel
        # cv2.imwrite signals failure through its return value rather than
        # an exception; without this check the manifest would silently
        # reference a PNG that does not exist.
        if not cv2.imwrite(str(png_path), bgr):
            raise OSError(f"failed to write frame image: {png_path}")
        (out_dir / txt_rel).write_text(ascii_text, encoding="utf-8")

        records.append(FrameRecord(
            idx=idx, t=round(t, 3), score=round(score, 4),
            event=False, ascii=txt_rel, png=png_rel,
        ))

    scores = [r.score for r in records]
    times = [r.t for r in records]
    flags = analyzer.flag_events(scores, sensitivity)
    for r, f in zip(records, flags):
        r.event = f
    events = analyzer.group_events(scores, times, flags)
    events = analyzer.classify_events(events, similarities, cut_threshold)

    manifest = Manifest(
        video=video.name, duration=round(info["duration"], 3),
        sampled_fps=fps, cols=cols, frames=records, events=events,
    )
    manifest_path = out_dir / "manifest.json"
    manifest.save(manifest_path)
    return manifest_path
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def index(recording, fps=2.0, change_threshold=0.04, thumb_width=320, out=None):
    """Build a ScreenIndex for a screen recording and write it to disk.

    Frames sampled from the recording are grouped into segments by
    ``segment.segment_stream``. For each segment the representative frame is
    OCR'd, diffed against the previous segment's text, and saved as a
    full-resolution keyframe plus a thumbnail under ``<out_dir>/frames``.
    The resulting states are written to ``<out_dir>/index.json``.

    Args:
        recording: Path to the screen recording.
        fps: Sampling rate passed to ``source.iter_frames``.
        change_threshold: Forwarded to ``segment.segment_stream``.
        thumb_width: Thumbnail width in pixels; the height is scaled to keep
            the frame's aspect ratio (minimum 1 px).
        out: Output directory; defaults to ``<recording stem>.screex`` next
            to the recording.

    Returns:
        Path of the written ``index.json``.

    Raises:
        ValueError: If ``thumb_width`` is smaller than 1.
        OSError: If OpenCV reports that a keyframe or thumbnail could not be
            written.
    """
    # Fail fast on an unusable thumbnail size, before the heavy imports and
    # before any frame has been decoded or written.
    if thumb_width < 1:
        raise ValueError(f"thumb_width must be >= 1, got {thumb_width!r}")

    import cv2
    from screex.core import source, segment, ocr
    from screex.core.index import ScreenState, ScreenIndex

    recording = Path(recording)
    info = source.video_info(str(recording))
    out_dir = Path(out) if out else recording.parent / f"{recording.stem}.screex"
    frames_dir = out_dir / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)

    def _save_png(rel_path, image):
        # cv2.imwrite signals failure through its return value rather than an
        # exception; surface it so index.json never points at a missing file.
        target = out_dir / rel_path
        if not cv2.imwrite(str(target), image):
            raise OSError(f"failed to write image: {target}")

    states = []
    prev_ocr = []  # the first state is diffed against "no text"
    for seg in segment.segment_stream(source.iter_frames(str(recording), fps), change_threshold):
        bgr = seg.frame_bgr
        text = ocr.extract_text(bgr)
        added, removed = ocr.text_diff(prev_ocr, text)
        prev_ocr = text

        name = f"{seg.idx:05d}"
        key_rel = f"frames/{name}.png"
        thumb_rel = f"frames/{name}_thumb.png"
        _save_png(key_rel, bgr)
        # Scale the height by the same factor as the width; clamp to 1 px so
        # extremely wide frames still produce a valid image.
        th = max(1, int(bgr.shape[0] * thumb_width / bgr.shape[1]))
        _save_png(thumb_rel, cv2.resize(bgr, (thumb_width, th)))

        states.append(ScreenState(
            idx=seg.idx, t_start=round(seg.t_start, 3), t_end=round(seg.t_end, 3),
            thumbnail=thumb_rel, keyframe=key_rel,
            ocr_text=text, text_added=added, text_removed=removed,
        ))

    screen_index = ScreenIndex(
        video=recording.name, duration=round(info["duration"], 3),
        sampled_fps=fps, states=states,
    )
    index_path = out_dir / "index.json"
    screen_index.save(index_path)
    return index_path
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def main(argv=None):
    """Command-line entry point for the ``screex`` program.

    Builds the argument parser with the ``analyze``, ``index``, ``capture``
    and ``skill`` sub-commands, parses ``argv`` (argparse falls back to
    ``sys.argv`` when it is ``None``), runs the selected command and prints
    a one-line result.
    """
    parser = argparse.ArgumentParser(prog="screex")
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # analyze: ASCII frames + manifest
    analyze_cmd = commands.add_parser(
        "analyze", help="analyze a video into ASCII frames + manifest"
    )
    analyze_cmd.add_argument("video")
    analyze_cmd.add_argument(
        "--fps", type=float, default=5.0, help="frames sampled per second"
    )
    analyze_cmd.add_argument("--cols", type=int, default=120, help="ASCII grid width")
    analyze_cmd.add_argument(
        "--sensitivity",
        type=float,
        default=0.06,
        help="motion threshold (0..1) for flagging event frames",
    )
    analyze_cmd.add_argument(
        "--edge", action="store_true", help="emphasize edges/structure"
    )
    analyze_cmd.add_argument(
        "--out", default=None, help="output dir (default <video>.screex)"
    )
    analyze_cmd.add_argument(
        "--cut-threshold",
        type=float,
        default=0.5,
        help="histogram-similarity below which an event is a scene cut (0..1)",
    )

    # index: ScreenIndex of UI states
    index_cmd = commands.add_parser(
        "index", help="build a ScreenIndex from a screen recording"
    )
    index_cmd.add_argument("recording")
    index_cmd.add_argument(
        "--fps", type=float, default=2.0, help="frames sampled per second"
    )
    index_cmd.add_argument(
        "--change-threshold",
        type=float,
        default=0.04,
        help="motion fraction (0..1) that marks a new UI state",
    )
    index_cmd.add_argument(
        "--thumb-width", type=int, default=320, help="thumbnail width in px"
    )
    index_cmd.add_argument(
        "--out", default=None, help="output dir (default <recording>.screex)"
    )

    # capture: webcam clip
    capture_cmd = commands.add_parser("capture", help="record a short webcam clip")
    capture_cmd.add_argument("--webcam", action="store_true")
    capture_cmd.add_argument("--seconds", type=float, default=10.0)
    capture_cmd.add_argument("--out", default="capture.mp4")

    # skill: install / locate SKILL.md
    skill_cmd = commands.add_parser(
        "skill", help="install or locate the Screex Claude skill (SKILL.md)"
    )
    skill_cmd.add_argument(
        "--install",
        action="store_true",
        help="copy the bundled SKILL.md into the skills dir (default action)",
    )
    skill_cmd.add_argument(
        "--dir", default=None, help="target skills dir (default ~/.claude/skills/screex)"
    )
    skill_cmd.add_argument(
        "--path",
        action="store_true",
        help="print the install target path without writing",
    )

    args = parser.parse_args(argv)
    command = args.cmd

    if command == "analyze":
        manifest_path = analyze(
            args.video,
            fps=args.fps,
            cols=args.cols,
            sensitivity=args.sensitivity,
            edge=args.edge,
            out=args.out,
            cut_threshold=args.cut_threshold,
        )
        print(f"manifest: {manifest_path}")
        return

    if command == "index":
        index_path = index(
            args.recording,
            fps=args.fps,
            change_threshold=args.change_threshold,
            thumb_width=args.thumb_width,
            out=args.out,
        )
        print(f"index: {index_path}")
        return

    if command == "capture":
        clip = source.capture_webcam(args.out, args.seconds)
        print(f"captured: {clip}")
        return

    if command == "skill":
        from screex import skill as skill_mod

        if args.dir:
            target_dir = Path(args.dir)
        else:
            target_dir = skill_mod.default_skill_dir()
        if args.path:
            # Report only; nothing is written in this mode.
            print(target_dir / "SKILL.md")
            return
        installed = skill_mod.install_skill(args.dir)
        print(f"installed skill: {installed}")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Run the CLI when the module is executed directly (e.g. `python -m screex.cli ...`).
if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from screex.core.manifest import EventRecord
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def motion_score(prev_gray: np.ndarray, cur_gray: np.ndarray) -> float:
    """Return the mean absolute pixel difference of two frames, divided by 255.

    Both inputs are widened to ``int16`` before subtracting so the difference
    of unsigned 8-bit values cannot wrap around.
    """
    before = np.asarray(prev_gray, dtype=np.int16)
    after = np.asarray(cur_gray, dtype=np.int16)
    mean_abs_delta = np.abs(after - before).mean()
    return float(mean_abs_delta) / 255.0
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def histogram_similarity(prev_gray, cur_gray, bins: int = 64) -> float:
    """Return a 0..1 similarity between the intensity histograms of two frames.

    Identical arrays short-circuit to ``1.0``. Otherwise each frame is binned
    over ``[0, 256)`` into ``bins`` buckets, normalised to sum to one, and the
    Pearson correlation of the two histograms is returned with negative
    values clamped to ``0.0``. If either histogram is constant (zero standard
    deviation) the correlation is undefined, so the result is ``1.0`` when
    the histograms are equal and ``0.0`` otherwise.
    """
    first = np.asarray(prev_gray)
    second = np.asarray(cur_gray)
    if first.shape == second.shape and np.array_equal(first, second):
        return 1.0

    normalised = []
    for image in (first, second):
        counts, _edges = np.histogram(image, bins=bins, range=(0, 256))
        counts = counts.astype(np.float64)
        total = counts.sum()
        if total > 0:
            counts /= total
        normalised.append(counts)
    hist_first, hist_second = normalised

    if hist_first.std() == 0 or hist_second.std() == 0:
        if np.array_equal(hist_first, hist_second):
            return 1.0
        return 0.0

    correlation = float(np.corrcoef(hist_first, hist_second)[0, 1])
    return max(0.0, correlation)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def flag_events(scores, threshold: float):
    """Return one boolean per score: ``True`` where the score is at least ``threshold``."""
    return [value >= threshold for value in scores]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def group_events(scores, times, flags):
    """Collapse each run of consecutive flagged frames into one ``EventRecord``.

    An event spans from the time of the first flagged frame in a run to the
    time of the last one. ``peak_frame`` is the position of the highest score
    inside the run (the earliest position on ties) and ``peak_score`` is
    that score.
    """
    events = []
    total = len(flags)
    run_start = None  # position where the current flagged run began, if any

    # Iterate one step past the end so a run touching the last frame is closed.
    for pos in range(total + 1):
        flagged = pos < total and flags[pos]
        if flagged:
            if run_start is None:
                run_start = pos
            continue
        if run_start is None:
            continue

        run_end = pos - 1
        peak = max(range(run_start, run_end + 1), key=lambda k: scores[k])
        events.append(
            EventRecord(
                t_start=times[run_start],
                t_end=times[run_end],
                peak_frame=peak,
                peak_score=scores[peak],
            )
        )
        run_start = None
    return events
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def classify_events(events, similarities, cut_threshold: float = 0.5):
    """Label each event as ``"cut"`` or ``"motion"`` and attach a confidence.

    The histogram similarity at the event's ``peak_frame`` decides the type:
    below ``cut_threshold`` it is a cut, otherwise motion. Confidence is the
    distance from the threshold, scaled by the width of the side the
    similarity falls on and rounded to three decimals; it falls back to
    ``1.0`` when that width is not positive. Events are mutated in place and
    the same list is returned.
    """
    motion_span = 1.0 - cut_threshold
    for event in events:
        similarity = similarities[event.peak_frame]
        if similarity < cut_threshold:
            event.type = "cut"
            if cut_threshold > 0:
                event.confidence = round((cut_threshold - similarity) / cut_threshold, 3)
            else:
                event.confidence = 1.0
            continue

        event.type = "motion"
        if motion_span > 0:
            event.confidence = round((similarity - cut_threshold) / motion_span, 3)
        else:
            event.confidence = 1.0
    return events
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, asdict, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class ScreenState:
    """One UI state of a recording, as stored in the ``states`` list of ``index.json``."""

    # Identifier of the state; the CLI also uses it to name the frame files.
    idx: int
    # Time span covered by the state.
    t_start: float
    t_end: float
    # Paths to the thumbnail and the full-resolution keyframe image.
    thumbnail: str
    keyframe: str
    # On-screen text found by OCR for this state.
    ocr_text: list = field(default_factory=list)
    # Text that appeared / disappeared compared with the previous state.
    text_added: list = field(default_factory=list)
    text_removed: list = field(default_factory=list)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class ScreenIndex:
    """Ordered list of UI states for one recording, with JSON (de)serialisation."""

    # File name of the recording the index was built from.
    video: str
    # Duration of the recording as reported by the video source.
    duration: float
    # Sampling rate that was used when reading frames.
    sampled_fps: float
    # ``ScreenState`` entries in recording order.
    states: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain, JSON-serialisable dict of the index and its states."""
        return {
            "video": self.video,
            "duration": self.duration,
            "sampled_fps": self.sampled_fps,
            "states": [asdict(s) for s in self.states],
        }

    @classmethod
    def from_dict(cls, d: dict) -> "ScreenIndex":
        """Rebuild an index from a dict produced by :meth:`to_dict`.

        Raises:
            KeyError: If a top-level key is missing.
            TypeError: If a state dict has keys ``ScreenState`` does not accept.
        """
        return cls(
            video=d["video"],
            duration=d["duration"],
            sampled_fps=d["sampled_fps"],
            states=[ScreenState(**s) for s in d["states"]],
        )

    def save(self, path) -> None:
        """Write the index to ``path`` as indented UTF-8 JSON."""
        # ensure_ascii=False keeps non-ASCII text (file names, OCR output)
        # literal instead of expanding every character to a \uXXXX escape.
        # The file is written as UTF-8, and `load` reads both forms.
        payload = json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
        Path(path).write_text(payload, encoding="utf-8")

    @classmethod
    def load(cls, path) -> "ScreenIndex":
        """Read an index previously written by :meth:`save`."""
        return cls.from_dict(json.loads(Path(path).read_text(encoding="utf-8")))
|