screex 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
screex/SKILL.md ADDED
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: screex
3
+ description: Use when the user wants Claude to understand a screen recording / screencast / demo / bug-repro video — e.g. "what are the steps in this recording?", "turn this into a how-to doc", "write a bug report from this repro", "what URL did they open?". Screex builds a queryable index of UI states (with on-screen text) and Claude reads it to produce a transcript, answer questions, or generate docs.
4
+ ---
5
+
6
+ # Screex — screen-recording understanding
7
+
8
+ ## When to use
9
+ The user points you at a screen recording (a screencast, demo, tutorial, or bug repro) and
10
+ wants a step transcript, a how-to doc, a bug report, or answers to questions about it.
11
+
12
+ ## Build the index
13
+ Run:
14
+ `python -m screex.cli index <recording> --fps 2`
15
+ (raise `--fps` for fast-moving recordings; lower `--change-threshold` to split states more
16
+ eagerly.) This writes `<recording-stem>.screex/index.json` next to the recording (e.g. `demo.mp4` → `demo.screex/index.json`) plus per-state `frames/NNNNN.png`
17
+ (full-res keyframe) and `frames/NNNNN_thumb.png` (thumbnail).
18
+
19
+ ## Read the index
20
+ `Read` `index.json`. It is an ordered list of UI `states`, each with `t_start`/`t_end`,
21
+ `ocr_text` (the on-screen text), `text_added` / `text_removed` (what text appeared or
22
+ disappeared vs the previous state — the strongest signal of what the user did), and paths to
23
+ a `thumbnail` and full-res `keyframe`. The on-screen text is plain text — reading it across
24
+ states is cheap.
25
+
26
+ ## Produce one of three views
27
+
28
+ - **Action transcript:** walk the states in order; use `text_added`/`text_removed` plus the
29
+ thumbnail to narrate timestamped steps, e.g. "0:04 opened Settings; 0:09 entered an API
30
+ key; 0:14 an 'invalid key' error appeared."
31
+ - **Q&A:** answer the user's question by scanning `ocr_text` across states (cheap). `Read`
32
+ the full-res `keyframe` PNG for a state only when the text is insufficient (small icons,
33
+ layout, colour).
34
+ - **Doc / bug report:** format the transcript into a how-to guide, or a structured
35
+ reproduction report (steps to reproduce, expected vs actual).
36
+
37
+ ## Cost discipline
38
+ The `ocr_text` and `text_*` fields are text and nearly free to read. Escalate to a
39
+ `keyframe` image only for the few states where the text doesn't answer the question.
40
+
41
+ ## Caveats
42
+ `ocr_text` can contain minor OCR noise (stray glyphs), and a busy recording can produce many
43
+ near-duplicate consecutive states — collapse states whose `ocr_text` is essentially identical
44
+ when you narrate. Tune `--change-threshold` up to merge states, down to split them.
screex/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
screex/cli.py ADDED
@@ -0,0 +1,161 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from screex import __version__
7
+ from screex.core import source, mapper, analyzer
8
+ from screex.core.manifest import Manifest, FrameRecord
9
+
10
+
11
def analyze(video, fps=5.0, cols=120, sensitivity=0.06, edge=False, out=None, cut_threshold=0.5):
    """Render a video as per-frame PNG + ASCII files and write a motion manifest.

    Each sampled frame is written under ``<out>/frames`` as ``NNNNN.png`` and
    ``NNNNN.txt`` (its ASCII rendering, ``cols`` characters wide).  Frames whose
    motion score reaches ``sensitivity`` are flagged, consecutive flagged frames
    are grouped into events, and every event is classified with ``cut_threshold``.

    When ``out`` is falsy the output directory is ``<video stem>.screex`` in the
    video's own directory.  Returns the path of the written ``manifest.json``.
    """
    import cv2

    video_path = Path(video)
    meta = source.video_info(str(video_path))
    if out:
        out_dir = Path(out)
    else:
        out_dir = video_path.parent / f"{video_path.stem}.screex"
    (out_dir / "frames").mkdir(parents=True, exist_ok=True)

    records = []
    similarities = []
    previous = None  # grayscale version of the previously sampled frame
    for idx, t, bgr in source.iter_frames(str(video_path), fps):
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        if previous is None:
            # The first frame has nothing to be compared against.
            score, sim = 0.0, 1.0
        else:
            score = analyzer.motion_score(previous, gray)
            sim = analyzer.histogram_similarity(previous, gray)
        previous = gray
        similarities.append(sim)

        ascii_text = mapper.frame_to_ascii(gray, cols, edge=edge)
        stem = f"{idx:05d}"
        png_rel = f"frames/{stem}.png"
        txt_rel = f"frames/{stem}.txt"
        cv2.imwrite(str(out_dir / png_rel), bgr)
        (out_dir / txt_rel).write_text(ascii_text, encoding="utf-8")

        record = FrameRecord(
            idx=idx,
            t=round(t, 3),
            score=round(score, 4),
            event=False,  # filled in below once all scores are known
            ascii=txt_rel,
            png=png_rel,
        )
        records.append(record)

    scores = [record.score for record in records]
    times = [record.t for record in records]
    flags = analyzer.flag_events(scores, sensitivity)
    for record, flagged in zip(records, flags):
        record.event = flagged
    grouped = analyzer.group_events(scores, times, flags)
    events = analyzer.classify_events(grouped, similarities, cut_threshold)

    manifest_path = out_dir / "manifest.json"
    Manifest(
        video=video_path.name,
        duration=round(meta["duration"], 3),
        sampled_fps=fps,
        cols=cols,
        frames=records,
        events=events,
    ).save(manifest_path)
    return manifest_path
57
+
58
+
59
def index(recording, fps=2.0, change_threshold=0.04, thumb_width=320, out=None):
    """Build a ScreenIndex (``index.json``) for a screen recording.

    Frames sampled at ``fps`` are split into UI states by
    ``segment.segment_stream`` using ``change_threshold``.  For every state the
    keyframe is OCR'd, its text is diffed against the previous state's text, and
    a full-resolution PNG plus a ``thumb_width``-pixel-wide thumbnail are
    written under ``<out>/frames``.

    When ``out`` is falsy the output directory is ``<recording stem>.screex`` in
    the recording's own directory.  Returns the path of the written
    ``index.json``.
    """
    import cv2
    from screex.core import source, segment, ocr
    from screex.core.index import ScreenState, ScreenIndex

    rec_path = Path(recording)
    meta = source.video_info(str(rec_path))
    if out:
        out_dir = Path(out)
    else:
        out_dir = rec_path.parent / f"{rec_path.stem}.screex"
    (out_dir / "frames").mkdir(parents=True, exist_ok=True)

    states = []
    previous_text = []  # OCR lines of the preceding state (empty before the first)
    sampled = source.iter_frames(str(rec_path), fps)
    for seg in segment.segment_stream(sampled, change_threshold):
        keyframe = seg.frame_bgr
        lines = ocr.extract_text(keyframe)
        added, removed = ocr.text_diff(previous_text, lines)
        previous_text = lines

        stem = f"{seg.idx:05d}"
        key_rel = f"frames/{stem}.png"
        thumb_rel = f"frames/{stem}_thumb.png"
        cv2.imwrite(str(out_dir / key_rel), keyframe)
        # Keep the aspect ratio; never let the thumbnail height collapse to 0.
        height, width = keyframe.shape[0], keyframe.shape[1]
        thumb_height = max(1, int(height * thumb_width / width))
        thumb = cv2.resize(keyframe, (thumb_width, thumb_height))
        cv2.imwrite(str(out_dir / thumb_rel), thumb)

        states.append(
            ScreenState(
                idx=seg.idx,
                t_start=round(seg.t_start, 3),
                t_end=round(seg.t_end, 3),
                thumbnail=thumb_rel,
                keyframe=key_rel,
                ocr_text=lines,
                text_added=added,
                text_removed=removed,
            )
        )

    index_path = out_dir / "index.json"
    ScreenIndex(
        video=rec_path.name,
        duration=round(meta["duration"], 3),
        sampled_fps=fps,
        states=states,
    ).save(index_path)
    return index_path
98
+
99
+
100
def _build_parser():
    """Create the ``screex`` argument parser with its four subcommands."""
    parser = argparse.ArgumentParser(prog="screex")
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    commands = parser.add_subparsers(dest="cmd", required=True)

    analyze_cmd = commands.add_parser(
        "analyze", help="analyze a video into ASCII frames + manifest")
    analyze_cmd.add_argument("video")
    analyze_cmd.add_argument("--fps", type=float, default=5.0,
                             help="frames sampled per second")
    analyze_cmd.add_argument("--cols", type=int, default=120, help="ASCII grid width")
    analyze_cmd.add_argument("--sensitivity", type=float, default=0.06,
                             help="motion threshold (0..1) for flagging event frames")
    analyze_cmd.add_argument("--edge", action="store_true",
                             help="emphasize edges/structure")
    analyze_cmd.add_argument("--out", default=None,
                             help="output dir (default <video>.screex)")
    analyze_cmd.add_argument(
        "--cut-threshold", type=float, default=0.5,
        help="histogram-similarity below which an event is a scene cut (0..1)")

    index_cmd = commands.add_parser(
        "index", help="build a ScreenIndex from a screen recording")
    index_cmd.add_argument("recording")
    index_cmd.add_argument("--fps", type=float, default=2.0,
                           help="frames sampled per second")
    index_cmd.add_argument("--change-threshold", type=float, default=0.04,
                           help="motion fraction (0..1) that marks a new UI state")
    index_cmd.add_argument("--thumb-width", type=int, default=320,
                           help="thumbnail width in px")
    index_cmd.add_argument("--out", default=None,
                           help="output dir (default <recording>.screex)")

    capture_cmd = commands.add_parser("capture", help="record a short webcam clip")
    capture_cmd.add_argument("--webcam", action="store_true")
    capture_cmd.add_argument("--seconds", type=float, default=10.0)
    capture_cmd.add_argument("--out", default="capture.mp4")

    skill_cmd = commands.add_parser(
        "skill", help="install or locate the Screex Claude skill (SKILL.md)")
    skill_cmd.add_argument(
        "--install", action="store_true",
        help="copy the bundled SKILL.md into the skills dir (default action)")
    skill_cmd.add_argument("--dir", default=None,
                           help="target skills dir (default ~/.claude/skills/screex)")
    skill_cmd.add_argument("--path", action="store_true",
                           help="print the install target path without writing")
    return parser


def main(argv=None):
    """Command-line entry point: parse ``argv`` and run the chosen subcommand.

    ``argv`` defaults to ``sys.argv[1:]`` (argparse behaviour when ``None``).
    Each subcommand prints a single line naming the file it produced.
    """
    args = _build_parser().parse_args(argv)

    if args.cmd == "analyze":
        path = analyze(
            args.video,
            fps=args.fps,
            cols=args.cols,
            sensitivity=args.sensitivity,
            edge=args.edge,
            out=args.out,
            cut_threshold=args.cut_threshold,
        )
        print(f"manifest: {path}")
        return

    if args.cmd == "index":
        path = index(
            args.recording,
            fps=args.fps,
            change_threshold=args.change_threshold,
            thumb_width=args.thumb_width,
            out=args.out,
        )
        print(f"index: {path}")
        return

    if args.cmd == "capture":
        out = source.capture_webcam(args.out, args.seconds)
        print(f"captured: {out}")
        return

    if args.cmd == "skill":
        from screex import skill as skill_mod

        target_dir = Path(args.dir) if args.dir else skill_mod.default_skill_dir()
        if args.path:
            # Report where the skill would go without touching the filesystem.
            print(target_dir / "SKILL.md")
            return
        # Installing is the default action; --install is accepted but not required.
        target = skill_mod.install_skill(args.dir)
        print(f"installed skill: {target}")


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+
5
+ from screex.core.manifest import EventRecord
6
+
7
+
8
def motion_score(prev_gray: np.ndarray, cur_gray: np.ndarray) -> float:
    """Return the mean absolute pixel difference between two frames, divided by 255.

    Both inputs are converted to int16 before subtracting so the difference of
    8-bit values cannot wrap around.  Identical frames score 0.0; for 8-bit
    input a black-to-white change scores 1.0.
    """
    before = np.asarray(prev_gray, dtype=np.int16)
    after = np.asarray(cur_gray, dtype=np.int16)
    delta = np.abs(after - before)
    mean_delta = float(delta.mean())
    return mean_delta / 255.0
12
+
13
+
14
def histogram_similarity(prev_gray, cur_gray, bins: int = 64) -> float:
    """Return a 0..1 similarity between the intensity histograms of two frames.

    Each frame is binned into ``bins`` buckets over the range [0, 256), the
    histograms are normalised to sum to 1, and their correlation coefficient
    (``np.corrcoef``) is returned with negative values clamped to 0.0.

    Shortcuts: identical arrays return 1.0 straight away, and when either
    histogram is perfectly flat (zero standard deviation, so the correlation is
    undefined) the result is 1.0 if both histograms are equal, otherwise 0.0.
    """
    prev = np.asarray(prev_gray)
    cur = np.asarray(cur_gray)
    if prev.shape == cur.shape and np.array_equal(prev, cur):
        return 1.0

    normalised = []
    for image in (prev, cur):
        counts, _edges = np.histogram(image, bins=bins, range=(0, 256))
        hist = counts.astype(np.float64)
        total = hist.sum()
        if total > 0:
            hist = hist / total
        normalised.append(hist)
    hist_prev, hist_cur = normalised

    if hist_prev.std() == 0 or hist_cur.std() == 0:
        if np.array_equal(hist_prev, hist_cur):
            return 1.0
        return 0.0

    correlation = float(np.corrcoef(hist_prev, hist_cur)[0, 1])
    return max(0.0, correlation)
31
+
32
+
33
def flag_events(scores, threshold: float):
    """Return one flag per score: true where the score is at least ``threshold``."""
    flags = []
    for value in scores:
        flags.append(value >= threshold)
    return flags
35
+
36
+
37
def group_events(scores, times, flags):
    """Merge each run of consecutive flagged frames into one EventRecord.

    ``scores``, ``times`` and ``flags`` are parallel, indexed by frame.  Every
    event spans from the time of the first to the time of the last frame of its
    run and records the index and score of the highest-scoring frame in the run
    (the earliest one when several frames tie).
    """
    # First pass: find the inclusive (first, last) index bounds of every run.
    runs = []
    run_start = None
    for pos, flagged in enumerate(flags):
        if flagged:
            if run_start is None:
                run_start = pos
        elif run_start is not None:
            runs.append((run_start, pos - 1))
            run_start = None
    if run_start is not None:
        # The last run extends to the final frame.
        runs.append((run_start, len(flags) - 1))

    # Second pass: turn each run into an event.
    events = []
    for first, last in runs:
        peak = max(range(first, last + 1), key=lambda k: scores[k])
        events.append(
            EventRecord(
                t_start=times[first],
                t_end=times[last],
                peak_frame=peak,
                peak_score=scores[peak],
            )
        )
    return events
59
+
60
+
61
def classify_events(events, similarities, cut_threshold: float = 0.5):
    """Label each event as ``"cut"`` or ``"motion"`` and attach a confidence.

    The histogram similarity at the event's peak frame decides the label: below
    ``cut_threshold`` it is a cut, otherwise motion.  Confidence is the distance
    of that similarity from the threshold, scaled by the width of the side it
    falls on and rounded to 3 decimals; it is 1.0 when that width is not
    positive.  Events are modified in place and the same list is returned.
    """
    motion_span = 1.0 - cut_threshold
    for event in events:
        sim = similarities[event.peak_frame]
        if sim < cut_threshold:
            event.type = "cut"
            if cut_threshold > 0:
                event.confidence = round((cut_threshold - sim) / cut_threshold, 3)
            else:
                event.confidence = 1.0
            continue
        event.type = "motion"
        if motion_span > 0:
            event.confidence = round((sim - cut_threshold) / motion_span, 3)
        else:
            event.confidence = 1.0
    return events
screex/core/index.py ADDED
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, asdict, field
5
+ from pathlib import Path
6
+
7
+
8
@dataclass
class ScreenState:
    """One UI state of a recording, serialised as an entry of ``states`` in index.json."""

    # Identity and time span of the state.
    idx: int
    t_start: float
    t_end: float

    # Image paths for the state's thumbnail and full-resolution keyframe.
    thumbnail: str
    keyframe: str

    # Text fields; each instance gets its own fresh list by default.
    ocr_text: list = field(default_factory=list)       # on-screen text lines
    text_added: list = field(default_factory=list)     # lines new vs the previous state
    text_removed: list = field(default_factory=list)   # lines gone vs the previous state
18
+
19
+
20
@dataclass
class ScreenIndex:
    """Ordered list of UI states for one recording, persisted as ``index.json``."""

    video: str          # file name of the source recording
    duration: float     # recording duration in seconds
    sampled_fps: float  # sampling rate used while building the index
    states: list = field(default_factory=list)  # ScreenState entries, in order

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict; every state is expanded with ``asdict``."""
        return {
            "video": self.video,
            "duration": self.duration,
            "sampled_fps": self.sampled_fps,
            "states": [asdict(s) for s in self.states],
        }

    @classmethod
    def from_dict(cls, d: dict) -> "ScreenIndex":
        """Rebuild an index from the dict layout produced by ``to_dict``."""
        return cls(
            video=d["video"],
            duration=d["duration"],
            sampled_fps=d["sampled_fps"],
            states=[ScreenState(**s) for s in d["states"]],
        )

    def save(self, path) -> None:
        """Write the index to ``path`` as indented UTF-8 JSON.

        Non-ASCII characters (e.g. OCR'd text in other scripts) are written
        verbatim instead of as ``\\uXXXX`` escapes: the file is UTF-8 anyway and
        is meant to be read as plain text.  The parsed content is unchanged.
        """
        text = json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
        Path(path).write_text(text, encoding="utf-8")

    @classmethod
    def load(cls, path) -> "ScreenIndex":
        """Read an index previously written by ``save``."""
        return cls.from_dict(json.loads(Path(path).read_text(encoding="utf-8")))
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, asdict, field
5
+ from pathlib import Path
6
+
7
+
8
@dataclass
class FrameRecord:
    """Per-frame entry of a Manifest."""

    idx: int      # index of the sampled frame
    t: float      # timestamp of the frame in seconds
    score: float  # motion score relative to the previous sampled frame
    event: bool   # whether the frame was flagged as part of an event
    ascii: str    # path of the frame's ASCII rendering (.txt)
    png: str      # path of the frame's PNG image
16
+
17
+
18
@dataclass
class EventRecord:
    """A run of consecutive flagged frames, summarised as a single event."""

    t_start: float     # time of the first frame in the run
    t_end: float       # time of the last frame in the run
    peak_frame: int    # index of the highest-scoring frame in the run
    peak_score: float  # motion score of that frame

    # Classification results; these defaults hold until an event is classified.
    type: str = "motion"
    confidence: float = 0.0
26
+
27
+
28
@dataclass
class Manifest:
    """Result of analysing one video: per-frame records plus grouped events."""

    video: str          # file name of the analysed video
    duration: float     # video duration in seconds
    sampled_fps: float  # sampling rate used for the analysis
    cols: int           # width of the ASCII renderings, in characters
    frames: list = field(default_factory=list)  # FrameRecord entries
    events: list = field(default_factory=list)  # EventRecord entries

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict; frames and events are expanded with ``asdict``."""
        payload = {
            name: getattr(self, name)
            for name in ("video", "duration", "sampled_fps", "cols")
        }
        payload["frames"] = [asdict(frame) for frame in self.frames]
        payload["events"] = [asdict(event) for event in self.events]
        return payload

    @classmethod
    def from_dict(cls, d: dict) -> "Manifest":
        """Rebuild a manifest from the dict layout produced by ``to_dict``."""
        scalars = {
            name: d[name]
            for name in ("video", "duration", "sampled_fps", "cols")
        }
        frames = [FrameRecord(**item) for item in d["frames"]]
        events = [EventRecord(**item) for item in d["events"]]
        return cls(frames=frames, events=events, **scalars)

    def save(self, path) -> None:
        """Write the manifest to ``path`` as indented JSON (UTF-8 file)."""
        serialised = json.dumps(self.to_dict(), indent=2)
        Path(path).write_text(serialised, encoding="utf-8")

    @classmethod
    def load(cls, path) -> "Manifest":
        """Read a manifest previously written by ``save``."""
        raw = Path(path).read_text(encoding="utf-8")
        return cls.from_dict(json.loads(raw))
screex/core/mapper.py ADDED
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+
5
+ RAMP = " .:-=+*#%@"
6
+
7
+
8
def gray_to_ascii(gray: np.ndarray, ramp: str = RAMP) -> str:
    """Map a 2-D array of 0..255 intensities to text, one character per pixel.

    Each value selects a character from ``ramp`` via ``value * (len(ramp) - 1)
    // 255``, so 0 maps to the first character and 255 to the last.  Rows are
    joined with newlines.
    """
    # uint16 leaves headroom for the multiplication below (8-bit would overflow).
    levels = np.asarray(gray, dtype=np.uint16)
    last = len(ramp) - 1
    glyph_index = (levels * last // 255).astype(np.intp)
    glyphs = np.array(list(ramp))[glyph_index]
    lines = ["".join(row) for row in glyphs]
    return "\n".join(lines)
15
+
16
+
17
def auto_rows(width: int, height: int, cols: int, char_aspect: float = 2.0) -> int:
    """Return the number of text rows that keeps the image's aspect ratio at ``cols`` columns.

    ``char_aspect`` is the divisor applied to the row count (2.0 by default,
    i.e. half as many rows as a square-cell grid would need).  The result is
    rounded and never smaller than 1.
    """
    aspect = height / width
    rows = round(cols * aspect / char_aspect)
    return rows if rows > 1 else 1
19
+
20
+
21
def edge_magnitude(gray: np.ndarray) -> np.ndarray:
    """Return the gradient magnitude of ``gray`` as a uint8 image.

    The magnitude is rescaled so its largest value becomes 255; an image with
    no gradient at all comes back as all zeros.
    """
    grad_rows, grad_cols = np.gradient(np.asarray(gray, dtype=np.float32))
    magnitude = np.hypot(grad_cols, grad_rows)
    strongest = float(magnitude.max())
    if strongest > 0:
        magnitude = magnitude / strongest * 255.0
    return magnitude.astype(np.uint8)
29
+
30
+
31
def frame_to_ascii(gray: np.ndarray, cols: int, ramp: str = RAMP, edge: bool = False) -> str:
    """Render a grayscale frame as ASCII art ``cols`` characters wide.

    The frame is shrunk with area interpolation to ``cols`` columns and the row
    count chosen by ``auto_rows``.  With ``edge`` set, the shrunken image is
    replaced by its edge magnitude before being mapped onto ``ramp``.
    """
    import cv2

    height, width = gray.shape[:2]
    grid = (cols, auto_rows(width, height, cols))  # cv2 sizes are (width, height)
    reduced = cv2.resize(gray, grid, interpolation=cv2.INTER_AREA)
    source_image = edge_magnitude(reduced) if edge else reduced
    return gray_to_ascii(source_image, ramp)
screex/core/ocr.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ _engine = None
4
+
5
+
6
def _get_engine():
    """Return the shared RapidOCR engine, creating it on first use.

    The import and construction are deferred until the first call, and the
    instance is kept in the module-level ``_engine`` so later calls reuse it.
    """
    global _engine
    if _engine is not None:
        return _engine

    from rapidocr_onnxruntime import RapidOCR

    _engine = RapidOCR()
    return _engine
12
+
13
+
14
def extract_text(bgr) -> list:
    """Return on-screen text lines from a BGR frame, in reading order (top->bottom, left->right).

    Detections are ordered by the top edge of their box, bucketed in steps of
    10 so boxes at nearly the same height count as one row, then by left edge.
    Entries whose text is empty after stripping whitespace are dropped.
    """
    detections, _ = _get_engine()(bgr)
    if not detections:
        return []

    def reading_order(item):
        corners = item[0]  # 4 corner points [[x,y],...]
        top = min(point[1] for point in corners)
        left = min(point[0] for point in corners)
        return (round(top / 10.0), left)

    lines = []
    for item in sorted(detections, key=reading_order):
        cleaned = str(item[1]).strip()
        if cleaned:
            lines.append(cleaned)
    return lines
29
+
30
+
31
def text_diff(prev_lines, cur_lines):
    """Return (added, removed): lines in cur not in prev, and lines in prev not in cur.

    Each list keeps the order (and any duplicates) of the sequence it was
    taken from.
    """
    before = set(prev_lines)
    after = set(cur_lines)

    added = []
    for line in cur_lines:
        if line not in before:
            added.append(line)

    removed = []
    for line in prev_lines:
        if line not in after:
            removed.append(line)

    return added, removed
screex/core/segment.py ADDED
@@ -0,0 +1,41 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from screex.core.analyzer import motion_score
6
+
7
+
8
@dataclass
class Segment:
    """One UI state found while segmenting a frame stream."""

    idx: int           # running number of the state, starting at 0
    t_start: float     # time of the first frame of the state
    t_end: float       # time of the last frame of the state
    frame_bgr: object  # numpy BGR array: the settled keyframe of this UI state
14
+
15
+
16
def segment_stream(frames, change_threshold: float = 0.04):
    """Yield one Segment per UI state found in ``frames``.

    ``frames`` yields ``(index, time, bgr)`` tuples.  Whenever the motion score
    between two consecutive frames reaches ``change_threshold``, the state that
    was running is emitted and a new one starts at the current frame.  Each
    segment carries the last frame seen before the change as its keyframe, so
    only one frame is retained at a time.  The final state is emitted once the
    stream ends; an empty stream yields nothing.
    """
    import cv2

    state_no = 0
    state_start = None
    prev_gray = None
    tail_t = None    # time of the most recent frame of the running state
    tail_bgr = None  # most recent frame of the running state

    for _idx, t, bgr in frames:
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        is_first = prev_gray is None
        changed = (not is_first) and motion_score(prev_gray, gray) >= change_threshold
        if changed:
            # Close the running state on the frame seen just before the change.
            yield Segment(state_no, state_start, tail_t, tail_bgr)
            state_no += 1
        if is_first or changed:
            state_start = t
        prev_gray, tail_t, tail_bgr = gray, t, bgr

    if tail_bgr is not None:
        yield Segment(state_no, state_start, tail_t, tail_bgr)
screex/core/source.py ADDED
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+
4
def _open(path):
    """Open ``path`` with OpenCV and return the capture object.

    Raises FileNotFoundError when OpenCV reports that the capture is not open.
    """
    import cv2

    capture = cv2.VideoCapture(str(path))
    if capture.isOpened():
        return capture
    raise FileNotFoundError(f"cannot open video: {path}")
11
+
12
+
13
def video_info(path: str) -> dict:
    """Return basic properties of a video: fps, frame count, size and duration.

    Properties that OpenCV reports as falsy become 0.  ``duration`` is
    ``count / fps`` in seconds, or 0.0 when the frame rate is unknown.
    """
    import cv2

    cap = _open(path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
        count, width, height = (
            int(cap.get(prop) or 0)
            for prop in (
                cv2.CAP_PROP_FRAME_COUNT,
                cv2.CAP_PROP_FRAME_WIDTH,
                cv2.CAP_PROP_FRAME_HEIGHT,
            )
        )
    finally:
        cap.release()

    if fps:
        duration = count / fps
    else:
        duration = 0.0
    return {
        "fps": fps,
        "count": count,
        "width": width,
        "height": height,
        "duration": duration,
    }
26
+
27
+
28
def iter_frames(path: str, sample_fps: float):
    """Yield ``(output_index, time_seconds, frame)`` for frames sampled from a video.

    Every ``step``-th decoded frame is yielded, where ``step`` is the native
    frame rate divided by ``sample_fps``, rounded and at least 1 (every frame
    when ``sample_fps`` is falsy).  Skipped frames are only grabbed, not
    retrieved.  The timestamp is the raw frame number divided by the native
    frame rate.  The capture is released when the generator finishes or is
    closed.
    """
    cap = _open(path)
    import cv2

    # Fall back to the requested rate (or 1.0) when the container reports no fps.
    native = cap.get(cv2.CAP_PROP_FPS) or sample_fps or 1.0
    if sample_fps:
        step = max(1, round(native / sample_fps))
    else:
        step = 1

    position = 0  # index among all decoded frames
    emitted = 0   # index among yielded frames
    try:
        while cap.grab():
            if position % step == 0:
                ok, frame = cap.retrieve()
                if not ok:
                    break
                yield emitted, position / native, frame
                emitted += 1
            position += 1
    finally:
        cap.release()
49
+
50
+
51
def capture_webcam(out_path: str, seconds: float, fps: float = 15.0, device: int = 0) -> str:
    """Record a short clip from the default webcam into out_path. Manual/hardware path.

    Up to ``int(seconds * fps)`` frames are read from ``device`` and written
    with the ``mp4v`` codec at the camera's reported frame size (640x480 when
    the camera reports none).  Recording stops early if a read fails.

    Returns ``out_path`` as a string.

    Raises:
        RuntimeError: if the webcam cannot be opened, or if the video writer
            cannot be opened for ``out_path`` (otherwise every frame would be
            dropped silently and no usable file produced).
    """
    import cv2

    cap = cv2.VideoCapture(device)
    writer = None
    try:
        if not cap.isOpened():
            raise RuntimeError("cannot open webcam")
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(str(out_path), fourcc, fps, (w, h))
        if not writer.isOpened():
            raise RuntimeError(f"cannot open video writer: {out_path}")
        for _ in range(int(seconds * fps)):
            ok, frame = cap.read()
            if not ok:
                break
            writer.write(frame)
    finally:
        # Release the camera even when creating or opening the writer failed.
        cap.release()
        if writer is not None:
            writer.release()
    return str(out_path)
screex/skill.py ADDED
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+
6
def skill_text() -> str:
    """Return the bundled SKILL.md content (shipped as package data)."""
    from importlib.resources import files

    resource = files("screex") / "SKILL.md"
    content = resource.read_text(encoding="utf-8")
    return content
11
+
12
+
13
def default_skill_dir() -> Path:
    """Default Claude Code skill directory for Screex (~/.claude/skills/screex)."""
    home = Path.home()
    return home.joinpath(".claude", "skills", "screex")
16
+
17
+
18
def install_skill(dest_dir=None) -> Path:
    """Write the bundled SKILL.md into a skills directory; return the written file path.

    ``dest_dir`` falls back to ``default_skill_dir()`` when falsy.  The
    directory is created if needed and an existing SKILL.md is overwritten.
    """
    if dest_dir:
        folder = Path(dest_dir)
    else:
        folder = default_skill_dir()
    folder.mkdir(parents=True, exist_ok=True)

    destination = folder / "SKILL.md"
    destination.write_text(skill_text(), encoding="utf-8")
    return destination
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: screex
3
+ Version: 0.1.0
4
+ Summary: Screen-recording understanding: turn a screencast into a queryable index of UI states for transcripts, Q&A, and how-to / bug-report generation.
5
+ Author-email: Rushikesh Hiray <rhiray03@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/blueprintparadise/Screex
8
+ Project-URL: Repository, https://github.com/blueprintparadise/Screex
9
+ Project-URL: Issues, https://github.com/blueprintparadise/Screex/issues
10
+ Keywords: screen-recording,screencast,ocr,llm,claude,claude-skill,agents,video-understanding,ui-understanding
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Multimedia :: Video
14
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: opencv-python
19
+ Requires-Dist: numpy
20
+ Requires-Dist: rapidocr-onnxruntime
21
+ Provides-Extra: test
22
+ Requires-Dist: pytest; extra == "test"
23
+ Dynamic: license-file
24
+
25
+ # Screex
26
+
27
+ **Screen-recording understanding for agents.** Screex turns a screencast into a queryable
28
+ **index** of UI states — each with the on-screen text (OCR), what text changed since the
29
+ previous state, a thumbnail, and a full-resolution keyframe — so an LLM/agent can produce an
30
+ action transcript, answer questions, or generate a how-to guide / bug report from a recording.
31
+
32
+ - **Training-free & model-agnostic** — no fine-tuned UI model; any LLM can read the index.
33
+ - **`pip install`-only** — OCR via [`rapidocr-onnxruntime`](https://pypi.org/project/rapidocr-onnxruntime/), no system binaries.
34
+ - **Cheap by design** — the on-screen text is plain text (nearly free to read); full-res
35
+ keyframes are escalated to only when the text is insufficient.
36
+
37
+ ---
38
+
39
+ ## Install
40
+
41
+ ### From PyPI
42
+ ```bash
43
+ pip install screex
44
+ ```
45
+
46
+ ### From source
47
+ ```bash
48
+ git clone https://github.com/blueprintparadise/Screex.git
49
+ cd Screex
50
+ pip install -e . # add ".[test]" to also install pytest
51
+ ```
52
+
53
+ Both give you a `screex` command (entry point `screex.cli:main`). Requires Python ≥ 3.9.
54
+ First run downloads the small RapidOCR ONNX models automatically.
55
+
56
+ ---
57
+
58
+ ## Quickstart (CLI)
59
+
60
+ ```bash
61
+ # Build the index for a screen recording
62
+ screex index path/to/recording.mp4 --fps 2
63
+ # (or, without installing the package:)
64
+ python -m screex.cli index path/to/recording.mp4 --fps 2
65
+ ```
66
+
67
+ This writes:
68
+ ```
69
+ path/to/recording.screex/
70
+ index.json # the ScreenIndex (ordered UI states)
71
+ frames/00000.png # full-res keyframe per state
72
+ frames/00000_thumb.png # thumbnail per state
73
+ ...
74
+ ```
75
+
76
+ ### `index` options
77
+ | Flag | Default | Meaning |
78
+ |------|---------|---------|
79
+ | `--fps` | `2` | frames sampled per second (raise for fast-moving recordings) |
80
+ | `--change-threshold` | `0.04` | visual-change fraction (0–1) that starts a new UI state — lower = more states, higher = fewer |
81
+ | `--thumb-width` | `320` | thumbnail width in px |
82
+ | `--out` | `<recording>.screex` | output directory |
83
+
84
+ ### What `index.json` contains
85
+ An ordered list of `states`, each with:
86
+ `t_start` / `t_end`, `ocr_text` (on-screen text lines), `text_added` / `text_removed`
87
+ (text that appeared/disappeared vs the previous state — the strongest signal of what the user
88
+ did), and `thumbnail` / `keyframe` paths.
89
+
90
+ ---
91
+
92
+ ## Use as a Claude skill
93
+
94
+ Screex ships a `SKILL.md` that teaches Claude to build the index and turn it into one of three
95
+ views: an **action transcript**, **Q&A** over the recording, or a **how-to / bug report**.
96
+
97
+ 1. **Install the package** so `python -m screex.cli` is available in the environment Claude
98
+ uses (`pip install screex`, or `pip install -e .` from a source checkout).
99
+ 2. **Install the skill** — the package bundles `SKILL.md`, so one command installs it where
100
+ Claude Code discovers skills:
101
+ ```bash
102
+ screex skill --install # ~/.claude/skills/screex/
103
+ screex skill --install --dir <project>/.claude/skills/screex # per-project
104
+ screex skill --path # just print the target path
105
+ ```
106
+ 3. **Use it** — in Claude Code, just ask in natural language, e.g.:
107
+ - *"Use screex to turn `~/Downloads/bug-repro.mp4` into a bug report."*
108
+ - *"What steps does this screen recording show?"*
109
+ - *"From this demo, write a how-to doc."*
110
+
111
+ Claude runs `screex index`, reads `index.json`, skims the on-screen text across states, and
112
+ escalates to a full-res keyframe only when the text isn't enough — then produces the
113
+ transcript / answer / document.
114
+
115
+ > The skill is model-agnostic: the same `index.json` can be read by any LLM/agent, not only
116
+ > Claude.
117
+
118
+ ---
119
+
120
+ ## How it works
121
+
122
+ ```
123
+ recording → sample frames → segment into UI states → per state: OCR text + text-diff
124
+ → write thumbnail + full-res keyframe → index.json
125
+
126
+ views (agent-driven): transcript · Q&A · how-to / bug report
127
+ ```
128
+
129
+ `screex/core/`:
130
+ - `source` — decode & sample frames (OpenCV)
131
+ - `segment` — group frames into settled UI states by visual change
132
+ - `ocr` — RapidOCR text extraction + text-diff between states
133
+ - `index` — the `ScreenState` / `ScreenIndex` schema (JSON)
134
+
135
+ `screex/cli.py` wires them into the `screex index` command.
136
+
137
+ ---
138
+
139
+ ## Development
140
+
141
+ ```bash
142
+ pip install -e ".[test]"
143
+ python -m pytest -q
144
+ ```
145
+
146
+ ---
147
+
148
+ ## License
149
+
150
+ [MIT](LICENSE) © 2026 Rushikesh Hiray
@@ -0,0 +1,18 @@
1
+ screex/SKILL.md,sha256=vQainJuBGt1U8xLaNF530fOf3mpmskkgKQQ0eqU2AdA,2464
2
+ screex/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
3
+ screex/cli.py,sha256=OOjTAwVQhD58aLlHZMrwPjoX6fzHI4ztvzw950LScZI,6765
4
+ screex/skill.py,sha256=sp_dgWDMxfLQHdFq42IA0xXN7Rq_sAycSiQhfzil7RI,814
5
+ screex/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ screex/core/analyzer.py,sha256=SFTZqgvc22kMGi2XzuWUeGqcHgUi-F8krbtp5swmsQA,2064
7
+ screex/core/index.py,sha256=rb17ooCSLTGe1GhbAXPe3in3gHj6yRP_nXN65vVKBvA,1290
8
+ screex/core/manifest.py,sha256=577LQKggjVZ9E05tucz1x5TwgJnrHGgZuCdkFjcm1xw,1528
9
+ screex/core/mapper.py,sha256=5sJxZx2KLIA5lrQ_eyCPw4YHtjMnwYw-0Q2IEVI4Its,1098
10
+ screex/core/ocr.py,sha256=F-n7fLAE9kmh-t4Y-cgx299hiWGTIZ4J12ICkb37vEc,1102
11
+ screex/core/segment.py,sha256=Wrf9j6IrbQNrq6wwJvcytSzc0s9PsCgU_KFACF9PrFY,1187
12
+ screex/core/source.py,sha256=4pMBZwnb9tXqVNSsOczfi5Xv-bjrHCmaFwdQ80Te8fQ,2068
13
+ screex-0.1.0.dist-info/licenses/LICENSE,sha256=MdvXhyva5tRpxRdCOqEVZeLX5-TsdhfRyhJjHwr-nwQ,1072
14
+ screex-0.1.0.dist-info/METADATA,sha256=dx6F_5fsAwflitY34KPL7thIo-vH5wYwgW1k5o0oPwc,5573
15
+ screex-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ screex-0.1.0.dist-info/entry_points.txt,sha256=6JxbGLdyll0xXosUweomxCp2fhJgwQf5qL8N5_nzcxg,43
17
+ screex-0.1.0.dist-info/top_level.txt,sha256=VC_6j_HPuiRzlDg30hmAbnIsk99zAiR5sZDSztde1ew,7
18
+ screex-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ screex = screex.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rushikesh Hiray
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ screex