cshortranslate 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
# Builds the sdist and wheel, then uploads them to PyPI using trusted
# publishing (OIDC), so no API token secret is stored in the repository.
name: Publish to PyPI

on:
  push:
    tags:
      - "v*"
  workflow_dispatch:

# Least privilege by default: jobs only get read access to the repository
# unless they explicitly request more.
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: pip install build

      - name: Build package
        run: python -m build

      - name: Upload dist artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  publish:
    needs: build
    runs-on: ubuntu-latest
    permissions:
      # Required to mint the OIDC token used by PyPI trusted publishing.
      # Job-level permissions replace the workflow-level default entirely.
      id-token: write
    steps:
      - name: Download dist artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,21 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+ .eggs/
9
+
10
+ *.mp4
11
+ *.avi
12
+ *.mkv
13
+ *.mov
14
+ *.srt
15
+ *.ass
16
+ *.sub
17
+
18
+ .env
19
+ .venv
20
+ venv/
21
+ env/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Your Name
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: cshortranslate
3
+ Version: 0.1.0
4
+ Summary: OCR-based subtitle extraction from video files
5
+ Project-URL: Homepage, https://github.com/yourusername/cshortranslate
6
+ Project-URL: Issues, https://github.com/yourusername/cshortranslate/issues
7
+ Author-email: Your Name <you@example.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: chinese,ocr,srt,subtitle,video
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Sound/Audio
22
+ Classifier: Topic :: Multimedia :: Video
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: numpy
25
+ Requires-Dist: opencv-python
26
+ Requires-Dist: rapidocr-onnxruntime
27
+ Requires-Dist: tqdm
28
+ Provides-Extra: all
29
+ Requires-Dist: av; extra == 'all'
30
+ Requires-Dist: degirum-video-capture; extra == 'all'
31
+ Requires-Dist: ffmpegcv; extra == 'all'
32
+ Provides-Extra: degirum
33
+ Requires-Dist: degirum-video-capture; extra == 'degirum'
34
+ Provides-Extra: ffmpegcv
35
+ Requires-Dist: ffmpegcv; extra == 'ffmpegcv'
36
+ Provides-Extra: pyav
37
+ Requires-Dist: av; extra == 'pyav'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # cshortranslate
41
+
42
+ OCR-based subtitle extraction from video files. Designed for extracting Chinese subtitles from long drama videos.
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install cshortranslate
48
+ ```
49
+
50
+ For optional video backends:
51
+
52
+ ```bash
53
+ pip install cshortranslate[pyav] # PyAV backend
54
+ pip install cshortranslate[ffmpegcv] # ffmpegcv backend
55
+ pip install cshortranslate[all] # All backends
56
+ ```
57
+
58
+ ## Usage
59
+
60
+ ```bash
61
+ cshortranslate video.mp4
62
+ cshortranslate video.mp4 -o output.srt
63
+ cshortranslate video.mp4 --fps 15 --region "bottom:20"
64
+ cshortranslate video.mp4 --preview
65
+ ```
66
+
67
+ ### Options
68
+
69
+ | Option | Description |
70
+ |---|---|
71
+ | `-o, --output` | Output SRT path (default: `<video>.srt`) |
72
+ | `--fps` | Frame extraction rate (default: 10) |
73
+ | `--region` | Subtitle region: `"bottom:15"` for bottom 15%, or `"x,y,w,h"` in pixels |
74
+ | `--preview` | Show first frame with crop overlay for region selection |
75
+ | `--backend` | Video backend: `opencv`, `pyav`, `ffmpegcv`, `degirum` |
76
+ | `--batch-size` | Frames per OCR batch; a batch is OCR'd in parallel only when `--num-workers` > 1 (default: 1) |
77
+ | `--num-workers` | OCR worker threads (default: 1) |
78
+ | `--min-chars` | Minimum characters to keep (default: 2) |
79
+ | `--min-dur` | Minimum duration in seconds (default: 0.3) |
80
+
81
+ ## Requirements
82
+
83
+ - Python 3.9+
84
+ - `rapidocr-onnxruntime`
85
+ - `opencv-python`
86
+ - `tqdm`
87
+ - `numpy`
88
+
89
+ ## License
90
+
91
+ MIT
@@ -0,0 +1,52 @@
1
+ # cshortranslate
2
+
3
+ OCR-based subtitle extraction from video files. Designed for extracting Chinese subtitles from long drama videos.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install cshortranslate
9
+ ```
10
+
11
+ For optional video backends:
12
+
13
+ ```bash
14
+ pip install cshortranslate[pyav] # PyAV backend
15
+ pip install cshortranslate[ffmpegcv] # ffmpegcv backend
16
+ pip install cshortranslate[all] # All backends
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```bash
22
+ cshortranslate video.mp4
23
+ cshortranslate video.mp4 -o output.srt
24
+ cshortranslate video.mp4 --fps 15 --region "bottom:20"
25
+ cshortranslate video.mp4 --preview
26
+ ```
27
+
28
+ ### Options
29
+
30
+ | Option | Description |
31
+ |---|---|
32
+ | `-o, --output` | Output SRT path (default: `<video>.srt`) |
33
+ | `--fps` | Frame extraction rate (default: 10) |
34
+ | `--region` | Subtitle region: `"bottom:15"` for bottom 15%, or `"x,y,w,h"` in pixels |
35
+ | `--preview` | Show first frame with crop overlay for region selection |
36
+ | `--backend` | Video backend: `opencv`, `pyav`, `ffmpegcv`, `degirum` |
37
+ | `--batch-size` | Frames per OCR batch; a batch is OCR'd in parallel only when `--num-workers` > 1 (default: 1) |
38
+ | `--num-workers` | OCR worker threads (default: 1) |
39
+ | `--min-chars` | Minimum characters to keep (default: 2) |
40
+ | `--min-dur` | Minimum duration in seconds (default: 0.3) |
41
+
42
+ ## Requirements
43
+
44
+ - Python 3.9+
45
+ - `rapidocr-onnxruntime`
46
+ - `opencv-python`
47
+ - `tqdm`
48
+ - `numpy`
49
+
50
+ ## License
51
+
52
+ MIT
@@ -0,0 +1,3 @@
1
+ """OCR-based subtitle extraction from video files."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,158 @@
1
+ import argparse
2
+ import os
3
+ import sys
4
+
5
+ import cv2
6
+ from tqdm import tqdm
7
+
8
+ from cshortranslate.ocr_engine import OCREngine
9
+ from cshortranslate.video_processor import VideoProcessor
10
+ from cshortranslate.subtitle_generator import deduplicate, filter_entries, format_srt
11
+
12
+
13
def _positive_int(value):
    """argparse ``type`` callable: parse ``value`` as an int and require it to be >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}")
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def parse_args():
    """Parse the command line (``sys.argv``) and return the populated namespace.

    ``--fps``, ``--batch-size`` and ``--num-workers`` must be positive
    integers: a zero frame rate would divide by zero when the sampling step
    is computed, and zero workers would leave the OCR engine pool empty.
    Invalid values make argparse print a usage error and exit with status 2.
    """
    parser = argparse.ArgumentParser(
        description="OCR Chinese subtitles from long drama videos"
    )
    parser.add_argument("video", help="Path to the video file")
    parser.add_argument("-o", "--output", help="Output SRT path (default: <video>.srt)")
    parser.add_argument(
        "--fps", type=_positive_int, default=10, help="Frame extraction rate (default: 10)"
    )
    parser.add_argument(
        "--region",
        default=None,
        # "%%" is argparse's escape for a literal percent sign in help text.
        help='Subtitle region: "bottom:15" for bottom 15%%, or "x,y,w,h" in pixels',
    )
    parser.add_argument(
        "--preview", action="store_true", help="Show first frame with crop overlay"
    )
    parser.add_argument("--lang", default="ch", help="OCR language (default: ch)")
    parser.add_argument("--min-chars", type=int, default=2, help="Min characters to keep (default: 2)")
    parser.add_argument("--min-dur", type=float, default=0.3, help="Min duration in seconds (default: 0.3)")
    parser.add_argument(
        "--backend",
        choices=["opencv", "pyav", "ffmpegcv", "degirum"],
        default="opencv",
        help="Video decoding backend (default: opencv)",
    )
    parser.add_argument(
        "--batch-size",
        type=_positive_int,
        default=1,
        help="Number of frames to OCR in parallel (default: 1)",
    )
    parser.add_argument(
        "--num-workers", type=_positive_int, default=1, help="OCR worker threads (default: 1)"
    )
    return parser.parse_args()
40
+
41
+
42
def show_preview(video):
    """Let the user pick the subtitle region's top-left corner on a preview frame.

    Shows the frame returned by ``video.get_preview_frame()`` in an OpenCV
    window. A left click marks the top-left corner; the region then extends
    to the bottom-right corner of the frame and is outlined in red. ENTER
    confirms, ESC cancels.

    Returns:
        An ``"x,y,w,h"`` string for the chosen region, or ``None`` when
        nothing was clicked or the selection was cancelled.

    Exits the process with status 1 when no preview frame can be read.
    """
    window_title = "Preview - Press ENTER to confirm, ESC to cancel"
    enter_key, esc_key = 13, 27

    first_frame = video.get_preview_frame()
    if first_frame is None:
        print("Error: Could not read preview frame")
        sys.exit(1)

    frame_h, frame_w = first_frame.shape[:2]
    # Mutable holder so the mouse callback can publish the latest click.
    picked = {"corner": None}

    def on_mouse(event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            picked["corner"] = (x, y)

    cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
    cv2.setMouseCallback(window_title, on_mouse)

    pristine = first_frame.copy()
    print("Click on the TOP-LEFT corner of the subtitle region, then press ENTER")

    while True:
        # Redraw from the untouched copy so old rectangles do not accumulate.
        canvas = pristine.copy()
        corner = picked["corner"]
        if corner:
            cv2.rectangle(canvas, (corner[0], corner[1]), (frame_w, frame_h), (0, 0, 255), 2)
        cv2.imshow(window_title, canvas)
        pressed = cv2.waitKey(30) & 0xFF
        if pressed == esc_key:
            picked["corner"] = None
            break
        if pressed == enter_key:
            break

    cv2.destroyAllWindows()

    corner = picked["corner"]
    if not corner:
        return None
    left, top = corner
    return f"{left},{top},{frame_w - left},{frame_h - top}"
80
+
81
+
82
def main():
    """Command-line entry point: OCR a video's subtitle region and write an SRT file.

    Steps: parse arguments, open the video, optionally let the user pick the
    region interactively, OCR the sampled frames in batches, merge
    consecutive near-identical texts, filter out noise/short entries, and
    write the result as SRT.

    Exits with status 1 when the input file does not exist and with status 0
    when OCR produced no text at all. Ctrl-C during OCR keeps the entries
    collected so far and still writes them out.
    """
    args = parse_args()

    if not os.path.isfile(args.video):
        print(f"Error: File not found: {args.video}")
        sys.exit(1)

    output_path = args.output or os.path.splitext(args.video)[0] + ".srt"

    video = VideoProcessor(args.video, fps=args.fps, backend=args.backend)
    print(f"Video: {args.video}")
    print(f"Resolution: {video.width}x{video.height}")
    print(f"Duration: {video.duration:.1f}s | FPS: {video.video_fps:.1f}")
    print(f"Extracting at {args.fps} FPS -> {video.get_total_snapshots()} frames")

    region = args.region
    if args.preview:
        # The interactive selection replaces any --region given on the command line.
        region = show_preview(video)
        if region is None:
            print("No region selected. Using full frame.")
        else:
            print(f"Selected region: {region}")

    print(f"Subtitle region: {region or 'full frame (not recommended)'}")
    print("Running OCR...")

    entries = []

    try:
        # Built inside the try block so the video is released even if the
        # OCR engine fails to initialise.
        ocr = OCREngine(num_workers=args.num_workers)
        frame_duration = 1.0 / video.video_fps
        batch_size = args.batch_size
        batch_images = []
        batch_timestamps = []

        def flush_batch():
            """OCR the pending frames, record every non-empty text, reset the batch."""
            texts = ocr.run_batch(batch_images)
            for text, ts in zip(texts, batch_timestamps):
                if text:
                    entries.append({"text": text, "start": ts, "end": ts + frame_duration})
            batch_images.clear()
            batch_timestamps.clear()

        for frame, timestamp in tqdm(video.iter_frames(), total=video.get_total_snapshots(), unit="frame"):
            batch_images.append(video.crop_subtitle_region(frame, region))
            batch_timestamps.append(timestamp)
            if len(batch_images) >= batch_size:
                flush_batch()

        # Frames left over when the total is not a multiple of batch_size.
        if batch_images:
            flush_batch()
    except KeyboardInterrupt:
        print("\nInterrupted. Saving partial results...")
    finally:
        video.release()

    if not entries:
        print("No subtitles detected.")
        sys.exit(0)

    print(f"Deduplicating {len(entries)} raw entries...")
    merged = deduplicate(entries)
    print(f"Merged to {len(merged)} subtitle blocks")
    filtered = filter_entries(merged, min_chars=args.min_chars, min_dur=args.min_dur)
    print(f"After filtering: {len(filtered)} entries")

    # Write the filtered entries: writing `merged` here would silently ignore
    # --min-chars / --min-dur and keep timestamp-overlay noise in the output.
    srt = format_srt(filtered)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(srt)

    print(f"Saved: {output_path}")
155
+
156
+
157
+ if __name__ == "__main__":
158
+ main()
@@ -0,0 +1,38 @@
1
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2
+
3
+ from rapidocr_onnxruntime import RapidOCR
4
+
5
+
6
class OCREngine:
    """Pool of RapidOCR engines used to recognise text in cropped frames.

    One engine instance is created per worker. In ``run_batch`` every engine
    is driven by exactly one task at a time, so an engine is never invoked
    from two threads concurrently.
    """

    def __init__(self, num_workers=1):
        """Create ``num_workers`` engines.

        Raises:
            ValueError: if ``num_workers`` is smaller than 1 (an empty pool
                could not serve ``run``).
        """
        if num_workers < 1:
            raise ValueError(f"num_workers must be >= 1, got {num_workers}")
        self.num_workers = num_workers
        self._engines = [RapidOCR() for _ in range(num_workers)]

    @staticmethod
    def _recognize(engine, image):
        """Run ``engine`` on ``image``; return detected texts joined by newlines ("" if none)."""
        result, _ = engine(image)
        if not result:
            return ""
        # Each result item carries the recognised text at index 1.
        return "\n".join(item[1] for item in result)

    def run(self, image):
        """OCR a single image with the first engine and return its text ("" if none)."""
        return self._recognize(self._engines[0], image)

    def run_batch(self, images):
        """OCR ``images`` and return one text per image, in input order.

        With a single worker (or at most one image) the images are processed
        sequentially. Otherwise image ``i`` is handled by engine
        ``i % num_workers``, and each engine works through its own images
        sequentially inside a single task.
        """
        total = len(images)
        if self.num_workers == 1 or total <= 1:
            return [self.run(img) for img in images]

        results = [""] * total

        def ocr_shard(worker_idx):
            # Sole user of this engine: covers indices worker_idx,
            # worker_idx + num_workers, ... one after another.
            engine = self._engines[worker_idx]
            for idx in range(worker_idx, total, self.num_workers):
                results[idx] = self._recognize(engine, images[idx])

        shard_count = min(self.num_workers, total)
        with ThreadPoolExecutor(max_workers=shard_count) as executor:
            futures = [executor.submit(ocr_shard, w) for w in range(shard_count)]
            for future in futures:
                future.result()  # re-raise any exception from the worker
        return results

    def _ocr_single(self, idx, image):
        """OCR one image with the engine assigned to ``idx``; return ``(idx, text)``."""
        return idx, self._recognize(self._engines[idx % self.num_workers], image)
@@ -0,0 +1,68 @@
1
+ import re
2
+ from difflib import SequenceMatcher
3
+
4
# Matches a string that is nothing but a player-style time readout, in one of
# three shapes: "M:SS / M:SS", "H:MM:SS / H:MM:SS", or "M:SS / SS"
# (optional whitespace is allowed around every separator).
TIMESTAMP_RE = re.compile(
    r"^\s*\d{1,2}\s*:\s*\d{2}\s*/\s*\d{1,2}\s*:\s*\d{2}\s*$"
    r"|^\s*\d{1,2}\s*:\s*\d{2}\s*:\s*\d{2}\s*/\s*\d{1,2}\s*:\s*\d{2}\s*:\s*\d{2}\s*$"
    r"|^\s*\d{1,2}\s*:\s*\d{2}\s*/\s*\d{2}\s*$"
)


def is_noise(text):
    """Return True when ``text`` (ignoring surrounding whitespace) is only a time readout."""
    candidate = text.strip()
    return TIMESTAMP_RE.match(candidate) is not None
13
+
14
+
15
def filter_entries(entries, min_chars=2, min_dur=0.3):
    """Return the entries worth keeping, in their original order.

    An entry is dropped when its stripped text is empty, is recognised as
    noise by ``is_noise``, has fewer than ``min_chars`` characters, or when
    its duration (``end - start``) is shorter than ``min_dur`` seconds.
    Kept entries are returned unchanged (same dict objects).
    """

    def keep(entry):
        stripped = entry["text"].strip()
        if not stripped or is_noise(stripped):
            return False
        if len(stripped) < min_chars:
            return False
        duration = entry["end"] - entry["start"]
        return not (duration < min_dur)

    return [entry for entry in entries if keep(entry)]
29
+
30
+
31
def deduplicate(entries, threshold=0.85, max_gap=2.0):
    """Merge runs of consecutive entries that show (nearly) the same text.

    An entry is merged into the previous kept entry when it starts no more
    than ``max_gap`` seconds after that entry ends and the two texts have a
    ``SequenceMatcher`` ratio of at least ``threshold``. A merge extends the
    previous entry's ``end`` and keeps the longer of the two texts (the
    earlier one on a tie). Entries with empty text are skipped, including
    the first one.

    The input is not modified: the returned list holds shallow copies.

    Args:
        entries: iterable of dicts with ``"text"``, ``"start"`` and ``"end"``
            keys, in chronological order.
        threshold: minimum similarity ratio for two texts to be merged.
        max_gap: maximum gap in seconds between entries that may be merged.

    Returns:
        A new list of merged entry dicts.
    """
    merged = []
    for entry in entries:
        cur_text = entry["text"]
        if not cur_text:
            continue
        if merged:
            prev = merged[-1]
            close_enough = (entry["start"] - prev["end"]) <= max_gap
            if close_enough and SequenceMatcher(None, prev["text"], cur_text).ratio() >= threshold:
                prev["end"] = entry["end"]
                if len(cur_text) > len(prev["text"]):
                    prev["text"] = cur_text
                continue
        # Copy so that later merges never write into the caller's dicts.
        merged.append(dict(entry))
    return merged
51
+
52
+
53
def format_srt(entries):
    """Render ``entries`` as SRT text.

    Each entry becomes four lines: a 1-based counter, the
    ``start --> end`` timing line, the text, and a blank separator line.
    The lines are joined with ``"\\n"``; an empty input yields ``""``.
    """
    blocks = []
    for index, entry in enumerate(entries, start=1):
        timing = f"{_ts(entry['start'])} --> {_ts(entry['end'])}"
        blocks.extend((str(index), timing, entry["text"], ""))
    return "\n".join(blocks)
61
+
62
+
63
+ def _ts(seconds):
64
+ h = int(seconds // 3600)
65
+ m = int((seconds % 3600) // 60)
66
+ s = int(seconds % 60)
67
+ ms = int((seconds - int(seconds)) * 1000)
68
+ return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
@@ -0,0 +1,149 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+
5
class VideoProcessor:
    """Open a video with one of several decoding backends and sample its frames.

    After construction the instance exposes ``video_fps``, ``total_frames``,
    ``width``, ``height`` and ``duration`` (seconds). ``iter_frames`` yields
    roughly ``fps`` frames per second of video together with their
    timestamps.
    """

    def __init__(self, video_path, fps=10, backend="opencv"):
        """Open ``video_path`` with ``backend`` and read its metadata.

        Args:
            video_path: path of the video file.
            fps: target sampling rate in frames per second; must be positive.
            backend: ``"pyav"``, ``"ffmpegcv"`` or ``"degirum"``; any other
                value selects OpenCV.

        Raises:
            ValueError: if ``fps`` is not positive, or the backend reports no
                usable frame rate (e.g. the file cannot be decoded).
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps}")
        self.video_path = video_path
        self.fps = fps
        self.backend = backend
        self._backend_obj = None

        if backend == "pyav":
            self._init_pyav()
        elif backend == "ffmpegcv":
            self._init_ffmpegcv()
        elif backend == "degirum":
            self._init_degirum()
        else:
            self._init_opencv()

    def _set_timing(self):
        """Derive ``duration`` and the sampling step from the probed metadata."""
        if not self.video_fps or self.video_fps <= 0:
            # Without a frame rate neither the duration nor the step can be
            # computed; fail with a clear message instead of ZeroDivisionError.
            self.release()
            raise ValueError(
                f"Could not determine the frame rate of {self.video_path!r} "
                f"(backend: {self.backend}); the file may be unreadable or not a video"
            )
        self.duration = self.total_frames / self.video_fps
        # Number of source frames between two sampled frames (may be fractional).
        self._step = self.video_fps / self.fps

    def _read_cv_properties(self):
        """Read fps, frame count and frame size through the ``cap.get`` property API."""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    def _init_opencv(self):
        self.cap = cv2.VideoCapture(self.video_path)
        self._read_cv_properties()
        self._set_timing()

    def _init_pyav(self):
        import av

        self._av_container = av.open(self.video_path)
        stream = self._av_container.streams.video[0]
        self._av_stream = stream
        rate = stream.average_rate or stream.rate
        self.video_fps = float(rate) if rate else 0.0
        if stream.frames:
            self.total_frames = stream.frames
        elif stream.duration is not None:
            # Frame count not stored in the container: estimate it from the duration.
            self.total_frames = int(stream.duration * stream.time_base * self.video_fps)
        else:
            self.total_frames = 0
        self.width = stream.codec_context.width
        self.height = stream.codec_context.height
        self._set_timing()

    def _init_ffmpegcv(self):
        import ffmpegcv

        self.cap = ffmpegcv.VideoCapture(self.video_path)
        self.video_fps = self.cap.fps
        self.total_frames = self.cap.framecount
        self.width = self.cap.width
        self.height = self.cap.height
        self._set_timing()

    def _init_degirum(self):
        import degirum_video_capture

        self.cap = degirum_video_capture.VideoCapture(self.video_path)
        self._read_cv_properties()
        self._set_timing()

    def get_preview_frame(self):
        """Return the first frame of the video as a BGR array, or ``None`` if it cannot be read."""
        if self.backend == "pyav":
            container = self._av_container
            container.seek(0, stream=self._av_stream)
            for frame in container.decode(video=0):
                return frame.to_ndarray(format="bgr24")
            return None
        # NOTE(review): assumes every non-pyav capture object supports
        # cap.set(CAP_PROP_POS_FRAMES, ...) -- confirm for ffmpegcv/degirum.
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        ret, frame = self.cap.read()
        return frame if ret else None

    def crop_subtitle_region(self, frame, region):
        """Return the part of ``frame`` described by ``region``.

        Args:
            frame: image array whose first two axes are height and width.
            region: ``None`` (return the frame unchanged), ``"bottom:P"`` for
                the bottom ``P`` percent of the frame, or ``"x,y,w,h"`` in
                pixels.

        The region is clipped to the frame. Without clipping, a negative
        coordinate or a percentage above 100 would be interpreted by slicing
        as an offset from the opposite edge and return the wrong pixels.

        Raises:
            ValueError: if ``region`` does not have one of the formats above.
        """
        if region is None:
            return frame
        h, w = frame.shape[:2]
        if isinstance(region, str) and region.startswith("bottom:"):
            pct = min(max(int(region.split(":")[1]), 0), 100)
            y_start = int(h * (100 - pct) / 100)
            return frame[y_start:, :]
        parts = [int(p) for p in region.split(",")]
        if len(parts) != 4:
            raise ValueError(f'region must be "bottom:P" or "x,y,w,h", got {region!r}')
        x, y, rw, rh = parts
        x0 = min(max(x, 0), w)
        y0 = min(max(y, 0), h)
        x1 = min(max(x + rw, x0), w)
        y1 = min(max(y + rh, y0), h)
        return frame[y0:y1, x0:x1]

    def iter_frames(self):
        """Yield ``(frame, timestamp_seconds)`` pairs sampled at about ``fps`` frames per second."""
        if self.backend == "pyav":
            yield from self._iter_pyav()
        elif self.backend == "ffmpegcv":
            yield from self._iter_opencv_compat()
        elif self.backend == "degirum":
            yield from self._iter_opencv_compat()
        else:
            yield from self._iter_opencv()

    def _sample(self, frames):
        """Pick from ``frames`` each frame whose index reaches the next sampling point."""
        next_yield = 0.0
        for frame_idx, frame in enumerate(frames):
            if frame_idx >= next_yield:
                yield frame, frame_idx / self.video_fps
                next_yield += self._step

    def _read_capture(self):
        """Rewind ``self.cap`` and yield decoded frames until ``read`` reports failure."""
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            yield frame

    def _iter_opencv(self):
        yield from self._sample(self._read_capture())

    def _iter_pyav(self):
        container = self._av_container
        container.seek(0, stream=self._av_stream)
        # Convert only the sampled frames to arrays; skipped frames stay unconverted.
        for frame, timestamp in self._sample(container.decode(video=0)):
            yield frame.to_ndarray(format="bgr24"), timestamp

    def _iter_opencv_compat(self):
        yield from self._sample(self._read_capture())

    def get_total_snapshots(self):
        """Estimate how many frames ``iter_frames`` will yield (used for progress display)."""
        # Never more than the number of source frames, even when the
        # requested rate is higher than the video's own frame rate.
        return min(self.total_frames, int(self.total_frames / self._step))

    def release(self):
        """Close the underlying container/capture; safe to call if opening failed part-way."""
        if self.backend == "pyav":
            container = getattr(self, "_av_container", None)
            if container is not None:
                container.close()
        elif hasattr(self, "cap"):
            self.cap.release()
@@ -0,0 +1,48 @@
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "cshortranslate"
version = "0.1.0"
description = "OCR-based subtitle extraction from video files"
readme = "README.md"
# SPDX license expression. No "License ::" trove classifier is listed below:
# those classifiers are deprecated and must not be combined with an expression.
license = "MIT"
requires-python = ">=3.9"
# NOTE(review): author and project URLs look like template placeholders --
# replace them with the real values before publishing.
authors = [
    { name = "Your Name", email = "you@example.com" },
]
keywords = ["ocr", "subtitle", "srt", "video", "chinese"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Multimedia :: Video",
    "Topic :: Multimedia :: Sound/Audio",
]
dependencies = [
    "numpy",
    "opencv-python",
    "rapidocr-onnxruntime",
    "tqdm",
]

# Each extra installs the package needed by one optional --backend choice.
[project.optional-dependencies]
pyav = ["av"]
ffmpegcv = ["ffmpegcv"]
degirum = ["degirum-video-capture"]
all = ["av", "ffmpegcv", "degirum-video-capture"]

[project.urls]
Homepage = "https://github.com/yourusername/cshortranslate"
Issues = "https://github.com/yourusername/cshortranslate/issues"

[project.scripts]
cshortranslate = "cshortranslate.cli:main"