cshortranslate 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cshortranslate-0.1.0/.github/workflows/publish.yml +45 -0
- cshortranslate-0.1.0/.gitignore +21 -0
- cshortranslate-0.1.0/LICENSE +21 -0
- cshortranslate-0.1.0/PKG-INFO +91 -0
- cshortranslate-0.1.0/README.md +52 -0
- cshortranslate-0.1.0/cshortranslate/__init__.py +3 -0
- cshortranslate-0.1.0/cshortranslate/cli.py +158 -0
- cshortranslate-0.1.0/cshortranslate/ocr_engine.py +38 -0
- cshortranslate-0.1.0/cshortranslate/subtitle_generator.py +68 -0
- cshortranslate-0.1.0/cshortranslate/video_processor.py +149 -0
- cshortranslate-0.1.0/pyproject.toml +48 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
|
|
20
|
+
- name: Install build tools
|
|
21
|
+
run: pip install build
|
|
22
|
+
|
|
23
|
+
- name: Build package
|
|
24
|
+
run: python -m build
|
|
25
|
+
|
|
26
|
+
- name: Upload dist artifacts
|
|
27
|
+
uses: actions/upload-artifact@v4
|
|
28
|
+
with:
|
|
29
|
+
name: dist
|
|
30
|
+
path: dist/
|
|
31
|
+
|
|
32
|
+
publish:
|
|
33
|
+
needs: build
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
permissions:
|
|
36
|
+
id-token: write
|
|
37
|
+
steps:
|
|
38
|
+
- name: Download dist artifacts
|
|
39
|
+
uses: actions/download-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: dist
|
|
42
|
+
path: dist/
|
|
43
|
+
|
|
44
|
+
- name: Publish to PyPI
|
|
45
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Your Name
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cshortranslate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: OCR-based subtitle extraction from video files
|
|
5
|
+
Project-URL: Homepage, https://github.com/yourusername/cshortranslate
|
|
6
|
+
Project-URL: Issues, https://github.com/yourusername/cshortranslate/issues
|
|
7
|
+
Author-email: Your Name <you@example.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: chinese,ocr,srt,subtitle,video
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
22
|
+
Classifier: Topic :: Multimedia :: Video
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: numpy
|
|
25
|
+
Requires-Dist: opencv-python
|
|
26
|
+
Requires-Dist: rapidocr-onnxruntime
|
|
27
|
+
Requires-Dist: tqdm
|
|
28
|
+
Provides-Extra: all
|
|
29
|
+
Requires-Dist: av; extra == 'all'
|
|
30
|
+
Requires-Dist: degirum-video-capture; extra == 'all'
|
|
31
|
+
Requires-Dist: ffmpegcv; extra == 'all'
|
|
32
|
+
Provides-Extra: degirum
|
|
33
|
+
Requires-Dist: degirum-video-capture; extra == 'degirum'
|
|
34
|
+
Provides-Extra: ffmpegcv
|
|
35
|
+
Requires-Dist: ffmpegcv; extra == 'ffmpegcv'
|
|
36
|
+
Provides-Extra: pyav
|
|
37
|
+
Requires-Dist: av; extra == 'pyav'
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
# cshortranslate
|
|
41
|
+
|
|
42
|
+
OCR-based subtitle extraction from video files. Designed for extracting Chinese subtitles from long drama videos.
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install cshortranslate
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
For optional video backends:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install cshortranslate[pyav] # PyAV backend
|
|
54
|
+
pip install cshortranslate[ffmpegcv] # ffmpegcv backend
|
|
55
|
+
pip install cshortranslate[all] # All backends
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Usage
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
cshortranslate video.mp4
|
|
62
|
+
cshortranslate video.mp4 -o output.srt
|
|
63
|
+
cshortranslate video.mp4 --fps 15 --region "bottom:20"
|
|
64
|
+
cshortranslate video.mp4 --preview
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Options
|
|
68
|
+
|
|
69
|
+
| Option | Description |
|
|
70
|
+
|---|---|
|
|
71
|
+
| `-o, --output` | Output SRT path (default: `<video>.srt`) |
|
|
72
|
+
| `--fps` | Frame extraction rate (default: 10) |
|
|
73
|
+
| `--region` | Subtitle region: `"bottom:15"` for bottom 15%, or `"x,y,w,h"` in pixels |
|
|
74
|
+
| `--preview` | Show first frame with crop overlay for region selection |
|
|
75
|
+
| `--backend` | Video backend: `opencv`, `pyav`, `ffmpegcv`, `degirum` |
|
|
76
|
+
| `--batch-size` | Frames to OCR in parallel (default: 1) |
|
|
77
|
+
| `--num-workers` | OCR worker threads (default: 1) |
|
|
78
|
+
| `--min-chars` | Minimum characters to keep (default: 2) |
|
|
79
|
+
| `--min-dur` | Minimum duration in seconds (default: 0.3) |
|
|
80
|
+
|
|
81
|
+
## Requirements
|
|
82
|
+
|
|
83
|
+
- Python 3.9+
|
|
84
|
+
- `rapidocr-onnxruntime`
|
|
85
|
+
- `opencv-python`
|
|
86
|
+
- `tqdm`
|
|
87
|
+
- `numpy`
|
|
88
|
+
|
|
89
|
+
## License
|
|
90
|
+
|
|
91
|
+
MIT
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# cshortranslate
|
|
2
|
+
|
|
3
|
+
OCR-based subtitle extraction from video files. Designed for extracting Chinese subtitles from long drama videos.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install cshortranslate
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For optional video backends:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install cshortranslate[pyav] # PyAV backend
|
|
15
|
+
pip install cshortranslate[ffmpegcv] # ffmpegcv backend
|
|
16
|
+
pip install cshortranslate[all] # All backends
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
cshortranslate video.mp4
|
|
23
|
+
cshortranslate video.mp4 -o output.srt
|
|
24
|
+
cshortranslate video.mp4 --fps 15 --region "bottom:20"
|
|
25
|
+
cshortranslate video.mp4 --preview
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Options
|
|
29
|
+
|
|
30
|
+
| Option | Description |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `-o, --output` | Output SRT path (default: `<video>.srt`) |
|
|
33
|
+
| `--fps` | Frame extraction rate (default: 10) |
|
|
34
|
+
| `--region` | Subtitle region: `"bottom:15"` for bottom 15%, or `"x,y,w,h"` in pixels |
|
|
35
|
+
| `--preview` | Show first frame with crop overlay for region selection |
|
|
36
|
+
| `--backend` | Video backend: `opencv`, `pyav`, `ffmpegcv`, `degirum` |
|
|
37
|
+
| `--batch-size` | Frames to OCR in parallel (default: 1) |
|
|
38
|
+
| `--num-workers` | OCR worker threads (default: 1) |
|
|
39
|
+
| `--min-chars` | Minimum characters to keep (default: 2) |
|
|
40
|
+
| `--min-dur` | Minimum duration in seconds (default: 0.3) |
|
|
41
|
+
|
|
42
|
+
## Requirements
|
|
43
|
+
|
|
44
|
+
- Python 3.9+
|
|
45
|
+
- `rapidocr-onnxruntime`
|
|
46
|
+
- `opencv-python`
|
|
47
|
+
- `tqdm`
|
|
48
|
+
- `numpy`
|
|
49
|
+
|
|
50
|
+
## License
|
|
51
|
+
|
|
52
|
+
MIT
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import cv2
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
from cshortranslate.ocr_engine import OCREngine
|
|
9
|
+
from cshortranslate.video_processor import VideoProcessor
|
|
10
|
+
from cshortranslate.subtitle_generator import deduplicate, filter_entries, format_srt
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _positive_int(value):
    """argparse ``type=`` converter that accepts only integers >= 1.

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is < 1.
            argparse turns this into a usage error (exit status 2).
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def parse_args(argv=None):
    """Parse the command-line options of the subtitle extractor.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is what the console entry
            point relies on; passing a list makes the parser usable from
            tests and other callers.

    Returns:
        argparse.Namespace with ``video``, ``output``, ``fps``, ``region``,
        ``preview``, ``lang``, ``min_chars``, ``min_dur``, ``backend``,
        ``batch_size`` and ``num_workers``.
    """
    parser = argparse.ArgumentParser(
        description="OCR Chinese subtitles from long drama videos"
    )
    parser.add_argument("video", help="Path to the video file")
    parser.add_argument("-o", "--output", help="Output SRT path (default: <video>.srt)")
    # fps, batch size and worker count are divisors / pool sizes downstream,
    # so zero or negative values are rejected here with a usage error.
    parser.add_argument(
        "--fps", type=_positive_int, default=10, help="Frame extraction rate (default: 10)"
    )
    parser.add_argument(
        "--region",
        default=None,
        help='Subtitle region: "bottom:15" for bottom 15%%, or "x,y,w,h" in pixels',
    )
    parser.add_argument(
        "--preview", action="store_true", help="Show first frame with crop overlay"
    )
    # NOTE(review): --lang is parsed but nothing visible in this module reads it.
    parser.add_argument("--lang", default="ch", help="OCR language (default: ch)")
    parser.add_argument(
        "--min-chars", type=int, default=2, help="Min characters to keep (default: 2)"
    )
    parser.add_argument(
        "--min-dur", type=float, default=0.3, help="Min duration in seconds (default: 0.3)"
    )
    parser.add_argument(
        "--backend",
        choices=["opencv", "pyav", "ffmpegcv", "degirum"],
        default="opencv",
        help="Video decoding backend (default: opencv)",
    )
    parser.add_argument(
        "--batch-size",
        type=_positive_int,
        default=1,
        help="Number of frames to OCR in parallel (default: 1)",
    )
    parser.add_argument(
        "--num-workers",
        type=_positive_int,
        default=1,
        help="OCR worker threads (default: 1)",
    )
    return parser.parse_args(argv)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def show_preview(video):
    """Let the user pick the subtitle region on the first frame.

    Opens an OpenCV window showing the preview frame. A left click marks the
    top-left corner of the region; the region always extends from that point
    to the bottom-right corner of the frame. ENTER confirms, ESC cancels.

    Args:
        video: Object providing ``get_preview_frame()``.

    Returns:
        The region as an ``"x,y,w,h"`` string, or ``None`` when nothing was
        clicked or the selection was cancelled. Exits the process with
        status 1 if no preview frame could be read.
    """
    first_frame = video.get_preview_frame()
    if first_frame is None:
        print("Error: Could not read preview frame")
        sys.exit(1)

    window = "Preview - Press ENTER to confirm, ESC to cancel"
    # Holds at most one (x, y) tuple: the most recent left click.
    clicked = []

    def on_mouse(event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            clicked[:] = [(x, y)]

    cv2.namedWindow(window, cv2.WINDOW_NORMAL)
    cv2.setMouseCallback(window, on_mouse)

    base = first_frame.copy()
    print("Click on the TOP-LEFT corner of the subtitle region, then press ENTER")

    height, width = first_frame.shape[:2]
    confirmed = False
    while not confirmed:
        # Redraw from a clean copy so earlier rectangles do not accumulate.
        canvas = base.copy()
        if clicked:
            cv2.rectangle(canvas, clicked[0], (width, height), (0, 0, 255), 2)
        cv2.imshow(window, canvas)
        key = cv2.waitKey(30) & 0xFF
        if key == 27:  # ESC discards any selection
            clicked.clear()
            break
        confirmed = key == 13  # ENTER

    cv2.destroyAllWindows()

    if not clicked:
        return None
    left, top = clicked[0]
    return f"{left},{top},{width - left},{height - top}"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def main():
    """Command-line entry point: OCR a video's subtitle region into an SRT file.

    Pipeline: parse arguments, open the video, optionally let the user pick a
    region, OCR sampled frames in batches, merge near-identical consecutive
    texts, filter noise/short entries, and write the result as SRT.

    Exits with status 1 if the video file does not exist and with status 0
    (without writing a file) when no subtitle survives detection/filtering.
    """
    args = parse_args()

    if not os.path.isfile(args.video):
        print(f"Error: File not found: {args.video}")
        sys.exit(1)

    output_path = args.output or os.path.splitext(args.video)[0] + ".srt"

    video = VideoProcessor(args.video, fps=args.fps, backend=args.backend)
    entries = []

    # Everything that happens while the video is open sits inside one
    # try/finally so the handle is released even if the preview exits or the
    # OCR engine fails to initialise.
    try:
        print(f"Video: {args.video}")
        print(f"Resolution: {video.width}x{video.height}")
        print(f"Duration: {video.duration:.1f}s | FPS: {video.video_fps:.1f}")
        print(f"Extracting at {args.fps} FPS -> {video.get_total_snapshots()} frames")

        region = args.region
        if args.preview:
            region = show_preview(video)
            if region is None:
                print("No region selected. Using full frame.")
            else:
                print(f"Selected region: {region}")

        print(f"Subtitle region: {region or 'full frame (not recommended)'}")
        print("Running OCR...")

        ocr = OCREngine(num_workers=args.num_workers)

        # Each raw entry covers one source-video frame; deduplicate() later
        # stretches entries across consecutive samples with similar text.
        frame_duration = 1.0 / video.video_fps
        batch_images = []
        batch_timestamps = []

        def flush_batch():
            """OCR the pending batch and record every non-empty result."""
            texts = ocr.run_batch(batch_images)
            for text, ts in zip(texts, batch_timestamps):
                if text:
                    entries.append({"text": text, "start": ts, "end": ts + frame_duration})
            batch_images.clear()
            batch_timestamps.clear()

        try:
            frames = tqdm(video.iter_frames(), total=video.get_total_snapshots(), unit="frame")
            for frame, timestamp in frames:
                batch_images.append(video.crop_subtitle_region(frame, region))
                batch_timestamps.append(timestamp)
                if len(batch_images) >= args.batch_size:
                    flush_batch()

            if batch_images:
                flush_batch()
        except KeyboardInterrupt:
            # Keep whatever was recognised so far; the pending batch is dropped.
            print("\nInterrupted. Saving partial results...")
    finally:
        video.release()

    if not entries:
        print("No subtitles detected.")
        sys.exit(0)

    print(f"Deduplicating {len(entries)} raw entries...")
    merged = deduplicate(entries)
    print(f"Merged to {len(merged)} subtitle blocks")
    filtered = filter_entries(merged, min_chars=args.min_chars, min_dur=args.min_dur)
    print(f"After filtering: {len(filtered)} entries")

    if not filtered:
        print("No subtitles detected.")
        sys.exit(0)

    # Write the *filtered* entries: previously the unfiltered list was written,
    # so --min-chars / --min-dur and the noise filter had no effect on output.
    srt = format_srt(filtered)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(srt)

    print(f"Saved: {output_path}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
2
|
+
|
|
3
|
+
from rapidocr_onnxruntime import RapidOCR
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class OCREngine:
    """Run RapidOCR on images, optionally spreading a batch over worker threads.

    One ``RapidOCR`` instance is created per worker. During ``run_batch`` each
    instance is driven by exactly one thread, so no engine object is ever
    called concurrently (whether RapidOCR tolerates concurrent calls on one
    instance is not established here, so the code does not rely on it).
    """

    def __init__(self, num_workers=1):
        """Create ``num_workers`` OCR engines.

        Raises:
            ValueError: if ``num_workers`` is smaller than 1.
        """
        if num_workers < 1:
            raise ValueError(f"num_workers must be >= 1, got {num_workers}")
        self.num_workers = num_workers
        self._engines = [RapidOCR() for _ in range(num_workers)]

    @staticmethod
    def _recognize(engine, image):
        """Return the recognised text lines of *image* joined by newlines.

        The engine is called as ``engine(image)`` and is expected to return a
        ``(result, extra)`` pair; the text is taken from index 1 of every
        result item. A falsy result yields an empty string.
        """
        result, _ = engine(image)
        if not result:
            return ""
        return "\n".join(item[1] for item in result)

    def run(self, image):
        """OCR a single image with the first engine and return its text."""
        return self._recognize(self._engines[0], image)

    def run_batch(self, images):
        """OCR a sequence of images and return their texts in input order.

        With one worker the images are processed sequentially. Otherwise image
        ``i`` is handled by engine ``i % num_workers``, and every engine works
        through its own share on a dedicated thread.
        """
        if self.num_workers == 1:
            return [self.run(img) for img in images]

        images = list(images)
        results = [""] * len(images)
        active = min(self.num_workers, len(images))
        if not active:
            return results

        def work(worker_idx):
            # Strided partition: this worker owns indices worker_idx,
            # worker_idx + num_workers, ... so slots in `results` never clash.
            engine = self._engines[worker_idx]
            for idx in range(worker_idx, len(images), self.num_workers):
                results[idx] = self._recognize(engine, images[idx])

        with ThreadPoolExecutor(max_workers=active) as executor:
            futures = [executor.submit(work, worker_idx) for worker_idx in range(active)]
            for future in futures:
                future.result()  # re-raise any exception from the worker
        return results

    def _ocr_single(self, idx, image):
        """OCR *image* with engine ``idx % num_workers``; return ``(idx, text)``."""
        return idx, self._recognize(self._engines[idx % self.num_workers], image)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from difflib import SequenceMatcher
|
|
3
|
+
|
|
4
|
+
# Matches text that consists solely of a "position / total" time pair
# (presumably a player progress overlay picked up by OCR — confirm intent).
# Whitespace is tolerated around every digit group and separator.
TIMESTAMP_RE = re.compile(
    # M:SS / M:SS
    r"^\s*\d{1,2}\s*:\s*\d{2}\s*/\s*\d{1,2}\s*:\s*\d{2}\s*$"
    # H:MM:SS / H:MM:SS
    r"|^\s*\d{1,2}\s*:\s*\d{2}\s*:\s*\d{2}\s*/\s*\d{1,2}\s*:\s*\d{2}\s*:\s*\d{2}\s*$"
    # M:SS / SS
    r"|^\s*\d{1,2}\s*:\s*\d{2}\s*/\s*\d{2}\s*$"
)


def is_noise(text):
    """Return True when *text*, ignoring outer whitespace, is only a time pair."""
    candidate = text.strip()
    return TIMESTAMP_RE.match(candidate) is not None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def filter_entries(entries, min_chars=2, min_dur=0.3):
    """Return the entries worth keeping as subtitles.

    An entry (a dict with ``"text"``, ``"start"`` and ``"end"``) is dropped
    when its stripped text is empty, is classified as noise by ``is_noise``,
    has fewer than ``min_chars`` characters, or when ``end - start`` is below
    ``min_dur``. Kept entries are returned unchanged, in input order.
    """

    def keep(entry):
        stripped = entry["text"].strip()
        if not stripped or is_noise(stripped):
            return False
        if len(stripped) < min_chars:
            return False
        return not ((entry["end"] - entry["start"]) < min_dur)

    return [entry for entry in entries if keep(entry)]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def deduplicate(entries, threshold=0.85, max_gap=2.0):
    """Merge consecutive entries that show (almost) the same text.

    Entries are dicts with ``"text"``, ``"start"`` and ``"end"`` and are
    processed in the given order. An entry is folded into the previous merged
    entry when it starts no more than ``max_gap`` seconds after that entry
    ends and the ``SequenceMatcher`` ratio of the two texts is at least
    ``threshold``. The merged entry's end is extended and the longer of the
    two texts is kept (the earlier one on a tie).

    Entries with empty text are skipped. The input dicts are never modified:
    the result is built from shallow copies.

    Returns:
        A new list of merged entry dicts (empty for empty input).
    """
    merged = []
    for entry in entries:
        text = entry["text"]
        if not text:
            continue

        if merged:
            prev = merged[-1]
            gap = entry["start"] - prev["end"]
            if gap <= max_gap:
                similarity = SequenceMatcher(None, prev["text"], text).ratio()
                if similarity >= threshold:
                    prev["end"] = entry["end"]
                    if len(text) > len(prev["text"]):
                        prev["text"] = text
                    continue

        # Copy so that later in-place extension never touches the caller's dict.
        merged.append(dict(entry))
    return merged
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def format_srt(entries):
    """Render entries as SubRip (SRT) text.

    Every entry (a dict with ``"text"``, ``"start"`` and ``"end"``) becomes a
    block of: a 1-based counter, a ``start --> end`` line formatted by
    ``_ts``, the text, and an empty line. Blocks are joined with newlines;
    an empty input produces an empty string.
    """
    pieces = []
    for number, entry in enumerate(entries, start=1):
        time_span = f"{_ts(entry['start'])} --> {_ts(entry['end'])}"
        pieces.extend((str(number), time_span, entry["text"], ""))
    return "\n".join(pieces)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _ts(seconds):
|
|
64
|
+
h = int(seconds // 3600)
|
|
65
|
+
m = int((seconds % 3600) // 60)
|
|
66
|
+
s = int(seconds % 60)
|
|
67
|
+
ms = int((seconds - int(seconds)) * 1000)
|
|
68
|
+
return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import cv2
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class VideoProcessor:
    """Open a video with one of several backends and sample frames at a fixed rate.

    After construction the instance exposes ``video_fps``, ``total_frames``,
    ``width``, ``height`` and ``duration`` (seconds). ``iter_frames`` yields
    ``(frame, timestamp)`` pairs, keeping roughly ``fps`` frames per second of
    video by stepping ``video_fps / fps`` source frames between samples.
    """

    def __init__(self, video_path, fps=10, backend="opencv"):
        """Open *video_path* with the chosen backend.

        Args:
            video_path: Path of the video file.
            fps: Desired sampling rate in frames per second; must be > 0.
            backend: ``"pyav"``, ``"ffmpegcv"`` or ``"degirum"``; any other
                value selects OpenCV.

        Raises:
            ValueError: if ``fps`` is not positive or the backend reports no
                usable frame rate (e.g. the file could not be opened).
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps}")
        self.video_path = video_path
        self.fps = fps
        self.backend = backend
        self._backend_obj = None

        initializers = {
            "pyav": self._init_pyav,
            "ffmpegcv": self._init_ffmpegcv,
            "degirum": self._init_degirum,
        }
        initializers.get(backend, self._init_opencv)()

    def _finish_init(self):
        """Derive ``duration`` and the sampling step once metadata is loaded."""
        if not self.video_fps or self.video_fps <= 0:
            self.release()
            raise ValueError(
                f"Could not determine the frame rate of {self.video_path!r}; "
                "is it a readable video file?"
            )
        self.duration = self.total_frames / self.video_fps
        # A step below 1 cannot yield more than every frame, so clamp it; this
        # keeps get_total_snapshots() from exceeding the real frame count.
        self._step = max(1.0, self.video_fps / self.fps)

    def _load_cap_properties(self):
        """Read metadata from ``self.cap`` through the cv2 property interface."""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self._finish_init()

    def _init_opencv(self):
        self.cap = cv2.VideoCapture(self.video_path)
        self._load_cap_properties()

    def _init_pyav(self):
        import av

        self._av_container = av.open(self.video_path)
        stream = self._av_container.streams.video[0]
        self._av_stream = stream
        self.video_fps = float(stream.average_rate or stream.rate)
        # Fall back to duration * fps when the container reports no frame count.
        self.total_frames = stream.frames or int(
            stream.duration * stream.time_base * self.video_fps
        )
        self.width = stream.codec_context.width
        self.height = stream.codec_context.height
        self._finish_init()

    def _init_ffmpegcv(self):
        import ffmpegcv

        self.cap = ffmpegcv.VideoCapture(self.video_path)
        self.video_fps = self.cap.fps
        self.total_frames = self.cap.framecount
        self.width = self.cap.width
        self.height = self.cap.height
        self._finish_init()

    def _init_degirum(self):
        import degirum_video_capture

        self.cap = degirum_video_capture.VideoCapture(self.video_path)
        self._load_cap_properties()

    def get_preview_frame(self):
        """Return the first frame of the video as an image array, or None."""
        if self.backend == "pyav":
            container = self._av_container
            container.seek(0, stream=self._av_stream)
            for frame in container.decode(video=0):
                return frame.to_ndarray(format="bgr24")
            return None

        # NOTE(review): assumes the ffmpegcv/degirum capture objects accept the
        # cv2-style set() call used here — confirm against those libraries.
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        ret, frame = self.cap.read()
        return frame if ret else None

    def crop_subtitle_region(self, frame, region):
        """Return the part of *frame* described by *region*.

        Args:
            frame: Image array whose first two axes are height and width.
            region: ``None`` (return the frame unchanged), ``"bottom:P"`` for
                the bottom P percent of the frame, or ``"x,y,w,h"`` in pixels
                (a 4-item sequence of integers is accepted as well).

        Raises:
            ValueError: if the region is malformed, the percentage is outside
                (0, 100], an offset is negative or a size is not positive.
        """
        if region is None:
            return frame

        h, _ = frame.shape[:2]
        if isinstance(region, str) and region.startswith("bottom:"):
            pct = float(region.split(":", 1)[1])
            if not 0 < pct <= 100:
                raise ValueError(f"bottom percentage must be in (0, 100], got {region!r}")
            y_start = int(h * (100 - pct) / 100)
            return frame[y_start:, :]

        raw_parts = region.split(",") if isinstance(region, str) else region
        parts = [int(p) for p in raw_parts]
        if len(parts) != 4:
            raise ValueError(f'region must be "bottom:P" or "x,y,w,h", got {region!r}')
        x, y, rw, rh = parts
        # Negative offsets would silently wrap around with array slicing.
        if x < 0 or y < 0 or rw <= 0 or rh <= 0:
            raise ValueError(f"region needs x, y >= 0 and w, h > 0, got {region!r}")
        return frame[y : y + rh, x : x + rw]

    def iter_frames(self):
        """Yield ``(frame, timestamp_seconds)`` for every sampled frame."""
        if self.backend == "pyav":
            yield from self._iter_pyav()
        elif self.backend in ("ffmpegcv", "degirum"):
            yield from self._iter_opencv_compat()
        else:
            yield from self._iter_opencv()

    def _sample(self, frames, convert=None):
        """Pick every ``_step``-th item of *frames* and attach its timestamp.

        *convert*, when given, is applied only to the frames that are kept.
        """
        next_yield = 0.0
        for frame_idx, frame in enumerate(frames):
            if frame_idx >= next_yield:
                picked = convert(frame) if convert is not None else frame
                yield picked, frame_idx / self.video_fps
                next_yield += self._step

    def _read_cap(self):
        """Yield frames from ``self.cap`` until ``read()`` reports failure."""
        while True:
            ret, frame = self.cap.read()
            if not ret:
                return
            yield frame

    def _iter_opencv(self):
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        yield from self._sample(self._read_cap())

    def _iter_pyav(self):
        container = self._av_container
        container.seek(0, stream=self._av_stream)
        yield from self._sample(
            container.decode(video=0),
            convert=lambda frame: frame.to_ndarray(format="bgr24"),
        )

    def _iter_opencv_compat(self):
        # Same read loop as OpenCV; kept as a separate name for the
        # ffmpegcv/degirum backends.
        yield from self._iter_opencv()

    def get_total_snapshots(self):
        """Estimate how many frames ``iter_frames`` will yield.

        Sampling keeps frames 0, step, 2*step, ... so the count is
        ``ceil(total_frames / step)``; the value is only as accurate as the
        frame count reported by the backend.
        """
        if self.total_frames <= 0:
            return 0
        # Ceiling division without importing math: -(-a // b).
        return int(-(-self.total_frames // self._step))

    def release(self):
        """Close the underlying container/capture if one was opened."""
        if self.backend == "pyav":
            container = getattr(self, "_av_container", None)
            if container is not None:
                container.close()
        elif hasattr(self, "cap"):
            self.cap.release()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cshortranslate"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "OCR-based subtitle extraction from video files"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Your Name", email = "you@example.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["ocr", "subtitle", "srt", "video", "chinese"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Topic :: Multimedia :: Video",
|
|
28
|
+
"Topic :: Multimedia :: Sound/Audio",
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"rapidocr-onnxruntime",
|
|
32
|
+
"opencv-python",
|
|
33
|
+
"tqdm",
|
|
34
|
+
"numpy",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
pyav = ["av"]
|
|
39
|
+
ffmpegcv = ["ffmpegcv"]
|
|
40
|
+
degirum = ["degirum-video-capture"]
|
|
41
|
+
all = ["av", "ffmpegcv", "degirum-video-capture"]
|
|
42
|
+
|
|
43
|
+
[project.urls]
|
|
44
|
+
Homepage = "https://github.com/yourusername/cshortranslate"
|
|
45
|
+
Issues = "https://github.com/yourusername/cshortranslate/issues"
|
|
46
|
+
|
|
47
|
+
[project.scripts]
|
|
48
|
+
cshortranslate = "cshortranslate.cli:main"
|