ocrgrep 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrgrep-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 tmctmt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
ocrgrep-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,89 @@
1
+ Metadata-Version: 2.4
2
+ Name: ocrgrep
3
+ Version: 0.1.0
4
+ Summary: grep-like OCR tool for images and videos.
5
+ Author-email: tmctmt <tmctmt@proton.me>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 tmctmt
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Keywords: ocr,grep,screen ai,chrome,locro
29
+ Classifier: Development Status :: 4 - Beta
30
+ Classifier: Environment :: Console
31
+ Classifier: Intended Audience :: End Users/Desktop
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Natural Language :: English
34
+ Classifier: Operating System :: OS Independent
35
+ Classifier: Programming Language :: Python
36
+ Classifier: Topic :: Utilities
37
+ Requires-Python: >=3.8
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE
40
+ Requires-Dist: tqdm~=4.0
41
+ Requires-Dist: opencv-python~=4.0
42
+ Requires-Dist: filetype~=1.0
43
+ Requires-Dist: Pillow~=12.0
44
+ Dynamic: license-file
45
+
46
+ ## ocrgrep
47
+ A grep-like OCR tool for image and video files, utilizing the fast and accurate [Google Screen AI](https://chromium.googlesource.com/chromium/src/+/refs/tags/126.0.6452.4/services/screen_ai/README.md) engine via [locro](https://github.com/sergiocorreia/clv-locro/tree/master).
48
+
49
+ ```
50
+ $ ocrgrep -ih 'grep-like' screenshot.png
51
+ rep README.md in main Preview ocrgrep A grep-like OCR tool for image and video files, uti
52
+ ```
53
+
54
+ ```
55
+ usage: ocrgrep.py [-i] [-m NUM] [-p] [-r] [-w NUM] [-C NUM] [-F] [-h] [-t] [--include GLOB]
56
+ [--exclude GLOB] [--no-image] [--no-video] [--video-max-msec NUM]
57
+ [--video-step-msec NUM] [--help]
58
+ pattern files [files ...]
59
+
60
+ positional arguments:
61
+ pattern
62
+ files
63
+
64
+ options:
65
+ -i, --ignore-case ignore case distinctions in patterns and data
66
+ -m, --max-count NUM stop after NUM selected lines
67
+ -p, --progress show progress bar
68
+ -r, --recursive scan subfiles in directories
69
+ -w, --workers NUM concurrency (default: 16)
70
+ -C, --context NUM print NUM characters of output context (default: 40)
71
+ -F, --fixed-strings PATTERN is a string
72
+ -h, --no-filename suppress the file name prefix on output
73
+ -t, --no-timestamp suppress the timestamp prefix on output for videos
74
+ --include GLOB search only files that match GLOB (a file pattern)
75
+ --exclude GLOB skip files that match GLOB
76
+ --no-image ignore image files
77
+ --no-video ignore video files
78
+ --video-max-msec NUM stop after NUM milliseconds of video
79
+ --video-step-msec NUM
80
+ scan a frame for every NUM milliseconds of video (default: 1000)
81
+ --help show this help message and exit
82
+ ```
83
+
84
+ # Install
85
+ ```
86
+ pip install git+https://github.com/sergiocorreia/clv-locro.git
87
+ locro download
88
+ pip install ocrgrep
89
+ ```
@@ -0,0 +1,44 @@
1
+ ## ocrgrep
2
+ A grep-like OCR tool for image and video files, utilizing the fast and accurate [Google Screen AI](https://chromium.googlesource.com/chromium/src/+/refs/tags/126.0.6452.4/services/screen_ai/README.md) engine via [locro](https://github.com/sergiocorreia/clv-locro/tree/master).
3
+
4
+ ```
5
+ $ ocrgrep -ih 'grep-like' screenshot.png
6
+ rep README.md in main Preview ocrgrep A grep-like OCR tool for image and video files, uti
7
+ ```
8
+
9
+ ```
10
+ usage: ocrgrep.py [-i] [-m NUM] [-p] [-r] [-w NUM] [-C NUM] [-F] [-h] [-t] [--include GLOB]
11
+ [--exclude GLOB] [--no-image] [--no-video] [--video-max-msec NUM]
12
+ [--video-step-msec NUM] [--help]
13
+ pattern files [files ...]
14
+
15
+ positional arguments:
16
+ pattern
17
+ files
18
+
19
+ options:
20
+ -i, --ignore-case ignore case distinctions in patterns and data
21
+ -m, --max-count NUM stop after NUM selected lines
22
+ -p, --progress show progress bar
23
+ -r, --recursive scan subfiles in directories
24
+ -w, --workers NUM concurrency (default: 16)
25
+ -C, --context NUM print NUM characters of output context (default: 40)
26
+ -F, --fixed-strings PATTERN is a string
27
+ -h, --no-filename suppress the file name prefix on output
28
+ -t, --no-timestamp suppress the timestamp prefix on output for videos
29
+ --include GLOB search only files that match GLOB (a file pattern)
30
+ --exclude GLOB skip files that match GLOB
31
+ --no-image ignore image files
32
+ --no-video ignore video files
33
+ --video-max-msec NUM stop after NUM milliseconds of video
34
+ --video-step-msec NUM
35
+ scan a frame for every NUM milliseconds of video (default: 1000)
36
+ --help show this help message and exit
37
+ ```
38
+
39
+ # Install
40
+ ```
41
+ pip install git+https://github.com/sergiocorreia/clv-locro.git
42
+ locro download
43
+ pip install ocrgrep
44
+ ```
@@ -0,0 +1,89 @@
1
+ Metadata-Version: 2.4
2
+ Name: ocrgrep
3
+ Version: 0.1.0
4
+ Summary: grep-like OCR tool for images and videos.
5
+ Author-email: tmctmt <tmctmt@proton.me>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 tmctmt
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Keywords: ocr,grep,screen ai,chrome,locro
29
+ Classifier: Development Status :: 4 - Beta
30
+ Classifier: Environment :: Console
31
+ Classifier: Intended Audience :: End Users/Desktop
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Natural Language :: English
34
+ Classifier: Operating System :: OS Independent
35
+ Classifier: Programming Language :: Python
36
+ Classifier: Topic :: Utilities
37
+ Requires-Python: >=3.8
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE
40
+ Requires-Dist: tqdm~=4.0
41
+ Requires-Dist: opencv-python~=4.0
42
+ Requires-Dist: filetype~=1.0
43
+ Requires-Dist: Pillow~=12.0
44
+ Dynamic: license-file
45
+
46
+ ## ocrgrep
47
+ A grep-like OCR tool for image and video files, utilizing the fast and accurate [Google Screen AI](https://chromium.googlesource.com/chromium/src/+/refs/tags/126.0.6452.4/services/screen_ai/README.md) engine via [locro](https://github.com/sergiocorreia/clv-locro/tree/master).
48
+
49
+ ```
50
+ $ ocrgrep -ih 'grep-like' screenshot.png
51
+ rep README.md in main Preview ocrgrep A grep-like OCR tool for image and video files, uti
52
+ ```
53
+
54
+ ```
55
+ usage: ocrgrep.py [-i] [-m NUM] [-p] [-r] [-w NUM] [-C NUM] [-F] [-h] [-t] [--include GLOB]
56
+ [--exclude GLOB] [--no-image] [--no-video] [--video-max-msec NUM]
57
+ [--video-step-msec NUM] [--help]
58
+ pattern files [files ...]
59
+
60
+ positional arguments:
61
+ pattern
62
+ files
63
+
64
+ options:
65
+ -i, --ignore-case ignore case distinctions in patterns and data
66
+ -m, --max-count NUM stop after NUM selected lines
67
+ -p, --progress show progress bar
68
+ -r, --recursive scan subfiles in directories
69
+ -w, --workers NUM concurrency (default: 16)
70
+ -C, --context NUM print NUM characters of output context (default: 40)
71
+ -F, --fixed-strings PATTERN is a string
72
+ -h, --no-filename suppress the file name prefix on output
73
+ -t, --no-timestamp suppress the timestamp prefix on output for videos
74
+ --include GLOB search only files that match GLOB (a file pattern)
75
+ --exclude GLOB skip files that match GLOB
76
+ --no-image ignore image files
77
+ --no-video ignore video files
78
+ --video-max-msec NUM stop after NUM milliseconds of video
79
+ --video-step-msec NUM
80
+ scan a frame for every NUM milliseconds of video (default: 1000)
81
+ --help show this help message and exit
82
+ ```
83
+
84
+ # Install
85
+ ```
86
+ pip install git+https://github.com/sergiocorreia/clv-locro.git
87
+ locro download
88
+ pip install ocrgrep
89
+ ```
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ ocrgrep.py
4
+ pyproject.toml
5
+ ocrgrep.egg-info/PKG-INFO
6
+ ocrgrep.egg-info/SOURCES.txt
7
+ ocrgrep.egg-info/dependency_links.txt
8
+ ocrgrep.egg-info/entry_points.txt
9
+ ocrgrep.egg-info/requires.txt
10
+ ocrgrep.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ocrgrep = ocrgrep:cli
@@ -0,0 +1,4 @@
1
+ tqdm~=4.0
2
+ opencv-python~=4.0
3
+ filetype~=1.0
4
+ Pillow~=12.0
@@ -0,0 +1 @@
1
+ ocrgrep
@@ -0,0 +1,164 @@
1
+ from dataclasses import dataclass
2
+ from datetime import timedelta
3
+ from fnmatch import fnmatch
4
+ from functools import partial
5
+ from multiprocessing import Pool, cpu_count
6
+ from pathlib import Path
7
+ import argparse
8
+ import re
9
+ import sys
10
+
11
+ from locro import ScreenAI
12
+ from tqdm import tqdm
13
+ import cv2
14
+ import filetype
15
+ import PIL
16
+
17
# ANSI SGR escape sequences used to colorize the printed matches.
RED = '\033[31m'      # wraps each highlighted match
YELLOW = '\033[33m'   # wraps the video timestamp prefix
MAGENTA = '\033[35m'  # wraps the file name prefix
RESET = '\033[39m'    # SGR 39: back to the default foreground color
21
+
22
@dataclass(frozen=True)
class Result:
    """Immutable record of the OCR text obtained from one file."""

    path: Path  # file the text was extracted from
    text: str   # text as returned by the OCR engine


@dataclass(frozen=True)
class VideoResult(Result):
    """A ``Result`` for a single video frame, tagged with its position."""

    msec: float  # frame position in the video, in milliseconds
30
+
31
def ocr(path: Path, args: argparse.Namespace):
    """Run OCR on one image or video file.

    Args:
        path: File to scan; its kind is detected with ``filetype``.
        args: Parsed command-line options. Reads ``no_image``, ``no_video``,
            ``video_max_msec`` and ``video_step_msec``.

    Returns:
        A list holding one ``Result`` for an image, or one ``VideoResult`` per
        sampled frame for a video. The list is empty when the file is neither
        kind or when its kind is disabled by ``args``.
    """
    # A bare ``import PIL`` does not load the ``Image`` submodule, so import it
    # explicitly instead of relying on another package having done so.
    from PIL import Image

    # The engine is created lazily on the first call and cached in a module
    # global so that later calls in the same process reuse it.
    global engine
    if 'engine' not in globals():
        engine = ScreenAI()

    results = []

    if filetype.is_image(path) and not args.no_image:
        # The context manager closes the underlying file once OCR is done.
        with Image.open(path) as image:
            text = engine.ocr_pil_image(image).text
        results.append(Result(path, text))

    if filetype.is_video(path) and not args.no_video:
        # Pass a plain string: not every OpenCV 4.x build accepts path objects.
        cap = cv2.VideoCapture(str(path))
        try:
            # NOTE(review): -1 presumably means OpenCV could not determine the
            # frame count (e.g. unreadable file) -- confirm against backends.
            if cap.get(cv2.CAP_PROP_FRAME_COUNT) == -1:
                return results

            prev_msec = None
            while True:
                msec = cap.get(cv2.CAP_PROP_POS_MSEC)

                if args.video_max_msec and msec > args.video_max_msec:
                    break

                # Compare against None explicitly: the first sampled frame is
                # usually at 0.0 ms, which is falsy and would otherwise make
                # the following frame bypass the step check.
                if prev_msec is not None and (msec - prev_msec) < args.video_step_msec:
                    # Too close to the last sampled frame: skip it cheaply.
                    if not cap.grab():
                        break
                    continue

                prev_msec = msec

                success, frame = cap.read()
                if not success:
                    break

                # OpenCV yields BGR channel order; PIL expects RGB.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
                text = engine.ocr_pil_image(image).text
                results.append(VideoResult(path, text, msec))
        finally:
            # Always release the capture, including on early return or error.
            cap.release()

    return results
75
+
76
def cli():
    """Command-line entry point: OCR the given files and print matches.

    Parses ``sys.argv``, collects the files to scan (optionally recursing into
    directories and applying ``--include``/``--exclude`` globs), runs ``ocr``
    on them in a process pool and prints one grep-style line per OCR result
    whose whitespace-normalized text matches the pattern. ``--max-count``
    limits the number of printed lines per file.

    Exits with a usage error when the pattern is not a valid regular
    expression or when ``--workers`` is less than 1.
    """
    p = argparse.ArgumentParser(
        description='grep-like OCR tool for images and videos.',
        epilog='example: %(prog)s -i "hello world" video.mp4 screenshot.png',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False
    )
    p.add_argument('pattern')
    p.add_argument('files', nargs='+')
    p.add_argument('-i', '--ignore-case', action='store_true',
                   help='ignore case distinctions in patterns and data')
    p.add_argument('-m', '--max-count', type=int, metavar='NUM',
                   help='stop after NUM selected lines')
    p.add_argument('-p', '--progress', action='store_true',
                   help='show progress bar')
    p.add_argument('-r', '--recursive', action='store_true',
                   help='scan subfiles in directories')
    # max(1, ...) keeps the default valid on single-core machines, where
    # cpu_count() // 2 would be 0 and Pool(0) raises ValueError.
    p.add_argument('-w', '--workers', type=int, default=max(1, cpu_count() // 2), metavar='NUM',
                   help='concurrency (default: %(default)s)')
    p.add_argument('-C', '--context', type=int, default=40, metavar='NUM',
                   help='print NUM characters of output context (default: %(default)s)')
    p.add_argument('-F', '--fixed-strings', action='store_true',
                   help='PATTERN is a string')
    p.add_argument('-h', '--no-filename', action='store_true',
                   help='suppress the file name prefix on output')
    p.add_argument('-t', '--no-timestamp', action='store_true',
                   help='suppress the timestamp prefix on output for videos')
    p.add_argument('--include', action='append', default=[], metavar='GLOB',
                   help='search only files that match GLOB (a file pattern)')
    p.add_argument('--exclude', action='append', default=[], metavar='GLOB',
                   help='skip files that match GLOB')
    p.add_argument('--no-image', action='store_true',
                   help='ignore image files')
    p.add_argument('--no-video', action='store_true',
                   help='ignore video files')
    p.add_argument('--video-max-msec', type=int, metavar='NUM',
                   help='stop after NUM milliseconds of video')
    p.add_argument('--video-step-msec', type=int, default=1000, metavar='NUM',
                   help='scan a frame for every NUM milliseconds of video (default: %(default)s)')
    p.add_argument('--help', action='help',
                   help='show this help message and exit')

    args = p.parse_args()

    if args.workers < 1:
        p.error('--workers must be at least 1')

    flags = re.IGNORECASE if args.ignore_case else 0

    # Resolve the pattern once so that -F applies to both the search and the
    # highlighting, and so that a bad regex is reported before any OCR work.
    source = re.escape(args.pattern) if args.fixed_strings else args.pattern
    try:
        pattern = re.compile(source, flags)
    except re.error as err:
        p.error(f'invalid pattern: {err}')
    whitespace = re.compile(r'\s+')

    def should_include(path: Path):
        """Apply the --exclude / --include globs to *path*."""
        # Match against the full path and the base name, so a glob such as
        # 'foo*' also selects 'dir/foo.png'.
        candidates = (str(path), path.name)

        def matches(globs):
            return any(fnmatch(name, pat) for name in candidates for pat in globs)

        if args.exclude and matches(args.exclude):
            return False
        if args.include and not matches(args.include):
            return False
        return True

    files = []
    for path in map(Path, args.files):
        if path.is_file() and should_include(path):
            files.append(path)
        elif path.is_dir() and args.recursive:
            files.extend(s for s in path.rglob('*') if s.is_file() and should_include(s))
        elif path.is_dir():
            print(f'ocrgrep: {path}: Is a directory', file=sys.stderr)
        else:
            print(f'ocrgrep: {path}: No such file or directory', file=sys.stderr)

    with Pool(args.workers) as pool, tqdm(total=len(files), disable=not args.progress) as pbar:
        # Each item is the list of results for one file, in completion order.
        for results in pool.imap_unordered(partial(ocr, args=args), files):
            count = 0  # lines printed for this file, for --max-count
            for result in results:
                # Collapse all whitespace runs so the text prints on one line.
                text = whitespace.sub(' ', result.text)
                match = pattern.search(text)
                if not match:
                    continue

                line = ''
                if not args.no_filename:
                    line += MAGENTA + str(result.path) + RESET + ':'
                if not args.no_timestamp and isinstance(result, VideoResult):
                    # [:-3] trims microseconds down to milliseconds.
                    line += YELLOW + str(timedelta(milliseconds=result.msec))[:-3] + RESET + ':'

                # Show the first match with --context characters either side,
                # highlighting every match that falls inside that window.
                start = max(0, match.start() - args.context)
                snippet = text[start : match.end() + args.context].strip()
                line += pattern.sub(lambda m: RED + m.group(0) + RESET, snippet)

                pbar.write(line)
                count += 1
                if args.max_count and count == args.max_count:
                    break
            pbar.update()


if __name__ == '__main__':
    cli()
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ocrgrep"
7
+ version = "0.1.0"
8
+ description = "grep-like OCR tool for images and videos."
9
+ readme = "README.md"
10
+ authors = [
11
+ { name = "tmctmt", email = "tmctmt@proton.me" },
12
+ ]
13
+ license = { file = "LICENSE" }
14
+ requires-python = ">=3.8"
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Environment :: Console",
18
+ "Intended Audience :: End Users/Desktop",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Natural Language :: English",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python",
23
+ "Topic :: Utilities",
24
+ ]
25
+ keywords = ["ocr", "grep", "screen ai", "chrome", "locro"]
26
+ dependencies = [
27
+ "tqdm ~= 4.0",
28
+ "opencv-python ~= 4.0",
29
+ "filetype ~= 1.0",
30
+ "Pillow ~= 12.0"
31
+ ]
32
+ [project.scripts]
33
+ ocrgrep = "ocrgrep:cli"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+