myocr-lib 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- {myocr_lib-0.1.1.dist-info → myocr_lib-0.1.3.dist-info}/METADATA +1 -1
- myocr_lib-0.1.3.dist-info/RECORD +8 -0
- ocr_img/main_code.py +1 -1
- ocr_pdf/main_code.py +5 -3
- myocr_lib-0.1.1.dist-info/RECORD +0 -8
- {myocr_lib-0.1.1.dist-info → myocr_lib-0.1.3.dist-info}/WHEEL +0 -0
- {myocr_lib-0.1.1.dist-info → myocr_lib-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
ocr_img/__init__.py,sha256=A0HMZNNgwObl6qZHeGKXQSf-VhPlkQt3YKEcA_VXHLw,80
|
|
2
|
+
ocr_img/main_code.py,sha256=738bf1zZTr1FRncvPfUik4SSS2nSMkPqrCFRtnQeGmM,1300
|
|
3
|
+
ocr_pdf/__init__.py,sha256=t4SYasAJ1pjnd7sZBnPe1PqMbR18-4onsHXu93zw1LE,96
|
|
4
|
+
ocr_pdf/main_code.py,sha256=Rz9PtpA79XPbX2VsmeNaQabnY_ZoRkYylTUeNZPc2cE,5175
|
|
5
|
+
myocr_lib-0.1.3.dist-info/METADATA,sha256=s-8a8rie6ytZfIIF4vplbuot2_gHccJ8gUF9vHslcwY,677
|
|
6
|
+
myocr_lib-0.1.3.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
7
|
+
myocr_lib-0.1.3.dist-info/top_level.txt,sha256=m22oM9I_3SovUyKNzGQIpqgCp7GTXua0G7cEYqMx654,16
|
|
8
|
+
myocr_lib-0.1.3.dist-info/RECORD,,
|
ocr_img/main_code.py
CHANGED
ocr_pdf/main_code.py
CHANGED
|
@@ -28,7 +28,7 @@ class OCRDataExtractor:
|
|
|
28
28
|
result = subprocess.run(command, capture_output=True, text=True)
|
|
29
29
|
|
|
30
30
|
if result.returncode == 0:
|
|
31
|
-
print("PDF Was
|
|
31
|
+
print("OCR on PDF Was successfully.")
|
|
32
32
|
|
|
33
33
|
return True
|
|
34
34
|
|
|
@@ -110,13 +110,15 @@ class OCRDataExtractor:
|
|
|
110
110
|
# apply the whole pdf ocr if all the pages are extractable
|
|
111
111
|
if self._is_whole_pdf_ocr():
|
|
112
112
|
print("Applying whole Pdf ocr...")
|
|
113
|
+
# empty the pages
|
|
114
|
+
self.pages_to_ocr = []
|
|
113
115
|
|
|
114
116
|
results = self._apply_whole_pdf_ocr()
|
|
115
117
|
|
|
116
118
|
# apply the extraction for the whole pdf through fitz
|
|
117
119
|
text = self._extract_text_whole_pdf() if results else None
|
|
118
120
|
|
|
119
|
-
print(text)
|
|
121
|
+
# print(text)
|
|
120
122
|
|
|
121
123
|
# temporary store the text
|
|
122
124
|
# ext = self.input_pdf_path.split('.')[-1]
|
|
@@ -139,7 +141,7 @@ class OCRDataExtractor:
|
|
|
139
141
|
# do the extraction for specific pages only through fitz
|
|
140
142
|
text = self._extract_text_page_by_page() if results else None
|
|
141
143
|
|
|
142
|
-
print(text)
|
|
144
|
+
# print(text)
|
|
143
145
|
|
|
144
146
|
# delete the output file
|
|
145
147
|
self.delete_file(self.output_file_path)
|
myocr_lib-0.1.1.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
ocr_img/__init__.py,sha256=A0HMZNNgwObl6qZHeGKXQSf-VhPlkQt3YKEcA_VXHLw,80
|
|
2
|
-
ocr_img/main_code.py,sha256=aqE-NGFDCJ_nqpMt7GujRvEin6dB4kCK9DEQiADnMRs,1298
|
|
3
|
-
ocr_pdf/__init__.py,sha256=t4SYasAJ1pjnd7sZBnPe1PqMbR18-4onsHXu93zw1LE,96
|
|
4
|
-
ocr_pdf/main_code.py,sha256=1_SDZu-Ry9HZJE39KjgH4my5S7fXpE7MYIjkemxpmT8,5121
|
|
5
|
-
myocr_lib-0.1.1.dist-info/METADATA,sha256=3FUPOov0OXejJhMzSrbUc2LssNAJRT9yjHtGQ3G8LLw,677
|
|
6
|
-
myocr_lib-0.1.1.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
7
|
-
myocr_lib-0.1.1.dist-info/top_level.txt,sha256=m22oM9I_3SovUyKNzGQIpqgCp7GTXua0G7cEYqMx654,16
|
|
8
|
-
myocr_lib-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|