mistocr 0.2.10__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mistocr/__init__.py +1 -1
- mistocr/_modidx.py +4 -1
- mistocr/core.py +67 -23
- mistocr/pipeline.py +26 -15
- mistocr/refine.py +8 -3
- {mistocr-0.2.10.dist-info → mistocr-0.4.0.dist-info}/METADATA +2 -1
- mistocr-0.4.0.dist-info/RECORD +11 -0
- mistocr-0.2.10.dist-info/RECORD +0 -11
- {mistocr-0.2.10.dist-info → mistocr-0.4.0.dist-info}/WHEEL +0 -0
- {mistocr-0.2.10.dist-info → mistocr-0.4.0.dist-info}/entry_points.txt +0 -0
- {mistocr-0.2.10.dist-info → mistocr-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {mistocr-0.2.10.dist-info → mistocr-0.4.0.dist-info}/top_level.txt +0 -0
mistocr/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.2.10"
|
|
1
|
+
__version__ = "0.4.0"
|
mistocr/_modidx.py
CHANGED
|
@@ -5,7 +5,8 @@ d = { 'settings': { 'branch': 'main',
|
|
|
5
5
|
'doc_host': 'https://franckalbinet.github.io',
|
|
6
6
|
'git_url': 'https://github.com/franckalbinet/mistocr',
|
|
7
7
|
'lib_path': 'mistocr'},
|
|
8
|
-
'syms': { 'mistocr.core': { 'mistocr.core._get_paths': ('core.html#_get_paths', 'mistocr/core.py'),
|
|
8
|
+
'syms': { 'mistocr.core': { 'mistocr.core._check_timeout': ('core.html#_check_timeout', 'mistocr/core.py'),
|
|
9
|
+
'mistocr.core._get_paths': ('core.html#_get_paths', 'mistocr/core.py'),
|
|
9
10
|
'mistocr.core._prep_batch': ('core.html#_prep_batch', 'mistocr/core.py'),
|
|
10
11
|
'mistocr.core._run_batch': ('core.html#_run_batch', 'mistocr/core.py'),
|
|
11
12
|
'mistocr.core.create_batch_entry': ('core.html#create_batch_entry', 'mistocr/core.py'),
|
|
@@ -18,10 +19,12 @@ d = { 'settings': { 'branch': 'main',
|
|
|
18
19
|
'mistocr.core.save_page': ('core.html#save_page', 'mistocr/core.py'),
|
|
19
20
|
'mistocr.core.save_pages': ('core.html#save_pages', 'mistocr/core.py'),
|
|
20
21
|
'mistocr.core.submit_batch': ('core.html#submit_batch', 'mistocr/core.py'),
|
|
22
|
+
'mistocr.core.subset_pdf': ('core.html#subset_pdf', 'mistocr/core.py'),
|
|
21
23
|
'mistocr.core.upload_pdf': ('core.html#upload_pdf', 'mistocr/core.py'),
|
|
22
24
|
'mistocr.core.wait_for_job': ('core.html#wait_for_job', 'mistocr/core.py')},
|
|
23
25
|
'mistocr.pipeline': {'mistocr.pipeline.pdf_to_md': ('pipeline.html#pdf_to_md', 'mistocr/pipeline.py')},
|
|
24
26
|
'mistocr.refine': { 'mistocr.refine.HeadingCorrection': ('refine.html#headingcorrection', 'mistocr/refine.py'),
|
|
27
|
+
'mistocr.refine.HeadingCorrections': ('refine.html#headingcorrections', 'mistocr/refine.py'),
|
|
25
28
|
'mistocr.refine.ImgDescription': ('refine.html#imgdescription', 'mistocr/refine.py'),
|
|
26
29
|
'mistocr.refine.add_descs_to_pg': ('refine.html#add_descs_to_pg', 'mistocr/refine.py'),
|
|
27
30
|
'mistocr.refine.add_descs_to_pgs': ('refine.html#add_descs_to_pgs', 'mistocr/refine.py'),
|
mistocr/core.py
CHANGED
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_core.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto 0
|
|
6
|
-
__all__ = ['ocr_model', 'ocr_endpoint', 'get_api_key', 'upload_pdf', 'create_batch_entry', 'prep_pdf_batch',
|
|
7
|
-
'wait_for_job', 'download_results', 'save_images', 'save_page', 'save_pages', 'ocr_pdf',
|
|
6
|
+
__all__ = ['logger', 'ocr_model', 'ocr_endpoint', 'get_api_key', 'upload_pdf', 'create_batch_entry', 'prep_pdf_batch',
|
|
7
|
+
'submit_batch', 'wait_for_job', 'download_results', 'save_images', 'save_page', 'save_pages', 'ocr_pdf',
|
|
8
|
+
'read_pgs', 'subset_pdf']
|
|
8
9
|
|
|
9
10
|
# %% ../nbs/00_core.ipynb 3
|
|
10
11
|
from fastcore.all import *
|
|
@@ -13,8 +14,15 @@ from io import BytesIO
|
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from PIL import Image
|
|
15
16
|
from mistralai import Mistral
|
|
17
|
+
import PyPDF2
|
|
18
|
+
import logging
|
|
16
19
|
|
|
17
|
-
# %% ../nbs/00_core.ipynb
|
|
20
|
+
# %% ../nbs/00_core.ipynb 4
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
logging.basicConfig(level=logging.WARNING, format='%(name)s - %(levelname)s - %(message)s')
|
|
23
|
+
logger.setLevel(logging.DEBUG)
|
|
24
|
+
|
|
25
|
+
# %% ../nbs/00_core.ipynb 7
|
|
18
26
|
def get_api_key(
|
|
19
27
|
key:str=None # Mistral API key
|
|
20
28
|
):
|
|
@@ -23,11 +31,11 @@ def get_api_key(
|
|
|
23
31
|
if not key: raise ValueError("MISTRAL_API_KEY not found")
|
|
24
32
|
return key
|
|
25
33
|
|
|
26
|
-
# %% ../nbs/00_core.ipynb
|
|
34
|
+
# %% ../nbs/00_core.ipynb 8
|
|
27
35
|
ocr_model = "mistral-ocr-latest"
|
|
28
36
|
ocr_endpoint = "/v1/ocr"
|
|
29
37
|
|
|
30
|
-
# %% ../nbs/00_core.ipynb
|
|
38
|
+
# %% ../nbs/00_core.ipynb 11
|
|
31
39
|
def upload_pdf(
|
|
32
40
|
path:str, # Path to PDF file
|
|
33
41
|
key:str=None # Mistral API key
|
|
@@ -38,7 +46,7 @@ def upload_pdf(
|
|
|
38
46
|
uploaded = c.files.upload(file=dict(file_name=path.stem, content=path.read_bytes()), purpose="ocr")
|
|
39
47
|
return c.files.get_signed_url(file_id=uploaded.id).url, c
|
|
40
48
|
|
|
41
|
-
# %% ../nbs/00_core.ipynb
|
|
49
|
+
# %% ../nbs/00_core.ipynb 16
|
|
42
50
|
def create_batch_entry(
|
|
43
51
|
path:str, # Path to PDF file,
|
|
44
52
|
url:str, # Mistral signed URL
|
|
@@ -50,7 +58,7 @@ def create_batch_entry(
|
|
|
50
58
|
if not cid: cid = path.stem
|
|
51
59
|
return dict(custom_id=cid, body=dict(document=dict(type="document_url", document_url=url), include_image_base64=inc_img))
|
|
52
60
|
|
|
53
|
-
# %% ../nbs/00_core.ipynb
|
|
61
|
+
# %% ../nbs/00_core.ipynb 18
|
|
54
62
|
def prep_pdf_batch(
|
|
55
63
|
path:str, # Path to PDF file,
|
|
56
64
|
cid:str=None, # Custom ID (by default using the file name without extention)
|
|
@@ -61,7 +69,7 @@ def prep_pdf_batch(
|
|
|
61
69
|
url, c = upload_pdf(path, key)
|
|
62
70
|
return create_batch_entry(path, url, cid, inc_img), c
|
|
63
71
|
|
|
64
|
-
# %% ../nbs/00_core.ipynb
|
|
72
|
+
# %% ../nbs/00_core.ipynb 22
|
|
65
73
|
def submit_batch(
|
|
66
74
|
entries:list[dict], # List of batch entries,
|
|
67
75
|
c:Mistral=None, # Mistral client,
|
|
@@ -75,20 +83,35 @@ def submit_batch(
|
|
|
75
83
|
batch_data = c.files.upload(file=dict(file_name="batch.jsonl", content=open(f.name, "rb")), purpose="batch")
|
|
76
84
|
return c.batch.jobs.create(input_files=[batch_data.id], model=model, endpoint=endpoint)
|
|
77
85
|
|
|
78
|
-
# %% ../nbs/00_core.ipynb
|
|
86
|
+
# %% ../nbs/00_core.ipynb 25
|
|
87
|
+
def _check_timeout(
|
|
88
|
+
queued_time:int, # Time spent in QUEUED state (seconds)
|
|
89
|
+
timeout:int, # Maximum allowed QUEUED time (seconds)
|
|
90
|
+
job_id:str # Batch job ID
|
|
91
|
+
):
|
|
92
|
+
"Raise TimeoutError if job has been queued longer than timeout"
|
|
93
|
+
if queued_time >= timeout: raise TimeoutError(f"Job {job_id} stayed in QUEUED for {queued_time}s, exceeding timeout of {timeout}s. Check your balance or Mistral Status.")
|
|
94
|
+
|
|
95
|
+
# %% ../nbs/00_core.ipynb 26
|
|
79
96
|
def wait_for_job(
|
|
80
|
-
job:dict, #
|
|
81
|
-
c:Mistral=None, # Mistral client
|
|
82
|
-
poll_interval:int=1 #
|
|
83
|
-
|
|
97
|
+
job:dict, # Batch job from submit_batch
|
|
98
|
+
c:Mistral=None, # Mistral client
|
|
99
|
+
poll_interval:int=1, # Seconds between status checks
|
|
100
|
+
queued_timeout:int=300 # Max seconds in QUEUED before timeout
|
|
101
|
+
) -> dict: # Completed job dict
|
|
84
102
|
"Poll job until completion and return final job status"
|
|
103
|
+
logger.info(f"Waiting for batch job {job.id} (initial status: {job.status})")
|
|
104
|
+
queued_time = 0
|
|
85
105
|
while job.status in ["QUEUED", "RUNNING"]:
|
|
86
|
-
|
|
106
|
+
logger.debug(f"Job {job.id} status: {job.status} (elapsed: {queued_time}s)")
|
|
107
|
+
if job.status == "QUEUED": queued_time += poll_interval; _check_timeout(queued_time, queued_timeout, job.id)
|
|
87
108
|
time.sleep(poll_interval)
|
|
88
109
|
job = c.batch.jobs.get(job_id=job.id)
|
|
110
|
+
logger.info(f"Job {job.id} completed with status: {job.status}")
|
|
111
|
+
if job.status != "SUCCESS": logger.warning(f"Job {job.id} finished with non-success status: {job.status}")
|
|
89
112
|
return job
|
|
90
113
|
|
|
91
|
-
# %% ../nbs/00_core.ipynb
|
|
114
|
+
# %% ../nbs/00_core.ipynb 28
|
|
92
115
|
def download_results(
|
|
93
116
|
job:dict, # Job dict,
|
|
94
117
|
c:Mistral=None # Mistral client
|
|
@@ -97,7 +120,7 @@ def download_results(
|
|
|
97
120
|
content = c.files.download(file_id=job.output_file).read().decode('utf-8')
|
|
98
121
|
return [json.loads(line) for line in content.strip().split('\n') if line]
|
|
99
122
|
|
|
100
|
-
# %% ../nbs/00_core.ipynb
|
|
123
|
+
# %% ../nbs/00_core.ipynb 33
|
|
101
124
|
def save_images(
|
|
102
125
|
page:dict, # Page dict,
|
|
103
126
|
img_dir:str='img' # Directory to save images
|
|
@@ -108,7 +131,7 @@ def save_images(
|
|
|
108
131
|
img_bytes = base64.b64decode(img['image_base64'].split(',')[1])
|
|
109
132
|
Image.open(BytesIO(img_bytes)).save(img_dir / img['id'])
|
|
110
133
|
|
|
111
|
-
# %% ../nbs/00_core.ipynb
|
|
134
|
+
# %% ../nbs/00_core.ipynb 34
|
|
112
135
|
def save_page(
|
|
113
136
|
page:dict, # Page dict,
|
|
114
137
|
dst:str, # Directory to save page
|
|
@@ -120,7 +143,7 @@ def save_page(
|
|
|
120
143
|
img_dir.mkdir(exist_ok=True)
|
|
121
144
|
save_images(page, img_dir)
|
|
122
145
|
|
|
123
|
-
# %% ../nbs/00_core.ipynb
|
|
146
|
+
# %% ../nbs/00_core.ipynb 36
|
|
124
147
|
def save_pages(
|
|
125
148
|
ocr_resp:dict, # OCR response,
|
|
126
149
|
dst:str, # Directory to save pages,
|
|
@@ -133,7 +156,7 @@ def save_pages(
|
|
|
133
156
|
for page in ocr_resp['pages']: save_page(page, dst, img_dir)
|
|
134
157
|
return dst
|
|
135
158
|
|
|
136
|
-
# %% ../nbs/00_core.ipynb
|
|
159
|
+
# %% ../nbs/00_core.ipynb 42
|
|
137
160
|
def _get_paths(path:str) -> list[Path]:
|
|
138
161
|
"Get list of PDFs from file or folder"
|
|
139
162
|
path = Path(path)
|
|
@@ -144,7 +167,7 @@ def _get_paths(path:str) -> list[Path]:
|
|
|
144
167
|
return pdfs
|
|
145
168
|
raise ValueError(f"Path not found: {path}")
|
|
146
169
|
|
|
147
|
-
# %% ../nbs/00_core.ipynb
|
|
170
|
+
# %% ../nbs/00_core.ipynb 43
|
|
148
171
|
def _prep_batch(pdfs:list[Path], inc_img:bool=True, key:str=None) -> tuple[list[dict], Mistral]:
|
|
149
172
|
"Prepare batch entries for list of PDFs"
|
|
150
173
|
entries, c = [], None
|
|
@@ -153,7 +176,7 @@ def _prep_batch(pdfs:list[Path], inc_img:bool=True, key:str=None) -> tuple[list[
|
|
|
153
176
|
entries.append(entry)
|
|
154
177
|
return entries, c
|
|
155
178
|
|
|
156
|
-
# %% ../nbs/00_core.ipynb
|
|
179
|
+
# %% ../nbs/00_core.ipynb 44
|
|
157
180
|
def _run_batch(entries:list[dict], c:Mistral, poll_interval:int=2) -> list[dict]:
|
|
158
181
|
"Submit batch, wait for completion, and download results"
|
|
159
182
|
job = submit_batch(entries, c)
|
|
@@ -161,7 +184,7 @@ def _run_batch(entries:list[dict], c:Mistral, poll_interval:int=2) -> list[dict]
|
|
|
161
184
|
if job.status != 'SUCCESS': raise Exception(f"Job failed with status: {job.status}")
|
|
162
185
|
return download_results(job, c)
|
|
163
186
|
|
|
164
|
-
# %% ../nbs/00_core.ipynb
|
|
187
|
+
# %% ../nbs/00_core.ipynb 45
|
|
165
188
|
def ocr_pdf(
|
|
166
189
|
path:str, # Path to PDF file or folder,
|
|
167
190
|
dst:str='md', # Directory to save markdown pages,
|
|
@@ -175,7 +198,7 @@ def ocr_pdf(
|
|
|
175
198
|
results = _run_batch(entries, c, poll_interval)
|
|
176
199
|
return L([save_pages(r['response']['body'], dst, r['custom_id']) for r in results])
|
|
177
200
|
|
|
178
|
-
# %% ../nbs/00_core.ipynb
|
|
201
|
+
# %% ../nbs/00_core.ipynb 52
|
|
179
202
|
def read_pgs(
|
|
180
203
|
path:str, # OCR output directory,
|
|
181
204
|
join:bool=True # Join pages into single string
|
|
@@ -185,3 +208,24 @@ def read_pgs(
|
|
|
185
208
|
pgs = sorted(path.glob('page_*.md'), key=lambda p: int(p.stem.split('_')[1]))
|
|
186
209
|
contents = L([p.read_text() for p in pgs])
|
|
187
210
|
return '\n\n'.join(contents) if join else contents
|
|
211
|
+
|
|
212
|
+
# %% ../nbs/00_core.ipynb 59
|
|
213
|
+
def subset_pdf(
|
|
214
|
+
path:str, # Path to PDF file
|
|
215
|
+
start:int=1, # Start page (1-based)
|
|
216
|
+
end:int=None, # End page (1-based, inclusive)
|
|
217
|
+
dst:str='.' # Output directory
|
|
218
|
+
) -> Path: # Path to subset PDF
|
|
219
|
+
"Extract page range from PDF and save with range suffix"
|
|
220
|
+
path = Path(path)
|
|
221
|
+
writer = PyPDF2.PdfWriter()
|
|
222
|
+
with open(path, 'rb') as f:
|
|
223
|
+
reader = PyPDF2.PdfReader(f)
|
|
224
|
+
n = len(reader.pages)
|
|
225
|
+
end = end or n
|
|
226
|
+
s, e = max(0, start-1), min(n, end) - 1
|
|
227
|
+
for i in range(s, e+1): writer.add_page(reader.pages[i])
|
|
228
|
+
suffix = f"_p{s+1}-{e+1}" if s>0 or e<n-1 else ""
|
|
229
|
+
out = Path(dst) / f"{path.stem}{suffix}.pdf"
|
|
230
|
+
with open(out, 'wb') as f: writer.write(f)
|
|
231
|
+
return out
|
mistocr/pipeline.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_pipeline.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto 0
|
|
6
|
-
__all__ = ['pdf_to_md']
|
|
6
|
+
__all__ = ['logger', 'pdf_to_md']
|
|
7
7
|
|
|
8
8
|
# %% ../nbs/02_pipeline.ipynb 3
|
|
9
9
|
from fastcore.all import *
|
|
@@ -11,27 +11,38 @@ from .core import read_pgs, ocr_pdf
|
|
|
11
11
|
from .refine import add_img_descs, fix_hdgs
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
from asyncio import Semaphore, gather, sleep
|
|
14
|
+
import tempfile
|
|
14
15
|
import os, json, shutil
|
|
16
|
+
import logging
|
|
15
17
|
|
|
16
18
|
# %% ../nbs/02_pipeline.ipynb 4
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
logging.basicConfig(level=logging.WARNING, format='%(name)s - %(levelname)s - %(message)s')
|
|
21
|
+
logger.setLevel(logging.INFO)
|
|
22
|
+
|
|
23
|
+
# %% ../nbs/02_pipeline.ipynb 5
|
|
17
24
|
@delegates(add_img_descs)
|
|
18
25
|
async def pdf_to_md(
|
|
19
|
-
pdf_path:str,
|
|
20
|
-
dst:str,
|
|
21
|
-
|
|
22
|
-
model:str='claude-sonnet-4-5',
|
|
23
|
-
add_img_desc:bool=True,
|
|
24
|
-
progress:bool=True,
|
|
25
|
-
**kwargs
|
|
26
|
+
pdf_path:str, # Path to input PDF file
|
|
27
|
+
dst:str, # Destination directory for output markdown
|
|
28
|
+
ocr_dst:str=None, # Optional OCR output directory
|
|
29
|
+
model:str='claude-sonnet-4-5', # Model to use for heading fixes and image descriptions
|
|
30
|
+
add_img_desc:bool=True, # Whether to add image descriptions
|
|
31
|
+
progress:bool=True, # Whether to show progress messages
|
|
32
|
+
**kwargs
|
|
33
|
+
):
|
|
34
|
+
"Convert PDF to markdown with OCR, fixed heading hierarchy, and optional image descriptions"
|
|
26
35
|
"Convert PDF to markdown with OCR, fixed heading hierarchy, and optional image descriptions"
|
|
36
|
+
cleanup = ocr_dst is None
|
|
37
|
+
if cleanup: ocr_dst = tempfile.mkdtemp()
|
|
27
38
|
n_steps = 3 if add_img_desc else 2
|
|
28
|
-
if progress:
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
if progress: print(f"Step 2/{n_steps}: Fixing heading hierarchy...")
|
|
39
|
+
if progress: logger.info(f"Step 1/{n_steps}: Running OCR on {pdf_path}...")
|
|
40
|
+
ocr_dir = ocr_pdf(pdf_path, ocr_dst)[0]
|
|
41
|
+
if progress: logger.info(f"Step 2/{n_steps}: Fixing heading hierarchy...")
|
|
32
42
|
fix_hdgs(ocr_dir, model=model)
|
|
33
43
|
if add_img_desc:
|
|
34
|
-
if progress:
|
|
44
|
+
if progress: logger.info(f"Step 3/{n_steps}: Adding image descriptions...")
|
|
35
45
|
await add_img_descs(ocr_dir, dst=dst, model=model, progress=progress, **kwargs)
|
|
36
|
-
elif dst
|
|
37
|
-
if
|
|
46
|
+
elif dst != str(ocr_dir): shutil.copytree(ocr_dir, dst, dirs_exist_ok=True)
|
|
47
|
+
if cleanup: shutil.rmtree(ocr_dst)
|
|
48
|
+
if progress: logger.info("Done!")
|
mistocr/refine.py
CHANGED
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto 0
|
|
6
6
|
__all__ = ['prompt_fix_hdgs', 'describe_img_prompt', 'get_hdgs', 'add_pg_hdgs', 'read_pgs_pg', 'fmt_hdgs_idx',
|
|
7
|
-
'HeadingCorrection', '
|
|
8
|
-
'
|
|
9
|
-
'add_img_descs']
|
|
7
|
+
'HeadingCorrection', 'HeadingCorrections', 'fix_hdg_hierarchy', 'mk_fixes_lut', 'apply_hdg_fixes',
|
|
8
|
+
'fix_hdgs', 'ImgDescription', 'describe_img', 'limit', 'parse_r', 'describe_imgs', 'save_img_descs',
|
|
9
|
+
'add_descs_to_pg', 'add_descs_to_pgs', 'add_img_descs']
|
|
10
10
|
|
|
11
11
|
# %% ../nbs/01_refine.ipynb 3
|
|
12
12
|
from fastcore.all import *
|
|
@@ -64,6 +64,11 @@ class HeadingCorrection(BaseModel):
|
|
|
64
64
|
index: int
|
|
65
65
|
corrected: str
|
|
66
66
|
|
|
67
|
+
# %% ../nbs/01_refine.ipynb 19
|
|
68
|
+
class HeadingCorrections(BaseModel):
|
|
69
|
+
"Collection of heading corrections returned by the LLM"
|
|
70
|
+
corrections: list[HeadingCorrection]
|
|
71
|
+
|
|
67
72
|
# %% ../nbs/01_refine.ipynb 21
|
|
68
73
|
prompt_fix_hdgs = """Fix markdown heading hierarchy errors while preserving the document's intended structure.
|
|
69
74
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mistocr
|
|
3
|
-
Version: 0.2.10
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Batch OCR for PDFs with heading restoration and visual content integration
|
|
5
5
|
Home-page: https://github.com/franckalbinet/mistocr
|
|
6
6
|
Author: Solveit
|
|
@@ -23,6 +23,7 @@ Requires-Dist: mistralai
|
|
|
23
23
|
Requires-Dist: pillow
|
|
24
24
|
Requires-Dist: dotenv
|
|
25
25
|
Requires-Dist: lisette
|
|
26
|
+
Requires-Dist: PyPDF2
|
|
26
27
|
Provides-Extra: dev
|
|
27
28
|
Dynamic: author
|
|
28
29
|
Dynamic: author-email
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
mistocr/__init__.py,sha256=42STGor_9nKYXumfeV5tiyD_M8VdcddX7CEexmibPBk,22
|
|
2
|
+
mistocr/_modidx.py,sha256=LyFez7ndKOXQpF0unhg1imCrW7xcUwO106W82ttVnnk,4366
|
|
3
|
+
mistocr/core.py,sha256=-yXqEro_kTE66lXWBrewS73SRTl-Btt9uyKNxMnzjIw,9181
|
|
4
|
+
mistocr/pipeline.py,sha256=n8AHIHGZBXYy_k7LtEBZSiTL6HClT84-7K4QlTjOLvo,2107
|
|
5
|
+
mistocr/refine.py,sha256=hQg4ZYRTkPFEiCNNBU2ykgXFfVLCNAp2IuwOwHBVQ2k,12983
|
|
6
|
+
mistocr-0.4.0.dist-info/licenses/LICENSE,sha256=xV8xoN4VOL0uw9X8RSs2IMuD_Ss_a9yAbtGNeBWZwnw,11337
|
|
7
|
+
mistocr-0.4.0.dist-info/METADATA,sha256=c0LUM6UrwIIoeug8fA8H4dYvutdieBFLQ52Sho4uGgY,8438
|
|
8
|
+
mistocr-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
+
mistocr-0.4.0.dist-info/entry_points.txt,sha256=HjAiHozobM-alm_6bTF-ehRr2DD3KYE9PgRngelONOY,36
|
|
10
|
+
mistocr-0.4.0.dist-info/top_level.txt,sha256=LelTYnSpSXLy1Hb1m2YP3gt8luwP-I8KV0NjP_ucdSs,8
|
|
11
|
+
mistocr-0.4.0.dist-info/RECORD,,
|
mistocr-0.2.10.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
mistocr/__init__.py,sha256=waXgc7p-jgGCsUjdVfO_KjlVZblnCvrzf4A0dsBj_lg,23
|
|
2
|
-
mistocr/_modidx.py,sha256=WTS9JpZdbrp2LghjhOV-CK0JYChHE4PzttgKfh7pTy4,4028
|
|
3
|
-
mistocr/core.py,sha256=KH5ND6ZMmETj9m9A-Ivtw5N2L-qm9vGwYgvHAzg-BsA,7218
|
|
4
|
-
mistocr/pipeline.py,sha256=k5rtr9ccxTgwQAzw533iMIL9qwiCofugWv0fhotLzaI,1639
|
|
5
|
-
mistocr/refine.py,sha256=H_IAF02k6CwBQdDJm9txknzUcTlz245zXitaHELX-P4,12791
|
|
6
|
-
mistocr-0.2.10.dist-info/licenses/LICENSE,sha256=xV8xoN4VOL0uw9X8RSs2IMuD_Ss_a9yAbtGNeBWZwnw,11337
|
|
7
|
-
mistocr-0.2.10.dist-info/METADATA,sha256=mkMu_9nYAXZ5jFdJd01AZqK3t93_Rt0xkkD0rRnl9Ew,8417
|
|
8
|
-
mistocr-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
-
mistocr-0.2.10.dist-info/entry_points.txt,sha256=HjAiHozobM-alm_6bTF-ehRr2DD3KYE9PgRngelONOY,36
|
|
10
|
-
mistocr-0.2.10.dist-info/top_level.txt,sha256=LelTYnSpSXLy1Hb1m2YP3gt8luwP-I8KV0NjP_ucdSs,8
|
|
11
|
-
mistocr-0.2.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|