mistocr 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mistocr/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.0"
1
+ __version__ = "0.1.1"
mistocr/refine.py CHANGED
@@ -63,9 +63,10 @@ Headings to analyze:
63
63
  {headings_list}
64
64
  """
65
65
 
66
- # %% ../nbs/01_refine.ipynb 16
66
+ # %% ../nbs/01_refine.ipynb 18
67
67
  def fix_hdg_hierarchy(
68
68
  hdgs: list[str], # List of markdown headings
69
+ prompt: str=prompt_fix_hdgs, # Prompt to use
69
70
  model: str='claude-sonnet-4-5', # Model to use
70
71
  api_key: str=os.getenv('ANTHROPIC_API_KEY') # API key
71
72
  ) -> dict[int, str]: # Dictionary of index → corrected heading
@@ -78,7 +79,7 @@ def fix_hdg_hierarchy(
78
79
  )
79
80
  return json.loads(r.choices[0].message.content)['corrections']
80
81
 
81
- # %% ../nbs/01_refine.ipynb 19
82
+ # %% ../nbs/01_refine.ipynb 21
82
83
  def mk_fixes_lut(
83
84
  hdgs: list[str], # List of markdown headings
84
85
  model: str='claude-sonnet-4-5', # Model to use
@@ -88,7 +89,7 @@ def mk_fixes_lut(
88
89
  fixes = fix_hdg_hierarchy(hdgs, model, api_key)
89
90
  return {hdgs[int(k)]:v for k,v in fixes.items()}
90
91
 
91
- # %% ../nbs/01_refine.ipynb 22
92
+ # %% ../nbs/01_refine.ipynb 24
92
93
  def apply_hdg_fixes(
93
94
  p:str, # Page to fix
94
95
  lut_fixes: dict[str, str], # Lookup table of fixes
@@ -98,7 +99,7 @@ def apply_hdg_fixes(
98
99
  for old in get_hdgs(p): p = p.replace(old, lut_fixes.get(old, old) + (f' .... page {pg}' if pg else ''))
99
100
  return p
100
101
 
101
- # %% ../nbs/01_refine.ipynb 25
102
+ # %% ../nbs/01_refine.ipynb 27
102
103
  def fix_md_hdgs(
103
104
  src:str, # Source directory with markdown pages
104
105
  model:str='claude-sonnet-4-5', # Model
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mistocr
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Simple batch OCR for PDFs using Mistral's state-of-the-art vision model
5
5
  Home-page: https://github.com/franckalbinet/mistocr
6
6
  Author: Solveit
@@ -0,0 +1,10 @@
1
+ mistocr/__init__.py,sha256=rnObPjuBcEStqSO0S6gsdS_ot8ITOQjVj_-P1LUUYpg,22
2
+ mistocr/_modidx.py,sha256=R9zVMv4dKz2sLStoB5wBoKRqjza216_z8xPXszoplU4,2660
3
+ mistocr/core.py,sha256=wtaYZ_Fz0dXIb1cYLAtymwR9Z7_KBI4ULy-UpM3YTX4,7154
4
+ mistocr/refine.py,sha256=0N0omMZvPqydArPaiTdkX8tts4eS4AgfAJQP98WnvwY,4307
5
+ mistocr-0.1.1.dist-info/licenses/LICENSE,sha256=xV8xoN4VOL0uw9X8RSs2IMuD_Ss_a9yAbtGNeBWZwnw,11337
6
+ mistocr-0.1.1.dist-info/METADATA,sha256=ZPvywkk_QS7UTfPtup5IppiLzf5aZ1tCSPkZLV9g4WE,4848
7
+ mistocr-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ mistocr-0.1.1.dist-info/entry_points.txt,sha256=HjAiHozobM-alm_6bTF-ehRr2DD3KYE9PgRngelONOY,36
9
+ mistocr-0.1.1.dist-info/top_level.txt,sha256=LelTYnSpSXLy1Hb1m2YP3gt8luwP-I8KV0NjP_ucdSs,8
10
+ mistocr-0.1.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- mistocr/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
- mistocr/_modidx.py,sha256=R9zVMv4dKz2sLStoB5wBoKRqjza216_z8xPXszoplU4,2660
3
- mistocr/core.py,sha256=wtaYZ_Fz0dXIb1cYLAtymwR9Z7_KBI4ULy-UpM3YTX4,7154
4
- mistocr/refine.py,sha256=gWup79LGjmvKW5RyY1dRKUeAEt94mUJIeTZB3V4D-JE,4258
5
- mistocr-0.1.0.dist-info/licenses/LICENSE,sha256=xV8xoN4VOL0uw9X8RSs2IMuD_Ss_a9yAbtGNeBWZwnw,11337
6
- mistocr-0.1.0.dist-info/METADATA,sha256=JOyUQONpYUmmGk2kFzMkxaIBrHwjC9CfmI7fc9qa6ms,4848
7
- mistocr-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- mistocr-0.1.0.dist-info/entry_points.txt,sha256=HjAiHozobM-alm_6bTF-ehRr2DD3KYE9PgRngelONOY,36
9
- mistocr-0.1.0.dist-info/top_level.txt,sha256=LelTYnSpSXLy1Hb1m2YP3gt8luwP-I8KV0NjP_ucdSs,8
10
- mistocr-0.1.0.dist-info/RECORD,,