mistocr 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mistocr/__init__.py +1 -1
- mistocr/refine.py +5 -4
- {mistocr-0.1.0.dist-info → mistocr-0.1.1.dist-info}/METADATA +1 -1
- mistocr-0.1.1.dist-info/RECORD +10 -0
- mistocr-0.1.0.dist-info/RECORD +0 -10
- {mistocr-0.1.0.dist-info → mistocr-0.1.1.dist-info}/WHEEL +0 -0
- {mistocr-0.1.0.dist-info → mistocr-0.1.1.dist-info}/entry_points.txt +0 -0
- {mistocr-0.1.0.dist-info → mistocr-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {mistocr-0.1.0.dist-info → mistocr-0.1.1.dist-info}/top_level.txt +0 -0
mistocr/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.0"
|
|
1
|
+
__version__ = "0.1.1"
|
mistocr/refine.py
CHANGED
|
@@ -63,9 +63,10 @@ Headings to analyze:
|
|
|
63
63
|
{headings_list}
|
|
64
64
|
"""
|
|
65
65
|
|
|
66
|
-
# %% ../nbs/01_refine.ipynb
|
|
66
|
+
# %% ../nbs/01_refine.ipynb 18
|
|
67
67
|
def fix_hdg_hierarchy(
|
|
68
68
|
hdgs: list[str], # List of markdown headings
|
|
69
|
+
prompt: str=prompt_fix_hdgs, # Prompt to use
|
|
69
70
|
model: str='claude-sonnet-4-5', # Model to use
|
|
70
71
|
api_key: str=os.getenv('ANTHROPIC_API_KEY') # API key
|
|
71
72
|
) -> dict[int, str]: # Dictionary of index → corrected heading
|
|
@@ -78,7 +79,7 @@ def fix_hdg_hierarchy(
|
|
|
78
79
|
)
|
|
79
80
|
return json.loads(r.choices[0].message.content)['corrections']
|
|
80
81
|
|
|
81
|
-
# %% ../nbs/01_refine.ipynb
|
|
82
|
+
# %% ../nbs/01_refine.ipynb 21
|
|
82
83
|
def mk_fixes_lut(
|
|
83
84
|
hdgs: list[str], # List of markdown headings
|
|
84
85
|
model: str='claude-sonnet-4-5', # Model to use
|
|
@@ -88,7 +89,7 @@ def mk_fixes_lut(
|
|
|
88
89
|
fixes = fix_hdg_hierarchy(hdgs, model, api_key)
|
|
89
90
|
return {hdgs[int(k)]:v for k,v in fixes.items()}
|
|
90
91
|
|
|
91
|
-
# %% ../nbs/01_refine.ipynb
|
|
92
|
+
# %% ../nbs/01_refine.ipynb 24
|
|
92
93
|
def apply_hdg_fixes(
|
|
93
94
|
p:str, # Page to fix
|
|
94
95
|
lut_fixes: dict[str, str], # Lookup table of fixes
|
|
@@ -98,7 +99,7 @@ def apply_hdg_fixes(
|
|
|
98
99
|
for old in get_hdgs(p): p = p.replace(old, lut_fixes.get(old, old) + (f' .... page {pg}' if pg else ''))
|
|
99
100
|
return p
|
|
100
101
|
|
|
101
|
-
# %% ../nbs/01_refine.ipynb
|
|
102
|
+
# %% ../nbs/01_refine.ipynb 27
|
|
102
103
|
def fix_md_hdgs(
|
|
103
104
|
src:str, # Source directory with markdown pages
|
|
104
105
|
model:str='claude-sonnet-4-5', # Model
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
mistocr/__init__.py,sha256=rnObPjuBcEStqSO0S6gsdS_ot8ITOQjVj_-P1LUUYpg,22
|
|
2
|
+
mistocr/_modidx.py,sha256=R9zVMv4dKz2sLStoB5wBoKRqjza216_z8xPXszoplU4,2660
|
|
3
|
+
mistocr/core.py,sha256=wtaYZ_Fz0dXIb1cYLAtymwR9Z7_KBI4ULy-UpM3YTX4,7154
|
|
4
|
+
mistocr/refine.py,sha256=0N0omMZvPqydArPaiTdkX8tts4eS4AgfAJQP98WnvwY,4307
|
|
5
|
+
mistocr-0.1.1.dist-info/licenses/LICENSE,sha256=xV8xoN4VOL0uw9X8RSs2IMuD_Ss_a9yAbtGNeBWZwnw,11337
|
|
6
|
+
mistocr-0.1.1.dist-info/METADATA,sha256=ZPvywkk_QS7UTfPtup5IppiLzf5aZ1tCSPkZLV9g4WE,4848
|
|
7
|
+
mistocr-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
mistocr-0.1.1.dist-info/entry_points.txt,sha256=HjAiHozobM-alm_6bTF-ehRr2DD3KYE9PgRngelONOY,36
|
|
9
|
+
mistocr-0.1.1.dist-info/top_level.txt,sha256=LelTYnSpSXLy1Hb1m2YP3gt8luwP-I8KV0NjP_ucdSs,8
|
|
10
|
+
mistocr-0.1.1.dist-info/RECORD,,
|
mistocr-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
mistocr/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
2
|
-
mistocr/_modidx.py,sha256=R9zVMv4dKz2sLStoB5wBoKRqjza216_z8xPXszoplU4,2660
|
|
3
|
-
mistocr/core.py,sha256=wtaYZ_Fz0dXIb1cYLAtymwR9Z7_KBI4ULy-UpM3YTX4,7154
|
|
4
|
-
mistocr/refine.py,sha256=gWup79LGjmvKW5RyY1dRKUeAEt94mUJIeTZB3V4D-JE,4258
|
|
5
|
-
mistocr-0.1.0.dist-info/licenses/LICENSE,sha256=xV8xoN4VOL0uw9X8RSs2IMuD_Ss_a9yAbtGNeBWZwnw,11337
|
|
6
|
-
mistocr-0.1.0.dist-info/METADATA,sha256=JOyUQONpYUmmGk2kFzMkxaIBrHwjC9CfmI7fc9qa6ms,4848
|
|
7
|
-
mistocr-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
mistocr-0.1.0.dist-info/entry_points.txt,sha256=HjAiHozobM-alm_6bTF-ehRr2DD3KYE9PgRngelONOY,36
|
|
9
|
-
mistocr-0.1.0.dist-info/top_level.txt,sha256=LelTYnSpSXLy1Hb1m2YP3gt8luwP-I8KV0NjP_ucdSs,8
|
|
10
|
-
mistocr-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|