deepresearch-flow 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepresearch_flow/paper/web/constants.py +4 -4
- deepresearch_flow/paper/web/templates/detail.html +3 -3
- deepresearch_flow/paper/web/templates/index.html +3 -3
- deepresearch_flow/recognize/cli.py +805 -26
- deepresearch_flow/recognize/katex_check.js +29 -0
- deepresearch_flow/recognize/math.py +719 -0
- deepresearch_flow/recognize/mermaid.py +690 -0
- {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.4.1.dist-info}/METADATA +56 -3
- {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.4.1.dist-info}/RECORD +13 -10
- {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.4.1.dist-info}/WHEEL +0 -0
- {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.4.1.dist-info}/entry_points.txt +0 -0
- {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.4.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepresearch-flow
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Workflow tools for paper extraction, review, and research automation.
|
|
5
5
|
Author-email: DengQi <dengqi935@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -51,9 +51,10 @@ Requires-Dist: jsonschema>=4.21.1
|
|
|
51
51
|
Requires-Dist: markdown-it-py>=3.0.0
|
|
52
52
|
Requires-Dist: mdit-py-plugins>=0.4.0
|
|
53
53
|
Requires-Dist: pypdf>=3.0.0
|
|
54
|
+
Requires-Dist: pylatexenc>=2.10
|
|
54
55
|
Requires-Dist: pybtex>=0.24.0
|
|
55
56
|
Requires-Dist: rich>=13.7.1
|
|
56
|
-
Requires-Dist: rumdl>=0.0.
|
|
57
|
+
Requires-Dist: rumdl>=0.0.218
|
|
57
58
|
Requires-Dist: starlette>=0.37.2
|
|
58
59
|
Requires-Dist: tqdm>=4.66.4
|
|
59
60
|
Requires-Dist: uvicorn>=0.27.1
|
|
@@ -172,7 +173,36 @@ uv run deepresearch-flow translator translate \
|
|
|
172
173
|
--fix-level moderate
|
|
173
174
|
```
|
|
174
175
|
|
|
175
|
-
#### Step 3: Serve Your Database
|
|
176
|
+
#### Step 3: Repair OCR Outputs (Recommended)
|
|
177
|
+
|
|
178
|
+
Recommended sequence to stabilize markdown before serving:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# 1) Fix OCR markdown (auto-detects JSON if inputs are .json)
|
|
182
|
+
uv run deepresearch-flow recognize fix \
|
|
183
|
+
--input ./docs \
|
|
184
|
+
--in-place
|
|
185
|
+
|
|
186
|
+
# 2) Fix LaTeX formulas
|
|
187
|
+
uv run deepresearch-flow recognize fix-math \
|
|
188
|
+
--input ./docs \
|
|
189
|
+
--model openai/gpt-4o-mini \
|
|
190
|
+
--in-place
|
|
191
|
+
|
|
192
|
+
# 3) Fix Mermaid diagrams
|
|
193
|
+
uv run deepresearch-flow recognize fix-mermaid \
|
|
194
|
+
--input ./paper_outputs \
|
|
195
|
+
--json \
|
|
196
|
+
--model openai/gpt-4o-mini \
|
|
197
|
+
--in-place
|
|
198
|
+
|
|
199
|
+
# 4) Fix again to normalize formatting
|
|
200
|
+
uv run deepresearch-flow recognize fix \
|
|
201
|
+
--input ./docs \
|
|
202
|
+
--in-place
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
#### Step 4: Serve Your Database
|
|
176
206
|
|
|
177
207
|
Launch a local UI to read and manage your papers.
|
|
178
208
|
|
|
@@ -275,6 +305,10 @@ Tools to clean up raw outputs from OCR engines like MinerU.
|
|
|
275
305
|
- Unpack Images: extract Base64 images back to files.
|
|
276
306
|
- Organize: flatten nested OCR output directories.
|
|
277
307
|
- Fix: apply OCR fixes and rumdl formatting during organize, or as a standalone step.
|
|
308
|
+
- Fix JSON: apply the same fixes to markdown fields inside paper JSON outputs.
|
|
309
|
+
- Fix Math: validate and repair LaTeX formulas with optional LLM assistance.
|
|
310
|
+
- Fix Mermaid: validate and repair Mermaid diagrams (requires `mmdc` from mermaid-cli).
|
|
311
|
+
- Recommended order: `fix` -> `fix-math` -> `fix-mermaid` -> `fix`.
|
|
278
312
|
|
|
279
313
|
```bash
|
|
280
314
|
uv run deepresearch-flow recognize md embed --input ./raw_ocr --output ./clean_md
|
|
@@ -296,6 +330,25 @@ uv run deepresearch-flow recognize fix \
|
|
|
296
330
|
uv run deepresearch-flow recognize fix \
|
|
297
331
|
--input ./ocr_md \
|
|
298
332
|
--in-place
|
|
333
|
+
|
|
334
|
+
# Fix JSON outputs in place
|
|
335
|
+
uv run deepresearch-flow recognize fix \
|
|
336
|
+
--json \
|
|
337
|
+
--input ./paper_outputs \
|
|
338
|
+
--in-place
|
|
339
|
+
|
|
340
|
+
# Fix LaTeX formulas in markdown
|
|
341
|
+
uv run deepresearch-flow recognize fix-math \
|
|
342
|
+
--input ./docs \
|
|
343
|
+
--model openai/gpt-4o-mini \
|
|
344
|
+
--in-place
|
|
345
|
+
|
|
346
|
+
# Fix Mermaid diagrams in JSON outputs
|
|
347
|
+
uv run deepresearch-flow recognize fix-mermaid \
|
|
348
|
+
--json \
|
|
349
|
+
--input ./paper_outputs \
|
|
350
|
+
--model openai/gpt-4o-mini \
|
|
351
|
+
--in-place
|
|
299
352
|
```
|
|
300
353
|
|
|
301
354
|
</details>
|
|
@@ -42,7 +42,7 @@ deepresearch_flow/paper/templates/eight_questions.md.j2,sha256=Ecz4CD3nd7jZ4Dg8h
|
|
|
42
42
|
deepresearch_flow/paper/templates/three_pass.md.j2,sha256=ZRj-NkpZePnqp0gSE8OT1dN5Lr5RW4vdOYdeVejYJW0,1576
|
|
43
43
|
deepresearch_flow/paper/web/__init__.py,sha256=eQBtBjvOYsNEdivHTI0aO286SCG2c86xI02tf-0jz5I,39
|
|
44
44
|
deepresearch_flow/paper/web/app.py,sha256=rXnQjffyzH5b64oCwv6ucihU_y5zaFbpzdEB5PRUvHc,3063
|
|
45
|
-
deepresearch_flow/paper/web/constants.py,sha256=
|
|
45
|
+
deepresearch_flow/paper/web/constants.py,sha256=HuuE_oZKckmisD3F_1RAqWzO7bnhNmMLyM8FqyM5Yfk,1085
|
|
46
46
|
deepresearch_flow/paper/web/filters.py,sha256=OVMB4GfigP9GPD5dXytHyeLYtnVXEK-QjYfA_k7QbaA,8315
|
|
47
47
|
deepresearch_flow/paper/web/markdown.py,sha256=QHrxUYKB-uAZjG5jVGmkQ6EIT2dSxQNzlibgjGIIKuA,18888
|
|
48
48
|
deepresearch_flow/paper/web/query.py,sha256=vTegfm5zGVkYCd6_K3yNrXJEmKMccUUFKG9DePPcKMw,1938
|
|
@@ -427,12 +427,15 @@ deepresearch_flow/paper/web/static/js/index.js,sha256=bbQz8QAewmu3TT8ImAzUqNtTWQ
|
|
|
427
427
|
deepresearch_flow/paper/web/static/js/outline.js,sha256=e9ydLcBqaTXOYULXt-1OKgKIzrZcZaH1RebPXWBbLvE,1882
|
|
428
428
|
deepresearch_flow/paper/web/static/js/stats.js,sha256=USGIAx9cPQTMeyFwYu_bTYPJM7OoiqimhCYuAjoP0-s,1420
|
|
429
429
|
deepresearch_flow/paper/web/templates/base.html,sha256=4gWJLvjOuDSnBYRpJqxhGKmKC6UuOl19q_Q_cOjhL-g,1806
|
|
430
|
-
deepresearch_flow/paper/web/templates/detail.html,sha256=
|
|
431
|
-
deepresearch_flow/paper/web/templates/index.html,sha256=
|
|
430
|
+
deepresearch_flow/paper/web/templates/detail.html,sha256=VC5VbsaAONajZG8_WFSuURCViRXLdi4gH_wDAMt3EVI,16332
|
|
431
|
+
deepresearch_flow/paper/web/templates/index.html,sha256=qNWwyQWa3QzmHdJbohSe5PJOZS3-KxWjk0RxoQSZiys,6117
|
|
432
432
|
deepresearch_flow/paper/web/templates/stats.html,sha256=bcQBawoZ9KoRkM0NNo9WJBVeN_8O1WU2xNiye-Fugyo,671
|
|
433
433
|
deepresearch_flow/recognize/__init__.py,sha256=yMAqbdCzpdRSiwFhq9j7yx9ZWxqz_Zq3vfYlTLFCWek,33
|
|
434
|
-
deepresearch_flow/recognize/cli.py,sha256=
|
|
434
|
+
deepresearch_flow/recognize/cli.py,sha256=QV0d9XhOdcWcr05427GPSSMheal06WvvmejV7wLVfz8,53460
|
|
435
|
+
deepresearch_flow/recognize/katex_check.js,sha256=jKFLk0Y7y_XR0fBJe2xdfQhAMMuYRXo-pSpWqcEyAH0,735
|
|
435
436
|
deepresearch_flow/recognize/markdown.py,sha256=y-PMJbGqrfWCNBVGanXK1M4OuMP9e1eqh7HDYye5a7Q,8757
|
|
437
|
+
deepresearch_flow/recognize/math.py,sha256=qgI4WRsoWgLaue9OxIq1pcO18wUOlpCNBLKQgicN2hs,22623
|
|
438
|
+
deepresearch_flow/recognize/mermaid.py,sha256=O8uQoEC9mG4mSdTpr-OnmP_vrThaFdUeqt6U00m6O-0,22545
|
|
436
439
|
deepresearch_flow/recognize/organize.py,sha256=-KVzuwNjiT2bLwqwLwcguEMQYxnGiZXjLNlov_oXSTo,5237
|
|
437
440
|
deepresearch_flow/translator/__init__.py,sha256=iaAkufvEELVKNbcs08Nh7bkTO4JlkT3rT_JIBP9jGfc,26
|
|
438
441
|
deepresearch_flow/translator/cli.py,sha256=BceOZhQuN9s5kqhpvLJuwpbB5J0MY1ucWUKw0jXWUPc,16872
|
|
@@ -443,9 +446,9 @@ deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb
|
|
|
443
446
|
deepresearch_flow/translator/prompts.py,sha256=kl_9O2YvmtXC1w6WLnsLuVZKz4mcOtUF887SiTaOvc0,4754
|
|
444
447
|
deepresearch_flow/translator/protector.py,sha256=sXwNJ1Y8tyPm7dgm8-7S8HkcPe23TGsBdwRxH6mKL70,11291
|
|
445
448
|
deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
|
|
446
|
-
deepresearch_flow-0.4.
|
|
447
|
-
deepresearch_flow-0.4.
|
|
448
|
-
deepresearch_flow-0.4.
|
|
449
|
-
deepresearch_flow-0.4.
|
|
450
|
-
deepresearch_flow-0.4.
|
|
451
|
-
deepresearch_flow-0.4.
|
|
449
|
+
deepresearch_flow-0.4.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
|
|
450
|
+
deepresearch_flow-0.4.1.dist-info/METADATA,sha256=bfOksObo91hopsY_NbQNce_FjC8MEW8kkYUjkQQi9Xo,12918
|
|
451
|
+
deepresearch_flow-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
452
|
+
deepresearch_flow-0.4.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
|
|
453
|
+
deepresearch_flow-0.4.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
|
|
454
|
+
deepresearch_flow-0.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|