deepresearch-flow 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. deepresearch_flow/paper/cli.py +63 -0
  2. deepresearch_flow/paper/config.py +87 -12
  3. deepresearch_flow/paper/db.py +1154 -35
  4. deepresearch_flow/paper/db_ops.py +124 -19
  5. deepresearch_flow/paper/extract.py +1546 -152
  6. deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2 +2 -0
  7. deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2 +5 -0
  8. deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +2 -0
  9. deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +272 -40
  10. deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2 +1 -0
  11. deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2 +2 -0
  12. deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +2 -0
  13. deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +4 -0
  14. deepresearch_flow/paper/prompt_templates/simple_phi_system.j2 +2 -0
  15. deepresearch_flow/paper/prompt_templates/simple_system.j2 +2 -0
  16. deepresearch_flow/paper/prompt_templates/simple_user.j2 +2 -0
  17. deepresearch_flow/paper/providers/azure_openai.py +45 -3
  18. deepresearch_flow/paper/providers/openai_compatible.py +45 -3
  19. deepresearch_flow/paper/schemas/deep_read_phi_schema.json +1 -0
  20. deepresearch_flow/paper/schemas/deep_read_schema.json +1 -0
  21. deepresearch_flow/paper/schemas/default_paper_schema.json +6 -0
  22. deepresearch_flow/paper/schemas/eight_questions_schema.json +1 -0
  23. deepresearch_flow/paper/snapshot/__init__.py +4 -0
  24. deepresearch_flow/paper/snapshot/api.py +941 -0
  25. deepresearch_flow/paper/snapshot/builder.py +965 -0
  26. deepresearch_flow/paper/snapshot/identity.py +239 -0
  27. deepresearch_flow/paper/snapshot/schema.py +245 -0
  28. deepresearch_flow/paper/snapshot/tests/__init__.py +2 -0
  29. deepresearch_flow/paper/snapshot/tests/test_identity.py +123 -0
  30. deepresearch_flow/paper/snapshot/text.py +154 -0
  31. deepresearch_flow/paper/template_registry.py +1 -0
  32. deepresearch_flow/paper/templates/deep_read.md.j2 +4 -0
  33. deepresearch_flow/paper/templates/deep_read_phi.md.j2 +4 -0
  34. deepresearch_flow/paper/templates/default_paper.md.j2 +4 -0
  35. deepresearch_flow/paper/templates/eight_questions.md.j2 +4 -0
  36. deepresearch_flow/paper/web/app.py +10 -3
  37. deepresearch_flow/recognize/cli.py +380 -103
  38. deepresearch_flow/recognize/markdown.py +31 -7
  39. deepresearch_flow/recognize/math.py +47 -12
  40. deepresearch_flow/recognize/mermaid.py +320 -10
  41. deepresearch_flow/recognize/organize.py +29 -7
  42. deepresearch_flow/translator/cli.py +71 -20
  43. deepresearch_flow/translator/engine.py +220 -81
  44. deepresearch_flow/translator/prompts.py +19 -2
  45. deepresearch_flow/translator/protector.py +15 -3
  46. deepresearch_flow-0.6.1.dist-info/METADATA +849 -0
  47. {deepresearch_flow-0.5.1.dist-info → deepresearch_flow-0.6.1.dist-info}/RECORD +51 -43
  48. {deepresearch_flow-0.5.1.dist-info → deepresearch_flow-0.6.1.dist-info}/WHEEL +1 -1
  49. deepresearch_flow-0.5.1.dist-info/METADATA +0 -440
  50. {deepresearch_flow-0.5.1.dist-info → deepresearch_flow-0.6.1.dist-info}/entry_points.txt +0 -0
  51. {deepresearch_flow-0.5.1.dist-info → deepresearch_flow-0.6.1.dist-info}/licenses/LICENSE +0 -0
  52. {deepresearch_flow-0.5.1.dist-info → deepresearch_flow-0.6.1.dist-info}/top_level.txt +0 -0
@@ -2,54 +2,62 @@ deepresearch_flow/__init__.py,sha256=rjP9ES4zJCfEN_MCDYAYPL1mNJZGjojdmbRwnZ9FlEk
2
2
  deepresearch_flow/__main__.py,sha256=Ceo0rMTOhHhwFPD-HyDDagenNsmWEzPmsdYLI7kwKVA,115
3
3
  deepresearch_flow/cli.py,sha256=t4oowCNWldL0DrVJ4d0UlRkuGU2qHej_G0mAc_quteQ,455
4
4
  deepresearch_flow/paper/__init__.py,sha256=sunaOkcgAJBrfmcaJTumcWbPGVUSGWvOv2a2Yidzy0A,43
5
- deepresearch_flow/paper/cli.py,sha256=4UY3KHi6BUGztL1vB4w0cCMiIAo9KNxrfQn1GBHt6fA,11153
6
- deepresearch_flow/paper/config.py,sha256=totVBGzouh0KS6mhRNPneXZYPuuw0SHiOGdO3r6HSfc,9289
7
- deepresearch_flow/paper/db.py,sha256=-9Wl-ivIu0N8bC6lFGxxBKKQewD8aay7Aq-ynfhEqJg,43002
8
- deepresearch_flow/paper/db_ops.py,sha256=Nwbou8y3jMNKMcvDSJfmi5NYQwEtoHk1E_7TbwDoYX8,70501
9
- deepresearch_flow/paper/extract.py,sha256=ID1dd2r6LTB0kRF4qBSH6bGtBGv0znw--g_mXYBcoeU,32314
5
+ deepresearch_flow/paper/cli.py,sha256=68d-yccScU0yL6d7eqZVdudPO6i_in8F4v-hKDWILMo,13647
6
+ deepresearch_flow/paper/config.py,sha256=V7z4ApPXCV1acSl2FU3nZGq6nt8uisMhm0GtOq5zzmg,12021
7
+ deepresearch_flow/paper/db.py,sha256=Bxhrd8NCaPZ9Ijtp1uiOplwh2Uy0n2Dyn1bO0d4A9bE,91780
8
+ deepresearch_flow/paper/db_ops.py,sha256=cb64jn2ax39i3tCS-0DYmlsJdGX3uBS2u5ncUIbUBic,73980
9
+ deepresearch_flow/paper/extract.py,sha256=78ASAyNLfCl1AsAk2o_v1vskZCNZuayaCHgr0S4V2Vs,87632
10
10
  deepresearch_flow/paper/llm.py,sha256=mHfs5IkT3Q6BOh46MDlfUmgVTX24WRf0IKKoOnN8nV8,4007
11
11
  deepresearch_flow/paper/prompts.py,sha256=mV7cEXw8pwukBUE4Trah0SjEPSSDgg5-RGaNaUdo4EU,519
12
12
  deepresearch_flow/paper/render.py,sha256=KeccrRGf1_sxoaiT6SUDkFRj9sStReoEwNvlw1ir7qw,2181
13
13
  deepresearch_flow/paper/schema.py,sha256=tQEVbj4R8NqNGBW6VYwW-xf5QJgV9qthrbZB-EmZTKA,1931
14
- deepresearch_flow/paper/template_registry.py,sha256=OSJOLAHFOtjsGR7sNXYstL8Z6VdY5g-KvBhNMgVUNXw,7387
14
+ deepresearch_flow/paper/template_registry.py,sha256=r0-uVIeeVcPKMD9TpIZ5kAi4CvMrJQoJTHBSEamA8Ko,7427
15
15
  deepresearch_flow/paper/utils.py,sha256=Lbr4OLiTtGJ8qX3AXOZHJdLVPVqdVLewClQKT_2XAQA,3788
16
16
  deepresearch_flow/paper/prompt_templates/__init__.py,sha256=b72Jin1J2Q7KS3635Te3YZm1ZXhuMIPzDVUH172Rphc,33
17
- deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
18
- deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2,sha256=UAobEe25nIyr0f7jAfL3ca5qZzHHJHKE3O-NixwBibI,17581
19
- deepresearch_flow/paper/prompt_templates/deep_read_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
20
- deepresearch_flow/paper/prompt_templates/deep_read_user.j2,sha256=VS_6ZkhN_h3-ur7tA4sosdPttjCg9ZwlhAn1LR4tsag,4068
21
- deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
22
- deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2,sha256=9q6zSLZO2OhRNvOg0tCFFYH6avUyGAf3yW6wsnRyOvQ,7866
23
- deepresearch_flow/paper/prompt_templates/eight_questions_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
24
- deepresearch_flow/paper/prompt_templates/eight_questions_user.j2,sha256=vyfMstnXAb5zLsxcqyYxTt5OWjpmPVoqaV-dIJshcrw,1339
25
- deepresearch_flow/paper/prompt_templates/simple_phi_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
17
+ deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2,sha256=_KxtVHV54VElOPm9l6P9rJmN7Sd6-JL6SDiQDd2QQCM,528
18
+ deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2,sha256=kI8Rsp03Fy5XBkqh8qYtsTGa6elnUNKc_Epfx42puNk,17954
19
+ deepresearch_flow/paper/prompt_templates/deep_read_system.j2,sha256=_KxtVHV54VElOPm9l6P9rJmN7Sd6-JL6SDiQDd2QQCM,528
20
+ deepresearch_flow/paper/prompt_templates/deep_read_user.j2,sha256=q9PkrO32tepg7km8mqACGWFW2mGTaYHUlVsLfEVB87g,12785
21
+ deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2,sha256=eZqgnNACpCNGxqZDo_JDq9m0VNc7p0qHe3fzmYzOv3s,466
22
+ deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2,sha256=sujB0jNicF9tswFzIoo97KNteY9QLBuE7oeVbnak8PM,8110
23
+ deepresearch_flow/paper/prompt_templates/eight_questions_system.j2,sha256=_KxtVHV54VElOPm9l6P9rJmN7Sd6-JL6SDiQDd2QQCM,528
24
+ deepresearch_flow/paper/prompt_templates/eight_questions_user.j2,sha256=eXSN_Cyla29m2wp6loQH4g6RU6aDxqskK9ICDSyyDho,1634
25
+ deepresearch_flow/paper/prompt_templates/simple_phi_system.j2,sha256=_KxtVHV54VElOPm9l6P9rJmN7Sd6-JL6SDiQDd2QQCM,528
26
26
  deepresearch_flow/paper/prompt_templates/simple_phi_user.j2,sha256=LL_h0O1wcGLZQtuhTfUUhURjAoJwH8Vrmh3XzeB9bRM,2089
27
- deepresearch_flow/paper/prompt_templates/simple_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
28
- deepresearch_flow/paper/prompt_templates/simple_user.j2,sha256=UX2FlsJkRdvbgPZmlCFzB7E2peGKpIjMGqkBfBQ4EHM,774
27
+ deepresearch_flow/paper/prompt_templates/simple_system.j2,sha256=_KxtVHV54VElOPm9l6P9rJmN7Sd6-JL6SDiQDd2QQCM,528
28
+ deepresearch_flow/paper/prompt_templates/simple_user.j2,sha256=f6XvUKwyi1BP6lW2XYDbEeIW4YsA2UmdrA0YdIZD750,840
29
29
  deepresearch_flow/paper/prompt_templates/three_pass_system.j2,sha256=WR7fp9nQYmXU5COwAfzly5noXEQEoFrh2Kx4YedvMww,364
30
30
  deepresearch_flow/paper/prompt_templates/three_pass_user.j2,sha256=R9OwcS_hmPlSRIv0-63YXqQBux_Uuv6iF7W4xy77nk8,1757
31
31
  deepresearch_flow/paper/providers/__init__.py,sha256=OiX-WYHweZ_FCeGS_siSEJndLXfWj2h8I0Am16P7KYQ,32
32
- deepresearch_flow/paper/providers/azure_openai.py,sha256=8xNaQ_5p8tXWzZ77g7rlwyknvrl6oxMgpzZVzq-4zEM,2206
32
+ deepresearch_flow/paper/providers/azure_openai.py,sha256=bejdKNAr3y5zQt33EbVao9ao3c842T-fWICVDlAy9Go,3530
33
33
  deepresearch_flow/paper/providers/base.py,sha256=WrfOVcpXTgxAGLvRogCCc3zXd5O5v4TYeTsSWOe9muM,537
34
34
  deepresearch_flow/paper/providers/claude.py,sha256=xFk2T2hSVb--sYBQbrvwykRSbEbEfYcoV9soRPpoTHU,2073
35
35
  deepresearch_flow/paper/providers/dashscope.py,sha256=UDg7bQY3EIj3wG2hbl36Bhr_uNjJcn_wPZZ6zSPHDY4,1732
36
36
  deepresearch_flow/paper/providers/gemini.py,sha256=x_JegccP4JK0pBj7slz7gVwX-6-ep8v_kl8NglqOUGg,3256
37
37
  deepresearch_flow/paper/providers/ollama.py,sha256=j3vZgP6gyxu9n5VF-4v8p3nOCbetzmnlaBjZmvuYKxE,1439
38
- deepresearch_flow/paper/providers/openai_compatible.py,sha256=eidpcOm5D24e0YU80ECUXcCnWgsMDv7gRXf3vnJlP4M,2077
38
+ deepresearch_flow/paper/providers/openai_compatible.py,sha256=AfNshL08WicAEdC7sO31hQMKqbpopunk0gu3PxOZhG4,3401
39
39
  deepresearch_flow/paper/schemas/__init__.py,sha256=NzRW1ugk01AZMkoDOrK-RF-fypYxYKOnyyKDzlHUQ40,29
40
- deepresearch_flow/paper/schemas/deep_read_phi_schema.json,sha256=_kPHH4RKzfTlYrDW7XgNhBECAAHXlvpNnnX9EP6Wqjk,823
41
- deepresearch_flow/paper/schemas/deep_read_schema.json,sha256=f9yhsisW_zSxRpjTn1D3-nsmtJMHmGJLLvRGPEmQX0s,1263
42
- deepresearch_flow/paper/schemas/default_paper_schema.json,sha256=Ncz-XSOJuB5jz1k7jAG7ha819rXPL4UGew06fJHkImM,827
43
- deepresearch_flow/paper/schemas/eight_questions_schema.json,sha256=3jYQeKvJWduuU_4En-M-s0gEJMOjdjYeu_ENlf8CDyw,931
40
+ deepresearch_flow/paper/schemas/deep_read_phi_schema.json,sha256=i6JA-kJg0BaTeOj_w0AMkS8Uj0gdB5_Y8EPWZb5hg-M,897
41
+ deepresearch_flow/paper/schemas/deep_read_schema.json,sha256=BqQE8T6y_gXt_on75x7MuYJOAcoeRQLw96JfRYvKNYk,1337
42
+ deepresearch_flow/paper/schemas/default_paper_schema.json,sha256=6h_2ayHolJj8JMnFQ6kYNC9diQr-LWuW-ineCAB5OIM,935
43
+ deepresearch_flow/paper/schemas/eight_questions_schema.json,sha256=VFKKpdZkgPdQkYIW5jyrZQ7c2TlQZwB4svVWfoiwxdg,1005
44
44
  deepresearch_flow/paper/schemas/three_pass_schema.json,sha256=8aNr4EdRiilxszIRBCC4hRNXrfIOcdnVW4Qhe6Fnh0o,689
45
+ deepresearch_flow/paper/snapshot/__init__.py,sha256=1VLO36xxDB3J5Yoo-HH9vyI-4ev2HcivXN0sNLg8O5k,102
46
+ deepresearch_flow/paper/snapshot/api.py,sha256=WgkOgS7n_2Fx-Bl4KnLrh5nhRJAsWJaPjXu7vX5ubxY,36960
47
+ deepresearch_flow/paper/snapshot/builder.py,sha256=HbRcfNteMoP4RnQ4y2onZCm9XfnIvzXLn_EwsLZsDzY,38692
48
+ deepresearch_flow/paper/snapshot/identity.py,sha256=k9x1EZPFBU1qgxzkTGvwVtDjLgcosmM_udPuvRLl0uI,7748
49
+ deepresearch_flow/paper/snapshot/schema.py,sha256=DcVmAklLYyEeDoVV9jYw7hoMHnHd9Eziivl-LP2busY,8991
50
+ deepresearch_flow/paper/snapshot/text.py,sha256=0RnxLowa6AdirdLsUYym6BhWbjwiP2Qj2oZeA-pjmdE,4368
51
+ deepresearch_flow/paper/snapshot/tests/__init__.py,sha256=G0IowrxHjGUIaqxcw6SvlcLFAtE5ZsleG6ECgd-sIdk,52
52
+ deepresearch_flow/paper/snapshot/tests/test_identity.py,sha256=KDFixAUU9l68KOum7gf1IrD0Oy18dBCSXG7RbJTqflA,4520
45
53
  deepresearch_flow/paper/templates/__init__.py,sha256=p8W6kINvrf-T2X6Ow4GMr28syVOorFuMn0pbmieVzAw,35
46
- deepresearch_flow/paper/templates/deep_read.md.j2,sha256=yYe2IotBpoDnpUE8lR0OaHMzmiH8rDNRQkaaELpuhPA,4637
47
- deepresearch_flow/paper/templates/deep_read_phi.md.j2,sha256=c9jBJv2L4YLQutYuzYw_6DAgJUtBeQWOphTMp94AjKs,1523
48
- deepresearch_flow/paper/templates/default_paper.md.j2,sha256=3azu48534QtLtHrCwI1sXtQUSaPfpWBVFOY44s-G63Y,887
49
- deepresearch_flow/paper/templates/eight_questions.md.j2,sha256=Ecz4CD3nd7jZ4Dg8himZkTwF4WDkk0ILWk8V728uOPI,3038
54
+ deepresearch_flow/paper/templates/deep_read.md.j2,sha256=vwVSPOzMBFIS72ez5XFBaKrDZGz0z32L3VGP6mNk434,4780
55
+ deepresearch_flow/paper/templates/deep_read_phi.md.j2,sha256=6Yz2Kxk0czGDPkZiWX3b87glLYHwDU1afr6CkjS-dh8,1666
56
+ deepresearch_flow/paper/templates/default_paper.md.j2,sha256=dMObS30OpZZ2Np0WHUgA1BBuvsRg6JsOEkeT4NO_NI4,1030
57
+ deepresearch_flow/paper/templates/eight_questions.md.j2,sha256=G0bJCMY4keKraofPLJ7PtS74lMYIoYYX0BMoGwDFXb0,3181
50
58
  deepresearch_flow/paper/templates/three_pass.md.j2,sha256=ZRj-NkpZePnqp0gSE8OT1dN5Lr5RW4vdOYdeVejYJW0,1576
51
59
  deepresearch_flow/paper/web/__init__.py,sha256=eQBtBjvOYsNEdivHTI0aO286SCG2c86xI02tf-0jz5I,39
52
- deepresearch_flow/paper/web/app.py,sha256=DxrcwrtwUKLZfjT25_ly1zfnqvgL1A8zDF6W7pCb8BY,7246
60
+ deepresearch_flow/paper/web/app.py,sha256=M1CiMkAc6Mz6Z3L-awytVY-LM-kqLbKHF7wsxNw9HYc,7502
53
61
  deepresearch_flow/paper/web/constants.py,sha256=0QZjySqVt2c6PZFoykVoeVgZAXsUahfL4ZVQlfL_qAY,1165
54
62
  deepresearch_flow/paper/web/filters.py,sha256=OVMB4GfigP9GPD5dXytHyeLYtnVXEK-QjYfA_k7QbaA,8315
55
63
  deepresearch_flow/paper/web/markdown.py,sha256=pwVGy-7kQHsEFo15SmNcQw15-v7lJn_N3VLd827L7uk,26852
@@ -440,24 +448,24 @@ deepresearch_flow/paper/web/templates/detail.html,sha256=NvYDKajgVYrSF9ayVCpMglK
440
448
  deepresearch_flow/paper/web/templates/index.html,sha256=qNWwyQWa3QzmHdJbohSe5PJOZS3-KxWjk0RxoQSZiys,6117
441
449
  deepresearch_flow/paper/web/templates/stats.html,sha256=bcQBawoZ9KoRkM0NNo9WJBVeN_8O1WU2xNiye-Fugyo,671
442
450
  deepresearch_flow/recognize/__init__.py,sha256=yMAqbdCzpdRSiwFhq9j7yx9ZWxqz_Zq3vfYlTLFCWek,33
443
- deepresearch_flow/recognize/cli.py,sha256=QV0d9XhOdcWcr05427GPSSMheal06WvvmejV7wLVfz8,53460
451
+ deepresearch_flow/recognize/cli.py,sha256=0YMyNfEdQB6Vz7bhaOI3o3tc5ZnoOkHwGQTNKyPjWzg,67312
444
452
  deepresearch_flow/recognize/katex_check.js,sha256=jKFLk0Y7y_XR0fBJe2xdfQhAMMuYRXo-pSpWqcEyAH0,735
445
- deepresearch_flow/recognize/markdown.py,sha256=y-PMJbGqrfWCNBVGanXK1M4OuMP9e1eqh7HDYye5a7Q,8757
446
- deepresearch_flow/recognize/math.py,sha256=qgI4WRsoWgLaue9OxIq1pcO18wUOlpCNBLKQgicN2hs,22623
447
- deepresearch_flow/recognize/mermaid.py,sha256=O8uQoEC9mG4mSdTpr-OnmP_vrThaFdUeqt6U00m6O-0,22545
448
- deepresearch_flow/recognize/organize.py,sha256=nxeWWm7PIp12WvDllYljYNnHqFSlcV7-kInMoHjdblc,5190
453
+ deepresearch_flow/recognize/markdown.py,sha256=8B4cR0n1gC2rXceita25Phi0Lo4uyDweYuXbRB1laH4,9472
454
+ deepresearch_flow/recognize/math.py,sha256=jvTkujqMob4LPuiltoKgss36GaIhCa6hQmnpraqOTiI,24073
455
+ deepresearch_flow/recognize/mermaid.py,sha256=rc5eTwn4i1dKZRua6igFZW08lhqXqqUcijACtCqNS1g,33819
456
+ deepresearch_flow/recognize/organize.py,sha256=KyhcwJ6ckWaZTCyE4M1e5k6ABRHHGWpAF9Re4LB-Sks,5883
449
457
  deepresearch_flow/translator/__init__.py,sha256=iaAkufvEELVKNbcs08Nh7bkTO4JlkT3rT_JIBP9jGfc,26
450
- deepresearch_flow/translator/cli.py,sha256=BceOZhQuN9s5kqhpvLJuwpbB5J0MY1ucWUKw0jXWUPc,16872
458
+ deepresearch_flow/translator/cli.py,sha256=gFcLTltbGqEU8LJl8epPRLEdNrmI03IWV6pbzuW9Z_o,18942
451
459
  deepresearch_flow/translator/config.py,sha256=0JI4VBLIzT039YscfEb5hqtCWCu8P2bJIgnAfIAhFmU,502
452
- deepresearch_flow/translator/engine.py,sha256=dLKKUjmptkLXhIs5ZsIUonmKI9bS8Se4tOnp7fADIYU,36800
460
+ deepresearch_flow/translator/engine.py,sha256=kdr7dkkwHTVkMhN7IpcQ1qGhIIl2mYsRfVeY3FOrbrk,42084
453
461
  deepresearch_flow/translator/fixers.py,sha256=Oe9vLhNxkmqyF6SkfTAGLDiSl7N_cCUArhO6pGpIa3Q,15667
454
462
  deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb4A7UQEYV3tcnP8,2097
455
- deepresearch_flow/translator/prompts.py,sha256=kl_9O2YvmtXC1w6WLnsLuVZKz4mcOtUF887SiTaOvc0,4754
456
- deepresearch_flow/translator/protector.py,sha256=sXwNJ1Y8tyPm7dgm8-7S8HkcPe23TGsBdwRxH6mKL70,11291
463
+ deepresearch_flow/translator/prompts.py,sha256=EvfBvBIpQXARDj4m87GAyFXJGL8EJeahj_rOmp9mv68,5556
464
+ deepresearch_flow/translator/protector.py,sha256=yUMuS2FgVofK_MRXrcauLRiwNvdCCjNAnh6CcNd686o,11777
457
465
  deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
458
- deepresearch_flow-0.5.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
459
- deepresearch_flow-0.5.1.dist-info/METADATA,sha256=glpV-vtIAMg05jdBladhGqNxE2p4Dq0Ynd4O3YsmSds,14486
460
- deepresearch_flow-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
461
- deepresearch_flow-0.5.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
462
- deepresearch_flow-0.5.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
463
- deepresearch_flow-0.5.1.dist-info/RECORD,,
466
+ deepresearch_flow-0.6.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
467
+ deepresearch_flow-0.6.1.dist-info/METADATA,sha256=y_CHy1YJ-3P31W43Q_fd8dEkznj7LKLRrCF6F-sGHaQ,26696
468
+ deepresearch_flow-0.6.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
469
+ deepresearch_flow-0.6.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
470
+ deepresearch_flow-0.6.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
471
+ deepresearch_flow-0.6.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,440 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: deepresearch-flow
3
- Version: 0.5.1
4
- Summary: Workflow tools for paper extraction, review, and research automation.
5
- Author-email: DengQi <dengqi935@gmail.com>
6
- License: MIT License
7
-
8
- Copyright (c) 2025 DengQi
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
27
-
28
- Project-URL: Homepage, https://github.com/nerdneilsfield/ai-deepresearch-flow
29
- Project-URL: Repository, https://github.com/nerdneilsfield/ai-deepresearch-flow
30
- Project-URL: Issues, https://github.com/nerdneilsfield/ai-deepresearch-flow/issues
31
- Keywords: research,papers,pdf,ocr,llm,workflow
32
- Classifier: Development Status :: 3 - Alpha
33
- Classifier: Intended Audience :: Science/Research
34
- Classifier: License :: OSI Approved :: MIT License
35
- Classifier: Programming Language :: Python :: 3
36
- Classifier: Programming Language :: Python :: 3 :: Only
37
- Classifier: Topic :: Scientific/Engineering :: Information Analysis
38
- Requires-Python: >=3.12
39
- Description-Content-Type: text/markdown
40
- License-File: LICENSE
41
- Requires-Dist: anthropic>=0.28.0
42
- Requires-Dist: click>=8.1.7
43
- Requires-Dist: coloredlogs>=15.0.1
44
- Requires-Dist: dashscope>=1.20.0
45
- Requires-Dist: google-auth>=2.0.0
46
- Requires-Dist: google-genai>=0.5.0
47
- Requires-Dist: httpx>=0.27.0
48
- Requires-Dist: jinja2>=3.1.3
49
- Requires-Dist: json-repair>=0.31.0
50
- Requires-Dist: jsonschema>=4.21.1
51
- Requires-Dist: markdown-it-py>=3.0.0
52
- Requires-Dist: mdit-py-plugins>=0.4.0
53
- Requires-Dist: pypdf>=3.0.0
54
- Requires-Dist: pylatexenc>=2.10
55
- Requires-Dist: pybtex>=0.24.0
56
- Requires-Dist: rich>=13.7.1
57
- Requires-Dist: rumdl>=0.0.218
58
- Requires-Dist: starlette>=0.37.2
59
- Requires-Dist: tqdm>=4.66.4
60
- Requires-Dist: uvicorn>=0.27.1
61
- Dynamic: license-file
62
-
63
- <p align="center">
64
- <img src=".github/assets/logo.png" width="140" alt="ai-deepresearch-flow logo" />
65
- </p>
66
-
67
- <h3 align="center">ai-deepresearch-flow</h3>
68
-
69
- <p align="center">
70
- <em>From documents to deep research insight — automatically.</em>
71
- </p>
72
-
73
- <p align="center">
74
- <a href="README.md">English</a> | <a href="README_ZH.md">中文</a>
75
- </p>
76
-
77
- <p align="center">
78
- <a href="https://github.com/nerdneilsfield/ai-deepresearch-flow/actions">
79
- <img src="https://img.shields.io/github/actions/workflow/status/nerdneilsfield/ai-deepresearch-flow/push-to-pypi.yml?style=flat-square" />
80
- </a>
81
- <a href="https://pypi.org/project/deepresearch-flow/">
82
- <img src="https://img.shields.io/pypi/v/deepresearch-flow?style=flat-square" />
83
- </a>
84
- <a href="https://pypi.org/project/deepresearch-flow/">
85
- <img src="https://img.shields.io/pypi/pyversions/deepresearch-flow?style=flat-square" />
86
- </a>
87
- <a href="https://hub.docker.com/r/nerdneils/deepresearch-flow">
88
- <img src="https://img.shields.io/docker/v/nerdneils/deepresearch-flow?style=flat-square" />
89
- </a>
90
- <a href="https://github.com/nerdneilsfield/ai-deepresearch-flow/pkgs/container/deepresearch-flow">
91
- <img src="https://img.shields.io/badge/ghcr.io-nerdneilsfield%2Fdeepresearch-flow-0f172a?style=flat-square" />
92
- </a>
93
- <a href="https://github.com/nerdneilsfield/ai-deepresearch-flow/blob/main/LICENSE">
94
- <img src="https://img.shields.io/github/license/nerdneilsfield/ai-deepresearch-flow?style=flat-square" />
95
- </a>
96
- <a href="https://github.com/nerdneilsfield/ai-deepresearch-flow/stargazers">
97
- <img src="https://img.shields.io/github/stars/nerdneilsfield/ai-deepresearch-flow?style=flat-square" />
98
- </a>
99
- <a href="https://pypi.org/project/deepresearch-flow">
100
- <img alt="PyPI - Version" src="https://img.shields.io/pypi/v/deepresearch-flow">
101
- </a>
102
- <a href="https://github.com/nerdneilsfield/ai-deepresearch-flow/issues">
103
- <img src="https://img.shields.io/github/issues/nerdneilsfield/ai-deepresearch-flow?style=flat-square" />
104
- </a>
105
- </p>
106
-
107
- ---
108
-
109
- ## The Core Pain Points
110
-
111
- - **OCR Chaos**: Raw markdown from OCR tools is often broken -- tables drift, formulas break, and references are non-clickable.
112
- - **Translation Nightmares**: Translating technical papers often destroys code blocks, LaTeX formulas, and table structures.
113
- - **Information Overload**: Manually extracting structured insights (authors, venues, summaries) from hundreds of PDFs is impractical.
114
- - **Context Switching**: Managing PDFs, summaries, and translations in different windows kills focus.
115
-
116
- ## The Solution
117
-
118
- DeepResearch Flow provides a unified pipeline to **Repair**, **Translate**, **Extract**, and **Serve** your research library.
119
-
120
- ## Key Features
121
-
122
- - **Smart Extraction**: Turn unstructured Markdown into schema-enforced JSON (summaries, metadata, Q&A) using LLMs (OpenAI, Claude, Gemini, etc.).
123
- - **Precision Translation**: Translate OCR Markdown to Chinese/Japanese (`.zh.md`, `.ja.md`) while **freezing** formulas, code, tables, and references. No more broken layouts.
124
- - **Local Knowledge DB**: A high-performance local Web UI to browse papers with **Split View** (Source vs. Translated vs. Summary), full-text search, and multi-dimensional filtering.
125
- - **Coverage Compare**: Compare JSON/PDF/Markdown/Translated datasets to find missing artifacts and export CSV reports.
126
- - **OCR Post-Processing**: Automatically fix broken references (`[1]` -> `[^1]`), merge split paragraphs, and standardize layouts.
127
-
128
- ---
129
-
130
- ## Quick Start
131
-
132
- ### 1) Installation
133
-
134
- ```bash
135
- # Recommended: using uv for speed
136
- uv pip install deepresearch-flow
137
-
138
- # Or standard pip
139
- pip install deepresearch-flow
140
- ```
141
-
142
- ### 2) Configuration
143
-
144
- Set up your LLM providers. We support OpenAI, Claude, Gemini, Ollama, and more.
145
-
146
- ```bash
147
- cp config.example.toml config.toml
148
- # Edit config.toml to add your API keys (e.g., env:OPENAI_API_KEY)
149
- ```
150
-
151
- ### 3) The "Zero to Hero" Workflow
152
-
153
- #### Step 1: Extract Insights
154
-
155
- Scan a folder of markdown files and extract structured summaries.
156
-
157
- ```bash
158
- uv run deepresearch-flow paper extract \
159
- --input ./docs \
160
- --model openai/gpt-4o-mini \
161
- --prompt-template deep_read
162
- ```
163
-
164
- #### Step 2: Translate Safely
165
-
166
- Translate papers to Chinese, protecting LaTeX and tables.
167
-
168
- ```bash
169
- uv run deepresearch-flow translator translate \
170
- --input ./docs \
171
- --target-lang zh \
172
- --model openai/gpt-4o-mini \
173
- --fix-level moderate
174
- ```
175
-
176
- #### Step 3: Repair OCR Outputs (Recommended)
177
-
178
- Recommended sequence to stabilize markdown before serving:
179
-
180
- ```bash
181
- # 1) Fix OCR markdown (auto-detects JSON if inputs are .json)
182
- uv run deepresearch-flow recognize fix \
183
- --input ./docs \
184
- --in-place
185
-
186
- # 2) Fix LaTeX formulas
187
- uv run deepresearch-flow recognize fix-math \
188
- --input ./docs \
189
- --model openai/gpt-4o-mini \
190
- --in-place
191
-
192
- # 3) Fix Mermaid diagrams
193
- uv run deepresearch-flow recognize fix-mermaid \
194
- --input ./paper_outputs \
195
- --json \
196
- --model openai/gpt-4o-mini \
197
- --in-place
198
-
199
- # 4) Fix again to normalize formatting
200
- uv run deepresearch-flow recognize fix \
201
- --input ./docs \
202
- --in-place
203
- ```
204
-
205
- #### Step 4: Serve Your Database
206
-
207
- Launch a local UI to read and manage your papers.
208
-
209
- ```bash
210
- uv run deepresearch-flow paper db serve \
211
- --input paper_infos.json \
212
- --md-root ./docs \
213
- --md-translated-root ./docs \
214
- --host 127.0.0.1
215
- ```
216
-
217
- ---
218
-
219
- ## Deployment (Static CDN)
220
-
221
- Use a separate static server (CDN) for PDFs/Markdown/images and keep the API/UI on another host.
222
-
223
- ### 1) Export static assets
224
-
225
- ```bash
226
- uv run deepresearch-flow paper db serve \
227
- --input paper_infos.json \
228
- --md-root ./docs \
229
- --md-translated-root ./docs \
230
- --pdf-root ./pdfs \
231
- --static-mode prod \
232
- --static-base-url https://static.example.com \
233
- --static-export-dir /data/paper-static
234
- ```
235
-
236
- Notes:
237
- - The API host must be able to read the original PDF/Markdown roots to build the index and hashes.
238
- - The CDN host only needs the exported directory (e.g. `/data/paper-static`).
239
-
240
- ### 2) Serve the export directory with CORS + cache headers (Caddy example)
241
-
242
- ```caddyfile
243
- :8002 {
244
- root * /data/paper-static
245
- encode zstd gzip
246
-
247
- @static path /pdf/* /md/* /md_translate/* /images/*
248
- header @static {
249
- Access-Control-Allow-Origin *
250
- Access-Control-Allow-Methods GET,HEAD,OPTIONS
251
- Access-Control-Allow-Headers *
252
- Cache-Control "public, max-age=31536000, immutable"
253
- }
254
-
255
- @options method OPTIONS
256
- respond @options 204
257
-
258
- file_server
259
- }
260
- ```
261
-
262
- ### 3) Start the API/UI with static base
263
-
264
- ```bash
265
- export PAPER_DB_STATIC_BASE_URL="https://static.example.com"
266
- export PAPER_DB_STATIC_MODE="prod"
267
- export PAPER_DB_STATIC_EXPORT_DIR="/data/paper-static"
268
- export PAPER_DB_PDFJS_CDN_BASE_URL="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.0.379"
269
-
270
- uv run deepresearch-flow paper db serve \
271
- --input paper_infos.json \
272
- --md-root ./docs \
273
- --md-translated-root ./docs \
274
- --pdf-root ./pdfs
275
- ```
276
-
277
- ---
278
-
279
- ## Comprehensive Guide
280
-
281
- <details>
282
- <summary><strong>1. Translator: OCR-Safe Translation</strong></summary>
283
-
284
- The translator module is built for scientific documents. It uses a node-based architecture to ensure stability.
285
-
286
- - Structure Protection: automatically detects and "freezes" code blocks, LaTeX (`$$...$$`), HTML tables, and images before sending text to the LLM.
287
- - OCR Repair: use `--fix-level` to merge broken paragraphs and convert text references (`[1]`) to clickable Markdown footnotes (`[^1]`).
288
- - Context-Aware: supports retries for failed chunks and falls back gracefully.
289
-
290
- ```bash
291
- # Translate with structure protection and OCR repairs
292
- uv run deepresearch-flow translator translate \
293
- --input ./paper.md \
294
- --target-lang ja \
295
- --fix-level aggressive \
296
- --model claude/claude-3-5-sonnet-20240620
297
- ```
298
-
299
- </details>
300
-
301
- <details>
302
- <summary><strong>2. Paper Extract: Structured Knowledge</strong></summary>
303
-
304
- Turn loose markdown files into a queryable database.
305
-
306
- - Templates: built-in prompts like `simple`, `eight_questions`, and `deep_read` guide the LLM to extract specific insights.
307
- - Async and throttled: precise control over concurrency (`--max-concurrency`) and rate limits (`--sleep-every`).
308
- - Incremental: skips already processed files; resumes from where you left off.
309
-
310
- ```bash
311
- uv run deepresearch-flow paper extract \
312
- --input ./library \
313
- --output paper_data.json \
314
- --template-dir ./my-custom-prompts \
315
- --max-concurrency 10
316
- ```
317
-
318
- </details>
319
-
320
- <details>
321
- <summary><strong>3. Database and UI: Your Personal ArXiv</strong></summary>
322
-
323
- The `paper db serve` command creates a local research station.
324
-
325
- - Split View: read the original PDF/Markdown on the left and the Summary/Translation on the right.
326
- - Full Text Search: search by title, author, year, or content tags (`tag:fpga year:2023..2024`).
327
- - Stats: visualize publication trends and keyword frequencies.
328
- - PDF Viewer: built-in PDF.js viewer prevents cross-origin issues with local files.
329
-
330
- ```bash
331
- uv run deepresearch-flow paper db serve \
332
- --input paper_infos.json \
333
- --pdf-root ./pdfs \
334
- --cache-dir .cache/db
335
- ```
336
-
337
- </details>
338
-
339
- <details>
340
- <summary><strong>4. Paper DB Compare: Coverage Audit</strong></summary>
341
-
342
- Compare two datasets (A/B) to find missing PDFs, markdowns, translations, or JSON items, with match metadata.
343
-
344
- ```bash
345
- uv run deepresearch-flow paper db compare \
346
- --input-a ./a.json \
347
- --md-root-b ./md_root \
348
- --output-csv ./compare.csv
349
-
350
- # Compare translated markdowns by language
351
- uv run deepresearch-flow paper db compare \
352
- --md-translated-root-a ./translated_a \
353
- --md-translated-root-b ./translated_b \
354
- --lang zh
355
- ```
356
-
357
- </details>
358
-
359
- <details>
360
- <summary><strong>5. Recognize: OCR Post-Processing</strong></summary>
361
-
362
- Tools to clean up raw outputs from OCR engines like MinerU.
363
-
364
- - Embed Images: convert local image links to Base64 for a portable single-file Markdown.
365
- - Unpack Images: extract Base64 images back to files.
366
- - Organize: flatten nested OCR output directories.
367
- - Fix: apply OCR fixes and rumdl formatting during organize, or as a standalone step.
368
- - Fix JSON: apply the same fixes to markdown fields inside paper JSON outputs.
369
- - Fix Math: validate and repair LaTeX formulas with optional LLM assistance.
370
- - Fix Mermaid: validate and repair Mermaid diagrams (requires `mmdc` from mermaid-cli).
371
- - Recommended order: `fix` -> `fix-math` -> `fix-mermaid` -> `fix`.
372
-
373
- ```bash
374
- uv run deepresearch-flow recognize md embed --input ./raw_ocr --output ./clean_md
375
- ```
376
-
377
- ```bash
378
- # Organize MinerU output and apply OCR fixes
379
- uv run deepresearch-flow recognize organize \
380
- --input ./mineru_outputs \
381
- --output-simple ./ocr_md \
382
- --fix
383
-
384
- # Fix and format existing markdown outputs
385
- uv run deepresearch-flow recognize fix \
386
- --input ./ocr_md \
387
- --output ./ocr_md_fixed
388
-
389
- # Fix in place
390
- uv run deepresearch-flow recognize fix \
391
- --input ./ocr_md \
392
- --in-place
393
-
394
- # Fix JSON outputs in place
395
- uv run deepresearch-flow recognize fix \
396
- --json \
397
- --input ./paper_outputs \
398
- --in-place
399
-
400
- # Fix LaTeX formulas in markdown
401
- uv run deepresearch-flow recognize fix-math \
402
- --input ./docs \
403
- --model openai/gpt-4o-mini \
404
- --in-place
405
-
406
- # Fix Mermaid diagrams in JSON outputs
407
- uv run deepresearch-flow recognize fix-mermaid \
408
- --json \
409
- --input ./paper_outputs \
410
- --model openai/gpt-4o-mini \
411
- --in-place
412
- ```
413
-
414
- </details>
415
-
416
- ---
417
-
418
- ## Docker Support
419
-
420
- Don't want to manage Python environments?
421
-
422
- ```bash
423
- docker run --rm -v $(pwd):/app -it ghcr.io/nerdneilsfield/deepresearch-flow --help
424
- ```
425
-
426
- ## Configuration
427
-
428
- The `config.toml` file is your control center. It supports:
429
-
430
- - Multiple Providers: mix and match OpenAI, DeepSeek (DashScope), Gemini, Claude, and Ollama.
431
- - Model Routing: explicit routing to specific models (`--model provider/model_name`).
432
- - Environment Variables: keep secrets safe using `env:VAR_NAME` syntax.
433
-
434
- See `config.example.toml` for a full reference.
435
-
436
- ---
437
-
438
- <p align="center">
439
- Built with love for the Open Science community.
440
- </p>