aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,313 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ BioRxiv paper downloader implementation.
4
+ """
5
+
6
+ import logging
7
+ import re
8
+ import tempfile
9
+ from typing import Any
10
+
11
+ import cloudscraper
12
+ import requests
13
+
14
+ from .base_paper_downloader import BasePaperDownloader
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class BiorxivDownloader(BasePaperDownloader):
20
+ """BioRxiv-specific implementation of paper downloader."""
21
+
22
def __init__(self, config: Any):
    """Build a bioRxiv downloader from a configuration object.

    Args:
        config: Configuration object. ``api_url`` is required and read
            directly; every other setting is optional and falls back to
            the default shown next to it below.
    """
    super().__init__(config)

    def setting(name: str, fallback: Any) -> Any:
        # Optional settings: use the configured value when present.
        return getattr(config, name, fallback)

    # Endpoints and URL templates
    self.api_url = config.api_url
    self.pdf_base_url = setting("pdf_base_url", "https://www.biorxiv.org/content/10.1101/")
    self.landing_url_template = setting(
        "landing_url_template",
        "https://www.biorxiv.org/content/{doi}v{version}",
    )
    self.pdf_url_template = setting(
        "pdf_url_template",
        "https://www.biorxiv.org/content/{doi}v{version}.full.pdf",
    )

    # Version used when the API record does not carry one
    self.default_version = setting("default_version", "1")

    # CloudScraper tuning
    self.cf_clearance_timeout = setting("cf_clearance_timeout", 30)
    self.session_reuse = setting("session_reuse", True)
    browser_settings = setting("browser_config", {})
    self.browser_config_type = browser_settings.get("type", "custom")

    # One scraper shared by all requests when session reuse is on;
    # otherwise scrapers are created on demand.
    # NOTE(review): self.user_agent is presumably set by the base
    # initializer -- confirm against BasePaperDownloader.
    self._scraper = (
        cloudscraper.create_scraper(
            browser={self.browser_config_type: self.user_agent},
            delay=self.cf_clearance_timeout,
        )
        if self.session_reuse
        else None
    )
55
+
56
def fetch_metadata(self, identifier: str) -> dict[str, Any]:
    """
    Fetch paper metadata from bioRxiv API.

    Args:
        identifier: DOI (e.g., '10.1101/2020.09.09.20191205')

    Returns:
        JSON response as dictionary from bioRxiv API

    Raises:
        requests.RequestException: If API call fails
        RuntimeError: If no collection data found in response
    """
    query_url = f"{self.api_url}/biorxiv/{identifier}/na/json"
    logger.info("Fetching metadata for DOI %s from: %s", identifier, query_url)

    # Use CloudScraper for metadata as well, in case API is behind CF protection.
    # The scraper comes from the same helper the PDF download uses, so the
    # shared-session / on-demand decision lives in one place.
    scraper = self._get_scraper()

    response = scraper.get(query_url, timeout=self.request_timeout)
    response.raise_for_status()

    paper_data = response.json()

    # A payload that is not a JSON object (e.g. null or a number) carries no
    # collection either; report it with the documented RuntimeError instead
    # of letting the membership test raise TypeError.
    if not isinstance(paper_data, dict) or not paper_data.get("collection"):
        raise RuntimeError("No collection data found in bioRxiv API response")

    return paper_data
88
+
89
def construct_pdf_url(self, metadata: dict[str, Any], identifier: str) -> str:
    """
    Build the PDF URL for a DOI from the bioRxiv API response.

    Args:
        metadata: JSON response from bioRxiv API
        identifier: DOI

    Returns:
        The PDF URL produced by ``pdf_url_template``, or an empty string
        when the response has no (or an empty) ``collection``.
    """
    has_entries = "collection" in metadata and metadata["collection"]
    if not has_entries:
        return ""

    # Only the first collection entry is consulted for the version.
    first_entry = metadata["collection"][0]
    paper_version = first_entry.get("version", self.default_version)

    url = self.pdf_url_template.format(doi=identifier, version=paper_version)
    logger.info("Constructed PDF URL for DOI %s: %s", identifier, url)
    return url
111
+
112
def download_pdf_to_temp(self, pdf_url: str, identifier: str) -> tuple[str, str] | None:
    """
    Override base method to use CloudScraper for bioRxiv PDF downloads.
    Includes landing page visit to handle CloudFlare protection.

    Args:
        pdf_url: URL to download PDF from
        identifier: DOI for logging

    Returns:
        Tuple of (temp_file_path, filename) or None if failed. A
        ``requests.RequestException`` from the landing-page visit, the PDF
        request, or the body streaming is logged and turned into ``None``.
    """
    if not pdf_url:
        logger.info("No PDF URL available for DOI %s", identifier)
        return None

    try:
        logger.info("Downloading PDF for DOI %s from %s", identifier, pdf_url)

        # Get scraper and visit landing page if needed
        scraper = self._get_scraper()
        self._visit_landing_page(scraper, pdf_url, identifier)

        # Download and save PDF
        response = scraper.get(pdf_url, timeout=self.request_timeout, stream=True)
        try:
            response.raise_for_status()

            temp_file_path = self._save_pdf_to_temp(response)
            filename = self._extract_filename(response, identifier)
        finally:
            # With stream=True the connection is held until the body is fully
            # read or the response is closed; close explicitly so an error
            # status or a failed save does not leave it checked out.
            response.close()

        return temp_file_path, filename

    except requests.RequestException as e:
        logger.error("Failed to download PDF for DOI %s: %s", identifier, e)
        return None
147
+
148
def _get_scraper(self):
    """Return the shared CloudScraper session, or a new one if none is kept."""
    shared = self._scraper
    if shared:
        return shared

    # No reusable session: build a scraper for this call only.
    return cloudscraper.create_scraper(
        browser={self.browser_config_type: self.user_agent},
        delay=self.cf_clearance_timeout,
    )
154
+
155
def _visit_landing_page(self, scraper, pdf_url: str, identifier: str) -> None:
    """Request the paper's landing page before the PDF itself.

    The landing URL is the PDF URL with ``.full.pdf`` removed; URLs without
    that suffix are left alone and no request is made. An error status on
    the landing page propagates via ``raise_for_status``.

    Args:
        scraper: Session object used to issue the GET request
        pdf_url: PDF URL the landing URL is derived from
        identifier: DOI for logging
    """
    pdf_suffix = ".full.pdf"
    if pdf_suffix not in pdf_url:
        return

    landing_url = pdf_url.replace(pdf_suffix, "")
    logger.info("Visiting landing page first: %s", landing_url)

    landing_response = scraper.get(landing_url, timeout=self.request_timeout)
    landing_response.raise_for_status()
    logger.info("Successfully accessed landing page for %s", identifier)
164
+
165
+ def _save_pdf_to_temp(self, response) -> str:
166
+ """Save PDF response to temporary file."""
167
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
168
+ for chunk in response.iter_content(chunk_size=self.chunk_size):
169
+ if chunk: # Filter out keep-alive chunks
170
+ temp_file.write(chunk)
171
+ temp_file_path = temp_file.name
172
+
173
+ logger.info("BioRxiv PDF downloaded to temporary file: %s", temp_file_path)
174
+ return temp_file_path
175
+
176
+ def _extract_filename(self, response, identifier: str) -> str:
177
+ """Extract filename from response headers or generate default."""
178
+ filename = self.get_default_filename(identifier)
179
+
180
+ content_disposition = response.headers.get("Content-Disposition", "")
181
+ if "filename=" in content_disposition:
182
+ try:
183
+ filename_match = re.search(
184
+ r'filename[*]?=(?:"([^"]+)"|([^;]+))', content_disposition
185
+ )
186
+ if filename_match:
187
+ extracted_filename = filename_match.group(1) or filename_match.group(2)
188
+ extracted_filename = extracted_filename.strip().strip('"')
189
+ if extracted_filename and extracted_filename.endswith(".pdf"):
190
+ filename = extracted_filename
191
+ logger.info("Extracted filename from header: %s", filename)
192
+ except requests.RequestException as e:
193
+ logger.warning("Failed to extract filename from header: %s", e)
194
+
195
+ return filename
196
+
197
+ def extract_paper_metadata(
198
+ self,
199
+ metadata: dict[str, Any],
200
+ identifier: str,
201
+ pdf_result: tuple[str, str] | None,
202
+ ) -> dict[str, Any]:
203
+ """
204
+ Extract structured metadata from bioRxiv API response.
205
+
206
+ Args:
207
+ metadata: JSON response from bioRxiv API
208
+ identifier: DOI
209
+ pdf_result: Tuple of (temp_file_path, filename) if PDF downloaded
210
+
211
+ Returns:
212
+ Standardized paper metadata dictionary
213
+ """
214
+ if "collection" not in metadata or not metadata["collection"]:
215
+ raise RuntimeError("No collection data found in metadata")
216
+
217
+ paper = metadata["collection"][0] # Get first (and should be only) paper
218
+
219
+ # Extract basic metadata
220
+ basic_metadata = self._extract_basic_metadata(paper, identifier)
221
+
222
+ # Handle PDF download results
223
+ pdf_metadata = self._extract_pdf_metadata(pdf_result, identifier)
224
+
225
+ # Combine all metadata
226
+ return {
227
+ **basic_metadata,
228
+ **pdf_metadata,
229
+ }
230
+
231
+ def _extract_basic_metadata(self, paper: dict[str, Any], identifier: str) -> dict[str, Any]:
232
+ """Extract basic metadata from paper data."""
233
+ # Extract basic fields
234
+ title = paper.get("title", "N/A").strip()
235
+ abstract = paper.get("abstract", "N/A").strip()
236
+ pub_date = paper.get("date", "N/A").strip()
237
+ category = paper.get("category", "N/A").strip()
238
+ version = paper.get("version", "N/A")
239
+
240
+ # Extract authors - typically in a semicolon-separated string
241
+ authors = self._extract_authors(paper.get("authors", ""))
242
+
243
+ return {
244
+ "Title": title,
245
+ "Authors": authors,
246
+ "Abstract": abstract,
247
+ "Publication Date": pub_date,
248
+ "DOI": identifier,
249
+ "Category": category,
250
+ "Version": version,
251
+ "source": "biorxiv",
252
+ "server": "biorxiv",
253
+ }
254
+
255
+ def _extract_authors(self, authors_str: str) -> list:
256
+ """Extract and clean authors from semicolon-separated string."""
257
+ if not authors_str:
258
+ return []
259
+ return [author.strip() for author in authors_str.split(";") if author.strip()]
260
+
261
+ def _extract_pdf_metadata(
262
+ self, pdf_result: tuple[str, str] | None, identifier: str
263
+ ) -> dict[str, Any]:
264
+ """Extract PDF-related metadata."""
265
+ if pdf_result:
266
+ temp_file_path, filename = pdf_result
267
+ return {
268
+ "URL": temp_file_path,
269
+ "pdf_url": temp_file_path,
270
+ "filename": filename,
271
+ "access_type": "open_access_downloaded",
272
+ "temp_file_path": temp_file_path,
273
+ }
274
+
275
+ return {
276
+ "URL": "",
277
+ "pdf_url": "",
278
+ "filename": self.get_default_filename(identifier),
279
+ "access_type": "download_failed",
280
+ "temp_file_path": "",
281
+ }
282
+
283
def get_service_name(self) -> str:
    """Return the display name of the service this downloader targets."""
    service_name = "bioRxiv"
    return service_name
286
+
287
def get_identifier_name(self) -> str:
    """Return the display name of the identifier type used for lookups."""
    identifier_label = "DOI"
    return identifier_label
290
+
291
def get_default_filename(self, identifier: str) -> str:
    """
    Build the fallback PDF filename for a DOI.

    Every "/" and "." in the DOI becomes "_", then ".pdf" is appended.
    """
    # One translate pass replaces both characters.
    safe_stem = identifier.translate(str.maketrans({"/": "_", ".": "_"}))
    return safe_stem + ".pdf"
295
+
296
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
297
+ """Get bioRxiv-specific identifier info for paper summary."""
298
+ doi = paper.get("DOI", "N/A")
299
+ pub_date = paper.get("Publication Date", "N/A")
300
+ category = paper.get("Category", "N/A")
301
+
302
+ info = f" (DOI:{doi}, {pub_date})"
303
+ if category != "N/A":
304
+ info += f"\n Category: {category}"
305
+
306
+ return info
307
+
308
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
309
+ """Add DOI and bioRxiv-specific fields to entry."""
310
+ entry["DOI"] = identifier
311
+ entry["Category"] = "N/A"
312
+ entry["Version"] = "N/A"
313
+ entry["server"] = "biorxiv"
@@ -0,0 +1,196 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MedRxiv paper downloader implementation.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ import requests
10
+
11
+ from .base_paper_downloader import BasePaperDownloader
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class MedrxivDownloader(BasePaperDownloader):
    """MedRxiv-specific implementation of paper downloader."""

    def __init__(self, config: Any):
        """
        Initialize MedRxiv downloader with configuration.

        Args:
            config: Object providing ``api_url`` and, optionally,
                ``pdf_url_template`` and ``default_version``
        """
        super().__init__(config)
        self.api_url = config.api_url
        self.pdf_url_template = getattr(
            config,
            "pdf_url_template",
            "https://www.medrxiv.org/content/{identifier}v{version}.full.pdf",
        )
        self.default_version = getattr(config, "default_version", "1")

    def fetch_metadata(self, identifier: str) -> dict[str, Any]:
        """
        Fetch paper metadata from medRxiv API.

        Args:
            identifier: DOI (e.g., '10.1101/2020.09.09.20191205')

        Returns:
            JSON response as dictionary from medRxiv API

        Raises:
            requests.RequestException: If API call fails
            RuntimeError: If no collection data found in response
        """
        query_url = f"{self.api_url}/medrxiv/{identifier}/na/json"
        logger.info("Fetching metadata for DOI %s from: %s", identifier, query_url)

        response = requests.get(query_url, timeout=self.request_timeout)
        response.raise_for_status()

        paper_data = response.json()

        if "collection" not in paper_data or not paper_data["collection"]:
            raise RuntimeError("No collection data found in medRxiv API response")

        return paper_data

    def construct_pdf_url(self, metadata: dict[str, Any], identifier: str) -> str:
        """
        Construct PDF URL from medRxiv metadata and DOI.

        Args:
            metadata: JSON response from medRxiv API
            identifier: DOI

        Returns:
            Constructed PDF URL string, or "" when ``metadata`` has no
            non-empty "collection" entry
        """
        collection = metadata.get("collection")
        if not collection:
            return ""

        paper = collection[0]  # Only the first collection entry is used
        # A null or empty version would yield ".../<doi>vNone.full.pdf" or
        # ".../<doi>v.full.pdf"; fall back to the configured default instead.
        version = paper.get("version") or self.default_version

        # Supply the DOI under both placeholder names so a template written
        # with "{doi}" works as well as one written with "{identifier}";
        # str.format ignores the keyword a template does not use.
        pdf_url = self.pdf_url_template.format(
            identifier=identifier, doi=identifier, version=version
        )
        logger.info("Constructed PDF URL for DOI %s: %s", identifier, pdf_url)

        return pdf_url

    def extract_paper_metadata(
        self,
        metadata: dict[str, Any],
        identifier: str,
        pdf_result: tuple[str, str] | None,
    ) -> dict[str, Any]:
        """
        Extract structured metadata from medRxiv API response.

        Args:
            metadata: JSON response from medRxiv API
            identifier: DOI
            pdf_result: Tuple of (temp_file_path, filename) if PDF downloaded

        Returns:
            Standardized paper metadata dictionary

        Raises:
            RuntimeError: If ``metadata`` has no non-empty "collection" entry
        """
        collection = metadata.get("collection")
        if not collection:
            raise RuntimeError("No collection data found in metadata")

        paper = collection[0]  # Only the first collection entry is used

        # Basic paper fields first, PDF download outcome second.
        return {
            **self._extract_basic_metadata(paper, identifier),
            **self._extract_pdf_metadata(pdf_result, identifier),
        }

    def _extract_basic_metadata(self, paper: dict[str, Any], identifier: str) -> dict[str, Any]:
        """
        Extract basic metadata from paper data.

        Text fields that are absent or explicitly ``None`` (JSON ``null``)
        are reported as "N/A" instead of raising on ``None.strip()``.

        Args:
            paper: One entry of the API response's "collection" list
            identifier: DOI, stored under "DOI"

        Returns:
            Dictionary with Title, Authors, Abstract, Publication Date, DOI,
            Category, Version, source and server keys
        """

        def text_field(key: str) -> str:
            # A key present with a null value bypasses dict.get's default,
            # so None has to be handled explicitly.
            value = paper.get(key)
            return "N/A" if value is None else str(value).strip()

        return {
            "Title": text_field("title"),
            # Authors typically arrive as one semicolon-separated string.
            "Authors": self._extract_authors(paper.get("authors", "")),
            "Abstract": text_field("abstract"),
            "Publication Date": text_field("date"),
            "DOI": identifier,
            "Category": text_field("category"),
            "Version": paper.get("version", "N/A"),
            "source": "medrxiv",
            "server": "medrxiv",
        }

    def _extract_authors(self, authors_str: str) -> list:
        """Extract and clean authors from semicolon-separated string."""
        if not authors_str:
            return []
        return [author.strip() for author in authors_str.split(";") if author.strip()]

    def _extract_pdf_metadata(
        self, pdf_result: tuple[str, str] | None, identifier: str
    ) -> dict[str, Any]:
        """
        Extract PDF-related metadata.

        Args:
            pdf_result: Tuple of (temp_file_path, filename), or a falsy value
                when the download did not succeed
            identifier: DOI used for the fallback filename

        Returns:
            Dictionary with URL, pdf_url, filename, access_type and
            temp_file_path keys
        """
        if pdf_result:
            temp_file_path, filename = pdf_result
            # The local temp path is reported under all three location keys.
            return {
                "URL": temp_file_path,
                "pdf_url": temp_file_path,
                "filename": filename,
                "access_type": "open_access_downloaded",
                "temp_file_path": temp_file_path,
            }

        return {
            "URL": "",
            "pdf_url": "",
            "filename": self.get_default_filename(identifier),
            "access_type": "download_failed",
            "temp_file_path": "",
        }

    def get_service_name(self) -> str:
        """Return service name."""
        return "medRxiv"

    def get_identifier_name(self) -> str:
        """Return identifier display name."""
        return "DOI"

    def get_default_filename(self, identifier: str) -> str:
        """Generate default filename for medRxiv paper."""
        # Sanitize DOI for filename use
        return f"{identifier.replace('/', '_').replace('.', '_')}.pdf"

    def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
        """
        Get medRxiv-specific identifier info for paper summary.

        The category line is appended only when the category is not "N/A".
        """
        doi = paper.get("DOI", "N/A")
        pub_date = paper.get("Publication Date", "N/A")
        category = paper.get("Category", "N/A")

        info = f" (DOI:{doi}, {pub_date})"
        if category != "N/A":
            info += f"\n Category: {category}"

        return info

    def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
        """Add DOI and medRxiv-specific fields to entry (mutates it in place)."""
        entry["DOI"] = identifier
        entry["Category"] = "N/A"
        entry["Version"] = "N/A"
        entry["server"] = "medrxiv"