aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289):
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  109. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  114. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  115. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  116. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  117. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  118. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  122. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  123. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  124. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  125. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  126. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  127. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
  150. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  151. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  152. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
  153. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  154. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  156. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  157. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  170. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  174. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  175. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  176. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  177. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  178. aiagents4pharma/talk2scholars/README.md +1 -0
  179. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  180. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  181. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  182. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  183. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  184. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  185. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  186. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  191. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  192. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  194. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  196. aiagents4pharma/talk2scholars/install.md +122 -0
  197. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  198. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  199. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  200. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  201. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  202. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  203. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  205. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  206. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  207. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  208. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  209. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  210. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  212. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  213. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  214. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  215. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  216. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  227. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  228. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  229. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  230. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  234. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  235. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  236. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  242. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  243. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  259. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  260. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  261. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  262. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  263. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  264. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  265. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  266. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  268. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  270. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  271. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  277. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  280. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
  281. aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
  282. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
  283. aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
  284. aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
  285. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  286. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  289. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -2,13 +2,15 @@
2
2
  Exctraction of multimodal subgraph using Prize-Collecting Steiner Tree (PCST) algorithm.
3
3
  """
4
4
 
5
- from typing import Tuple, NamedTuple
5
+ from typing import NamedTuple
6
+
6
7
  import numpy as np
7
8
  import pandas as pd
8
- import torch
9
9
  import pcst_fast
10
+ import torch
10
11
  from torch_geometric.data.data import Data
11
12
 
13
+
12
14
  class MultimodalPCSTPruning(NamedTuple):
13
15
  """
14
16
  Prize-Collecting Steiner Tree (PCST) pruning algorithm implementation inspired by G-Retriever
@@ -27,6 +29,7 @@ class MultimodalPCSTPruning(NamedTuple):
27
29
  pruning: The pruning strategy to use.
28
30
  verbosity_level: The verbosity level.
29
31
  """
32
+
30
33
  topk: int = 3
31
34
  topk_e: int = 3
32
35
  cost_e: float = 0.5
@@ -37,10 +40,7 @@ class MultimodalPCSTPruning(NamedTuple):
37
40
  verbosity_level: int = 0
38
41
  use_description: bool = False
39
42
 
40
- def _compute_node_prizes(self,
41
- graph: Data,
42
- query_emb: torch.Tensor,
43
- modality: str) :
43
+ def _compute_node_prizes(self, graph: Data, query_emb: torch.Tensor, modality: str):
44
44
  """
45
45
  Compute the node prizes based on the cosine similarity between the query and nodes.
46
46
 
@@ -54,25 +54,28 @@ class MultimodalPCSTPruning(NamedTuple):
54
54
  The prizes of the nodes.
55
55
  """
56
56
  # Convert PyG graph to a DataFrame
57
- graph_df = pd.DataFrame({
58
- "node_type": graph.node_type,
59
- "desc_x": [x.tolist() for x in graph.desc_x],
60
- "x": [list(x) for x in graph.x],
61
- "score": [0.0 for _ in range(len(graph.node_id))],
62
- })
57
+ graph_df = pd.DataFrame(
58
+ {
59
+ "node_type": graph.node_type,
60
+ "desc_x": [x.tolist() for x in graph.desc_x],
61
+ "x": [list(x) for x in graph.x],
62
+ "score": [0.0 for _ in range(len(graph.node_id))],
63
+ }
64
+ )
63
65
 
64
66
  # Calculate cosine similarity for text features and update the score
65
67
  if self.use_description:
66
68
  graph_df.loc[:, "score"] = torch.nn.CosineSimilarity(dim=-1)(
67
- query_emb,
68
- torch.tensor(list(graph_df.desc_x.values)) # Using textual description features
69
- ).tolist()
69
+ query_emb,
70
+ torch.tensor(list(graph_df.desc_x.values)), # Using textual description features
71
+ ).tolist()
70
72
  else:
71
- graph_df.loc[graph_df["node_type"] == modality,
72
- "score"] = torch.nn.CosineSimilarity(dim=-1)(
73
- query_emb,
74
- torch.tensor(list(graph_df[graph_df["node_type"]== modality].x.values))
75
- ).tolist()
73
+ graph_df.loc[graph_df["node_type"] == modality, "score"] = torch.nn.CosineSimilarity(
74
+ dim=-1
75
+ )(
76
+ query_emb,
77
+ torch.tensor(list(graph_df[graph_df["node_type"] == modality].x.values)),
78
+ ).tolist()
76
79
 
77
80
  # Set the prizes for nodes based on the similarity scores
78
81
  n_prizes = torch.tensor(graph_df.score.values, dtype=torch.float32)
@@ -84,9 +87,7 @@ class MultimodalPCSTPruning(NamedTuple):
84
87
 
85
88
  return n_prizes
86
89
 
87
- def _compute_edge_prizes(self,
88
- graph: Data,
89
- text_emb: torch.Tensor) :
90
+ def _compute_edge_prizes(self, graph: Data, text_emb: torch.Tensor):
90
91
  """
91
92
  Compute the node prizes based on the cosine similarity between the query and nodes.
92
93
 
@@ -106,20 +107,22 @@ class MultimodalPCSTPruning(NamedTuple):
106
107
  e_prizes[e_prizes < topk_e_values[-1]] = 0.0
107
108
  last_topk_e_value = topk_e
108
109
  for k in range(topk_e):
109
- indices = inverse_indices == (
110
- unique_prizes == topk_e_values[k]
111
- ).nonzero(as_tuple=True)[0]
110
+ indices = (
111
+ inverse_indices == (unique_prizes == topk_e_values[k]).nonzero(as_tuple=True)[0]
112
+ )
112
113
  value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
113
114
  e_prizes[indices] = value
114
115
  last_topk_e_value = value * (1 - self.c_const)
115
116
 
116
117
  return e_prizes
117
118
 
118
- def compute_prizes(self,
119
- graph: Data,
120
- text_emb: torch.Tensor,
121
- query_emb: torch.Tensor,
122
- modality: str):
119
+ def compute_prizes(
120
+ self,
121
+ graph: Data,
122
+ text_emb: torch.Tensor,
123
+ query_emb: torch.Tensor,
124
+ modality: str,
125
+ ):
123
126
  """
124
127
  Compute the node prizes based on the cosine similarity between the query and nodes,
125
128
  as well as the edge prizes based on the cosine similarity between the query and edges.
@@ -144,9 +147,9 @@ class MultimodalPCSTPruning(NamedTuple):
144
147
 
145
148
  return {"nodes": n_prizes, "edges": e_prizes}
146
149
 
147
- def compute_subgraph_costs(self,
148
- graph: Data,
149
- prizes: dict) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
150
+ def compute_subgraph_costs(
151
+ self, graph: Data, prizes: dict
152
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
150
153
  """
151
154
  Compute the costs in constructing the subgraph proposed by G-Retriever paper.
152
155
 
@@ -204,7 +207,11 @@ class MultimodalPCSTPruning(NamedTuple):
204
207
  return edges_dict, prizes, costs, mapping
205
208
 
206
209
  def get_subgraph_nodes_edges(
207
- self, graph: Data, vertices: np.ndarray, edges_dict: dict, mapping: dict,
210
+ self,
211
+ graph: Data,
212
+ vertices: np.ndarray,
213
+ edges_dict: dict,
214
+ mapping: dict,
208
215
  ) -> dict:
209
216
  """
210
217
  Get the selected nodes and edges of the subgraph based on the vertices and edges computed
@@ -234,18 +241,18 @@ class MultimodalPCSTPruning(NamedTuple):
234
241
  subgraph_edges = np.array(subgraph_edges + virtual_edges)
235
242
  edge_index = graph.edge_index[:, subgraph_edges]
236
243
  subgraph_nodes = np.unique(
237
- np.concatenate(
238
- [subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()]
239
- )
244
+ np.concatenate([subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()])
240
245
  )
241
246
 
242
247
  return {"nodes": subgraph_nodes, "edges": subgraph_edges}
243
248
 
244
- def extract_subgraph(self,
245
- graph: Data,
246
- text_emb: torch.Tensor,
247
- query_emb: torch.Tensor,
248
- modality: str) -> dict:
249
+ def extract_subgraph(
250
+ self,
251
+ graph: Data,
252
+ text_emb: torch.Tensor,
253
+ query_emb: torch.Tensor,
254
+ modality: str,
255
+ ) -> dict:
249
256
  """
250
257
  Perform the Prize-Collecting Steiner Tree (PCST) algorithm to extract the subgraph.
251
258
 
@@ -268,9 +275,7 @@ class MultimodalPCSTPruning(NamedTuple):
268
275
  prizes = self.compute_prizes(graph, text_emb, query_emb, modality)
269
276
 
270
277
  # Compute costs in constructing the subgraph
271
- edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(
272
- graph, prizes
273
- )
278
+ edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(graph, prizes)
274
279
 
275
280
  # Retrieve the subgraph using the PCST algorithm
276
281
  result_vertices, result_edges = pcst_fast.pcst_fast(
@@ -287,6 +292,7 @@ class MultimodalPCSTPruning(NamedTuple):
287
292
  graph,
288
293
  result_vertices,
289
294
  {"edges": result_edges, "num_prior_edges": edges_dict["num_prior_edges"]},
290
- mapping)
295
+ mapping,
296
+ )
291
297
 
292
298
  return subgraph
@@ -2,12 +2,14 @@
2
2
  Exctraction of subgraph using Prize-Collecting Steiner Tree (PCST) algorithm.
3
3
  """
4
4
 
5
- from typing import Tuple, NamedTuple
5
+ from typing import NamedTuple
6
+
6
7
  import numpy as np
7
- import torch
8
8
  import pcst_fast
9
+ import torch
9
10
  from torch_geometric.data.data import Data
10
11
 
12
+
11
13
  class PCSTPruning(NamedTuple):
12
14
  """
13
15
  Prize-Collecting Steiner Tree (PCST) pruning algorithm implementation inspired by G-Retriever
@@ -26,6 +28,7 @@ class PCSTPruning(NamedTuple):
26
28
  pruning: The pruning strategy to use.
27
29
  verbosity_level: The verbosity level.
28
30
  """
31
+
29
32
  topk: int = 3
30
33
  topk_e: int = 3
31
34
  cost_e: float = 0.5
@@ -76,9 +79,9 @@ class PCSTPruning(NamedTuple):
76
79
  e_prizes[e_prizes < topk_e_values[-1]] = 0.0
77
80
  last_topk_e_value = topk_e
78
81
  for k in range(topk_e):
79
- indices = inverse_indices == (
80
- unique_prizes == topk_e_values[k]
81
- ).nonzero(as_tuple=True)[0]
82
+ indices = (
83
+ inverse_indices == (unique_prizes == topk_e_values[k]).nonzero(as_tuple=True)[0]
84
+ )
82
85
  value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
83
86
  e_prizes[indices] = value
84
87
  last_topk_e_value = value * (1 - self.c_const)
@@ -87,7 +90,7 @@ class PCSTPruning(NamedTuple):
87
90
 
88
91
  def compute_subgraph_costs(
89
92
  self, graph: Data, prizes: dict
90
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
93
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
91
94
  """
92
95
  Compute the costs in constructing the subgraph proposed by G-Retriever paper.
93
96
 
@@ -145,7 +148,11 @@ class PCSTPruning(NamedTuple):
145
148
  return edges_dict, prizes, costs, mapping
146
149
 
147
150
  def get_subgraph_nodes_edges(
148
- self, graph: Data, vertices: np.ndarray, edges_dict: dict, mapping: dict,
151
+ self,
152
+ graph: Data,
153
+ vertices: np.ndarray,
154
+ edges_dict: dict,
155
+ mapping: dict,
149
156
  ) -> dict:
150
157
  """
151
158
  Get the selected nodes and edges of the subgraph based on the vertices and edges computed
@@ -175,9 +182,7 @@ class PCSTPruning(NamedTuple):
175
182
  subgraph_edges = np.array(subgraph_edges + virtual_edges)
176
183
  edge_index = graph.edge_index[:, subgraph_edges]
177
184
  subgraph_nodes = np.unique(
178
- np.concatenate(
179
- [subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()]
180
- )
185
+ np.concatenate([subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()])
181
186
  )
182
187
 
183
188
  return {"nodes": subgraph_nodes, "edges": subgraph_edges}
@@ -201,9 +206,7 @@ class PCSTPruning(NamedTuple):
201
206
  prizes = self.compute_prizes(graph, query_emb)
202
207
 
203
208
  # Compute costs in constructing the subgraph
204
- edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(
205
- graph, prizes
206
- )
209
+ edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(graph, prizes)
207
210
 
208
211
  # Retrieve the subgraph using the PCST algorithm
209
212
  result_vertices, result_edges = pcst_fast.pcst_fast(
@@ -220,6 +223,7 @@ class PCSTPruning(NamedTuple):
220
223
  graph,
221
224
  result_vertices,
222
225
  {"edges": result_edges, "num_prior_edges": edges_dict["num_prior_edges"]},
223
- mapping)
226
+ mapping,
227
+ )
224
228
 
225
229
  return subgraph
@@ -1,12 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
- '''A utility module for knowledge graph operations'''
3
+ """A utility module for knowledge graph operations"""
4
4
 
5
- from typing import Tuple
6
5
  import networkx as nx
7
6
  import pandas as pd
8
7
 
9
- def kg_to_df_pandas(kg: nx.DiGraph) -> Tuple[pd.DataFrame, pd.DataFrame]:
8
+
9
+ def kg_to_df_pandas(kg: nx.DiGraph) -> tuple[pd.DataFrame, pd.DataFrame]:
10
10
  """
11
11
  Convert a directed knowledge graph to a pandas DataFrame.
12
12
 
@@ -19,20 +19,17 @@ def kg_to_df_pandas(kg: nx.DiGraph) -> Tuple[pd.DataFrame, pd.DataFrame]:
19
19
  """
20
20
 
21
21
  # Create a pandas DataFrame of the nodes
22
- df_nodes = pd.DataFrame.from_dict(kg.nodes, orient='index')
22
+ df_nodes = pd.DataFrame.from_dict(kg.nodes, orient="index")
23
23
 
24
24
  # Create a pandas DataFrame of the edges
25
- df_edges = nx.to_pandas_edgelist(kg,
26
- source='node_source',
27
- target='node_target')
25
+ df_edges = nx.to_pandas_edgelist(kg, source="node_source", target="node_target")
28
26
 
29
27
  return df_nodes, df_edges
30
28
 
31
- def df_pandas_to_kg(df: pd.DataFrame,
32
- df_nodes_attrs: pd.DataFrame,
33
- node_source: str,
34
- node_target: str
35
- ) -> nx.DiGraph:
29
+
30
+ def df_pandas_to_kg(
31
+ df: pd.DataFrame, df_nodes_attrs: pd.DataFrame, node_source: str, node_target: str
32
+ ) -> nx.DiGraph:
36
33
  """
37
34
  Convert a pandas DataFrame to a directed knowledge graph.
38
35
 
@@ -47,22 +44,24 @@ def df_pandas_to_kg(df: pd.DataFrame,
47
44
  """
48
45
 
49
46
  # Assert if the columns node_source and node_target are in the df
50
- assert node_source in df.columns, f'{node_source} not in df'
51
- assert node_target in df.columns, f'{node_target} not in df'
47
+ assert node_source in df.columns, f"{node_source} not in df"
48
+ assert node_target in df.columns, f"{node_target} not in df"
52
49
 
53
50
  # Assert that the nodes in the index of the df_nodes_attrs
54
51
  # are present in the source and target columns of the df
55
- assert set(df_nodes_attrs.index).issubset(set(df[node_source]).\
56
- union(set(df[node_target]))), \
57
- 'Nodes in index of df_nodes not found in df_edges'
52
+ assert set(df_nodes_attrs.index).issubset(set(df[node_source]).union(set(df[node_target]))), (
53
+ "Nodes in index of df_nodes not found in df_edges"
54
+ )
58
55
 
59
56
  # Create a knowledge graph from the dataframes
60
57
  # Add edges and nodes to the knowledge graph
61
- kg = nx.from_pandas_edgelist(df,
62
- source=node_source,
63
- target=node_target,
64
- create_using=nx.DiGraph,
65
- edge_attr=True)
66
- kg.add_nodes_from(df_nodes_attrs.to_dict('index').items())
58
+ kg = nx.from_pandas_edgelist(
59
+ df,
60
+ source=node_source,
61
+ target=node_target,
62
+ create_using=nx.DiGraph,
63
+ edge_attr=True,
64
+ )
65
+ kg.add_nodes_from(df_nodes_attrs.to_dict("index").items())
67
66
 
68
67
  return kg
@@ -5,13 +5,15 @@ Enrichment class for enriching PubChem IDs with their STRINGS representation.
5
5
  """
6
6
 
7
7
  import logging
8
- import requests
8
+
9
9
  import hydra
10
+ import requests
10
11
 
11
12
  # Initialize logger
12
13
  logging.basicConfig(level=logging.INFO)
13
14
  logger = logging.getLogger(__name__)
14
15
 
16
+
15
17
  def cas_rn2pubchem_cid(casrn):
16
18
  """
17
19
  Convert CAS RN to PubChem CID.
@@ -24,8 +26,7 @@ def cas_rn2pubchem_cid(casrn):
24
26
  """
25
27
  # Load Hydra configuration for PubChem ID conversion
26
28
  with hydra.initialize(version_base=None, config_path="../configs"):
27
- cfg = hydra.compose(config_name='config',
28
- overrides=['utils/pubchem_utils=default'])
29
+ cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
29
30
  cfg = cfg.utils.pubchem_utils
30
31
  # Prepare the URL
31
32
  pubchem_url_for_drug = f"{cfg.pubchem_casrn2cid_url}{casrn}/record/JSON"
@@ -43,6 +44,7 @@ def cas_rn2pubchem_cid(casrn):
43
44
  break
44
45
  return cid
45
46
 
47
+
46
48
  def external_id2pubchem_cid(db, db_id):
47
49
  """
48
50
  Convert external DB ID to PubChem CID.
@@ -59,8 +61,7 @@ def external_id2pubchem_cid(db, db_id):
59
61
  """
60
62
  # Load Hydra configuration for PubChem ID conversion
61
63
  with hydra.initialize(version_base=None, config_path="../configs"):
62
- cfg = hydra.compose(config_name='config',
63
- overrides=['utils/pubchem_utils=default'])
64
+ cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
64
65
  cfg = cfg.utils.pubchem_utils
65
66
  # Prepare the URL
66
67
  pubchem_url_for_drug = f"{cfg.pubchem_cid_base_url}/{db}/{db_id}/JSON"
@@ -76,6 +77,7 @@ def external_id2pubchem_cid(db, db_id):
76
77
  break
77
78
  return cid
78
79
 
80
+
79
81
  def pubchem_cid_description(cid):
80
82
  """
81
83
  Get the description of a PubChem CID.
@@ -88,8 +90,7 @@ def pubchem_cid_description(cid):
88
90
  """
89
91
  # Load Hydra configuration for PubChem CID description
90
92
  with hydra.initialize(version_base=None, config_path="../configs"):
91
- cfg = hydra.compose(config_name='config',
92
- overrides=['utils/pubchem_utils=default'])
93
+ cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
93
94
  cfg = cfg.utils.pubchem_utils
94
95
  # Prepare the URL
95
96
  pubchem_url_for_descpription = f"{cfg.pubchem_cid_description_url}/{cid}/description/JSON"
@@ -97,7 +98,7 @@ def pubchem_cid_description(cid):
97
98
  response = requests.get(pubchem_url_for_descpription, timeout=60)
98
99
  data = response.json()
99
100
  # Extract the PubChem CID description
100
- description = ''
101
- for information in data["InformationList"]['Information']:
102
- description += information.get("Description", '')
101
+ description = ""
102
+ for information in data["InformationList"]["Information"]:
103
+ description += information.get("Description", "")
103
104
  return description
@@ -0,0 +1,13 @@
1
+ _pycache_/
2
+ *.pyc
3
+ *.log
4
+ *.csv
5
+ *.pt
6
+ *.pkl
7
+ models/
8
+ data/
9
+ env/
10
+ .venv/
11
+ .git/
12
+ .env
13
+ .cufile.log
@@ -0,0 +1,104 @@
1
+ # syntax=docker/dockerfile:1
2
+
3
+ # Dockerfile for the talk2scholars application
4
+ # Multi-stage build for optimized image size with UV package manager
5
+
6
+ ARG BASE_IMAGE=ubuntu:24.04
7
+ ARG PYTHON_VERSION=3.12
8
+
9
+ FROM ${BASE_IMAGE} AS dev-base
10
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
11
+ build-essential \
12
+ ca-certificates \
13
+ cmake \
14
+ curl \
15
+ g++ \
16
+ libopenblas-dev \
17
+ libomp-dev \
18
+ ninja-build \
19
+ wget \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ FROM dev-base AS python-install
23
+ ARG PYTHON_VERSION=3.12
24
+
25
+ # Install Python (available in Ubuntu 24.04 default repos)
26
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
27
+ python${PYTHON_VERSION} \
28
+ python${PYTHON_VERSION}-dev \
29
+ python${PYTHON_VERSION}-venv \
30
+ python3-pip \
31
+ && rm -rf /var/lib/apt/lists/* \
32
+ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
33
+ && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
34
+
35
+ FROM python-install AS uv-install
36
+ WORKDIR /app
37
+
38
+ # Install UV package manager and dependencies
39
+ COPY pyproject.toml uv.lock* ./
40
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
41
+ export PATH="/root/.local/bin:$PATH" && \
42
+ export UV_PROJECT_ENVIRONMENT="/opt/venv" && \
43
+ uv sync --frozen --extra dev --no-install-project --python python${PYTHON_VERSION} && \
44
+ . /opt/venv/bin/activate && \
45
+ # RAPIDS packages (commented out - will be added in future if needed)
46
+ # uv pip install \
47
+ # --extra-index-url=https://pypi.nvidia.com \
48
+ # --index-strategy unsafe-best-match \
49
+ # cudf-cu12 dask-cudf-cu12 && \
50
+ uv cache clean
51
+
52
+ FROM ${BASE_IMAGE} AS runtime
53
+ ARG PYTHON_VERSION=3.12
54
+ LABEL maintainer="talk2scholars"
55
+ LABEL version="1.0.0"
56
+ LABEL description="AI Agents for Pharma - Scholars Application"
57
+
58
+ # Install runtime dependencies
59
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
60
+ ca-certificates \
61
+ curl \
62
+ libmagic1 \
63
+ libopenblas0 \
64
+ libomp5 \
65
+ python${PYTHON_VERSION} \
66
+ && rm -rf /var/lib/apt/lists/* \
67
+ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
68
+ && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
69
+
70
+ # Copy UV virtual environment from build stage
71
+ COPY --from=uv-install /opt/venv /opt/venv
72
+
73
+ # Set environment variables
74
+ ENV PATH="/opt/venv/bin:$PATH"
75
+ ENV PYTHONPATH="/app"
76
+ ENV PYTHONUNBUFFERED=1
77
+ ENV PYTHONDONTWRITEBYTECODE=1
78
+ ENV STREAMLIT_SERVER_HEADLESS=true
79
+ ENV STREAMLIT_SERVER_ENABLE_CORS=false
80
+
81
+ # Set working directory and create necessary directories
82
+ WORKDIR /app
83
+
84
+ # Copy application code
85
+ COPY aiagents4pharma/talk2scholars /app/aiagents4pharma/talk2scholars
86
+ COPY docs /app/docs
87
+ COPY app /app/app
88
+
89
+ # Copy and set up the entrypoint script (commented out - will be added in future if needed)
90
+ # COPY aiagents4pharma/talk2knowledgegraphs/entrypoint.sh /usr/local/bin/entrypoint.sh
91
+ # RUN chmod +x /usr/local/bin/entrypoint.sh
92
+
93
+ # Health check for production monitoring
94
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
95
+ CMD curl -f http://localhost:8501/health || exit 1
96
+
97
+ # Expose the default Streamlit port
98
+ EXPOSE 8501
99
+
100
+ # Set the entrypoint (commented out - will be added in future if needed)
101
+ # ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
102
+
103
+ # Default command (can be overridden)
104
+ CMD ["streamlit", "run", "/app/app/frontend/streamlit_app_talk2scholars.py", "--server.port=8501", "--server.address=0.0.0.0"]
@@ -0,0 +1 @@
1
+ Please check out the README file in the root folder for more information.
@@ -2,11 +2,7 @@
2
2
  This file is used to import all the modules in the package.
3
3
  """
4
4
 
5
- from . import main_agent
6
- from . import s2_agent
7
- from . import paper_download_agent
8
- from . import zotero_agent
9
- from . import pdf_agent
5
+ from . import main_agent, paper_download_agent, pdf_agent, s2_agent, zotero_agent
10
6
 
11
7
  __all__ = [
12
8
  "main_agent",
@@ -12,15 +12,17 @@ Functions:
12
12
  """
13
13
 
14
14
  import logging
15
+
15
16
  import hydra
16
- from langgraph_supervisor import create_supervisor
17
- from langchain_openai import ChatOpenAI
18
17
  from langchain_core.language_models.chat_models import BaseChatModel
18
+ from langchain_openai import ChatOpenAI
19
19
  from langgraph.checkpoint.memory import MemorySaver
20
+ from langgraph_supervisor import create_supervisor
21
+
22
+ from ..agents.paper_download_agent import get_app as get_app_paper_download
23
+ from ..agents.pdf_agent import get_app as get_app_pdf
20
24
  from ..agents.s2_agent import get_app as get_app_s2
21
25
  from ..agents.zotero_agent import get_app as get_app_zotero
22
- from ..agents.pdf_agent import get_app as get_app_pdf
23
- from ..agents.paper_download_agent import get_app as get_app_paper_download
24
26
  from ..state.state_talk2scholars import Talk2Scholars
25
27
 
26
28
  # Initialize logger
@@ -5,17 +5,18 @@ paper details and PDFs. It is part of the Talk2Scholars project.
5
5
  """
6
6
 
7
7
  import logging
8
- from typing import Any, Dict
8
+ from typing import Any
9
+
9
10
  import hydra
10
11
  from langchain_core.language_models.chat_models import BaseChatModel
12
+ from langgraph.checkpoint.memory import MemorySaver
11
13
  from langgraph.graph import START, StateGraph
12
14
  from langgraph.prebuilt.chat_agent_executor import create_react_agent
13
15
  from langgraph.prebuilt.tool_node import ToolNode
14
- from langgraph.checkpoint.memory import MemorySaver
16
+
15
17
  from ..state.state_talk2scholars import Talk2Scholars
16
18
  from ..tools.paper_download.paper_downloader import download_papers
17
19
 
18
-
19
20
  # Initialize logger
20
21
  logging.basicConfig(level=logging.INFO)
21
22
  logger = logging.getLogger(__name__)
@@ -67,7 +68,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
67
68
  checkpointer=MemorySaver(),
68
69
  )
69
70
 
70
- def paper_download_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
71
+ def paper_download_agent_node(state: Talk2Scholars) -> dict[str, Any]:
71
72
  """
72
73
  Processes the current state to fetch the research paper from arXiv, BioRxiv, or MedRxiv.
73
74
  """
@@ -12,11 +12,13 @@ Usage:
12
12
  """
13
13
 
14
14
  import logging
15
+
15
16
  import hydra
16
17
  from langchain_core.language_models.chat_models import BaseChatModel
17
- from langgraph.graph import START, StateGraph
18
- from langgraph.prebuilt import create_react_agent, ToolNode
19
18
  from langgraph.checkpoint.memory import MemorySaver
19
+ from langgraph.graph import START, StateGraph
20
+ from langgraph.prebuilt import ToolNode, create_react_agent
21
+
20
22
  from ..state.state_talk2scholars import Talk2Scholars
21
23
  from ..tools.pdf.question_and_answer import question_and_answer
22
24
 
@@ -5,7 +5,7 @@ Agent for interacting with Semantic Scholar
5
5
  """
6
6
 
7
7
  import logging
8
- from typing import Any, Dict
8
+ from typing import Any
9
9
 
10
10
  import hydra
11
11
  from langchain_core.language_models.chat_models import BaseChatModel
@@ -54,7 +54,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
54
54
  >>> result = app.invoke(initial_state)
55
55
  """
56
56
 
57
- def s2_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
57
+ def s2_agent_node(state: Talk2Scholars) -> dict[str, Any]:
58
58
  """
59
59
  Processes the user query and retrieves relevant research papers.
60
60