aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  109. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  114. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  115. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  116. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  117. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  118. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  122. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  123. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  124. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  125. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  126. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  127. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
  150. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  151. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  152. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
  153. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  154. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  156. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  157. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  170. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  174. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  175. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  176. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  177. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  178. aiagents4pharma/talk2scholars/README.md +1 -0
  179. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  180. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  181. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  182. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  183. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  184. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  185. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  186. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  191. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  192. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  194. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  196. aiagents4pharma/talk2scholars/install.md +122 -0
  197. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  198. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  199. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  200. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  201. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  202. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  203. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  205. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  206. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  207. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  208. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  209. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  210. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  212. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  213. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  214. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  215. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  216. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  227. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  228. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  229. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  230. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  234. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  235. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  236. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  242. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  243. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  259. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  260. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  261. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  262. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  263. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  264. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  265. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  266. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  268. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  270. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  271. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  277. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  280. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
  281. aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
  282. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
  283. aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
  284. aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
  285. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  286. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  289. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -2,17 +2,20 @@
2
2
  Embedding class using Ollama model based on LangChain Embeddings class.
3
3
  """
4
4
 
5
- import time
6
- from typing import List
7
5
  import subprocess
6
+ import time
7
+
8
8
  import ollama
9
9
  from langchain_ollama import OllamaEmbeddings
10
+
10
11
  from .embeddings import Embeddings
11
12
 
13
+
12
14
  class EmbeddingWithOllama(Embeddings):
13
15
  """
14
16
  Embedding class using Ollama model based on LangChain Embeddings class.
15
17
  """
18
+
16
19
  def __init__(self, model_name: str):
17
20
  """
18
21
  Initialize the EmbeddingWithOllama class.
@@ -38,18 +41,21 @@ class EmbeddingWithOllama(Embeddings):
38
41
  """
39
42
  try:
40
43
  models_list = ollama.list()["models"]
41
- if model_name not in [m['model'].replace(":latest", "") for m in models_list]:
44
+ if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
42
45
  ollama.pull(model_name)
43
46
  time.sleep(30)
44
47
  raise ValueError(f"Pulled {model_name} model")
45
48
  except Exception as e:
46
49
  with subprocess.Popen(
47
- "ollama serve", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
50
+ "ollama serve",
51
+ shell=True,
52
+ stdout=subprocess.PIPE,
53
+ stderr=subprocess.PIPE,
48
54
  ):
49
55
  time.sleep(10)
50
56
  raise ValueError(f"Error: {e} and restarted Ollama server.") from e
51
57
 
52
- def embed_documents(self, texts: List[str]) -> List[float]:
58
+ def embed_documents(self, texts: list[str]) -> list[float]:
53
59
  """
54
60
  Generate embedding for a list of input texts using Ollama model.
55
61
 
@@ -65,7 +71,7 @@ class EmbeddingWithOllama(Embeddings):
65
71
 
66
72
  return embeddings
67
73
 
68
- def embed_query(self, text: str) -> List[float]:
74
+ def embed_query(self, text: str) -> list[float]:
69
75
  """
70
76
  Generate embeddings for an input text using Ollama model.
71
77
 
@@ -4,8 +4,8 @@
4
4
  Embedding class using SentenceTransformer model based on LangChain Embeddings class.
5
5
  """
6
6
 
7
- from typing import List
8
7
  from sentence_transformers import SentenceTransformer
8
+
9
9
  from .embeddings import Embeddings
10
10
 
11
11
 
@@ -35,11 +35,13 @@ class EmbeddingWithSentenceTransformer(Embeddings):
35
35
  self.trust_remote_code = trust_remote_code
36
36
 
37
37
  # Load the model
38
- self.model = SentenceTransformer(self.model_name,
39
- cache_folder=self.model_cache_dir,
40
- trust_remote_code=self.trust_remote_code)
38
+ self.model = SentenceTransformer(
39
+ self.model_name,
40
+ cache_folder=self.model_cache_dir,
41
+ trust_remote_code=self.trust_remote_code,
42
+ )
41
43
 
42
- def embed_documents(self, texts: List[str]) -> List[float]:
44
+ def embed_documents(self, texts: list[str]) -> list[float]:
43
45
  """
44
46
  Generate embedding for a list of input texts using SentenceTransformer model.
45
47
 
@@ -55,7 +57,7 @@ class EmbeddingWithSentenceTransformer(Embeddings):
55
57
 
56
58
  return embeddings
57
59
 
58
- def embed_query(self, text: str) -> List[float]:
60
+ def embed_query(self, text: str) -> list[float]:
59
61
  """
60
62
  Generate embeddings for an input text using SentenceTransformer model.
61
63
 
@@ -1,9 +1,12 @@
1
1
  """
2
2
  This package contains modules to use the enrichment model
3
3
  """
4
- from . import enrichments
5
- from . import ollama
6
- from . import pubchem_strings
7
- from . import uniprot_proteins
8
- from . import reactome_pathways
9
- from . import ols_terms
4
+
5
+ from . import (
6
+ enrichments,
7
+ ollama,
8
+ ols_terms,
9
+ pubchem_strings,
10
+ reactome_pathways,
11
+ uniprot_proteins,
12
+ )
@@ -4,6 +4,7 @@ Enrichments interface
4
4
 
5
5
  from abc import ABC, abstractmethod
6
6
 
7
+
7
8
  class Enrichments(ABC):
8
9
  """Interface for enrichment models.
9
10
 
@@ -4,20 +4,23 @@
4
4
  Enrichment class using Ollama model based on LangChain Enrichment class.
5
5
  """
6
6
 
7
- import time
8
- from typing import List
9
- import subprocess
10
7
  import ast
8
+ import subprocess
9
+ import time
10
+
11
11
  import ollama
12
- from langchain_ollama import ChatOllama
13
- from langchain_core.prompts import ChatPromptTemplate
14
12
  from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_core.prompts import ChatPromptTemplate
14
+ from langchain_ollama import ChatOllama
15
+
15
16
  from .enrichments import Enrichments
16
17
 
18
+
17
19
  class EnrichmentWithOllama(Enrichments):
18
20
  """
19
21
  Enrichment class using Ollama model based on the Enrichment abstract class.
20
22
  """
23
+
21
24
  def __init__(
22
25
  self,
23
26
  model_name: str,
@@ -67,18 +70,21 @@ class EnrichmentWithOllama(Enrichments):
67
70
  """
68
71
  try:
69
72
  models_list = ollama.list()["models"]
70
- if model_name not in [m['model'].replace(":latest", "") for m in models_list]:
73
+ if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
71
74
  ollama.pull(model_name)
72
75
  time.sleep(30)
73
76
  raise ValueError(f"Pulled {model_name} model")
74
77
  except Exception as e:
75
78
  with subprocess.Popen(
76
- "ollama serve", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
79
+ "ollama serve",
80
+ shell=True,
81
+ stdout=subprocess.PIPE,
82
+ stderr=subprocess.PIPE,
77
83
  ):
78
84
  time.sleep(10)
79
85
  raise ValueError(f"Error: {e} and restarted Ollama server.") from e
80
86
 
81
- def enrich_documents(self, texts: List[str]) -> List[str]:
87
+ def enrich_documents(self, texts: list[str]) -> list[str]:
82
88
  """
83
89
  Enrich a list of input texts with additional textual features using OLLAMA model.
84
90
  Important: Make sure the input is a list of texts based on the defined prompt template
@@ -116,7 +122,7 @@ class EnrichmentWithOllama(Enrichments):
116
122
  Args:
117
123
  texts: The list of texts to be enriched.
118
124
  docs: The list of reference documents to enrich the input texts.
119
-
125
+
120
126
  Returns:
121
127
  The list of enriched texts
122
128
  """
@@ -4,22 +4,25 @@
4
4
  Enrichment class for enriching OLS terms with textual descriptions
5
5
  """
6
6
 
7
- from typing import List
8
- import logging
9
7
  import json
8
+ import logging
9
+
10
10
  import hydra
11
11
  import requests
12
+
12
13
  from .enrichments import Enrichments
13
14
 
14
15
  # Initialize logger
15
16
  logging.basicConfig(level=logging.INFO)
16
17
  logger = logging.getLogger(__name__)
17
18
 
19
+
18
20
  class EnrichmentWithOLS(Enrichments):
19
21
  """
20
22
  Enrichment class using OLS terms
21
23
  """
22
- def enrich_documents(self, texts: List[str]) -> List[str]:
24
+
25
+ def enrich_documents(self, texts: list[str]) -> list[str]:
23
26
  """
24
27
  Enrich a list of input OLS terms
25
28
 
@@ -32,41 +35,41 @@ class EnrichmentWithOLS(Enrichments):
32
35
 
33
36
  ols_ids = texts
34
37
 
35
- logger.log(logging.INFO,
36
- "Load Hydra configuration for OLS enrichments.")
38
+ logger.log(logging.INFO, "Load Hydra configuration for OLS enrichments.")
37
39
  with hydra.initialize(version_base=None, config_path="../../configs"):
38
- cfg = hydra.compose(config_name='config',
39
- overrides=['utils/enrichments/ols_terms=default'])
40
+ cfg = hydra.compose(
41
+ config_name="config", overrides=["utils/enrichments/ols_terms=default"]
42
+ )
40
43
  cfg = cfg.utils.enrichments.ols_terms
41
44
 
42
45
  descriptions = []
43
46
  for ols_id in ols_ids:
44
- params = {
45
- 'short_form': ols_id
46
- }
47
- r = requests.get(cfg.base_url,
48
- headers={ "Accept" : "application/json"},
49
- params=params,
50
- timeout=cfg.timeout)
47
+ params = {"short_form": ols_id}
48
+ r = requests.get(
49
+ cfg.base_url,
50
+ headers={"Accept": "application/json"},
51
+ params=params,
52
+ timeout=cfg.timeout,
53
+ )
51
54
  response_body = json.loads(r.text)
52
55
  # if the response body is empty
53
- if '_embedded' not in response_body:
56
+ if "_embedded" not in response_body:
54
57
  descriptions.append(None)
55
58
  continue
56
59
  # Add the description to the list
57
60
  description = []
58
- for term in response_body['_embedded']['terms']:
61
+ for term in response_body["_embedded"]["terms"]:
59
62
  # If the term has a description, add it to the list
60
- description += term.get('description', [])
63
+ description += term.get("description", [])
61
64
  # Add synonyms to the description
62
- description += term.get('synonyms', [])
65
+ description += term.get("synonyms", [])
63
66
  # Add the label to the description
64
67
  # Label is not provided as list, so we need to convert it to a list
65
- description += [term.get('label', [])]
68
+ description += [term.get("label", [])]
66
69
  # Make unique the description
67
70
  description = list(set(description))
68
71
  # Join the description with new line
69
- description = '\n'.join(description)
72
+ description = "\n".join(description)
70
73
  # Add the description to the list
71
74
  descriptions.append(description)
72
75
  return descriptions
@@ -5,21 +5,24 @@ Enrichment class for enriching PubChem IDs with their STRINGS representation and
5
5
  """
6
6
 
7
7
  import logging
8
- from typing import List
9
- import requests
8
+
10
9
  import hydra
11
- from .enrichments import Enrichments
10
+ import requests
11
+
12
12
  from ..pubchem_utils import pubchem_cid_description
13
+ from .enrichments import Enrichments
13
14
 
14
15
  # Initialize logger
15
16
  logging.basicConfig(level=logging.INFO)
16
17
  logger = logging.getLogger(__name__)
17
18
 
19
+
18
20
  class EnrichmentWithPubChem(Enrichments):
19
21
  """
20
22
  Enrichment class using PubChem
21
23
  """
22
- def enrich_documents(self, texts: List[str]) -> List[str]:
24
+
25
+ def enrich_documents(self, texts: list[str]) -> list[str]:
23
26
  """
24
27
  Enrich a list of input PubChem IDs with their STRINGS representation.
25
28
 
@@ -35,8 +38,7 @@ class EnrichmentWithPubChem(Enrichments):
35
38
 
36
39
  # Load Hydra configuration to get the base URL for PubChem
37
40
  with hydra.initialize(version_base=None, config_path="../../configs"):
38
- cfg = hydra.compose(config_name='config',
39
- overrides=['utils/pubchem_utils=default'])
41
+ cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
40
42
  cfg = cfg.utils.pubchem_utils
41
43
  # Iterate over each PubChem ID in the input list
42
44
  pubchem_cids = texts
@@ -47,11 +49,11 @@ class EnrichmentWithPubChem(Enrichments):
47
49
  response = requests.get(pubchem_url, timeout=60)
48
50
  data = response.json()
49
51
  # Extract the PubChem CID SMILES
50
- smiles = ''
51
- description = ''
52
+ smiles = ""
53
+ description = ""
52
54
  if "PropertyTable" in data:
53
- for prop in data["PropertyTable"]['Properties']:
54
- smiles = prop.get("SMILES", '')
55
+ for prop in data["PropertyTable"]["Properties"]:
56
+ smiles = prop.get("SMILES", "")
55
57
  description = pubchem_cid_description(pubchem_cid)
56
58
  else:
57
59
  # If the PubChem ID is not found, set smiles and description to None
@@ -4,21 +4,24 @@
4
4
  Enrichment class for enriching Reactome pathways with textual descriptions
5
5
  """
6
6
 
7
- from typing import List
8
7
  import logging
8
+
9
9
  import hydra
10
10
  import requests
11
+
11
12
  from .enrichments import Enrichments
12
13
 
13
14
  # Initialize logger
14
15
  logging.basicConfig(level=logging.INFO)
15
16
  logger = logging.getLogger(__name__)
16
17
 
18
+
17
19
  class EnrichmentWithReactome(Enrichments):
18
20
  """
19
21
  Enrichment class using Reactome pathways
20
22
  """
21
- def enrich_documents(self, texts: List[str]) -> List[str]:
23
+
24
+ def enrich_documents(self, texts: list[str]) -> list[str]:
22
25
  """
23
26
  Enrich a list of input Reactome pathways
24
27
 
@@ -31,25 +34,28 @@ class EnrichmentWithReactome(Enrichments):
31
34
 
32
35
  reactome_pathways_ids = texts
33
36
 
34
- logger.log(logging.INFO,
35
- "Load Hydra configuration for reactome enrichment")
37
+ logger.log(logging.INFO, "Load Hydra configuration for reactome enrichment")
36
38
  with hydra.initialize(version_base=None, config_path="../../configs"):
37
- cfg = hydra.compose(config_name='config',
38
- overrides=['utils/enrichments/reactome_pathways=default'])
39
+ cfg = hydra.compose(
40
+ config_name="config",
41
+ overrides=["utils/enrichments/reactome_pathways=default"],
42
+ )
39
43
  cfg = cfg.utils.enrichments.reactome_pathways
40
44
 
41
45
  descriptions = []
42
46
  for reactome_pathway_id in reactome_pathways_ids:
43
- r = requests.get(cfg.base_url + reactome_pathway_id + '/summation',
44
- headers={ "Accept" : "text/plain"},
45
- timeout=cfg.timeout)
47
+ r = requests.get(
48
+ cfg.base_url + reactome_pathway_id + "/summation",
49
+ headers={"Accept": "text/plain"},
50
+ timeout=cfg.timeout,
51
+ )
46
52
  # if the response is not ok
47
53
  if not r.ok:
48
54
  descriptions.append(None)
49
55
  continue
50
56
  response_body = r.text
51
57
  # if the response is ok
52
- descriptions.append(response_body.split('\t')[1])
58
+ descriptions.append(response_body.split("\t")[1])
53
59
  return descriptions
54
60
 
55
61
  def enrich_documents_with_rag(self, texts, docs):
@@ -4,22 +4,25 @@
4
4
  Enrichment class for enriching Gene names with their function and sequence using UniProt.
5
5
  """
6
6
 
7
- from typing import List
8
- import logging
9
7
  import json
8
+ import logging
9
+
10
10
  import hydra
11
11
  import requests
12
+
12
13
  from .enrichments import Enrichments
13
14
 
14
15
  # Initialize logger
15
16
  logging.basicConfig(level=logging.INFO)
16
17
  logger = logging.getLogger(__name__)
17
18
 
19
+
18
20
  class EnrichmentWithUniProt(Enrichments):
19
21
  """
20
22
  Enrichment class using UniProt
21
23
  """
22
- def enrich_documents(self, texts: List[str]) -> List[str]:
24
+
25
+ def enrich_documents(self, texts: list[str]) -> list[str]:
23
26
  """
24
27
  Enrich a list of input UniProt gene names with their function and sequence.
25
28
 
@@ -32,14 +35,17 @@ class EnrichmentWithUniProt(Enrichments):
32
35
 
33
36
  enriched_gene_names = texts
34
37
 
35
- logger.log(logging.INFO,
36
- "Load Hydra configuration for Gene enrichment with description and sequence.")
38
+ logger.log(
39
+ logging.INFO,
40
+ "Load Hydra configuration for Gene enrichment with description and sequence.",
41
+ )
37
42
  with hydra.initialize(version_base=None, config_path="../../configs"):
38
- cfg = hydra.compose(config_name='config',
39
- overrides=['utils/enrichments/uniprot_proteins=default'])
43
+ cfg = hydra.compose(
44
+ config_name="config",
45
+ overrides=["utils/enrichments/uniprot_proteins=default"],
46
+ )
40
47
  cfg = cfg.utils.enrichments.uniprot_proteins
41
48
 
42
-
43
49
  descriptions = []
44
50
  sequences = []
45
51
  for gene in enriched_gene_names:
@@ -52,10 +58,12 @@ class EnrichmentWithUniProt(Enrichments):
52
58
  # https://www.uniprot.org/help/taxonomy
53
59
  }
54
60
 
55
- r = requests.get(cfg.uniprot_url,
56
- headers={ "Accept" : "application/json"},
57
- params=params,
58
- timeout=cfg.timeout)
61
+ r = requests.get(
62
+ cfg.uniprot_url,
63
+ headers={"Accept": "application/json"},
64
+ params=params,
65
+ timeout=cfg.timeout,
66
+ )
59
67
  # if the response is not ok
60
68
  if not r.ok:
61
69
  descriptions.append(None)
@@ -67,12 +75,12 @@ class EnrichmentWithUniProt(Enrichments):
67
75
  descriptions.append(None)
68
76
  sequences.append(None)
69
77
  continue
70
- description = ''
71
- for comment in response_body[0]['comments']:
72
- if comment['type'] == 'FUNCTION':
73
- for value in comment['text']:
74
- description += value['value']
75
- sequence = response_body[0]['sequence']['sequence']
78
+ description = ""
79
+ for comment in response_body[0]["comments"]:
80
+ if comment["type"] == "FUNCTION":
81
+ for value in comment["text"]:
82
+ description += value["value"]
83
+ sequence = response_body[0]["sequence"]["sequence"]
76
84
  descriptions.append(description)
77
85
  sequences.append(sequence)
78
86
  return descriptions, sequences
@@ -1,6 +1,5 @@
1
- '''
1
+ """
2
2
  This file is used to import all the models in the package.
3
- '''
4
- from . import pcst
5
- from . import multimodal_pcst
6
- from . import milvus_multimodal_pcst
3
+ """
4
+
5
+ from . import milvus_multimodal_pcst, multimodal_pcst, pcst
@@ -16,6 +16,7 @@ from pymilvus import Collection
16
16
  try:
17
17
  import cudf
18
18
  import cupy as cp
19
+
19
20
  CUDF_AVAILABLE = True
20
21
  except ImportError:
21
22
  CUDF_AVAILABLE = False
@@ -34,9 +35,7 @@ class SystemDetector:
34
35
  self.os_type = platform.system().lower() # 'windows', 'linux', 'darwin'
35
36
  self.architecture = platform.machine().lower() # 'x86_64', 'arm64', etc.
36
37
  self.has_nvidia_gpu = self._detect_nvidia_gpu()
37
- self.use_gpu = (
38
- self.has_nvidia_gpu and self.os_type != "darwin"
39
- ) # No CUDA on macOS
38
+ self.use_gpu = self.has_nvidia_gpu and self.os_type != "darwin" # No CUDA on macOS
40
39
 
41
40
  logger.info("System Detection Results:")
42
41
  logger.info(" OS: %s", self.os_type)
@@ -232,9 +231,7 @@ class MultimodalPCSTPruning(NamedTuple):
232
231
  """
233
232
  # Initialize several variables
234
233
  topk = min(self.topk, colls["nodes"].num_entities)
235
- n_prizes = self.loader.py.zeros(
236
- colls["nodes"].num_entities, dtype=self.loader.py.float32
237
- )
234
+ n_prizes = self.loader.py.zeros(colls["nodes"].num_entities, dtype=self.loader.py.float32)
238
235
 
239
236
  # Get the actual metric type to use
240
237
  actual_metric_type = self.metric_type or self.loader.metric_type
@@ -279,9 +276,7 @@ class MultimodalPCSTPruning(NamedTuple):
279
276
  """
280
277
  # Initialize several variables
281
278
  topk_e = min(self.topk_e, colls["edges"].num_entities)
282
- e_prizes = self.loader.py.zeros(
283
- colls["edges"].num_entities, dtype=self.loader.py.float32
284
- )
279
+ e_prizes = self.loader.py.zeros(colls["edges"].num_entities, dtype=self.loader.py.float32)
285
280
 
286
281
  # Get the actual metric type to use
287
282
  actual_metric_type = self.metric_type or self.loader.metric_type
@@ -299,15 +294,11 @@ class MultimodalPCSTPruning(NamedTuple):
299
294
  e_prizes[[r.id for r in res[0]]] = [r.score for r in res[0]]
300
295
 
301
296
  # Further process the edge_prizes
302
- unique_prizes, inverse_indices = self.loader.py.unique(
303
- e_prizes, return_inverse=True
304
- )
297
+ unique_prizes, inverse_indices = self.loader.py.unique(e_prizes, return_inverse=True)
305
298
  topk_e_values = unique_prizes[self.loader.py.argsort(-unique_prizes)[:topk_e]]
306
299
  last_topk_e_value = topk_e
307
300
  for k in range(topk_e):
308
- indices = (
309
- inverse_indices == (unique_prizes == topk_e_values[k]).nonzero()[0]
310
- )
301
+ indices = inverse_indices == (unique_prizes == topk_e_values[k]).nonzero()[0]
311
302
  value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
312
303
  e_prizes[indices] = value
313
304
  last_topk_e_value = value * (1 - self.c_const)
@@ -381,7 +372,7 @@ class MultimodalPCSTPruning(NamedTuple):
381
372
  # Edge index mapping: local real edge idx -> original global index
382
373
  logger.log(logging.INFO, "Creating mapping for real edges")
383
374
  mapping_edges = dict(
384
- zip(range(len(real_["indices"])), self.loader.to_list(real_["indices"]))
375
+ zip(range(len(real_["indices"])), self.loader.to_list(real_["indices"]), strict=False)
385
376
  )
386
377
 
387
378
  # Virtual edge handling
@@ -398,15 +389,9 @@ class MultimodalPCSTPruning(NamedTuple):
398
389
 
399
390
  # Virtual edges: (src → virtual), (virtual → dst)
400
391
  logger.log(logging.INFO, "Creating virtual edges")
401
- virt_["edges_1"] = self.loader.py.stack(
402
- [virt_["src"], virt_["node_ids"]], axis=1
403
- )
404
- virt_["edges_2"] = self.loader.py.stack(
405
- [virt_["node_ids"], virt_["dst"]], axis=1
406
- )
407
- virt_["edges"] = self.loader.py.concatenate(
408
- [virt_["edges_1"], virt_["edges_2"]], axis=0
409
- )
392
+ virt_["edges_1"] = self.loader.py.stack([virt_["src"], virt_["node_ids"]], axis=1)
393
+ virt_["edges_2"] = self.loader.py.stack([virt_["node_ids"], virt_["dst"]], axis=1)
394
+ virt_["edges"] = self.loader.py.concatenate([virt_["edges_1"], virt_["edges_2"]], axis=0)
410
395
  virt_["costs"] = self.loader.py.zeros(
411
396
  (virt_["edges"].shape[0],), dtype=real_["costs"].dtype
412
397
  )
@@ -418,9 +403,7 @@ class MultimodalPCSTPruning(NamedTuple):
418
403
 
419
404
  # Final prizes
420
405
  logger.log(logging.INFO, "Getting final prizes")
421
- final_prizes = self.loader.py.concatenate(
422
- [prizes["nodes"], virt_["prizes"]], axis=0
423
- )
406
+ final_prizes = self.loader.py.concatenate([prizes["nodes"], virt_["prizes"]], axis=0)
424
407
 
425
408
  # Mapping virtual node ID -> edge index in original graph
426
409
  logger.log(logging.INFO, "Creating mapping for virtual nodes")
@@ -428,6 +411,7 @@ class MultimodalPCSTPruning(NamedTuple):
428
411
  zip(
429
412
  self.loader.to_list(virt_["node_ids"]),
430
413
  self.loader.to_list(virt_["indices"]),
414
+ strict=False,
431
415
  )
432
416
  )
433
417
 
@@ -466,9 +450,7 @@ class MultimodalPCSTPruning(NamedTuple):
466
450
 
467
451
  # Retrieve the selected nodes and edges based on the given vertices and edges
468
452
  subgraph_nodes = vertices[vertices < num_nodes]
469
- subgraph_edges = [
470
- mapping["edges"][e.item()] for e in edges if e < num_prior_edges
471
- ]
453
+ subgraph_edges = [mapping["edges"][e.item()] for e in edges if e < num_prior_edges]
472
454
  virtual_vertices = vertices[vertices >= num_nodes]
473
455
  if len(virtual_vertices) > 0:
474
456
  virtual_edges = [mapping["nodes"][i.item()] for i in virtual_vertices]
@@ -480,9 +462,7 @@ class MultimodalPCSTPruning(NamedTuple):
480
462
 
481
463
  return {"nodes": subgraph_nodes, "edges": subgraph_edges}
482
464
 
483
- def extract_subgraph(
484
- self, text_emb: list, query_emb: list, modality: str, cfg: dict
485
- ) -> dict:
465
+ def extract_subgraph(self, text_emb: list, query_emb: list, modality: str, cfg: dict) -> dict:
486
466
  """
487
467
  Perform the Prize-Collecting Steiner Tree (PCST) algorithm to extract the subgraph.
488
468