aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  114. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  115. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  116. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  117. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  118. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  122. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  123. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  124. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  125. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  126. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  127. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +736 -413
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +442 -42
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +10 -6
  151. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  152. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  153. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +245 -205
  154. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  156. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  157. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  170. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +218 -81
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  174. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  175. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  176. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  177. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  178. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  179. aiagents4pharma/talk2scholars/README.md +1 -0
  180. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  181. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  182. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  183. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  184. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  185. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  186. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  191. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  192. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  194. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  196. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  197. aiagents4pharma/talk2scholars/install.md +122 -0
  198. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  199. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  200. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  201. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  202. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  203. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  205. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  206. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  207. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  208. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  209. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  210. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  212. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  213. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  214. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  215. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  216. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  227. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  228. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  229. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  230. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  234. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  235. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  236. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  242. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  243. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  259. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  260. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  261. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  262. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  263. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  264. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  265. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  266. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  268. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  270. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  271. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  277. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  280. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  281. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/METADATA +115 -50
  282. aiagents4pharma-1.45.0.dist-info/RECORD +324 -0
  283. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/WHEEL +1 -2
  284. aiagents4pharma-1.43.0.dist-info/RECORD +0 -293
  285. aiagents4pharma-1.43.0.dist-info/top_level.txt +0 -1
  286. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  289. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  290. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ Generate an answer for a question using retrieved chunks of documents.
4
4
 
5
5
  import logging
6
6
  import os
7
- from typing import Any, Dict, List
7
+ from typing import Any
8
8
 
9
9
  import hydra
10
10
  from langchain_core.documents import Document
@@ -32,7 +32,7 @@ def load_hydra_config() -> Any:
32
32
 
33
33
 
34
34
  def _build_context_and_sources(
35
- retrieved_chunks: List[Document],
35
+ retrieved_chunks: list[Document],
36
36
  ) -> tuple[str, set[str]]:
37
37
  """
38
38
  Build the combined context string and set of paper_ids from retrieved chunks.
@@ -61,10 +61,10 @@ def _build_context_and_sources(
61
61
 
62
62
  def generate_answer(
63
63
  question: str,
64
- retrieved_chunks: List[Document],
64
+ retrieved_chunks: list[Document],
65
65
  llm_model: BaseChatModel,
66
66
  config: Any,
67
- ) -> Dict[str, Any]:
67
+ ) -> dict[str, Any]:
68
68
  """
69
69
  Generate an answer for a question using retrieved chunks.
70
70
 
@@ -37,14 +37,10 @@ def get_vectorstore(
37
37
  with _cache_lock:
38
38
  if force_new and collection_name in _vectorstore_cache:
39
39
  del _vectorstore_cache[collection_name]
40
- logger.info(
41
- "Forced new Vectorstore instance for collection: %s", collection_name
42
- )
40
+ logger.info("Forced new Vectorstore instance for collection: %s", collection_name)
43
41
 
44
42
  if collection_name not in _vectorstore_cache:
45
- logger.info(
46
- "Creating new Vectorstore instance for collection: %s", collection_name
47
- )
43
+ logger.info("Creating new Vectorstore instance for collection: %s", collection_name)
48
44
  _vectorstore_cache[collection_name] = Vectorstore(
49
45
  embedding_model=embedding_model, config=config
50
46
  )
@@ -5,7 +5,7 @@ Handle COSINE -> IP conversion for GPU indexes
5
5
 
6
6
  import logging
7
7
  import subprocess
8
- from typing import Dict, Any, Tuple
8
+ from typing import Any
9
9
 
10
10
  logger = logging.getLogger(__name__)
11
11
 
@@ -56,7 +56,7 @@ def detect_nvidia_gpu(config=None) -> bool:
56
56
 
57
57
  def get_optimal_index_config(
58
58
  has_gpu: bool, embedding_dim: int = 768, use_cosine: bool = True
59
- ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
59
+ ) -> tuple[dict[str, Any], dict[str, Any]]:
60
60
  """
61
61
  Get optimal index and search parameters based on GPU availability.
62
62
 
@@ -80,9 +80,7 @@ def get_optimal_index_config(
80
80
  "GPU indexes don't support COSINE distance. "
81
81
  "Vectors will be normalized and IP distance will be used instead."
82
82
  )
83
- metric_type = (
84
- "IP" # Inner Product for normalized vectors = cosine similarity
85
- )
83
+ metric_type = "IP" # Inner Product for normalized vectors = cosine similarity
86
84
  else:
87
85
  metric_type = "IP" # Default to IP for GPU
88
86
 
@@ -120,9 +118,7 @@ def get_optimal_index_config(
120
118
  "index_type": "IVF_FLAT",
121
119
  "metric_type": metric_type,
122
120
  "params": {
123
- "nlist": min(
124
- 1024, max(64, embedding_dim // 8)
125
- ) # Dynamic nlist based on dimension
121
+ "nlist": min(1024, max(64, embedding_dim // 8)) # Dynamic nlist based on dimension
126
122
  },
127
123
  }
128
124
 
@@ -136,7 +132,7 @@ def get_optimal_index_config(
136
132
 
137
133
 
138
134
  def log_index_configuration(
139
- index_params: Dict[str, Any], search_params: Dict[str, Any], use_cosine: bool = True
135
+ index_params: dict[str, Any], search_params: dict[str, Any], use_cosine: bool = True
140
136
  ) -> None:
141
137
  """Log the selected index configuration for debugging."""
142
138
  index_type = index_params.get("index_type", "Unknown")
@@ -5,7 +5,7 @@ Rerank chunks instead of papers following traditional RAG pipeline
5
5
 
6
6
  import logging
7
7
  import os
8
- from typing import Any, List
8
+ from typing import Any
9
9
 
10
10
  from langchain_core.documents import Document
11
11
  from langchain_nvidia_ai_endpoints import NVIDIARerank
@@ -18,8 +18,8 @@ logger.setLevel(getattr(logging, log_level))
18
18
 
19
19
 
20
20
  def rerank_chunks(
21
- chunks: List[Document], query: str, config: Any, top_k: int = 25
22
- ) -> List[Document]:
21
+ chunks: list[Document], query: str, config: Any, top_k: int = 25
22
+ ) -> list[Document]:
23
23
  """
24
24
  Rerank chunks by relevance to the query using NVIDIA's reranker.
25
25
 
@@ -68,7 +68,7 @@ def rerank_chunks(
68
68
  # Log chunk metadata for debugging
69
69
  logger.debug(
70
70
  "Reranking chunks from papers: %s",
71
- list(set(chunk.metadata.get("paper_id", "unknown") for chunk in chunks))[:5],
71
+ list({chunk.metadata.get("paper_id", "unknown") for chunk in chunks})[:5],
72
72
  )
73
73
 
74
74
  # Rerank the chunks
@@ -3,7 +3,7 @@ Paper loading utilities for managing PDF documents in vector store.
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Any, Dict
6
+ from typing import Any
7
7
 
8
8
  from .batch_processor import add_papers_batch
9
9
 
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
12
12
 
13
13
  def load_all_papers(
14
14
  vector_store: Any, # The Vectorstore instance
15
- articles: Dict[str, Any],
15
+ articles: dict[str, Any],
16
16
  call_id: str,
17
17
  config: Any,
18
18
  has_gpu: bool,
@@ -3,8 +3,7 @@ RAG pipeline for retrieving and reranking chunks from a vector store.
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Any, List
7
-
6
+ from typing import Any
8
7
 
9
8
  # Import our GPU detection utility
10
9
  from .nvidia_nim_reranker import rerank_chunks
@@ -15,7 +14,7 @@ logger = logging.getLogger(__name__)
15
14
 
16
15
  def retrieve_and_rerank_chunks(
17
16
  vector_store: Any, query: str, config: Any, call_id: str, has_gpu: bool
18
- ) -> List[Any]:
17
+ ) -> list[Any]:
19
18
  """
20
19
  Traditional RAG pipeline: retrieve chunks from all papers, then rerank.
21
20
  Optimized for GPU/CPU hardware.
@@ -38,12 +37,8 @@ def retrieve_and_rerank_chunks(
38
37
  # Adjust initial retrieval count based on hardware
39
38
  if has_gpu:
40
39
  # GPU can handle larger initial retrieval efficiently
41
- initial_chunks_count = config.get(
42
- "initial_retrieval_k", 150
43
- ) # Increased for GPU
44
- mmr_diversity = config.get(
45
- "mmr_diversity", 0.75
46
- ) # Slightly more diverse for larger sets
40
+ initial_chunks_count = config.get("initial_retrieval_k", 150) # Increased for GPU
41
+ mmr_diversity = config.get("mmr_diversity", 0.75) # Slightly more diverse for larger sets
47
42
  else:
48
43
  # CPU - use conservative settings
49
44
  initial_chunks_count = config.get("initial_retrieval_k", 100) # Original
@@ -72,9 +67,7 @@ def retrieve_and_rerank_chunks(
72
67
  "%s: Retrieved %d chunks from %d unique papers using %s",
73
68
  call_id,
74
69
  len(retrieved_chunks),
75
- len(
76
- set(chunk.metadata.get("paper_id", "unknown") for chunk in retrieved_chunks)
77
- ),
70
+ len({chunk.metadata.get("paper_id", "unknown") for chunk in retrieved_chunks}),
78
71
  hardware_mode,
79
72
  )
80
73
 
@@ -95,9 +88,7 @@ def retrieve_and_rerank_chunks(
95
88
  )
96
89
 
97
90
  # Log final results with hardware info
98
- final_papers = len(
99
- set(chunk.metadata.get("paper_id", "unknown") for chunk in reranked_chunks)
100
- )
91
+ final_papers = len({chunk.metadata.get("paper_id", "unknown") for chunk in reranked_chunks})
101
92
 
102
93
  logger.info(
103
94
  "%s: Reranking complete using %s. Final %d chunks from %d unique papers",
@@ -6,11 +6,9 @@ With automatic GPU/CPU search parameter optimization.
6
6
 
7
7
  import logging
8
8
  import os
9
- from typing import List, Optional
10
9
 
11
10
  from langchain_core.documents import Document
12
11
 
13
-
14
12
  # Set up logging with configurable level
15
13
  log_level = os.environ.get("LOG_LEVEL", "INFO")
16
14
  logging.basicConfig(level=getattr(logging, log_level))
@@ -21,10 +19,10 @@ logger.setLevel(getattr(logging, log_level))
21
19
  def retrieve_relevant_chunks(
22
20
  vector_store,
23
21
  query: str,
24
- paper_ids: Optional[List[str]] = None,
22
+ paper_ids: list[str] | None = None,
25
23
  top_k: int = 100, # Increased default to cast wider net before reranking
26
24
  mmr_diversity: float = 0.8, # Slightly reduced for better diversity
27
- ) -> List[Document]:
25
+ ) -> list[Document]:
28
26
  """
29
27
  Retrieve the most relevant chunks for a query using maximal marginal relevance.
30
28
  Automatically uses GPU-optimized search parameters if GPU is available.
@@ -104,9 +102,7 @@ def retrieve_relevant_chunks(
104
102
  filter=filter_dict,
105
103
  )
106
104
 
107
- logger.info(
108
- "Retrieved %d chunks using %s MMR from Milvus", len(results), search_mode
109
- )
105
+ logger.info("Retrieved %d chunks using %s MMR from Milvus", len(results), search_mode)
110
106
 
111
107
  # Log some details about retrieved chunks for debugging
112
108
  if results and logger.isEnabledFor(logging.DEBUG):
@@ -132,10 +128,10 @@ def retrieve_relevant_chunks(
132
128
  def retrieve_relevant_chunks_with_scores(
133
129
  vector_store,
134
130
  query: str,
135
- paper_ids: Optional[List[str]] = None,
131
+ paper_ids: list[str] | None = None,
136
132
  top_k: int = 100,
137
133
  score_threshold: float = 0.0,
138
- ) -> List[tuple[Document, float]]:
134
+ ) -> list[tuple[Document, float]]:
139
135
  """
140
136
  Retrieve chunks with similarity scores, optimized for GPU/CPU.
141
137
 
@@ -186,9 +182,7 @@ def retrieve_relevant_chunks_with_scores(
186
182
  )
187
183
 
188
184
  # Filter by score threshold
189
- filtered_results = [
190
- (doc, score) for doc, score in results if score >= score_threshold
191
- ]
185
+ filtered_results = [(doc, score) for doc, score in results if score >= score_threshold]
192
186
 
193
187
  logger.info(
194
188
  "%s search with scores retrieved %d/%d chunks above threshold %.3f",
@@ -200,6 +194,4 @@ def retrieve_relevant_chunks_with_scores(
200
194
 
201
195
  return filtered_results
202
196
 
203
- raise NotImplementedError(
204
- "Vector store does not support similarity_search_with_score"
205
- )
197
+ raise NotImplementedError("Vector store does not support similarity_search_with_score")
@@ -6,7 +6,7 @@ Handles connection reuse, event loops, and GPU detection caching.
6
6
  import asyncio
7
7
  import logging
8
8
  import threading
9
- from typing import Any, Dict
9
+ from typing import Any
10
10
 
11
11
  from langchain_core.embeddings import Embeddings
12
12
  from langchain_milvus import Milvus
@@ -105,7 +105,7 @@ class VectorstoreSingleton:
105
105
  self,
106
106
  collection_name: str,
107
107
  embedding_model: Embeddings,
108
- connection_args: Dict[str, Any],
108
+ connection_args: dict[str, Any],
109
109
  ) -> Milvus:
110
110
  """Get or create a vector store for a collection."""
111
111
  if collection_name not in self._vector_stores:
@@ -3,8 +3,7 @@ Helper class for question and answer tool in PDF processing.
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Any, Dict
7
-
6
+ from typing import Any
8
7
 
9
8
  from .get_vectorstore import get_vectorstore
10
9
 
@@ -29,7 +28,7 @@ class QAToolHelper:
29
28
  self.call_id = call_id
30
29
  logger.debug("QAToolHelper started call %s", call_id)
31
30
 
32
- def get_state_models_and_data(self, state: dict) -> tuple[Any, Any, Dict[str, Any]]:
31
+ def get_state_models_and_data(self, state: dict) -> tuple[Any, Any, dict[str, Any]]:
33
32
  """Retrieve embedding model, LLM, and article data from agent state."""
34
33
  text_emb = state.get("text_embedding_model")
35
34
  if not text_emb:
@@ -78,7 +77,7 @@ class QAToolHelper:
78
77
 
79
78
  return vs
80
79
 
81
- def get_hardware_stats(self) -> Dict[str, Any]:
80
+ def get_hardware_stats(self) -> dict[str, Any]:
82
81
  """Get current hardware configuration stats for monitoring."""
83
82
  return {
84
83
  "gpu_available": self.has_gpu,
@@ -5,7 +5,6 @@ and use IP (Inner Product) distance instead.
5
5
  """
6
6
 
7
7
  import logging
8
- from typing import List, Union
9
8
 
10
9
  import numpy as np
11
10
  from langchain_core.embeddings import Embeddings
@@ -13,7 +12,7 @@ from langchain_core.embeddings import Embeddings
13
12
  logger = logging.getLogger(__name__)
14
13
 
15
14
 
16
- def normalize_vector(vector: Union[List[float], np.ndarray]) -> List[float]:
15
+ def normalize_vector(vector: list[float] | np.ndarray) -> list[float]:
17
16
  """
18
17
  Normalize a single vector to unit length.
19
18
 
@@ -34,7 +33,7 @@ def normalize_vector(vector: Union[List[float], np.ndarray]) -> List[float]:
34
33
  return normalized.tolist()
35
34
 
36
35
 
37
- def normalize_vectors_batch(vectors: List[List[float]]) -> List[List[float]]:
36
+ def normalize_vectors_batch(vectors: list[list[float]]) -> list[list[float]]:
38
37
  """
39
38
  Normalize a batch of vectors to unit length.
40
39
 
@@ -56,9 +55,7 @@ def normalize_vectors_batch(vectors: List[List[float]]) -> List[List[float]]:
56
55
  # Handle zero vectors
57
56
  zero_mask = norms.flatten() == 0
58
57
  if np.any(zero_mask):
59
- logger.warning(
60
- "Found %d zero vectors during batch normalization", np.sum(zero_mask)
61
- )
58
+ logger.warning("Found %d zero vectors during batch normalization", np.sum(zero_mask))
62
59
  norms[zero_mask] = 1.0 # Avoid division by zero
63
60
 
64
61
  # Normalize
@@ -85,11 +82,9 @@ class NormalizingEmbeddings(Embeddings):
85
82
  self.normalize_for_gpu = normalize_for_gpu
86
83
 
87
84
  if normalize_for_gpu:
88
- logger.info(
89
- "Embedding model wrapped with normalization for GPU compatibility"
90
- )
85
+ logger.info("Embedding model wrapped with normalization for GPU compatibility")
91
86
 
92
- def embed_documents(self, texts: List[str]) -> List[List[float]]:
87
+ def embed_documents(self, texts: list[str]) -> list[list[float]]:
93
88
  """Embed documents and optionally normalize."""
94
89
  embeddings = self.embedding_model.embed_documents(texts)
95
90
 
@@ -99,7 +94,7 @@ class NormalizingEmbeddings(Embeddings):
99
94
 
100
95
  return embeddings
101
96
 
102
- def embed_query(self, text: str) -> List[float]:
97
+ def embed_query(self, text: str) -> list[float]:
103
98
  """Embed query and optionally normalize."""
104
99
  embedding = self.embedding_model.embed_query(text)
105
100
 
@@ -128,13 +123,9 @@ def should_normalize_vectors(has_gpu: bool, use_cosine: bool) -> bool:
128
123
  needs_normalization = has_gpu and use_cosine
129
124
 
130
125
  if needs_normalization:
131
- logger.info(
132
- "Vector normalization ENABLED: GPU detected with COSINE similarity request"
133
- )
126
+ logger.info("Vector normalization ENABLED: GPU detected with COSINE similarity request")
134
127
  else:
135
- logger.info(
136
- "Vector normalization DISABLED: GPU=%s, COSINE=%s", has_gpu, use_cosine
137
- )
128
+ logger.info("Vector normalization DISABLED: GPU=%s, COSINE=%s", has_gpu, use_cosine)
138
129
 
139
130
  return needs_normalization
140
131
 
@@ -8,7 +8,7 @@ Supports both GPU and CPU configurations.
8
8
  import logging
9
9
  import os
10
10
  import time
11
- from typing import Any, Dict, List, Optional
11
+ from typing import Any
12
12
 
13
13
  from langchain_core.documents import Document
14
14
  from langchain_core.embeddings import Embeddings
@@ -39,7 +39,7 @@ class Vectorstore:
39
39
  def __init__(
40
40
  self,
41
41
  embedding_model: Embeddings,
42
- metadata_fields: Optional[List[str]] = None,
42
+ metadata_fields: list[str] | None = None,
43
43
  config: Any = None,
44
44
  ):
45
45
  """
@@ -96,12 +96,8 @@ class Vectorstore:
96
96
 
97
97
  # Initialize Milvus connection parameters with environment variable fallback
98
98
  self.connection_args = {
99
- "host": (
100
- config.milvus.host if config else os.getenv("MILVUS_HOST", "127.0.0.1")
101
- ),
102
- "port": (
103
- config.milvus.port if config else int(os.getenv("MILVUS_PORT", "19530"))
104
- ),
99
+ "host": (config.milvus.host if config else os.getenv("MILVUS_HOST", "127.0.0.1")),
100
+ "port": (config.milvus.port if config else int(os.getenv("MILVUS_PORT", "19530"))),
105
101
  }
106
102
  # Log the connection parameters being used
107
103
  logger.info(
@@ -109,9 +105,7 @@ class Vectorstore:
109
105
  self.connection_args["host"],
110
106
  self.connection_args["port"],
111
107
  )
112
- self.collection_name = (
113
- config.milvus.collection_name if config else "pdf_rag_documents"
114
- )
108
+ self.collection_name = config.milvus.collection_name if config else "pdf_rag_documents"
115
109
  self.db_name = config.milvus.db_name if config else "pdf_rag_db"
116
110
 
117
111
  # Get singleton instance
@@ -139,8 +133,8 @@ class Vectorstore:
139
133
  self._ensure_collection_loaded()
140
134
 
141
135
  # Store for document metadata (keeping for compatibility)
142
- self.documents: Dict[str, Document] = {}
143
- self.paper_metadata: Dict[str, Dict[str, Any]] = {}
136
+ self.documents: dict[str, Document] = {}
137
+ self.paper_metadata: dict[str, dict[str, Any]] = {}
144
138
 
145
139
  # Log final configuration
146
140
  metric_info = (
@@ -182,9 +176,7 @@ class Vectorstore:
182
176
  langchain_collection = getattr(self.vector_store, "collection", None)
183
177
 
184
178
  if langchain_collection is None:
185
- logger.warning(
186
- "No LangChain collection found, proceeding with empty loaded_papers"
187
- )
179
+ logger.warning("No LangChain collection found, proceeding with empty loaded_papers")
188
180
  return
189
181
 
190
182
  # Force flush and check entity count
@@ -204,14 +196,14 @@ class Vectorstore:
204
196
  )
205
197
 
206
198
  # Extract unique paper IDs
207
- existing_paper_ids = set(result["paper_id"] for result in results)
199
+ existing_paper_ids = {result["paper_id"] for result in results}
208
200
  self.loaded_papers.update(existing_paper_ids)
209
201
 
210
202
  logger.info("Found %d unique papers in collection", len(existing_paper_ids))
211
203
  else:
212
204
  logger.info("Collection is empty - no existing papers")
213
205
 
214
- def similarity_search(self, query: str, **kwargs: Any) -> List[Document]:
206
+ def similarity_search(self, query: str, **kwargs: Any) -> list[Document]:
215
207
  """
216
208
  Perform similarity search on the vector store.
217
209
  Query embedding will be automatically normalized if using GPU with COSINE.
@@ -222,7 +214,7 @@ class Vectorstore:
222
214
  """
223
215
  # Extract our parameters
224
216
  k: int = kwargs.pop("k", 4)
225
- filter_: Optional[Dict[str, Any]] = kwargs.pop("filter", None)
217
+ filter_: dict[str, Any] | None = kwargs.pop("filter", None)
226
218
 
227
219
  # Build Milvus expr from filter_, if present
228
220
  expr = None
@@ -232,22 +224,16 @@ class Vectorstore:
232
224
  if isinstance(value, str):
233
225
  conditions.append(f'{key} == "{value}"')
234
226
  elif isinstance(value, list):
235
- vals = ", ".join(
236
- f'"{v}"' if isinstance(v, str) else str(v) for v in value
237
- )
227
+ vals = ", ".join(f'"{v}"' if isinstance(v, str) else str(v) for v in value)
238
228
  conditions.append(f"{key} in [{vals}]")
239
229
  else:
240
230
  conditions.append(f"{key} == {value}")
241
231
  expr = " and ".join(conditions)
242
232
 
243
233
  # Delegate to the wrapped store
244
- return self.vector_store.similarity_search(
245
- query=query, k=k, expr=expr, **kwargs
246
- )
234
+ return self.vector_store.similarity_search(query=query, k=k, expr=expr, **kwargs)
247
235
 
248
- def max_marginal_relevance_search(
249
- self, query: str, **kwargs: Any
250
- ) -> List[Document]:
236
+ def max_marginal_relevance_search(self, query: str, **kwargs: Any) -> list[Document]:
251
237
  """
252
238
  Perform MMR search on the vector store.
253
239
  Query embedding will be automatically normalized if using GPU with COSINE.
@@ -262,7 +248,7 @@ class Vectorstore:
262
248
  k: int = kwargs.pop("k", 4)
263
249
  fetch_k: int = kwargs.pop("fetch_k", 20)
264
250
  lambda_mult: float = kwargs.pop("lambda_mult", 0.5)
265
- filter_: Optional[Dict[str, Any]] = kwargs.pop("filter", None)
251
+ filter_: dict[str, Any] | None = kwargs.pop("filter", None)
266
252
 
267
253
  # Build Milvus expr from filter_, if present
268
254
  expr = None
@@ -272,9 +258,7 @@ class Vectorstore:
272
258
  if isinstance(value, str):
273
259
  conditions.append(f'{key} == "{value}"')
274
260
  elif isinstance(value, list):
275
- vals = ", ".join(
276
- f'"{v}"' if isinstance(v, str) else str(v) for v in value
277
- )
261
+ vals = ", ".join(f'"{v}"' if isinstance(v, str) else str(v) for v in value)
278
262
  conditions.append(f"{key} in [{vals}]")
279
263
  else:
280
264
  conditions.append(f"{key} == {value}")
@@ -330,7 +314,7 @@ class Vectorstore:
330
314
  else:
331
315
  logger.info("Collection is empty, skipping load operation")
332
316
 
333
- def get_embedding_info(self) -> Dict[str, Any]:
317
+ def get_embedding_info(self) -> dict[str, Any]:
334
318
  """Get information about the embedding configuration."""
335
319
  return {
336
320
  "has_gpu": self.has_gpu,
@@ -2,12 +2,14 @@
2
2
  This file is used to import all the modules in the package.
3
3
  """
4
4
 
5
- from . import display_dataframe
6
- from . import multi_paper_rec
7
- from . import search
8
- from . import single_paper_rec
9
- from . import query_dataframe
10
- from . import retrieve_semantic_scholar_paper_id
5
+ from . import (
6
+ display_dataframe,
7
+ multi_paper_rec,
8
+ query_dataframe,
9
+ retrieve_semantic_scholar_paper_id,
10
+ search,
11
+ single_paper_rec,
12
+ )
11
13
 
12
14
  __all__ = [
13
15
  "display_dataframe",
@@ -12,17 +12,15 @@ only displays the existing list. If no papers are available, it raises NoPapersF
12
12
  to signal that a search or recommendation must be executed first.
13
13
  """
14
14
 
15
-
16
15
  import logging
17
-
18
16
  from typing import Annotated
19
- from pydantic import BaseModel, Field
17
+
20
18
  from langchain_core.messages import ToolMessage
21
19
  from langchain_core.tools import tool
22
20
  from langchain_core.tools.base import InjectedToolCallId
23
21
  from langgraph.prebuilt import InjectedState
24
22
  from langgraph.types import Command
25
-
23
+ from pydantic import BaseModel, Field
26
24
 
27
25
  # Configure logging
28
26
  logging.basicConfig(level=logging.INFO)
@@ -97,9 +95,7 @@ def display_dataframe(
97
95
  artifact = state.get(context_val)
98
96
  if not artifact:
99
97
  logger.info("No papers found in state, raising NoPapersFoundError")
100
- raise NoPapersFoundError(
101
- "No papers found. A search/rec needs to be performed first."
102
- )
98
+ raise NoPapersFoundError("No papers found. A search/rec needs to be performed first.")
103
99
  content = f"{len(artifact)} papers found. Papers are attached as an artifact."
104
100
  return Command(
105
101
  update={
@@ -9,14 +9,15 @@ of recommended papers.
9
9
  """
10
10
 
11
11
  import logging
12
- from typing import Annotated, Any, List, Optional
12
+ from typing import Annotated, Any
13
+
13
14
  from langchain_core.messages import ToolMessage
14
15
  from langchain_core.tools import tool
15
16
  from langchain_core.tools.base import InjectedToolCallId
16
17
  from langgraph.types import Command
17
18
  from pydantic import BaseModel, Field
18
- from .utils.multi_helper import MultiPaperRecData
19
19
 
20
+ from .utils.multi_helper import MultiPaperRecData
20
21
 
21
22
  # Configure logging
22
23
  logging.basicConfig(level=logging.INFO)
@@ -34,7 +35,7 @@ class MultiPaperRecInput(BaseModel):
34
35
  tool_call_id: Internal tool call identifier injected by the system.
35
36
  """
36
37
 
37
- paper_ids: List[str] = Field(
38
+ paper_ids: list[str] = Field(
38
39
  description="List of 40-character Semantic Scholar Paper IDs"
39
40
  "(at least two) to base recommendations on"
40
41
  )
@@ -44,7 +45,7 @@ class MultiPaperRecInput(BaseModel):
44
45
  ge=1,
45
46
  le=500,
46
47
  )
47
- year: Optional[str] = Field(
48
+ year: str | None = Field(
48
49
  default=None,
49
50
  description="Publication year filter; supports formats:"
50
51
  "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
@@ -59,10 +60,10 @@ class MultiPaperRecInput(BaseModel):
59
60
  parse_docstring=True,
60
61
  )
61
62
  def get_multi_paper_recommendations(
62
- paper_ids: List[str],
63
+ paper_ids: list[str],
63
64
  tool_call_id: Annotated[str, InjectedToolCallId],
64
65
  limit: int = 10,
65
- year: Optional[str] = None,
66
+ year: str | None = None,
66
67
  ) -> Command[Any]:
67
68
  """
68
69
  Recommend related research papers using the Semantic Scholar API.