aurelian 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +95 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +86 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +47 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +73 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +54 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +52 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
  121. Regulatory Processes in GO-CAM.docx +0 -0
  122. Regulatory Processes in GO-CAM.pdf +0 -0
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +243 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +64 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +181 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +75 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +0 -0
  160. aurelian/agents/monarch/monarch_agent.py +45 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +112 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +57 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/paperqa/__init__.py +27 -0
  176. aurelian/agents/paperqa/paperqa_agent.py +66 -0
  177. aurelian/agents/paperqa/paperqa_cli.py +305 -0
  178. aurelian/agents/paperqa/paperqa_config.py +142 -0
  179. aurelian/agents/paperqa/paperqa_gradio.py +90 -0
  180. aurelian/agents/paperqa/paperqa_mcp.py +155 -0
  181. aurelian/agents/paperqa/paperqa_tools.py +566 -0
  182. aurelian/agents/phenopackets/__init__.py +3 -0
  183. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  184. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  185. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  186. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  187. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  188. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  189. aurelian/agents/rag/__init__.py +40 -0
  190. aurelian/agents/rag/rag_agent.py +84 -0
  191. aurelian/agents/rag/rag_config.py +80 -0
  192. aurelian/agents/rag/rag_gradio.py +67 -0
  193. aurelian/agents/rag/rag_mcp.py +107 -0
  194. aurelian/agents/rag/rag_tools.py +189 -0
  195. aurelian/agents/rag_agent.py +54 -0
  196. aurelian/agents/robot/__init__.py +0 -0
  197. aurelian/agents/robot/assets/__init__.py +3 -0
  198. aurelian/agents/robot/assets/template.md +384 -0
  199. aurelian/agents/robot/robot_config.py +25 -0
  200. aurelian/agents/robot/robot_gradio.py +46 -0
  201. aurelian/agents/robot/robot_mcp.py +100 -0
  202. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  203. aurelian/agents/robot/robot_tools.py +50 -0
  204. aurelian/agents/talisman/__init__.py +3 -0
  205. aurelian/agents/talisman/__main__.py +17 -0
  206. aurelian/agents/talisman/cli.py +70 -0
  207. aurelian/agents/talisman/run_talisman.py +18 -0
  208. aurelian/agents/talisman/talisman_agent.py +143 -0
  209. aurelian/agents/talisman/talisman_config.py +66 -0
  210. aurelian/agents/talisman/talisman_gradio.py +50 -0
  211. aurelian/agents/talisman/talisman_mcp.py +75 -0
  212. aurelian/agents/talisman/talisman_tools.py +962 -0
  213. aurelian/agents/ubergraph/__init__.py +40 -0
  214. aurelian/agents/ubergraph/ubergraph_agent.py +72 -0
  215. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  216. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  217. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  218. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  219. aurelian/agents/uniprot/__init__.py +0 -0
  220. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  221. aurelian/agents/uniprot/uniprot_config.py +43 -0
  222. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  223. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  224. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  225. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  226. aurelian/agents/web/__init__.py +0 -0
  227. aurelian/agents/web/web_config.py +27 -0
  228. aurelian/agents/web/web_gradio.py +48 -0
  229. aurelian/agents/web/web_mcp.py +50 -0
  230. aurelian/agents/web/web_tools.py +121 -0
  231. aurelian/chat.py +23 -0
  232. aurelian/cli.py +1004 -0
  233. aurelian/dependencies/__init__.py +0 -0
  234. aurelian/dependencies/workdir.py +78 -0
  235. aurelian/evaluators/model.py +9 -0
  236. aurelian/evaluators/substring_evaluator.py +30 -0
  237. aurelian/mcp/__init__.py +0 -0
  238. aurelian/mcp/amigo_mcp_test.py +86 -0
  239. aurelian/mcp/config_generator.py +123 -0
  240. aurelian/mcp/example_config.json +43 -0
  241. aurelian/mcp/generate_sample_config.py +37 -0
  242. aurelian/mcp/gocam_mcp_test.py +126 -0
  243. aurelian/mcp/linkml_mcp_tools.py +190 -0
  244. aurelian/mcp/mcp_discovery.py +87 -0
  245. aurelian/mcp/mcp_test.py +31 -0
  246. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  247. aurelian/tools/__init__.py +0 -0
  248. aurelian/tools/web/__init__.py +0 -0
  249. aurelian/tools/web/url_download.py +51 -0
  250. aurelian/utils/__init__.py +0 -0
  251. aurelian/utils/async_utils.py +18 -0
  252. aurelian/utils/data_utils.py +32 -0
  253. aurelian/utils/documentation_manager.py +59 -0
  254. aurelian/utils/doi_fetcher.py +238 -0
  255. aurelian/utils/ontology_utils.py +68 -0
  256. aurelian/utils/pdf_fetcher.py +23 -0
  257. aurelian/utils/process_logs.py +100 -0
  258. aurelian/utils/pubmed_utils.py +238 -0
  259. aurelian/utils/pytest_report_to_markdown.py +67 -0
  260. aurelian/utils/robot_ontology_utils.py +112 -0
  261. aurelian/utils/search_utils.py +95 -0
  262. aurelian-0.1.0.dist-info/LICENSE +22 -0
  263. aurelian-0.1.0.dist-info/METADATA +109 -0
  264. aurelian-0.1.0.dist-info/RECORD +266 -0
  265. aurelian-0.1.0.dist-info/WHEEL +4 -0
  266. aurelian-0.1.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,566 @@
1
+ """
2
+ Tools for the PaperQA agent.
3
+ """
4
+ import os
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import List, Dict, Any, Optional
8
+
9
+ from pydantic_ai import RunContext, ModelRetry
10
+
11
+ from paperqa import Docs, agent_query
12
+ from paperqa.agents.search import get_directory_index
13
+
14
+ from .paperqa_config import PaperQADependencies
15
+
16
+
17
def create_response(success: bool, paper_directory: str, doc_files: dict,
                    indexed_files: Optional[dict] = None, **kwargs) -> dict:
    """Create a standardized response dictionary.

    Args:
        success: Whether the operation was successful
        paper_directory: Path to the paper directory
        doc_files: Dictionary with document files by type; the keys
            'all', 'pdf', 'txt', 'html' and 'md' are read
        indexed_files: Optional collection of indexed files. A mapping
            contributes its keys; any other sized iterable (e.g. a list of
            file names) contributes its items. When None, no index fields
            are added to the response.
        **kwargs: Additional key-value pairs to include in the response;
            they are applied last and so can override the standard keys

    Returns:
        A standardized response dictionary with 'success',
        'paper_directory' and 'document_counts', plus
        'indexed_chunks_count' / 'indexed_papers' when indexed_files is
        given, plus everything in kwargs
    """
    document_counts = {
        'total': len(doc_files['all']),
        'pdf': len(doc_files['pdf']),
        'txt': len(doc_files['txt']),
        'html': len(doc_files['html']),
        'md': len(doc_files['md']),
    }

    response = {
        "success": success,
        "paper_directory": paper_directory,
        "document_counts": document_counts,
    }

    if indexed_files is not None:
        response["indexed_chunks_count"] = len(indexed_files)
        if hasattr(indexed_files, 'keys'):
            response["indexed_papers"] = list(indexed_files.keys())
        else:
            # Callers may pass a plain list of names; report those names
            # instead of silently dropping them.
            response["indexed_papers"] = list(indexed_files)

    response.update(kwargs)

    return response
52
+
53
+ logger = logging.getLogger(__name__)
54
+
55
+
56
def get_document_files(directory: str) -> Dict[str, List[str]]:
    """
    Get all indexable document files in the given directory.

    Only regular files directly inside the directory are considered
    (no recursion). Extensions are matched case-insensitively.

    Args:
        directory: Directory to search for document files

    Returns:
        dict: File names (not full paths) under the keys 'all', 'pdf',
        'txt', 'html' and 'md'. Every list is sorted by name so the result
        does not depend on the filesystem's listing order. All lists are
        empty when the directory does not exist or is not a directory.
    """
    document_extensions = ('.pdf', '.txt', '.html', '.md')
    all_files: List[str] = []

    dir_path = Path(directory)
    if dir_path.is_dir():
        # str.endswith accepts a tuple, so one call covers every extension.
        all_files = sorted(
            entry.name for entry in dir_path.iterdir()
            if entry.is_file() and entry.name.lower().endswith(document_extensions)
        )

    result: Dict[str, List[str]] = {'all': all_files}
    for extension in document_extensions:
        result[extension.lstrip('.')] = [
            name for name in all_files if name.lower().endswith(extension)
        ]
    return result
81
+
82
+
83
async def search_papers(
    ctx: RunContext[PaperQADependencies],
    query: str,
    max_papers: Optional[int] = None,
) -> Any:
    """
    Search for papers relevant to the query using PaperQA.

    The existing index is loaded first; if loading fails with the
    "was empty, please rebuild it" error, one rebuild is attempted before
    giving up.

    Args:
        ctx: The run context
        query: The search query
        max_papers: Maximum number of papers to return (overrides config);
            when given it is written to settings.agent.search_count

    Returns:
        The result of `agent_query`, or a dict with a 'message' and an
        empty 'papers' list when no papers are indexed

    Raises:
        ModelRetry: On any other failure, wrapping the original error
    """
    empty_index_marker = "was empty, please rebuild it"

    def no_papers_response() -> dict:
        # Built fresh on each use so callers never share a mutable dict.
        return {
            "message": "No papers are currently indexed. You can add papers using the add_paper function.",
            "papers": []
        }

    try:
        settings = ctx.deps.set_paperqa_settings()

        if max_papers is not None:
            settings.agent.search_count = max_papers

        try:
            index = await get_directory_index(settings=settings, build=False)
            index_files = await index.index_files
            logger.info(f"Found existing index with {len(index_files)} files")
        except Exception as e:
            # Only the "empty index" failure is recoverable here.
            if empty_index_marker not in str(e):
                raise

            logger.info("Index is empty, attempting to rebuild...")
            index = await get_directory_index(settings=settings, build=True)
            index_files = await index.index_files

            if not index_files:
                return no_papers_response()

        response = await agent_query(
            query=f"Find scientific papers about: {query}",
            settings=settings
        )

        return response
    except ModelRetry:
        # Already in the form the agent framework expects; pass it through.
        raise
    except Exception as e:
        if empty_index_marker in str(e):
            return no_papers_response()

        raise ModelRetry(f"Error searching papers: {str(e)}") from e
141
+
142
+
143
async def query_papers(
    ctx: RunContext[PaperQADependencies],
    query: str,
) -> Any:
    """
    Query the papers to answer a specific question using PaperQA.

    Unlike a search, this never rebuilds the index: if the index is
    missing its files or reports itself empty, a "no papers" response is
    returned instead.

    Args:
        ctx: The run context
        query: The question to answer based on the papers

    Returns:
        The result of `agent_query`, or a dict with a 'message' and an
        empty 'papers' list when no papers are indexed

    Raises:
        ModelRetry: On any other failure, wrapping the original error
    """
    empty_index_marker = "was empty, please rebuild it"

    def no_papers_response() -> dict:
        # Built fresh on each use so callers never share a mutable dict.
        return {
            "message": "No papers are currently indexed. You can add papers using the add_paper function.",
            "papers": []
        }

    try:
        settings = ctx.deps.set_paperqa_settings()

        try:
            # Load the index without building it.
            index = await get_directory_index(settings=settings, build=False)
            index_files = await index.index_files
        except Exception as e:
            if empty_index_marker in str(e):
                return no_papers_response()
            raise

        if not index_files:
            return no_papers_response()

        response = await agent_query(
            query=query,
            settings=settings
        )

        return response
    except ModelRetry:
        # Already in the form the agent framework expects; pass it through.
        raise
    except Exception as e:
        if empty_index_marker in str(e):
            return no_papers_response()

        raise ModelRetry(f"Error querying papers: {str(e)}") from e
197
+
198
+
199
async def build_index(
    ctx: RunContext[PaperQADependencies],
) -> Any:
    """
    Rebuild the search index for papers.

    The paper directory is created if it does not exist. Failures raised
    while indexing are reported in the returned dictionary
    (success=False) rather than raised.

    Args:
        ctx: The run context

    Returns:
        A `create_response` dictionary describing the documents found and
        the indexing outcome; on an indexing error it also carries an
        'error' key

    Raises:
        ModelRetry: If preparing the settings or the paper directory fails
    """
    try:
        settings = ctx.deps.set_paperqa_settings()
        paper_directory = settings.agent.index.paper_directory

        os.makedirs(paper_directory, exist_ok=True)

        doc_files = get_document_files(paper_directory)

        if not doc_files['all']:
            return create_response(
                success=True,
                paper_directory=paper_directory,
                doc_files=doc_files,
                indexed_files={},
                message=f"No indexable documents found in {paper_directory}. Add documents (PDF, TXT, HTML, MD) to this directory before indexing."
            )

        try:
            logger.info(f"Building index for {len(doc_files['all'])} documents in {paper_directory}:")
            for key, label in (('pdf', 'PDF'), ('txt', 'text'), ('html', 'HTML'), ('md', 'Markdown')):
                if doc_files[key]:
                    logger.info(f" - {len(doc_files[key])} {label} files")

            index = await get_directory_index(settings=settings, build=True)
            index_files = await index.index_files

            if not index_files:
                return create_response(
                    success=True,
                    paper_directory=paper_directory,
                    doc_files=doc_files,
                    indexed_files={},
                    documents_found=doc_files,
                    message=f"Found {len(doc_files['all'])} documents but none were successfully indexed. This could be due to parsing issues with the documents."
                )

            return create_response(
                success=True,
                paper_directory=paper_directory,
                doc_files=doc_files,
                indexed_files=index_files,
                message=f"Successfully indexed {len(index_files)} document chunks from {len(doc_files['all'])} files."
            )
        except Exception as e:
            # Indexing problems are reported to the caller, not raised.
            return create_response(
                success=False,
                paper_directory=paper_directory,
                doc_files=doc_files,
                message=f"Error indexing documents: {str(e)}",
                error=str(e)
            )
    except ModelRetry:
        # Already in the form the agent framework expects; pass it through.
        raise
    except Exception as e:
        raise ModelRetry(f"Error building index: {str(e)}") from e
272
+
273
+
274
async def add_paper(
    ctx: RunContext[PaperQADependencies],
    path: str,
    citation: Optional[str] = None,
    auto_index: bool = True,
) -> Any:
    """
    Add a specific paper to the collection.

    URLs are downloaded into the paper directory and then added; if that
    fails, `Docs.aadd_url` is used as a fallback. Relative file paths are
    looked up in `ctx.deps.paper_directory` and then in
    `ctx.deps.workdir.location`. Files that live outside the paper
    directory are copied into it unless a file of that name already
    exists there.

    Args:
        ctx: The run context
        path: Path to the paper file or URL
        citation: Optional citation for the paper
        auto_index: Whether to automatically rebuild the index after adding the paper

    Returns:
        A dict with 'success', 'docname', 'doc' and 'message' (plus
        'index_result' when auto_index ran), or a dict with success=False
        when `Docs` returned no document name

    Raises:
        ModelRetry: On any unexpected failure, wrapping the original error
    """
    import shutil

    try:
        settings = ctx.deps.set_paperqa_settings()

        paper_directory = settings.agent.index.paper_directory
        os.makedirs(paper_directory, exist_ok=True)

        if path.startswith(("http://", "https://")):
            import requests
            from urllib.parse import urlparse

            url_parts = urlparse(path)
            file_name = os.path.basename(url_parts.path)
            if not file_name or not file_name.lower().endswith('.pdf'):
                file_name = "paper.pdf"

            target_path = os.path.join(paper_directory, file_name)
            # Download next to the target and move into place only when
            # complete, so a failed transfer never leaves a truncated
            # document (or clobbers an existing one) in the paper directory.
            partial_path = f"{target_path}.part"

            try:
                # NOTE(review): requests is blocking and runs on the event
                # loop here; the timeout bounds how long a stalled server
                # can hold it.
                with requests.get(path, stream=True, timeout=60) as response:
                    response.raise_for_status()

                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)

                os.replace(partial_path, target_path)
                logger.info(f"Downloaded {path} to {target_path}")

                docs = Docs()
                docname = await docs.aadd(
                    path=target_path,
                    citation=citation,
                    settings=settings,
                )
            except Exception as e:
                if os.path.exists(partial_path):
                    os.remove(partial_path)

                # If download fails, fall back to docs.aadd_url
                logger.warning(f"Download failed: {str(e)}, falling back to docs.aadd_url")
                docs = Docs()
                docname = await docs.aadd_url(
                    url=path,
                    citation=citation,
                    settings=settings,
                )

                # If aadd_url succeeded, try to find where it saved the file
                # and keep a copy in the paper directory.
                if docname and hasattr(docs, 'docs') and docname in docs.docs:
                    doc = docs.docs[docname]
                    if hasattr(doc, 'filepath') and os.path.exists(doc.filepath):
                        target_path = os.path.join(paper_directory, f"{docname}.pdf")
                        if not os.path.exists(target_path):
                            shutil.copy2(doc.filepath, target_path)
                            logger.info(f"Copied from {doc.filepath} to {target_path}")
        else:
            # Resolve relative paths against the known locations.
            if not os.path.isabs(path):
                full_path = os.path.join(ctx.deps.paper_directory, path)
                if os.path.exists(full_path):
                    path = full_path
                else:
                    full_path = os.path.join(ctx.deps.workdir.location, path)
                    if os.path.exists(full_path):
                        path = full_path

            # Compare resolved paths: a substring test misfires on look-alike
            # directory names and on relative vs. absolute spellings.
            already_inside = Path(paper_directory).resolve() in Path(path).resolve().parents
            if os.path.exists(path) and not already_inside:
                target_path = os.path.join(paper_directory, os.path.basename(path))
                if not os.path.exists(target_path):
                    shutil.copy2(path, target_path)

            docs = Docs()
            docname = await docs.aadd(
                path=path,
                citation=citation,
                settings=settings,
            )

        if not docname:
            return {
                "success": False,
                "message": "Paper was already in the collection."
            }

        doc = next((d for d in docs.docs.values() if d.docname == docname), None)

        result = {
            "success": True,
            "docname": docname,
            "doc": doc,
        }

        if auto_index:
            try:
                index_result = await build_index(ctx)
                result["index_result"] = index_result
                if index_result["success"]:
                    # build_index reports indexed entries under
                    # 'indexed_papers'; it has no 'indexed_papers_count' key.
                    indexed_count = len(index_result.get("indexed_papers", []))
                    result["message"] = f"Paper added and indexed successfully. {indexed_count} papers now in the index."
                else:
                    failure = index_result.get("error", index_result.get("message", "unknown error"))
                    result["message"] = f"Paper added but indexing failed: {failure}"
            except Exception as e:
                result["message"] = f"Paper added but indexing failed: {str(e)}"
        else:
            result["message"] = "Paper added successfully. Use 'aurelian paperqa index' to rebuild the index to make this paper searchable."

        return result
    except ModelRetry:
        # Already in the form the agent framework expects; pass it through.
        raise
    except Exception as e:
        raise ModelRetry(f"Error adding paper: {str(e)}") from e
406
+
407
+
408
async def add_papers(
    ctx: RunContext[PaperQADependencies],
    directory: str,
    citation: Optional[str] = None,
    auto_index: bool = True,
) -> Any:
    """
    Add multiple papers from a directory to the collection.

    Each document found by `get_document_files` is copied into the paper
    directory (unless it already lives there or a file of that name
    exists) and added to a `Docs` collection. A failure on one document is
    logged and does not stop the others.

    Args:
        ctx: The run context
        directory: Path to the directory containing papers
        citation: Optional citation format to use for all papers (paper filename will be appended)
        auto_index: Whether to automatically rebuild the index after adding the papers

    Returns:
        A `create_response` dictionary. On success it includes
        'documents_added', 'added_documents' and, when the index was
        rebuilt, 'index_result'. When the directory is missing or holds no
        indexable documents, success is False and 'message' says why.

    Raises:
        ModelRetry: On any unexpected failure, wrapping the original error
    """
    import shutil

    try:
        settings = ctx.deps.set_paperqa_settings()
        paper_directory = settings.agent.index.paper_directory
        os.makedirs(paper_directory, exist_ok=True)

        if not Path(directory).is_dir():
            return create_response(
                success=False,
                paper_directory=paper_directory,
                doc_files={"all": [], "pdf": [], "txt": [], "html": [], "md": []},
                message=f"Directory not found: {directory}"
            )

        doc_files = get_document_files(directory)

        if not doc_files['all']:
            return create_response(
                success=False,
                paper_directory=paper_directory,
                doc_files=doc_files,
                message=f"No indexable documents found in {directory}."
            )

        logger.info(f"Found {len(doc_files['all'])} documents in {directory}:")
        for key, label in (('pdf', 'PDF'), ('txt', 'text'), ('html', 'HTML'), ('md', 'Markdown')):
            if doc_files[key]:
                logger.info(f" - {len(doc_files[key])} {label} files")

        docs = Docs()
        added_papers = []
        # Resolved once: the comparison below must not depend on how the
        # two directories happen to be spelled.
        resolved_paper_directory = Path(paper_directory).resolve()

        for doc_file in doc_files['all']:
            file_path = os.path.join(directory, doc_file)
            try:
                logger.info(f"Adding document: {file_path}")

                doc_citation = None
                if citation:
                    doc_citation = f"{citation} - {doc_file}"

                source = Path(file_path)
                already_inside = resolved_paper_directory in source.resolve().parents
                if source.exists() and not already_inside:
                    target_path = os.path.join(paper_directory, os.path.basename(file_path))
                    if not Path(target_path).exists():
                        shutil.copy2(file_path, target_path)
                        logger.info(f"Copied {file_path} to {target_path}")

                docname = await docs.aadd(
                    path=file_path,
                    citation=doc_citation,
                    settings=settings,
                )
                if docname:
                    doc = next((d for d in docs.docs.values() if d.docname == docname), None)
                    added_papers.append({
                        "file": doc_file,
                        "docname": docname,
                        "citation": doc_citation,
                        "doc": doc
                    })
                    logger.info(f"Successfully added document: {doc_file}")
            except Exception as e:
                # Best effort: one unreadable document must not abort the batch.
                logger.error(f"Error adding {file_path}: {e}")

        index_result = None
        if auto_index and added_papers:
            try:
                index_result = await build_index(ctx)
                logger.info(f"Index rebuilt with {len(index_result.get('indexed_papers', []))} papers")
            except Exception as e:
                logger.error(f"Error rebuilding index: {e}")
                index_result = {"success": False, "error": str(e)}

        response = create_response(
            success=True,
            paper_directory=paper_directory,
            doc_files=doc_files,
            message=f"Successfully added {len(added_papers)} documents out of {len(doc_files['all'])}",
            documents_added=len(added_papers),
            added_documents=added_papers
        )

        if index_result:
            response["index_result"] = index_result

        return response
    except ModelRetry:
        # Already in the form the agent framework expects; pass it through.
        raise
    except Exception as e:
        raise ModelRetry(f"Error adding papers: {str(e)}") from e
519
+
520
+
521
async def list_papers(
    ctx: RunContext[PaperQADependencies],
) -> Any:
    """
    List all papers in the current paper directory.

    The index is only read, never built. If it cannot be loaded, the
    response reports zero indexed entries.

    Args:
        ctx: The run context

    Returns:
        A `create_response` dictionary with the files found in the paper
        directory ('files_in_directory', 'files_by_type'), the indexed
        entries ('indexed_papers', 'indexed_chunks_count') and an
        explanatory 'note'

    Raises:
        ModelRetry: On any unexpected failure, wrapping the original error
    """
    try:
        settings = ctx.deps.set_paperqa_settings()
        paper_directory = settings.agent.index.paper_directory

        doc_files = get_document_files(paper_directory)

        # Kept as a mapping: create_response derives 'indexed_papers' from
        # the keys, whereas a plain list used to yield an empty value.
        indexed_files = {}
        try:
            index = await get_directory_index(settings=settings, build=False)
            index_files = await index.index_files
            indexed_names = list(index_files.keys())
            logger.info(f"Found {len(indexed_names)} indexed document chunks")
            indexed_files = index_files
        except Exception:
            # A missing or empty index is an expected state here.
            logger.info("No index found or index is empty")

        return create_response(
            success=True,
            paper_directory=paper_directory,
            doc_files=doc_files,
            indexed_files=indexed_files,
            message=f"Found {len(doc_files['all'])} documents and {len(indexed_files)} indexed chunks",
            files_in_directory=doc_files['all'],
            files_by_type={
                "pdf": doc_files['pdf'],
                "txt": doc_files['txt'],
                "html": doc_files['html'],
                "md": doc_files['md']
            },
            note="To search papers, they must be both in the paper directory AND indexed. If there are files in the directory but not indexed, use the CLI command 'aurelian paperqa index -d <directory>' to index them."
        )
    except ModelRetry:
        # Already in the form the agent framework expects; pass it through.
        raise
    except Exception as e:
        raise ModelRetry(f"Error listing papers: {str(e)}") from e
@@ -0,0 +1,3 @@
1
+ """
2
+ Phenopackets agent module for working with phenopacket databases.
3
+ """
@@ -0,0 +1,58 @@
1
+ """
2
+ Agent for working with phenopacket databases.
3
+ """
4
+ from aurelian.agents.phenopackets.phenopackets_config import PhenopacketsDependencies
5
+ from aurelian.agents.phenopackets.phenopackets_tools import (
6
+ search_phenopackets,
7
+ lookup_phenopacket,
8
+ lookup_pmid,
9
+ search_web,
10
+ retrieve_web_page
11
+ )
12
+ from aurelian.agents.filesystem.filesystem_tools import inspect_file, list_files
13
+ from pydantic_ai import Agent, Tool
14
+
15
# System prompt for the phenopackets agent (passed as `system_prompt` when the
# agent is constructed). This is a plain string, not an f-string, so `{curie}`
# reaches the model as a literal placeholder and is never interpolated.
SYSTEM = """
You are an AI assistant that can answer questions using the Phenopacket database.

Phenopackets are standardized data structures for representing phenotypic and genetic information
about patients with rare diseases or genetic disorders.

You can help with:
- Searching for phenopackets by disease, phenotype, gene, etc.
- Looking up specific phenopackets by ID
- Analyzing and comparing information from multiple phenopackets
- Finding correlations between phenotypes, genes, and variants
- Retrieving literature related to phenopackets via PubMed

You can use different functions to access the database:
- `search_phenopackets` to find phenopackets by text query
- `lookup_phenopacket` to retrieve a specific phenopacket by ID
- `lookup_pmid` to retrieve the text of a PubMed article
- `search_web` and `retrieve_web_page` for additional information

Always use the database and functions provided to answer questions, rather than providing
your own knowledge, unless explicitly asked. Provide answers in a narrative form
understandable by clinical geneticists, with supporting evidence from the database.

When presenting terms, include IDs alongside labels when available (e.g., HP:0001234).
All prefixed IDs should be hyperlinked with Bioregistry, i.e., https://bioregistry.io/{curie}.

Use markdown tables for summarizing or comparing multiple patients, with appropriate
column headers and clear organization of information.
"""
44
+
45
# Module-level agent instance: phenopacket lookup/search tools plus the generic
# web and filesystem tools, all driven by the SYSTEM prompt.
phenopackets_agent = Agent(
    model="openai:gpt-4o",
    system_prompt=SYSTEM,
    deps_type=PhenopacketsDependencies,
    # Wrap every tool function in a Tool, keeping the registration order.
    tools=[
        Tool(tool_fn)
        for tool_fn in (
            search_phenopackets,
            lookup_phenopacket,
            lookup_pmid,
            search_web,
            retrieve_web_page,
            inspect_file,
            list_files,
        )
    ],
)
@@ -0,0 +1,72 @@
1
+ """
2
+ Configuration classes for the phenopackets agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ import os
6
+ from typing import Optional
7
+
8
+ from linkml_store import Client
9
+ from linkml_store.api import Collection
10
+
11
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
12
+
13
# Default database handle; used as the default for `db_path` and as the
# fallback when PHENOPACKETS_DB_PATH is not set.
HANDLE = "mongodb://localhost:27017/phenopackets"
# Default alias under which the database is attached to the client
# (fallback when PHENOPACKETS_DB_NAME is not set).
DB_NAME = "phenopackets"
# Default name of the collection fetched from the attached database
# (fallback when PHENOPACKETS_COLLECTION is not set).
COLLECTION_NAME = "main"
16
+
17
+
18
@dataclass
class PhenopacketsDependencies(HasWorkdir):
    """
    Configuration for the phenopackets agent.

    Holds the database connection parameters and opens the connection lazily:
    nothing is attached until ``collection`` is first read, and the resulting
    collection is then cached on the instance.
    """
    # Result limit; not read inside this class (presumably consumed by the
    # search tools -- confirm against the callers).
    max_results: int = field(default=10)
    # Handle passed to Client.attach_database().
    db_path: str = field(default=HANDLE)
    # Alias the database is attached under and later looked up by.
    db_name: str = field(default=DB_NAME)
    # Name of the collection fetched from the attached database.
    collection_name: str = field(default=COLLECTION_NAME)
    # Cache for the lazily created collection; None until first access.
    _collection: Optional[Collection] = None

    def __post_init__(self):
        """Initialize the config with default values."""
        # Initialize workdir if not provided
        if self.workdir is None:
            self.workdir = WorkDir()

    @property
    def collection(self) -> Collection:
        """
        Get the phenopackets collection, initializing the connection if needed.

        Returns:
            Collection: The phenopackets collection
        """
        if self._collection is None:
            # Function-scope import keeps this block self-contained.
            import logging

            client = Client()
            # Report through logging rather than print(): library code should
            # not write diagnostics to stdout, which the host process may be
            # using for its own output.
            logging.getLogger(__name__).info(
                "Attaching to database: %s with alias: %s",
                self.db_path,
                self.db_name,
            )
            client.attach_database(self.db_path, alias=self.db_name)
            db = client.databases[self.db_name]
            self._collection = db.get_collection(self.collection_name)
        return self._collection
50
+
51
+
52
def get_config() -> PhenopacketsDependencies:
    """
    Build the phenopackets dependencies from the process environment.

    Reads AURELIAN_WORKDIR for the working directory and
    PHENOPACKETS_DB_PATH, PHENOPACKETS_DB_NAME and PHENOPACKETS_COLLECTION
    for the database settings; unset database variables fall back to the
    module-level defaults.

    Returns:
        PhenopacketsDependencies: The phenopackets dependencies
    """
    env = os.environ

    # An unset or empty AURELIAN_WORKDIR leaves workdir as None.
    location = env.get("AURELIAN_WORKDIR")
    if location:
        workdir = WorkDir(location=location)
    else:
        workdir = None

    return PhenopacketsDependencies(
        workdir=workdir,
        db_path=env.get("PHENOPACKETS_DB_PATH", HANDLE),
        db_name=env.get("PHENOPACKETS_DB_NAME", DB_NAME),
        collection_name=env.get("PHENOPACKETS_COLLECTION", COLLECTION_NAME),
    )