aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
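  121. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
  122. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0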
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,81 @@
1
+ """
2
+ MCP tools for creating ontology mappings.
3
+ """
4
+ import os
5
+ from typing import Dict, List, Optional
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ import aurelian.agents.ontology_mapper.ontology_mapper_tools as omt
10
+ from aurelian.agents.ontology_mapper.ontology_mapper_agent import ONTOLOGY_MAPPER_SYSTEM_PROMPT
11
+ from aurelian.agents.ontology_mapper.ontology_mapper_config import OntologyMapperDependencies, get_config
12
+ from pydantic_ai import RunContext
13
+
14
+ # Initialize FastMCP server
15
+ mcp = FastMCP("ontology_mapper", instructions=ONTOLOGY_MAPPER_SYSTEM_PROMPT)
16
+
17
+
18
+ from aurelian.dependencies.workdir import WorkDir
19
+
20
def deps() -> OntologyMapperDependencies:
    """
    Build the dependencies object used by the MCP tool wrappers.

    The configuration comes from ``get_config()``; its ``workdir`` is then
    replaced with a ``WorkDir`` rooted at ``$AURELIAN_WORKDIR`` (falling back
    to ``/tmp/aurelian`` when the variable is not set).

    Returns:
        The configured ontology mapper dependencies
    """
    config = get_config()
    workdir_location = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
    config.workdir = WorkDir(workdir_location)
    return config
26
+
27
def ctx() -> RunContext[OntologyMapperDependencies]:
    """
    Create a minimal run context for invoking agent tools outside an agent run.

    Only ``deps`` is populated (via ``deps()``); ``model``, ``usage`` and
    ``prompt`` are passed as ``None``.

    Returns:
        A RunContext carrying freshly built dependencies
    """
    return RunContext[OntologyMapperDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
33
+
34
+
35
@mcp.tool()
async def search_terms(query: str, ont: Optional[str] = None, limit: int = 10) -> List[Dict]:
    """
    Search for ontology terms matching a query.

    Args:
        query: The search query text
        ont: Ontology ID to search in (e.g., 'cl', 'go', 'uberon'). The
            underlying tool searches exactly one ontology, so a value is
            required even though the parameter has a default.
        limit: Maximum number of results to return

    Returns:
        List of matching ontology terms with their details

    Raises:
        ValueError: If no ontology ID is supplied
    """
    if not ont:
        raise ValueError(
            "An ontology ID is required (e.g. 'cl', 'go', 'uberon')"
        )
    # The underlying tool's signature is (ctx, ontology_id, query); pass by
    # keyword so the arguments cannot be transposed.
    results = await omt.search_terms(ctx(), ontology_id=ont, query=query)
    # The tool caps results using the configured maximum and has no per-call
    # limit, so the caller's limit is applied here.
    return list(results)[: max(limit, 0)]
49
+
50
+
51
@mcp.tool()
async def search_web(query: str) -> str:
    """
    Search the web for ontology-related information.

    Args:
        query: The search query

    Returns:
        Search results with summaries
    """
    # The underlying tool takes only the query (no run context); passing a
    # context as the first argument raised a TypeError.
    return await omt.search_web(query)
63
+
64
+
65
@mcp.tool()
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page related to ontologies.

    Args:
        url: The URL to fetch

    Returns:
        The contents of the web page
    """
    # The underlying tool takes only the URL (no run context); passing a
    # context as the first argument raised a TypeError.
    return await omt.retrieve_web_page(url)
77
+
78
+
79
+ if __name__ == "__main__":
80
+ # Initialize and run the server
81
+ mcp.run(transport='stdio')
@@ -0,0 +1,147 @@
1
+ """
2
+ Tools for the Ontology Mapper agent.
3
+ """
4
+ import asyncio
5
+ from functools import lru_cache
6
+ from typing import Dict, List, Optional
7
+
8
+ from oaklib import get_adapter
9
+ from pydantic_ai import RunContext, ModelRetry
10
+
11
+ from aurelian.utils.ontology_utils import search_ontology
12
+ from aurelian.utils.search_utils import web_search, retrieve_web_page as fetch_web_page
13
+ from .ontology_mapper_config import OntologyMapperDependencies, get_config
14
+
15
+
16
@lru_cache
def get_ontology_adapter(ont: str):
    """
    Return an OAK adapter for an ontology, reusing previously created ones.

    The ontology ID is lower-cased and used to build a ``sqlite:obo:<id>``
    selector. Results are memoized on the argument exactly as passed in.

    Args:
        ont: The ontology ID to get an adapter for (e.g. cl, go, uberon)

    Returns:
        An OAK adapter for the specified ontology
    """
    selector = f"sqlite:obo:{ont.lower()}"
    return get_adapter(selector)
29
+
30
+
31
async def search_terms(
    ctx: RunContext[OntologyMapperDependencies],
    ontology_id: str,
    query: str
) -> List[Dict]:
    """
    Finds similar ontology terms to the search query.

    For example:

    ```
    search_terms("go", "cell cycle and related processes")
    ```

    Relevancy ranking is used, with semantic similarity, which means
    queries need only be close in semantic space. E.g. while GO does not
    deal with diseases, this may return relevant pathways or structures:

    ```
    search_terms("go", "terms most relevant to Parkinson disease")
    ```

    Args:
        ctx: The run context
        ontology_id: The ontology ID to search in (e.g. cl, go, uberon)
        query: The search query

    Returns:
        A list of matching ontology terms

    Raises:
        ModelRetry: If the ontology ID is malformed or not in the configured
            list, if nothing matches, or if the search itself fails
    """
    print(f"Term Search: {ontology_id} {query}")

    try:
        if " " in ontology_id:
            raise ModelRetry(
                "Invalid ontology ID, use an OBO style ID like cl, mondo, chebi, etc."
            )

        config = ctx.deps or get_config()
        allowed = {ont.lower() for ont in config.ontologies}
        if ontology_id.lower() not in allowed:
            allowed_onts = ", ".join(config.ontologies)
            raise ModelRetry(
                f"Ontology '{ontology_id}' not in allowed list: {allowed_onts}"
            )

        adapter = get_ontology_adapter(ontology_id)
        # Execute the potentially blocking operation in a thread pool
        results = await asyncio.to_thread(
            search_ontology,
            adapter,
            query,
            limit=config.max_search_results
        )

        if not results:
            raise ModelRetry(f"No results found for query '{query}' in ontology '{ontology_id}'")

        return results
    except ModelRetry:
        # Retry requests raised above pass through unchanged.
        raise
    except Exception as e:
        # Any other failure is reported as a retry request, keeping the
        # original exception as the cause for debugging.
        raise ModelRetry(f"Error searching ontology: {e}") from e
93
+
94
+
95
async def search_web(query: str) -> str:
    """
    Search the web using a text query.

    Note, this will not retrieve the full content, for that you
    should use `retrieve_web_page`.

    Args:
        query: The search query

    Returns:
        Matching web pages plus summaries

    Raises:
        ModelRetry: If the search returns nothing or fails
    """
    print(f"Web Search: {query}")

    try:
        # Execute the potentially blocking operation in a thread pool
        results = await asyncio.to_thread(web_search, query)

        if not results or not results.strip():
            raise ModelRetry(f"No web search results found for query: {query}")

        return results
    except ModelRetry:
        # Retry requests raised above pass through unchanged.
        raise
    except Exception as e:
        # Keep the original exception as the cause for debugging.
        raise ModelRetry(f"Error searching the web: {e}") from e
122
+
123
+
124
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL of the web page to retrieve

    Returns:
        The contents of the web page

    Raises:
        ModelRetry: If the page is empty or cannot be retrieved
    """
    print(f"Fetch URL: {url}")

    try:
        # Execute the potentially blocking operation in a thread pool
        content = await asyncio.to_thread(fetch_web_page, url)

        if not content or not content.strip():
            raise ModelRetry(f"No content found at URL: {url}")

        return content
    except ModelRetry:
        # Retry requests raised above pass through unchanged.
        raise
    except Exception as e:
        # Keep the original exception as the cause for debugging.
        raise ModelRetry(f"Error retrieving web page: {e}") from e
@@ -0,0 +1,3 @@
1
+ """
2
+ Phenopackets agent module for working with phenopacket databases.
3
+ """
@@ -0,0 +1,58 @@
1
+ """
2
+ Agent for working with phenopacket databases.
3
+ """
4
+ from aurelian.agents.phenopackets.phenopackets_config import PhenopacketsDependencies
5
+ from aurelian.agents.phenopackets.phenopackets_tools import (
6
+ search_phenopackets,
7
+ lookup_phenopacket,
8
+ lookup_pmid,
9
+ search_web,
10
+ retrieve_web_page
11
+ )
12
+ from aurelian.agents.filesystem.filesystem_tools import inspect_file, list_files
13
+ from pydantic_ai import Agent, Tool
14
+
15
+ SYSTEM = """
16
+ You are an AI assistant that can answer questions using the Phenopacket database.
17
+
18
+ Phenopackets are standardized data structures for representing phenotypic and genetic information
19
+ about patients with rare diseases or genetic disorders.
20
+
21
+ You can help with:
22
+ - Searching for phenopackets by disease, phenotype, gene, etc.
23
+ - Looking up specific phenopackets by ID
24
+ - Analyzing and comparing information from multiple phenopackets
25
+ - Finding correlations between phenotypes, genes, and variants
26
+ - Retrieving literature related to phenopackets via PubMed
27
+
28
+ You can use different functions to access the database:
29
+ - `search_phenopackets` to find phenopackets by text query
30
+ - `lookup_phenopacket` to retrieve a specific phenopacket by ID
31
+ - `lookup_pmid` to retrieve the text of a PubMed article
32
+ - `search_web` and `retrieve_web_page` for additional information
33
+
34
+ Always use the database and functions provided to answer questions, rather than providing
35
+ your own knowledge, unless explicitly asked. Provide answers in a narrative form
36
+ understandable by clinical geneticists, with supporting evidence from the database.
37
+
38
+ When presenting terms, include IDs alongside labels when available (e.g., HP:0001234).
39
+ All prefixed IDs should be hyperlinked with Bioregistry, i.e., https://bioregistry.io/{curie}.
40
+
41
+ Use markdown tables for summarizing or comparing multiple patients, with appropriate
42
+ column headers and clear organization of information.
43
+ """
44
+
45
# Module-level agent instance: uses the SYSTEM prompt defined above, takes
# PhenopacketsDependencies as its dependency type, and registers the
# phenopacket, literature, web and filesystem tools imported at the top of
# this module.
phenopackets_agent = Agent(
    model="openai:gpt-4o",
    deps_type=PhenopacketsDependencies,
    system_prompt=SYSTEM,
    tools=[
        # Phenopacket database access
        Tool(search_phenopackets),
        Tool(lookup_phenopacket),
        # Literature and web lookups
        Tool(lookup_pmid),
        Tool(search_web),
        Tool(retrieve_web_page),
        # Filesystem tools shared with the filesystem agent package
        Tool(inspect_file),
        Tool(list_files),
    ]
)
@@ -0,0 +1,72 @@
1
+ """
2
+ Configuration classes for the phenopackets agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ import os
6
+ from typing import Optional
7
+
8
+ from linkml_store import Client
9
+ from linkml_store.api import Collection
10
+
11
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
12
+
13
+ HANDLE = "mongodb://localhost:27017/phenopackets"
14
+ DB_NAME = "phenopackets"
15
+ COLLECTION_NAME = "main"
16
+
17
+
18
@dataclass
class PhenopacketsDependencies(HasWorkdir):
    """
    Configuration for the phenopackets agent.

    Holds the database connection settings and a lazily created handle to
    the phenopackets collection. The connection is only opened the first
    time ``collection`` is accessed.
    """
    # Result limit setting; not read anywhere in this class.
    max_results: int = field(default=10)
    # Database handle passed to ``Client.attach_database``; defaults to HANDLE.
    db_path: str = field(default=HANDLE)
    # Alias under which the database is attached and then looked up.
    db_name: str = field(default=DB_NAME)
    # Name of the collection fetched from the attached database.
    collection_name: str = field(default=COLLECTION_NAME)
    # Cached collection; populated on first access to ``collection``.
    _collection: Optional[Collection] = None

    def __post_init__(self):
        """Fill in a default ``WorkDir`` when no workdir was supplied."""
        # Initialize workdir if not provided
        if self.workdir is None:
            self.workdir = WorkDir()

    @property
    def collection(self) -> Collection:
        """
        Get the phenopackets collection, initializing the connection if needed.

        On first access a new ``Client`` is created, ``db_path`` is attached
        under the alias ``db_name``, and the collection named
        ``collection_name`` is fetched and cached. Later accesses return the
        cached object. A message is printed when the database is attached.

        Returns:
            Collection: The phenopackets collection
        """
        if self._collection is None:
            client = Client()
            print(f"Attaching to database: {self.db_path} with alias: {self.db_name}")
            client.attach_database(self.db_path, alias=self.db_name)
            db = client.databases[self.db_name]
            self._collection = db.get_collection(self.collection_name)
        return self._collection
50
+
51
+
52
def get_config() -> PhenopacketsDependencies:
    """
    Build the phenopackets dependencies from the process environment.

    Reads ``AURELIAN_WORKDIR``, ``PHENOPACKETS_DB_PATH``,
    ``PHENOPACKETS_DB_NAME`` and ``PHENOPACKETS_COLLECTION``; unset database
    settings fall back to the module-level defaults. When no working
    directory is configured, ``None`` is passed as the workdir.

    Returns:
        PhenopacketsDependencies: The phenopackets dependencies
    """
    env = os.environ

    location = env.get("AURELIAN_WORKDIR", None)
    if location:
        workdir = WorkDir(location=location)
    else:
        workdir = None

    return PhenopacketsDependencies(
        workdir=workdir,
        db_path=env.get("PHENOPACKETS_DB_PATH", HANDLE),
        db_name=env.get("PHENOPACKETS_DB_NAME", DB_NAME),
        collection_name=env.get("PHENOPACKETS_COLLECTION", COLLECTION_NAME),
    )
@@ -0,0 +1,99 @@
1
+ """
2
+ Evaluation module for the Phenopackets agent.
3
+
4
+ This module implements evaluations for the Phenopackets agent using the pydantic-ai-evals framework.
5
+ """
6
+ import asyncio
7
+ import sys
8
+ from typing import Optional, Any, Dict, Callable, Awaitable
9
+
10
+ from aurelian.evaluators.model import MetadataDict, metadata
11
+ from aurelian.evaluators.substring_evaluator import SubstringEvaluator
12
+ from pydantic_evals import Case, Dataset
13
+ from pydantic_evals.evaluators import LLMJudge
14
+
15
+ from aurelian.agents.phenopackets.phenopackets_agent import phenopackets_agent
16
+ from aurelian.agents.phenopackets.phenopackets_config import PhenopacketsDependencies
17
+
18
+ class PhenopacketsMetadata(Dict[str, Any]):
19
+ """Simple metadata dictionary for Phenopackets evaluations."""
20
+ pass
21
+
22
+ # Define individual evaluation cases
23
+ case1 = Case(
24
+ name="liver_disease_patients",
25
+ inputs="What patients have liver disease?",
26
+ expected_output="hepat", # Should mention hepatic/liver terms
27
+ metadata=metadata("medium", "phenotype_query")
28
+ )
29
+
30
+ case2 = Case(
31
+ name="metabolic_pathway_genes",
32
+ inputs="What phenopackets involve genes from metabolic pathways?",
33
+ expected_output="metabol", # Should mention metabolic genes/pathways
34
+ metadata=metadata("hard", "gene_pathway_query"),
35
+ evaluators=[
36
+ LLMJudge(
37
+ rubric="""
38
+ Answer should:
39
+ 1. Identify phenopackets containing genes involved in metabolic pathways
40
+ 2. Link the metabolic genes to their corresponding phenotypes
41
+ 3. Explain how these genes relate to metabolic pathways
42
+ 4. Provide patient/case IDs where applicable
43
+ """,
44
+ include_input=True
45
+ )
46
+ ]
47
+ )
48
+
49
+ case3 = Case(
50
+ name="variant_effect_peroxisomal",
51
+ inputs="How does the type of variant affect phenotype in peroxisomal disorders?",
52
+ expected_output="peroxisom", # Should discuss peroxisomal disorders
53
+ metadata=metadata("hard", "variant_phenotype_correlation")
54
+ )
55
+
56
+ case4 = Case(
57
+ name="skeletal_dysplasia_comparison",
58
+ inputs="Examine phenopackets for skeletal dysplasias and compare their phenotypes",
59
+ expected_output="skeletal", # Should discuss skeletal terms
60
+ metadata=metadata("medium", "comparative_phenotype_analysis")
61
+ )
62
+
63
+ case5 = Case(
64
+ name="pnpla6_mutations",
65
+ inputs="Look up any patients with mutations in the PNPLA6 gene",
66
+ expected_output="PNPLA6", # Should mention the PNPLA6 gene
67
+ metadata=metadata("easy", "gene_mutation_query")
68
+ )
69
+
70
+ def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
71
+ """
72
+ Create a dataset for evaluating the Phenopackets agent.
73
+
74
+ Returns:
75
+ Dataset of Phenopackets evaluation cases with appropriate evaluators
76
+ """
77
+ # Collect all cases
78
+ cases = [case1, case2, case3, case4, case5]
79
+
80
+ # Dataset-level evaluators
81
+ evaluators = [
82
+ SubstringEvaluator(),
83
+ LLMJudge(
84
+ rubric="""
85
+ Evaluate the answer based on:
86
+ 1. Accuracy in identifying relevant phenopackets based on the query
87
+ 2. Correct interpretation of phenotype-genotype relationships
88
+ 3. Proper use of HPO terms and gene identifiers
89
+ 4. Comprehensive analysis of phenotypic data when requested
90
+ 5. Clear presentation of results including patient/case identifiers when appropriate
91
+ """,
92
+ model="anthropic:claude-3-7-sonnet-latest"
93
+ )
94
+ ]
95
+
96
+ return Dataset(
97
+ cases=cases,
98
+ evaluators=evaluators
99
+ )
@@ -0,0 +1,55 @@
1
+ """
2
+ Gradio UI for the phenopackets agent.
3
+ """
4
+ from typing import List, Optional
5
+
6
+ import gradio as gr
7
+
8
+ from aurelian.agents.phenopackets.phenopackets_agent import phenopackets_agent
9
+ from aurelian.agents.phenopackets.phenopackets_config import PhenopacketsDependencies
10
+ from aurelian.utils.async_utils import run_sync
11
+
12
+
13
def chat(deps: Optional[PhenopacketsDependencies] = None, db_path: Optional[str] = None, collection_name: Optional[str] = None, **kwargs):
    """
    Initialize a chat interface for the phenopackets agent.

    Args:
        deps: Optional dependencies configuration
        db_path: Optional database path, defaults to MongoDB localhost
        collection_name: Optional collection name, defaults to "main"
        **kwargs: Additional arguments to pass to the agent

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = PhenopacketsDependencies()

    # Explicit arguments override whatever the dependencies object carries.
    if db_path:
        deps.db_path = db_path
    if collection_name:
        deps.collection_name = collection_name

    def get_info(query: str, history: List[str]) -> str:
        """Run the agent on the query plus any prior chat history."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Put the heading on its own line; previously it was glued onto
            # the end of the user's query.
            history_text = "".join(f"\n{h}" for h in history)
            query = f"{query}\n\n## History{history_text}"
        result = run_sync(lambda: phenopackets_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Phenopackets AI Assistant",
        examples=[
            ["What patients have liver disease?"],
            ["What phenopackets involve genes from metabolic pathways?"],
            ["How does the type of variant affect phenotype in peroxisomal disorders?"],
            ["Examine phenopackets for skeletal dysplasias and compare their phenotypes"],
            ["Look up any patients with mutations in the PNPLA6 gene"]
        ]
    )
@@ -0,0 +1,178 @@
1
+ """
2
+ MCP tools for working with phenopacket databases.
3
+ """
4
+ import os
5
+ from typing import Dict, List
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ import aurelian.agents.filesystem.filesystem_tools as fst
10
+ from aurelian.agents.phenopackets.phenopackets_agent import SYSTEM
11
+ import aurelian.agents.phenopackets.phenopackets_tools as pt
12
+ from aurelian.agents.phenopackets.phenopackets_config import PhenopacketsDependencies
13
+ from pydantic_ai import RunContext
14
+
15
# Create the FastMCP server, passing the agent's system prompt as its instructions
mcp = FastMCP(
    "phenopackets",
    instructions=SYSTEM,
)
17
+
18
+
19
+ from aurelian.dependencies.workdir import WorkDir
20
+
21
def deps() -> PhenopacketsDependencies:
    """
    Build a ``PhenopacketsDependencies`` configured from environment variables.

    The working directory comes from ``AURELIAN_WORKDIR`` (falling back to
    ``/tmp/aurelian``). ``PHENOPACKETS_DB_PATH``, ``PHENOPACKETS_DB_NAME`` and
    ``PHENOPACKETS_COLLECTION_NAME`` override the matching attributes only
    when they are set to a non-empty value.

    Returns:
        PhenopacketsDependencies: A freshly created, configured dependencies object
    """
    config = PhenopacketsDependencies()
    config.workdir = WorkDir(os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian"))

    # Attribute name -> value read from the environment (None when unset)
    overrides = {
        "db_path": os.getenv("PHENOPACKETS_DB_PATH"),
        "db_name": os.getenv("PHENOPACKETS_DB_NAME"),
        "collection_name": os.getenv("PHENOPACKETS_COLLECTION_NAME"),
    }
    for attribute, value in overrides.items():
        # Unset or empty variables leave the attribute's existing value untouched
        if value:
            setattr(config, attribute, value)

    return config
40
+
41
def ctx() -> RunContext[PhenopacketsDependencies]:
    """
    Create a run context wrapping freshly built dependencies.

    Only ``deps`` is populated; ``model``, ``usage`` and ``prompt`` are
    passed as ``None``.

    Returns:
        RunContext[PhenopacketsDependencies]: Context to hand to the tool functions
    """
    return RunContext[PhenopacketsDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
47
+
48
+
49
@mcp.tool()
async def search_phenopackets(query: str) -> List[Dict]:
    """
    Performs a retrieval search over the Phenopackets database.

    The query can be any text, such as name of a disease, phenotype, gene, etc.

    The objects returned are "Phenopackets" which is a structured representation
    of a patient. Each is uniquely identified by a phenopacket ID (essentially
    the patient ID).

    The objects returned are summaries of Phenopackets; some details such
    as phenotypes are omitted. Use `lookup_phenopacket` to retrieve full details.

    Args:
        query: The search query text

    Returns:
        List[Dict]: List of phenopackets matching the query
    """
    # Build a new context for this call and delegate to the shared tool implementation
    run_context = ctx()
    matches = await pt.search_phenopackets(run_context, query)
    return matches
70
+
71
+
72
@mcp.tool()
async def lookup_phenopacket(phenopacket_id: str) -> Dict:
    """
    Performs a lookup of an individual Phenopacket by its ID.

    IDs are typically of the form PMID_nnn_PatientNumber, but this should not be assumed.

    Args:
        phenopacket_id: The ID of the Phenopacket to look up

    Returns:
        Dict: The phenopacket data
    """
    # Build a new context for this call and delegate to the shared tool implementation
    run_context = ctx()
    phenopacket = await pt.lookup_phenopacket(run_context, phenopacket_id)
    return phenopacket
86
+
87
+
88
@mcp.tool()
async def lookup_pmid(pmid: str) -> str:
    """
    Lookup the text of a PubMed article by its PMID.

    A PMID should be of the form "PMID:nnnnnnn" (no underscores).

    NOTE: Phenopacket IDs are typically of the form PMID_nnn_PatientNumber,
    but this should not be assumed. To reliably get PMIDs for a phenopacket,
    use `lookup_phenopacket` to retrieve and examine the `externalReferences` field.

    Args:
        pmid: The PubMed ID to look up

    Returns:
        str: Full text if available, otherwise abstract
    """
    # This tool takes no run context; the PMID is forwarded unchanged
    article_text = await pt.lookup_pmid(pmid)
    return article_text
106
+
107
+
108
@mcp.tool()
async def search_web(query: str) -> str:
    """
    Search the web using a text query.

    Args:
        query: The search query

    Returns:
        str: Search results with summaries
    """
    # This tool takes no run context; the query is forwarded unchanged
    search_results = await pt.search_web(query)
    return search_results
120
+
121
+
122
@mcp.tool()
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL to fetch

    Returns:
        str: The contents of the web page
    """
    # This tool takes no run context; the URL is forwarded unchanged
    page_contents = await pt.retrieve_web_page(url)
    return page_contents
134
+
135
+
136
@mcp.tool()
async def inspect_file(data_file: str) -> str:
    """
    Inspect a file in the working directory.

    Args:
        data_file: name of file

    Returns:
        str: Contents of the file
    """
    # Build a new context for this call and delegate to the filesystem tools
    run_context = ctx()
    file_contents = await fst.inspect_file(run_context, data_file)
    return file_contents
148
+
149
+
150
@mcp.tool()
async def list_files() -> str:
    """
    List files in the working directory.

    Returns:
        str: List of files in the working directory
    """
    # Build a new context for this call and delegate to the filesystem tools
    run_context = ctx()
    listing = await fst.list_files(run_context)
    return listing
159
+
160
+
161
@mcp.tool()
async def write_to_file(file_name: str, data: str) -> str:
    """
    Write data to a file in the working directory.

    Args:
        file_name: Name of the file to write
        data: Data to write to the file

    Returns:
        str: Confirmation message
    """
    # Build a new context for this call and delegate to the filesystem tools
    run_context = ctx()
    confirmation = await fst.write_to_file(run_context, file_name, data)
    return confirmation
174
+
175
+
176
if __name__ == "__main__":
    # When executed as a script, serve the registered tools over the stdio transport
    mcp.run(transport="stdio")