aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
  121. Regulatory Processes in GO-CAM.docx +0 -0
  122. Regulatory Processes in GO-CAM.pdf +0 -0
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,40 @@
1
+ """
2
+ Ubergraph agent package for working with ontologies via the UberGraph endpoint.
3
+ """
4
+
5
+ from .ubergraph_agent import (
6
+ ubergraph_agent,
7
+ ASSUMPTIONS,
8
+ add_ontology_assumptions,
9
+ add_prefixes,
10
+ )
11
+ from .ubergraph_config import Dependencies, DEFAULT_PREFIXES, get_config
12
+ from .ubergraph_gradio import chat
13
+ from .ubergraph_tools import (
14
+ query_ubergraph,
15
+ QueryResults,
16
+ simplify_value,
17
+ simplify_results,
18
+ )
19
+
20
+ __all__ = [
21
+ # Agent
22
+ "ubergraph_agent",
23
+ "ASSUMPTIONS",
24
+ "add_ontology_assumptions",
25
+ "add_prefixes",
26
+
27
+ # Config
28
+ "Dependencies",
29
+ "DEFAULT_PREFIXES",
30
+ "get_config",
31
+
32
+ # Tools
33
+ "query_ubergraph",
34
+ "QueryResults",
35
+ "simplify_value",
36
+ "simplify_results",
37
+
38
+ # Gradio
39
+ "chat",
40
+ ]
@@ -0,0 +1,71 @@
1
+ """
2
+ Agent for working with ontologies via UberGraph endpoint.
3
+ """
4
+ from typing import Dict
5
+
6
+ from pydantic_ai import Agent, RunContext
7
+
8
+ from .ubergraph_config import Dependencies, get_config
9
+ from .ubergraph_tools import query_ubergraph
10
+
11
+ # Assumptions about the UberGraph data model
12
# Every value MUST be a single string: the system prompt renders each entry
# with f"Assumption: {desc}", so a tuple would be emitted as its repr.
# Adjacent literals are implicitly concatenated, so each continuation piece
# carries its own leading space.
ASSUMPTIONS = {
    "provenance": (
        "When formulating your response to tool outputs,"
        " you can extemporize with your own knowledge, but if you do so,"
        " you must be clear about which statements come from the ontology"
        " vs your own knowledge."
    ),
    "ids": "include both IDs and labels in responses, unless directed not to do so.",
    "obo": "Assume OBO style ontology and OBO PURLs (http://purl.obolibrary.org/obo/).",
    "rg": (
        "All edges are stored as simple triples, e.g CL:0000080 BFO:0000050 UBERON:0000179"
        " for 'circulating cell' 'part of' 'haemolymphatic fluid'"
    ),
    "ont_graph": (
        "Direct (asserted) edges are stored in the `renci:ontology` graph."
        " Use this by default, even for subClassOf."
    ),
    "entailed": (
        "Indirect (entailed) edges (including reflexive) are stored in the `renci:redundant` graph."
        " Use this for queries that require transitive closure, e.g. rdfs:subClassOf+."
        " Note however that other triples like rdfs:label are NOT in this graph - use renci:ontology for these."
    ),
    "paths": "In general you should NOT use paths like rdfs:subClassOf+, use the entailed graph.",
    "ro": "RO is used for predicates. Common relations include BFO:0000050 for part-of.",
    "is_a": "rdfs:subClassOf is used for is_a relationships.",
    "labels": "rdfs:label used for labels. IDs/URIs are typically OBO-style.",
    "oboInOwl": "assume oboInOwl for synonyms, e.g. oboInOwl:hasExactSynonym.",
    "blazegraph": (
        "Blazegraph is used as the underlying triplestore."
        " This means you SHOULD do relevance-ranked match queries over CONTAINS. "
        "E.g. ?c rdfs:label ?v . ?v bds:search 'circulating cell' ; ?v bds:relevance ?score ."
    ),
    "def": "IAO:0000115 is used for definitions.",
    "xref": "assume oboInOwl:hasDbXref for simple cross-references.",
    "mixed_language": "Do not assume all labels are language tagged.",
}
47
+
48
+ # Create the UberGraph agent
49
+ ubergraph_agent = Agent(
50
+ "openai:gpt-4o",
51
+ deps_type=Dependencies,
52
+ result_type=str,
53
+ )
54
+
55
+ # Register tools
56
+ ubergraph_agent.tool(query_ubergraph)
57
+
58
+
59
@ubergraph_agent.system_prompt
def add_ontology_assumptions(ctx: RunContext[Dependencies]) -> str:
    """Render each ASSUMPTIONS entry as an "Assumption: ..." paragraph.

    Args:
        ctx: The run context (not consulted by this prompt fragment).

    Returns:
        The paragraphs separated, and preceded, by a blank line.
    """
    separator = "\n\n"
    paragraphs = [f"Assumption: {text}" for text in ASSUMPTIONS.values()]
    return separator + separator.join(paragraphs)
63
+
64
+
65
@ubergraph_agent.system_prompt
def add_prefixes(ctx: RunContext[Dependencies]) -> str:
    """List the configured SPARQL prefixes in the system prompt.

    Args:
        ctx: The run context; its ``deps.prefixes`` mapping supplies the prefixes.

    Returns:
        A header line followed by one "Prefix: name: expansion" entry per prefix.
    """
    header = "\n\nAssume the following prefixes are auto-included:"
    entries = []
    for name, uri in ctx.deps.prefixes.items():
        entries.append(f"\nPrefix: {name}: {uri}")
    return header + "\n".join(entries)
@@ -0,0 +1,79 @@
1
+ """
2
+ Configuration for the Ubergraph agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ import os
6
+ from typing import Dict, Optional
7
+
8
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
9
+
10
+ # Default UberGraph endpoint
11
+ UBERGRAPH_ENDPOINT = "https://ubergraph.apps.renci.org/sparql"
12
+
13
+ # Default SPARQL prefixes
14
+ DEFAULT_PREFIXES = {
15
+ "owl": "http://www.w3.org/2002/07/owl#",
16
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
17
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
18
+ "schema": "http://schema.org/",
19
+ "obo": "http://purl.obolibrary.org/obo/",
20
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
21
+ "renci": "http://reasoner.renci.org/",
22
+ "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#",
23
+ "BFO": "http://purl.obolibrary.org/obo/BFO_",
24
+ "RO": "http://purl.obolibrary.org/obo/RO_",
25
+ "GO": "http://purl.obolibrary.org/obo/GO_",
26
+ "SO": "http://purl.obolibrary.org/obo/SO_",
27
+ "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
28
+ "CL": "http://purl.obolibrary.org/obo/CL_",
29
+ "UBERON": "http://purl.obolibrary.org/obo/UBERON_",
30
+ "IAO": "http://purl.obolibrary.org/obo/IAO_",
31
+ "OBI": "http://purl.obolibrary.org/obo/OBI_",
32
+ "biolink": "https://w3id.org/biolink/vocab/",
33
+ "bds": "http://www.bigdata.com/rdf/search#",
34
+ }
35
+
36
+
37
@dataclass
class Dependencies(HasWorkdir):
    """Configuration for the UberGraph agent."""

    # SPARQL endpoint that queries are sent to
    endpoint: str = UBERGRAPH_ENDPOINT

    # Prefixes for SPARQL queries. The factory copies DEFAULT_PREFIXES so that
    # mutating one instance's mapping cannot leak into the module-level
    # default or into other instances.
    prefixes: Dict[str, str] = field(default_factory=lambda: dict(DEFAULT_PREFIXES))

    # Maximum number of results to return
    max_results: int = 20

    def __post_init__(self):
        """Fall back to a default WorkDir when no workdir was supplied."""
        # HasWorkdir doesn't have a __post_init__ method, so we don't call super()
        if self.workdir is None:
            self.workdir = WorkDir()
55
+
56
+
57
def get_config(
    endpoint: Optional[str] = None,
    prefixes: Optional[Dict[str, str]] = None,
    max_results: Optional[int] = None,
) -> Dependencies:
    """Get the UberGraph configuration from arguments, environment variables or defaults.

    Args:
        endpoint: SPARQL endpoint; when falsy, ``UBERGRAPH_ENDPOINT`` from the
            environment is used, then the built-in default.
        prefixes: Prefix mapping; when falsy, a copy of ``DEFAULT_PREFIXES`` is used.
        max_results: Result limit; when falsy, ``MAX_RESULTS`` from the
            environment is used, then 20.

    Returns:
        A populated ``Dependencies`` instance. Its workdir comes from
        ``AURELIAN_WORKDIR`` when that variable is set.

    Raises:
        ValueError: If ``MAX_RESULTS`` is consulted and is not an integer.
    """
    config_endpoint = endpoint or os.environ.get("UBERGRAPH_ENDPOINT", UBERGRAPH_ENDPOINT)

    if max_results:
        config_max_results = max_results
    else:
        raw_max_results = os.environ.get("MAX_RESULTS", "20")
        try:
            config_max_results = int(raw_max_results)
        except ValueError as err:
            raise ValueError(
                f"MAX_RESULTS must be an integer, got {raw_max_results!r}"
            ) from err

    # Get workdir from environment if specified
    workdir_path = os.environ.get("AURELIAN_WORKDIR", None)
    workdir = WorkDir(location=workdir_path) if workdir_path else None

    return Dependencies(
        endpoint=config_endpoint,
        # Copy the defaults so the returned config never aliases the
        # module-level DEFAULT_PREFIXES dict.
        prefixes=prefixes or dict(DEFAULT_PREFIXES),
        max_results=config_max_results,
        workdir=workdir,
    )
@@ -0,0 +1,48 @@
1
+ """
2
+ Gradio interface for the UberGraph agent.
3
+ """
4
+ import os
5
+ from typing import List, Optional
6
+
7
+ import gradio as gr
8
+
9
+ from aurelian.utils.async_utils import run_sync
10
+ from .ubergraph_agent import ubergraph_agent
11
+ from .ubergraph_config import Dependencies, get_config
12
+
13
+
14
def chat(deps: Optional[Dependencies] = None, **kwargs):
    """
    Initialize a chat interface for the UberGraph agent.

    Args:
        deps: Optional dependencies configuration; ``get_config()`` is used when omitted
        **kwargs: Additional arguments forwarded to ``ubergraph_agent.run_sync``

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = get_config()

    def get_info(query: str, history: List[str]) -> str:
        """Answer one chat turn, appending any prior history to the query."""
        # NOTE(review): with type="messages" Gradio presumably passes dict
        # messages rather than plain strings - confirm and adjust the annotation.
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Start the history section on its own line; previously the
            # "## History" header was glued to the end of the user's question.
            query += "\n\n## History" + "".join(f"\n{h}" for h in history)
        result = run_sync(lambda: ubergraph_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="UberGraph SPARQL Assistant",
        examples=[
            "Find all cell types that are part of the heart",
            "What is the definition of CL:0000746?",
            "What genes are expressed in neurons?",
            "What are the subclasses of skeletal muscle tissue?",
        ],
    )
@@ -0,0 +1,69 @@
1
+ """
2
+ MCP tools for working with ontologies via UberGraph endpoint.
3
+ """
4
+ import os
5
+ from typing import Dict, Optional
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ import aurelian.agents.ubergraph.ubergraph_tools as ut
10
+ from aurelian.agents.ubergraph.ubergraph_config import Dependencies, get_config
11
+ from pydantic_ai import RunContext
12
+
13
+ # Initialize FastMCP server with combined system prompt
14
+ SYSTEM_PROMPT = """
15
+ You are an expert ontologist with access to the UberGraph SPARQL endpoint.
16
+
17
+ UberGraph is a knowledge graph built from multiple OBO ontologies, including GO, Uberon, CL, ChEBI, and more.
18
+ You can help users explore ontology terms, relationships, and hierarchies through SPARQL queries.
19
+
20
+ IMPORTANT ASSUMPTIONS:
21
+ - When formulating your response to tool outputs, you can extemporize with your own knowledge,
22
+ but if you do so, you must be clear about which statements come from the ontology vs your own knowledge.
23
+ - Include both IDs and labels in responses, unless directed not to do so.
24
+ - Assume OBO style ontology and OBO PURLs (http://purl.obolibrary.org/obo/).
25
+ - All edges are stored as simple triples, e.g CL:0000080 BFO:0000050 UBERON:0000179 for 'circulating cell'
26
+ 'part of' 'haemolymphatic fluid'
27
+ - Direct (asserted) edges are stored in the 'renci:ontology' graph. Use this by default, even for subClassOf.
28
+ - Indirect (entailed) edges (including reflexive) are stored in the 'renci:redundant' graph. Use this for
29
+ queries that require transitive closure, e.g. rdfs:subClassOf+
30
+ """
31
+
32
+ mcp = FastMCP("ubergraph", instructions=SYSTEM_PROMPT)
33
+
34
+
35
+ from aurelian.dependencies.workdir import WorkDir
36
+
37
def deps() -> Dependencies:
    """Build the dependencies used by the MCP tools.

    The working directory is taken from the AURELIAN_WORKDIR environment
    variable, falling back to /tmp/aurelian.
    """
    config = get_config()
    config.workdir = WorkDir(os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian"))
    return config
43
+
44
def ctx() -> RunContext[Dependencies]:
    """Create a RunContext carrying fresh dependencies; model, usage and prompt are left as None."""
    return RunContext[Dependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
50
+
51
+
52
@mcp.tool()
async def query_ubergraph(query: str, format: Optional[str] = "text") -> Dict:
    """
    Execute a SPARQL query against the UberGraph endpoint.

    Args:
        query: The SPARQL query to execute
        format: Output format (text or json). Accepted for interface
            compatibility only; the underlying tool has no format option,
            so the value is currently not used.

    Returns:
        The query results as a dict with a ``results`` list of row dicts
    """
    # ut.query_ubergraph takes only (ctx, query); forwarding `format` as a
    # third positional argument made every call fail with TypeError.
    results = await ut.query_ubergraph(ctx(), query)
    # Convert the pydantic model so the return value matches the Dict annotation.
    return results.model_dump()
65
+
66
+
67
+ if __name__ == "__main__":
68
+ # Initialize and run the server
69
+ mcp.run(transport='stdio')
@@ -0,0 +1,118 @@
1
+ """
2
+ Tools for interacting with the UberGraph SPARQL endpoint.
3
+ """
4
+ import asyncio
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from pydantic import BaseModel
8
+ from pydantic_ai import RunContext, ModelRetry
9
+ from SPARQLWrapper import JSON, SPARQLWrapper
10
+
11
+ from .ubergraph_config import Dependencies, get_config
12
+
13
+
14
class QueryResults(BaseModel):
    """Container for the rows returned by a SPARQL query."""

    # One dict per result row; empty when nothing was supplied.
    results: List[Dict] = []
17
+
18
+
19
def simplify_value(v: Dict, prefixes=None) -> Any:
    """
    Simplify a SPARQL query result value.

    URIs are contracted to ``prefix:local`` form using the longest matching
    expansion, so a specific prefix (e.g. ``CL``) wins over a more general one
    that shares its start (e.g. ``obo``) regardless of mapping order.

    Args:
        v: The value to simplify (a binding with "type" and "value" keys)
        prefixes: Optional mapping of prefixes to expansions

    Returns:
        The contracted form for a URI with a matching prefix, otherwise the
        raw value unchanged
    """
    value = v["value"]
    if not prefixes or v["type"] != "uri":
        return value

    candidates = [
        (prefix, expansion)
        for prefix, expansion in prefixes.items()
        if value.startswith(expansion)
    ]
    if not candidates:
        return value

    # max() keeps the first of equally long expansions, preserving mapping order on ties.
    prefix, expansion = max(candidates, key=lambda item: len(item[1]))
    return f"{prefix}:{value[len(expansion):]}"
35
+
36
+
37
def simplify_results(results: Dict, prefixes=None, limit=20) -> List[Dict]:
    """
    Flatten SPARQL JSON results into a list of plain row dicts.

    Args:
        results: The query results; rows are read from results["results"]["bindings"]
        prefixes: Optional mapping of prefixes to expansions, passed to simplify_value
        limit: Maximum number of rows to return

    Returns:
        At most ``limit`` rows, each mapping a binding key to its simplified value
    """
    simplified = []
    for position, binding in enumerate(results["results"]["bindings"], start=1):
        # Stop once the row count would exceed the limit.
        if position > limit:
            break
        simplified.append(
            {var: simplify_value(cell, prefixes) for var, cell in binding.items()}
        )
    return simplified
60
+
61
+
62
async def query_ubergraph(ctx: RunContext[Dependencies], query: str) -> QueryResults:
    """
    Performs a SPARQL query over Ubergraph then returns the results as triples.

    Ubergraph is a triplestore that contains many OBO ontologies and precomputed
    relation graph edges.

    Args:
        ctx: The run context; ``ctx.deps`` supplies the endpoint, prefixes and
            result limit (``get_config()`` is used when it is falsy)
        query: The SPARQL query to execute; configured PREFIX declarations are
            prepended automatically

    Returns:
        The query results

    Raises:
        ModelRetry: If the query fails (syntax error, timeout or any other
            error) or returns no results
    """
    config = ctx.deps or get_config()
    prefixes = config.prefixes
    endpoint = config.endpoint

    # Add prefixes to query
    declarations = "".join(f"PREFIX {k}: <{v}>\n" for k, v in prefixes.items())
    prefixed_query = declarations + query

    print("## Query")
    print(prefixed_query)
    print("##")

    # Only the remote call and result processing sit inside the try, so the
    # "no results" ModelRetry below no longer has to be recognised and
    # re-raised by inspecting the exception's type name.
    try:
        sw = SPARQLWrapper(endpoint)
        sw.setQuery(prefixed_query)
        sw.setReturnFormat(JSON)

        # Execute the query in a worker thread
        ret = await asyncio.to_thread(sw.queryAndConvert)

        results = simplify_results(ret, prefixes, limit=config.max_results)
    except Exception as e:
        message = str(e)
        lowered = message.lower()

        # Translate failures into ModelRetry, keeping the original as the cause
        if "syntax error" in lowered:
            raise ModelRetry(f"SPARQL syntax error: {message}") from e
        if "time" in lowered and "out" in lowered:
            raise ModelRetry(
                "Query timed out. Try to simplify your query or reduce its scope."
            ) from e
        raise ModelRetry(f"Error executing SPARQL query: {message}") from e

    print("num results=", len(results))
    print("results=", results)

    if not results:
        raise ModelRetry("No results found for SPARQL query. Try refining your query.")

    return QueryResults(results=results)
@@ -0,0 +1,37 @@
1
+ """
2
+ UniProt agent package for interacting with the UniProt database.
3
+ """
4
+
5
+ from .uniprot_agent import uniprot_agent, UNIPROT_SYSTEM_PROMPT
6
+ from .uniprot_config import UniprotConfig, get_config
7
+ from .uniprot_gradio import chat
8
+ from .uniprot_mcp import (
9
+ get_uniprot_mcp_tools,
10
+ get_uniprot_mcp_messages,
11
+ handle_uniprot_mcp_request,
12
+ )
13
+ from .uniprot_tools import lookup_uniprot_entry, search, uniprot_mapping, normalize_uniprot_id
14
+
15
+ __all__ = [
16
+ # Agent
17
+ "uniprot_agent",
18
+ "UNIPROT_SYSTEM_PROMPT",
19
+
20
+ # Config
21
+ "UniprotConfig",
22
+ "get_config",
23
+
24
+ # Tools
25
+ "lookup_uniprot_entry",
26
+ "search",
27
+ "uniprot_mapping",
28
+ "normalize_uniprot_id",
29
+
30
+ # Gradio
31
+ "chat",
32
+
33
+ # MCP
34
+ "get_uniprot_mcp_tools",
35
+ "get_uniprot_mcp_messages",
36
+ "handle_uniprot_mcp_request",
37
+ ]
@@ -0,0 +1,43 @@
1
+ """
2
+ Agent for working with the UniProt database and API.
3
+ """
4
+ from pydantic_ai import Agent
5
+
6
+ from .uniprot_config import UniprotConfig, get_config
7
+ from .uniprot_tools import lookup_uniprot_entry, search, uniprot_mapping
8
+
9
+ # System prompt for the UniProt agent
10
+ UNIPROT_SYSTEM_PROMPT = """
11
+ You are a helpful assistant that specializes in accessing and interpreting information from the UniProt database.
12
+ UniProt is a comprehensive, high-quality resource of protein sequence and functional information.
13
+
14
+ You can:
15
+ - Search UniProt with queries
16
+ - Look up detailed information about specific proteins using UniProt accession numbers
17
+ - Map UniProt accessions to entries in other databases
18
+
19
+ When using protein IDs:
20
+ - UniProt accession numbers (e.g., P12345) are stable identifiers for protein entries
21
+ - Some proteins may be referenced by their entry name (e.g., ALBU_HUMAN)
22
+ - UniProt IDs may sometimes include version numbers (e.g., P12345.2) which can be normalized
23
+
24
+ When returning information about proteins, present it in a clear, organized manner with:
25
+ - Key protein attributes like name, gene, organism, and length
26
+ - Functional information including catalytic activity and pathways
27
+ - Structural information if available
28
+ - Disease associations if relevant
29
+
30
+ For search results, summarize the key findings and highlight the most relevant matches.
31
+ """
32
+
33
# Module-level agent instance: configured with the UniProt system prompt and
# typed on UniprotConfig as its dependency object.
uniprot_agent = Agent(
    deps_type=UniprotConfig,
    system_prompt=UNIPROT_SYSTEM_PROMPT,
    model="openai:gpt-4o",
)

# Attach the tool functions to the agent, keeping the original order
# (search, entry lookup, ID mapping).
for _tool in (search, lookup_uniprot_entry, uniprot_mapping):
    uniprot_agent.tool(_tool)
del _tool  # do not leave the loop variable behind as a module attribute
@@ -0,0 +1,43 @@
1
+ """
2
+ Configuration for the UniProt agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ import os
6
+ from typing import Any, Dict, Optional
7
+
8
+ from bioservices import UniProt
9
+
10
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
11
+
12
+
13
@dataclass
class UniprotConfig(HasWorkdir):
    """Configuration for the UniProt agent.

    Extends ``HasWorkdir`` with the keyword arguments used to construct the
    bioservices ``UniProt`` client.
    """

    # Keyword arguments passed straight to ``UniProt(...)`` in
    # ``get_uniprot_client``.
    uniprot_client_options: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Apply defaults to any field that was left unset or empty."""
        # HasWorkdir doesn't have a __post_init__ method, so we don't call super()
        # Truthiness covers both ``None`` and an empty dict in one check.
        if not self.uniprot_client_options:
            self.uniprot_client_options = {"verbose": False}

        # Fall back to a default-constructed WorkDir when none was supplied.
        if self.workdir is None:
            self.workdir = WorkDir()

    def get_uniprot_client(self) -> UniProt:
        """Return a new ``UniProt`` client built from ``uniprot_client_options``."""
        return UniProt(**self.uniprot_client_options)
33
+
34
+
35
def get_config() -> UniprotConfig:
    """Build a ``UniprotConfig`` from the process environment.

    Reads the ``AURELIAN_WORKDIR`` environment variable. When it holds a
    non-empty value, a ``WorkDir`` at that location is passed to the config;
    otherwise ``workdir`` is passed as ``None``. The client options are always
    ``{"verbose": False}``.

    Returns:
        The constructed ``UniprotConfig``.
    """
    location = os.environ.get("AURELIAN_WORKDIR")
    if location:
        workdir = WorkDir(location=location)
    else:
        workdir = None

    client_options = {"verbose": False}
    return UniprotConfig(workdir=workdir, uniprot_client_options=client_options)
@@ -0,0 +1,99 @@
1
+ """
2
+ Evaluation module for the UniProt agent.
3
+
4
+ This module implements evaluations for the UniProt agent using the pydantic-ai-evals framework.
5
+ """
6
+ import asyncio
7
+ import sys
8
+ from typing import Optional, Any, Dict, Callable, Awaitable
9
+
10
+ from aurelian.evaluators.model import MetadataDict, metadata
11
+ from aurelian.evaluators.substring_evaluator import SubstringEvaluator
12
+ from pydantic_evals import Case, Dataset
13
+ from pydantic_evals.evaluators import LLMJudge
14
+
15
+ from aurelian.agents.uniprot.uniprot_agent import uniprot_agent
16
+ from aurelian.agents.uniprot.uniprot_config import UniprotConfig
17
+
18
class UniprotMetadata(Dict[str, Any]):
    """Plain string-keyed dictionary type for UniProt evaluation metadata.

    Adds no behaviour of its own on top of ``dict``.
    """
21
+
22
# Evaluation cases for the UniProt agent. Each one pairs a natural-language
# prompt with a short expected string and a (difficulty, category) metadata tag.

# Free-text search; the answer should surface the human insulin accession.
case1 = Case(
    name="human_insulin",
    metadata=metadata("easy", "protein_search"),
    inputs="Search for human insulin protein",
    expected_output="P01308",  # UniProt accession for human insulin
)

# Direct accession lookup, additionally graded by a case-specific LLM judge.
case2 = Case(
    name="uniprot_entry_lookup",
    metadata=metadata("easy", "id_lookup"),
    inputs="Look up UniProt entry P01308",
    expected_output="insulin",  # the entry should be recognised as insulin
    evaluators=[
        LLMJudge(
            rubric="""
            Answer should:
            1. Correctly identify P01308 as human insulin
            2. Include key information about the protein's function
            3. Mention its role in glucose homeostasis
            4. Provide information about protein structure
            """,
            include_input=True,
        ),
    ],
)

# Cross-database mapping of two accessions.
case3 = Case(
    name="id_mapping",
    metadata=metadata("medium", "database_mapping"),
    inputs="Map UniProt IDs P01308,P01009 to PDB database",
    expected_output="PDB",  # the answer should report PDB identifiers
)

# Lookup by entry name rather than accession.
case4 = Case(
    name="domain_identification",
    metadata=metadata("medium", "protein_domain_analysis"),
    inputs="What domains are present in UniProt entry P53_HUMAN?",
    expected_output="domain",  # the answer should talk about protein domains
)

# Open-ended disease query.
case5 = Case(
    name="disease_association",
    metadata=metadata("hard", "disease_association_query"),
    inputs="Find all proteins related to Alzheimer's disease",
    expected_output="amyloid",  # amyloid proteins should be mentioned
)
69
+
70
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """Assemble the evaluation dataset for the UniProt agent.

    Returns:
        A ``Dataset`` holding the module-level cases ``case1`` through
        ``case5`` together with two dataset-level evaluators: a
        ``SubstringEvaluator`` and an ``LLMJudge`` that uses the rubric below
        with the ``anthropic:claude-3-7-sonnet-latest`` model.
    """
    # The literal's internal whitespace is part of the rubric text, so it is
    # kept as-is rather than re-indented to match this assignment.
    judge_rubric = """
            Evaluate the answer based on:
            1. Accuracy of protein information provided
            2. Correct identification of UniProt IDs and cross-references
            3. Comprehensive coverage of protein structure and function
            4. Proper description of protein domains and modifications
            5. Accurate representation of protein-disease associations
            """

    # Evaluators applied at the dataset level.
    substring_check = SubstringEvaluator()
    rubric_judge = LLMJudge(
        rubric=judge_rubric,
        model="anthropic:claude-3-7-sonnet-latest",
    )

    return Dataset(
        cases=[case1, case2, case3, case4, case5],
        evaluators=[substring_check, rubric_judge],
    )