aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
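  121. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
  122. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0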
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,294 @@
1
+ """
2
+ Tools for the GOCAM agent.
3
+ """
4
+ import os
5
+ import json
6
+ import yaml
7
+ from pathlib import Path
8
+ from typing import List, Dict, Optional, Union, Any
9
+
10
+ from linkml_store.utils.format_utils import load_objects
11
+ from pydantic_ai import RunContext, ModelRetry
12
+ from pydantic import ValidationError
13
+
14
+ from gocam.datamodel.gocam import Model as GocamModel
15
+
16
+ from aurelian.agents.gocam.gocam_config import GOCAMDependencies
17
+ from aurelian.agents.uniprot.uniprot_tools import normalize_uniprot_id
18
+ from aurelian.utils.data_utils import flatten
19
+ from aurelian.agents.literature.literature_tools import search_literature_web, retrieve_literature_page
20
+ from . import DOCUMENTS_DIR
21
+
22
+
23
async def search_gocams(ctx: RunContext[GOCAMDependencies], query: str) -> List[Dict]:
    """
    Performs a retrieval search over the GO-CAM database.

    The query can be any text, such as name of a pathway, genes, or
    a complex sentence.

    The objects returned are summaries of GO-CAM models; they do not contain full
    details. Use `lookup_gocam` to retrieve full details of a model.

    This tool uses a retrieval method that is not guaranteed to always return
    complete results, and some results may be less relevant than others.
    You MAY use your judgment in filtering these.

    Args:
        ctx: The run context
        query: The search query text

    Returns:
        List[Dict]: List of GOCAM models matching the query

    Raises:
        ModelRetry: If no model matches the query, or if the search itself fails
    """
    print(f"SEARCH GOCAMS: {query}")
    try:
        qr = ctx.deps.collection.search(query, index_name="llm", limit=ctx.deps.max_results)
        objs = []
        for score, row in qr.ranked_rows:
            # Flatten each row and attach the retrieval score to it.
            obj = flatten(row)
            obj["relevancy_score"] = score
            objs.append(obj)
            print(f"RESULT: {obj}")

        if not objs:
            raise ModelRetry(f"No GOCAM models found matching the query: {query}. Try a different search term.")

        return objs
    except ModelRetry:
        # Our own retry request: pass it through untouched.
        raise
    except Exception as e:
        # Anything else is reported as a retryable error, keeping the original cause attached.
        raise ModelRetry(f"Error searching GOCAM models: {str(e)}") from e
62
+
63
+
64
async def lookup_gocam_local(ctx: RunContext[GOCAMDependencies], path: str) -> Dict:
    """
    Performs a lookup of a GO-CAM model by its local file path.

    Args:
        ctx: The run context
        path: The local file path of the GO-CAM model

    Returns:
        Dict: The single object loaded from the file

    Raises:
        ModelRetry: If the file is missing, holds no objects, holds more than one
            object, holds a non-dictionary object, or cannot be loaded
    """
    print(f"LOOKUP GOCAM LOCAL: {path}")
    try:
        # Use a separate name so the str-annotated parameter is not rebound to a Path.
        file_path = Path(path)
        if not file_path.exists():
            raise ModelRetry(f"File not found: {file_path}")
        objects = load_objects(file_path)
        if not objects:
            raise ModelRetry(f"No objects found in file: {file_path}")
        if not isinstance(objects, list):
            # Wrap a non-list result so the checks below apply uniformly.
            objects = [objects]
        if len(objects) > 1:
            raise ModelRetry(f"Multiple objects found in file: {file_path}")
        if not isinstance(objects[0], dict):
            raise ModelRetry(f"Object is not a dictionary: {file_path}")
        return objects[0]
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error looking up GO-CAM model: {str(e)}") from e
91
+
92
async def lookup_gocam(ctx: RunContext[GOCAMDependencies], model_id: str) -> Dict:
    """
    Performs a lookup of a GO-CAM model by its ID, and returns the model.

    Args:
        ctx: The run context
        model_id: The ID of the GO-CAM model to look up

    Returns:
        Dict: The GO-CAM model data

    Raises:
        ModelRetry: If no model has the (normalized) ID, or if the lookup fails
    """
    print(f"LOOKUP GOCAM: {model_id}")
    try:
        # Normalize the model ID so it always carries the "gomodel:" prefix.
        if ":" in model_id:
            parts = model_id.split(":")
            if parts[0] != "gomodel":
                # A foreign prefix is swapped for "gomodel"; only the segment
                # right after the first colon is kept.
                model_id = f"gomodel:{parts[1]}"
        else:
            model_id = f"gomodel:{model_id}"

        qr = ctx.deps.collection.find({"id": model_id})
        if not qr.rows:
            raise ModelRetry(f"Could not find GO-CAM model with ID {model_id}. The ID may be incorrect.")
        return qr.rows[0]
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error looking up GO-CAM model {model_id}: {str(e)}") from e
121
+
122
+
123
async def lookup_uniprot_entry(ctx: RunContext[GOCAMDependencies], uniprot_acc: str) -> str:
    """
    Lookup the Uniprot entry for a given Uniprot accession number.

    This can be used to obtain further information about a protein in
    a GO-CAM.

    Args:
        ctx: The run context
        uniprot_acc: The Uniprot accession

    Returns:
        str: Detailed functional and other info about the protein

    Raises:
        ModelRetry: If the entry cannot be found or the retrieval fails
    """
    print(f"LOOKUP UNIPROT: {uniprot_acc}")
    try:
        normalized_acc = normalize_uniprot_id(uniprot_acc)
        uniprot_service = ctx.deps.get_uniprot_service()
        result = uniprot_service.retrieve(normalized_acc, frmt="txt")

        # NOTE(review): the substring test for "Error" also rejects a genuine entry
        # whose text happens to contain that word - confirm what the service
        # actually returns on failure before tightening this check.
        if not result or "Error" in result or "Entry not found" in result:
            raise ModelRetry(f"Could not find UniProt entry for {uniprot_acc}. The accession may be incorrect.")

        return result
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error retrieving UniProt entry for {uniprot_acc}: {str(e)}") from e
151
+
152
+
153
+ # These functions have been removed and replaced with direct use of
154
+ # literature_lookup_pmid, search_literature_web, and retrieve_literature_page
155
+ # from aurelian.agents.literature.literature_tools
156
+
157
+
158
def all_documents() -> Dict:
    """
    List the GO-CAM markdown documents found directly under DOCUMENTS_DIR.

    Returns:
        A dictionary with a single "documents" key. Its value is a list with one
        entry per ``*.md`` file, each holding the file stem as "id", the stem with
        underscores turned into spaces as "title", and the file location as "path".
        The list is empty when DOCUMENTS_DIR does not exist.
    """
    if not DOCUMENTS_DIR.exists():
        return {"documents": []}

    return {
        "documents": [
            {
                "id": md_file.stem,
                "title": md_file.stem.replace("_", " "),
                "path": str(md_file),
            }
            for md_file in DOCUMENTS_DIR.glob("*.md")
        ]
    }
179
+
180
+
181
async def fetch_document(
    ctx: RunContext[GOCAMDependencies],
    name: str,
    format: str = "md"
) -> str:
    """
    Lookup the GO-CAM document by name.

    Args:
        ctx: The run context
        name: The document name (e.g. "How_to_annotate_complexes_in_GO-CAM")
        format: The format of the document (defaults to "md")

    Returns:
        The content of the document

    Raises:
        ModelRetry: If no document matches the name, the file is missing or empty,
            or reading it fails
    """
    # NOTE: `format` is accepted for interface compatibility but is not consulted;
    # only the markdown documents listed by all_documents() are searched.
    print(f"FETCH DOCUMENT: {name}")
    try:
        # Get all available documents
        all_docs = all_documents()

        # Match case-insensitively on either the id (spaces treated as underscores)
        # or the human-readable title; the first match wins.
        name_normalized = name.replace(" ", "_").lower()
        name_lowered = name.lower()
        selected_document = None
        for document in all_docs["documents"]:
            if document["id"].lower() == name_normalized or document["title"].lower() == name_lowered:
                selected_document = document
                break

        if not selected_document:
            available_docs = ", ".join([d["title"] for d in all_docs["documents"]])
            raise ModelRetry(
                f"Could not find document with name '{name}'. "
                f"Available documents: {available_docs}"
            )

        # Get the document file
        path = Path(selected_document["path"])

        if not path.exists():
            raise ModelRetry(f"Document file not found: {path}")

        # Read as UTF-8 explicitly so the result does not depend on the platform's
        # default encoding.
        content = path.read_text(encoding="utf-8")

        if not content.strip():
            raise ModelRetry(f"Document file is empty: {path}")

        return content
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error fetching document: {str(e)}") from e
239
+
240
+
241
async def validate_gocam_model(
    ctx: RunContext[GOCAMDependencies],
    model_data: Union[str, Dict[str, Any]],
    format: str = "json"
) -> Dict[str, Any]:
    """
    Validate a GO-CAM model against the pydantic schema.

    Args:
        ctx: The run context
        model_data: The model data as a JSON/YAML string or dict
        format: The format of the input data (json or yaml)

    Returns:
        Dict with validation results, including success status and errors if any

    Raises:
        ModelRetry: If the format is unsupported, the data cannot be parsed, the
            parsed data is not a mapping, or an unexpected error occurs
    """
    from collections.abc import Mapping

    try:
        # Parse the input data if it's a string
        if isinstance(model_data, str):
            fmt = format.lower()
            if fmt == "json":
                parsed_data = json.loads(model_data)
            elif fmt == "yaml":
                parsed_data = yaml.safe_load(model_data)
            else:
                raise ModelRetry(f"Unsupported format: {format}. Must be 'json' or 'yaml'")
        else:
            parsed_data = model_data

        # A JSON list/scalar or an empty YAML document cannot be expanded with `**`;
        # report that explicitly rather than surfacing an opaque TypeError.
        if not isinstance(parsed_data, Mapping):
            raise ModelRetry(
                "GO-CAM model data must be a mapping of field names to values, "
                f"got {type(parsed_data).__name__}"
            )

        # Validate the model
        try:
            gocam_model = GocamModel(**parsed_data)
            return {
                "valid": True,
                "message": "Model is valid according to GO-CAM schema",
                "model": gocam_model.model_dump(exclude_none=True)
            }
        except ValidationError as e:
            # Schema violations are a normal outcome: return them as data, not as an error.
            errors = [
                {
                    "loc": " -> ".join([str(loc) for loc in error["loc"]]),
                    "msg": error["msg"],
                    "type": error["type"]
                }
                for error in e.errors()
            ]

            return {
                "valid": False,
                "message": "Model validation failed",
                "errors": errors
            }
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error validating GO-CAM model: {str(e)}") from e
File without changes
@@ -0,0 +1,62 @@
1
+ """
2
+ Agent for creating LinkML schemas and example datasets
3
+ """
4
+ from typing import List
5
+
6
+ from aurelian.agents.filesystem.filesystem_tools import download_url_as_markdown, inspect_file
7
+ from aurelian.agents.linkml.linkml_config import LinkMLDependencies
8
+ from aurelian.agents.linkml.linkml_tools import validate_then_save_schema, validate_data
9
+ from aurelian.utils.async_utils import run_sync
10
+ from pydantic_ai import Agent, Tool
11
+
12
# System prompt for the LinkML agent.
SYSTEM = """
You are an expert data modeler able to assist in creating LinkML schemas.
Always provide the schema in LinkML YAML, unless asked otherwise.
Before providing the user with a schema, you MUST ALWAYS validate it using the `validate_schema` tool.
If there are mistakes, iterate on the schema until it validates.
If it is too hard, ask the user for further guidance.
If you are asked to make schemas for a file, you can look at files using
the `inspect_file` tool.
Always be transparent and show your working and reasoning. If you validate the schema,
tell the user you did this.
You should assume the user is technically competent, and can interpret both YAML
schema files, and example data files in JSON or YAML.
"""

linkml_agent = Agent(
    model="openai:gpt-4o",
    deps_type=LinkMLDependencies,
    tools=[
        Tool(inspect_file),
        Tool(download_url_as_markdown),
        # The prompt instructs the model to call `validate_schema`; register the
        # validate-and-save function under that name so the instruction resolves
        # to an existing tool.
        Tool(validate_then_save_schema, name="validate_schema"),
        Tool(validate_data),
    ],
    system_prompt=SYSTEM
)
37
+
38
+
39
def chat(workdir: str, **kwargs):
    """
    Build a Gradio chat interface for the LinkML agent.

    Args:
        workdir: Location assigned to the agent's working directory
        **kwargs: Additional arguments forwarded to the agent run

    Returns:
        A Gradio ChatInterface wired to the LinkML agent
    """
    import gradio as gr
    deps = LinkMLDependencies()
    deps.workdir.location = workdir

    def get_info(query: str, history: List[str]) -> str:
        """Run the agent on the query, with any prior history appended as text."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        prompt = query
        if history:
            # Start the history section on its own line; previously the header was
            # glued directly onto the end of the user's query.
            prompt += "\n\n## History"
            prompt += "".join(f"\n{h}" for h in history)
        result = run_sync(lambda: linkml_agent.run_sync(prompt, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="LinkML AI Assistant",
        examples=[
            ["Generate a schema for modeling the chemical components of foods"],
            ["Generate a schema for this data: {name: 'joe', age: 22}"],
        ]
    )
@@ -0,0 +1,48 @@
1
+ from dataclasses import dataclass, field
2
+ import os
3
+ from typing import List, Optional
4
+
5
+ from pydantic_ai import AgentRunError
6
+
7
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
8
+
9
+
10
@dataclass
class LinkMLDependencies(HasWorkdir):
    """Dependencies for the LinkML agent: a working directory plus a file-loading helper."""
    # Working directory; a default WorkDir is created in __post_init__ when omitted.
    workdir: Optional[WorkDir] = None

    def __post_init__(self):
        """Fall back to a default WorkDir when none was supplied."""
        if self.workdir is not None:
            return
        self.workdir = WorkDir()

    def parse_objects_from_file(self, data_file: str) -> List[dict]:
        """
        Load the objects stored in a file of the working directory.

        Args:
            data_file: Name of the data file, resolved against the working directory

        Returns:
            Whatever `load_objects` produces for the resolved path

        Raises:
            AgentRunError: If the resolved file does not exist
        """
        # Imported lazily, as in the original implementation.
        from linkml_store.utils.format_utils import load_objects

        resolved = self.workdir.get_file_path(data_file)
        if resolved.exists():
            return load_objects(resolved)
        raise AgentRunError(f"Data file {data_file} does not exist")
36
+
37
+
38
def get_config() -> LinkMLDependencies:
    """
    Build the LinkML dependencies from the environment.

    When AURELIAN_WORKDIR is set to a non-empty value it is used as the location of
    the working directory; otherwise no working directory is passed.

    Returns:
        LinkMLDependencies: The LinkML dependencies
    """
    location = os.environ.get("AURELIAN_WORKDIR", None)
    if location:
        chosen_workdir = WorkDir(location=location)
    else:
        chosen_workdir = None

    return LinkMLDependencies(workdir=chosen_workdir)
@@ -0,0 +1,66 @@
1
+ """
2
+ Evaluation module for the LinkML agent.
3
+
4
+ This module implements evaluations for the LinkML agent using the pydantic-ai-evals framework.
5
+ """
6
+ import asyncio
7
+ import sys
8
+ from typing import Optional, Any, Dict, Callable, Awaitable
9
+
10
+ from aurelian.evaluators.model import MetadataDict, metadata
11
+ from aurelian.evaluators.substring_evaluator import SubstringEvaluator
12
+ from pydantic_evals import Case, Dataset
13
+
14
+ from aurelian.agents.linkml.linkml_agent import linkml_agent
15
+ from aurelian.agents.linkml.linkml_config import LinkMLDependencies
16
+
17
class LinkMLMetadata(Dict[str, Any]):
    """Plain string-keyed dictionary used as metadata for LinkML evaluations."""
20
+
21
# Individual evaluation cases. Each `expected_output` is a short marker string
# (presumably matched as a substring of the agent's answer - verify against
# SubstringEvaluator).

# Output is expected to contain schema classes.
case1 = Case(
    name="schema_generation_food",
    metadata=metadata("medium", "schema_generation"),
    inputs="Generate a schema for modeling the chemical components of foods",
    expected_output="class",
)

# The agent is expected to infer a Person class.
case2 = Case(
    name="schema_from_json",
    metadata=metadata("easy", "schema_inference"),
    inputs="Generate a schema for this data: {name: 'joe', age: 22}",
    expected_output="Person",
)

# Checks that the agent can validate schema snippets.
case3 = Case(
    name="schema_validation",
    metadata=metadata("medium", "schema_validation"),
    inputs="Is this a valid LinkML schema? types: string: {base: str}",
    expected_output="valid",
)

# The answer should mention multivalued attributes.
case4 = Case(
    name="schema_recommendations",
    metadata=metadata("hard", "best_practices"),
    inputs="What's the best way to model a many-to-many relationship in LinkML?",
    expected_output="multivalued",
)
49
+
50
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """
    Assemble the evaluation dataset for the LinkML agent.

    Returns:
        Dataset holding the four module-level cases, with a single
        SubstringEvaluator applied at dataset level
    """
    return Dataset(
        cases=[case1, case2, case3, case4],
        evaluators=[SubstringEvaluator()],
    )
@@ -0,0 +1,45 @@
1
+ """
2
+ Gradio UI for the LinkML agent.
3
+ """
4
+ from typing import List, Optional
5
+
6
+ import gradio as gr
7
+
8
+ from aurelian.agents.linkml.linkml_agent import linkml_agent
9
+ from aurelian.agents.linkml.linkml_config import LinkMLDependencies
10
+ from aurelian.utils.async_utils import run_sync
11
+
12
+
13
def chat(deps: Optional[LinkMLDependencies] = None, **kwargs):
    """
    Initialize a chat interface for the LinkML agent.

    Args:
        deps: Optional dependencies configuration; a default LinkMLDependencies is
            created when omitted
        **kwargs: Additional arguments to pass to the agent

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = LinkMLDependencies()

    def get_info(query: str, history: List[str]) -> str:
        """Run the agent on the query, with any prior history appended as text."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        prompt = query
        if history:
            # Start the history section on its own line; previously the header was
            # glued directly onto the end of the user's query.
            prompt += "\n\n## History"
            prompt += "".join(f"\n{h}" for h in history)
        result = run_sync(lambda: linkml_agent.run_sync(prompt, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="LinkML AI Assistant",
        examples=[
            ["Generate a schema for modeling the chemical components of foods"],
            ["Generate a schema for this data: {name: 'joe', age: 22}"],
        ]
    )
@@ -0,0 +1,186 @@
1
+ """
2
+ MCP tools for creating LinkML schemas and example datasets
3
+ """
4
+ import os
5
+
6
+ from mcp.server.fastmcp import FastMCP
7
+
8
+ import aurelian.agents.filesystem.filesystem_tools as fst
9
+ from aurelian.agents.linkml.linkml_agent import SYSTEM
10
+ from aurelian.agents.linkml.linkml_config import LinkMLDependencies
11
+ from aurelian.agents.linkml.linkml_tools import validate_then_save_schema, ValidationResult
12
+ from aurelian.utils.search_utils import web_search
13
+
14
+ # Initialize FastMCP server
15
+ mcp = FastMCP("linkml", instructions=SYSTEM)
16
+
17
+ from linkml_runtime.loaders import yaml_loader
18
+ from linkml_runtime.linkml_model import SchemaDefinition
19
+ from linkml.validator import validate
20
+ from pydantic_ai import RunContext, ModelRetry
21
+
22
+ from aurelian.dependencies.workdir import WorkDir
23
+
24
def deps() -> LinkMLDependencies:
    """
    Build LinkML dependencies bound to the configured working directory.

    The directory location is read from the AURELIAN_WORKDIR environment
    variable, falling back to /tmp/aurelian when the variable is unset.

    Returns:
        A new LinkMLDependencies whose `workdir` is a WorkDir at that location.
    """
    linkml_deps = LinkMLDependencies()
    linkml_deps.workdir = WorkDir(os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian"))
    return linkml_deps
29
+
30
def ctx() -> RunContext[LinkMLDependencies]:
    """
    Build a run context for calling agent tools from the MCP server.

    Only `deps` is populated (via `deps()`); `model`, `usage` and `prompt`
    are passed as None.

    Returns:
        A RunContext carrying freshly built LinkML dependencies.
    """
    return RunContext[LinkMLDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
36
+
37
+
38
@mcp.tool()
async def validate_schema(schema: str, save_to_file: str="schema.yaml") -> ValidationResult:
    """
    Validate a LinkML schema.

    Args:
        schema: schema (as yaml) to validate. Do not truncate, always pass the whole schema.
        save_to_file: optional file name to save the schema to. Defaults to schema.yaml

    Returns:
        The ValidationResult produced by `validate_then_save_schema`.
    """
    run_context = ctx()
    return await validate_then_save_schema(run_context, schema, save_to_file)
51
+
52
+
53
@mcp.tool()
async def inspect_file(data_file: str) -> str:
    """
    Inspect a file in the working directory.

    Args:
        data_file: name of file

    Returns:
        Whatever the filesystem tools' `inspect_file` returns for that file.
    """
    run_context = ctx()
    return await fst.inspect_file(run_context, data_file)
66
+
67
+
68
@mcp.tool()
async def list_files() -> str:
    """
    List files in the working directory.

    Returns:
        The file names reported by the working directory, one per line.
    """
    file_names = deps().workdir.list_file_names()
    return "\n".join(file_names)
80
+
81
@mcp.tool()
async def write_to_file(data: str, file_name: str) -> str:
    """
    Write data to a file in the working directory.

    Args:
        data: the content to write
        file_name: name of the target file in the working directory

    Returns:
        A confirmation message naming the file that was written.
    """
    print(f"Writing data to file: {file_name}")
    workdir = deps().workdir
    workdir.write_file(file_name, data)
    return f"Data written to {file_name}"
97
+
98
@mcp.tool()
async def validate_data(schema: str, data_file: str) -> str:
    """
    Validate data file against a schema.

    This assumes the data file is present in the working directory.
    You can write data to the working directory using the `write_to_file` tool.

    Args:
        schema: the schema (as a YAML string)
        data_file: the name of the data file in the working directory

    Returns:
        A message reporting success, the results of the first instance that
        fails validation, or the error raised while loading the schema or
        processing the data.
    """
    print(f"Validating data file: {data_file} using schema: {schema}")
    # Load the schema first so schema problems are reported separately
    # from data problems.
    try:
        schema_def = yaml_loader.loads(schema, target_class=SchemaDefinition)
    except Exception as schema_err:
        return f"Schema does not validate: {schema_err}"
    try:
        instances = deps().parse_objects_from_file(data_file)
        for instance in instances:
            print(f"Validating {instance}")
            report = validate(instance, schema_def)
            print(f"Validation report: {report}")
            # Stop at the first instance that produces any validation results.
            if report.results:
                return f"Data does not validate:\n{report.results}"
        return f"{len(instances)} instances all validate successfully"
    except Exception as data_err:
        return f"Data does not validate: {data_err}"
130
+
131
+
132
@mcp.tool()
async def search_web(query: str) -> str:
    """
    Search the web using a text query.

    Note, this will not retrieve the full content, for that you
    should use `retrieve_web_page`.

    Args:
        query: Text query

    Returns: matching web pages plus summaries
    """
    print(f"Web Search: {query}")
    search_results = web_search(query)
    return search_results
147
+
148
@mcp.tool()
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: URL of the web page

    Returns:
        The contents of the web page.
    """
    print(f"Fetch URL: {url}")
    # Imported as a module: this tool shares its name with the helper, so the
    # helper is reached through the module namespace.
    import aurelian.utils.search_utils as su
    page_contents = su.retrieve_web_page(url)
    return page_contents
162
+
163
+
164
@mcp.tool()
async def download_web_page(url: str, local_file_name: str) -> str:
    """
    Download contents of a web page.

    Args:
        url: URL of the web page
        local_file_name: Name of the local file to save the contents to

    Returns:
        str: message
    """
    print(f"Fetch URL: {url}")
    import aurelian.utils.search_utils as su
    page_contents = su.retrieve_web_page(url)
    workdir = deps().workdir
    workdir.write_file(local_file_name, page_contents)
    return f"Data written to {local_file_name}"
182
+
183
+
184
if __name__ == "__main__":
    # Start the MCP server using the stdio transport when run as a script.
    mcp.run(transport="stdio")