aurelian 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +95 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +86 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +47 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +73 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +54 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +52 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
  121. Regulatory Processes in GO-CAM.docx +0 -0
  122. Regulatory Processes in GO-CAM.pdf +0 -0
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +243 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +64 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +181 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +75 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +0 -0
  160. aurelian/agents/monarch/monarch_agent.py +45 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +112 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +57 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/paperqa/__init__.py +27 -0
  176. aurelian/agents/paperqa/paperqa_agent.py +66 -0
  177. aurelian/agents/paperqa/paperqa_cli.py +305 -0
  178. aurelian/agents/paperqa/paperqa_config.py +142 -0
  179. aurelian/agents/paperqa/paperqa_gradio.py +90 -0
  180. aurelian/agents/paperqa/paperqa_mcp.py +155 -0
  181. aurelian/agents/paperqa/paperqa_tools.py +566 -0
  182. aurelian/agents/phenopackets/__init__.py +3 -0
  183. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  184. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  185. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  186. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  187. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  188. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  189. aurelian/agents/rag/__init__.py +40 -0
  190. aurelian/agents/rag/rag_agent.py +84 -0
  191. aurelian/agents/rag/rag_config.py +80 -0
  192. aurelian/agents/rag/rag_gradio.py +67 -0
  193. aurelian/agents/rag/rag_mcp.py +107 -0
  194. aurelian/agents/rag/rag_tools.py +189 -0
  195. aurelian/agents/rag_agent.py +54 -0
  196. aurelian/agents/robot/__init__.py +0 -0
  197. aurelian/agents/robot/assets/__init__.py +3 -0
  198. aurelian/agents/robot/assets/template.md +384 -0
  199. aurelian/agents/robot/robot_config.py +25 -0
  200. aurelian/agents/robot/robot_gradio.py +46 -0
  201. aurelian/agents/robot/robot_mcp.py +100 -0
  202. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  203. aurelian/agents/robot/robot_tools.py +50 -0
  204. aurelian/agents/talisman/__init__.py +3 -0
  205. aurelian/agents/talisman/__main__.py +17 -0
  206. aurelian/agents/talisman/cli.py +70 -0
  207. aurelian/agents/talisman/run_talisman.py +18 -0
  208. aurelian/agents/talisman/talisman_agent.py +143 -0
  209. aurelian/agents/talisman/talisman_config.py +66 -0
  210. aurelian/agents/talisman/talisman_gradio.py +50 -0
  211. aurelian/agents/talisman/talisman_mcp.py +75 -0
  212. aurelian/agents/talisman/talisman_tools.py +962 -0
  213. aurelian/agents/ubergraph/__init__.py +40 -0
  214. aurelian/agents/ubergraph/ubergraph_agent.py +72 -0
  215. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  216. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  217. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  218. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  219. aurelian/agents/uniprot/__init__.py +0 -0
  220. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  221. aurelian/agents/uniprot/uniprot_config.py +43 -0
  222. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  223. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  224. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  225. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  226. aurelian/agents/web/__init__.py +0 -0
  227. aurelian/agents/web/web_config.py +27 -0
  228. aurelian/agents/web/web_gradio.py +48 -0
  229. aurelian/agents/web/web_mcp.py +50 -0
  230. aurelian/agents/web/web_tools.py +121 -0
  231. aurelian/chat.py +23 -0
  232. aurelian/cli.py +1004 -0
  233. aurelian/dependencies/__init__.py +0 -0
  234. aurelian/dependencies/workdir.py +78 -0
  235. aurelian/evaluators/model.py +9 -0
  236. aurelian/evaluators/substring_evaluator.py +30 -0
  237. aurelian/mcp/__init__.py +0 -0
  238. aurelian/mcp/amigo_mcp_test.py +86 -0
  239. aurelian/mcp/config_generator.py +123 -0
  240. aurelian/mcp/example_config.json +43 -0
  241. aurelian/mcp/generate_sample_config.py +37 -0
  242. aurelian/mcp/gocam_mcp_test.py +126 -0
  243. aurelian/mcp/linkml_mcp_tools.py +190 -0
  244. aurelian/mcp/mcp_discovery.py +87 -0
  245. aurelian/mcp/mcp_test.py +31 -0
  246. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  247. aurelian/tools/__init__.py +0 -0
  248. aurelian/tools/web/__init__.py +0 -0
  249. aurelian/tools/web/url_download.py +51 -0
  250. aurelian/utils/__init__.py +0 -0
  251. aurelian/utils/async_utils.py +18 -0
  252. aurelian/utils/data_utils.py +32 -0
  253. aurelian/utils/documentation_manager.py +59 -0
  254. aurelian/utils/doi_fetcher.py +238 -0
  255. aurelian/utils/ontology_utils.py +68 -0
  256. aurelian/utils/pdf_fetcher.py +23 -0
  257. aurelian/utils/process_logs.py +100 -0
  258. aurelian/utils/pubmed_utils.py +238 -0
  259. aurelian/utils/pytest_report_to_markdown.py +67 -0
  260. aurelian/utils/robot_ontology_utils.py +112 -0
  261. aurelian/utils/search_utils.py +95 -0
  262. aurelian-0.1.0.dist-info/LICENSE +22 -0
  263. aurelian-0.1.0.dist-info/METADATA +109 -0
  264. aurelian-0.1.0.dist-info/RECORD +266 -0
  265. aurelian-0.1.0.dist-info/WHEEL +4 -0
  266. aurelian-0.1.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,139 @@
1
+ """
2
+ Agent for creating ROBOT templates and compiling to ontologies.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ from typing import List, Dict
6
+
7
+ from aurelian.agents.filesystem.filesystem_tools import inspect_file, download_url_as_markdown, list_files
8
+ from aurelian.agents.robot.robot_config import RobotDependencies
9
+ from aurelian.agents.robot.robot_tools import write_and_compile_template, fetch_documentation
10
+ from aurelian.utils.async_utils import run_sync
11
+ from aurelian.utils.search_utils import web_search
12
+ from pydantic_ai import Agent, RunContext, Tool
13
+
14
+ from aurelian.dependencies.workdir import WorkDir, HasWorkdir
15
+
16
# Base system prompt for the ROBOT template agent. It is sent verbatim to the
# model, so the CSV examples below must themselves be well-formed templates.
SYSTEM = """
Background:

Your job is to iteratively build an ontology via *robot templates*.
These are tabular data (CSV syntax) with a special header that compiles to OWL.

For example, if the request is for an animals ontology, you could start with a CSV with columns Name, ParentTaxon, Eats,
with rows filled out with some example animals.

The main tool you will use is `write_and_compile_template` which writes the template content to
a local file and compiles it to OWL. This also takes a list of ontologies to import, which
should also be on the file system.

Sometimes you may need to work with multiple dependent ontologies. For example, if you have a vehicle class
hierarchy in `vehicles.csv` and parts in `parts.csv`, and vehicles depends on parts, you would first iterate
on `parts.csv` (e.g. calling `write_and_compile_template`, with no imports), then write vehicles using
`write_and_compile_template` with `['parts.csv']` as the dependencies/imports.

## Robot template CSV structure:

Robot template files have an additional metadata row below the header row. This is called the "template row". It specifies how each column maps to OWL. Typical values will be "ID" for the unique identifier, LABEL for the name, "SC %" for the parent class. Consult the docs for details. Note that this is always beneath the main header row. This can seem a bit duplicative, but that's OK. An example might be:

identifier,name,parent,synonyms
ID,LABEL,SC %,A oboInOwl:hasExactSynonym
ANIMAL:1,chicken,aves,Gallus gallus|chick

The first row is a normal header with human-friendly columns. The 2nd is the robot template row. After that are the usual data rows.

Here "A oboInOwl:hasExactSynonym" in the template row for "synonyms" indicates this column should be interpreted as an owl annotation using that property. Generally the value for annotations is literals/text.

Another common piece of metadata is definitions. For OBO ontologies, IAO must be used here. For non-OBO ontologies people may want to use skos.

Some ontologies may need to use other relationships. For part-of parents, use "SC 'part of' some %" (this means that the class indicated by the ID is part-of some X, where X is the value in the part-of column). Use other relationships as appropriate. If you are unclear about the semantics, then consult the docs. You can also work through the docs with the user.

Note that any terms referenced as parents or in logical axioms such as part-of should be in the ontology, so make sure they have rows in the CSV. It's OK to use the label. For example:

identifier,primary_name,parent,madeOf
ID,LABEL,SC %,SC 'made of' some %
VON:1,vehicle,,
VON:2,car,vehicle,wheel|chassis
VON:3,wheel,car part,
VON:4,chassis,car part,

If in doubt, use "A <propertyName>" for metadata and "SC '<relationName>' some %" for logical relationships / graph edges.

If your working dir doesn't contain any object or annotation properties you can make them in a separate
imported ontology. TYPE is useful for determining the OWL type, for example:

```
ID,Label,Type,Definition
ID,LABEL,TYPE,A IAO:0000115
IAO:0000115,definition,owl:AnnotationProperty
BFO:0000050,part_of,owl:ObjectProperty
```

If you need any more detailed documentation, you can fetch it with `fetch_documentation`

You can look at files with `inspect_file`

Use scientific language as far as possible. For IDs, these should be numeric curies unless the user requests otherwise. If the user wants to substitute actual ontology term IDs for these, use lookup_curies_get_lookup_get
"""
77
+
78
+
79
+
80
+
81
# Agent that drafts ROBOT templates and compiles them to OWL.
# Only the compile tool gets an explicit retry budget; the remaining tools are
# registered with the Tool defaults.
robot_ontology_agent = Agent(
    model="openai:gpt-4o",
    deps_type=RobotDependencies,
    system_prompt=SYSTEM,
    tools=[
        Tool(write_and_compile_template, max_retries=2),
        *(
            Tool(plain_tool)
            for plain_tool in (
                fetch_documentation,
                inspect_file,
                list_files,
                download_url_as_markdown,
            )
        ),
    ],
)
93
+
94
+
95
@robot_ontology_agent.system_prompt
def include_templates_in_prompt(ctx: RunContext[RobotDependencies]) -> str:
    """
    Add the current contents of the working directory to the system prompt.

    Each file is rendered as its name, a ``---`` separator line and then the
    file content, so the model can tell where one template ends and the next
    begins.

    Args:
        ctx: run context; ``ctx.deps.workdir`` supplies the file names and contents.

    Returns:
        str: prompt fragment listing every file in the working directory, or
            just the header line when the directory is empty.
    """
    workdir = ctx.deps.workdir
    header = "Working directory files/templates:"
    file_names = workdir.list_file_names()
    if not file_names:
        return header
    # Name, separator and content each go on their own line; previously the
    # header ran straight into the first file name and "---" into the content.
    sections = [f"{name}\n---\n{workdir.read_file(name)}" for name in file_names]
    return "\n".join([header, *sections]) + "\n"
105
+
106
+
107
@robot_ontology_agent.system_prompt
def include_prefixes_in_prompt(ctx: RunContext[RobotDependencies]) -> str:
    """
    Add the configured prefix map to the system prompt.

    Args:
        ctx: run context; ``ctx.deps.prefix_map`` is interpolated as-is.

    Returns:
        str: a single ``Prefixes: ...`` line.
    """
    return f"Prefixes: {ctx.deps.prefix_map}"
111
+
112
+
113
+
114
+
115
+
116
+
117
def chat(workdir: str, **kwargs):
    """
    Build a Gradio chat interface around the ROBOT ontology agent.

    Args:
        workdir: location assigned to the agent's working directory.
        **kwargs: extra keyword arguments forwarded to
            ``robot_ontology_agent.run_sync`` on every turn.

    Returns:
        The configured ``gr.ChatInterface`` (not yet launched).
    """
    # Imported lazily so that importing this module does not require gradio.
    import gradio as gr

    deps = RobotDependencies()
    deps.workdir.location = workdir

    def render_turn(turn) -> str:
        # With type="messages" Gradio is expected to pass each turn as a dict
        # with "role"/"content" keys; anything else is stringified unchanged.
        if isinstance(turn, dict):
            return f"{turn.get('role', 'unknown')}: {turn.get('content', '')}"
        return str(turn)

    def get_info(query: str, history: List[str]) -> str:
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Start the history on its own line so the heading is not glued
            # onto the end of the user's question.
            turns = "\n".join(render_turn(h) for h in history)
            query = f"{query}\n\n## History\n{turns}"
        result = run_sync(lambda: robot_ontology_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="robot AI Assistant",
        examples=[
            ["Create an ontology of snacks"],
        ]
    )
@@ -0,0 +1,50 @@
1
+ from typing import Optional, List
2
+
3
+ from pydantic_ai import RunContext, ModelRetry
4
+
5
+ from aurelian.agents.robot.assets import ROBOT_ONTOLOGY_AGENT_CONTENTS_DIR
6
+ from aurelian.agents.robot.robot_config import RobotDependencies
7
+ from aurelian.utils.robot_ontology_utils import run_robot_template_command
8
+
9
+
10
async def write_and_compile_template(ctx: RunContext[RobotDependencies], template: str, save_to_file: str = "core.csv", import_ontologies: Optional[List[str]] = None) -> str:
    """
    Adds a template to the file system and compiles it to OWL.

    Args:
        ctx: context
        template: robot template as string. Do not truncate, always pass the whole template, including header.
        save_to_file: file name to save the template to. Defaults to core.csv. The template is written
            to this file before compilation, so the file is updated even if compilation fails.
        import_ontologies: list of ontologies to import. These should be files in the working directory.

    Returns:
        str: message naming the path of the compiled output.

    Raises:
        ModelRetry: if writing or compiling the template fails; the message carries the
            underlying error so the model can correct the template.
    """
    print(f"Validating template: {template}")
    try:
        # The compile command is handed the file name rather than the content,
        # so the template has to be in the working directory first.
        ctx.deps.workdir.write_file(save_to_file, template)
        # No trailing comma after the call: it used to turn output_path into a
        # 1-tuple, which then leaked into the returned message as "('...',)".
        output_path = run_robot_template_command(
            ctx.deps.workdir,
            save_to_file,
            import_ontologies=import_ontologies,
            prefix_map=ctx.deps.prefix_map,
            output_path=None,
        )
    except Exception as e:
        raise ModelRetry(f"Template does not compile: {e}") from e
    return f"Template compiled to {output_path}"
38
+
39
+
40
async def fetch_documentation(ctx: RunContext[RobotDependencies]) -> str:
    """
    Fetch the documentation for the robot ontology agent.

    Reads ``template.md`` from ``ROBOT_ONTOLOGY_AGENT_CONTENTS_DIR``.

    Args:
        ctx: context (unused; required by the tool signature)

    Returns:
        str: documentation
    """
    # Context manager closes the handle promptly; explicit encoding keeps the
    # result independent of the platform's default locale.
    with open(ROBOT_ONTOLOGY_AGENT_CONTENTS_DIR / "template.md", encoding="utf-8") as doc_file:
        return doc_file.read()
@@ -0,0 +1,3 @@
1
+ """
2
+ Talisman agent package for advanced gene analysis using UniProt and NCBI Entrez.
3
+ """
@@ -0,0 +1,17 @@
1
#!/usr/bin/env python3
"""
Main entry point to run the talisman agent.
"""
import os
import sys
# NOTE(review): confirm that the installed pydantic_ai version exports `chat`.
from pydantic_ai import chat

# Add the parent directory to the path for absolute imports.
# Three levels up from this file's directory (talisman -> agents -> aurelian)
# is the directory that contains the `aurelian` package. This must run before
# the `aurelian` imports below.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

from aurelian.agents.talisman.talisman_agent import talisman_agent
from aurelian.agents.talisman.talisman_config import get_config

if __name__ == "__main__":
    # Build the agent dependencies and hand both to the chat helper.
    config = get_config()
    chat(talisman_agent, deps=config)
@@ -0,0 +1,70 @@
1
+ """
2
+ CLI interface for the talisman agent.
3
+ This may not be in the original code, but let's add it to make sure it's properly configured.
4
+ """
5
+ import logging
6
+ import re
7
+ from pydantic_ai import RunContext
8
+
9
+ from aurelian.agents.talisman.talisman_config import TalismanConfig
10
+ from aurelian.agents.talisman.talisman_tools import GeneSetAnalysis, FunctionalTerm, GeneSummary
11
+
12
# Heading patterns shared by the presence checks and the section extractors.
_NARRATIVE_HEADING = r'^\s*##\s*Narrative'
_FUNCTIONAL_TERMS_HEADING = r'^\s*##\s*Functional Terms Table'
_GENE_SUMMARY_HEADING = r'^\s*##\s*Gene Summary Table'
# A section runs until the next level-2 heading or the end of the text.
_NEXT_HEADING_OR_END = r'(?=^\s*##\s|\Z)'
_SECTION_FLAGS = re.MULTILINE | re.DOTALL

_DEFAULT_NARRATIVE = "## Narrative\nThese genes may have related functions as indicated in the gene summary table.\n\n"


def _cut(text, match):
    """Return *text* with the matched span replaced by a single newline."""
    return text[:match.start()] + "\n" + text[match.end():]


def _gene_ids_from_table(gene_table):
    """Collect the first-column values of the data rows of a markdown table."""
    gene_ids = []
    for line in gene_table.split('\n'):
        row = line.strip()
        # Skip non-table lines and the header row.
        if '|' not in row or 'ID |' in row:
            continue
        # Skip separator rows such as |---|, | --- | or |:--|.
        if '-' in row and not row.strip('|:- \t'):
            continue
        cells = [cell.strip() for cell in row.strip('|').split('|')]
        gene_id = cells[0] if cells else ''
        if gene_id and gene_id != 'ID':
            gene_ids.append(gene_id)
    return gene_ids


def _build_functional_terms(gene_table):
    """Build a one-row placeholder functional terms table from the gene table."""
    gene_ids = _gene_ids_from_table(gene_table)
    return (
        "## Functional Terms Table\n"
        "| Functional Term | Genes | Source |\n"
        "|-----------------|-------|--------|\n"
        f"| Gene set | {', '.join(gene_ids)} | Analysis |\n\n"
    )


def format_talisman_output(result):
    """Format the talisman output to ensure it always has all three sections.

    The three sections are "## Narrative", "## Functional Terms Table" and
    "## Gene Summary Table". Existing sections are kept; missing ones are
    synthesised from what is available.

    Args:
        result: markdown text produced by the talisman agent.

    Returns:
        ``result`` unchanged when all three sections are already present or
        when no gene summary table can be found; otherwise a rebuilt document
        of the form ``# Gene Set Analysis`` + narrative + functional terms +
        gene summary table.
    """
    logging.info("Post-processing talisman output")

    # Check if output already has proper sections
    has_narrative = re.search(_NARRATIVE_HEADING, result, re.MULTILINE) is not None
    has_functional_terms = re.search(_FUNCTIONAL_TERMS_HEADING, result, re.MULTILINE) is not None
    has_gene_summary = re.search(_GENE_SUMMARY_HEADING, result, re.MULTILINE) is not None

    # If all sections are present, return as is
    if has_narrative and has_functional_terms and has_gene_summary:
        return result

    # The table runs up to the first blank line, the next heading or the end
    # of the text. (A bare `$` under re.MULTILINE would stop the lazy match at
    # the end of the first table row, truncating the table.)
    gene_match = re.search(
        _GENE_SUMMARY_HEADING + r'\s*\n(.*?)(?=\n[ \t]*\n|^\s*##\s|\Z)',
        result,
        _SECTION_FLAGS,
    )
    # If no gene table was found, return the original result
    if gene_match is None:
        return result

    gene_table = gene_match.group(0).strip()
    remainder = _cut(result, gene_match)

    # Functional terms: reuse the existing section, otherwise synthesise one.
    ft_match = None
    if has_functional_terms:
        ft_match = re.search(
            _FUNCTIONAL_TERMS_HEADING + r'.*?' + _NEXT_HEADING_OR_END,
            remainder,
            _SECTION_FLAGS,
        )
    if ft_match is not None:
        functional_terms = ft_match.group(0).strip() + "\n\n"
        # Remove it from the leftover text so it is not repeated in the narrative.
        remainder = _cut(remainder, ft_match)
    else:
        functional_terms = _build_functional_terms(gene_table)

    # Narrative: keep an existing section; otherwise promote the leftover free
    # text, falling back to a generic sentence when there is none.
    narrative_match = None
    if has_narrative:
        narrative_match = re.search(
            _NARRATIVE_HEADING + r'.*?' + _NEXT_HEADING_OR_END,
            remainder,
            _SECTION_FLAGS,
        )
    if narrative_match is not None:
        narrative_section = narrative_match.group(0).strip() + "\n\n"
    else:
        # Drop a leading document title so it is not duplicated below.
        free_text = re.sub(
            r'\A\s*#[ \t]+Gene Set Analysis[ \t]*(?:\n|\Z)', '', remainder
        ).strip()
        if free_text:
            narrative_section = "## Narrative\n" + free_text + "\n\n"
        else:
            narrative_section = _DEFAULT_NARRATIVE

    # Reconstruct the output with all sections
    return "# Gene Set Analysis\n\n" + narrative_section + functional_terms + gene_table
@@ -0,0 +1,18 @@
1
#!/usr/bin/env python3
"""
Standalone script to run the talisman agent directly.
"""
import os
import sys
# NOTE(review): confirm that the installed pydantic_ai version exports `chat`.
from pydantic_ai import chat

# Add the src directory to the path for imports.
# This file lives in aurelian/agents/talisman/, so the directory that contains
# the `aurelian` package is three levels above this file's directory (the same
# depth the sibling __main__.py uses); four levels overshoots it by one.
src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
sys.path.insert(0, src_dir)

from aurelian.agents.talisman.talisman_agent import talisman_agent
from aurelian.agents.talisman.talisman_config import get_config

if __name__ == "__main__":
    # Build the agent dependencies and hand both to the chat helper.
    config = get_config()
    chat(talisman_agent, deps=config)
@@ -0,0 +1,143 @@
1
+ """
2
+ Agent for working with gene information using the UniProt API and NCBI Entrez.
3
+ Provides structured information in the form of Narrative, Functional Terms Table, and Gene Summary Table.
4
+ """
5
+ from pydantic_ai import Agent
6
+
7
+ from .talisman_config import TalismanConfig, get_config
8
+ from .talisman_tools import (
9
+ get_gene_description,
10
+ get_gene_descriptions,
11
+ get_genes_from_list,
12
+ analyze_gene_set
13
+ )
14
+
15
+ # System prompt for the Talisman agent
16
+ TALISMAN_SYSTEM_PROMPT = """
17
+ You are a helpful assistant that specializes in providing gene information using both UniProt and NCBI Entrez.
18
+ You can retrieve gene descriptions for single gene IDs or lists of multiple gene IDs, and analyze gene sets to identify functional relationships.
19
+
20
+ You can:
21
+ - Get a description for a single gene using its identifier
22
+ - Get descriptions for multiple genes using a list of identifiers
23
+ - Parse a string containing multiple gene identifiers in various formats
24
+ - Analyze sets of genes to identify biological relationships and shared properties
25
+
26
+ Gene identifiers can be:
27
+ - UniProt accession numbers (e.g., P12345, Q934N0)
28
+ - Gene symbols (e.g., INS, TP53, mmoX)
29
+ - Gene names
30
+ - Ensembl IDs (e.g., ENSG00000139618)
31
+ - NCBI protein IDs (e.g., 8YJT_C2)
32
+
33
+ IMPORTANT: The system handles both gene symbols and UniProt identifiers intelligently:
34
+ 1. When a gene symbol is provided, the system will:
35
+ - First attempt to look up the corresponding UniProt protein accession
36
+ - Search UniProt for detailed information
37
+ - Fall back to NCBI Entrez if the gene is not found in UniProt
38
+
39
+ 2. When a UniProt ID is provided, the system will:
40
+ - Directly retrieve the information from UniProt
41
+ - Fall back to NCBI Entrez if needed
42
+
43
+ 3. When a protein ID is provided, the system will:
44
+ - Search the protein database in NCBI Entrez
45
+ - Return detailed protein information
46
+
47
+ This multi-database approach ensures comprehensive coverage of gene and protein information.
48
+
49
+ When returning gene information, present it in a clear, organized manner with:
50
+ - Gene name and symbol
51
+ - Description of gene function
52
+ - Organism information when available
53
+ - Disease associations if relevant
54
+ - Source database (UniProt, NCBI, or both)
55
+
56
+ For multiple genes, organize the results in a table format for easy readability.
57
+
58
+ For gene set analysis, you will receive a detailed summary of:
59
+ - Shared biological processes these genes may participate in
60
+ - Potential protein-protein interactions or functional relationships
61
+ - Common cellular localization patterns
62
+ - Involvement in similar pathways
63
+ - Coordinated activities or cooperative functions
64
+ - Any disease associations that multiple genes in this set share
65
+
66
+ The analysis will cover multiple types of relationships:
67
+ - Functional relationships
68
+ - Pathway relationships
69
+ - Regulatory relationships
70
+ - Localization patterns
71
+ - Physical interactions
72
+ - Genetic interactions
73
+
74
+ For gene set analysis, your output MUST always include three distinct sections:
75
+
76
+ 1. First, a "## Narrative" section providing a concise explanation of the functional and categorical relationships between the genes. This should:
77
+ - Prioritize explanations involving most or all genes in the set
78
+ - Refer to specific subsets of genes when discussing specialized functions
79
+ - Highlight the most significant shared pathways, processes, or disease associations
80
+ - Be clear, concise, and focused on biological meaning
81
+
82
+ 2. Second, a "## Functional Terms Table" that presents key functional terms in a tabular format with these columns:
83
+ - Functional Term: The biological term or concept (e.g., DNA repair, kinase activity)
84
+ - Genes: The genes associated with this term (comma-separated list)
85
+ - Source: The likely source database or ontology (e.g., GO-BP, KEGG, Reactome, GO-MF, GO-CC, Disease)
86
+
87
+ The functional terms should include various types:
88
+ - Gene Ontology biological process terms (e.g., DNA repair, oxidative phosphorylation)
89
+ - Molecular function terms (e.g., kinase activity, DNA binding)
90
+ - Cellular component/localization terms (e.g., nucleus, plasma membrane)
91
+ - Pathway names (e.g., glycolysis, MAPK signaling)
92
+ - Disease associations (if relevant)
93
+ - Structural and functional domains/motifs (if relevant)
94
+
95
+ Example of Functional Terms Table:
96
+ ## Functional Terms Table
97
+ | Functional Term | Genes | Source |
98
+ |-----------------|-------|--------|
99
+ | DNA damage response | BRCA1, BRCA2, ATM | GO-BP |
100
+ | Homologous recombination | BRCA1, BRCA2 | Reactome |
101
+ | Tumor suppression | BRCA1, BRCA2, ATM | Disease |
102
+ | Nuclear localization | BRCA1, BRCA2, ATM | GO-CC |
103
+ | Kinase activity | ATM | GO-MF |
104
+ | PARP inhibitor sensitivity | BRCA1, BRCA2, PARP1 | Pathway |
105
+
106
+ 3. Third, a "## Gene Summary Table" with a markdown table summarizing the genes analyzed,
107
+ with the following columns in this exact order:
108
+ - ID: The gene identifier (same as Gene Symbol)
109
+ - Annotation: Genomic coordinates or accession with position information
110
+ - Genomic Context: Information about the genomic location (chromosome, plasmid, etc.)
111
+ - Organism: The organism the gene belongs to
112
+ - Description: The protein/gene function description
113
+
114
+ Example of Gene Summary Table:
115
+ ## Gene Summary Table
116
+ | ID | Annotation | Genomic Context | Organism | Description |
117
+ |-------------|-------------|----------|----------------|------------|
118
+ | BRCA1 | NC_000017.11 (43044295..43125483) | Chromosome 17 | Homo sapiens | Breast cancer type 1 susceptibility protein |
119
+ | BRCA2 | NC_000013.11 (32315474..32400266) | Chromosome 13 | Homo sapiens | Breast cancer type 2 susceptibility protein |
120
+ | ATM | NC_000011.10 (108222484..108369102) | Chromosome 11 | Homo sapiens | ATM serine/threonine kinase |
121
+ | PARP1 | NC_000001.11 (226360251..226408516) | Chromosome 1 | Homo sapiens | Poly(ADP-ribose) polymerase 1 |
122
+
123
+ For bacterial genes, the table format would be:
124
+ | ID | Annotation | Genomic Context | Organism | Description |
125
+ |-------------|-------------|----------|----------------|------------|
126
+ | invA | NC_003197.2 (3038407..3040471, complement) | Chromosome | Salmonella enterica | Invasion protein |
127
+ | DVUA0001 | NC_005863.1 (699..872, complement) | Plasmid pDV | Desulfovibrio vulgaris str. Hildenborough | Hypothetical protein |
128
+
129
+ Use this information to help researchers understand the potential functional relationships between genes.
130
+ """
131
+
132
# Build the Talisman agent; TalismanConfig is the dependency type handed to tools
talisman_agent = Agent(
    deps_type=TalismanConfig,
    system_prompt=TALISMAN_SYSTEM_PROMPT,
    model="openai:gpt-4o",
)

# Register the gene lookup tools, in the same order as before.
# NOTE: analyze_gene_set is currently not registered with this agent:
#talisman_agent.tool(analyze_gene_set)
for _tool in (get_gene_description, get_gene_descriptions, get_genes_from_list):
    talisman_agent.tool(_tool)
del _tool  # keep the module namespace free of the loop variable
@@ -0,0 +1,66 @@
1
+ """
2
+ Configuration for the Talisman agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ import os
6
+ from typing import Any, Dict, Optional
7
+
8
+ from bioservices import UniProt
9
+ from bioservices.eutils import EUtils as NCBI
10
+
11
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
12
+
13
+
14
@dataclass
class TalismanConfig(HasWorkdir):
    """Configuration for the Talisman agent.

    Holds the keyword options used to construct the bioservices UniProt and
    NCBI clients, plus the OpenAI settings used for gene set analysis.
    Values that are not supplied are filled in by ``__post_init__``.
    """

    # Keyword options passed to the bioservices UniProt client
    uniprot_client_options: Dict[str, Any] = field(default_factory=dict)

    # Keyword options passed to the bioservices NCBI (EUtils) client
    ncbi_client_options: Dict[str, Any] = field(default_factory=dict)

    # OpenAI API key for LLM-based analysis; read from OPENAI_API_KEY when None
    openai_api_key: Optional[str] = None

    # Model to use for gene set analysis
    model_name: str = "gpt-4o"

    def __post_init__(self):
        """Fill in defaults for anything the caller left unset.

        Caller-supplied client options are layered on top of the defaults, so
        a partial dict (for example only ``{"verbose": False}``) still ends up
        with the default NCBI contact email instead of silently dropping it.
        Explicitly supplied keys always win over the defaults.
        """
        # ``or {}`` tolerates an explicit None being passed for either option set.
        self.uniprot_client_options = {
            "verbose": False,
            **(self.uniprot_client_options or {}),
        }
        self.ncbi_client_options = {
            "verbose": False,
            "email": "MJoachimiak@lbl.gov",
            **(self.ncbi_client_options or {}),
        }

        # Initialize the workdir if not already set
        if self.workdir is None:
            self.workdir = WorkDir()

        # Try to get OpenAI API key from environment if not provided
        if self.openai_api_key is None:
            self.openai_api_key = os.environ.get("OPENAI_API_KEY")

    def get_uniprot_client(self) -> UniProt:
        """Return a new UniProt client built from ``uniprot_client_options``."""
        return UniProt(**self.uniprot_client_options)

    def get_ncbi_client(self) -> NCBI:
        """Return a new NCBI EUtils client built from ``ncbi_client_options``."""
        return NCBI(**self.ncbi_client_options)
55
+
56
+
57
def get_config() -> TalismanConfig:
    """Build a TalismanConfig, taking the working directory from the environment.

    When AURELIAN_WORKDIR is set to a non-empty value it is used as the
    WorkDir location; otherwise ``workdir`` is passed as None.
    """
    location = os.environ.get("AURELIAN_WORKDIR", None)

    workdir = None
    if location:
        workdir = WorkDir(location=location)

    config = TalismanConfig(
        workdir=workdir,
        uniprot_client_options={"verbose": False},
        ncbi_client_options={"verbose": False},
    )
    return config
@@ -0,0 +1,50 @@
1
+ """
2
+ Gradio interface for the Talisman agent.
3
+ """
4
+ from typing import List, Optional
5
+
6
+ import gradio as gr
7
+
8
+ from aurelian.agents.talisman.talisman_agent import talisman_agent
9
+ from aurelian.agents.talisman.talisman_config import TalismanConfig
10
+ from aurelian.utils.async_utils import run_sync
11
+
12
+
13
def chat(deps: Optional[TalismanConfig] = None, **kwargs):
    """
    Initialize a chat interface for the Talisman agent.

    Args:
        deps: Optional dependencies configuration; a default TalismanConfig
            is created when omitted
        **kwargs: Additional arguments to pass to the agent on every run

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = TalismanConfig()

    def get_info(query: str, history: List[dict]) -> str:
        """Run the agent on ``query``, appending prior turns as plain text."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # The interface is created with type="messages", so each history
            # entry is expected to be a role/content dict; anything else is
            # rendered with str() as a fallback.
            turns = []
            for h in history:
                if isinstance(h, dict):
                    turns.append(f"{h.get('role', 'unknown')}: {h.get('content', '')}")
                else:
                    turns.append(str(h))
            # Separate the header from the query so it is not glued onto the
            # last word of the user's message.
            query = "\n".join([f"{query}\n\n## History", *turns])
        result = run_sync(lambda: talisman_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Talisman Gene Analysis Assistant",
        examples=[
            ["Get description for TP53"],
            ["Get information about the BRCA1 gene"],
            ["Get descriptions for multiple genes: INS, ALB, APOE"],
            ["What is the function of KRAS?"],
            ["Analyze the relationship between TP53 and MDM2"],
            ["Analyze these genes and their functional relationships: BRCA1, BRCA2, ATM, PARP1"],
            ["Get descriptions for ENSG00000139618, ENSG00000141510"],
        ],
    )
@@ -0,0 +1,75 @@
1
+ """
2
+ MCP tools for retrieving gene information using the UniProt API and NCBI Entrez.
3
+ """
4
+ import os
5
+
6
+ from mcp.server.fastmcp import FastMCP
7
+
8
+ from aurelian.agents.talisman.talisman_agent import TALISMAN_SYSTEM_PROMPT
9
+ from aurelian.agents.talisman.talisman_config import TalismanConfig, get_config
10
+ from aurelian.agents.talisman.talisman_tools import (
11
+ get_gene_description,
12
+ get_gene_descriptions,
13
+ get_genes_from_list,
14
+ analyze_gene_set
15
+ )
16
+
17
+ from pydantic_ai import RunContext
18
+
19
# Create the FastMCP server; the agent's system prompt is passed as the server instructions
mcp = FastMCP(
    "talisman",
    instructions=TALISMAN_SYSTEM_PROMPT,
)
21
+
22
def deps() -> TalismanConfig:
    """Return the Talisman dependencies, as produced by ``get_config()``."""
    config = get_config()
    return config
25
+
26
def ctx() -> RunContext[TalismanConfig]:
    """Build a run context whose only populated field is the dependencies."""
    # model, usage and prompt are passed as None; only deps carries a value.
    return RunContext[TalismanConfig](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
33
+
34
@mcp.tool()
async def get_gene_info(gene_id: str) -> str:
    """
    Get description for a single gene ID.

    Args:
        gene_id: The gene identifier (UniProt ID, gene symbol, etc.)

    Returns:
        The gene description in a structured format
    """
    # A new run context is built for each call before delegating to the tool.
    run_context = ctx()
    description = get_gene_description(run_context, gene_id)
    return description
46
+
47
@mcp.tool()
async def get_multiple_gene_info(gene_ids: str) -> str:
    """
    Get descriptions for multiple gene IDs provided as a string.

    Args:
        gene_ids: String containing gene identifiers separated by commas, spaces, or newlines

    Returns:
        The gene descriptions in a structured format
    """
    # A new run context is built for each call before delegating to the tool.
    run_context = ctx()
    descriptions = get_genes_from_list(run_context, gene_ids)
    return descriptions
59
+
60
@mcp.tool()
async def analyze_genes(gene_list: str) -> str:
    """
    Analyze a set of genes and generate a biological summary of their properties and relationships.

    Args:
        gene_list: String containing gene identifiers separated by commas, spaces, or newlines

    Returns:
        A structured biological summary of the gene set with Narrative, Functional Terms Table, and Gene Summary Table
    """
    # A new run context is built for each call before delegating to the tool.
    run_context = ctx()
    summary = analyze_gene_set(run_context, gene_list)
    return summary
72
+
73
if __name__ == "__main__":
    # Script entry point: serve the MCP tools over the stdio transport
    mcp.run(
        transport='stdio',
    )