aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
  121. Regulatory Processes in GO-CAM.docx +0 -0
  122. Regulatory Processes in GO-CAM.pdf +0 -0
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,90 @@
1
+ """
2
+ Agent for reviewing GO standard annotations.
3
+ """
4
+ from pydantic_ai import Agent, Tool, RunContext
5
+
6
+ from aurelian.agents.goann.goann_config import GOAnnotationDependencies
7
+ from aurelian.utils.documentation_manager import DocumentationManager
8
+ from aurelian.agents.literature.literature_tools import (
9
+ lookup_pmid as literature_lookup_pmid,
10
+ search_literature_web,
11
+ retrieve_literature_page
12
+ )
13
+ from . import DOCUMENTS_DIR
14
+ from .goann_tools import find_gene_annotations, fetch_document
15
+ from ..uniprot import lookup_uniprot_entry
16
+
17
# Base system prompt for the GO annotation review agent.
# Tool names quoted in backticks must match the names the tools are registered
# under: the PubMed lookup is imported under the alias `literature_lookup_pmid`
# but is registered from the function `lookup_pmid` without an explicit name.
SYSTEM = """
You are a GO annotation reviewer specializing in reviewing GO standard annotations.

Your primary responsibilities include:
1. Evaluating GO annotations for accuracy based on evidence codes and supporting literature
2. Identifying potential over-annotations based on phenotypes rather than direct involvement
3. Reviewing annotations according to GO annotation guidelines
4. Suggesting corrections or improvements to annotations

You use the GO guidelines for proper annotation, you can use the `fetch_document` tool to retrieve the content of any of these documents.

A gene should only be annotated to a process if its activity is an identifiable step in that process.

When asked to evaluate or review existing annotations, be sure to:

- check the papers used in annotation using `lookup_pmid` to ensure the interpretation of the evidence is correct
- look at textual information on the uniprot ID using `lookup_uniprot_entry` to ensure the annotation is consistent


Your goal is to help maintain the quality and accuracy of the GO annotation database.
"""
38
+
39
# Description attached to the PubMed lookup tool in place of its docstring.
_LOOKUP_PMID_DESCRIPTION = (
    "Lookup the text of a PubMed article by its PMID.\n"
    "\n"
    "Note that assertions in GO-CAMs may reference PMIDs, so this tool\n"
    "is useful for validating assertions. A common task is to align\n"
    "the text of a PMID with the text of an assertion, or extracting text\n"
    "snippets from the publication that support the assertion."
)

# Tools made available to the agent, in registration order.
core_tools = [
    Tool(find_gene_annotations),
    Tool(lookup_uniprot_entry),
    Tool(literature_lookup_pmid, description=_LOOKUP_PMID_DESCRIPTION),
    Tool(search_literature_web),
    Tool(retrieve_literature_page),
    Tool(fetch_document),
]

# The GO annotation review agent: base prompt plus the tools listed above.
goann_agent = Agent(
    model="anthropic:claude-3-7-sonnet-latest",
    system_prompt=SYSTEM,
    tools=core_tools,
    deps_type=GOAnnotationDependencies,
)
62
+
63
+
64
def get_documents_for_prompt() -> str:
    """
    Build the document-listing section of the system prompt.

    A DocumentationManager is created over DOCUMENTS_DIR and asked for its
    prompt text, with a usage hint about `fetch_document` passed as extra text.

    Returns:
        The string produced by DocumentationManager.get_documents_for_prompt
    """
    usage_hint = (
        "\n\nYou can use the `fetch_document` tool to retrieve the content of any of these documents."
        "\nWhen asked any question about GO annotation curation practice, be sure to ALWAYS"
        " check the relevant document for the most up-to-date information.\n"
    )
    manager = DocumentationManager(documents_dir=DOCUMENTS_DIR)
    return manager.get_documents_for_prompt(extra_text=usage_hint)
77
+
78
+
79
@goann_agent.system_prompt
def add_documents_to_prompt(ctx: RunContext[GOAnnotationDependencies]) -> str:
    """
    Add the available GO annotation guideline documents to the system prompt.

    Registered on `goann_agent` as a system-prompt function; it simply
    delegates to `get_documents_for_prompt`.

    Args:
        ctx: The run context (not used by this function)

    Returns:
        A string containing the list of available GO annotation documents
    """
    return get_documents_for_prompt()
@@ -0,0 +1,90 @@
1
+ """
2
+ Configuration classes for the GO Annotation Review agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, Any
6
+
7
+ from bioservices import UniProt
8
+ from oaklib import get_adapter
9
+ from oaklib.implementations import AmiGOImplementation
10
+
11
+ from aurelian.dependencies.workdir import HasWorkdir
12
+ from aurelian.agents.uniprot.uniprot_tools import normalize_uniprot_id
13
+
14
+ # Initialize UniProt service
15
+ uniprot_service = UniProt(verbose=False)
16
+
17
+
18
@dataclass
class GOAnnotationDependencies(HasWorkdir):
    """
    Dependencies for the GO Annotation Review agent.

    Args:
        taxon: NCBI Taxonomy ID, defaults to human (9606)
        uniprot_client_options: Keyword options for the bioservices UniProt client
    """
    taxon: str = field(default="9606")

    # Keyword arguments forwarded to the UniProt constructor
    uniprot_client_options: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Fall back to a non-verbose UniProt client when no options were given."""
        # super().__post_init__() is deliberately not called: per the original
        # author's note, HasWorkdir doesn't define one.
        if not self.uniprot_client_options:
            self.uniprot_client_options = {"verbose": False}

    def get_uniprot_client(self) -> UniProt:
        """Create a UniProt client from the stored client options."""
        options = self.uniprot_client_options
        return UniProt(**options)

    def get_amigo_adapter(self) -> AmiGOImplementation:
        """
        Get the AmiGO adapter for the configured taxon.

        Returns:
            AmiGOImplementation: The adapter OAK returns for the
            `amigo:NCBITaxon:<taxon>` selector
        """
        selector = f"amigo:NCBITaxon:{self.taxon}"
        return get_adapter(selector)

    def get_gene_id(self, gene_term: str) -> str:
        """
        Return the gene identifier to query with.

        Currently a pass-through: no normalization is applied.

        Args:
            gene_term: The gene identifier

        Returns:
            str: The same identifier, unchanged
        """
        return gene_term
61
+
62
+
63
def normalize_pmid(pmid: str) -> str:
    """
    Normalize a PubMed ID to the format PMID:nnnnnnn.

    Any existing prefix (everything up to the first colon) is replaced by
    ``PMID:``. Whitespace around the whole value and around the local part is
    removed, so inputs such as ``" PMID: 123 "`` normalize cleanly.

    Args:
        pmid: The PubMed ID, with or without a prefix

    Returns:
        str: The normalized PubMed ID
    """
    pmid = pmid.strip()
    if ":" in pmid:
        # Drop whatever prefix was supplied (e.g. "pmid", "PMID") and any
        # padding that followed the colon.
        pmid = pmid.split(":", 1)[1].strip()
    # The remainder can still start with "PMID:" for doubled prefixes such as
    # "PMID:PMID:123"; avoid prefixing it a second time.
    if not pmid.startswith("PMID:"):
        pmid = f"PMID:{pmid}"
    return pmid
78
+
79
+
80
def get_config(taxon: str = "9606") -> GOAnnotationDependencies:
    """
    Create the dependencies object for the GO Annotation Review agent.

    Args:
        taxon: NCBI Taxonomy ID, defaults to human (9606)

    Returns:
        GOAnnotationDependencies: A new instance configured with the taxon
    """
    dependencies = GOAnnotationDependencies(taxon=taxon)
    return dependencies
@@ -0,0 +1,104 @@
1
+ """
2
+ Evaluation module for the GO Annotation agent.
3
+
4
+ This module implements evaluations for the GO Annotation agent using the pydantic-ai-evals framework.
5
+ """
6
+ import asyncio
7
+ import sys
8
+ from typing import Optional, Any, Dict, Callable, Awaitable
9
+
10
+ from aurelian.evaluators.model import MetadataDict, metadata
11
+ from aurelian.evaluators.substring_evaluator import SubstringEvaluator
12
+ from pydantic_evals import Case, Dataset
13
+ from pydantic_evals.evaluators import LLMJudge
14
+
15
+ from aurelian.agents.goann.goann_agent import goann_agent
16
+ from aurelian.agents.goann.goann_config import GOAnnotationDependencies, get_config
17
+
18
class GOAnnotationMetadata(Dict[str, Any]):
    """Plain string-keyed dictionary used as metadata for GO Annotation evaluations."""
21
+
22
# Evaluation cases for the GO Annotation agent. Each `expected_output` is a
# short phrase the answer should mention (see the note beside each one).

# Rubric used by the case-level LLM judge on case2.
_TF_COREGULATOR_RUBRIC = (
    "\n"
    "Answer should clearly explain:\n"
    "1. DNA-binding TFs directly interact with DNA via specific domains\n"
    "2. Coregulators modulate transcription without directly binding DNA\n"
    "3. The different functional roles of each in gene expression\n"
)

case1 = Case(
    name="notch1_review",
    inputs="Review GO annotations for human NOTCH1 (P46531)",
    expected_output="transcription",  # transcription-related functions
    metadata=metadata("medium", "protein_annotation_review"),
)

case2 = Case(
    name="tf_coregulator_difference",
    inputs="Explain the difference between DNA-binding transcription factors and coregulators",
    expected_output="bind DNA",  # DNA binding is the key distinction
    metadata=metadata("medium", "concept_explanation"),
    evaluators=[
        LLMJudge(rubric=_TF_COREGULATOR_RUBRIC, include_input=True),
    ],
)

case3 = Case(
    name="tf_annotation_guidelines",
    inputs="What are the guidelines for annotating transcription factors?",
    expected_output="evidence",  # evidence requirements
    metadata=metadata("hard", "annotation_guideline_retrieval"),
)

case4 = Case(
    name="tf_annotation_check",
    inputs="Check if P46531 is correctly annotated as a DNA-binding transcription factor",
    expected_output="NOTCH",  # should identify the protein as NOTCH1
    metadata=metadata("hard", "annotation_accuracy_check"),
)

case5 = Case(
    name="pmid_annotation_quality",
    inputs="Find GO annotations from PMID:12345678 and assess their quality",
    expected_output="evidence code",  # evidence codes in the assessment
    metadata=metadata("medium", "literature_annotation_assessment"),
)

case6 = Case(
    name="evidence_code_reliability",
    inputs="What evidence codes are most reliable for transcription factor annotations?",
    expected_output="IDA",  # IDA (Inferred from Direct Assay)
    metadata=metadata("easy", "evidence_code_assessment"),
)
75
+
76
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """
    Assemble the evaluation dataset for the GO Annotation agent.

    Returns:
        Dataset holding the six module-level cases together with two
        dataset-level evaluators: a SubstringEvaluator and an LLMJudge rubric
    """
    # Evaluators applied at the dataset level, in addition to any case-level ones.
    dataset_evaluators = [
        SubstringEvaluator(),
        LLMJudge(
            rubric=(
                "\n"
                "Evaluate the answer based on:\n"
                "1. Accuracy in explaining GO annotation concepts and guidelines\n"
                "2. Proper use of GO terminology and evidence codes\n"
                "3. Correct assessment of annotation quality where relevant\n"
                "4. Alignment with current GO Consortium best practices\n"
            ),
            model="anthropic:claude-3-7-sonnet-latest",
        ),
    ]
    all_cases = [case1, case2, case3, case4, case5, case6]
    return Dataset(cases=all_cases, evaluators=dataset_evaluators)
@@ -0,0 +1,62 @@
1
+ """
2
+ Gradio interface for the GO Annotation Review agent.
3
+ """
4
+ from typing import List, Optional
5
+
6
+ import gradio as gr
7
+
8
+ from aurelian.agents.goann.goann_agent import goann_agent
9
+ from aurelian.agents.goann.goann_config import GOAnnotationDependencies, get_config
10
+ from aurelian.utils.async_utils import run_sync
11
+
12
+
13
def chat(deps: Optional[GOAnnotationDependencies] = None, **kwargs):
    """
    Initialize a chat interface for the GO Annotation Review agent.

    Args:
        deps: Optional dependencies configuration; `get_config()` is used when omitted
        **kwargs: Additional arguments to pass to the agent's `run_sync` call

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = get_config()

    def get_info(query: str, history: List[str]) -> str:
        """Answer one chat turn, appending prior history to the query text."""
        # NOTE(review): with type="messages" Gradio is documented to pass history
        # entries as message dicts rather than plain strings - confirm the
        # rendering below is what the agent should see.
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Put the header on its own line; without the leading newline it
            # was glued to the end of the user's question.
            query += "\n## History" + "".join(f"\n{h}" for h in history)
        result = run_sync(lambda: goann_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="GO Annotation Review Assistant",
        description="I can help review GO annotations, check annotation quality, and ensure compliance with GO guidelines.",
        examples=[
            ["Review GO annotations for human NOTCH1 (P46531)"],
            ["Explain the difference between DNA-binding transcription factors and coregulators"],
            ["What are the guidelines for annotating transcription factors?"],
            ["Check if P46531 is correctly annotated as a DNA-binding transcription factor"],
            ["Find GO annotations from PMID:12345678 and assess their quality"],
            ["What evidence codes are most reliable for transcription factor annotations?"],
        ],
    )
51
+
52
+
53
def launch_demo():
    """
    Build the default chat interface and launch it as a Gradio demo.
    """
    chat().launch()
59
+
60
+
61
+ if __name__ == "__main__":
62
+ launch_demo()
File without changes
@@ -0,0 +1,65 @@
1
+ """
2
+ Tools for the GO Annotation Review agent.
3
+ """
4
+ from typing import List, Dict, Optional
5
+
6
+ from pydantic_ai import RunContext, ModelRetry
7
+
8
+ from aurelian.agents.goann.goann_config import GOAnnotationDependencies
9
+ from aurelian.utils.data_utils import obj_to_dict
10
+
11
+ from . import DOCUMENTS_DIR
12
+ from ...utils.documentation_manager import DocumentationManager
13
+
14
+ document_manager = DocumentationManager(documents_dir=DOCUMENTS_DIR)
15
+
16
async def find_gene_annotations(ctx: RunContext[GOAnnotationDependencies], gene_id: str) -> List[Dict]:
    """
    Find gene annotations for a given gene or gene product.

    Args:
        ctx: The run context
        gene_id: Gene or gene product IDs. This should be a prefixed ID, consistent with AmiGO

    Returns:
        List[Dict]: List of gene annotations including their evidence codes

    Raises:
        ModelRetry: If the lookup fails or returns no annotations, so the
            model can try again with a different identifier
    """
    print(f"FIND GENE ANNOTATIONS: {gene_id}")
    try:
        adapter = ctx.deps.get_amigo_adapter()
        normalized_gene_id = ctx.deps.get_gene_id(gene_id)
        assocs = [obj_to_dict(a) for a in adapter.associations([normalized_gene_id])]
    except ModelRetry:
        # Already a retry request; pass it through untouched.
        raise
    except Exception as e:
        # Convert any lookup failure into a retry request, keeping the cause.
        raise ModelRetry(f"Error finding gene annotations for {gene_id}: {str(e)}") from e

    if not assocs:
        raise ModelRetry(f"No gene annotations found for {gene_id}. Try a different gene identifier.")

    return assocs
41
+
42
+
43
async def fetch_document(
    ctx: RunContext[GOAnnotationDependencies],
    name: str,
    format: str = "md"
) -> str:
    """
    Lookup the document by name.

    Args:
        ctx: The run context
        name: The document name (e.g. "How_to_annotate_TFs")
        format: The format of the document (defaults to "md"); accepted for
            interface compatibility but not currently passed to the document manager

    Returns:
        The content of the document

    Raises:
        ModelRetry: If the document manager raises KeyError for the name
    """
    print(f"FETCH DOCUMENT: {name}")
    try:
        return document_manager.fetch_document(name)
    except KeyError as err:
        raise ModelRetry(
            f"Document with name '{name}' not found. Please check the name and try again."
        ) from err
64
+
65
+
@@ -0,0 +1,43 @@
1
+ """
2
+ GOCAM agent module for working with Gene Ontology Causal Activity Models.
3
+ """
4
+ from pathlib import Path
5
+
6
+ THIS_DIR = Path(__file__).parent
7
+ DOCUMENTS_DIR = THIS_DIR / "documents"
8
+
9
+ # isort: skip_file
10
+ from .gocam_agent import gocam_agent # noqa: E402
11
+ from .gocam_config import GOCAMDependencies, get_config # noqa: E402
12
+ from .gocam_gradio import chat # noqa: E402
13
+ from .gocam_tools import ( # noqa: E402
14
+ search_gocams,
15
+ lookup_gocam,
16
+ lookup_uniprot_entry,
17
+ all_documents,
18
+ fetch_document,
19
+ validate_gocam_model,
20
+ )
21
+
22
+ __all__ = [
23
+ # Constants
24
+ "THIS_DIR",
25
+ "DOCUMENTS_DIR",
26
+
27
+ # Agent
28
+ "gocam_agent",
29
+ # Config
30
+ "GOCAMDependencies",
31
+ "get_config",
32
+
33
+ # Tools
34
+ "search_gocams",
35
+ "lookup_gocam",
36
+ "lookup_uniprot_entry",
37
+ "all_documents",
38
+ "fetch_document",
39
+ "validate_gocam_model",
40
+
41
+ # Gradio
42
+ "chat",
43
+ ]
@@ -0,0 +1,100 @@
1
+ Guidelines for DNA-binding transcription factor annotation in eukaryotes
2
+
3
+ ###
4
+
5
+ # Pathway Editor
6
+
7
+ ### DNA-binding transcription factor activity - Single transcription target
8
+
9
+ The activity unit for a eukaryotic DNA-binding transcription factor is:
10
+
11
+ * **MF:** 'enables' a child of DNA-binding transcription factor activity, RNA polymerase II-specific ([GO:0000981](https://www.ebi.ac.uk/QuickGO/term/GO%3A0000981)):
12
+ + DNA-binding transcription activator activity, RNA polymerase II-specific ([GO:0001228](https://www.ebi.ac.uk/QuickGO/term/GO%3A0001228))
13
+ + DNA-binding transcription repressor activity, RNA polymerase II-specific ([GO:0001227](https://www.ebi.ac.uk/QuickGO/term/GO%3A0001227))
14
+ * **Context:**
15
+ + The relation between the DNA-binding transcription factor activity and the gene it regulates is 'has input'
16
+ * **BP:** 'part of' **regulation of the BP** in which the target participates (if known).
17
+ + **CC:** 'occurs in' nucleus ([GO:0005634](https://www.ebi.ac.uk/QuickGO/term/GO%3A0005634))
18
+ + The causal relation between the transcription factor activity and the activity of its target gene is 'indirectly positively regulates' or 'indirectly negatively regulates', since there are many steps between the activation of transcription and the activity of the target protein, including the production of a messenger RNA that is translated into a protein; i.e., the regulator does not directly interact with the protein it regulates.
19
+
20
+ **Example single target:** [**FOXO3 regulation of G6PC1**](http://noctua.geneontology.org/workbench/noctua-visual-pathway-editor/?model_id=gomodel%3A645d887900001840)![](data:image/png;base64...)
21
+
22
+ ###
23
+
24
+ ###
25
+
26
+ ###
27
+
28
+ ### DNA-binding transcription factor activity - Multiple transcription targets
29
+
30
+ In cases where a transcription factor regulates multiple target genes, a separate activity unit is captured for each transcriptional target.
31
+
32
+ **Example multiple targets:** [**FOXO3 regulation of G6PC1 and Pck1**](http://noctua.geneontology.org/workbench/noctua-visual-pathway-editor/?model_id=gomodel%3A645d887900001840)
33
+
34
+ ![](data:image/png;base64...)
35
+
36
+ ###
37
+
38
+ ### Nuclear receptors and ligand-activated transcription factors
39
+
40
+ * Nuclear receptors are positively regulated by a ligand, usually a small molecule (ChEBI).
41
+ * The activity unit for a nuclear receptor is:
42
+ + **MF**: nuclear receptor activity ([GO:0004879](https://www.ebi.ac.uk/QuickGO/GTerm?id=GO:0004879)) (a child of transcription factor activity)
43
+ + **Context:** the causal relation between the small molecule and the nuclear receptor is ‘is small molecule activator of’.
44
+ + Other data are captured the same way as for [other transcription factors](#_fbcjfrmmj04i).
45
+
46
+ **Example:** [**Model for nuclear receptor annotation**](http://noctua.geneontology.org/workbench/noctua-visual-pathway-editor/?model_id=gomodel%3A6482692800001263)
47
+
48
+ ![](data:image/png;base64...)
49
+
50
+ ##
51
+
52
+ ## Form Editor
53
+
54
+ ### DNA-binding transcription factor activity
55
+
56
+ * **MF:** 'enables' a child of DNA-binding transcription factor activity, RNA polymerase II-specific ([GO:0000981](https://www.ebi.ac.uk/QuickGO/term/GO%3A0000981)):
57
+ + DNA-binding transcription activator activity, RNA polymerase II-specific ([GO:0001228](https://www.ebi.ac.uk/QuickGO/term/GO%3A0001228))
58
+ + DNA-binding transcription repressor activity, RNA polymerase II-specific ([GO:0001227](https://www.ebi.ac.uk/QuickGO/term/GO%3A0001227))
59
+ * **Context:**
60
+ + The relation between the DNA-binding transcription factor activity and the gene it regulates is 'has input'. A single input is captured per activity unit.
61
+ * **regulation of transcription may be 'part of' a larger BP**, specifically, regulation of the BP in which the target participates (if known).
62
+ + **CC:** 'occurs in' nucleus ([GO:0005634](https://www.ebi.ac.uk/QuickGO/term/GO%3A0005634))
63
+
64
+ **Example DNA binding transcription factor activity:** [**FOXO3 regulation of G6PC1**](http://noctua.geneontology.org/workbench/noctua-form/?model_id=gomodel%3A645d887900001840)
65
+
66
+ ![](data:image/png;base64...)
67
+
68
+ ###
69
+
70
+ ### Nuclear receptors and ligand-activated transcription factors
71
+
72
+ **Example:** [**Model for nuclear receptor annotation guidelines**](http://noctua.geneontology.org/workbench/noctua-form/?model_id=gomodel%3A6482692800001263)
73
+
74
+ The annotations are the same as for DNA binding transcription factor activity, except using the more precise MF nuclear receptor activity ([GO:0004879](https://www.ebi.ac.uk/QuickGO/GTerm?id=GO:0004879)).
75
+
76
+ ![](data:image/png;base64...)
77
+
78
+ #
79
+
80
+ # Differences between GO-CAM and standard annotation of a DNA-binding transcription factor activity
81
+
82
+ In standard annotation (captured with the Noctua Form or Protein2GO), relations between molecular functions are not captured, so there is no relation between the DNA binding transcription factor and the MF of its transcriptional target.
83
+
84
+ For nuclear receptors, the relation between the small molecule activator and the transcription factor is not captured.
85
+
86
+ # Open questions
87
+
88
+ * FORM: For nuclear receptors, the relation between the small molecule activator and the transcription factor is not captured: can we add the relation in the Form?
89
+
90
+ # Future features
91
+
92
+ Chromosomal coordinates of the promoter/enhancer/loop anchor binding site of a DNA-binding transcription factor will be captured as 'has input'. For the human genome, the syntax is: hg38\_chr6:12334566-12335555\* if we want to capture the chromosomal region.
93
+
94
+ \* https://eu.idtdna.com/pages/support/faqs/how-are-genomic-coordinates-defined
95
+
96
+ # Review information
97
+
98
+ Review date: 2023-07-20
99
+
100
+ Reviewed by: Cristina Casals, Pascale Gaudet, Patrick Masson