aurelian 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +94 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +85 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +46 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +72 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +43 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
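- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0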
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +240 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +67 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +186 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +55 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +25 -0
- aurelian/agents/monarch/monarch_agent.py +44 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +113 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +83 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/talisman_agent.py +126 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +168 -0
- aurelian/agents/talisman/talisman_tools.py +720 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +37 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +108 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +800 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +15 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.3.2.dist-info/LICENSE +22 -0
- aurelian-0.3.2.dist-info/METADATA +105 -0
- aurelian-0.3.2.dist-info/RECORD +254 -0
- aurelian-0.3.2.dist-info/WHEEL +4 -0
- aurelian-0.3.2.dist-info/entry_points.txt +3 -0
aurelian/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,77 @@
|
|
1
|
+
"""
|
2
|
+
Agent for interacting with GO KnowledgeBase via AmiGO solr endpoint.
|
3
|
+
"""
|
4
|
+
from aurelian.agents.amigo.amigo_config import AmiGODependencies
|
5
|
+
from aurelian.agents.amigo.amigo_tools import (
|
6
|
+
find_gene_associations,
|
7
|
+
find_gene_associations_for_pmid,
|
8
|
+
lookup_uniprot_entry,
|
9
|
+
uniprot_mapping
|
10
|
+
)
|
11
|
+
from aurelian.agents.literature.literature_tools import (
|
12
|
+
lookup_pmid as literature_lookup_pmid,
|
13
|
+
search_literature_web,
|
14
|
+
retrieve_literature_page
|
15
|
+
)
|
16
|
+
from pydantic_ai import Agent, Tool
|
17
|
+
|
18
|
+
# System prompt for the AmiGO agent.
# Fix: the over-annotation rule refers to the activity of the gene *product*
# (the original text read "gene process", which is not a meaningful term).
SYSTEM = """
You are a biocurator that can answer questions using the Gene Ontology knowledge base via the AmiGO API.

Do not assume the knowledge base is complete or always correct. Your job is to help curators find mistakes
or missing information. A particular pervasive issue in GO is over-annotation based on phenotypes - a gene
should only be annotated to a process if it is involved in that process, i.e., if the activity of the
gene product is an identifiable step in the pathway.

You can help with:
- Finding gene associations for specific genes or gene products
- Finding gene associations cited in specific publications by PMID
- Looking up protein information via UniProt
- Mapping UniProt accessions to other databases
- Analyzing gene function and involvement in biological processes
- Identifying potential over-annotations or missing annotations

You can also use your general knowledge of genes and biological processes, and do additional searches
when needed to provide context or verification.
"""
|
37
|
+
|
38
|
+
# The AmiGO agent: model, dependency type, system prompt, and tool list.
# The first four tools are registered as-is; the three literature tools are
# re-exposed under agent-facing names with explicit descriptions.
amigo_agent = Agent(
    model="openai:gpt-4o",
    deps_type=AmiGODependencies,
    system_prompt=SYSTEM,
    tools=[
        # AmiGO / UniProt tools
        Tool(find_gene_associations),
        Tool(find_gene_associations_for_pmid),
        Tool(lookup_uniprot_entry),
        Tool(uniprot_mapping),
        # Literature tools (description text is passed through verbatim)
        Tool(
            literature_lookup_pmid,
            name="lookup_pmid",
            description="""Lookup the text of a PubMed article by its PMID.

A PMID should be of the form "PMID:nnnnnnn" (no underscores).

This is useful for retrieving the full text of papers referenced in GO annotations
to verify the evidence for gene annotations or identify potential over-annotations.

Args:
pmid: The PubMed ID to look up

Returns:
str: Full text if available, otherwise abstract""",
        ),
        Tool(
            search_literature_web,
            name="search_web",
            description="""Search the web using a text query.

Args:
query: The search query

Returns:
str: Search results with summaries""",
        ),
        Tool(
            retrieve_literature_page,
            name="retrieve_web_page",
            description="""Fetch the contents of a web page.

Args:
url: The URL to fetch

Returns:
str: The contents of the web page""",
        ),
    ],
)
|
@@ -0,0 +1,85 @@
|
|
1
|
+
"""
|
2
|
+
Configuration classes for the AmiGO agent.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
|
6
|
+
from bioservices import UniProt
|
7
|
+
from oaklib import get_adapter
|
8
|
+
from oaklib.implementations import AmiGOImplementation
|
9
|
+
|
10
|
+
from aurelian.dependencies.workdir import HasWorkdir
|
11
|
+
from aurelian.agents.uniprot.uniprot_tools import normalize_uniprot_id
|
12
|
+
|
13
|
+
# Initialize UniProt service
|
14
|
+
# Single module-level UniProt client, created at import time with verbose
# output disabled; AmiGODependencies.get_uniprot_service() returns this object.
uniprot_service = UniProt(verbose=False)
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
class AmiGODependencies(HasWorkdir):
    """
    Dependencies handed to the AmiGO agent and its tools.

    Args:
        taxon: NCBI Taxonomy ID as a string, defaults to human (9606)
    """

    taxon: str = field(default="9606")

    def get_uniprot_service(self) -> UniProt:
        """
        Return the UniProt client used for protein lookups.

        Returns:
            UniProt: The module-level UniProt service instance
        """
        return uniprot_service

    def get_amigo_adapter(self) -> AmiGOImplementation:
        """
        Build the OAK AmiGO adapter scoped to this configuration's taxon.

        Returns:
            AmiGOImplementation: Adapter for the ``amigo:NCBITaxon:<taxon>`` selector
        """
        selector = f"amigo:NCBITaxon:{self.taxon}"
        return get_adapter(selector)

    def get_gene_id(self, gene_term: str) -> str:
        """
        Normalize a gene identifier.

        The identifier is currently passed through unchanged.

        Args:
            gene_term: The gene identifier

        Returns:
            str: The gene identifier, as given
        """
        return gene_term
|
56
|
+
|
57
|
+
|
58
|
+
def normalize_pmid(pmid: str) -> str:
    """
    Normalize a PubMed ID to the format PMID:nnnnnnn.

    Any prefix up to the first colon is discarded (so ``pmid:123`` and
    ``PMID:123`` both work) and the remainder is re-prefixed with ``PMID:``.
    Whitespace around the whole value and around the local identifier is
    removed, so inputs such as ``" PMID: 123 "`` no longer produce a CURIE
    with embedded spaces.

    Args:
        pmid: The PubMed ID, with or without a prefix

    Returns:
        str: The normalized PubMed ID
    """
    local_id = pmid.strip()
    if ":" in local_id:
        # Drop whatever prefix was supplied; only the part after the first colon is kept.
        local_id = local_id.split(":", 1)[1].strip()
    # A remainder that already carries the prefix (e.g. from "x:PMID:123") is left alone.
    if local_id.startswith("PMID:"):
        return local_id
    return f"PMID:{local_id}"
|
73
|
+
|
74
|
+
|
75
|
+
def get_config(taxon: str = "9606") -> AmiGODependencies:
    """
    Create the dependencies object for the AmiGO agent.

    Args:
        taxon: NCBI Taxonomy ID, defaults to human (9606)

    Returns:
        AmiGODependencies: A new dependencies instance for the given taxon
    """
    config = AmiGODependencies(taxon=taxon)
    return config
|
@@ -0,0 +1,73 @@
|
|
1
|
+
"""
|
2
|
+
Evaluation module for the AmiGO agent.
|
3
|
+
|
4
|
+
This module implements evaluations for the AmiGO agent using the pydantic-ai-evals framework.
|
5
|
+
"""
|
6
|
+
import asyncio
|
7
|
+
import sys
|
8
|
+
from typing import Optional, Any, Dict, Callable, Awaitable
|
9
|
+
|
10
|
+
from aurelian.evaluators.model import MetadataDict, metadata
|
11
|
+
from aurelian.evaluators.substring_evaluator import SubstringEvaluator
|
12
|
+
from pydantic_evals import Case, Dataset
|
13
|
+
|
14
|
+
from aurelian.agents.amigo.amigo_agent import amigo_agent
|
15
|
+
from aurelian.agents.amigo.amigo_config import AmiGODependencies
|
16
|
+
|
17
|
+
class AmiGOMetadata(Dict[str, Any]):
    """String-keyed dictionary carrying metadata for AmiGO evaluations."""
|
20
|
+
|
21
|
+
# Evaluation cases for the AmiGO agent. Each `expected_output` is a short
# marker string the answer is expected to contain.

# GO annotations for a UniProt accession: answer should contain GO terms.
case1 = Case(
    name="uniprot_annotations",
    inputs="What are some annotations for the protein UniProtKB:Q9UMS5",
    expected_output="GO:",
    metadata=metadata("medium", "annotation_retrieval"),
)

# Over-annotation review of a paper: answer should evaluate the paper's annotations.
case2 = Case(
    name="paper_overannotation",
    inputs="Check PMID:19661248 for over-annotation",
    expected_output="annotation",
    metadata=metadata("hard", "literature_assessment"),
)

# Pathway membership: answer should mention ribosome-related genes.
case3 = Case(
    name="pathway_genes",
    inputs="What genes are involved in the ribosome biogenesis pathway?",
    expected_output="ribosom",
    metadata=metadata("medium", "pathway_analysis"),
)

# Cross-database mapping: answer should contain KEGG IDs.
case4 = Case(
    name="database_mapping",
    inputs="Map UniProtKB:P04637 to KEGG database",
    expected_output="KEGG",
    metadata=metadata("easy", "database_mapping"),
)

# Function search: answer should mention DNA repair annotations.
case5 = Case(
    name="dna_repair_genes",
    inputs="Search for genes involved in DNA repair and show me their annotations",
    expected_output="repair",
    metadata=metadata("medium", "gene_function_search"),
)
|
56
|
+
|
57
|
+
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """
    Assemble the evaluation dataset for the AmiGO agent.

    Returns:
        Dataset holding the five module-level cases, with a single
        SubstringEvaluator attached at dataset level
    """
    return Dataset(
        cases=[case1, case2, case3, case4, case5],
        evaluators=[SubstringEvaluator()],
    )
|
@@ -0,0 +1,52 @@
|
|
1
|
+
"""
|
2
|
+
Gradio UI for the AmiGO agent.
|
3
|
+
"""
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
import gradio as gr
|
7
|
+
|
8
|
+
from aurelian.agents.amigo.amigo_agent import amigo_agent
|
9
|
+
from aurelian.agents.amigo.amigo_config import AmiGODependencies
|
10
|
+
from aurelian.utils.async_utils import run_sync
|
11
|
+
|
12
|
+
|
13
|
+
def chat(deps: Optional[AmiGODependencies] = None, taxon: Optional[str] = None, **kwargs):
    """
    Initialize a chat interface for the AmiGO agent.

    Args:
        deps: Optional dependencies configuration; a default
            AmiGODependencies is created when omitted
        taxon: Optional NCBI Taxonomy ID; when given it overwrites
            ``deps.taxon`` (note: this mutates a caller-supplied ``deps``)
        **kwargs: Additional arguments to pass to the agent on every run

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = AmiGODependencies()

    if taxon:
        deps.taxon = taxon

    def _format_turn(turn) -> str:
        """Render one history entry as a line of prompt text."""
        # The interface is created with type="messages", where history
        # entries are expected to be role/content mappings; anything else is
        # stringified as before.
        if isinstance(turn, dict):
            return f"{turn.get('role')}: {turn.get('content')}"
        return str(turn)

    def get_info(query: str, history: List) -> str:
        """Answer one chat message, appending prior turns to the prompt."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Fix: start the history section on its own line; previously the
            # "## History" heading was glued directly onto the user's query.
            query = "\n".join([query, "", "## History", *map(_format_turn, history)])
        result = run_sync(lambda: amigo_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="AmiGO AI Assistant",
        examples=[
            ["What are some annotations for the protein UniProtKB:Q9UMS5"],
            ["Check PMID:19661248 for over-annotation"],
            ["What genes are involved in the ribosome biogenesis pathway?"],
            ["Map UniProtKB:P04637 to KEGG database"],
            ["Search for genes involved in DNA repair and show me their annotations"],
        ],
    )
|
@@ -0,0 +1,152 @@
|
|
1
|
+
"""
|
2
|
+
MCP tools for interacting with GO KnowledgeBase via AmiGO solr endpoint.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
|
9
|
+
from aurelian.agents.amigo.amigo_agent import SYSTEM
|
10
|
+
import aurelian.agents.amigo.amigo_tools as at
|
11
|
+
from aurelian.agents.amigo.amigo_config import AmiGODependencies
|
12
|
+
from aurelian.agents.literature.literature_tools import (
|
13
|
+
lookup_pmid as lit_lookup_pmid,
|
14
|
+
search_literature_web,
|
15
|
+
retrieve_literature_page
|
16
|
+
)
|
17
|
+
from pydantic_ai import RunContext
|
18
|
+
|
19
|
+
# Initialize FastMCP server
|
20
|
+
# MCP server named "amigo"; the agent's SYSTEM prompt is passed as the
# server's instructions. The @mcp.tool() functions below register on it.
mcp = FastMCP("amigo", instructions=SYSTEM)
|
21
|
+
|
22
|
+
|
23
|
+
from aurelian.dependencies.workdir import WorkDir
|
24
|
+
|
25
|
+
def deps() -> AmiGODependencies:
    """
    Build AmiGO dependencies from environment variables.

    Reads:
        AURELIAN_WORKDIR: working directory location (default "/tmp/aurelian")
        AMIGO_TAXON: taxon override, applied only when set to a non-empty value

    Returns:
        AmiGODependencies: A freshly configured dependencies object
    """
    dependencies = AmiGODependencies()

    # Working directory: environment variable or the default location.
    dependencies.workdir = WorkDir(os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian"))

    # Optional taxon override.
    taxon_override = os.getenv("AMIGO_TAXON")
    if taxon_override:
        dependencies.taxon = taxon_override

    return dependencies
|
37
|
+
|
38
|
+
def ctx() -> RunContext[AmiGODependencies]:
    """
    Build a RunContext for calling the agent tools directly.

    Only ``deps`` is populated (from ``deps()``); ``model``, ``usage`` and
    ``prompt`` are passed as None.

    Returns:
        RunContext[AmiGODependencies]: A new context on every call
    """
    return RunContext[AmiGODependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
|
44
|
+
|
45
|
+
|
46
|
+
@mcp.tool()
async def find_gene_associations(gene_id: str) -> List[Dict]:
    """
    Find gene associations for a given gene or gene product.

    Args:
        gene_id: Gene or gene product IDs

    Returns:
        List[Dict]: List of gene associations
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Delegate to the agent tool with a freshly built run context.
    run_context = ctx()
    associations = await at.find_gene_associations(run_context, gene_id)
    return associations
|
58
|
+
|
59
|
+
|
60
|
+
@mcp.tool()
async def find_gene_associations_for_pmid(pmid: str) -> List[Dict]:
    """
    Find gene associations for a given PubMed ID.

    Args:
        pmid: The PubMed ID

    Returns:
        List[Dict]: List of gene associations for the PubMed ID
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Delegate to the agent tool with a freshly built run context.
    run_context = ctx()
    associations = await at.find_gene_associations_for_pmid(run_context, pmid)
    return associations
|
72
|
+
|
73
|
+
|
74
|
+
@mcp.tool()
async def lookup_uniprot_entry(uniprot_acc: str) -> str:
    """
    Lookup the Uniprot entry for a given Uniprot accession number.

    Args:
        uniprot_acc: The Uniprot accession

    Returns:
        str: The Uniprot entry text
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Delegate to the agent tool with a freshly built run context.
    run_context = ctx()
    entry_text = await at.lookup_uniprot_entry(run_context, uniprot_acc)
    return entry_text
|
86
|
+
|
87
|
+
|
88
|
+
@mcp.tool()
async def uniprot_mapping(target_database: str, uniprot_accs: List[str]) -> Dict:
    """
    Perform a mapping of Uniprot accessions to another database.

    Args:
        target_database: The target database (e.g KEGG, PDB)
        uniprot_accs: The Uniprot accessions

    Returns:
        Dict: Mapping results
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Delegate to the agent tool with a freshly built run context.
    run_context = ctx()
    mapping = await at.uniprot_mapping(run_context, target_database, uniprot_accs)
    return mapping
|
101
|
+
|
102
|
+
|
103
|
+
@mcp.tool()
async def lookup_pmid(pmid: str) -> str:
    """
    Lookup the text of a PubMed article by its PMID.

    A PMID should be of the form "PMID:nnnnnnn" (no underscores).

    This is useful for retrieving the full text of papers referenced in GO annotations
    to verify the evidence for gene annotations or identify potential over-annotations.

    Args:
        pmid: The PubMed ID to look up

    Returns:
        str: Full text if available, otherwise abstract
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Thin pass-through to the literature tool; no run context is supplied here.
    article_text = await lit_lookup_pmid(pmid)
    return article_text
|
120
|
+
|
121
|
+
|
122
|
+
@mcp.tool()
async def search_web(query: str) -> str:
    """
    Search the web using a text query.

    Args:
        query: The search query

    Returns:
        str: Search results with summaries
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Thin pass-through to the literature web-search tool.
    search_results = await search_literature_web(query)
    return search_results
|
134
|
+
|
135
|
+
|
136
|
+
@mcp.tool()
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL to fetch

    Returns:
        str: The contents of the web page
    """
    # Docstring text kept as-is: presumably published as the MCP tool description.
    # Thin pass-through to the literature page-retrieval tool.
    page_contents = await retrieve_literature_page(url)
    return page_contents
|
148
|
+
|
149
|
+
|
150
|
+
if __name__ == "__main__":
    # Run as a script: serve the MCP tools over the stdio transport.
    mcp.run(transport="stdio")
|
@@ -0,0 +1,152 @@
|
|
1
|
+
"""
|
2
|
+
Tools for the AmiGO agent.
|
3
|
+
"""
|
4
|
+
from typing import List, Dict
|
5
|
+
|
6
|
+
from pydantic_ai import RunContext, ModelRetry
|
7
|
+
|
8
|
+
from aurelian.agents.amigo.amigo_config import AmiGODependencies, normalize_pmid
|
9
|
+
from aurelian.agents.uniprot.uniprot_tools import normalize_uniprot_id
|
10
|
+
from aurelian.utils.data_utils import obj_to_dict
|
11
|
+
|
12
|
+
from oaklib.datamodels.association import Association, NegatedAssociation
|
13
|
+
from oaklib.implementations.amigo.amigo_implementation import (
|
14
|
+
DEFAULT_SELECT_FIELDS, QUALIFIER, BIOENTITY,
|
15
|
+
BIOENTITY_LABEL, map_predicate, ANNOTATION_CLASS, ANNOTATION_CLASS_LABEL,
|
16
|
+
REFERENCE, EVIDENCE_TYPE, ASSIGNED_BY,
|
17
|
+
_query as amigo_query,
|
18
|
+
_normalize
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
async def find_gene_associations(ctx: RunContext[AmiGODependencies], gene_id: str) -> List[Dict]:
    """
    Find gene associations for a given gene or gene product.

    Args:
        ctx: The run context; ``ctx.deps`` supplies the AmiGO adapter
        gene_id: Gene or gene product IDs

    Returns:
        List[Dict]: List of gene associations, each converted with obj_to_dict

    Raises:
        ModelRetry: If the lookup fails for any reason (the original error is
            chained as the cause) or if no associations are found
    """
    print(f"FIND GENE ASSOCIATIONS: {gene_id}")
    try:
        adapter = ctx.deps.get_amigo_adapter()
        normalized_gene_id = ctx.deps.get_gene_id(gene_id)
        assocs = [obj_to_dict(a) for a in adapter.associations([normalized_gene_id])]
    except ModelRetry:
        # Let retry requests through untouched; matched by type rather than
        # by searching for "ModelRetry" in the exception type's repr.
        raise
    except Exception as e:
        # Surface every other failure to the model as a retryable error,
        # keeping the original exception as the explicit cause.
        raise ModelRetry(f"Error finding gene associations for {gene_id}: {str(e)}") from e

    if not assocs:
        raise ModelRetry(f"No gene associations found for {gene_id}. Try a different gene identifier.")

    return assocs
|
47
|
+
|
48
|
+
|
49
|
+
def _amigo_doc_to_association(doc: Dict) -> Dict:
    """
    Convert a single AmiGO Solr result document into an association dict.

    Args:
        doc: One result document as returned by the AmiGO Solr query

    Returns:
        Dict: The association (negated or not) converted with obj_to_dict

    Raises:
        KeyError: If the document lacks one of the fields read with `doc[...]`
    """
    quals = set(doc.get(QUALIFIER, []))
    # A "not" qualifier marks the annotation as negated; this selects both the
    # class that is instantiated and the value of its `negated` field.
    is_negated = "not" in quals
    cls = NegatedAssociation if is_negated else Association
    assoc = cls(
        subject=_normalize(doc[BIOENTITY]),
        subject_label=doc[BIOENTITY_LABEL],
        predicate=map_predicate(quals),
        negated=is_negated,
        object=doc[ANNOTATION_CLASS],
        object_label=doc[ANNOTATION_CLASS_LABEL],
        publications=doc[REFERENCE],
        evidence_type=doc.get(EVIDENCE_TYPE),
        primary_knowledge_source=doc[ASSIGNED_BY],
        aggregator_knowledge_source="infores:go",
    )
    return obj_to_dict(assoc)


async def find_gene_associations_for_pmid(ctx: RunContext[AmiGODependencies], pmid: str) -> List[Dict]:
    """
    Find gene associations for a given PubMed ID.

    Args:
        ctx: The run context; its deps provide the AmiGO adapter
        pmid: The PubMed ID (normalized with normalize_pmid before querying)

    Returns:
        List[Dict]: List of gene associations for the PubMed ID

    Raises:
        ModelRetry: If no associations reference the PubMed ID, or if the
            query or result conversion fails (the original error is chained
            as the cause)
    """
    print(f"FIND GENE ASSOCIATIONS FOR PMID: {pmid}")
    try:
        normalized_pmid = normalize_pmid(pmid)
        amigo = ctx.deps.get_amigo_adapter()

        print(f"Lookup amigo annotations to PMID: {normalized_pmid}")
        # NOTE: this relies on the adapter's private `_solr` attribute and on
        # the private `_query` helper imported from oaklib (as `amigo_query`).
        results = amigo_query(amigo._solr, {"reference": normalized_pmid}, DEFAULT_SELECT_FIELDS)
        assocs = [_amigo_doc_to_association(doc) for doc in results]

        if not assocs:
            raise ModelRetry(f"No gene associations found for PMID {pmid}. Try a different PubMed ID.")

        return assocs
    except ModelRetry:
        # Retry requests pass through untouched.
        raise
    except Exception as e:
        # Any other failure is converted into a retry request for the model;
        # `from e` keeps the underlying error attached for debugging.
        raise ModelRetry(f"Error finding gene associations for PMID {pmid}: {str(e)}") from e
|
98
|
+
|
99
|
+
|
100
|
+
async def lookup_uniprot_entry(ctx: RunContext[AmiGODependencies], uniprot_acc: str) -> str:
    """
    Lookup the Uniprot entry for a given Uniprot accession number.

    Args:
        ctx: The run context; its deps provide the UniProt service
        uniprot_acc: The Uniprot accession (normalized with
            normalize_uniprot_id before retrieval)

    Returns:
        str: The Uniprot entry text

    Raises:
        ModelRetry: If the entry cannot be found, or if retrieval fails for
            any other reason (the original error is chained as the cause)
    """
    print(f"LOOKUP UNIPROT: {uniprot_acc}")
    try:
        normalized_acc = normalize_uniprot_id(uniprot_acc)
        uniprot_service = ctx.deps.get_uniprot_service()
        result = uniprot_service.retrieve(normalized_acc, frmt="txt")

        # Reject non-string results first (e.g. a numeric status code): the
        # substring checks below would otherwise raise TypeError and produce a
        # misleading "Error retrieving ..." message instead of "not found".
        if not isinstance(result, str) or not result or "Error" in result or "Entry not found" in result:
            raise ModelRetry(f"Could not find UniProt entry for {uniprot_acc}. The accession may be incorrect.")

        return result
    except ModelRetry:
        # Retry requests pass through untouched.
        raise
    except Exception as e:
        # Any other failure is converted into a retry request for the model;
        # `from e` keeps the underlying error attached for debugging.
        raise ModelRetry(f"Error retrieving UniProt entry for {uniprot_acc}: {str(e)}") from e
|
125
|
+
|
126
|
+
|
127
|
+
async def uniprot_mapping(ctx: RunContext[AmiGODependencies], target_database: str, uniprot_accs: List[str]) -> Dict:
    """
    Perform a mapping of Uniprot accessions to another database.

    Args:
        ctx: The run context; its deps provide the UniProt service
        target_database: The target database (e.g KEGG, PDB)
        uniprot_accs: The Uniprot accessions (each normalized with
            normalize_uniprot_id and sent as one comma-separated query)

    Returns:
        Dict: Mapping results

    Raises:
        ModelRetry: If the mapping is empty, or if the mapping request fails
            for any other reason (the original error is chained as the cause)
    """
    print(f"UNIPROT MAPPING: {target_database} - {uniprot_accs}")
    try:
        uniprot_service = ctx.deps.get_uniprot_service()
        normalized_accs = [normalize_uniprot_id(x) for x in uniprot_accs]
        result = uniprot_service.mapping("UniProtKB_AC-ID", target_database, ",".join(normalized_accs))

        # The explicit len() is kept on purpose: a truthy result that has no
        # length raises TypeError here and is reported as ModelRetry by the
        # handler below, rather than being returned to the caller.
        if not result or len(result) == 0:
            raise ModelRetry(f"No mappings found for {uniprot_accs} to {target_database}. Try a different database or accessions.")

        return result
    except ModelRetry:
        # Retry requests pass through untouched.
        raise
    except Exception as e:
        # Any other failure is converted into a retry request for the model;
        # `from e` keeps the underlying error attached for debugging.
        raise ModelRetry(f"Error mapping {uniprot_accs} to {target_database}: {str(e)}") from e
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""
|
2
|
+
Biblio agent package for working with bibliographic data and citations.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Constants
# NOTE(review): these are assigned before the package's submodule imports
# (which carry E402 suppressions) — presumably so the submodules can import
# them from this package while it is still initializing; confirm before
# reordering.

# MongoDB connection URL: local host, port 27017, database "biblio".
HANDLE = "mongodb://localhost:27017/biblio"
# Database name; matches the database segment of HANDLE.
DB_NAME = "biblio"
# Collection name — presumably the default collection used by the biblio
# tools; verify against biblio_config.
COLLECTION_NAME = "main"
|
9
|
+
|
10
|
+
# isort: skip_file
|
11
|
+
from .biblio_agent import biblio_agent # noqa: E402
|
12
|
+
from .biblio_config import BiblioDependencies, get_config # noqa: E402
|
13
|
+
from .biblio_gradio import chat # noqa: E402
|
14
|
+
from .biblio_tools import ( # noqa: E402
|
15
|
+
search_bibliography,
|
16
|
+
lookup_pmid,
|
17
|
+
search_web,
|
18
|
+
retrieve_web_page,
|
19
|
+
)
|
20
|
+
|
21
|
+
# Public API of the package: the names exported by a star-import of this
# package, grouped by the submodule (or section) they come from.
__all__ = [
    # Constants
    "HANDLE",
    "DB_NAME",
    "COLLECTION_NAME",

    # Agent
    "biblio_agent",

    # Config
    "BiblioDependencies",
    "get_config",

    # Tools
    "search_bibliography",
    "lookup_pmid",
    "search_web",
    "retrieve_web_page",

    # Gradio
    "chat",
]
|