aurelian 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +94 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +85 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +46 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +72 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +43 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
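- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0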
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +240 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +67 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +186 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +55 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +25 -0
- aurelian/agents/monarch/monarch_agent.py +44 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +113 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +83 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/talisman_agent.py +126 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +168 -0
- aurelian/agents/talisman/talisman_tools.py +720 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +37 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +108 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +800 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +15 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.3.2.dist-info/LICENSE +22 -0
- aurelian-0.3.2.dist-info/METADATA +105 -0
- aurelian-0.3.2.dist-info/RECORD +254 -0
- aurelian-0.3.2.dist-info/WHEEL +4 -0
- aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
"""
|
2
|
+
Ubergraph agent package for working with ontologies via the UberGraph endpoint.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .ubergraph_agent import (
|
6
|
+
ubergraph_agent,
|
7
|
+
ASSUMPTIONS,
|
8
|
+
add_ontology_assumptions,
|
9
|
+
add_prefixes,
|
10
|
+
)
|
11
|
+
from .ubergraph_config import Dependencies, DEFAULT_PREFIXES, get_config
|
12
|
+
from .ubergraph_gradio import chat
|
13
|
+
from .ubergraph_tools import (
|
14
|
+
query_ubergraph,
|
15
|
+
QueryResults,
|
16
|
+
simplify_value,
|
17
|
+
simplify_results,
|
18
|
+
)
|
19
|
+
|
20
|
+
__all__ = [
|
21
|
+
# Agent
|
22
|
+
"ubergraph_agent",
|
23
|
+
"ASSUMPTIONS",
|
24
|
+
"add_ontology_assumptions",
|
25
|
+
"add_prefixes",
|
26
|
+
|
27
|
+
# Config
|
28
|
+
"Dependencies",
|
29
|
+
"DEFAULT_PREFIXES",
|
30
|
+
"get_config",
|
31
|
+
|
32
|
+
# Tools
|
33
|
+
"query_ubergraph",
|
34
|
+
"QueryResults",
|
35
|
+
"simplify_value",
|
36
|
+
"simplify_results",
|
37
|
+
|
38
|
+
# Gradio
|
39
|
+
"chat",
|
40
|
+
]
|
@@ -0,0 +1,71 @@
|
|
1
|
+
"""
|
2
|
+
Agent for working with ontologies via UberGraph endpoint.
|
3
|
+
"""
|
4
|
+
from typing import Dict
|
5
|
+
|
6
|
+
from pydantic_ai import Agent, RunContext
|
7
|
+
|
8
|
+
from .ubergraph_config import Dependencies, get_config
|
9
|
+
from .ubergraph_tools import query_ubergraph
|
10
|
+
|
11
|
+
# Assumptions about the UberGraph data model
|
12
|
+
# Every value MUST be a single ``str``: the values are interpolated verbatim
# into the system prompt as "Assumption: <value>". Multi-line values rely on
# implicit concatenation of adjacent string literals inside parentheses, so
# there must be NO commas between the fragments (a comma turns the value into
# a tuple) and each fragment must carry its own leading/trailing space.
ASSUMPTIONS = {
    "provenance": (
        "When formulating your response to tool outputs,"
        " you can extemporize with your own knowledge, but if you do so,"
        " you must be clear about which statements come from the ontology"
        " vs your own knowledge."
    ),
    "ids": "include both IDs and labels in responses, unless directed not to do so.",
    "obo": "Assume OBO style ontology and OBO PURLs (http://purl.obolibrary.org/obo/).",
    "rg": (
        "All edges are stored as simple triples, e.g CL:0000080 BFO:0000050 UBERON:0000179"
        " for 'circulating cell' 'part of' 'haemolymphatic fluid'"
    ),
    "ont_graph": (
        "Direct (asserted) edges are stored in the `renci:ontology` graph."
        " Use this by default, even for subClassOf."
    ),
    "entailed": (
        "Indirect (entailed) edges (including reflexive) are stored in the `renci:redundant` graph."
        " Use this for queries that require transitive closure, e.g. rdfs:subClassOf+."
        " Note however that other triples like rdfs:label are NOT in this graph - use renci:ontology for these."
    ),
    "paths": "In general you should NOT use paths like rdfs:subClassOf+, use the entailed graph.",
    "ro": "RO is used for predicates. Common relations include BFO:0000050 for part-of.",
    "is_a": "rdfs:subClassOf is used for is_a relationships.",
    "labels": "rdfs:label used for labels. IDs/URIs are typically OBO-style.",
    "oboInOwl": "assume oboInOwl for synonyms, e.g. oboInOwl:hasExactSynonym.",
    "blazegraph": (
        "Blazegraph is used as the underlying triplestore."
        " This means you SHOULD do relevance-ranked match queries over CONTAINS. "
        # After ';' the subject is implied, so it must not be repeated.
        "E.g. ?c rdfs:label ?v . ?v bds:search 'circulating cell' ; bds:relevance ?score ."
    ),
    "def": "IAO:0000115 is used for definitions.",
    "xref": "assume oboInOwl:hasDbXref for simple cross-references.",
    "mixed_language": "Do not assume all labels are language tagged.",
}
|
47
|
+
|
48
|
+
# Create the UberGraph agent
|
49
|
+
ubergraph_agent = Agent(
|
50
|
+
"openai:gpt-4o",
|
51
|
+
deps_type=Dependencies,
|
52
|
+
result_type=str,
|
53
|
+
)
|
54
|
+
|
55
|
+
# Register tools
|
56
|
+
ubergraph_agent.tool(query_ubergraph)
|
57
|
+
|
58
|
+
|
59
|
+
@ubergraph_agent.system_prompt
def add_ontology_assumptions(ctx: RunContext[Dependencies]) -> str:
    """Render every entry of ``ASSUMPTIONS`` as a system-prompt fragment.

    Args:
        ctx: The run context (not consulted by this function).

    Returns:
        A string that starts with a blank line and contains one
        ``Assumption: ...`` paragraph per entry, separated by blank lines.
    """
    # Only the descriptions are rendered; the dictionary keys are just labels.
    paragraphs = [f"Assumption: {text}" for text in ASSUMPTIONS.values()]
    return "\n\n" + "\n\n".join(paragraphs)
|
63
|
+
|
64
|
+
|
65
|
+
@ubergraph_agent.system_prompt
def add_prefixes(ctx: RunContext[Dependencies]) -> str:
    """Tell the model which SPARQL prefixes it may use without declaring them.

    Args:
        ctx: The run context; ``ctx.deps.prefixes`` maps prefix -> expansion.

    Returns:
        A header line followed by one ``Prefix: <name>: <expansion>`` entry
        per configured prefix.
    """
    entries = []
    for name, iri in ctx.deps.prefixes.items():
        # Each entry carries its own leading newline in addition to the join
        # separator, so entries end up separated by a blank line.
        entries.append(f"\nPrefix: {name}: {iri}")
    header = "\n\nAssume the following prefixes are auto-included:"
    return header + "\n".join(entries)
|
@@ -0,0 +1,79 @@
|
|
1
|
+
"""
|
2
|
+
Configuration for the Ubergraph agent.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
import os
|
6
|
+
from typing import Dict, Optional
|
7
|
+
|
8
|
+
from aurelian.dependencies.workdir import HasWorkdir, WorkDir
|
9
|
+
|
10
|
+
# Default UberGraph endpoint
|
11
|
+
UBERGRAPH_ENDPOINT = "https://ubergraph.apps.renci.org/sparql"
|
12
|
+
|
13
|
+
# Default SPARQL prefixes
|
14
|
+
DEFAULT_PREFIXES = {
|
15
|
+
"owl": "http://www.w3.org/2002/07/owl#",
|
16
|
+
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
17
|
+
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
18
|
+
"schema": "http://schema.org/",
|
19
|
+
"obo": "http://purl.obolibrary.org/obo/",
|
20
|
+
"xsd": "http://www.w3.org/2001/XMLSchema#",
|
21
|
+
"renci": "http://reasoner.renci.org/",
|
22
|
+
"oboInOwl": "http://www.geneontology.org/formats/oboInOwl#",
|
23
|
+
"BFO": "http://purl.obolibrary.org/obo/BFO_",
|
24
|
+
"RO": "http://purl.obolibrary.org/obo/RO_",
|
25
|
+
"GO": "http://purl.obolibrary.org/obo/GO_",
|
26
|
+
"SO": "http://purl.obolibrary.org/obo/SO_",
|
27
|
+
"CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
|
28
|
+
"CL": "http://purl.obolibrary.org/obo/CL_",
|
29
|
+
"UBERON": "http://purl.obolibrary.org/obo/UBERON_",
|
30
|
+
"IAO": "http://purl.obolibrary.org/obo/IAO_",
|
31
|
+
"OBI": "http://purl.obolibrary.org/obo/OBI_",
|
32
|
+
"biolink": "https://w3id.org/biolink/vocab/",
|
33
|
+
"bds": "http://www.bigdata.com/rdf/search#",
|
34
|
+
}
|
35
|
+
|
36
|
+
|
37
|
+
@dataclass
class Dependencies(HasWorkdir):
    """Configuration for the UberGraph agent.

    Holds the SPARQL endpoint, the prefix map that is prepended to queries and
    used to compact result URIs, and the cap on returned rows.
    """

    # SPARQL endpoint
    endpoint: str = UBERGRAPH_ENDPOINT

    # Prefixes for SPARQL queries. Each instance gets its OWN copy of the
    # defaults so that adding or removing a prefix on one config cannot leak
    # into the module-level DEFAULT_PREFIXES or into other instances.
    prefixes: Dict[str, str] = field(default_factory=lambda: dict(DEFAULT_PREFIXES))

    # Maximum number of results to return
    max_results: int = 20

    def __post_init__(self):
        """Fill in a default working directory when none was supplied."""
        # HasWorkdir doesn't have a __post_init__ method, so we don't call super()
        # NOTE(review): ``workdir`` is not declared in this class; presumably it
        # is a field inherited from HasWorkdir - confirm.
        if self.workdir is None:
            self.workdir = WorkDir()
|
55
|
+
|
56
|
+
|
57
|
+
def get_config(
    endpoint: Optional[str] = None,
    prefixes: Optional[Dict[str, str]] = None,
    max_results: Optional[int] = None,
) -> Dependencies:
    """Get the UberGraph configuration from arguments, environment variables or defaults.

    Explicit (truthy) arguments win; otherwise the environment is consulted,
    and finally the module defaults are used.

    Args:
        endpoint: SPARQL endpoint URL. Falls back to the ``UBERGRAPH_ENDPOINT``
            environment variable, then to the module default.
        prefixes: Prefix -> expansion map. Falls back to a copy of
            ``DEFAULT_PREFIXES``.
        max_results: Maximum number of rows to return. Falls back to the
            ``MAX_RESULTS`` environment variable, then to 20.

    Returns:
        A populated ``Dependencies`` instance.

    Raises:
        ValueError: If ``MAX_RESULTS`` is set but is not an integer.
    """
    # Initialize from environment or defaults
    config_endpoint = endpoint or os.environ.get("UBERGRAPH_ENDPOINT", UBERGRAPH_ENDPOINT)
    config_max_results = max_results or int(os.environ.get("MAX_RESULTS", "20"))

    # Hand out a copy of the defaults: passing the module-level dict itself
    # would let any later mutation of ``config.prefixes`` alter the defaults
    # seen by every other config.
    config_prefixes = prefixes if prefixes else dict(DEFAULT_PREFIXES)

    # Get workdir from environment if specified; when it is not, ``None`` is
    # passed through to ``Dependencies``.
    workdir_path = os.environ.get("AURELIAN_WORKDIR", None)
    workdir = WorkDir(location=workdir_path) if workdir_path else None

    # Create config with the specified values
    return Dependencies(
        endpoint=config_endpoint,
        prefixes=config_prefixes,
        max_results=config_max_results,
        workdir=workdir,
    )
|
@@ -0,0 +1,48 @@
|
|
1
|
+
"""
|
2
|
+
Gradio interface for the UberGraph agent.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
import gradio as gr
|
8
|
+
|
9
|
+
from aurelian.utils.async_utils import run_sync
|
10
|
+
from .ubergraph_agent import ubergraph_agent
|
11
|
+
from .ubergraph_config import Dependencies, get_config
|
12
|
+
|
13
|
+
|
14
|
+
def chat(deps: Optional[Dependencies] = None, **kwargs):
    """
    Initialize a chat interface for the UberGraph agent.

    Args:
        deps: Optional dependencies configuration; when omitted, one is built
            with ``get_config()``
        **kwargs: Additional arguments to pass to the agent

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = get_config()

    def get_info(query: str, history: List[str]) -> str:
        """Answer one chat turn, folding prior turns into the prompt text."""
        # NOTE(review): the interface below is created with type="messages",
        # so ``history`` entries are presumably message dicts rather than
        # plain strings - confirm. They are stringified as-is either way.
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Put the heading on its own line; appending it directly would
            # glue "## History" onto the end of the user's question.
            query = "\n".join([query + "\n\n## History", *(f"{h}" for h in history)])
        result = run_sync(lambda: ubergraph_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="UberGraph SPARQL Assistant",
        examples=[
            "Find all cell types that are part of the heart",
            "What is the definition of CL:0000746?",
            "What genes are expressed in neurons?",
            "What are the subclasses of skeletal muscle tissue?",
        ],
    )
|
@@ -0,0 +1,69 @@
|
|
1
|
+
"""
|
2
|
+
MCP tools for working with ontologies via UberGraph endpoint.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import Dict, Optional
|
6
|
+
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
|
9
|
+
import aurelian.agents.ubergraph.ubergraph_tools as ut
|
10
|
+
from aurelian.agents.ubergraph.ubergraph_config import Dependencies, get_config
|
11
|
+
from pydantic_ai import RunContext
|
12
|
+
|
13
|
+
# Initialize FastMCP server with combined system prompt
|
14
|
+
SYSTEM_PROMPT = """
|
15
|
+
You are an expert ontologist with access to the UberGraph SPARQL endpoint.
|
16
|
+
|
17
|
+
UberGraph is a knowledge graph built from multiple OBO ontologies, including GO, Uberon, CL, ChEBI, and more.
|
18
|
+
You can help users explore ontology terms, relationships, and hierarchies through SPARQL queries.
|
19
|
+
|
20
|
+
IMPORTANT ASSUMPTIONS:
|
21
|
+
- When formulating your response to tool outputs, you can extemporize with your own knowledge,
|
22
|
+
but if you do so, you must be clear about which statements come from the ontology vs your own knowledge.
|
23
|
+
- Include both IDs and labels in responses, unless directed not to do so.
|
24
|
+
- Assume OBO style ontology and OBO PURLs (http://purl.obolibrary.org/obo/).
|
25
|
+
- All edges are stored as simple triples, e.g CL:0000080 BFO:0000050 UBERON:0000179 for 'circulating cell'
|
26
|
+
'part of' 'haemolymphatic fluid'
|
27
|
+
- Direct (asserted) edges are stored in the 'renci:ontology' graph. Use this by default, even for subClassOf.
|
28
|
+
- Indirect (entailed) edges (including reflexive) are stored in the 'renci:redundant' graph. Use this for
|
29
|
+
queries that require transitive closure, e.g. rdfs:subClassOf+
|
30
|
+
"""
|
31
|
+
|
32
|
+
mcp = FastMCP("ubergraph", instructions=SYSTEM_PROMPT)
|
33
|
+
|
34
|
+
|
35
|
+
from aurelian.dependencies.workdir import WorkDir
|
36
|
+
|
37
|
+
def deps() -> Dependencies:
    """Build the dependencies object used by the MCP tools.

    Returns:
        The configuration from ``get_config()`` with its working directory
        replaced by one located at ``$AURELIAN_WORKDIR`` (or ``/tmp/aurelian``
        when that variable is unset).
    """
    config = get_config()
    # The workdir is always overwritten here, whatever get_config() chose.
    workdir_location = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
    config.workdir = WorkDir(workdir_location)
    return config
|
43
|
+
|
44
|
+
def ctx() -> RunContext[Dependencies]:
    """Create a run context for invoking agent tools outside of an agent run.

    Returns:
        A ``RunContext`` carrying fresh dependencies from ``deps()``; the
        model, usage and prompt slots are left as ``None``.
    """
    return RunContext[Dependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
|
50
|
+
|
51
|
+
|
52
|
+
@mcp.tool()
async def query_ubergraph(query: str, format: Optional[str] = "text") -> Dict:
    """
    Execute a SPARQL query against the UberGraph endpoint.

    Args:
        query: The SPARQL query to execute
        format: Output format (text or json). Accepted for interface
            compatibility; the underlying tool has no format option, so this
            value is currently not used.

    Returns:
        The query results as a dictionary with a ``results`` list of rows
    """
    # The underlying tool takes only (ctx, query); forwarding ``format`` as a
    # third positional argument raised TypeError on every call.
    results = await ut.query_ubergraph(ctx(), query)
    # Convert the pydantic model to a plain dict to honour the ``-> Dict``
    # return annotation.
    return results.model_dump()
|
65
|
+
|
66
|
+
|
67
|
+
if __name__ == "__main__":
|
68
|
+
# Initialize and run the server
|
69
|
+
mcp.run(transport='stdio')
|
@@ -0,0 +1,118 @@
|
|
1
|
+
"""
|
2
|
+
Tools for interacting with the UberGraph SPARQL endpoint.
|
3
|
+
"""
|
4
|
+
import asyncio
|
5
|
+
from typing import Any, Dict, List, Optional
|
6
|
+
|
7
|
+
from pydantic import BaseModel
|
8
|
+
from pydantic_ai import RunContext, ModelRetry
|
9
|
+
from SPARQLWrapper import JSON, SPARQLWrapper
|
10
|
+
|
11
|
+
from .ubergraph_config import Dependencies, get_config
|
12
|
+
|
13
|
+
|
14
|
+
class QueryResults(BaseModel):
    """Container for the rows produced by a SPARQL query.

    Each row is a dictionary; ``query_ubergraph`` fills it with one entry per
    variable of the result binding, holding the simplified value.
    """

    # Simplified result rows; empty when nothing has been stored.
    results: List[Dict] = []
|
17
|
+
|
18
|
+
|
19
|
+
def simplify_value(v: Dict, prefixes=None) -> Any:
    """
    Simplify a SPARQL query result value.

    URIs are compacted to ``prefix:local`` form using the LONGEST matching
    expansion, so that a specific prefix such as ``BFO``
    (``.../obo/BFO_``) wins over a generic one such as ``obo``
    (``.../obo/``) regardless of the order of ``prefixes``. Non-URI values,
    and URIs that match no expansion, are returned unchanged.

    Args:
        v: The value to simplify; a SPARQL JSON binding with ``type`` and
            ``value`` keys
        prefixes: Optional mapping of prefixes to expansions

    Returns:
        The simplified value
    """
    value = v["value"]
    if not prefixes or v.get("type") != "uri":
        return value

    candidates = [
        (prefix, expansion)
        for prefix, expansion in prefixes.items()
        if value.startswith(expansion)
    ]
    if not candidates:
        return value

    # max() keeps the first of equally long expansions, i.e. mapping order
    # still breaks ties.
    prefix, expansion = max(candidates, key=lambda item: len(item[1]))
    return f"{prefix}:{value[len(expansion):]}"
|
35
|
+
|
36
|
+
|
37
|
+
def simplify_results(results: Dict, prefixes=None, limit=20) -> List[Dict]:
    """
    Simplify SPARQL query results.

    Args:
        results: The query results to simplify; the rows are read from
            ``results["results"]["bindings"]``
        prefixes: Optional mapping of prefixes to expansions
        limit: Maximum number of results to return

    Returns:
        A list of simplified results, one dictionary per row, holding at most
        ``limit`` rows
    """
    simplified = []
    for index, binding in enumerate(results["results"]["bindings"]):
        # Stop as soon as ``limit`` rows have been collected.
        if index >= limit:
            break
        simplified.append(
            {variable: simplify_value(cell, prefixes) for variable, cell in binding.items()}
        )
    return simplified
|
60
|
+
|
61
|
+
|
62
|
+
async def query_ubergraph(ctx: RunContext[Dependencies], query: str) -> QueryResults:
    """
    Performs a SPARQL query over Ubergraph then returns the results as triples.

    Ubergraph is a triplestore that contains many OBO ontologies and precomputed
    relation graph edges.

    The configured prefixes are prepended to the query as ``PREFIX``
    declarations, the blocking HTTP call is run in a worker thread, and the
    result rows are compacted and truncated to ``max_results``.

    Args:
        ctx: The run context
        query: The SPARQL query to execute

    Returns:
        The query results

    Raises:
        ModelRetry: If the query yields no rows, or if anything fails while
            executing it or processing its results (syntax error, timeout,
            or any other error).
    """
    # Local import keeps this function self-contained. Diagnostics go through
    # logging rather than print(): this tool is also served over the stdio
    # MCP transport, where stray writes to stdout corrupt the protocol stream.
    import logging

    logger = logging.getLogger(__name__)

    config = ctx.deps or get_config()
    prefixes = config.prefixes
    endpoint = config.endpoint

    # Add prefixes to query
    declarations = "".join(f"PREFIX {k}: <{v}>\n" for k, v in prefixes.items())
    prefixed_query = declarations + query

    logger.info("## Query\n%s\n##", prefixed_query)

    try:
        # Create SPARQL wrapper
        sw = SPARQLWrapper(endpoint)
        sw.setQuery(prefixed_query)
        sw.setReturnFormat(JSON)

        # Execute the query in a thread pool
        ret = await asyncio.to_thread(sw.queryAndConvert)

        # Process the results
        results = simplify_results(ret, prefixes, limit=config.max_results)
        logger.info("num results=%d", len(results))
        logger.debug("results=%s", results)

        if not results:
            raise ModelRetry("No results found for SPARQL query. Try refining your query.")

        return QueryResults(results=results)
    except ModelRetry:
        # Already the exception type we want the model to see; pass it on.
        raise
    except Exception as e:
        message = str(e)
        lowered = message.lower()

        # Handle specific SPARQL errors
        if "syntax error" in lowered:
            raise ModelRetry(f"SPARQL syntax error: {message}") from e
        if any(marker in lowered for marker in ("timeout", "timed out", "time out", "time-out")):
            raise ModelRetry(
                "Query timed out. Try to simplify your query or reduce its scope."
            ) from e
        raise ModelRetry(f"Error executing SPARQL query: {message}") from e
|
@@ -0,0 +1,37 @@
|
|
1
|
+
"""
|
2
|
+
UniProt agent package for interacting with the UniProt database.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .uniprot_agent import uniprot_agent, UNIPROT_SYSTEM_PROMPT
|
6
|
+
from .uniprot_config import UniprotConfig, get_config
|
7
|
+
from .uniprot_gradio import chat
|
8
|
+
from .uniprot_mcp import (
|
9
|
+
get_uniprot_mcp_tools,
|
10
|
+
get_uniprot_mcp_messages,
|
11
|
+
handle_uniprot_mcp_request,
|
12
|
+
)
|
13
|
+
from .uniprot_tools import lookup_uniprot_entry, search, uniprot_mapping, normalize_uniprot_id
|
14
|
+
|
15
|
+
__all__ = [
|
16
|
+
# Agent
|
17
|
+
"uniprot_agent",
|
18
|
+
"UNIPROT_SYSTEM_PROMPT",
|
19
|
+
|
20
|
+
# Config
|
21
|
+
"UniprotConfig",
|
22
|
+
"get_config",
|
23
|
+
|
24
|
+
# Tools
|
25
|
+
"lookup_uniprot_entry",
|
26
|
+
"search",
|
27
|
+
"uniprot_mapping",
|
28
|
+
"normalize_uniprot_id",
|
29
|
+
|
30
|
+
# Gradio
|
31
|
+
"chat",
|
32
|
+
|
33
|
+
# MCP
|
34
|
+
"get_uniprot_mcp_tools",
|
35
|
+
"get_uniprot_mcp_messages",
|
36
|
+
"handle_uniprot_mcp_request",
|
37
|
+
]
|
@@ -0,0 +1,43 @@
|
|
1
|
+
"""
|
2
|
+
Agent for working with the UniProt database and API.
|
3
|
+
"""
|
4
|
+
from pydantic_ai import Agent
|
5
|
+
|
6
|
+
from .uniprot_config import UniprotConfig, get_config
|
7
|
+
from .uniprot_tools import lookup_uniprot_entry, search, uniprot_mapping
|
8
|
+
|
9
|
+
# System prompt for the UniProt agent
|
10
|
+
UNIPROT_SYSTEM_PROMPT = """
|
11
|
+
You are a helpful assistant that specializes in accessing and interpreting information from the UniProt database.
|
12
|
+
UniProt is a comprehensive, high-quality resource of protein sequence and functional information.
|
13
|
+
|
14
|
+
You can:
|
15
|
+
- Search UniProt with queries
|
16
|
+
- Look up detailed information about specific proteins using UniProt accession numbers
|
17
|
+
- Map UniProt accessions to entries in other databases
|
18
|
+
|
19
|
+
When using protein IDs:
|
20
|
+
- UniProt accession numbers (e.g., P12345) are stable identifiers for protein entries
|
21
|
+
- Some proteins may be referenced by their entry name (e.g., ALBU_HUMAN)
|
22
|
+
- UniProt IDs may sometimes include version numbers (e.g., P12345.2) which can be normalized
|
23
|
+
|
24
|
+
When returning information about proteins, present it in a clear, organized manner with:
|
25
|
+
- Key protein attributes like name, gene, organism, and length
|
26
|
+
- Functional information including catalytic activity and pathways
|
27
|
+
- Structural information if available
|
28
|
+
- Disease associations if relevant
|
29
|
+
|
30
|
+
For search results, summarize the key findings and highlight the most relevant matches.
|
31
|
+
"""
|
32
|
+
|
33
|
+
# Create the agent with the system prompt
|
34
|
+
uniprot_agent = Agent(
|
35
|
+
model="openai:gpt-4o",
|
36
|
+
system_prompt=UNIPROT_SYSTEM_PROMPT,
|
37
|
+
deps_type=UniprotConfig,
|
38
|
+
)
|
39
|
+
|
40
|
+
# Register the tools with the agent
|
41
|
+
uniprot_agent.tool(search)
|
42
|
+
uniprot_agent.tool(lookup_uniprot_entry)
|
43
|
+
uniprot_agent.tool(uniprot_mapping)
|
@@ -0,0 +1,43 @@
|
|
1
|
+
"""
|
2
|
+
Configuration for the UniProt agent.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
import os
|
6
|
+
from typing import Any, Dict, Optional
|
7
|
+
|
8
|
+
from bioservices import UniProt
|
9
|
+
|
10
|
+
from aurelian.dependencies.workdir import HasWorkdir, WorkDir
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class UniprotConfig(HasWorkdir):
    """Dependencies and settings for the UniProt agent.

    ``workdir`` is not declared in this class; it is presumably provided by
    ``HasWorkdir`` (confirm against that class).
    """

    # Keyword arguments forwarded verbatim to the bioservices ``UniProt`` client
    uniprot_client_options: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Fill in defaults for anything the caller left unset."""
        # HasWorkdir doesn't have a __post_init__ method, so we don't call super()
        if not self.uniprot_client_options:
            # Both None and an empty dict fall back to a non-verbose client
            self.uniprot_client_options = {"verbose": False}

        # Initialize the workdir if not already set
        if self.workdir is None:
            self.workdir = WorkDir()

    def get_uniprot_client(self) -> UniProt:
        """Return a new ``UniProt`` client built from ``uniprot_client_options``.

        A fresh client is constructed on every call; nothing is cached here.
        """
        return UniProt(**self.uniprot_client_options)
|
33
|
+
|
34
|
+
|
35
|
+
def get_config() -> UniprotConfig:
    """Build the UniProt configuration, honouring ``AURELIAN_WORKDIR``.

    If the ``AURELIAN_WORKDIR`` environment variable holds a non-empty value,
    a ``WorkDir`` located there is passed to the config; otherwise ``None`` is
    passed. The client options are always ``{"verbose": False}``.

    Returns:
        A ``UniprotConfig`` instance.
    """
    location = os.environ.get("AURELIAN_WORKDIR")

    # An unset or empty variable means "no explicit working directory"
    if location:
        workdir = WorkDir(location=location)
    else:
        workdir = None

    return UniprotConfig(workdir=workdir, uniprot_client_options={"verbose": False})
|
@@ -0,0 +1,99 @@
|
|
1
|
+
"""
|
2
|
+
Evaluation module for the UniProt agent.
|
3
|
+
|
4
|
+
This module implements evaluations for the UniProt agent using the pydantic-ai-evals framework.
|
5
|
+
"""
|
6
|
+
import asyncio
|
7
|
+
import sys
|
8
|
+
from typing import Optional, Any, Dict, Callable, Awaitable
|
9
|
+
|
10
|
+
from aurelian.evaluators.model import MetadataDict, metadata
|
11
|
+
from aurelian.evaluators.substring_evaluator import SubstringEvaluator
|
12
|
+
from pydantic_evals import Case, Dataset
|
13
|
+
from pydantic_evals.evaluators import LLMJudge
|
14
|
+
|
15
|
+
from aurelian.agents.uniprot.uniprot_agent import uniprot_agent
|
16
|
+
from aurelian.agents.uniprot.uniprot_config import UniprotConfig
|
17
|
+
|
18
|
+
class UniprotMetadata(Dict[str, Any]):
    """String-keyed dictionary for carrying metadata in UniProt evaluations.

    Adds no behaviour of its own on top of ``dict``.
    """
|
21
|
+
|
22
|
+
# Define individual evaluation cases
|
23
|
+
case1 = Case(
|
24
|
+
name="human_insulin",
|
25
|
+
inputs="Search for human insulin protein",
|
26
|
+
expected_output="P01308", # Human insulin UniProt ID
|
27
|
+
metadata=metadata("easy", "protein_search")
|
28
|
+
)
|
29
|
+
|
30
|
+
case2 = Case(
|
31
|
+
name="uniprot_entry_lookup",
|
32
|
+
inputs="Look up UniProt entry P01308",
|
33
|
+
expected_output="insulin", # Should identify this as insulin
|
34
|
+
metadata=metadata("easy", "id_lookup"),
|
35
|
+
evaluators=[
|
36
|
+
LLMJudge(
|
37
|
+
rubric="""
|
38
|
+
Answer should:
|
39
|
+
1. Correctly identify P01308 as human insulin
|
40
|
+
2. Include key information about the protein's function
|
41
|
+
3. Mention its role in glucose homeostasis
|
42
|
+
4. Provide information about protein structure
|
43
|
+
""",
|
44
|
+
include_input=True
|
45
|
+
)
|
46
|
+
]
|
47
|
+
)
|
48
|
+
|
49
|
+
case3 = Case(
|
50
|
+
name="id_mapping",
|
51
|
+
inputs="Map UniProt IDs P01308,P01009 to PDB database",
|
52
|
+
expected_output="PDB", # Should return PDB IDs
|
53
|
+
metadata=metadata("medium", "database_mapping")
|
54
|
+
)
|
55
|
+
|
56
|
+
case4 = Case(
|
57
|
+
name="domain_identification",
|
58
|
+
inputs="What domains are present in UniProt entry P53_HUMAN?",
|
59
|
+
expected_output="domain", # Should discuss protein domains
|
60
|
+
metadata=metadata("medium", "protein_domain_analysis")
|
61
|
+
)
|
62
|
+
|
63
|
+
case5 = Case(
|
64
|
+
name="disease_association",
|
65
|
+
inputs="Find all proteins related to Alzheimer's disease",
|
66
|
+
expected_output="amyloid", # Should mention amyloid proteins
|
67
|
+
metadata=metadata("hard", "disease_association_query")
|
68
|
+
)
|
69
|
+
|
70
|
+
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """
    Assemble the evaluation dataset for the UniProt agent.

    Bundles the five module-level cases with two dataset-level evaluators:
    a ``SubstringEvaluator`` and an ``LLMJudge`` driven by a rubric about the
    quality of the protein information in the answer.

    Returns:
        Dataset of UniProt evaluation cases with the dataset-level evaluators attached
    """
    # Dataset-level evaluators, constructed in the order they are attached
    substring_check = SubstringEvaluator()
    rubric_judge = LLMJudge(
        rubric="""
            Evaluate the answer based on:
            1. Accuracy of protein information provided
            2. Correct identification of UniProt IDs and cross-references
            3. Comprehensive coverage of protein structure and function
            4. Proper description of protein domains and modifications
            5. Accurate representation of protein-disease associations
            """,
        model="anthropic:claude-3-7-sonnet-latest",
    )

    return Dataset(
        cases=[case1, case2, case3, case4, case5],
        evaluators=[substring_check, rubric_judge],
    )
|