aurelian 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +94 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +85 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +46 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +72 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +43 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
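- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0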
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +240 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +67 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +186 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +55 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +25 -0
- aurelian/agents/monarch/monarch_agent.py +44 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +113 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +83 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/talisman_agent.py +126 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +168 -0
- aurelian/agents/talisman/talisman_tools.py +720 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +37 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +108 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +800 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +15 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.3.2.dist-info/LICENSE +22 -0
- aurelian-0.3.2.dist-info/METADATA +105 -0
- aurelian-0.3.2.dist-info/RECORD +254 -0
- aurelian-0.3.2.dist-info/WHEEL +4 -0
- aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,71 @@
|
|
1
|
+
"""
|
2
|
+
Configuration classes for the chemistry agent.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
from pydantic import BaseModel
|
8
|
+
from aurelian.dependencies.workdir import HasWorkdir
|
9
|
+
|
10
|
+
|
11
|
+
class ChemicalStructure(BaseModel):
    """
    Model for representing chemical structures.

    Every field is optional; the alternate constructors below populate either
    ``chebi_id`` (from a ChEBI CURIE or bare local ID) or ``smiles``.
    """
    chebi_id: Optional[str] = None
    smiles: Optional[str] = None
    inchi: Optional[str] = None
    name: Optional[str] = None

    @property
    def chebi_local_id(self) -> Optional[str]:
        """Return the part of ``chebi_id`` after the prefix, or None when no ID is set."""
        if not self.chebi_id:
            return None
        # partition() never raises: an ID stored without a prefix is returned
        # unchanged instead of failing with IndexError.
        _, colon, local_id = self.chebi_id.partition(":")
        return local_id if colon else self.chebi_id

    @property
    def chebi_image_url(self) -> str:
        """Return the ChEBI image URL for this structure, or "" when there is no ChEBI ID."""
        local_id = self.chebi_local_id
        if local_id:
            return f"https://www.ebi.ac.uk/chebi/displayImage.do?defaultImage=true&imageIndex=0&chebiId={local_id}"
        return ""

    @classmethod
    def from_id(cls, id: str) -> 'ChemicalStructure':
        """
        Build a structure from a ChEBI CURIE or a bare ChEBI local ID.

        Args:
            id: ``CHEBI:<local id>`` (prefix matched case-insensitively) or just the local ID

        Returns:
            A structure whose ``chebi_id`` is normalised to the ``CHEBI:`` prefix

        Raises:
            ValueError: If the prefix is not ChEBI or the identifier has more than one colon
        """
        prefix, colon, local_id = id.partition(":")
        if not colon:
            return cls(chebi_id="CHEBI:" + id)
        if prefix.lower() != "chebi":
            raise ValueError(f"Invalid prefix: {prefix}")
        if ":" in local_id:
            # Previously surfaced as an opaque tuple-unpacking ValueError.
            raise ValueError(f"Invalid identifier: {id}")
        return cls(chebi_id="CHEBI:" + local_id)

    @classmethod
    def from_anything(cls, id: str) -> 'ChemicalStructure':
        """
        Build a structure from either a ChEBI CURIE or a SMILES string.

        Args:
            id: A ``CHEBI:`` CURIE or a SMILES string

        Returns:
            A structure with either ``chebi_id`` or ``smiles`` populated

        Raises:
            ValueError: If the input is neither a ChEBI CURIE nor parseable SMILES
        """
        prefix, colon, _ = id.partition(":")
        if colon and prefix.lower() == "chebi":
            return cls.from_id(id)
        # A colon alone does not make the input a CURIE: SMILES uses ":" for
        # aromatic bonds and atom classes, so try parsing before rejecting.
        from rdkit import Chem
        mol = Chem.MolFromSmiles(id)
        if mol:
            return cls(smiles=id)
        if colon:
            # Not SMILES and not ChEBI: let from_id raise the prefix error.
            return cls.from_id(id)
        raise ValueError(f"Invalid identifier: {id}")
|
54
|
+
|
55
|
+
|
56
|
+
@dataclass
class ChemistryDependencies(HasWorkdir):
    """
    Dependencies injected into the chemistry agent and its tools.

    Extends ``HasWorkdir`` with chemistry-specific settings.
    """

    # Upper bound passed as ``limit`` when searching ChEBI terms.
    max_search_results: int = 30
|
62
|
+
|
63
|
+
|
64
|
+
def get_config() -> ChemistryDependencies:
    """
    Build the default configuration for the Chemistry agent.

    Returns:
        ChemistryDependencies: A freshly constructed instance with default values
    """
    config = ChemistryDependencies()
    return config
|
@@ -0,0 +1,79 @@
|
|
1
|
+
"""
|
2
|
+
Evaluation module for the Chemistry agent.
|
3
|
+
|
4
|
+
This module implements evaluations for the Chemistry agent using the pydantic-ai-evals framework.
|
5
|
+
"""
|
6
|
+
import asyncio
|
7
|
+
import sys
|
8
|
+
from typing import Optional, Any, Dict, Callable, Awaitable
|
9
|
+
|
10
|
+
from aurelian.evaluators.model import MetadataDict, metadata
|
11
|
+
from aurelian.evaluators.substring_evaluator import SubstringEvaluator
|
12
|
+
from pydantic_evals import Case, Dataset
|
13
|
+
from pydantic_evals.evaluators import LLMJudge
|
14
|
+
|
15
|
+
from aurelian.agents.chemistry.chemistry_agent import chemistry_agent
|
16
|
+
from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
|
17
|
+
|
18
|
+
class ChemistryMetadata(Dict[str, Any]):
    """Plain string-keyed dictionary holding metadata for Chemistry evaluations."""
|
21
|
+
|
22
|
+
# Define individual evaluation cases
|
23
|
+
# Expected output: the answer should mention the methylxanthine structure.
case1 = Case(
    name="caffeine_structure",
    inputs="Explain the structure of caffeine (CHEBI:27732)",
    expected_output="methylxanthine",
    metadata=metadata("medium", "structure_explanation"),
)

# Expected output: the answer should mention the acetyl group; a case-level
# judge additionally checks the explanation itself.
case2 = Case(
    name="aspirin_properties",
    inputs="What does the structure of aspirin (CHEBI:15365) tell us about its properties?",
    expected_output="acetyl",
    metadata=metadata("medium", "structure_property_relationship"),
    evaluators=[
        LLMJudge(
            rubric="Answer should explain how the acetyl group affects aspirin's properties and mention its action as a COX inhibitor",
            include_input=True,
        ),
    ],
)

# Expected output: the SMILES string below is aspirin.
case3 = Case(
    name="smiles_interpretation",
    inputs="Interpret this SMILES: CC(=O)OC1=CC=CC=C1C(=O)O",
    expected_output="aspirin",
    metadata=metadata("hard", "smiles_interpretation"),
)

# Expected output: the answer should identify the amide group.
case4 = Case(
    name="functional_groups",
    inputs="Identify all functional groups in paracetamol (CHEBI:46195)",
    expected_output="amide",
    metadata=metadata("medium", "functional_group_identification"),
)
|
56
|
+
|
57
|
+
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """
    Assemble the evaluation dataset for the Chemistry agent.

    The four module-level cases are combined with two dataset-level
    evaluators: a substring check and an LLM judge for scientific accuracy.

    Returns:
        Dataset of Chemistry evaluation cases with appropriate evaluators
    """
    return Dataset(
        cases=[case1, case2, case3, case4],
        # Dataset-level evaluators are listed here rather than on each case.
        evaluators=[
            SubstringEvaluator(),
            LLMJudge(
                rubric="Answer should be scientifically accurate and use proper chemistry terminology",
                model="anthropic:claude-3-7-sonnet-latest",
            ),
        ],
    )
|
@@ -0,0 +1,50 @@
|
|
1
|
+
"""
|
2
|
+
Gradio UI for the chemistry agent.
|
3
|
+
"""
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
import gradio as gr
|
7
|
+
|
8
|
+
from aurelian.agents.chemistry.chemistry_agent import chemistry_agent
|
9
|
+
from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
|
10
|
+
from aurelian.utils.async_utils import run_sync
|
11
|
+
|
12
|
+
|
13
|
+
def chat(deps: Optional[ChemistryDependencies] = None, workdir: Optional[str] = None, **kwargs):
    """
    Initialize a chat interface for the chemistry agent.

    Args:
        deps: Optional dependencies configuration; a default instance is created when omitted
        workdir: Optional working directory path; when given it overrides ``deps.workdir.location``
        **kwargs: Additional arguments to pass to the agent

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = ChemistryDependencies()

    if workdir:
        deps.workdir.location = workdir

    def get_info(query: str, history: List[str]) -> str:
        """Run the agent on the query, with any prior chat history appended."""
        # NOTE(review): with type="messages" Gradio presumably passes history
        # entries as role/content dicts rather than strings - confirm.
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Start the heading on its own line; appending it directly fused
            # it onto the end of the user's question.
            query += "\n\n## History"
            for h in history:
                query += f"\n{h}"
        result = run_sync(lambda: chemistry_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Chemistry AI Assistant",
        examples=[
            ["Explain the structure of caffeine (CHEBI:27732)"],
            ["What does the structure of aspirin (CHEBI:15365) tell us about its properties?"],
            ["Interpret this SMILES: CC(=O)OC1=CC=CC=C1C(=O)O"]
        ]
    )
|
@@ -0,0 +1,120 @@
|
|
1
|
+
"""
|
2
|
+
MCP tools for working with chemical structures.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
|
9
|
+
import aurelian.agents.chemistry.chemistry_tools as ct
|
10
|
+
import aurelian.agents.filesystem.filesystem_tools as fst
|
11
|
+
from aurelian.agents.chemistry.chemistry_agent import SYSTEM
|
12
|
+
from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
|
13
|
+
from pydantic_ai import RunContext
|
14
|
+
|
15
|
+
# Initialize FastMCP server
|
16
|
+
mcp = FastMCP("chemistry", instructions=SYSTEM)
|
17
|
+
|
18
|
+
|
19
|
+
from aurelian.dependencies.workdir import WorkDir
|
20
|
+
|
21
|
+
def deps() -> ChemistryDependencies:
    """
    Build the dependencies used by the MCP tools.

    The working directory is taken from the AURELIAN_WORKDIR environment
    variable, falling back to /tmp/aurelian when it is unset.
    """
    chemistry_deps = ChemistryDependencies()
    workdir_path = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
    chemistry_deps.workdir = WorkDir(workdir_path)
    return chemistry_deps
|
27
|
+
|
28
|
+
def ctx() -> RunContext[ChemistryDependencies]:
    """
    Create a run context for calling the agent tools outside an agent run.

    Only ``deps`` is populated; model, usage and prompt are passed as None.
    """
    return RunContext[ChemistryDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
|
34
|
+
|
35
|
+
|
36
|
+
@mcp.tool()
async def draw_structure_and_interpret(identifier: str, question: str) -> str:
    """
    Draw a chemical structure and analyze it.

    Args:
        identifier: A ChEBI ID (e.g., CHEBI:16236) or SMILES string
        question: A specific question about the structure

    Returns:
        Analysis of the structure in response to the question
    """
    # Docstring kept verbatim: it may be surfaced as the MCP tool description.
    run_context = ctx()
    analysis = await ct.draw_structure_and_interpret(run_context, identifier, question)
    return analysis
|
49
|
+
|
50
|
+
|
51
|
+
@mcp.tool()
async def chebi_search_terms(query: str) -> List[Dict]:
    """
    Search ChEBI for a term.

    Args:
        query: The search text

    Returns:
        A list of matching ChEBI terms
    """
    # Docstring kept verbatim: it may be surfaced as the MCP tool description.
    run_context = ctx()
    matches = await ct.chebi_search_terms(run_context, query)
    return matches
|
63
|
+
|
64
|
+
|
65
|
+
@mcp.tool()
async def search_web_for_chemistry(query: str) -> str:
    """
    Search the web for chemistry information.

    Args:
        query: The search query

    Returns:
        Search results with summaries
    """
    # The underlying tool accepts only the query (no run context); passing
    # ctx() as well raised TypeError on every call.
    return await ct.search_web_for_chemistry(query)
|
77
|
+
|
78
|
+
|
79
|
+
@mcp.tool()
async def retrieve_chemistry_web_page(url: str) -> str:
    """
    Fetch the contents of a web page related to chemistry.

    Args:
        url: The URL to fetch

    Returns:
        The contents of the web page
    """
    # The underlying tool accepts only the URL (no run context); passing
    # ctx() as well raised TypeError on every call.
    return await ct.retrieve_chemistry_web_page(url)
|
91
|
+
|
92
|
+
|
93
|
+
@mcp.tool()
async def inspect_file(data_file: str) -> str:
    """
    Inspect a file in the working directory.

    Args:
        data_file: name of file

    Returns:
        Contents of the file
    """
    # Docstring kept verbatim: it may be surfaced as the MCP tool description.
    run_context = ctx()
    contents = await fst.inspect_file(run_context, data_file)
    return contents
|
105
|
+
|
106
|
+
|
107
|
+
@mcp.tool()
async def list_files() -> str:
    """
    List files in the working directory.

    Returns:
        List of files in the working directory
    """
    # Docstring kept verbatim: it may be surfaced as the MCP tool description.
    run_context = ctx()
    listing = await fst.list_files(run_context)
    return listing
|
116
|
+
|
117
|
+
|
118
|
+
if __name__ == "__main__":
    # Serve the chemistry MCP tools over stdio when run as a script.
    mcp.run(transport="stdio")
|
@@ -0,0 +1,121 @@
|
|
1
|
+
"""
|
2
|
+
Tools for the chemistry agent.
|
3
|
+
"""
|
4
|
+
import io
|
5
|
+
import httpx
|
6
|
+
from functools import lru_cache
|
7
|
+
from typing import List, Dict, Optional
|
8
|
+
|
9
|
+
from oaklib import get_adapter
|
10
|
+
from pydantic_ai import RunContext, BinaryContent, ModelRetry
|
11
|
+
|
12
|
+
from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies, ChemicalStructure
|
13
|
+
from aurelian.utils.ontology_utils import search_ontology
|
14
|
+
from aurelian.utils.search_utils import web_search, retrieve_web_page
|
15
|
+
|
16
|
+
|
17
|
+
@lru_cache
def get_chebi_adapter():
    """
    Get the ChEBI adapter from oaklib.

    The result is cached, so the adapter is created once per process.
    """
    # Plain literal: the original f-string had no placeholders.
    return get_adapter("sqlite:obo:chebi")
|
21
|
+
|
22
|
+
|
23
|
+
def smiles_to_image(smiles: str) -> bytes:
    """
    Convert a SMILES string to an image.

    Args:
        smiles: The SMILES representation of a molecule

    Returns:
        bytes: PNG image of the molecular structure

    Raises:
        ValueError: If the SMILES string is invalid
    """
    # rdkit is imported lazily, only when an image is actually rendered.
    from rdkit import Chem
    from rdkit.Chem import Draw

    molecule = Chem.MolFromSmiles(smiles)
    if not molecule:
        raise ValueError(f"Invalid SMILES: {smiles}")

    rendered = Draw.MolToImage(molecule)
    with io.BytesIO() as png_buffer:
        rendered.save(png_buffer, format='PNG')
        return png_buffer.getvalue()
|
45
|
+
|
46
|
+
|
47
|
+
async def draw_structure_and_interpret(ctx: RunContext[ChemistryDependencies], identifier: str, question: str) -> str:
    """
    Draw a chemical structure and analyze it.

    Args:
        ctx: The run context
        identifier: CHEBI ID (e.g. CHEBI:12345) or a SMILES string
        question: Question about the structure to be answered

    Returns:
        str: Analysis of the chemical structure
    """
    # Docstring kept verbatim: agent frameworks may use it as the tool description.
    print(f"Draw Structure: {identifier}, then: {question}")
    structure = ChemicalStructure.from_anything(identifier)
    image_url = structure.chebi_image_url
    img = None

    if image_url:
        # Use the async client so the download does not block the event loop
        # while this coroutine waits on the network.
        async with httpx.AsyncClient() as client:
            image_response = await client.get(image_url)
        try:
            # Without this check an HTTP error page would be forwarded to the
            # image model as if it were PNG data.
            image_response.raise_for_status()
        except httpx.HTTPStatusError as e:
            raise ModelRetry(f"Could not retrieve image for structure: {e}") from e
        img = BinaryContent(data=image_response.content, media_type='image/png')
    elif structure.smiles:
        # No ChEBI image available: render the SMILES locally instead.
        img = BinaryContent(data=smiles_to_image(structure.smiles), media_type='image/png')

    if not img:
        raise ModelRetry("Could not find image for structure")

    # Imported here rather than at module level, as in the original.
    from aurelian.agents.chemistry.image_agent import structure_image_agent
    result = await structure_image_agent.run(
        [question, img],
        deps=ctx.deps)
    return result.data
|
79
|
+
|
80
|
+
|
81
|
+
async def chebi_search_terms(ctx: RunContext[ChemistryDependencies], query: str) -> List[Dict]:
    """
    Finds similar ontology terms to the search query in ChEBI.

    Args:
        ctx: The run context
        query: The search query

    Returns:
        List[Dict]: List of matching ChEBI terms
    """
    # Docstring kept verbatim: agent frameworks may use it as the tool description.
    print(f"ChEBI Term Search: {query}")
    adapter = get_chebi_adapter()
    # The result count is capped by the configured max_search_results.
    return search_ontology(adapter, query, limit=ctx.deps.max_search_results)
|
94
|
+
|
95
|
+
|
96
|
+
async def search_web_for_chemistry(query: str) -> str:
    """
    Search the web using a text query.

    Args:
        query: The search query

    Returns:
        str: Matching web pages plus summaries
    """
    # Docstring kept verbatim: agent frameworks may use it as the tool description.
    print(f"Web Search: {query}")
    search_results = web_search(query)
    return search_results
|
108
|
+
|
109
|
+
|
110
|
+
async def retrieve_chemistry_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL to fetch

    Returns:
        str: The contents of the web page
    """
    # Docstring kept verbatim: agent frameworks may use it as the tool description.
    print(f"Fetch URL: {url}")
    page_contents = retrieve_web_page(url)
    return page_contents
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""
|
2
|
+
Agent specifically for interpreting chemical structure images.
|
3
|
+
"""
|
4
|
+
from pydantic_ai import Agent
|
5
|
+
|
6
|
+
# Separate agent for image interpretation to avoid circular imports
|
7
|
+
# Dedicated agent that answers questions about a chemical structure image.
# It is constructed without a deps_type or any tools.
structure_image_agent = Agent(
    model="openai:gpt-4o",
    system_prompt="""You are an expert chemist, able to interpret
chemical structure diagrams and answer questions on them.
Use the information in the provided chemical structure image to
answer questions about molecular properties, functional groups,
potential reactivity, or other chemical characteristics.
""",
)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
"""
|
2
|
+
D4D (Datasheets for Datasets) agent package for extracting dataset metadata.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# isort: skip_file
|
6
|
+
from .d4d_agent import data_sheets_agent # noqa: E402
|
7
|
+
from .d4d_config import D4DConfig, get_config # noqa: E402
|
8
|
+
from .d4d_gradio import chat # noqa: E402
|
9
|
+
from .d4d_tools import ( # noqa: E402
|
10
|
+
get_full_schema,
|
11
|
+
process_website_or_pdf,
|
12
|
+
extract_text_from_pdf,
|
13
|
+
)
|
14
|
+
|
15
|
+
__all__ = [
|
16
|
+
# Agent
|
17
|
+
"data_sheets_agent",
|
18
|
+
|
19
|
+
# Config
|
20
|
+
"D4DConfig",
|
21
|
+
"get_config",
|
22
|
+
|
23
|
+
# Tools
|
24
|
+
"get_full_schema",
|
25
|
+
"process_website_or_pdf",
|
26
|
+
"extract_text_from_pdf",
|
27
|
+
|
28
|
+
# Gradio
|
29
|
+
"chat",
|
30
|
+
]
|
@@ -0,0 +1,72 @@
|
|
1
|
+
"""
|
2
|
+
Agent for extracting dataset metadata following the datasheets for datasets schema.
|
3
|
+
"""
|
4
|
+
from pydantic_ai import Agent, RunContext
|
5
|
+
|
6
|
+
from .d4d_config import D4DConfig
|
7
|
+
from .d4d_tools import get_full_schema, process_website_or_pdf
|
8
|
+
|
9
|
+
|
10
|
+
# Create the agent, the full schema will be loaded when needed
|
11
|
+
# The static prompt below is never passed through str.format(), so it must not
# contain a "{schema}" placeholder: the schema text is supplied at run time by
# the function registered with @data_sheets_agent.system_prompt.
data_sheets_agent = Agent(
    model="openai:gpt-4o",
    deps_type=D4DConfig,
    system_prompt="""
The complete datasheets for datasets schema is appended after these instructions.

When provided with a URL to a webpage or PDF describing a dataset, your task is to fetch the
content, extract all the relevant metadata, and output a YAML document that exactly
conforms to that schema. The output must be valid YAML with all required fields
filled in, following the schema exactly.
""",
)
|
25
|
+
|
26
|
+
|
27
|
+
@data_sheets_agent.system_prompt
async def add_schema(ctx: RunContext[D4DConfig]) -> str:
    """
    Supply the full datasheets-for-datasets schema as system prompt text.

    Args:
        ctx: The run context

    Returns:
        The schema text returned by ``get_full_schema``
    """
    return await get_full_schema(ctx)
|
40
|
+
|
41
|
+
|
42
|
+
@data_sheets_agent.tool
async def extract_metadata(ctx: RunContext[D4DConfig], url: str) -> str:
    """
    Fetch a dataset description and wrap it in a metadata-extraction prompt.

    Args:
        ctx: The run context, forwarded to ``process_website_or_pdf``
        url: The URL of the dataset description (webpage or PDF)

    Returns:
        A prompt string embedding the retrieved content and the URL, asking for
        a YAML document that follows the datasheets for datasets schema
    """
    # Pull down whatever the URL points at (webpage or PDF).
    document_text = await process_website_or_pdf(ctx, url)

    # This function does not produce the YAML itself; it hands back the
    # instructions together with the retrieved content.
    return f"""
The following is the content of a document describing a dataset:

{document_text}

Using the complete datasheets for datasets schema provided above, extract all the metadata
from the document and generate a YAML document that exactly conforms to that schema.
Ensure that all required fields are present and the output is valid YAML.
The dataset URL is: {url}

Generate only the YAML document.
"""
|
@@ -0,0 +1,46 @@
|
|
1
|
+
"""
|
2
|
+
Configuration for the D4D (Datasheets for Datasets) agent.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass
|
5
|
+
import os
|
6
|
+
|
7
|
+
from aurelian.dependencies.workdir import HasWorkdir, WorkDir
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class D4DConfig(HasWorkdir):
    """Settings consumed by the D4D agent."""

    # Location of the datasheets for datasets schema YAML.
    schema_url: str = "https://raw.githubusercontent.com/monarch-initiative/ontogpt/main/src/ontogpt/templates/data_sheets_schema.yaml"

    def __post_init__(self):
        """Fall back to a default ``WorkDir`` when no workdir was supplied."""
        # No super().__post_init__() call: per the original author's note,
        # HasWorkdir does not define one.
        if self.workdir is not None:
            return
        self.workdir = WorkDir()
|
21
|
+
|
22
|
+
|
23
|
+
def get_config(schema_url: str = None) -> D4DConfig:
    """
    Build a D4DConfig from the argument, the environment, and the defaults.

    The schema URL is resolved in this order: the ``schema_url`` argument,
    then the ``AURELIAN_D4D_SCHEMA_URL`` environment variable, then the
    default declared on ``D4DConfig``. A ``WorkDir`` is created from
    ``AURELIAN_WORKDIR`` when that variable is set to a non-empty value.

    Args:
        schema_url: The URL to the schema YAML (overrides environment variable)

    Returns:
        A D4DConfig instance
    """
    # Working directory: only built here when the environment names one.
    workdir_location = os.environ.get("AURELIAN_WORKDIR", None)
    workdir = None
    if workdir_location:
        workdir = WorkDir(location=workdir_location)

    config = D4DConfig(workdir=workdir)

    # An explicit argument wins over the environment; with neither set, the
    # dataclass default is left untouched.
    chosen_schema_url = schema_url or os.environ.get("AURELIAN_D4D_SCHEMA_URL", None)
    if chosen_schema_url:
        config.schema_url = chosen_schema_url

    return config
|
@@ -0,0 +1,58 @@
|
|
1
|
+
"""
|
2
|
+
Gradio interface for the D4D (Datasheets for Datasets) agent.
|
3
|
+
"""
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
import gradio as gr
|
7
|
+
|
8
|
+
from .d4d_agent import data_sheets_agent
|
9
|
+
from .d4d_config import D4DConfig, get_config
|
10
|
+
|
11
|
+
|
12
|
+
async def process_url(url: str, history: List[str], config: D4DConfig) -> str:
    """
    Run the D4D agent on a URL and return the data of its result.

    Args:
        url: The URL to process (webpage or PDF); passed to the agent as the user prompt
        history: Conversation history (accepted but not used by this function)
        config: The agent configuration, passed to the agent as ``deps``

    Returns:
        The ``data`` attribute of the agent run result
    """
    return (await data_sheets_agent.run(url, deps=config)).data
|
27
|
+
|
28
|
+
|
29
|
+
def chat(deps: Optional[D4DConfig] = None, **kwargs):
    """
    Build the Gradio chat UI for the D4D agent.

    Args:
        deps: Optional dependencies configuration; when omitted, one is
            obtained from ``get_config(**kwargs)``
        kwargs: Keyword arguments forwarded to ``get_config`` (only used
            when ``deps`` is None)

    Returns:
        A Gradio ChatInterface
    """
    import asyncio

    config = deps if deps is not None else get_config(**kwargs)

    def get_info(url: str, history: List[str]) -> str:
        """Synchronous bridge that drives the async ``process_url`` to completion."""
        return asyncio.run(process_url(url, history, config))

    example_urls = [
        "https://fairhub.io/datasets/2",
        "https://data.chhs.ca.gov/dataset/99bc1fea-c55c-4377-bad8-f00832fd195d/resource/5a6d5fe9-36e6-4aca-ba4c-bf6edc682cf5/download/hci_crime_752-narrative_examples-10-30-15-ada.pdf",
    ]

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Datasheets for Datasets Agent",
        description="Enter a URL to a webpage or PDF describing a dataset. The agent will generate metadata in YAML format according to the complete datasheets for datasets schema.",
        examples=example_urls,
    )
|