aurelian 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +94 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +85 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +46 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +72 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +43 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
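- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0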
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +240 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +67 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +186 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +55 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +25 -0
- aurelian/agents/monarch/monarch_agent.py +44 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +113 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +83 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/talisman_agent.py +126 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +168 -0
- aurelian/agents/talisman/talisman_tools.py +720 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +37 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +108 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +800 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +15 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.3.2.dist-info/LICENSE +22 -0
- aurelian-0.3.2.dist-info/METADATA +105 -0
- aurelian-0.3.2.dist-info/RECORD +254 -0
- aurelian-0.3.2.dist-info/WHEEL +4 -0
- aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
"""
|
2
|
+
Agent for creating ROBOT templates and compiling to ontologies.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
from typing import List, Dict
|
6
|
+
|
7
|
+
from aurelian.agents.filesystem.filesystem_tools import inspect_file, download_url_as_markdown, list_files
|
8
|
+
from aurelian.agents.robot.robot_config import RobotDependencies
|
9
|
+
from aurelian.agents.robot.robot_tools import write_and_compile_template, fetch_documentation
|
10
|
+
from aurelian.utils.async_utils import run_sync
|
11
|
+
from aurelian.utils.search_utils import web_search
|
12
|
+
from pydantic_ai import Agent, RunContext, Tool
|
13
|
+
|
14
|
+
from aurelian.dependencies.workdir import WorkDir, HasWorkdir
|
15
|
+
|
16
|
+
# Static system prompt for the ROBOT template agent. It explains the template
# CSV layout (header row, template row, data rows) and names the tools the
# model is expected to call.
# NOTE(review): the last paragraph mentions `lookup_curies_get_lookup_get`,
# which is not among the tools registered on the agent in this file - confirm
# that the tool exists or drop the mention.
SYSTEM = """
Background:

Your job is to iteratively build an ontology via *robot templates*.
These are tabular data (CSV syntax) with a special header that compiles to OWL.

For example, if the request is for an animals ontology, you could start with a CSV with columns Name, ParentTaxon, Eats,
with rows filled out with some example animals.

The main tool you will use is `write_and_compile_template` which writes the template content to
a local file after compiling to OWL. This also takes a list of ontologies to import, which
should also be on the file system.

Sometimes you may need to work with multiple dependent ontologies. For example, if you have a vehicle class
hierarchy in `vehicles.csv` and parts in `parts.csv`, and vehicles depends on parts, you would first iterate
on `parts.csv` (e.g. calling `write_and_compile_template`, with no imports), then write vehicles using
`write_and_compile_template` with `['parts.csv']` as the dependencies/imports.

## Robot template CSV structure:

Robot template files have an additional metadata row below the header row. This is called the "template row". It specifies how each column maps to OWL. Typical values will be "ID" for the unique identifier, LABEL for the name, "SC %" for the parent class. Consult the docs for details. Note that this is always beneath the main header row. This can seem a bit duplicative, but that's OK. An example might be:

identifier,name,parent,synonyms
ID,LABEL,SC %,A oboInOwl:hasExactSynonym
ANIMAL:1,chicken,aves,Gallus gallus|chick

The first row is a normal header with human-friendly columns. The 2nd is the robot template row. After that are the usual data rows.

Here "A oboInOwl:hasExactSynonym" in the template row for "synonyms" indicates this column should be interpreted as an owl annotation using that property. Generally the value for annotations is literals/text.

Another common piece of metadata is definitions. For OBO ontologies, IAO must be used here. For non-OBO ontologies people may want to use skos

Some ontologies may need to use other relationships. For part-of parents, use "'part of' some %" (this means that the class indicated by the ID is part-of some X, where X is the value in the part-of column). Use other relationships as appropriate. If you are unclear about the semantics, then consult the docs. You can also work through the docs with the user.

Note that any terms referenced as parents or in logical axioms such as part-of should be in the ontology, so make sure they have rows in the CSV. It's OK to use the label. For example:

identifier,primary_name,parent,madeOf
ID,LABEL,SC %,SC 'made of' some %
VON:1,vehicle,,
VON:2,car,vehicle,wheel|chassis
VON:3,wheel,car part,
VON:4,chassis,car part,

If in doubt, use "A <propertyName>" for metadata and "SC '<relationName>' some %" for logical relationships / graph edges.

If your working dir doesn't contain any object or annotation properties you can make them in a separate
imported ontology, TYPE is useful for determining the OWL type, for example:

```
ID,Label,Type,Definition
ID,LABEL,TYPE,A IAO:0000115
IAO:0000115,definition,owl:AnnotationProperty
BFO:0000050,part_of,owl:ObjectProperty
```

If you need any more detailed documentation, you can fetch it with `fetch_documentation`

You can look at files with `inspect_file`

Use scientific language as far as possible. For IDs, these should be numeric curies unless the user requests otherwise. If the user wants to substitute actual ontology term IDs for these, use lookup_curies_get_lookup_get
"""
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
# Module-level agent that builds ontologies from ROBOT templates.
# It runs with RobotDependencies as its dependency type, starts from the static
# SYSTEM prompt, and is given the template, documentation and file tools below.
robot_ontology_agent = Agent(
    model="openai:gpt-4o",
    deps_type=RobotDependencies,
    system_prompt=SYSTEM,
    tools=[
        # The compile tool is the only one registered with an explicit retry
        # budget (max_retries=2).
        Tool(write_and_compile_template, max_retries=2),
        Tool(fetch_documentation),
        Tool(inspect_file),
        Tool(list_files),
        Tool(download_url_as_markdown),
    ]
)
|
93
|
+
|
94
|
+
|
95
|
+
@robot_ontology_agent.system_prompt
def include_templates_in_prompt(ctx: RunContext[RobotDependencies]) -> str:
    """Build a system-prompt section listing the working-directory files and their content.

    Args:
        ctx: run context; ``ctx.deps.workdir`` supplies ``list_file_names()``
            and ``read_file()``.

    Returns:
        The header line alone when the working directory has no files.
        Otherwise the header followed, for each file, by its name, a ``---``
        separator line and the file content.
    """
    workdir = ctx.deps.workdir
    header = "Working directory files/templates:"
    file_names = workdir.list_file_names()
    if not file_names:
        return header

    # Name, separator and content each go on their own line so that a file
    # name is never fused with the header or with the first line of content.
    sections = [header]
    sections.extend(
        f"{name}\n---\n{workdir.read_file(name)}\n" for name in file_names
    )
    return "\n".join(sections)
|
105
|
+
|
106
|
+
|
107
|
+
@robot_ontology_agent.system_prompt
def include_prefixes_in_prompt(ctx: RunContext[RobotDependencies]) -> str:
    """Return a system-prompt line showing the prefix map held by the dependencies.

    Args:
        ctx: run context; ``ctx.deps.prefix_map`` is interpolated as-is.

    Returns:
        The text ``Prefixes: `` followed by the formatted prefix map.
    """
    return f"Prefixes: {ctx.deps.prefix_map}"
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
def chat(workdir: str, **kwargs):
    """Create a Gradio chat interface backed by ``robot_ontology_agent``.

    Args:
        workdir: value assigned to the ``location`` of the dependencies'
            working directory.
        **kwargs: forwarded to ``robot_ontology_agent.run_sync`` on every query.

    Returns:
        A ``gr.ChatInterface`` whose handler sends each query, with the chat
        history appended, to the agent.
    """
    import gradio as gr

    deps = RobotDependencies()
    deps.workdir.location = workdir

    def get_info(query: str, history: List[Dict]) -> str:
        """Run one chat turn through the agent and return its reply."""
        # The interface is created with type="messages", so Gradio is expected
        # to pass history as a list of message dicts rather than plain strings.
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Put the history heading on its own line; previously it was glued
            # to the end of the user's query ("...my question## History").
            query = "\n".join([query, "## History", *(f"{h}" for h in history)])
        result = run_sync(lambda: robot_ontology_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="robot AI Assistant",
        examples=[
            ["Create an ontology of snacks"],
        ]
    )
|
@@ -0,0 +1,50 @@
|
|
1
|
+
from typing import Optional, List
|
2
|
+
|
3
|
+
from pydantic_ai import RunContext, ModelRetry
|
4
|
+
|
5
|
+
from aurelian.agents.robot.assets import ROBOT_ONTOLOGY_AGENT_CONTENTS_DIR
|
6
|
+
from aurelian.agents.robot.robot_config import RobotDependencies
|
7
|
+
from aurelian.utils.robot_ontology_utils import run_robot_template_command
|
8
|
+
|
9
|
+
|
10
|
+
async def write_and_compile_template(ctx: RunContext[RobotDependencies], template: str, save_to_file: str = "core.csv", import_ontologies: Optional[List[str]] = None) -> str:
    """
    Adds a template to the file system and compile it to OWL

    Args:
        ctx: context
        template: robot template as string. Do not truncate, always pass the whole template, including header.
        save_to_file: file name to save the templates to. Defaults to core.csv. The file is written before compilation, so it is left in place even if compilation fails.
        import_ontologies: list of ontologies to import. These should be files in the working directory.

    Returns:
        A message naming the path returned by the compile step.

    Raises:
        ModelRetry: if writing or compiling the template raises any exception,
            so the model can correct the template and try again.
    """
    print(f"Validating template: {template}")
    try:
        # The compile step is handed the file name, not the content, so the
        # template has to be written to the working directory first.
        ctx.deps.workdir.write_file(save_to_file, template)
        # No trailing comma after the call: it used to turn output_path into a
        # 1-tuple, which then showed up as "('path',)" in the returned message.
        output_path = run_robot_template_command(
            ctx.deps.workdir,
            save_to_file,
            import_ontologies=import_ontologies,
            prefix_map=ctx.deps.prefix_map,
            output_path=None,
        )
    except Exception as e:
        raise ModelRetry(f"Template does not compile: {e}") from e
    return f"Template compiled to {output_path}"
|
38
|
+
|
39
|
+
|
40
|
+
async def fetch_documentation(ctx: RunContext[RobotDependencies]) -> str:
    """
    Fetch the documentation for the robot ontology agent.

    Args:
        ctx: context (not used by this tool)

    Returns:
        str: the full text of ``template.md`` from the agent's contents directory
    """
    doc_path = ROBOT_ONTOLOGY_AGENT_CONTENTS_DIR / "template.md"
    # Use a context manager so the file handle is closed deterministically, and
    # an explicit encoding so the result does not depend on the platform locale.
    with open(doc_path, encoding="utf-8") as doc_file:
        return doc_file.read()
|
@@ -0,0 +1,126 @@
|
|
1
|
+
"""
|
2
|
+
Agent for working with gene information using the UniProt API and NCBI Entrez.
|
3
|
+
"""
|
4
|
+
from pydantic_ai import Agent
|
5
|
+
|
6
|
+
from .talisman_config import TalismanConfig, get_config
|
7
|
+
from .talisman_tools import (
|
8
|
+
get_gene_description,
|
9
|
+
get_gene_descriptions,
|
10
|
+
get_genes_from_list,
|
11
|
+
analyze_gene_set
|
12
|
+
)
|
13
|
+
|
14
|
+
# System prompt for the Talisman agent.
# It lists the identifier kinds the agent accepts, describes the
# UniProt-then-NCBI lookup order, and requires gene set analyses to contain a
# "## Terms" section followed by a "## Gene Summary Table".
TALISMAN_SYSTEM_PROMPT = """
You are a helpful assistant that specializes in providing gene information using both UniProt and NCBI Entrez.
You can retrieve gene descriptions for single gene IDs or lists of multiple gene IDs, and analyze gene sets to identify functional relationships.

You can:
- Get a description for a single gene using its identifier
- Get descriptions for multiple genes using a list of identifiers
- Parse a string containing multiple gene identifiers in various formats
- Analyze sets of genes to identify biological relationships and shared properties

Gene identifiers can be:
- UniProt accession numbers (e.g., P12345, Q934N0)
- Gene symbols (e.g., INS, TP53, mmoX)
- Gene names
- Ensembl IDs (e.g., ENSG00000139618)
- NCBI protein IDs (e.g., 8YJT_C2)

IMPORTANT: The system handles both gene symbols and UniProt identifiers intelligently:
1. When a gene symbol is provided, the system will:
- First attempt to look up the corresponding UniProt protein accession
- Search UniProt for detailed information
- Fall back to NCBI Entrez if the gene is not found in UniProt

2. When a UniProt ID is provided, the system will:
- Directly retrieve the information from UniProt
- Fall back to NCBI Entrez if needed

3. When a protein ID is provided, the system will:
- Search the protein database in NCBI Entrez
- Return detailed protein information

This multi-database approach ensures comprehensive coverage of gene and protein information.

When returning gene information, present it in a clear, organized manner with:
- Gene name and symbol
- Description of gene function
- Organism information when available
- Disease associations if relevant
- Source database (UniProt, NCBI, or both)

For multiple genes, organize the results in a table format for easy readability.

For gene set analysis, you will receive a detailed summary of:
- Shared biological processes these genes may participate in
- Potential protein-protein interactions or functional relationships
- Common cellular localization patterns
- Involvement in similar pathways
- Coordinated activities or cooperative functions
- Any disease associations that multiple genes in this set share

The analysis will cover multiple types of relationships:
- Functional relationships
- Pathway relationships
- Regulatory relationships
- Localization patterns
- Physical interactions
- Genetic interactions

IMPORTANT: For gene set analysis, ALWAYS include a distinct section titled "## Terms"
that contains a semicolon-delimited list of functional terms relevant to the gene set,
ordered by relevance. These terms should include:
- Gene Ontology biological process terms (e.g., DNA repair, oxidative phosphorylation, signal transduction)
- Molecular function terms (e.g., kinase activity, DNA binding, transporter activity)
- Cellular component/localization terms (e.g., nucleus, plasma membrane, mitochondria)
- Pathway names (e.g., glycolysis, TCA cycle, MAPK signaling)
- Co-regulation terms (e.g., stress response regulon, heat shock response)
- Interaction networks (e.g., protein complex formation, signaling cascade)
- Metabolic process terms (e.g., fatty acid synthesis, amino acid metabolism)
- Regulatory mechanisms (e.g., transcriptional regulation, post-translational modification)
- Disease associations (if relevant, e.g., virulence, pathogenesis, antibiotic resistance)
- Structural and functional domains/motifs (e.g., helix-turn-helix, zinc finger)

Example of Terms section:
## Terms
DNA damage response; p53 signaling pathway; apoptosis; cell cycle regulation; tumor suppression; DNA repair; protein ubiquitination; transcriptional regulation; nuclear localization; cancer predisposition

IMPORTANT: After the Terms section, ALWAYS include a "## Gene Summary Table" with a markdown table
summarizing the genes analyzed, with the following columns in this exact order:
- ID: The gene identifier (same as Gene Symbol)
- Annotation: Genomic coordinates or accession with position information
- Genomic Context: Information about the genomic location (chromosome, plasmid, etc.)
- Organism: The organism the gene belongs to
- Description: The protein/gene function description

Example of Gene Summary Table:
## Gene Summary Table
| ID | Annotation | Genomic Context | Organism | Description |
|-------------|-------------|----------|----------------|------------|
| BRCA1 | NC_000017.11 (43044295..43125483) | Chromosome 17 | Homo sapiens | Breast cancer type 1 susceptibility protein |
| TP53 | NC_000017.11 (7668402..7687550) | Chromosome 17 | Homo sapiens | Tumor suppressor protein |

For bacterial genes, the table format would be:
| ID | Annotation | Genomic Context | Organism | Description |
|-------------|-------------|----------|----------------|------------|
| invA | NC_003197.2 (3038407..3040471, complement) | Chromosome | Salmonella enterica | Invasion protein |
| DVUA0001 | NC_005863.1 (699..872, complement) | Plasmid pDV | Desulfovibrio vulgaris str. Hildenborough | Hypothetical protein |

Use this information to help researchers understand the potential functional relationships between genes.
"""
|
114
|
+
|
115
|
+
# Talisman agent: uses the static system prompt above and takes a
# TalismanConfig instance as its dependencies.
talisman_agent = Agent(
    model="openai:gpt-4o",
    system_prompt=TALISMAN_SYSTEM_PROMPT,
    deps_type=TalismanConfig,
)

# Attach the gene tools to the agent, in the same order as before.
for _gene_tool in (
    get_gene_description,
    get_gene_descriptions,
    get_genes_from_list,
    analyze_gene_set,
):
    talisman_agent.tool(_gene_tool)
del _gene_tool  # keep the loop variable out of the module namespace
|
@@ -0,0 +1,66 @@
|
|
1
|
+
"""
|
2
|
+
Configuration for the Talisman agent.
|
3
|
+
"""
|
4
|
+
from dataclasses import dataclass, field
|
5
|
+
import os
|
6
|
+
from typing import Any, Dict, Optional
|
7
|
+
|
8
|
+
from bioservices import UniProt
|
9
|
+
from bioservices.eutils import EUtils as NCBI
|
10
|
+
|
11
|
+
from aurelian.dependencies.workdir import HasWorkdir, WorkDir
|
12
|
+
|
13
|
+
|
14
|
+
@dataclass
class TalismanConfig(HasWorkdir):
    """Configuration for the Talisman agent.

    Holds the keyword options used to build the bioservices UniProt and NCBI
    clients, the OpenAI API key and the model name used for gene set analysis.
    Empty or unset values are replaced with defaults in ``__post_init__``.
    """

    # Options for the bioservices UniProt client
    uniprot_client_options: Dict[str, Any] = field(default_factory=dict)

    # Options for the bioservices NCBI client
    ncbi_client_options: Dict[str, Any] = field(default_factory=dict)

    # OpenAI API key for LLM-based analysis
    openai_api_key: Optional[str] = None

    # Model to use for gene set analysis
    model_name: str = "gpt-4o"

    def __post_init__(self):
        """Fill in defaults for every option that was left empty or unset."""
        # ``not`` covers both None and an empty dict.
        if not self.uniprot_client_options:
            self.uniprot_client_options = {"verbose": False}

        if not self.ncbi_client_options:
            # NOTE(review): this hard-coded contact address is used for every
            # caller who does not pass NCBI options - consider making it
            # configurable.
            self.ncbi_client_options = {"verbose": False, "email": "MJoachimiak@lbl.gov"}

        # Initialize the workdir if not already set
        if self.workdir is None:
            self.workdir = WorkDir()

        # Fall back to the environment for the API key; ``os`` is already
        # imported at module level, so no local import is needed.
        if self.openai_api_key is None:
            self.openai_api_key = os.environ.get("OPENAI_API_KEY")

    def get_uniprot_client(self) -> UniProt:
        """Return a new UniProt client built from ``uniprot_client_options``."""
        return UniProt(**self.uniprot_client_options)

    def get_ncbi_client(self) -> NCBI:
        """Return a new NCBI client built from ``ncbi_client_options``."""
        return NCBI(**self.ncbi_client_options)
|
55
|
+
|
56
|
+
|
57
|
+
def get_config() -> TalismanConfig:
    """Build a TalismanConfig from the environment.

    The working directory is taken from ``AURELIAN_WORKDIR`` when that
    variable is set to a non-empty value; otherwise no workdir is passed and
    the config class chooses its own. Both bioservices clients are configured
    with ``verbose`` disabled.

    Returns:
        A new TalismanConfig instance.
    """
    location = os.environ.get("AURELIAN_WORKDIR")
    if location:
        workdir = WorkDir(location=location)
    else:
        workdir = None

    # Each client receives its own dict so later mutation cannot leak across.
    uniprot_options = {"verbose": False}
    ncbi_options = {"verbose": False}

    return TalismanConfig(
        workdir=workdir,
        uniprot_client_options=uniprot_options,
        ncbi_client_options=ncbi_options,
    )
|
@@ -0,0 +1,50 @@
|
|
1
|
+
"""
|
2
|
+
Gradio interface for the Talisman agent.
|
3
|
+
"""
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
import gradio as gr
|
7
|
+
|
8
|
+
from aurelian.agents.talisman.talisman_agent import talisman_agent
|
9
|
+
from aurelian.agents.talisman.talisman_config import TalismanConfig
|
10
|
+
from aurelian.utils.async_utils import run_sync
|
11
|
+
|
12
|
+
|
13
|
+
def chat(deps: Optional[TalismanConfig] = None, **kwargs):
    """
    Initialize a chat interface for the Talisman agent.

    Args:
        deps: Optional dependencies configuration; a default TalismanConfig
            is created when omitted
        **kwargs: Additional arguments to pass to the agent

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = TalismanConfig()

    def get_info(query: str, history: List[dict]) -> str:
        """Run the agent on ``query``, appending prior turns as plain text."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Separate the header from the user's text; previously it was
            # glued straight onto the end of the query ("...TP53## History").
            # With type="messages" each entry is a message dict, so it is
            # rendered via its string form, one entry per line.
            past_turns = "\n".join(str(h) for h in history)
            query = f"{query}\n\n## History\n{past_turns}"
        result = run_sync(lambda: talisman_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Talisman Gene Analysis Assistant",
        examples=[
            ["Get description for TP53"],
            ["Get information about the BRCA1 gene"],
            ["Get descriptions for multiple genes: INS, ALB, APOE"],
            ["What is the function of KRAS?"],
            ["Analyze the relationship between TP53 and MDM2"],
            ["Analyze these genes and their functional relationships: BRCA1, BRCA2, ATM, PARP1"],
            ["Get descriptions for ENSG00000139618, ENSG00000141510"],
        ],
    )
|
@@ -0,0 +1,168 @@
|
|
1
|
+
"""
|
2
|
+
MCP integration for the Talisman agent.
|
3
|
+
"""
|
4
|
+
import json
|
5
|
+
import os
|
6
|
+
from typing import Dict, Any, List, Optional
|
7
|
+
|
8
|
+
from anthropic import Anthropic
|
9
|
+
from pydantic_ai import Agent
|
10
|
+
|
11
|
+
# Import the agent, its system prompt, and the configuration from the sibling Talisman modules
|
12
|
+
from .talisman_agent import talisman_agent, TALISMAN_SYSTEM_PROMPT
|
13
|
+
from .talisman_config import TalismanConfig, get_config
|
14
|
+
|
15
|
+
|
16
|
+
def tools_to_anthropic_tools(agent: Agent) -> List[Dict[str, Any]]:
    """Convert pydantic-ai Agent tools to Anthropic tools format.

    Every parameter except ``ctx`` becomes a property of the tool's input
    schema. Parameters annotated as a parameterized list (e.g. ``List[str]``)
    are described as arrays of strings; everything else is described as a
    string. Parameters without a default are listed as required.

    Args:
        agent: The pydantic-ai Agent with tools. Only ``agent.tools`` is read;
            each tool must expose ``name``, ``description`` and ``params``
            (a mapping of parameter name to a parameter object with
            ``annotation``, ``default`` and ``empty`` attributes).

    Returns:
        A list of tools in the Anthropic format
    """
    anthropic_tools = []
    for tool in agent.tools:
        properties: Dict[str, Any] = {}
        required: List[str] = []

        for param_name, param in tool.params.items():
            # The run context is injected by the agent, not supplied by the model.
            if param_name == "ctx":
                continue

            # Plain annotations such as ``str`` (or a missing annotation) have
            # no ``__origin__``; reading it unguarded raised AttributeError.
            origin = getattr(param.annotation, "__origin__", None)
            if origin is list:
                properties[param_name] = {
                    "type": "array",
                    "items": {"type": "string"},
                }
            else:
                properties[param_name] = {"type": "string"}

            # ``empty`` is a sentinel, so compare by identity rather than equality.
            if param.default is param.empty:
                required.append(param_name)

        anthropic_tools.append(
            {
                "name": tool.name,
                "description": tool.description,
                "input_schema": {
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            }
        )

    return anthropic_tools
|
55
|
+
|
56
|
+
|
57
|
+
def create_bedrock_compatible_resp(
    response_id: str,
    model: str,
    role: str,
    content: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Wrap a model reply in a single-choice response envelope.

    Args:
        response_id: Identifier stored under ``"id"``
        model: Model name stored under ``"model"``
        role: Role of the message author (usually 'assistant')
        content: Content from the Anthropic response, stored unchanged

    Returns:
        A dict with ``"id"``, ``"model"`` and a one-element ``"choices"``
        list whose entry carries the message (role and content).
    """
    message = {"role": role, "content": content}
    envelope: Dict[str, Any] = {"id": response_id, "model": model}
    envelope["choices"] = [{"message": message}]
    return envelope
|
86
|
+
|
87
|
+
|
88
|
+
def get_talisman_mcp_tools() -> List[Dict[str, Any]]:
    """Describe the Talisman agent's tools for MCP clients.

    Returns:
        The tool list produced by ``tools_to_anthropic_tools`` for
        ``talisman_agent``
    """
    tool_specs = tools_to_anthropic_tools(agent=talisman_agent)
    return tool_specs
|
95
|
+
|
96
|
+
|
97
|
+
def get_talisman_mcp_messages(
    messages: Optional[List[Dict[str, Any]]] = None,
    system: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Ensure a conversation starts with a system message.

    Args:
        messages: Previous messages; treated as an empty list when omitted
        system: System message override; the Talisman system prompt is used
            when omitted

    Returns:
        The given list itself when its first entry already has the
        ``"system"`` role, otherwise a new list with a system message
        placed in front of the given messages.
    """
    history = [] if messages is None else messages
    prompt = TALISMAN_SYSTEM_PROMPT if system is None else system

    # Nothing to add when the conversation already opens with a system turn.
    if history and history[0].get("role") == "system":
        return history

    return [{"role": "system", "content": prompt}] + history
|
120
|
+
|
121
|
+
|
122
|
+
def handle_talisman_mcp_request(
    messages: List[Dict[str, Any]],
    config: Optional[TalismanConfig] = None,
    model: str = "claude-3-5-sonnet-20240620",
    max_tokens: int = 4096,
    temperature: float = 0.0,
) -> Dict[str, Any]:
    """Handle an MCP request for the Talisman agent.

    Sends the conversation, the Talisman system prompt and the agent's tool
    descriptions to the Anthropic Messages API and wraps the reply.

    Args:
        messages: The messages from the client
        config: The Talisman configuration (optional); resolved through
            ``get_config()`` when omitted
        model: The model to use
        max_tokens: The maximum number of tokens to generate
        temperature: The temperature to use for generation

    Returns:
        An MCP response built by ``create_bedrock_compatible_resp``

    Raises:
        ValueError: If the ``ANTHROPIC_API_KEY`` environment variable is
            missing or empty
    """
    # The resolved config is not used further in this function.
    config = config or get_config()

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required for MCP")

    client = Anthropic(api_key=api_key)

    # Prepare the messages and tools
    mcp_messages = get_talisman_mcp_messages(messages)
    mcp_tools = get_talisman_mcp_tools()

    # The Messages API has no "system" role for input messages; the prompt
    # must be passed through the top-level ``system`` parameter instead.
    # get_talisman_mcp_messages guarantees the first entry is the system turn.
    system_prompt = mcp_messages[0]["content"]
    chat_messages = mcp_messages[1:]

    # Send the request to Anthropic
    response = client.messages.create(
        model=model,
        system=system_prompt,
        messages=chat_messages,
        tools=mcp_tools,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Return the response in a format compatible with the MCP protocol
    # NOTE(review): response.content is passed through as returned by the SDK;
    # confirm callers do not need plain dicts here.
    return create_bedrock_compatible_resp(
        response_id=response.id,
        model=model,
        role="assistant",
        content=response.content,
    )
|