PyPI - aurelian - Versions diffs - 0.3.2__py3-none-any.whl - Mend

aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (254) hide show

aurelian/__init__.py +9 -0
aurelian/agents/__init__.py +0 -0
aurelian/agents/amigo/__init__.py +3 -0
aurelian/agents/amigo/amigo_agent.py +77 -0
aurelian/agents/amigo/amigo_config.py +85 -0
aurelian/agents/amigo/amigo_evals.py +73 -0
aurelian/agents/amigo/amigo_gradio.py +52 -0
aurelian/agents/amigo/amigo_mcp.py +152 -0
aurelian/agents/amigo/amigo_tools.py +152 -0
aurelian/agents/biblio/__init__.py +42 -0
aurelian/agents/biblio/biblio_agent.py +94 -0
aurelian/agents/biblio/biblio_config.py +40 -0
aurelian/agents/biblio/biblio_gradio.py +67 -0
aurelian/agents/biblio/biblio_mcp.py +115 -0
aurelian/agents/biblio/biblio_tools.py +164 -0
aurelian/agents/biblio_agent.py +46 -0
aurelian/agents/checklist/__init__.py +44 -0
aurelian/agents/checklist/checklist_agent.py +85 -0
aurelian/agents/checklist/checklist_config.py +28 -0
aurelian/agents/checklist/checklist_gradio.py +70 -0
aurelian/agents/checklist/checklist_mcp.py +86 -0
aurelian/agents/checklist/checklist_tools.py +141 -0
aurelian/agents/checklist/content/checklists.yaml +7 -0
aurelian/agents/checklist/content/streams.csv +136 -0
aurelian/agents/checklist_agent.py +40 -0
aurelian/agents/chemistry/__init__.py +3 -0
aurelian/agents/chemistry/chemistry_agent.py +46 -0
aurelian/agents/chemistry/chemistry_config.py +71 -0
aurelian/agents/chemistry/chemistry_evals.py +79 -0
aurelian/agents/chemistry/chemistry_gradio.py +50 -0
aurelian/agents/chemistry/chemistry_mcp.py +120 -0
aurelian/agents/chemistry/chemistry_tools.py +121 -0
aurelian/agents/chemistry/image_agent.py +15 -0
aurelian/agents/d4d/__init__.py +30 -0
aurelian/agents/d4d/d4d_agent.py +72 -0
aurelian/agents/d4d/d4d_config.py +46 -0
aurelian/agents/d4d/d4d_gradio.py +58 -0
aurelian/agents/d4d/d4d_mcp.py +71 -0
aurelian/agents/d4d/d4d_tools.py +157 -0
aurelian/agents/d4d_agent.py +64 -0
aurelian/agents/diagnosis/__init__.py +33 -0
aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
aurelian/agents/diagnosis/diagnosis_config.py +48 -0
aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
aurelian/agents/diagnosis_agent.py +28 -0
aurelian/agents/draw/__init__.py +3 -0
aurelian/agents/draw/draw_agent.py +39 -0
aurelian/agents/draw/draw_config.py +26 -0
aurelian/agents/draw/draw_gradio.py +50 -0
aurelian/agents/draw/draw_mcp.py +94 -0
aurelian/agents/draw/draw_tools.py +100 -0
aurelian/agents/draw/judge_agent.py +18 -0
aurelian/agents/filesystem/__init__.py +0 -0
aurelian/agents/filesystem/filesystem_config.py +27 -0
aurelian/agents/filesystem/filesystem_gradio.py +49 -0
aurelian/agents/filesystem/filesystem_mcp.py +89 -0
aurelian/agents/filesystem/filesystem_tools.py +95 -0
aurelian/agents/filesystem/py.typed +0 -0
aurelian/agents/github/__init__.py +0 -0
aurelian/agents/github/github_agent.py +83 -0
aurelian/agents/github/github_cli.py +248 -0
aurelian/agents/github/github_config.py +22 -0
aurelian/agents/github/github_gradio.py +152 -0
aurelian/agents/github/github_mcp.py +252 -0
aurelian/agents/github/github_tools.py +408 -0
aurelian/agents/github/github_tools.py.tmp +413 -0
aurelian/agents/goann/__init__.py +13 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
aurelian/agents/goann/goann_agent.py +90 -0
aurelian/agents/goann/goann_config.py +90 -0
aurelian/agents/goann/goann_evals.py +104 -0
aurelian/agents/goann/goann_gradio.py +62 -0
aurelian/agents/goann/goann_mcp.py +0 -0
aurelian/agents/goann/goann_tools.py +65 -0
aurelian/agents/gocam/__init__.py +43 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
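aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0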
aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
aurelian/agents/gocam/gocam_agent.py +240 -0
aurelian/agents/gocam/gocam_config.py +85 -0
aurelian/agents/gocam/gocam_curator_agent.py +46 -0
aurelian/agents/gocam/gocam_evals.py +67 -0
aurelian/agents/gocam/gocam_gradio.py +89 -0
aurelian/agents/gocam/gocam_mcp.py +224 -0
aurelian/agents/gocam/gocam_tools.py +294 -0
aurelian/agents/linkml/__init__.py +0 -0
aurelian/agents/linkml/linkml_agent.py +62 -0
aurelian/agents/linkml/linkml_config.py +48 -0
aurelian/agents/linkml/linkml_evals.py +66 -0
aurelian/agents/linkml/linkml_gradio.py +45 -0
aurelian/agents/linkml/linkml_mcp.py +186 -0
aurelian/agents/linkml/linkml_tools.py +102 -0
aurelian/agents/literature/__init__.py +3 -0
aurelian/agents/literature/literature_agent.py +55 -0
aurelian/agents/literature/literature_config.py +35 -0
aurelian/agents/literature/literature_gradio.py +52 -0
aurelian/agents/literature/literature_mcp.py +174 -0
aurelian/agents/literature/literature_tools.py +182 -0
aurelian/agents/monarch/__init__.py +25 -0
aurelian/agents/monarch/monarch_agent.py +44 -0
aurelian/agents/monarch/monarch_config.py +45 -0
aurelian/agents/monarch/monarch_gradio.py +51 -0
aurelian/agents/monarch/monarch_mcp.py +65 -0
aurelian/agents/monarch/monarch_tools.py +113 -0
aurelian/agents/oak/__init__.py +0 -0
aurelian/agents/oak/oak_config.py +27 -0
aurelian/agents/oak/oak_gradio.py +57 -0
aurelian/agents/ontology_mapper/__init__.py +31 -0
aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
aurelian/agents/phenopackets/__init__.py +3 -0
aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
aurelian/agents/phenopackets/phenopackets_config.py +72 -0
aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
aurelian/agents/rag/__init__.py +40 -0
aurelian/agents/rag/rag_agent.py +83 -0
aurelian/agents/rag/rag_config.py +80 -0
aurelian/agents/rag/rag_gradio.py +67 -0
aurelian/agents/rag/rag_mcp.py +107 -0
aurelian/agents/rag/rag_tools.py +189 -0
aurelian/agents/rag_agent.py +54 -0
aurelian/agents/robot/__init__.py +0 -0
aurelian/agents/robot/assets/__init__.py +3 -0
aurelian/agents/robot/assets/template.md +384 -0
aurelian/agents/robot/robot_config.py +25 -0
aurelian/agents/robot/robot_gradio.py +46 -0
aurelian/agents/robot/robot_mcp.py +100 -0
aurelian/agents/robot/robot_ontology_agent.py +139 -0
aurelian/agents/robot/robot_tools.py +50 -0
aurelian/agents/talisman/__init__.py +3 -0
aurelian/agents/talisman/talisman_agent.py +126 -0
aurelian/agents/talisman/talisman_config.py +66 -0
aurelian/agents/talisman/talisman_gradio.py +50 -0
aurelian/agents/talisman/talisman_mcp.py +168 -0
aurelian/agents/talisman/talisman_tools.py +720 -0
aurelian/agents/ubergraph/__init__.py +40 -0
aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
aurelian/agents/ubergraph/ubergraph_config.py +79 -0
aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
aurelian/agents/uniprot/__init__.py +37 -0
aurelian/agents/uniprot/uniprot_agent.py +43 -0
aurelian/agents/uniprot/uniprot_config.py +43 -0
aurelian/agents/uniprot/uniprot_evals.py +99 -0
aurelian/agents/uniprot/uniprot_gradio.py +48 -0
aurelian/agents/uniprot/uniprot_mcp.py +168 -0
aurelian/agents/uniprot/uniprot_tools.py +136 -0
aurelian/agents/web/__init__.py +0 -0
aurelian/agents/web/web_config.py +27 -0
aurelian/agents/web/web_gradio.py +48 -0
aurelian/agents/web/web_mcp.py +50 -0
aurelian/agents/web/web_tools.py +108 -0
aurelian/chat.py +23 -0
aurelian/cli.py +800 -0
aurelian/dependencies/__init__.py +0 -0
aurelian/dependencies/workdir.py +78 -0
aurelian/mcp/__init__.py +0 -0
aurelian/mcp/amigo_mcp_test.py +86 -0
aurelian/mcp/config_generator.py +123 -0
aurelian/mcp/example_config.json +43 -0
aurelian/mcp/generate_sample_config.py +37 -0
aurelian/mcp/gocam_mcp_test.py +126 -0
aurelian/mcp/linkml_mcp_tools.py +190 -0
aurelian/mcp/mcp_discovery.py +87 -0
aurelian/mcp/mcp_test.py +31 -0
aurelian/mcp/phenopackets_mcp_test.py +103 -0
aurelian/tools/__init__.py +0 -0
aurelian/tools/web/__init__.py +0 -0
aurelian/tools/web/url_download.py +51 -0
aurelian/utils/__init__.py +0 -0
aurelian/utils/async_utils.py +15 -0
aurelian/utils/data_utils.py +32 -0
aurelian/utils/documentation_manager.py +59 -0
aurelian/utils/doi_fetcher.py +238 -0
aurelian/utils/ontology_utils.py +68 -0
aurelian/utils/pdf_fetcher.py +23 -0
aurelian/utils/process_logs.py +100 -0
aurelian/utils/pubmed_utils.py +238 -0
aurelian/utils/pytest_report_to_markdown.py +67 -0
aurelian/utils/robot_ontology_utils.py +112 -0
aurelian/utils/search_utils.py +95 -0
aurelian-0.3.2.dist-info/LICENSE +22 -0
aurelian-0.3.2.dist-info/METADATA +105 -0
aurelian-0.3.2.dist-info/RECORD +254 -0
aurelian-0.3.2.dist-info/WHEEL +4 -0
aurelian-0.3.2.dist-info/entry_points.txt +3 -0

aurelian/agents/gocam/gocam_tools.py ADDED Viewed

@@ -0,0 +1,294 @@
+"""
+Tools for the GOCAM agent.
+"""
+import os
+import json
+import yaml
+from pathlib import Path
+from typing import List, Dict, Optional, Union, Any
+from linkml_store.utils.format_utils import load_objects
+from pydantic_ai import RunContext, ModelRetry
+from pydantic import ValidationError
+from gocam.datamodel.gocam import Model as GocamModel
+from aurelian.agents.gocam.gocam_config import GOCAMDependencies
+from aurelian.agents.uniprot.uniprot_tools import normalize_uniprot_id
+from aurelian.utils.data_utils import flatten
+from aurelian.agents.literature.literature_tools import search_literature_web, retrieve_literature_page
+from . import DOCUMENTS_DIR
+async def search_gocams(ctx: RunContext[GOCAMDependencies], query: str) -> List[Dict]:
+    """
+    Performs a retrieval search over the GO-CAM database.
+    The query can be any text, such as name of a pathway, genes, or
+    a complex sentence.
+    The objects returned are summaries of GO-CAM models; they do not contain full
+    details. Use `lookup_gocam` to retrieve full details of a model.
+    This tool uses a retrieval method that is not guaranteed to always return
+    complete results, and some results may be less relevant than others.
+    You MAY use your judgment in filtering these.
+    Args:
+        ctx: The run context
+        query: The search query text
+    Returns:
+        List[Dict]: List of GOCAM models matching the query
+    """
+    print(f"SEARCH GOCAMS: {query}")
+    try:
+        qr = ctx.deps.collection.search(query, index_name="llm", limit=ctx.deps.max_results)
+        objs = []
+        for score, row in qr.ranked_rows:
+            obj = flatten(row)
+            obj["relevancy_score"] = score
+            objs.append(obj)
+            print(f"RESULT: {obj}")
+        if not objs:
+            raise ModelRetry(f"No GOCAM models found matching the query: {query}. Try a different search term.")
+        return objs
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error searching GOCAM models: {str(e)}")
+async def lookup_gocam_local(ctx: RunContext[GOCAMDependencies], path: str) -> Dict:
+    """
+    Performs a lookup of a GO-CAM model by its local file path.
+    Args:
+        ctx: The run context
+        path: The local file path of the GO-CAM model
+    """
+    print(f"LOOKUP GOCAM LOCAL: {path}")
+    try:
+        path = Path(path)
+        if not path.exists():
+            raise ModelRetry(f"File not found: {path}")
+        objects = load_objects(path)
+        if not objects:
+            raise ModelRetry(f"No objects found in file: {path}")
+        if not isinstance(objects, list):
+            objects = [objects]
+        if len(objects) > 1:
+            raise ModelRetry(f"Multiple objects found in file: {path}")
+        if not isinstance(objects[0], dict):
+            raise ModelRetry(f"Object is not a dictionary: {path}")
+        return objects[0]
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error looking up GO-CAM model: {str(e)}")
+async def lookup_gocam(ctx: RunContext[GOCAMDependencies], model_id: str) -> Dict:
+    """
+    Performs a lookup of a GO-CAM model by its ID, and returns the model.
+    Args:
+        ctx: The run context
+        model_id: The ID of the GO-CAM model to look up
+    Returns:
+        Dict: The GO-CAM model data
+    """
+    print(f"LOOKUP GOCAM: {model_id}")
+    try:
+        # Normalize the model ID
+        if ":" in model_id:
+            parts = model_id.split(":")
+            if parts[0] != "gomodel":
+                model_id = f"gomodel:{parts[1]}"
+        else:
+            model_id = f"gomodel:{model_id}"
+        qr = ctx.deps.collection.find({"id": model_id})
+        if not qr.rows:
+            raise ModelRetry(f"Could not find GO-CAM model with ID {model_id}. The ID may be incorrect.")
+        return qr.rows[0]
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error looking up GO-CAM model {model_id}: {str(e)}")
+async def lookup_uniprot_entry(ctx: RunContext[GOCAMDependencies], uniprot_acc: str) -> str:
+    """
+    Lookup the Uniprot entry for a given Uniprot accession number.
+    This can be used to obtain further information about a protein in
+    a GO-CAM.
+    Args:
+        ctx: The run context
+        uniprot_acc: The Uniprot accession
+    Returns:
+        str: Detailed functional and other info about the protein
+    """
+    print(f"LOOKUP UNIPROT: {uniprot_acc}")
+    try:
+        normalized_acc = normalize_uniprot_id(uniprot_acc)
+        uniprot_service = ctx.deps.get_uniprot_service()
+        result = uniprot_service.retrieve(normalized_acc, frmt="txt")
+        if not result or "Error" in result or "Entry not found" in result:
+            raise ModelRetry(f"Could not find UniProt entry for {uniprot_acc}. The accession may be incorrect.")
+        return result
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error retrieving UniProt entry for {uniprot_acc}: {str(e)}")
+# These functions have been removed and replaced with direct use of
+# literature_lookup_pmid, search_literature_web, and retrieve_literature_page
+# from aurelian.agents.literature.literature_tools
+def all_documents() -> Dict:
+    """
+    Get all available GO-CAM documentation.
+    Returns:
+        Dictionary of all available GO-CAM documents
+    """
+    if not DOCUMENTS_DIR.exists():
+        return {"documents": []}
+    documents = []
+    for file_path in DOCUMENTS_DIR.glob("*.md"):
+        doc_id = file_path.stem
+        title = doc_id.replace("_", " ")
+        documents.append({
+            "id": doc_id,
+            "title": title,
+            "path": str(file_path)
+        })
+    return {"documents": documents}
+async def fetch_document(
+    ctx: RunContext[GOCAMDependencies],
+    name: str,
+    format: str = "md"
+) -> str:
+    """
+    Lookup the GO-CAM document by name.
+    Args:
+        ctx: The run context
+        name: The document name (e.g. "How_to_annotate_complexes_in_GO-CAM")
+        format: The format of the document (defaults to "md")
+    Returns:
+        The content of the document
+    """
+    print(f"FETCH DOCUMENT: {name}")
+    try:
+        # Get all available documents
+        all_docs = all_documents()
+        # Normalize document name and find it
+        selected_document = None
+        name_normalized = name.replace(" ", "_").lower()
+        for document in all_docs["documents"]:
+            if document["id"].lower() == name_normalized:
+                selected_document = document
+                break
+            if document["title"].lower() == name.lower():
+                selected_document = document
+                break
+        if not selected_document:
+            available_docs = ", ".join([d["title"] for d in all_docs["documents"]])
+            raise ModelRetry(
+                f"Could not find document with name '{name}'. "
+                f"Available documents: {available_docs}"
+            )
+        # Get the document file
+        path = Path(selected_document["path"])
+        if not path.exists():
+            raise ModelRetry(f"Document file not found: {path}")
+        # Read the document file
+        with open(path) as f:
+            content = f.read()
+        if not content or content.strip() == "":
+            raise ModelRetry(f"Document file is empty: {path}")
+        return content
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error fetching document: {str(e)}")
+async def validate_gocam_model(
+    ctx: RunContext[GOCAMDependencies],
+    model_data: Union[str, Dict[str, Any]],
+    format: str = "json"
+) -> Dict[str, Any]:
+    """
+    Validate a GO-CAM model against the pydantic schema.
+    Args:
+        ctx: The run context
+        model_data: The model data as a JSON/YAML string or dict
+        format: The format of the input data (json or yaml)
+    Returns:
+        Dict with validation results, including success status and errors if any
+    """
+    try:
+        # Parse the input data if it's a string
+        if isinstance(model_data, str):
+            if format.lower() == "json":
+                parsed_data = json.loads(model_data)
+            elif format.lower() == "yaml":
+                parsed_data = yaml.safe_load(model_data)
+            else:
+                raise ModelRetry(f"Unsupported format: {format}. Must be 'json' or 'yaml'")
+        else:
+            parsed_data = model_data
+        # Validate the model
+        try:
+            gocam_model = GocamModel(**parsed_data)
+            return {
+                "valid": True,
+                "message": "Model is valid according to GO-CAM schema",
+                "model": gocam_model.model_dump(exclude_none=True)
+            }
+        except ValidationError as e:
+            errors = []
+            for error in e.errors():
+                errors.append({
+                    "loc": " -> ".join([str(loc) for loc in error["loc"]]),
+                    "msg": error["msg"],
+                    "type": error["type"]
+                })
+            return {
+                "valid": False,
+                "message": "Model validation failed",
+                "errors": errors
+            }
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error validating GO-CAM model: {str(e)}")

aurelian/agents/linkml/__init__.py ADDED Viewed

File without changes

aurelian/agents/linkml/linkml_agent.py ADDED Viewed

@@ -0,0 +1,62 @@
+"""
+Agent for creating LinkML schemas and example datasets
+"""
+from typing import List
+from aurelian.agents.filesystem.filesystem_tools import download_url_as_markdown, inspect_file
+from aurelian.agents.linkml.linkml_config import LinkMLDependencies
+from aurelian.agents.linkml.linkml_tools import validate_then_save_schema, validate_data
+from aurelian.utils.async_utils import run_sync
+from pydantic_ai import Agent, Tool
+SYSTEM = """
+You are an expert data modeler able to assist in creating LinkML schemas.
+Always provide the schema in LinkML YAML, unless asked otherwise.
+Before providing the user with a schema, you MUST ALWAYS validate it using the `validate_schema` tool.
+If there are mistakes, iterate on the schema until it validates.
+If it is too hard, ask the user for further guidance.
+If you are asked to make schemas for a file, you can look at files using
+the `inspect_file` tool.
+Always be transparent and show your working and reasoning. If you validate the schema,
+tell the user you did this.
+You should assume the user is technically competent, and can interpret both YAML
+schema files, and example data files in JSON or YAML.
+"""
+linkml_agent = Agent(
+    model="openai:gpt-4o",
+    deps_type=LinkMLDependencies,
+    tools=[
+        Tool(inspect_file),
+        Tool(download_url_as_markdown),
+        Tool(validate_then_save_schema),
+        Tool(validate_data),
+    ],
+    system_prompt=SYSTEM
+)
+def chat(workdir: str, **kwargs):
+    import gradio as gr
+    deps = LinkMLDependencies()
+    deps.workdir.location = workdir
+    def get_info(query: str, history: List[str]) -> str:
+        print(f"QUERY: {query}")
+        print(f"HISTORY: {history}")
+        if history:
+            query += "## History"
+            for h in history:
+                query += f"\n{h}"
+        result = run_sync(lambda: linkml_agent.run_sync(query, deps=deps, **kwargs))
+        return result.data
+    return gr.ChatInterface(
+        fn=get_info,
+        type="messages",
+        title="LinkML AI Assistant",
+        examples=[
+            ["Generate a schema for modeling the chemical components of foods"],
+            ["Generate a schema for this data: {name: 'joe', age: 22}"],
+        ]
+    )

aurelian/agents/linkml/linkml_config.py ADDED Viewed

@@ -0,0 +1,48 @@
+from dataclasses import dataclass, field
+import os
+from typing import List, Optional
+from pydantic_ai import AgentRunError
+from aurelian.dependencies.workdir import HasWorkdir, WorkDir
+@dataclass
+class LinkMLDependencies(HasWorkdir):
+    """Configuration for the LinkML agent."""
+    workdir: Optional[WorkDir] = None
+    def __post_init__(self):
+        """Initialize the config with default values."""
+        # Initialize workdir if not provided
+        if self.workdir is None:
+            self.workdir = WorkDir()
+    def parse_objects_from_file(self, data_file: str) -> List[dict]:
+        """
+        Parse objects from a file in the working directory.
+        Args:
+            data_file: Name of the data file in the working directory
+        Returns:
+            List of parsed objects
+        """
+        from linkml_store.utils.format_utils import load_objects
+        path_to_file = self.workdir.get_file_path(data_file)
+        if not path_to_file.exists():
+            raise AgentRunError(f"Data file {data_file} does not exist")
+        return load_objects(path_to_file)
+def get_config() -> LinkMLDependencies:
+    """
+    Get the LinkML agent configuration.
+    Returns:
+        LinkMLDependencies: The LinkML dependencies
+    """
+    workdir_path = os.environ.get("AURELIAN_WORKDIR", None)
+    workdir = WorkDir(location=workdir_path) if workdir_path else None
+    return LinkMLDependencies(workdir=workdir)

aurelian/agents/linkml/linkml_evals.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""
+Evaluation module for the LinkML agent.
+This module implements evaluations for the LinkML agent using the pydantic-ai-evals framework.
+"""
+import asyncio
+import sys
+from typing import Optional, Any, Dict, Callable, Awaitable
+from aurelian.evaluators.model import MetadataDict, metadata
+from aurelian.evaluators.substring_evaluator import SubstringEvaluator
+from pydantic_evals import Case, Dataset
+from aurelian.agents.linkml.linkml_agent import linkml_agent
+from aurelian.agents.linkml.linkml_config import LinkMLDependencies
+class LinkMLMetadata(Dict[str, Any]):
+    """Simple metadata dictionary for LinkML evaluations."""
+    pass
+# Define individual evaluation cases
+case1 = Case(
+    name="schema_generation_food",
+    inputs="Generate a schema for modeling the chemical components of foods",
+    expected_output="class",  # We expect the output to contain schema classes
+    metadata=metadata("medium", "schema_generation")
+)
+case2 = Case(
+    name="schema_from_json",
+    inputs="Generate a schema for this data: {name: 'joe', age: 22}",
+    expected_output="Person",  # Expected to infer a Person class
+    metadata=metadata("easy", "schema_inference")
+)
+case3 = Case(
+    name="schema_validation",
+    inputs="Is this a valid LinkML schema? types: string: {base: str}",
+    expected_output="valid",  # Checking agent can validate schema snippets
+    metadata=metadata("medium", "schema_validation")
+)
+case4 = Case(
+    name="schema_recommendations",
+    inputs="What's the best way to model a many-to-many relationship in LinkML?",
+    expected_output="multivalued",  # Should mention multivalued attributes
+    metadata=metadata("hard", "best_practices")
+)
+def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
+    """
+    Create a dataset for evaluating the LinkML agent.
+    Returns:
+        Dataset of LinkML evaluation cases with appropriate evaluators
+    """
+    # Collect all cases
+    cases = [case1, case2, case3, case4]
+    # Dataset-level evaluators
+    evaluators = [SubstringEvaluator()]
+    return Dataset(
+        cases=cases,
+        evaluators=evaluators
+    )

aurelian/agents/linkml/linkml_gradio.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""
+Gradio UI for the LinkML agent.
+"""
+from typing import List, Optional
+import gradio as gr
+from aurelian.agents.linkml.linkml_agent import linkml_agent
+from aurelian.agents.linkml.linkml_config import LinkMLDependencies
+from aurelian.utils.async_utils import run_sync
+def chat(deps: Optional[LinkMLDependencies] = None, **kwargs):
+    """
+    Initialize a chat interface for the LinkML agent.
+    Args:
+        deps: Optional dependencies configuration
+        **kwargs: Additional arguments to pass to the agent
+    Returns:
+        A Gradio chat interface
+    """
+    if deps is None:
+        deps = LinkMLDependencies()
+    def get_info(query: str, history: List[str]) -> str:
+        print(f"QUERY: {query}")
+        print(f"HISTORY: {history}")
+        if history:
+            query += "## History"
+            for h in history:
+                query += f"\n{h}"
+        result = run_sync(lambda: linkml_agent.run_sync(query, deps=deps, **kwargs))
+        return result.data
+    return gr.ChatInterface(
+        fn=get_info,
+        type="messages",
+        title="LinkML AI Assistant",
+        examples=[
+            ["Generate a schema for modeling the chemical components of foods"],
+            ["Generate a schema for this data: {name: 'joe', age: 22}"],
+        ]
+    )

aurelian/agents/linkml/linkml_mcp.py ADDED Viewed

@@ -0,0 +1,186 @@
+"""
+MCP tools for creating LinkML schemas and example datasets
+"""
+import os
+from mcp.server.fastmcp import FastMCP
+import aurelian.agents.filesystem.filesystem_tools as fst
+from aurelian.agents.linkml.linkml_agent import SYSTEM
+from aurelian.agents.linkml.linkml_config import LinkMLDependencies
+from aurelian.agents.linkml.linkml_tools import validate_then_save_schema, ValidationResult
+from aurelian.utils.search_utils import web_search
+# Initialize the FastMCP server named "linkml"; the agent's SYSTEM prompt is passed as the server instructions
+mcp = FastMCP("linkml", instructions=SYSTEM)
+from linkml_runtime.loaders import yaml_loader
+from linkml_runtime.linkml_model import SchemaDefinition
+from linkml.validator import validate
+from pydantic_ai import RunContext, ModelRetry
+from aurelian.dependencies.workdir import WorkDir
+def deps() -> LinkMLDependencies:
+    deps = LinkMLDependencies()
+    loc = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
+    deps.workdir = WorkDir(loc)
+    return deps
+def ctx() -> RunContext[LinkMLDependencies]:
+    rc: RunContext[LinkMLDependencies] = RunContext[LinkMLDependencies](
+        deps=deps(),
+        model=None, usage=None, prompt=None,
+    )
+    return rc
+@mcp.tool()
+async def validate_schema(schema: str, save_to_file: str="schema.yaml") -> ValidationResult:
+    """
+    Validate a LinkML schema.
+    Args:
+        schema: schema (as yaml) to validate. Do not truncate, always pass the whole schema.
+        save_to_file: optional file name to save the schema to. Defaults to schema.yaml
+    Returns:
+    """
+    return await validate_then_save_schema(ctx(), schema, save_to_file)
+@mcp.tool()
+async def inspect_file(data_file: str) -> str:
+    """
+    Inspect a file in the working directory.
+    Args:
+        ctx:
+        data_file: name of file
+    Returns:
+    """
+    return await fst.inspect_file(ctx(), data_file)
+@mcp.tool()
+async def list_files() -> str:
+    """
+    List files in the working directory.
+    Args:
+        ctx:
+    Returns:
+    """
+    return "\n".join(deps().workdir.list_file_names())
+@mcp.tool()
+async def write_to_file(data: str, file_name: str) -> str:
+    """
+    Write data to a file in the working directory.
+    Args:
+        ctx:
+        data:
+        file_name:
+    Returns:
+    """
+    print(f"Writing data to file: {file_name}")
+    deps().workdir.write_file(file_name, data)
+    return f"Data written to {file_name}"
+@mcp.tool()
+async def validate_data(schema: str, data_file: str) -> str:
+    """
+    Validate data file against a schema.
+    This assumes the data file is present in the working directory.
+    You can write data to the working directory using the `write_to_file` tool.
+    Args:
+        ctx:
+        schema: the schema (as a YAML string)
+        data_file: the name of the data file in the working directory
+    Returns:
+    """
+    print(f"Validating data file: {data_file} using schema: {schema}")
+    try:
+        schema = yaml_loader.loads(schema, target_class=SchemaDefinition)
+    except Exception as e:
+        return f"Schema does not validate: {e}"
+    try:
+        instances = deps().parse_objects_from_file(data_file)
+        for instance in instances:
+            print(f"Validating {instance}")
+            rpt = validate(instance, schema)
+            print(f"Validation report: {rpt}")
+            if rpt.results:
+                return f"Data does not validate:\n{rpt.results}"
+        return f"{len(instances)} instances all validate successfully"
+    except Exception as e:
+        return f"Data does not validate: {e}"
+@mcp.tool()
+async def search_web(query: str) -> str:
+    """
+    Search the web using a text query.
+    Note, this will not retrieve the full content, for that you
+    should use `retrieve_web_page`.
+    Args:
+        query: Text query
+    Returns: matching web pages plus summaries
+    """
+    print(f"Web Search: {query}")
+    return web_search(query)
+@mcp.tool()
+async def retrieve_web_page(url: str) -> str:
+    """
+    Fetch the contents of a web page.
+    Args:
+        url: URL of the web page
+    Returns:
+        The contents of the web page.
+    """
+    print(f"Fetch URL: {url}")
+    import aurelian.utils.search_utils as su
+    return su.retrieve_web_page(url)
+@mcp.tool()
+async def download_web_page(url: str, local_file_name: str) -> str:
+    """
+    Download contents of a web page.
+    Args:
+        ctx:
+        url: URL of the web page
+        local_file_name: Name of the local file to save the
+    Returns:
+        str: message
+    """
+    print(f"Fetch URL: {url}")
+    import aurelian.utils.search_utils as su
+    data = su.retrieve_web_page(url)
+    deps().workdir.write_file(local_file_name, data)
+    return f"Data written to {local_file_name}"
+if __name__ == "__main__":
+    # Initialize and run the server
+    mcp.run(transport='stdio')