PyPI - aurelian - Versions diffs - 0.3.2__py3-none-any.whl - Mend

aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (254) hide show

aurelian/__init__.py +9 -0
aurelian/agents/__init__.py +0 -0
aurelian/agents/amigo/__init__.py +3 -0
aurelian/agents/amigo/amigo_agent.py +77 -0
aurelian/agents/amigo/amigo_config.py +85 -0
aurelian/agents/amigo/amigo_evals.py +73 -0
aurelian/agents/amigo/amigo_gradio.py +52 -0
aurelian/agents/amigo/amigo_mcp.py +152 -0
aurelian/agents/amigo/amigo_tools.py +152 -0
aurelian/agents/biblio/__init__.py +42 -0
aurelian/agents/biblio/biblio_agent.py +94 -0
aurelian/agents/biblio/biblio_config.py +40 -0
aurelian/agents/biblio/biblio_gradio.py +67 -0
aurelian/agents/biblio/biblio_mcp.py +115 -0
aurelian/agents/biblio/biblio_tools.py +164 -0
aurelian/agents/biblio_agent.py +46 -0
aurelian/agents/checklist/__init__.py +44 -0
aurelian/agents/checklist/checklist_agent.py +85 -0
aurelian/agents/checklist/checklist_config.py +28 -0
aurelian/agents/checklist/checklist_gradio.py +70 -0
aurelian/agents/checklist/checklist_mcp.py +86 -0
aurelian/agents/checklist/checklist_tools.py +141 -0
aurelian/agents/checklist/content/checklists.yaml +7 -0
aurelian/agents/checklist/content/streams.csv +136 -0
aurelian/agents/checklist_agent.py +40 -0
aurelian/agents/chemistry/__init__.py +3 -0
aurelian/agents/chemistry/chemistry_agent.py +46 -0
aurelian/agents/chemistry/chemistry_config.py +71 -0
aurelian/agents/chemistry/chemistry_evals.py +79 -0
aurelian/agents/chemistry/chemistry_gradio.py +50 -0
aurelian/agents/chemistry/chemistry_mcp.py +120 -0
aurelian/agents/chemistry/chemistry_tools.py +121 -0
aurelian/agents/chemistry/image_agent.py +15 -0
aurelian/agents/d4d/__init__.py +30 -0
aurelian/agents/d4d/d4d_agent.py +72 -0
aurelian/agents/d4d/d4d_config.py +46 -0
aurelian/agents/d4d/d4d_gradio.py +58 -0
aurelian/agents/d4d/d4d_mcp.py +71 -0
aurelian/agents/d4d/d4d_tools.py +157 -0
aurelian/agents/d4d_agent.py +64 -0
aurelian/agents/diagnosis/__init__.py +33 -0
aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
aurelian/agents/diagnosis/diagnosis_config.py +48 -0
aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
aurelian/agents/diagnosis_agent.py +28 -0
aurelian/agents/draw/__init__.py +3 -0
aurelian/agents/draw/draw_agent.py +39 -0
aurelian/agents/draw/draw_config.py +26 -0
aurelian/agents/draw/draw_gradio.py +50 -0
aurelian/agents/draw/draw_mcp.py +94 -0
aurelian/agents/draw/draw_tools.py +100 -0
aurelian/agents/draw/judge_agent.py +18 -0
aurelian/agents/filesystem/__init__.py +0 -0
aurelian/agents/filesystem/filesystem_config.py +27 -0
aurelian/agents/filesystem/filesystem_gradio.py +49 -0
aurelian/agents/filesystem/filesystem_mcp.py +89 -0
aurelian/agents/filesystem/filesystem_tools.py +95 -0
aurelian/agents/filesystem/py.typed +0 -0
aurelian/agents/github/__init__.py +0 -0
aurelian/agents/github/github_agent.py +83 -0
aurelian/agents/github/github_cli.py +248 -0
aurelian/agents/github/github_config.py +22 -0
aurelian/agents/github/github_gradio.py +152 -0
aurelian/agents/github/github_mcp.py +252 -0
aurelian/agents/github/github_tools.py +408 -0
aurelian/agents/github/github_tools.py.tmp +413 -0
aurelian/agents/goann/__init__.py +13 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
aurelian/agents/goann/goann_agent.py +90 -0
aurelian/agents/goann/goann_config.py +90 -0
aurelian/agents/goann/goann_evals.py +104 -0
aurelian/agents/goann/goann_gradio.py +62 -0
aurelian/agents/goann/goann_mcp.py +0 -0
aurelian/agents/goann/goann_tools.py +65 -0
aurelian/agents/gocam/__init__.py +43 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
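aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0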
aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
aurelian/agents/gocam/gocam_agent.py +240 -0
aurelian/agents/gocam/gocam_config.py +85 -0
aurelian/agents/gocam/gocam_curator_agent.py +46 -0
aurelian/agents/gocam/gocam_evals.py +67 -0
aurelian/agents/gocam/gocam_gradio.py +89 -0
aurelian/agents/gocam/gocam_mcp.py +224 -0
aurelian/agents/gocam/gocam_tools.py +294 -0
aurelian/agents/linkml/__init__.py +0 -0
aurelian/agents/linkml/linkml_agent.py +62 -0
aurelian/agents/linkml/linkml_config.py +48 -0
aurelian/agents/linkml/linkml_evals.py +66 -0
aurelian/agents/linkml/linkml_gradio.py +45 -0
aurelian/agents/linkml/linkml_mcp.py +186 -0
aurelian/agents/linkml/linkml_tools.py +102 -0
aurelian/agents/literature/__init__.py +3 -0
aurelian/agents/literature/literature_agent.py +55 -0
aurelian/agents/literature/literature_config.py +35 -0
aurelian/agents/literature/literature_gradio.py +52 -0
aurelian/agents/literature/literature_mcp.py +174 -0
aurelian/agents/literature/literature_tools.py +182 -0
aurelian/agents/monarch/__init__.py +25 -0
aurelian/agents/monarch/monarch_agent.py +44 -0
aurelian/agents/monarch/monarch_config.py +45 -0
aurelian/agents/monarch/monarch_gradio.py +51 -0
aurelian/agents/monarch/monarch_mcp.py +65 -0
aurelian/agents/monarch/monarch_tools.py +113 -0
aurelian/agents/oak/__init__.py +0 -0
aurelian/agents/oak/oak_config.py +27 -0
aurelian/agents/oak/oak_gradio.py +57 -0
aurelian/agents/ontology_mapper/__init__.py +31 -0
aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
aurelian/agents/phenopackets/__init__.py +3 -0
aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
aurelian/agents/phenopackets/phenopackets_config.py +72 -0
aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
aurelian/agents/rag/__init__.py +40 -0
aurelian/agents/rag/rag_agent.py +83 -0
aurelian/agents/rag/rag_config.py +80 -0
aurelian/agents/rag/rag_gradio.py +67 -0
aurelian/agents/rag/rag_mcp.py +107 -0
aurelian/agents/rag/rag_tools.py +189 -0
aurelian/agents/rag_agent.py +54 -0
aurelian/agents/robot/__init__.py +0 -0
aurelian/agents/robot/assets/__init__.py +3 -0
aurelian/agents/robot/assets/template.md +384 -0
aurelian/agents/robot/robot_config.py +25 -0
aurelian/agents/robot/robot_gradio.py +46 -0
aurelian/agents/robot/robot_mcp.py +100 -0
aurelian/agents/robot/robot_ontology_agent.py +139 -0
aurelian/agents/robot/robot_tools.py +50 -0
aurelian/agents/talisman/__init__.py +3 -0
aurelian/agents/talisman/talisman_agent.py +126 -0
aurelian/agents/talisman/talisman_config.py +66 -0
aurelian/agents/talisman/talisman_gradio.py +50 -0
aurelian/agents/talisman/talisman_mcp.py +168 -0
aurelian/agents/talisman/talisman_tools.py +720 -0
aurelian/agents/ubergraph/__init__.py +40 -0
aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
aurelian/agents/ubergraph/ubergraph_config.py +79 -0
aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
aurelian/agents/uniprot/__init__.py +37 -0
aurelian/agents/uniprot/uniprot_agent.py +43 -0
aurelian/agents/uniprot/uniprot_config.py +43 -0
aurelian/agents/uniprot/uniprot_evals.py +99 -0
aurelian/agents/uniprot/uniprot_gradio.py +48 -0
aurelian/agents/uniprot/uniprot_mcp.py +168 -0
aurelian/agents/uniprot/uniprot_tools.py +136 -0
aurelian/agents/web/__init__.py +0 -0
aurelian/agents/web/web_config.py +27 -0
aurelian/agents/web/web_gradio.py +48 -0
aurelian/agents/web/web_mcp.py +50 -0
aurelian/agents/web/web_tools.py +108 -0
aurelian/chat.py +23 -0
aurelian/cli.py +800 -0
aurelian/dependencies/__init__.py +0 -0
aurelian/dependencies/workdir.py +78 -0
aurelian/mcp/__init__.py +0 -0
aurelian/mcp/amigo_mcp_test.py +86 -0
aurelian/mcp/config_generator.py +123 -0
aurelian/mcp/example_config.json +43 -0
aurelian/mcp/generate_sample_config.py +37 -0
aurelian/mcp/gocam_mcp_test.py +126 -0
aurelian/mcp/linkml_mcp_tools.py +190 -0
aurelian/mcp/mcp_discovery.py +87 -0
aurelian/mcp/mcp_test.py +31 -0
aurelian/mcp/phenopackets_mcp_test.py +103 -0
aurelian/tools/__init__.py +0 -0
aurelian/tools/web/__init__.py +0 -0
aurelian/tools/web/url_download.py +51 -0
aurelian/utils/__init__.py +0 -0
aurelian/utils/async_utils.py +15 -0
aurelian/utils/data_utils.py +32 -0
aurelian/utils/documentation_manager.py +59 -0
aurelian/utils/doi_fetcher.py +238 -0
aurelian/utils/ontology_utils.py +68 -0
aurelian/utils/pdf_fetcher.py +23 -0
aurelian/utils/process_logs.py +100 -0
aurelian/utils/pubmed_utils.py +238 -0
aurelian/utils/pytest_report_to_markdown.py +67 -0
aurelian/utils/robot_ontology_utils.py +112 -0
aurelian/utils/search_utils.py +95 -0
aurelian-0.3.2.dist-info/LICENSE +22 -0
aurelian-0.3.2.dist-info/METADATA +105 -0
aurelian-0.3.2.dist-info/RECORD +254 -0
aurelian-0.3.2.dist-info/WHEEL +4 -0
aurelian-0.3.2.dist-info/entry_points.txt +3 -0

aurelian/agents/d4d/d4d_mcp.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""
+MCP tools for the D4D (Datasheets for Datasets) agent.
+"""
+import os
+from typing import Optional
+from mcp.server.fastmcp import FastMCP
+from aurelian.agents.d4d.d4d_agent import data_sheets_agent
+from aurelian.agents.d4d.d4d_config import D4DConfig
+import aurelian.agents.d4d.d4d_tools as dt
+from pydantic_ai import RunContext
+# Module-level FastMCP server instance named "d4d"; the @mcp.* decorators in
+# this module attach the D4D prompt and tools to it.
+mcp = FastMCP("d4d", instructions="Datasheets for Datasets (D4D) agent")
+from aurelian.dependencies.workdir import WorkDir
+def deps() -> D4DConfig:
+    deps = D4DConfig()
+    # Set the location from environment variable or default
+    loc = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
+    deps.workdir = WorkDir(loc)
+    return deps
+def ctx() -> RunContext[D4DConfig]:
+    rc: RunContext[D4DConfig] = RunContext[D4DConfig](
+        deps=deps(),
+        model=None, usage=None, prompt=None,
+    )
+    return rc
+@mcp.system_prompt
+async def add_schema() -> str:
+    """Add the full schema to the system prompt."""
+    return await dt.get_full_schema(ctx())
+@mcp.tool()
+async def get_full_schema(url: Optional[str] = None) -> str:
+    """
+    Load the full datasheets for datasets schema from GitHub.
+    Args:
+        url: Optional URL override for the schema location
+    Returns:
+        The schema text content
+    """
+    return await dt.get_full_schema(ctx(), url)
+@mcp.tool()
+async def process_website_or_pdf(url: str) -> str:
+    """
+    Process a website or PDF with dataset information.
+    Args:
+        url: URL to a website or PDF file with dataset information
+    Returns:
+        YAML formatted dataset metadata following the D4D schema
+    """
+    return await dt.process_website_or_pdf(ctx(), url)
+if __name__ == "__main__":
+    # Executed as a script: start the MCP server on the stdio transport
+    mcp.run(transport='stdio')

aurelian/agents/d4d/d4d_tools.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""
+Tools for the D4D (Datasheets for Datasets) agent.
+"""
+import asyncio
+import tempfile
+from typing import Optional
+import requests
+from pdfminer.high_level import extract_text
+from pydantic_ai import RunContext, ModelRetry
+from aurelian.utils.search_utils import retrieve_web_page as fetch_web_page
+from .d4d_config import D4DConfig
+async def get_full_schema(
+    ctx: RunContext[D4DConfig],
+    url: Optional[str] = None
+) -> str:
+    """
+    Load the full datasheets for datasets schema from GitHub.
+    Args:
+        ctx: The run context
+        url: Optional URL override for the schema location
+    Returns:
+        The schema text content
+    """
+    try:
+        schema_url = url or ctx.deps.schema_url
+        # Execute the potentially blocking operation in a thread pool
+        def _fetch_schema():
+            response = requests.get(schema_url)
+            if response.status_code == 200:
+                return response.text
+            else:
+                raise Exception(f"Failed to load schema: HTTP {response.status_code}")
+        schema_text = await asyncio.to_thread(_fetch_schema)
+        if not schema_text or schema_text.strip() == "":
+            raise ModelRetry(f"Empty schema returned from URL: {schema_url}")
+        return schema_text
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error loading schema: {str(e)}")
+async def extract_text_from_pdf(
+    ctx: RunContext[D4DConfig],
+    pdf_url: str
+) -> str:
+    """
+    Download and extract text from a PDF given its URL.
+    Args:
+        ctx: The run context
+        pdf_url: The URL of the PDF to extract text from
+    Returns:
+        The extracted text content
+    """
+    try:
+        # Execute the potentially blocking operation in a thread pool
+        def _extract_pdf():
+            response = requests.get(pdf_url)
+            if response.status_code != 200:
+                raise Exception(f"Failed to retrieve PDF: HTTP {response.status_code}")
+            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as temp_pdf:
+                temp_pdf.write(response.content)
+                temp_pdf.flush()  # Ensure all data is written before reading
+                text = extract_text(temp_pdf.name)
+                if not text or text.strip() == "":
+                    raise Exception("No text extracted from PDF")
+                return text.strip()
+        pdf_text = await asyncio.to_thread(_extract_pdf)
+        return pdf_text
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error extracting PDF text: {str(e)}")
+async def retrieve_web_page(
+    ctx: RunContext[D4DConfig],
+    url: str
+) -> str:
+    """
+    Retrieve the content of a web page.
+    Args:
+        ctx: The run context
+        url: The URL of the web page to retrieve
+    Returns:
+        The web page content
+    """
+    try:
+        # Execute the potentially blocking operation in a thread pool
+        content = await asyncio.to_thread(fetch_web_page, url)
+        if not content or content.strip() == "":
+            raise ModelRetry(f"No content found for URL: {url}")
+        return content
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error retrieving web page: {str(e)}")
+async def process_website_or_pdf(
+    ctx: RunContext[D4DConfig],
+    url: str
+) -> str:
+    """
+    Determine if the URL is a PDF or webpage, retrieve the content.
+    Args:
+        ctx: The run context
+        url: The URL of the content to process
+    Returns:
+        The extracted content from the PDF or web page
+    """
+    try:
+        # Check if it's a PDF by extension or content type
+        is_pdf = False
+        if url.lower().endswith(".pdf"):
+            is_pdf = True
+        else:
+            # Check the content type in case the file doesn't have a .pdf extension
+            def _check_content_type():
+                response = requests.head(url)
+                content_type = response.headers.get("Content-Type", "").lower()
+                return "pdf" in content_type
+            is_pdf = await asyncio.to_thread(_check_content_type)
+        # Retrieve the content based on the type
+        if is_pdf:
+            return await extract_text_from_pdf(ctx, url)
+        else:
+            return await retrieve_web_page(ctx, url)
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error processing URL: {str(e)}")

aurelian/agents/d4d_agent.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""
+Agent for extracting dataset metadata following the datasheets for datasets schema.
+This module re-exports components from the d4d/ package for backward compatibility.
+"""
+import asyncio
+# Re-export from d4d package
+from aurelian.agents.d4d import (
+    data_sheets_agent,
+    D4DConfig,
+    get_config,
+    get_full_schema,
+    process_website_or_pdf,
+    extract_text_from_pdf,
+    chat,
+)
+# Provide the original synchronous functions for backward compatibility
+def get_full_schema_sync(url=None):
+    """Legacy synchronous version of get_full_schema"""
+    config = get_config()
+    ctx = data_sheets_agent._get_run_context(deps=config)
+    return asyncio.run(get_full_schema(ctx, url))
+# Evaluated at import time: importing this module calls get_full_schema_sync()
+# and stores its result here.
+FULL_SCHEMA = get_full_schema_sync()
+def safe_run(prompt: str):
+    """
+    Ensure an event loop is available and then call the agent's synchronous method.
+    """
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+    return data_sheets_agent.run_sync(prompt)
+def process_website_or_pdf_sync(url: str) -> str:
+    """
+    Legacy synchronous version of process_website_or_pdf
+    """
+    config = get_config()
+    ctx = data_sheets_agent._get_run_context(deps=config)
+    # Get the content
+    page_content = asyncio.run(process_website_or_pdf(ctx, url))
+    # Format the prompt
+    prompt = f"""
+The following is the content of a document describing a dataset:
+{page_content}
+Using the complete datasheets for datasets schema provided above, extract all the metadata from the document and generate a YAML document that exactly conforms to that schema. Ensure that all required fields are present and the output is valid YAML. The dataset URL is: {url}
+Generate only the YAML document.
+"""
+    # Run the agent with the prompt
+    result = safe_run(prompt)
+    return result.data

aurelian/agents/diagnosis/__init__.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""
+Diagnosis agent package for diagnosing rare diseases using the Monarch Knowledge Base.
+"""
+from .diagnosis_agent import diagnosis_agent
+from .diagnosis_config import DiagnosisDependencies, get_config
+from .diagnosis_gradio import chat
+from .diagnosis_tools import (
+    find_disease_id,
+    find_disease_phenotypes,
+    search_web,
+    retrieve_web_page,
+    get_mondo_adapter,
+)
+# Public API of the package, grouped by the submodule each name is imported from
+__all__ = [
+    # Agent (diagnosis_agent)
+    "diagnosis_agent",
+    # Config (diagnosis_config)
+    "DiagnosisDependencies",
+    "get_config",
+    # Tools (diagnosis_tools)
+    "find_disease_id",
+    "find_disease_phenotypes",
+    "search_web",
+    "retrieve_web_page",
+    "get_mondo_adapter",
+    # Gradio (diagnosis_gradio)
+    "chat",
+]

aurelian/agents/diagnosis/diagnosis_agent.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""
+Agent for performing diagnoses, validated against Monarch KG.
+"""
+from pydantic_ai import Agent
+from .diagnosis_config import DiagnosisDependencies, get_config
+from .diagnosis_tools import (
+    find_disease_id,
+    find_disease_phenotypes,
+    search_web,
+    retrieve_web_page,
+)
+# System prompt for the diagnosis agent. It is written as adjacent string
+# literals that Python joins into a single string; every fragment after the
+# first begins with a space so the sentences stay separated.
+DIAGNOSIS_SYSTEM_PROMPT = (
+    "You are an expert clinical geneticist."
+    " Your task is to assist in diagnosing rare diseases,"
+    " and with determining underlying gene or variant."
+    " The recommended workflow is to first think of a set of candidate diseases."
+    " You should show your reasoning, and your candidate list (as many as appropriate)."
+    " You should then check your hypotheses against the Monarch knowledge base."
+    " You can find the Mondo ID of the disease using the `find_disease_id` function."
+    " You should then query the Monarch knowledge base to get a list of phenotypes for that"
+    " disease id, using the `find_disease_phenotypes` function."
+    " Present results in detail, using markdown tables unless otherwise specified."
+    " Try and account for all presented patient phenotypes in the table (you can"
+    " roll up similar phenotypes to broader categories)."
+    " also try and account for hallmark features of the disease not found in the patient,"
+    " always showing your reasoning."
+    " If you get something from a web search, tell me the web page."
+    " If you get something from the knowledge base, give provenance."
+    " Again, using information from the knowledge base."
+    " Give detailed provenance chains in <details> tags."
+    " Show ontology term IDs together with labels whenever possible."
+    " Include HPO IDs which you will get from the `find_disease_phenotypes` function"
+    " (never guess these, always get from the query results)."
+    " Stick to markdown, and all prefixed IDs should by hyperlinked with Bioregistry,"
+    " i.e https://bioregistry.io/{curie}."
+)
+# Create the diagnosis agent: the "openai:gpt-4o" model, DiagnosisDependencies
+# as the dependency type, plain-string results, and DIAGNOSIS_SYSTEM_PROMPT
+# as the system prompt
+diagnosis_agent = Agent(
+    model="openai:gpt-4o",
+    deps_type=DiagnosisDependencies,
+    result_type=str,
+    system_prompt=DIAGNOSIS_SYSTEM_PROMPT,
+)
+# Register tools: the two knowledge-base lookups go through `tool`, the two
+# web helpers through `tool_plain`.
+# NOTE(review): in pydantic_ai `tool` is for functions that take the run
+# context and `tool_plain` for ones that do not — confirm the signatures match.
+diagnosis_agent.tool(find_disease_id)
+diagnosis_agent.tool(find_disease_phenotypes)
+diagnosis_agent.tool_plain(search_web)
+diagnosis_agent.tool_plain(retrieve_web_page)

aurelian/agents/diagnosis/diagnosis_config.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""
+Configuration for the Diagnosis agent.
+"""
+from dataclasses import dataclass
+import os
+from typing import Optional
+from oaklib.implementations import MonarchImplementation
+from aurelian.dependencies.workdir import HasWorkdir, WorkDir
+# Constants
+# CURIE-style predicate string; presumably consumed by the diagnosis tools
+# when querying for phenotype associations — TODO confirm against callers.
+HAS_PHENOTYPE = "biolink:has_phenotype"
+@dataclass
+class DiagnosisDependencies(HasWorkdir):
+    """Configuration for the Diagnosis agent.
+
+    Extends HasWorkdir with a search-result limit and a Monarch adapter.
+    Anything left as None is filled in by __post_init__.
+    """
+    # Maximum number of search results to return
+    max_search_results: int = 10
+    # Monarch adapter; when None, __post_init__ creates a MonarchImplementation
+    monarch_adapter: Optional[MonarchImplementation] = None
+    def __post_init__(self):
+        """Initialize the config with default values."""
+        # HasWorkdir doesn't have a __post_init__ method, so we don't call super()
+        # Fall back to a default-constructed WorkDir when none was supplied
+        if self.workdir is None:
+            self.workdir = WorkDir()
+        # Initialize Monarch adapter if not provided
+        if self.monarch_adapter is None:
+            self.monarch_adapter = MonarchImplementation()
+def get_config() -> DiagnosisDependencies:
+    """Get the Diagnosis configuration from environment variables or defaults."""
+    workdir_path = os.environ.get("AURELIAN_WORKDIR", None)
+    workdir = WorkDir(location=workdir_path) if workdir_path else None
+    # Get max search results from environment if available
+    max_results_env = os.environ.get("MAX_SEARCH_RESULTS")
+    max_results = int(max_results_env) if max_results_env and max_results_env.isdigit() else 10
+    return DiagnosisDependencies(
+        workdir=workdir,
+        max_search_results=max_results,
+    )

aurelian/agents/diagnosis/diagnosis_evals.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""
+Evaluation module for the Diagnosis agent.
+This module implements evaluations for the Diagnosis agent using the pydantic-ai-evals framework.
+"""
+import asyncio
+import sys
+from typing import Optional, Any, Dict, Callable, Awaitable
+from aurelian.evaluators.model import MetadataDict, metadata
+from aurelian.evaluators.substring_evaluator import SubstringEvaluator
+from pydantic_evals import Case, Dataset
+from aurelian.agents.diagnosis.diagnosis_agent import diagnosis_agent
+from aurelian.agents.diagnosis.diagnosis_config import DiagnosisDependencies, get_config
+class DiagnosisMetadata(Dict[str, Any]):
+    """Simple metadata dictionary for Diagnosis evaluations."""
+    pass
+# Define individual evaluation cases. Each case pairs a prompt with a short
+# expected_output string and metadata built from two labels.
+# NOTE(review): expected_output is presumably matched as a substring of the
+# agent's answer (see SubstringEvaluator) — confirm against the evaluator.
+case1 = Case(
+    name="hutchinson_gilford_progeria",
+    inputs="""Patient has growth failure, distinct facial features, alopecia, and skin aging.
+    Findings excluded: Pigmented nevi, cafe-au-lait spots, and photosensitivity.
+    Onset was in infancy.
+    Return diagnosis with MONDO ID""",
+    expected_output="MONDO:0010135",  # Hutchinson-Gilford Progeria Syndrome
+    metadata=metadata("hard", "diagnosis")
+)
+case2 = Case(
+    name="marfan_eye_phenotypes",
+    inputs="What eye phenotypes does Marfan syndrome have?",
+    expected_output="lens",  # Should mention lens dislocation/ectopia lentis
+    metadata=metadata("medium", "phenotype_retrieval")
+)
+case3 = Case(
+    name="eds_type1_id",
+    inputs="What is the ID for Ehlers-Danlos syndrome type 1?",
+    expected_output="MONDO:0007947",  # EDS classic type 1
+    metadata=metadata("easy", "id_retrieval")
+)
+case4 = Case(
+    name="eds_types",
+    inputs="What are the kinds of Ehlers-Danlos syndrome?",
+    expected_output="hypermobility",  # Should mention hypermobility type
+    metadata=metadata("medium", "classification")
+)
+case5 = Case(
+    name="eds_literature_search",
+    inputs="Look at phenotypes for Ehlers-Danlos classic type 2. Do a literature search to look at latest studies. What is missing from the KB?",
+    expected_output="study",  # Should reference studies
+    metadata=metadata("hard", "literature_analysis")
+)
+def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
+    """
+    Create a dataset for evaluating the Diagnosis agent.
+    Returns:
+        Dataset of Diagnosis evaluation cases with appropriate evaluators
+    """
+    # Collect all cases
+    cases = [case1, case2, case3, case4, case5]
+    # Dataset-level evaluators
+    evaluators = [SubstringEvaluator()]
+    return Dataset(
+        cases=cases,
+        evaluators=evaluators
+    )

aurelian/agents/diagnosis/diagnosis_gradio.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""
+Gradio interface for the Diagnosis agent.
+"""
+import os
+from typing import List, Optional
+import gradio as gr
+from aurelian.utils.async_utils import run_sync
+from .diagnosis_agent import diagnosis_agent
+from .diagnosis_config import DiagnosisDependencies, get_config
+def chat(deps: Optional[DiagnosisDependencies] = None, **kwargs):
+    """
+    Initialize a chat interface for the Diagnosis agent.
+    Args:
+        deps: Optional dependencies configuration
+        **kwargs: Additional arguments to pass to the agent
+    Returns:
+        A Gradio chat interface
+    """
+    if deps is None:
+        deps = get_config()
+    def get_info(query: str, history: List[str]) -> str:
+        print(f"QUERY: {query}")
+        print(f"HISTORY: {history}")
+        if history:
+            query += "## History"
+            for h in history:
+                query += f"\n{h}"
+        result = run_sync(lambda: diagnosis_agent.run_sync(query, deps=deps, **kwargs))
+        return result.data
+    return gr.ChatInterface(
+        fn=get_info,
+        type="messages",
+        title="Diagnosis AI Assistant",
+        examples=[
+            """Patient has growth failure, distinct facial features, alopecia, and skin aging.
+            Findings excluded: Pigmented nevi, cafe-au-lait spots, and photosensitivity.
+            Onset was in infancy.
+            Return diagnosis with MONDO ID""",
+            "What eye phenotypes does Marfan syndrome have?",
+            "What is the ID for Ehlers-Danlos syndrome type 1?",
+            "What are the kinds of Ehlers-Danlos syndrome?",
+            "Look at phenotypes for Ehlers-Danlos classic type 2. Do a literature search to look at latest studies. What is missing from the KB?",
+        ],
+    )

aurelian/agents/diagnosis/diagnosis_mcp.py ADDED Viewed

@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+"""
+MCP tools for performing diagnoses, validated against Monarch KG.
+"""
+import os
+from mcp.server.fastmcp import FastMCP
+import aurelian.agents.filesystem.filesystem_tools as fst
+from aurelian.agents.diagnosis.diagnosis_agent import DIAGNOSIS_SYSTEM_PROMPT
+from aurelian.agents.diagnosis.diagnosis_config import DiagnosisDependencies, get_config
+from aurelian.agents.diagnosis.diagnosis_tools import (
+    find_disease_id,
+    find_disease_phenotypes,
+)
+from aurelian.utils.search_utils import web_search, retrieve_web_page as fetch_web_page
+from aurelian.dependencies.workdir import WorkDir
+# Initialize FastMCP server
+mcp = FastMCP("diagnosis", instructions=DIAGNOSIS_SYSTEM_PROMPT)
+def deps() -> DiagnosisDependencies:
+    """Get diagnosis dependencies with workdir from environment."""
+    deps = DiagnosisDependencies()
+    loc = os.getenv("AURELIAN_WORKDIR", "/tmp/diagnosis")
+    deps.workdir = WorkDir(loc)
+    return deps
+@mcp.tool()
+async def search_disease(query: str) -> list:
+    """
+    Find diseases matching a search query.
+    Args:
+        query: The search term or expression to find diseases
+    Returns:
+        List of matching disease IDs and labels
+    """
+    return await find_disease_id(deps(), query)
+@mcp.tool()
+async def get_disease_phenotypes(disease_id: str) -> list:
+    """
+    Get phenotypes associated with a disease.
+    Args:
+        disease_id: The disease ID (e.g., "MONDO:0007947") or label
+    Returns:
+        List of phenotype associations for the disease
+    """
+    return await find_disease_phenotypes(deps(), disease_id)
+@mcp.tool()
+async def search_web(query: str) -> str:
+    """
+    Search the web using a text query.
+    Note: This will not retrieve the full content. For that, use `retrieve_web_page`.
+    Args:
+        query: The search query
+    Returns:
+        Matching web pages plus summaries
+    """
+    return web_search(query)
+@mcp.tool()
+async def retrieve_web_page(url: str) -> str:
+    """
+    Fetch the contents of a web page.
+    Args:
+        url: The URL of the web page to retrieve
+    Returns:
+        The contents of the web page
+    """
+    return fetch_web_page(url)
+@mcp.tool()
+async def inspect_file(file_name: str) -> str:
+    """
+    Inspect a file in the working directory.
+    Args:
+        file_name: name of file to inspect
+    Returns:
+        File contents as string
+    """
+    return await fst.inspect_file(deps(), file_name)
+@mcp.tool()
+async def list_files() -> str:
+    """
+    List files in the working directory.
+    Returns:
+        Newline-separated list of file names
+    """
+    return "\n".join(deps().workdir.list_file_names())
+@mcp.tool()
+async def write_to_file(data: str, file_name: str) -> str:
+    """
+    Write data to a file in the working directory.
+    Args:
+        data: Content to write
+        file_name: Target file name
+    Returns:
+        Confirmation message
+    """
+    print(f"Writing data to file: {file_name}")
+    deps().workdir.write_file(file_name, data)
+    return f"Data written to {file_name}"
+@mcp.tool()
+async def download_web_page(url: str, local_file_name: str) -> str:
+    """
+    Download contents of a web page to a local file.
+    Args:
+        url: URL of the web page
+        local_file_name: Name of the local file to save to
+    Returns:
+        Confirmation message
+    """
+    print(f"Fetch URL: {url}")
+    data = fetch_web_page(url)
+    deps().workdir.write_file(local_file_name, data)
+    return f"Data written to {local_file_name}"
+if __name__ == "__main__":
+    # Initialize and run the server
+    mcp.run(transport='stdio')