PyPI - aurelian - Versions diffs - 0.1.0__py3-none-any.whl - Mend

aurelian 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (266) hide show

aurelian/__init__.py +9 -0
aurelian/agents/__init__.py +0 -0
aurelian/agents/amigo/__init__.py +3 -0
aurelian/agents/amigo/amigo_agent.py +77 -0
aurelian/agents/amigo/amigo_config.py +85 -0
aurelian/agents/amigo/amigo_evals.py +73 -0
aurelian/agents/amigo/amigo_gradio.py +52 -0
aurelian/agents/amigo/amigo_mcp.py +152 -0
aurelian/agents/amigo/amigo_tools.py +152 -0
aurelian/agents/biblio/__init__.py +42 -0
aurelian/agents/biblio/biblio_agent.py +95 -0
aurelian/agents/biblio/biblio_config.py +40 -0
aurelian/agents/biblio/biblio_gradio.py +67 -0
aurelian/agents/biblio/biblio_mcp.py +115 -0
aurelian/agents/biblio/biblio_tools.py +164 -0
aurelian/agents/biblio_agent.py +46 -0
aurelian/agents/checklist/__init__.py +44 -0
aurelian/agents/checklist/checklist_agent.py +86 -0
aurelian/agents/checklist/checklist_config.py +28 -0
aurelian/agents/checklist/checklist_gradio.py +70 -0
aurelian/agents/checklist/checklist_mcp.py +86 -0
aurelian/agents/checklist/checklist_tools.py +141 -0
aurelian/agents/checklist/content/checklists.yaml +7 -0
aurelian/agents/checklist/content/streams.csv +136 -0
aurelian/agents/checklist_agent.py +40 -0
aurelian/agents/chemistry/__init__.py +3 -0
aurelian/agents/chemistry/chemistry_agent.py +47 -0
aurelian/agents/chemistry/chemistry_config.py +71 -0
aurelian/agents/chemistry/chemistry_evals.py +79 -0
aurelian/agents/chemistry/chemistry_gradio.py +50 -0
aurelian/agents/chemistry/chemistry_mcp.py +120 -0
aurelian/agents/chemistry/chemistry_tools.py +121 -0
aurelian/agents/chemistry/image_agent.py +15 -0
aurelian/agents/d4d/__init__.py +30 -0
aurelian/agents/d4d/d4d_agent.py +73 -0
aurelian/agents/d4d/d4d_config.py +46 -0
aurelian/agents/d4d/d4d_gradio.py +58 -0
aurelian/agents/d4d/d4d_mcp.py +71 -0
aurelian/agents/d4d/d4d_tools.py +157 -0
aurelian/agents/d4d_agent.py +64 -0
aurelian/agents/diagnosis/__init__.py +33 -0
aurelian/agents/diagnosis/diagnosis_agent.py +54 -0
aurelian/agents/diagnosis/diagnosis_config.py +48 -0
aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
aurelian/agents/diagnosis_agent.py +28 -0
aurelian/agents/draw/__init__.py +3 -0
aurelian/agents/draw/draw_agent.py +39 -0
aurelian/agents/draw/draw_config.py +26 -0
aurelian/agents/draw/draw_gradio.py +50 -0
aurelian/agents/draw/draw_mcp.py +94 -0
aurelian/agents/draw/draw_tools.py +100 -0
aurelian/agents/draw/judge_agent.py +18 -0
aurelian/agents/filesystem/__init__.py +0 -0
aurelian/agents/filesystem/filesystem_config.py +27 -0
aurelian/agents/filesystem/filesystem_gradio.py +49 -0
aurelian/agents/filesystem/filesystem_mcp.py +89 -0
aurelian/agents/filesystem/filesystem_tools.py +95 -0
aurelian/agents/filesystem/py.typed +0 -0
aurelian/agents/github/__init__.py +0 -0
aurelian/agents/github/github_agent.py +83 -0
aurelian/agents/github/github_cli.py +248 -0
aurelian/agents/github/github_config.py +22 -0
aurelian/agents/github/github_gradio.py +152 -0
aurelian/agents/github/github_mcp.py +252 -0
aurelian/agents/github/github_tools.py +408 -0
aurelian/agents/github/github_tools.py.tmp +413 -0
aurelian/agents/goann/__init__.py +13 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
aurelian/agents/goann/goann_agent.py +90 -0
aurelian/agents/goann/goann_config.py +90 -0
aurelian/agents/goann/goann_evals.py +104 -0
aurelian/agents/goann/goann_gradio.py +62 -0
aurelian/agents/goann/goann_mcp.py +0 -0
aurelian/agents/goann/goann_tools.py +65 -0
aurelian/agents/gocam/__init__.py +52 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
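aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0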
aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
aurelian/agents/gocam/gocam_agent.py +243 -0
aurelian/agents/gocam/gocam_config.py +85 -0
aurelian/agents/gocam/gocam_curator_agent.py +46 -0
aurelian/agents/gocam/gocam_evals.py +64 -0
aurelian/agents/gocam/gocam_gradio.py +89 -0
aurelian/agents/gocam/gocam_mcp.py +224 -0
aurelian/agents/gocam/gocam_tools.py +294 -0
aurelian/agents/linkml/__init__.py +0 -0
aurelian/agents/linkml/linkml_agent.py +62 -0
aurelian/agents/linkml/linkml_config.py +48 -0
aurelian/agents/linkml/linkml_evals.py +66 -0
aurelian/agents/linkml/linkml_gradio.py +45 -0
aurelian/agents/linkml/linkml_mcp.py +181 -0
aurelian/agents/linkml/linkml_tools.py +102 -0
aurelian/agents/literature/__init__.py +3 -0
aurelian/agents/literature/literature_agent.py +75 -0
aurelian/agents/literature/literature_config.py +35 -0
aurelian/agents/literature/literature_gradio.py +52 -0
aurelian/agents/literature/literature_mcp.py +174 -0
aurelian/agents/literature/literature_tools.py +182 -0
aurelian/agents/monarch/__init__.py +0 -0
aurelian/agents/monarch/monarch_agent.py +45 -0
aurelian/agents/monarch/monarch_config.py +45 -0
aurelian/agents/monarch/monarch_gradio.py +51 -0
aurelian/agents/monarch/monarch_mcp.py +65 -0
aurelian/agents/monarch/monarch_tools.py +112 -0
aurelian/agents/oak/__init__.py +0 -0
aurelian/agents/oak/oak_config.py +27 -0
aurelian/agents/oak/oak_gradio.py +57 -0
aurelian/agents/ontology_mapper/__init__.py +31 -0
aurelian/agents/ontology_mapper/ontology_mapper_agent.py +57 -0
aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
aurelian/agents/paperqa/__init__.py +27 -0
aurelian/agents/paperqa/paperqa_agent.py +66 -0
aurelian/agents/paperqa/paperqa_cli.py +305 -0
aurelian/agents/paperqa/paperqa_config.py +142 -0
aurelian/agents/paperqa/paperqa_gradio.py +90 -0
aurelian/agents/paperqa/paperqa_mcp.py +155 -0
aurelian/agents/paperqa/paperqa_tools.py +566 -0
aurelian/agents/phenopackets/__init__.py +3 -0
aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
aurelian/agents/phenopackets/phenopackets_config.py +72 -0
aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
aurelian/agents/rag/__init__.py +40 -0
aurelian/agents/rag/rag_agent.py +84 -0
aurelian/agents/rag/rag_config.py +80 -0
aurelian/agents/rag/rag_gradio.py +67 -0
aurelian/agents/rag/rag_mcp.py +107 -0
aurelian/agents/rag/rag_tools.py +189 -0
aurelian/agents/rag_agent.py +54 -0
aurelian/agents/robot/__init__.py +0 -0
aurelian/agents/robot/assets/__init__.py +3 -0
aurelian/agents/robot/assets/template.md +384 -0
aurelian/agents/robot/robot_config.py +25 -0
aurelian/agents/robot/robot_gradio.py +46 -0
aurelian/agents/robot/robot_mcp.py +100 -0
aurelian/agents/robot/robot_ontology_agent.py +139 -0
aurelian/agents/robot/robot_tools.py +50 -0
aurelian/agents/talisman/__init__.py +3 -0
aurelian/agents/talisman/__main__.py +17 -0
aurelian/agents/talisman/cli.py +70 -0
aurelian/agents/talisman/run_talisman.py +18 -0
aurelian/agents/talisman/talisman_agent.py +143 -0
aurelian/agents/talisman/talisman_config.py +66 -0
aurelian/agents/talisman/talisman_gradio.py +50 -0
aurelian/agents/talisman/talisman_mcp.py +75 -0
aurelian/agents/talisman/talisman_tools.py +962 -0
aurelian/agents/ubergraph/__init__.py +40 -0
aurelian/agents/ubergraph/ubergraph_agent.py +72 -0
aurelian/agents/ubergraph/ubergraph_config.py +79 -0
aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
aurelian/agents/uniprot/__init__.py +0 -0
aurelian/agents/uniprot/uniprot_agent.py +43 -0
aurelian/agents/uniprot/uniprot_config.py +43 -0
aurelian/agents/uniprot/uniprot_evals.py +99 -0
aurelian/agents/uniprot/uniprot_gradio.py +48 -0
aurelian/agents/uniprot/uniprot_mcp.py +168 -0
aurelian/agents/uniprot/uniprot_tools.py +136 -0
aurelian/agents/web/__init__.py +0 -0
aurelian/agents/web/web_config.py +27 -0
aurelian/agents/web/web_gradio.py +48 -0
aurelian/agents/web/web_mcp.py +50 -0
aurelian/agents/web/web_tools.py +121 -0
aurelian/chat.py +23 -0
aurelian/cli.py +1004 -0
aurelian/dependencies/__init__.py +0 -0
aurelian/dependencies/workdir.py +78 -0
aurelian/evaluators/model.py +9 -0
aurelian/evaluators/substring_evaluator.py +30 -0
aurelian/mcp/__init__.py +0 -0
aurelian/mcp/amigo_mcp_test.py +86 -0
aurelian/mcp/config_generator.py +123 -0
aurelian/mcp/example_config.json +43 -0
aurelian/mcp/generate_sample_config.py +37 -0
aurelian/mcp/gocam_mcp_test.py +126 -0
aurelian/mcp/linkml_mcp_tools.py +190 -0
aurelian/mcp/mcp_discovery.py +87 -0
aurelian/mcp/mcp_test.py +31 -0
aurelian/mcp/phenopackets_mcp_test.py +103 -0
aurelian/tools/__init__.py +0 -0
aurelian/tools/web/__init__.py +0 -0
aurelian/tools/web/url_download.py +51 -0
aurelian/utils/__init__.py +0 -0
aurelian/utils/async_utils.py +18 -0
aurelian/utils/data_utils.py +32 -0
aurelian/utils/documentation_manager.py +59 -0
aurelian/utils/doi_fetcher.py +238 -0
aurelian/utils/ontology_utils.py +68 -0
aurelian/utils/pdf_fetcher.py +23 -0
aurelian/utils/process_logs.py +100 -0
aurelian/utils/pubmed_utils.py +238 -0
aurelian/utils/pytest_report_to_markdown.py +67 -0
aurelian/utils/robot_ontology_utils.py +112 -0
aurelian/utils/search_utils.py +95 -0
aurelian-0.1.0.dist-info/LICENSE +22 -0
aurelian-0.1.0.dist-info/METADATA +109 -0
aurelian-0.1.0.dist-info/RECORD +266 -0
aurelian-0.1.0.dist-info/WHEEL +4 -0
aurelian-0.1.0.dist-info/entry_points.txt +4 -0

aurelian/agents/paperqa/paperqa_tools.py ADDED Viewed

@@ -0,0 +1,566 @@
+"""
+Tools for the PaperQA agent.
+"""
+import os
+import logging
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+from pydantic_ai import RunContext, ModelRetry
+from paperqa import Docs, agent_query
+from paperqa.agents.search import get_directory_index
+from .paperqa_config import PaperQADependencies
+def create_response(success: bool, paper_directory: str, doc_files: dict,
+                    indexed_files: Optional[dict] = None, **kwargs) -> dict:
+    """Create a standardized response dictionary.
+    Args:
+        success: Whether the operation was successful
+        paper_directory: Path to the paper directory
+        doc_files: Dictionary with document files by type
+        indexed_files: Optional dictionary of indexed files
+        **kwargs: Additional key-value pairs to include in the response
+    Returns:
+        A standardized response dictionary
+    """
+    document_counts = {
+        'total': len(doc_files['all']),
+        'pdf': len(doc_files['pdf']),
+        'txt': len(doc_files['txt']),
+        'html': len(doc_files['html']),
+        'md': len(doc_files['md']),
+    }
+    response = {
+        "success": success,
+        "paper_directory": paper_directory,
+        "document_counts": document_counts,
+    }
+    if indexed_files is not None:
+        response["indexed_chunks_count"] = len(indexed_files)
+        response["indexed_papers"] = list(indexed_files.keys()) if hasattr(indexed_files, 'keys') else []
+    response.update(kwargs)
+    return response
+# Module-level logger used by the PaperQA tool functions in this module.
+logger = logging.getLogger(__name__)
+def get_document_files(directory: str) -> Dict[str, List[str]]:
+    """
+    Get all indexable document files in the given directory.
+    Args:
+        directory: Directory to search for document files
+    Returns:
+        dict: Dictionary with file lists by type and a combined list
+    """
+    document_extensions = ['.pdf', '.txt', '.html', '.md']
+    all_files = []
+    dir_path = Path(directory)
+    if dir_path.exists() and dir_path.is_dir():
+        all_files = [f.name for f in dir_path.iterdir()
+                    if f.is_file() and any(f.name.lower().endswith(ext) for ext in document_extensions)]
+    return {
+        'all': all_files,
+        'pdf': [f for f in all_files if f.lower().endswith('.pdf')],
+        'txt': [f for f in all_files if f.lower().endswith('.txt')],
+        'html': [f for f in all_files if f.lower().endswith('.html')],
+        'md': [f for f in all_files if f.lower().endswith('.md')],
+    }
+async def search_papers(
+        ctx: RunContext[PaperQADependencies],
+        query: str,
+        max_papers: Optional[int] = None,
+) -> Any:
+    """
+    Search for papers relevant to the query using PaperQA.
+    Args:
+        ctx: The run context
+        query: The search query
+        max_papers: Maximum number of papers to return (overrides config)
+    Returns:
+        A simplified response with paper details and metadata
+    """
+    try:
+        settings = ctx.deps.set_paperqa_settings()
+        if max_papers is not None:
+            settings.agent.search_count = max_papers
+        try:
+            index = await get_directory_index(settings=settings, build=False)
+            index_files = await index.index_files
+            logger.info(f"Found existing index with {len(index_files)} files")
+        except Exception as e:
+            # If the error is about an empty index, try to build it
+            if "was empty, please rebuild it" in str(e):
+                logger.info("Index is empty, attempting to rebuild...")
+                index = await get_directory_index(settings=settings, build=True)
+                index_files = await index.index_files
+                if not index_files:
+                    return {
+                        "message": "No papers are currently indexed. You can add papers using the add_paper function.",
+                        "papers": []
+                    }
+            else:
+                raise
+        response = await agent_query(
+            query=f"Find scientific papers about: {query}",
+            settings=settings
+        )
+        return response
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        if "was empty, please rebuild it" in str(e):
+            return {
+                "message": "No papers are currently indexed. You can add papers using the add_paper function.",
+                "papers": []
+            }
+        raise ModelRetry(f"Error searching papers: {str(e)}")
+async def query_papers(
+        ctx: RunContext[PaperQADependencies],
+        query: str,
+) -> Any:
+    """
+    Query the papers to answer a specific question using PaperQA.
+    Args:
+        ctx: The run context
+        query: The question to answer based on the papers
+    Returns:
+        The full PQASession object with the answer and context
+    """
+    try:
+        settings = ctx.deps.set_paperqa_settings()
+        try:
+            # First try to get the index without building
+            index = await get_directory_index(settings=settings, build=False)
+            index_files = await index.index_files
+            # If we get here, the index exists and has files
+            if not index_files:
+                return {
+                    "message": "No papers are currently indexed. You can add papers using the add_paper function.",
+                    "papers": []
+                }
+        except Exception as e:
+            if "was empty, please rebuild it" in str(e):
+                return {
+                    "message": "No papers are currently indexed. You can add papers using the add_paper function.",
+                    "papers": []
+                }
+            else:
+                raise
+        response = await agent_query(
+            query=query,
+            settings=settings
+        )
+        return response
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        if "was empty, please rebuild it" in str(e):
+            return {
+                "message": "No papers are currently indexed. You can add papers using the add_paper function.",
+                "papers": []
+            }
+        raise ModelRetry(f"Error querying papers: {str(e)}")
+async def build_index(
+    ctx: RunContext[PaperQADependencies],
+) -> Any:
+    """
+    Rebuild the search index for papers.
+    Args:
+        ctx: The run context
+    Returns:
+        Information about the indexing process
+    """
+    try:
+        settings = ctx.deps.set_paperqa_settings()
+        paper_directory = settings.agent.index.paper_directory
+        os.makedirs(paper_directory, exist_ok=True)
+        doc_files = get_document_files(paper_directory)
+        if not doc_files['all']:
+            return create_response(
+                success=True,
+                paper_directory=paper_directory,
+                doc_files=doc_files,
+                indexed_files={},
+                message=f"No indexable documents found in {paper_directory}. Add documents (PDF, TXT, HTML, MD) to this directory before indexing."
+            )
+        try:
+            logger.info(f"Building index for {len(doc_files['all'])} documents in {paper_directory}:")
+            if doc_files['pdf']:
+                logger.info(f"  - {len(doc_files['pdf'])} PDF files")
+            if doc_files['txt']:
+                logger.info(f"  - {len(doc_files['txt'])} text files")
+            if doc_files['html']:
+                logger.info(f"  - {len(doc_files['html'])} HTML files")
+            if doc_files['md']:
+                logger.info(f"  - {len(doc_files['md'])} Markdown files")
+            index = await get_directory_index(settings=settings, build=True)
+            index_files = await index.index_files
+            if not index_files:
+                return create_response(
+                    success=True,
+                    paper_directory=paper_directory,
+                    doc_files=doc_files,
+                    indexed_files={},
+                    documents_found=doc_files,
+                    message=f"Found {len(doc_files['all'])} documents but none were successfully indexed. This could be due to parsing issues with the documents."
+                )
+            return create_response(
+                success=True,
+                paper_directory=paper_directory,
+                doc_files=doc_files,
+                indexed_files=index_files,
+                message=f"Successfully indexed {len(index_files)} document chunks from {len(doc_files['all'])} files."
+            )
+        except Exception as e:
+            return create_response(
+                success=False,
+                paper_directory=paper_directory,
+                doc_files=doc_files,
+                message=f"Error indexing documents: {str(e)}",
+                error=str(e)
+            )
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error building index: {str(e)}")
+async def add_paper(
+    ctx: RunContext[PaperQADependencies],
+    path: str,
+    citation: Optional[str] = None,
+    auto_index: bool = True,
+) -> Any:
+    """
+    Add a specific paper to the collection.
+    Args:
+        ctx: The run context
+        path: Path to the paper file or URL
+        citation: Optional citation for the paper
+        auto_index: Whether to automatically rebuild the index after adding the paper
+    Returns:
+        Information about the added paper
+    """
+    try:
+        settings = ctx.deps.set_paperqa_settings()
+        paper_directory = settings.agent.index.paper_directory
+        os.makedirs(paper_directory, exist_ok=True)
+        # For URLs, we need to:
+        # 1. Download the PDF
+        # 2. Save it to the paper directory
+        # 3. Process it with Docs
+        if path.startswith(("http://", "https://")):
+            import requests
+            from urllib.parse import urlparse
+            url_parts = urlparse(path)
+            file_name = os.path.basename(url_parts.path)
+            if not file_name or not file_name.lower().endswith('.pdf'):
+                file_name = "paper.pdf"
+            target_path = os.path.join(paper_directory, file_name)
+            try:
+                response = requests.get(path, stream=True)
+                response.raise_for_status()
+                with open(target_path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                logger.info(f"Downloaded {path} to {target_path}")
+                docs = Docs()
+                docname = await docs.aadd(
+                    path=target_path,
+                    citation=citation,
+                    settings=settings,
+                )
+            except Exception as e:
+                # If download fails, fall back to docs.aadd_url
+                logger.warning(f"Download failed: {str(e)}, falling back to docs.aadd_url")
+                docs = Docs()
+                docname = await docs.aadd_url(
+                    url=path,
+                    citation=citation,
+                    settings=settings,
+                )
+                # If we successfully added it with aadd_url, try to find where it saved the file
+                if docname and hasattr(docs, 'docs') and docname in docs.docs:
+                    doc = docs.docs[docname]
+                    if hasattr(doc, 'filepath') and os.path.exists(doc.filepath):
+                        import shutil
+                        target_path = os.path.join(paper_directory, f"{docname}.pdf")
+                        if not os.path.exists(target_path):
+                            shutil.copy2(doc.filepath, target_path)
+                            logger.info(f"Copied from {doc.filepath} to {target_path}")
+        else:
+            # For file paths, copy to paper directory if needed
+            if not os.path.isabs(path):
+                full_path = os.path.join(ctx.deps.paper_directory, path)
+                if os.path.exists(full_path):
+                    path = full_path
+                else:
+                    full_path = os.path.join(ctx.deps.workdir.location, path)
+                    if os.path.exists(full_path):
+                        path = full_path
+            # If the path is outside the paper directory, copy it there
+            if os.path.exists(path) and paper_directory not in path:
+                import shutil
+                target_path = os.path.join(paper_directory, os.path.basename(path))
+                if not os.path.exists(target_path):
+                    shutil.copy2(path, target_path)
+            docs = Docs()
+            docname = await docs.aadd(
+                path=path,
+                citation=citation,
+                settings=settings,
+            )
+        if docname:
+            doc = next((d for d in docs.docs.values() if d.docname == docname), None)
+            result = {
+                "success": True,
+                "docname": docname,
+                "doc": doc,
+            }
+            if auto_index:
+                try:
+                    index_result = await build_index(ctx)
+                    result["index_result"] = index_result
+                    if index_result["success"]:
+                        result["message"] = f"Paper added and indexed successfully. {index_result['indexed_papers_count']} papers now in the index."
+                    else:
+                        result["message"] = f"Paper added but indexing failed: {index_result['error']}"
+                except Exception as e:
+                    result["message"] = f"Paper added but indexing failed: {str(e)}"
+            else:
+                result["message"] = "Paper added successfully. Use 'aurelian paperqa index' to rebuild the index to make this paper searchable."
+            return result
+        else:
+            return {
+                "success": False,
+                "message": "Paper was already in the collection."
+            }
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error adding paper: {str(e)}")
+async def add_papers(
+        ctx: RunContext[PaperQADependencies],
+        directory: str,
+        citation: Optional[str] = None,
+        auto_index: bool = True,
+) -> Any:
+    """
+    Add multiple papers from a directory to the collection.
+    Args:
+        ctx: The run context
+        directory: Path to the directory containing papers
+        citation: Optional citation format to use for all papers (paper filename will be appended)
+        auto_index: Whether to automatically rebuild the index after adding the papers
+    Returns:
+        Information about the added papers
+    """
+    try:
+        settings = ctx.deps.set_paperqa_settings()
+        paper_directory = settings.agent.index.paper_directory
+        os.makedirs(paper_directory, exist_ok=True)
+        if not Path(directory).is_dir():
+            return create_response(
+                success=False,
+                paper_directory=paper_directory,
+                doc_files={"all": [], "pdf": [], "txt": [], "html": [], "md": []}
+            )
+        doc_files = get_document_files(directory)
+        if not doc_files['all']:
+            return create_response(
+                success=False,
+                paper_directory=paper_directory,
+                doc_files=doc_files
+            )
+        logger.info(f"Found {len(doc_files['all'])} documents in {directory}:")
+        if doc_files['pdf']:
+            logger.info(f"  - {len(doc_files['pdf'])} PDF files")
+        if doc_files['txt']:
+            logger.info(f"  - {len(doc_files['txt'])} text files")
+        if doc_files['html']:
+            logger.info(f"  - {len(doc_files['html'])} HTML files")
+        if doc_files['md']:
+            logger.info(f"  - {len(doc_files['md'])} Markdown files")
+        docs = Docs()
+        added_papers = []
+        for doc_file in doc_files['all']:
+            file_path = os.path.join(directory, doc_file)
+            try:
+                logger.info(f"Adding document: {file_path}")
+                doc_citation = None
+                if citation:
+                    doc_citation = f"{citation} - {doc_file}"
+                if Path(file_path).exists() and paper_directory not in file_path:
+                    import shutil
+                    target_path = os.path.join(paper_directory, os.path.basename(file_path))
+                    if not Path(target_path).exists():
+                        shutil.copy2(file_path, target_path)
+                        logger.info(f"Copied {file_path} to {target_path}")
+                docname = await docs.aadd(
+                    path=file_path,
+                    citation=doc_citation,
+                    settings=settings,
+                )
+                if docname:
+                    doc = next((d for d in docs.docs.values() if d.docname == docname), None)
+                    added_papers.append({
+                        "file": doc_file,
+                        "docname": docname,
+                        "citation": doc_citation,
+                        "doc": doc
+                    })
+                    logger.info(f"Successfully added document: {doc_file}")
+            except Exception as e:
+                logger.error(f"Error adding {file_path}: {e}")
+        index_result = None
+        if auto_index and added_papers:
+            try:
+                index_result = await build_index(ctx)
+                logger.info(f"Index rebuilt with {len(index_result.get('indexed_papers', []))} papers")
+            except Exception as e:
+                logger.error(f"Error rebuilding index: {e}")
+                index_result = {"success": False, "error": str(e)}
+        response = create_response(
+            success=True,
+            paper_directory=paper_directory,
+            doc_files=doc_files,
+            message=f"Successfully added {len(added_papers)} documents out of {len(doc_files['all'])}",
+            documents_added=len(added_papers),
+            added_documents=added_papers
+        )
+        if index_result:
+            response["index_result"] = index_result
+        return response
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error adding papers: {str(e)}")
+async def list_papers(
+    ctx: RunContext[PaperQADependencies],
+) -> Any:
+    """
+    List all papers in the current paper directory.
+    Args:
+        ctx: The run context
+    Returns:
+        Information about all papers in the paper directory
+    """
+    try:
+        settings = ctx.deps.set_paperqa_settings()
+        paper_directory = settings.agent.index.paper_directory
+        doc_files = get_document_files(paper_directory)
+        indexed_files = []
+        try:
+            index = await get_directory_index(settings=settings, build=False)
+            index_files = await index.index_files
+            indexed_files = list(index_files.keys())
+            logger.info(f"Found {len(indexed_files)} indexed document chunks")
+        except Exception:
+            logger.info("No index found or index is empty")
+        return create_response(
+            success=True,
+            paper_directory=paper_directory,
+            doc_files=doc_files,
+            indexed_files=indexed_files,
+            message=f"Found {len(doc_files['all'])} documents and {len(indexed_files)} indexed chunks",
+            files_in_directory=doc_files['all'],
+            files_by_type={
+                "pdf": doc_files['pdf'],
+                "txt": doc_files['txt'],
+                "html": doc_files['html'],
+                "md": doc_files['md']
+            },
+            note="To search papers, they must be both in the paper directory AND indexed. If there are files in the directory but not indexed, use the CLI command 'aurelian paperqa index -d <directory>' to index them."
+        )
+    except Exception as e:
+        if "ModelRetry" in str(type(e)):
+            raise e
+        raise ModelRetry(f"Error listing papers: {str(e)}")

aurelian/agents/phenopackets/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""
+Phenopackets agent module for working with phenopacket databases.
+"""

aurelian/agents/phenopackets/phenopackets_agent.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""
+Agent for working with phenopacket databases.
+"""
+from aurelian.agents.phenopackets.phenopackets_config import PhenopacketsDependencies
+from aurelian.agents.phenopackets.phenopackets_tools import (
+    search_phenopackets,
+    lookup_phenopacket,
+    lookup_pmid,
+    search_web,
+    retrieve_web_page
+)
+from aurelian.agents.filesystem.filesystem_tools import inspect_file, list_files
+from pydantic_ai import Agent, Tool
+# System prompt handed to phenopackets_agent below (system_prompt=SYSTEM).
+# The tool names mentioned in the prompt text match tool functions imported above.
+SYSTEM = """
+You are an AI assistant that can answer questions using the Phenopacket database.
+Phenopackets are standardized data structures for representing phenotypic and genetic information
+about patients with rare diseases or genetic disorders.
+You can help with:
+- Searching for phenopackets by disease, phenotype, gene, etc.
+- Looking up specific phenopackets by ID
+- Analyzing and comparing information from multiple phenopackets
+- Finding correlations between phenotypes, genes, and variants
+- Retrieving literature related to phenopackets via PubMed
+You can use different functions to access the database:
+- `search_phenopackets` to find phenopackets by text query
+- `lookup_phenopacket` to retrieve a specific phenopacket by ID
+- `lookup_pmid` to retrieve the text of a PubMed article
+- `search_web` and `retrieve_web_page` for additional information
+Always use the database and functions provided to answer questions, rather than providing
+your own knowledge, unless explicitly asked. Provide answers in a narrative form
+understandable by clinical geneticists, with supporting evidence from the database.
+When presenting terms, include IDs alongside labels when available (e.g., HP:0001234).
+All prefixed IDs should be hyperlinked with Bioregistry, i.e., https://bioregistry.io/{curie}.
+Use markdown tables for summarizing or comparing multiple patients, with appropriate
+column headers and clear organization of information.
+"""
+phenopackets_agent = Agent(
+    model="openai:gpt-4o",
+    deps_type=PhenopacketsDependencies,
+    system_prompt=SYSTEM,
+    tools=[
+        Tool(search_phenopackets),
+        Tool(lookup_phenopacket),
+        Tool(lookup_pmid),
+        Tool(search_web),
+        Tool(retrieve_web_page),
+        Tool(inspect_file),
+        Tool(list_files),
+    ]
+)

aurelian/agents/phenopackets/phenopackets_config.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+Configuration classes for the phenopackets agent.
+"""
+from dataclasses import dataclass, field
+import os
+from typing import Optional
+from linkml_store import Client
+from linkml_store.api import Collection
+from aurelian.dependencies.workdir import HasWorkdir, WorkDir
+# Default database handle; get_config() lets PHENOPACKETS_DB_PATH override it.
+HANDLE = "mongodb://localhost:27017/phenopackets"
+# Default alias under which the database is attached; overridden by PHENOPACKETS_DB_NAME.
+DB_NAME = "phenopackets"
+# Default collection name; overridden by PHENOPACKETS_COLLECTION.
+COLLECTION_NAME = "main"
+@dataclass
+class PhenopacketsDependencies(HasWorkdir):
+    """
+    Configuration for the phenopackets agent.
+    """
+    max_results: int = field(default=10)
+    db_path: str = field(default=HANDLE)
+    db_name: str = field(default=DB_NAME)
+    collection_name: str = field(default=COLLECTION_NAME)
+    _collection: Optional[Collection] = None
+    def __post_init__(self):
+        """Initialize the config with default values."""
+        # Initialize workdir if not provided
+        if self.workdir is None:
+            self.workdir = WorkDir()
+    @property
+    def collection(self) -> Collection:
+        """
+        Get the phenopackets collection, initializing the connection if needed.
+        Returns:
+            Collection: The phenopackets collection
+        """
+        if self._collection is None:
+            client = Client()
+            print(f"Attaching to database: {self.db_path} with alias: {self.db_name}")
+            client.attach_database(self.db_path, alias=self.db_name)
+            db = client.databases[self.db_name]
+            self._collection = db.get_collection(self.collection_name)
+        return self._collection
+def get_config() -> PhenopacketsDependencies:
+    """
+    Get the Phenopackets configuration from environment variables or defaults.
+    Returns:
+        PhenopacketsDependencies: The phenopackets dependencies
+    """
+    workdir_path = os.environ.get("AURELIAN_WORKDIR", None)
+    workdir = WorkDir(location=workdir_path) if workdir_path else None
+    # Get any environment-specific settings
+    db_path = os.environ.get("PHENOPACKETS_DB_PATH", HANDLE)
+    db_name = os.environ.get("PHENOPACKETS_DB_NAME", DB_NAME)
+    collection_name = os.environ.get("PHENOPACKETS_COLLECTION", COLLECTION_NAME)
+    return PhenopacketsDependencies(
+        workdir=workdir,
+        db_path=db_path,
+        db_name=db_name,
+        collection_name=collection_name
+    )