aurelian 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +94 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +85 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +46 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +72 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +43 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
- aurelian/agents/gocam/documents/Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/Regulatory Processes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +240 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +67 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +186 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +55 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +25 -0
- aurelian/agents/monarch/monarch_agent.py +44 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +113 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +83 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/talisman_agent.py +126 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +168 -0
- aurelian/agents/talisman/talisman_tools.py +720 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +37 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +108 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +800 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +15 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.3.2.dist-info/LICENSE +22 -0
- aurelian-0.3.2.dist-info/METADATA +105 -0
- aurelian-0.3.2.dist-info/RECORD +254 -0
- aurelian-0.3.2.dist-info/WHEEL +4 -0
- aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
"""
|
2
|
+
Gradio interface for the Checklist agent.
|
3
|
+
"""
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
import gradio as gr
|
7
|
+
|
8
|
+
from .checklist_agent import checklist_agent
|
9
|
+
from .checklist_config import ChecklistDependencies, get_config
|
10
|
+
from aurelian.utils.async_utils import run_sync
|
11
|
+
|
12
|
+
|
13
|
+
async def get_info(query: str, history: List[str], deps: ChecklistDependencies) -> str:
    """
    Process a query using the checklist agent.

    Args:
        query: The user query
        history: The conversation history
        deps: The dependencies configuration

    Returns:
        The agent's response
    """
    print(f"QUERY: {query}")
    print(f"HISTORY: {history}")

    # Add history to the query if available.
    # FIX: the "## History" heading was previously appended with no newline,
    # gluing it onto the last line of the user's query; separate it properly.
    if history:
        query += "\n\n## History"
        for h in history:
            query += f"\n{h}"

    # Run the agent
    result = await checklist_agent.run(query, deps=deps)
    return result.data
|
37
|
+
|
38
|
+
|
39
|
+
def chat(deps: Optional[ChecklistDependencies] = None, **kwargs):
    """
    Create a Gradio chat interface for the Checklist agent.

    Args:
        deps: Optional dependencies configuration
        kwargs: Additional keyword arguments for the agent

    Returns:
        A Gradio ChatInterface
    """
    if deps is None:
        deps = get_config()

    def handle_query(query: str, history: List[str]) -> str:
        # Bridge the async agent call into Gradio's synchronous callback.
        return run_sync(lambda: get_info(query, history, deps))

    example_prompts = [
        ["Evaluate https://journals.asm.org/doi/10.1128/mra.01361-19 using STREAMS"],
        [
            (
                "Check the paper 'Exploration of the Biosynthetic Potential of the Populus Microbiome'"
                " https://journals.asm.org/doi/10.1128/msystems.00045-18"
            )
        ],
    ]

    return gr.ChatInterface(
        fn=handle_query,
        type="messages",
        title="Checklist AI Assistant",
        examples=example_prompts,
    )
|
@@ -0,0 +1,86 @@
|
|
1
|
+
"""
|
2
|
+
MCP tools for validating papers against checklists.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
|
9
|
+
import aurelian.agents.checklist.checklist_tools as ct
|
10
|
+
from aurelian.agents.checklist.checklist_agent import checklist_agent
|
11
|
+
from aurelian.agents.checklist.checklist_config import ChecklistDependencies
|
12
|
+
from pydantic_ai import RunContext
|
13
|
+
|
14
|
+
# Initialize FastMCP server
|
15
|
+
mcp = FastMCP("checklist", instructions=checklist_agent.system_prompt)
|
16
|
+
|
17
|
+
|
18
|
+
from aurelian.dependencies.workdir import WorkDir
|
19
|
+
|
20
|
+
def deps() -> ChecklistDependencies:
    """Build the dependency bundle used by the MCP tools.

    The working directory is taken from the AURELIAN_WORKDIR environment
    variable, defaulting to /tmp/aurelian.

    Returns:
        A ChecklistDependencies instance with its workdir configured.
    """
    # FIX: local variable previously shadowed this function's own name.
    dependencies = ChecklistDependencies()
    # Set the location from environment variable or default
    loc = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
    dependencies.workdir = WorkDir(loc)
    return dependencies
|
26
|
+
|
27
|
+
def ctx() -> RunContext[ChecklistDependencies]:
    """Construct a minimal RunContext wrapping the default dependencies."""
    # model/usage/prompt are not needed for direct tool invocation.
    return RunContext[ChecklistDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
|
33
|
+
|
34
|
+
|
35
|
+
@mcp.system_prompt
def add_checklists():
    """Add available checklists to the system prompt."""
    meta = ct.all_checklists()
    entries = [f"- {c['id']}: {c['title']}" for c in meta["checklists"]]
    return "\n".join(entries)
|
40
|
+
|
41
|
+
|
42
|
+
@mcp.tool()
async def retrieve_text_from_pmid(pmid: str) -> str:
    """
    Lookup the text of a PubMed ID, using its PMID.

    Args:
        pmid: The PubMed ID to look up

    Returns:
        Full text if available, otherwise abstract
    """
    # Delegate to the shared tools module with a fresh run context.
    run_ctx = ctx()
    return await ct.retrieve_text_from_pmid(run_ctx, pmid)
|
54
|
+
|
55
|
+
|
56
|
+
@mcp.tool()
async def retrieve_text_from_doi(doi: str) -> str:
    """
    Lookup the text of a DOI.

    Args:
        doi: The DOI to look up

    Returns:
        Full text if available, otherwise abstract
    """
    # Delegate to the shared tools module with a fresh run context.
    run_ctx = ctx()
    return await ct.retrieve_text_from_doi(run_ctx, doi)
|
68
|
+
|
69
|
+
|
70
|
+
@mcp.tool()
async def fetch_checklist(checklist_id: str) -> str:
    """
    Lookup the checklist entry for a given checklist accession number.

    Args:
        checklist_id: The checklist ID (e.g. STREAM, STORMS, ARRIVE)

    Returns:
        The content of the checklist
    """
    # Delegate to the shared tools module with a fresh run context.
    run_ctx = ctx()
    return await ct.fetch_checklist(run_ctx, checklist_id)
|
82
|
+
|
83
|
+
|
84
|
+
if __name__ == "__main__":
    # Initialize and run the server
    # stdio transport lets an MCP client launch this module as a subprocess
    # and exchange messages over stdin/stdout.
    mcp.run(transport='stdio')
|
@@ -0,0 +1,141 @@
|
|
1
|
+
"""
|
2
|
+
Tools for the Checklist agent.
|
3
|
+
"""
|
4
|
+
import asyncio
|
5
|
+
from typing import Dict
|
6
|
+
import yaml
|
7
|
+
|
8
|
+
from pydantic_ai import RunContext, ModelRetry
|
9
|
+
|
10
|
+
from aurelian.utils.pubmed_utils import get_doi_text, get_pmid_text
|
11
|
+
from . import CONTENT_DIR, CONTENT_METADATA_PATH
|
12
|
+
from .checklist_config import ChecklistDependencies
|
13
|
+
|
14
|
+
|
15
|
+
def all_checklists() -> Dict:
    """
    Get all available checklists.

    Returns:
        Dictionary of all available checklists
    """
    # Read the bundled metadata file and parse it as YAML.
    with open(CONTENT_METADATA_PATH) as fh:
        raw = fh.read()
    return yaml.safe_load(raw)
|
24
|
+
|
25
|
+
|
26
|
+
async def retrieve_text_from_pmid(
    ctx: RunContext[ChecklistDependencies],
    pmid: str
) -> str:
    """
    Lookup the text of a PubMed ID, using its PMID.

    Args:
        ctx: The run context
        pmid: The PubMed ID to look up

    Returns:
        Full text if available, otherwise abstract

    Raises:
        ModelRetry: If no text could be retrieved for the PMID.
    """
    print(f"LOOKUP PMID: {pmid}")

    try:
        # Execute the potentially blocking operation in a thread pool
        text = await asyncio.to_thread(get_pmid_text, pmid)
    # FIX: re-raise ModelRetry directly instead of the fragile
    # `"ModelRetry" in str(type(e))` string check used previously.
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error retrieving text from PMID: {str(e)}") from e

    # Empty-text check lives outside the try so it is never re-wrapped.
    if not text or text.strip() == "":
        raise ModelRetry(f"No text found for PMID: {pmid}")

    return text
|
54
|
+
|
55
|
+
|
56
|
+
async def retrieve_text_from_doi(
    ctx: RunContext[ChecklistDependencies],
    doi: str
) -> str:
    """
    Lookup the text of a DOI.

    Args:
        ctx: The run context
        doi: The DOI to look up

    Returns:
        Full text if available, otherwise abstract

    Raises:
        ModelRetry: If no text could be retrieved for the DOI.
    """
    print(f"LOOKUP DOI: {doi}")

    try:
        # Execute the potentially blocking operation in a thread pool
        text = await asyncio.to_thread(get_doi_text, doi)
    # FIX: re-raise ModelRetry directly instead of the fragile
    # `"ModelRetry" in str(type(e))` string check used previously.
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error retrieving text from DOI: {str(e)}") from e

    # Empty-text check lives outside the try so it is never re-wrapped.
    if not text or text.strip() == "":
        raise ModelRetry(f"No text found for DOI: {doi}")

    return text
|
84
|
+
|
85
|
+
|
86
|
+
async def fetch_checklist(
    ctx: RunContext[ChecklistDependencies],
    checklist_id: str
) -> str:
    """
    Lookup the checklist entry for a given checklist accession number.

    Args:
        ctx: The run context
        checklist_id: The checklist ID (e.g. STREAM, STORMS, ARRIVE)

    Returns:
        The content of the checklist

    Raises:
        ModelRetry: If the checklist is unknown, or its file is missing/empty.
    """
    try:
        meta = all_checklists()

        # Match against either the id or the title, case-insensitively.
        # FIX: the two duplicated `if` blocks are collapsed into one lookup.
        wanted = checklist_id.lower()
        selected_checklist = next(
            (
                c
                for c in meta["checklists"]
                if c["id"].lower() == wanted or c["title"].lower() == wanted
            ),
            None,
        )

        if not selected_checklist:
            available_checklists = ", ".join(c["id"] for c in meta["checklists"])
            raise ModelRetry(
                f"Could not find checklist with ID {checklist_id}. "
                f"Available checklists: {available_checklists}"
            )

        # Get the checklist file.
        # FIX: renamed local so it no longer shadows the builtin `id`.
        checklist_key = selected_checklist["id"]
        path = CONTENT_DIR / f"{checklist_key}.csv"

        if not path.exists():
            raise ModelRetry(f"Checklist file not found: {path}")

        # Read the checklist file
        with open(path) as f:
            content = f.read()

        if not content or content.strip() == "":
            raise ModelRetry(f"Checklist file is empty: {path}")

        return content
    # FIX: re-raise ModelRetry directly instead of the fragile
    # `"ModelRetry" in str(type(e))` string check used previously.
    except ModelRetry:
        raise
    except Exception as e:
        raise ModelRetry(f"Error fetching checklist: {str(e)}") from e
|
@@ -0,0 +1,7 @@
|
|
1
|
+
description: all checklists
|
2
|
+
checklists:
|
3
|
+
- id: streams
|
4
|
+
title: Standards for Technical Reporting in Environmental and host-Associated Microbiome Studies (STREAMS) guidelines
|
5
|
+
keywords: [Environmental microbiome]
|
6
|
+
description: Data and knowledge sharing in microbiome research presents unique challenges owing to the interdisciplinary nature of the field coupled with the generation of complex multi-omics data. For human microbiome research, the Strengthening the Organization and Reporting of Microbiome Studies (STORMS) reporting guidelines are an effective checklist for the organization of necessary study, experimental design, and analytical methods within a scientific manuscript. Here, we expand upon the STORMS reporting guidelines to include environmental, non-human host-associated, and synthetic microbiomes. The Standards for Technical Reporting in Environmental and host-Associated Microbiome Studies (STREAMS) guidelines are a consensus from input received from nearly 250 researchers spanning 30 countries. The STREAMS guidelines include 18 items organized to support the reporting of microbiome studies in a robust, standardized, and machine-actionable manner. We provide detailed guidance on each section and comparison with STORMS, along with example case studies that demonstrate the usage of STREAMS. Additional implementations of the STREAMS guidelines are found in the accompanying DMPTool templates, along with information on available scientific journals that encourage usage when publishing. Future efforts to update STREAMS versions and coordination with the STORMS consortium will follow best practices in building community consensus.
|
7
|
+
|
@@ -0,0 +1,136 @@
|
|
1
|
+
Number,Item,Recommendation,Item Source,Additional Guidance,Yes/No/NA,Comments or location in manuscript,Example(s),Present in the manuscript? Yes/No/NA,Comments or location in manuscript
|
2
|
+
Abstract ,,,,,,,,,
|
3
|
+
1.0,Structured or unstructured abstract,"Abstract should include information on background, methods, results, conclusions, and significance in a structured or unstructured format.",STORMS,"While the information included in this section is recommended, the word limit for the abstract (determined by the journal) must be considered. A graphical or video abstract may also be required or encouraged which should outline the major points of the manuscript.",,,,,
|
4
|
+
1.1,Study design,State study design in abstract.,STORMS,See 3.0 for additional information on study design.,,,,,
|
5
|
+
1.2,Environmental & sample information,Describe the specific environmental system and the sample types studied. ,STREAMS,"Describe the specific source(s) of the microbiome(s) - environment(s) sampled (e.g., soil) or specific host site/tissue(s) (e.g., midgut). Add the scientific names of any specific microbes or other organisms that are studied in the paper. ",,,,,
|
6
|
+
1.3,Host information (if applicable),"Describe any associated host(s) in terms of taxonomy and/or identifying characteristics or conditions (e.g., a diseased state).",STREAMS,Include number of individuals included in the study. Include the host NCBI taxonomy ID or other relevant identifying information if possible. More details about the host(s) can be reported later in the manuscript (Item 3.4).,,,,,
|
7
|
+
1.4,Experiments & omics methods,State any experimental and omics strategies used to obtain the results.,Modified STORMS,"Note any experiments performed and the data types generated (e.g., soil biogeochemistry, observational data) along with the omics methods utilized - for example, targeted ITS or 16S rRNA gene amplicon sequencing, metagenomics, metatranscriptomics, metabolomics, metaproteomics. Describe if the study was based on existing datasets.",,,,,
|
8
|
+
1.5,Analyses & results,"Briefly describe the analyses performed, the results obtained, and the significance of the results.",STREAMS,"Include a mention of the analyses performed (e.g., functional annotations), key results, and how these represent significant contributions to the broader context of the field. ",,,,,
|
9
|
+
Introduction,,,,,,,,,
|
10
|
+
2.0,Background & rationale,"Summarize the underlying motivation, background, scientific evidence, or theory driving the hypothesis or research question(s) and study objective(s). ",Modified STORMS,"Include proper references and citations to previous work in the field. If applicable, include references (including DOIs when possible) to previously generated datasets available through public repositories that this study is reusing or building upon, or are relevant to the background of the study. Describe the knowledge gap(s) in the field and how this study addresses those.",,,,,
|
11
|
+
2.1,Hypotheses or questions,"State the research question(s), aim(s), and/or objective(s) along with hypotheses (if applicable).",Modified STORMS,Describe how the research questions or hypotheses contribute to the broader significance of this area of research.,,,,,
|
12
|
+
Methods,,,,,,,,,
|
13
|
+
3.0,Study design,Describe the overall study design.,STORMS,"Describe if the study is observational or experimental, alternatively identify which aspects of the study were observational and which were experimental. Describe the spatiotemporal aspects of the study. Ensure biological and technical replicates are considered and reported here and/or in Item 6.2. Describe if the study was performed in the field, in a greenhouse, growth chamber, laboratory conditions, or other set-up. Briefly describe any key limitations to the study design. Include all other study design information relevant to the methods and results (e.g., randomization).
|
14
|
+
|
15
|
+
Describe if the study is an analysis or combined analysis of existing data and/or describe how previously published data is used alongside newly generated data. ",,,,,
|
16
|
+
3.1,Samples,State and describe all sample types involved in the study. ,STREAMS; MIxS,"Define the ID/naming scheme and if the samples have unique identifiers. Report sample metadata using MIxS standards ( https://genomicsstandardsconsortium.github.io/mixs/ ) whenever possible (report full metadata in supplementary information and/or in public repository). Provide a justification if no metadata standards are used.
|
17
|
+
|
18
|
+
If samples are constructed (e.g., a synthetic community) describe in detail the methods and all steps used to construct these samples, referring to standards in this field ( https://doi.org/10.1038/s41564-024-01833-4 ) and noting how many organisms are within the synthetic community, their taxonomy, and other relevant characteristics.
|
19
|
+
|
20
|
+
Describe and cite (e.g., publications, DOIs, accessions, catalogue information) publicly available samples and data being used if the study involves sample, synthetic community, and/or data reuse.",,,,,
|
21
|
+
3.2,Environmental context & geographic location,State the environmental context(s) and geographical region(s) where the samples originated.,"ENVO; MIxS: geographic location (country and/or sea, region); MIxS: lat_lon; Specific ENVO fields (MIxS standard): env_broad_scale, env_local_scale, env_medium ; STORMS","Geographic coordinates (latitude and longitude) should be reported according to MIxS to prevent potential ambiguities.
|
22
|
+
|
23
|
+
Also use the ENVO standards ( https://sites.google.com/site/environmentontology/ ) to report on the environment
|
24
|
+
|
25
|
+
Justification should be provided if geographic coordinates are not provided (e.g., in cases where there are issues with privacy, requests from indigenous communities to not share specific locations, collected on the International Space Station). State the spatial scale of the overall study if applicable.",,,,,
|
26
|
+
3.3,Relevant dates,State the start and end dates for sample and data collection. Indicate if the study is longitudinal/a time-series and indicate sampling frequency or other relevant dates throughout the study.,Modified STORMS; MIxS,"Report collection date according to MIxS (ISO8601 standard); report on sampling time if possible (ISO8601 formatted) and report on other temporal factors of importance e.g. seasonality. For time-series studies, state how many follow-ups were conducted, describe sample size at follow-up by group or condition, and discuss any loss at each time point. Provide justification if dates cannot be provided (e.g., due to privacy). For data reuse applications, report any relevant dates (e.g., original sampling date, date of data access)",,,,,
|
27
|
+
3.4,Host(s) (if applicable),Describe any associated non-human host(s) including taxonomy and any relevant characteristics or conditions.,STREAMS; MIxS host-associated extension; OBO Foundry,"If the microbiome originated from a host (e.g., plant, animal), describe characteristics such as the taxonomy (including an NCBI taxonomy ID if possible; and/or references to the most current taxonomy; or state if taxonomy is unknown or could only be assessed to a certain taxonomic level), number of host(s) included in the study, how host(s) were selected (e.g., which plants out of a large field were chosen), defining characteristics, age or growth stage, genetic information, cultivar information, sex, feeding or watering regime, habitat information and if sampled populations were from natural environments or maintained in laboratory conditions, relevant conditions and other phenotypes (e.g., diseased plant host vs. healthy plant host, level of disease severity), and other pertinent information to the host and study.
|
28
|
+
|
29
|
+
For reporting taxonomy, report what tool/database/standard was used for taxonomic hierarchy classification, the taxonomy version number, and any previously known taxonomic names if applicable. If there are multiple hosts (e.g., bacterial symbiont within a fungus within an animal gut), indicate this relationship and note how the hosts will be referred to within the study. Utilize existing ontologies whenever possible: Plants: https://obofoundry.org/ontology/to.html MIxS host-associated extension.",,,,,
|
30
|
+
3.5,Ethics,State permit information and the guidelines followed for research ethics.,Modified STORMS,"Provide statements regarding ethics protocols (e.g. Nagoya Protocol); provide permit information or indicate if permits/permissions were not necessary and provide justification; describe if provenance and due diligence were performed to ensure CARE principles were followed ( https://www.gida-global.org/care ); If applicable, state the name of the institutional review board (IRB) (and indigenous community if applicable) that approved the study and protocols, protocol number and date of approval, and procedures for working with animal hosts. Describe other permissions that were obtained (e.g., from a private landowner). Utilize Local Contexts Labels and Notices for indigenous data when applicable ( https://localcontexts.org ). List IACUC approval information if applicable. Include CITES ( https://www.fws.gov/international-affairs/cites ) or USDA APHIS ( https://www.aphis.usda.gov ) permits and information when applicable. The provided list of permits is not exhaustive, and significant research should be done by the team during the experimental design phase to ensure all relevant permits and permissions (and/or exceptions) are obtained. Depending on the journal, this information can be reported in the Acknowledgements as described in Item 14.0 or in the Supplementary Information (Item 16.0).",,,,,
|
31
|
+
3.6,Environmental conditions & experimental treatments,"List the experimental treatment(s), the conditions the environment or host was exposed to, and the relevant history of the environmental site(s) (e.g., agriculture practice, overall land use history, treatments, climate, presence of wildfires).",MIxS; STREAMS,"List any relevant natural environmental conditions (e.g., weather), as well as the experimental treatments applied to the samples, hosts, or environments (e.g., drought regime in the field prior to collecting plants). Refer to the MIxS guidelines for specifics (e.g., experimental factor term; https://doi.org/10.1038/ismej.2013.176 for built environments; https://doi.org/10.1094/PBIOMES-09-19-0051-P for agricultural samples; https://doi.org/10.1093/gigascience/giae071 for SIP).",,,,,
|
32
|
+
3.7,Sample collection,"State how samples were collected and if any samples were immediately pooled, include information about specific sampling location and methods used.","MIxS: sample collection device (MIXS:0000002), sample collection method (MIXS:0001225); host body site","Provide information on where exactly samples were collected from (Reference Item 3.2 for geographic location of sampling) and how they were collected. Indicate if the sampling that was done was destructive or if an existing sample (e.g., museum specimen) was destroyed during the study. Describe the tools and methods used for sampling. If applicable, describe any methods or design choices that were employed to minimize sampling effects on the larger environment. Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Note any modifications of lab protocols and the reason for protocol modifications. Provide a diagram or schematic to indicate specific sampling locations if desired. For host associated studies, include specific information on host tissue or body site if applicable. Explain how the chosen host niche (e.g., rhizosphere) was defined and targeted, if applicable.
|
33
|
+
|
34
|
+
Include instrument, sampling tool, and reagent information (catalogue #s, vendor).",,,,,
|
35
|
+
3.8,Eligibility/selection criteria,"List any criteria for inclusion and exclusion of environments, hosts, or samples.",Modified STORMS,"If any environments, hosts, or samples were excluded, describe the inclusion or exclusion criteria and how many were excluded. Describe reasons for exclusion at each stage of the study.",,,,,
|
36
|
+
3.9,Analytic sample size,"Explain how the final analytic sample size was calculated, including the number of controls, if relevant. ",Modified STORMS,"Include the number of host individuals included (if applicable) and the number of samples within the final analysis group. Clearly list the number of discrete samples at every stage. Consider use of a flow diagram describing the sample selection process (see template at https://stormsmicrobiome.org/figures ). Also state sample size in abstract if relevant.
|
37
|
+
|
38
|
+
If power analysis was used to calculate sample size, describe those calculations. Refer to replicates as described in 6.2. ",,,,,
|
39
|
+
4.0,Storage & preservation,"Describe how the samples were stored and preserved from immediately after sampling through the laboratory steps. Include information on long-term storage, especially if samples will be made publicly available.",Modified STORMS; MIxS slot: samp_store_temp,"State the amount of time between collection and storage and how the sample was preserved. The time it takes to collect a sample itself may also be reported as this can affect omics results. Indicate any preservation buffers or refrigeration/freezing used. Indicate if liquid nitrogen was used for flash freezing and indicate specifics about other freezing methods (e.g., put onto dry ice, put into a -80C freezer). Include the time between storage and use of the sample(s) including any acclimation time prior to experiments. If samples are reused e.g., from a museum collection, report what is known about storage and preservation history. Describe if and how the samples will be stored long-term and/or deposited into a repository (Item 17).
|
40
|
+
|
41
|
+
Include vendor/catalogue #s for reagents (e.g., preservatives) if applicable.",,,,,
|
42
|
+
4.1,Shipping,Describe how samples were transported or shipped to the laboratory.,STORMS,Include length of time from collection to shipment to receipt by the lab and if temperature control was used during shipping.,,,,,
|
43
|
+
4.2,Extraction methods,"Provide extraction methods for nucleic acids, proteins, metabolites, lipids, or other biomolecules, including kit and protocol information.",Modified STORMS; MIXS:0000037 for nucleic acid extraction,"Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Denote any kit that was used for extraction. Describe any modifications to lab and kit protocols and the reason for protocol modifications. Ensure the referenced methods include all relevant extraction and clean-up information, including specifics on the cell lysis methods and buffers/reagents used.
|
44
|
+
|
45
|
+
Include instrument and reagent information (catalogue #s, vendor) for kits or all chemicals used in custom protocols. ",,,,,
|
46
|
+
4.3,Experiments & sample processing,"Describe any modifications or experiments performed on the samples (e.g., culturing, flow sorting, size selection, stable-isotope probing, surface sterilization, homogenization, filtering, purification of viral fraction, sub-sampling, density selection). ","MIxS (e.g., https://doi.org/10.1093/gigascience/giae071 ); STREAMS","Describe laboratory experiments performed with the samples and include experimental condition information (e.g., pH, temperature, anaerobic vs. aerobic). Indicate if any culturing was done (and the relevant culturing conditions) or if the experiment was culture-independent. Describe any modifications made to the sample (e.g., for stable-isotope probing [SIP] experiments). Describe in detail how the samples were prepared for omics analysis (e.g., column used for metabolomics). Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Describe any modifications to lab and kit protocols and the reason for protocol modifications.
|
47
|
+
|
48
|
+
Include instrument and reagent information (catalogue #s, vendor) if applicable.",,,,,
|
49
|
+
4.4,Library preparation,Describe how sequencing libraries were prepared.,STREAMS,"Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Describe any modifications to lab and kit protocols and the reason for protocol modifications. Include instrument and reagent information (catalogue #s, vendor) if applicable.",,,,,
|
50
|
+
4.5,"Depletion, enrichment, & multiplexing","Describe in detail any methods used for nucleic acid depletion or enrichment (e.g., host depletion, rRNA depletion, polyA selection, sequence-based enrichment, polynucleic acid clamps).",Modified STORMS; MIXS:0000038,"Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Note any modifications of lab protocols and the reason for protocol modifications. Include nucleic acid sequences for primers or depletion/enrichment strategies if applicable. Include information on barcodes and how samples were multiplexed. Provide pre-sequencing amplification strategy and specific primer information when appropriate (MIXS:0000038). State which samples were and were not subjected to these methods.
|
51
|
+
|
52
|
+
Include instrument and reagent information (catalogue #s, vendor).",,,,,
|
53
|
+
4.6,Primer selection,Provide primer selection and nucleic acid amplification methods as well as the target variable region (if applicable). ,MIxS: PCR primers MIXS:0000046,Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Note any modifications of lab protocols and the reason for protocol modifications. Reference the origin of the primers if they were previously developed. Provide the primer sequences in the Methods or Supplementary Information (Item 16.0).,,,,,
|
54
|
+
5.0,Positive controls,"Describe any positive controls (e.g., mock communities, spike-ins, standards) if used. Denote what stage(s) in the experiment these were used. ",Modified STORMS,"If used, should be deposited under guidance provided in Item 8.2. Indicate if these were processed separately or added directly to the samples and what each standard or positive control was designed to assess. Describe if the positive controls are commercially available or were developed in-house, referencing any relevant protocols, publications, or catalogue/vendor information. Provide justification if no controls were used. Describe the standards used for metabolomics and proteomics. Performance of the controls should be reported throughout the Methods (e.g., Item 7.1).",,,,,
|
55
|
+
5.1,Negative controls,Describe any negative controls that were used throughout the experiment(s). Denote what stage(s) in the experiment these were used.,Modified STORMS,"If used, should be deposited under guidance provided in Item 8.2. Negative controls/blanks are recommended to be included for as many steps as possible (e.g., sampling, culturing, extraction, PCR, kit assessment, library preparation, sequencing) to be used in the interpretations of contamination (Item 6.1) and the performance of the controls should be reported on (e.g., Item 7.1). Provide justification if no negative controls were used.",,,,,
|
56
|
+
6.0,Quantification & quality assessments,"Describe the methods used to assess sample quality as well as nucleic acid quantity (via spectrophotometry (e.g. NanoDrop) or fluorescence (e.g. Qubit)). Describe if and how RNA quality and nucleic acid fragment size were measured via e.g., a Bioanalyzer. Provide information on metabolite, and/or protein quantity and quality.",STREAMS,"Provide a reference to complete lab protocols if previously published elsewhere such as on protocols.io. Note any modifications of lab protocols and the reason for protocol modifications. Provide any cutoffs used for sample exclusion based on quantity or quality.
|
57
|
+
|
58
|
+
Include instrument and reagent information (catalogue #s, vendor).",,,,,
|
59
|
+
6.1,Contaminant mitigation & identification,"Provide any laboratory methods used to control for or identify contamination from the environment, reagents, or laboratory.",STORMS,"Includes filtering of reagents, sterilization protocols, and other steps and laboratory methodologies utilized to minimize contamination. It is relevant to state whether the specimens of interest have low microbial load, which makes contamination especially relevant. Quality control of the data is described in Item 7.1.",,,,,
|
60
|
+
6.2,Replication,"Describe any biological or technical replicates included in the experiment, including which specific steps replicates were used for.",STORMS; MIxS: biological sample replicate (samp_rep_biol) & technical sample replicate (samp_rep_tech),Replication may be biological (redundant biological specimens) or technical (aliquots taken at different stages of analysis) and throughout the study. Describe if and when replicates or their resulting data were pooled. Describe why certain replicates were included and include how they factor in to bioinformatic (Item 7.0) and statistical (Item 7.4) analyses. Provide justification if replicates were not included. ,,,,,
|
61
|
+
6.3,Sequencing methods,"Describe the sequencing strategy or strategies (e.g., metagenome, metatranscriptome, amplicon sequencing; long-read or short-read); the sequencer(s), and sequencing methods/chemistry used.",MIxS: sequencing method (MIXS:0000050),"State the model(s) of sequencer(s) used. Include information on read length, sequencing depth per sample (average, standard deviation, and minimum for all samples), whether reads are paired, flow cell information (including version if applicable), and other parameters. For amplicon sequencing, state the region and primers selected (Item 4.6).
|
62
|
+
|
63
|
+
Include instrument and reagent information (catalogue #s, vendor) and provide information on the institution or company that performed the sequencing (can be Acknowledged in Item 14.0).",,,,,
|
64
|
+
6.4,"Metabolomics, metaproteomics, & other omics methods","Provide information about metabolomics, proteomics, and other omics methods and platforms.",Modified STORMS; Chemical Analysis Working Group (CAWG) Metabolomics Standards Initiative (MSI),"For proteomics, provide details on proteomic methods and platforms (e.g. LC-MS/MS, instrument type, column type, mass range, resolution, scan speed, maximum injection time, isolation window, normalized collision energy, and resolution). If applicable, detail which protease was used for digestion. For metabolomics, specify the analytic method used (such as nuclear magnetic resonance spectroscopy or mass spectrometry). For mass spectrometry, detail which fractions were obtained (polar and/or non polar) and how these were analyzed. Provide details on metabolomics methods and platforms (e.g. derivatization, instrument type, injection type, column type, column information, operating parameters, and instrument settings). For LC-MS, note which ion mode (+/-) the instrument was run on. Include all instrument and reagent information (catalogue #s, vendor) and provide information on the institution or company that performed these analyses (can be Acknowledge in Item 14.0). State the method of data acquisition: Data Dependent Acquisition (DDA) or Data Independent Acquisition (DIA). Discuss standards (e.g., reference standards) that were utilized. State if any samples were pooled for analysis. Metabolomics and other chemical analysis metadata should be reported in a standardized manner: https://doi.org/10.1007/s11306-007-0082-2 ",,,,,
|
65
|
+
6.5,Contextual & linked datasets,"For other data and metadata generated, state how it was defined, how it was measured or collected, and any transformations applied to the variable prior to analysis. Provide any contextual data or other data linked or relevant to the study.",Modified STORMS,"Describe any linked or contextual data (e.g., geochemical) data and describe the techniques for obtaining and analyzing this data. Describe any other data types generated (e.g., imaging data). Describe how these other data types can be linked with the sample metadata and omics data (e.g., spreadsheets organized by sample ID). Refer to Item 3.5 for guidance on reporting indigenous land/water/sample use and for guidance on how to adhere to the CARE principles ( https://doi.org/10.5334/dsj-2020-043 ). Information on how to access these datasets should also be provided (Item 8.0).
|
66
|
+
Note any linked sample(s) from an herbarium, museum collection, or repository.",,,,,
|
67
|
+
6.6,Batch effects,"Discuss any likely sources of batch effects, if known, and any methods employed to minimize and correct them.",Modified STORMS,"Potential sources of batch effects include sample collection, storage, technician, library preparation, or sequencing and are commonly unavoidable in all but the smallest of studies. Describe how batch effects were evaluated and potentially corrected computationally or in the lab, including references to any software and code along with citations and repository links. Describe laboratory procedures used to minimize batch effects, (e.g., for performing extractions for a large number of samples, when working with long-term studies or samples from different continents). Detail any blocking or randomization used in study design to avoid confounding of batches with exposures or outcomes. ",,,,,
|
68
|
+
7.0,Bioinformatics analyses,"Describe all bioinformatics steps including taxonomic, functional profiling, or other analyses performed. ",STREAMS,"Describe the methods, code and/or software (including versions, parameters, repository links, and citations), databases (Item 7.3), and protocols used for data analysis. Include this information for all steps performed (e.g., metagenome assembly, annotation, metagenome assembled genome (MAG) generation [can use MIMAG standards], taxonomic classification). Indicate if all data was processed in the same standardized manner, or provide a justification if there was variability. Include criteria and cutoffs for analyses e.g., ASV similarity clustering % and criteria for confident taxonomic assignments (including what tool/database/standard was used for taxonomic hierarchy classification (e.g., ICTV for viral classification), the taxonomy version number, the taxonomic level(s) used for classification, and any previously known taxonomic names if applicable). In cases where the host genome was used (e.g., functional annotation, gene expression), provide information on the source and version of genomic data and licenses, if available (e.g., Phytozome, Setaria viridis version 2.1). Provide information on any references used for analyses (e.g., reference-based assembly) or indicate if assemblies were de novo. Indicate if the analyses performed were inclusive of all microbes or if biases or restrictions were added e.g., only used a database that contains bacterial genomes for taxonomy classification. Describe all computational steps performed for metabolite and protein identification including the databases used (Item 7.3) and the criteria for confident assignments. Metabolomic identifications should conform to community standards: https://pubs.acs.org/doi/10.1021/es5002105 ",,,,,
|
69
|
+
7.1,Quality control,Describe any methods to identify or filter contamination or low quality reads. Report on results from positive and negative controls.,MIxS: sequence quality check (MIXS:0000051),"If samples or data were excluded based on quality or read depth, list the criteria and thresholds used, the number of samples excluded, and the final sample size after quality control. If applicable, report how host-associated reads were filtered out or any other filtering done to the data (e.g., to remove mitochondrial or chloroplast reads). Raw, unfiltered data should always be provided (Item 8.2). Describe any abundance cutoffs that were applied. Describe how data resulting from suspected contamination was identified and filtered out including how data from sampling, extraction, PCR, library prep, sequencing, and/or other controls were used for quality control. State if and how potential tag jumping was assessed. Include information on if samples or datasets were excluded due to the fact that they could not be identified or classified (e.g., taxonomically classified). If applicable, describe how MAG quality was determined and any relevant quality cutoffs. For metabolomics and metaproteomics, describe the baseline for detection, how alignment between runs was performed, and how low quality data was determined. Describe and cite any existing protocols, software, or code that was used along with the versions, parameters and thresholds, or provide detailed code or methods for how the quality control steps were performed. ",,,,,
|
70
|
+
7.2,Normalization,"Describe any transformations to quantitative variables used in analyses (e.g. use of percentages instead of counts, normalization, rarefaction, categorization).",Modified STORMS,"Describe if and how any normalization was performed on the data. If a variable is analyzed using different transformations, state rationale for the transformation and for each analysis, and which version of the variable is used. Provide details on any rarefying that was performed.
|
71
|
+
|
72
|
+
In case of any complex or multistep transformations, give enumerated instructions for reproducing those transformations. Provide software, workflow, and code information including links, DOIs, citations, versions, and parameters.",,,,,
|
73
|
+
7.3,Database information,"Specify all databases (e.g., the taxonomic database, metabolite database, peptide database) and version(s) used to generate the omics data interpretations.",STREAMS,"Did the study attempt to classify bacteria, archaea, eukaryotes, viruses, mobile elements? Describe the taxonomic classification strategy and which databases were used to make those assignments. Describe any omics databases that were used to classify metabolites and proteins and the search criteria. Provide database version information, DOI if applicable, and date(s) of access. If custom databases were used, describe what they contained (e.g., accession numbers), how and when they were constructed, validated, and utilized and how they can be accessed. Note if multiple databases were utilized and how discrepancies were handled. Describe the exact software, tools, and code that were used to query and work with these databases including any relevant search parameters. Provide accession numbers and/or DOIs for specific datasets utilized from these databases for further analysis (e.g., references for phylogenetic tree building). ",,,,,
|
74
|
+
7.4,Statistical methods,Describe all statistical methods.,Modified STROBE,"Describe any statistical tests used, exploratory data analysis performed, dimension reduction methods/unsupervised analyses, alpha/beta metrics, effect size calculations, and/or methods for adjusting for measurement bias. Provide the code that was utilized or specifics about how calculations were run whenever possible. Provide any formulas that were used. Cite existing software, workflows, packages, or code that were used and include version, repository, and/or citation information along with the parameters that were utilized.
|
75
|
+
|
76
|
+
Include why the methods and metric(s) were chosen.
|
77
|
+
|
78
|
+
If a multiple hypothesis testing correction method was used, describe the type of correction used. Describe any other transformations done to the dataset(s). Describe any subgroups that were used and how these affected statistical analyses (Item 7.7).",,,,,
|
79
|
+
7.5,Missing data,Explain how missing data were addressed.,Modified STROBE,"""Missing data"" refers to sample measurements such as contextual metadata or time points that should have been collected but were not, or data points not applicable to the reported observations. Describe if or how zeroes that are reported are different from N/As. ",,,,,
|
80
|
+
7.6,Bias & confounding variables,"Discuss potential for bias across your samples and data, if any, and discuss any potential confounding variables that may influence both the outcome and exposure of interest. State any variables controlled for and the rationale for controlling for them. Describe any methods that were used to minimize potential biases.",Modified STORMS,"State any sources of potential bias in measurements, for example multiple measurement instruments, multiple people performing extractions, and whether these potential biases were assessed or accounted for in study design. For animal hosts, selection bias can occur when some members of the target study population are more likely to be included in the study/final analytic sample than others. Some examples include survival bias (where part of the target study population is more likely to die before they can be studied), convenience sampling (where members of the target study population are not selected at random), and loss to follow-up (when probability of dropping out is related to one of the things being studied). Describe if and how positive controls or standards (Item 5) were used to evaluate potential biases (e.g., chosen methods are poor at identifying eukaryotes, or the limit of detection is higher than other reported protocols). Can be further discussed in the Discussion.
|
81
|
+
|
82
|
+
For causal inference (if applicable): describe the assumptions that would be required to draw causal inferences from observational data. See https://doi.org/10.1038/s41586-020-2881-9",,,,,
|
83
|
+
7.7,Subgroup analysis,Describe any methods used to examine subgroups.,Modified STROBE,"Provide justifications or reasoning as to why subgroups were separated. Describe how and why certain groups were separated: this can be on an experimental level (e.g., only looked at microbes targeted with stable-isotope probing) or in the analyses or statistics.",,,,,
|
84
|
+
7.8,Sensitivity analyses,Describe any sensitivity analyses.,Modified STROBE,Sensitivity analyses are techniques that examine how changes to variables affect outcomes. Include information on any simulations that were performed.,,,,,
|
85
|
+
7.9,Criteria for significance,State criteria used to select findings for reporting and describe significance threshold(s).,Modified STORMS,"Explain methods used for assessing false discovery rate and why these were chosen. Discuss effect size threshold(s), significance threshold(s), and any other relevant metrics for determining significant results. ",,,,,
|
86
|
+
8.0,Metadata access,"State where sample metadata such as demographics, environmental conditions, and other covariates may be accessed, and how they can be matched to the microbiome data. State where linked & contextual datasets (Item 6.5) may be accessed. ",Modified STORMS,"Describe any metadata standards that were followed and where this metadata can be accessed. Describe the naming schemes to match up each sample and dataset with its relevant metadata. If using a private repository, provide rationale (e.g., https://www.science.org/doi/10.1126/science.adr2493 ). Provide accession numbers, DOIs, and/or links whenever applicable.",,,,,
|
87
|
+
8.1,Host data access (if applicable),"State where host data such as host genome(s) may be accessed, and how this data can be matched to the microbiome data.",Modified STORMS,"Describe where the host data can be found, and if this data was generated by another group and used for analysis (e.g., used a publicly available genome as the reference for this study's Arabidopsis RNAseq experiment), and/or if a pangenome was used (and describe details of construction). If data for the host was generated in the same experiment as the microbiome data (e.g., metabolome data or a deep metagenomic sequencing run), then indicate this in the repository entry. If using a private repository, provide rationale (e.g., https://www.science.org/doi/10.1126/science.adr2493 ). Describe how host metadata including phenotype information can be accessed and linked with host genome information. Provide accession numbers, DOIs, and/or links whenever applicable.",,,,,
|
88
|
+
8.2,Raw data access,State where raw data may be accessed including demultiplexing information. ,Modified STORMS,"Report on due diligence performed to ensure data is meaningfully FAIR and adheres to CARE principles. Robust, long-term databases such as NCBI and EMBL-EBI are strongly preferred. If using a private repository, provide rationale (e.g., https://www.science.org/doi/10.1126/science.adr2493 ). Provide accession numbers, DOIs, and/or links whenever applicable.",,,,,
|
89
|
+
8.3,Processed data access,"If applicable, state where processed data may be accessed. ",STORMS,"Data at various stages of processing should be made available whenever possible (e.g., assemblies, MAGs) and the processing 'stage' should be clearly noted in the text and associated with the datasets.
|
90
|
+
|
91
|
+
Robust, long-term databases such as NCBI and EMBL-EBI (including PRIDE and MetaboLights) are preferred. Repositories like zenodo (https://zenodo.org/) or publisso ( https://www.publisso.de/en/working-for-you/doi-service/ ) can be used to provide a DOI and long-term storage for processed datasets, even those which cannot be published openly. Persistent Unique Identifiers are strongly encouraged. If using a private repository, provide rationale (e.g., https://www.science.org/doi/10.1126/science.adr2493 ). Provide accession numbers, DOIs, and/or links whenever applicable.",,,,,
|
92
|
+
8.4,Software & source code access,"Cite all software, workflows, and code that were used and not specifically stated above. ",Modified STREGA; STORMS,"If a standard or formalized workflow was employed or modified, reference it here and describe how it was modified and state any specific functions used. Installed packages, add-ons, or libraries should also be stated (including versions and run information) and cited in addition to the software used. Links to GitHub or other code repositories should be included when applicable (including specific Git commit hash information if necessary). Citation information, DOIs, and other identifying information should be provided. All versions and parameters should be provided. Provide the actual scripts run whenever possible. Provide computational requirements for running software when applicable.
|
93
|
+
|
94
|
+
Publish code generated for the study through public repositories and/or within the supplementary information. For code developed for this study, provide the link, DOI, and/or citation and provide information on the version, commit, and/or parameters run for this study and provide additional information needed to successfully run the code and recreate the analyses. Follow citation guidelines for GitHub: https://docs.github.com/en/repositories/archiving-a-github-repository/referencing-and-citing-content
|
95
|
+
|
96
|
+
Provide links to Docker container images ( https://doi.org/10.1145/2723872.2723882 ), or other containerized versions of tools, software, and processes as well as other information that can increase reproducibility (e.g., Research Object [RO] crates https://www.researchobject.org/ro-crate/ ).",,,,,
|
97
|
+
8.5,Reproducible research,Make a statement about if and how others can reproduce the reported methods and analyses.,STORMS,"Consider using a specialized rubric for reproducible research (such as: https://doi.org/10.1128/mbio.00525-18 ).
|
98
|
+
|
99
|
+
Consider preregistering the study protocol and consider evaluating based on Open Science Indicators (such as on osf.io or https://plos.org/open-science-practice/ ).
|
100
|
+
|
101
|
+
eNotebooks, tutorials, effective documentation, and annotated code can all improve reproducibility of analyses. Comments surrounding methodology performance can be reported on in the Methods, Results, or Discussion.",,,,,
|
102
|
+
Results,,,,,,,,,
|
103
|
+
9.0,Descriptive data,"Summarize characteristics of environments, hosts, samples, and information on relevant history (e.g., drought, fire), experiments, and potential confounders.",modified STROBE,"Can be reported in the text, in a table included in the paper or as supplementary information.
|
104
|
+
This includes factors that may affect the relationship between the microbiome and the condition of interest. ",,,,,
|
105
|
+
10.0,Microbiome sequencing data,"Report findings for microbiome analyses with all applicable outcomes and covariates. If applicable, include information on taxonomy, differential abundance analyses, or other analyses performed. ",STORMS,"Results of analyses beyond the primary scope of the study can be reported in supplementary materials (Item 16.0). Report on data analyzed and the significant results e.g. functional potential, MAG assemblies, and RNAseq data.
|
106
|
+
|
107
|
+
Anything shared via an external site (e.g., FigShare, Zenodo) should have the publicly available DOIs reported in the manuscript. Include relevant positive and/or negative control results (from Items 5.0, 5.1, and 7.1). Describe how these omics types were matched or integrated with other omics datasets and how these results can be interpreted.",,,,,
|
108
|
+
10.1,"Metabolomics, proteomics, & other omics data","Report results of metabolomics, proteomics, and other omics data analyses. ",Modified STORMS,"Include information on metabolites, proteins identified, results from other omics analyses, and any validations that were employed. Discuss the standards and controls that were utilized and how these results were assessed. Describe how these omics types were matched or integrated with other omics datasets and how these results can be interpreted.",,,,,
|
109
|
+
10.2,Statistical analyses,Report the results of statistical data analyses and their significance.,STORMS,"This could include results from subgroup analyses, sensitivity analyses, and cluster analyses. Refer to Item 7.4 and the significance thresholds defined above (Item 7.9) for supporting conclusions. Comment on statistical power when applicable. ",,,,,
|
110
|
+
10.3,"Figures, tables, & captions","Include accurate and clear figures, tables, and captions to communicate the results.",STREAMS,"Ensure captions clearly communicate the results, key methodological or statistical information, and refer to specific sample ID or accession numbers whenever possible. Ensure information on how the figures were generated is captured in the Methods, and any other relevant figures or tables (e.g., ASV tables) are publicly available and referenced or provided in the supplementary materials (Item 16.0). Photos of the environments, samples, or methodologies can provide important context and can be added to main text or supplemental figures. Visualizations should be easily interpretable and colorblind-friendly. The caption, main text, or supplementary information should provide enough of a description of the visualizations for visually-impaired readers to interpret findings. If software was used to generate figures this should be referenced in the Methods, and if illustrations or figures were generated with tools that need specific citations (e.g., from BioRender) it must be accurately credited in captions.",,,,,
|
111
|
+
Discussion,,,,,,,,,
|
112
|
+
11.0,Key results,Summarize key results with reference to study objectives.,Modified STROBE,Discuss if the results support the underlying hypotheses or how the results address the research question(s) of interest.,,,,,
|
113
|
+
11.1,Interpretation,"Give a reasonable interpretation of results considering objectives, multiplicity of analyses, results from similar studies and comparisons between datasets, and other relevant evidence.",Modified STROBE,"Define or clarify any subjective terms such as ""dominant,"" ""dysbiosis,"" ""healthy"" ( https://doi.org/10.1038/s41579-024-01107-0 ) and similar words used in interpretation of results. Definitions may also need to be provided in the Introduction or Methods if they are directly used in hypotheses or research questions.
|
114
|
+
|
115
|
+
When interpreting the findings, consider how the interpretation of the findings may be summarized or quoted for the general public such as in press releases or news articles.
|
116
|
+
|
117
|
+
If causal language is used in the interpretation (such as ""alters,"" ""affects,"" ""results in,"" ""causes,"" or ""impacts""), assumptions made for causal inference should be explicitly stated.
|
118
|
+
|
119
|
+
Distinguish between functional potential (e.g., inferred from metagenomic data) and observed activity (e.g., from metatranscriptomic, metabolomic, metaproteomic data) if discussing microbial function.",,,,,
|
120
|
+
11.2,Limitations,"Discuss limitations of the study, taking into account sources of potential bias (Item 7.6) or imprecision.",STROBE,"Consider limitations resulting from the methods (especially novel methods), the study design, the spatiotemporal scope, and the sample size. ",,,,,
|
121
|
+
11.3,Generalizability,Discuss the generalizability (external validity) of the study results.,Modified STROBE,To what populations or other settings do you expect the conclusions to generalize? How does this work compare to other publications or datasets in related areas?,,,,,
|
122
|
+
12.0,Ongoing & future work,Describe potential future research or ongoing research based on the study's findings. ,STORMS,Describe what the next steps are and any limitations or expected challenges associated with future research in this area. ,,,,,
|
123
|
+
13.0,Conclusions,State the overall conclusions arising from this work.,STREAMS,Describe how the results fit into the broader context of the field. This Conclusions section may be required to be a separate section within the manuscript.,,,,,
|
124
|
+
Other information,,,,,,,,,
|
125
|
+
14.0,Acknowledgements,Include acknowledgements of those who contributed to the research but did not meet criteria for authorship. Use the CRediT roles ( https://credit.niso.org ) for author contributions and determining authorship vs. an acknowledgement.,Modified STORMS,"Review journal guidelines for specific guidance regarding acknowledgements.
|
126
|
+
|
127
|
+
For general guidelines on authorship, see http://www.icmje.org and https://www.elsevier.com/authors/journal-authors/policies-and-ethics/credit-author-statement
|
128
|
+
|
129
|
+
Also acknowledge any relevant facilities or institutions e.g., lab space, field station that was used to perform the experiments, facility that was used for computing infrastructure.
|
130
|
+
|
131
|
+
If applicable (and some journals require a separate data ethics section), provide acknowledgements for use of indigenous/first nation lands and/or samples as well as other information about ethical research practices as described in Item 3.5.",,,,,
|
132
|
+
14.1,Funding,"Give the source of funding (award number) and the role of the funders for the present study and, if applicable, for the original study on which the present article is based.",STROBE,"Accurate funding information should be provided for each author when applicable. A standardized, machine-readable format should be used whenever possible (e.g., https://www.niso.org/publications/rp-37-2021-jats4r-funding-v1.3 ). Funder IDs from the Research Organization Registry ( https://ror.org ) can also be included if applicable.",,,,,
|
133
|
+
15.0,Conflicts of interest,Include a conflicts of interest statement.,STORMS,"Follow journal specific guidelines for the formatting of the conflicts of interest statement. Ensure all potential perceived conflicts of interest are disclosed (e.g., owned licenses, affiliations with companies, journal editorial roles, other funding sources).",,,,,
|
134
|
+
16.0,Supplementary data & files,Indicate where supplementary data and files may be accessed and what information they contain. ,Modified STORMS,"Anything shared via an external site (e.g., FigShare, Zenodo) should have the publicly available DOIs reported in the manuscript; Accession numbers should be provided for data deposited into repositories; Depending on the analysis performed, examples of the supplemental results included could be mean relative abundance, differential abundance, raw p-value, multiple hypothesis testing-adjusted p-values, and standard error. Include scripts or links to scripts that were run if applicable to increase reproducibility (Item 8.5). Environment and sample metadata can also be reported in supplementary files: provide linked accession numbers within spreadsheets when possible. Provide keys or data dictionaries for matching up the information in different files. Use a machine-readable, plain-text format such as csv or tsv whenever possible. Include licenses/permissions to utilize figures, images, illustrations, or other products from software, companies, other researchers, or journals. ",,,,,
|
135
|
+
17.0,Sample & data availability,Provide a statement about how and where all data associated with the study can be accessed. Provide a statement on how any associated samples may be accessed.,STREAMS,"This is a required field by some journals. Provide all accession numbers, DOIs, and/or links for data in public repositories or in an external site (e.g., FigShare, Zenodo). Ensure data use policies are followed and all data generated by other researchers or groups is cited properly and also included in the references. Describe the naming scheme of the samples and data (and keys for matching names with publicly available datasets). If not all data or samples are publicly available, provide a justification as to why the data or samples cannot be made available (e.g., due to privacy concerns, indigenous data sovereignty, protections for endangered species). Provide information about how samples are stored, how they can be shared, and the process for sharing samples (e.g., contact the corresponding author to arrange for a material transfer agreement). If museum specimens were used, provide the full catalogue numbers along with the museum collection information and/or specific database links.",,,,,
|
136
|
+
18.0,AI usage,"Describe if AI was used, the program and version that was used, and the exact ways it was utilized (e.g. literature review, code generation, data analysis, manuscript drafting, translation assistance). ",STREAMS,"Ensure that all uses of generative AI (e.g., for text and images) are clearly defined. Refer to specific publisher guidance: https://www.nature.com/nature-portfolio/editorial-policies/ai and/or guidance from funding agencies e.g., https://www.energy.gov/sites/default/files/2024-06/Generative%20AI%20Reference%20Guide%20v2%206-14-24.pdf for the kinds of AI usage that should be reported on and their specific reporting requirements. Guidelines for AI usage are still rather dynamic, so ensure up-to-date guidance has been followed. This information may also need to be reported on within the Methods. ",,,,,
|
@@ -0,0 +1,40 @@
|
|
1
|
+
"""
|
2
|
+
Agent for validating papers against checklists, e.g. STREAMS
|
3
|
+
|
4
|
+
This module re-exports components from the checklist/ package for backward compatibility.
|
5
|
+
"""
|
6
|
+
from typing import Dict, List
|
7
|
+
|
8
|
+
# Re-export from checklist package
|
9
|
+
from aurelian.agents.checklist import (
|
10
|
+
checklist_agent,
|
11
|
+
add_checklists,
|
12
|
+
ChecklistDependencies,
|
13
|
+
get_config,
|
14
|
+
all_checklists,
|
15
|
+
retrieve_text_from_pmid,
|
16
|
+
retrieve_text_from_doi,
|
17
|
+
fetch_checklist,
|
18
|
+
chat,
|
19
|
+
)
|
20
|
+
|
21
|
+
# Re-export the older synchronous versions of functions for compatibility
|
22
|
+
def _run_sync(coro):
    """Run an async coroutine to completion from synchronous code.

    ``asyncio.run()`` raises ``RuntimeError`` when an event loop is already
    running in the current thread — which is the normal situation when these
    legacy tools are invoked while the agent itself is executing
    asynchronously. In that case the coroutine is executed on a fresh event
    loop in a short-lived worker thread instead.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.
    """
    import asyncio
    import concurrent.futures

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: the simple path is safe.
        return asyncio.run(coro)
    # A loop is already running; asyncio.run() would raise here, so run the
    # coroutine on its own loop in a worker thread and wait for the result.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


@checklist_agent.tool
def retrieve_text_from_pmid_sync(ctx, pmid: str) -> str:
    """Legacy synchronous version of retrieve_text_from_pmid.

    Args:
        ctx: The agent run context (passed through to the async tool).
        pmid: PubMed identifier of the paper to retrieve.

    Returns:
        The retrieved paper text.
    """
    return _run_sync(retrieve_text_from_pmid(ctx, pmid))


@checklist_agent.tool
def retrieve_text_from_doi_sync(ctx, doi: str) -> str:
    """Legacy synchronous version of retrieve_text_from_doi.

    Args:
        ctx: The agent run context (passed through to the async tool).
        doi: DOI of the paper to retrieve.

    Returns:
        The retrieved paper text.
    """
    return _run_sync(retrieve_text_from_doi(ctx, doi))


@checklist_agent.tool
def fetch_checklist_sync(ctx, checklist_id: str) -> str:
    """Legacy synchronous version of fetch_checklist.

    Args:
        ctx: The agent run context (passed through to the async tool).
        checklist_id: Identifier of the checklist to fetch (e.g. "streams").

    Returns:
        The checklist content.
    """
    return _run_sync(fetch_checklist(ctx, checklist_id))
|
@@ -0,0 +1,46 @@
|
|
1
|
+
"""
|
2
|
+
Agent for working with chemical structures.
|
3
|
+
|
4
|
+
Currently this is largely geared around interpreting chemical structures.
|
5
|
+
"""
|
6
|
+
from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
|
7
|
+
from aurelian.agents.chemistry.chemistry_tools import (
|
8
|
+
draw_structure_and_interpret,
|
9
|
+
chebi_search_terms,
|
10
|
+
search_web_for_chemistry,
|
11
|
+
retrieve_chemistry_web_page
|
12
|
+
)
|
13
|
+
from aurelian.agents.filesystem.filesystem_tools import inspect_file, list_files
|
14
|
+
from pydantic_ai import Agent, Tool
|
15
|
+
|
16
|
+
# Import from dedicated image agent module to avoid circular imports
|
17
|
+
from aurelian.agents.chemistry.image_agent import structure_image_agent
|
18
|
+
|
19
|
+
SYSTEM = """
|
20
|
+
You are an expert chemist specializing in chemical structures, reactions, and properties.
|
21
|
+
|
22
|
+
You can help with:
|
23
|
+
- Interpreting chemical structures (using ChEBI IDs or SMILES strings)
|
24
|
+
- Answering questions about chemicals and their properties
|
25
|
+
- Finding information about chemical structures in ChEBI ontology
|
26
|
+
- General chemistry questions
|
27
|
+
|
28
|
+
Always be precise in your chemical explanations, using IUPAC nomenclature and accurate terminology.
|
29
|
+
"""
|
30
|
+
|
31
|
+
# Tools registered with the chemistry agent. The filesystem helpers are
# included so the agent can inspect local files alongside chemistry lookups.
_chemistry_tools = [
    Tool(draw_structure_and_interpret),
    Tool(chebi_search_terms),
    Tool(search_web_for_chemistry),
    Tool(retrieve_chemistry_web_page),
    Tool(inspect_file),
    Tool(list_files),
]

# Agent specialised in interpreting chemical structures and answering
# chemistry questions, backed by the SYSTEM prompt defined above.
chemistry_agent = Agent(
    model="openai:gpt-4o",
    system_prompt=SYSTEM,
    deps_type=ChemistryDependencies,
    tools=_chemistry_tools,
)
|
44
|
+
|
45
|
+
# Remove the chat import to avoid circular imports
|
46
|
+
# The chat function is directly available from chemistry_gradio.py
|