PyPI - aurelian - Versions diffs - 0.3.2__py3-none-any.whl - Mend

aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (254) hide show

aurelian/__init__.py +9 -0
aurelian/agents/__init__.py +0 -0
aurelian/agents/amigo/__init__.py +3 -0
aurelian/agents/amigo/amigo_agent.py +77 -0
aurelian/agents/amigo/amigo_config.py +85 -0
aurelian/agents/amigo/amigo_evals.py +73 -0
aurelian/agents/amigo/amigo_gradio.py +52 -0
aurelian/agents/amigo/amigo_mcp.py +152 -0
aurelian/agents/amigo/amigo_tools.py +152 -0
aurelian/agents/biblio/__init__.py +42 -0
aurelian/agents/biblio/biblio_agent.py +94 -0
aurelian/agents/biblio/biblio_config.py +40 -0
aurelian/agents/biblio/biblio_gradio.py +67 -0
aurelian/agents/biblio/biblio_mcp.py +115 -0
aurelian/agents/biblio/biblio_tools.py +164 -0
aurelian/agents/biblio_agent.py +46 -0
aurelian/agents/checklist/__init__.py +44 -0
aurelian/agents/checklist/checklist_agent.py +85 -0
aurelian/agents/checklist/checklist_config.py +28 -0
aurelian/agents/checklist/checklist_gradio.py +70 -0
aurelian/agents/checklist/checklist_mcp.py +86 -0
aurelian/agents/checklist/checklist_tools.py +141 -0
aurelian/agents/checklist/content/checklists.yaml +7 -0
aurelian/agents/checklist/content/streams.csv +136 -0
aurelian/agents/checklist_agent.py +40 -0
aurelian/agents/chemistry/__init__.py +3 -0
aurelian/agents/chemistry/chemistry_agent.py +46 -0
aurelian/agents/chemistry/chemistry_config.py +71 -0
aurelian/agents/chemistry/chemistry_evals.py +79 -0
aurelian/agents/chemistry/chemistry_gradio.py +50 -0
aurelian/agents/chemistry/chemistry_mcp.py +120 -0
aurelian/agents/chemistry/chemistry_tools.py +121 -0
aurelian/agents/chemistry/image_agent.py +15 -0
aurelian/agents/d4d/__init__.py +30 -0
aurelian/agents/d4d/d4d_agent.py +72 -0
aurelian/agents/d4d/d4d_config.py +46 -0
aurelian/agents/d4d/d4d_gradio.py +58 -0
aurelian/agents/d4d/d4d_mcp.py +71 -0
aurelian/agents/d4d/d4d_tools.py +157 -0
aurelian/agents/d4d_agent.py +64 -0
aurelian/agents/diagnosis/__init__.py +33 -0
aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
aurelian/agents/diagnosis/diagnosis_config.py +48 -0
aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
aurelian/agents/diagnosis_agent.py +28 -0
aurelian/agents/draw/__init__.py +3 -0
aurelian/agents/draw/draw_agent.py +39 -0
aurelian/agents/draw/draw_config.py +26 -0
aurelian/agents/draw/draw_gradio.py +50 -0
aurelian/agents/draw/draw_mcp.py +94 -0
aurelian/agents/draw/draw_tools.py +100 -0
aurelian/agents/draw/judge_agent.py +18 -0
aurelian/agents/filesystem/__init__.py +0 -0
aurelian/agents/filesystem/filesystem_config.py +27 -0
aurelian/agents/filesystem/filesystem_gradio.py +49 -0
aurelian/agents/filesystem/filesystem_mcp.py +89 -0
aurelian/agents/filesystem/filesystem_tools.py +95 -0
aurelian/agents/filesystem/py.typed +0 -0
aurelian/agents/github/__init__.py +0 -0
aurelian/agents/github/github_agent.py +83 -0
aurelian/agents/github/github_cli.py +248 -0
aurelian/agents/github/github_config.py +22 -0
aurelian/agents/github/github_gradio.py +152 -0
aurelian/agents/github/github_mcp.py +252 -0
aurelian/agents/github/github_tools.py +408 -0
aurelian/agents/github/github_tools.py.tmp +413 -0
aurelian/agents/goann/__init__.py +13 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
aurelian/agents/goann/goann_agent.py +90 -0
aurelian/agents/goann/goann_config.py +90 -0
aurelian/agents/goann/goann_evals.py +104 -0
aurelian/agents/goann/goann_gradio.py +62 -0
aurelian/agents/goann/goann_mcp.py +0 -0
aurelian/agents/goann/goann_tools.py +65 -0
aurelian/agents/gocam/__init__.py +43 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
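aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0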
aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
aurelian/agents/gocam/gocam_agent.py +240 -0
aurelian/agents/gocam/gocam_config.py +85 -0
aurelian/agents/gocam/gocam_curator_agent.py +46 -0
aurelian/agents/gocam/gocam_evals.py +67 -0
aurelian/agents/gocam/gocam_gradio.py +89 -0
aurelian/agents/gocam/gocam_mcp.py +224 -0
aurelian/agents/gocam/gocam_tools.py +294 -0
aurelian/agents/linkml/__init__.py +0 -0
aurelian/agents/linkml/linkml_agent.py +62 -0
aurelian/agents/linkml/linkml_config.py +48 -0
aurelian/agents/linkml/linkml_evals.py +66 -0
aurelian/agents/linkml/linkml_gradio.py +45 -0
aurelian/agents/linkml/linkml_mcp.py +186 -0
aurelian/agents/linkml/linkml_tools.py +102 -0
aurelian/agents/literature/__init__.py +3 -0
aurelian/agents/literature/literature_agent.py +55 -0
aurelian/agents/literature/literature_config.py +35 -0
aurelian/agents/literature/literature_gradio.py +52 -0
aurelian/agents/literature/literature_mcp.py +174 -0
aurelian/agents/literature/literature_tools.py +182 -0
aurelian/agents/monarch/__init__.py +25 -0
aurelian/agents/monarch/monarch_agent.py +44 -0
aurelian/agents/monarch/monarch_config.py +45 -0
aurelian/agents/monarch/monarch_gradio.py +51 -0
aurelian/agents/monarch/monarch_mcp.py +65 -0
aurelian/agents/monarch/monarch_tools.py +113 -0
aurelian/agents/oak/__init__.py +0 -0
aurelian/agents/oak/oak_config.py +27 -0
aurelian/agents/oak/oak_gradio.py +57 -0
aurelian/agents/ontology_mapper/__init__.py +31 -0
aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
aurelian/agents/phenopackets/__init__.py +3 -0
aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
aurelian/agents/phenopackets/phenopackets_config.py +72 -0
aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
aurelian/agents/rag/__init__.py +40 -0
aurelian/agents/rag/rag_agent.py +83 -0
aurelian/agents/rag/rag_config.py +80 -0
aurelian/agents/rag/rag_gradio.py +67 -0
aurelian/agents/rag/rag_mcp.py +107 -0
aurelian/agents/rag/rag_tools.py +189 -0
aurelian/agents/rag_agent.py +54 -0
aurelian/agents/robot/__init__.py +0 -0
aurelian/agents/robot/assets/__init__.py +3 -0
aurelian/agents/robot/assets/template.md +384 -0
aurelian/agents/robot/robot_config.py +25 -0
aurelian/agents/robot/robot_gradio.py +46 -0
aurelian/agents/robot/robot_mcp.py +100 -0
aurelian/agents/robot/robot_ontology_agent.py +139 -0
aurelian/agents/robot/robot_tools.py +50 -0
aurelian/agents/talisman/__init__.py +3 -0
aurelian/agents/talisman/talisman_agent.py +126 -0
aurelian/agents/talisman/talisman_config.py +66 -0
aurelian/agents/talisman/talisman_gradio.py +50 -0
aurelian/agents/talisman/talisman_mcp.py +168 -0
aurelian/agents/talisman/talisman_tools.py +720 -0
aurelian/agents/ubergraph/__init__.py +40 -0
aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
aurelian/agents/ubergraph/ubergraph_config.py +79 -0
aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
aurelian/agents/uniprot/__init__.py +37 -0
aurelian/agents/uniprot/uniprot_agent.py +43 -0
aurelian/agents/uniprot/uniprot_config.py +43 -0
aurelian/agents/uniprot/uniprot_evals.py +99 -0
aurelian/agents/uniprot/uniprot_gradio.py +48 -0
aurelian/agents/uniprot/uniprot_mcp.py +168 -0
aurelian/agents/uniprot/uniprot_tools.py +136 -0
aurelian/agents/web/__init__.py +0 -0
aurelian/agents/web/web_config.py +27 -0
aurelian/agents/web/web_gradio.py +48 -0
aurelian/agents/web/web_mcp.py +50 -0
aurelian/agents/web/web_tools.py +108 -0
aurelian/chat.py +23 -0
aurelian/cli.py +800 -0
aurelian/dependencies/__init__.py +0 -0
aurelian/dependencies/workdir.py +78 -0
aurelian/mcp/__init__.py +0 -0
aurelian/mcp/amigo_mcp_test.py +86 -0
aurelian/mcp/config_generator.py +123 -0
aurelian/mcp/example_config.json +43 -0
aurelian/mcp/generate_sample_config.py +37 -0
aurelian/mcp/gocam_mcp_test.py +126 -0
aurelian/mcp/linkml_mcp_tools.py +190 -0
aurelian/mcp/mcp_discovery.py +87 -0
aurelian/mcp/mcp_test.py +31 -0
aurelian/mcp/phenopackets_mcp_test.py +103 -0
aurelian/tools/__init__.py +0 -0
aurelian/tools/web/__init__.py +0 -0
aurelian/tools/web/url_download.py +51 -0
aurelian/utils/__init__.py +0 -0
aurelian/utils/async_utils.py +15 -0
aurelian/utils/data_utils.py +32 -0
aurelian/utils/documentation_manager.py +59 -0
aurelian/utils/doi_fetcher.py +238 -0
aurelian/utils/ontology_utils.py +68 -0
aurelian/utils/pdf_fetcher.py +23 -0
aurelian/utils/process_logs.py +100 -0
aurelian/utils/pubmed_utils.py +238 -0
aurelian/utils/pytest_report_to_markdown.py +67 -0
aurelian/utils/robot_ontology_utils.py +112 -0
aurelian/utils/search_utils.py +95 -0
aurelian-0.3.2.dist-info/LICENSE +22 -0
aurelian-0.3.2.dist-info/METADATA +105 -0
aurelian-0.3.2.dist-info/RECORD +254 -0
aurelian-0.3.2.dist-info/WHEEL +4 -0
aurelian-0.3.2.dist-info/entry_points.txt +3 -0

aurelian/utils/ontology_utils.py ADDED Viewed

@@ -0,0 +1,68 @@
+import logfire
+import pystow
+from cachetools.func import lru_cache
+from linkml_store.api import Collection
+from linkml_store.api.stores.duckdb import DuckDBDatabase
+from linkml_store.index import LLMIndexer
+from oaklib import BasicOntologyInterface, get_adapter
+# Module-level LLMIndexer instance, constructed once when this module is imported.
+# NOTE(review): not referenced by the functions visible in this module - confirm it is still needed.
+llm_indexer = LLMIndexer()
+@lru_cache
+def get_collection_for_adapter(handle: str, name: str) -> Collection:
+    """
+    Return the id/label collection for an ontology, filling it on first use.
+    The collection is stored in a DuckDB file named ``{name}.duckdb`` inside the
+    pystow ``aurelian/indexes`` directory. A collection that already contains
+    rows is returned untouched; an empty one is populated with one
+    ``{"id": ..., "label": ...}`` object per entity reported by the adapter.
+    Calls are memoised by ``lru_cache``.
+    Args:
+        handle (str): The ontology handle (e.g., `sqlite:obo:uberon`).
+        name (str): The name of the ontology (e.g., `uberon`).
+    Returns:
+        Collection: The collection holding the ontology's id/label objects.
+    """
+    ontology = get_adapter(handle)
+    index_dir = pystow.join("aurelian", "indexes")
+    db = DuckDBDatabase(str(index_dir / f"{name}.duckdb"))
+    coll = db.get_collection(name, create_if_not_exists=True)
+    if coll.size() <= 0:
+        # Nothing stored yet: load every entity's id and label from the adapter.
+        rows = [
+            {"id": curie, "label": label}
+            for curie, label in ontology.labels(ontology.entities())
+        ]
+        coll.insert(rows)
+    return coll
+def search_ontology(adapter: BasicOntologyInterface, query: str, limit=10):
+    """
+    Look up ontology terms matching a query string.
+    The handle is rebuilt from the adapter's resource scheme and slug, the
+    matching collection is obtained from ``get_collection_for_adapter``, and the
+    collection's ``llm`` index is searched inside a logfire span.
+    Example:
+        >>> from oaklib import get_adapter
+        >>> adapter = get_adapter("sqlite:obo:uberon")
+        >>> terms = search_ontology(adapter, "manus")
+        >>> assert len(terms) > 1
+        >>> terms = search_ontology(adapter, "l~digit", limit=5)
+        >>> assert len(terms) == 5
+    Args:
+        adapter (BasicOntologyInterface): The ontology adapter.
+        query (str): The query term.
+        limit (int): The maximum number of search results to return.
+    Returns:
+        List[Tuple[str, str]]: ``(id, label)`` pairs for the matching terms.
+    """
+    scheme = adapter.resource.scheme
+    name = adapter.resource.slug
+    # The collection is keyed by the last colon-separated part of the slug.
+    collection = get_collection_for_adapter(f"{scheme}:{name}", name.rsplit(":", 1)[-1])
+    with logfire.span("search_ontology {name} {query}", name=name, query=query):
+        print(f"Searching {scheme}:{name} for {query}")
+        result = collection.search(query, limit=limit, index_name="llm")
+        return [(row["id"], row["label"]) for row in result.rows]

aurelian/utils/pdf_fetcher.py ADDED Viewed

@@ -0,0 +1,23 @@
+import tempfile
+import requests
+from pdfminer.high_level import extract_text
+def extract_text_from_pdf(pdf_url: str) -> str:
+    """
+    Download and extract text from a PDF given its URL, using a temporary file.
+    Failures are reported through the return value rather than by raising, so
+    callers always get a string back.
+    Args:
+        pdf_url: URL of the PDF document to download.
+    Returns:
+        The stripped text of the PDF, or a message starting with ``"Error"``
+        when the download fails, extraction fails, or no text is found.
+    """
+    try:
+        # The timeout keeps a stalled server from blocking the caller indefinitely.
+        response = requests.get(pdf_url, timeout=30)
+    except requests.RequestException:
+        # Connection errors and timeouts are reported like a non-200 response,
+        # matching the error-string convention used by the rest of this function.
+        return "Error: Unable to retrieve PDF."
+    if response.status_code != 200:
+        return "Error: Unable to retrieve PDF."
+    try:
+        # NOTE(review): the file is re-opened by name while still open here, which
+        # the tempfile documentation says is not possible on Windows - confirm target platforms.
+        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as temp_pdf:
+            temp_pdf.write(response.content)
+            temp_pdf.flush()  # Ensure all data is written before reading
+            text = extract_text(temp_pdf.name)
+            return text.strip() if text else "Error: No text extracted from PDF."
+    except Exception as e:
+        return f"Error extracting PDF text: {e}"

aurelian/utils/process_logs.py ADDED Viewed

@@ -0,0 +1,100 @@
+import json
+from pathlib import Path
+from collections import defaultdict
+import re
+def parse_reportlog(log_path: str):
+    """Parse pytest-reportlog output into structured format.
+    Only ``TestReport`` entries are used. pytest writes one report per phase
+    (``setup``, ``call``, ``teardown``) for the same node id, so the phases are
+    combined instead of letting the last one win.
+    Args:
+        log_path: Path of the JSON-lines file written by ``--report-log``.
+    Returns:
+        A mapping from node id to a dict with any of the keys ``outcome``,
+        ``duration``, ``properties`` (user properties as a dict) and
+        ``parameters`` (the text between the first ``[`` and the last ``]``).
+    """
+    tests = defaultdict(dict)
+    # Node ids whose 'call' phase duration has already been stored.
+    call_timed = set()
+    with open(log_path) as f:
+        for line in f:
+            if not line.strip():
+                continue  # tolerate blank lines such as a trailing newline
+            entry = json.loads(line)
+            # Only process TestReport entries
+            if entry.get('$report_type') != 'TestReport':
+                continue
+            nodeid = entry['nodeid']
+            record = tests[nodeid]
+            # Keep the first non-passing outcome: a passing teardown report must
+            # not hide a failure or skip recorded by an earlier phase.
+            if 'outcome' in entry:
+                previous = record.get('outcome')
+                if not previous or previous == 'passed':
+                    record['outcome'] = entry['outcome']
+            # Prefer the duration of the 'call' phase; fall back to the latest
+            # phase seen when the test never reached 'call'.
+            if 'duration' in entry:
+                if entry.get('when') == 'call':
+                    record['duration'] = entry['duration']
+                    call_timed.add(nodeid)
+                elif nodeid not in call_timed:
+                    record['duration'] = entry['duration']
+            # Convert user_properties to dict
+            if 'user_properties' in entry:
+                record['properties'] = dict(entry['user_properties'])
+            # Extract parameters from something like:
+            # test_search_ontology[sqlite:obo:bfo-3D spatial-10-expected0]
+            start = nodeid.find('[')
+            end = nodeid.rfind(']')
+            if start != -1 and end > start:
+                record['parameters'] = nodeid[start + 1:end]
+    return tests
+def generate_markdown(tests):
+    """Convert test results to markdown documentation.
+    Tests are grouped by the second ``::``-separated component of their node id
+    with any ``[params]`` suffix removed. Each group becomes a section with one
+    table row per test run.
+    Args:
+        tests: Mapping of node id to a dict that may contain ``properties``,
+            ``duration`` and ``outcome``, as produced by ``parse_reportlog``.
+    Returns:
+        The markdown document as a single string.
+    """
+    def cell(value):
+        # A raw '|' would be read as a column separator and break the table row.
+        return str(value).replace('|', '\\|')
+    # Group tests by their base function name
+    test_groups = defaultdict(list)
+    for nodeid, data in tests.items():
+        parts = nodeid.split('::')
+        # Fall back to the whole node id when there is no '::' separator
+        # instead of raising IndexError.
+        name = parts[1] if len(parts) > 1 else parts[0]
+        test_groups[name.split('[')[0]].append((nodeid, data))
+    md = ["# Test Results Documentation\n"]
+    for base_name, group in test_groups.items():
+        md.append(f"## {base_name}\n")
+        # Create table for all test runs
+        md.append("### Test Runs\n")
+        # Headers: Parameters, Properties, Duration, Outcome
+        md.append('| Parameters | Properties | Duration (s) | Outcome |')
+        md.append('|------------|------------|-------------|---------|')
+        for nodeid, data in group:
+            # Everything after the first '[' minus one closing ']'; parameter
+            # ids may themselves contain brackets, so do not split on every '['.
+            params = nodeid.partition('[')[2]
+            if params.endswith(']'):
+                params = params[:-1]
+            props = data.get('properties', {})
+            props_str = '; '.join(f"{k}: {v}" for k, v in props.items())
+            duration = f"{data.get('duration', 0):.3f}"
+            row = [params, props_str, duration, data.get('outcome', '')]
+            md.append('| ' + ' | '.join(cell(c) for c in row) + ' |')
+        md.append('')
+    return '\n'.join(md)
+# Example usage:
+if __name__ == '__main__':
+    # Expects report.jsonl to have been produced beforehand by:
+    #   pytest test_examples.py --report-log=report.jsonl
+    rendered = generate_markdown(parse_reportlog(Path('report.jsonl')))
+    # Save the rendered markdown next to the other docs
+    with open('docs/unit_tests.md', 'w') as out:
+        out.write(rendered)

aurelian/utils/pubmed_utils.py ADDED Viewed

@@ -0,0 +1,238 @@
+import re
+from typing import Optional
+import requests
+from bs4 import BeautifulSoup
+from aurelian.utils.doi_fetcher import DOIFetcher
+# URL templates for NCBI services; each one takes a ``pmid`` format field.
+# BioC XML (ASCII) endpoint, used by get_full_text_from_bioc.
+BIOC_URL = "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_xml/{pmid}/ascii"
+# esummary XML endpoint.
+# NOTE(review): not referenced by the functions visible in this module - confirm it is still needed.
+PUBMED_EUTILS_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pmid}&retmode=xml"
+# efetch XML endpoint, used by get_abstract_from_pubmed.
+EFETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={pmid}&retmode=xml"
+# Matches a DOI that follows a "/" in a URL; capture group 1 is the DOI itself.
+DOI_PATTERN = r"/(10\.\d{4,9}/[\w\-.]+)"
+# Shared DOIFetcher instance, used as a fallback source of full text.
+doi_fetcher = DOIFetcher()
+def extract_doi_from_url(url: str) -> Optional[str]:
+    """Pull the DOI out of a journal article URL.
+    Args:
+        url (str): The URL of the article.
+    Returns:
+        Optional[str]: The first DOI matched by ``DOI_PATTERN``, or None when
+        the URL does not contain one.
+    """
+    found = re.search(DOI_PATTERN, url)
+    if found is None:
+        return None
+    return found.group(1)
+def doi_to_pmid(doi: str) -> Optional[str]:
+    """Converts a DOI to a PMID using the NCBI ID Converter API.
+    Args:
+        doi (str): The DOI to be converted.
+    Returns:
+        Optional[str]: The corresponding PMID if the converter returns one,
+        otherwise None.
+    """
+    api_url = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
+    # Pass the DOI through ``params`` so requests percent-encodes it; DOIs may
+    # contain characters such as '&', '#' or '+' that would otherwise corrupt
+    # the query string.
+    response = requests.get(api_url, params={"ids": doi, "format": "json"}).json()
+    records = response.get("records", [])
+    return records[0].get("pmid", None) if records else None
+def get_doi_text(doi: str) -> str:
+    """Fetch the text of an article identified by a DOI.
+    The DOI is first converted to a PMID and the text is fetched through
+    ``get_pmid_text``. When no PMID is found, the shared ``doi_fetcher`` is
+    tried instead.
+    TODO: non pubmed sources
+    Example:
+        >>> doi = "10.1128/msystems.00045-18"
+        >>> full_text = get_doi_text(doi)
+        >>> assert "Populus Microbiome" in full_text
+    Args:
+        doi: The DOI of the article.
+    Returns:
+        The article text, or a "PMID not found ..." message when neither
+        source yields anything.
+    """
+    pmid = doi_to_pmid(doi)
+    if pmid:
+        return get_pmid_text(pmid)
+    # No PMID for this DOI: try the DOI fetcher before giving up.
+    fetched = doi_fetcher.get_full_text(doi)
+    if fetched:
+        return fetched
+    return f"PMID not found for {doi} and not available via unpaywall"
+def get_pmid_from_pmcid(pmcid):
+    """Fetch the PMID from a PMC ID using the Entrez E-utilities `esummary`.
+    Example:
+        >>> pmcid = "PMC5048378"
+        >>> pmid = get_pmid_from_pmcid(pmcid)
+        >>> print(pmid)
+        27629041
+    Args:
+        pmcid: PMC identifier, optionally with a ``prefix:`` and/or the
+            leading ``PMC``.
+    Returns:
+        The PMID string when the summary lists one, the string
+        ``"PMID not found"`` when the response lacks the expected structure,
+        or None when the article ids contain no ``pmid`` entry.
+    """
+    if ":" in pmcid:
+        pmcid = pmcid.split(":")[1]
+    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
+    params = {"db": "pmc", "id": pmcid.replace("PMC", ""), "retmode": "json"}  # Remove "PMC" prefix if included
+    response = requests.get(url, params=params)
+    data = response.json()
+    # Extract PMID
+    try:
+        uid = data["result"]["uids"][0]  # Extract the UID
+        article_ids = data["result"][uid]["articleids"]  # Get article IDs
+        for item in article_ids:
+            if item["idtype"] == "pmid":
+                return item["value"]
+    except (KeyError, IndexError):
+        # IndexError covers an empty "uids" list, which previously escaped
+        # as an unhandled exception.
+        return "PMID not found"
+    # The summary was well-formed but listed no PMID for this article.
+    return None
+def get_pmcid_text(pmcid: str) -> str:
+    """Fetch article text for a PubMed Central ID.
+    The PMC ID is resolved through ``get_pmid_from_pmcid`` and the result is
+    handed to ``get_pmid_text``.
+    Example:
+        >>> pmcid = "PMC5048378"
+        >>> full_text = get_pmcid_text(pmcid)
+        >>> assert "integrated stress response (ISR)" in full_text
+    Args:
+        pmcid: PubMed Central ID of the article.
+    Returns:
+        Whatever ``get_pmid_text`` returns for the resolved PMID.
+    """
+    return get_pmid_text(get_pmid_from_pmcid(pmcid))
+def get_pmid_text(pmid: str) -> str:
+    """Fetch the best available text for a PubMed ID.
+    Sources are tried in order: the BioC full text, then the DOI fetcher (when
+    a DOI can be found for the PMID), and finally the PubMed title and abstract.
+    Example:
+        >>> pmid = "11"
+        >>> full_text = get_pmid_text(pmid)
+        >>> print(full_text)
+        Identification of adenylate cyclase-coupled beta-adrenergic receptors with radiolabeled beta-adrenergic antagonists.
+        <BLANKLINE>
+        No abstract available
+    Args:
+        pmid: PubMed ID of the article, optionally written as ``prefix:id``.
+    Returns:
+        The full text of the article if available, otherwise the abstract.
+    """
+    if ":" in pmid:
+        pmid = pmid.split(":")[1]
+    bioc_text = get_full_text_from_bioc(pmid)
+    if bioc_text:
+        return bioc_text
+    doi = pmid_to_doi(pmid)
+    if doi:
+        fetched = doi_fetcher.get_full_text(doi)
+        if fetched:
+            return fetched
+    # Last resort: title plus abstract from PubMed.
+    return get_abstract_from_pubmed(pmid)
+def pmid_to_doi(pmid: str) -> Optional[str]:
+    """Look up the DOI of a PubMed article via the Entrez `esummary` endpoint.
+    Args:
+        pmid: PubMed ID of the article, optionally written as ``prefix:id``.
+    Returns:
+        The value of the first article id of type ``doi``; failing that, the
+        ``elocationid`` field when it starts with ``10.``; otherwise None.
+        None is also returned when the response lacks the expected keys.
+    """
+    if ":" in pmid:
+        pmid = pmid.split(":")[1]
+    url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pmid}&retmode=json"
+    summary = requests.get(url).json()
+    try:
+        info = summary["result"][str(pmid)]
+        for article_id in info["articleids"]:
+            if article_id["idtype"] == "doi":
+                return article_id["value"]
+        # No explicit DOI entry: the elocationid sometimes carries the DOI.
+        fallback = info.get("elocationid", "")
+        return fallback if fallback.startswith("10.") else None  # DOI starts with "10."
+    except KeyError:
+        return None
+def get_full_text_from_bioc(pmid: str) -> str:
+    """Download an article's BioC XML and return the text it contains.
+    Example:
+        >>> pmid = "17299597"
+        >>> full_text = get_full_text_from_bioc(pmid)
+        >>> assert "Evolution of biological complexity." in full_text
+    Args:
+        pmid: PubMed ID of the article.
+    Returns:
+        The contents of every ``<text>`` element joined by newlines and
+        stripped, or an empty string when the request does not return HTTP 200.
+    """
+    reply = requests.get(BIOC_URL.format(pmid=pmid))
+    if reply.status_code == 200:
+        document = BeautifulSoup(reply.text, "xml")
+        # Only the <text> elements carry the passage text.
+        passages = (node.get_text() for node in document.find_all("text"))
+        return "\n".join(passages).strip()
+    return ""  # Request failed
+def get_abstract_from_pubmed(pmid: str) -> str:
+    """Return an article's title and abstract using Entrez E-utilities `efetch`.
+    Example:
+        >>> pmid = "31653696"
+        >>> abstract = get_abstract_from_pubmed(pmid)
+        >>> assert "The apparent deglycase activity of DJ-1" in abstract
+    Args:
+        pmid: PubMed ID of the article.
+    Returns:
+        The title, a blank line, then the abstract; placeholder text stands in
+        for a missing title or abstract. An empty string is returned when the
+        request does not return HTTP 200.
+    """
+    reply = requests.get(EFETCH_URL.format(pmid=pmid))
+    if reply.status_code != 200:
+        return ""
+    document = BeautifulSoup(reply.text, "xml")
+    # Title
+    title_node = document.find("ArticleTitle")
+    if title_node:
+        title = title_node.get_text().strip()
+    else:
+        title = "No title available"
+    # Abstract, which may be split into several sections
+    sections = document.find_all("AbstractText")
+    if sections:
+        abstract = "\n".join(section.get_text().strip() for section in sections)
+    else:
+        abstract = "No abstract available"
+    return f"{title}\n\n{abstract}"

aurelian/utils/pytest_report_to_markdown.py ADDED Viewed

@@ -0,0 +1,67 @@
+import json
+from pathlib import Path
+from collections import defaultdict
+import re
+from typing import Iterator
+import click
+def report_md(log_path: str) -> str:
+    """Return the markdown report for a pytest report log as one string.
+    Args:
+        log_path: Path of the pytest-reportlog JSON-lines file.
+    Returns:
+        The chunks produced by ``report_md_iter`` joined with newlines.
+    """
+    chunks = report_md_iter(log_path)
+    return '\n'.join(chunks)
+def report_md_iter(log_path: str) -> Iterator[str]:
+    """
+    Stream a markdown rendering of a pytest-reportlog file.
+    Every ``TestReport`` entry with a truthy outcome yields a ``##`` heading for
+    its node id, followed by a ``###`` heading and the value for each user
+    property. After the last line a single ``## Stats`` section reports the
+    outcome and duration of the final ``TestReport`` entry read.
+    Args:
+        log_path: Path of the JSON-lines report log.
+    Returns:
+        An iterator over markdown chunks.
+    """
+    last_outcome = None
+    last_duration = None
+    with open(log_path) as handle:
+        for raw in handle:
+            record = json.loads(raw)
+            # Anything that is not a TestReport is ignored
+            if record.get('$report_type') == 'TestReport':
+                nodeid = record['nodeid']
+                last_outcome = record.get('outcome')
+                last_duration = record.get('duration')
+                if last_outcome:
+                    yield f"## {nodeid}\n"
+                    for prop in record.get('user_properties', []):
+                        yield f"### {prop[0]}\n\n"
+                        yield f"{prop[1]}\n"
+        # Summary taken from the last TestReport entry seen
+        yield "## Stats\n\n"
+        if last_outcome:
+            yield f"* Outcome: {last_outcome}\n"
+        if last_duration:
+            yield f"* Duration: {last_duration}\n"
+# Command-line entry point: LOG_PATH must name an existing file (enforced by click).
+@click.command()
+@click.argument("log_path", type=click.Path(exists=True))
+def main(log_path: str):
+    # Render the report log as markdown and write it to stdout.
+    print(report_md(log_path))
+if __name__ == "__main__":
+    main()

aurelian/utils/robot_ontology_utils.py ADDED Viewed

@@ -0,0 +1,112 @@
+from typing import Dict, Optional, List, Tuple
+from aurelian.dependencies.workdir import WorkDir
+# File name, relative to the working directory, that `robot merge` writes the
+# combined import ontologies to (see run_robot_template_command).
+MERGED_IMPORT_PATH = "_imports_.owl"
+def run(cmd: str):
+    """
+    Execute a shell command and return what it printed to stdout.
+    Args:
+        cmd: The command line, passed to the shell as-is.
+    Returns:
+        The decoded standard output of the command.
+    Raises:
+        Exception: If the command exits with a non-zero status; the message
+            contains the command, its stderr and its stdout.
+    """
+    import subprocess
+    completed = subprocess.run(cmd, shell=True, capture_output=True)
+    stdout = completed.stdout.decode()
+    if completed.returncode == 0:
+        return stdout
+    stderr = completed.stderr.decode()
+    raise Exception(f"Command failed: {cmd}\nError: {stderr}\nOutput: {stdout}")
+def parse_component_name(name: str) -> Tuple[str, Optional[str]]:
+    """
+    Split a file name into its base and its extension at the last dot.
+    Example:
+        >>> parse_component_name("foo.owl")
+        ('foo', 'owl')
+        >>> parse_component_name("foo")
+        ('foo', None)
+    Args:
+        name: The file name to split.
+    Returns:
+        A ``(base, suffix)`` pair; ``suffix`` is None when the name has no dot.
+    """
+    base, dot, suffix = name.rpartition(".")
+    if not dot:
+        # No dot at all: the whole name is the base.
+        return name, None
+    return base, suffix
+def depends_on_csv(workdir: WorkDir, name: str) -> Optional[str]:
+    """
+    Find the template file that an OWL component would be built from.
+    A name without a suffix is treated as an ``owl`` component. For ``owl``
+    components, ``{base}.tsv`` and then ``{base}.csv`` are looked up in the
+    working directory.
+    Args:
+        workdir: Working directory used for the existence checks.
+        name: Component file name, with or without a suffix.
+    Returns:
+        The first template name that exists, or None when there is none or the
+        component is not an ``owl`` file.
+    """
+    stem, ext = parse_component_name(name)
+    if not ext:
+        stem, ext = name, "owl"
+    if ext != "owl":
+        return None
+    candidates = (f"{stem}.{kind}" for kind in ("tsv", "csv"))
+    return next((c for c in candidates if workdir.check_file_exists(c)), None)
+def run_robot_template_command(workdir: WorkDir, template_path: str, prefix_map: Dict[str, str], output_path: Optional[str] = None, import_ontologies: Optional[List[str]] = None) -> str:
+    """
+    Run `robot template` on a template file, building and merging imports first.
+    Each import that names an ``.owl`` file missing from the working directory
+    is generated from its ``.tsv``/``.csv`` template; any other import is
+    treated as a template and compiled to ``.owl``. All import ontologies are
+    merged into ``MERGED_IMPORT_PATH`` and passed to `robot template` as input.
+    Args:
+        workdir: Working directory the commands are run in.
+        template_path: Template file, relative to the working directory.
+        prefix_map: Prefix-to-URI mapping passed as ``--prefix`` options.
+        output_path: Where to write the OWL output. Defaults to the template
+            name with its ``.csv``/``.tsv`` suffix replaced by ``.owl``.
+        import_ontologies: Optional ontologies or templates to import.
+    Returns:
+        The path of the generated OWL file.
+    Raises:
+        Exception: If an imported ``.owl`` file is missing and has no template,
+            or if a robot command fails.
+    """
+    if output_path is None:
+        # Swap only the final suffix. A plain ".csv" -> ".owl" text replacement
+        # left ".tsv" and suffix-less templates unchanged, so the output would
+        # have overwritten the template itself.
+        template_base, template_suffix = parse_component_name(template_path)
+        if template_suffix in ("csv", "tsv"):
+            output_path = f"{template_base}.owl"
+        else:
+            output_path = f"{template_path}.owl"
+    prefixes = " ".join([f"--prefix '{k}: {v}'" for k, v in prefix_map.items()])
+    import_owls = []
+    for import_ontology in import_ontologies or []:
+        local_name, suffix = parse_component_name(import_ontology)
+        if suffix == "owl":
+            import_ontology_owl = import_ontology
+            if not workdir.check_file_exists(import_ontology_owl):
+                depends_on = depends_on_csv(workdir, import_ontology_owl)
+                # depends_on is None when no template exists; do not pass None
+                # on to check_file_exists.
+                if depends_on is None or not workdir.check_file_exists(depends_on):
+                    missing = depends_on if depends_on is not None else "tsv/csv template"
+                    raise Exception(f"Cannot make owl file {import_ontology_owl} as no {missing}")
+                run_robot_template_command(
+                    workdir,
+                    depends_on,
+                    prefix_map=prefix_map,
+                    output_path=import_ontology_owl,
+                )
+        else:
+            if suffix:
+                # Replace only the trailing suffix; str.replace would also
+                # rewrite the same text elsewhere in the name.
+                import_ontology_owl = f"{local_name}.owl"
+            else:
+                import_ontology_owl = import_ontology + ".owl"
+            run_robot_template_command(workdir, import_ontology, prefix_map=prefix_map, output_path=import_ontology_owl)
+        import_owls.append(import_ontology_owl)
+    if import_owls:
+        input_opts = [f"--input {owl}" for owl in import_owls]
+        cmd = f"cd {workdir.location} && robot merge {' '.join(input_opts)} --output {MERGED_IMPORT_PATH}"
+        run(cmd)
+        import_ontology_opt = f"--input {MERGED_IMPORT_PATH}"
+    else:
+        import_ontology_opt = ""
+    cmd = f"cd {workdir.location} && robot template {import_ontology_opt} --template {template_path} {prefixes} reason --output {output_path}"
+    run(cmd)
+    return output_path