aurelian 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +95 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +86 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +47 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +73 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +54 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +52 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
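- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0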
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +243 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +64 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +181 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +75 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +0 -0
- aurelian/agents/monarch/monarch_agent.py +45 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +112 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +57 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/paperqa/__init__.py +27 -0
- aurelian/agents/paperqa/paperqa_agent.py +66 -0
- aurelian/agents/paperqa/paperqa_cli.py +305 -0
- aurelian/agents/paperqa/paperqa_config.py +142 -0
- aurelian/agents/paperqa/paperqa_gradio.py +90 -0
- aurelian/agents/paperqa/paperqa_mcp.py +155 -0
- aurelian/agents/paperqa/paperqa_tools.py +566 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +84 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/__main__.py +17 -0
- aurelian/agents/talisman/cli.py +70 -0
- aurelian/agents/talisman/run_talisman.py +18 -0
- aurelian/agents/talisman/talisman_agent.py +143 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +75 -0
- aurelian/agents/talisman/talisman_tools.py +962 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +72 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +0 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +121 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +1004 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/evaluators/model.py +9 -0
- aurelian/evaluators/substring_evaluator.py +30 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +18 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.1.0.dist-info/LICENSE +22 -0
- aurelian-0.1.0.dist-info/METADATA +109 -0
- aurelian-0.1.0.dist-info/RECORD +266 -0
- aurelian-0.1.0.dist-info/WHEEL +4 -0
- aurelian-0.1.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
"""
|
2
|
+
MCP tools for creating ontology mappings.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
from typing import Dict, List, Optional
|
6
|
+
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
|
9
|
+
import aurelian.agents.ontology_mapper.ontology_mapper_tools as omt
|
10
|
+
from aurelian.agents.ontology_mapper.ontology_mapper_agent import ONTOLOGY_MAPPER_SYSTEM_PROMPT
|
11
|
+
from aurelian.agents.ontology_mapper.ontology_mapper_config import OntologyMapperDependencies, get_config
|
12
|
+
from pydantic_ai import RunContext
|
13
|
+
|
14
|
+
# Initialize FastMCP server
|
15
|
+
mcp = FastMCP("ontology_mapper", instructions=ONTOLOGY_MAPPER_SYSTEM_PROMPT)
|
16
|
+
|
17
|
+
|
18
|
+
from aurelian.dependencies.workdir import WorkDir
|
19
|
+
|
20
|
+
def deps() -> OntologyMapperDependencies:
    """
    Build the dependencies object handed to the ontology mapper tools.

    The object comes from ``get_config()``; its ``workdir`` attribute is set
    to a ``WorkDir`` located at the path in the ``AURELIAN_WORKDIR``
    environment variable, or ``/tmp/aurelian`` when that variable is unset.

    Returns:
        The configured dependencies object
    """
    config = get_config()
    workdir_location = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
    config.workdir = WorkDir(workdir_location)
    return config
|
26
|
+
|
27
|
+
def ctx() -> RunContext[OntologyMapperDependencies]:
    """
    Create a run context wrapping freshly built dependencies.

    Only ``deps`` is populated; ``model``, ``usage`` and ``prompt`` are
    passed as ``None``.

    Returns:
        A run context whose ``deps`` is the result of ``deps()``
    """
    return RunContext[OntologyMapperDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
|
33
|
+
|
34
|
+
|
35
|
+
@mcp.tool()
async def search_terms(query: str, ont: Optional[str] = None, limit: int = 10) -> List[Dict]:
    """
    Search for ontology terms matching a query.

    Args:
        query: The search query text
        ont: Ontology ID to search in (e.g., 'cl', 'go', 'uberon'). The
            parameter keeps its ``None`` default for backward compatibility,
            but a value is required because the underlying tool searches
            exactly one ontology.
        limit: Maximum number of results to return

    Returns:
        List of matching ontology terms with their details

    Raises:
        ValueError: If no ontology ID is supplied
    """
    if not ont:
        raise ValueError(
            "An ontology ID is required, e.g. 'cl', 'go' or 'uberon'"
        )

    # The underlying tool is declared as (ctx, ontology_id, query): the
    # ontology comes before the query and there is no limit parameter.
    results = await omt.search_terms(ctx(), ont, query)

    # The tool caps results with its own configured maximum; apply the
    # caller's limit on top of that. Assumes the result supports slicing
    # (it is annotated as a list).
    return results[:max(limit, 0)]
|
49
|
+
|
50
|
+
|
51
|
+
@mcp.tool()
async def search_web(query: str) -> str:
    """
    Search the web for ontology-related information.

    Args:
        query: The search query

    Returns:
        Search results with summaries
    """
    # The underlying tool accepts only the query; it has no run-context
    # parameter, so passing one raises TypeError.
    return await omt.search_web(query)
|
63
|
+
|
64
|
+
|
65
|
+
@mcp.tool()
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page related to ontologies.

    Args:
        url: The URL to fetch

    Returns:
        The contents of the web page
    """
    # The underlying tool accepts only the URL; it has no run-context
    # parameter, so passing one raises TypeError.
    return await omt.retrieve_web_page(url)
|
77
|
+
|
78
|
+
|
79
|
+
if __name__ == "__main__":
    # Start the server, communicating over standard input/output.
    mcp.run(transport="stdio")
|
@@ -0,0 +1,147 @@
|
|
1
|
+
"""
|
2
|
+
Tools for the Ontology Mapper agent.
|
3
|
+
"""
|
4
|
+
import asyncio
|
5
|
+
from functools import lru_cache
|
6
|
+
from typing import Dict, List, Optional
|
7
|
+
|
8
|
+
from oaklib import get_adapter
|
9
|
+
from pydantic_ai import RunContext, ModelRetry
|
10
|
+
|
11
|
+
from aurelian.utils.ontology_utils import search_ontology
|
12
|
+
from aurelian.utils.search_utils import web_search, retrieve_web_page as fetch_web_page
|
13
|
+
from .ontology_mapper_config import OntologyMapperDependencies, get_config
|
14
|
+
|
15
|
+
|
16
|
+
@lru_cache
def get_ontology_adapter(ont: str):
    """
    Return the adapter for an ontology, memoised per argument value.

    The ID is lower-cased and used to build a ``sqlite:obo:<id>`` selector
    that is passed to ``get_adapter``.

    Args:
        ont: The ontology ID to get an adapter for (e.g. cl, go, uberon)

    Returns:
        An OAK adapter for the specified ontology
    """
    normalized = ont.lower()
    selector = f"sqlite:obo:{normalized}"
    return get_adapter(selector)
|
29
|
+
|
30
|
+
|
31
|
+
async def search_terms(
    ctx: RunContext[OntologyMapperDependencies],
    ontology_id: str,
    query: str
) -> List[Dict]:
    """
    Finds similar ontology terms to the search query.

    For example:

    ```
    search_terms("go", "cell cycle and related processes")
    ```

    Relevancy ranking is used, with semantic similarity, which means
    queries need only be close in semantic space. E.g. while GO does not
    deal with diseases, this may return relevant pathways or structures:

    ```
    search_terms("go", "terms most relevant to Parkinson disease")
    ```

    Args:
        ctx: The run context
        ontology_id: The ontology ID to search in (e.g. cl, go, uberon)
        query: The search query

    Returns:
        A list of matching ontology terms

    Raises:
        ModelRetry: If the ontology ID is malformed or not in the allowed
            list, if nothing matches, or if the search itself fails
    """
    import sys  # local import: only needed for the diagnostic line below

    # Diagnostics go to stderr; stdout carries the protocol stream when
    # these tools are served over the MCP stdio transport.
    print(f"Term Search: {ontology_id} {query}", file=sys.stderr)

    try:
        if " " in ontology_id:
            raise ModelRetry(
                "Invalid ontology ID, use an OBO style ID like cl, mondo, chebi, etc."
            )

        # Fall back to the default configuration when the context has no deps.
        config = ctx.deps or get_config()
        allowed = {ont.lower() for ont in config.ontologies}
        if ontology_id.lower() not in allowed:
            allowed_onts = ", ".join(config.ontologies)
            raise ModelRetry(
                f"Ontology '{ontology_id}' not in allowed list: {allowed_onts}"
            )

        adapter = get_ontology_adapter(ontology_id)
        # Execute the potentially blocking operation in a thread pool
        results = await asyncio.to_thread(
            search_ontology,
            adapter,
            query,
            limit=config.max_search_results
        )

        if not results:
            raise ModelRetry(f"No results found for query '{query}' in ontology '{ontology_id}'")

        return results
    except ModelRetry:
        # Already the exception type callers expect; pass it through as-is.
        raise
    except Exception as e:
        # Convert any other failure into a retry request, keeping the cause.
        raise ModelRetry(f"Error searching ontology: {str(e)}") from e
|
93
|
+
|
94
|
+
|
95
|
+
async def search_web(query: str) -> str:
    """
    Search the web using a text query.

    Note, this will not retrieve the full content, for that you
    should use `retrieve_web_page`.

    Args:
        query: The search query

    Returns:
        Matching web pages plus summaries

    Raises:
        ModelRetry: If the search returns nothing or fails
    """
    import sys  # local import: only needed for the diagnostic line below

    # Diagnostics go to stderr; stdout carries the protocol stream when
    # these tools are served over the MCP stdio transport.
    print(f"Web Search: {query}", file=sys.stderr)

    try:
        # Execute the potentially blocking operation in a thread pool
        results = await asyncio.to_thread(web_search, query)

        if not results or not results.strip():
            raise ModelRetry(f"No web search results found for query: {query}")

        return results
    except ModelRetry:
        # Already the exception type callers expect; pass it through as-is.
        raise
    except Exception as e:
        # Convert any other failure into a retry request, keeping the cause.
        raise ModelRetry(f"Error searching the web: {str(e)}") from e
|
122
|
+
|
123
|
+
|
124
|
+
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL of the web page to retrieve

    Returns:
        The contents of the web page

    Raises:
        ModelRetry: If the page is empty or cannot be retrieved
    """
    import sys  # local import: only needed for the diagnostic line below

    # Diagnostics go to stderr; stdout carries the protocol stream when
    # these tools are served over the MCP stdio transport.
    print(f"Fetch URL: {url}", file=sys.stderr)

    try:
        # Execute the potentially blocking operation in a thread pool
        content = await asyncio.to_thread(fetch_web_page, url)

        if not content or not content.strip():
            raise ModelRetry(f"No content found at URL: {url}")

        return content
    except ModelRetry:
        # Already the exception type callers expect; pass it through as-is.
        raise
    except Exception as e:
        # Convert any other failure into a retry request, keeping the cause.
        raise ModelRetry(f"Error retrieving web page: {str(e)}") from e
|
@@ -0,0 +1,27 @@
|
|
1
|
+
"""
|
2
|
+
PaperQA agent package for scientific literature search and analysis.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# isort: skip_file
|
6
|
+
from .paperqa_agent import paperqa_agent # noqa: E402
|
7
|
+
from .paperqa_config import PaperQADependencies, get_config # noqa: E402
|
8
|
+
from .paperqa_gradio import chat # noqa: E402
|
9
|
+
from .paperqa_tools import ( # noqa: E402
|
10
|
+
search_papers,
|
11
|
+
query_papers,
|
12
|
+
add_paper,
|
13
|
+
add_papers,
|
14
|
+
list_papers,
|
15
|
+
)
|
16
|
+
|
17
|
+
__all__ = [
|
18
|
+
"paperqa_agent",
|
19
|
+
"PaperQADependencies",
|
20
|
+
"get_config",
|
21
|
+
"search_papers",
|
22
|
+
"query_papers",
|
23
|
+
"add_paper",
|
24
|
+
"add_papers",
|
25
|
+
"list_papers",
|
26
|
+
"chat",
|
27
|
+
]
|
@@ -0,0 +1,66 @@
|
|
1
|
+
"""
|
2
|
+
Agent for PaperQA integration with Aurelian.
|
3
|
+
"""
|
4
|
+
import logging
|
5
|
+
from pydantic_ai import Agent
|
6
|
+
|
7
|
+
# Package-level logger for the PaperQA agent, emitting INFO and above.
paperqa_logger = logging.getLogger("aurelian.agents.paperqa")
paperqa_logger.setLevel(logging.INFO)

# Remove any handlers already attached to this logger before installing
# a fresh one.
for handler in paperqa_logger.handlers[:]:
    paperqa_logger.removeHandler(handler)

# Single stream handler with a timestamped record format.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console = logging.StreamHandler()
console.setFormatter(formatter)
console.setLevel(logging.INFO)
paperqa_logger.addHandler(console)

# Records are not passed on to handlers of ancestor loggers.
paperqa_logger.propagate = False
|
20
|
+
|
21
|
+
from .paperqa_config import PaperQADependencies
|
22
|
+
from .paperqa_tools import (
|
23
|
+
search_papers,
|
24
|
+
query_papers,
|
25
|
+
add_paper,
|
26
|
+
add_papers,
|
27
|
+
list_papers,
|
28
|
+
build_index
|
29
|
+
)
|
30
|
+
|
31
|
+
PAPERQA_SYSTEM_PROMPT = """
|
32
|
+
You are an AI assistant that helps explore scientific literature using PaperQA.
|
33
|
+
You can use different functions to search for papers and analyze them:
|
34
|
+
- `search_papers` to find papers by topic or keyword from outside this repository.
|
35
|
+
- `query_papers` to ask questions about the papers in the repository
|
36
|
+
- `add_paper` to add a specific paper by file path or URL (with auto_index=True by default)
|
37
|
+
- `add_papers` to add multiple papers from a directory (with auto_index=True by default)
|
38
|
+
- `list_papers` to see all papers in the collection
|
39
|
+
- `build_index` to manually rebuild the search index
|
40
|
+
|
41
|
+
When adding papers with `add_paper` or `add_papers`:
|
42
|
+
- For `add_paper`, the URL must be a direct link to a PDF (e.g., "https://example.com/paper.pdf")
|
43
|
+
- For `add_paper`, you can provide a citation string to attribute the source
|
44
|
+
- For `add_papers`, you provide a directory containing papers and an optional citation format
|
45
|
+
- By default, auto_index=True, which automatically rebuilds the index after adding papers
|
46
|
+
- You can set auto_index=False if you want to add multiple papers before indexing
|
47
|
+
- After adding papers with auto_index=False, use `build_index()` to make them searchable
|
48
|
+
|
49
|
+
When showing paper information, format using Markdown for readability.
|
50
|
+
When papers have been successfully retrieved, proceed to analyzing them.
|
51
|
+
"""
|
52
|
+
|
53
|
+
# The PaperQA agent: string results, PaperQA dependencies, and the system
# prompt defined above. The model check is deferred.
paperqa_agent = Agent(
    model="openai:gpt-4o-2024-11-20",
    system_prompt=PAPERQA_SYSTEM_PROMPT,
    deps_type=PaperQADependencies,
    result_type=str,
    defer_model_check=True,
)

# Register every tool function on the agent, in the same order as before.
for _tool_fn in (
    search_papers,
    query_papers,
    add_paper,
    add_papers,
    list_papers,
    build_index,
):
    paperqa_agent.tool(_tool_fn)
|
@@ -0,0 +1,305 @@
|
|
1
|
+
"""
|
2
|
+
CLI commands for the PaperQA agent.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
import asyncio
|
6
|
+
import logging
|
7
|
+
import sys
|
8
|
+
from pathlib import Path
|
9
|
+
import click
|
10
|
+
from paperqa import agent_query
|
11
|
+
|
12
|
+
from aurelian.agents.paperqa.paperqa_config import get_config
|
13
|
+
from paperqa.agents.search import get_directory_index
|
14
|
+
from paperqa.settings import IndexSettings
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
def setup_logging():
    """Configure root logging for the PaperQA CLI: INFO level, timestamped records written to stdout."""
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        handlers=[stdout_handler],
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )
|
25
|
+
|
26
|
+
|
27
|
+
def check_api_key():
    """Report whether the OPENAI_API_KEY environment variable is set and non-empty.

    When it is missing, the problem is logged as an error and echoed to the
    user before returning.

    Returns:
        bool: True if key is set, False otherwise
    """
    if os.environ.get("OPENAI_API_KEY"):
        return True

    logger.error("OPENAI_API_KEY environment variable must be set.")
    click.echo("Error: OPENAI_API_KEY environment variable must be set.")
    return False
|
38
|
+
|
39
|
+
|
40
|
+
def setup_and_configure_paper_directory(directory):
    """
    Resolve a paper directory and build PaperQA settings that point at it.

    The resolved path is stored on the config, exported as ``PQA_HOME``,
    created on disk when missing, and used for the index settings (without
    recursing into subdirectories).

    Args:
        directory: Input directory path (can be relative)

    Returns:
        tuple: (resolved_path, settings, config) tuple with properly configured settings
    """
    resolved = str(Path(directory).resolve())

    config = get_config()
    config.paper_directory = resolved
    os.environ["PQA_HOME"] = resolved

    if not os.path.exists(resolved):
        logger.info(f"Creating paper directory: {resolved}")
        os.makedirs(resolved, exist_ok=True)

    settings = config.set_paperqa_settings()
    index_settings = IndexSettings(
        name=config.index_name,
        paper_directory=resolved,
        recurse_subdirectories=False,
    )
    settings.agent.index = index_settings

    return resolved, settings, config
|
69
|
+
|
70
|
+
|
71
|
+
def get_document_files(directory):
    """
    Get all indexable document files in the given directory.

    Only regular files directly inside ``directory`` are considered;
    subdirectories are ignored even when their name ends in a document
    extension. Extension matching is case-insensitive.

    Args:
        directory: Directory to search for document files

    Returns:
        dict: Dictionary with file lists by type and a combined list, under
        the keys 'all', 'pdf', 'txt', 'html' and 'md'. Entries are bare file
        names in the order returned by ``os.listdir``.
    """
    document_extensions = ('.pdf', '.txt', '.html', '.md')
    by_type = {ext[1:]: [] for ext in document_extensions}
    all_files = []

    for name in os.listdir(directory):
        # Skip directories and other non-file entries.
        if not os.path.isfile(os.path.join(directory, name)):
            continue
        lowered = name.lower()
        for ext in document_extensions:
            if lowered.endswith(ext):
                all_files.append(name)
                by_type[ext[1:]].append(name)
                break  # a name matches at most one of these extensions

    return {'all': all_files, **by_type}
|
92
|
+
|
93
|
+
|
94
|
+
@click.group(name="paperqa")
@click.option("-v", "--verbose", count=True, help="Increase verbosity level (-v for INFO, -vv for DEBUG)")
@click.option("-q", "--quiet", is_flag=True, help="Suppress non-error output")
def paperqa_cli(verbose, quiet):
    """PaperQA management commands for indexing and querying documents.

    PaperQA supports PDF, TXT, HTML, and Markdown files in all operations.

    Examples:
        # Index documents in a directory
        aurelian paperqa index -d /path/to/papers

        # Ask a question about indexed papers
        aurelian paperqa ask "What is the role of tau protein in Alzheimer's?" -d /path/to/papers

        # List indexed papers
        aurelian paperqa list -d /path/to/papers

        # Run with increased verbosity
        aurelian paperqa --verbose index -d /path/to/papers

        # Add documents through the agent
        # (Using these commands in chat modes like Gradio or MCP)
        "Add the paper from /path/to/paper.pdf"
        "Add all papers from the directory /path/to/papers/"
    """
    setup_logging()

    # --quiet takes precedence over any -v count; otherwise the count picks
    # the level, defaulting to WARNING.
    if quiet:
        level = logging.ERROR
    elif verbose >= 2:
        level = logging.DEBUG
    elif verbose == 1:
        level = logging.INFO
    else:
        level = logging.WARNING

    logging.getLogger("aurelian.agents.paperqa").setLevel(level)
|
131
|
+
|
132
|
+
|
133
|
+
@paperqa_cli.command()
@click.option(
    "--directory", "-d",
    required=True,
    help="Paper directory containing PDF, TXT, HTML, and MD files to index",
)
def index(directory):
    """Index documents for search and querying.

    This command scans the specified directory for documents (PDF, TXT, HTML, MD)
    and creates a searchable index for them. The index is stored in the .pqa
    subdirectory of the specified directory.

    Example:
        aurelian paperqa index -d ~/research/papers
    """
    if not check_api_key():
        return

    paper_dir, settings, _ = setup_and_configure_paper_directory(directory)

    docs = get_document_files(paper_dir)

    if not docs['all']:
        logger.warning(f"No indexable documents found in {paper_dir}")
        click.echo(f"No indexable documents found in {paper_dir}")
        return

    # detailed breakdown, one line per document type that is present
    logger.info(f"Found {len(docs['all'])} documents in {paper_dir}:")
    for key, label in (('pdf', 'PDF'), ('txt', 'text'), ('html', 'HTML'), ('md', 'Markdown')):
        if docs[key]:
            logger.info(f" - {len(docs[key])} {label} files")
    logger.info(f"Index will be stored in: {paper_dir}/.pqa")
    logger.info("Indexing papers... (this may take a while)")

    async def run_index():
        # Named built_index so it does not shadow this command function.
        built_index = await get_directory_index(
            settings=settings,
            build=True,
        )
        return await built_index.index_files

    try:
        index_files = asyncio.run(run_index())
    except Exception as e:
        # Report the failure through both the logger and the console, as the
        # other early-exit paths of this CLI do.
        logger.error(f"Error indexing papers: {str(e)}")
        click.echo(f"Error indexing papers: {str(e)}")
        return

    # The count is the number of entries in the index's file listing, and all
    # supported document types are indexed, so the message names neither
    # "chunks" nor "PDF".
    # NOTE(review): presumably one entry per indexed file - confirm against paper-qa.
    message = f"Success! Indexed {len(index_files)} documents."
    logger.info(message)
    click.echo(message)
|
185
|
+
|
186
|
+
|
187
|
+
@paperqa_cli.command()
@click.argument("query", required=True)
@click.option(
    "--directory", "-d",
    required=True,
    help="Paper directory containing indexed documents",
)
def ask(query, directory):
    """Ask a question about the indexed documents.

    This command searches the indexed documents for information relevant to the
    provided query and generates an AI-powered answer with references. Make sure
    to run the 'index' command first to create an index.

    Example:
        aurelian paperqa ask "What are the key findings on tau proteins?" -d ~/research/papers
    """
    # Bail out early when the API key check fails; no query is sent.
    if not check_api_key():
        return

    paper_dir, settings, _ = setup_and_configure_paper_directory(directory)

    async def run_query():
        """Validate that an index exists, then run the query and print the answer."""
        try:
            docs = get_document_files(paper_dir)

            if not docs['all']:
                logger.warning(f"No indexable documents found in {paper_dir}")
                logger.info(f"Add documents (PDF, TXT, HTML, MD) to the directory and then run 'aurelian paperqa index -d {paper_dir}'")
                return

            try:
                # build=False: only open an existing index, never create one here.
                search_index = await get_directory_index(settings=settings, build=False)
                index_files = await search_index.index_files

                if not index_files:
                    # Documents of any supported type may be present, not only PDFs.
                    logger.warning("No indexed papers found. Documents exist but haven't been indexed.")
                    logger.info(f"Run 'aurelian paperqa index -d {paper_dir}' to index the papers.")
                    return
            except Exception as e:
                # An empty index is recognised by its error text and turned
                # into a hint; anything else goes to the outer handler.
                if "was empty, please rebuild it" in str(e):
                    logger.warning(f"Index is empty. Run 'aurelian paperqa index -d {paper_dir}' to index papers.")
                    return
                raise

            logger.info(f"Querying {len(index_files)} papers about: {query}")
            logger.info("This may take a moment...")

            response = await agent_query(
                query=query,
                settings=settings,
            )

            click.echo(
                f"Answer: {response.session.answer}"
                f"\n\nReferences: {response.session.references}"
            )

        except Exception as e:
            logger.error(f"Error querying papers: {e}")

    # asyncio.run replaces the deprecated get_event_loop()/run_until_complete
    # pairing and matches how the `index` command drives its coroutine.
    try:
        asyncio.run(run_query())
    except Exception as e:
        # Top-level CLI boundary: report instead of showing a traceback.
        logger.error(f"Error: {e}")
|
251
|
+
|
252
|
+
|
253
|
+
@paperqa_cli.command()
@click.option(
    "--directory", "-d",
    required=True,
    help="Paper directory containing documents",
)
def list(directory):
    """List documents in the directory and their indexing status.

    This command displays all documents (PDF, TXT, HTML, MD) in the specified
    directory and shows which ones have been indexed. Use this to verify that
    your documents are properly recognized and indexed.

    Example:
        aurelian paperqa list -d ~/research/papers
    """
    # NOTE(review): this function name shadows the builtin `list` for any code
    # that follows it in this module. It is kept because the decorator is given
    # no explicit name, so the CLI command name presumably derives from the
    # function name; renaming would also change the module's public attribute.
    if not check_api_key():
        return

    paper_dir, settings, _ = setup_and_configure_paper_directory(directory)

    docs = get_document_files(paper_dir)

    # Suffix -> tag table; files matching none of these are not printed.
    type_tags = (
        ('.pdf', 'PDF'),
        ('.txt', 'TXT'),
        ('.html', 'HTML'),
        ('.md', 'MD'),
    )

    logger.info(f"Documents in directory {paper_dir}:")
    for doc in docs['all']:
        lowered = doc.lower()
        for suffix, tag in type_tags:
            if lowered.endswith(suffix):
                logger.info(f" - {doc} [{tag}]")
                break

    async def list_indexed():
        """Log every entry of the existing index, or a hint when there is none."""
        try:
            # build=False: only open an existing index, never create one here.
            search_index = await get_directory_index(settings=settings, build=False)
            index_files = await search_index.index_files
            if index_files:
                logger.info(f"Indexed papers ({len(index_files)}):")
                for file in index_files:
                    logger.info(f" - {file}")
            else:
                logger.warning(f"No indexed papers found. Run 'aurelian paperqa index -d {paper_dir}' to index papers.")
        except Exception as e:
            logger.error(f"Error accessing index: {e}")
            logger.info(f"Run 'aurelian paperqa index -d {paper_dir}' to create or rebuild the index.")

    # asyncio.run replaces the deprecated get_event_loop()/run_until_complete
    # pairing and matches how the `index` command drives its coroutine.
    try:
        asyncio.run(list_indexed())
    except Exception as e:
        # Top-level CLI boundary: report instead of showing a traceback.
        logger.error(f"Error: {e}")
|