aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
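  121. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
  122. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0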
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,71 @@
1
+ """
2
+ MCP tools for the D4D (Datasheets for Datasets) agent.
3
+ """
4
+ import os
5
+ from typing import Optional
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ from aurelian.agents.d4d.d4d_agent import data_sheets_agent
10
+ from aurelian.agents.d4d.d4d_config import D4DConfig
11
+ import aurelian.agents.d4d.d4d_tools as dt
12
+ from pydantic_ai import RunContext
13
+
14
+ # Initialize FastMCP server
15
+ mcp = FastMCP("d4d", instructions="Datasheets for Datasets (D4D) agent")
16
+
17
+
18
+ from aurelian.dependencies.workdir import WorkDir
19
+
20
+ def deps() -> D4DConfig:
21
+ deps = D4DConfig()
22
+ # Set the location from environment variable or default
23
+ loc = os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian")
24
+ deps.workdir = WorkDir(loc)
25
+ return deps
26
+
27
+ def ctx() -> RunContext[D4DConfig]:
28
+ rc: RunContext[D4DConfig] = RunContext[D4DConfig](
29
+ deps=deps(),
30
+ model=None, usage=None, prompt=None,
31
+ )
32
+ return rc
33
+
34
+
35
+ @mcp.system_prompt
36
+ async def add_schema() -> str:
37
+ """Add the full schema to the system prompt."""
38
+ return await dt.get_full_schema(ctx())
39
+
40
+
41
+ @mcp.tool()
42
+ async def get_full_schema(url: Optional[str] = None) -> str:
43
+ """
44
+ Load the full datasheets for datasets schema from GitHub.
45
+
46
+ Args:
47
+ url: Optional URL override for the schema location
48
+
49
+ Returns:
50
+ The schema text content
51
+ """
52
+ return await dt.get_full_schema(ctx(), url)
53
+
54
+
55
+ @mcp.tool()
56
+ async def process_website_or_pdf(url: str) -> str:
57
+ """
58
+ Process a website or PDF with dataset information.
59
+
60
+ Args:
61
+ url: URL to a website or PDF file with dataset information
62
+
63
+ Returns:
64
+ YAML formatted dataset metadata following the D4D schema
65
+ """
66
+ return await dt.process_website_or_pdf(ctx(), url)
67
+
68
+
69
+ if __name__ == "__main__":
70
+ # Initialize and run the server
71
+ mcp.run(transport='stdio')
@@ -0,0 +1,157 @@
1
+ """
2
+ Tools for the D4D (Datasheets for Datasets) agent.
3
+ """
4
+ import asyncio
5
+ import tempfile
6
+ from typing import Optional
7
+
8
+ import requests
9
+ from pdfminer.high_level import extract_text
10
+ from pydantic_ai import RunContext, ModelRetry
11
+
12
+ from aurelian.utils.search_utils import retrieve_web_page as fetch_web_page
13
+ from .d4d_config import D4DConfig
14
+
15
+
16
+ async def get_full_schema(
17
+ ctx: RunContext[D4DConfig],
18
+ url: Optional[str] = None
19
+ ) -> str:
20
+ """
21
+ Load the full datasheets for datasets schema from GitHub.
22
+
23
+ Args:
24
+ ctx: The run context
25
+ url: Optional URL override for the schema location
26
+
27
+ Returns:
28
+ The schema text content
29
+ """
30
+ try:
31
+ schema_url = url or ctx.deps.schema_url
32
+
33
+ # Execute the potentially blocking operation in a thread pool
34
+ def _fetch_schema():
35
+ response = requests.get(schema_url)
36
+ if response.status_code == 200:
37
+ return response.text
38
+ else:
39
+ raise Exception(f"Failed to load schema: HTTP {response.status_code}")
40
+
41
+ schema_text = await asyncio.to_thread(_fetch_schema)
42
+
43
+ if not schema_text or schema_text.strip() == "":
44
+ raise ModelRetry(f"Empty schema returned from URL: {schema_url}")
45
+
46
+ return schema_text
47
+ except Exception as e:
48
+ if "ModelRetry" in str(type(e)):
49
+ raise e
50
+ raise ModelRetry(f"Error loading schema: {str(e)}")
51
+
52
+
53
+ async def extract_text_from_pdf(
54
+ ctx: RunContext[D4DConfig],
55
+ pdf_url: str
56
+ ) -> str:
57
+ """
58
+ Download and extract text from a PDF given its URL.
59
+
60
+ Args:
61
+ ctx: The run context
62
+ pdf_url: The URL of the PDF to extract text from
63
+
64
+ Returns:
65
+ The extracted text content
66
+ """
67
+ try:
68
+ # Execute the potentially blocking operation in a thread pool
69
+ def _extract_pdf():
70
+ response = requests.get(pdf_url)
71
+ if response.status_code != 200:
72
+ raise Exception(f"Failed to retrieve PDF: HTTP {response.status_code}")
73
+
74
+ with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as temp_pdf:
75
+ temp_pdf.write(response.content)
76
+ temp_pdf.flush() # Ensure all data is written before reading
77
+
78
+ text = extract_text(temp_pdf.name)
79
+ if not text or text.strip() == "":
80
+ raise Exception("No text extracted from PDF")
81
+
82
+ return text.strip()
83
+
84
+ pdf_text = await asyncio.to_thread(_extract_pdf)
85
+ return pdf_text
86
+ except Exception as e:
87
+ if "ModelRetry" in str(type(e)):
88
+ raise e
89
+ raise ModelRetry(f"Error extracting PDF text: {str(e)}")
90
+
91
+
92
+ async def retrieve_web_page(
93
+ ctx: RunContext[D4DConfig],
94
+ url: str
95
+ ) -> str:
96
+ """
97
+ Retrieve the content of a web page.
98
+
99
+ Args:
100
+ ctx: The run context
101
+ url: The URL of the web page to retrieve
102
+
103
+ Returns:
104
+ The web page content
105
+ """
106
+ try:
107
+ # Execute the potentially blocking operation in a thread pool
108
+ content = await asyncio.to_thread(fetch_web_page, url)
109
+
110
+ if not content or content.strip() == "":
111
+ raise ModelRetry(f"No content found for URL: {url}")
112
+
113
+ return content
114
+ except Exception as e:
115
+ if "ModelRetry" in str(type(e)):
116
+ raise e
117
+ raise ModelRetry(f"Error retrieving web page: {str(e)}")
118
+
119
+
120
+ async def process_website_or_pdf(
121
+ ctx: RunContext[D4DConfig],
122
+ url: str
123
+ ) -> str:
124
+ """
125
+ Determine if the URL is a PDF or webpage, retrieve the content.
126
+
127
+ Args:
128
+ ctx: The run context
129
+ url: The URL of the content to process
130
+
131
+ Returns:
132
+ The extracted content from the PDF or web page
133
+ """
134
+ try:
135
+ # Check if it's a PDF by extension or content type
136
+ is_pdf = False
137
+
138
+ if url.lower().endswith(".pdf"):
139
+ is_pdf = True
140
+ else:
141
+ # Check the content type in case the file doesn't have a .pdf extension
142
+ def _check_content_type():
143
+ response = requests.head(url)
144
+ content_type = response.headers.get("Content-Type", "").lower()
145
+ return "pdf" in content_type
146
+
147
+ is_pdf = await asyncio.to_thread(_check_content_type)
148
+
149
+ # Retrieve the content based on the type
150
+ if is_pdf:
151
+ return await extract_text_from_pdf(ctx, url)
152
+ else:
153
+ return await retrieve_web_page(ctx, url)
154
+ except Exception as e:
155
+ if "ModelRetry" in str(type(e)):
156
+ raise e
157
+ raise ModelRetry(f"Error processing URL: {str(e)}")
@@ -0,0 +1,64 @@
1
+ """
2
+ Agent for extracting dataset metadata following the datasheets for datasets schema.
3
+
4
+ This module re-exports components from the d4d/ package for backward compatibility.
5
+ """
6
+ import asyncio
7
+
8
+ # Re-export from d4d package
9
+ from aurelian.agents.d4d import (
10
+ data_sheets_agent,
11
+ D4DConfig,
12
+ get_config,
13
+ get_full_schema,
14
+ process_website_or_pdf,
15
+ extract_text_from_pdf,
16
+ chat,
17
+ )
18
+
19
+ # Provide the original synchronous functions for backward compatibility
20
+ def get_full_schema_sync(url=None):
21
+ """Legacy synchronous version of get_full_schema"""
22
+ config = get_config()
23
+ ctx = data_sheets_agent._get_run_context(deps=config)
24
+ return asyncio.run(get_full_schema(ctx, url))
25
+
26
+
27
+ FULL_SCHEMA = get_full_schema_sync()
28
+
29
+
30
+ def safe_run(prompt: str):
31
+ """
32
+ Ensure an event loop is available and then call the agent's synchronous method.
33
+ """
34
+ try:
35
+ loop = asyncio.get_running_loop()
36
+ except RuntimeError:
37
+ loop = asyncio.new_event_loop()
38
+ asyncio.set_event_loop(loop)
39
+ return data_sheets_agent.run_sync(prompt)
40
+
41
+
42
+ def process_website_or_pdf_sync(url: str) -> str:
43
+ """
44
+ Legacy synchronous version of process_website_or_pdf
45
+ """
46
+ config = get_config()
47
+ ctx = data_sheets_agent._get_run_context(deps=config)
48
+
49
+ # Get the content
50
+ page_content = asyncio.run(process_website_or_pdf(ctx, url))
51
+
52
+ # Format the prompt
53
+ prompt = f"""
54
+ The following is the content of a document describing a dataset:
55
+
56
+ {page_content}
57
+
58
+ Using the complete datasheets for datasets schema provided above, extract all the metadata from the document and generate a YAML document that exactly conforms to that schema. Ensure that all required fields are present and the output is valid YAML. The dataset URL is: {url}
59
+
60
+ Generate only the YAML document.
61
+ """
62
+ # Run the agent with the prompt
63
+ result = safe_run(prompt)
64
+ return result.data
@@ -0,0 +1,33 @@
1
+ """
2
+ Diagnosis agent package for diagnosing rare diseases using the Monarch Knowledge Base.
3
+ """
4
+
5
+ from .diagnosis_agent import diagnosis_agent
6
+ from .diagnosis_config import DiagnosisDependencies, get_config
7
+ from .diagnosis_gradio import chat
8
+ from .diagnosis_tools import (
9
+ find_disease_id,
10
+ find_disease_phenotypes,
11
+ search_web,
12
+ retrieve_web_page,
13
+ get_mondo_adapter,
14
+ )
15
+
16
+ __all__ = [
17
+ # Agent
18
+ "diagnosis_agent",
19
+
20
+ # Config
21
+ "DiagnosisDependencies",
22
+ "get_config",
23
+
24
+ # Tools
25
+ "find_disease_id",
26
+ "find_disease_phenotypes",
27
+ "search_web",
28
+ "retrieve_web_page",
29
+ "get_mondo_adapter",
30
+
31
+ # Gradio
32
+ "chat",
33
+ ]
@@ -0,0 +1,53 @@
1
+ """
2
+ Agent for performing diagnoses, validated against Monarch KG.
3
+ """
4
+ from pydantic_ai import Agent
5
+
6
+ from .diagnosis_config import DiagnosisDependencies, get_config
7
+ from .diagnosis_tools import (
8
+ find_disease_id,
9
+ find_disease_phenotypes,
10
+ search_web,
11
+ retrieve_web_page,
12
+ )
13
+
14
+ # System prompt for the diagnosis agent
15
+ DIAGNOSIS_SYSTEM_PROMPT = (
16
+ "You are an expert clinical geneticist."
17
+ " Your task is to assist in diagnosing rare diseases,"
18
+ " and with determining underlying gene or variant."
19
+ " The recommended workflow is to first think of a set of candidate diseases."
20
+ " You should show your reasoning, and your candidate list (as many as appropriate)."
21
+ " You should then check your hypotheses against the Monarch knowledge base."
22
+ " You can find the Mondo ID of the disease using the `find_disease_id` function."
23
+ " You should then query the Monarch knowledge base to get a list of phenotypes for that"
24
+ " disease id, using the `find_disease_phenotypes` function."
25
+ " Present results in detail, using markdown tables unless otherwise specified."
26
+ " Try and account for all presented patient phenotypes in the table (you can"
27
+ " roll up similar phenotypes to broader categories)."
28
+ " also try and account for hallmark features of the disease not found in the patient,"
29
+ " always showing your reasoning."
30
+ " If you get something from a web search, tell me the web page."
31
+ " If you get something from the knowledge base, give provenance."
32
+ " Again, using information from the knowledge base."
33
+ " Give detailed provenance chains in <details> tags."
34
+ " Show ontology term IDs together with labels whenever possible."
35
+ " Include HPO IDs which you will get from the `find_disease_phenotypes` function"
36
+ " (never guess these, always get from the query results)."
37
+ " Stick to markdown, and all prefixed IDs should by hyperlinked with Bioregistry,"
38
+ " i.e https://bioregistry.io/{curie}."
39
+ )
40
+
41
+ # Create the diagnosis agent
42
+ diagnosis_agent = Agent(
43
+ model="openai:gpt-4o",
44
+ deps_type=DiagnosisDependencies,
45
+ result_type=str,
46
+ system_prompt=DIAGNOSIS_SYSTEM_PROMPT,
47
+ )
48
+
49
+ # Register tools
50
+ diagnosis_agent.tool(find_disease_id)
51
+ diagnosis_agent.tool(find_disease_phenotypes)
52
+ diagnosis_agent.tool_plain(search_web)
53
+ diagnosis_agent.tool_plain(retrieve_web_page)
@@ -0,0 +1,48 @@
1
+ """
2
+ Configuration for the Diagnosis agent.
3
+ """
4
+ from dataclasses import dataclass
5
+ import os
6
+ from typing import Optional
7
+
8
+ from oaklib.implementations import MonarchImplementation
9
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
10
+
11
+ # Constants
12
+ HAS_PHENOTYPE = "biolink:has_phenotype"
13
+
14
+
15
+ @dataclass
16
+ class DiagnosisDependencies(HasWorkdir):
17
+ """Configuration for the Diagnosis agent."""
18
+
19
+ # Maximum number of search results to return
20
+ max_search_results: int = 10
21
+
22
+ # Monarch adapter
23
+ monarch_adapter: Optional[MonarchImplementation] = None
24
+
25
+ def __post_init__(self):
26
+ """Initialize the config with default values."""
27
+ # HasWorkdir doesn't have a __post_init__ method, so we don't call super()
28
+ if self.workdir is None:
29
+ self.workdir = WorkDir()
30
+
31
+ # Initialize Monarch adapter if not provided
32
+ if self.monarch_adapter is None:
33
+ self.monarch_adapter = MonarchImplementation()
34
+
35
+
36
+ def get_config() -> DiagnosisDependencies:
37
+ """Get the Diagnosis configuration from environment variables or defaults."""
38
+ workdir_path = os.environ.get("AURELIAN_WORKDIR", None)
39
+ workdir = WorkDir(location=workdir_path) if workdir_path else None
40
+
41
+ # Get max search results from environment if available
42
+ max_results_env = os.environ.get("MAX_SEARCH_RESULTS")
43
+ max_results = int(max_results_env) if max_results_env and max_results_env.isdigit() else 10
44
+
45
+ return DiagnosisDependencies(
46
+ workdir=workdir,
47
+ max_search_results=max_results,
48
+ )
@@ -0,0 +1,76 @@
1
+ """
2
+ Evaluation module for the Diagnosis agent.
3
+
4
+ This module implements evaluations for the Diagnosis agent using the pydantic-ai-evals framework.
5
+ """
6
+ import asyncio
7
+ import sys
8
+ from typing import Optional, Any, Dict, Callable, Awaitable
9
+
10
+ from aurelian.evaluators.model import MetadataDict, metadata
11
+ from aurelian.evaluators.substring_evaluator import SubstringEvaluator
12
+ from pydantic_evals import Case, Dataset
13
+
14
+ from aurelian.agents.diagnosis.diagnosis_agent import diagnosis_agent
15
+ from aurelian.agents.diagnosis.diagnosis_config import DiagnosisDependencies, get_config
16
+
17
+ class DiagnosisMetadata(Dict[str, Any]):
18
+ """Simple metadata dictionary for Diagnosis evaluations."""
19
+ pass
20
+
21
+ # Define individual evaluation cases
22
+ case1 = Case(
23
+ name="hutchinson_gilford_progeria",
24
+ inputs="""Patient has growth failure, distinct facial features, alopecia, and skin aging.
25
+ Findings excluded: Pigmented nevi, cafe-au-lait spots, and photosensitivity.
26
+ Onset was in infancy.
27
+ Return diagnosis with MONDO ID""",
28
+ expected_output="MONDO:0010135", # Hutchinson-Gilford Progeria Syndrome
29
+ metadata=metadata("hard", "diagnosis")
30
+ )
31
+
32
+ case2 = Case(
33
+ name="marfan_eye_phenotypes",
34
+ inputs="What eye phenotypes does Marfan syndrome have?",
35
+ expected_output="lens", # Should mention lens dislocation/ectopia lentis
36
+ metadata=metadata("medium", "phenotype_retrieval")
37
+ )
38
+
39
+ case3 = Case(
40
+ name="eds_type1_id",
41
+ inputs="What is the ID for Ehlers-Danlos syndrome type 1?",
42
+ expected_output="MONDO:0007947", # EDS classic type 1
43
+ metadata=metadata("easy", "id_retrieval")
44
+ )
45
+
46
+ case4 = Case(
47
+ name="eds_types",
48
+ inputs="What are the kinds of Ehlers-Danlos syndrome?",
49
+ expected_output="hypermobility", # Should mention hypermobility type
50
+ metadata=metadata("medium", "classification")
51
+ )
52
+
53
+ case5 = Case(
54
+ name="eds_literature_search",
55
+ inputs="Look at phenotypes for Ehlers-Danlos classic type 2. Do a literature search to look at latest studies. What is missing from the KB?",
56
+ expected_output="study", # Should reference studies
57
+ metadata=metadata("hard", "literature_analysis")
58
+ )
59
+
60
+ def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
61
+ """
62
+ Create a dataset for evaluating the Diagnosis agent.
63
+
64
+ Returns:
65
+ Dataset of Diagnosis evaluation cases with appropriate evaluators
66
+ """
67
+ # Collect all cases
68
+ cases = [case1, case2, case3, case4, case5]
69
+
70
+ # Dataset-level evaluators
71
+ evaluators = [SubstringEvaluator()]
72
+
73
+ return Dataset(
74
+ cases=cases,
75
+ evaluators=evaluators
76
+ )
@@ -0,0 +1,52 @@
1
+ """
2
+ Gradio interface for the Diagnosis agent.
3
+ """
4
+ import os
5
+ from typing import List, Optional
6
+
7
+ import gradio as gr
8
+
9
+ from aurelian.utils.async_utils import run_sync
10
+ from .diagnosis_agent import diagnosis_agent
11
+ from .diagnosis_config import DiagnosisDependencies, get_config
12
+
13
+
14
def chat(deps: Optional[DiagnosisDependencies] = None, **kwargs):
    """
    Initialize a chat interface for the Diagnosis agent.

    Args:
        deps: Optional dependencies configuration; when omitted, the result
            of `get_config()` is used
        **kwargs: Additional arguments to pass to the agent's `run_sync` call

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = get_config()

    def get_info(query: str, history: list) -> str:
        """Run the agent on `query`, appending prior turns as a history section."""
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Start the history section on its own line so the heading is not
            # glued onto the last word of the user's query.
            query += "\n\n## History"
            for h in history:
                if isinstance(h, dict):
                    # The interface is created with type="messages", so each
                    # turn arrives as a dict with "role" and "content" keys;
                    # render it readably instead of pasting the dict repr.
                    query += f"\n{h.get('role', 'unknown')}: {h.get('content', '')}"
                else:
                    # Fall back to the plain rendering for any other shape.
                    query += f"\n{h}"
        result = run_sync(lambda: diagnosis_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Diagnosis AI Assistant",
        examples=[
            """Patient has growth failure, distinct facial features, alopecia, and skin aging.
Findings excluded: Pigmented nevi, cafe-au-lait spots, and photosensitivity.
Onset was in infancy.
Return diagnosis with MONDO ID""",
            "What eye phenotypes does Marfan syndrome have?",
            "What is the ID for Ehlers-Danlos syndrome type 1?",
            "What are the kinds of Ehlers-Danlos syndrome?",
            "Look at phenotypes for Ehlers-Danlos classic type 2. Do a literature search to look at latest studies. What is missing from the KB?",
        ],
    )
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ MCP tools for performing diagnoses, validated against Monarch KG.
4
+ """
5
+ import os
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ import aurelian.agents.filesystem.filesystem_tools as fst
10
+ from aurelian.agents.diagnosis.diagnosis_agent import DIAGNOSIS_SYSTEM_PROMPT
11
+ from aurelian.agents.diagnosis.diagnosis_config import DiagnosisDependencies, get_config
12
+ from aurelian.agents.diagnosis.diagnosis_tools import (
13
+ find_disease_id,
14
+ find_disease_phenotypes,
15
+ )
16
+ from aurelian.utils.search_utils import web_search, retrieve_web_page as fetch_web_page
17
+ from aurelian.dependencies.workdir import WorkDir
18
+
19
# Initialize the FastMCP server named "diagnosis"; the diagnosis agent's
# system prompt is passed as the server's instructions.
mcp = FastMCP("diagnosis", instructions=DIAGNOSIS_SYSTEM_PROMPT)
21
+
22
def deps() -> DiagnosisDependencies:
    """
    Build a fresh DiagnosisDependencies whose workdir comes from the environment.

    The working directory location is read from the AURELIAN_WORKDIR
    environment variable, falling back to "/tmp/diagnosis" when it is unset.

    Returns:
        A new DiagnosisDependencies instance with `workdir` assigned
    """
    dependencies = DiagnosisDependencies()
    workdir_location = os.getenv("AURELIAN_WORKDIR", "/tmp/diagnosis")
    dependencies.workdir = WorkDir(workdir_location)
    return dependencies
28
+
29
@mcp.tool()
async def search_disease(query: str) -> list:
    """
    Find diseases matching a search query.

    Args:
        query: The search term or expression to find diseases

    Returns:
        List of matching disease IDs and labels
    """
    # Fresh dependencies are built for every call, then the lookup is
    # delegated to the shared diagnosis tool.
    dependencies = deps()
    matches = await find_disease_id(dependencies, query)
    return matches
41
+
42
@mcp.tool()
async def get_disease_phenotypes(disease_id: str) -> list:
    """
    Get phenotypes associated with a disease.

    Args:
        disease_id: The disease ID (e.g., "MONDO:0007947") or label

    Returns:
        List of phenotype associations for the disease
    """
    # Fresh dependencies are built for every call, then the lookup is
    # delegated to the shared diagnosis tool.
    dependencies = deps()
    associations = await find_disease_phenotypes(dependencies, disease_id)
    return associations
54
+
55
@mcp.tool()
async def search_web(query: str) -> str:
    """
    Search the web using a text query.

    Note: This will not retrieve the full content. For that, use `retrieve_web_page`.

    Args:
        query: The search query

    Returns:
        Matching web pages plus summaries
    """
    # Thin wrapper: the search itself is done by the shared search utility.
    search_results = web_search(query)
    return search_results
69
+
70
@mcp.tool()
async def retrieve_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL of the web page to retrieve

    Returns:
        The contents of the web page
    """
    # `fetch_web_page` is the search-utils `retrieve_web_page`, imported under
    # an alias so it does not clash with this tool's own name.
    page_contents = fetch_web_page(url)
    return page_contents
82
+
83
@mcp.tool()
async def inspect_file(file_name: str) -> str:
    """
    Inspect a file in the working directory.

    Args:
        file_name: name of file to inspect

    Returns:
        File contents as string
    """
    # Delegate to the filesystem tools using freshly built dependencies.
    dependencies = deps()
    contents = await fst.inspect_file(dependencies, file_name)
    return contents
95
+
96
@mcp.tool()
async def list_files() -> str:
    """
    List files in the working directory.

    Returns:
        Newline-separated list of file names
    """
    # Ask the working directory for its file names, then emit one per line.
    file_names = deps().workdir.list_file_names()
    return "\n".join(file_names)
105
+
106
@mcp.tool()
async def write_to_file(data: str, file_name: str) -> str:
    """
    Write data to a file in the working directory.

    Args:
        data: Content to write
        file_name: Target file name

    Returns:
        Confirmation message
    """
    # Local import keeps this block self-contained.
    import sys

    # This module runs the server with transport='stdio', where stdout carries
    # the protocol messages; diagnostics must go to stderr so they cannot
    # corrupt that stream.
    print(f"Writing data to file: {file_name}", file=sys.stderr)
    deps().workdir.write_file(file_name, data)
    return f"Data written to {file_name}"
121
+
122
@mcp.tool()
async def download_web_page(url: str, local_file_name: str) -> str:
    """
    Download contents of a web page to a local file.

    Args:
        url: URL of the web page
        local_file_name: Name of the local file to save to

    Returns:
        Confirmation message
    """
    # Local import keeps this block self-contained.
    import sys

    # This module runs the server with transport='stdio', where stdout carries
    # the protocol messages; diagnostics must go to stderr so they cannot
    # corrupt that stream.
    print(f"Fetch URL: {url}", file=sys.stderr)
    data = fetch_web_page(url)
    # Store the fetched page in the working directory under the given name.
    deps().workdir.write_file(local_file_name, data)
    return f"Data written to {local_file_name}"
138
+
139
if __name__ == "__main__":
    # When executed as a script, start the MCP server over the stdio transport
    mcp.run(transport='stdio')