aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
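  121. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
  122. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0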
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,71 @@
1
+ """
2
+ Configuration classes for the chemistry agent.
3
+ """
4
+ from dataclasses import dataclass, field
5
+ from typing import Optional
6
+
7
+ from pydantic import BaseModel
8
+ from aurelian.dependencies.workdir import HasWorkdir
9
+
10
+
11
class ChemicalStructure(BaseModel):
    """
    Model for representing chemical structures.

    A structure can carry a ChEBI CURIE, a SMILES string, an InChI string
    and/or a name; every field is optional.
    """
    chebi_id: Optional[str] = None
    smiles: Optional[str] = None
    inchi: Optional[str] = None
    name: Optional[str] = None

    @property
    def chebi_local_id(self) -> Optional[str]:
        """Return the part of ``chebi_id`` after the prefix, or None if unset."""
        if self.chebi_id:
            # partition never raises, so an ID stored without a prefix is
            # returned as-is instead of triggering an IndexError.
            prefix, sep, local_id = self.chebi_id.partition(":")
            return local_id if sep else prefix
        return None

    @property
    def chebi_image_url(self) -> str:
        """Return the ChEBI image URL for this structure, or "" without a ChEBI ID."""
        local_id = self.chebi_local_id
        if local_id:
            return f"https://www.ebi.ac.uk/chebi/displayImage.do?defaultImage=true&imageIndex=0&chebiId={local_id}"
        return ""

    @classmethod
    def from_id(cls, id: str) -> 'ChemicalStructure':
        """
        Build a structure from a ChEBI identifier.

        Args:
            id: Either a CURIE with a (case-insensitive) ``chebi`` prefix or a
                bare local ID; the stored ``chebi_id`` always uses ``CHEBI:``.

        Raises:
            ValueError: If the prefix is not ChEBI or the local part contains ":".
        """
        prefix, sep, local_id = id.partition(":")
        if not sep:
            return cls(chebi_id="CHEBI:" + id)
        if prefix.lower() != "chebi":
            raise ValueError(f"Invalid prefix: {prefix}")
        if ":" in local_id:
            # Explicit error instead of an opaque tuple-unpacking failure.
            raise ValueError(f"Invalid identifier: {id}")
        return cls(chebi_id="CHEBI:" + local_id)

    @classmethod
    def from_anything(cls, id: str) -> 'ChemicalStructure':
        """
        Build a structure from a ChEBI CURIE or a SMILES string.

        Args:
            id: A ``CHEBI:``-prefixed identifier or a SMILES string.

        Raises:
            ValueError: If the input is neither a ChEBI CURIE nor valid SMILES.
        """
        prefix, sep, _ = id.partition(":")
        if sep and prefix.lower() == "chebi":
            return cls.from_id(id)
        # A colon alone does not make the input a CURIE: SMILES uses ":" for
        # aromatic bonds and atom classes, so try SMILES before rejecting.
        from rdkit import Chem
        mol = Chem.MolFromSmiles(id)
        if mol:
            return cls(smiles=id)
        if sep:
            # CURIE-shaped but not ChEBI: let from_id report the bad prefix.
            return cls.from_id(id)
        raise ValueError(f"Invalid identifier: {id}")
54
+
55
+
56
@dataclass
class ChemistryDependencies(HasWorkdir):
    """
    Dependencies (configuration) handed to the chemistry agent and its tools.
    """
    # Maximum number of results requested from a search.
    max_search_results: int = 30
62
+
63
+
64
def get_config() -> ChemistryDependencies:
    """
    Build the configuration object for the Chemistry agent.

    Returns:
        ChemistryDependencies: A new instance using the default settings
    """
    config = ChemistryDependencies()
    return config
@@ -0,0 +1,79 @@
1
+ """
2
+ Evaluation module for the Chemistry agent.
3
+
4
+ This module implements evaluations for the Chemistry agent using the pydantic-ai-evals framework.
5
+ """
6
+ import asyncio
7
+ import sys
8
+ from typing import Optional, Any, Dict, Callable, Awaitable
9
+
10
+ from aurelian.evaluators.model import MetadataDict, metadata
11
+ from aurelian.evaluators.substring_evaluator import SubstringEvaluator
12
+ from pydantic_evals import Case, Dataset
13
+ from pydantic_evals.evaluators import LLMJudge
14
+
15
+ from aurelian.agents.chemistry.chemistry_agent import chemistry_agent
16
+ from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
17
+
18
class ChemistryMetadata(Dict[str, Any]):
    """String-keyed dictionary used as metadata for Chemistry evaluations."""
21
+
22
# Individual evaluation cases. Each expected_output is a keyword the answer
# is expected to mention.

# Caffeine: the answer should mention the methylxanthine structure.
case1 = Case(
    name="caffeine_structure",
    inputs="Explain the structure of caffeine (CHEBI:27732)",
    expected_output="methylxanthine",
    metadata=metadata("medium", "structure_explanation"),
)

# Aspirin: the answer should mention the acetyl group; a case-level judge
# additionally checks the structure/property explanation.
case2 = Case(
    name="aspirin_properties",
    inputs="What does the structure of aspirin (CHEBI:15365) tell us about its properties?",
    expected_output="acetyl",
    metadata=metadata("medium", "structure_property_relationship"),
    evaluators=[
        LLMJudge(
            rubric="Answer should explain how the acetyl group affects aspirin's properties and mention its action as a COX inhibitor",
            include_input=True,
        ),
    ],
)

# The SMILES below is aspirin.
case3 = Case(
    name="smiles_interpretation",
    inputs="Interpret this SMILES: CC(=O)OC1=CC=CC=C1C(=O)O",
    expected_output="aspirin",
    metadata=metadata("hard", "smiles_interpretation"),
)

# Paracetamol: the answer should identify the amide group.
case4 = Case(
    name="functional_groups",
    inputs="Identify all functional groups in paracetamol (CHEBI:46195)",
    expected_output="amide",
    metadata=metadata("medium", "functional_group_identification"),
)
56
+
57
def create_eval_dataset() -> Dataset[str, str, MetadataDict]:
    """
    Assemble the evaluation dataset for the Chemistry agent.

    Returns:
        Dataset holding the four module-level cases together with the
        dataset-level evaluators (a substring check and an LLM judge)
    """
    # Evaluators applied to every case in the dataset.
    substring_check = SubstringEvaluator()
    accuracy_judge = LLMJudge(
        rubric="Answer should be scientifically accurate and use proper chemistry terminology",
        model="anthropic:claude-3-7-sonnet-latest",
    )

    return Dataset(
        cases=[case1, case2, case3, case4],
        evaluators=[substring_check, accuracy_judge],
    )
@@ -0,0 +1,50 @@
1
+ """
2
+ Gradio UI for the chemistry agent.
3
+ """
4
+ from typing import List, Optional
5
+
6
+ import gradio as gr
7
+
8
+ from aurelian.agents.chemistry.chemistry_agent import chemistry_agent
9
+ from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
10
+ from aurelian.utils.async_utils import run_sync
11
+
12
+
13
def chat(deps: Optional[ChemistryDependencies] = None, workdir: Optional[str] = None, **kwargs):
    """
    Initialize a chat interface for the chemistry agent.

    Args:
        deps: Optional dependencies configuration; a default
            ChemistryDependencies is created when omitted
        workdir: Optional working directory path; when given it replaces the
            location of ``deps.workdir``
        **kwargs: Additional arguments to pass to the agent

    Returns:
        A Gradio chat interface
    """
    if deps is None:
        deps = ChemistryDependencies()

    if workdir:
        deps.workdir.location = workdir

    def get_info(query: str, history: List[str]) -> str:
        print(f"QUERY: {query}")
        print(f"HISTORY: {history}")
        if history:
            # Put the heading on its own line; previously it was glued
            # directly onto the end of the user's question.
            query += "\n\n## History" + "".join(f"\n{h}" for h in history)
        result = run_sync(lambda: chemistry_agent.run_sync(query, deps=deps, **kwargs))
        return result.data

    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Chemistry AI Assistant",
        examples=[
            ["Explain the structure of caffeine (CHEBI:27732)"],
            ["What does the structure of aspirin (CHEBI:15365) tell us about its properties?"],
            ["Interpret this SMILES: CC(=O)OC1=CC=CC=C1C(=O)O"],
        ],
    )
@@ -0,0 +1,120 @@
1
+ """
2
+ MCP tools for working with chemical structures.
3
+ """
4
+ import os
5
+ from typing import Dict, List
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ import aurelian.agents.chemistry.chemistry_tools as ct
10
+ import aurelian.agents.filesystem.filesystem_tools as fst
11
+ from aurelian.agents.chemistry.chemistry_agent import SYSTEM
12
+ from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies
13
+ from pydantic_ai import RunContext
14
+
15
+ # Initialize FastMCP server
16
+ mcp = FastMCP("chemistry", instructions=SYSTEM)
17
+
18
+
19
+ from aurelian.dependencies.workdir import WorkDir
20
+
21
def deps() -> ChemistryDependencies:
    """
    Create chemistry dependencies with a working directory taken from the environment.

    The location is read from ``AURELIAN_WORKDIR`` and falls back to
    ``/tmp/aurelian`` when the variable is not set.
    """
    dependencies = ChemistryDependencies()
    dependencies.workdir = WorkDir(os.getenv("AURELIAN_WORKDIR", "/tmp/aurelian"))
    return dependencies
27
+
28
def ctx() -> RunContext[ChemistryDependencies]:
    """
    Build a run context for calling the chemistry tools from the MCP server.

    Only ``deps`` is populated; ``model``, ``usage`` and ``prompt`` are None.
    """
    return RunContext[ChemistryDependencies](
        deps=deps(),
        model=None,
        usage=None,
        prompt=None,
    )
34
+
35
+
36
@mcp.tool()
async def draw_structure_and_interpret(identifier: str, question: str) -> str:
    """
    Draw a chemical structure and analyze it.

    Args:
        identifier: A ChEBI ID (e.g., CHEBI:16236) or SMILES string
        question: A specific question about the structure

    Returns:
        Analysis of the structure in response to the question
    """
    run_context = ctx()
    analysis = await ct.draw_structure_and_interpret(run_context, identifier, question)
    return analysis
49
+
50
+
51
@mcp.tool()
async def chebi_search_terms(query: str) -> List[Dict]:
    """
    Search ChEBI for a term.

    Args:
        query: The search text

    Returns:
        A list of matching ChEBI terms
    """
    run_context = ctx()
    matches = await ct.chebi_search_terms(run_context, query)
    return matches
63
+
64
+
65
@mcp.tool()
async def search_web_for_chemistry(query: str) -> str:
    """
    Search the web for chemistry information.

    Args:
        query: The search query

    Returns:
        Search results with summaries
    """
    # The underlying tool takes only the query (no run context); passing
    # ctx() as well raised a TypeError on every call.
    return await ct.search_web_for_chemistry(query)
77
+
78
+
79
@mcp.tool()
async def retrieve_chemistry_web_page(url: str) -> str:
    """
    Fetch the contents of a web page related to chemistry.

    Args:
        url: The URL to fetch

    Returns:
        The contents of the web page
    """
    # The underlying tool takes only the URL (no run context); passing
    # ctx() as well raised a TypeError on every call.
    return await ct.retrieve_chemistry_web_page(url)
91
+
92
+
93
@mcp.tool()
async def inspect_file(data_file: str) -> str:
    """
    Inspect a file in the working directory.

    Args:
        data_file: name of file

    Returns:
        Contents of the file
    """
    run_context = ctx()
    contents = await fst.inspect_file(run_context, data_file)
    return contents
105
+
106
+
107
@mcp.tool()
async def list_files() -> str:
    """
    List files in the working directory.

    Returns:
        List of files in the working directory
    """
    run_context = ctx()
    listing = await fst.list_files(run_context)
    return listing
116
+
117
+
118
if __name__ == "__main__":
    # Start the chemistry MCP server using the stdio transport.
    mcp.run(transport="stdio")
@@ -0,0 +1,121 @@
1
+ """
2
+ Tools for the chemistry agent.
3
+ """
4
+ import io
5
+ import httpx
6
+ from functools import lru_cache
7
+ from typing import List, Dict, Optional
8
+
9
+ from oaklib import get_adapter
10
+ from pydantic_ai import RunContext, BinaryContent, ModelRetry
11
+
12
+ from aurelian.agents.chemistry.chemistry_config import ChemistryDependencies, ChemicalStructure
13
+ from aurelian.utils.ontology_utils import search_ontology
14
+ from aurelian.utils.search_utils import web_search, retrieve_web_page
15
+
16
+
17
@lru_cache
def get_chebi_adapter():
    """Return the oaklib adapter for ChEBI; the result is cached after the first call."""
    chebi_selector = "sqlite:obo:chebi"
    return get_adapter(chebi_selector)
21
+
22
+
23
def smiles_to_image(smiles: str) -> bytes:
    """
    Render a molecule given as SMILES into PNG image data.

    Args:
        smiles: The SMILES representation of a molecule

    Returns:
        bytes: PNG image of the molecular structure

    Raises:
        ValueError: If the SMILES string is invalid
    """
    # rdkit is imported lazily, only when a drawing is actually requested.
    from rdkit import Chem
    from rdkit.Chem import Draw

    molecule = Chem.MolFromSmiles(smiles)
    if not molecule:
        raise ValueError(f"Invalid SMILES: {smiles}")

    with io.BytesIO() as buffer:
        Draw.MolToImage(molecule).save(buffer, format='PNG')
        return buffer.getvalue()
45
+
46
+
47
async def draw_structure_and_interpret(ctx: RunContext[ChemistryDependencies], identifier: str, question: str) -> str:
    """
    Draw a chemical structure and analyze it.

    For a ChEBI identifier the image is downloaded from the ChEBI image URL;
    for a SMILES string it is rendered locally. The image and the question are
    then passed to the structure image agent.

    Args:
        ctx: The run context
        identifier: CHEBI ID (e.g. CHEBI:12345) or a SMILES string
        question: Question about the structure to be answered

    Returns:
        str: Analysis of the chemical structure

    Raises:
        ModelRetry: If no image could be obtained for the structure
        ValueError: If the identifier is neither a ChEBI ID nor valid SMILES
    """
    print(f"Draw Structure: {identifier}, then: {question}")
    structure = ChemicalStructure.from_anything(identifier)
    image_url = structure.chebi_image_url
    img = None

    if image_url:
        # Use the async client so the download does not block the event loop.
        async with httpx.AsyncClient() as client:
            image_response = await client.get(image_url)
        try:
            # Without this check an HTTP error page would be passed on as a PNG.
            image_response.raise_for_status()
        except httpx.HTTPStatusError as err:
            raise ModelRetry(f"Could not find image for structure: {err}") from err
        img = BinaryContent(data=image_response.content, media_type='image/png')
    elif structure.smiles:
        img = BinaryContent(data=smiles_to_image(structure.smiles), media_type='image/png')

    if not img:
        raise ModelRetry("Could not find image for structure")

    # Imported here rather than at module level, as in the original code.
    from aurelian.agents.chemistry.image_agent import structure_image_agent
    result = await structure_image_agent.run(
        [question, img],
        deps=ctx.deps)
    return result.data
79
+
80
+
81
async def chebi_search_terms(ctx: RunContext[ChemistryDependencies], query: str) -> List[Dict]:
    """
    Finds similar ontology terms to the search query in ChEBI.

    Args:
        ctx: The run context; ``ctx.deps.max_search_results`` is used as the search limit
        query: The search query

    Returns:
        List[Dict]: List of matching ChEBI terms
    """
    print(f"ChEBI Term Search: {query}")
    adapter = get_chebi_adapter()
    max_hits = ctx.deps.max_search_results
    return search_ontology(adapter, query, limit=max_hits)
94
+
95
+
96
async def search_web_for_chemistry(query: str) -> str:
    """
    Search the web using a text query.

    Args:
        query: The search query

    Returns:
        str: Matching web pages plus summaries
    """
    print(f"Web Search: {query}")
    search_results = web_search(query)
    return search_results
108
+
109
+
110
async def retrieve_chemistry_web_page(url: str) -> str:
    """
    Fetch the contents of a web page.

    Args:
        url: The URL to fetch

    Returns:
        str: The contents of the web page
    """
    print(f"Fetch URL: {url}")
    page_contents = retrieve_web_page(url)
    return page_contents
@@ -0,0 +1,15 @@
1
+ """
2
+ Agent specifically for interpreting chemical structure images.
3
+ """
4
+ from pydantic_ai import Agent
5
+
6
+ # Separate agent for image interpretation to avoid circular imports
7
+ structure_image_agent = Agent(
8
+ model='openai:gpt-4o',
9
+ system_prompt="""You are an expert chemist, able to interpret
10
+ chemical structure diagrams and answer questions on them.
11
+ Use the information in the provided chemical structure image to
12
+ answer questions about molecular properties, functional groups,
13
+ potential reactivity, or other chemical characteristics.
14
+ """
15
+ )
@@ -0,0 +1,30 @@
1
+ """
2
+ D4D (Datasheets for Datasets) agent package for extracting dataset metadata.
3
+ """
4
+
5
+ # isort: skip_file
6
+ from .d4d_agent import data_sheets_agent # noqa: E402
7
+ from .d4d_config import D4DConfig, get_config # noqa: E402
8
+ from .d4d_gradio import chat # noqa: E402
9
+ from .d4d_tools import ( # noqa: E402
10
+ get_full_schema,
11
+ process_website_or_pdf,
12
+ extract_text_from_pdf,
13
+ )
14
+
15
+ __all__ = [
16
+ # Agent
17
+ "data_sheets_agent",
18
+
19
+ # Config
20
+ "D4DConfig",
21
+ "get_config",
22
+
23
+ # Tools
24
+ "get_full_schema",
25
+ "process_website_or_pdf",
26
+ "extract_text_from_pdf",
27
+
28
+ # Gradio
29
+ "chat",
30
+ ]
@@ -0,0 +1,72 @@
1
+ """
2
+ Agent for extracting dataset metadata following the datasheets for datasets schema.
3
+ """
4
+ from pydantic_ai import Agent, RunContext
5
+
6
+ from .d4d_config import D4DConfig
7
+ from .d4d_tools import get_full_schema, process_website_or_pdf
8
+
9
+
10
+ # Create the agent, the full schema will be loaded when needed
11
+ data_sheets_agent = Agent(
12
+ model="openai:gpt-4o",
13
+ deps_type=D4DConfig,
14
+ system_prompt="""
15
+ Below is the complete datasheets for datasets schema:
16
+
17
+ {schema}
18
+
19
+ When provided with a URL to a webpage or PDF describing a dataset, your task is to fetch the
20
+ content, extract all the relevant metadata, and output a YAML document that exactly
21
+ conforms to the above schema. The output must be valid YAML with all required fields
22
+ filled in, following the schema exactly.
23
+ """,
24
+ )
25
+
26
+
27
@data_sheets_agent.system_prompt
async def add_schema(ctx: RunContext[D4DConfig]) -> str:
    """
    Supply the full schema as part of the system prompt.

    Args:
        ctx: The run context

    Returns:
        The schema text obtained from get_full_schema
    """
    return await get_full_schema(ctx)
40
+
41
+
42
@data_sheets_agent.tool
async def extract_metadata(ctx: RunContext[D4DConfig], url: str) -> str:
    """
    Extract metadata from a dataset description document or webpage.

    Args:
        ctx: The run context
        url: The URL of the dataset description (webpage or PDF)

    Returns:
        A prompt embedding the retrieved content and the URL, asking for YAML
        that follows the datasheets for datasets schema
    """
    # Fetch the document behind the URL.
    content = await process_website_or_pdf(ctx, url)

    # The instruction text is returned as-is; this function does not generate
    # the YAML itself.
    return f"""
    The following is the content of a document describing a dataset:

    {content}

    Using the complete datasheets for datasets schema provided above, extract all the metadata
    from the document and generate a YAML document that exactly conforms to that schema.
    Ensure that all required fields are present and the output is valid YAML.
    The dataset URL is: {url}

    Generate only the YAML document.
    """
@@ -0,0 +1,46 @@
1
+ """
2
+ Configuration for the D4D (Datasheets for Datasets) agent.
3
+ """
4
+ from dataclasses import dataclass
5
+ import os
6
+
7
+ from aurelian.dependencies.workdir import HasWorkdir, WorkDir
8
+
9
+
10
@dataclass
class D4DConfig(HasWorkdir):
    """Configuration for the D4D agent."""

    # URL of the datasheets-for-datasets schema YAML.
    schema_url: str = "https://raw.githubusercontent.com/monarch-initiative/ontogpt/main/src/ontogpt/templates/data_sheets_schema.yaml"

    def __post_init__(self):
        """Fall back to a default WorkDir when no workdir was supplied."""
        # Per the original author's note, HasWorkdir has no __post_init__,
        # so super() is deliberately not called here.
        if self.workdir is not None:
            return
        self.workdir = WorkDir()
21
+
22
+
23
def get_config(schema_url: str = None) -> D4DConfig:
    """
    Build the D4D configuration from an argument, environment variables or defaults.

    Args:
        schema_url: The URL to the schema YAML; takes precedence over the
            ``AURELIAN_D4D_SCHEMA_URL`` environment variable

    Returns:
        A D4DConfig instance
    """
    env_schema_url = os.environ.get("AURELIAN_D4D_SCHEMA_URL")
    workdir_path = os.environ.get("AURELIAN_WORKDIR")

    # Only build a WorkDir when the environment names a location; otherwise
    # D4DConfig is given None for its workdir.
    workdir = None
    if workdir_path:
        workdir = WorkDir(location=workdir_path)

    config = D4DConfig(workdir=workdir)

    # Precedence: explicit argument, then environment, then the class default.
    chosen_url = schema_url or env_schema_url
    if chosen_url:
        config.schema_url = chosen_url
    return config
@@ -0,0 +1,58 @@
1
+ """
2
+ Gradio interface for the D4D (Datasheets for Datasets) agent.
3
+ """
4
+ from typing import List, Optional
5
+
6
+ import gradio as gr
7
+
8
+ from .d4d_agent import data_sheets_agent
9
+ from .d4d_config import D4DConfig, get_config
10
+
11
+
12
async def process_url(url: str, history: List[str], config: D4DConfig) -> str:
    """
    Run the D4D agent on a URL and return its output.

    Args:
        url: The URL to process (webpage or PDF)
        history: Conversation history (accepted but not used here)
        config: The agent configuration, passed to the agent as its deps

    Returns:
        YAML formatted metadata
    """
    run_result = await data_sheets_agent.run(url, deps=config)
    yaml_text = run_result.data
    return yaml_text
27
+
28
+
29
def chat(deps: Optional[D4DConfig] = None, **kwargs):
    """
    Build the Gradio chat interface for the D4D agent.

    Args:
        deps: Optional dependencies configuration
        kwargs: Keyword arguments forwarded to get_config when deps is not given

    Returns:
        A Gradio ChatInterface
    """
    config = deps if deps is not None else get_config(**kwargs)

    def get_info(url: str, history: List[str]) -> str:
        """Synchronous wrapper that runs the async process_url to completion."""
        import asyncio

        pending = process_url(url, history, config)
        return asyncio.run(pending)

    example_urls = [
        "https://fairhub.io/datasets/2",
        "https://data.chhs.ca.gov/dataset/99bc1fea-c55c-4377-bad8-f00832fd195d/resource/5a6d5fe9-36e6-4aca-ba4c-bf6edc682cf5/download/hci_crime_752-narrative_examples-10-30-15-ada.pdf",
    ]
    return gr.ChatInterface(
        fn=get_info,
        type="messages",
        title="Datasheets for Datasets Agent",
        description="Enter a URL to a webpage or PDF describing a dataset. The agent will generate metadata in YAML format according to the complete datasheets for datasets schema.",
        examples=example_urls,
    )