aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
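  121. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
  122. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0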
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,68 @@
1
+ import logfire
2
+ import pystow
3
+ from cachetools.func import lru_cache
4
+ from linkml_store.api import Collection
5
+ from linkml_store.api.stores.duckdb import DuckDBDatabase
6
+ from linkml_store.index import LLMIndexer
7
+ from oaklib import BasicOntologyInterface, get_adapter
8
+
9
+ llm_indexer = LLMIndexer()
10
+
11
+
12
@lru_cache
def get_collection_for_adapter(handle: str, name: str) -> Collection:
    """
    Retrieve or create a cached ontology collection.

    The collection lives in a DuckDB file named ``<name>.duckdb`` inside the
    pystow ``aurelian/indexes`` directory. When the collection already holds
    rows it is returned untouched; otherwise it is filled with one
    ``{"id": ..., "label": ...}`` object per entity of the ontology.

    Args:
        handle (str): The ontology handle (e.g., `sqlite:obo:uberon`).
        name (str): The name of the ontology (e.g., `uberon`).

    Returns:
        Collection: The indexed ontology collection.
    """
    cache_dir = pystow.join("aurelian", "indexes")
    duckdb_path = str(cache_dir / f"{name}.duckdb")
    database = DuckDBDatabase(duckdb_path)
    collection = database.get_collection(name, create_if_not_exists=True)

    if collection.size() > 0:
        return collection

    # Open the ontology adapter only when the index really has to be built;
    # a populated collection never needs it.
    adapter = get_adapter(handle)
    objs = [
        {"id": entity_id, "label": label}
        for entity_id, label in adapter.labels(adapter.entities())
    ]
    collection.insert(objs)
    return collection
36
+
37
+
38
def search_ontology(adapter: BasicOntologyInterface, query: str, limit=10):
    """
    Look up ontology terms matching a query string.

    The adapter's resource scheme and slug are combined into a handle, the
    matching cached collection is fetched, and the collection is searched
    through its "llm" index.

    Example:
        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("sqlite:obo:uberon")
        >>> terms = search_ontology(adapter, "manus")
        >>> assert len(terms) > 1
        >>> terms = search_ontology(adapter, "l~digit", limit=5)
        >>> assert len(terms) == 5

    Args:
        adapter (BasicOntologyInterface): The ontology adapter.
        query (str): The query term.
        limit (int): The maximum number of search results to return.

    Returns:
        List[Tuple[str, str]]: A list of tuples containing ontology term IDs and labels.
    """
    resource = adapter.resource
    scheme = resource.scheme
    name = resource.slug
    handle = f"{scheme}:{name}"
    # The collection is keyed by the last colon-separated part of the slug
    local_name = name.split(":")[-1]

    collection = get_collection_for_adapter(handle, local_name)
    with logfire.span("search_ontology {name} {query}", name=name, query=query):
        print(f"Searching {scheme}:{name} for {query}")
        results = collection.search(query, limit=limit, index_name="llm")
        return [(row["id"], row["label"]) for row in results.rows]
@@ -0,0 +1,23 @@
1
+ import tempfile
2
+ import requests
3
+ from pdfminer.high_level import extract_text
4
+
5
+
6
def extract_text_from_pdf(pdf_url: str) -> str:
    """
    Download and extract text from a PDF given its URL, using a temporary file.

    Failures are reported through the return value rather than by raising,
    so callers always receive a string.

    Args:
        pdf_url: URL of the PDF document to download.

    Returns:
        The extracted text with surrounding whitespace removed, or one of the
        ``"Error..."`` messages when the download fails, extraction fails, or
        the document yields no text.
    """
    try:
        # A timeout keeps a stalled server from blocking the caller forever
        response = requests.get(pdf_url, timeout=60)
    except requests.RequestException:
        return "Error: Unable to retrieve PDF."
    if response.status_code != 200:
        return "Error: Unable to retrieve PDF."

    try:
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as temp_pdf:
            temp_pdf.write(response.content)
            temp_pdf.flush()  # Ensure all data is written before reading

            text = extract_text(temp_pdf.name)
    except Exception as e:
        # Deliberately broad: any extraction problem is turned into a message
        return f"Error extracting PDF text: {e}"

    # Strip first so whitespace-only output is also reported as "no text"
    text = text.strip() if text else ""
    return text or "Error: No text extracted from PDF."
@@ -0,0 +1,100 @@
1
+ import json
2
+ from pathlib import Path
3
+ from collections import defaultdict
4
+ import re
5
+
6
+
7
def parse_reportlog(log_path: str):
    """Parse pytest-reportlog output into structured format.

    Only entries whose ``$report_type`` is ``TestReport`` are used. Several
    entries may share one node id (for example one per test phase); they are
    combined into a single record.

    Args:
        log_path: Path to the JSON-lines report log.

    Returns:
        A mapping from node id to a dict with any of the keys:
        ``outcome`` (the first outcome that is not ``"passed"``, else
        ``"passed"``), ``duration`` (sum of the reported durations),
        ``properties`` (merged ``user_properties``) and ``parameters`` (the
        text between the first ``[`` and the last ``]`` of the node id).
    """
    tests = defaultdict(dict)

    with open(log_path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines, e.g. a trailing newline at end of file
            if not line.strip():
                continue
            entry = json.loads(line)

            # Only process TestReport entries
            if entry.get('$report_type') != 'TestReport':
                continue

            nodeid = entry['nodeid']
            record = tests[nodeid]

            # A later "passed" report must not hide an earlier failure/skip
            if 'outcome' in entry and record.get('outcome', 'passed') == 'passed':
                record['outcome'] = entry['outcome']

            # Accumulate instead of keeping only the last report's duration
            if 'duration' in entry:
                record['duration'] = record.get('duration', 0) + (entry['duration'] or 0)

            # Merge user_properties so an empty later report cannot erase them
            if 'user_properties' in entry:
                record.setdefault('properties', {}).update(dict(entry['user_properties']))

            # Store parameters from nodeid
            # Extract from something like: test_search_ontology[sqlite:obo:bfo-3D spatial-10-expected0]
            start = nodeid.find('[')
            end = nodeid.rfind(']')
            if start != -1 and end > start:
                record['parameters'] = nodeid[start + 1:end]

    return tests
42
+
43
+
44
def _escape_table_cell(value) -> str:
    """Return *value* as text that fits in a single markdown table cell."""
    text = str(value)
    # A raw pipe ends the cell and a raw newline ends the row
    return text.replace('|', '\\|').replace('\r\n', '<br>').replace('\n', '<br>')


def generate_markdown(tests):
    """Convert test results to markdown documentation.

    Args:
        tests: Mapping from node id to a result dict that may contain
            ``properties``, ``duration`` and ``outcome`` (the shape produced
            by ``parse_reportlog``).

    Returns:
        A markdown document with one section per test name, each holding a
        table with one row per node id.
    """
    md = ["# Test Results Documentation\n"]

    # Group tests by their base function name
    test_groups = defaultdict(list)
    for nodeid, data in tests.items():
        # Split nodeid into parts: path::function[params]
        parts = nodeid.split('::')
        # Fall back to the whole id when it contains no '::' separator
        function_part = parts[1] if len(parts) > 1 else parts[0]
        base_name = function_part.split('[')[0]
        test_groups[base_name].append((nodeid, data))

    for base_name, group in test_groups.items():
        md.append(f"## {base_name}\n")

        # Create table for all test runs
        md.append("### Test Runs\n")

        # Headers: Parameters, Properties, Duration, Outcome
        md.append('| Parameters | Properties | Duration (s) | Outcome |')
        md.append('|------------|------------|-------------|---------|')

        for nodeid, data in group:
            # Extract parameters from nodeid: first '[' up to the last ']'
            start = nodeid.find('[')
            if start == -1:
                params = ''
            else:
                end = nodeid.rfind(']')
                params = nodeid[start + 1:end] if end > start else nodeid[start + 1:]

            # Format properties
            props = data.get('properties') or {}
            props_str = '; '.join(f"{k}: {v}" for k, v in props.items())

            # Format duration (a missing or None duration is shown as 0.000)
            seconds = data.get('duration') or 0
            duration = f"{seconds:.3f}"

            row = [params, props_str, duration, data.get('outcome', '')]
            md.append('| ' + ' | '.join(_escape_table_cell(cell) for cell in row) + ' |')

        md.append('')
    return '\n'.join(md)
88
+
89
# Example usage:
if __name__ == '__main__':
    # Assume report.jsonl exists from running:
    #   pytest test_examples.py --report-log=report.jsonl

    log_path = Path('report.jsonl')
    tests = parse_reportlog(log_path)
    markdown = generate_markdown(tests)

    # Write markdown to file, creating the docs/ directory if it is missing
    output_path = Path('docs/unit_tests.md')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(markdown, encoding='utf-8')
@@ -0,0 +1,238 @@
1
+ import re
2
+ from typing import Optional
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+
7
+ from aurelian.utils.doi_fetcher import DOIFetcher
8
+
9
+ BIOC_URL = "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_xml/{pmid}/ascii"
10
+ PUBMED_EUTILS_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pmid}&retmode=xml"
11
+ EFETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={pmid}&retmode=xml"
12
+
13
+ DOI_PATTERN = r"/(10\.\d{4,9}/[\w\-.]+)"
14
+
15
+ doi_fetcher = DOIFetcher()
16
+
17
+
18
def extract_doi_from_url(url: str) -> Optional[str]:
    """Extract the DOI embedded in a journal URL.

    Args:
        url (str): The URL of the article.

    Returns:
        Optional[str]: The first group captured by ``DOI_PATTERN`` in the
        URL, or None when the pattern does not match.

    """
    match = re.search(DOI_PATTERN, url)
    if match:
        return match.group(1)
    return None
30
+
31
+
32
def doi_to_pmid(doi: str) -> Optional[str]:
    """Converts a DOI to a PMID using the NCBI ID Converter API.

    Args:
        doi (str): The DOI to be converted.

    Returns:
        Optional[str]: The ``pmid`` of the first returned record, or None
        when the response has no records or the record has no ``pmid``.

    """
    api_url = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
    # Pass the DOI as a query parameter so reserved characters in it are
    # percent-encoded instead of corrupting the URL.
    response = requests.get(api_url, params={"ids": doi, "format": "json"}, timeout=30)
    records = response.json().get("records", [])
    return records[0].get("pmid", None) if records else None
47
+
48
+
49
def get_doi_text(doi: str) -> str:
    """Fetch the full text of an article using a DOI.

    The DOI is first converted to a PMID; when that succeeds the text is
    fetched by PMID. Otherwise ``doi_fetcher.get_full_text`` is tried.

    TODO: non pubmed sources

    Example:
        >>> doi = "10.1128/msystems.00045-18"
        >>> full_text = get_doi_text(doi)
        >>> assert "Populus Microbiome" in full_text

    Args:
        doi: The DOI of the article.

    Returns:
        The text found for the article, or a message stating that neither a
        PMID nor a full text could be found.

    """
    pmid = doi_to_pmid(doi)
    if pmid:
        return get_pmid_text(pmid)

    full_text = doi_fetcher.get_full_text(doi)
    if full_text:
        return full_text
    return f"PMID not found for {doi} and not available via unpaywall"
74
+
75
+
76
def get_pmid_from_pmcid(pmcid):
    """Fetch the PMID from a PMC ID using the Entrez E-utilities `esummary`.

    Example:
        >>> pmcid = "PMC5048378"
        >>> pmid = get_pmid_from_pmcid(pmcid)
        >>> print(pmid)
        27629041

    Args:
        pmcid: PMC identifier, optionally with a ``prefix:`` and/or a leading
            ``PMC``.

    Returns:
        The PMID value from the record's article ids; the string
        ``"PMID not found"`` when the response lacks the expected structure;
        None when the record lists no ``pmid`` article id.

    """
    if ":" in pmcid:
        pmcid = pmcid.split(":")[1]
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    params = {"db": "pmc", "id": pmcid.replace("PMC", ""), "retmode": "json"}  # Remove "PMC" prefix if included

    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    # Extract PMID
    try:
        uid = data["result"]["uids"][0]  # Extract the UID
        article_ids = data["result"][uid]["articleids"]  # Get article IDs
        for item in article_ids:
            if item["idtype"] == "pmid":
                return item["value"]
    except (KeyError, IndexError):
        # IndexError: an empty "uids" list is treated like a missing key
        return "PMID not found"
    return None
108
+
109
+
110
def get_pmcid_text(pmcid: str) -> str:
    """Fetch full text from PubMed Central Open Access BioC XML.

    Example:
        >>> pmcid = "PMC5048378"
        >>> full_text = get_pmcid_text(pmcid)
        >>> assert "integrated stress response (ISR)" in full_text

    Args:
        pmcid: PMC identifier of the article.

    Returns:
        The text fetched for the corresponding PMID, or a message when no
        PMID could be resolved for the PMC ID.

    """
    pmid = get_pmid_from_pmcid(pmcid)
    # get_pmid_from_pmcid signals failure with None or a sentinel string;
    # neither is a PMID, so do not pass them on to the text lookup.
    if not pmid or pmid == "PMID not found":
        return f"PMID not found for {pmcid}"
    return get_pmid_text(pmid)
126
+
127
+
128
def get_pmid_text(pmid: str) -> str:
    """Fetch the best available text for a PubMed ID.

    Sources are tried in order: the BioC full text, then the full text
    obtained through the article's DOI, and finally the PubMed title and
    abstract.

    Example:
        >>> pmid = "11"
        >>> full_text = get_pmid_text(pmid)
        >>> print(full_text)
        Identification of adenylate cyclase-coupled beta-adrenergic receptors with radiolabeled beta-adrenergic antagonists.
        <BLANKLINE>
        No abstract available

    Args:
        pmid: PubMed ID of the article.

    Returns:
        The full text of the article if available, otherwise the abstract.

    """
    # Accept prefixed identifiers by keeping the part after the colon
    local_id = pmid.split(":")[1] if ":" in pmid else pmid

    bioc_text = get_full_text_from_bioc(local_id)
    if bioc_text:
        return bioc_text

    doi = pmid_to_doi(local_id)
    if doi:
        doi_text = doi_fetcher.get_full_text(doi)
        if doi_text:
            return doi_text

    return get_abstract_from_pubmed(local_id)
157
+
158
def pmid_to_doi(pmid: str) -> Optional[str]:
    """Look up the DOI of a PubMed article via Entrez E-utilities `esummary`.

    Args:
        pmid: PubMed ID, optionally written with a ``prefix:``.

    Returns:
        The value of the record's ``doi`` article id when present; otherwise
        a DOI found in the record's ``elocationid``; otherwise None.

    """
    if ":" in pmid:
        pmid = pmid.split(":")[1]
    url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pmid}&retmode=json"
    response = requests.get(url, timeout=30)
    data = response.json()

    try:
        article_info = data["result"][str(pmid)]
        for aid in article_info["articleids"]:
            if aid["idtype"] == "doi":
                return aid["value"]
        elocationid = article_info.get("elocationid", "")
    except KeyError:
        return None

    if elocationid.startswith("10."):  # DOI starts with "10."
        return elocationid
    # NOTE(review): elocationid appears to be reported with a label, e.g.
    # "doi: 10.1000/xyz" - confirm against live responses. Pick the DOI out
    # of such a value instead of requiring the whole string to be a DOI.
    match = re.search(r"\b(10\.\d{4,9}/\S+)", elocationid)
    if match:
        return match.group(1).rstrip(".;,")
    return None
177
+
178
+
179
def get_full_text_from_bioc(pmid: str) -> str:
    """Download the BioC XML for an article and return its text content.

    Example:
        >>> pmid = "17299597"
        >>> full_text = get_full_text_from_bioc(pmid)
        >>> assert "Evolution of biological complexity." in full_text

    Args:
        pmid: PubMed ID of the article.

    Returns:
        The newline-joined contents of every ``<text>`` element, stripped of
        surrounding whitespace, or an empty string when the request does not
        return HTTP 200.

    """
    response = requests.get(BIOC_URL.format(pmid=pmid))
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "xml")
        # Only the <text> elements are kept
        passages = []
        for text_tag in soup.find_all("text"):
            passages.append(text_tag.get_text())
        return "\n".join(passages).strip()

    return ""  # Request failed
206
+
207
+
208
def get_abstract_from_pubmed(pmid: str) -> str:
    """Fetch the title and abstract of an article from PubMed using Entrez E-utilities `efetch`.

    Example:
        >>> pmid = "31653696"
        >>> abstract = get_abstract_from_pubmed(pmid)
        >>> assert "The apparent deglycase activity of DJ-1" in abstract

    Args:
        pmid: PubMed ID of the article.

    Returns:
        The title, a blank line, and the abstract; placeholder text stands in
        for a missing title or abstract. An empty string is returned when the
        request does not return HTTP 200.

    """
    response = requests.get(EFETCH_URL.format(pmid=pmid))
    if response.status_code != 200:
        return ""

    soup = BeautifulSoup(response.text, "xml")

    title_tag = soup.find("ArticleTitle")
    if title_tag:
        title = title_tag.get_text().strip()
    else:
        title = "No title available"

    # An abstract may be split over several AbstractText sections
    abstract_tags = soup.find_all("AbstractText")
    if abstract_tags:
        sections = [tag.get_text().strip() for tag in abstract_tags]
        abstract = "\n".join(sections)
    else:
        abstract = "No abstract available"

    return f"{title}\n\n{abstract}"
@@ -0,0 +1,67 @@
1
+ import json
2
+ from pathlib import Path
3
+ from collections import defaultdict
4
+ import re
5
+ from typing import Iterator
6
+
7
+ import click
8
+
9
+
10
def report_md(log_path: str) -> str:
    """Return the fragments yielded by ``report_md_iter`` joined with newlines."""
    fragments = report_md_iter(log_path)
    return '\n'.join(fragments)
12
+
13
+ def report_md_iter(log_path: str) -> Iterator[str]:
14
+ """
15
+ Yield markdown fragments for the TestReport entries of a pytest-reportlog file.
16
+
17
+ Args:
18
+ log_path: Path to the report log, read as one JSON document per line.
19
+
20
+ Returns:
21
+ An iterator of markdown strings: a heading per reported node id, a sub-heading and value per user property, and a "Stats" section with outcome and duration.
22
+ """
23
+
24
+ with open(log_path) as f:
25
+ outcome = None
26
+ duration = None
27
+ for line in f:
28
+ entry = json.loads(line)
29
+
30
+ # Only process TestReport entries
31
+ if entry.get('$report_type') != 'TestReport':
32
+ continue
33
+
34
+ nodeid = entry['nodeid']
35
+ outcome = entry.get('outcome')
36
+ duration = entry.get('duration')
37
+
38
+ if not outcome:
39
+ continue
40
+
41
+ yield f"## {nodeid}\n"
42
+
43
+
44
+
45
+ for p in entry.get('user_properties', []):
46
+ k = p[0]
47
+ v = p[1]
48
+
49
+ yield f"### {k}\n\n"
50
+ yield f"{v}\n"
51
+
52
+ yield "## Stats\n\n"
53
+ if outcome:
54
+ yield f"* Outcome: {outcome}\n"
55
+ if duration:
56
+ yield f"* Duration: {duration}\n"
57
+
58
+
59
+
60
@click.command()
@click.argument("log_path", type=click.Path(exists=True))
def main(log_path: str):
    # Render the report log as markdown and write it to standard output
    print(report_md(log_path))


if __name__ == "__main__":
    main()
@@ -0,0 +1,112 @@
1
+ from typing import Dict, Optional, List, Tuple
2
+
3
+ from aurelian.dependencies.workdir import WorkDir
4
+
5
+ MERGED_IMPORT_PATH = "_imports_.owl"
6
+
7
def run(cmd: str) -> str:
    """
    Run a command, raising an error if the command fails,
    returning stdout

    The command is executed through the shell, so it may contain shell
    syntax such as ``cd dir && tool ...``.

    Args:
        cmd: The shell command line to execute.

    Returns:
        The command's standard output decoded as text.

    Raises:
        Exception: If the command exits with a non-zero status; the message
            contains the command, its stderr and its stdout.

    """
    import subprocess

    result = subprocess.run(cmd, shell=True, capture_output=True)
    # Decode leniently: undecodable bytes must not raise UnicodeDecodeError
    # and hide the command's real result or error output.
    stdout = result.stdout.decode(errors="replace")
    if result.returncode != 0:
        stderr = result.stderr.decode(errors="replace")
        raise Exception(f"Command failed: {cmd}\nError: {stderr}\nOutput: {stdout}")
    return stdout
25
+
26
def parse_component_name(name: str) -> Tuple[str, Optional[str]]:
    """
    Split a file name into its base and its suffix at the last dot.

    Example:

        >>> parse_component_name("foo.owl")
        ('foo', 'owl')
        >>> parse_component_name("foo")
        ('foo', None)


    Args:
        name: The file name to split.

    Returns:
        A ``(base, suffix)`` tuple; the suffix is None when the name
        contains no dot.

    """
    base, dot, suffix = name.rpartition(".")
    if not dot:
        # No dot at all: the whole name is the base
        return name, None
    return base, suffix
48
+
49
+
50
def depends_on_csv(workdir: WorkDir, name: str) -> Optional[str]:
    """
    Find the tabular file in the working directory that shares a component's base name.

    Args:
        workdir: Working directory used to check whether files exist.
        name: Component file name; a name without a suffix is treated as an
            ``owl`` component.

    Returns:
        ``<base>.tsv`` if it exists, else ``<base>.csv`` if it exists, else
        None. Always None when the component's suffix is not ``owl``.

    """
    stem, extension = parse_component_name(name)
    if not extension:
        # A bare name counts as an owl component whose base is the name itself
        stem, extension = name, "owl"
    if extension != "owl":
        return None

    # tsv is checked before csv; the generator stops at the first hit
    candidates = (f"{stem}.{tabular}" for tabular in ("tsv", "csv"))
    return next((c for c in candidates if workdir.check_file_exists(c)), None)
61
+
62
def run_robot_template_command(workdir: WorkDir, template_path: str, prefix_map: Dict[str, str], output_path: Optional[str] = None, import_ontologies: Optional[List[str]] = None) -> str:
    """
    Run `robot template` on a template file inside the working directory.

    Each import ontology is first made available as an OWL file (building it
    from a template with a recursive call when needed). All imports are then
    merged into ``MERGED_IMPORT_PATH`` and passed to `robot template` as
    input.

    Args:
        workdir: Working directory; commands run from ``workdir.location``.
        template_path: Template file, relative to the working directory.
        prefix_map: Prefix to expansion mapping, passed as ``--prefix`` options.
        output_path: Output OWL file. Defaults to the template's base name
            with an ``.owl`` suffix.
        import_ontologies: Names of ontologies to import; either OWL files or
            templates that are compiled to OWL first.

    Returns:
        The path of the generated OWL file.

    Raises:
        Exception: If an imported OWL file is missing and no template exists
            to build it from, or if a robot command fails.

    """
    import shlex

    if output_path is None:
        # Derive from the parsed base name so that a non-csv template
        # (e.g. .tsv) is never used as its own output path.
        template_base, _ = parse_component_name(template_path)
        output_path = f"{template_base}.owl"

    # Quote everything interpolated into the shell command line
    prefixes = " ".join(
        "--prefix " + shlex.quote(f"{k}: {v}") for k, v in prefix_map.items()
    )

    import_owls = []
    for import_ontology in import_ontologies or []:
        local_name, suffix = parse_component_name(import_ontology)
        if suffix == "owl":
            import_ontology_owl = import_ontology
            if not workdir.check_file_exists(import_ontology_owl):
                # The OWL file is missing: try to build it from its template
                depends_on = depends_on_csv(workdir, import_ontology_owl)
                if not depends_on:
                    raise Exception(
                        f"Cannot make owl file {import_ontology_owl} as no tsv/csv template was found"
                    )
                run_robot_template_command(
                    workdir,
                    depends_on,
                    prefix_map=prefix_map,
                    output_path=import_ontology_owl,
                )
        else:
            if suffix:
                # Swap only the trailing suffix, not every occurrence of it
                import_ontology_owl = f"{local_name}.owl"
            else:
                import_ontology_owl = import_ontology + ".owl"
            run_robot_template_command(workdir, import_ontology, prefix_map=prefix_map, output_path=import_ontology_owl)
        import_owls.append(import_ontology_owl)

    if import_owls:
        input_opts = [f"--input {shlex.quote(owl)}" for owl in import_owls]
        cmd = f"cd {workdir.location} && robot merge {' '.join(input_opts)} --output {MERGED_IMPORT_PATH}"
        run(cmd)
        import_ontology_opt = f"--input {MERGED_IMPORT_PATH}"
    else:
        import_ontology_opt = ""
    cmd = f"cd {workdir.location} && robot template {import_ontology_opt} --template {shlex.quote(template_path)} {prefixes} reason --output {shlex.quote(output_path)}"
    run(cmd)
    return output_path