aurelian 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. aurelian/__init__.py +9 -0
  2. aurelian/agents/__init__.py +0 -0
  3. aurelian/agents/amigo/__init__.py +3 -0
  4. aurelian/agents/amigo/amigo_agent.py +77 -0
  5. aurelian/agents/amigo/amigo_config.py +85 -0
  6. aurelian/agents/amigo/amigo_evals.py +73 -0
  7. aurelian/agents/amigo/amigo_gradio.py +52 -0
  8. aurelian/agents/amigo/amigo_mcp.py +152 -0
  9. aurelian/agents/amigo/amigo_tools.py +152 -0
  10. aurelian/agents/biblio/__init__.py +42 -0
  11. aurelian/agents/biblio/biblio_agent.py +94 -0
  12. aurelian/agents/biblio/biblio_config.py +40 -0
  13. aurelian/agents/biblio/biblio_gradio.py +67 -0
  14. aurelian/agents/biblio/biblio_mcp.py +115 -0
  15. aurelian/agents/biblio/biblio_tools.py +164 -0
  16. aurelian/agents/biblio_agent.py +46 -0
  17. aurelian/agents/checklist/__init__.py +44 -0
  18. aurelian/agents/checklist/checklist_agent.py +85 -0
  19. aurelian/agents/checklist/checklist_config.py +28 -0
  20. aurelian/agents/checklist/checklist_gradio.py +70 -0
  21. aurelian/agents/checklist/checklist_mcp.py +86 -0
  22. aurelian/agents/checklist/checklist_tools.py +141 -0
  23. aurelian/agents/checklist/content/checklists.yaml +7 -0
  24. aurelian/agents/checklist/content/streams.csv +136 -0
  25. aurelian/agents/checklist_agent.py +40 -0
  26. aurelian/agents/chemistry/__init__.py +3 -0
  27. aurelian/agents/chemistry/chemistry_agent.py +46 -0
  28. aurelian/agents/chemistry/chemistry_config.py +71 -0
  29. aurelian/agents/chemistry/chemistry_evals.py +79 -0
  30. aurelian/agents/chemistry/chemistry_gradio.py +50 -0
  31. aurelian/agents/chemistry/chemistry_mcp.py +120 -0
  32. aurelian/agents/chemistry/chemistry_tools.py +121 -0
  33. aurelian/agents/chemistry/image_agent.py +15 -0
  34. aurelian/agents/d4d/__init__.py +30 -0
  35. aurelian/agents/d4d/d4d_agent.py +72 -0
  36. aurelian/agents/d4d/d4d_config.py +46 -0
  37. aurelian/agents/d4d/d4d_gradio.py +58 -0
  38. aurelian/agents/d4d/d4d_mcp.py +71 -0
  39. aurelian/agents/d4d/d4d_tools.py +157 -0
  40. aurelian/agents/d4d_agent.py +64 -0
  41. aurelian/agents/diagnosis/__init__.py +33 -0
  42. aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
  43. aurelian/agents/diagnosis/diagnosis_config.py +48 -0
  44. aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
  45. aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
  46. aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
  47. aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
  48. aurelian/agents/diagnosis_agent.py +28 -0
  49. aurelian/agents/draw/__init__.py +3 -0
  50. aurelian/agents/draw/draw_agent.py +39 -0
  51. aurelian/agents/draw/draw_config.py +26 -0
  52. aurelian/agents/draw/draw_gradio.py +50 -0
  53. aurelian/agents/draw/draw_mcp.py +94 -0
  54. aurelian/agents/draw/draw_tools.py +100 -0
  55. aurelian/agents/draw/judge_agent.py +18 -0
  56. aurelian/agents/filesystem/__init__.py +0 -0
  57. aurelian/agents/filesystem/filesystem_config.py +27 -0
  58. aurelian/agents/filesystem/filesystem_gradio.py +49 -0
  59. aurelian/agents/filesystem/filesystem_mcp.py +89 -0
  60. aurelian/agents/filesystem/filesystem_tools.py +95 -0
  61. aurelian/agents/filesystem/py.typed +0 -0
  62. aurelian/agents/github/__init__.py +0 -0
  63. aurelian/agents/github/github_agent.py +83 -0
  64. aurelian/agents/github/github_cli.py +248 -0
  65. aurelian/agents/github/github_config.py +22 -0
  66. aurelian/agents/github/github_gradio.py +152 -0
  67. aurelian/agents/github/github_mcp.py +252 -0
  68. aurelian/agents/github/github_tools.py +408 -0
  69. aurelian/agents/github/github_tools.py.tmp +413 -0
  70. aurelian/agents/goann/__init__.py +13 -0
  71. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
  72. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
  73. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
  74. aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
  75. aurelian/agents/goann/goann_agent.py +90 -0
  76. aurelian/agents/goann/goann_config.py +90 -0
  77. aurelian/agents/goann/goann_evals.py +104 -0
  78. aurelian/agents/goann/goann_gradio.py +62 -0
  79. aurelian/agents/goann/goann_mcp.py +0 -0
  80. aurelian/agents/goann/goann_tools.py +65 -0
  81. aurelian/agents/gocam/__init__.py +43 -0
  82. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
  83. aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
  84. aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
  85. aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
  86. aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
  87. aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
  88. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
  89. aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
  90. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
  91. aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
  92. aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
  93. aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
  94. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
  95. aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
  96. aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
  97. aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
  98. aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
  99. aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
  100. aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
  101. aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
  102. aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
  103. aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
  104. aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
  105. aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
  106. aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
  107. aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
  108. aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
  109. aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
  110. aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
  111. aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
  112. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
  113. aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
  114. aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
  115. aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
  116. aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
  117. aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
  118. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
  119. aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
  120. aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
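  121. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
  122. aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0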
  123. aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
  124. aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
  125. aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
  126. aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
  127. aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
  128. aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
  129. aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
  130. aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
  131. aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
  132. aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
  133. aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
  134. aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
  135. aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
  136. aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
  137. aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
  138. aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
  139. aurelian/agents/gocam/gocam_agent.py +240 -0
  140. aurelian/agents/gocam/gocam_config.py +85 -0
  141. aurelian/agents/gocam/gocam_curator_agent.py +46 -0
  142. aurelian/agents/gocam/gocam_evals.py +67 -0
  143. aurelian/agents/gocam/gocam_gradio.py +89 -0
  144. aurelian/agents/gocam/gocam_mcp.py +224 -0
  145. aurelian/agents/gocam/gocam_tools.py +294 -0
  146. aurelian/agents/linkml/__init__.py +0 -0
  147. aurelian/agents/linkml/linkml_agent.py +62 -0
  148. aurelian/agents/linkml/linkml_config.py +48 -0
  149. aurelian/agents/linkml/linkml_evals.py +66 -0
  150. aurelian/agents/linkml/linkml_gradio.py +45 -0
  151. aurelian/agents/linkml/linkml_mcp.py +186 -0
  152. aurelian/agents/linkml/linkml_tools.py +102 -0
  153. aurelian/agents/literature/__init__.py +3 -0
  154. aurelian/agents/literature/literature_agent.py +55 -0
  155. aurelian/agents/literature/literature_config.py +35 -0
  156. aurelian/agents/literature/literature_gradio.py +52 -0
  157. aurelian/agents/literature/literature_mcp.py +174 -0
  158. aurelian/agents/literature/literature_tools.py +182 -0
  159. aurelian/agents/monarch/__init__.py +25 -0
  160. aurelian/agents/monarch/monarch_agent.py +44 -0
  161. aurelian/agents/monarch/monarch_config.py +45 -0
  162. aurelian/agents/monarch/monarch_gradio.py +51 -0
  163. aurelian/agents/monarch/monarch_mcp.py +65 -0
  164. aurelian/agents/monarch/monarch_tools.py +113 -0
  165. aurelian/agents/oak/__init__.py +0 -0
  166. aurelian/agents/oak/oak_config.py +27 -0
  167. aurelian/agents/oak/oak_gradio.py +57 -0
  168. aurelian/agents/ontology_mapper/__init__.py +31 -0
  169. aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
  170. aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
  171. aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
  172. aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
  173. aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
  174. aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
  175. aurelian/agents/phenopackets/__init__.py +3 -0
  176. aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
  177. aurelian/agents/phenopackets/phenopackets_config.py +72 -0
  178. aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
  179. aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
  180. aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
  181. aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
  182. aurelian/agents/rag/__init__.py +40 -0
  183. aurelian/agents/rag/rag_agent.py +83 -0
  184. aurelian/agents/rag/rag_config.py +80 -0
  185. aurelian/agents/rag/rag_gradio.py +67 -0
  186. aurelian/agents/rag/rag_mcp.py +107 -0
  187. aurelian/agents/rag/rag_tools.py +189 -0
  188. aurelian/agents/rag_agent.py +54 -0
  189. aurelian/agents/robot/__init__.py +0 -0
  190. aurelian/agents/robot/assets/__init__.py +3 -0
  191. aurelian/agents/robot/assets/template.md +384 -0
  192. aurelian/agents/robot/robot_config.py +25 -0
  193. aurelian/agents/robot/robot_gradio.py +46 -0
  194. aurelian/agents/robot/robot_mcp.py +100 -0
  195. aurelian/agents/robot/robot_ontology_agent.py +139 -0
  196. aurelian/agents/robot/robot_tools.py +50 -0
  197. aurelian/agents/talisman/__init__.py +3 -0
  198. aurelian/agents/talisman/talisman_agent.py +126 -0
  199. aurelian/agents/talisman/talisman_config.py +66 -0
  200. aurelian/agents/talisman/talisman_gradio.py +50 -0
  201. aurelian/agents/talisman/talisman_mcp.py +168 -0
  202. aurelian/agents/talisman/talisman_tools.py +720 -0
  203. aurelian/agents/ubergraph/__init__.py +40 -0
  204. aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
  205. aurelian/agents/ubergraph/ubergraph_config.py +79 -0
  206. aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
  207. aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
  208. aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
  209. aurelian/agents/uniprot/__init__.py +37 -0
  210. aurelian/agents/uniprot/uniprot_agent.py +43 -0
  211. aurelian/agents/uniprot/uniprot_config.py +43 -0
  212. aurelian/agents/uniprot/uniprot_evals.py +99 -0
  213. aurelian/agents/uniprot/uniprot_gradio.py +48 -0
  214. aurelian/agents/uniprot/uniprot_mcp.py +168 -0
  215. aurelian/agents/uniprot/uniprot_tools.py +136 -0
  216. aurelian/agents/web/__init__.py +0 -0
  217. aurelian/agents/web/web_config.py +27 -0
  218. aurelian/agents/web/web_gradio.py +48 -0
  219. aurelian/agents/web/web_mcp.py +50 -0
  220. aurelian/agents/web/web_tools.py +108 -0
  221. aurelian/chat.py +23 -0
  222. aurelian/cli.py +800 -0
  223. aurelian/dependencies/__init__.py +0 -0
  224. aurelian/dependencies/workdir.py +78 -0
  225. aurelian/mcp/__init__.py +0 -0
  226. aurelian/mcp/amigo_mcp_test.py +86 -0
  227. aurelian/mcp/config_generator.py +123 -0
  228. aurelian/mcp/example_config.json +43 -0
  229. aurelian/mcp/generate_sample_config.py +37 -0
  230. aurelian/mcp/gocam_mcp_test.py +126 -0
  231. aurelian/mcp/linkml_mcp_tools.py +190 -0
  232. aurelian/mcp/mcp_discovery.py +87 -0
  233. aurelian/mcp/mcp_test.py +31 -0
  234. aurelian/mcp/phenopackets_mcp_test.py +103 -0
  235. aurelian/tools/__init__.py +0 -0
  236. aurelian/tools/web/__init__.py +0 -0
  237. aurelian/tools/web/url_download.py +51 -0
  238. aurelian/utils/__init__.py +0 -0
  239. aurelian/utils/async_utils.py +15 -0
  240. aurelian/utils/data_utils.py +32 -0
  241. aurelian/utils/documentation_manager.py +59 -0
  242. aurelian/utils/doi_fetcher.py +238 -0
  243. aurelian/utils/ontology_utils.py +68 -0
  244. aurelian/utils/pdf_fetcher.py +23 -0
  245. aurelian/utils/process_logs.py +100 -0
  246. aurelian/utils/pubmed_utils.py +238 -0
  247. aurelian/utils/pytest_report_to_markdown.py +67 -0
  248. aurelian/utils/robot_ontology_utils.py +112 -0
  249. aurelian/utils/search_utils.py +95 -0
  250. aurelian-0.3.2.dist-info/LICENSE +22 -0
  251. aurelian-0.3.2.dist-info/METADATA +105 -0
  252. aurelian-0.3.2.dist-info/RECORD +254 -0
  253. aurelian-0.3.2.dist-info/WHEEL +4 -0
  254. aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,693 @@
1
+ Contents lists available at ScienceDirect
2
+
3
+ BBA - Gene Regulatory Mechanisms
4
+
5
+ journal homepage: www.elsevier.com/locate/bbagrm
6
+
7
+ Gene Ontology representation for transcription factor functions
8
+
9
+ Pascale Gaudet a, *, Colin Logie b, Ruth C. Lovering c, Martin Kuiper d, Astrid Lægreid e,
10
+ Paul D. Thomas f
11
+ a Swiss-Prot group, SIB Swiss Institute of Bioinformatics, 1 Rue Michel-Servet, 1211 Genève, Switzerland
12
+ b Molecular Biology Department, Faculty of Science, Radboud University, PO box 9101, 6500HB Nijmegen, the Netherlands
13
+ c Functional Gene Annotation, Preclinical and Fundamental Science, UCL Institute of Cardiovascular Science, University College London, London, UK
14
+ d Department of Biology, Norwegian University of Science and Technology, Trondheim, Norway
15
+ e Department of Clinical and Molecular Medicine, Norwegian University of Science and Technology, Trondheim, Norway
16
+ f Division of Bioinformatics, Department of Preventive Medicine, University of Southern California, Los Angeles, CA, USA
17
+
18
+ A R T I C L E I N F O
19
+
20
+ A B S T R A C T
21
+
22
+ Keywords:
23
+ Transcription
24
+ Gene Ontology
25
+ Biological databases
26
+ Biocuration
27
+
28
+ Transcription plays a central role in defining the identity and functionalities of cells, as well as in their responses
29
+ to changes in the cellular environment. The Gene Ontology (GO) provides a rigorously defined set of concepts
30
+ that describe the functions of gene products. A GO annotation is a statement about the function of a particular
31
+ gene product, represented as an association between a gene product and the biological concept a GO term de-
32
+ fines. Critically, each GO annotation is based on traceable scientific evidence. Here, we describe the different GO
33
+ terms that are associated with proteins involved in transcription and its regulation, focusing on the standard of
34
+ evidence required to support these associations. This article is intended to help users of GO annotations un-
35
+ derstand how to interpret the annotations and can contribute to the consistency of GO annotations. We distin-
36
+ guish between three classes of activities involved in transcription or directly regulating it - general transcription
37
+ factors, DNA-binding transcription factors, and transcription co-regulators.
38
+
39
+ 1. Introduction
40
+
41
+ The Gene Ontology (GO) develops a computational model of bio-
42
+ logical systems, ranging from the molecular to the organism level, across
43
+ all species in the tree of life. GO aims to provide a comprehensive rep-
44
+ resentation of the current scientific knowledge about the functions of
45
+ gene products, namely, proteins and non-coding RNA molecules [1,2].
46
+ GO is organized in three aspects. GO Molecular Functions (MF) describe
47
+ activities that occur at the molecular level, such as “DNA binding tran-
48
+ scription factor activity” or “histone deacetylase activity”. Biological
49
+ Processes (BP) represent the larger processes or ‘biological programs’
50
+ accomplished by multiple molecular activities. Examples of broad bio-
51
+ logical process terms are “transcription” or “signal transduction”.
52
+ Cellular Components (CC) are the cellular structures in which a gene
53
+ product performs a function, either cellular compartments (e.g., “nu-
54
+ cleus” or “chromatin”), or stable macromolecular complexes of which
55
+ they are parts (e.g., “RNA polymerase II”). Together, annotations of a
56
+ gene to terms from each of those aspects describe what specific function
57
+ a gene product plays in a process and where this activity occurs in the
58
+
59
+ cell. Ideally every gene product should have an annotation from each of
60
+ the three aspects of GO.
61
+
62
+ The specific genes expressed in a given cell define the identity and
63
+ functionalities of that cell. Regulation of transcription is highly complex
64
+ and leads to differential gene expression in specific cells or under spe-
65
+ cific conditions. In human cells, it has been estimated that several
66
+ thousand proteins participate in gene expression and its regulation,
67
+ directly or indirectly [3] (Velthuijs et al. BBAGRM-D-21-00020 this
68
+ issue). This includes the general transcription machinery, the factors
69
+ that make the chromatin more or less accessible, specific DNA-binding
70
+ transcription factors, and the signaling molecules that regulate the ac-
71
+ tivity of all those proteins. This complexity is difficult to accurately
72
+ represent in ontological form. Tripathi et al. [4] redesigned that part of
73
+ the ontology in 2013 to define precise molecular functions for the
74
+ various proteins involved in transcription and its regulation. Nearly 10
75
+ years after its implementation, we had to acknowledge that this
76
+ framework was too complex and difficult to navigate, leading to
77
+ inconsistent annotations and thus poorly serving the user community.
78
+ The work described here was also motivated by the https://www.greekc.org/
79
+
80
+ * Corresponding author.
81
+
82
+ E-mail address: pascale.gaudet@sib.swiss (P. Gaudet).
83
+
84
+ https://doi.org/10.1016/j.bbagrm.2021.194752
85
+ Received 6 January 2021; Received in revised form 24 August 2021; Accepted 25 August 2021
86
+
87
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752. Available online 28 August 2021. 1874-9399/© 2021 The Author(s). Published by Elsevier B.V. This is an open access article under the CC BY license (http://creativecommons.org/licenses/by/4.0/). P. Gaudet et al.
88
+
89
+ Fig. 1. Transcription regulator activity branches of the Gene Ontology. (a) Graphical representation of the placement of the parent terms for transcription regulator
90
+ molecular functions. Black headers correspond to MF and cyan headers to BP terms. (b) Transcription regulators are dbTF and coTFs. The general transcription
91
+ initiation factors play a direct role in transcription. Top-level terms of each branch are highlighted in blue.
92
+
93
+ GREEKC consortium, whose goals include curation tools
94
+ development, reengineering of ontologies, development of curation
95
+ guidelines and text mining tools, developing platforms to analyze and
96
+ render the molecular logic of transcription regulatory networks for
97
+ which a robust infrastructure is needed. Therefore, we thoroughly
98
+ reviewed the Gene Ontology representation of molecular activities
99
+ relevant to transcription, with a simpler and more pragmatic approach,
100
+ more aligned with available experimental data.
101
+
102
+ We have revised the GO MF terms representing the activities of
103
+ proteins involved in transcription, with the input from domain experts.
104
+ In addition to RNA polymerase, we defined three different types of ac-
105
+ tivities that take place on the DNA to mediate or regulate transcription:
106
+ general transcription factors (GTFs), DNA-binding transcription factors
107
+ (dbTFs), and transcription coregulators (coTFs).
108
+
109
+ Here we present the annotation approach recommended by the GO
110
+ consortium [5], applied to the recent refactoring of the transcription
111
+
112
+ domain of GO. This approach aims to 1) help biocurators – annotation
113
+ producers - interpret published data and correctly assign the MF terms
114
+ for GTF, dbTF, or coTF to a protein, and 2) help users understand how
115
+ the data is generated and how to interpret it. The annotation of
116
+ factors involved in transcription and its regulation is challenging for
117
+ multiple reasons. Contrary to other molecular functions, for example
118
+ enzymes, where one protein or a well-defined complex catalyses a pre-
119
+ cise reaction, the measurable output of transcription activities is the
120
+ result of multiple nearly simultaneous activities of GTF, dbTF, coTF, as
121
+ well as RNA polymerase, hence, individual activities can be hard to
122
+ distinguish experimentally. Moreover, these factors often form large
123
+ complexes, such that the level of resolution of the experimental setup is
124
+ essential to determine the precise activity of any given protein. Older
125
+ experimental methods often did not provide enough details, leading to
126
+ inaccurate classifications of certain proteins. In addition, researchers use
127
+ “transcription factor” loosely, at times meaning GTF, dbTF, or coTF. This
128
+
129
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752 2 P. Gaudet et al.
130
+
131
+ Fig. 2. DNA binding branch of the Gene Ontology. This part of the Molecular Function (MF) ontology describes DNA binding. (a) Graphical representation of the
132
+ placement of the terms describing sequence-specific promoter binding. (b) Hierarchical view of the sequence-specific transcription regulatory region binding terms.
133
+
134
+ complicates the annotation process and necessitates solid expertise for
135
+ correct interpretation of the data. The experimental data itself is difficult
136
+ to parse for unambiguous assignment of a function to a protein: typi-
137
+ cally, a single experiment is insufficient for accurately determining the
138
+ function of these proteins, thus, interpretation of experimental results
139
+ that investigate dbTFs must rely on pre-existing knowledge. Also, many
140
+ proteins presumed to function as dbTFs have never been experimentally
141
+ demonstrated to bind DNA, but their role is indirectly inferred by the
142
+ presence of known specific DNA-binding domains and in some cases,
143
+ evidence of an effect on the transcription of putative direct target genes.
144
+ To add to the complexity, the presence of a DNA-binding domain in a
145
+ protein does not always imply that the protein functions as a dbTF [6].
146
+
147
+ 2. GO description of molecular functions relevant for
148
+ transcription
149
+
150
+ We distinguish between three types of activities involved in tran-
151
+ scription or directly regulating it: general transcription factors
152
+ (GO:0140223), DNA-binding transcription factors (GO:0003700), and
153
+ transcription co-regulators (GO:0003712). The general transcription
154
+ initiation factor activity term and its descendants describe the activ-
155
+ ities of general transcription initiation factors for RNA polymerase I, II
156
+ and III, which play a direct role in the biological process of transcription
157
+ at the core promoter (Sant et al., BBAGRM-D-21-00014 this issue). In
158
+ contrast, the GO:0140110 transcription regulator activity branch
159
+ describes the activities of transcription regulators: dbTF and coTFs, that
160
+
161
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752 3 P. Gaudet et al.
162
+
163
+ act at any type of cis-regulatory module (Fig. 1). DNA-binding tran-
164
+ scription factors are adaptors that bind chromatin at specific genomic
165
+ addresses to coordinately regulate the expression of gene sets. This is
166
+ encoded in the ontology via links between the DNA-binding transcrip-
167
+ tion factor activity term and its descendants and to their counterpart
168
+ branch of the MF ontology describing DNA binding. The GO:0000976
169
+ transcription regulatory region sequence-specific DNA-binding sub-tree
170
+ of GO includes terms describing specific regulatory regions, such as the
171
+ core promoter (including the TATA box and the transcription start site),
172
+ cis-regulatory regions (bound by dbTFs), and specific types of cis-
173
+ regulatory motifs (such as E-box and N-box). An overview of the GO
174
+ structure for DNA binding activities is shown in Fig. 2. The definitions
175
+ and placement of GO terms in the ontology can be viewed in the AmiGO
176
+ [7,8]; http://amigo.geneontology.org/amigo, and QuickGO [9];
177
+ https://www.ebi.ac.uk/QuickGO/ browsers.
178
+
179
+ 3. Strategy for annotating transcription-associated activities
180
+
181
+ GO terms are associated with gene products based on two general
182
+ approaches: from experimental data and from sequence inferences [10].
183
+ The GO database has a total of 8 million annotations, about 7% of which
184
+ are to human gene products. For human, there are >915,000 annota-
185
+ tions derived from experimental data (GO release 2020-10-10 obtained
186
+ from http://amigo.geneontology.org). Sequence inference methods
187
+ provide more than 106,000 annotations for human proteins based on
188
+ phylogenetic relationships (65,000 annotations) [11]; protein domains
189
+ (6730 annotations) [12]; and Ensembl orthology predictions (35,000
190
+ annotations) [13]. The next sections describe the annotation of the
191
+ different types of proteins involved in transcription and its regulation.
192
+
193
+ 3.1. Transcription activity annotations supported by experimental data
194
+
195
+ The following annotation approach follows the recommendations of
196
+ the GO consortium. First and foremost, it is necessary to use as much
197
+ information as possible, rather than annotating articles individually and
198
+ out of the wider context. When extracting information, a gene-by-gene
199
+ or pathway-by-pathway approach is considered best practice [5].
200
+ Reviewing a range of articles ensures that the annotations closely reflect
201
+ the current state of knowledge. Ideally, the corpus of annotations for a
202
+ gene product should be based on multiple observations from different
203
+ articles by independent research groups. Five steps used to determine
204
+ whether a gene can be annotated as a transcriptional regulator are
205
+ outlined in Fig. 3. Appendix 1 provides examples of each of those
206
+ different activities.
207
+
208
+ 1. Identify the starting hypothesis: are the authors characterizing
209
+ a transcription regulator? Scientific models are built by adding
210
+ new data to the existing corpus of evidence. New data can either
211
+ support or contradict existing models. The Introduction section of
212
+ research articles can be used to understand what prior knowledge the
213
+ article builds on, and which aspect of the existing model or what new
214
+ model the authors are assessing. The hypothesis tested by the authors
215
+ is essential to choose a GO term, with the caveat that inconsistent
216
+ terminology has been used in transcription research articles and
217
+ therefore may not always be aligned with the GO term categories.
218
+ 2. Determine whether knowledge from specific protein domains or
219
+ characterized orthologs support the hypothesis. The presence of
220
+ specific domains and the existence of well-characterized orthologs
221
+ can provide useful support for interpreting experimental data. Note
222
+ that this data should be used with caution. For instance, ARID-, AT
223
+ hook-, and some HMG-, GATA-, zinc finger domain-containing pro-
224
+ teins and proteins binding structural features such as the DNA minor
225
+ groove rarely bind DNA in a sequence-specific manner; some of them
226
+ merely function to increase the avidity or stability of a transcription
227
+ factor complex and its associated co-factors and do not - in their own
228
+ capacity - provide the specific genomic address to guide transcription
229
+
230
+ Fig. 3. Five steps to transcription activity annotation. The five key steps to
231
+ associating a transcription MF term with a protein start with identifying the
232
+ starting hypothesis, to confirm that the authors are characterizing a GTF, dbTF
233
+ or coTF. Secondly, considering whether the knowledge from specific protein
234
+ domains or characterized orthologs support the hypothesis. Thirdly, checking
235
+ whether existing annotations from GO, UniProt and Model Organism databases
236
+ are consistent with the hypothesis. Fourthly, reviewing other published
237
+ experimental data to ensure no contradictory findings have been reported.
238
+ Finally, creating new GO annotations, if the experimental results are consistent
239
+ with the identified hypothesis.
240
+
241
+ to specified target genes. Such proteins are not considered dbTFs in
242
+ GO.
243
+
244
+ To support the association of a gene with a GO term from homolo-
245
+ gous sequences from other species, only closely related orthologs whose
246
+ function has been unambiguously characterized can be used if those are
247
+ consistent with the experimental data presented in the article.
248
+
249
+ - GTFs function as the molecular machine that assembles with the
250
+ RNA polymerase at the promoter to form the pre-initiation complex
251
+ (PIC). GTFs have been characterized in several organisms, from
252
+ archaea to yeast and mammalian cells [14,15], and therefore
253
+ orthology should provide strong support for the decision to associate
254
+ these proteins with a child specific for RNA polymerase I, II or III of
255
+ the MF term “GO:0140223 general transcription initiation factor
256
+ activity”. In addition, the naming of GTFs is well established across
257
+ human and model organism nomenclature groups and can be used to
258
+ help guide these decisions. Thus, for human GTFs the HUGO Gene
259
+ Nomenclature Committee (HGNC, www.genenames.org) provides the
260
+ gene symbol TAF#, for TATA-box binding protein associated factors,
261
+ and GTF2#s and GTF3#s, for general transcription factor II and III
262
+ subunits respectively.
263
+
264
+ - dbTFs are specific double-stranded DNA-binding transcription fac-
265
+ tors that provide genomic addresses and respond to the conditions
266
+ under which specific genes are expressed. Central to dbTF function is
267
+ their binding to specific double-stranded DNA sequences that are
268
+ often named transcription factor binding sites (TFBS). Gene products
269
+ associated with the GO term “GO:0003700 DNA-binding transcrip-
270
+ tion factor activity” have the ability to bind DNA and this binding
271
+ regulates the expression of a specific set of target genes. The direct
272
+ target gene(s) can also be included in the annotation using the “has
273
+ input” relation. A human dbTF catalog developed by the GREEKC
274
+ project ([6]; also accessible from
275
+ https://www.ebi.ac.uk/QuickGO/targetset/dbTF) may be consulted to check whether a spe-
276
+ cific human protein is annotated to dbTF function with experimental
277
+ or phylogenetic evidence. When considering proteins that belong to
278
+ families of well characterized transcription factors, such as those that
279
+ contain bHLH, bZIP, homeobox, ETS, Forkhead, etc. domains and
280
+ proteins with a one-to-one ortholog already demonstrated to be a
281
+
282
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752, p. 4, P. Gaudet et al.
283
+
284
+ dbTF, then weaker evidence of DNA binding, such as ChIP experi-
285
+ ments is sufficient. In contrast, special care must be taken to annotate
286
+ proteins bearing domains that are not exclusively found in tran-
287
+ scription factors, such as RING, MYND and PHD zinc fingers. Simi-
288
+ larly, for proteins with enzymatic activity: while there are rare cases
289
+ of dbTFs with enzymatic activities, such as ENO1, dbTF and enzy-
290
+ matic activity are usually mutually exclusive. For proteins not in the
291
+ dbTF catalog, clear experimental or phylogenetic evidence of
292
+ sequence-specific DNA binding and gene transcription regulation via
293
+ cognate DNA motifs located in gene-associated cis-regulatory mod-
294
+ ules is required for the protein to be classified with high confidence
295
+ as a dbTF.
296
+
297
+ - coTFs: Transcription coregulators (also known as transcription co-
298
+ factors; GO:0003712) represent a group of different functions that
299
+ take place at cis-regulatory regions to make transcription of specific
300
+ gene sets either more (coactivators) or less (corepressors) efficient.
301
+ Coregulators can modify chromatin structure through covalent
302
+ modification of histones, ATP-dependent chromatin remodelling,
303
+ and modulate dbTF interactions with other transcription cor-
304
+ egulators. We classify the Mediator Complex, which bridges dbTFs
305
+ and the RNA polymerase, as a transcription coactivator [16–18].
306
+ Many coTFs have enzymatic activity and normally exert their func-
307
+ tion independent of high affinity binding to specific DNA sequences.
308
+ CoTFs that do bind DNA typically recognize very short DNA se-
309
+ quences that are not sufficiently unique in the genome to enable
310
+ regulation of a limited set of genes in a discrete environmental or
311
+ developmental stage. One example of this is CFP1, which binds the CpG
312
+ dinucleotide and helps most CpG islands gain epigenomic marking
313
+ [19–21].
314
+
315
+ It is important to keep in mind that DNA binding proteins that
316
+ regulate transcription are not necessarily dbTFs. Key points that help
317
+ distinguish between the three activities discussed above are that (i)
318
+ dbTFs bind DNA in a sequence-specific manner, and regulate precise
319
+ sets of genes; (ii) coTFs usually do not directly bind DNA, and when
320
+ they do they don't exhibit strong sequence-specificity; (iii) coTFs
321
+ often have catalytic activities (such as histone methyltransferase,
322
+ protein kinase, or ubiquitin ligase), which is highly unusual in dbTFs;
323
+ (iv) GTFs are required for core promoter activity and are considered
324
+ to act at each promoter to promote transcription initiation [14,22],
325
+ although the exact subunit composition at individual promoters may
326
+ vary.
327
+
328
+ 3. Confirm that existing annotations are consistent with the hy-
329
+ pothesis. New annotations need to be consistent with existing an-
330
+ notations, unless the existing annotations are believed to be wrong or
331
+ out of date. Annotations made to a term as well as a more specific
332
+ descendant reflect differences in granularity of annotation, and are
333
+ not generally considered inconsistent. When the new annotation uses
334
+ a term in a different branch than existing annotations, a review of the
335
+ evidence supporting the existing annotations is undertaken and, if
336
+ necessary, annotations that appear to be incorrect are disputed (see
337
+ section “Ensuring a coherent set of annotations”).
338
+
339
+ 4. Check that other published experimental results do not
340
+ contradict the hypothesis. The application of the gene-by-gene or
341
+ pathway-by-pathway annotation approach ensures that results from
342
+ other research articles are taken into account and that all annotations
343
+ are in line with the current state of knowledge. Again, if in-
344
+ consistencies are noticed, great care is taken to confirm correct
345
+ interpretation of the data; this is particularly important if there is
346
+ evidence for multiple, distinct transcription activity functions.
347
+ 5. Validate that the experimental results are consistent with the
348
+ hypothesis. If the results presented in the curated article are
349
+ consistent with the hypothesis presented by the authors, then the
350
+ appropriate transcription activity GO term(s) are associated with the
351
+ gene product.
352
+
353
+ Proteins that are involved in transcription and its regulation have
354
+ historically been studied through small-scale, focused experimental
355
+ approaches. For some examples of the small-scale experiments that do
356
+ provide evidence for DNA binding transcription factor activity the bio-
357
+ curator can use Tables 3 and 4 of Tripathi et al. [4] and in Santos-
358
+ Zavaleta et al. [23]. Recent advances in high-throughput methodolo-
359
+ gies now provide robust data that, when interpreted with sufficient care,
360
+ support the assignment of a functional role to many proteins, including
361
+ transcription regulators. This includes HT-SELEX [24,25], Protein
362
+ Binding Microarrays [26], ChIP [27], one- and two-hybrid experiments
363
+ [28,29]. For these experiments, the data quality and the false positive
364
+ rate must be evaluated before annotations are created. For example,
365
+ human HT-SELEX data will have more false positives if native dbTFs are
366
+ assayed in nuclear extracts or over-expressed in eukaryotic cells,
367
+ compared with heterologous proteins purified from prokaryotic cells, as
368
+ the latter reduces the probability of indirect interactions with endoge-
369
+ nous factors. For high-throughput transcription data, only articles with
370
+ low rates of false positives are curated. These various techniques pro-
371
+ vide multiple independent lines of evidence, strengthening the confi-
372
+ dence in the annotation when they converge on a single motif or
373
+ molecular function. The GO recommendations on curation of high-
374
+ throughput experimental data should be applied when such data is an-
375
+ notated [30].
376
+
377
+ 3.2. Annotations based on non-experimental evidence
378
+
379
+ There are only about 500 human dbTFs for which there is experi-
380
+ mental evidence satisfying the criteria presented here. Across all areas of
381
+ biology several reliable methods infer protein function from available
382
+ experimental data. Indeed, there are approximately 1000 human pro-
383
+ teins annotated as dbTFs by non-experimental methods (Lovering et al.
384
+ same BBA issue, prepublication available at [6]). Phylogenetic annota-
385
+ tions are assigned by a group of biocurators with expertise in evolu-
386
+ tionary biology, and require experimental evidence for at least one
387
+ member of a clade of evolutionarily related proteins [11]. The GO
388
+ knowledgebase also contains GO terms assigned by automated pipelines
389
+ based on protein domain (InterPro2GO) and orthology (Ensembl).
390
+ InterPro2GO [12] is based primarily on local (partial) homology: pro-
391
+ tein domains are mapped to specific GO terms, and any protein with one
392
+ of these domains will be annotated to the appropriate GO term(s).
393
+ Ensembl Compara [13] generates groups of one-to-one orthologs among
394
+ closely related species and propagates all experimental annotations to
395
+ each member of the group. While manual annotations based on these
396
+ methods are allowed, the GO consortium recommends using the auto-
397
+ mated pipelines that are maintained centrally and ensure a consistent
398
+ annotation corpus across all annotated species.
399
+
400
+ 4. Ensuring a coherent set of annotations
401
+
402
+ During the process of annotation other relevant annotations associ-
403
+ ated with the gene are reviewed. If there are conflicting annotations, the
404
+ supporting data should be reassessed to determine whether the anno-
405
+ tations are inconsistent with the data, in which case the annotations
406
+ must be fixed [5].
407
+
408
+ In cases where the primary data is conflicting across different articles
409
+ (for example a protein is sometimes described as a transcription factor,
410
+ and sometimes as a coregulator), then the literature will be reviewed
411
+ carefully to decide whether the annotation is incorrect (bad choice of
412
+ term, wrong protein annotated), whether the knowledge has evolved, or whether
413
+ the protein plays multiple roles under different conditions (i.e., acts as a
414
+ DNA-binding transcription factor in certain contexts and as a cofactor in
415
+ others). If no activity has yet been established, no MF annotation will be
416
+ made.
417
+
418
+ Note that individual DNA-binding transcription factors can act as
419
+ either activators or repressors depending on the context; hence associa-
420
+ tion of both activator and repressor terms with a single protein is not
421
+
422
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752, p. 5, P. Gaudet et al.
423
+
424
+ Fig. 4. Representation of biological context of dbTF activity. The level of cyclin-dependent kinase inhibitor p21 (CDKN1A) is regulated by the transcription factor
425
+ p53 (TP53) upon DNA damage, signaling cell cycle arrest to the cell (http://noctua.berkeleybop.org/editor/graph/gomodel:5fa76ad400000000).
426
+
427
+ considered inconsistent. The specific conditions under which this hap-
428
+ pens, such as relevant signaling pathways, cell type, as well as specific
429
+ target genes, etc., may be further specified through additional context
430
+ details ([31]; see an example of a GO-CAM model in Fig. 4).
431
+
432
+ 5. Pitfalls in annotating transcription regulators
433
+
434
+ During the review of dbTF GO annotations [6], in which over 3000
435
+ GO annotations were reviewed, a variety of common errors in data
436
+ interpretation were identified. One of the most common errors was
437
+ caused by the difficulty in distinguishing a dbTF from a coTF, as the
438
+ evidence for those two functions can be quite similar. To prevent this
439
+ error, biocurators ensure that the protein has a sequence-specific dou-
440
+ ble-stranded DNA-binding domain and conduct an exhaustive review of
441
+ the literature, including articles associated with the protein's close
442
+ orthologs. Furthermore, the literature supporting the dbTF activity of a
443
+ protein that also has evidence for another function, in particular, RNA
444
+ binding, will be carefully checked before assigning a dbTF activity. The
445
+ work on the human dbTF catalog added a GO ‘DNA-binding transcrip-
446
+ tion factor activity’ annotation to 583 proteins, and removed erroneous
447
+ assignments for 256 proteins (Lovering et al. BBAGRM-D-20-00141 this
448
+ issue).
449
+
450
+ Transcription regulators most often act as members of complexes,
451
+ some of which also contain proteins with other activities. In some cases,
452
+ only some subunits of a complex interact with DNA: for instance, while
453
+ the RFX complex contains three members: RFX5, RFXAP and RFXANK,
454
+ only RFX5 binds DNA directly. But the DNA-binding ability of the
455
+ complex is facilitated by all three subunits so RFXAP and RFXANK are
456
+ not coTFs [32]. In this case, RFXAP and RFXANK are annotated using the
457
+ “contributes to” qualifier, to indicate that they participate in, but are not
458
+ directly responsible for the activity.
459
+
460
+ Another activity that can easily be confused for a coTF is a dbTF
461
+ inhibitor. These proteins interact with a dbTF, but not at the DNA, to
462
+ prevent the dbTF from reaching its target genes. Well characterized
463
+ examples are the I-SMADs, SMAD6 and SMAD7 [33], that act by
464
+ competing with active SMADs at receptors, thus blocking further
465
+ intracellular signaling, and should be annotated to “GO:0140416 tran-
466
+ scription regulator inhibitor activity”.
467
+
468
+ It must be noted that these approaches to avoid errors in dbTF ac-
469
+ tivity assignment are not unequivocal, as some proteins do have multiple
470
+ functions. For example, the glucocorticoid receptor (NR3C1), which is a
471
+
472
+ canonical dbTF, has recently been shown to bind double-stranded RNA
473
+ motifs [34]; ATF2 (activating transcription factor 2) and CLOCK are
474
+ dbTFs that have been reported to also exhibit histone acetyltransferase
475
+ activity [35–38]; some dbTFs, such as NFIB (nuclear factor I B), also
476
+ function as dbTF inhibitors [39]. Finally, general and sequence-specific
477
+ effects can be difficult to separate, as has been established for the MYC
478
+ dbTF [40].
479
+
480
+ 6. Conclusion
481
+
482
+ The annotation approach presented here is designed to help bio-
483
+ curators annotate factors involved in transcription and its regulation, as
484
+ well as for users of GO annotations to understand their meaning and the
485
+ evidence behind them. This work complements the redesign of this part
486
+ of the GO to significantly simplify the ontology structure. The new
487
+ ontology structure and the present standards were applied to the review
488
+ of human proteins associated with GO terms describing dbTF activity
489
+ [6]. We anticipate that adoption of this annotation approach by all
490
+ groups who produce GO associations will increase annotation consis-
491
+ tency across all species, for transcription and also more widely across all
492
+ areas represented by GO.
493
+
494
+ Declaration of competing interest
495
+
496
+ The authors declare that they have no known competing financial
497
+ interests or personal relationships that could have appeared to influence
498
+ the work reported in this paper.
499
+
500
+ Acknowledgements
501
+
502
+ We thank many GREEKC and GO consortium members for useful
503
+ discussions that led to the development of these guidelines, in particular
504
+ Marcio L. Acencio, Helen Attrill, and Valerie Wood.
505
+
506
+ Funding sources
507
+
508
+ The GO Consortium is funded by the National Human Genome
509
+ Research Institute (US National Institutes of Health), grant number
510
+ HG002273. RCL has been supported by Alzheimer's Research UK grant
511
+ (ARUK-NAS2017A-1) and the National Institute for Health Research
512
+ University College London Hospitals Biomedical Research Centre.
513
+
514
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752, p. 6, P. Gaudet et al.
515
+
516
+ GREEKC is supported by the COST Action grant CA15205.
517
+
518
+ Appendices. Supplementary data
519
+
520
+ Supplementary data to this article can be found online at
521
+
522
+ https://doi.org/10.1016/j.bbagrm.2021.194752.
523
+
524
+ References
525
+
526
+ [1] M. Ashburner, C.A. Ball, J.A. Blake, D. Botstein, H. Butler, J.M. Cherry, et al., Gene
527
+ ontology: tool for the unification of biology, Gene Ontol. Consort. Nat Genet. 25 (1)
528
+ (2000 May) 25–29.
529
+
530
+ [2] The Gene Ontology Consortium, The Gene Ontology Resource: 20 years and still
531
+
532
+ GOing strong, Nucleic Acids Res. 47 (D1) (2019) D330–D338, 08.
533
+
534
+ [3] R. Tupler, G. Perini, M.R. Green, Expressing the human genome, Nature. 409
535
+
536
+ (6822) (2001 Feb 15) 832–833.
537
+
538
+ [4] S. Tripathi, K.R. Christie, R. Balakrishnan, R. Huntley, D.P. Hill, L. Thommesen, et
539
+ al., Gene Ontology annotation of sequence-specific DNA binding transcription
540
+ factors: setting the stage for a large-scale curation effort, Database J. Biol.
541
+ Databases Curation 2013 (2013), bat062.
542
+
543
+ [5] S. Poux, P. Gaudet, Best practices in manual annotation with the Gene Ontology,
544
+
545
+ Methods Mol. Biol. Clifton NJ 1446 (2017) 41–54.
546
+
547
+ [6] Lovering R.C., Gaudet P., Acencio M.L., Ignatchenko A., Jolma A., Fornes O., et al.,
548
+
549
+ BBAGRM-D-20-00141 this issue.
550
+
551
+ [7] Gene Ontology Consortium, Gene Ontology Consortium: going forward, Nucleic
552
+
553
+ Acids Res. 43 (Database issue) (2015 Jan) D1049–D1056.
554
+
555
+ [8] S. Carbon, A. Ireland, C.J. Mungall, S. Shu, B. Marshall, S. Lewis, et al., AmiGO:
556
+
557
+ online access to ontology and annotation data, Bioinforma Oxf. Engl. 25 (2) (2009
558
+ Jan 15) 288–289.
559
+
560
+ [9] D. Binns, E. Dimmer, R. Huntley, D. Barrell, C. O’Donovan, R. Apweiler, QuickGO:
561
+ a web-based tool for Gene Ontology searching, Bioinforma Oxf Engl. 25 (22) (2009
562
+ Nov 15) 3045–3046.
563
+
564
+ [10] P. Gaudet, N. Škunca, J.C. Hu, C. Dessimoz, Primer on the Gene Ontology, Methods
565
+
566
+
567
+
568
+ Mol. Biol. Clifton NJ 1446 (2017) 25–37.
569
+
570
+ [11] P. Gaudet, M.S. Livstone, S.E. Lewis, P.D. Thomas, Phylogenetic-based propagation
571
+ of functional annotations within the Gene Ontology consortium, Brief. Bioinform.
572
+ 12 (5) (2011 Sep) 449–462.
573
+
574
+ [12] A. Mitchell, H.-Y. Chang, L. Daugherty, M. Fraser, S. Hunter, R. Lopez, et al., The
575
+ InterPro protein families database: the classification resource after 15 years,
576
+ Nucleic Acids Res. 43 (Database issue) (2015 Jan) D213–D221.
577
+
578
+ [13] F. Cunningham, P. Achuthan, W. Akanni, J. Allen, M.R. Amode, I.M. Armean, et al.,
579
+
580
+ Ensembl 2019, Nucleic Acids Res. 47 (D1) (2019) D745–D751, 08.
581
+
582
+ [14] S. Sainsbury, C. Bernecky, P. Cramer, Structural basis of transcription initiation by
583
+ RNA polymerase II, Nat. Rev. Mol. Cell Biol. 16 (3) (2015 Mar) 129–143.
584
+ [15] M.J.E. Koster, B. Snel, H.T.M. Timmers, Genesis of chromatin and transcription
585
+ dynamics in the origin of species, Cell. 161 (4) (2015 May 7) 724–736.
586
+
587
+ [16] K.M. André, E.H. Sipos, J. Soutourina, Mediator roles going beyond transcription,
588
+
589
+ Trends Genet TIG. 37 (3) (2020 Sep 10) 224–234.
590
+
591
+ [17] T. Eychenne, M. Werner, J. Soutourina, Toward understanding of the mechanisms
592
+ of mediator function in vivo: focus on the preinitiation complex assembly,
593
+ Transcription. 8 (5) (2017) 328–342.
594
+
595
+ [18] J. Yin, G. Wang, The mediator complex: a master coordinator of transcription and
596
+ cell lineage development, Dev. Camb. Engl. 141 (5) (2014 Mar) 977–987.
597
+ [19] J.P. Thomson, P.J. Skene, J. Selfridge, T. Clouaire, J. Guy, S. Webb, et al., CpG
598
+ islands influence chromatin structure via the CpG-binding protein Cfp1, Nature.
599
+ 464 (7291) (2010 Apr 15) 1082–1086.
600
+
601
+ [20] J. Lipski, X. Zhang, B. Kruszewska, R. Kanjhan, Morphological study of long axonal
602
+
603
+ projections of ventral medullary inspiratory neurons in the rat, Brain Res. 640
604
+ (1–2) (1994 Mar 21) 171–184.
605
+
606
+ [21] H.K. Long, N.P. Blackledge, R.J. Klose, ZF-CxxC domain-containing proteins, CpG
607
+
608
+ islands and the chromatin connection, Biochem. Soc. Trans. 41 (3) (2013 Jun)
609
+ 727–740.
610
+
611
+ [22] P. Cramer, Organization and regulation of gene transcription, Nature. 573 (7772)
612
+
613
+ (2019) 45–54.
614
+
615
+ [23] A. Santos-Zavaleta, H. Salgado, S. Gama-Castro, M. Sánchez-Pérez, L. Gómez-
616
+
617
+ Romero, D. Ledezma-Tejeida, et al., RegulonDB v 10.5: tackling challenges to unify
618
+ classic and high throughput knowledge of gene regulation in E. coli K-12, Nucleic
619
+ Acids Res. 47 (D1) (2019) D212–D220, 08.
620
+
621
+ [24] A.D. Ellington, J.W. Szostak, In vitro selection of RNA molecules that bind specific
622
+
623
+ ligands, Nature. 346 (6287) (1990 Aug 30) 818–822.
624
+
625
+ [25] C. Tuerk, L. Gold, Systematic evolution of ligands by exponential enrichment: RNA
626
+ ligands to bacteriophage T4 DNA polymerase, Science. 249 (4968) (1990 Aug 3)
627
+ 505–510.
628
+
629
+ [26] K.K. Andrilenas, A. Penvose, T. Siggers, Using protein-binding microarrays to study
630
+
631
+ transcription factor specificity: homologs, isoforms and complexes, Brief Funct.
632
+ Genom. 14 (1) (2015 Jan) 17–29.
633
+
634
+ [27] T.H. Kim, J. Dekker, ChIP-seq, Cold Spring Harb. Protoc. 2018 (5) (2018), 01.
635
+ [28] J.A. Sewell, J.I. Fuxman Bass, Options and considerations when using a yeast one-
636
+
637
+ hybrid system, Methods Mol. Biol. Clifton NJ. 1794 (2018) 119–130.
638
+
639
+ [29] A. Paiano, A. Margiotta, M. De Luca, C. Bucci, Yeast two-hybrid assay to identify
640
+
641
+ interacting proteins, Curr. Protoc. Protein Sci. 95 (1) (2019), e70.
642
+
643
+ [30] H. Attrill, P. Gaudet, R.P. Huntley, R.C. Lovering, S.R. Engel, S. Poux, et al.,
644
+
645
+ Annotation of gene product function from high-throughput studies using the Gene
646
+ Ontology, Database J. Biol. Databases Curation. 2019 (2019), 01.
647
+
648
+ [31] P.D. Thomas, D.P. Hill, H. Mi, D. Osumi-Sutherland, K. Van Auken, S. Carbon, et
649
+ al., Gene Ontology Causal Activity Modeling (GO-CAM) moves beyond GO
650
+ annotations to structured descriptions of biological functions and systems, Nat.
651
+ Genet. 51 (10) (2019) 1429–1433.
652
+
653
+ [32] K. Masternak, E. Barras, M. Zufferey, B. Conrad, G. Corthals, R. Aebersold, et al.,
654
+ A gene encoding a novel RFX-associated transactivator is mutated in the majority
655
+ of MHC class II deficiency patients, Nat. Genet. 20 (3) (1998 Nov) 273–277.
656
+
657
+ [33] K. Miyazawa, K. Miyazono, Regulation of TGF-β family signaling by inhibitory
658
+
659
+ smads, Cold Spring Harb. Perspect. Biol. 9 (3) (2017 Mar 1).
660
+
661
+ [34] N.V. Parsonnet, N.C. Lammer, Z.E. Holmes, R.T. Batey, D.S. Wuttke, The
662
+
663
+ glucocorticoid receptor DNA-binding domain recognizes RNA hairpin structures
664
+ with high affinity, Nucleic Acids Res. 47 (15) (2019 05) 8180–8192.
665
+
666
+ [35] H. Kawasaki, L. Schiltz, R. Chiu, K. Itakura, K. Taira, Y. Nakatani, et al., ATF-2 has
667
+ intrinsic histone acetyltransferase activity which is modulated by phosphorylation,
668
+ Nature. 405 (6783) (2000 May 11) 195–200.
669
+
670
+ [36] J. Hirayama, S. Sahar, B. Grimaldi, T. Tamaru, K. Takamatsu, Y. Nakahata, et al.,
671
+
672
+ CLOCK-mediated acetylation of BMAL1 controls circadian function, Nature. 450
673
+ (7172) (2007 Dec 13) 1086–1090.
674
+
675
+ [37] B. Grimaldi, Y. Nakahata, S. Sahar, M. Kaluzova, D. Gauthier, K. Pham, et al.,
676
+
677
+ Chromatin remodeling and circadian control: master regulator CLOCK is an
678
+ enzyme, Cold Spring Harb. Symp. Quant. Biol. 72 (2007) 105–112.
679
+
680
+ [38] Z. Wang, Y. Wu, L. Li, X.-D. Su, Intermolecular recognition revealed by the complex
681
+ structure of human CLOCK-BMAL1 basic helix-loop-helix domains with E-box
682
+ DNA, Cell Res. 23 (2) (2013 Feb) 213–224.
683
+
684
+ [39] Y. Liu, H.U. Bernard, D. Apt, NFI-B3, a novel transcriptional repressor of the
685
+
686
+ nuclear factor I family, is generated by alternative RNA processing, J. Biol. Chem.
687
+ 272 (16) (1997 Apr 18) 10739–10745.
688
+
689
+ [40] Z. Nie, C. Guo, S.K. Das, C.C. Chow, E. Batchelor, S.S. Simons, et al., Dissecting
690
+
691
+ transcriptional amplification by MYC, eLife 9 (2020), 27.
692
+
693
+ BBA - Gene Regulatory Mechanisms 1864 (2021) 194752, p. 7