aurelian 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aurelian/__init__.py +9 -0
- aurelian/agents/__init__.py +0 -0
- aurelian/agents/amigo/__init__.py +3 -0
- aurelian/agents/amigo/amigo_agent.py +77 -0
- aurelian/agents/amigo/amigo_config.py +85 -0
- aurelian/agents/amigo/amigo_evals.py +73 -0
- aurelian/agents/amigo/amigo_gradio.py +52 -0
- aurelian/agents/amigo/amigo_mcp.py +152 -0
- aurelian/agents/amigo/amigo_tools.py +152 -0
- aurelian/agents/biblio/__init__.py +42 -0
- aurelian/agents/biblio/biblio_agent.py +94 -0
- aurelian/agents/biblio/biblio_config.py +40 -0
- aurelian/agents/biblio/biblio_gradio.py +67 -0
- aurelian/agents/biblio/biblio_mcp.py +115 -0
- aurelian/agents/biblio/biblio_tools.py +164 -0
- aurelian/agents/biblio_agent.py +46 -0
- aurelian/agents/checklist/__init__.py +44 -0
- aurelian/agents/checklist/checklist_agent.py +85 -0
- aurelian/agents/checklist/checklist_config.py +28 -0
- aurelian/agents/checklist/checklist_gradio.py +70 -0
- aurelian/agents/checklist/checklist_mcp.py +86 -0
- aurelian/agents/checklist/checklist_tools.py +141 -0
- aurelian/agents/checklist/content/checklists.yaml +7 -0
- aurelian/agents/checklist/content/streams.csv +136 -0
- aurelian/agents/checklist_agent.py +40 -0
- aurelian/agents/chemistry/__init__.py +3 -0
- aurelian/agents/chemistry/chemistry_agent.py +46 -0
- aurelian/agents/chemistry/chemistry_config.py +71 -0
- aurelian/agents/chemistry/chemistry_evals.py +79 -0
- aurelian/agents/chemistry/chemistry_gradio.py +50 -0
- aurelian/agents/chemistry/chemistry_mcp.py +120 -0
- aurelian/agents/chemistry/chemistry_tools.py +121 -0
- aurelian/agents/chemistry/image_agent.py +15 -0
- aurelian/agents/d4d/__init__.py +30 -0
- aurelian/agents/d4d/d4d_agent.py +72 -0
- aurelian/agents/d4d/d4d_config.py +46 -0
- aurelian/agents/d4d/d4d_gradio.py +58 -0
- aurelian/agents/d4d/d4d_mcp.py +71 -0
- aurelian/agents/d4d/d4d_tools.py +157 -0
- aurelian/agents/d4d_agent.py +64 -0
- aurelian/agents/diagnosis/__init__.py +33 -0
- aurelian/agents/diagnosis/diagnosis_agent.py +53 -0
- aurelian/agents/diagnosis/diagnosis_config.py +48 -0
- aurelian/agents/diagnosis/diagnosis_evals.py +76 -0
- aurelian/agents/diagnosis/diagnosis_gradio.py +52 -0
- aurelian/agents/diagnosis/diagnosis_mcp.py +141 -0
- aurelian/agents/diagnosis/diagnosis_tools.py +204 -0
- aurelian/agents/diagnosis_agent.py +28 -0
- aurelian/agents/draw/__init__.py +3 -0
- aurelian/agents/draw/draw_agent.py +39 -0
- aurelian/agents/draw/draw_config.py +26 -0
- aurelian/agents/draw/draw_gradio.py +50 -0
- aurelian/agents/draw/draw_mcp.py +94 -0
- aurelian/agents/draw/draw_tools.py +100 -0
- aurelian/agents/draw/judge_agent.py +18 -0
- aurelian/agents/filesystem/__init__.py +0 -0
- aurelian/agents/filesystem/filesystem_config.py +27 -0
- aurelian/agents/filesystem/filesystem_gradio.py +49 -0
- aurelian/agents/filesystem/filesystem_mcp.py +89 -0
- aurelian/agents/filesystem/filesystem_tools.py +95 -0
- aurelian/agents/filesystem/py.typed +0 -0
- aurelian/agents/github/__init__.py +0 -0
- aurelian/agents/github/github_agent.py +83 -0
- aurelian/agents/github/github_cli.py +248 -0
- aurelian/agents/github/github_config.py +22 -0
- aurelian/agents/github/github_gradio.py +152 -0
- aurelian/agents/github/github_mcp.py +252 -0
- aurelian/agents/github/github_tools.py +408 -0
- aurelian/agents/github/github_tools.py.tmp +413 -0
- aurelian/agents/goann/__init__.py +13 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.md +1000 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines.pdf +0 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.md +693 -0
- aurelian/agents/goann/documents/Transcription_Factors_Annotation_Guidelines_Paper.pdf +0 -0
- aurelian/agents/goann/goann_agent.py +90 -0
- aurelian/agents/goann/goann_config.py +90 -0
- aurelian/agents/goann/goann_evals.py +104 -0
- aurelian/agents/goann/goann_gradio.py +62 -0
- aurelian/agents/goann/goann_mcp.py +0 -0
- aurelian/agents/goann/goann_tools.py +65 -0
- aurelian/agents/gocam/__init__.py +43 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/DNA-binding transcription factor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/DNA-binding_transcription_factor_activity_annotation_guidelines.md +100 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.docx +0 -0
- aurelian/agents/gocam/documents/E3 ubiquitin ligases.pdf +0 -0
- aurelian/agents/gocam/documents/E3_ubiquitin_ligases.md +134 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM annotation guidelines README.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.docx +0 -0
- aurelian/agents/gocam/documents/GO-CAM modelling guidelines TO DO.pdf +0 -0
- aurelian/agents/gocam/documents/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/GO-CAM_modelling_guidelines_TO_DO.md +3 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate complexes in GO-CAM.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate molecular adaptors.pdf +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.docx +0 -0
- aurelian/agents/gocam/documents/How to annotate sequestering proteins.pdf +0 -0
- aurelian/agents/gocam/documents/How_to_annotate_complexes_in_GO-CAM.md +29 -0
- aurelian/agents/gocam/documents/How_to_annotate_molecular_adaptors.md +31 -0
- aurelian/agents/gocam/documents/How_to_annotate_sequestering_proteins.md +42 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular adaptor activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.docx +0 -0
- aurelian/agents/gocam/documents/Molecular carrier activity.pdf +0 -0
- aurelian/agents/gocam/documents/Molecular_adaptor_activity.md +51 -0
- aurelian/agents/gocam/documents/Molecular_carrier_activity.md +41 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.docx +0 -0
- aurelian/agents/gocam/documents/Protein sequestering activity.pdf +0 -0
- aurelian/agents/gocam/documents/Protein_sequestering_activity.md +50 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Signaling receptor activity annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Signaling_receptor_activity_annotation_guidelines.md +187 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.docx +0 -0
- aurelian/agents/gocam/documents/Transcription coregulator activity.pdf +0 -0
- aurelian/agents/gocam/documents/Transcription_coregulator_activity.md +36 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.docx +0 -0
- aurelian/agents/gocam/documents/Transporter activity annotation annotation guidelines.pdf +0 -0
- aurelian/agents/gocam/documents/Transporter_activity_annotation_annotation_guidelines.md +43 -0
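- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.docx +0 -0
- aurelian/agents/gocam/documents/WIP - Regulation and Regulatory Processes in GO-CAM.pdf +0 -0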
- aurelian/agents/gocam/documents/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +31 -0
- aurelian/agents/gocam/documents/md/DNA-binding_transcription_factor_activity_annotation_guidelines.md +131 -0
- aurelian/agents/gocam/documents/md/E3_ubiquitin_ligases.md +166 -0
- aurelian/agents/gocam/documents/md/GO-CAM_annotation_guidelines_README.md +1 -0
- aurelian/agents/gocam/documents/md/GO-CAM_modelling_guidelines_TO_DO.md +5 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_complexes_in_GO-CAM.md +28 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_molecular_adaptors.md +19 -0
- aurelian/agents/gocam/documents/md/How_to_annotate_sequestering_proteins.md +38 -0
- aurelian/agents/gocam/documents/md/Molecular_adaptor_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Molecular_carrier_activity.md +59 -0
- aurelian/agents/gocam/documents/md/Protein_sequestering_activity.md +52 -0
- aurelian/agents/gocam/documents/md/Signaling_receptor_activity_annotation_guidelines.md +271 -0
- aurelian/agents/gocam/documents/md/Transcription_coregulator_activity.md +54 -0
- aurelian/agents/gocam/documents/md/Transporter_activity_annotation_annotation_guidelines.md +38 -0
- aurelian/agents/gocam/documents/md/WIP_-_Regulation_and_Regulatory_Processes_in_GO-CAM.md +39 -0
- aurelian/agents/gocam/documents/pandoc_md/Signaling_receptor_activity_annotation_guidelines.md +334 -0
- aurelian/agents/gocam/gocam_agent.py +240 -0
- aurelian/agents/gocam/gocam_config.py +85 -0
- aurelian/agents/gocam/gocam_curator_agent.py +46 -0
- aurelian/agents/gocam/gocam_evals.py +67 -0
- aurelian/agents/gocam/gocam_gradio.py +89 -0
- aurelian/agents/gocam/gocam_mcp.py +224 -0
- aurelian/agents/gocam/gocam_tools.py +294 -0
- aurelian/agents/linkml/__init__.py +0 -0
- aurelian/agents/linkml/linkml_agent.py +62 -0
- aurelian/agents/linkml/linkml_config.py +48 -0
- aurelian/agents/linkml/linkml_evals.py +66 -0
- aurelian/agents/linkml/linkml_gradio.py +45 -0
- aurelian/agents/linkml/linkml_mcp.py +186 -0
- aurelian/agents/linkml/linkml_tools.py +102 -0
- aurelian/agents/literature/__init__.py +3 -0
- aurelian/agents/literature/literature_agent.py +55 -0
- aurelian/agents/literature/literature_config.py +35 -0
- aurelian/agents/literature/literature_gradio.py +52 -0
- aurelian/agents/literature/literature_mcp.py +174 -0
- aurelian/agents/literature/literature_tools.py +182 -0
- aurelian/agents/monarch/__init__.py +25 -0
- aurelian/agents/monarch/monarch_agent.py +44 -0
- aurelian/agents/monarch/monarch_config.py +45 -0
- aurelian/agents/monarch/monarch_gradio.py +51 -0
- aurelian/agents/monarch/monarch_mcp.py +65 -0
- aurelian/agents/monarch/monarch_tools.py +113 -0
- aurelian/agents/oak/__init__.py +0 -0
- aurelian/agents/oak/oak_config.py +27 -0
- aurelian/agents/oak/oak_gradio.py +57 -0
- aurelian/agents/ontology_mapper/__init__.py +31 -0
- aurelian/agents/ontology_mapper/ontology_mapper_agent.py +56 -0
- aurelian/agents/ontology_mapper/ontology_mapper_config.py +50 -0
- aurelian/agents/ontology_mapper/ontology_mapper_evals.py +108 -0
- aurelian/agents/ontology_mapper/ontology_mapper_gradio.py +58 -0
- aurelian/agents/ontology_mapper/ontology_mapper_mcp.py +81 -0
- aurelian/agents/ontology_mapper/ontology_mapper_tools.py +147 -0
- aurelian/agents/phenopackets/__init__.py +3 -0
- aurelian/agents/phenopackets/phenopackets_agent.py +58 -0
- aurelian/agents/phenopackets/phenopackets_config.py +72 -0
- aurelian/agents/phenopackets/phenopackets_evals.py +99 -0
- aurelian/agents/phenopackets/phenopackets_gradio.py +55 -0
- aurelian/agents/phenopackets/phenopackets_mcp.py +178 -0
- aurelian/agents/phenopackets/phenopackets_tools.py +127 -0
- aurelian/agents/rag/__init__.py +40 -0
- aurelian/agents/rag/rag_agent.py +83 -0
- aurelian/agents/rag/rag_config.py +80 -0
- aurelian/agents/rag/rag_gradio.py +67 -0
- aurelian/agents/rag/rag_mcp.py +107 -0
- aurelian/agents/rag/rag_tools.py +189 -0
- aurelian/agents/rag_agent.py +54 -0
- aurelian/agents/robot/__init__.py +0 -0
- aurelian/agents/robot/assets/__init__.py +3 -0
- aurelian/agents/robot/assets/template.md +384 -0
- aurelian/agents/robot/robot_config.py +25 -0
- aurelian/agents/robot/robot_gradio.py +46 -0
- aurelian/agents/robot/robot_mcp.py +100 -0
- aurelian/agents/robot/robot_ontology_agent.py +139 -0
- aurelian/agents/robot/robot_tools.py +50 -0
- aurelian/agents/talisman/__init__.py +3 -0
- aurelian/agents/talisman/talisman_agent.py +126 -0
- aurelian/agents/talisman/talisman_config.py +66 -0
- aurelian/agents/talisman/talisman_gradio.py +50 -0
- aurelian/agents/talisman/talisman_mcp.py +168 -0
- aurelian/agents/talisman/talisman_tools.py +720 -0
- aurelian/agents/ubergraph/__init__.py +40 -0
- aurelian/agents/ubergraph/ubergraph_agent.py +71 -0
- aurelian/agents/ubergraph/ubergraph_config.py +79 -0
- aurelian/agents/ubergraph/ubergraph_gradio.py +48 -0
- aurelian/agents/ubergraph/ubergraph_mcp.py +69 -0
- aurelian/agents/ubergraph/ubergraph_tools.py +118 -0
- aurelian/agents/uniprot/__init__.py +37 -0
- aurelian/agents/uniprot/uniprot_agent.py +43 -0
- aurelian/agents/uniprot/uniprot_config.py +43 -0
- aurelian/agents/uniprot/uniprot_evals.py +99 -0
- aurelian/agents/uniprot/uniprot_gradio.py +48 -0
- aurelian/agents/uniprot/uniprot_mcp.py +168 -0
- aurelian/agents/uniprot/uniprot_tools.py +136 -0
- aurelian/agents/web/__init__.py +0 -0
- aurelian/agents/web/web_config.py +27 -0
- aurelian/agents/web/web_gradio.py +48 -0
- aurelian/agents/web/web_mcp.py +50 -0
- aurelian/agents/web/web_tools.py +108 -0
- aurelian/chat.py +23 -0
- aurelian/cli.py +800 -0
- aurelian/dependencies/__init__.py +0 -0
- aurelian/dependencies/workdir.py +78 -0
- aurelian/mcp/__init__.py +0 -0
- aurelian/mcp/amigo_mcp_test.py +86 -0
- aurelian/mcp/config_generator.py +123 -0
- aurelian/mcp/example_config.json +43 -0
- aurelian/mcp/generate_sample_config.py +37 -0
- aurelian/mcp/gocam_mcp_test.py +126 -0
- aurelian/mcp/linkml_mcp_tools.py +190 -0
- aurelian/mcp/mcp_discovery.py +87 -0
- aurelian/mcp/mcp_test.py +31 -0
- aurelian/mcp/phenopackets_mcp_test.py +103 -0
- aurelian/tools/__init__.py +0 -0
- aurelian/tools/web/__init__.py +0 -0
- aurelian/tools/web/url_download.py +51 -0
- aurelian/utils/__init__.py +0 -0
- aurelian/utils/async_utils.py +15 -0
- aurelian/utils/data_utils.py +32 -0
- aurelian/utils/documentation_manager.py +59 -0
- aurelian/utils/doi_fetcher.py +238 -0
- aurelian/utils/ontology_utils.py +68 -0
- aurelian/utils/pdf_fetcher.py +23 -0
- aurelian/utils/process_logs.py +100 -0
- aurelian/utils/pubmed_utils.py +238 -0
- aurelian/utils/pytest_report_to_markdown.py +67 -0
- aurelian/utils/robot_ontology_utils.py +112 -0
- aurelian/utils/search_utils.py +95 -0
- aurelian-0.3.2.dist-info/LICENSE +22 -0
- aurelian-0.3.2.dist-info/METADATA +105 -0
- aurelian-0.3.2.dist-info/RECORD +254 -0
- aurelian-0.3.2.dist-info/WHEEL +4 -0
- aurelian-0.3.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,693 @@
|
|
1
|
+
Contents lists available at ScienceDirect
|
2
|
+
|
3
|
+
BBA - Gene Regulatory Mechanisms
|
4
|
+
|
5
|
+
journal homepage: www.elsevier.com/locate/bbagrm
|
6
|
+
|
7
|
+
Gene Ontology representation for transcription factor functions
|
8
|
+
|
9
|
+
Pascale Gaudet a, *, Colin Logie b, Ruth C. Lovering c, Martin Kuiper d, Astrid Lægreid e,
|
10
|
+
Paul D. Thomas f
|
11
|
+
a Swiss-Prot group, SIB Swiss Institute of Bioinformatics, 1 Rue Michel-Servet, 1211 Genève, Switzerland
|
12
|
+
b Molecular Biology Department, Faculty of Science, Radboud University, PO box 9101, 6500HB Nijmegen, the Netherlands
|
13
|
+
c Functional Gene Annotation, Preclinical and Fundamental Science, UCL Institute of Cardiovascular Science, University College London, London, UK
|
14
|
+
d Department of Biology, Norwegian University of Science and Technology, Trondheim, Norway
|
15
|
+
e Department of Clinical and Molecular Medicine, Norwegian University of Science and Technology, Trondheim, Norway
|
16
|
+
f Division of Bioinformatics, Department of Preventive Medicine, University of Southern California, Los Angeles, CA, USA
|
17
|
+
|
18
|
+
A R T I C L E I N F O
|
19
|
+
|
20
|
+
A B S T R A C T
|
21
|
+
|
22
|
+
Keywords:
|
23
|
+
Transcription
|
24
|
+
Gene Ontology
|
25
|
+
Biological databases
|
26
|
+
Biocuration
|
27
|
+
|
28
|
+
Transcription plays a central role in defining the identity and functionalities of cells, as well as in their responses
|
29
|
+
to changes in the cellular environment. The Gene Ontology (GO) provides a rigorously defined set of concepts
|
30
|
+
that describe the functions of gene products. A GO annotation is a statement about the function of a particular
|
31
|
+
gene product, represented as an association between a gene product and the biological concept a GO term de-
|
32
|
+
fines. Critically, each GO annotation is based on traceable scientific evidence. Here, we describe the different GO
|
33
|
+
terms that are associated with proteins involved in transcription and its regulation, focusing on the standard of
|
34
|
+
evidence required to support these associations. This article is intended to help users of GO annotations un-
|
35
|
+
derstand how to interpret the annotations and can contribute to the consistency of GO annotations. We distin-
|
36
|
+
guish between three classes of activities involved in transcription or directly regulating it - general transcription
|
37
|
+
factors, DNA-binding transcription factors, and transcription co-regulators.
|
38
|
+
|
39
|
+
1. Introduction
|
40
|
+
|
41
|
+
The Gene Ontology (GO) develops a computational model of bio-
|
42
|
+
logical systems, ranging from the molecular to the organism level, across
|
43
|
+
all species in the tree of life. GO aims to provide a comprehensive rep-
|
44
|
+
resentation of the current scientific knowledge about the functions of
|
45
|
+
gene products, namely, proteins and non-coding RNA molecules [1,2].
|
46
|
+
GO is organized in three aspects. GO Molecular Functions (MF) describe
|
47
|
+
activities that occur at the molecular level, such as “DNA binding tran-
|
48
|
+
scription factor activity” or “histone deacetylase activity”. Biological
|
49
|
+
Processes (BP) represent the larger processes or ‘biological programs’
|
50
|
+
accomplished by multiple molecular activities. Examples of broad bio-
|
51
|
+
logical process terms are “transcription” or “signal transduction”.
|
52
|
+
Cellular Components (CC) are the cellular structures in which a gene
|
53
|
+
product performs a function, either cellular compartments (e.g., “nu-
|
54
|
+
cleus” or “chromatin”), or stable macromolecular complexes of which
|
55
|
+
they are parts (e.g., “RNA polymerase II”). Together, annotations of a
|
56
|
+
gene to terms from each of those aspects describe what specific function
|
57
|
+
a gene product plays in a process and where this activity occurs in the
|
58
|
+
|
59
|
+
cell. Ideally every gene product should have an annotation from each of
|
60
|
+
the three aspects of GO.
|
61
|
+
|
62
|
+
The specific genes expressed in a given cell define the identity and
|
63
|
+
functionalities of that cell. Regulation of transcription is highly complex
|
64
|
+
and leads to differential gene expression in specific cells or under spe-
|
65
|
+
cific conditions. In human cells, it has been estimated that several
|
66
|
+
thousand proteins participate in gene expression and its regulation,
|
67
|
+
directly or indirectly [3] (Velthuijs et al. BBAGRM-D-21-00020 this
|
68
|
+
issue). This includes the general transcription machinery, the factors
|
69
|
+
that make the chromatin more or less accessible, specific DNA-binding
|
70
|
+
transcription factors, and the signaling molecules that regulate the ac-
|
71
|
+
tivity of all those proteins. This complexity is difficult to accurately
|
72
|
+
represent in ontological form. Tripathi et al. [4] redesigned that part of
|
73
|
+
the ontology in 2013 to define precise molecular functions for the
|
74
|
+
various proteins involved in transcription and its regulation. Nearly 10
|
75
|
+
years after its implementation, we had to acknowledge that this
|
76
|
+
framework was too complex and difficult to navigate, leading to
|
77
|
+
inconsistent annotations and thus poorly serving the user community.
|
78
|
+
The work described here was also motivated by the https://www.gr
|
79
|
+
|
80
|
+
* Corresponding author.
|
81
|
+
|
82
|
+
E-mail address: pascale.gaudet@sib.swiss (P. Gaudet).
|
83
|
+
|
84
|
+
https://doi.org/10.1016/j.bbagrm.2021.194752
|
85
|
+
Received 6 January 2021; Received in revised form 24 August 2021; Accepted 25 August 2021
|
86
|
+
|
87
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752. Available online 28 August 2021. 1874-9399/© 2021 The Author(s). Published by Elsevier B.V. This is an open access article under the CC BY license (http://creativecommons.org/licenses/by/4.0/). P. Gaudet et al.
|
88
|
+
|
89
|
+
Fig. 1. Transcription regulator activity branches of the Gene Ontology. (a) Graphical representation of the placement of the parent terms for transcription regulator
|
90
|
+
molecular functions. Black headers correspond to MF and cyan headers to BP terms. (b) Transcription regulators are dbTF and coTFs. The general transcription
|
91
|
+
initiation factors play a direct role in transcription. Top-level terms of each branch are highlighted in blue.
|
92
|
+
|
93
|
+
eekc.org/ GREEKC consortium, whose goals include curation tools
|
94
|
+
development, reengineering of ontologies, development of curation
|
95
|
+
guidelines and text mining tools, developing platforms to analyze and
|
96
|
+
render the molecular logic of transcription regulatory networks for
|
97
|
+
which a robust infrastructure is needed. Therefore, we thoroughly
|
98
|
+
reviewed the Gene Ontology representation of molecular activities
|
99
|
+
relevant to transcription, with a simpler and more pragmatic approach,
|
100
|
+
more aligned with available experimental data.
|
101
|
+
|
102
|
+
We have revised the GO MF terms representing the activities of
|
103
|
+
proteins involved in transcription, with the input from domain experts.
|
104
|
+
In addition to RNA polymerase, we defined three different types of ac-
|
105
|
+
tivities that take place on the DNA to mediate or regulate transcription:
|
106
|
+
general transcription factors (GTFs), DNA-binding transcription factors
|
107
|
+
(dbTFs), and transcription coregulators (coTFs).
|
108
|
+
|
109
|
+
Here we present the annotation approach recommended by the GO
|
110
|
+
consortium [5], applied to the recent refactoring of the transcription
|
111
|
+
|
112
|
+
domain of GO. This approach aims to 1) help biocurators – annotation
|
113
|
+
producers – interpret published data and correctly assign the MF terms
|
114
|
+
for GTF, dbTF, or coTF to a protein, and 2) help users understand how
|
115
|
+
the data are generated and how to interpret them. The annotation of
|
116
|
+
factors involved in transcription and its regulation is challenging for
|
117
|
+
multiple reasons. Contrary to other molecular functions, for example
|
118
|
+
enzymes, where one protein or a well-defined complex catalyses a pre-
|
119
|
+
cise reaction, the measurable output of transcription activities is the
|
120
|
+
result of multiple nearly simultaneous activities of GTF, dbTF, coTF, as
|
121
|
+
well as RNA polymerase, hence, individual activities can be hard to
|
122
|
+
distinguish experimentally. Moreover, these factors often form large
|
123
|
+
complexes, such that the level of resolution of the experimental setup is
|
124
|
+
essential to determine the precise activity of any given protein. Older
|
125
|
+
experimental methods often did not provide enough details, leading to
|
126
|
+
inaccurate classifications of certain proteins. In addition, researchers use
|
127
|
+
“transcription factor” loosely, at times meaning GTF, dbTF, or coTF. This
|
128
|
+
|
129
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752, p. 2 — P. Gaudet et al.
|
130
|
+
|
131
|
+
Fig. 2. DNA binding branch of the Gene Ontology. This part of the Molecular Function (MF) ontology describes DNA binding. (a) Graphical representation of the
|
132
|
+
placement of the terms describing sequence-specific promoter binding. (b) Hierarchical view of the sequence-specific transcription regulatory region binding terms.
|
133
|
+
|
134
|
+
complicates the annotation process and necessitates solid expertise for
|
135
|
+
correct interpretation of the data. The experimental data itself is difficult
|
136
|
+
to parse for unambiguous assignment of a function to a protein: typi-
|
137
|
+
cally, a single experiment is insufficient for accurately determining the
|
138
|
+
function of these proteins, thus, interpretation of experimental results
|
139
|
+
that investigate dbTFs must rely on pre-existing knowledge. Also, many
|
140
|
+
proteins presumed to function as dbTFs have never been experimentally
|
141
|
+
demonstrated to bind DNA, but their role is indirectly inferred by the
|
142
|
+
presence of known specific DNA-binding domains and in some cases,
|
143
|
+
evidence of an effect on the transcription of putative direct target genes.
|
144
|
+
To add to the complexity, the presence of a DNA-binding domain in a
|
145
|
+
protein does not always imply that the protein functions as a dbTF [6].
|
146
|
+
|
147
|
+
2. GO description of molecular functions relevant for
|
148
|
+
transcription
|
149
|
+
|
150
|
+
We distinguish between three types of activities involved in tran-
|
151
|
+
scription or directly regulating it: general transcription factors
|
152
|
+
(GO:0140223), DNA-binding transcription factors (GO:0003700), and
|
153
|
+
transcription co-regulators (GO:0003712). The general transcription
|
154
|
+
initiation factor activity term and its descendants describe the activ-
|
155
|
+
ities of general transcription initiation factors for RNA polymerase I, II
|
156
|
+
and III, which play a direct role in the biological process of transcription
|
157
|
+
at the core promoter (Sant et al., BBAGRM-D-21-00014 this issue). In
|
158
|
+
contrast, the GO:0140110 transcription regulator activity branch
|
159
|
+
describes the activities of transcription regulators: dbTF and coTFs, that
|
160
|
+
|
161
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752, p. 3 — P. Gaudet et al.
|
162
|
+
|
163
|
+
act at any type of cis-regulatory module (Fig. 1). DNA-binding tran-
|
164
|
+
scription factors are adaptors that bind chromatin at specific genomic
|
165
|
+
addresses to coordinately regulate the expression of gene sets. This is
|
166
|
+
encoded in the ontology via links between the DNA-binding transcrip-
|
167
|
+
tion factor activity term and its descendants and to their counterpart
|
168
|
+
branch of the MF ontology describing DNA binding. The GO:0000976
|
169
|
+
transcription regulatory region sequence-specific DNA-binding sub-tree
|
170
|
+
of GO includes terms describing specific regulatory regions, such as the
|
171
|
+
core promoter (including the TATA box and the transcription start site),
|
172
|
+
cis-regulatory regions (bound by dbTFs), and specific types of cis-
|
173
|
+
regulatory motifs (such as E-box and N-box). An overview of the GO
|
174
|
+
structure for DNA binding activities is shown in Fig. 2. The definitions
|
175
|
+
and placement of GO terms in the ontology can be viewed in the AmiGO
|
176
|
+
[7,8]; http://amigo.geneontology.org/amigo, and QuickGO [9];
|
177
|
+
https://www.ebi.ac.uk/QuickGO/ browsers.
|
178
|
+
|
179
|
+
3. Strategy for annotating transcription-associated activities
|
180
|
+
|
181
|
+
GO terms are associated with gene products based on two general
|
182
|
+
approaches: from experimental data and from sequence inferences [10].
|
183
|
+
The GO database has a total of 8 million annotations, about 7% of which
|
184
|
+
are to human gene products. For human, there are >915,000 annota-
|
185
|
+
tions derived from experimental data (GO release 2020-10-10 obtained
|
186
|
+
from http://amigo.geneontology.org). Sequence inference methods
|
187
|
+
provide more than 106,000 annotations for human proteins based on
|
188
|
+
phylogenetic relationships (65,000 annotations) [11]; protein domains
|
189
|
+
(6730 annotations) [12]; and Ensembl orthology predictions (35,000
|
190
|
+
annotations) [13]. The next sections describe the annotation of the
|
191
|
+
different types of proteins involved in transcription and its regulation.
|
192
|
+
|
193
|
+
3.1. Transcription activity annotations supported by experimental data
|
194
|
+
|
195
|
+
The following annotation approach follows the recommendations of
|
196
|
+
the GO consortium. First and foremost, it is necessary to use as much
|
197
|
+
information as possible, rather than annotating articles individually and
|
198
|
+
out of the wider context. When extracting information, a gene-by-gene
|
199
|
+
or pathway-by-pathway approach is considered best practice [5].
|
200
|
+
Reviewing a range of articles ensures that the annotations closely reflect
|
201
|
+
the current state of knowledge. Ideally, the corpus of annotations for a
|
202
|
+
gene product should be based on multiple observations from different
|
203
|
+
articles by independent research groups. Five steps used to determine
|
204
|
+
whether a gene can be annotated as a transcriptional regulator are
|
205
|
+
outlined in Fig. 3. Appendix 1 provides examples of each of those
|
206
|
+
different activities.
|
207
|
+
|
208
|
+
1. Identify the starting hypothesis: are the authors characterizing
|
209
|
+
a transcription regulator? Scientific models are built by adding
|
210
|
+
new data to the existing corpus of evidence. New data can either
|
211
|
+
support or contradict existing models. The Introduction section of
|
212
|
+
research articles can be used to understand what prior knowledge the
|
213
|
+
article builds on, and which aspect of the existing model or what new
|
214
|
+
model the authors are assessing. The hypothesis tested by the authors
|
215
|
+
is essential to choose a GO term, with the caveat that inconsistent
|
216
|
+
terminology has been used in transcription research articles and
|
217
|
+
therefore may not always be aligned with the GO term categories.
|
218
|
+
2. Determine whether knowledge from specific protein domains or
|
219
|
+
characterized orthologs support the hypothesis. The presence of
|
220
|
+
specific domains and the existence of well-characterized orthologs
|
221
|
+
can provide useful support for interpreting experimental data. Note
|
222
|
+
that this data should be used with caution. For instance, ARID-, AT
|
223
|
+
hook-, and some HMG-, GATA-, zinc finger domain-containing pro-
|
224
|
+
teins and proteins binding structural features such as the DNA minor
|
225
|
+
groove rarely bind DNA in a sequence-specific manner; some of them
|
226
|
+
merely function to increase the avidity or stability of a transcription
|
227
|
+
factor complex and its associated co-factors and do not - in their own
|
228
|
+
capacity - provide the specific genomic address to guide transcription
|
229
|
+
|
230
|
+
Fig. 3. Five steps to transcription activity annotation. The five key steps to
|
231
|
+
associating a transcription MF term with a protein start with identifying the
|
232
|
+
starting hypothesis, to confirm that the authors are characterizing a GTF, dbTF
|
233
|
+
or coTF. Secondly, considering whether the knowledge from specific protein
|
234
|
+
domains or characterized orthologs support the hypothesis. Thirdly, checking
|
235
|
+
whether existing annotations from GO, UniProt and Model Organism databases
|
236
|
+
are consistent with the hypothesis. Fourthly, reviewing other published
|
237
|
+
experimental data to ensure no contradictory findings have been reported.
|
238
|
+
Finally, creating new GO annotations, if the experimental results are consistent
|
239
|
+
with the identified hypothesis.
|
240
|
+
|
241
|
+
to specified target genes. Such proteins are not considered dbTFs in
|
242
|
+
GO.
|
243
|
+
|
244
|
+
To support the association of a gene with a GO term from homolo-
|
245
|
+
gous sequences from other species, only closely related orthologs whose
|
246
|
+
function has been unambiguously characterized can be used if those are
|
247
|
+
consistent with the experimental data presented in the article.
|
248
|
+
|
249
|
+
- GTFs function as the molecular machine that assembles with the
|
250
|
+
RNA polymerase at the promoter to form the pre-initiation complex
|
251
|
+
(PIC). GTFs have been characterized in several organisms, from
|
252
|
+
archaea to yeast and mammalian cells [14,15], and therefore
|
253
|
+
orthology should provide strong support for the decision to associate
|
254
|
+
these proteins with a child specific for RNA polymerase I, II or III of
|
255
|
+
the MF term “GO:0140223 general transcription initiation factor
|
256
|
+
activity”. In addition, the naming of GTFs is well established across
|
257
|
+
human and model organism nomenclature groups and can be used to
|
258
|
+
help guide these decisions. Thus, for human GTFs the HUGO Gene
|
259
|
+
Nomenclature Committee (HGNC, www.genenames.org) provides the
|
260
|
+
gene symbol TAF#, for TATA-box binding protein associated factors,
|
261
|
+
and GTF2#s and GTF3#s, for general transcription factor II and III
|
262
|
+
subunits respectively.
|
263
|
+
|
264
|
+
- dbTFs are specific double-stranded DNA-binding transcription fac-
|
265
|
+
tors that provide genomic addresses and respond to the conditions
|
266
|
+
under which specific genes are expressed. Central to dbTF function is
|
267
|
+
their binding to specific double-stranded DNA sequences that are
|
268
|
+
often named transcription factor binding sites (TFBS). Gene products
|
269
|
+
associated with the GO term “GO:0003700 DNA-binding transcrip-
|
270
|
+
tion factor activity” have the ability to bind DNA and this binding
|
271
|
+
regulates the expression of a specific set of target genes. The direct
|
272
|
+
target gene(s) can also be included in the annotation using the “has
|
273
|
+
input” relation. A human dbTF catalog developed by the GREEKC
|
274
|
+
project ([6]; also accessible from
|
275
|
+
https://www.ebi.ac.uk/QuickGO/targetset/dbTF) may be consulted to check whether a spe-
|
276
|
+
cific human protein is annotated to dbTF function with experimental
|
277
|
+
or phylogenetic evidence. When considering proteins that belong to
|
278
|
+
families of well characterized transcription factors, such as those that
|
279
|
+
contain bHLH, bZIP, homeobox, ETS, Forkhead, etc. domains and
|
280
|
+
proteins with a one-to-one ortholog already demonstrated to be a
|
281
|
+
|
282
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752 4 P. Gaudet et al.
|
283
|
+
|
284
|
+
dbTF, then weaker evidence of DNA binding, such as ChIP experi-
|
285
|
+
ments is sufficient. In contrast, special care must be taken to annotate
|
286
|
+
proteins bearing domains that are not exclusively found in tran-
|
287
|
+
scription factors, such as RING, MYND and PHD zinc fingers. Simi-
|
288
|
+
larly, for proteins with enzymatic activity: while there are rare cases
|
289
|
+
of dbTFs with enzymatic activities, such as ENO1, dbTF and enzy-
|
290
|
+
matic activity are usually mutually exclusive. For proteins not in the
|
291
|
+
dbTF catalog, clear experimental or phylogenetic evidence of
|
292
|
+
sequence-specific DNA binding and gene transcription regulation via
|
293
|
+
cognate DNA motifs located in gene-associated cis-regulatory mod-
|
294
|
+
ules is required for the protein to be classified with high confidence
|
295
|
+
as a dbTF.
|
296
|
+
|
297
|
+
- coTFs: Transcription coregulators (also known as transcription co-
|
298
|
+
factors; GO:0003712) represent a group of different functions that
|
299
|
+
take place at cis-regulatory regions to make transcription of specific
|
300
|
+
gene sets either more (coactivators) or less (corepressors) efficient.
|
301
|
+
Coregulators can modify chromatin structure through covalent
|
302
|
+
modification of histones or ATP-dependent chromatin remodelling,
|
303
|
+
and modulate dbTF interactions with other transcription cor-
|
304
|
+
egulators. We classify the Mediator Complex, which bridges dbTFs
|
305
|
+
and the RNA polymerase, as a transcription coactivator [16–18].
|
306
|
+
Many coTFs have enzymatic activity and normally exert their func-
|
307
|
+
tion independent of high affinity binding to specific DNA sequences.
|
308
|
+
CoTFs that do bind DNA typically recognize very short DNA se-
|
309
|
+
quences that are not sufficiently unique in the genome to enable
|
310
|
+
regulation of a limited set of genes in a discrete environmental or
|
311
|
+
developmental stage. One example of this is CFP1, which binds the CpG
|
312
|
+
dinucleotide and helps most CpG islands gain epigenomic marking
|
313
|
+
[19–21].
|
314
|
+
|
315
|
+
It is important to keep in mind that DNA binding proteins that
|
316
|
+
regulate transcription are not necessarily dbTFs. Key points that help
|
317
|
+
distinguish between the three activities discussed above are that (i)
|
318
|
+
dbTFs bind DNA in a sequence-specific manner, and regulate precise
|
319
|
+
sets of genes; (ii) coTFs usually do not directly bind DNA, and when
|
320
|
+
they do, they do not exhibit strong sequence-specificity; (iii) coTFs
|
321
|
+
often have catalytic activities (such as histone methyltransferase,
|
322
|
+
protein kinase, or ubiquitin ligase), which is highly unusual in dbTFs;
|
323
|
+
(iv) GTFs are required for core promoter activity and are considered
|
324
|
+
to act at each promoter to promote transcription initiation [14,22],
|
325
|
+
although the exact subunit composition at individual promoters may
|
326
|
+
vary.
|
327
|
+
|
328
|
+
3. Confirm that existing annotations are consistent with the hy-
|
329
|
+
pothesis. New annotations need to be consistent with existing an-
|
330
|
+
notations, unless the existing annotations are believed to be wrong or
|
331
|
+
out of date. Annotations made to a term as well as a more specific
|
332
|
+
descendant reflect differences in granularity of annotation, and are
|
333
|
+
not generally considered inconsistent. When the new annotation uses
|
334
|
+
a term in a different branch than existing annotations, a review of the
|
335
|
+
evidence supporting the existing annotations is undertaken and, if
|
336
|
+
necessary, annotations that appear to be incorrect are disputed (see
|
337
|
+
section “Ensuring a coherent set of annotations”).
|
338
|
+
|
339
|
+
4. Check that other published experimental results do not
|
340
|
+
contradict the hypothesis. The application of the gene-by-gene or
|
341
|
+
pathway-by-pathway annotation approach ensures that results from
|
342
|
+
other research articles are taken into account and that all annotations
|
343
|
+
are in line with the current state of knowledge. Again, if in-
|
344
|
+
consistencies are noticed, great care is taken to confirm correct
|
345
|
+
interpretation of the data; this is particularly important if there is
|
346
|
+
evidence for multiple, distinct transcription activity functions.
|
347
|
+
5. Validate that the experimental results are consistent with the
|
348
|
+
hypothesis. If the results presented in the curated article are
|
349
|
+
consistent with the hypothesis presented by the authors, then the
|
350
|
+
appropriate transcription activity GO term(s) are associated with the
|
351
|
+
gene product.
|
352
|
+
|
353
|
+
Proteins that are involved in transcription and its regulation have
|
354
|
+
historically been studied through small-scale, focused experimental
|
355
|
+
approaches. For some examples of the small-scale experiments that do
|
356
|
+
provide evidence for DNA binding transcription factor activity the bio-
|
357
|
+
curator can use Tables 3 and 4 of Tripathi et al. [4] and in Santos-
|
358
|
+
Zavaleta et al. [23]. Recent advances in high-throughput methodolo-
|
359
|
+
gies now provide robust data that, when interpreted with sufficient care,
|
360
|
+
support the assignment of a functional role to many proteins, including
|
361
|
+
transcription regulators. This includes HT-SELEX [24,25], Protein
|
362
|
+
Binding Microarrays [26], ChIP [27], and one- and two-hybrid experiments
|
363
|
+
[28,29]. For these experiments, the data quality and the false positive
|
364
|
+
rate must be evaluated before annotations are created. For example,
|
365
|
+
human HT-SELEX data will have more false positives if native dbTFs are
|
366
|
+
assayed in nuclear extracts or over-expressed in eukaryotic cells,
|
367
|
+
compared with heterologous proteins purified from prokaryotic cells, as
|
368
|
+
the latter reduces the probability of indirect interactions with endoge-
|
369
|
+
nous factors. For high-throughput transcription data, only articles with
|
370
|
+
low rates of false positives are curated. Those various techniques pro-
|
371
|
+
vide multiple independent lines of evidence, strengthening the confi-
|
372
|
+
dence in the annotation when they converge on a single motif or
|
373
|
+
molecular function. The GO recommendations on curation of high-
|
374
|
+
throughput experimental data should be applied when such data is an-
|
375
|
+
notated [30].
|
376
|
+
|
377
|
+
3.2. Annotations based on non-experimental evidence
|
378
|
+
|
379
|
+
There are only about 500 human dbTFs for which there is experi-
|
380
|
+
mental evidence satisfying the criteria presented here. Across all areas of
|
381
|
+
biology several reliable methods infer protein function from available
|
382
|
+
experimental data. Indeed, there are approximately 1000 human pro-
|
383
|
+
teins annotated as dbTFs by non-experimental methods (Lovering et al.
|
384
|
+
same BBA issue, prepublication available at [6]). Phylogenetic annota-
|
385
|
+
tions are assigned by a group of biocurators with expertise in evolu-
|
386
|
+
tionary biology, and require experimental evidence for at least one
|
387
|
+
member of a clade of evolutionarily related proteins [11]. The GO
|
388
|
+
knowledgebase also contains GO terms assigned by automated pipelines
|
389
|
+
based on protein domain (InterPro2GO) and orthology (Ensembl).
|
390
|
+
InterPro2GO [12] is based primarily on local (partial) homology: pro-
|
391
|
+
tein domains are mapped to specific GO terms, and any protein with one
|
392
|
+
of these domains will be annotated to the appropriate GO term(s).
|
393
|
+
Ensembl Compara [13] generates groups of one-to-one orthologs among
|
394
|
+
closely related species and propagates all experimental annotations to
|
395
|
+
each member of the group. While manual annotations based on these
|
396
|
+
methods are allowed, the GO consortium recommends using the auto-
|
397
|
+
mated pipelines that are maintained centrally and ensure a consistent
|
398
|
+
annotation corpus across all annotated species.
|
399
|
+
|
400
|
+
4. Ensuring a coherent set of annotations
|
401
|
+
|
402
|
+
During the process of annotation other relevant annotations associ-
|
403
|
+
ated with the gene are reviewed. If there are conflicting annotations, the
|
404
|
+
supporting data should be reassessed to determine whether the anno-
|
405
|
+
tations are inconsistent with the data, in which case the annotations
|
406
|
+
must be fixed [5].
|
407
|
+
|
408
|
+
In cases where the primary data is conflicting across different articles
|
409
|
+
(for example a protein is sometimes described as a transcription factor,
|
410
|
+
and sometimes as a coregulator), then the literature will be reviewed
|
411
|
+
carefully to decide whether the annotation is incorrect (bad choice of
|
412
|
+
term, wrong protein annotated), whether the knowledge has evolved, or whether
|
413
|
+
the protein plays multiple roles under different conditions (i.e., acts as a
|
414
|
+
DNA-binding transcription factor in certain contexts and as a cofactor in
|
415
|
+
others). If no activity has yet been established, no MF annotation will be
|
416
|
+
made.
|
417
|
+
|
418
|
+
Note that individual DNA-binding transcription factors can act as
|
419
|
+
both activators and repressors depending on the context; hence, associa-
|
420
|
+
tion of both activator and repressor terms with a single protein is not
|
421
|
+
|
422
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752 5 P. Gaudet et al.
|
423
|
+
|
424
|
+
Fig. 4. Representation of biological context of dbTF activity. The level of cyclin-dependent kinase inhibitor p21 (CDKN1A) is regulated by the transcription factor
|
425
|
+
p53 (TP53) upon DNA damage, signaling cell cycle arrest to the cell (http://noctua.berkeleybop.org/editor/graph/gomodel:5fa76ad400000000).
|
426
|
+
|
427
|
+
considered inconsistent. The specific conditions under which this hap-
|
428
|
+
pens, such as relevant signaling pathways, cell type, as well as specific
|
429
|
+
target genes, etc., may be further specified through additional context
|
430
|
+
details ([31]; see an example of a GO-CAM model in Fig. 4).
|
431
|
+
|
432
|
+
5. Pitfalls in annotating transcription regulators
|
433
|
+
|
434
|
+
During the review of dbTF GO annotations [6], in which over 3000
|
435
|
+
GO annotations were reviewed, a variety of common errors in data
|
436
|
+
interpretation were identified. One of the most common errors was
|
437
|
+
caused by the difficulty in distinguishing a dbTF from a coTF, as the
|
438
|
+
evidence for those two functions can be quite similar. To prevent this
|
439
|
+
error, biocurators ensure that the protein has a sequence-specific dou-
|
440
|
+
ble-stranded DNA-binding domain and conduct an exhaustive review of
|
441
|
+
the literature, including articles associated with the protein's close
|
442
|
+
orthologs. Furthermore, the literature supporting the dbTF activity of a
|
443
|
+
protein that also has evidence for another function, in particular, RNA
|
444
|
+
binding, will be carefully checked before assigning a dbTF activity. The
|
445
|
+
work on the human dbTF catalog added a GO ‘DNA-binding transcrip-
|
446
|
+
tion factor activity’ annotation to 583 proteins, and removed erroneous
|
447
|
+
assignments for 256 proteins (Lovering et al. BBAGRM-D-20-00141 this
|
448
|
+
issue).
|
449
|
+
|
450
|
+
Transcription regulators most often act as members of complexes,
|
451
|
+
some of which also contain proteins with other activities. In some cases,
|
452
|
+
only some subunits of a complex interact with DNA: for instance, while
|
453
|
+
the RFX complex contains three members: RFX5, RFXAP and RFXANK,
|
454
|
+
only RFX5 binds DNA directly. But the DNA-binding ability of the
|
455
|
+
complex is facilitated by all three subunits so RFXAP and RFXANK are
|
456
|
+
not coTFs [32]. In this case, RFXAP and RFXANK are annotated using the
|
457
|
+
“contributes to” qualifier, to indicate that they participate in, but are not
|
458
|
+
directly responsible for the activity.
|
459
|
+
|
460
|
+
Another activity that can easily be confused for a coTF is a dbTF
|
461
|
+
inhibitor. These proteins interact with a dbTF, but not at the DNA, to
|
462
|
+
prevent the dbTF from reaching its target genes. Well characterized
|
463
|
+
examples are the I-SMADs, SMAD6 and SMAD7 [33], that act by
|
464
|
+
competing with active SMADs at receptors, thus blocking further
|
465
|
+
intracellular signaling, and should be annotated to “GO:0140416 tran-
|
466
|
+
scription regulator inhibitor activity”.
|
467
|
+
|
468
|
+
It must be noted that these approaches to avoid errors in dbTF ac-
|
469
|
+
tivity assignment are not unequivocal, as some proteins do have multiple
|
470
|
+
functions. For example, the glucocorticoid receptor (NR3C1), which is a
|
471
|
+
|
472
|
+
canonical dbTF, has recently been shown to bind double-stranded RNA
|
473
|
+
motifs [34]; ATF2 (activating transcription factor 2) and CLOCK are
|
474
|
+
dbTFs that have been reported to also exhibit histone acetyltransferase
|
475
|
+
activity [35–38]; some dbTFs, such as NFIB (nuclear factor I B), also
|
476
|
+
function as dbTF inhibitors [39]. Finally, general and sequence-specific
|
477
|
+
effects can be difficult to separate, as has been established for the MYC
|
478
|
+
dbTF [40].
|
479
|
+
|
480
|
+
6. Conclusion
|
481
|
+
|
482
|
+
The annotation approach presented here is designed to help bio-
|
483
|
+
curators annotate factors involved in transcription and its regulation, as
|
484
|
+
well as for users of GO annotations to understand their meaning and the
|
485
|
+
evidence behind them. This work complements the redesign of this part
|
486
|
+
of the GO to significantly simplify the ontology structure. The new
|
487
|
+
ontology structure and the present standards were applied to the review
|
488
|
+
of human proteins associated with GO terms describing dbTF activity
|
489
|
+
[6]. We anticipate that adoption of this annotation approach by all
|
490
|
+
groups who produce GO associations will increase annotation consis-
|
491
|
+
tency across all species, for transcription and also more widely across all
|
492
|
+
areas represented by GO.
|
493
|
+
|
494
|
+
Declaration of competing interest
|
495
|
+
|
496
|
+
The authors declare that they have no known competing financial
|
497
|
+
interests or personal relationships that could have appeared to influence
|
498
|
+
the work reported in this paper.
|
499
|
+
|
500
|
+
Acknowledgements
|
501
|
+
|
502
|
+
We thank many GREEKC and GO consortium members for useful
|
503
|
+
discussions that led to the development of these guidelines, in particular
|
504
|
+
Marcio L. Acencio, Helen Attrill, and Valerie Wood.
|
505
|
+
|
506
|
+
Funding sources
|
507
|
+
|
508
|
+
The GO Consortium is funded by the National Human Genome
|
509
|
+
Research Institute (US National Institutes of Health), grant number
|
510
|
+
HG002273. RCL has been supported by Alzheimer's Research UK grant
|
511
|
+
(ARUK-NAS2017A-1) and the National Institute for Health Research
|
512
|
+
University College London Hospitals Biomedical Research Centre.
|
513
|
+
|
514
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752 6 P. Gaudet et al.
|
515
|
+
|
516
|
+
GREEKC is supported by the COST Action grant CA15205.
|
517
|
+
|
518
|
+
Appendices. Supplementary data
|
519
|
+
|
520
|
+
Supplementary data to this article can be found online at
|
521
|
+
|
522
|
+
https://doi.org/10.1016/j.bbagrm.2021.194752.
|
523
|
+
|
524
|
+
References
|
525
|
+
|
526
|
+
[1] M. Ashburner, C.A. Ball, J.A. Blake, D. Botstein, H. Butler, J.M. Cherry, et al., Gene
|
527
|
+
ontology: tool for the unification of biology, Gene Ontol. Consort. Nat Genet. 25 (1)
|
528
|
+
(2000 May) 25–29.
|
529
|
+
|
530
|
+
[2] The Gene Ontology Consortium, The Gene Ontology Resource: 20 years and still
|
531
|
+
|
532
|
+
GOing strong, Nucleic Acids Res. 47 (D1) (2019) D330–D338, 08.
|
533
|
+
|
534
|
+
[3] R. Tupler, G. Perini, M.R. Green, Expressing the human genome, Nature. 409
|
535
|
+
|
536
|
+
(6822) (2001 Feb 15) 832–833.
|
537
|
+
|
538
|
+
[4] S. Tripathi, K.R. Christie, R. Balakrishnan, R. Huntley, D.P. Hill, L. Thommesen, et
|
539
|
+
al., Gene Ontology annotation of sequence-specific DNA binding transcription
|
540
|
+
factors: setting the stage for a large-scale curation effort, Database J. Biol.
|
541
|
+
Databases Curation 2013 (2013), bat062.
|
542
|
+
|
543
|
+
[5] S. Poux, P. Gaudet, Best practices in manual annotation with the Gene Ontology,
|
544
|
+
|
545
|
+
Methods Mol. Biol. Clifton NJ 1446 (2017) 41–54.
|
546
|
+
|
547
|
+
[6] Lovering R.C., Gaudet P., Acencio M.L., Ignatchenko A., Jolma A., Fornes O., et al.,
|
548
|
+
|
549
|
+
BBAGRM-D-20-00141 this issue.
|
550
|
+
|
551
|
+
[7] Gene Ontology Consortium, Gene Ontology Consortium: going forward, Nucleic
|
552
|
+
|
553
|
+
Acids Res. 43 (Database issue) (2015 Jan) D1049–D1056.
|
554
|
+
|
555
|
+
[8] S. Carbon, A. Ireland, C.J. Mungall, S. Shu, B. Marshall, S. Lewis, et al., AmiGO:
|
556
|
+
|
557
|
+
online access to ontology and annotation data, Bioinforma Oxf. Engl. 25 (2) (2009
|
558
|
+
Jan 15) 288–289.
|
559
|
+
|
560
|
+
[9] D. Binns, E. Dimmer, R. Huntley, D. Barrell, C. O’Donovan, R. Apweiler, QuickGO:
|
561
|
+
a web-based tool for Gene Ontology searching, Bioinforma Oxf Engl. 25 (22) (2009
|
562
|
+
Nov 15) 3045–3046.
|
563
|
+
|
564
|
+
[10] P. Gaudet, N. Škunca, J.C. Hu, C. Dessimoz, Primer on the Gene Ontology, Methods
|
565
|
+
|
566
|
+
|
567
|
+
|
568
|
+
Mol. Biol. Clifton NJ 1446 (2017) 25–37.
|
569
|
+
|
570
|
+
[11] P. Gaudet, M.S. Livstone, S.E. Lewis, P.D. Thomas, Phylogenetic-based propagation
|
571
|
+
of functional annotations within the Gene Ontology consortium, Brief. Bioinform.
|
572
|
+
12 (5) (2011 Sep) 449–462.
|
573
|
+
|
574
|
+
[12] A. Mitchell, H.-Y. Chang, L. Daugherty, M. Fraser, S. Hunter, R. Lopez, et al., The
|
575
|
+
InterPro protein families database: the classification resource after 15 years,
|
576
|
+
Nucleic Acids Res. 43 (Database issue) (2015 Jan) D213–D221.
|
577
|
+
|
578
|
+
[13] F. Cunningham, P. Achuthan, W. Akanni, J. Allen, M.R. Amode, I.M. Armean, et al.,
|
579
|
+
|
580
|
+
Ensembl 2019, Nucleic Acids Res. 47 (D1) (2019) D745–D751, 08.
|
581
|
+
|
582
|
+
[14] S. Sainsbury, C. Bernecky, P. Cramer, Structural basis of transcription initiation by
|
583
|
+
RNA polymerase II, Nat. Rev. Mol. Cell Biol. 16 (3) (2015 Mar) 129–143.
|
584
|
+
[15] M.J.E. Koster, B. Snel, H.T.M. Timmers, Genesis of chromatin and transcription
|
585
|
+
dynamics in the origin of species, Cell. 161 (4) (2015 May 7) 724–736.
|
586
|
+
|
587
|
+
[16] K.M. André, E.H. Sipos, J. Soutourina, Mediator roles going beyond transcription,
|
588
|
+
|
589
|
+
Trends Genet TIG. 37 (3) (2020 Sep 10) 224–234.
|
590
|
+
|
591
|
+
[17] T. Eychenne, M. Werner, J. Soutourina, Toward understanding of the mechanisms
|
592
|
+
of mediator function in vivo: focus on the preinitiation complex assembly,
|
593
|
+
Transcription. 8 (5) (2017) 328–342.
|
594
|
+
|
595
|
+
[18] J. Yin, G. Wang, The mediator complex: a master coordinator of transcription and
|
596
|
+
cell lineage development, Dev. Camb. Engl. 141 (5) (2014 Mar) 977–987.
|
597
|
+
[19] J.P. Thomson, P.J. Skene, J. Selfridge, T. Clouaire, J. Guy, S. Webb, et al., CpG
|
598
|
+
islands influence chromatin structure via the CpG-binding protein Cfp1, Nature.
|
599
|
+
464 (7291) (2010 Apr 15) 1082–1086.
|
600
|
+
|
601
|
+
[20] J. Lipski, X. Zhang, B. Kruszewska, R. Kanjhan, Morphological study of long axonal
|
602
|
+
|
603
|
+
projections of ventral medullary inspiratory neurons in the rat, Brain Res. 640
|
604
|
+
(1–2) (1994 Mar 21) 171–184.
|
605
|
+
|
606
|
+
[21] H.K. Long, N.P. Blackledge, R.J. Klose, ZF-CxxC domain-containing proteins, CpG
|
607
|
+
|
608
|
+
islands and the chromatin connection, Biochem. Soc. Trans. 41 (3) (2013 Jun)
|
609
|
+
727–740.
|
610
|
+
|
611
|
+
[22] P. Cramer, Organization and regulation of gene transcription, Nature. 573 (7772)
|
612
|
+
|
613
|
+
(2019) 45–54.
|
614
|
+
|
615
|
+
[23] A. Santos-Zavaleta, H. Salgado, S. Gama-Castro, M. Sánchez-Pérez, L. Gómez-
|
616
|
+
|
617
|
+
Romero, D. Ledezma-Tejeida, et al., RegulonDB v 10.5: tackling challenges to unify
|
618
|
+
classic and high throughput knowledge of gene regulation in E. coli K-12, Nucleic
|
619
|
+
Acids Res. 47 (D1) (2019) D212–D220, 08.
|
620
|
+
|
621
|
+
[24] A.D. Ellington, J.W. Szostak, In vitro selection of RNA molecules that bind specific
|
622
|
+
|
623
|
+
ligands, Nature. 346 (6287) (1990 Aug 30) 818–822.
|
624
|
+
|
625
|
+
[25] C. Tuerk, L. Gold, Systematic evolution of ligands by exponential enrichment: RNA
|
626
|
+
ligands to bacteriophage T4 DNA polymerase, Science. 249 (4968) (1990 Aug 3)
|
627
|
+
505–510.
|
628
|
+
|
629
|
+
[26] K.K. Andrilenas, A. Penvose, T. Siggers, Using protein-binding microarrays to study
|
630
|
+
|
631
|
+
transcription factor specificity: homologs, isoforms and complexes, Brief Funct.
|
632
|
+
Genom. 14 (1) (2015 Jan) 17–29.
|
633
|
+
|
634
|
+
[27] T.H. Kim, J. Dekker, ChIP-seq, Cold Spring Harb. Protoc. 2018 (5) (2018), 01.
|
635
|
+
[28] J.A. Sewell, J.I. Fuxman Bass, Options and considerations when using a yeast one-
|
636
|
+
|
637
|
+
hybrid system, Methods Mol. Biol. Clifton NJ. 1794 (2018) 119–130.
|
638
|
+
|
639
|
+
[29] A. Paiano, A. Margiotta, M. De Luca, C. Bucci, Yeast two-hybrid assay to identify
|
640
|
+
|
641
|
+
interacting proteins, Curr. Protoc. Protein Sci. 95 (1) (2019), e70.
|
642
|
+
|
643
|
+
[30] H. Attrill, P. Gaudet, R.P. Huntley, R.C. Lovering, S.R. Engel, S. Poux, et al.,
|
644
|
+
|
645
|
+
Annotation of gene product function from high-throughput studies using the Gene
|
646
|
+
Ontology, Database J. Biol. Databases Curation. 2019 (2019), 01.
|
647
|
+
|
648
|
+
[31] P.D. Thomas, D.P. Hill, H. Mi, D. Osumi-Sutherland, K. Van Auken, S. Carbon, et
|
649
|
+
al., Gene Ontology Causal Activity Modeling (GO-CAM) moves beyond GO
|
650
|
+
annotations to structured descriptions of biological functions and systems, Nat.
|
651
|
+
Genet. 51 (10) (2019) 1429–1433.
|
652
|
+
|
653
|
+
[32] K. Masternak, E. Barras, M. Zufferey, B. Conrad, G. Corthals, R. Aebersold, et al.,
|
654
|
+
A gene encoding a novel RFX-associated transactivator is mutated in the majority
|
655
|
+
of MHC class II deficiency patients, Nat. Genet. 20 (3) (1998 Nov) 273–277.
|
656
|
+
|
657
|
+
[33] K. Miyazawa, K. Miyazono, Regulation of TGF-β family signaling by inhibitory
|
658
|
+
|
659
|
+
smads, Cold Spring Harb. Perspect. Biol. 9 (3) (2017 Mar 1).
|
660
|
+
|
661
|
+
[34] N.V. Parsonnet, N.C. Lammer, Z.E. Holmes, R.T. Batey, D.S. Wuttke, The
|
662
|
+
|
663
|
+
glucocorticoid receptor DNA-binding domain recognizes RNA hairpin structures
|
664
|
+
with high affinity, Nucleic Acids Res. 47 (15) (2019 05) 8180–8192.
|
665
|
+
|
666
|
+
[35] H. Kawasaki, L. Schiltz, R. Chiu, K. Itakura, K. Taira, Y. Nakatani, et al., ATF-2 has
|
667
|
+
intrinsic histone acetyltransferase activity which is modulated by phosphorylation,
|
668
|
+
Nature. 405 (6783) (2000 May 11) 195–200.
|
669
|
+
|
670
|
+
[36] J. Hirayama, S. Sahar, B. Grimaldi, T. Tamaru, K. Takamatsu, Y. Nakahata, et al.,
|
671
|
+
|
672
|
+
CLOCK-mediated acetylation of BMAL1 controls circadian function, Nature. 450
|
673
|
+
(7172) (2007 Dec 13) 1086–1090.
|
674
|
+
|
675
|
+
[37] B. Grimaldi, Y. Nakahata, S. Sahar, M. Kaluzova, D. Gauthier, K. Pham, et al.,
|
676
|
+
|
677
|
+
Chromatin remodeling and circadian control: master regulator CLOCK is an
|
678
|
+
enzyme, Cold Spring Harb. Symp. Quant. Biol. 72 (2007) 105–112.
|
679
|
+
|
680
|
+
[38] Z. Wang, Y. Wu, L. Li, X.-D. Su, Intermolecular recognition revealed by the complex
|
681
|
+
structure of human CLOCK-BMAL1 basic helix-loop-helix domains with E-box
|
682
|
+
DNA, Cell Res. 23 (2) (2013 Feb) 213–224.
|
683
|
+
|
684
|
+
[39] Y. Liu, H.U. Bernard, D. Apt, NFI-B3, a novel transcriptional repressor of the
|
685
|
+
|
686
|
+
nuclear factor I family, is generated by alternative RNA processing, J. Biol. Chem.
|
687
|
+
272 (16) (1997 Apr 18) 10739–10745.
|
688
|
+
|
689
|
+
[40] Z. Nie, C. Guo, S.K. Das, C.C. Chow, E. Batchelor, S.S. Simons, et al., Dissecting
|
690
|
+
|
691
|
+
transcriptional amplification by MYC, eLife 9 (2020), 27.
|
692
|
+
|
693
|
+
BBA - Gene Regulatory Mechanisms 1864 (2021) 194752 7
|