grnsight 5.1.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.travis.yml +2 -0
- package/README.md +2 -2
- package/database/README.md +1 -0
- package/database/network-database/README.md +44 -0
- package/database/network-database/schema.sql +24 -0
- package/database/network-database/scripts/filter_genes.py +76 -0
- package/database/network-database/scripts/generate_network.py +199 -0
- package/database/network-database/scripts/generate_sgd_network_from_yeastract_network.py +120 -0
- package/database/network-database/scripts/loader.py +78 -0
- package/package.json +12 -12
- package/server/app.js +3 -1
- package/server/controllers/custom-workbook-controller.js +66 -0
- package/server/controllers/expression-database-controller.js +19 -0
- package/server/controllers/network-database-controller.js +18 -0
- package/server/{controllers/database-controller.js → dals/expression-dal.js} +27 -34
- package/server/dals/network-dal.js +96 -0
- package/test/api-tests.js +2 -5
- package/web-client/public/gene/api.js +1 -1
- package/web-client/public/js/api/grnsight-api.js +124 -0
- package/web-client/public/js/constants.js +7 -4
- package/web-client/public/js/createNetwork.js +195 -0
- package/web-client/public/js/grnsight.js +2 -0
- package/web-client/public/js/grnsight.min.js +33 -9
- package/web-client/public/js/grnstate.js +2 -1
- package/web-client/public/js/setup-load-and-import-handlers.js +32 -12
- package/web-client/public/js/update-app.js +38 -92
- package/web-client/public/js/upload.js +12 -8
- package/web-client/public/stylesheets/grnsight.styl +151 -3
- package/web-client/views/components/demo.pug +10 -0
- package/web-client/views/info.pug +1 -1
- package/web-client/views/upload.pug +125 -132
package/.travis.yml
CHANGED
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
GRNsight
|
|
2
2
|
========
|
|
3
3
|
[](https://zenodo.org/badge/latestdoi/16195791)
|
|
4
|
-
[](https://app.travis-ci.com/dondi/GRNsight)
|
|
5
|
+
[](https://coveralls.io/github/dondi/GRNsight?branch=beta)
|
|
6
6
|
|
|
7
7
|
http://dondi.github.io/GRNsight/
|
|
8
8
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Here are the files pertaining to both the network and expression databases. Look within the README.md files of both folders for information pertinent to the schema that you intend to be using.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Network Database (Schema)
|
|
2
|
+
|
|
3
|
+
All files pertaining to the network database live within this directory.
|
|
4
|
+
|
|
5
|
+
## The basics
|
|
6
|
+
|
|
7
|
+
### Schema
|
|
8
|
+
|
|
9
|
+
All network data is stored within the spring2022_network schema on our Postgres database.
|
|
10
|
+
|
|
11
|
+
The schema is located within this directory at the top level in the file `schema.sql`. It defines the tables located within the spring2022_network schema.
|
|
12
|
+
|
|
13
|
+
### Scripts
|
|
14
|
+
|
|
15
|
+
All scripts live within the subdirectory `scripts`, located in the top-level of the network database directory.
|
|
16
|
+
|
|
17
|
+
Any source files required to run the scripts live within the subdirectory `source-files`, located in the top-level of the network database directory. As source files may be large, you must create this directory yourself and add any source files you need to use there.
|
|
18
|
+
|
|
19
|
+
All generated results of the scripts live in the subdirectory `script-results`, located in the top-level of the network database directory. Currently, all scripts that generate code create the directory if it does not currently exist. When adding a new script that generates resulting code, best practice is to create the script-results directory and any subdirectories if it does not exist, in order to prevent errors and snafus for recently cloned repositories.
|
|
20
|
+
|
|
21
|
+
Within the scripts directory, there are the following files:
|
|
22
|
+
|
|
23
|
+
- `generate_network.py`
|
|
24
|
+
- `generate_sgd_network_from_yeastract_network.py`
|
|
25
|
+
- `loader.py`
|
|
26
|
+
- `filter_genes.py`
|
|
27
|
+
|
|
28
|
+
#### Network Generator (and data preprocessor)
|
|
29
|
+
|
|
30
|
+
This script (`generate_network.py`) is a two-for-one. It first uses the yeastmine service from the SGD database to query for all regulator genes relating to Saccharomyces cerevisiae. From there it gets all of the targets for each regulator gene. We then construct two networks from these connections (a regulator by regulator matrix as well as a regulator by target matrix). We also construct the processed loader files, so that they are ready to load using `loader.py`.
|
|
31
|
+
|
|
32
|
+
The resulting network matrices are located in `script-results/networks` and the resulting processed loader files are located within `script-results/processed-loader-files`
|
|
33
|
+
|
|
34
|
+
Make sure to have all dependencies installed beforehand or you will receive errors. (pip3 install intermine, tzlocal, etc. [see file for all imports])
|
|
35
|
+
|
|
36
|
+
Usage:
|
|
37
|
+
```
|
|
38
|
+
python3 generate_network.py
|
|
39
|
+
```
|
|
40
|
+
#### Generate an SGD network from a Yeastract network
|
|
41
|
+
|
|
42
|
+
This script takes a network (assumed to have data from Yeastract, but it can be any given network) and gives you a network with data queried from Yeastmine (SGD). It takes the regulators and targets from a given network file, then queries Yeastmine in order to get the regulatory connections between the genes. From there, it creates a new network using the data obtained from Yeastmine.
|
|
43
|
+
|
|
44
|
+
|
|
-- Schema for spring2022_network: data-load provenance, genes, and the
-- regulator -> target regulatory network itself.

-- One row per data load: which source produced the data and when it was loaded.
CREATE TABLE spring2022_network.source (
    time_stamp TIMESTAMP,
    source VARCHAR,
    PRIMARY KEY(time_stamp, source)
);

-- One row per gene per taxon.
CREATE TABLE spring2022_network.gene (
    gene_id VARCHAR, -- systematic like name
    display_gene_id VARCHAR, -- standard like name
    species VARCHAR,
    taxon_id VARCHAR,
    regulator BOOLEAN, -- true when the gene appears as a regulator in the network
    PRIMARY KEY(gene_id, taxon_id)
);

-- Edges of the regulatory network. Each edge is tagged with the
-- (time_stamp, source) pair of the load that produced it.
CREATE TABLE spring2022_network.network (
    regulator_gene_id VARCHAR,
    target_gene_id VARCHAR,
    taxon_id VARCHAR,
    time_stamp TIMESTAMP,
    source VARCHAR,
    FOREIGN KEY (regulator_gene_id, taxon_id) REFERENCES spring2022_network.gene(gene_id, taxon_id),
    FOREIGN KEY (target_gene_id, taxon_id) REFERENCES spring2022_network.gene(gene_id, taxon_id),
    FOREIGN KEY (time_stamp, source) REFERENCES spring2022_network.source(time_stamp, source)
);
|
@@ -0,0 +1,76 @@
"""Compare the processed gene loader file against the genes already present in
the spring2022_network.gene table, and write two tab-separated reports:

- missing-genes.csv: genes in the loader file but absent from the database
- update-genes.csv:  genes whose display_gene_id differs from the database's

Both reports use the same column layout as gene.csv.
"""
import csv

import psycopg2

PROCESSED_GENES = "../script-results/processed-loader-files/gene.csv"
MISSING_GENE_DESTINATION = '../script-results/processed-loader-files/missing-genes.csv'
UPDATE_GENE_DESTINATION = '../script-results/processed-loader-files/update-genes.csv'

# Shared header for both report files.
HEADERS = 'Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\tRegulator'


def _write_report(destination, genes):
    """Write one tab-separated report of {(gene_id, taxon_id): info} entries."""
    with open(destination, 'w') as gene_file:
        gene_file.write(f'{HEADERS}\n')
        for (gene_id, taxon_id), info in genes.items():
            gene_file.write(
                f'{gene_id}\t{info["display_gene_id"]}\t{info["species"]}'
                f'\t{taxon_id}\t{info["regulator"]}\n')


# Initialized before the try so the finally block can safely test them even
# when psycopg2.connect itself raises (the old code hit a NameError there).
connection = None
cursor = None
try:
    # NOTE(review): credentials are hard-coded (empty password) for a public
    # RDS host; these should come from environment variables or a config file.
    connection = psycopg2.connect(user="postgres",
                                  password="",
                                  host="grnsight2.cfimp3lu6uob.us-west-1.rds.amazonaws.com",
                                  port="5432",
                                  database="postgres")
    cursor = connection.cursor()

    cursor.execute("select * from spring2022_network.gene")
    print("Selecting rows from gene table using cursor.fetchall")
    gene_records = cursor.fetchall()

    # Index DB rows by (gene_id, taxon_id); row columns are
    # (gene_id, display_gene_id, species, taxon_id, regulator).
    db_genes = {}
    missing_genes = {}
    genes_to_update = {}
    for gene in gene_records:
        key = (gene[0], gene[3])
        db_genes[key] = {"display_gene_id": gene[1],
                         "species": gene[2],
                         "regulator": gene[4]}

    print(f'Processing file {PROCESSED_GENES}')
    # The loader file is tab-separated, so read it with a tab delimiter.
    # (The old comma-based reader followed by row[0].split('\t') silently
    # dropped data after any comma inside a field.)
    with open(PROCESSED_GENES, 'r', encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter='\t')
        for i, row in enumerate(reader):
            if i == 0:
                continue  # skip the header row
            gene_id, display_gene_id, species, taxon_id, regulator = row[:5]
            key = (gene_id, taxon_id)
            value = {"display_gene_id": display_gene_id,
                     "species": species,
                     "regulator": regulator}
            if key not in db_genes:
                missing_genes[key] = value
            elif db_genes[key]["display_gene_id"] != display_gene_id:
                # The display gene id got updated; record it so the DB can be
                # brought up to date.
                genes_to_update[key] = value

    print('Creating missing-genes.csv\n')
    _write_report(MISSING_GENE_DESTINATION, missing_genes)

    print('Creating update-genes.csv\n')
    _write_report(UPDATE_GENE_DESTINATION, genes_to_update)

except (Exception, psycopg2.Error) as error:
    print("Error while fetching data from PostgreSQL", error)

finally:
    # closing database connection.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()
        print("PostgreSQL connection is closed")
@@ -0,0 +1,199 @@
|
|
|
1
|
+
from __future__ import print_function
|
|
2
|
+
|
|
3
|
+
from intermine.webservice import Service
|
|
4
|
+
service = Service("https://yeastmine.yeastgenome.org/yeastmine/service")
|
|
5
|
+
|
|
6
|
+
import csv
|
|
7
|
+
import re
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
import datetime
|
|
11
|
+
import pytz
|
|
12
|
+
import tzlocal
|
|
13
|
+
|
|
14
|
+
# Get Network Data from Yeastmine
|
|
15
|
+
|
|
16
|
+
query = service.new_query("Gene")
|
|
17
|
+
|
|
18
|
+
query.add_view(
|
|
19
|
+
"primaryIdentifier", "secondaryIdentifier", "symbol", "name", "sgdAlias",
|
|
20
|
+
"regulationSummary.summaryParagraph",
|
|
21
|
+
"regulationSummary.publications.pubMedId",
|
|
22
|
+
"regulationSummary.publications.citation"
|
|
23
|
+
)
|
|
24
|
+
query.outerjoin("regulationSummary.publications")
|
|
25
|
+
|
|
26
|
+
regulators = {}
|
|
27
|
+
all_genes = {}
|
|
28
|
+
print("COLLECTING REGULATORS\n")
|
|
29
|
+
for row in query.rows():
|
|
30
|
+
systematic_name = row["secondaryIdentifier"]
|
|
31
|
+
standard_name = row["symbol"]
|
|
32
|
+
if standard_name == None:
|
|
33
|
+
standard_name = systematic_name
|
|
34
|
+
|
|
35
|
+
regulators[standard_name] = systematic_name
|
|
36
|
+
all_genes[standard_name] = systematic_name
|
|
37
|
+
|
|
38
|
+
regulators_to_targets = {}
|
|
39
|
+
all_targets = {}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
print("COLLECTING TARGETS\n")
|
|
43
|
+
for regulator in regulators:
|
|
44
|
+
query = service.new_query("Gene")
|
|
45
|
+
query.add_constraint("regulatoryRegions", "TFBindingSite")
|
|
46
|
+
query.add_view(
|
|
47
|
+
"regulatoryRegions.regulator.symbol",
|
|
48
|
+
"regulatoryRegions.regulator.secondaryIdentifier", "symbol",
|
|
49
|
+
"secondaryIdentifier", "regulatoryRegions.regEvidence.ontologyTerm.name",
|
|
50
|
+
"regulatoryRegions.regEvidence.ontologyTerm.identifier",
|
|
51
|
+
"regulatoryRegions.experimentCondition",
|
|
52
|
+
"regulatoryRegions.strainBackground",
|
|
53
|
+
"regulatoryRegions.regulationDirection",
|
|
54
|
+
"regulatoryRegions.publications.pubMedId", "regulatoryRegions.datasource",
|
|
55
|
+
"regulatoryRegions.annotationType"
|
|
56
|
+
)
|
|
57
|
+
query.add_sort_order("Gene.secondaryIdentifier", "ASC")
|
|
58
|
+
query.add_constraint("regulatoryRegions.regulator", "LOOKUP", regulator, "S. cerevisiae", code="A")
|
|
59
|
+
targets = {}
|
|
60
|
+
|
|
61
|
+
for row in query.rows():
|
|
62
|
+
target_systematic_name = row["secondaryIdentifier"]
|
|
63
|
+
target_standard_name = row["symbol"]
|
|
64
|
+
if target_standard_name == None:
|
|
65
|
+
target_standard_name = target_systematic_name
|
|
66
|
+
targets[target_standard_name] = target_systematic_name
|
|
67
|
+
all_targets[target_standard_name] = target_systematic_name
|
|
68
|
+
all_genes[target_standard_name] = target_systematic_name
|
|
69
|
+
|
|
70
|
+
regulators_to_targets[regulator] = { "systematic_name": regulators[regulator], "targets": targets}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def create_regulator_to_target_row(target, all_regulators):
|
|
75
|
+
result = "" + target
|
|
76
|
+
for regulator in all_regulators:
|
|
77
|
+
if target in all_regulators[regulator]["targets"]:
|
|
78
|
+
result += "\t" + "1"
|
|
79
|
+
else:
|
|
80
|
+
result += "\t" + "0"
|
|
81
|
+
return result
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# Create files
|
|
85
|
+
|
|
86
|
+
# Create folder paths
|
|
87
|
+
if not os.path.exists('../script-results'):
|
|
88
|
+
os.makedirs('../script-results')
|
|
89
|
+
|
|
90
|
+
if not os.path.exists('../script-results/networks'):
|
|
91
|
+
os.makedirs('../script-results/networks')
|
|
92
|
+
|
|
93
|
+
if not os.path.exists('../script-results/processed-loader-files'):
|
|
94
|
+
os.makedirs('../script-results/processed-loader-files')
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Files to be generated
|
|
99
|
+
|
|
100
|
+
# Create Networks
|
|
101
|
+
|
|
102
|
+
REGULATORS_TO_TARGETS_MATRIX = '../script-results/networks/regulators_to_targets.csv'
|
|
103
|
+
REGULATORS_TO_REGULATORS_MATRIX = '../script-results/networks/regulators_to_regulators.csv'
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
targets = []
|
|
107
|
+
for target in all_targets:
|
|
108
|
+
if target != None:
|
|
109
|
+
targets.append(target)
|
|
110
|
+
|
|
111
|
+
regulators_list = []
|
|
112
|
+
for regulator in regulators_to_targets:
|
|
113
|
+
if regulator != None:
|
|
114
|
+
regulators_list.append(regulator)
|
|
115
|
+
|
|
116
|
+
print(f'Creating REGULATORS TO TARGETS MATRIX\n')
|
|
117
|
+
regulator_to_target_file = open(REGULATORS_TO_TARGETS_MATRIX, 'w')
|
|
118
|
+
headers = "cols regulators/rows targets"
|
|
119
|
+
headers += '\t'.join(regulators_list)
|
|
120
|
+
regulator_to_target_file.write(f'{headers}\n')
|
|
121
|
+
for target in targets:
|
|
122
|
+
result = create_regulator_to_target_row(target, regulators_to_targets)
|
|
123
|
+
if result != False:
|
|
124
|
+
regulator_to_target_file.write(f'{result}\n')
|
|
125
|
+
regulator_to_target_file.close()
|
|
126
|
+
|
|
127
|
+
print(f'Creating REGULATORS TO TARGETS MATRIX\n')
|
|
128
|
+
regulator_to_regulator_file = open(REGULATORS_TO_REGULATORS_MATRIX, 'w')
|
|
129
|
+
headers = "cols regulators/rows targets"
|
|
130
|
+
headers += '\t'.join(regulators_list)
|
|
131
|
+
regulator_to_regulator_file.write(f'{headers}\n')
|
|
132
|
+
for target in targets:
|
|
133
|
+
result = create_regulator_to_target_row(target, regulators_to_targets)
|
|
134
|
+
if result != False:
|
|
135
|
+
regulator_to_regulator_file.write(f'{result}\n')
|
|
136
|
+
regulator_to_regulator_file.close()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# Create loader-files
|
|
141
|
+
|
|
142
|
+
# Source Table
|
|
143
|
+
|
|
144
|
+
SOURCE_DESTINATION = '../script-results/processed-loader-files/source.csv'
|
|
145
|
+
dt = datetime.datetime.now()
|
|
146
|
+
|
|
147
|
+
year = dt.year
|
|
148
|
+
month = f'{dt.month}'
|
|
149
|
+
if len(month) == 1:
|
|
150
|
+
month = "0" + month
|
|
151
|
+
day = f'{dt.day}'
|
|
152
|
+
if len(day) == 1:
|
|
153
|
+
day = "0" + day
|
|
154
|
+
hour = dt.hour
|
|
155
|
+
minute = dt.minute
|
|
156
|
+
second = dt.second
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
timestamp = f'{year}-{month}-{day} {hour}:{minute}:{second}'
|
|
160
|
+
source = "YeastMine - Saccharomyces Genome Database"
|
|
161
|
+
|
|
162
|
+
source_file = open(SOURCE_DESTINATION, 'w')
|
|
163
|
+
headers = f'Timestamp\tSource\n{timestamp}\t{source}'
|
|
164
|
+
source_file.write(f'{headers}\n')
|
|
165
|
+
source_file.close()
|
|
166
|
+
|
|
167
|
+
# Gene Table
|
|
168
|
+
|
|
169
|
+
GENE_DESTINATION = '../script-results/processed-loader-files/gene.csv'
|
|
170
|
+
|
|
171
|
+
species = "Saccharomyces cerevisiae"
|
|
172
|
+
taxon_id = "559292"
|
|
173
|
+
|
|
174
|
+
print(f'Creating gene.csv\n')
|
|
175
|
+
gene_file = open(GENE_DESTINATION, 'w')
|
|
176
|
+
headers = f'Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\tRegulator'
|
|
177
|
+
gene_file.write(f'{headers}\n')
|
|
178
|
+
for gene in all_genes:
|
|
179
|
+
if gene in regulators:
|
|
180
|
+
gene_file.write(f'{all_genes[gene]}\t{gene}\t{species}\t{taxon_id}\ttrue\n')
|
|
181
|
+
else:
|
|
182
|
+
gene_file.write(f'{all_genes[gene]}\t{gene}\t{species}\t{taxon_id}\tfalse\n')
|
|
183
|
+
|
|
184
|
+
gene_file.close()
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# Network Table
|
|
188
|
+
|
|
189
|
+
NETWORK_DESTINATION = '../script-results/processed-loader-files/network.csv'
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
print(f'Creating network.csv\n')
|
|
193
|
+
network_file = open(NETWORK_DESTINATION, 'w')
|
|
194
|
+
headers = f'Regulator Gene ID\tTarget Gene ID\tTaxon ID\tTimestamp\tSource'
|
|
195
|
+
network_file.write(f'{headers}\n')
|
|
196
|
+
for gene in regulators_to_targets:
|
|
197
|
+
for target_gene in regulators_to_targets[gene]["targets"]:
|
|
198
|
+
network_file.write(f'{regulators_to_targets[gene]["systematic_name"]}\t{regulators_to_targets[gene]["targets"][target_gene]}\t{taxon_id}\t{timestamp}\t{source}\n')
|
|
199
|
+
network_file.close()
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from __future__ import print_function
|
|
2
|
+
|
|
3
|
+
from intermine.webservice import Service
|
|
4
|
+
service = Service("https://yeastmine.yeastgenome.org/yeastmine/service")
|
|
5
|
+
|
|
6
|
+
import csv
|
|
7
|
+
import os
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
# Extracting regulator and target genes from file
|
|
11
|
+
|
|
12
|
+
# Remember to make the source file folder and put your source files in there
|
|
13
|
+
YEASTRACT_NETWORK = "../source-files/Regulation_matrix_profile2.csv"
|
|
14
|
+
targets = []
|
|
15
|
+
regulators = []
|
|
16
|
+
print(f'Processing file {YEASTRACT_NETWORK}')
|
|
17
|
+
with open(YEASTRACT_NETWORK, 'r+', encoding="UTF-8") as f:
|
|
18
|
+
targets = []
|
|
19
|
+
regulators = []
|
|
20
|
+
i = 0
|
|
21
|
+
reader = csv.reader(f)
|
|
22
|
+
for row in reader:
|
|
23
|
+
if i == 0:
|
|
24
|
+
# we are getting the regulators
|
|
25
|
+
j = 0
|
|
26
|
+
x = row[0].split()
|
|
27
|
+
for regulator in x:
|
|
28
|
+
if j > 2:
|
|
29
|
+
regulators.append(regulator)
|
|
30
|
+
j += 1
|
|
31
|
+
else:
|
|
32
|
+
# we are getting the targets
|
|
33
|
+
targets.append(row[0].split()[0])
|
|
34
|
+
i+=1
|
|
35
|
+
|
|
36
|
+
print (targets)
|
|
37
|
+
print (regulators)
|
|
38
|
+
|
|
39
|
+
regulators_to_targets = {}
|
|
40
|
+
|
|
41
|
+
print("COLLECTING TARGETS\n")
|
|
42
|
+
for regulator in regulators:
|
|
43
|
+
query = service.new_query("Gene")
|
|
44
|
+
query.add_constraint("regulatoryRegions", "TFBindingSite")
|
|
45
|
+
query.add_view(
|
|
46
|
+
"regulatoryRegions.regulator.symbol",
|
|
47
|
+
"regulatoryRegions.regulator.secondaryIdentifier", "symbol",
|
|
48
|
+
"secondaryIdentifier", "regulatoryRegions.regEvidence.ontologyTerm.name",
|
|
49
|
+
"regulatoryRegions.regEvidence.ontologyTerm.identifier",
|
|
50
|
+
"regulatoryRegions.experimentCondition",
|
|
51
|
+
"regulatoryRegions.strainBackground",
|
|
52
|
+
"regulatoryRegions.regulationDirection",
|
|
53
|
+
"regulatoryRegions.publications.pubMedId", "regulatoryRegions.datasource",
|
|
54
|
+
"regulatoryRegions.annotationType"
|
|
55
|
+
)
|
|
56
|
+
query.add_sort_order("Gene.secondaryIdentifier", "ASC")
|
|
57
|
+
query.add_constraint("regulatoryRegions.regulator", "LOOKUP", regulator, "S. cerevisiae", code="A")
|
|
58
|
+
regulators_targets = []
|
|
59
|
+
|
|
60
|
+
for row in query.rows():
|
|
61
|
+
target_systematic_name = row["secondaryIdentifier"]
|
|
62
|
+
target_standard_name = row["symbol"]
|
|
63
|
+
if target_standard_name == None:
|
|
64
|
+
target_standard_name = target_systematic_name
|
|
65
|
+
if target_standard_name in targets:
|
|
66
|
+
regulators_targets.append(target_standard_name)
|
|
67
|
+
|
|
68
|
+
regulators_to_targets[regulator] = regulators_targets
|
|
69
|
+
|
|
70
|
+
print(regulators_to_targets)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# We already have the regulator and target genes from Yeastract, now let's find the network
|
|
78
|
+
|
|
79
|
+
def create_regulator_to_target_row(target, all_regulators):
|
|
80
|
+
result = "" + target
|
|
81
|
+
for regulator in all_regulators:
|
|
82
|
+
if target in all_regulators[regulator]:
|
|
83
|
+
result += "\t" + "1"
|
|
84
|
+
else:
|
|
85
|
+
result += "\t" + "0"
|
|
86
|
+
return result
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Create files
|
|
90
|
+
|
|
91
|
+
# Create folder paths
|
|
92
|
+
if not os.path.exists('../script-results/yeastract-to-sgd-networks'):
|
|
93
|
+
os.makedirs('../script-results/yeastract-to-sgd-networks')
|
|
94
|
+
|
|
95
|
+
# Files to be generated
|
|
96
|
+
|
|
97
|
+
# Create Networks
|
|
98
|
+
|
|
99
|
+
SGD_MATRIX = '../script-results/yeastract-to-sgd-networks/SGD_Regulation_matrix_profile2.csv'
|
|
100
|
+
SGD_MATRIX_EXCEL = '../script-results/yeastract-to-sgd-networks/SGD_Regulation_matrix_profile2.xlsx'
|
|
101
|
+
|
|
102
|
+
print(f'Creating SGD MATRIX\n')
|
|
103
|
+
sgd_matrix_file = open(SGD_MATRIX, 'w')
|
|
104
|
+
headers = "cols regulators/rows targets\t"
|
|
105
|
+
headers += '\t'.join(regulators)
|
|
106
|
+
sgd_matrix_file.write(f'{headers}\n')
|
|
107
|
+
for target in targets:
|
|
108
|
+
result = create_regulator_to_target_row(target, regulators_to_targets)
|
|
109
|
+
if result != False:
|
|
110
|
+
sgd_matrix_file.write(f'{result}\n')
|
|
111
|
+
sgd_matrix_file.close()
|
|
112
|
+
|
|
113
|
+
# Reading the csv file
|
|
114
|
+
df_new = pd.read_csv(SGD_MATRIX, sep='\t')
|
|
115
|
+
|
|
116
|
+
# saving xlsx file
|
|
117
|
+
GFG = pd.ExcelWriter(SGD_MATRIX_EXCEL)
|
|
118
|
+
df_new.to_excel(GFG, sheet_name="network", index=False)
|
|
119
|
+
|
|
120
|
+
GFG.save()
|
|
@@ -0,0 +1,78 @@
import csv

# Usage
# python3 loader.py | psql postgresql://localhost/postgres
"""
This program generates direct SQL statements from the source files in order
to populate a relational database with those files’ data.

By taking the approach of emitting SQL statements directly, we bypass the need to import
some kind of database library for the loading process, instead passing the statements
directly into a database command line utility such as `psql`.
"""

SOURCE_PATH = '../script-results/processed-loader-files/source.csv'
GENE_PATH = '../script-results/processed-loader-files/gene.csv'
NETWORK_PATH = '../script-results/processed-loader-files/network.csv'


def _emit_copy(copy_statement, csv_path, num_fields):
    """Print copy_statement, then every non-header row of the tab-separated
    file at csv_path truncated to num_fields columns, then the COPY
    terminator (a backslash followed by a period).

    Reading with delimiter='\\t' replaces the old comma-split-then-rejoin
    trick, which corrupted any quoted field containing a comma.
    """
    print(copy_statement)
    with open(csv_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f, delimiter='\t')
        for row_num, row in enumerate(reader):
            if row_num != 0:
                print('\t'.join(row[:num_fields]))
    print('\\.')


def LOAD_SOURCES(path=SOURCE_PATH):
    """Load Network Data Sources into the database."""
    _emit_copy('COPY spring2022_network.source (time_stamp, source) FROM stdin;',
               path, 2)


def LOAD_GENES(path=GENE_PATH):
    """Load the Gene ID Mapping into the database."""
    _emit_copy('COPY spring2022_network.gene (gene_id, display_gene_id, species, taxon_id, regulator) FROM stdin;',
               path, 5)


def LOAD_NETWORK(path=NETWORK_PATH):
    """Load the Network Matrix into the database."""
    _emit_copy('COPY spring2022_network.network (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source) FROM stdin;',
               path, 5)


# Guarded so importing this module (e.g. from tests) does not immediately
# read files and print SQL; `python3 loader.py` behaves as before.
if __name__ == "__main__":
    LOAD_SOURCES()
    LOAD_GENES()
    LOAD_NETWORK()
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "grnsight",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "6.0.0",
|
|
4
4
|
"description": "Web app and service for visualizing models of gene regulatory networks",
|
|
5
5
|
"directories": {
|
|
6
6
|
"test": "test"
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"cors": "2.8.1",
|
|
12
12
|
"cytoscape": "2.7.14",
|
|
13
13
|
"d3-v4-grid": "2.0.1",
|
|
14
|
-
"dotenv": "
|
|
14
|
+
"dotenv": "8.2.0",
|
|
15
15
|
"errorhandler": "1.5.0",
|
|
16
16
|
"express": "4.16.0",
|
|
17
17
|
"fs": "0.0.1-security",
|
|
@@ -25,31 +25,31 @@
|
|
|
25
25
|
"node-xlsx": "0.7.2",
|
|
26
26
|
"nodemon": "1.19.1",
|
|
27
27
|
"parallelshell": "3.0.1",
|
|
28
|
-
"pg": "
|
|
29
|
-
"pg-hstore": "
|
|
30
|
-
"pug": "
|
|
31
|
-
"querystring": "
|
|
28
|
+
"pg": "8.0.0",
|
|
29
|
+
"pg-hstore": "2.3.3",
|
|
30
|
+
"pug": "3.0.2",
|
|
31
|
+
"querystring": "0.2.0",
|
|
32
32
|
"save-svg-as-png": "1.4.14",
|
|
33
|
-
"sequelize": "
|
|
33
|
+
"sequelize": "5.21.6",
|
|
34
34
|
"serve-static": "1.13.1",
|
|
35
35
|
"should": "11.2.0",
|
|
36
|
-
"stylus": "
|
|
36
|
+
"stylus": "0.54.5",
|
|
37
37
|
"supertest": "3.0.0",
|
|
38
|
-
"url": "
|
|
38
|
+
"url": "0.11.0",
|
|
39
39
|
"webpack": "4.0.0",
|
|
40
40
|
"xml2js": "0.4.17",
|
|
41
41
|
"xmlbuilder": "8.2.2"
|
|
42
42
|
},
|
|
43
43
|
"devDependencies": {
|
|
44
|
-
"browser-env": "
|
|
44
|
+
"browser-env": "3.3.0",
|
|
45
45
|
"chai": "4.1.2",
|
|
46
46
|
"coveralls": "2.13.1",
|
|
47
47
|
"eslint": "3.19.0",
|
|
48
48
|
"isomorphic-fetch": "2.2.1",
|
|
49
49
|
"istanbul": "0.4.5",
|
|
50
|
-
"jquery": "
|
|
50
|
+
"jquery": "3.6.0",
|
|
51
51
|
"jquery-extend": "2.0.3",
|
|
52
|
-
"jsdom": "
|
|
52
|
+
"jsdom": "13.2.0",
|
|
53
53
|
"markdown-pdf": "8.0.0",
|
|
54
54
|
"mocha": "2.5.3",
|
|
55
55
|
"mocha-lcov-reporter": "1.3.0",
|
package/server/app.js
CHANGED
|
@@ -33,7 +33,9 @@ require(__dirname + "/controllers/export-controller")(app);
|
|
|
33
33
|
require(__dirname + "/controllers/import-controller")(app);
|
|
34
34
|
require(__dirname + "/controllers/ga-controller")(app);
|
|
35
35
|
require(__dirname + "/controllers/api-controllers")(app);
|
|
36
|
-
require(__dirname + "/controllers/database-controller")(app);
|
|
36
|
+
require(__dirname + "/controllers/expression-database-controller")(app);
|
|
37
|
+
require(__dirname + "/controllers/network-database-controller")(app);
|
|
38
|
+
require(__dirname + "/controllers/custom-workbook-controller")(app);
|
|
37
39
|
|
|
38
40
|
// Don"t start the server if this app is run as a child process.
|
|
39
41
|
if (!module.parent) {
|