grnsight 6.0.7 → 7.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.yml +4 -4
- package/.github/workflows/node.js.yml +35 -0
- package/README.md +1 -1
- package/database/README.md +218 -97
- package/database/constants.py +42 -0
- package/database/filter_update.py +168 -0
- package/database/grnsettings-database/README.md +52 -0
- package/database/grnsettings-database/schema.sql +4 -0
- package/database/loader.py +30 -0
- package/database/loader_update.py +36 -0
- package/database/network-database/scripts/generate_network.py +15 -23
- package/database/network-database/scripts/generate_new_network_version.py +17 -24
- package/database/protein-protein-database/README.md +71 -0
- package/database/protein-protein-database/schema.sql +37 -0
- package/database/protein-protein-database/scripts/generate_protein_network.py +227 -0
- package/database/protein-protein-database/scripts/remove_duplicates.sh +4 -0
- package/database/utils.py +418 -0
- package/package.json +3 -2
- package/server/app.js +2 -0
- package/server/config/config.js +4 -4
- package/server/controllers/additional-sheet-parser.js +2 -1
- package/server/controllers/constants.js +5 -0
- package/server/controllers/custom-workbook-controller.js +4 -3
- package/server/controllers/demo-workbooks.js +1462 -6
- package/server/controllers/export-constants.js +3 -2
- package/server/controllers/exporters/sif.js +6 -1
- package/server/controllers/exporters/xlsx.js +8 -3
- package/server/controllers/expression-sheet-parser.js +0 -6
- package/server/controllers/grnsettings-database-controller.js +17 -0
- package/server/controllers/importers/sif.js +30 -11
- package/server/controllers/network-database-controller.js +2 -2
- package/server/controllers/network-sheet-parser.js +54 -12
- package/server/controllers/protein-database-controller.js +18 -0
- package/server/controllers/sif-constants.js +11 -4
- package/server/controllers/spreadsheet-controller.js +44 -1
- package/server/controllers/workbook-constants.js +21 -4
- package/server/dals/expression-dal.js +4 -4
- package/server/dals/grnsetting-dal.js +49 -0
- package/server/dals/network-dal.js +14 -15
- package/server/dals/protein-dal.js +106 -0
- package/test/additional-sheet-parser-tests.js +1 -1
- package/test/export-tests.js +136 -9
- package/test/import-sif-tests.js +67 -13
- package/test/test.js +1 -1
- package/test-files/additional-sheet-test-files/optimization-parameters-default.xlsx +0 -0
- package/test-files/demo-files/18_proteins_81_edges_PPI.xlsx +0 -0
- package/test-files/expression-data-test-sheets/expression_sheet_missing_data_ok_export_exact.xlsx +0 -0
- package/web-client/config/config.js +4 -4
- package/web-client/public/js/api/grnsight-api.js +18 -3
- package/web-client/public/js/constants.js +27 -12
- package/web-client/public/js/generateNetwork.js +170 -72
- package/web-client/public/js/graph.js +424 -161
- package/web-client/public/js/grnsight.js +25 -4
- package/web-client/public/js/grnstate.js +4 -1
- package/web-client/public/js/iframe-coordination.js +3 -3
- package/web-client/public/js/setup-handlers.js +76 -61
- package/web-client/public/js/setup-load-and-import-handlers.js +32 -7
- package/web-client/public/js/update-app.js +119 -28
- package/web-client/public/js/upload.js +142 -85
- package/web-client/public/js/warnings.js +25 -0
- package/web-client/public/lib/bootstrap.file-input/bootstrap.file-input.js +0 -1
- package/web-client/public/stylesheets/grnsight.styl +40 -16
- package/web-client/views/components/demo.pug +7 -5
- package/web-client/views/upload.pug +64 -50
- package/database/network-database/scripts/filter_genes.py +0 -76
- package/database/network-database/scripts/loader.py +0 -79
- package/database/network-database/scripts/loader_updates.py +0 -99
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
class Utils:
    """
    A class to define utility functions. The class contains functions to load sources,
    genes, proteins and network data into the database. These functions generate direct
    SQL statements from the source files in order to populate a relational database with
    those files' data.

    By taking the approach of emitting SQL statements directly, we bypass the need to import
    some kind of database library for the loading process, instead passing the statements
    directly into a database command line utility such as `psql`.

    Every input file is tab-separated and starts with one header row, which is skipped.
    Completely empty lines are ignored. All SQL is written to standard output.

    ...
    Attributes
    ----------

    Methods
    ----------
    load_sources(source_path: str, database_namespace: str)
        Load Sources into the database
    load_grn_genes / load_ppi_genes / load_expression_genes(gene_path: str, database_namespace: str)
        Load Gene ID Mapping into the database
    load_proteins(protein_path: str, database_namespace: str)
        Load Protein ID Mapping into the database
    load_ppi_network / load_grn_network(network_source_path: str, database_namespace: str)
        Load Network Matrix into the database
    update_grn_genes / update_ppi_genes / update_expression_genes(gene_path: str, database_namespace: str)
        Update Gene ID Mapping into the database
    update_ppi_proteins(update_protein_path: str, database_namespace: str)
        Update Protein ID Mapping into the database
    load_refs(refs_path: str, database_namespace: str)
        Load Refs into the expression database
    load_expression_metadata(expression_metadata_path: str, database_namespace: str)
        Load Expression Metadata into the expression database
    load_expression_data(expression_data_path: str, database_namespace: str)
        Load Expression Data into the expression database
    load_production_rates(production_rates_path: str, database_namespace: str)
        Load Production Rates into the expression database
    load_degradation_rates(degradation_rates_path: str, database_namespace: str)
        Load Degradation Rates into the expression database
    create_union_file(file_paths, output_file_path)
        Write the union of the genes found in several gene files
    """

    @classmethod
    def _data_rows(cls, path: str):
        """
        Yield the fields of every data row of a tab-separated file.

        The first row is treated as the header and skipped. Empty lines are skipped
        as well so that a stray blank line does not abort the load. Quote characters
        are kept literally (QUOTE_NONE): the files are plain TSV, not quoted CSV.

        Parameters
        ----------
        path : str
            The path to the tab-separated file to read
        """
        # Read-only access is enough; newline='' lets the csv module handle line endings.
        with open(path, 'r', newline='') as f:
            reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
            next(reader, None)  # header row
            for row in reader:
                if row:
                    yield row

    @staticmethod
    def _sql_literal(value: str) -> str:
        """
        Return `value` as a single-quoted SQL string literal.

        Embedded single quotes are doubled so that a value containing an apostrophe
        cannot terminate the literal early.
        """
        return "'" + value.replace("'", "''") + "'"

    @classmethod
    def load_sources(cls, source_path: str, database_namespace: str):
        """
        Load Sources (time_stamp, source, display_name) into the database using the COPY command

        Parameters
        ----------
        source_path : str
            The path to the file containing the sources that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the sources will be loaded
        """
        print(f'COPY {database_namespace}.source (time_stamp, source, display_name) FROM stdin;')
        for r in cls._data_rows(source_path):
            time_stamp = r[0]
            source = r[1]
            display_name = r[2]
            print(f'{time_stamp}\t{source}\t{display_name}')
        print('\\.')

    @classmethod
    def load_grn_genes(cls, gene_path: str, database_namespace: str):
        """Load genes, including the regulator column, into a gene regulatory network schema."""
        cls._load_genes(gene_path, database_namespace, is_grn = True)

    @classmethod
    def load_ppi_genes(cls, gene_path: str, database_namespace: str):
        """Load genes (without the regulator column) into a protein-protein interaction schema."""
        cls._load_genes(gene_path, database_namespace, is_grn = False)

    @classmethod
    def load_expression_genes(cls, gene_path: str, database_namespace: str):
        """Load genes (without the regulator column) into an expression schema."""
        cls._load_genes(gene_path, database_namespace, is_grn = False)

    @classmethod
    def _load_genes(cls, gene_path: str, database_namespace: str, is_grn: bool):
        """
        Load Gene ID Mapping into the database using the COPY command

        Parameters
        ----------
        gene_path : str
            The path to the file containing the gene data that you want to add to the database
        database_namespace: str
            The database namespace i.e the schema name where the gene data will be loaded
        is_grn : bool
            A boolean value to check if the schema is for gene_regulatory_network;
            when True a fifth column (regulator) is read and loaded as well
        """
        if is_grn:
            print(f'COPY {database_namespace}.gene (gene_id, display_gene_id, species, taxon_id, regulator) FROM stdin;')
        else:
            print(f'COPY {database_namespace}.gene (gene_id, display_gene_id, species, taxon_id) FROM stdin;')
        for r in cls._data_rows(gene_path):
            gene_id = r[0]
            display_gene_id = r[1]
            species = r[2]
            taxon_id = r[3]
            if is_grn:
                regulator = r[4]
                print(f'{gene_id}\t{display_gene_id}\t{species}\t{taxon_id}\t{regulator}')
            else:
                print(f'{gene_id}\t{display_gene_id}\t{species}\t{taxon_id}')
        print('\\.')

    @classmethod
    def load_proteins(cls, protein_path: str, database_namespace: str):
        """
        Load Protein ID Mapping into the database using the COPY command

        The literal text "None" in the length, molecular weight or PI column is loaded as 0.

        Parameters
        ----------
        protein_path : str
            The path to the file containing the protein data that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the protein data will be loaded
        """
        print(f'COPY {database_namespace}.protein (standard_name, gene_systematic_name, length, molecular_weight, PI, taxon_id) FROM stdin;')
        for r in cls._data_rows(protein_path):
            standard_name = r[0]
            gene_name = r[1]
            length = r[2] if r[2] != "None" else 0
            molecular_weight = r[3] if r[3] != "None" else 0
            pi = r[4] if r[4] != "None" else 0
            taxon_id = r[5]
            print(f'{standard_name}\t{gene_name}\t{length}\t{molecular_weight}\t{pi}\t{taxon_id}')
        print('\\.')

    @classmethod
    def load_ppi_network(cls, network_source_path: str, database_namespace: str):
        """Load physical interactions into a protein-protein interaction schema."""
        cls._load_network(network_source_path, database_namespace, is_protein = True)

    @classmethod
    def load_grn_network(cls, network_source_path: str, database_namespace: str):
        """Load regulator/target edges into a gene regulatory network schema."""
        cls._load_network(network_source_path, database_namespace, is_protein = False)

    @classmethod
    def _load_network(cls, network_source_path: str, database_namespace: str, is_protein: bool):
        """
        Load Network Matrix into the database using the COPY command

        Parameters
        ----------
        network_source_path : str
            The path to the file containing the network data that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the network data will be loaded
        is_protein : bool
            A boolean value to check if the schema is for protein_protein_interactions or gene_regulatory_network
        """
        if is_protein:
            print(f'COPY {database_namespace}.physical_interactions (protein1, protein2, interaction_detection_methods_identifier, experiment_name, time_stamp, source) FROM stdin;')
        else:
            print(f'COPY {database_namespace}.network (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source) FROM stdin;')
        for r in cls._data_rows(network_source_path):
            if is_protein:
                protein1 = r[0]
                protein2 = r[1]
                idmi = r[2]
                exp_name = r[3]
                timestamp = r[4]
                source = r[5]
                print(f'{protein1}\t{protein2}\t{idmi}\t{exp_name}\t{timestamp}\t{source}')
            else:
                regulator_gene_id = r[0]
                target_gene_id = r[1]
                taxon_id = r[2]
                time_stamp = r[3]
                source = r[4]
                print(f'{regulator_gene_id}\t{target_gene_id}\t{taxon_id}\t{time_stamp}\t{source}')
        print('\\.')

    @classmethod
    def update_grn_genes(cls, gene_path: str, database_namespace: str):
        """Update display_gene_id and regulator of genes in a gene regulatory network schema."""
        cls._update_genes(gene_path, database_namespace, is_protein = False)

    @classmethod
    def update_ppi_genes(cls, gene_path: str, database_namespace: str):
        """Update display_gene_id of genes in a protein-protein interaction schema."""
        cls._update_genes(gene_path, database_namespace, is_protein = True)

    @classmethod
    def update_expression_genes(cls, gene_path: str, database_namespace: str):
        """Update display_gene_id and regulator of genes in an expression schema."""
        cls._update_genes(gene_path, database_namespace, is_protein = False)

    @classmethod
    def _update_genes(cls, update_gene_path: str, database_namespace: str, is_protein: bool):
        """
        Update Gene ID Mapping into the database

        Emits one UPDATE statement per data row, wrapped in a single BEGIN/COMMIT.

        Parameters
        ----------
        update_gene_path : str
            The path to the file containing the gene data that you want to update in the database
        database_namespace : str
            The database namespace i.e the schema name where the gene data will be updated
        is_protein : bool
            A boolean value to check if the schema is for protein_protein_interactions or gene_regulatory_network;
            when False the fifth column (regulator) is read and updated as well
        """
        print('BEGIN;')
        for r in cls._data_rows(update_gene_path):
            # Quote through _sql_literal so an apostrophe in an ID cannot break the statement.
            gene_id = cls._sql_literal(r[0])
            display_gene_id = cls._sql_literal(r[1])
            if is_protein:
                print(f"UPDATE {database_namespace}.gene\nSET display_gene_id = {display_gene_id}\nWHERE gene_id = {gene_id};")
            else:
                # regulator is emitted unquoted, exactly as it appears in the file.
                regulator = r[4]
                print(f"UPDATE {database_namespace}.gene\nSET display_gene_id = {display_gene_id}, regulator={regulator}\nWHERE gene_id = {gene_id};")
        print('COMMIT;')

    @classmethod
    def update_ppi_proteins(cls, update_protein_path: str, database_namespace: str):
        """
        Update Protein ID Mapping into the database

        Emits one UPDATE statement per data row, wrapped in a single BEGIN/COMMIT.
        The literal text "None" in the length, molecular weight or PI column is written as 0.

        Parameters
        ----------
        update_protein_path : str
            The path to the file containing the protein data that you want to update in the database
        database_namespace : str
            The database namespace i.e the schema name where the protein data will be updated
        """
        print('BEGIN;')
        for r in cls._data_rows(update_protein_path):
            # Quote through _sql_literal so an apostrophe in a name cannot break the statement.
            standard_name = cls._sql_literal(r[0])
            gene_name = cls._sql_literal(r[1])
            length = r[2] if r[2] != "None" else 0
            molecular_weight = r[3] if r[3] != "None" else 0
            pi = r[4] if r[4] != "None" else 0
            print(f"UPDATE {database_namespace}.protein\nSET standard_name = {standard_name}, length = {length}, molecular_weight = {molecular_weight}, PI = {pi}\nWHERE gene_systematic_name = {gene_name};")
        print('COMMIT;')

    # The following functions are for expression database specifically

    @classmethod
    def load_refs(cls, refs_path: str, database_namespace: str):
        """
        Load Refs into the database using the COPY command

        Parameters
        ----------
        refs_path : str
            The path to the file containing the references that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the references will be loaded
        """
        print(f'COPY {database_namespace}.ref (pubmed_id, authors, publication_year, title, doi, ncbi_geo_id) FROM stdin;')
        for r in cls._data_rows(refs_path):
            pubmed_id = r[0]
            authors = r[1]
            publication_year = r[2]
            title = r[3]
            doi = r[4]
            ncbi_geo_id = r[5]
            print(f'{pubmed_id}\t{authors}\t{publication_year}\t{title}\t{doi}\t{ncbi_geo_id}')
        print('\\.')

    @classmethod
    def load_expression_metadata(cls, expression_metadata_path: str, database_namespace: str):
        """
        Load Expression Metadata into the database using the COPY command

        The concentration and time values are parsed as floats and the number of
        replicates as an int, so a malformed number raises ValueError.

        Parameters
        ----------
        expression_metadata_path : str
            The path to the file containing the expression metadata that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the expression metadata will be loaded
        """
        print(f'COPY {database_namespace}.expression_metadata (ncbi_geo_id, pubmed_id, control_yeast_strain, treatment_yeast_strain, control, treatment, concentration_value, concentration_unit, time_value, time_unit, number_of_replicates, expression_table) FROM stdin;')
        for r in cls._data_rows(expression_metadata_path):
            ncbi_geo_id = r[0]
            pubmed_id = r[1]
            control_yeast_strain = r[2]
            treatment_yeast_strain = r[3]
            control = r[4]
            treatment = r[5]
            concentration_value = float(r[6])
            concentration_unit = r[7]
            time_value = float(r[8])
            time_unit = r[9]
            number_of_replicates = int(r[10])
            expression_table = r[11]
            print(f'{ncbi_geo_id}\t{pubmed_id}\t{control_yeast_strain}\t{treatment_yeast_strain}\t{control}\t{treatment}\t{concentration_value}\t{concentration_unit}\t{time_value}\t{time_unit}\t{number_of_replicates}\t{expression_table}')
        print('\\.')

    @classmethod
    def load_expression_data(cls, expression_data_path: str, database_namespace: str):
        """
        Load Expression Data into the database using the COPY command

        An empty expression value is loaded as NaN.

        Parameters
        ----------
        expression_data_path : str
            The path to the file containing the expression data that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the expression data will be loaded
        """
        print(f'COPY {database_namespace}.expression (gene_id, taxon_id, sort_index, sample_id, expression, time_point, dataset) FROM stdin;')
        for r in cls._data_rows(expression_data_path):
            gene_id = r[0]
            taxon_id = r[1]
            sort_index = int(r[2])
            sample_id = r[3]
            expression = float(r[4]) if r[4] != "" else "NaN"
            time_point = float(r[5])
            data_set = r[6]
            print(f'{gene_id}\t{taxon_id}\t{sort_index}\t{sample_id}\t{expression}\t{time_point}\t{data_set}')
        print('\\.')

    @classmethod
    def load_production_rates(cls, production_rates_path: str, database_namespace: str):
        """
        Load Production Rates into the database using the COPY command

        An empty production rate is loaded as NaN.

        Parameters
        ----------
        production_rates_path : str
            The path to the file containing the production rates that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the production rates will be loaded
        """
        print(f'COPY {database_namespace}.production_rate (gene_id, taxon_id, ncbi_geo_id, pubmed_id, production_rate) FROM stdin;')
        for r in cls._data_rows(production_rates_path):
            gene_id = r[0]
            taxon_id = r[1]
            ncbi_geo_id = r[2]
            pubmed_id = r[3]
            production_rate = float(r[4]) if r[4] != "" else "NaN"
            print(f'{gene_id}\t{taxon_id}\t{ncbi_geo_id}\t{pubmed_id}\t{production_rate}')
        print('\\.')

    @classmethod
    def load_degradation_rates(cls, degradation_rates_path: str, database_namespace: str):
        """
        Load Degradation Rates into the database using the COPY command

        An empty degradation rate is loaded as NaN.

        Parameters
        ----------
        degradation_rates_path : str
            The path to the file containing the degradation rates that you want to add to the database
        database_namespace : str
            The database namespace i.e the schema name where the degradation rates will be loaded
        """
        print(f'COPY {database_namespace}.degradation_rate (gene_id, taxon_id, ncbi_geo_id, pubmed_id, degradation_rate) FROM stdin;')
        for r in cls._data_rows(degradation_rates_path):
            gene_id = r[0]
            taxon_id = r[1]
            ncbi_geo_id = r[2]
            pubmed_id = r[3]
            degradation_rate = float(r[4]) if r[4] != "" else "NaN"
            print(f'{gene_id}\t{taxon_id}\t{ncbi_geo_id}\t{pubmed_id}\t{degradation_rate}')
        print('\\.')

    @classmethod
    def _read_gene_info_from_csv(cls, file_path):
        """
        Read a gene file into a dict keyed by gene ID.

        Rows with exactly four columns get 'False' appended as the regulator value, and a
        display gene ID of 'None' is replaced by the gene ID itself. When a gene ID
        occurs more than once, the last row wins.

        Parameters
        ----------
        file_path : str
            The path to the tab-separated gene file

        Returns
        -------
        dict
            gene ID -> {'Display Gene ID', 'Species', 'Taxon', 'Regulator'}
        """
        gene_info = {}
        for r in cls._data_rows(file_path):
            if len(r) == 4:
                r.append('False')
            if r[1] == 'None':
                r[1] = r[0]
            gene_info[r[0]] = {
                'Display Gene ID': r[1],
                'Species': r[2],
                'Taxon': r[3],
                'Regulator': r[4]
            }
        return gene_info

    @classmethod
    def create_union_file(cls, file_paths, output_file_path):
        """
        Create a union file containing all unique genes from all input files.

        Args:
            file_paths (List[str]): a list of file paths to read genes from (should list network genes file path at the end) because it can overwrite the regulator status of the gene
            output_file_path (str): the file path to write the union genes to
        """
        all_gene_info = {}

        # Later files overwrite earlier ones for the same gene ID.
        for file_path in file_paths:
            all_gene_info.update(cls._read_gene_info_from_csv(file_path))

        with open(output_file_path, 'w', newline='') as union_file:
            headers = ['Gene ID', 'Display Gene ID', 'Species', 'Taxon', 'Regulator']
            union_file.write('\t'.join(headers) + '\n')

            for gene_id, gene_info in all_gene_info.items():
                row_data = [gene_id, gene_info['Display Gene ID'], gene_info['Species'], gene_info['Taxon'], gene_info['Regulator']]
                union_file.write('\t'.join(row_data) + '\n')
|
package/package.json
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "grnsight",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "7.2.0",
|
|
4
4
|
"description": "Web app and service for visualizing models of gene regulatory networks",
|
|
5
5
|
"directories": {
|
|
6
6
|
"test": "test"
|
|
7
7
|
},
|
|
8
8
|
"dependencies": {
|
|
9
9
|
"body-parser": "1.18.2",
|
|
10
|
-
"
|
|
10
|
+
"canvas": "^2.11.2",
|
|
11
|
+
"canvg": "3.0.10",
|
|
11
12
|
"cors": "2.8.1",
|
|
12
13
|
"cytoscape": "2.7.14",
|
|
13
14
|
"d3-v4-grid": "2.0.1",
|
package/server/app.js
CHANGED
|
@@ -35,7 +35,9 @@ require(__dirname + "/controllers/ga-controller")(app);
|
|
|
35
35
|
require(__dirname + "/controllers/api-controllers")(app);
|
|
36
36
|
require(__dirname + "/controllers/expression-database-controller")(app);
|
|
37
37
|
require(__dirname + "/controllers/network-database-controller")(app);
|
|
38
|
+
require(__dirname + "/controllers/protein-database-controller")(app);
|
|
38
39
|
require(__dirname + "/controllers/custom-workbook-controller")(app);
|
|
40
|
+
require(__dirname + "/controllers/grnsettings-database-controller")(app);
|
|
39
41
|
|
|
40
42
|
// Don't start the server if this app is run as a child process.
|
|
41
43
|
if (!module.parent) {
|
package/server/config/config.js
CHANGED
|
@@ -17,10 +17,10 @@ module.exports = {
|
|
|
17
17
|
},
|
|
18
18
|
|
|
19
19
|
production: {
|
|
20
|
-
host: "grnsight.
|
|
20
|
+
host: "grnsight.lmucs.org",
|
|
21
21
|
port: 3000,
|
|
22
22
|
url: "https://" + this.host + "/server",
|
|
23
|
-
corsOrigin: "//grnsight.
|
|
23
|
+
corsOrigin: "//grnsight.lmucs.org",
|
|
24
24
|
root: rootPath,
|
|
25
25
|
app: {
|
|
26
26
|
name: "GRNsight"
|
|
@@ -31,10 +31,10 @@ module.exports = {
|
|
|
31
31
|
},
|
|
32
32
|
|
|
33
33
|
beta: {
|
|
34
|
-
host: "grnsight.
|
|
34
|
+
host: "grnsight.lmucs.org",
|
|
35
35
|
port: 4000,
|
|
36
36
|
url: "https://" + this.host + "/beta/server/",
|
|
37
|
-
corsOrigin: "//grnsight.
|
|
37
|
+
corsOrigin: "//grnsight.lmucs.org",
|
|
38
38
|
root: rootPath,
|
|
39
39
|
app: {
|
|
40
40
|
name: "GRNsight"
|
|
@@ -23,7 +23,8 @@ const optimizationParametersTypeKey = {
|
|
|
23
23
|
alpha: "number", "kk_max": "number", MaxIter: "number", TolFun: "number", MaxFunEval: "number",
|
|
24
24
|
TolX: "number", "production_function": "string", "L_curve": "number", "estimate_params": "number",
|
|
25
25
|
"make_graphs": "number", "fix_P": "number", "fix_b": "number", "expression_timepoints": "object",
|
|
26
|
-
Strain: "object", species: "string", "taxon_id": "number",
|
|
26
|
+
Strain: "object", species: "string", "taxon_id": "number", workbookType: "string",
|
|
27
|
+
"simulation_timepoints": "object", "b_or_tau" : "number"
|
|
27
28
|
|
|
28
29
|
};
|
|
29
30
|
|
|
@@ -4,6 +4,11 @@ module.exports = {
|
|
|
4
4
|
NOT_FOUND: -1,
|
|
5
5
|
WEIGHTED: "weighted",
|
|
6
6
|
UNWEIGHTED: "unweighted",
|
|
7
|
+
CELL_A1_PPI: "cols protein1/ rows protein2",
|
|
8
|
+
CELL_A1_GRN: "cols regulators/rows targets",
|
|
9
|
+
|
|
10
|
+
NETWORK_PPI_MODE: "protein-protein-physical-interaction",
|
|
11
|
+
NETWORK_GRN_MODE: "grn",
|
|
7
12
|
|
|
8
13
|
GRNSIGHT_FILENAME_HEADER: "X-GRNsight-Filename",
|
|
9
14
|
|
|
@@ -11,7 +11,7 @@ var processCustomWorkbook = function (path, res, app, workbook) {
|
|
|
11
11
|
res.status(400).json(workbook);
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
-
const createCustomWorkbook = (genesString, linksString) => {
|
|
14
|
+
const createCustomWorkbook = (genesString, linksString, networkType) => {
|
|
15
15
|
const g = genesString.split(",");
|
|
16
16
|
let genes = g.map(gene => {
|
|
17
17
|
return {name: gene};
|
|
@@ -45,7 +45,8 @@ const createCustomWorkbook = (genesString, linksString) => {
|
|
|
45
45
|
meta: {
|
|
46
46
|
data: {
|
|
47
47
|
species: "Saccharomyces cerevisiae",
|
|
48
|
-
taxon_id: 559292
|
|
48
|
+
taxon_id: 559292,
|
|
49
|
+
workbookType: networkType
|
|
49
50
|
}
|
|
50
51
|
},
|
|
51
52
|
meta2: {},
|
|
@@ -61,7 +62,7 @@ module.exports = function (app) {
|
|
|
61
62
|
|
|
62
63
|
// Load Custom Workbook
|
|
63
64
|
app.get("/upload-custom-workbook", function (req, res) {
|
|
64
|
-
let workbook = createCustomWorkbook(req.query.genes, req.query.links);
|
|
65
|
+
let workbook = createCustomWorkbook(req.query.genes, req.query.links, req.query.networkType);
|
|
65
66
|
return processCustomWorkbook(req.query.name, res, app, workbook);
|
|
66
67
|
});
|
|
67
68
|
}
|