grnsight 6.0.7 → 7.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.yml +4 -4
- package/.github/workflows/node.js.yml +35 -0
- package/README.md +1 -1
- package/database/README.md +218 -97
- package/database/constants.py +42 -0
- package/database/filter_update.py +168 -0
- package/database/grnsettings-database/README.md +52 -0
- package/database/grnsettings-database/schema.sql +4 -0
- package/database/loader.py +30 -0
- package/database/loader_update.py +36 -0
- package/database/network-database/scripts/generate_network.py +15 -23
- package/database/network-database/scripts/generate_new_network_version.py +17 -24
- package/database/protein-protein-database/README.md +71 -0
- package/database/protein-protein-database/schema.sql +37 -0
- package/database/protein-protein-database/scripts/generate_protein_network.py +227 -0
- package/database/protein-protein-database/scripts/remove_duplicates.sh +4 -0
- package/database/utils.py +418 -0
- package/package.json +3 -2
- package/server/app.js +2 -0
- package/server/config/config.js +4 -4
- package/server/controllers/additional-sheet-parser.js +2 -1
- package/server/controllers/constants.js +5 -0
- package/server/controllers/custom-workbook-controller.js +4 -3
- package/server/controllers/demo-workbooks.js +1462 -6
- package/server/controllers/export-constants.js +3 -2
- package/server/controllers/exporters/sif.js +6 -1
- package/server/controllers/exporters/xlsx.js +8 -3
- package/server/controllers/expression-sheet-parser.js +0 -6
- package/server/controllers/grnsettings-database-controller.js +17 -0
- package/server/controllers/importers/sif.js +30 -11
- package/server/controllers/network-database-controller.js +2 -2
- package/server/controllers/network-sheet-parser.js +54 -12
- package/server/controllers/protein-database-controller.js +18 -0
- package/server/controllers/sif-constants.js +11 -4
- package/server/controllers/spreadsheet-controller.js +44 -1
- package/server/controllers/workbook-constants.js +21 -4
- package/server/dals/expression-dal.js +4 -4
- package/server/dals/grnsetting-dal.js +49 -0
- package/server/dals/network-dal.js +14 -15
- package/server/dals/protein-dal.js +106 -0
- package/test/additional-sheet-parser-tests.js +1 -1
- package/test/export-tests.js +136 -9
- package/test/import-sif-tests.js +67 -13
- package/test/test.js +1 -1
- package/test-files/additional-sheet-test-files/optimization-parameters-default.xlsx +0 -0
- package/test-files/demo-files/18_proteins_81_edges_PPI.xlsx +0 -0
- package/test-files/expression-data-test-sheets/expression_sheet_missing_data_ok_export_exact.xlsx +0 -0
- package/web-client/config/config.js +4 -4
- package/web-client/public/js/api/grnsight-api.js +18 -3
- package/web-client/public/js/constants.js +27 -12
- package/web-client/public/js/generateNetwork.js +170 -72
- package/web-client/public/js/graph.js +424 -161
- package/web-client/public/js/grnsight.js +25 -4
- package/web-client/public/js/grnstate.js +4 -1
- package/web-client/public/js/iframe-coordination.js +3 -3
- package/web-client/public/js/setup-handlers.js +76 -61
- package/web-client/public/js/setup-load-and-import-handlers.js +32 -7
- package/web-client/public/js/update-app.js +119 -28
- package/web-client/public/js/upload.js +142 -85
- package/web-client/public/js/warnings.js +25 -0
- package/web-client/public/lib/bootstrap.file-input/bootstrap.file-input.js +0 -1
- package/web-client/public/stylesheets/grnsight.styl +40 -16
- package/web-client/views/components/demo.pug +7 -5
- package/web-client/views/upload.pug +64 -50
- package/database/network-database/scripts/filter_genes.py +0 -76
- package/database/network-database/scripts/loader.py +0 -79
- package/database/network-database/scripts/loader_updates.py +0 -99
|
@@ -57,10 +57,21 @@ html
|
|
|
57
57
|
|
|
58
58
|
li(class='divider')
|
|
59
59
|
|
|
60
|
+
li
|
|
61
|
+
span(class='menu-subheader') Network Mode
|
|
62
|
+
|
|
63
|
+
li(class='startDisabled disabled')
|
|
64
|
+
a(href='#' id='network-mode-grn-menu' class="menu-submenu network-mode")
|
|
65
|
+
span(class='glyphicon')
|
|
66
|
+
| Gene Regulatory Network
|
|
67
|
+
li(class='startDisabled disabled')
|
|
68
|
+
a(href='#' id='network-mode-protein-protein-physical-interaction-menu' class="menu-submenu network-mode")
|
|
69
|
+
span(class='glyphicon')
|
|
70
|
+
| Protein-Protein Physical Interaction Network
|
|
60
71
|
li
|
|
61
72
|
span(class='menu-subheader') Species
|
|
62
73
|
li
|
|
63
|
-
p(id='Saccharomyces-cerevisiae')
|
|
74
|
+
p(id='Saccharomyces-cerevisiae', class="menu-submenu")
|
|
64
75
|
span(class='glyphicon glyphicon-ok')
|
|
65
76
|
| Saccharomyces cerevisiae
|
|
66
77
|
|
|
@@ -70,35 +81,33 @@ html
|
|
|
70
81
|
ul(class='dropdown-menu' role='menu')
|
|
71
82
|
span(class='menu-subheader') Graph Options
|
|
72
83
|
li
|
|
73
|
-
a(href='#' id='forceGraph' class='layout'
|
|
74
|
-
|
|
75
|
-
| Force Graph
|
|
84
|
+
a(href='#' id='forceGraph' class='layout')
|
|
85
|
+
| Force Graph
|
|
76
86
|
a(href='#' id='gridLayout' class='layout')
|
|
77
|
-
|
|
78
|
-
| Grid Layout
|
|
87
|
+
| Grid Layout
|
|
79
88
|
li(class='divider')
|
|
80
89
|
li
|
|
81
90
|
a(href='#' id='lockSlidersMenu' class="lockSliders")
|
|
82
91
|
span(class='glyphicon invisible')
|
|
83
|
-
|
|
|
92
|
+
|Lock Force Graph Parameters
|
|
84
93
|
li
|
|
85
94
|
a(href='#' id='resetSlidersMenu' class="resetSliders")
|
|
86
95
|
span(class='glyphicon invisible')
|
|
87
|
-
|
|
|
96
|
+
|Reset Force Graph Parameters
|
|
88
97
|
li(class='disabled')
|
|
89
98
|
a(href='#' id='undoResetMenu' class="undoSliderReset")
|
|
90
99
|
span(class='glyphicon invisible')
|
|
91
|
-
|
|
|
100
|
+
|Undo Reset
|
|
92
101
|
li(class='divider')
|
|
93
102
|
li
|
|
94
103
|
a(href='#' id='link-distance' class='with-input')
|
|
95
104
|
span(class='glyphicon invisible')
|
|
96
|
-
|
|
|
105
|
+
|Link Distance (1 - 1000)
|
|
97
106
|
input(type='number' id='link-distance-menu' class='keepopen' value='500' aria-describedby='basic-addon1')
|
|
98
107
|
li
|
|
99
108
|
a(href='#' id='charge' class='with-input')
|
|
100
109
|
span(class='glyphicon invisible')
|
|
101
|
-
|
|
|
110
|
+
|Charge (-2000 - 0)
|
|
102
111
|
input(type='number' id='charge-menu' class='keepopen' value='-50' aria-describedby='basic-addon1')
|
|
103
112
|
|
|
104
113
|
|
|
@@ -109,32 +118,33 @@ html
|
|
|
109
118
|
a(href='#' id='node-coloring-toggle-menu' class='nodeColoringToggle')
|
|
110
119
|
span(class='glyphicon')
|
|
111
120
|
| Enable Node Coloring
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
121
|
+
div(id='node-coloring-navbar-options' class='disabled')
|
|
122
|
+
li(class='divider')
|
|
123
|
+
li(class='node-coloring-menu dropdown-submenu disabled')
|
|
124
|
+
a(href='#')
|
|
125
|
+
span(class='glyphicon invisible')
|
|
126
|
+
| Select Top Dataset
|
|
127
|
+
ul(class='dropdown-menu' id='topDatasetDropdownMenu')
|
|
128
|
+
li(class='node-coloring-menu disabled')
|
|
129
|
+
a(href='#' id='averageDataTopMenu' class='nodeColoring')
|
|
130
|
+
span(class='glyphicon')
|
|
131
|
+
| Average Replicate Values for Top Dataset
|
|
132
|
+
li(class='divider')
|
|
133
|
+
li(class='node-coloring-menu dropdown-submenu disabled')
|
|
134
|
+
a(href='#')
|
|
135
|
+
span(class='glyphicon invisible')
|
|
136
|
+
| Select Bottom Dataset
|
|
137
|
+
ul(class='dropdown-menu' id='bottomDatasetDropdownMenu')
|
|
138
|
+
li(class='node-coloring-menu disabled')
|
|
139
|
+
a(href='#' id='averageDataBottomMenu' class='viewportOption')
|
|
140
|
+
span(class='glyphicon')
|
|
141
|
+
| Average Replicate Values for Bottom Dataset
|
|
142
|
+
li(class='divider')
|
|
143
|
+
li(class='node-coloring-menu disabled')
|
|
144
|
+
a(href='#' class='nodeOptions with-input')
|
|
145
|
+
span(class='glyphicon invisible')
|
|
146
|
+
| Log Fold Change Max Value (0.01 - 100):
|
|
147
|
+
input(type='number' min="0.01" max="100" id='log-fold-change-max-value-menu' class='logFoldChangeMaxValue keepopen' aria-describedby='basic-addon1')
|
|
138
148
|
|
|
139
149
|
li(class='dropdown')
|
|
140
150
|
a(href='#' class='dropdown-toggle' data-toggle='dropdown') Edge
|
|
@@ -200,7 +210,7 @@ html
|
|
|
200
210
|
| Fit To Window
|
|
201
211
|
li(class='divider')
|
|
202
212
|
li
|
|
203
|
-
a(href='#' id='restrict-graph-to-viewport' class='
|
|
213
|
+
a(href='#' id='restrict-graph-to-viewport' class='viewport')
|
|
204
214
|
span(class='glyphicon')
|
|
205
215
|
| Restrict Graph to Viewport
|
|
206
216
|
li(class='divider')
|
|
@@ -234,15 +244,11 @@ html
|
|
|
234
244
|
li(class='startDisabled disabled weighted export')
|
|
235
245
|
a(href='#' id='exportAsWeightedSif') To Weighted SIF
|
|
236
246
|
li(class='divider')
|
|
237
|
-
li(class='startDisabled disabled unweighted export')
|
|
247
|
+
li(class='startDisabled disabled unweighted export' id='unweightedGraphmlContainer')
|
|
238
248
|
a(href='#' id='exportAsUnweightedGraphMl') To Unweighted GraphML
|
|
239
249
|
li(class='startDisabled disabled weighted export')
|
|
240
250
|
a(href='#' id='exportAsWeightedGraphMl') To Weighted GraphML
|
|
241
251
|
li(class='divider')
|
|
242
|
-
//- li(class='startDisabled disabled unweighted export')
|
|
243
|
-
a(href='#' id='exportAsUnweightedExcel') To Unweighted Excel
|
|
244
|
-
//- li(class='startDisabled disabled weighted export')
|
|
245
|
-
//- a(href='#' id='exportAsWeightedExcel') To Weighted Excel
|
|
246
252
|
li(class='startDisabled disabled export')
|
|
247
253
|
a(href='#' id='exportAsExcel') To Excel
|
|
248
254
|
|
|
@@ -341,6 +347,7 @@ html
|
|
|
341
347
|
option(href='#' id='demoSourceDropdown-weighted' class='weighted' value='weighted') Demo #2: Weighted GRN (15 genes, 28 edges, Dahlquist Lab unpublished data)
|
|
342
348
|
option(href='#' id='demoSourceDropdown-schadeInput' class='schadeInput' value='schadeInput') Demo #3: Unweighted GRN (21 genes, 31 edges)
|
|
343
349
|
option(href='#' id='demoSourceDropdown-schadeOutput' class='schadeOutput' value='schadeOutput') Demo #4: Weighted GRN (21 genes, 31 edges, Schade et al. 2004 data)
|
|
350
|
+
option(href='#' id='demoSourceDropdown-ppi' class='ppi' value='ppi') Demo #5: PPI (18 proteins, 81 edges)
|
|
344
351
|
label(for='upload-network' class='open btn btn-default network-button' data-toggle='tooltip', title='(.xlsx, .sif, .graphml)')
|
|
345
352
|
span(class='glyphicon glyphicon-folder-open')
|
|
346
353
|
| Open File
|
|
@@ -354,6 +361,13 @@ html
|
|
|
354
361
|
span(class='glyphicon glyphicon-repeat sidebar-glyphicon')
|
|
355
362
|
| Reload
|
|
356
363
|
div(class="long-sub-divider")
|
|
364
|
+
form(class='panelDropdownContainer')
|
|
365
|
+
label(for='speciesDropdown' class='info') Network Mode
|
|
366
|
+
select(class="networkMode panelDropdown btn btn-default startDisabled disabled" id='networkModeDropdown' name="networkModeDropdown" href='#')
|
|
367
|
+
option(value="none" selected="true" disabled hidden) Change Network Mode
|
|
368
|
+
option(href='#' id='demoSourceDropdown-grn' value='grn') Gene Regulatory Network
|
|
369
|
+
option(href='#' id='demoSourceDropdown-protein-protein-physical-interaction' value='protein-protein-physical-interaction') Protein-Protein Physical Interaction Network
|
|
370
|
+
|
|
357
371
|
label(for='species-info' class='info') Species:
|
|
358
372
|
span(id='species' class='network-type') Saccharomyces cerevisiae
|
|
359
373
|
input(type="hidden" id='species-info')
|
|
@@ -488,7 +502,7 @@ html
|
|
|
488
502
|
tr(class='shortest-path-column-headers')
|
|
489
503
|
tbody
|
|
490
504
|
|
|
491
|
-
div(id='errorModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
|
|
505
|
+
div(id='errorModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
|
|
492
506
|
div(class='modal-dialog')
|
|
493
507
|
div(class='modal-content')
|
|
494
508
|
div(class='modal-header')
|
|
@@ -499,9 +513,9 @@ html
|
|
|
499
513
|
p(id='error')
|
|
500
514
|
div(class='modal-footer')
|
|
501
515
|
input(type='button' class='btn btn-default' data-dismiss='modal' value='Close')
|
|
502
|
-
input(type='button' id='launchFileOpen' class='btn btn-primary' data-dismiss='modal' value='Select New File' onclick="$('
|
|
516
|
+
input(type='button' id='launchFileOpen' class='btn btn-primary' data-dismiss='modal' value='Select New File' onclick="$('#upload-network').click()")
|
|
503
517
|
|
|
504
|
-
div(id='warningsModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
|
|
518
|
+
div(id='warningsModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
|
|
505
519
|
div(class='modal-dialog')
|
|
506
520
|
div(class='modal-content')
|
|
507
521
|
div(class='modal-header')
|
|
@@ -522,7 +536,7 @@ html
|
|
|
522
536
|
div(class='modal-footer')
|
|
523
537
|
input(type='button' class='btn btn-default' data-dismiss='modal' value='Close')
|
|
524
538
|
|
|
525
|
-
div(id='exportExcelModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
|
|
539
|
+
div(id='exportExcelModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
|
|
526
540
|
div(class='modal-dialog')
|
|
527
541
|
div(class='modal-content')
|
|
528
542
|
div(class='modal-header')
|
|
@@ -534,7 +548,7 @@ html
|
|
|
534
548
|
div(class='modal-footer')
|
|
535
549
|
div(id='exportExcelFooter-container')
|
|
536
550
|
|
|
537
|
-
div(id='generateNetworkModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
|
|
551
|
+
div(id='generateNetworkModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
|
|
538
552
|
div(class='modal-dialog')
|
|
539
553
|
div(class='modal-content')
|
|
540
554
|
div(class='modal-header')
|
|
@@ -546,7 +560,7 @@ html
|
|
|
546
560
|
div(class='modal-footer')
|
|
547
561
|
div(id='generateNetworkFooter-container')
|
|
548
562
|
|
|
549
|
-
div(id='warningsModalSpecies' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
|
|
563
|
+
div(id='warningsModalSpecies' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
|
|
550
564
|
div(class='modal-dialog')
|
|
551
565
|
div(class='modal-content')
|
|
552
566
|
div(class='modal-header')
|
|
@@ -559,7 +573,7 @@ html
|
|
|
559
573
|
input(type='button' class='btn btn-default' data-dismiss='modal' value='Close')
|
|
560
574
|
//- input(type='button' id='launchFileOpen' class='btn btn-primary' data-dismiss='modal' value='Select New File' onclick="$('.upload').click()")
|
|
561
575
|
|
|
562
|
-
div(id='importErrorModal' class='modal fade' tab-index='-1' role='dialog' aria-hidden='true')
|
|
576
|
+
div(id='importErrorModal' class='modal fade' tab-index='-1' role='dialog' aria-hidden='true' data-backdrop="static")
|
|
563
577
|
div(class='modal-dialog')
|
|
564
578
|
div(class='modal-content')
|
|
565
579
|
div(class='modal-header')
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
import psycopg2
|
|
2
|
-
import csv
|
|
3
|
-
import os
|
|
4
|
-
|
|
5
|
-
PROCESSED_GENES = "../script-results/processed-loader-files/gene.csv"
|
|
6
|
-
MISSING_GENE_DESTINATION = '../script-results/processed-loader-files/missing-genes.csv'
|
|
7
|
-
UPDATE_GENE_DESTINATION = '../script-results/processed-loader-files/update-genes.csv'
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
connection = psycopg2.connect(user="postgres",
|
|
11
|
-
password="",
|
|
12
|
-
host="grnsight2.cfimp3lu6uob.us-west-1.rds.amazonaws.com",
|
|
13
|
-
port="5432",
|
|
14
|
-
database="postgres")
|
|
15
|
-
cursor = connection.cursor()
|
|
16
|
-
postgreSQL_select_Query = "select * from gene_regulatory_network.gene"
|
|
17
|
-
|
|
18
|
-
cursor.execute(postgreSQL_select_Query)
|
|
19
|
-
print("Selecting rows from gene table using cursor.fetchall")
|
|
20
|
-
gene_records = cursor.fetchall()
|
|
21
|
-
|
|
22
|
-
db_genes = {}
|
|
23
|
-
missing_genes = {}
|
|
24
|
-
genes_to_update = {}
|
|
25
|
-
for gene in gene_records:
|
|
26
|
-
# key = (gene_id, taxon_id)
|
|
27
|
-
key = (gene[0], gene[3])
|
|
28
|
-
value = {"display_gene_id": gene[1], "species": gene[2], "regulator": gene[4]}
|
|
29
|
-
db_genes[key] = value
|
|
30
|
-
|
|
31
|
-
print(f'Processing file {PROCESSED_GENES}')
|
|
32
|
-
with open(PROCESSED_GENES, 'r+', encoding="UTF-8") as f:
|
|
33
|
-
i = 0
|
|
34
|
-
reader = csv.reader(f)
|
|
35
|
-
for row in reader:
|
|
36
|
-
if i != 0:
|
|
37
|
-
row = row[0].split('\t')
|
|
38
|
-
gene_id = row[0]
|
|
39
|
-
display_gene_id = row[1]
|
|
40
|
-
species = row[2]
|
|
41
|
-
taxon_id = row[3]
|
|
42
|
-
regulator = row[4]
|
|
43
|
-
key = (gene_id, taxon_id)
|
|
44
|
-
value = {"display_gene_id": display_gene_id , "species": species, "regulator": regulator}
|
|
45
|
-
if key not in db_genes:
|
|
46
|
-
missing_genes[key] = value
|
|
47
|
-
elif db_genes[key]["display_gene_id"] != display_gene_id:
|
|
48
|
-
# the display gene id got updated, so lets update our db to account for that
|
|
49
|
-
genes_to_update[key] = value
|
|
50
|
-
i+=1
|
|
51
|
-
|
|
52
|
-
print(f'Creating missing-genes.csv\n')
|
|
53
|
-
gene_file = open(MISSING_GENE_DESTINATION, 'w')
|
|
54
|
-
headers = f'Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\tRegulator'
|
|
55
|
-
gene_file.write(f'{headers}\n')
|
|
56
|
-
for gene in missing_genes:
|
|
57
|
-
gene_file.write(f'{gene[0]}\t{missing_genes[gene]["display_gene_id"]}\t{missing_genes[gene]["species"]}\t{gene[1]}\t{missing_genes[gene]["regulator"]}\n')
|
|
58
|
-
gene_file.close()
|
|
59
|
-
|
|
60
|
-
print(f'Creating update-genes.csv\n')
|
|
61
|
-
gene_file = open(UPDATE_GENE_DESTINATION, 'w')
|
|
62
|
-
headers = f'Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\tRegulator'
|
|
63
|
-
gene_file.write(f'{headers}\n')
|
|
64
|
-
for gene in genes_to_update:
|
|
65
|
-
gene_file.write(f'{gene[0]}\t{genes_to_update[gene]["display_gene_id"]}\t{genes_to_update[gene]["species"]}\t{gene[1]}\t{genes_to_update[gene]["regulator"]}\n')
|
|
66
|
-
gene_file.close()
|
|
67
|
-
|
|
68
|
-
except (Exception, psycopg2.Error) as error:
|
|
69
|
-
print("Error while fetching data from PostgreSQL", error)
|
|
70
|
-
|
|
71
|
-
finally:
|
|
72
|
-
# closing database connection.
|
|
73
|
-
if connection:
|
|
74
|
-
cursor.close()
|
|
75
|
-
connection.close()
|
|
76
|
-
print("PostgreSQL connection is closed")
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
import re
|
|
3
|
-
# Usage
|
|
4
|
-
# python3 loader.py | psql postgresql://localhost/postgres
|
|
5
|
-
"""
|
|
6
|
-
This program generates direct SQL statements from the source files in order
|
|
7
|
-
to populate a relational database with those files’ data.
|
|
8
|
-
|
|
9
|
-
By taking the approach of emitting SQL statements directly, we bypass the need to import
|
|
10
|
-
some kind of database library for the loading process, instead passing the statements
|
|
11
|
-
directly into a database command line utility such as `psql`.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
"""
|
|
15
|
-
This function Loads Network Data Sources into the database
|
|
16
|
-
"""
|
|
17
|
-
def LOAD_SOURCES():
|
|
18
|
-
print('COPY gene_regulatory_network.source (time_stamp, source, display_name) FROM stdin;')
|
|
19
|
-
NETWORK_DATA_SOURCE = '../script-results/processed-loader-files/source.csv'
|
|
20
|
-
with open(NETWORK_DATA_SOURCE, 'r+') as f:
|
|
21
|
-
reader = csv.reader(f)
|
|
22
|
-
row_num = 0
|
|
23
|
-
for row in reader:
|
|
24
|
-
if row_num != 0:
|
|
25
|
-
r= ','.join(row).split('\t')
|
|
26
|
-
time_stamp = r[0]
|
|
27
|
-
source = r[1]
|
|
28
|
-
display_name = r[2]
|
|
29
|
-
print(f'{time_stamp}\t{source}\t{display_name}')
|
|
30
|
-
row_num += 1
|
|
31
|
-
print('\\.')
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"""
|
|
35
|
-
This function Loads Gene ID Mapping into the database
|
|
36
|
-
"""
|
|
37
|
-
def LOAD_GENES():
|
|
38
|
-
print('COPY gene_regulatory_network.gene (gene_id, display_gene_id, species, taxon_id, regulator) FROM stdin;')
|
|
39
|
-
GENE_SOURCE = '../script-results/processed-loader-files/gene.csv'
|
|
40
|
-
with open(GENE_SOURCE, 'r+') as f:
|
|
41
|
-
reader = csv.reader(f)
|
|
42
|
-
row_num = 0
|
|
43
|
-
for row in reader:
|
|
44
|
-
if row_num != 0:
|
|
45
|
-
r= ','.join(row).split('\t')
|
|
46
|
-
gene_id = r[0]
|
|
47
|
-
display_gene_id= r[1]
|
|
48
|
-
species = r[2]
|
|
49
|
-
taxon_id = r[3]
|
|
50
|
-
regulator = r[4]
|
|
51
|
-
print(f'{gene_id}\t{display_gene_id}\t{species}\t{taxon_id}\t{regulator}')
|
|
52
|
-
row_num += 1
|
|
53
|
-
print('\\.')
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"""
|
|
57
|
-
This function Loads the Network Matrix into the database
|
|
58
|
-
"""
|
|
59
|
-
def LOAD_NETWORK():
|
|
60
|
-
print('COPY gene_regulatory_network.network (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source) FROM stdin;')
|
|
61
|
-
NETWORK_SOURCE = '../script-results/processed-loader-files/network.csv'
|
|
62
|
-
with open(NETWORK_SOURCE, 'r+') as f:
|
|
63
|
-
reader = csv.reader(f)
|
|
64
|
-
row_num = 0
|
|
65
|
-
for row in reader:
|
|
66
|
-
if row_num != 0:
|
|
67
|
-
r= ','.join(row).split('\t')
|
|
68
|
-
regulator_gene_id = r[0]
|
|
69
|
-
target_gene_id= r[1]
|
|
70
|
-
taxon_id = r[2]
|
|
71
|
-
time_stamp = r[3]
|
|
72
|
-
source = r[4]
|
|
73
|
-
print(f'{regulator_gene_id}\t{target_gene_id}\t{taxon_id}\t{time_stamp}\t{source}')
|
|
74
|
-
row_num += 1
|
|
75
|
-
print('\\.')
|
|
76
|
-
|
|
77
|
-
LOAD_SOURCES()
|
|
78
|
-
LOAD_GENES()
|
|
79
|
-
LOAD_NETWORK()
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
import re
|
|
3
|
-
# Usage
|
|
4
|
-
# python3 loader.py | psql postgresql://localhost/postgres
|
|
5
|
-
"""
|
|
6
|
-
This program generates direct SQL statements from the source files in order
|
|
7
|
-
to populate a relational database with those files’ data.
|
|
8
|
-
|
|
9
|
-
By taking the approach of emitting SQL statements directly, we bypass the need to import
|
|
10
|
-
some kind of database library for the loading process, instead passing the statements
|
|
11
|
-
directly into a database command line utility such as `psql`.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
"""
|
|
15
|
-
This function Loads Network Data Sources into the database
|
|
16
|
-
"""
|
|
17
|
-
def LOAD_SOURCES():
|
|
18
|
-
print('COPY gene_regulatory_network.source (time_stamp, source, display_name) FROM stdin;')
|
|
19
|
-
NETWORK_DATA_SOURCE = '../script-results/processed-loader-files/source.csv'
|
|
20
|
-
with open(NETWORK_DATA_SOURCE, 'r+') as f:
|
|
21
|
-
reader = csv.reader(f)
|
|
22
|
-
row_num = 0
|
|
23
|
-
for row in reader:
|
|
24
|
-
if row_num != 0:
|
|
25
|
-
r= ','.join(row).split('\t')
|
|
26
|
-
time_stamp = r[0]
|
|
27
|
-
source = r[1]
|
|
28
|
-
display_name = r[2]
|
|
29
|
-
print(f'{time_stamp}\t{source}\t{display_name}')
|
|
30
|
-
row_num += 1
|
|
31
|
-
print('\\.')
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"""
|
|
35
|
-
This function Loads Gene ID Mapping into the database
|
|
36
|
-
"""
|
|
37
|
-
def LOAD_GENES():
|
|
38
|
-
print('COPY gene_regulatory_network.gene (gene_id, display_gene_id, species, taxon_id, regulator) FROM stdin;')
|
|
39
|
-
GENE_SOURCE = '../script-results/processed-loader-files/gene.csv'
|
|
40
|
-
with open(GENE_SOURCE, 'r+') as f:
|
|
41
|
-
reader = csv.reader(f)
|
|
42
|
-
row_num = 0
|
|
43
|
-
for row in reader:
|
|
44
|
-
if row_num != 0:
|
|
45
|
-
r= ','.join(row).split('\t')
|
|
46
|
-
gene_id = r[0]
|
|
47
|
-
display_gene_id= r[1]
|
|
48
|
-
species = r[2]
|
|
49
|
-
taxon_id = r[3]
|
|
50
|
-
regulator = r[4]
|
|
51
|
-
print(f'{gene_id}\t{display_gene_id}\t{species}\t{taxon_id}\t{regulator}')
|
|
52
|
-
row_num += 1
|
|
53
|
-
print('\\.')
|
|
54
|
-
|
|
55
|
-
"""
|
|
56
|
-
This Updates to Gene ID Mapping into the database
|
|
57
|
-
"""
|
|
58
|
-
def UPDATE_GENES():
|
|
59
|
-
print('BEGIN;')
|
|
60
|
-
GENE_SOURCE = '../script-results/processed-loader-files/gene_update.csv'
|
|
61
|
-
with open(GENE_SOURCE, 'r+') as f:
|
|
62
|
-
reader = csv.reader(f)
|
|
63
|
-
row_num = 0
|
|
64
|
-
for row in reader:
|
|
65
|
-
if row_num != 0:
|
|
66
|
-
r= ','.join(row).split('\t')
|
|
67
|
-
gene_id = r[0]
|
|
68
|
-
display_gene_id= r[1]
|
|
69
|
-
regulator = r[2]
|
|
70
|
-
print(f"UPDATE gene_regulatory_network.gene\nSET display_gene_id = '{display_gene_id}', regulator={regulator}\nWHERE gene_id = '{gene_id}';")
|
|
71
|
-
row_num += 1
|
|
72
|
-
print('COMMIT;')
|
|
73
|
-
|
|
74
|
-
"""
|
|
75
|
-
This function Loads the Network Matrix into the database
|
|
76
|
-
"""
|
|
77
|
-
def LOAD_NETWORK():
|
|
78
|
-
print('COPY gene_regulatory_network.network (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source) FROM stdin;')
|
|
79
|
-
NETWORK_SOURCE = '../script-results/processed-loader-files/network.csv'
|
|
80
|
-
with open(NETWORK_SOURCE, 'r+') as f:
|
|
81
|
-
reader = csv.reader(f)
|
|
82
|
-
row_num = 0
|
|
83
|
-
for row in reader:
|
|
84
|
-
if row_num != 0:
|
|
85
|
-
r= ','.join(row).split('\t')
|
|
86
|
-
regulator_gene_id = r[0]
|
|
87
|
-
target_gene_id= r[1]
|
|
88
|
-
taxon_id = r[2]
|
|
89
|
-
time_stamp = r[3]
|
|
90
|
-
source = r[4]
|
|
91
|
-
print(f'{regulator_gene_id}\t{target_gene_id}\t{taxon_id}\t{time_stamp}\t{source}')
|
|
92
|
-
row_num += 1
|
|
93
|
-
print('\\.')
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
UPDATE_GENES()
|
|
97
|
-
LOAD_SOURCES()
|
|
98
|
-
LOAD_GENES()
|
|
99
|
-
LOAD_NETWORK()
|