grnsight 6.0.7 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/.eslintrc.yml +4 -4
  2. package/.github/workflows/node.js.yml +35 -0
  3. package/README.md +1 -1
  4. package/database/README.md +218 -97
  5. package/database/constants.py +42 -0
  6. package/database/filter_update.py +168 -0
  7. package/database/grnsettings-database/README.md +52 -0
  8. package/database/grnsettings-database/schema.sql +4 -0
  9. package/database/loader.py +30 -0
  10. package/database/loader_update.py +36 -0
  11. package/database/network-database/scripts/generate_network.py +15 -23
  12. package/database/network-database/scripts/generate_new_network_version.py +17 -24
  13. package/database/protein-protein-database/README.md +71 -0
  14. package/database/protein-protein-database/schema.sql +37 -0
  15. package/database/protein-protein-database/scripts/generate_protein_network.py +227 -0
  16. package/database/protein-protein-database/scripts/remove_duplicates.sh +4 -0
  17. package/database/utils.py +418 -0
  18. package/package.json +3 -2
  19. package/server/app.js +2 -0
  20. package/server/config/config.js +4 -4
  21. package/server/controllers/additional-sheet-parser.js +2 -1
  22. package/server/controllers/constants.js +5 -0
  23. package/server/controllers/custom-workbook-controller.js +4 -3
  24. package/server/controllers/demo-workbooks.js +1462 -6
  25. package/server/controllers/export-constants.js +3 -2
  26. package/server/controllers/exporters/sif.js +6 -1
  27. package/server/controllers/exporters/xlsx.js +8 -3
  28. package/server/controllers/expression-sheet-parser.js +0 -6
  29. package/server/controllers/grnsettings-database-controller.js +17 -0
  30. package/server/controllers/importers/sif.js +30 -11
  31. package/server/controllers/network-database-controller.js +2 -2
  32. package/server/controllers/network-sheet-parser.js +54 -12
  33. package/server/controllers/protein-database-controller.js +18 -0
  34. package/server/controllers/sif-constants.js +11 -4
  35. package/server/controllers/spreadsheet-controller.js +44 -1
  36. package/server/controllers/workbook-constants.js +21 -4
  37. package/server/dals/expression-dal.js +4 -4
  38. package/server/dals/grnsetting-dal.js +49 -0
  39. package/server/dals/network-dal.js +14 -15
  40. package/server/dals/protein-dal.js +106 -0
  41. package/test/additional-sheet-parser-tests.js +1 -1
  42. package/test/export-tests.js +136 -9
  43. package/test/import-sif-tests.js +67 -13
  44. package/test/test.js +1 -1
  45. package/test-files/additional-sheet-test-files/optimization-parameters-default.xlsx +0 -0
  46. package/test-files/demo-files/18_proteins_81_edges_PPI.xlsx +0 -0
  47. package/test-files/expression-data-test-sheets/expression_sheet_missing_data_ok_export_exact.xlsx +0 -0
  48. package/web-client/config/config.js +4 -4
  49. package/web-client/public/js/api/grnsight-api.js +18 -3
  50. package/web-client/public/js/constants.js +27 -12
  51. package/web-client/public/js/generateNetwork.js +170 -72
  52. package/web-client/public/js/graph.js +424 -161
  53. package/web-client/public/js/grnsight.js +25 -4
  54. package/web-client/public/js/grnstate.js +4 -1
  55. package/web-client/public/js/iframe-coordination.js +3 -3
  56. package/web-client/public/js/setup-handlers.js +76 -61
  57. package/web-client/public/js/setup-load-and-import-handlers.js +32 -7
  58. package/web-client/public/js/update-app.js +119 -28
  59. package/web-client/public/js/upload.js +142 -85
  60. package/web-client/public/js/warnings.js +25 -0
  61. package/web-client/public/lib/bootstrap.file-input/bootstrap.file-input.js +0 -1
  62. package/web-client/public/stylesheets/grnsight.styl +40 -16
  63. package/web-client/views/components/demo.pug +7 -5
  64. package/web-client/views/upload.pug +64 -50
  65. package/database/network-database/scripts/filter_genes.py +0 -76
  66. package/database/network-database/scripts/loader.py +0 -79
  67. package/database/network-database/scripts/loader_updates.py +0 -99
@@ -57,10 +57,21 @@ html
57
57
 
58
58
  li(class='divider')
59
59
 
60
+ li
61
+ span(class='menu-subheader') Network Mode
62
+
63
+ li(class='startDisabled disabled')
64
+ a(href='#' id='network-mode-grn-menu' class="menu-submenu network-mode")
65
+ span(class='glyphicon')
66
+ |   Gene Regulatory Network
67
+ li(class='startDisabled disabled')
68
+ a(href='#' id='network-mode-protein-protein-physical-interaction-menu' class="menu-submenu network-mode")
69
+ span(class='glyphicon')
70
+ |   Protein-Protein Physical Interaction Network
60
71
  li
61
72
  span(class='menu-subheader') Species
62
73
  li
63
- p(id='Saccharomyces-cerevisiae')
74
+ p(id='Saccharomyces-cerevisiae', class="menu-submenu")
64
75
  span(class='glyphicon glyphicon-ok')
65
76
  |   Saccharomyces cerevisiae
66
77
 
@@ -70,35 +81,33 @@ html
70
81
  ul(class='dropdown-menu' role='menu')
71
82
  span(class='menu-subheader') Graph Options
72
83
  li
73
- a(href='#' id='forceGraph' class='layout' checked)
74
- span(class='glyphicon glyphicon-ok')
75
- |   Force Graph
84
+ a(href='#' id='forceGraph' class='layout')
85
+ |      Force Graph
76
86
  a(href='#' id='gridLayout' class='layout')
77
- span(class='glyphicon')
78
- |   Grid Layout
87
+ |      Grid Layout
79
88
  li(class='divider')
80
89
  li
81
90
  a(href='#' id='lockSlidersMenu' class="lockSliders")
82
91
  span(class='glyphicon invisible')
83
- |   Lock Force Graph Parameters
92
+ |Lock Force Graph Parameters
84
93
  li
85
94
  a(href='#' id='resetSlidersMenu' class="resetSliders")
86
95
  span(class='glyphicon invisible')
87
- |   Reset Force Graph Parameters
96
+ |Reset Force Graph Parameters
88
97
  li(class='disabled')
89
98
  a(href='#' id='undoResetMenu' class="undoSliderReset")
90
99
  span(class='glyphicon invisible')
91
- |   Undo Reset
100
+ |Undo Reset
92
101
  li(class='divider')
93
102
  li
94
103
  a(href='#' id='link-distance' class='with-input')
95
104
  span(class='glyphicon invisible')
96
- |   Link Distance (1 - 1000)  
105
+ |Link Distance (1 - 1000)  
97
106
  input(type='number' id='link-distance-menu' class='keepopen' value='500' aria-describedby='basic-addon1')
98
107
  li
99
108
  a(href='#' id='charge' class='with-input')
100
109
  span(class='glyphicon invisible')
101
- |   Charge (-2000 - 0)  
110
+ |Charge (-2000 - 0)  
102
111
  input(type='number' id='charge-menu' class='keepopen' value='-50' aria-describedby='basic-addon1')
103
112
 
104
113
 
@@ -109,32 +118,33 @@ html
109
118
  a(href='#' id='node-coloring-toggle-menu' class='nodeColoringToggle')
110
119
  span(class='glyphicon')
111
120
  |   Enable Node Coloring
112
- li(class='divider')
113
- li(class='node-coloring-menu dropdown-submenu disabled')
114
- a(href='#')
115
- span(class='glyphicon invisible')
116
- |   Select Top Dataset
117
- ul(class='dropdown-menu' id='topDatasetDropdownMenu')
118
- li(class='node-coloring-menu disabled')
119
- a(href='#' id='averageDataTopMenu' class='nodeColoring')
120
- span(class='glyphicon')
121
- |   Average Replicate Values for Top Dataset
122
- li(class='divider')
123
- li(class='node-coloring-menu dropdown-submenu disabled')
124
- a(href='#')
125
- span(class='glyphicon invisible')
126
- |   Select Bottom Dataset
127
- ul(class='dropdown-menu' id='bottomDatasetDropdownMenu')
128
- li(class='node-coloring-menu disabled')
129
- a(href='#' id='averageDataBottomMenu' class='viewportOption')
130
- span(class='glyphicon')
131
- |   Average Replicate Values for Bottom Dataset
132
- li(class='divider')
133
- li(class='node-coloring-menu disabled')
134
- a(href='#' class='nodeOptions with-input')
135
- span(class='glyphicon invisible')
136
- |   Log Fold Change Max Value (0.01 - 100):  
137
- input(type='number' min="0.01" max="100" id='log-fold-change-max-value-menu' class='logFoldChangeMaxValue keepopen' aria-describedby='basic-addon1')
121
+ div(id='node-coloring-navbar-options' class='disabled')
122
+ li(class='divider')
123
+ li(class='node-coloring-menu dropdown-submenu disabled')
124
+ a(href='#')
125
+ span(class='glyphicon invisible')
126
+ |   Select Top Dataset
127
+ ul(class='dropdown-menu' id='topDatasetDropdownMenu')
128
+ li(class='node-coloring-menu disabled')
129
+ a(href='#' id='averageDataTopMenu' class='nodeColoring')
130
+ span(class='glyphicon')
131
+ |   Average Replicate Values for Top Dataset
132
+ li(class='divider')
133
+ li(class='node-coloring-menu dropdown-submenu disabled')
134
+ a(href='#')
135
+ span(class='glyphicon invisible')
136
+ |   Select Bottom Dataset
137
+ ul(class='dropdown-menu' id='bottomDatasetDropdownMenu')
138
+ li(class='node-coloring-menu disabled')
139
+ a(href='#' id='averageDataBottomMenu' class='viewportOption')
140
+ span(class='glyphicon')
141
+ |   Average Replicate Values for Bottom Dataset
142
+ li(class='divider')
143
+ li(class='node-coloring-menu disabled')
144
+ a(href='#' class='nodeOptions with-input')
145
+ span(class='glyphicon invisible')
146
+ |   Log Fold Change Max Value (0.01 - 100):  
147
+ input(type='number' min="0.01" max="100" id='log-fold-change-max-value-menu' class='logFoldChangeMaxValue keepopen' aria-describedby='basic-addon1')
138
148
 
139
149
  li(class='dropdown')
140
150
  a(href='#' class='dropdown-toggle' data-toggle='dropdown') Edge
@@ -200,7 +210,7 @@ html
200
210
  |   Fit To Window
201
211
  li(class='divider')
202
212
  li
203
- a(href='#' id='restrict-graph-to-viewport' class='viewportOption')
213
+ a(href='#' id='restrict-graph-to-viewport' class='viewport')
204
214
  span(class='glyphicon')
205
215
  |   Restrict Graph to Viewport
206
216
  li(class='divider')
@@ -234,15 +244,11 @@ html
234
244
  li(class='startDisabled disabled weighted export')
235
245
  a(href='#' id='exportAsWeightedSif') To Weighted SIF
236
246
  li(class='divider')
237
- li(class='startDisabled disabled unweighted export')
247
+ li(class='startDisabled disabled unweighted export' id='unweightedGraphmlContainer')
238
248
  a(href='#' id='exportAsUnweightedGraphMl') To Unweighted GraphML
239
249
  li(class='startDisabled disabled weighted export')
240
250
  a(href='#' id='exportAsWeightedGraphMl') To Weighted GraphML
241
251
  li(class='divider')
242
- //- li(class='startDisabled disabled unweighted export')
243
- a(href='#' id='exportAsUnweightedExcel') To Unweighted Excel
244
- //- li(class='startDisabled disabled weighted export')
245
- //- a(href='#' id='exportAsWeightedExcel') To Weighted Excel
246
252
  li(class='startDisabled disabled export')
247
253
  a(href='#' id='exportAsExcel') To Excel
248
254
 
@@ -341,6 +347,7 @@ html
341
347
  option(href='#' id='demoSourceDropdown-weighted' class='weighted' value='weighted') Demo #2: Weighted GRN (15 genes, 28 edges, Dahlquist Lab unpublished data)
342
348
  option(href='#' id='demoSourceDropdown-schadeInput' class='schadeInput' value='schadeInput') Demo #3: Unweighted GRN (21 genes, 31 edges)
343
349
  option(href='#' id='demoSourceDropdown-schadeOutput' class='schadeOutput' value='schadeOutput') Demo #4: Weighted GRN (21 genes, 31 edges, Schade et al. 2004 data)
350
+ option(href='#' id='demoSourceDropdown-ppi' class='ppi' value='ppi') Demo #5: PPI (18 proteins, 81 edges)
344
351
  label(for='upload-network' class='open btn btn-default network-button' data-toggle='tooltip', title='(.xlsx, .sif, .graphml)')
345
352
  span(class='glyphicon glyphicon-folder-open')
346
353
  |   Open File
@@ -354,6 +361,13 @@ html
354
361
  span(class='glyphicon glyphicon-repeat sidebar-glyphicon')
355
362
  |   Reload
356
363
  div(class="long-sub-divider")
364
+ form(class='panelDropdownContainer')
365
+ label(for='speciesDropdown' class='info') Network Mode
366
+ select(class="networkMode panelDropdown btn btn-default startDisabled disabled" id='networkModeDropdown' name="networkModeDropdown" href='#')
367
+ option(value="none" selected="true" disabled hidden) Change Network Mode
368
+ option(href='#' id='demoSourceDropdown-grn' value='grn') Gene Regulatory Network
369
+ option(href='#' id='demoSourceDropdown-protein-protein-physical-interaction' value='protein-protein-physical-interaction') Protein-Protein Physical Interaction Network
370
+
357
371
  label(for='species-info' class='info') Species:
358
372
  span(id='species' class='network-type') Saccharomyces cerevisiae
359
373
  input(type="hidden" id='species-info')
@@ -488,7 +502,7 @@ html
488
502
  tr(class='shortest-path-column-headers')
489
503
  tbody
490
504
 
491
- div(id='errorModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
505
+ div(id='errorModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
492
506
  div(class='modal-dialog')
493
507
  div(class='modal-content')
494
508
  div(class='modal-header')
@@ -499,9 +513,9 @@ html
499
513
  p(id='error')
500
514
  div(class='modal-footer')
501
515
  input(type='button' class='btn btn-default' data-dismiss='modal' value='Close')
502
- input(type='button' id='launchFileOpen' class='btn btn-primary' data-dismiss='modal' value='Select New File' onclick="$('.upload').click()")
516
+ input(type='button' id='launchFileOpen' class='btn btn-primary' data-dismiss='modal' value='Select New File' onclick="$('#upload-network').click()")
503
517
 
504
- div(id='warningsModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
518
+ div(id='warningsModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
505
519
  div(class='modal-dialog')
506
520
  div(class='modal-content')
507
521
  div(class='modal-header')
@@ -522,7 +536,7 @@ html
522
536
  div(class='modal-footer')
523
537
  input(type='button' class='btn btn-default' data-dismiss='modal' value='Close')
524
538
 
525
- div(id='exportExcelModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
539
+ div(id='exportExcelModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
526
540
  div(class='modal-dialog')
527
541
  div(class='modal-content')
528
542
  div(class='modal-header')
@@ -534,7 +548,7 @@ html
534
548
  div(class='modal-footer')
535
549
  div(id='exportExcelFooter-container')
536
550
 
537
- div(id='generateNetworkModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
551
+ div(id='generateNetworkModal' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
538
552
  div(class='modal-dialog')
539
553
  div(class='modal-content')
540
554
  div(class='modal-header')
@@ -546,7 +560,7 @@ html
546
560
  div(class='modal-footer')
547
561
  div(id='generateNetworkFooter-container')
548
562
 
549
- div(id='warningsModalSpecies' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true')
563
+ div(id='warningsModalSpecies' class='modal fade' tab-index='-1' role='dialog' aria-labelledby='mySmallModalLabel' aria-hidden='true' data-backdrop="static")
550
564
  div(class='modal-dialog')
551
565
  div(class='modal-content')
552
566
  div(class='modal-header')
@@ -559,7 +573,7 @@ html
559
573
  input(type='button' class='btn btn-default' data-dismiss='modal' value='Close')
560
574
  //- input(type='button' id='launchFileOpen' class='btn btn-primary' data-dismiss='modal' value='Select New File' onclick="$('.upload').click()")
561
575
 
562
- div(id='importErrorModal' class='modal fade' tab-index='-1' role='dialog' aria-hidden='true')
576
+ div(id='importErrorModal' class='modal fade' tab-index='-1' role='dialog' aria-hidden='true' data-backdrop="static")
563
577
  div(class='modal-dialog')
564
578
  div(class='modal-content')
565
579
  div(class='modal-header')
@@ -1,76 +0,0 @@
1
- import psycopg2
2
- import csv
3
- import os
4
-
5
- PROCESSED_GENES = "../script-results/processed-loader-files/gene.csv"
6
- MISSING_GENE_DESTINATION = '../script-results/processed-loader-files/missing-genes.csv'
7
- UPDATE_GENE_DESTINATION = '../script-results/processed-loader-files/update-genes.csv'
8
-
9
- try:
10
- connection = psycopg2.connect(user="postgres",
11
- password="",
12
- host="grnsight2.cfimp3lu6uob.us-west-1.rds.amazonaws.com",
13
- port="5432",
14
- database="postgres")
15
- cursor = connection.cursor()
16
- postgreSQL_select_Query = "select * from gene_regulatory_network.gene"
17
-
18
- cursor.execute(postgreSQL_select_Query)
19
- print("Selecting rows from gene table using cursor.fetchall")
20
- gene_records = cursor.fetchall()
21
-
22
- db_genes = {}
23
- missing_genes = {}
24
- genes_to_update = {}
25
- for gene in gene_records:
26
- # key = (gene_id, taxon_id)
27
- key = (gene[0], gene[3])
28
- value = {"display_gene_id": gene[1], "species": gene[2], "regulator": gene[4]}
29
- db_genes[key] = value
30
-
31
- print(f'Processing file {PROCESSED_GENES}')
32
- with open(PROCESSED_GENES, 'r+', encoding="UTF-8") as f:
33
- i = 0
34
- reader = csv.reader(f)
35
- for row in reader:
36
- if i != 0:
37
- row = row[0].split('\t')
38
- gene_id = row[0]
39
- display_gene_id = row[1]
40
- species = row[2]
41
- taxon_id = row[3]
42
- regulator = row[4]
43
- key = (gene_id, taxon_id)
44
- value = {"display_gene_id": display_gene_id , "species": species, "regulator": regulator}
45
- if key not in db_genes:
46
- missing_genes[key] = value
47
- elif db_genes[key]["display_gene_id"] != display_gene_id:
48
- # the display gene id got updated, so lets update our db to account for that
49
- genes_to_update[key] = value
50
- i+=1
51
-
52
- print(f'Creating missing-genes.csv\n')
53
- gene_file = open(MISSING_GENE_DESTINATION, 'w')
54
- headers = f'Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\tRegulator'
55
- gene_file.write(f'{headers}\n')
56
- for gene in missing_genes:
57
- gene_file.write(f'{gene[0]}\t{missing_genes[gene]["display_gene_id"]}\t{missing_genes[gene]["species"]}\t{gene[1]}\t{missing_genes[gene]["regulator"]}\n')
58
- gene_file.close()
59
-
60
- print(f'Creating update-genes.csv\n')
61
- gene_file = open(UPDATE_GENE_DESTINATION, 'w')
62
- headers = f'Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\tRegulator'
63
- gene_file.write(f'{headers}\n')
64
- for gene in genes_to_update:
65
- gene_file.write(f'{gene[0]}\t{genes_to_update[gene]["display_gene_id"]}\t{genes_to_update[gene]["species"]}\t{gene[1]}\t{genes_to_update[gene]["regulator"]}\n')
66
- gene_file.close()
67
-
68
- except (Exception, psycopg2.Error) as error:
69
- print("Error while fetching data from PostgreSQL", error)
70
-
71
- finally:
72
- # closing database connection.
73
- if connection:
74
- cursor.close()
75
- connection.close()
76
- print("PostgreSQL connection is closed")
@@ -1,79 +0,0 @@
1
- import csv
2
- import re
3
- # Usage
4
- # python3 loader.py | psql postgresql://localhost/postgres
5
- """
6
- This program generates direct SQL statements from the source files in order
7
- to populate a relational database with those files’ data.
8
-
9
- By taking the approach of emitting SQL statements directly, we bypass the need to import
10
- some kind of database library for the loading process, instead passing the statements
11
- directly into a database command line utility such as `psql`.
12
- """
13
-
14
- """
15
- This function Loads Network Data Sources into the database
16
- """
17
- def LOAD_SOURCES():
18
- print('COPY gene_regulatory_network.source (time_stamp, source, display_name) FROM stdin;')
19
- NETWORK_DATA_SOURCE = '../script-results/processed-loader-files/source.csv'
20
- with open(NETWORK_DATA_SOURCE, 'r+') as f:
21
- reader = csv.reader(f)
22
- row_num = 0
23
- for row in reader:
24
- if row_num != 0:
25
- r= ','.join(row).split('\t')
26
- time_stamp = r[0]
27
- source = r[1]
28
- display_name = r[2]
29
- print(f'{time_stamp}\t{source}\t{display_name}')
30
- row_num += 1
31
- print('\\.')
32
-
33
-
34
- """
35
- This function Loads Gene ID Mapping into the database
36
- """
37
- def LOAD_GENES():
38
- print('COPY gene_regulatory_network.gene (gene_id, display_gene_id, species, taxon_id, regulator) FROM stdin;')
39
- GENE_SOURCE = '../script-results/processed-loader-files/gene.csv'
40
- with open(GENE_SOURCE, 'r+') as f:
41
- reader = csv.reader(f)
42
- row_num = 0
43
- for row in reader:
44
- if row_num != 0:
45
- r= ','.join(row).split('\t')
46
- gene_id = r[0]
47
- display_gene_id= r[1]
48
- species = r[2]
49
- taxon_id = r[3]
50
- regulator = r[4]
51
- print(f'{gene_id}\t{display_gene_id}\t{species}\t{taxon_id}\t{regulator}')
52
- row_num += 1
53
- print('\\.')
54
-
55
-
56
- """
57
- This function Loads the Network Matrix into the database
58
- """
59
- def LOAD_NETWORK():
60
- print('COPY gene_regulatory_network.network (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source) FROM stdin;')
61
- NETWORK_SOURCE = '../script-results/processed-loader-files/network.csv'
62
- with open(NETWORK_SOURCE, 'r+') as f:
63
- reader = csv.reader(f)
64
- row_num = 0
65
- for row in reader:
66
- if row_num != 0:
67
- r= ','.join(row).split('\t')
68
- regulator_gene_id = r[0]
69
- target_gene_id= r[1]
70
- taxon_id = r[2]
71
- time_stamp = r[3]
72
- source = r[4]
73
- print(f'{regulator_gene_id}\t{target_gene_id}\t{taxon_id}\t{time_stamp}\t{source}')
74
- row_num += 1
75
- print('\\.')
76
-
77
- LOAD_SOURCES()
78
- LOAD_GENES()
79
- LOAD_NETWORK()
@@ -1,99 +0,0 @@
1
- import csv
2
- import re
3
- # Usage
4
- # python3 loader.py | psql postgresql://localhost/postgres
5
- """
6
- This program generates direct SQL statements from the source files in order
7
- to populate a relational database with those files’ data.
8
-
9
- By taking the approach of emitting SQL statements directly, we bypass the need to import
10
- some kind of database library for the loading process, instead passing the statements
11
- directly into a database command line utility such as `psql`.
12
- """
13
-
14
- """
15
- This function Loads Network Data Sources into the database
16
- """
17
- def LOAD_SOURCES():
18
- print('COPY gene_regulatory_network.source (time_stamp, source, display_name) FROM stdin;')
19
- NETWORK_DATA_SOURCE = '../script-results/processed-loader-files/source.csv'
20
- with open(NETWORK_DATA_SOURCE, 'r+') as f:
21
- reader = csv.reader(f)
22
- row_num = 0
23
- for row in reader:
24
- if row_num != 0:
25
- r= ','.join(row).split('\t')
26
- time_stamp = r[0]
27
- source = r[1]
28
- display_name = r[2]
29
- print(f'{time_stamp}\t{source}\t{display_name}')
30
- row_num += 1
31
- print('\\.')
32
-
33
-
34
- """
35
- This function Loads Gene ID Mapping into the database
36
- """
37
- def LOAD_GENES():
38
- print('COPY gene_regulatory_network.gene (gene_id, display_gene_id, species, taxon_id, regulator) FROM stdin;')
39
- GENE_SOURCE = '../script-results/processed-loader-files/gene.csv'
40
- with open(GENE_SOURCE, 'r+') as f:
41
- reader = csv.reader(f)
42
- row_num = 0
43
- for row in reader:
44
- if row_num != 0:
45
- r= ','.join(row).split('\t')
46
- gene_id = r[0]
47
- display_gene_id= r[1]
48
- species = r[2]
49
- taxon_id = r[3]
50
- regulator = r[4]
51
- print(f'{gene_id}\t{display_gene_id}\t{species}\t{taxon_id}\t{regulator}')
52
- row_num += 1
53
- print('\\.')
54
-
55
- """
56
- This Updates to Gene ID Mapping into the database
57
- """
58
- def UPDATE_GENES():
59
- print('BEGIN;')
60
- GENE_SOURCE = '../script-results/processed-loader-files/gene_update.csv'
61
- with open(GENE_SOURCE, 'r+') as f:
62
- reader = csv.reader(f)
63
- row_num = 0
64
- for row in reader:
65
- if row_num != 0:
66
- r= ','.join(row).split('\t')
67
- gene_id = r[0]
68
- display_gene_id= r[1]
69
- regulator = r[2]
70
- print(f"UPDATE gene_regulatory_network.gene\nSET display_gene_id = '{display_gene_id}', regulator={regulator}\nWHERE gene_id = '{gene_id}';")
71
- row_num += 1
72
- print('COMMIT;')
73
-
74
- """
75
- This function Loads the Network Matrix into the database
76
- """
77
- def LOAD_NETWORK():
78
- print('COPY gene_regulatory_network.network (regulator_gene_id, target_gene_id, taxon_id, time_stamp, source) FROM stdin;')
79
- NETWORK_SOURCE = '../script-results/processed-loader-files/network.csv'
80
- with open(NETWORK_SOURCE, 'r+') as f:
81
- reader = csv.reader(f)
82
- row_num = 0
83
- for row in reader:
84
- if row_num != 0:
85
- r= ','.join(row).split('\t')
86
- regulator_gene_id = r[0]
87
- target_gene_id= r[1]
88
- taxon_id = r[2]
89
- time_stamp = r[3]
90
- source = r[4]
91
- print(f'{regulator_gene_id}\t{target_gene_id}\t{taxon_id}\t{time_stamp}\t{source}')
92
- row_num += 1
93
- print('\\.')
94
-
95
-
96
- UPDATE_GENES()
97
- LOAD_SOURCES()
98
- LOAD_GENES()
99
- LOAD_NETWORK()