@sjcrh/proteinpaint-server 2.68.0 → 2.69.1-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/routes/genesetEnrichment.js +31 -3
- package/src/app.js +164 -73
- package/utils/gsea.py +60 -16
package/utils/gsea.py
CHANGED
|
@@ -1,16 +1,42 @@
|
|
|
1
|
-
#
|
|
1
|
+
# cat ~/sjpp/test.txt | python gsea.py
|
|
2
2
|
|
|
3
3
|
import blitzgsea as blitz
|
|
4
4
|
import json
|
|
5
5
|
import time
|
|
6
6
|
import sys
|
|
7
7
|
import sqlite3
|
|
8
|
+
import os
|
|
9
|
+
import numpy as np
|
|
8
10
|
import pandas as pd
|
|
9
|
-
|
|
10
11
|
|
|
11
12
|
def extract_symbols(x):
    """Return the value stored under the ``'symbol'`` key of ``x``.

    Raises whatever ``x['symbol']`` raises when the key is missing
    (``KeyError`` for a plain dict).
    """
    symbol = x['symbol']
    return symbol
|
|
13
14
|
|
|
15
|
+
def extract_plot_data(signature, geneset, library, result, center=True):
    """Print the running-sum enrichment data for one gene set.

    The signature is ranked by descending value, de-duplicated by gene
    identifier and optionally mean-centered; the running sum and
    enrichment score are then computed with ``blitz.enrichment_score``.

    Args:
        signature: Two-column DataFrame; the first column holds the gene
            identifiers and the second their values. A copy is made, so
            the caller's frame is left untouched.
        geneset: Key into ``library`` naming the gene set to score.
        library: Mapping from gene set name to an iterable of genes.
        result: Not used by this function; kept so that existing callers
            keep working.
        center: When true, subtract the mean value from every value.

    Raises:
        KeyError: If ``geneset`` is not a key of ``library``.

    Prints a single line to stdout of the form::

        result: {"nn":<index>,"running_sum":"<v0,v1,...>","es":<score>}

    where ``nn`` is the first position at which the absolute running sum
    reaches its maximum.
    """
    signature = signature.copy()
    signature.columns = ["i", "v"]
    signature = signature.sort_values("v", ascending=False).set_index("i")
    # After the descending sort, keep='first' retains the highest-valued
    # row for every duplicated identifier.
    signature = signature[~signature.index.duplicated(keep='first')]
    if center:
        signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"])

    # Position of every identifier in the ranked signature.
    signature_map = {gene: rank for rank, gene in enumerate(signature.index)}
    gs = set(library[geneset])

    running_sum, es = blitz.enrichment_score(
        np.array(np.abs(signature.iloc[:, 0])), signature_map, gs
    )
    running_sum = list(running_sum)
    # argmax yields the first index of the largest absolute value.
    nn = np.argmax(np.abs(running_sum))

    # The payload is assembled by hand (not json.dumps) so that the emitted
    # text stays exactly what the consumer of this script already parses.
    running_sum_str = ",".join(str(elem) for elem in running_sum)
    print(f'result: {{"nn":{nn!s},"running_sum":"{running_sum_str}","es":{es!s}}}')
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Main script body (runs at module level; reads JSON requests from stdin)
|
|
14
40
|
try:
|
|
15
41
|
# Read one character from stdin to check whether any input was supplied (note: this call blocks until data or EOF arrives, and it consumes that character)
|
|
16
42
|
if sys.stdin.read(1):
|
|
@@ -18,12 +44,13 @@ try:
|
|
|
18
44
|
for line in sys.stdin:
|
|
19
45
|
# Process each line
|
|
20
46
|
json_object = json.loads(line)
|
|
21
|
-
|
|
22
|
-
table_name=json_object['gene_set_group']
|
|
47
|
+
cachedir=json_object['cachedir']
|
|
23
48
|
genes=json_object['genes']
|
|
24
49
|
fold_change=json_object['fold_change']
|
|
50
|
+
table_name=json_object['geneset_group']
|
|
25
51
|
df = {'Genes': genes, 'fold_change': fold_change}
|
|
26
|
-
|
|
52
|
+
signature=pd.DataFrame(df)
|
|
53
|
+
db=json_object['db']
|
|
27
54
|
# Connect to the SQLite database
|
|
28
55
|
conn = sqlite3.connect(db)
|
|
29
56
|
|
|
@@ -39,7 +66,6 @@ try:
|
|
|
39
66
|
# Fetch all rows from the executed SQL query
|
|
40
67
|
rows = cursor.fetchall()
|
|
41
68
|
|
|
42
|
-
|
|
43
69
|
start_loop_time = time.time()
|
|
44
70
|
msigdb_library={}
|
|
45
71
|
# Iterate over the rows and print them
|
|
@@ -58,16 +84,34 @@ try:
|
|
|
58
84
|
stop_loop_time = time.time()
|
|
59
85
|
execution_time = stop_loop_time - start_loop_time
|
|
60
86
|
print(f"Execution time: {execution_time} seconds")
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
87
|
+
try: # Extract ES data to be plotted on client side
|
|
88
|
+
geneset_name=json_object['geneset_name'] # Checks if geneset_name is present, if yes it indicates the server request is for generating the image. It retrieves the result.pkl file and generates the image without having to recompute gsea again.
|
|
89
|
+
pickle_file=json_object['pickle_file']
|
|
90
|
+
result = pd.read_pickle(os.path.join(cachedir,pickle_file))
|
|
91
|
+
fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True)
|
|
92
|
+
random_num = np.random.rand()
|
|
93
|
+
png_filename = "gsea_plot_" + str(random_num) + ".png"
|
|
94
|
+
fig.savefig(os.path.join(cachedir,png_filename), bbox_inches='tight')
|
|
95
|
+
#extract_plot_data(signature, geneset_name, msigdb_library, result) # This returns raw data to client side, not currently used
|
|
96
|
+
print ('image: {"image_file":"' + png_filename + '"}')
|
|
97
|
+
except KeyError: #Initial GSEA calculation, result saved to a result.pkl pickle file
|
|
98
|
+
# run enrichment analysis
|
|
99
|
+
start_gsea_time = time.time()
|
|
100
|
+
if __name__ == "__main__":
|
|
101
|
+
result = blitz.gsea(signature, msigdb_library).T
|
|
102
|
+
random_num = np.random.rand()
|
|
103
|
+
pickle_filename="gsea_result_"+ str(random_num) +".pkl"
|
|
104
|
+
result.to_pickle(os.path.join(cachedir,pickle_filename))
|
|
105
|
+
gsea_str='{"data":' + result.to_json() + '}'
|
|
106
|
+
pickle_str='{"pickle_file":"' + pickle_filename + '"}'
|
|
107
|
+
#print ("pickle_file:",pickle_str)
|
|
108
|
+
gsea_dict = json.loads(gsea_str)
|
|
109
|
+
pickle_dict = json.loads(pickle_str)
|
|
110
|
+
result_dict = {**gsea_dict, **pickle_dict}
|
|
111
|
+
print ("result:",json.dumps(result_dict))
|
|
112
|
+
stop_gsea_time = time.time()
|
|
113
|
+
gsea_time = stop_gsea_time - start_gsea_time
|
|
114
|
+
print (f"GSEA time: {gsea_time} seconds")
|
|
71
115
|
|
|
72
116
|
else:
|
|
73
117
|
pass
|