@sjcrh/proteinpaint-server 2.69.0 → 2.69.1-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/utils/gsea.py CHANGED
@@ -1,16 +1,42 @@
1
- # 'cat ~/sjpp/test.txt | python gsea.py
1
+ # cat ~/sjpp/test.txt | python gsea.py
2
2
 
3
3
  import blitzgsea as blitz
4
4
  import json
5
5
  import time
6
6
  import sys
7
7
  import sqlite3
8
+ import os
9
+ import numpy as np
8
10
  import pandas as pd
9
-
10
11
 
11
12
  def extract_symbols(x):
12
13
  return x['symbol']
13
14
 
15
def extract_plot_data(signature, geneset, library, result, center=True):
    """Print the running-sum enrichment data for one gene set as a ``result:`` line.

    Args:
        signature: Two-column DataFrame. The columns are renamed to ``i``
            (identifier, which becomes the index) and ``v`` (the value used
            for ranking). A copy is taken first, so the caller's frame is
            not modified.
        geneset: Key into ``library`` selecting the gene set to score.
        library: Mapping from gene-set name to an iterable of identifiers.
        result: Not used by this function; kept so the signature stays
            compatible with existing callers.
        center: When true, subtract the mean of ``v`` from every value
            before scoring.

    Returns:
        None. The data is written to stdout in the form
        ``result: {"nn":<index>,"running_sum":"<comma-joined values>","es":<es>}``.
    """
    ranked = signature.copy()
    ranked.columns = ["i", "v"]
    ranked = ranked.sort_values("v", ascending=False).set_index("i")
    # After the descending sort, keep='first' retains the highest-valued row
    # of every duplicated identifier.
    ranked = ranked[~ranked.index.duplicated(keep='first')]
    if center:
        ranked.loc[:, "v"] -= np.mean(ranked.loc[:, "v"])

    # Position of every identifier in the ranked order.
    signature_map = {label: position for position, label in enumerate(ranked.index)}
    gs = set(library[geneset])

    running_sum, es = blitz.enrichment_score(np.array(np.abs(ranked.iloc[:, 0])), signature_map, gs)
    running_sum = list(running_sum)

    # Index of the first point where the absolute running sum is largest.
    nn = np.argmax(np.abs(running_sum))

    # The payload is assembled by hand so the emitted text stays exactly what
    # consumers of the "result:" line already parse (running_sum is sent as a
    # single comma-joined string, not as a JSON array).
    payload = "".join([
        'result: {"nn":', str(nn),
        ',"running_sum":"', ",".join(str(value) for value in running_sum),
        '","es":', str(es),
        '}',
    ])
    print(payload)
37
+
38
+
39
+ # Main function
14
40
  try:
15
41
  # Read (and consume) one character from stdin to check that input was provided; this call blocks until data or EOF is available
16
42
  if sys.stdin.read(1):
@@ -18,12 +44,13 @@ try:
18
44
  for line in sys.stdin:
19
45
  # Process each line
20
46
  json_object = json.loads(line)
21
- db=json_object['db']
22
- table_name=json_object['gene_set_group']
47
+ cachedir=json_object['cachedir']
23
48
  genes=json_object['genes']
24
49
  fold_change=json_object['fold_change']
50
+ table_name=json_object['geneset_group']
25
51
  df = {'Genes': genes, 'fold_change': fold_change}
26
-
52
+ signature=pd.DataFrame(df)
53
+ db=json_object['db']
27
54
  # Connect to the SQLite database
28
55
  conn = sqlite3.connect(db)
29
56
 
@@ -39,7 +66,6 @@ try:
39
66
  # Fetch all rows from the executed SQL query
40
67
  rows = cursor.fetchall()
41
68
 
42
-
43
69
  start_loop_time = time.time()
44
70
  msigdb_library={}
45
71
  # Iterate over the rows and print them
@@ -58,16 +84,34 @@ try:
58
84
  stop_loop_time = time.time()
59
85
  execution_time = stop_loop_time - start_loop_time
60
86
  print(f"Execution time: {execution_time} seconds")
61
- signature=pd.DataFrame(df)
62
-
63
- # run enrichment analysis
64
- start_gsea_time = time.time()
65
- if __name__ == "__main__":
66
- result = blitz.gsea(signature, msigdb_library).T
67
- print ("result:",result.to_json())
68
- stop_gsea_time = time.time()
69
- gsea_time = stop_gsea_time - start_gsea_time
70
- print (f"GSEA time: {gsea_time} seconds")
87
+ try: # Extract ES data to be plotted on client side
88
+ geneset_name=json_object['geneset_name'] # Checks if geneset_name is present, if yes it indicates the server request is for generating the image. It retrieves the result.pkl file and generates the image without having to recompute gsea again.
89
+ pickle_file=json_object['pickle_file']
90
+ result = pd.read_pickle(os.path.join(cachedir,pickle_file))
91
+ fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True)
92
+ random_num = np.random.rand()
93
+ png_filename = "gsea_plot_" + str(random_num) + ".png"
94
+ fig.savefig(os.path.join(cachedir,png_filename), bbox_inches='tight')
95
+ #extract_plot_data(signature, geneset_name, msigdb_library, result) # This returns raw data to client side, not currently used
96
+ print ('image: {"image_file":"' + png_filename + '"}')
97
+ except KeyError: #Initial GSEA calculation, result saved to a result.pkl pickle file
98
+ # run enrichment analysis
99
+ start_gsea_time = time.time()
100
+ if __name__ == "__main__":
101
+ result = blitz.gsea(signature, msigdb_library).T
102
+ random_num = np.random.rand()
103
+ pickle_filename="gsea_result_"+ str(random_num) +".pkl"
104
+ result.to_pickle(os.path.join(cachedir,pickle_filename))
105
+ gsea_str='{"data":' + result.to_json() + '}'
106
+ pickle_str='{"pickle_file":"' + pickle_filename + '"}'
107
+ #print ("pickle_file:",pickle_str)
108
+ gsea_dict = json.loads(gsea_str)
109
+ pickle_dict = json.loads(pickle_str)
110
+ result_dict = {**gsea_dict, **pickle_dict}
111
+ print ("result:",json.dumps(result_dict))
112
+ stop_gsea_time = time.time()
113
+ gsea_time = stop_gsea_time - start_gsea_time
114
+ print (f"GSEA time: {gsea_time} seconds")
71
115
 
72
116
  else:
73
117
  pass