@sjcrh/proteinpaint-server 2.117.0 → 2.118.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/routes/brainImaging.js +15 -26
- package/routes/brainImagingSamples.js +1 -55
- package/routes/genesetEnrichment.js +2 -56
- package/src/app.js +474 -593
- package/src/serverconfig.js +1 -1
- package/utils/gsea.py +0 -146
- package/utils/plotBrainImaging.py +0 -112
package/src/serverconfig.js
CHANGED
|
@@ -61,7 +61,7 @@ if (!serverconfig.clustalo) serverconfig.clustalo = 'clustalo'
|
|
|
61
61
|
if (!serverconfig.Rscript) serverconfig.Rscript = 'Rscript'
|
|
62
62
|
if (!serverconfig.gfServer) serverconfig.gfServer = 'gfServer'
|
|
63
63
|
if (!serverconfig.gfClient) serverconfig.gfClient = 'gfClient'
|
|
64
|
-
|
|
64
|
+
// note: server/src/app.ts uses `setPythonBinPath()` from `@sjcrh/proteinpaint-python`
|
|
65
65
|
// NOTE: will set other cmd paths that require binpath after it's filled-in below
|
|
66
66
|
|
|
67
67
|
/******************
|
package/utils/gsea.py
DELETED
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
# Test syntax: cat ~/sjpp/test.txt | time python gsea.py
|
|
2
|
-
# test.txt contains the json string autogenerated by the commented out nodejs code.
|
|
3
|
-
import blitzgsea as blitz
|
|
4
|
-
import json
|
|
5
|
-
import time
|
|
6
|
-
import sys
|
|
7
|
-
import sqlite3
|
|
8
|
-
import random
|
|
9
|
-
import os
|
|
10
|
-
import numpy as np
|
|
11
|
-
import pandas as pd
|
|
12
|
-
|
|
13
|
-
# Helper function to extract gene symbols from a dictionary
|
|
14
|
-
def extract_symbols(x):
|
|
15
|
-
return x['symbol'] # Return the 'symbol' field from the dictionary
|
|
16
|
-
|
|
17
|
-
# Main function
|
|
18
|
-
try:
|
|
19
|
-
# Check if there is input from stdin
|
|
20
|
-
if sys.stdin.read(1):
|
|
21
|
-
# Read each line from stdin
|
|
22
|
-
for line in sys.stdin:
|
|
23
|
-
# Parse the JSON input
|
|
24
|
-
json_object = json.loads(line)
|
|
25
|
-
cachedir = json_object['cachedir'] # Get the cache directory from the JSON object
|
|
26
|
-
genes = json_object['genes'] # Get the genes from the JSON object
|
|
27
|
-
fold_change = json_object['fold_change'] # Get the fold change values from the JSON object
|
|
28
|
-
num_permutations = json_object['num_permutations'] # Number of permutations for GSEA analysis
|
|
29
|
-
table_name = json_object['geneset_group'] # Get the gene set group from the JSON object
|
|
30
|
-
filter_non_coding_genes = json_object['filter_non_coding_genes'] # Get the filter_non_coding_genes flag from the JSON object
|
|
31
|
-
db = json_object['db'] # Get the database path from the JSON object
|
|
32
|
-
# Create a DataFrame for the signature
|
|
33
|
-
df = {'Genes': genes, 'fold_change': fold_change} # Create a dictionary with genes and fold change
|
|
34
|
-
signature = pd.DataFrame(df) # Convert the dictionary to a DataFrame
|
|
35
|
-
|
|
36
|
-
# Connect to the SQLite database
|
|
37
|
-
conn = sqlite3.connect(db) # Connect to the SQLite database
|
|
38
|
-
cursor = conn.cursor() # Create a cursor object
|
|
39
|
-
|
|
40
|
-
msigdb_library = {} # Initialize an empty dictionary for the gene set library
|
|
41
|
-
if table_name == "REACTOME--blitzgsea": # Parse from blitzgsea reactome library
|
|
42
|
-
msigdb_library = blitz.enrichr.get_library("Reactome_2022")
|
|
43
|
-
elif table_name == "KEGG--blitzgsea": # Parse from blitzgsea KEGG library
|
|
44
|
-
msigdb_library = blitz.enrichr.get_library("KEGG_2021_Human")
|
|
45
|
-
elif table_name == "WikiPathways--blitzgsea": # Parse from blitzgsea WikiPathways library
|
|
46
|
-
msigdb_library = blitz.enrichr.get_library("WikiPathways_2019_Human")
|
|
47
|
-
else: # Use geneset groups from msigdb
|
|
48
|
-
# Query to get gene set IDs
|
|
49
|
-
query = f"SELECT id FROM terms WHERE parent_id='{table_name}'" # SQL query to get gene set IDs
|
|
50
|
-
cursor.execute(query) # Execute the query
|
|
51
|
-
|
|
52
|
-
# Fetch all gene set IDs
|
|
53
|
-
rows = cursor.fetchall() # Fetch all rows from the executed query
|
|
54
|
-
|
|
55
|
-
start_loop_time = time.time() # Record the start time of the loop
|
|
56
|
-
|
|
57
|
-
# Iterate over gene set IDs and fetch corresponding genes
|
|
58
|
-
for row in rows:
|
|
59
|
-
query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'" # SQL query to get genes for a gene set ID
|
|
60
|
-
cursor.execute(query2) # Execute the query
|
|
61
|
-
rows2 = cursor.fetchall() # Fetch all rows from the executed query
|
|
62
|
-
row3 = json.loads(rows2[0][0]) # Parse the JSON data
|
|
63
|
-
msigdb_library[row[0]] = list(set(map(extract_symbols, row3))) # Extract only unique gene symbols and add them to the library. "set" command selects only unique genes
|
|
64
|
-
#print ("msigdb_library:",msigdb_library)
|
|
65
|
-
|
|
66
|
-
# Close the cursor and connection to the database
|
|
67
|
-
cursor.close() # Close the cursor
|
|
68
|
-
conn.close() # Close the connection
|
|
69
|
-
|
|
70
|
-
stop_loop_time = time.time() # Record the stop time of the loop
|
|
71
|
-
execution_time = stop_loop_time - start_loop_time # Calculate the execution time
|
|
72
|
-
print(f"Execution time: {execution_time} seconds") # Print the execution time
|
|
73
|
-
|
|
74
|
-
# Filter out non-coding genes if specified
|
|
75
|
-
if filter_non_coding_genes:
|
|
76
|
-
coding_genes_query = "SELECT * FROM codingGenes" # SQL query to get coding genes
|
|
77
|
-
genedb = json_object['genedb'] # Get the gene database path from the JSON object
|
|
78
|
-
gene_conn = sqlite3.connect(genedb) # Connect to the gene database
|
|
79
|
-
gene_cursor = gene_conn.cursor() # Create a cursor object for the gene database
|
|
80
|
-
gene_cursor.execute(coding_genes_query) # Execute the query to get coding genes
|
|
81
|
-
coding_genes_list = gene_cursor.fetchall() # Fetch all coding genes
|
|
82
|
-
coding_genes_list = list(map(lambda x: x[0], coding_genes_list)) # Extract the gene symbols
|
|
83
|
-
signature = signature[signature['Genes'].isin(coding_genes_list)] # Filter the signature to include only coding genes
|
|
84
|
-
|
|
85
|
-
try:
|
|
86
|
-
# Check if geneset_name and pickle_file are present for generating the plot
|
|
87
|
-
geneset_name = json_object['geneset_name'] # Get the gene set name from the JSON object
|
|
88
|
-
pickle_file = json_object['pickle_file'] # Get the pickle file name from the JSON object
|
|
89
|
-
if os.path.isfile(os.path.join(cachedir, pickle_file)): # Check if the pickle file exists as it may not be in the same server that did the original GSEA computation
|
|
90
|
-
result = pd.read_pickle(os.path.join(cachedir, pickle_file)) # Load the result from the pickle file
|
|
91
|
-
fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) # Generate the running sum plot
|
|
92
|
-
else: # If pickle file is not found, redo the GSEA computation from scratch
|
|
93
|
-
result = blitz.gsea(signature, msigdb_library, permutations=num_permutations).T # Perform GSEA computation and transpose the result
|
|
94
|
-
fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) # Generate the running sum plot
|
|
95
|
-
result.to_pickle(os.path.join(cachedir, pickle_file)) # Save the result to a pickle file with same name
|
|
96
|
-
random_num = np.random.rand() # Generate a random number for unique png filename
|
|
97
|
-
png_filename = f"gsea_plot_{random_num}.png" # Create a filename for the plot
|
|
98
|
-
fig.savefig(os.path.join(cachedir, png_filename), bbox_inches='tight') # Save the plot as a PNG file
|
|
99
|
-
print(f'image: {{"image_file": "{png_filename}"}}') # Print the image file path in JSON format
|
|
100
|
-
except KeyError:
|
|
101
|
-
# Initial GSEA calculation and save the result to a pickle file
|
|
102
|
-
start_gsea_time = time.time() # Record the start time of GSEA
|
|
103
|
-
if __name__ == "__main__":
|
|
104
|
-
result = blitz.gsea(signature, msigdb_library, permutations=num_permutations).T # Perform GSEA computation and transpose the result
|
|
105
|
-
random_num = random.random() + time.time() # Generate a random number for unique pickle filename
|
|
106
|
-
pickle_filename = f"gsea_result_{random_num}.pkl" # Create a filename for the pickle file
|
|
107
|
-
result.to_pickle(os.path.join(cachedir, pickle_filename)) # Save the result to the pickle file
|
|
108
|
-
gsea_str = f'{{"data": {result.to_json()}}}' # Convert the result to JSON format
|
|
109
|
-
pickle_str = f'{{"pickle_file": "{pickle_filename}"}}' # Create a JSON string for the pickle file
|
|
110
|
-
gsea_dict = json.loads(gsea_str) # Parse the JSON string
|
|
111
|
-
pickle_dict = json.loads(pickle_str) # Parse the JSON string
|
|
112
|
-
result_dict = {**gsea_dict, **pickle_dict} # Merge the dictionaries
|
|
113
|
-
print(f"result: {json.dumps(result_dict)}") # Print the result in JSON format
|
|
114
|
-
stop_gsea_time = time.time() # Record the stop time of GSEA
|
|
115
|
-
gsea_time = stop_gsea_time - start_gsea_time # Calculate the GSEA execution time
|
|
116
|
-
print(f"GSEA time: {gsea_time} seconds") # Print the GSEA execution time
|
|
117
|
-
else:
|
|
118
|
-
pass # Do nothing if there is no input from stdin
|
|
119
|
-
except (EOFError, IOError):
|
|
120
|
-
pass # Handle EOFError and IOError exceptions gracefully
|
|
121
|
-
|
|
122
|
-
# Function to extract plot data for GSEA visualization (NOT currently being used, but will be used for generating client side gsea plots)
|
|
123
|
-
def extract_plot_data(signature, geneset, library, result, center=True):
|
|
124
|
-
print("signature", signature)
|
|
125
|
-
print("result", result)
|
|
126
|
-
print("geneset", geneset)
|
|
127
|
-
print("library", library)
|
|
128
|
-
signature = signature.copy() # Create a copy of the signature DataFrame
|
|
129
|
-
signature.columns = ["i", "v"] # Rename columns to 'i' and 'v'
|
|
130
|
-
signature = signature.sort_values("v", ascending=False).set_index("i") # Sort by 'v' in descending order and set 'i' as index
|
|
131
|
-
signature = signature[~signature.index.duplicated(keep='first')] # Remove duplicate indices, keeping the first occurrence
|
|
132
|
-
|
|
133
|
-
if center:
|
|
134
|
-
signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"]) # Center the signature values by subtracting the mean
|
|
135
|
-
|
|
136
|
-
signature_map = {h: i for i, h in enumerate(signature.index)} # Create a mapping of signature indices
|
|
137
|
-
|
|
138
|
-
gs = set(library[geneset]) # Get the gene set from the library
|
|
139
|
-
hits = [i for i, x in enumerate(signature.index) if x in gs] # Find the indices of hits in the signature
|
|
140
|
-
|
|
141
|
-
running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs) # Compute running sum and enrichment score
|
|
142
|
-
running_sum = list(running_sum) # Convert running sum to a list
|
|
143
|
-
nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0] # Find the index of the maximum absolute running sum
|
|
144
|
-
|
|
145
|
-
running_sum_str = [str(elem) for elem in running_sum] # Convert running sum elements to strings
|
|
146
|
-
print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}') # Print the result in JSON format
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
import nibabel as nib # Library for loading data from neuroimaging file formats such as NIfTI
|
|
2
|
-
import matplotlib.pyplot as plt
|
|
3
|
-
import matplotlib.colors as mcolors
|
|
4
|
-
import matplotlib.cm as cm
|
|
5
|
-
import numpy as np
|
|
6
|
-
import sys
|
|
7
|
-
import io
|
|
8
|
-
import json
|
|
9
|
-
|
|
10
|
-
if len(sys.argv) <= 1:
|
|
11
|
-
print('python3 '+sys.argv[0]+' <path/to/template/file> plane index filesJson.\n filesJson: dictionary containg sample files and color per category).\nplane: L (left, sagittal), F (front, coronal), T (top, axial)')
|
|
12
|
-
sys.exit(1)
|
|
13
|
-
|
|
14
|
-
plane = sys.argv[2]
|
|
15
|
-
if(plane != 'L' and plane != 'F' and plane != 'T'):
|
|
16
|
-
print('Invalid plane')
|
|
17
|
-
sys.exit(1)
|
|
18
|
-
index = sys.argv[3]
|
|
19
|
-
|
|
20
|
-
if(len(index) == 0):
|
|
21
|
-
print('Need to provide index')
|
|
22
|
-
sys.exit(1)
|
|
23
|
-
|
|
24
|
-
templateFile = sys.argv[1]
|
|
25
|
-
|
|
26
|
-
# load data from nifti files
|
|
27
|
-
template = nib.load(templateFile).get_fdata()
|
|
28
|
-
|
|
29
|
-
vmaxSamples = sys.argv[4]
|
|
30
|
-
|
|
31
|
-
if(len(vmaxSamples) == 0):
|
|
32
|
-
print('Need to provide max samples for normalization')
|
|
33
|
-
sys.exit(1)
|
|
34
|
-
vmaxSamples = int(vmaxSamples)
|
|
35
|
-
|
|
36
|
-
sampleFiles = json.loads(sys.argv[5])
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
index = int(index)
|
|
40
|
-
# (left, sagittal), f (front, coronal), t (top, axial)
|
|
41
|
-
if plane == 'L':
|
|
42
|
-
slice = template[index,:,:]
|
|
43
|
-
slice = np.rot90(slice)
|
|
44
|
-
elif plane == 'F':
|
|
45
|
-
slice = template[:,index,:]
|
|
46
|
-
slice = np.flip(np.rot90(slice),axis=1)
|
|
47
|
-
|
|
48
|
-
else:# plane == 'T'
|
|
49
|
-
slice = template[:,:,index]
|
|
50
|
-
slice = np.flip(np.rot90(slice),axis=1)
|
|
51
|
-
|
|
52
|
-
fig, ax = plt.subplots(1, 1)
|
|
53
|
-
ax.imshow(slice, 'gray', filternorm=False, vmin=0, vmax=100)
|
|
54
|
-
|
|
55
|
-
for key, value in sampleFiles.items():
|
|
56
|
-
if(len(value["samples"]) == 0) :
|
|
57
|
-
continue
|
|
58
|
-
# Load all sample files
|
|
59
|
-
sample_data = [nib.load(file_path).get_fdata() for file_path in value["samples"]]
|
|
60
|
-
|
|
61
|
-
# Initialize the result array with zeros
|
|
62
|
-
labels = np.zeros_like(sample_data[0])
|
|
63
|
-
|
|
64
|
-
# Sum all sample data
|
|
65
|
-
for data in sample_data:
|
|
66
|
-
labels += data
|
|
67
|
-
|
|
68
|
-
labels = np.ma.masked_where(labels == 0, labels) # Mask labels where they are 0
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
index = int(index)
|
|
72
|
-
# (left, sagittal), f (front, coronal), t (top, axial)
|
|
73
|
-
if plane == 'L':
|
|
74
|
-
label = labels[index,:,:]
|
|
75
|
-
label = np.rot90(label)
|
|
76
|
-
elif plane == 'F':
|
|
77
|
-
label = labels[:,index,:]
|
|
78
|
-
label = np.flip(np.rot90(label),axis=1)
|
|
79
|
-
else:# plane == 'T'
|
|
80
|
-
label = labels[:,:,index]
|
|
81
|
-
label = np.flip(np.rot90(label),axis=1)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
# create three subplots for sagittal, coronal and axial plane
|
|
86
|
-
vmin = 0
|
|
87
|
-
vmax = 100
|
|
88
|
-
alpha = 0.6
|
|
89
|
-
|
|
90
|
-
color = value['color']
|
|
91
|
-
print(color)
|
|
92
|
-
cmap = mcolors.LinearSegmentedColormap.from_list('my_cmap', ['white', color])
|
|
93
|
-
ax.imshow(label, cmap, alpha=alpha, filternorm=False,vmin=0,vmax=vmaxSamples)
|
|
94
|
-
ax.axis('off')
|
|
95
|
-
|
|
96
|
-
# Create the color bar
|
|
97
|
-
# if showLegend == 1:
|
|
98
|
-
# # Create a color bar without changing figure size
|
|
99
|
-
# norm = mcolors.Normalize(vmin=0, vmax=vmaxSamples)
|
|
100
|
-
# sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
|
|
101
|
-
|
|
102
|
-
# cbar = plt.colorbar(sm, ax=ax, orientation='vertical', fraction=0.01, pad=0.05, alpha=alpha)
|
|
103
|
-
# cbar.set_label('Combined Intensity', color='white', fontsize=6, labelpad=-10)
|
|
104
|
-
# cbar.ax.text(0.5, 1.0001, vmaxSamples, ha='center', va='bottom', transform=cbar.ax.transAxes, color='white', fontsize=6)
|
|
105
|
-
# cbar.ax.text(0.5, -0.0001, 0, ha='center', va='top', transform=cbar.ax.transAxes, color='white', fontsize=6)
|
|
106
|
-
|
|
107
|
-
# Output the image data to stdout
|
|
108
|
-
buf = io.BytesIO()
|
|
109
|
-
plt.savefig(buf, format='png', bbox_inches='tight', facecolor='k')
|
|
110
|
-
buf.seek(0)
|
|
111
|
-
sys.stdout.buffer.write(buf.getvalue())
|
|
112
|
-
plt.close()
|