@sjcrh/proteinpaint-server 2.117.0 → 2.118.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,7 +61,7 @@ if (!serverconfig.clustalo) serverconfig.clustalo = 'clustalo'
61
61
  if (!serverconfig.Rscript) serverconfig.Rscript = 'Rscript'
62
62
  if (!serverconfig.gfServer) serverconfig.gfServer = 'gfServer'
63
63
  if (!serverconfig.gfClient) serverconfig.gfClient = 'gfClient'
64
- if (!serverconfig.python) serverconfig.python = 'python3'
64
+ // note: server/src/app.ts uses `setPythonBinPath()` from `@sjcrh/proteinpaint-python`
65
65
  // NOTE: will set other cmd paths that require binpath after it's filled-in below
66
66
 
67
67
  /******************
package/utils/gsea.py DELETED
@@ -1,146 +0,0 @@
1
- # Test syntax: cat ~/sjpp/test.txt | time python gsea.py
2
- # test.txt contains the json string autogenerated by the commented out nodejs code.
3
- import blitzgsea as blitz
4
- import json
5
- import time
6
- import sys
7
- import sqlite3
8
- import random
9
- import os
10
- import numpy as np
11
- import pandas as pd
12
-
13
- # Helper function to extract gene symbols from a dictionary
14
- def extract_symbols(x):
15
- return x['symbol'] # Return the 'symbol' field from the dictionary
16
-
17
- # Main function
18
- try:
19
- # Check if there is input from stdin
20
- if sys.stdin.read(1):
21
- # Read each line from stdin
22
- for line in sys.stdin:
23
- # Parse the JSON input
24
- json_object = json.loads(line)
25
- cachedir = json_object['cachedir'] # Get the cache directory from the JSON object
26
- genes = json_object['genes'] # Get the genes from the JSON object
27
- fold_change = json_object['fold_change'] # Get the fold change values from the JSON object
28
- num_permutations = json_object['num_permutations'] # Number of permutations for GSEA analysis
29
- table_name = json_object['geneset_group'] # Get the gene set group from the JSON object
30
- filter_non_coding_genes = json_object['filter_non_coding_genes'] # Get the filter_non_coding_genes flag from the JSON object
31
- db = json_object['db'] # Get the database path from the JSON object
32
- # Create a DataFrame for the signature
33
- df = {'Genes': genes, 'fold_change': fold_change} # Create a dictionary with genes and fold change
34
- signature = pd.DataFrame(df) # Convert the dictionary to a DataFrame
35
-
36
- # Connect to the SQLite database
37
- conn = sqlite3.connect(db) # Connect to the SQLite database
38
- cursor = conn.cursor() # Create a cursor object
39
-
40
- msigdb_library = {} # Initialize an empty dictionary for the gene set library
41
- if table_name == "REACTOME--blitzgsea": # Parse from blitzgsea reactome library
42
- msigdb_library = blitz.enrichr.get_library("Reactome_2022")
43
- elif table_name == "KEGG--blitzgsea": # Parse from blitzgsea KEGG library
44
- msigdb_library = blitz.enrichr.get_library("KEGG_2021_Human")
45
- elif table_name == "WikiPathways--blitzgsea": # Parse from blitzgsea WikiPathways library
46
- msigdb_library = blitz.enrichr.get_library("WikiPathways_2019_Human")
47
- else: # Use geneset groups from msigdb
48
- # Query to get gene set IDs
49
- query = f"SELECT id FROM terms WHERE parent_id='{table_name}'" # SQL query to get gene set IDs
50
- cursor.execute(query) # Execute the query
51
-
52
- # Fetch all gene set IDs
53
- rows = cursor.fetchall() # Fetch all rows from the executed query
54
-
55
- start_loop_time = time.time() # Record the start time of the loop
56
-
57
- # Iterate over gene set IDs and fetch corresponding genes
58
- for row in rows:
59
- query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'" # SQL query to get genes for a gene set ID
60
- cursor.execute(query2) # Execute the query
61
- rows2 = cursor.fetchall() # Fetch all rows from the executed query
62
- row3 = json.loads(rows2[0][0]) # Parse the JSON data
63
- msigdb_library[row[0]] = list(set(map(extract_symbols, row3))) # Extract only unique gene symbols and add them to the library. "set" command selects only unique genes
64
- #print ("msigdb_library:",msigdb_library)
65
-
66
- # Close the cursor and connection to the database
67
- cursor.close() # Close the cursor
68
- conn.close() # Close the connection
69
-
70
- stop_loop_time = time.time() # Record the stop time of the loop
71
- execution_time = stop_loop_time - start_loop_time # Calculate the execution time
72
- print(f"Execution time: {execution_time} seconds") # Print the execution time
73
-
74
- # Filter out non-coding genes if specified
75
- if filter_non_coding_genes:
76
- coding_genes_query = "SELECT * FROM codingGenes" # SQL query to get coding genes
77
- genedb = json_object['genedb'] # Get the gene database path from the JSON object
78
- gene_conn = sqlite3.connect(genedb) # Connect to the gene database
79
- gene_cursor = gene_conn.cursor() # Create a cursor object for the gene database
80
- gene_cursor.execute(coding_genes_query) # Execute the query to get coding genes
81
- coding_genes_list = gene_cursor.fetchall() # Fetch all coding genes
82
- coding_genes_list = list(map(lambda x: x[0], coding_genes_list)) # Extract the gene symbols
83
- signature = signature[signature['Genes'].isin(coding_genes_list)] # Filter the signature to include only coding genes
84
-
85
- try:
86
- # Check if geneset_name and pickle_file are present for generating the plot
87
- geneset_name = json_object['geneset_name'] # Get the gene set name from the JSON object
88
- pickle_file = json_object['pickle_file'] # Get the pickle file name from the JSON object
89
- if os.path.isfile(os.path.join(cachedir, pickle_file)): # Check if the pickle file exists as it may not be in the same server that did the original GSEA computation
90
- result = pd.read_pickle(os.path.join(cachedir, pickle_file)) # Load the result from the pickle file
91
- fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) # Generate the running sum plot
92
- else: # If pickle file is not found, redo the GSEA computation from scratch
93
- result = blitz.gsea(signature, msigdb_library, permutations=num_permutations).T # Perform GSEA computation and transpose the result
94
- fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) # Generate the running sum plot
95
- result.to_pickle(os.path.join(cachedir, pickle_file)) # Save the result to a pickle file with same name
96
- random_num = np.random.rand() # Generate a random number for unique png filename
97
- png_filename = f"gsea_plot_{random_num}.png" # Create a filename for the plot
98
- fig.savefig(os.path.join(cachedir, png_filename), bbox_inches='tight') # Save the plot as a PNG file
99
- print(f'image: {{"image_file": "{png_filename}"}}') # Print the image file path in JSON format
100
- except KeyError:
101
- # Initial GSEA calculation and save the result to a pickle file
102
- start_gsea_time = time.time() # Record the start time of GSEA
103
- if __name__ == "__main__":
104
- result = blitz.gsea(signature, msigdb_library, permutations=num_permutations).T # Perform GSEA computation and transpose the result
105
- random_num = random.random() + time.time() # Generate a random number for unique pickle filename
106
- pickle_filename = f"gsea_result_{random_num}.pkl" # Create a filename for the pickle file
107
- result.to_pickle(os.path.join(cachedir, pickle_filename)) # Save the result to the pickle file
108
- gsea_str = f'{{"data": {result.to_json()}}}' # Convert the result to JSON format
109
- pickle_str = f'{{"pickle_file": "{pickle_filename}"}}' # Create a JSON string for the pickle file
110
- gsea_dict = json.loads(gsea_str) # Parse the JSON string
111
- pickle_dict = json.loads(pickle_str) # Parse the JSON string
112
- result_dict = {**gsea_dict, **pickle_dict} # Merge the dictionaries
113
- print(f"result: {json.dumps(result_dict)}") # Print the result in JSON format
114
- stop_gsea_time = time.time() # Record the stop time of GSEA
115
- gsea_time = stop_gsea_time - start_gsea_time # Calculate the GSEA execution time
116
- print(f"GSEA time: {gsea_time} seconds") # Print the GSEA execution time
117
- else:
118
- pass # Do nothing if there is no input from stdin
119
- except (EOFError, IOError):
120
- pass # Handle EOFError and IOError exceptions gracefully
121
-
122
- # Function to extract plot data for GSEA visualization (NOT currently being used, but will be used for generating client side gsea plots)
123
- def extract_plot_data(signature, geneset, library, result, center=True):
124
- print("signature", signature)
125
- print("result", result)
126
- print("geneset", geneset)
127
- print("library", library)
128
- signature = signature.copy() # Create a copy of the signature DataFrame
129
- signature.columns = ["i", "v"] # Rename columns to 'i' and 'v'
130
- signature = signature.sort_values("v", ascending=False).set_index("i") # Sort by 'v' in descending order and set 'i' as index
131
- signature = signature[~signature.index.duplicated(keep='first')] # Remove duplicate indices, keeping the first occurrence
132
-
133
- if center:
134
- signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"]) # Center the signature values by subtracting the mean
135
-
136
- signature_map = {h: i for i, h in enumerate(signature.index)} # Create a mapping of signature indices
137
-
138
- gs = set(library[geneset]) # Get the gene set from the library
139
- hits = [i for i, x in enumerate(signature.index) if x in gs] # Find the indices of hits in the signature
140
-
141
- running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs) # Compute running sum and enrichment score
142
- running_sum = list(running_sum) # Convert running sum to a list
143
- nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0] # Find the index of the maximum absolute running sum
144
-
145
- running_sum_str = [str(elem) for elem in running_sum] # Convert running sum elements to strings
146
- print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}') # Print the result in JSON format
@@ -1,112 +0,0 @@
1
- import nibabel as nib # Library for loading data from neuroimaging file formats such as NIfTI
2
- import matplotlib.pyplot as plt
3
- import matplotlib.colors as mcolors
4
- import matplotlib.cm as cm
5
- import numpy as np
6
- import sys
7
- import io
8
- import json
9
-
10
- if len(sys.argv) <= 1:
11
- print('python3 '+sys.argv[0]+' <path/to/template/file> plane index filesJson.\n filesJson: dictionary containg sample files and color per category).\nplane: L (left, sagittal), F (front, coronal), T (top, axial)')
12
- sys.exit(1)
13
-
14
- plane = sys.argv[2]
15
- if(plane != 'L' and plane != 'F' and plane != 'T'):
16
- print('Invalid plane')
17
- sys.exit(1)
18
- index = sys.argv[3]
19
-
20
- if(len(index) == 0):
21
- print('Need to provide index')
22
- sys.exit(1)
23
-
24
- templateFile = sys.argv[1]
25
-
26
- # load data from nifti files
27
- template = nib.load(templateFile).get_fdata()
28
-
29
- vmaxSamples = sys.argv[4]
30
-
31
- if(len(vmaxSamples) == 0):
32
- print('Need to provide max samples for normalization')
33
- sys.exit(1)
34
- vmaxSamples = int(vmaxSamples)
35
-
36
- sampleFiles = json.loads(sys.argv[5])
37
-
38
-
39
- index = int(index)
40
- # (left, sagittal), f (front, coronal), t (top, axial)
41
- if plane == 'L':
42
- slice = template[index,:,:]
43
- slice = np.rot90(slice)
44
- elif plane == 'F':
45
- slice = template[:,index,:]
46
- slice = np.flip(np.rot90(slice),axis=1)
47
-
48
- else:# plane == 'T'
49
- slice = template[:,:,index]
50
- slice = np.flip(np.rot90(slice),axis=1)
51
-
52
- fig, ax = plt.subplots(1, 1)
53
- ax.imshow(slice, 'gray', filternorm=False, vmin=0, vmax=100)
54
-
55
- for key, value in sampleFiles.items():
56
- if(len(value["samples"]) == 0) :
57
- continue
58
- # Load all sample files
59
- sample_data = [nib.load(file_path).get_fdata() for file_path in value["samples"]]
60
-
61
- # Initialize the result array with zeros
62
- labels = np.zeros_like(sample_data[0])
63
-
64
- # Sum all sample data
65
- for data in sample_data:
66
- labels += data
67
-
68
- labels = np.ma.masked_where(labels == 0, labels) # Mask labels where they are 0
69
-
70
-
71
- index = int(index)
72
- # (left, sagittal), f (front, coronal), t (top, axial)
73
- if plane == 'L':
74
- label = labels[index,:,:]
75
- label = np.rot90(label)
76
- elif plane == 'F':
77
- label = labels[:,index,:]
78
- label = np.flip(np.rot90(label),axis=1)
79
- else:# plane == 'T'
80
- label = labels[:,:,index]
81
- label = np.flip(np.rot90(label),axis=1)
82
-
83
-
84
-
85
- # create three subplots for sagittal, coronal and axial plane
86
- vmin = 0
87
- vmax = 100
88
- alpha = 0.6
89
-
90
- color = value['color']
91
- print(color)
92
- cmap = mcolors.LinearSegmentedColormap.from_list('my_cmap', ['white', color])
93
- ax.imshow(label, cmap, alpha=alpha, filternorm=False,vmin=0,vmax=vmaxSamples)
94
- ax.axis('off')
95
-
96
- # Create the color bar
97
- # if showLegend == 1:
98
- # # Create a color bar without changing figure size
99
- # norm = mcolors.Normalize(vmin=0, vmax=vmaxSamples)
100
- # sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
101
-
102
- # cbar = plt.colorbar(sm, ax=ax, orientation='vertical', fraction=0.01, pad=0.05, alpha=alpha)
103
- # cbar.set_label('Combined Intensity', color='white', fontsize=6, labelpad=-10)
104
- # cbar.ax.text(0.5, 1.0001, vmaxSamples, ha='center', va='bottom', transform=cbar.ax.transAxes, color='white', fontsize=6)
105
- # cbar.ax.text(0.5, -0.0001, 0, ha='center', va='top', transform=cbar.ax.transAxes, color='white', fontsize=6)
106
-
107
- # Output the image data to stdout
108
- buf = io.BytesIO()
109
- plt.savefig(buf, format='png', bbox_inches='tight', facecolor='k')
110
- buf.seek(0)
111
- sys.stdout.buffer.write(buf.getvalue())
112
- plt.close()