@sjcrh/proteinpaint-rust 2.58.0 → 2.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/downloadBinariesOrCompileSource.js +119 -0
- package/package.json +8 -3
- package/src/DEanalysis.rs +27 -6
- package/src/genesetORA.rs +15 -7
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
const https = require('https')
|
|
2
|
+
const fs = require('fs')
|
|
3
|
+
const os = require('os')
|
|
4
|
+
const path = require('path')
|
|
5
|
+
const { exec } = require('child_process')
|
|
6
|
+
const tar = require('tar')
|
|
7
|
+
|
|
8
|
+
// Read this package's own package.json to learn which version and GitHub
// release tag (pp_release_tag) the pre-compiled binaries were published under.
const packageJson = require(path.join(__dirname, 'package.json'))
const { version, pp_release_tag } = packageJson

// Directory (relative to this package) where the binaries are expected to live.
const targetDirectory = './target/release'
/**
 * Download the pre-compiled binary tarball from `url` into `outputPath`.
 * Follows HTTP redirects and falls back to compiling from source when the
 * download fails for any reason. On success the tarball is extracted and
 * removed by extractAndClean().
 * @param {string} url - HTTPS URL of the release tarball.
 * @param {string} outputPath - Local path to write the tarball to.
 */
function downloadBinary(url, outputPath) {
	const file = fs.createWriteStream(outputPath)
	// Close the stream and remove the (empty or partial) tarball so failed
	// attempts do not leak the file handle or leave junk on disk.
	const discardPartialFile = () => {
		file.close()
		fs.unlink(outputPath, () => {}) // best-effort cleanup; ignore errors
	}
	https
		.get(url, function (response) {
			if (response.statusCode >= 200 && response.statusCode <= 299) {
				response.pipe(file)
				file.on('finish', function () {
					file.close()
					console.log('Pre-compiled binaries download completed.')
					extractAndClean(outputPath)
				})
			} else if (response.statusCode >= 300 && response.statusCode <= 399) {
				// Redirect: release the current stream first so the recursive call
				// can reopen the same output path cleanly.
				discardPartialFile()
				if (response.headers.location) {
					downloadBinary(response.headers.location, outputPath)
				} else {
					console.error('Redirection without location header encountered.')
					compileFromSource()
				}
			} else {
				console.error('Failed to download the binary. Compiling from source...')
				discardPartialFile()
				compileFromSource()
			}
		})
		.on('error', function (err) {
			console.error('Error downloading the file:', err.message)
			discardPartialFile()
			compileFromSource()
		})
}
|
|
42
|
+
|
|
43
|
+
/**
 * Compile the Rust binaries from source with cargo, as a fallback when no
 * pre-compiled binary is available for this platform.
 * Streams cargo's stdout/stderr to the console and logs the final exit status.
 */
function compileFromSource() {
	console.log('Starting compilation from source...')
	const compileProcess = exec('cargo build --release', { cwd: path.join(__dirname) })

	// Forward cargo's output so the user can follow the build in real time.
	compileProcess.stdout.on('data', data => {
		console.log(data.toString())
	})
	compileProcess.stderr.on('data', data => {
		console.error(data.toString())
	})

	// Without an 'error' listener, a spawn failure (e.g. no shell available)
	// would be thrown as an uncaught exception; report it instead.
	compileProcess.on('error', err => {
		console.error(`Failed to start cargo: ${err.message}`)
	})

	compileProcess.on('exit', code => {
		if (code === 0) {
			console.log('Compilation successful.')
		} else {
			console.error(`Compilation failed with exit code ${code}`)
		}
	})
}
|
|
63
|
+
|
|
64
|
+
/**
 * Extract the downloaded tar.gz into target/release and delete the tarball.
 * @param {string} tarPath - Path of the downloaded tarball.
 */
function extractAndClean(tarPath) {
	// Resolve against this package's directory so extraction lands in the same
	// place makeBinariesExecutable() later chmods, regardless of process.cwd().
	const extractDir = path.join(__dirname, targetDirectory)

	// Ensure target directory exists before extracting into it.
	fs.mkdirSync(extractDir, { recursive: true })

	tar
		.x({
			file: tarPath,
			cwd: extractDir // extract into target/release
		})
		.then(() => {
			console.log('Extraction complete.')
			makeBinariesExecutable()
			// Remove the tarball after successful extraction.
			fs.unlink(tarPath, err => {
				if (err) {
					console.error('Error removing the tar file:', err)
				} else {
					console.log('Tar file removed.')
				}
			})
		})
		.catch(err => {
			console.error('Error during extraction:', err)
		})
}
|
|
90
|
+
|
|
91
|
+
/**
 * Recursively mark everything under target/release as executable.
 * Shells out to chmod(1), so this only works on POSIX systems — which matches
 * the linux-only pre-compiled binaries this script downloads.
 */
function makeBinariesExecutable() {
	// Construct the full path to the directory
	const fullPath = path.join(__dirname, targetDirectory)
	// Quote the path so a directory containing spaces does not split the command.
	const command = `chmod -R +x "${fullPath}"`

	exec(command, (error, stdout, stderr) => {
		if (error) {
			console.error(`Error setting executable permissions: ${error.message}`)
			return
		}
		if (stderr) {
			console.error(`Error output from chmod: ${stderr}`)
			return
		}
		console.log(`Set executable permissions for all files in ${fullPath}`)
	})
}
|
|
108
|
+
|
|
109
|
+
// Entry point: pre-compiled binaries are published only for linux x64 (the URL
// below is hard-coded to the linux-x64 tarball), so require both the platform
// and the architecture to match before downloading; otherwise compile locally.
const architecture = os.arch()
let binaryUrl = ''

if (os.platform() === 'linux' && architecture === 'x64') {
	binaryUrl = `https://github.com/stjude/proteinpaint/releases/download/${pp_release_tag}/rust-binaries-${version}-linux-x64.tar.gz`
	const outputPath = path.join(__dirname, 'binaries.tar.gz')
	downloadBinary(binaryUrl, outputPath)
} else {
	console.log('Unsupported architecture, attempting to compile from source...')
	compileFromSource()
}
|
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.
|
|
2
|
+
"version": "2.60.0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"description": "Rust-based utilities for proteinpaint",
|
|
5
5
|
"main": "index.js",
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
"scripts": {
|
|
10
10
|
"dev": "cargo build --release",
|
|
11
11
|
"build": "cargo build --release",
|
|
12
|
-
"postinstall": "if [ ! -d ./test ] & [ ! -d ./target/release ]; then
|
|
12
|
+
"postinstall": "if [ ! -d ./test ] && [ ! -d ./target/release ]; then node ./downloadBinariesOrCompileSource.js; fi",
|
|
13
13
|
"test": "tape **/test/*.spec.js",
|
|
14
14
|
"test:unit": "tape **/test/*.unit.spec.js",
|
|
15
15
|
"test:integration": "echo 'TODO: rust integration tests'"
|
|
@@ -25,13 +25,18 @@
|
|
|
25
25
|
"index.js",
|
|
26
26
|
"Cargo.toml",
|
|
27
27
|
"src",
|
|
28
|
+
"downloadBinariesOrCompileSource.js",
|
|
28
29
|
"LICENSE/*"
|
|
29
30
|
],
|
|
30
31
|
"bugs": {
|
|
31
32
|
"url": "https://github.com/stjude/proteinpaint"
|
|
32
33
|
},
|
|
33
34
|
"homepage": "https://github.com/stjude/proteinpaint#readme",
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"tar": "^7.1.0"
|
|
37
|
+
},
|
|
34
38
|
"devDependencies": {
|
|
35
39
|
"tape": "^5.2.2"
|
|
36
|
-
}
|
|
40
|
+
},
|
|
41
|
+
"pp_release_tag": "v2.60.0"
|
|
37
42
|
}
|
package/src/DEanalysis.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// cd .. && cargo build --release && json='{"case":"SJMB030827,SJMB030838,SJMB032893,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | target/release/DEanalysis
|
|
1
|
+
// cd .. && cargo build --release && json='{"min_count":10,"min_total_count":15,"case":"SJMB030827,SJMB030838,SJMB032893,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | target/release/DEanalysis
|
|
2
2
|
// cd .. && cargo build --release && time cat ~/sjpp/test.txt | target/release/DEanalysis
|
|
3
3
|
#![allow(non_snake_case)]
|
|
4
4
|
use json;
|
|
@@ -368,6 +368,22 @@ fn main() {
|
|
|
368
368
|
match input_json {
|
|
369
369
|
Ok(json_string) => {
|
|
370
370
|
let now = Instant::now();
|
|
371
|
+
let min_count_option = json_string["min_count"].as_f64().to_owned();
|
|
372
|
+
let min_total_count_option = json_string["min_total_count"].as_f64().to_owned();
|
|
373
|
+
let min_count;
|
|
374
|
+
match min_count_option {
|
|
375
|
+
Some(x) => min_count = x,
|
|
376
|
+
None => {
|
|
377
|
+
panic!("min_count is missing a value")
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
let min_total_count;
|
|
381
|
+
match min_total_count_option {
|
|
382
|
+
Some(x) => min_total_count = x,
|
|
383
|
+
None => {
|
|
384
|
+
panic!("min_total_count is missing a value")
|
|
385
|
+
}
|
|
386
|
+
}
|
|
371
387
|
let case_string = &json_string["case"].to_owned().as_str().unwrap().to_string();
|
|
372
388
|
let control_string = &json_string["control"]
|
|
373
389
|
.to_owned()
|
|
@@ -388,6 +404,8 @@ fn main() {
|
|
|
388
404
|
let filtering_time = Instant::now();
|
|
389
405
|
let (filtered_matrix, lib_sizes, filtered_genes, filtered_gene_symbols) =
|
|
390
406
|
filter_by_expr(
|
|
407
|
+
min_count,
|
|
408
|
+
min_total_count,
|
|
391
409
|
&input_matrix,
|
|
392
410
|
case_indexes.len(),
|
|
393
411
|
control_indexes.len(),
|
|
@@ -671,6 +689,7 @@ fn adjust_p_values_bonferroni(original_p_values: Vec<PValueIndexes>) -> Vec<Adju
|
|
|
671
689
|
adjusted_p_values
|
|
672
690
|
}
|
|
673
691
|
|
|
692
|
+
// Original TMM normalization source code in edgeR: https://rdrr.io/bioc/edgeR/src/R/calcNormFactors.R
|
|
674
693
|
fn tmm_normalization(
|
|
675
694
|
input_matrix: Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
|
|
676
695
|
lib_sizes: &Vec<f64>,
|
|
@@ -947,7 +966,10 @@ fn calc_quantile(mut input: Vec<f64>, p: f64) -> f64 {
|
|
|
947
966
|
qs_final
|
|
948
967
|
}
|
|
949
968
|
|
|
969
|
+
// Original filterByExpr source code in edgeR: https://rdrr.io/bioc/edgeR/src/R/filterByExpr.R
|
|
950
970
|
fn filter_by_expr(
|
|
971
|
+
min_count: f64,
|
|
972
|
+
min_total_count: f64,
|
|
951
973
|
raw_data: &Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
|
|
952
974
|
num_diseased: usize,
|
|
953
975
|
num_control: usize,
|
|
@@ -960,9 +982,8 @@ fn filter_by_expr(
|
|
|
960
982
|
Vec<String>,
|
|
961
983
|
) {
|
|
962
984
|
// Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>
|
|
963
|
-
|
|
964
|
-
const
|
|
965
|
-
const MIN_TOTAL_COUNT: f64 = 15.0; // Value of constant from R implementation
|
|
985
|
+
//const min_count: f64 = 10.0; // Value of constant from R implementation
|
|
986
|
+
//const min_total_count: f64 = 15.0; // Value of constant from R implementation
|
|
966
987
|
const LARGE_N: f64 = 10.0; // Value of constant from R implementation
|
|
967
988
|
const MIN_PROP: f64 = 0.7; // Value of constant from R implementation
|
|
968
989
|
|
|
@@ -989,7 +1010,7 @@ fn filter_by_expr(
|
|
|
989
1010
|
//println!("lib_sizes:{:?}", lib_sizes);
|
|
990
1011
|
//println!("min_sample_size:{}", min_sample_size);
|
|
991
1012
|
let median_lib_size = Data::new(lib_sizes.clone()).median();
|
|
992
|
-
let cpm_cutoff = (
|
|
1013
|
+
let cpm_cutoff = (min_count / median_lib_size) * 1000000.0;
|
|
993
1014
|
//println!("cpm_cutoff:{}", cpm_cutoff);
|
|
994
1015
|
let cpm_matrix = cpm(&raw_data);
|
|
995
1016
|
const TOL: f64 = 1e-14; // Value of constant from R implementation
|
|
@@ -1018,7 +1039,7 @@ fn filter_by_expr(
|
|
|
1018
1039
|
//}
|
|
1019
1040
|
|
|
1020
1041
|
let mut keep_total_bool = false;
|
|
1021
|
-
if row_sums[(row, 0)] as f64 >=
|
|
1042
|
+
if row_sums[(row, 0)] as f64 >= min_total_count - TOL {
|
|
1022
1043
|
keep_total_bool = true;
|
|
1023
1044
|
//keep_total.push(keep_total_bool);
|
|
1024
1045
|
//positive_total += 1;
|
package/src/genesetORA.rs
CHANGED
|
@@ -112,12 +112,12 @@ fn main() -> Result<()> {
|
|
|
112
112
|
+ &genesetgroup
|
|
113
113
|
+ "'"),
|
|
114
114
|
);
|
|
115
|
+
let mut iter = 0;
|
|
115
116
|
match stmt_result {
|
|
116
117
|
Ok(mut stmt) => {
|
|
117
118
|
#[allow(non_snake_case)]
|
|
118
119
|
let GO_iter =
|
|
119
120
|
stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
|
|
120
|
-
let mut iter = 0;
|
|
121
121
|
#[allow(non_snake_case)]
|
|
122
122
|
for GO_term in GO_iter {
|
|
123
123
|
iter += 1;
|
|
@@ -178,14 +178,15 @@ fn main() -> Result<()> {
|
|
|
178
178
|
}
|
|
179
179
|
}
|
|
180
180
|
}
|
|
181
|
-
println!("Number of pathway entries:{}", iter);
|
|
182
181
|
}
|
|
183
182
|
Err(_) => panic!("sqlite database file not found"),
|
|
184
183
|
}
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
184
|
+
let output_string = "{\"num_pathways\":".to_string()
|
|
185
|
+
+ &iter.to_string()
|
|
186
|
+
+ &",\"pathways\":"
|
|
187
|
+
+ &adjust_p_values(pathway_p_values, num_items_output)
|
|
188
|
+
+ &"}";
|
|
189
|
+
println!("pathway_p_values:{}", output_string);
|
|
189
190
|
println!(
|
|
190
191
|
"Time for calculating gene overrepresentation:{:?}",
|
|
191
192
|
run_time.elapsed()
|
|
@@ -199,7 +200,10 @@ fn main() -> Result<()> {
|
|
|
199
200
|
Ok(())
|
|
200
201
|
}
|
|
201
202
|
|
|
202
|
-
fn adjust_p_values(
|
|
203
|
+
fn adjust_p_values(
|
|
204
|
+
mut original_p_values: Vec<pathway_p_value>,
|
|
205
|
+
mut num_items_output: usize,
|
|
206
|
+
) -> String {
|
|
203
207
|
// Sorting p-values in ascending order
|
|
204
208
|
original_p_values.as_mut_slice().sort_by(|a, b| {
|
|
205
209
|
(a.p_value_original)
|
|
@@ -243,6 +247,10 @@ fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>, num_items_output
|
|
|
243
247
|
.unwrap_or(Ordering::Equal)
|
|
244
248
|
});
|
|
245
249
|
|
|
250
|
+
if num_items_output > adjusted_p_values.len() {
|
|
251
|
+
num_items_output = adjusted_p_values.len()
|
|
252
|
+
}
|
|
253
|
+
|
|
246
254
|
let mut output_string = "[".to_string();
|
|
247
255
|
for i in 0..num_items_output {
|
|
248
256
|
let j = adjusted_p_values.len() - i - 1;
|