DomFun 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/DomFun.gemspec +44 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/bin/add_protein_functional_families.rb +133 -0
- data/bin/console +14 -0
- data/bin/domains_to_function_predictor.rb +287 -0
- data/bin/generate_CAFA2_dataset.rb +135 -0
- data/bin/generate_CAFA2_tripartite_network.rb +139 -0
- data/bin/generate_cafa_control.rb +45 -0
- data/bin/get_kegg_pathways.R +12 -0
- data/bin/lines.R +74 -0
- data/bin/merge_pairs.rb +139 -0
- data/bin/normalize_combined_scores.rb +118 -0
- data/bin/prepare_cafa_network.rb +96 -0
- data/bin/setup +8 -0
- data/bin/standardize_scores.R +53 -0
- data/bin/translate_kegg_genes2pathways.rb +98 -0
- data/bin/validate_ProtFunSys_predictions.rb +174 -0
- data/lib/DomFun.rb +6 -0
- data/lib/DomFun/generalMethods.rb +105 -0
- data/lib/DomFun/version.rb +3 -0
- metadata +128 -0
@@ -0,0 +1,118 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
##########################
|
3
|
+
# Rojano E. & Seoane P., July 2019
|
4
|
+
# Normalize combined scores for their use with CAFA validation
|
5
|
+
##########################
|
6
|
+
|
7
|
+
require 'optparse'
|
8
|
+
|
9
|
+
##########################
|
10
|
+
#METHODS
|
11
|
+
##########################
|
12
|
+
# Basic descriptive statistics made available on every Enumerable.
module Enumerable

  # Fallback for interpreters that lack a native Enumerable#sum.
  # When the interpreter already provides #sum it is left untouched, so its
  # block form and initial-value argument keep working for every collection.
  unless method_defined?(:sum)
    def sum
      inject(0) { |accum, i| accum + i }
    end
  end

  # Arithmetic mean of the elements, as a Float.
  # Returns NaN for an empty collection (0 / 0.0).
  def mean
    # `count` belongs to Enumerable itself; `length` does not, so using it
    # would fail on collections such as Range.
    sum / count.to_f
  end

  # Unbiased sample variance (divides by n - 1).
  # Returns NaN when the collection holds a single element.
  def sample_variance
    m = mean
    squared_deviations = inject(0) { |accum, i| accum + (i - m)**2 }
    squared_deviations / (count - 1).to_f
  end

  # Sample standard deviation: square root of #sample_variance.
  def standard_deviation
    Math.sqrt(sample_variance)
  end

end
|
33
|
+
|
34
|
+
# Reads a tab-separated predictions file.
#
# Each line is split on tabs into protein ID, domains, functional system and
# combined score; the score is converted with #to_f (a missing or
# non-numeric score therefore becomes 0.0).
#
# input_file - path of the file to read.
#
# Returns an Array of [protID, domains, funSys, combScore(Float)] rows, in
# file order.
def load_predictions(input_file)
  predictions_data = []
  # File.foreach closes the file once iteration ends, unlike a bare File.open.
  File.foreach(input_file) do |line|
    protID, domains, funSys, combScore = line.chomp.split("\t")
    predictions_data << [protID, domains, funSys, combScore.to_f]
  end
  return predictions_data
end
|
43
|
+
|
44
|
+
##########################
|
45
|
+
#OPT-PARSER
|
46
|
+
##########################
|
47
|
+
# Command-line options, pre-filled with their default values.
options = {
  input_file: nil,
  integration_method: 'fisher',
  normalization_mode: 'normal',
  output_file: 'normalized_associations.txt'
}

cli = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-a", "--input_file PATH", "Input file with association values to normalize") { |path| options[:input_file] = path }

  opts.on("-i", "--integration_method STRING", "Integration method") { |name| options[:integration_method] = name }

  opts.on("-m", "--normalization_mode STRING", "Normalization mode: normal or max") { |mode| options[:normalization_mode] = mode }

  opts.on("-o", "--output_file PATH", "Output association file with normalized values") { |path| options[:output_file] = path }

  # Print the generated usage text and stop.
  opts.on_tail("-h", "--help", "Tool information") do
    puts opts
    exit
  end
end
# Consumes the recognised switches from ARGV.
cli.parse!
|
77
|
+
|
78
|
+
##########################
|
79
|
+
#MAIN
|
80
|
+
##########################
|
81
|
+
# Rescales the combined score of every prediction and writes the rows back
# out as tab-separated text.
abort('ERROR: an input file is required (-a/--input_file)') if options[:input_file].nil?

predictions_data = load_predictions(options[:input_file])
standardized_data = []

if options[:integration_method] == 'fisher'
  predictions_data.each do |protID, domains, funSys, combScore|
    #Remember that fisher's integration most significant value is 0
    stdScore = 1 - combScore
    #CAFA validation score must be in range (0,1]
    standardized_data << [protID, domains, funSys, stdScore] if stdScore > 0.001
  end
else
  normalization_mode = options[:normalization_mode]
  # Reject unknown modes up front instead of silently writing empty scores.
  unless %w[normal max].include?(normalization_mode)
    abort("ERROR: unknown normalization mode '#{normalization_mode}' (expected 'normal' or 'max')")
  end

  #https://www.researchgate.net/post/How_do_i_normalize_data_from_0_to_1_range
  combScores = predictions_data.map { |a| a[3] }
  combScoresAverage = combScores.mean
  combScoresSD = combScores.standard_deviation
  maxCombScore = combScores.max
  # With a single score, or with all scores equal, the deviation is NaN or 0
  # and a z-score cannot be computed.
  degenerate_spread = combScoresSD.nan? || combScoresSD.zero?

  predictions_data.each do |protID, domains, funSys, combScore|
    if normalization_mode == 'normal'
      # z-score; scores are treated as sitting on the mean when there is no spread
      score = degenerate_spread ? 0.0 : (combScore - combScoresAverage).fdiv(combScoresSD)
      # Clip to [-2, 2] and map that interval linearly onto [0, 1]
      score = [[score, 2].min, -2].max
      stdScore = score.fdiv(4) + 0.5
    else
      # 'max' mode: scale by the largest combined score
      stdScore = combScore.fdiv(maxCombScore)
    end
    #CAFA validation score must be in range (0,1]
    # NOTE(review): no lower-bound filter is applied here (it was disabled in
    # the original), so values at or below 0 can be written.
    standardized_data << [protID, domains, funSys, stdScore]
  end
end

# Block form guarantees the output file is closed even if a write fails.
File.open(options[:output_file], 'w') do |handler|
  standardized_data.each do |data|
    handler.puts data.join("\t")
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
##########################
|
3
|
+
# Rojano E. & Seoane P., July 2019
|
4
|
+
# Generate GO tripartite networks filtered with CAFA data
|
5
|
+
# For its use as system control (only GO)
|
6
|
+
##########################
|
7
|
+
|
8
|
+
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
9
|
+
ROOT_PATH = File.dirname(__FILE__)
|
10
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'DomFun'))
|
11
|
+
require 'optparse'
|
12
|
+
require 'generalMethods.rb'
|
13
|
+
|
14
|
+
##########################
|
15
|
+
#METHODS
|
16
|
+
##########################
|
17
|
+
|
18
|
+
# Reads a two-column, tab-separated network file (term, gene) and groups the
# terms of each gene into two layers.
#
# Terms containing 'GO:' go to the first hash; every other term goes to the
# second one. Blank lines are ignored.
#
# network - path of the network file.
#
# Returns [go_gene_rels, domain_gene_rels], both Hashes of
# gene => Array of terms (in file order).
def load_network_data(network)
  go_gene_rels = {}
  domain_gene_rels = {}
  # File.foreach closes the file once iteration ends.
  File.foreach(network) do |line|
    term, gene = line.chomp.split("\t")
    next if term.nil? # blank line: nothing to classify
    layer = term.include?('GO:') ? go_gene_rels : domain_gene_rels
    (layer[gene] ||= []) << term
  end
  return go_gene_rels, domain_gene_rels
end
|
42
|
+
|
43
|
+
# Drops from term_protein_rels every gene that appears as a key in cafa_data.
#
# term_protein_rels - Hash keyed by gene; it is modified in place.
# cafa_data         - collection iterated as (gene, annotations) pairs; only
#                     the gene is used.
#
# Returns the same (now filtered) term_protein_rels object.
def check_genes(term_protein_rels, cafa_data)
  cafa_data.each_with_object(term_protein_rels) do |(cafa_gene, _annotations), rels|
    rels.delete(cafa_gene)
  end
end
|
49
|
+
##########################
|
50
|
+
#OPT-PARSER
|
51
|
+
##########################
|
52
|
+
# Command-line options, pre-filled with their default values.
options = {
  cafa_data: nil,
  input_network: nil,
  output_network: 'cafa_network.txt'
}

cli = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-a", "--cafa_data PATH", "Input CAFA gene annotations") { |path| options[:cafa_data] = path }

  opts.on("-n", "--input_network PATH", "Input network to parse") { |path| options[:input_network] = path }

  opts.on("-o", "--output_network PATH", "Output network without CAFA proteins") { |path| options[:output_network] = path }

  # Print the generated usage text and stop.
  opts.on_tail("-h", "--help", "Show tool help") do
    puts opts
    exit
  end
end
# Consumes the recognised switches from ARGV.
cli.parse!
|
77
|
+
|
78
|
+
##########################
|
79
|
+
#MAIN
|
80
|
+
##########################
|
81
|
+
# Removes the CAFA genes from both network layers and writes the remaining
# term-gene pairs, GO layer first and domain layer second.
abort('ERROR: CAFA gene annotations are required (-a/--cafa_data)') if options[:cafa_data].nil?
abort('ERROR: an input network is required (-n/--input_network)') if options[:input_network].nil?

# load_cafa_data is not defined in this script; presumably it comes from the
# required generalMethods.rb.
cafa_data = load_cafa_data(options[:cafa_data])
go_gene_rels, domain_gene_rels = load_network_data(options[:input_network])
genes2gos_layer = check_genes(go_gene_rels, cafa_data)
genes2domains_layer = check_genes(domain_gene_rels, cafa_data)

# Block form guarantees the output file is closed even if a write fails.
File.open(options[:output_network], 'w') do |handler|
  [genes2gos_layer, genes2domains_layer].each do |layer|
    layer.each do |gene, terms|
      terms.each do |term|
        handler.puts "#{term}\t#{gene}"
      end
    end
  end
end
|
data/bin/setup
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#! /usr/bin/env Rscript
|
2
|
+
|
3
|
+
library(optparse)
|
4
|
+
library(ggplot2)
|
5
|
+
#####################
|
6
|
+
## OPTPARSE
|
7
|
+
#####################
|
8
|
+
# Command-line switches understood by the script.
option_list <- list(
  make_option(c("-d", "--data_file"), type="character",
    help="Tabulated file with information about each sample"),
  make_option(c("-o", "--output"), type="character", default="results",
    help="Output figure file"),
  make_option(c("-e", "--external_score"), type="double", default=NULL,
    help="Use external score"),
  make_option(c("-s", "--set_column"), type="character", default="",
    help="Name of column to be converted to Z-scores"),
  make_option(c("-p", "--plot_distribution"), action="store_true", default=FALSE,
    help="Print plot distribution")
)
opt <- parse_args(OptionParser(option_list=option_list))


################################################################
## MAIN
################################################################

# The table is read without a header row, so the column is looked up by the
# name R assigns to it.
data <- read.table(opt$data_file, sep="\t", header=FALSE)
raw_data <- data[[opt$set_column]]

# When an external score is supplied it is placed first, so that it takes part
# in the standardization and can be found again at position 1.
has_external_score <- !is.null(opt$external_score)
if (has_external_score) {
  raw_data <- c(opt$external_score, raw_data)
}

# Plot mode: print the data computed for a boxplot of the values and stop
# without writing the output table.
if (opt$plot_distribution) {
  dataframe <- as.data.frame(raw_data)
  colnames(dataframe) <- c("AssociationValue")
  plot <- ggplot(dataframe, aes(y=AssociationValue)) +
    geom_boxplot()
  print(ggplot_build(plot))
  quit(save = "default", status = 0, runLast = TRUE)
}

# Center and scale the values (Z-scores).
z_scores <- scale(raw_data, center=TRUE, scale=TRUE)
if (has_external_score) {
  # Report the Z-score of the external value, then drop it again.
  cat(sep="", "ExtZScore\t", z_scores[1], "\n")
  z_scores <- z_scores[-1] #remove external score
}

# Replace the selected column by its Z-scores and write the table back out.
data[[opt$set_column]] <- z_scores

write.table(data, file=opt$output, quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
##########################
|
3
|
+
# Rojano E. & Seoane P., July 2019
|
4
|
+
# Translate KEGG genes into pathways
|
5
|
+
##########################
|
6
|
+
|
7
|
+
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
8
|
+
ROOT_PATH = File.dirname(__FILE__)
|
9
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'DomFun'))
|
10
|
+
# require 'generalMethods.rb'
|
11
|
+
require 'optparse'
|
12
|
+
|
13
|
+
##########################
|
14
|
+
#METHODS
|
15
|
+
##########################
|
16
|
+
|
17
|
+
# Reads a two-column, tab-separated file (KEGG gene ID, KEGG pathway ID) and
# groups the pathways of each gene.
#
# Lines lacking either column (including blank lines) are skipped so that no
# nil key or nil pathway ends up in the dictionary.
#
# pathway_to_genes_file - path of the file to read.
#
# Returns a Hash of gene ID => Array of pathway IDs (in file order).
def load_kegg_dictionary(pathway_to_genes_file)
  kegg_dictionary = {}
  # File.foreach closes the file once iteration ends.
  File.foreach(pathway_to_genes_file) do |line|
    keggGeneID, keggPathwayID = line.chomp.split("\t")
    next if keggGeneID.nil? || keggPathwayID.nil?
    (kegg_dictionary[keggGeneID] ||= []) << keggPathwayID
  end
  return kegg_dictionary
end
|
32
|
+
|
33
|
+
# Reads a two-column, tab-separated network file and separates its rows by
# the content of the first column.
#
# Rows whose first column contains 'hsa:' are collected in the first list;
# all other rows go to the second list. Blank lines are ignored.
#
# network_kegg_file - path of the network file.
#
# Returns [network_kegg, superfamily_ids], both Arrays of
# [first_column, second_column] pairs in file order.
def load_network(network_kegg_file)
  network_kegg = []
  superfamily_ids = []
  # File.foreach closes the file once iteration ends.
  File.foreach(network_kegg_file) do |line|
    kegg_gene_ID, gene = line.chomp.split("\t")
    next if kegg_gene_ID.nil? # blank line: nothing to classify
    bucket = kegg_gene_ID.include?('hsa:') ? network_kegg : superfamily_ids
    bucket << [kegg_gene_ID, gene]
  end
  return network_kegg, superfamily_ids
end
|
47
|
+
|
48
|
+
##########################
|
49
|
+
#OPT-PARSER
|
50
|
+
##########################
|
51
|
+
# Command-line options, pre-filled with their default values.
options = {
  kegg_pathways: nil,
  network_kegg: nil,
  output_path: 'network_kegg_pathways'
}

cli = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-k", "--kegg_pathways PATH", "Input file with KEGG genes to pathways") { |path| options[:kegg_pathways] = path }

  opts.on("-n", "--network_kegg PATH", "Network with KEGG genes to translate into pathways") { |path| options[:network_kegg] = path }

  opts.on("-o", "--output_path PATH", "Resulting network output path") { |path| options[:output_path] = path }

  # Print the generated usage text and stop.
  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
# Consumes the recognised switches from ARGV.
cli.parse!
|
76
|
+
|
77
|
+
##########################
|
78
|
+
#MAIN
|
79
|
+
##########################
|
80
|
+
# Replaces each KEGG gene of the network by the pathways it belongs to and
# writes the resulting pairs, followed by the untouched non-KEGG rows.
abort('ERROR: a KEGG genes-to-pathways file is required (-k/--kegg_pathways)') if options[:kegg_pathways].nil?
abort('ERROR: a network file is required (-n/--network_kegg)') if options[:network_kegg].nil?

kegg_dictionary = load_kegg_dictionary(options[:kegg_pathways])
network_kegg, superfamily_ids = load_network(options[:network_kegg])

pathways_network = []
network_kegg.each do |kegg_gene_ID, proteinID|
  pathwayIDs = kegg_dictionary[kegg_gene_ID]
  next if pathwayIDs.nil? # gene without known pathways: dropped from the output
  pathwayIDs.each do |pathway|
    pathways_network << [pathway, proteinID]
  end
end

# Block form guarantees the output file is closed even if a write fails.
File.open(options[:output_path], 'w') do |handler|
  (pathways_network + superfamily_ids).each do |pair|
    handler.puts pair.join("\t")
  end
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
##########################
|
3
|
+
# Rojano E. & Seoane P., June 2019
|
4
|
+
# Domain to functional annotation predictor validation system
|
5
|
+
# The script uses the predictions file and proteins-FunSys from UniProtKB
|
6
|
+
# It compares the predictions with the proteins-FunSys to validate the functioning of the predictor
|
7
|
+
# Generate values to plot in a PR
|
8
|
+
##########################
|
9
|
+
|
10
|
+
|
11
|
+
REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
|
12
|
+
ROOT_PATH = File.dirname(__FILE__)
|
13
|
+
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'DomFun'))
|
14
|
+
require 'generalMethods.rb'
|
15
|
+
require 'optparse'
|
16
|
+
require "statistics2"
|
17
|
+
require 'bigdecimal'
|
18
|
+
|
19
|
+
|
20
|
+
##########################
|
21
|
+
#METHODS
|
22
|
+
##########################
|
23
|
+
|
24
|
+
# Reads a tab-separated predictions file with the columns protein, domains,
# functional system and combined score.
#
# Any line containing the text 'ProteinID' is treated as a header and
# skipped. The domains column is read but not kept; the score is converted
# with #to_f.
#
# predictions_file - path of the file to read.
#
# Returns an Array of [protein, funSys, combinedScore(Float)] rows, in file
# order.
def load_predictions_file(predictions_file)
  predictions = []
  # File.foreach closes the file once iteration ends.
  File.foreach(predictions_file) do |line|
    line = line.chomp
    next if line.include?('ProteinID')
    protein, _domains, funSys, combinedScore = line.split("\t")
    predictions << [protein, funSys, combinedScore.to_f]
  end
  return predictions
end
|
34
|
+
|
35
|
+
# Reads the control dataset: one protein per line, followed by a tab and a
# ';'-separated list of functional systems.
#
# Blank lines are ignored. A protein without a second column is stored with
# an empty list instead of raising. If a protein appears more than once, the
# last line wins.
#
# control_file - path of the file to read.
#
# Returns a Hash of proteinID => Array of functional system identifiers.
def load_control_file(control_file)
  control_protein_FunSys = {}
  # File.foreach closes the file once iteration ends.
  File.foreach(control_file) do |line|
    proteinID, funSys = line.chomp.split("\t")
    next if proteinID.nil? # blank line
    # nil.to_s is "", which splits into an empty list
    control_protein_FunSys[proteinID] = funSys.to_s.split(';')
  end
  return control_protein_FunSys
end
|
44
|
+
|
45
|
+
# Groups (key, label, score) triples by key and records the score range.
#
# pairs_array - Array of [key, label, score] triples
#               (protein, FunSys, association score).
#
# Returns [pred, [min, max]] where pred maps each key to
# [Array of labels, Array of scores] (parallel lists, input order) and
# min/max are the smallest and largest score seen (both nil for empty input).
def load_prediction(pairs_array)
  pred = {}
  lowest = nil
  highest = nil
  pairs_array.each do |key, label, score|
    # Track the global score range; the first triple initialises both ends.
    if lowest.nil? || highest.nil?
      lowest = score
      highest = score
    else
      lowest = score if score < lowest
      highest = score if score > highest
    end

    entry = pred[key]
    if entry
      entry.first << label
      entry.last << score
    else
      pred[key] = [[label], [score]]
    end
  end
  return pred, [lowest, highest]
end
|
66
|
+
|
67
|
+
|
68
|
+
# Pandey 2007, Association Analysis-based Transformations for Protein Interaction Networks: A Function Prediction Case Study
|
69
|
+
# Computes precision and recall at a series of score thresholds.
#
# meth                   - association method name (not used by this method).
# cut_number             - number of intervals the score range is divided
#                          into; cut_number + 1 thresholds are evaluated.
# top_number             - maximum number of labels kept per protein.
# control_protein_FunSys - Hash of protein => Array of true labels.
# predictions            - Array of [protein, label, score] triples.
#
# Returns an Array of [cut, precision, recall] rows; empty when there are no
# predictions (there is no score range to cut in that case).
def get_pred_rec(meth, cut_number = 100, top_number = 10000, control_protein_FunSys, predictions)
  return [] if predictions.empty?

  preds, limits = load_prediction(predictions)
  get_cuts(limits, cut_number).map do |cut|
    prec, rec = pred_rec(preds, cut, top_number, control_protein_FunSys)
    [cut, prec, rec]
  end
end
|
79
|
+
|
80
|
+
# Precision and recall of the predictions at one score threshold.
#
# preds                  - Hash of protein => [labels, scores].
# cut                    - minimum score a label needs to be considered.
# top                    - maximum number of labels kept per protein.
# control_protein_FunSys - Hash of protein => Array of true labels.
#
# Only proteins present in the control set are evaluated. Returns
# [precision, recall]; a ratio that is undefined (0/0) is reported as 0.0.
def pred_rec(preds, cut, top, control_protein_FunSys)
  n_predicted = 0 # m
  n_true = 0      # n
  n_common = 0    # k
  control_protein_FunSys.each do |protein, known_labels|
    n_true += known_labels.length
    prediction = preds[protein]
    next if prediction.nil?

    labels, scores = prediction
    kept_labels = get_reliable_labels(labels, scores, cut, top)
    n_predicted += kept_labels.length
    n_common += (known_labels & kept_labels).length
  end

  precision = n_common.to_f / n_predicted
  recall = n_common.to_f / n_true
  precision = 0.0 if precision.nan?
  recall = 0.0 if recall.nan?
  [precision, recall]
end
|
101
|
+
|
102
|
+
# Builds n_cuts + 1 evenly spaced thresholds starting at the lower limit.
#
# limits - [min, max] pair of scores.
# n_cuts - number of intervals between the first and the last threshold.
#
# The step is held as a BigDecimal (10 significant digits) and each threshold
# is computed as start + n * step, then converted to Float.
#
# Returns an Array of Floats.
def get_cuts(limits, n_cuts)
  start = limits.first
  step = BigDecimal((limits.last - start).abs.fdiv(n_cuts), 10)
  (n_cuts + 1).times.map { |n| (start + n * step).to_f }
end
|
112
|
+
|
113
|
+
# Selects the best-scoring labels of one protein.
#
# labels - Array of labels.
# scores - Array of scores, parallel to labels.
# cut    - minimum score (inclusive) a label needs to be kept.
# top    - maximum number of labels to return; zero or a negative value
#          yields no labels.
#
# Returns the kept labels ordered by decreasing score.
def get_reliable_labels(labels, scores, cut, top)
  reliable_labels = []
  scores.each_with_index do |score, i|
    reliable_labels << [labels[i], score] if score >= cut
  end
  reliable_labels.sort! { |l1, l2| l2.last <=> l1.last }
  # `first(n)` rather than `[0..top-1]`: with top == 0 the range would become
  # 0..-1 and select every label instead of none.
  reliable_labels.first([top, 0].max).map { |pred| pred.first }
end
|
121
|
+
|
122
|
+
|
123
|
+
##########################
|
124
|
+
#OPT-PARSER
|
125
|
+
##########################
|
126
|
+
|
127
|
+
# Command-line options, pre-filled with their default values.
options = {
  input_predictions: nil,
  control_file: nil,
  assoc_meth: nil,
  performance_file: 'precision_recall.txt'
}

cli = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-a", "--input_predictions PATH", "Domain-function predictions") { |path| options[:input_predictions] = path }

  opts.on("-c", "--control_file PATH", "Control dataset with proteins-FunSys from UniProtKB") { |path| options[:control_file] = path }

  opts.on("-m", "--assoc_meth STRING", "Association method used") { |name| options[:assoc_meth] = name }

  opts.on("-p", "--performance_file PATH", "Output file with PR values") { |path| options[:performance_file] = path }

  # Print the generated usage text and stop.
  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
# Consumes the recognised switches from ARGV.
cli.parse!
|
157
|
+
|
158
|
+
##########################
|
159
|
+
#MAIN
|
160
|
+
##########################
|
161
|
+
|
162
|
+
# Load the control annotations and the predictions, evaluate precision and
# recall at 100 intervals (keeping at most 10000 labels per protein), and
# write one row per threshold tagged with the association method.
control_protein_FunSys = load_control_file(options[:control_file])
domains_FunSys_predictions = load_predictions_file(options[:input_predictions])

performance = get_pred_rec(
  options[:assoc_meth],
  100,
  10000,
  control_protein_FunSys,
  domains_FunSys_predictions
)

File.open(options[:performance_file], 'w') do |out|
  out.puts ['cut', 'prec', 'rec', 'meth'].join("\t")
  performance.each do |row|
    # The method name is appended to the row itself before it is written.
    row.push(options[:assoc_meth].to_s)
    out.puts row.join("\t")
  end
end
|