NetAnalyzer 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NetAnalyzer.gemspec +1 -1
- data/README.md +3 -1
- data/bin/NetAnalyzer.rb +5 -15
- data/bin/randomize_clustering.rb +121 -0
- data/bin/randomize_network.rb +89 -0
- data/bin/ranker_gene.rb +121 -0
- data/bin/text2binary_matrix.rb +15 -1
- data/lib/NetAnalyzer/adv_mat_calc.rb +117 -0
- data/lib/NetAnalyzer/net_parser.rb +50 -0
- data/lib/NetAnalyzer/net_plotter.rb +145 -0
- data/lib/NetAnalyzer/network.rb +474 -564
- data/lib/NetAnalyzer/nodes.rb +15 -0
- data/lib/NetAnalyzer/performancer.rb +98 -0
- data/lib/NetAnalyzer/ranker.rb +250 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +5 -0
- metadata +19 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c71fe9b15d48d8ce8c76d2aec32a60ef2ede83686f49d18b09a8dc7204f07a9
|
4
|
+
data.tar.gz: 697edf91abebe9cbf600b65838dbaa9f0761342bb6f89bcbfadfab270e4ad1ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eee16b18357c98e62ea4d1799991efb878ad8f8385d50a45a5f89154e3a36829e03768e560b8821dc0e0b7d10b64c2efee3bf3dadf13efb6adc7177f96e41e82
|
7
|
+
data.tar.gz: 1b2b19082a15a60df2585a99d6e3253eef98682398d6969aaa3e584ed8c425147f2389a668be3439474562b2da5100ec4d91f2790b9bce11b9bbbbf868251fb0
|
data/NetAnalyzer.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["elenarojano@uma.es, seoanezonjic@hotmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Network analysis tool that calculate and validate different association indices.}
|
13
|
-
spec.description = %q{NetAnalyzer is a useful network analysis tool developed in Ruby that can 1) analyse any type of unweighted network, regardless of the number of layers, 2) calculate the relationship between different layers, using various association indices (Jaccard, Simpson, PCC, geometric, cosine and hypergeometric) and 3) validate the results}
|
13
|
+
spec.description = %q{DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/NetAnalyzer. NetAnalyzer is a useful network analysis tool developed in Ruby that can 1) analyse any type of unweighted network, regardless of the number of layers, 2) calculate the relationship between different layers, using various association indices (Jaccard, Simpson, PCC, geometric, cosine and hypergeometric) and 3) validate the results}
|
14
14
|
spec.homepage = "https://github.com/ElenaRojano/NetAnalyzer"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
data/README.md
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
# NetAnalyzer
|
2
2
|
|
3
|
+
DEPRECATED PROJECT. MIGRATED TO [python semtools](https://github.com/seoanezonjic/NetAnalyzer)
|
4
|
+
|
3
5
|
NetAnalyzer is a network analysis tool that can be used to calculate the associations between nodes in unweighted n-partite networks [1]. The calculation of the association between nodes is based on similarity indices (Jaccard, Simpson, geometric and cosine), statistics-based indices (Pearson correlation coefficient, CSI and hypergeometric) [2] and a special metric designed only for tripartite networks (here called the 'transference' method [3]). The user can choose the association index method according to the network to analyse. The tool outputs a table of results with all the associations between nodes and the association value calculated for each.
|
4
|
-
|
6
|
+
|
5
7
|
If you use this tool, please cite us: [1] E. Rojano, P. Seoane, A. Bueno, J. R. Perkins & J. A. G. Ranea. Revealing the Relationship Between Human Genome Regions and Pathological Phenotypes Through Network Analysis. Lecture Notes in Computer Science, Vol 10208, 197-207 (2017).
|
6
8
|
|
7
9
|
[2] Fuxman-Bass et al. Using networks to measure similarity between genes: association index selection. Nature Methods, 10(12):1169-76. 2013.
|
data/bin/NetAnalyzer.rb
CHANGED
@@ -176,26 +176,15 @@ OptionParser.new do |opts|
|
|
176
176
|
options[:delete_nodes] = item.split(';')
|
177
177
|
end
|
178
178
|
end.parse!
|
179
|
-
|
180
179
|
##########################
|
181
180
|
#MAIN
|
182
181
|
##########################
|
183
|
-
|
182
|
+
puts "Loading network data"
|
183
|
+
fullNet = Net_parser.load(options)
|
184
184
|
fullNet.reference_nodes = options[:reference_nodes]
|
185
185
|
fullNet.threads = options[:threads]
|
186
186
|
fullNet.group_nodes = options[:group_nodes]
|
187
187
|
fullNet.set_compute_pairs(options[:use_pairs], !options[:no_autorelations])
|
188
|
-
#puts options[:layers].map{|layer| layer.first}.inspect
|
189
|
-
puts "Loading network data"
|
190
|
-
if options[:input_format] == 'pair'
|
191
|
-
fullNet.load_network_by_pairs(options[:input_file], options[:layers], options[:split_char])
|
192
|
-
elsif options[:input_format] == 'bin'
|
193
|
-
fullNet.load_network_by_bin_matrix(options[:input_file], options[:node_file], options[:layers])
|
194
|
-
elsif options[:input_format] == 'matrix'
|
195
|
-
fullNet.load_network_by_plain_matrix(options[:input_file], options[:node_file], options[:layers], options[:splitChar])
|
196
|
-
else
|
197
|
-
raise("ERROR: The format #{options[:input_format]} is not defined")
|
198
|
-
end
|
199
188
|
|
200
189
|
if !options[:delete_nodes].empty?
|
201
190
|
node_list = load_file(options[:delete_nodes].first).flatten
|
@@ -243,8 +232,9 @@ if !options[:meth].nil?
|
|
243
232
|
line.chomp!
|
244
233
|
control << line.split("\t")
|
245
234
|
end
|
246
|
-
|
247
|
-
|
235
|
+
Performancer.load_control(control)
|
236
|
+
predictions = fullNet.association_values[options[:meth]]
|
237
|
+
performance = Performancer.get_pred_rec(predictions)
|
248
238
|
File.open(options[:performance_file], 'w') do |f|
|
249
239
|
f.puts %w[cut prec rec meth].join("\t")
|
250
240
|
performance.each do |item|
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
##############################
|
6
|
+
#FUNCTIONS
|
7
|
+
##############################
|
8
|
+
|
9
|
+
|
10
|
+
# Reads a cluster table and groups its nodes by cluster id.
#
# options - Hash with the keys read here:
#           :input_file    path of the table to read
#           :column_sep    separator used to split each line into columns
#           :cluster_index 0-based column holding the cluster id
#           :node_index    0-based column holding the node (or aggregated nodes)
#           :node_sep      separator of aggregated nodes, or nil when each line holds one node
#
# Returns a Hash mapping each cluster id to the Array of its nodes, in file order.
def load_clusters(options)
  clusters = {}
  # File.foreach closes the file when done; a bare File.open(...).each leaves the handle open.
  File.foreach(options[:input_file]) do |line|
    fields = line.chomp.split(options[:column_sep])
    cluster = fields[options[:cluster_index]]
    node = fields[options[:node_index]]
    # Blank or truncated lines have no cluster/node column: skip them instead of
    # registering a nil cluster or failing on nil.split.
    next if cluster.nil? || node.nil?

    node = node.split(options[:node_sep]) unless options[:node_sep].nil?
    (clusters[cluster] ||= []).concat(Array(node))
  end
  clusters
end
|
23
|
+
|
24
|
+
|
25
|
+
# Builds one random cluster per entry of all_sizes by sampling from nodes.
#
# nodes       - Array of node names (may contain repeated names).
# replacement - when false, the nodes drawn for one cluster are removed from the
#               pool before the next cluster is drawn; when true the pool is kept.
# all_sizes   - Array with the size wanted for each random cluster.
# seed        - Integer seed; cluster number i is drawn with Random.new(seed + i),
#               so the result is reproducible.
#
# Returns a Hash "<index>_random" => Array of sampled nodes.
# Aborts the process when the pool cannot fill a cluster.
def random_sample(nodes, replacement, all_sizes, seed)
  random_clusters = {}
  nodes_list = nodes.dup
  all_sizes.each_with_index do |cluster_size, counter|
    # Sampling is done over distinct names, so the size guard must look at the
    # distinct pool too; checking the raw list let repeated names hide a
    # shortage and produced clusters smaller than requested.
    candidates = nodes_list.uniq
    abort("Not enough nodes to generate clusters. Please activate replacement or change random mode") if cluster_size > candidates.size
    random_nodes = candidates.sample(cluster_size, random: Random.new(seed + counter))
    nodes_list -= random_nodes unless replacement
    random_clusters["#{counter}_random"] = random_nodes
  end
  random_clusters
end
|
39
|
+
|
40
|
+
# Writes clusters to output_file as tab-separated "cluster<TAB>node" lines.
#
# clusters    - Hash cluster id => Array of nodes.
# output_file - path of the file to (over)write.
# sep         - nil to write one line per node; otherwise the nodes of each
#               cluster are joined with sep and written on a single line.
def write_clusters(clusters, output_file, sep)
  File.open(output_file, 'w') do |outfile|
    clusters.each do |cluster, nodes|
      entries = sep.nil? ? nodes : [nodes.join(sep)]
      entries.each { |entry| outfile.puts [cluster, entry].flatten.join("\t") }
    end
  end
end
|
50
|
+
|
51
|
+
##############################
|
52
|
+
#OPTPARSE
|
53
|
+
##############################
|
54
|
+
|
55
|
+
# Command-line options of the cluster randomizer. Each default is stored in
# `options` right before its switch is declared so the help text can show it.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Input file") do |input_file|
    options[:input_file] = input_file
  end

  # Column switches take 1-based numbers and are stored 0-based, so the default
  # shown in the help is the stored index + 1.
  options[:node_index] = 1
  opts.on("-N", "--node_column INTEGER", "Number of the nodes column. Default = #{options[:node_index] + 1}") do |node_i|
    options[:node_index] = node_i.to_i - 1
  end

  options[:cluster_index] = 0
  opts.on("-C", "--cluster_column INTEGER", "Number of the clusters column. Default = #{options[:cluster_index] + 1}") do |cluster_i|
    options[:cluster_index] = cluster_i.to_i - 1
  end

  options[:column_sep] = "\t"
  opts.on("-S", "--split_char CHARACTER", "Character for splitting input file. Default: tab") do |split_char|
    options[:column_sep] = split_char
  end

  options[:node_sep] = nil
  opts.on("-s", "--node_sep CHARACTER", "Node split character. This option must be used when input file is aggregated.") do |split_char|
    options[:node_sep] = split_char
  end

  # Stored as the ':'-split pieces of the mode string, e.g. ["fixed", "10", "5"].
  options[:random_type] = ["size"]
  opts.on("-r", "--random_type STRING", "Indicate random mode. 'size' to randomize clusters with the same size as input clusters. 'full_size' same as 'size' but all nodes are repeated as in the input. 'fixed:n:s' to generate 'n' clusters of 's' nodes. Default = #{options[:random_type].join(':')}") do |random_type|
    options[:random_type] = random_type.split(":")
  end

  options[:replacement] = false
  opts.on("-R", "--replacement", "Boolean. Activates random sampling with replacement. Otherwise sampling without replacement is executed.") do
    options[:replacement] = true
  end

  options[:output_file] = "./random_clusters.txt"
  opts.on("-o", "--output_file FILEPATH", "Output file") do |output_file|
    options[:output_file] = output_file
  end

  options[:aggregate_sep] = nil
  opts.on("-a", "--aggregate_sep CHARACTER", "This option activates aggregation in output. Separator character must be provided") do |split_char|
    options[:aggregate_sep] = split_char
  end

end.parse!
|
105
|
+
##########################
|
106
|
+
#MAIN
|
107
|
+
##########################
|
108
|
+
|
109
|
+
clusters = load_clusters(options)

nodes = clusters.values.flatten
# Keep repeated nodes only in 'full_size' mode. The previous
# `!options[:random_type][0] == "full_size"` negated the string first and then
# compared false with "full_size", so the de-duplication never ran.
nodes = nodes.uniq if options[:random_type][0] != "full_size"

if options[:random_type][0].include?("size") && options[:random_type].size == 1
  # 'size' / 'full_size': one random cluster per input cluster, same sizes.
  all_sizes = clusters.map { |_cluster, cluster_nodes| cluster_nodes.size }
elsif options[:random_type][0] == "fixed" && options[:random_type].size == 3
  # 'fixed:n:s': n clusters of s nodes each.
  all_sizes = Array.new(options[:random_type][1].to_i, options[:random_type][2].to_i)
else
  # Without this branch all_sizes stayed nil and the failure surfaced later as a NoMethodError.
  abort("Unknown random mode '#{options[:random_type].join(':')}'. Use 'size', 'full_size' or 'fixed:n:s'")
end

random_clusters = random_sample(nodes, options[:replacement], all_sizes, 123)
write_clusters(random_clusters, options[:output_file], options[:aggregate_sep])
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
require 'optparse'
|
6
|
+
require 'benchmark'
|
7
|
+
require 'NetAnalyzer'
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
##############################
|
12
|
+
#OPTPARSE
|
13
|
+
##############################
|
14
|
+
|
15
|
+
# Command-line options of the network randomizer. Each default is stored in
# `options` right before its switch is declared.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Input file") do |input_file|
    options[:input_file] = input_file
  end

  options[:node_file] = nil
  opts.on("-n", "--node_names_file PATH", "File with node names corresponding to the input matrix, only use when -i is set to bin or matrix.") do |node_file|
    options[:node_file] = node_file
  end

  options[:input_format] = 'pair'
  opts.on("-f", "--input_format STRING", "Input file format: pair (default), bin, matrix") do |input_format|
    options[:input_format] = input_format
  end

  options[:split_char] = "\t"
  opts.on("-s", "--split_char CHARACTER", "Character for splitting input file. Default: tab") do |split_char|
    options[:split_char] = split_char
  end

  # A list of [layer_name, pattern] pairs, the same shape the -l handler builds.
  # The default used to be the flat pair [:layer, '-'], which made every
  # `layer.first` call on its elements fail.
  # NOTE(review): the default pattern is kept as the string '-' while -l produces
  # Regexp objects; confirm how single-layer networks use the pattern.
  options[:layers] = [[:layer, '-']]
  opts.on("-l", "--layers STRING", "Layer definition on network: layer1name,regexp1;layer2name,regexp2...") do |layers|
    layers_definition = layers.split(";").map{|layer_attr| layer_attr.split(',')}
    layers_definition.map!{|layer_attr| [layer_attr.first.to_sym, /#{layer_attr.last}/]}
    options[:layers] = layers_definition
  end

  options[:type_random] = nil
  opts.on("-r", "--type_random network", "Randomized basis. 'nodes' Node-baseds randomize or 'links' Links-baseds randomize") do |type_random|
    options[:type_random] = type_random
  end

  options[:output_file] = nil
  opts.on("-o", "--output_file FILEPATH", "Output file") do |output_file|
    options[:output_file] = output_file
  end

end.parse!
|
57
|
+
|
58
|
+
|
59
|
+
##########################
|
60
|
+
#MAIN
|
61
|
+
##########################
|
62
|
+
# NOTE(review): bin/NetAnalyzer.rb loads its network through Net_parser.load in this
# release; confirm that Network still exposes the load_network_by_* methods used below.
fullNet = Network.new(options[:layers].map{|layer| layer.first})
puts "Loading network data"

# Layer names used only by the disabled adjacency-matrix export at the end of the script.
if options[:layers].length == 1
  layerA = layerB = options[:layers][0].first
elsif options[:layers].length == 2
  layerA = options[:layers][0].first
  layerB = options[:layers][1].first
end

case options[:input_format]
when 'pair'
  fullNet.load_network_by_pairs(options[:input_file], options[:layers], options[:split_char])
when 'bin', 'matrix'
  # A missing node file used to be reported as "format ... is not defined".
  raise("ERROR: The format #{options[:input_format]} needs a node names file (-n)") if options[:node_file].nil?
  if options[:input_format] == 'bin'
    fullNet.load_network_by_bin_matrix(options[:input_file], options[:node_file], options[:layers])
  else
    # The separator is stored under :split_char; :splitChar is never set by this
    # script, so the user's -s value was silently dropped here.
    fullNet.load_network_by_plain_matrix(options[:input_file], options[:node_file], options[:layers], options[:split_char])
  end
else
  # raise already stops the script; the `exit` that followed it was unreachable.
  raise("ERROR: The format #{options[:input_format]} is not defined")
end

fullNet.randomize_network(options[:type_random])

#fullNet.save_adjacency_matrix(layerA, layerB, options[:output_file])
|
88
|
+
|
89
|
+
|
data/bin/ranker_gene.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'npy'
|
8
|
+
require 'parallel'
|
9
|
+
require 'NetAnalyzer'
|
10
|
+
|
11
|
+
|
12
|
+
########################### METHODS ########################
|
13
|
+
#############################################################
|
14
|
+
|
15
|
+
# Writes every ranked row to file as a tab-separated line, followed by the name
# of the seed group it belongs to.
#
# file         - path of the file to (over)write.
# ranking_list - collection yielding [seed_name, ranking] pairs, where ranking
#                is a list of rows and each row responds to join.
def write_ranking(file, ranking_list)
  File.open(file, 'w') do |out|
    ranking_list.each do |seed_name, ranking|
      ranking.each { |row| out.puts "#{row.join("\t")}\t#{seed_name}" }
    end
  end
end
|
24
|
+
|
25
|
+
|
26
|
+
########################### OPTPARSE ########################
|
27
|
+
#############################################################
|
28
|
+
|
29
|
+
# Command-line options of the gene ranker. Each default is stored in `options`
# right before its switch is declared.
options = {}
OptionParser.new do |opts|

  options[:kernel_file] = nil
  opts.on("-k","--input_kernels KER", "The roots from each kernel to integrate") do |ker|
    options[:kernel_file] = ker
  end

  options[:node_file] = nil
  opts.on("-n","--input_nodes NODE", "The list of node for each kernel in lst format") do |node_file|
    options[:node_file] = node_file
  end

  options[:genes_seed] = nil
  opts.on("-s","--genes_seed SEED", "The name of the gene to look for backups") do |genes_seed|
    options[:genes_seed] = genes_seed
  end

  options[:seed_genes_sep] = ","
  opts.on("-S","--genes_seed_sep SEP", "Separator of seed genes. Only use when -s point to a file") do |sep|
    # This handler used to write to :genes_seed, which overwrote the -s value
    # and left the separator stuck at its default.
    options[:seed_genes_sep] = sep
  end

  options[:filter] = nil
  opts.on("-f","--filter NAME", "PATH to file with seed_name and genes to keep in output") do |file|
    options[:filter] = file
  end

  options[:leave_one_out] = false
  opts.on("-l","--leave_one_out", "Perform leave one out from a seed genes group") do
    options[:leave_one_out] = true
  end

  options[:top_n] = nil
  opts.on("-t","--top_n INT", "Top N genes to print in output") do |str|
    options[:top_n] = str.to_i
  end

  options[:output_top] = nil
  opts.on("--output_top PATH", "File to save Top N genes") do |path|
    options[:output_top] = path
  end

  options[:output_name] = "ranked_genes"
  opts.on("-o","--output_name NAME", "The name of the ranked file") do |output_name|
    options[:output_name] = output_name
  end

  # The value given on the command line is reduced by one before being stored.
  options[:threads] = 0
  opts.on( '-T', '--threads INTEGER', 'Number of threads to use in computation, one thread will be reserved as manager.' ) do |opt|
    options[:threads] = opt.to_i - 1
  end
end.parse!
|
82
|
+
|
83
|
+
########################### MAIN ############################
|
84
|
+
#############################################################
|
85
|
+
|
86
|
+
# Configure the ranker from the parsed options and run the ranking.
ranker = Ranker.new()
ranker.matrix = Npy.load(options[:kernel_file])
ranker.load_nodes_from_file(options[:node_file])
ranker.load_seeds(options[:genes_seed], sep: options[:seed_genes_sep])
ranker.load_references(options[:filter], sep: ",") unless options[:filter].nil?
ranker.do_ranking(leave_one_out: options[:leave_one_out], threads: options[:threads])
rankings = ranker.ranking

# Seed groups whose ranking is empty are listed, with their genes, in "<output_name>_discarded".
discarded_seeds = rankings.select { |_seed_name, ranks| ranks.empty? }.keys
unless discarded_seeds.empty?
  File.open("#{options[:output_name]}_discarded", 'w') do |f|
    discarded_seeds.each do |seed_name|
      f.puts "#{seed_name}\t#{ranker.seeds[seed_name].join(options[:seed_genes_sep])}"
    end
  end
end

# With --top_n, the top entries either replace the main output or go to their
# own file when --output_top is given.
unless options[:top_n].nil?
  top_n = ranker.get_top(options[:top_n])
  if options[:output_top].nil?
    rankings = top_n
  else
    write_ranking(options[:output_top], top_n)
  end
end

# A filter file takes precedence over whatever was selected above.
rankings = ranker.get_reference_ranks unless options[:filter].nil?

write_ranking("#{options[:output_name]}_all_candidates", rankings) unless rankings.empty?
|
119
|
+
|
120
|
+
|
121
|
+
|
data/bin/text2binary_matrix.rb
CHANGED
@@ -220,6 +220,11 @@ optparse = OptionParser.new do |opts|
|
|
220
220
|
options[:binarize] = opt.to_f
|
221
221
|
end
|
222
222
|
|
223
|
+
options[:cutoff] = nil
|
224
|
+
opts.on( '-c', '--cutoff FLOAT', 'Cutoff matrix values keeping just x >= and setting any other to zero into matrix given' ) do |opt|
|
225
|
+
options[:cutoff] = opt.to_f
|
226
|
+
end
|
227
|
+
|
223
228
|
options[:stats] = false
|
224
229
|
opts.on( '-s', '--get_stats', 'Get stats from the processed matrix' ) do
|
225
230
|
options[:stats] = true
|
@@ -266,7 +271,7 @@ if options[:set_diagonal]
|
|
266
271
|
end
|
267
272
|
end
|
268
273
|
|
269
|
-
if !options[:binarize].nil?
|
274
|
+
if !options[:binarize].nil? && options[:cutoff].nil?
|
270
275
|
elements = matrix.shape.last
|
271
276
|
elements.times do |i|
|
272
277
|
elements.times do |j|
|
@@ -275,6 +280,15 @@ if !options[:binarize].nil?
|
|
275
280
|
end
|
276
281
|
end
|
277
282
|
|
283
|
+
if !options[:cutoff].nil? && options[:binarize].nil?
|
284
|
+
elements = matrix.shape.last
|
285
|
+
elements.times do |i|
|
286
|
+
elements.times do |j|
|
287
|
+
matrix[i,j] = matrix[i,j] >= options[:cutoff] ? matrix[i,j] : 0
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
278
292
|
if options[:stats]
|
279
293
|
stats = get_stats(matrix)
|
280
294
|
stats.each do |stat|
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'numo/narray'
|
2
|
+
require 'numo/linalg'
|
3
|
+
|
4
|
+
class Adv_mat_calc # Advanced matrix calculations
  ## KERNEL METHODS
  #######################################################################################

  # Computes a graph kernel from an adjacency matrix.
  #
  # matrix        - adjacency matrix; its last dimension is used as the size of the
  #                 identity matrices, so it is assumed to be square. Besides the
  #                 Numo calls, it must respond to max_eigenvalue, min_eigenvalue,
  #                 div_by_vector and cosine_normalization, which are not defined
  #                 in this file.
  # node_names    - accepted for interface compatibility; not read here.
  # kernel        - kernel code: 'el', 'ct', 'rf', 'me', 'ka', or a code with a
  #                 numeric part: 'vn<factor>', 'rl<factor>', 'md<steps>'.
  #                 nil or an unknown code returns the input matrix with a warning.
  # normalization - when true the result goes through cosine_normalization.
  #
  # Returns the kernel matrix.
  # Raises ArgumentError when an 'md' kernel has no positive step count.
  def self.get_kernel(matrix, node_names, kernel, normalization=false)
    #I = identity matrix
    #D = Diagonal matrix
    #A = adjacency matrix
    #L = laplacian matrix = D − A
    # A nil kernel must reach the fallback branch below instead of failing on nil.include?
    kernel = kernel.to_s
    matrix_result = nil
    dimension_elements = matrix.shape.last
    # In scuba code, the diagonal values of A is set to 0. In weighted matrix the kernel result is the same with or without this operation. Maybe increases the computing performance?
    # In the md kernel this operation affects the values of the final kernel
    #dimension_elements.times do |n|
    # matrix[n,n] = 0.0
    #end
    if kernel == 'el' || kernel == 'ct' || kernel == 'rf' ||
       kernel.include?('vn') || kernel.include?('rl') || kernel == 'me'
      diagonal_matrix = matrix.sum(1).diag # get the total sum for each row, for this reason the sum method takes the 1 value. If sum colums is desired, use 0
      # Make a matrix whose diagonal is row_sum
      matrix_L = diagonal_matrix - matrix
      if kernel == 'el' #Exponential Laplacian diffusion kernel(active). F Fouss 2012 | doi: 10.1016/j.neunet.2012.03.001
        beta = 0.02
        beta_product = matrix_L * -beta
        #matrix_result = beta_product.expm
        matrix_result = Numo::Linalg.expm(beta_product, 14)
      elsif kernel == 'ct' # Commute time kernel (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
        matrix_result = Numo::Linalg.pinv(matrix_L) # Anibal saids that this kernel was normalized. Why?. Paper do not seem to describe this operation for ct, it describes for Kvn or for all kernels, it is not clear.
      elsif kernel == 'rf' # Random forest kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
        matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L) #Krf = (I +L ) ^ −1
      elsif kernel.include?('vn') # von Neumann diffusion kernel. J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
        # alpha = factor taken from the kernel code (e.g. 'vn0.5' -> 0.5) divided by matrix.max_eigenvalue
        alpha = kernel.gsub('vn', '').to_f * matrix.max_eigenvalue ** -1
        matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) - matrix * alpha ) # (I -alphaA ) ^ −1
      elsif kernel.include?('rl') # Regularized Laplacian kernel matrix (active)
        # alpha = factor taken from the kernel code (e.g. 'rl0.5' -> 0.5) divided by matrix.max_eigenvalue
        alpha = kernel.gsub('rl', '').to_f * matrix.max_eigenvalue ** -1
        matrix_result = Numo::Linalg.inv(Numo::DFloat.eye(dimension_elements) + matrix_L * alpha ) # (I + alphaL ) ^ −1
      elsif kernel == 'me' # Markov exponential diffusion kernel (active). G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
        beta=0.04
        #(beta/N)*(N*I - D + A)
        id_mat = Numo::DFloat.eye(dimension_elements)
        m_matrix = (id_mat * dimension_elements - diagonal_matrix + matrix ) * (beta/dimension_elements)
        #matrix_result = m_matrix.expm
        matrix_result = Numo::Linalg.expm(m_matrix, 16)
      end
    elsif kernel == 'ka' # Kernelized adjacency matrix (active). J.-K. Heriche 2014 | doi: 10.1091/mbc.E13-04-0221
      lambda_value = matrix.min_eigenvalue
      matrix_result = matrix + Numo::DFloat.eye(dimension_elements) * lambda_value.abs # Ka = A + lambda*I # lambda = the absolute value of the smallest eigenvalue of A
    elsif kernel.include?('md') # Markov diffusion kernel matrix. G Zampieri 2018 | doi.org/10.1186/s12859-018-2025-5 . Taken from compute_kernel script
      t = kernel.gsub('md', '').to_i
      # A missing or zero step count made the averaging below multiply by 1.0/0.
      raise ArgumentError, "The md kernel needs a positive number of steps (e.g. 'md2'), got '#{kernel}'" if t < 1
      #TODO: check implementation with Numo::array
      col_sum = matrix.sum(1) # same axis as the row sums used for the diagonal matrix above
      p_mat = matrix.div_by_vector(col_sum)
      p_temp_mat = p_mat.clone
      zt_mat = p_mat.clone
      # Accumulate P + P^2 + ... + P^t, then average over t.
      (t-1).times do
        p_temp_mat = p_temp_mat.dot(p_mat)
        zt_mat = zt_mat + p_temp_mat
      end
      zt_mat = zt_mat * (1.0/t)
      matrix_result = zt_mat.dot(zt_mat.transpose)
    else
      matrix_result = matrix
      warn('Warning: The kernel method was not specified or not exists. The adjacency matrix will be given as result')
      # This allows process a previous kernel and perform the normalization in a separated step.
    end
    matrix_result = matrix_result.cosine_normalization if normalization #TODO: check implementation with Numo::array
    return matrix_result
  end

  # Alaimo 2014, doi: 10.3389/fbioe.2014.00071
  #
  # Combines two matrices through their graphWeights matrices and returns
  # matrix1 · (W1 · (matrix2 · W2)), where Wn = graphWeights of the transposed matrix n.
  #
  # matrix1, matrix2             - 2-D matrices whose shapes must allow the products above.
  # lambda_value1, lambda_value2 - exponents forwarded to graphWeights for each matrix.
  def self.tranference_resources(matrix1, matrix2, lambda_value1 = 0.5, lambda_value2 = 0.5)
    m1rowNumber, m1colNumber = matrix1.shape
    m2rowNumber, m2colNumber = matrix2.shape
    #puts m1rowNumber, m1colNumber, m2rowNumber, m2colNumber
    matrix1Weight = self.graphWeights(m1colNumber, m1rowNumber, matrix1.transpose, lambda_value1)
    matrix2Weight = self.graphWeights(m2colNumber, m2rowNumber, matrix2.transpose, lambda_value2)
    matrixWeightProduct = Numo::Linalg.dot(matrix1Weight, Numo::Linalg.dot(matrix2, matrix2Weight))
    finalMatrix = Numo::Linalg.dot(matrix1, matrixWeightProduct)
    return finalMatrix
  end

  # Builds the rowsNumber x rowsNumber weight matrix of inputMatrix.
  #
  # rowsNumber  - number of rows of inputMatrix; size of the result.
  # colsNumber  - accepted for interface compatibility; not read here.
  # inputMatrix - 2-D matrix with rowsNumber rows.
  # lambdaValue - exponent that splits the row-sum normalisation between the two
  #               indices of each cell (kx[i]**lambda and kx[j]**(1 - lambda)).
  def self.graphWeights(rowsNumber, colsNumber, inputMatrix, lambdaValue = 0.5)
    ky = (1.0 / inputMatrix.sum(0)).diag #sum cols
    weigth = Numo::Linalg.dot(inputMatrix, ky).transpose
    ky = nil #free memory
    weigth = Numo::Linalg.dot(inputMatrix, weigth)

    kx = inputMatrix.sum(1) #sum rows

    # Every row of kx_lamb_mat holds the vector kx ** lambda.
    kx_lamb = kx ** lambdaValue
    kx_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
    rowsNumber.times do |j|
      rowsNumber.times do |i|
        kx_lamb_mat[j,i] = kx_lamb[i]
      end
    end
    kx_lamb = nil #free memory

    # Every column of kx_inv_lamb_mat holds the vector kx ** (1 - lambda).
    kx_inv_lamb = kx ** (1 - lambdaValue)
    kx_inv_lamb_mat = Numo::DFloat.zeros(rowsNumber, rowsNumber)
    rowsNumber.times do |j|
      rowsNumber.times do |i|
        kx_inv_lamb_mat[i, j] = kx_inv_lamb[i]
      end
    end
    kx_inv_lamb = nil #free memory

    nx = 1.0/(kx_lamb_mat.inplace * kx_inv_lamb_mat).inplace # inplace marks a matrix to be used by reference, not for value
    kx_lamb_mat = nil #free memory
    kx_inv_lamb_mat = nil #free memory
    weigth.inplace * nx # element-wise product written back into weigth
    return weigth
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'expcalc'
|
2
|
+
|
3
|
+
# Builds Network objects from the supported input formats.
class Net_parser
  # Loads a network according to options[:input_format].
  #
  # options - Hash with the keys read here: :input_format ('pair', 'bin' or 'matrix'),
  #           :input_file, :layers (list of [layer_name, pattern] pairs), :split_char,
  #           and :node_file for the 'bin' and 'matrix' formats.
  #
  # Returns the loaded Network.
  # Raises RuntimeError when the format is not one of the three above.
  def self.load(options)
    net = nil
    if options[:input_format] == 'pair'
      net = load_network_by_pairs(options[:input_file], options[:layers], options[:split_char])
    elsif options[:input_format] == 'bin'
      net = load_network_by_bin_matrix(options[:input_file], options[:node_file], options[:layers])
    elsif options[:input_format] == 'matrix'
      # The separator is stored under :split_char (as in the 'pair' branch). Reading
      # :splitChar passed nil and dropped the user's choice; the old key is still
      # honoured for callers that set it.
      split_char = options[:split_char] || options[:splitChar] || "\t"
      net = load_network_by_plain_matrix(options[:input_file], options[:node_file], options[:layers], split_char)
    else
      raise("ERROR: The format #{options[:input_format]} is not defined")
    end
    return net
  end

  # Loads a network from a file with one edge per line: the first two columns,
  # split by split_character, are the connected nodes.
  def self.load_network_by_pairs(file, layers, split_character="\t")
    net = Network.new(layers.map{|layer| layer.first})
    # File.foreach closes the file when done; File.open(...).each left the handle open.
    File.foreach(file) do |line|
      node1, node2 = line.chomp.split(split_character)
      net.add_node(node1, net.set_layer(layers, node1))
      net.add_node(node2, net.set_layer(layers, node2))
      net.add_edge(node1, node2)
    end
    return net
  end

  # Loads a network from an 'npy' matrix file. The names read from node_file
  # label both matrix axes.
  def self.load_network_by_bin_matrix(input_file, node_file, layers)
    net = Network.new(layers.map{|layer| layer.first})
    node_names = load_input_list(node_file)
    net.adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, 'npy'), node_names, node_names]
    return net
  end

  # Loads a network from a plain-text matrix file whose columns are separated
  # by splitChar. The names read from node_file label both matrix axes.
  def self.load_network_by_plain_matrix(input_file, node_file, layers, splitChar="\t")
    net = Network.new(layers.map{|layer| layer.first})
    node_names = load_input_list(node_file)
    net.adjacency_matrices[layers.map{|l| l.first}] = [Numo::NArray.load(input_file, 'txt', splitChar), node_names, node_names]
    return net
  end

  # Internal helper: returns the lines of file without their line terminators.
  # NOTE: the bare `private` that preceded this method had no effect on a
  # `def self.` method, so it has always been publicly callable and stays so;
  # use private_class_method once no external caller relies on it.
  def self.load_input_list(file)
    return File.readlines(file).map{|line| line.chomp}
  end
end
|