NetAnalyzer 0.1.5 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/NetAnalyzer.gemspec +16 -6
- data/README.md +16 -2
- data/Rakefile +13 -9
- data/bin/NetAnalyzer.rb +176 -33
- data/bin/randomize_clustering.rb +121 -0
- data/bin/randomize_network.rb +89 -0
- data/bin/ranker_gene.rb +121 -0
- data/bin/text2binary_matrix.rb +308 -0
- data/lib/NetAnalyzer/adv_mat_calc.rb +117 -0
- data/lib/NetAnalyzer/net_parser.rb +50 -0
- data/lib/NetAnalyzer/net_plotter.rb +145 -0
- data/lib/NetAnalyzer/network.rb +723 -249
- data/lib/NetAnalyzer/nodes.rb +15 -0
- data/lib/NetAnalyzer/performancer.rb +98 -0
- data/lib/NetAnalyzer/ranker.rb +250 -0
- data/lib/NetAnalyzer/templates/ElGrapho.min.js +28 -0
- data/lib/NetAnalyzer/templates/cytoscape.erb +65 -0
- data/lib/NetAnalyzer/templates/cytoscape.min.js +32 -0
- data/lib/NetAnalyzer/templates/el_grapho.erb +89 -0
- data/lib/NetAnalyzer/templates/pako.min.js +1 -0
- data/lib/NetAnalyzer/templates/sigma.erb +132 -0
- data/lib/NetAnalyzer/version.rb +1 -1
- data/lib/NetAnalyzer.rb +7 -0
- metadata +187 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1c71fe9b15d48d8ce8c76d2aec32a60ef2ede83686f49d18b09a8dc7204f07a9
|
4
|
+
data.tar.gz: 697edf91abebe9cbf600b65838dbaa9f0761342bb6f89bcbfadfab270e4ad1ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eee16b18357c98e62ea4d1799991efb878ad8f8385d50a45a5f89154e3a36829e03768e560b8821dc0e0b7d10b64c2efee3bf3dadf13efb6adc7177f96e41e82
|
7
|
+
data.tar.gz: 1b2b19082a15a60df2585a99d6e3253eef98682398d6969aaa3e584ed8c425147f2389a668be3439474562b2da5100ec4d91f2790b9bce11b9bbbbf868251fb0
|
data/.rspec
CHANGED
data/Gemfile
CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
|
|
2
2
|
|
3
3
|
# Specify your gem's dependencies in NetAnalyzer.gemspec
|
4
4
|
gemspec
|
5
|
+
semtools_dev_path = File.expand_path('~/dev_gems/semtools')
|
6
|
+
gem "semtools", github: "seoanezonjic/semtools", branch: "master" if Dir.exist?(semtools_dev_path)
|
7
|
+
expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
|
8
|
+
gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
|
data/NetAnalyzer.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["elenarojano@uma.es, seoanezonjic@hotmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Network analysis tool that calculate and validate different association indices.}
|
13
|
-
spec.description = %q{NetAnalyzer is a useful network analysis tool developed in Ruby that can 1) analyse any type of unweighted network, regardless of the number of layers, 2) calculate the relationship between different layers, using various association indices (Jaccard, Simpson, PCC, geometric, cosine and hypergeometric) and 3) validate the results}
|
13
|
+
spec.description = %q{DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/NetAnalyzer. NetAnalyzer is a useful network analysis tool developed in Ruby that can 1) analyse any type of unweighted network, regardless of the number of layers, 2) calculate the relationship between different layers, using various association indices (Jaccard, Simpson, PCC, geometric, cosine and hypergeometric) and 3) validate the results}
|
14
14
|
spec.homepage = "https://github.com/ElenaRojano/NetAnalyzer"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
@@ -19,9 +19,19 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "
|
23
|
-
spec.add_development_dependency "
|
24
|
-
spec.add_development_dependency "
|
25
|
-
spec.add_dependency "
|
26
|
-
spec.add_dependency "
|
22
|
+
spec.add_development_dependency "rake", ">= 13.0.3"
|
23
|
+
spec.add_development_dependency "rspec"
|
24
|
+
spec.add_development_dependency "minitest"
|
25
|
+
spec.add_dependency "cmath", ">= 1.0.0"
|
26
|
+
spec.add_dependency "numo-linalg", ">= 0.1.5"
|
27
|
+
spec.add_dependency "numo-narray", ">= 0.9.1.9"
|
28
|
+
spec.add_dependency "pp", ">= 0.1.0"
|
29
|
+
spec.add_dependency "npy", ">= 0.2.0"
|
30
|
+
spec.add_dependency "bigdecimal", ">= 3.0.0"
|
31
|
+
spec.add_dependency "gv", ">= 0.1.0"
|
32
|
+
spec.add_dependency "semtools", ">= 0.1.1"
|
33
|
+
spec.add_dependency "expcalc"
|
34
|
+
spec.add_dependency "parallel"
|
35
|
+
spec.add_dependency "rubystats"
|
36
|
+
spec.add_dependency "red-colors"
|
27
37
|
end
|
data/README.md
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
# NetAnalyzer
|
2
2
|
|
3
|
+
DEPRECATED PROJECT. MIGRATED TO [python semtools](https://github.com/seoanezonjic/NetAnalyzer)
|
4
|
+
|
3
5
|
NetAnalyzer is a network analysis tool that can be used to calculate the associations between nodes in unweighted n-partite networks [1]. The calculation of the association between nodes is based on similarity indices (Jaccard, Simpson, geometric and cosine), statistic-based (Pearson correlation coefficient, CSI and hypergeometric) [2] and a special metric designed only for tripartite networks (here called as 'transference' method [3]). The user can choose the association index method according to the network to analyse. The tool gives a table of results, with all the associations between nodes and the association value calculated.
|
6
|
+
|
7
|
+
If you use this tool, please cite us: [1] E. Rojano, P. Seoane, A. Bueno, J. R. Perkins & J. A. G. Ranea. Revealing the Relationship Between Human Genome Regions and Pathological Phenotypes Through Network Analysis. Lecture Notes in Computer Science, Vol 10208, 197-207 (2017).
|
4
8
|
|
5
|
-
|
9
|
+
[2] Fuxman-Bass et al. Using networks to measure similarity between genes: association index selection. Nature Methods, 10(12):1169-76. 2013.
|
10
|
+
|
11
|
+
[3] Alaimo et al. ncPred: ncRNA-Disease Association Prediction through Tripartite Network-Based Inference. Frontiers in Bioengineering and Biotechnology, 2:71, 2014.
|
6
12
|
|
7
13
|
## Installation
|
8
14
|
|
@@ -32,7 +38,6 @@ Once nmatrix gem is installed:
|
|
32
38
|
gem install 'NetAnalyzer'
|
33
39
|
```
|
34
40
|
|
35
|
-
|
36
41
|
## Usage
|
37
42
|
|
38
43
|
The program NetAnalyzer.rb can analyse an unweighted network to calculate the association index between different nodes.
|
@@ -47,9 +52,18 @@ Where:
|
|
47
52
|
-i: Input file with the network to analyse. It must have two columns (separated by tabs by default) that represent the related nodes (NodeA\tNodeB). If you have doubts about the format, please check the example provided.
|
48
53
|
-l: Layers construction. Please consider that, depending on the n-partite network you provide, NetAnalyzer will transform it into a bipartite one to perform the analysis (except if the association method used is 'transference'). Each layer must contain an identifier for the layer, and a character or pattern that identifies its nodes. In this example, the bipartite network has HPO terms (with 'HP:' string in each of them) and patients that have these HPO terms (they are given as numerical patient IDs). Both layers must be separated by ';'.
|
49
54
|
-m: Association method. There are 8 different association methods to choose: 'jaccard', 'cosine', 'pcc', 'csi', 'hypergeometric', 'simpson', 'geometric' and 'transference'.
|
55
|
+
-u: Set which layer establishes the connections between nodes in the other layer. In this case, we will get which patients are associated with other patients through the HPO terms they share.
|
50
56
|
-a: Associations output file name. Here you can find the associations between nodes in the network and the calculated association value, according to the chosen method.
|
51
57
|
```
|
52
58
|
|
59
|
+
Optional flags:
|
60
|
+
|
61
|
+
```
|
62
|
+
-s: Split character. Change if the layers of the network are not separated by tabs.
|
63
|
+
-o: Output file name.
|
64
|
+
|
65
|
+
```
|
66
|
+
|
53
67
|
## Development
|
54
68
|
|
55
69
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/Rakefile
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
-
|
2
|
+
require "rake/testtask"
|
3
|
+
require 'rdoc/task'
|
3
4
|
|
4
|
-
|
5
|
+
Rake::TestTask.new(:test) do |t|
|
6
|
+
t.libs << "test"
|
7
|
+
t.libs << "lib"
|
8
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
9
|
+
end
|
5
10
|
|
6
|
-
|
11
|
+
RDoc::Task.new do |rdoc|
|
12
|
+
rdoc.main = "README.md" # must name a file passed to rdoc_files.include (README.md); "README.doc" is not among them
|
13
|
+
rdoc.rdoc_files.include("README.md", "lib/*.rb", "lib/NetAnalyzer/*.rb")
|
14
|
+
rdoc.options << "--all"
|
15
|
+
end
|
7
16
|
|
8
|
-
|
9
|
-
|
10
|
-
Rake::TestTask.new do |t|
|
11
|
-
t.libs << 'test'
|
12
|
-
t.pattern = "test/*_test.rb"
|
13
|
-
end
|
17
|
+
task :default => :test
|
data/bin/NetAnalyzer.rb
CHANGED
@@ -1,11 +1,21 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
ROOT_PATH = File.dirname(__FILE__)
|
4
|
-
|
5
|
-
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'NetAnalyzer', 'methods'))
|
6
|
-
|
7
|
-
require 'network'
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
8
5
|
require 'optparse'
|
6
|
+
require 'benchmark'
|
7
|
+
require 'NetAnalyzer'
|
8
|
+
|
9
|
+
######################################
|
10
|
+
## METHODS
|
11
|
+
######################################
|
12
|
+
# Reads a tab-separated text file into an array of rows.
#
# @param path [String] path of the file to read
# @return [Array<Array<String>>] one array of tab-split fields per input line
def load_file(path)
  # File.foreach closes the file once iteration finishes; the previous
  # File.open(path).each form never closed the handle explicitly.
  File.foreach(path).map { |line| line.chomp.split("\t") }
end
|
9
19
|
|
10
20
|
##############################
|
11
21
|
#OPTPARSE
|
@@ -20,11 +30,26 @@ OptionParser.new do |opts|
|
|
20
30
|
options[:input_file] = input_file
|
21
31
|
end
|
22
32
|
|
33
|
+
options[:node_file] = nil
|
34
|
+
opts.on("-n", "--node_names_file PATH", "File with node names corresponding to the input matrix, only use when -i is set to bin or matrix.") do |node_file|
|
35
|
+
options[:node_file] = node_file
|
36
|
+
end
|
37
|
+
|
38
|
+
options[:input_format] = 'pair'
|
39
|
+
opts.on("-f", "--input_format STRING", "Input file format: pair (default), bin, matrix") do |input_format|
|
40
|
+
options[:input_format] = input_format
|
41
|
+
end
|
42
|
+
|
23
43
|
options[:split_char] = "\t"
|
24
44
|
opts.on("-s", "--split_char STRING", "Character for splitting input file. Default: tab") do |split_char|
|
25
45
|
options[:split_char] = split_char
|
26
46
|
end
|
27
47
|
|
48
|
+
options[:use_pairs] = :conn
|
49
|
+
opts.on("-P", "--use_pairs STRING", "Which pairs must be computed. 'all' means all posible pair node combinations and 'conn' means the pair are truly connected in the network. Default 'conn' ") do |use_pairs|
|
50
|
+
options[:use_pairs] = use_pairs.to_sym
|
51
|
+
end
|
52
|
+
|
28
53
|
options[:output_file] = "network2plot"
|
29
54
|
opts.on("-o", "--output_file PATH", "Output file name") do |output_file|
|
30
55
|
options[:output_file] = output_file
|
@@ -35,6 +60,11 @@ OptionParser.new do |opts|
|
|
35
60
|
options[:assoc_file] = output_file
|
36
61
|
end
|
37
62
|
|
63
|
+
options[:kernel_file] = "kernel_values"
|
64
|
+
opts.on("-K", "--kernel_file PATH", "Output file name for kernel values") do |output_file|
|
65
|
+
options[:kernel_file] = output_file
|
66
|
+
end
|
67
|
+
|
38
68
|
options[:performance_file] = "perf_values.txt"
|
39
69
|
opts.on("-p", "--performance_file PATH", "Output file name for performance values") do |output_file|
|
40
70
|
options[:performance_file] = output_file
|
@@ -42,8 +72,8 @@ OptionParser.new do |opts|
|
|
42
72
|
|
43
73
|
options[:layers] = [:layer, '-']
|
44
74
|
opts.on("-l", "--layers STRING", "Layer definition on network: layer1name,regexp1;layer2name,regexp2...") do |layers|
|
45
|
-
|
46
|
-
|
75
|
+
layers_definition = layers.split(";").map{|layer_attr| layer_attr.split(',')}
|
76
|
+
layers_definition.map!{|layer_attr| [layer_attr.first.to_sym, /#{layer_attr.last}/]}
|
47
77
|
options[:layers] = layers_definition
|
48
78
|
end
|
49
79
|
|
@@ -62,28 +92,118 @@ OptionParser.new do |opts|
|
|
62
92
|
options[:output_style] = output_style
|
63
93
|
end
|
64
94
|
|
95
|
+
options[:ontologies] = []
|
96
|
+
opts.on("-O", "--ontology STRING", "String that define which ontologies must be used with each layer. String definition:'layer_name1:path_to_obo_file1;layer_name2:path_to_obo_file2'") do |ontologies|
|
97
|
+
options[:ontologies] = ontologies.split(';').map{|pair| pair.split(':')}
|
98
|
+
end
|
99
|
+
|
65
100
|
options[:meth] = nil
|
66
101
|
opts.on("-m", "--association_method STRING", "Association method to use on network") do |meth|
|
67
102
|
options[:meth] = meth.to_sym
|
68
103
|
end
|
69
104
|
|
70
|
-
options[:
|
105
|
+
options[:kernel] = nil
|
106
|
+
opts.on("-k", "--kernel_method STRING", "Kernel operation to perform with the adjacency matrix") do |kernel|
|
107
|
+
options[:kernel] = kernel
|
108
|
+
end
|
109
|
+
|
110
|
+
options[:no_autorelations] = false
|
71
111
|
opts.on("-N", "--no_autorelations", "Remove association values between nodes os same type") do
|
72
|
-
options[:no_autorelations] =
|
112
|
+
options[:no_autorelations] = true
|
73
113
|
end
|
74
114
|
|
75
|
-
|
115
|
+
options[:normalize_kernel] = false
|
116
|
+
opts.on("-z", "--normalize_kernel_values", "Apply cosine normalization to the obtained kernel") do
|
117
|
+
options[:normalize_kernel] = true
|
118
|
+
end
|
119
|
+
|
120
|
+
options[:graph_file] = nil
|
121
|
+
opts.on("-g", "--graph_file PATH", "Build a graphic representation of the network") do |item|
|
122
|
+
options[:graph_file] = item
|
123
|
+
end
|
124
|
+
|
125
|
+
options[:graph_options] = {method: 'el_grapho', layout: 'forcedir', steps: '30'}
|
126
|
+
opts.on("--graph_options STRING", "Set graph parameters as 'NAME1=value1,NAME2=value2,...") do |item|
|
127
|
+
options[:graph_options] = {}
|
128
|
+
item.split(',').each do |pair|
|
129
|
+
fields = pair.split('=')
|
130
|
+
options[:graph_options][fields.first.to_sym] = fields.last
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
options[:threads] = 0
|
135
|
+
opts.on( '-T', '--threads INTEGER', 'Number of threads to use in computation, one thread will be reserved as manager.' ) do |opt|
|
136
|
+
options[:threads] = opt.to_i - 1
|
137
|
+
end
|
138
|
+
|
139
|
+
options[:reference_nodes] = []
|
140
|
+
opts.on("-r", "--reference_nodes STRING", "Node ids comma separared") do |item|
|
141
|
+
options[:reference_nodes] = item.split(',')
|
142
|
+
end
|
76
143
|
|
144
|
+
options[:group_nodes] = {}
|
145
|
+
opts.on("-G", "--group_nodes STRING", "File path or groups separated by ';' and group node ids comma separared") do |item|
|
146
|
+
if File.exists?(item)
|
147
|
+
File.open(item).each do |line|
|
148
|
+
groupID, nodeID = line.chomp.split("\t")
|
149
|
+
query = options[:group_nodes][groupID]
|
150
|
+
query.nil? ? options[:group_nodes][groupID] = [nodeID] : query << nodeID
|
151
|
+
end
|
152
|
+
else
|
153
|
+
item.split(';').each_with_index do |group, i|
|
154
|
+
options[:group_nodes][i] = group.split(',')
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
options[:group_metrics] = false
|
160
|
+
opts.on("-M", "--group_metrics", "Perform group group_metrics") do
|
161
|
+
options[:group_metrics] = true
|
162
|
+
end
|
163
|
+
|
164
|
+
options[:expand_clusters] = nil
|
165
|
+
opts.on("-x", "--expand_clusters STRING", "Method to expand clusters Available methods: sht_path") do |item|
|
166
|
+
options[:expand_clusters] = item
|
167
|
+
end
|
168
|
+
|
169
|
+
options[:get_attributes] = []
|
170
|
+
opts.on("-A", "--attributes STRING", "String separadted by commas with the name of network attribute") do |item|
|
171
|
+
options[:get_attributes] = item.split(',')
|
172
|
+
end
|
173
|
+
|
174
|
+
options[:delete_nodes] = []
|
175
|
+
opts.on("-d", "--delete PATH", "Remove nodes from file. If PATH;r then nodes not included in file are removed") do |item|
|
176
|
+
options[:delete_nodes] = item.split(';')
|
177
|
+
end
|
178
|
+
end.parse!
|
77
179
|
##########################
|
78
180
|
#MAIN
|
79
181
|
##########################
|
80
|
-
|
81
|
-
fullNet = Network.new(options[:layers].map{|layer| layer.first})
|
82
|
-
#puts options[:layers].map{|layer| layer.first}.inspect
|
83
182
|
puts "Loading network data"
|
84
|
-
fullNet.
|
183
|
+
fullNet = Net_parser.load(options)
|
184
|
+
fullNet.reference_nodes = options[:reference_nodes]
|
185
|
+
fullNet.threads = options[:threads]
|
186
|
+
fullNet.group_nodes = options[:group_nodes]
|
187
|
+
fullNet.set_compute_pairs(options[:use_pairs], !options[:no_autorelations])
|
188
|
+
|
189
|
+
if !options[:delete_nodes].empty?
|
190
|
+
node_list = load_file(options[:delete_nodes].first).flatten
|
191
|
+
mode = options[:delete_nodes].length > 1 ? options[:delete_nodes][1] : 'd' # assign the ternary result so 'd' is really used when no mode follows PATH (it was discarded and mode stayed nil)
|
192
|
+
fullNet.delete_nodes(node_list, mode)
|
193
|
+
end
|
85
194
|
|
86
|
-
|
195
|
+
options[:ontologies].each do |layer_name, ontology_file_path|
|
196
|
+
fullNet.link_ontology(ontology_file_path, layer_name.to_sym)
|
197
|
+
end
|
198
|
+
|
199
|
+
if !options[:get_attributes].empty?
|
200
|
+
node_attributes = fullNet.get_node_attributes(options[:get_attributes])
|
201
|
+
File.open(File.join(File.dirname(options[:output_file]), 'node_attributes.txt'), 'w' ) do |f|
|
202
|
+
node_attributes.each do |attributes|
|
203
|
+
f.puts(attributes.join("\t"))
|
204
|
+
end
|
205
|
+
end
|
206
|
+
end
|
87
207
|
|
88
208
|
if !options[:meth].nil?
|
89
209
|
puts "Performing association method #{options[:meth]} on network"
|
@@ -100,31 +220,54 @@ if !options[:meth].nil?
|
|
100
220
|
options[:use_layers][1].first,
|
101
221
|
options[:meth])
|
102
222
|
end
|
103
|
-
puts 'Clean autorelations' if options[:no_autorelations]
|
104
|
-
fullNet.clean_autorelations_on_association_values if options[:no_autorelations]
|
105
223
|
File.open(options[:assoc_file], 'w') do |f|
|
106
224
|
fullNet.association_values[options[:meth]].each do |val|
|
107
225
|
f.puts val.join("\t")
|
108
226
|
end
|
109
227
|
end
|
228
|
+
if !options[:control_file].nil?
|
229
|
+
puts "Doing validation on association values obtained from method #{options[:meth]}"
|
230
|
+
control = []
|
231
|
+
File.open(options[:control_file]).each("\n") do |line|
|
232
|
+
line.chomp!
|
233
|
+
control << line.split("\t")
|
234
|
+
end
|
235
|
+
Performancer.load_control(control)
|
236
|
+
predictions = fullNet.association_values[options[:meth]]
|
237
|
+
performance = Performancer.get_pred_rec(predictions)
|
238
|
+
File.open(options[:performance_file], 'w') do |f|
|
239
|
+
f.puts %w[cut prec rec meth].join("\t")
|
240
|
+
performance.each do |item|
|
241
|
+
item << options[:meth].to_s
|
242
|
+
f.puts item.join("\t")
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
puts "End of analysis: #{options[:meth]}"
|
110
247
|
end
|
111
248
|
|
112
|
-
if !options[:
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
File.open(options[:performance_file], 'w') do |f|
|
122
|
-
f.puts %w[cut prec rec meth].join("\t")
|
123
|
-
performance.each do |item|
|
124
|
-
item << options[:meth].to_s
|
125
|
-
f.puts item.join("\t")
|
126
|
-
end
|
127
|
-
end
|
249
|
+
if !options[:kernel].nil?
|
250
|
+
layer2kernel = options[:use_layers].first # we use only a layer to perform the kernel, so only one item it is selected.
|
251
|
+
fullNet.get_kernel(layer2kernel, options[:kernel], options[:normalize_kernel])
|
252
|
+
fullNet.write_kernel(layer2kernel, options[:kernel_file])
|
253
|
+
end
|
254
|
+
|
255
|
+
if !options[:graph_file].nil?
|
256
|
+
options[:graph_options][:output_file] = options[:graph_file]
|
257
|
+
fullNet.plot_network(options[:graph_options])
|
128
258
|
end
|
129
259
|
|
130
|
-
|
260
|
+
if options[:group_metrics]
|
261
|
+
fullNet.compute_group_metrics(File.join(File.dirname(options[:output_file]), 'group_metrics.txt'))
|
262
|
+
end
|
263
|
+
|
264
|
+
if !options[:expand_clusters].nil?
|
265
|
+
expanded_clusters = fullNet.expand_clusters(options[:expand_clusters])
|
266
|
+
File.open(File.join(File.dirname(options[:output_file]), 'expand_clusters.txt'), 'w' ) do |f|
|
267
|
+
expanded_clusters.each do |cl_id, nodes|
|
268
|
+
nodes.each do |node|
|
269
|
+
f.puts "#{cl_id}\t#{node}"
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
##############################
|
6
|
+
#FUNCTIONS
|
7
|
+
##############################
|
8
|
+
|
9
|
+
|
10
|
+
# Loads cluster membership from a delimited text file.
#
# @param options [Hash] reads :input_file (path), :column_sep (field separator),
#   :cluster_index and :node_index (0-based field positions) and :node_sep
#   (separator used to split an aggregated node field, or nil to keep it whole)
# @return [Hash{String => Array}] cluster id => node ids, in file order
def load_clusters(options)
  clusters = {}
  # File.foreach closes the file when iteration ends; File.open(...).each did not.
  File.foreach(options[:input_file]) do |line|
    fields = line.chomp.split(options[:column_sep])
    members = (clusters[fields[options[:cluster_index]]] ||= [])
    node = fields[options[:node_index]]
    node = node.split(options[:node_sep]) unless options[:node_sep].nil?
    # Append only the new entries instead of re-flattening the whole
    # accumulated list on every line.
    members.concat([node].flatten)
  end
  clusters
end
|
23
|
+
|
24
|
+
|
25
|
+
# Draws one random cluster per requested size from the given nodes.
#
# Cluster i is sampled with Random.new(seed + i) and stored under the key
# "<i>_random". Without replacement, the nodes picked for a cluster are
# removed from the pool before the next cluster is drawn.
#
# @param nodes [Array] candidate node ids (not modified)
# @param replacement [Boolean] when true, the pool is left intact between clusters
# @param all_sizes [Array<Integer>] requested size of each cluster
# @param seed [Integer] base seed for the random generator
# @return [Hash{String => Array}] generated clusters
# NOTE(review): the size guard checks the raw pool while sampling uses the
# de-duplicated pool, so a cluster can come out smaller than requested when
# the pool holds repeats - confirm whether that is intended.
def random_sample(nodes, replacement, all_sizes, seed)
  pool = nodes.dup
  all_sizes.each_with_index.each_with_object({}) do |(size, idx), sampled|
    if size > pool.size
      abort("Not enough nodes to generate clusters. Please activate replacement or change random mode")
    end
    picked = pool.uniq.sample(size, random: Random.new(seed + idx))
    pool -= picked unless replacement
    sampled["#{idx}_random"] = picked
  end
end
|
39
|
+
|
40
|
+
# Writes clusters to a tab-separated file, one "cluster<TAB>node" row per line.
#
# @param clusters [Hash{String => Array}] cluster id => node ids
# @param output_file [String] path of the file to (over)write
# @param sep [String, nil] when given, the nodes of a cluster are joined with
#   it and written as a single row; when nil, each node gets its own row
def write_clusters(clusters, output_file, sep)
  File.open(output_file, 'w') do |out|
    clusters.each do |cluster_id, members|
      rows = sep.nil? ? members : [members.join(sep)]
      rows.each { |entry| out.puts [cluster_id, entry].flatten.join("\t") }
    end
  end
end
|
50
|
+
|
51
|
+
##############################
|
52
|
+
#OPTPARSE
|
53
|
+
##############################
|
54
|
+
|
55
|
+
options = {}
|
56
|
+
OptionParser.new do |opts|
|
57
|
+
opts.banner = "Usage: #{__FILE__} [options]"
|
58
|
+
|
59
|
+
options[:input_file] = nil
|
60
|
+
opts.on("-i", "--input_file PATH", "Input file") do |input_file|
|
61
|
+
options[:input_file] = input_file
|
62
|
+
end
|
63
|
+
|
64
|
+
options[:node_index] = 1
|
65
|
+
opts.on("-N", "--node_column INTEGER", "Number of the nodes column. Default = #{options[:node_index]}") do |node_i|
|
66
|
+
options[:node_index] = node_i.to_i - 1
|
67
|
+
end
|
68
|
+
|
69
|
+
options[:cluster_index] = 0
|
70
|
+
opts.on("-C", "--cluster_column INTEGER", "Number of the clusters column. Default = #{options[:cluster_index]}" ) do |cluster_i|
|
71
|
+
options[:cluster_index] = cluster_i.to_i - 1
|
72
|
+
end
|
73
|
+
|
74
|
+
options[:column_sep] = "\t"
|
75
|
+
opts.on("-S", "--split_char CHARACTER", "Character for splitting input file. Default: tab") do |split_char|
|
76
|
+
options[:column_sep] = split_char
|
77
|
+
end
|
78
|
+
|
79
|
+
options[:node_sep] = nil
|
80
|
+
opts.on("-s", "--node_sep CHARACTER", "Node split character. This option must to be used when input file is aggregated.") do |split_char|
|
81
|
+
options[:node_sep] = split_char
|
82
|
+
end
|
83
|
+
|
84
|
+
options[:random_type] = ["size"]
|
85
|
+
opts.on("-r", "--random_type STRING", "Indicate random mode. 'size' for radomize clusters with the same size as input clusters. 'full_size' same as 'size' but all nodes are repaeted as same as input. 'fixed:n:s' for generate 'n' clusters of 's' nodes. Default = #{options[:output_file]}") do |random_type|
|
86
|
+
options[:random_type] = random_type.split(":")
|
87
|
+
end
|
88
|
+
|
89
|
+
options[:replacement] = false
|
90
|
+
opts.on("-R", "--replacement", "Boolean. Activates ramdom sampling with replacement. Sampling witout replacement will be executed instead.") do
|
91
|
+
options[:replacement] = true
|
92
|
+
end
|
93
|
+
|
94
|
+
options[:output_file] = "./random_clusters.txt"
|
95
|
+
opts.on("-o", "--output_file FILEPATH", "Output file") do |output_file|
|
96
|
+
options[:output_file] = output_file
|
97
|
+
end
|
98
|
+
|
99
|
+
options[:aggregate_sep] = nil
|
100
|
+
opts.on("-a", "--aggregate_sep CHARACTER", "This option activates aggregation in output. Separator character must be provided") do |split_char|
|
101
|
+
options[:aggregate_sep] = split_char
|
102
|
+
end
|
103
|
+
|
104
|
+
end.parse!
|
105
|
+
##########################
|
106
|
+
#MAIN
|
107
|
+
##########################
|
108
|
+
|
109
|
+
clusters = load_clusters(options)
|
110
|
+
|
111
|
+
nodes = clusters.values.flatten
|
112
|
+
nodes = nodes.uniq if options[:random_type][0] != "full_size" # `!x == y` parses as `(!x) == y`, which is always false, so duplicates were never removed
|
113
|
+
|
114
|
+
# Decide how many random clusters to build and how large each one is.
if options[:random_type][0].include?("size") && options[:random_type].size == 1
  # 'size' / 'full_size': mirror the sizes of the input clusters.
  all_sizes = clusters.map{|cluster, members| members.size}
elsif options[:random_type][0] == "fixed" && options[:random_type].size == 3
  # 'fixed:n:s': n clusters of s nodes each.
  all_sizes = Array.new(options[:random_type][1].to_i, options[:random_type][2].to_i)
else
  # Without this branch all_sizes stays nil and random_sample fails with a NoMethodError.
  abort("Unknown random type '#{options[:random_type].join(':')}'. Use 'size', 'full_size' or 'fixed:n:s'")
end
|
119
|
+
|
120
|
+
random_clusters = random_sample(nodes, options[:replacement], all_sizes, 123)
|
121
|
+
write_clusters(random_clusters, options[:output_file], options[:aggregate_sep])
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
require 'optparse'
|
6
|
+
require 'benchmark'
|
7
|
+
require 'NetAnalyzer'
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
##############################
|
12
|
+
#OPTPARSE
|
13
|
+
##############################
|
14
|
+
|
15
|
+
options = {}
|
16
|
+
OptionParser.new do |opts|
|
17
|
+
opts.banner = "Usage: #{__FILE__} [options]"
|
18
|
+
|
19
|
+
options[:input_file] = nil
|
20
|
+
opts.on("-i", "--input_file PATH", "Input file") do |input_file|
|
21
|
+
options[:input_file] = input_file
|
22
|
+
end
|
23
|
+
|
24
|
+
options[:node_file] = nil
|
25
|
+
opts.on("-n", "--node_names_file PATH", "File with node names corresponding to the input matrix, only use when -i is set to bin or matrix.") do |node_file|
|
26
|
+
options[:node_file] = node_file
|
27
|
+
end
|
28
|
+
|
29
|
+
options[:input_format] = 'pair'
|
30
|
+
opts.on("-f", "--input_format STRING", "Input file format: pair (default), bin, matrix") do |input_format|
|
31
|
+
options[:input_format] = input_format
|
32
|
+
end
|
33
|
+
|
34
|
+
options[:split_char] = "\t"
|
35
|
+
opts.on("-s", "--split_char CHARACTER", "Character for splitting input file. Default: tab") do |split_char|
|
36
|
+
options[:split_char] = split_char
|
37
|
+
end
|
38
|
+
|
39
|
+
options[:layers] = [:layer, '-']
|
40
|
+
opts.on("-l", "--layers STRING", "Layer definition on network: layer1name,regexp1;layer2name,regexp2...") do |layers|
|
41
|
+
layers_definition = layers.split(";").map{|layer_attr| layer_attr.split(',')}
|
42
|
+
layers_definition.map!{|layer_attr| [layer_attr.first.to_sym, /#{layer_attr.last}/]}
|
43
|
+
options[:layers] = layers_definition
|
44
|
+
end
|
45
|
+
|
46
|
+
options[:type_random] = nil
|
47
|
+
opts.on("-r", "--type_random network", "Randomized basis. 'nodes' Node-baseds randomize or 'links' Links-baseds randomize") do |type_random|
|
48
|
+
options[:type_random] = type_random
|
49
|
+
end
|
50
|
+
|
51
|
+
options[:output_file] = nil
|
52
|
+
opts.on("-o", "--output_file FILEPATH", "Output file") do |output_file|
|
53
|
+
options[:output_file] = output_file
|
54
|
+
end
|
55
|
+
|
56
|
+
end.parse!
|
57
|
+
|
58
|
+
|
59
|
+
##########################
|
60
|
+
#MAIN
|
61
|
+
##########################
|
62
|
+
fullNet = Network.new(options[:layers].map{|layer| layer.first})
|
63
|
+
puts "Loading network data"
|
64
|
+
|
65
|
+
if options[:layers].length == 1
|
66
|
+
layerA = layerB = options[:layers][0].first
|
67
|
+
elsif options[:layers].length == 2
|
68
|
+
layerA = options[:layers][0].first
|
69
|
+
layerB = options[:layers][1].first
|
70
|
+
end
|
71
|
+
|
72
|
+
if options[:input_format] == 'pair'
|
73
|
+
fullNet.load_network_by_pairs(options[:input_file], options[:layers], options[:split_char])
|
74
|
+
elsif options[:input_format] == 'bin' && !options[:node_file].nil?
|
75
|
+
fullNet.load_network_by_bin_matrix(options[:input_file], options[:node_file], options[:layers])
|
76
|
+
elsif options[:input_format] == 'matrix' && !options[:node_file].nil?
|
77
|
+
fullNet.load_network_by_plain_matrix(options[:input_file], options[:node_file], options[:layers], options[:split_char]) # the parser stores the separator under :split_char; :splitChar is never set and passed nil
|
78
|
+
else
|
79
|
+
raise("ERROR: The format #{options[:input_format]} is not defined")
|
80
|
+
exit
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
fullNet.randomize_network(options[:type_random])
|
85
|
+
|
86
|
+
|
87
|
+
#fullNet.save_adjacency_matrix(layerA, layerB, options[:output_file])
|
88
|
+
|
89
|
+
|