DomFun 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 87063a5c0ddf9a988af77b9aaa73e2c7c676f2ad
4
+ data.tar.gz: 725fcefa7c7f464526410e1dcc7f50f6a19adf8f
5
+ SHA512:
6
+ metadata.gz: 13b5059cf7978c2c8fe8583da5118e46725287f5d90658e7f2574ded9eebf8988d61203fe8c576371ded87f93c64d5ef7de18523c1db72ea0180dde8b9f5b881
7
+ data.tar.gz: 47a817f6bdffa4efce7624199125afa94ea30d4cdc8e0b2d13f48dd5c6cf25af4e0af0c344988ea3f0ae14a8f8d043168c266d8c72f303267ca45344e315d9aa
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.4.1
7
+ before_install: gem install bundler -v 2.0.1
data/DomFun.gemspec ADDED
@@ -0,0 +1,44 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "DomFun/version"
5
+
6
# Gem packaging metadata for DomFun.
Gem::Specification.new do |spec|
  spec.name          = "DomFun"
  spec.version       = DomFun::VERSION
  # One array entry per person: RubyGems treats every element as a separate
  # author / contact address, so the names must not be joined in one string.
  spec.authors       = ["Elena Rojano", "Pedro Seoane"]
  spec.email         = ["elenarojano@uma.es", "seoanezonjic@hotmail.com"]

  spec.summary       = %q{Tool to predict protein functions based on domains-FunSys associations.}
  spec.description   = %q{From associations calculated between protein domains and functional systems (FunSys), DomFun can predict the functions of proteins looking up domains and the FunSys that have been associated with. The system is validated using data from CAFA.}
  spec.homepage      = "https://github.com/ElenaRojano/DomFun"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  #
  #   spec.metadata["homepage_uri"] = spec.homepage
  #   spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
  #   spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  # NOTE(review): only files under exe/ are registered as executables; confirm
  # that the command-line scripts of the gem are stored there.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"

  spec.add_dependency "NetAnalyzer", "~> 0.1.5"
end
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in DomFun.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 elenarojano
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # DomFun
2
+
3
+ DomFun is a tool to predict protein functions based on domains-FunSys associations. From associations calculated between protein domains and functional systems (FunSys), DomFun predicts the functions of a protein by looking up its domains and the FunSys associated with them. The system is validated using data from CAFA. The Ruby code of the gem lives in `lib/DomFun`; to experiment with it, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'DomFun'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install DomFun
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ElenaRojano/DomFun.
36
+
37
+ ## License
38
+
39
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,133 @@
1
+ #! /usr/bin/env ruby
2
+ ##########################
3
+ # Rojano E. & Seoane P., June 2019
4
+ # Generate tripartite networks with domains-proteins-FunSys data
5
+ # Protein IDs and FunSys (GO-MF, KEGG and Reactome) from UniProtKB.
6
+ # Protein domains (Superfamilies and FunFams) from CATH.
7
+ ##########################
8
+
9
+ REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
10
+ ROOT_PATH = File.dirname(__FILE__)
11
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'DomFun'))
12
+ require 'generalMethods.rb'
13
+ require 'csv'
14
+ require 'optparse'
15
+
16
+ ##########################
17
+ #METHODS
18
+ ##########################
19
# Writes one tripartite network file per annotation nomenclature and counts
# the proteins that contribute to each of them.
#
# For every protein that has an entry in +cath_data+, one
# "annotation<TAB>protein" edge is emitted per annotation and one
# "domain<TAB>protein" edge per CATH entry. Repeated edges are written once
# and proteins without an entry in +cath_data+ are ignored. All annotation
# edges are written before the domain edges.
#
# nomenclature_annotations - pairs of nomenclature => (protein ID => list of annotations).
# cath_data                - lookup of protein ID => list of domain entries.
# path                     - directory receiving the "network_<nomenclature>" files;
#                            it is created when it does not exist yet.
# protein2gene             - currently unused (gene-level edges are disabled);
#                            kept so existing callers keep working.
#
# Returns a Hash (default 0) of nomenclature => number of distinct proteins
# that produced at least one annotation edge.
def build_tripartite_networks(nomenclature_annotations, cath_data, path, protein2gene)
  # Loaded here because only this method needs it; the output folder is
  # created up front so File.open below cannot fail with ENOENT.
  require 'fileutils'
  FileUtils.mkdir_p(path)

  records = Hash.new(0)
  nomenclature_annotations.each do |nomenclature, protein_annotations|
    annotation_edges = []
    domain_edges = []
    protein_annotations.each do |prot_id, annotations|
      domains = cath_data[prot_id]
      next if domains.nil? # protein without CATH information

      annotations.each { |annotation| annotation_edges << [annotation, prot_id] }
      domains.each { |domain| domain_edges << [domain, prot_id] }
    end
    records[nomenclature] += annotation_edges.map(&:last).uniq.length
    File.open(File.join(path, "network_#{nomenclature}"), 'w') do |f|
      annotation_edges.uniq.each { |pair| f.puts pair.join("\t") }
      domain_edges.uniq.each { |pair| f.puts pair.join("\t") }
    end
  end
  records
end
52
+
53
+ ##########################
54
+ #OPT-PARSER
55
+ ##########################
56
# Command-line options; every key is given its default right before the
# switch that may override it.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  options[:input_annotations] = nil
  opts.on("-a", "--input_annotations PATH", "Input file with gene annotations") do |data|
    options[:input_annotations] = data
  end

  options[:calculate_proteins_by_domain] = false
  opts.on("-c", "--calculate_proteins_by_domain", "Calculate the number of proteins that a domain has") do
    options[:calculate_proteins_by_domain] = true
  end

  options[:input_domains] = nil
  opts.on("-d", "--input_domains PATH", "Input file with protein domains") do |data|
    options[:input_domains] = data
  end

  # NOTE(review): passing -f switches this option OFF (default is true);
  # the option is not read anywhere in this script.
  options[:search_domain] = true
  opts.on("-f", "--search_domain", "Search full protein domains. If false, search funfams") do
    options[:search_domain] = false
  end

  options[:annotation_types] = %w[kegg reactome go]
  opts.on("-p", "--annotation_types STRING", "List of annotation types separated by commas") do |data|
    options[:annotation_types] = data.split(",")
  end

  options[:output_stats] = 'uniprot_stats.txt'
  opts.on("-s", "--output_stats PATH", "Output file with UniProt stats") do |data|
    options[:output_stats] = data
  end

  options[:category_type] = 'funfamID'
  opts.on("-t", "--category_type STRING", "Input category of domains. Options: funfamID, superfamilyID") do |data|
    options[:category_type] = data
  end

  options[:unnanotated_proteins] = 'unnanotated_proteins_list.txt'
  opts.on("-u", "--unnanotated_proteins PATH", "Output file with unnanotated proteins list") do |data|
    options[:unnanotated_proteins] = data
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end

end.parse!

##########################
#MAIN
##########################

puts "Loading data..."
cath_data, protein2gene, gene2proteins, cath_proteins_number = load_cath_data(options[:input_domains], options[:category_type])
nomenclature_annotations, number_of_proteins, proteins_without_annotations = load_proteins_file(options[:input_annotations], options[:annotation_types])

# Any category other than 'funfamID' is stored with the superfamily networks.
networks_path = options[:category_type] == 'funfamID' ? 'networks/funfam_networks' : 'networks/superfamily_networks'

puts "Generating tripartite networks. This can take a while, please wait."
protein_stats = build_tripartite_networks(nomenclature_annotations, cath_data, networks_path, protein2gene)

# Block form closes each output file as soon as it is written; the stats
# handle used to be overwritten by the second File.open and was never closed.
File.open(options[:output_stats], 'w') do |handler|
  # The per-network count has its own name so it cannot be confused with the
  # overall number_of_proteins reported below.
  protein_stats.each do |annotation_type, proteins_in_network|
    handler.puts "#{annotation_type}\t#{proteins_in_network}"
  end
  handler.puts "Total of Uniprot proteins\t#{number_of_proteins}"
  handler.puts "Total of Uniprot proteins without annotations\t#{proteins_without_annotations.length}"
  handler.puts "Total of CATH proteins\t#{cath_proteins_number}"
end

File.open(options[:unnanotated_proteins], 'w') do |handler|
  proteins_without_annotations.each do |unnanotated_prot|
    handler.puts unnanotated_prot
  end
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "DomFun"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,287 @@
1
+ #! /usr/bin/env ruby
2
+ ##########################
3
+ # Rojano E. & Seoane P., June 2019
4
+ # Domain to functional annotation predictor
5
+ # Based on domain-annotation association, this predictor can add functions to a group of domains of a protein
6
+ # It predict the most putative functions associated to a protein based on their domains.
7
+ # Protein IDs and FunSys (GO-MF, KEGG and Reactome) from UniProtKB.
8
+ # Protein domains (Superfamilies and FunFams) from CATH.
9
+ ##########################
10
+
11
+ REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
12
+ ROOT_PATH = File.dirname(__FILE__)
13
+ $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'DomFun'))
14
+ require 'generalMethods.rb'
15
+ require 'csv'
16
+ require 'optparse'
17
+ require "statistics2"
18
+ require "terminal-table"
19
+ require 'report_html'
20
+
21
+
22
+ ##########################
23
+ #METHODS
24
+ ##########################
25
+
26
# Returns the distinct domains stored for +protein+ in +cath_data+, or nil
# when +cath_data+ has no entry for that protein.
def get_protein_domains(cath_data, protein)
  # Single lookup; safe navigation keeps the nil result for unknown proteins.
  cath_data[protein]&.uniq
end
33
+
34
+
35
# Loads a tab-separated associations file whose columns are read as
# annotation, domain and association value.
#
# associations_file - path of the file to read.
# threshold         - rows whose value (converted with to_f) is below this
#                     number are discarded.
#
# Returns a Hash of domain => [[annotation, association_value], ...] in file
# order. Lines that do not provide the three columns (for example blank
# lines) are skipped.
def load_domain_to_pathway_association(associations_file, threshold)
  domain_to_pathway_associations = {}
  # File.foreach closes the file when the iteration ends.
  File.foreach(associations_file) do |line|
    annotation, domain, association_value = line.chomp.split("\t")
    next if domain.nil? || association_value.nil? # blank or truncated line

    association_value = association_value.to_f
    next if association_value < threshold

    (domain_to_pathway_associations[domain] ||= []) << [annotation, association_value]
  end
  domain_to_pathway_associations
end
51
+
52
# Loads a tab-separated file whose first column is a protein ID and whose
# second column is a comma-separated list of domains.
#
# domains_file - path of the file to read.
#
# Returns a Hash of protein ID => [domain, ...]. When a protein ID appears
# more than once the last line wins. Lines without a second column (for
# example blank lines) are skipped instead of raising NoMethodError.
def load_domains_to_predict(domains_file)
  domains_to_predict = {}
  # File.foreach closes the file when the iteration ends.
  File.foreach(domains_file) do |line|
    protein_id, domains = line.chomp.split("\t")
    next if domains.nil? # no domain column on this line

    domains_to_predict[protein_id] = domains.split(',')
  end
  domains_to_predict
end
61
+
62
# Collects the known associations of each domain to predict.
#
# domains_to_predict             - list of domain identifiers.
# domain_to_pathway_associations - lookup of domain => associations.
#
# Returns a Hash of domain => associations that only contains the domains
# for which the lookup returned something other than nil.
def search4function(domains_to_predict, domain_to_pathway_associations)
  domains_to_predict.each_with_object({}) do |domain, found|
    associations = domain_to_pathway_associations[domain]
    found[domain] = associations unless associations.nil?
  end
end
73
+
74
+
75
# Regroups per-domain associations by annotation.
#
# domain_to_function_and_association_value - pairs of
#   domain => [[annotation_id, association_score], ...].
#
# Returns two Hashes:
#   function_to_domains - annotation_id => [domain, ...] in iteration order.
#   association_scores  - annotation_id => { domain => association_score }.
def group_by_function(domain_to_function_and_association_value)
  function_to_domains = {}
  association_scores = {}
  domain_to_function_and_association_value.each do |domain, annotations|
    annotations.each do |annotation_id, association_score|
      # "create on first use, then append" for both indexes
      (function_to_domains[annotation_id] ||= []) << domain
      (association_scores[annotation_id] ||= {})[domain] = association_score
    end
  end
  return function_to_domains, association_scores
end
96
+
97
# Builds the annotation x domain score matrix.
#
# function_to_domains - Hash whose keys are the annotation IDs; one row is
#                       produced per key, in key order.
# association_scores  - annotation ID => { domain => association score }.
# domains_to_predict  - list of domains; one column per element, in order.
# null_value          - value stored where an annotation has no score for a
#                       domain (default 0).
#
# Returns an Array of rows (Arrays) holding the scores.
def generate_domain_annotation_matrix(function_to_domains, association_scores, domains_to_predict, null_value=0)
  function_to_domains.keys.map do |function_id|
    scores = association_scores[function_id]
    domains_to_predict.map do |user_domain|
      value = scores[user_domain]
      value.nil? ? null_value : value
    end
  end
end
116
+
117
# Combines the association values of every annotation into a single score,
# appends that score to the annotation's entry in +function_to_domains+ and
# then removes, in place, the annotations that do not pass the threshold.
#
# function_to_domains      - Hash annotation => Array; row i of the matrix
#                            belongs to the i-th value. Each Array receives
#                            its combined score as last element.
# domain_annotation_matrix - Array of rows with the association values.
# scoring_system           - 'fisher', 'harmonic', 'stouffer', 'average' or
#                            'sum'; anything else aborts the program.
# freedom_degree           - only 'maxnum' is supported: the sample length is
#                            the largest number of non-null values found in a
#                            row. Anything else aborts the program.
# null_value               - value that marks "no association" in the matrix
#                            (default 0); used to count the non-null values.
# pvalue_threshold         - 'fisher' and 'harmonic' keep scores <= threshold,
#                            the remaining methods keep scores >= threshold.
#
# Returns the result of Hash#select! (nil when nothing was removed); callers
# should read the filtered +function_to_domains+ instead.
def scoring_funsys(function_to_domains, domain_annotation_matrix, scoring_system, freedom_degree='maxnum', null_value=0, pvalue_threshold)
  domains_array = function_to_domains.values
  # Count against null_value (not a literal 0) so a custom null marker works.
  max_cluster_length = domain_annotation_matrix.map { |row| row.count { |value| value != null_value } }.max if freedom_degree == 'maxnum'
  domain_annotation_matrix.each_with_index do |associations, i|
    abort("Invalid freedom degree calculation method: #{freedom_degree}") unless freedom_degree == 'maxnum'
    sample_length = max_cluster_length

    combined_score =
      case scoring_system
      when 'fisher'
        # https://en.wikipedia.org/wiki/Fisher%27s_method
        # The method needs natural logarithms; per the original note the
        # association values arrive log10-scaled, so each one is turned back
        # into 10 ** -a before taking ln.
        lns = associations.map { |a| Math.log(10 ** -a) }
        sum = lns.inject(0) { |s, a| s + a }
        # presumably the chi-square probability for 2 * sample_length degrees
        # of freedom -- confirm against the statistics2 documentation
        Statistics2.chi2_x(sample_length * 2, -2 * sum)
      when 'harmonic'
        # harmonic mean of the 10 ** -a values
        pvalues = associations.map { |a| 10 ** -a }
        inverse_sum = pvalues.map { |n| 1.fdiv(n) }.inject(0) { |s, x| s + x }
        associations.length.fdiv(inverse_sum)
      when 'stouffer'
        associations.inject(0) { |s, x| s + x } / Math.sqrt(sample_length)
      when 'average'
        associations.inject(0) { |s, x| s + x.abs }.fdiv(associations.length)
      when 'sum'
        associations.inject(0) { |s, x| s + x.abs }
      else
        abort("Invalid integration method: #{scoring_system}")
      end
    domains_array[i] << combined_score
  end
  if scoring_system == 'fisher' || scoring_system == 'harmonic'
    function_to_domains.select! { |_function, attributes| attributes.last <= pvalue_threshold }
  else
    function_to_domains.select! { |_function, attributes| attributes.last >= pvalue_threshold }
  end
end
163
+
164
+
165
# Renders the predictions with the 'report_data.erb' template found in
# REPORT_FOLDER and writes the resulting report to +html_file+.
#
# predictions - data handed to the template under the :predictions key.
# html_file   - destination passed to Report_html#write.
def report_data(predictions, html_file)
  container = { :predictions => predictions }
  # File.read opens, reads and closes the template in a single call.
  template = File.read(File.join(REPORT_FOLDER, 'report_data.erb'))
  report = Report_html.new(container, 'Protein domains and FunSys predictions summary')
  report.build(template)
  report.write(html_file)
end
172
+
173
+ ##########################
174
+ #OPT-PARSER
175
+ ##########################
176
+
177
# Command-line options; every key is given its default right before the
# switch that may override it.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  options[:input_associations] = nil
  opts.on("-a", "--input_associations PATH", "Domain-function associations") do |data|
    options[:input_associations] = data
  end

  options[:domain_category] = "superfamilyID"
  opts.on("-c", "--domain_category PATH", "Domain category. Please choose one: superfamilyID or funfamID" ) do |data|
    options[:domain_category] = data
  end

  options[:protein_domains_file] = nil
  opts.on("-f", "--protein_domains_file PATH", "Input protein-domains file from CATH") do |data|
    options[:protein_domains_file] = data
  end

  options[:integration_method] = 'fisher'
  opts.on("-i", "--integration_method STRING", "Integration method") do |data|
    options[:integration_method] = data
  end

  options[:output_file] = 'predictions_file.txt'
  opts.on("-o", "--output_file PATH", "Predictions file") do |data|
    options[:output_file] = data
  end

  options[:proteins_2predict] = nil
  opts.on("-p", "--proteins_2predict PATH", "Protein to predict. Please use UniProt IDs" ) do |data|
    options[:proteins_2predict] = data
  end

  options[:pvalue_threshold] = 0.05
  opts.on("-t", "--pvalue_threshold FLOAT", "P-value threshold") do |pvalue_threshold|
    options[:pvalue_threshold] = pvalue_threshold.to_f
  end

  options[:association_threshold] = 2
  opts.on("-T", "--association_threshold FLOAT", "Association value threshold") do |association_threshold|
    options[:association_threshold] = association_threshold.to_f
  end

  options[:multiple_proteins] = false
  opts.on("-u", "--multiple_proteins", "Set if multiple profiles") do
    options[:multiple_proteins] = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end

end.parse!

##########################
#MAIN
##########################

# File.exist?(nil) raises TypeError, so a missing -p is reported explicitly
# before any data is loaded.
abort("Missing required option: --proteins_2predict PATH") if options[:proteins_2predict].nil?

# 1. Load protein domains classification to get domains from proteins to predict
cath_data, protein2gene, gene2proteins, cath_proteins_number = load_cath_data(options[:protein_domains_file], options[:domain_category])

# 2. Load protein(s) to predict
# The option value is either the path of a file with one entry per line or
# the entries themselves ('|' separates entries, '!' separates profiles).
# NOTE(review): without -u, and with '!'-separated text, every element of the
# resulting list is an Array of IDs, and that Array is later used as the key
# for the cath_data lookup -- confirm this matches the keys of cath_data.
if File.exist?(options[:proteins_2predict])
  # File.readlines closes the file once it has been read.
  listed_proteins = File.readlines(options[:proteins_2predict]).map { |line| line.chomp }
  options[:proteins_2predict] = options[:multiple_proteins] ? listed_proteins : [listed_proteins]
elsif options[:multiple_proteins]
  options[:proteins_2predict] = options[:proteins_2predict].split('!').map { |profile| profile.split('|') }
else
  options[:proteins_2predict] = [options[:proteins_2predict].split('|')]
end

# 3. Load domain-FunSys associations
domain_to_pathways_associations = load_domain_to_pathway_association(options[:input_associations], options[:association_threshold])

# 4. Prediction
# Block form guarantees the predictions file is closed even if a step fails.
File.open(options[:output_file], 'w') do |handler|
  options[:proteins_2predict].each do |protein|
    domains = get_protein_domains(cath_data, protein)
    next if domains.nil?

    null_value = 0
    domain_function_assocValue = search4function(domains, domain_to_pathways_associations)

    function_to_domains, association_scores = group_by_function(domain_function_assocValue)
    annotation_matrix = generate_domain_annotation_matrix(function_to_domains, association_scores, domains, null_value)

    # Appends the combined score to every entry of function_to_domains and
    # drops the entries that do not pass the threshold.
    scoring_funsys(
      function_to_domains,
      annotation_matrix,
      options[:integration_method],
      'maxnum',
      null_value,
      options[:pvalue_threshold]
    )

    function_to_domains.each do |funsys, domains_data|
      score = domains_data.pop # last element is the combined score
      handler.puts "#{protein}\t#{domains_data.join(',')}\t#{funsys}\t#{score}"
    end
  end
end