DomFun 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/DomFun.gemspec +44 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/bin/add_protein_functional_families.rb +133 -0
- data/bin/console +14 -0
- data/bin/domains_to_function_predictor.rb +287 -0
- data/bin/generate_CAFA2_dataset.rb +135 -0
- data/bin/generate_CAFA2_tripartite_network.rb +139 -0
- data/bin/generate_cafa_control.rb +45 -0
- data/bin/get_kegg_pathways.R +12 -0
- data/bin/lines.R +74 -0
- data/bin/merge_pairs.rb +139 -0
- data/bin/normalize_combined_scores.rb +118 -0
- data/bin/prepare_cafa_network.rb +96 -0
- data/bin/setup +8 -0
- data/bin/standardize_scores.R +53 -0
- data/bin/translate_kegg_genes2pathways.rb +98 -0
- data/bin/validate_ProtFunSys_predictions.rb +174 -0
- data/lib/DomFun.rb +6 -0
- data/lib/DomFun/generalMethods.rb +105 -0
- data/lib/DomFun/version.rb +3 -0
- metadata +128 -0
data/lib/DomFun/generalMethods.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
# Parses a tab-separated protein annotation table.
#
# The first line of the file is treated as a header and skipped. For every
# other line, spaces are removed and the line is split on tabs into a protein
# ID followed by at most three annotation columns; column i is assigned to
# annotation_types[i]. Each column is split on ';' or ','. For annotation
# types whose name contains 'go', only the GO identifiers are kept: the text
# after 'GO:' with any ']' removed, re-prefixed with 'GO:'.
#
# @param file [String] path to the tab-separated input file
# @param annotation_types [Array<String>] annotation type names, in column order
# @return [Array] three values:
#   - Hash of annotation type => { protein ID => Array of annotation strings }
#     (a protein is only stored under a type when its column is non-empty),
#   - Integer count of lines read, header line included,
#   - Array of unique protein IDs whose three annotation columns are all empty.
def load_proteins_file(file, annotation_types)
  protein_annotations = {}
  annotation_types.each { |type| protein_annotations[type] = {} }
  proteins_without_annotations = []
  counter = 0
  # File.foreach closes the file when iteration ends (or raises).
  File.foreach(file) do |line|
    if counter.zero? # header line
      counter += 1
      next
    end
    # Limit of 4 keeps trailing empty columns so they can be counted below.
    fields = line.chomp.delete(' ').split("\t", 4)
    protein_id = fields.shift
    annotation_types.each_with_index do |type, i|
      # to_s: rows with fewer columns than annotation types yield nil here.
      annotations = fields[i].to_s.split(/[;,]/)
      next if annotations.empty?

      if type.include?('go')
        annotations = annotations.map do |go_term|
          go_id = go_term.split('GO:')[1]
          "GO:#{go_id.delete(']')}" unless go_id.nil?
        end.compact
      end
      protein_annotations[type][protein_id] = annotations
    end
    # A row-level property, so it is checked once per row, not once per type.
    proteins_without_annotations << protein_id if fields.count('') == 3
    counter += 1
  end
  [protein_annotations, counter, proteins_without_annotations.uniq]
end
|
39
|
+
|
40
|
+
# Loads a tab-separated CATH domain table and indexes it by protein and gene.
#
# The first row is dropped as a header and empty rows are skipped. Column 0 is
# the protein ID. The gene name is read from column 3 when meth is 'protACC'
# and from column 4 when meth is 'geneID'; rows whose gene name contains
# 'fusion' are skipped and spaces in gene names become underscores. The term
# stored per protein is column 5 for category 'superfamilyID' and column 6 for
# 'funfamID'; for any other category nil is stored.
#
# @param file [String] path to the tab-separated input file
# @param category [String] 'superfamilyID' or 'funfamID'
# @param meth [String] 'protACC' or 'geneID'; any other value raises TypeError
#   on the first data row, because no gene column is selected
# @return [Array] four values:
#   - Hash of protein ID => Array of terms (one entry per row, duplicates kept),
#   - Hash of protein ID => gene name (gene names equal to 'NULL' are omitted),
#   - Hash of gene name => Array of protein IDs (protein IDs equal to 'NULL'
#     are omitted),
#   - Integer number of distinct proteins in the first hash.
def load_cath_data(file, category, meth = 'protACC')
  cath_data = {}
  protein2gene = {}
  gene2proteins = {}
  # Column choices do not depend on the row, so resolve them once.
  gene_column = { 'protACC' => 3, 'geneID' => 4 }[meth]
  term_column = { 'superfamilyID' => 5, 'funfamID' => 6 }[category]
  # Options must be keywords: a positional options hash is rejected on Ruby 3+.
  rows = CSV.read(file, col_sep: "\t")
  rows.shift # discard header row
  rows.each do |row|
    next if row.empty?

    protein_id = row[0]
    gene_name = row[gene_column]
    next if gene_name.include?('fusion')

    gene_name = gene_name.tr(' ', '_')
    term2save = term_column.nil? ? nil : row[term_column]
    (cath_data[protein_id] ||= []) << term2save
    protein2gene[protein_id] = gene_name if gene_name != 'NULL'
    (gene2proteins[gene_name] ||= []) << protein_id if protein_id != 'NULL'
  end
  [cath_data, protein2gene, gene2proteins, cath_data.length]
end
|
77
|
+
|
78
|
+
# Appends term to the list stored under key in dict, creating the list when
# dict[key] is nil.
#
# @param dict [Hash] hash of key => Array, modified in place
# @param key [Object] key to store the term under
# @param term [Object] value to append (nil is appended like any other value)
# @return [Array] the list stored under key after the append
def add_term2dictionary(dict, key, term)
  terms = dict[key]
  if terms.nil?
    dict[key] = [term]
  else
    terms << term
  end
end
|
86
|
+
|
87
|
+
# Reads a tab-separated CAFA table and collects GO terms per gene.
#
# Lines containing 'GO_Ont' are skipped (NOTE(review): presumably the header
# line - confirm against the input format). Only rows whose second column is
# 'MF' are kept; the GO term is taken from column 4 and the gene name from
# column 6 (zero-based). Rows whose gene name is 'NA', or whose gene column is
# missing or empty at the end of the line, are skipped.
#
# @param cafa_file [String] path to the tab-separated input file
# @return [Hash] gene name => Array of GO terms, in file order, duplicates kept
def load_cafa_data(cafa_file)
  cafa_data = {}
  # File.foreach closes the file when iteration ends (or raises).
  File.foreach(cafa_file) do |line|
    next if line.include?('GO_Ont')

    cafa_info = line.chomp.split("\t")
    next unless cafa_info[1] == 'MF'

    go_term = cafa_info[4]
    gene_name = cafa_info[6]
    # split drops trailing empty columns, so an empty gene column reads as nil.
    next if gene_name.nil? || gene_name == 'NA'

    (cafa_data[gene_name] ||= []) << go_term
  end
  cafa_data
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: DomFun
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elena Rojano, Pedro Seoane
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-11-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: NetAnalyzer
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.1.5
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.1.5
|
69
|
+
description: From associations calculated between protein domains and functional systems
|
70
|
+
(FunSys), DomFun can predict the functions of proteins looking up domains and the
|
71
|
+
FunSys that have been associated with. The system is validated using data from CAFA.
|
72
|
+
email:
|
73
|
+
- elenarojano@uma.es, seoanezonjic@hotmail.com
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".travis.yml"
|
81
|
+
- DomFun.gemspec
|
82
|
+
- Gemfile
|
83
|
+
- LICENSE.txt
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- bin/add_protein_functional_families.rb
|
87
|
+
- bin/console
|
88
|
+
- bin/domains_to_function_predictor.rb
|
89
|
+
- bin/generate_CAFA2_dataset.rb
|
90
|
+
- bin/generate_CAFA2_tripartite_network.rb
|
91
|
+
- bin/generate_cafa_control.rb
|
92
|
+
- bin/get_kegg_pathways.R
|
93
|
+
- bin/lines.R
|
94
|
+
- bin/merge_pairs.rb
|
95
|
+
- bin/normalize_combined_scores.rb
|
96
|
+
- bin/prepare_cafa_network.rb
|
97
|
+
- bin/setup
|
98
|
+
- bin/standardize_scores.R
|
99
|
+
- bin/translate_kegg_genes2pathways.rb
|
100
|
+
- bin/validate_ProtFunSys_predictions.rb
|
101
|
+
- lib/DomFun.rb
|
102
|
+
- lib/DomFun/generalMethods.rb
|
103
|
+
- lib/DomFun/version.rb
|
104
|
+
homepage: https://github.com/ElenaRojano/DomFun
|
105
|
+
licenses:
|
106
|
+
- MIT
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubyforge_project:
|
124
|
+
rubygems_version: 2.6.14
|
125
|
+
signing_key:
|
126
|
+
specification_version: 4
|
127
|
+
summary: Tool to predict protein functions based on domains-FunSys associations.
|
128
|
+
test_files: []
|