DomFun 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/DomFun.gemspec +44 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/bin/add_protein_functional_families.rb +133 -0
- data/bin/console +14 -0
- data/bin/domains_to_function_predictor.rb +287 -0
- data/bin/generate_CAFA2_dataset.rb +135 -0
- data/bin/generate_CAFA2_tripartite_network.rb +139 -0
- data/bin/generate_cafa_control.rb +45 -0
- data/bin/get_kegg_pathways.R +12 -0
- data/bin/lines.R +74 -0
- data/bin/merge_pairs.rb +139 -0
- data/bin/normalize_combined_scores.rb +118 -0
- data/bin/prepare_cafa_network.rb +96 -0
- data/bin/setup +8 -0
- data/bin/standardize_scores.R +53 -0
- data/bin/translate_kegg_genes2pathways.rb +98 -0
- data/bin/validate_ProtFunSys_predictions.rb +174 -0
- data/lib/DomFun.rb +6 -0
- data/lib/DomFun/generalMethods.rb +105 -0
- data/lib/DomFun/version.rb +3 -0
- metadata +128 -0
data/lib/DomFun.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
# Loads a tab-separated protein annotation table.
#
# The first line of the file is treated as a header and skipped. Every other
# line has all spaces removed and is split on tabs into at most four fields:
# the protein ID followed by up to three annotation columns. Column +i+ is
# stored under +annotation_types[i]+; the terms inside a column are separated
# by ';' or ','.
#
# For annotation types whose name includes 'go', only the identifier that
# follows 'GO:' in each term is kept (any ']' is stripped) and it is stored
# as "GO:<id>"; terms without a 'GO:' part are dropped.
#
# @param file [String] path to the tab-separated file
# @param annotation_types [Array<String>] annotation type of each column, in
#   column order
# @return [Array] three values:
#   * Hash of annotation type => { protein ID => Array of terms }
#   * Integer number of lines read from the file (header included)
#   * Array of unique protein IDs whose three annotation columns are all empty
def load_proteins_file(file, annotation_types)
  protein_annotations = {}
  annotation_types.each { |type| protein_annotations[type] = {} } # initialize annotation hashes
  proteins_without_annotations = []
  counter = 0
  # File.foreach closes the file handle when iteration finishes.
  File.foreach(file) do |line|
    counter += 1
    next if counter == 1 # header line

    fields = line.chomp.delete(' ').split("\t", 4)
    prot_id = fields.shift
    annotation_types.each_with_index do |type, i|
      # A row may have fewer columns than annotation types; treat a missing
      # column as empty instead of failing on nil.
      annotations = (fields[i] || '').split(/[;,]/)
      next if annotations.empty?

      if type.include?('go')
        go_ids = annotations.map { |go_term| go_term.split('GO:')[1] }.compact
        annotations = go_ids.map { |go_id| "GO:#{go_id.delete(']')}" }
      end
      protein_annotations[type][prot_id] = annotations
    end
    # Three empty columns means the protein carries no annotation at all.
    # As before, this is only evaluated when at least one annotation type
    # was requested.
    if !annotation_types.empty? && fields.count('') == 3
      proteins_without_annotations << prot_id
    end
  end
  [protein_annotations, counter, proteins_without_annotations.uniq]
end
|
39
|
+
|
40
|
+
# Loads a tab-separated CATH domain table.
#
# The first row is discarded as a header. For every remaining non-empty row:
# column 0 is the protein ID, column 3 (meth 'protACC') or column 4
# (meth 'geneID') is the identifier the protein is mapped to, column 5 is the
# superfamily ID and column 6 is the FunFam ID. Rows whose mapped identifier
# contains 'fusion' are skipped; spaces in it are replaced by underscores.
#
# @param file [String] path to the tab-separated file
# @param category [String] 'superfamilyID' or 'funfamID'; any other value
#   stores nil as the term of every row
# @param meth [String] 'protACC' or 'geneID'; any other value leaves the
#   column index nil, which raises as soon as a data row is read
# @return [Array] four values:
#   * Hash of protein ID => Array of terms for the chosen category
#   * Hash of protein ID => mapped identifier (identifier 'NULL' excluded)
#   * Hash of mapped identifier => Array of protein IDs (protein 'NULL' excluded)
#   * Integer number of distinct protein IDs in the first hash
def load_cath_data(file, category, meth='protACC')
  cath_data = {}
  protein2gene = {}
  gene2proteins = {}

  # Column choices do not depend on the row, so resolve them once.
  field = case meth
          when 'protACC' then 3
          when 'geneID' then 4
          end
  term_field = case category
               when 'superfamilyID' then 5
               when 'funfamID' then 6
               end

  # Options must be passed as keywords: a positional Hash raises
  # ArgumentError on Ruby 3.0+.
  rows = CSV.read(file, col_sep: "\t")
  rows.delete_at(0) # header row
  rows.each do |row|
    next if row.empty?

    protein_id = row[0]
    # NOTE(review): CSV yields nil for empty fields, so an empty identifier
    # column still raises on the next line - confirm whether such rows occur.
    gene_name = row[field]
    next if gene_name.include?('fusion')

    gene_name = gene_name.tr(' ', '_')
    term2save = term_field.nil? ? nil : row[term_field]
    add_term2dictionary(cath_data, protein_id, term2save)
    protein2gene[protein_id] = gene_name if gene_name != 'NULL'
    (gene2proteins[gene_name] ||= []) << protein_id if protein_id != 'NULL'
  end
  [cath_data, protein2gene, gene2proteins, cath_data.keys.length]
end
|
77
|
+
|
78
|
+
# Appends +term+ to the list stored under +key+ in +dict+, creating the list
# when the key has no value yet.
#
# @param dict [Hash] hash of key => Array of terms; modified in place
# @param key [Object] key whose list receives the term
# @param term [Object] value to append
# @return [Array] the list stored under +key+ after the addition
def add_term2dictionary(dict, key, term)
  existing_terms = dict[key]
  return existing_terms << term unless existing_terms.nil?

  dict[key] = [term]
end
|
86
|
+
|
87
|
+
# Loads GO terms per gene from a tab-separated CAFA file.
#
# Lines containing 'GO_Ont' (the header) are skipped, as are lines whose
# second column is not 'MF' and lines whose seventh column is 'NA'. For the
# remaining lines the fifth column (GO term) is appended to the list of the
# gene named in the seventh column.
#
# @param cafa_file [String] path to the tab-separated CAFA file
# @return [Hash] gene name => Array of GO terms, in file order
def load_cafa_data(cafa_file)
  cafa_data = {}
  # File.foreach closes the file handle when iteration finishes.
  File.foreach(cafa_file) do |line|
    next if line.include?('GO_Ont')

    cafa_info = line.chomp.split("\t")
    next unless cafa_info[1] == 'MF'

    gene_name = cafa_info[6]
    next if gene_name == 'NA'

    (cafa_data[gene_name] ||= []) << cafa_info[4]
  end
  cafa_data
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: DomFun
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elena Rojano, Pedro Seoane
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-11-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: NetAnalyzer
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.1.5
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.1.5
|
69
|
+
description: From associations calculated between protein domains and functional systems
|
70
|
+
(FunSys), DomFun can predict the functions of proteins looking up domains and the
|
71
|
+
FunSys that have been associated with. The system is validated using data from CAFA.
|
72
|
+
email:
|
73
|
+
- elenarojano@uma.es, seoanezonjic@hotmail.com
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".travis.yml"
|
81
|
+
- DomFun.gemspec
|
82
|
+
- Gemfile
|
83
|
+
- LICENSE.txt
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- bin/add_protein_functional_families.rb
|
87
|
+
- bin/console
|
88
|
+
- bin/domains_to_function_predictor.rb
|
89
|
+
- bin/generate_CAFA2_dataset.rb
|
90
|
+
- bin/generate_CAFA2_tripartite_network.rb
|
91
|
+
- bin/generate_cafa_control.rb
|
92
|
+
- bin/get_kegg_pathways.R
|
93
|
+
- bin/lines.R
|
94
|
+
- bin/merge_pairs.rb
|
95
|
+
- bin/normalize_combined_scores.rb
|
96
|
+
- bin/prepare_cafa_network.rb
|
97
|
+
- bin/setup
|
98
|
+
- bin/standardize_scores.R
|
99
|
+
- bin/translate_kegg_genes2pathways.rb
|
100
|
+
- bin/validate_ProtFunSys_predictions.rb
|
101
|
+
- lib/DomFun.rb
|
102
|
+
- lib/DomFun/generalMethods.rb
|
103
|
+
- lib/DomFun/version.rb
|
104
|
+
homepage: https://github.com/ElenaRojano/DomFun
|
105
|
+
licenses:
|
106
|
+
- MIT
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubyforge_project:
|
124
|
+
rubygems_version: 2.6.14
|
125
|
+
signing_key:
|
126
|
+
specification_version: 4
|
127
|
+
summary: Tool to predict protein functions based on domains-FunSys associations.
|
128
|
+
test_files: []
|