DomFun 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/DomFun.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "DomFun/version"
2
+
3
# Top-level namespace of the DomFun gem.
module DomFun
  # Base error class for the gem; a direct subclass of StandardError.
  Error = Class.new(StandardError)
end
@@ -0,0 +1,105 @@
1
# Loads per-protein annotations from a tab-separated file.
#
# The first line of the file is always skipped as a header. Every other line
# has all spaces removed and is split on tabs into at most four fields: the
# protein ID followed by up to three annotation columns. Entries of
# +annotation_types+ are matched to those columns by position, and each
# column is split on ';' or ',' into individual annotations.
#
# For a type whose name includes 'go', only the identifier following 'GO:'
# is kept (with any ']' removed) and stored as "GO:<id>"; terms without a
# 'GO:' marker are dropped.
#
# Blank lines and columns missing from short rows are treated as "no
# annotation" instead of raising.
#
# @param file [String] path to the tab-separated annotation file
# @param annotation_types [Array<String>] annotation type names, in column order
# @return [Array] three values:
#   - Hash: type => { protein ID => Array of annotations }
#   - Integer: number of lines consumed, header included (0 for an empty
#     file); skipped blank lines are not counted
#   - Array<String>: unique protein IDs whose three annotation columns are
#     all present and empty
def load_proteins_file(file, annotation_types)
  protein_annotations = {}
  annotation_types.each { |type| protein_annotations[type] = {} }
  proteins_without_annotations = []
  counter = 0
  # File.foreach closes the file once the block finishes.
  File.foreach(file) do |raw_line|
    if counter.zero? # header line
      counter = 1
      next
    end
    line = raw_line.chomp.delete(' ')
    next if line.empty?
    fields = line.split("\t", 4)
    prot_id = fields.shift
    annotation_types.each_with_index do |type, i|
      # to_s: a row may have fewer columns than requested annotation types.
      annotations = fields[i].to_s.split(/[;,]/)
      next if annotations.empty?
      if type.include?('go')
        go_ids = annotations.map { |go_term| go_term.split('GO:')[1] }.compact
        annotations = go_ids.map { |go_id| "GO:#{go_id.delete(']')}" }
      end
      protein_annotations[type][prot_id] = annotations
    end
    # Checked once per row. Nothing is recorded when no annotation types
    # were requested.
    if !annotation_types.empty? && fields.count('') == 3
      proteins_without_annotations << prot_id
    end
    counter += 1
  end
  return protein_annotations, counter, proteins_without_annotations.uniq
end
39
+
40
# Loads CATH domain assignments from a tab-separated file.
#
# The first row is discarded as a header and empty rows are skipped. Column 0
# is used as the protein ID. The identifier stored as "gene name" comes from
# column 3 when +meth+ is 'protACC' and from column 4 when it is 'geneID'.
# Rows whose identifier is missing or contains 'fusion' are skipped; spaces
# in the identifier are replaced by underscores.
#
# The term saved per protein is column 5 for category 'superfamilyID' and
# column 6 for 'funfamID'. Any other category stores nil for every row.
#
# @param file [String] path to the tab-separated CATH file
# @param category [String] 'superfamilyID' or 'funfamID'
# @param meth [String] 'protACC' (default) or 'geneID'
# @return [Array] four values:
#   - Hash: protein ID => Array of saved terms (one per row)
#   - Hash: protein ID => identifier, omitted when the identifier is 'NULL'
#   - Hash: identifier => Array of protein IDs (one per row, 'NULL' protein
#     IDs omitted)
#   - Integer: number of distinct protein IDs in the first hash
# @raise [ArgumentError] if +meth+ is neither 'protACC' nor 'geneID'
def load_cath_data(file, category, meth='protACC')
  id_columns = { 'protACC' => 3, 'geneID' => 4 }
  term_columns = { 'superfamilyID' => 5, 'funfamID' => 6 }
  field = id_columns[meth]
  if field.nil?
    raise ArgumentError, "unknown meth #{meth.inspect} (expected 'protACC' or 'geneID')"
  end
  term_column = term_columns[category]

  cath_data = {}
  protein2gene = {}
  gene2proteins = {}
  # Options are passed as keywords: a positional options hash is rejected by
  # CSV.read on Ruby 3 and later.
  rows = CSV.read(file, col_sep: "\t")
  rows.drop(1).each do |protein_domains_data|
    next if protein_domains_data.empty?
    gene_name = protein_domains_data[field]
    # CSV yields nil for empty or missing cells.
    next if gene_name.nil? || gene_name.include?('fusion')
    gene_name = gene_name.tr(' ', '_')
    protein_id = protein_domains_data[0]
    term2save = term_column.nil? ? nil : protein_domains_data[term_column]
    add_term2dictionary(cath_data, protein_id, term2save)
    protein2gene[protein_id] = gene_name unless gene_name == 'NULL'
    add_term2dictionary(gene2proteins, gene_name, protein_id) unless protein_id == 'NULL'
  end
  return cath_data, protein2gene, gene2proteins, cath_data.length
end
77
+
78
# Appends +term+ to the array stored under +key+ in +dict+, creating the
# array first when the key has no value yet. +dict+ is modified in place.
#
# @param dict [Hash] hash mapping keys to arrays of terms
# @param key [Object] key whose array receives the term
# @param term [Object] value to append
# @return [Array] the array stored under +key+ after the append
def add_term2dictionary(dict, key, term)
  (dict[key] ||= []) << term
end
86
+
87
# Loads molecular-function GO annotations from a tab-separated CAFA file.
#
# Lines containing 'GO_Ont' are skipped (presumably the header - confirm
# against the input format). Only rows whose second column is 'MF' are used.
# The GO term is read from column 4 and the gene name from column 6. Rows
# whose gene name is 'NA' or missing (row too short) are ignored.
#
# @param cafa_file [String] path to the tab-separated CAFA file
# @return [Hash] gene name => Array of GO terms, in file order
def load_cafa_data(cafa_file)
  cafa_data = {}
  # File.foreach closes the file once the block finishes.
  File.foreach(cafa_file) do |raw_line|
    line = raw_line.chomp
    next if line.include?('GO_Ont')
    cafa_info = line.split("\t")
    next unless cafa_info[1] == 'MF'
    go_term = cafa_info[4]
    gene_name = cafa_info[6]
    # A nil gene name would otherwise become a nil hash key.
    next if gene_name.nil? || gene_name == 'NA'
    (cafa_data[gene_name] ||= []) << go_term
  end
  cafa_data
end
@@ -0,0 +1,3 @@
1
module DomFun
  # Version string of the gem. Frozen so the constant cannot be mutated in
  # place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: DomFun
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elena Rojano, Pedro Seoane
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-11-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: NetAnalyzer
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.1.5
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.1.5
69
+ description: From associations calculated between protein domains and functional systems
70
+ (FunSys), DomFun can predict the functions of proteins by looking up their domains and the
71
+ FunSys they have been associated with. The system is validated using data from CAFA.
72
+ email:
73
+ - elenarojano@uma.es, seoanezonjic@hotmail.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - ".travis.yml"
81
+ - DomFun.gemspec
82
+ - Gemfile
83
+ - LICENSE.txt
84
+ - README.md
85
+ - Rakefile
86
+ - bin/add_protein_functional_families.rb
87
+ - bin/console
88
+ - bin/domains_to_function_predictor.rb
89
+ - bin/generate_CAFA2_dataset.rb
90
+ - bin/generate_CAFA2_tripartite_network.rb
91
+ - bin/generate_cafa_control.rb
92
+ - bin/get_kegg_pathways.R
93
+ - bin/lines.R
94
+ - bin/merge_pairs.rb
95
+ - bin/normalize_combined_scores.rb
96
+ - bin/prepare_cafa_network.rb
97
+ - bin/setup
98
+ - bin/standardize_scores.R
99
+ - bin/translate_kegg_genes2pathways.rb
100
+ - bin/validate_ProtFunSys_predictions.rb
101
+ - lib/DomFun.rb
102
+ - lib/DomFun/generalMethods.rb
103
+ - lib/DomFun/version.rb
104
+ homepage: https://github.com/ElenaRojano/DomFun
105
+ licenses:
106
+ - MIT
107
+ metadata: {}
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubyforge_project:
124
+ rubygems_version: 2.6.14
125
+ signing_key:
126
+ specification_version: 4
127
+ summary: Tool to predict protein functions based on domains-FunSys associations.
128
+ test_files: []