DomFun 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/DomFun.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "DomFun/version"
2
+
3
# Top-level namespace of the DomFun gem.
module DomFun
  # Base class for errors raised by DomFun; derives from StandardError so a
  # plain `rescue` catches it.
  class Error < StandardError
  end
end
@@ -0,0 +1,105 @@
1
# Loads a tab-separated protein annotation table into per-type lookup hashes.
#
# The first line of +file+ is treated as a header and skipped. Every other
# line is stripped of all spaces and split on tabs into a protein ID followed
# by at most three annotation columns. Column +i+ is assigned to
# <tt>annotation_types[i]</tt> and split on ';' or ','. For annotation types
# whose name contains 'go', only the identifier following 'GO:' is kept (any
# ']' removed) and re-prefixed with 'GO:'.
#
# Blank lines are ignored and not counted. Lines with fewer columns than
# annotation types are accepted; the missing columns are treated as empty.
#
# @param file [String] path of the tab-separated input file
# @param annotation_types [Array<String>] annotation type names, in column order
# @return [Array] three elements:
#   * Hash: annotation type => { protein ID => Array of annotation strings }
#   * Integer: number of lines read (the header line included, blank lines excluded)
#   * Array: unique protein IDs whose three annotation columns are all empty
def load_proteins_file(file, annotation_types)
  protein_annotations = {}
  annotation_types.each { |type| protein_annotations[type] = {} }
  proteins_without_annotations = []
  counter = 0
  header_pending = true
  # File.foreach closes the file when iteration ends, even on error.
  File.foreach(file) do |raw_line|
    if header_pending
      header_pending = false
      counter += 1
      next
    end
    line = raw_line.chomp.delete(' ')
    next if line.empty?

    # Positive limit keeps trailing empty columns, so "P1\t\t\t" yields
    # three empty annotation fields after the ID is shifted off.
    fields = line.split("\t", 4)
    prot_id = fields.shift
    annotation_types.each_with_index do |type, i|
      # to_s: a short line simply has no annotations for this type.
      annotations = fields[i].to_s.split(/[;,]/)
      next if annotations.empty?

      if type.include?('go')
        annotations = annotations.each_with_object([]) do |go_term, go_ids|
          go_id = go_term.split('GO:')[1]
          go_ids << "GO:#{go_id.delete(']')}" unless go_id.nil?
        end
      end
      protein_annotations[type][prot_id] = annotations
    end
    # Checked once per line: it does not depend on the annotation type.
    proteins_without_annotations << prot_id if fields.count('') == 3
    counter += 1
  end
  [protein_annotations, counter, proteins_without_annotations.uniq]
end
39
+
40
# Loads a tab-separated CATH domain table and indexes it by protein and gene.
#
# The first row is dropped as a header and empty rows are skipped. For every
# remaining row, column 0 is the protein ID, the gene name is read from
# column 3 (meth 'protACC') or column 4 (meth 'geneID'), and the term to
# store is read from column 5 (category 'superfamilyID') or column 6
# (category 'funfamID'). Rows whose gene name contains 'fusion' are skipped,
# and spaces in gene names are replaced by underscores.
#
# An empty gene column (parsed by CSV as nil) is handled like the literal
# 'NULL'. Any other +category+ value stores nil as the term, as before.
#
# @param file [String] path of the tab-separated input file
# @param category [String] 'superfamilyID' or 'funfamID'
# @param meth [String] 'protACC' or 'geneID'
# @return [Array] four elements:
#   * Hash: protein ID => Array of stored terms
#   * Hash: protein ID => gene name (gene names equal to 'NULL' are omitted)
#   * Hash: gene name => Array of protein IDs (protein IDs equal to 'NULL' are omitted)
#   * Integer: number of distinct protein IDs in the first hash
# @raise [ArgumentError] if +meth+ is neither 'protACC' nor 'geneID'
def load_cath_data(file, category, meth='protACC')
  gene_field = { 'protACC' => 3, 'geneID' => 4 }[meth]
  if gene_field.nil?
    raise ArgumentError, "unknown meth '#{meth}' (expected 'protACC' or 'geneID')"
  end
  term_field = { 'superfamilyID' => 5, 'funfamID' => 6 }[category]

  cath_data = {}
  protein2gene = {}
  gene2proteins = {}
  # Keyword options: a positional options hash is rejected on Ruby 3.
  rows = CSV.read(file, col_sep: "\t")
  rows.shift # drop the header row
  rows.each do |row|
    next if row.empty?

    protein_id = row[0]
    # CSV returns nil for an empty unquoted field.
    gene_name = row[gene_field] || 'NULL'
    next if gene_name.include?('fusion')

    gene_name = gene_name.tr(' ', '_')
    term2save = term_field.nil? ? nil : row[term_field]
    add_term2dictionary(cath_data, protein_id, term2save)
    protein2gene[protein_id] = gene_name unless gene_name == 'NULL'
    add_term2dictionary(gene2proteins, gene_name, protein_id) unless protein_id == 'NULL'
  end
  [cath_data, protein2gene, gene2proteins, cath_data.length]
end
77
+
78
# Appends +term+ to the array stored under +key+ in +dict+, creating a
# one-element array when the lookup returns nil.
#
# @param dict [Hash] hash whose values are arrays; modified in place
# @param key [Object] key to append under
# @param term [Object] value to append
# @return [Array] the array now stored under +key+
def add_term2dictionary(dict, key, term)
  bucket = dict[key]
  return dict[key] = [term] if bucket.nil?

  bucket << term
end
86
+
87
# Loads a tab-separated CAFA table and groups its 'MF' terms by gene name.
#
# Lines containing 'GO_Ont' are treated as header lines and skipped. Of the
# remaining lines only those whose column 1 equals 'MF' are used: column 4
# supplies the GO term and column 6 the gene name. Rows whose gene name is
# 'NA', or that are too short to have a gene-name column, are skipped.
#
# @param cafa_file [String] path of the tab-separated input file
# @return [Hash] gene name => Array of GO terms, in file order
def load_cafa_data(cafa_file)
  cafa_data = {}
  # File.foreach closes the file when iteration ends, even on error.
  File.foreach(cafa_file) do |raw_line|
    line = raw_line.chomp
    next if line.include?('GO_Ont')

    cafa_info = line.split("\t")
    next unless cafa_info[1] == 'MF'

    go_term = cafa_info[4]
    gene_name = cafa_info[6]
    # A nil gene name would otherwise be stored under a nil key.
    next if gene_name.nil? || gene_name == 'NA'

    (cafa_data[gene_name] ||= []) << go_term
  end
  cafa_data
end
@@ -0,0 +1,3 @@
1
module DomFun
  # Version string of the DomFun gem.
  VERSION = '0.1.0'
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: DomFun
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elena Rojano, Pedro Seoane
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-11-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: NetAnalyzer
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.1.5
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.1.5
69
+ description: From associations calculated between protein domains and functional systems
70
+ (FunSys), DomFun can predict the functions of proteins by looking up their domains and the
71
+ FunSys they have been associated with. The system is validated using data from CAFA.
72
+ email:
73
+ - elenarojano@uma.es, seoanezonjic@hotmail.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - ".travis.yml"
81
+ - DomFun.gemspec
82
+ - Gemfile
83
+ - LICENSE.txt
84
+ - README.md
85
+ - Rakefile
86
+ - bin/add_protein_functional_families.rb
87
+ - bin/console
88
+ - bin/domains_to_function_predictor.rb
89
+ - bin/generate_CAFA2_dataset.rb
90
+ - bin/generate_CAFA2_tripartite_network.rb
91
+ - bin/generate_cafa_control.rb
92
+ - bin/get_kegg_pathways.R
93
+ - bin/lines.R
94
+ - bin/merge_pairs.rb
95
+ - bin/normalize_combined_scores.rb
96
+ - bin/prepare_cafa_network.rb
97
+ - bin/setup
98
+ - bin/standardize_scores.R
99
+ - bin/translate_kegg_genes2pathways.rb
100
+ - bin/validate_ProtFunSys_predictions.rb
101
+ - lib/DomFun.rb
102
+ - lib/DomFun/generalMethods.rb
103
+ - lib/DomFun/version.rb
104
+ homepage: https://github.com/ElenaRojano/DomFun
105
+ licenses:
106
+ - MIT
107
+ metadata: {}
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubyforge_project:
124
+ rubygems_version: 2.6.14
125
+ signing_key:
126
+ specification_version: 4
127
+ summary: Tool to predict protein functions based on domains-FunSys associations.
128
+ test_files: []