WingenderTFClass 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a5b5fa62fdeb9e8e624a9e63964f0886d135ddcb
4
+ data.tar.gz: 11372a9c59c0c627664e01faaab77fc3a212ca3e
5
+ SHA512:
6
+ metadata.gz: e6c05b7b6670ec5b00a78524c00ebd4c0780552ae8c5539ad0f33925bd696e5987a5a4f84ceba4e09ea63f0de2ea907aa7d1f7fcdbde19310c8f3a70e7556694
7
+ data.tar.gz: 0c2168abdd84f9cc32818c60f1dd8dcc03ba062b10921ed018c142bc585492c670ead58b547ebcdc88f77c3a76348e8d2b8101d45538e6176cea012d48060911
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in WingenderTFClass.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # WingenderTFClass
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/WingenderTFClass`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'WingenderTFClass'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install WingenderTFClass
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/VorontsovIE/WingenderTFClass.
36
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task :default => :test
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'WingenderTFClass/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "WingenderTFClass"
8
+ spec.version = WingenderTFClass::VERSION
9
+ spec.authors = ["prijutme4ty"]
10
+ spec.email = ["prijutme4ty@gmail.com"]
11
+
12
+ spec.summary = %q{Gem to acquire TFClass protein class/family/subfamily/etc for a transcription factor by its UniprotID.}
13
+ spec.description = %q{This gem allows to retrieve information for a transcription factor from Wingender's TFClass ontology.}
14
+ spec.homepage = "https://github.com/VorontsovIE/WingenderTFClass"
15
+
16
+ # # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
+ # # delete this section to allow pushing this gem to any host.
18
+ # if spec.respond_to?(:metadata)
19
+ # spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
20
+ # else
21
+ # raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
22
+ # end
23
+
24
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ spec.add_development_dependency "bundler", "~> 1.10"
30
+ spec.add_development_dependency "rake", "~> 10.0"
31
+ spec.add_development_dependency "minitest"
32
+ end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "WingenderTFClass"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,6 @@
1
+ require_relative "WingenderTFClass/version"
2
+ require_relative "WingenderTFClass/motif_family_recognizer"
3
+
4
+ module WingenderTFClass
5
+ # Your code goes here...
6
+ end
@@ -0,0 +1,96 @@
1
+ require_relative 'obo/tf_classification'
2
+ require_relative 'uniprot_info'
3
+
4
module WingenderTFClass
  # Absolute paths (resolved relative to this file's directory) of the data
  # files the recognizers read.
  module FilePaths
    TFOntologyHuman = File.absolute_path('source_data/TFOntologies/TFClass_human.obo', __dir__)
    TFOntologyMouse = File.absolute_path('source_data/TFOntologies/TFClass_mouse.obo', __dir__)

    UniprotHuman = File.absolute_path('source_data/uniprot_infos/human.tsv', __dir__)
    UniprotMouse = File.absolute_path('source_data/uniprot_infos/mouse.tsv', __dir__)
  end


  # Maps Uniprot accessions (AC) or Uniprot IDs to the roots of the ontology
  # subtrees obtained by slicing a TF classification at a given deepness.
  module ProteinFamilyRecognizers
    # Builds a ByUniprotID recognizer from an ontology file and a Uniprot
    # ID-to-AC table.
    #
    # deepness:: slice deepness passed on to the classification's `tf_groups`
    # tf_classification_filename:: path of the OBO file to load
    # uniprot_infos_filename:: path handed to
    #   `UniprotInfo.uniprot_ac_list_by_id_from_file`
    def self.by_uniprot_id(deepness:, tf_classification_filename:, uniprot_infos_filename:)
      tf_classification = OBO::TFClassification.from_file(tf_classification_filename)
      ByUniprotID.new(
        ByUniprotAC.new(tf_classification, deepness),
        UniprotInfo.uniprot_ac_list_by_id_from_file(uniprot_infos_filename)
      )
    end

    # Lazily built, per-deepness cache of recognizers for the human data files:
    # `HumanAtLevel[3]` loads the files on first access and reuses the result.
    HumanAtLevel = Hash.new{|h, deepness|
      h[deepness] = self.by_uniprot_id(
        deepness: deepness,
        tf_classification_filename: FilePaths::TFOntologyHuman,
        uniprot_infos_filename: FilePaths::UniprotHuman,
      )
    }

    # Same as HumanAtLevel, for the mouse data files.
    MouseAtLevel = Hash.new{|h, deepness|
      h[deepness] = self.by_uniprot_id(
        deepness: deepness,
        tf_classification_filename: FilePaths::TFOntologyMouse,
        uniprot_infos_filename: FilePaths::UniprotMouse,
      )
    }

    # Recognizer keyed by Uniprot accession.
    class ByUniprotAC
      # tf_classification:: object responding to `tf_groups(deepness)`
      # deepness:: slice deepness used to form the groups
      def initialize(tf_classification, deepness)
        @deepness = deepness
        @tf_classification = tf_classification
      end

      # Memoized result of `tf_groups(@deepness)`; iterated below as
      # (group root, group members) pairs.
      def subtree_groups
        @subtree_groups ||= @tf_classification.tf_groups(@deepness)
      end

      # Memoized index: uniprot AC => array of group roots whose members
      # reference that AC.
      private def subtree_root_by_uniprot_ac
        @subtree_root_by_uniprot_ac ||= begin
          result = Hash.new{|h,k| h[k] = [] }

          subtree_groups.each{|group_root, group_leafs|
            # uniq: one AC may be referenced by several members of the same group
            group_leafs.flat_map(&:uniprot_ACs).uniq.each{|uniprot_ac|
              result[uniprot_ac] << group_root
            }
          }
          result
        end
      end

      # In most cases a Uniprot accession refers to a single leaf, but sometimes
      # it refers to several leafs in different subtrees, so an array of
      # subfamilies is returned. An unknown accession yields an empty array.
      def subfamilies_by_uniprot_ac(uniprot_ac)
        # fetch (rather than []) keeps lookups of unknown ACs from inserting
        # empty entries into the memoized index
        subtree_root_by_uniprot_ac.fetch(uniprot_ac) { [] }
      end

      # Union (without duplicates) of the subfamilies of every given accession.
      def subfamilies_by_multiple_uniprot_acs(uniprot_acs)
        uniprot_acs.flat_map{|uniprot_ac|
          subfamilies_by_uniprot_ac(uniprot_ac)
        }.uniq
      end
    end

    #########################

    # Recognizer keyed by Uniprot ID; translates IDs to accessions and
    # delegates to a ByUniprotAC-like recognizer.
    class ByUniprotID
      # motif_family_recognizer_by_uniprot_ac:: object responding to
      #   `subfamilies_by_multiple_uniprot_acs`
      # uniprot_acs_by_id:: mapping from Uniprot ID to an array of accessions
      def initialize(motif_family_recognizer_by_uniprot_ac, uniprot_acs_by_id)
        @motif_family_recognizer_by_uniprot_ac = motif_family_recognizer_by_uniprot_ac
        @uniprot_acs_by_id = uniprot_acs_by_id
      end

      # In most cases a Uniprot ID refers to a single leaf, but sometimes it
      # refers to several leafs in different subtrees, so an array of
      # subfamilies is returned. An ID absent from the mapping yields an empty
      # array (consistent with the behaviour for unknown accessions).
      def subfamilies_by_uniprot_id(uniprot_id)
        uniprot_acs = @uniprot_acs_by_id[uniprot_id] || []
        @motif_family_recognizer_by_uniprot_ac.subfamilies_by_multiple_uniprot_acs( uniprot_acs )
      end

      # Union (without duplicates) of the subfamilies of every given ID.
      def subfamilies_by_multiple_uniprot_ids(uniprot_ids)
        uniprot_ids.flat_map{|uniprot_id|
          subfamilies_by_uniprot_id(uniprot_id)
        }.uniq
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
module WingenderTFClass
  module OBO
    # One term (node) of an ontology tree parsed from an OBO file.
    #
    # Members:
    # ontology_tree:: the tree this term belongs to; must respond to `term`,
    #   `children` and `leaf?`
    # id:: dot-separated identifier, e.g. "1.2.3"
    # name, subset, definition:: values of the `name:`, `subset:`, `def:` tags
    # parent_id:: id taken from the `is_a:` tag ('' when the stanza has none)
    # uniprot_ACs:: accessions collected from `xref: UNIPROT:` tags
    # other:: all remaining `tag: value` lines, unparsed
    Term = Struct.new(:ontology_tree, :id, :name, :subset, :definition, :parent_id, :uniprot_ACs, :other) do
      # Builds a term from the lines of a single stanza.
      # Lines that do not look like `tag: ...` (e.g. the `[Term]` header or
      # blank lines) are ignored. When a tag repeats, the last value wins
      # (except for UNIPROT xrefs, which accumulate).
      def self.from_line_array(ontology_tree, arr)
        id, name, subset, definition, parent_id = nil, nil, nil, nil, nil
        other = []
        uniprot_ACs = []

        arr.select{|line|
          line.match(/^\w+:/)
        }.each{|line|
          case line
          when /^id:/
            id = line[/^id: (?<data>.+)$/, :data]
          when /^name:/
            name = line[/^name: (?<data>.+)$/, :data]
          when /^subset:/
            subset = line[/^subset: (?<data>.+)$/, :data]
          when /^def:/
            definition = line[/^def: (?<data>.+)$/, :data]
          when /^is_a:/
            # "is_a: <parent id> ! <parent name>" -- only the id part is kept
            parent_id = line[/^is_a: (?<data>.+?) ! .+$/, :data]
          when /^xref: UNIPROT:/
            uniprot_ACs << line[/^xref: UNIPROT:(?<data>\w+)\b/, :data]
          else
            other << line
          end
        }

        # a term without a parent is attached to the term with the empty id
        self.new(ontology_tree, id, name, subset, definition, parent_id || '', uniprot_ACs, other)
      end

      # Parent term looked up in the tree (nil if there is none).
      def parent
        ontology_tree.term(parent_id)
      end

      # Ordering: terms whose first id segment is '0' ("unclassified") sort
      # after all other ("classified") terms; within the same kind, terms are
      # ordered by comparing their id strings.
      def <=>(other)
        self_unclassified = id.split('.').first == '0'
        other_unclassified = other.id.split('.').first == '0'

        if self_unclassified == other_unclassified # both classified or both unclassified
          id <=> other.id
        elsif self_unclassified # unclassified vs classified
          1
        else # classified vs unclassified
          -1
        end
      end

      # Direct children of this term according to the tree.
      def children
        ontology_tree.children(id)
      end

      # True when the tree reports no children for this term.
      def leaf?
        ontology_tree.leaf?(id)
      end

      # Number of dot-separated segments in the id.
      # It can be different from number of ancestors
      def deepness
        id.split('.').size
      end

      # All leaf terms below this term (the term itself if it is a leaf).
      def descendant_leafs
        leaf? ? [self] : children.flat_map(&:descendant_leafs)
      end

      # All terms strictly below this term.
      def descendants
        children + children.flat_map(&:descendants)
      end

      # This term followed by all terms below it.
      def subtree_nodes
        [self] + children.flat_map(&:subtree_nodes)
      end

      # Chain of parents, outermost first (the term itself is not included).
      def ancestors
        result = []
        term = self
        while term.parent
          term = term.parent
          result.unshift(term)
        end
        result
      end

      # Human-readable name of the level for deepness 0..6; nil for deeper terms.
      def level_name
        case deepness
        when 0
          'all TFs'
        when 1
          'superclass'
        when 2
          'class'
        when 3
          'family'
        when 4
          'subfamily'
        when 5
          'genus'
        when 6
          'species'
        end
      end

      def to_s
        "#{name}{#{id}}"
      end

      # Short form instead of the default Struct dump (which would include the whole tree).
      def inspect; to_s; end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require_relative 'term'
2
module WingenderTFClass
  module OBO
    # In-memory ontology: an index of terms by id and by name, plus a
    # parent-id => children index. A root term with the empty id '' is always
    # present; terms whose parent_id is '' become its children.
    class TFClassification
      def initialize()
        @terms_by_id = {}
        @children_by_id = Hash.new{|h,k| h[k] = [] }
        @terms_by_name = Hash.new{|h,k| h[k] = [] }
        # Root term: id '' and parent_id nil, so it is nobody's child
        self << Term.new(self, '', '', 'Root', '', nil, [], [])
      end

      # Registers a term in all indices.
      # Raises RuntimeError if a term with the same id is already registered.
      def <<(term)
        raise "Duplicate id #{term.id}" if @terms_by_id[term.id]
        @terms_by_id[term.id] = term
        @terms_by_name[term.name] << term
        @children_by_id[term.parent_id] << term if term.parent_id
      end

      # Loads an ontology from an OBO file: the file is cut into chunks at
      # every line starting with `[Term]` and each such chunk becomes a term.
      def self.from_file(filename)
        tf_ontology = self.new
        terms = File.readlines(filename)
                    .map(&:chomp)
                    .slice_before{|line|
                      line.start_with?('[Term]')
                    }.select{|chunk|
                      # Skip only a leading header chunk. A file that starts
                      # right away with `[Term]` must not lose its first term.
                      chunk.first.start_with?('[Term]')
                    }.map{|chunk|
                      Term.from_line_array(tf_ontology, chunk.to_a)
                    }
        terms.each{|term|
          tf_ontology << term
        }
        tf_ontology
      end

      # Array of terms having the given name (empty if none).
      def term_by_name(name)
        @terms_by_name[name]
      end

      # Term with the given id, or nil.
      def term(term_id)
        @terms_by_id[term_id]
      end

      # Array of terms whose parent_id equals term_id (empty if none).
      def children(term_id)
        @children_by_id[term_id]
      end

      def root
        term('')
      end

      # True when the term has no children.
      # Raises RuntimeError for an unknown id.
      def leaf?(term_id)
        raise "Term #{term_id} does not exist" unless @terms_by_id[term_id]
        @children_by_id[term_id].empty?
      end

      # Slices the tree at the given deepness. Returns a hash
      # {group root term => all nodes of its subtree}, where group roots are
      # the terms at or below `slice_deepness` whose parent (if any) lies
      # above it.
      def tf_groups(slice_deepness)
        @terms_by_id.each_value.select{|term|
          term.deepness >= slice_deepness && (!term.parent || term.parent.deepness < slice_deepness)
        }.map{|term|
          [term, term.subtree_nodes]
        }.to_h
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
require 'rake'
require 'rake/clean'

# Rake tasks that download the data files used by the gem into source_data/:
# the TFClass ontologies (human, mouse) and the Uniprot AC/ID tables.
# External tools invoked: wget, unzip, gzip.

task default: 'WingenderTFClass'
desc 'Download Wingender TFClass ontology'
task 'WingenderTFClass' => ['WingenderTFClass:download_tfclass', 'WingenderTFClass:download_uniprot_id_ac_mapping']

desc 'Download Wingender ontology files'
task 'WingenderTFClass:download_tfclass' => ['WingenderTFClass:download_tfclass:human', 'WingenderTFClass:download_tfclass:mouse']
task 'WingenderTFClass:download_tfclass:human' => 'source_data/TFOntologies/TFClass_human.obo'
task 'WingenderTFClass:download_tfclass:mouse' => 'source_data/TFOntologies/TFClass_mouse.obo'

directory 'source_data'
directory 'source_data/uniprot_infos/'
directory 'source_data/TFOntologies/'

# The archive holding both ontologies is downloaded once and unpacked per organism
file 'source_data/TFClass_ontologies_temp.zip' => 'source_data' do
  sh 'wget', 'http://tfclass.bioinf.med.uni-goettingen.de/suplementary/TFClass_ontologies.zip', '-O', 'source_data/TFClass_ontologies_temp.zip'
end

file 'source_data/TFOntologies/TFClass_human.obo' => ['source_data/TFOntologies/', 'source_data/TFClass_ontologies_temp.zip'] do
  sh 'unzip', 'source_data/TFClass_ontologies_temp.zip', 'TFClass_human.obo', '-d', 'source_data/TFOntologies/'
end

file 'source_data/TFOntologies/TFClass_mouse.obo' => ['source_data/TFOntologies/', 'source_data/TFClass_ontologies_temp.zip'] do
  sh 'unzip', 'source_data/TFClass_ontologies_temp.zip', 'TFClass_mouse.obo', '-d', 'source_data/TFOntologies/'
end



desc 'Download Uniprot ID-AC mapping'
task 'WingenderTFClass:download_uniprot_id_ac_mapping'

{'human' => 'Homo sapiens', 'mouse' => 'Mus musculus'}.each do |organism, organism_official_name|
  task 'WingenderTFClass:download_uniprot_id_ac_mapping' => "source_data/uniprot_infos/#{organism}.tsv"
  file "source_data/uniprot_infos/#{organism}.tsv" => "source_data/uniprot_infos/#{organism}.tsv.gz" do
    sh 'gzip', '--decompress', "source_data/uniprot_infos/#{organism}.tsv.gz"
  end

  file "source_data/uniprot_infos/#{organism}.tsv.gz" => 'source_data/uniprot_infos/' do
    query = 'organism:"%{organism}"' % {organism: organism_official_name}
    # Uniprot naming is counter-intuitive: column 'id' holds the accession
    # (uniprot_ac), while column 'entry name' holds the uniprot_id.
    columns = ['id', 'entry name' ]

    options = {
      sort: 'score',
      desc: '',
      compress: 'yes',
      query: query,
      fil: '',
      format: 'tab',
      force: 'yes',
      columns: columns.join(','),
    }
    options_str = options.map{|k,v| "#{k}=#{v}" }.join('&')

    sh 'wget', "http://www.uniprot.org/uniprot/?#{options_str}", '-O', "source_data/uniprot_infos/#{organism}.tsv.gz"
  end

  # Register the files these tasks actually produce, so that
  # `rake clean` / `rake clobber` remove them.
  CLEAN << "source_data/uniprot_infos/#{organism}.tsv.gz"
  CLOBBER << "source_data/uniprot_infos/#{organism}.tsv"
end

CLEAN << 'source_data/TFClass_ontologies_temp.zip'

CLOBBER << 'source_data/TFOntologies/*'