mspire-obo 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 45d44376c536159e5d906e57b3bc726517834f43
4
+ data.tar.gz: 2343cf00ab153e903f14a9ef8ddb804a1fc4e3a9
5
+ SHA512:
6
+ metadata.gz: 1cbfd483cac33cc25ad8fc26aea40c2a097bc0d26605393102432545741cd6e3df38340d91b66e809eb156f30ca5218222c9584c01770cb821183f7d4cc29a03
7
+ data.tar.gz: cf8774fba09f176b66563759b9d62a17fecf20b77431eb3782ea2efa0b75034d667e11838f5317185ab04e18c8ec89b9bafb037df252a58b3b5ab7a34d0653fb
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in mspire-obo.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2014 Brigham Young University
2
+ Author: John T. Prince
3
+
4
+ MIT License
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,103 @@
1
+ ## Mspire::Obo
2
+
3
+ Tools for working with ontologies (specifically obo files) with built-in
4
+ access to mass spectrometry related ontologies.
5
+
6
+ ### Installation
7
+
8
+ gem install mspire-obo
9
+
10
+ ### Examples
11
+
12
+ ```ruby
13
+ require 'mspire/obo'
14
+ ```
15
+
16
+ #### Discover which ontologies are available
17
+
18
+ ```ruby
19
+ Mspire::Obo.available # =>
20
+
21
+ [{:full_name=>"protein modification ontology",
22
+ :uri=> "http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/mod/data/PSI-MOD.obo",
23
+ :namespace=>"MOD",
24
+ :path=>"/home/jtprince/dev/mspire-obo/obo/PSI-MOD.obo",
25
+ :version=>"1.013.0",
26
+ :key=>:mod},
27
+ {:full_name=>"Imaging MS Ontology",
28
+ ...
29
+ ]
30
+
31
+ ```
32
+
33
+ #### Use a particular ontology
34
+
35
+ Keywords (downcased symbol of the namespace) are used to easily load an ontology.
36
+
37
+ ```ruby
38
+ ontologies_by_key = Mspire::Obo.available(:key) # => index the available obos by their key
39
+
40
+ ms_obo = Mspire::Obo[:ms] # the Proteomics Standards Initiative Mass Spectrometry Ontology
41
+ ```
42
+
43
+ #### Access ontology information
44
+
45
+ Can create hashes on the fly.
46
+
47
+ ```ruby
48
+ id_to_name_hash = ms_obo.make_id_to_name
49
+ id_to_name_hash['MS:1000005'] # => 'sample volume'
50
+
51
+ id_to_stanza_hash = ms_obo.make_id_to_stanza
52
+ ...
53
+ ```
54
+
55
+ Can make and set hashes (bake them into the Obo object)
56
+
57
+ ```ruby
58
+ ms_obo.id_to_name!
59
+ ms_obo.id_to_name['MS:1000005'] # => 'sample volume'
60
+ ```
61
+
62
+ If you want all access hashes baked in:
63
+
64
+ ```ruby
65
+ ms_obo.make_all!
66
+ ```
67
+
68
+ #### Cast values
69
+
70
+ ```ruby
71
+ ms_obo.id_to_cast!
72
+ ms_obo.cast('MS:1000004') # => :to_f
73
+ ms_obo.cast('MS:1000004', '3.3') # => 3.3 (a Float)
74
+ ```
75
+
76
+ #### Access ontology meta-information
77
+
78
+ ```ruby
79
+ ms_obo.version
80
+ ms_obo.full_name
81
+ ms_obo.uri
82
+ ...
83
+ ```
84
+
85
+ #### Multiple ontologies? - create merged lookup hashes
86
+
87
+ ```ruby
88
+ group = Mspire::Obo::Group.new [Mspire::Obo[:ms], Mspire::Obo[:uo]]
89
+ a_hash = group.make_id_to_stanza # if you want the hash itself
90
+ group.id_to_stanza!
91
+ group.id_to_stanza["UO:0000012"] # => an Obo::Stanza object
92
+ group.id_to_stanza["MS:1000005"] # => an Obo::Stanza object
93
+ ```
94
+
95
+ #### Use *any* obo file
96
+
97
+ ```ruby
98
+ obo = Mspire::Obo.new("somefile.obo")
99
+ ```
100
+
101
+ ### License
102
+
103
+ MIT. See LICENSE.txt for details.
data/Rakefile ADDED
@@ -0,0 +1,48 @@
require "bundler/gem_tasks"
require 'tempfile'
require 'fileutils'

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file found under spec/.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

task :default => :spec

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # A VERSION file is optional; without one the title has no version suffix.
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  # The title previously said "rubabel", a leftover from another project.
  rdoc.title = "mspire-obo #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

desc "downloads the latest obo to appropriate spot"
task 'update' do
  require 'mspire/obo'
  require 'open-uri'
  puts "Downloading the latest:"
  # all(false) builds the Obo objects from their metadata without parsing
  # the (large) obo files themselves.
  Mspire::Obo.all(false).each do |obo|
    tmpfile = nil
    begin
      print " #{File.basename(obo.uri)} ... "
      tmpfile = Tempfile.new("test_temp")
      # URI#open (from open-uri) is used instead of Kernel#open, which no
      # longer opens URLs on Ruby 3.0+. Line endings are normalized to "\n".
      tmpfile << URI.parse(obo.uri).open(&:read).gsub(/\r\n?/, "\n")
      tmpfile.close
      new_version = Mspire::Obo.version(tmpfile.path)
      if obo.version != new_version
        puts "!! ---> updating from #{obo.version} to #{new_version} (check into git) <--- !!"
        FileUtils.mv(tmpfile.path, obo.path)
      else
        puts "already latest."
      end
    ensure
      # tmpfile is still nil when the failure happened before it was created;
      # guarding here keeps the original exception from being masked. After
      # a successful move the file no longer exists and nothing is removed.
      tmpfile.close! if tmpfile && tmpfile.path && File.exist?(tmpfile.path)
    end
  end
end
data/lib/ext/obo.rb ADDED
@@ -0,0 +1,31 @@
module Obo
  class Stanza

    # Returns the name of the conversion method (:to_f, :to_i or :to_s) implied
    # by the stanza's first xref, or false when there is no xref or its
    # value-type is not one of float/int/string.
    #
    # The answer is memoized in @cast_method after the first call, so the xref
    # is only inspected once per stanza.
    def cast_method
      return @cast_method unless @cast_method.nil?

      # Array() tolerates a stanza that has no 'xref' entry at all.
      xref = Array(@tagvalues['xref']).first
      @cast_method =
        # the pattern expects a literal backslash before the colon ("xsd\:float ")
        case xref && xref[/value-type:xsd\\:([^\s]+) /, 1]
        when 'float'  then :to_f
        when 'int'    then :to_i
        when 'string' then :to_s
        else false
        end
    end

    # Returns val converted with the method chosen by #cast_method.
    # When no conversion applies, val is returned unchanged.
    def cast(val)
      methd = cast_method
      methd ? val.send(methd) : val
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'mspire/obo'
2
+ require 'mspire/obo/hash_provider'
3
+
module Mspire
  class Obo
    # A Group bundles several Mspire::Obo objects and answers lookups against
    # all of them at once: each make_* method merges the corresponding hashes
    # of every member ontology. The interface mirrors the one Mspire::Obo gets
    # from Mspire::Obo::HashProvider.
    #
    #     group = Mspire::Obo::Group.new([Mspire::Obo[:ms], Mspire::Obo[:uo]])
    #     hash = group.make_id_to_name
    #     # ids from any member ontology can be looked up
    #     hash["MS:1000001"] # -> 'sample number'
    #
    class Group
      include Mspire::Obo::HashProvider

      # the array of Mspire::Obo objects
      attr_accessor :obos

      def initialize(obos=[])
        @obos = obos
      end

      # returns an id to name Hash merged over all member obos
      def make_id_to_name
        merge_hashes(:make_id_to_name)
      end

      # returns an id to cast-method Hash merged over all member obos
      def make_id_to_cast
        merge_hashes(:make_id_to_cast)
      end

      # returns an id to stanza Hash merged over all member obos
      def make_id_to_stanza
        merge_hashes(:make_id_to_stanza)
      end

      # returns a name to id Hash merged over all member obos
      def make_name_to_id
        merge_hashes(:make_name_to_id)
      end

      # Calls the method named by symbol on every obo and merges the resulting
      # hashes in order; on duplicate keys the later obo wins.
      def merge_hashes(symbol)
        obos.map(&symbol).inject({}) { |merged, hash| merged.merge(hash) }
      end

      # returns a Hash keyed by namespace string whose values are the
      # name_to_id hashes of the respective obos
      def name_to_id_by_namespace
        Hash[obos.map { |obo| [obo.namespace, obo.make_name_to_id] }]
      end

      # No arguments: returns the merged @name_to_id hash (nil unless it has
      # been built). One argument: returns the id for that name from the
      # merged hash. With a namespace as well: looks the name up only within
      # that ontology, which avoids collisions between ontologies.
      def name_to_id(name=nil, namespace=nil)
        if namespace
          # the per-namespace index is built on first use and then reused
          @name_to_id_by_namespace ||= name_to_id_by_namespace
          return @name_to_id_by_namespace[namespace][name]
        end
        name ? @name_to_id[name] : @name_to_id
      end

      # a group has no stanzas of its own, so the stanza-based builder
      # inherited from HashProvider is removed
      undef_method(:build_hash)
    end
  end
end
@@ -0,0 +1,111 @@
module Mspire
  class Obo
    # Mixin that builds lookup hashes from a collection of stanzas.
    #
    # The including class must provide a #stanzas method returning objects
    # that respond to #tagvalues (a hash of tag => array of values) and to
    # #cast_method. Each make_* method returns a fresh hash; each bang method
    # (e.g. id_to_name!) stores that hash on the object and returns self so
    # calls can be chained.
    module HashProvider

      attr_reader :id_to_stanza
      attr_reader :id_to_name
      attr_reader :id_to_cast
      attr_reader :name_to_id

      # builds and stores all four hashes for fast access; returns self
      def make_all!
        id_to_name!.id_to_cast!.id_to_stanza!.name_to_id!
      end

      ####################
      ## ID TO STANZA
      ####################

      # returns an id_to_stanza hash
      def make_id_to_stanza
        build_hash('id', nil)
      end

      # makes and sets the id_to_stanza hash and returns self
      def id_to_stanza!
        @id_to_stanza = make_id_to_stanza
        self
      end

      # returns the stanza object for id (requires id_to_stanza! be called first)
      def stanza(id)
        @id_to_stanza[id]
      end

      ####################
      ## ID TO NAME
      ####################

      # returns an id to name Hash
      def make_id_to_name
        build_hash('id', 'name')
      end

      # builds the id_to_name hash and returns self for chaining
      def id_to_name!
        @id_to_name = make_id_to_name
        self
      end

      # returns the name for id (requires id_to_name! be called first)
      def name(id)
        @id_to_name[id]
      end

      ####################
      ## ID TO CAST
      ####################

      # returns an id to cast-method Hash (values are whatever each stanza's
      # cast_method returns, e.g. :to_f, or false when no cast applies)
      def make_id_to_cast
        build_hash('id', :cast_method)
      end

      # makes and sets the id_to_cast hash and returns self
      def id_to_cast!
        @id_to_cast = make_id_to_cast
        self
      end

      # Requires id_to_cast! be called first.
      #
      # Without val, returns the stored cast method for id (e.g. :to_f).
      # With val, returns val converted by that method. If the id has no cast
      # method (stored as false) or is unknown, val is returned unchanged
      # instead of raising a TypeError from val.send(false).
      def cast(id, val=nil)
        methd = @id_to_cast[id]
        return methd unless val
        methd ? val.send(methd) : val
      end

      ####################
      ## NAME TO ID
      ####################

      # makes and sets the name_to_id hash and returns self
      def name_to_id!
        @name_to_id = make_name_to_id
        self
      end

      # returns a name_to_id Hash
      def make_name_to_id
        build_hash('name', 'id')
      end

      ####################
      ####################

      # Builds a hash with one entry per stanza, keyed by the first value of
      # the tag named key. The entry's value depends on val:
      #   nil    - the stanza itself
      #   Symbol - the result of calling that method on the stanza
      #   other  - the first value of the tag named val
      # When several stanzas share a key, the last one wins.
      def build_hash(key,val)
        stanzas.each_with_object({}) do |el, hash|
          tv = el.tagvalues
          hash[tv[key].first] =
            case val
            when nil    then el
            when Symbol then el.send(val)
            else tv[val].first
            end
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'obo/parser'
2
+
module Mspire
  class Obo
    # Parser used to fetch only the first element of an obo file (treated by
    # callers as the header) without reading the rest of the file.
    class HeaderParser < ::Obo::Parser
      # Takes no arguments and deliberately does not call super; the file to
      # read is passed to #header instead.
      def initialize; end

      # Opens filename and returns the first item produced by elements(io)
      # (presumably the enumerator inherited from ::Obo::Parser -- verify
      # against the obo gem). The file is closed when the block exits.
      def header(filename)
        File.open(filename) { |io| elements(io).next }
      end
    end
  end
end
@@ -0,0 +1,5 @@
module Mspire
  class Obo
    # Version string of the mspire-obo gem (read by the gemspec).
    VERSION = '0.0.3'
  end
end
data/lib/mspire/obo.rb ADDED
@@ -0,0 +1,146 @@
1
+ require 'mspire/obo/version'
2
+ require 'mspire/obo/header_parser'
3
+ require 'mspire/obo/hash_provider'
4
+ require 'obo'
5
+ require 'ext/obo'
6
+ require 'andand'
7
+ require 'yaml'
8
+
module Enumerable
  # Defined only when nothing else (e.g. ActiveSupport) already provides
  # index_by, so an existing implementation is never overwritten.
  unless method_defined?(:index_by)
    # Returns a Hash mapping the block's result for each element to that
    # element. When two elements produce the same key, the later one wins.
    # Without a block, returns an Enumerator.
    def index_by
      return to_enum(:index_by) unless block_given?

      each_with_object({}) { |elem, index| index[yield(elem)] = elem }
    end
  end
end
18
+
module Mspire
  # This is the major class representing an ontology. Because there are
  # multiple ways to access the information, and fast access requires building
  # a hash, you will need to explicitly build any hashes you want to use.
  #
  #     Mspire::Obo.new(file).make_all!
  class Obo
    include Mspire::Obo::HashProvider

    # directory holding the bundled .meta/.obo file pairs
    DIR = File.expand_path(File.dirname(__FILE__) + '/../../obo')

    class << self
      # Returns an array of hashes, each describing one available obo (those
      # in the Mspire::Obo::DIR directory) with these keys:
      #
      #     :full_name  # the generic name of the ontology
      #     :uri        # where the ontology may be downloaded
      #     :namespace  # namespace (String)
      #     :path       # the expanded path filename
      #     :version    # the ontology version (String)
      #     :key        # access symbol [namespace.downcase.to_sym] (Symbol)
      #
      # If index_by is given (one of the keys above), returns instead a Hash
      # of those info hashes keyed by that attribute.
      def available(index_by=nil)
        obos = []
        Dir.chdir(Mspire::Obo::DIR) do
          # Files are paired purely by sort order, so every .meta file must
          # sort immediately before its .obo file.
          Dir['*.*'].sort.each_slice(2) do |meta, obo|
            hash = Hash[YAML.load_file(meta).map {|k,v| [k.to_sym, v] }]
            hash[:path] = File.expand_path(obo)
            hash[:version] = version(hash[:path])
            hash[:key] = hash[:namespace].downcase.to_sym
            obos << hash
          end
        end
        index_by ? obos.index_by {|info| info[index_by] } : obos
      end

      # Returns an array of Obo objects, one per available key. When load_file
      # is false the obo files are not parsed; only the meta-information is
      # set. The obo directory is scanned a single time.
      def all(load_file=true)
        available(:key).values.map {|info| from_info(info, load_file) }
      end

      # determines the version of the obo by just reading the header
      def version(filename)
        new.set_version!(filename).version
      end

      # Creates an Mspire::Obo object from any obo file within the obo
      # directory using its :key. The key is the downcased symbol of the
      # namespace and can be determined with Mspire::Obo.available().
      #
      # Raises ArgumentError, listing the valid keys, if key is unknown.
      def [](key, load_file=true)
        lookup = available(:key)
        info = lookup[key]
        unless info
          raise ArgumentError,
            "unknown obo key #{key.inspect} (available: #{lookup.keys.map(&:inspect).join(', ')})"
        end
        from_info(info, load_file)
      end

      private

      # Builds an Obo from one info hash as returned by available. The file
      # at info[:path] is parsed only when load_file is true.
      def from_info(info, load_file)
        new(
          load_file ? info[:path] : nil,
          uri: info[:uri],
          full_name: info[:full_name],
          version: info[:version],
          path: info[:path],
          namespace: info[:namespace],
        )
      end
    end


    attr_accessor :header
    attr_accessor :stanzas

    ## These are common attributes associated with typical usage of obo files
    ## (e.g. see mzML spec)

    # String specifying the namespace of the obo, e.g., 'UO' for unit
    # ontology, "IMS" for imaging mass spec. (necessary for name_to_id
    # collision resolution for Mspire::Obo::Group objects).
    attr_accessor :namespace
    # the uri of the obo file (required for most markup languages using
    # ontologies)
    attr_accessor :uri
    # the English name of the ontology (e.g., "Proteomics Ontology") (required
    # for most markup languages using ontologies)
    attr_accessor :full_name
    # the version of the file. This can be found dynamically if you have the
    # file (required for most markup languages using ontologies)
    attr_accessor :version
    # expanded path to the obo file (optional)
    attr_accessor :path

    # Stores the given meta-information. If a filename is given, the file is
    # then read, which sets the stanzas and header and replaces the version
    # with the one found in the file's header.
    def initialize(filename=nil, uri: nil, full_name: nil, version: nil, path: nil, namespace: nil)
      @uri, @full_name, @version, @path, @namespace = uri, full_name, version, path, namespace
      from_file(filename) if filename
    end

    # Parses filename, sets stanzas, header and version, and returns self
    # for chaining.
    def from_file(filename)
      obo = ::Obo::Parser.new(filename)
      @stanzas = obo.elements.to_a
      # the first parsed element is the header; the rest are the stanzas
      @header = @stanzas.shift
      version_from_header!
      self
    end

    # sets the header attribute and returns self for chaining
    def set_header_from_file!(filename)
      @header = Mspire::Obo::HeaderParser.new.header(filename)
      self
    end

    # Sets the version attribute from the header and returns self. The first
    # of these that is present is used: the 'data-version' tag, a
    # "version: X" mention inside a 'remark' tag, or the first
    # whitespace-separated token of the 'date' tag.
    def version_from_header!
      @version = [header.tagvalues['data-version'].first,
        header.tagvalues['remark'].map {|str| str[/version\s*:\s*([^\s]+)/, 1] }.compact.first,
        header['date'].andand.split(' ').first
      ].compact.first
      self
    end

    # sets the version by just reading the header of the file. Returns self for
    # chaining.
    def set_version!(filename)
      set_header_from_file!(filename).version_from_header!
    end
  end
end
@@ -0,0 +1,37 @@
# coding: utf-8
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'mspire/obo/version'

Gem::Specification.new do |spec|
  spec.name          = "mspire-obo"
  spec.version       = Mspire::Obo::VERSION
  spec.authors       = ["John T. Prince"]
  spec.email         = ["jtprince@gmail.com"]
  spec.summary       = %q{simplified access for obo ontology files}
  spec.description   = %q{simplified access for obo ontology files. Builds hashes for quick lookup of terms and finds version, etc.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package exactly the files tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # runtime dependencies
  spec.add_dependency "obo", ">= 0.1.5"
  spec.add_dependency "andand", ">= 1.3.3"

  # development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.6.2"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~> 2.14.1"
  spec.add_development_dependency "rdoc", "~> 4.1.1"
  spec.add_development_dependency "simplecov", "~> 0.8.2"
end
data/obo/PSI-MOD.meta ADDED
@@ -0,0 +1,3 @@
1
+ full_name: protein modification ontology
2
+ uri: http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/mod/data/PSI-MOD.obo
3
+ namespace: MOD