mspire-obo 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 45d44376c536159e5d906e57b3bc726517834f43
4
+ data.tar.gz: 2343cf00ab153e903f14a9ef8ddb804a1fc4e3a9
5
+ SHA512:
6
+ metadata.gz: 1cbfd483cac33cc25ad8fc26aea40c2a097bc0d26605393102432545741cd6e3df38340d91b66e809eb156f30ca5218222c9584c01770cb821183f7d4cc29a03
7
+ data.tar.gz: cf8774fba09f176b66563759b9d62a17fecf20b77431eb3782ea2efa0b75034d667e11838f5317185ab04e18c8ec89b9bafb037df252a58b3b5ab7a34d0653fb
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in mspire-obo.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2014 Brigham Young University
2
+ Author: John T. Prince
3
+
4
+ MIT License
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,103 @@
1
+ ## Mspire::Obo
2
+
3
+ Tools for working with ontologies (specifically obo files) with built-in
4
+ access to mass spectrometry related ontologies.
5
+
6
+ ### Installation
7
+
8
+ gem install mspire-obo
9
+
10
+ ### Examples
11
+
12
+ ```ruby
13
+ require 'mspire/obo'
14
+ ```
15
+
16
+ #### Discover which ontologies are available
17
+
18
+ ```ruby
19
+ Mspire::Obo.available # =>
20
+
21
+ [{:full_name=>"protein modification ontology",
22
+ :uri=> "http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/mod/data/PSI-MOD.obo",
23
+ :namespace=>"MOD",
24
+ :path=>"/home/jtprince/dev/mspire-obo/obo/PSI-MOD.obo",
25
+ :version=>"1.013.0",
26
+ :key=>:mod},
27
+ {:full_name=>"Imaging MS Ontology",
28
+ ...
29
+ ]
30
+
31
+ ```
32
+
33
+ #### Use a particular ontology
34
+
35
+ Keywords (downcased symbol of the namespace) are used to easily load an ontology.
36
+
37
+ ```ruby
38
+ ontologies_by_key = Mspire::Obo.available(:key) # => index the available obos by their key
39
+
40
+ ms_obo = Mspire::Obo[:ms] # the Proteomics Standards Initiative Mass Spectrometry Ontology
41
+ ```
42
+
43
+ #### Access ontology information
44
+
45
+ Can create hashes on the fly.
46
+
47
+ ```ruby
48
+ id_to_name_hash = ms_obo.make_id_to_name
49
+ id_to_name_hash['MS:1000005'] # => 'sample volume'
50
+
51
+ id_to_stanza_hash = ms_obo.make_id_to_stanza
52
+ ...
53
+ ```
54
+
55
+ Can make and set hashes (bake them into the Obo object)
56
+
57
+ ```ruby
58
+ ms_obo.id_to_name!
59
+ ms_obo.id_to_name['MS:1000005'] # => 'sample volume'
60
+ ```
61
+
62
+ If you want all access hashes baked in:
63
+
64
+ ```ruby
65
+ ms_obo.make_all!
66
+ ```
67
+
68
+ #### Cast values
69
+
70
+ ```ruby
71
+ ms_obo.id_to_cast!
72
+ ms_obo.cast('MS:1000004') # => :to_f
73
+ ms_obo.cast('MS:1000004', '3.3') # => 3.3 (a Float)
74
+ ```
75
+
76
+ #### Access ontology meta-information
77
+
78
+ ```ruby
79
+ ms_obo.version
80
+ ms_obo.full_name
81
+ ms_obo.uri
82
+ ...
83
+ ```
84
+
85
+ #### Multiple ontologies? - create merged lookup hashes
86
+
87
+ ```ruby
88
+ group = Mspire::Obo::Group.new [Mspire::Obo[:ms], Mspire::Obo[:uo]]
89
+ a_hash = group.make_id_to_stanza # if you want the hash itself
90
+ group.id_to_stanza!
91
+ group.id_to_stanza["UO:0000012"] # => an Obo::Stanza object
92
+ group.id_to_stanza["IMS:1001207"] # => an Obo::Stanza object
93
+ ```
94
+
95
+ #### Use *any* obo file
96
+
97
+ ```ruby
98
+ obo = Mspire::Obo.new("somefile.obo")
99
+ ```
100
+
101
+ ### License
102
+
103
+ MIT. See LICENSE.txt for details.
data/Rakefile ADDED
@@ -0,0 +1,48 @@
1
+ require "bundler/gem_tasks"
2
+ require 'tempfile'
3
+ require 'fileutils'
4
+
5
+ require 'rspec/core'
6
+ require 'rspec/core/rake_task'
7
# `rake spec` runs every *_spec.rb file found under spec/.
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

# A bare `rake` runs the specs.
task default: :spec
12
+
13
+ require 'rdoc/task'
14
# Defines the `rdoc` task: documents the README and everything under lib/
# and writes the result into the rdoc/ directory.
Rake::RDocTask.new do |rdoc|
  # A VERSION file is optional; without one the title has no version suffix.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  # Title names this gem (it used to read "rubabel", a different project).
  rdoc.title = "mspire-obo #{version}".strip
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
22
+
23
desc "downloads the latest obo to appropriate spot"
# For every bundled ontology: download the file named by its uri, normalize
# line endings to "\n", read the version from the downloaded header and, when
# it differs from the bundled version, move the download over the bundled file.
task 'update' do
  require 'mspire/obo'
  require 'open-uri'
  puts "Downloading the latest:"
  # all(false) builds the Obo objects from their metadata without parsing the files
  Mspire::Obo.all(false).each do |obo|
    # declared before the begin so the ensure clause can tell whether it was created
    tmpfile = nil
    begin
      print " #{File.basename(obo.uri)} ... "
      tmpfile = Tempfile.new("test_temp")
      # Kernel#open stopped opening URLs in Ruby 3.0; URI.open exists from 2.5 on.
      # Older rubies fall back to the open-uri patched Kernel#open.
      content = URI.respond_to?(:open) ? URI.open(obo.uri, &:read) : open(obo.uri, &:read)
      tmpfile << content.gsub(/\r\n?/, "\n")
      tmpfile.close
      # hand over the filename, which is what Mspire::Obo.version documents
      new_version = Mspire::Obo.version(tmpfile.path)
      if obo.version != new_version
        puts "!! ---> updating from #{obo.version} to #{new_version} (check into git) <--- !!"
        FileUtils.mv(tmpfile.path, obo.path)
      else
        puts "already latest."
      end
    ensure
      # Nothing to clean up if Tempfile.new failed or the file was moved away.
      tmpfile.close! if tmpfile && tmpfile.path && File.exist?(tmpfile.path)
    end
  end
end
48
+
data/lib/ext/obo.rb ADDED
@@ -0,0 +1,31 @@
1
module Obo
  class Stanza

    # Decides how values belonging to this stanza should be cast, looking only
    # at the first xref tag. Gives :to_f, :to_i or :to_s for the xsd float,
    # int and string value-types; gives false when there is no xref or the
    # value-type is anything else. The result is kept in @cast_method and
    # reused on later calls.
    def cast_method
      first_xref = @tagvalues['xref'].first
      @cast_method =
        if first_xref.nil? || @cast_method == false
          false
        else
          # a previously found method wins; otherwise derive it from the xref
          @cast_method || {
            'float'  => :to_f,
            'int'    => :to_i,
            'string' => :to_s,
          }.fetch(first_xref[/value-type:xsd\\:([^\s]+) /, 1], false)
        end
    end

    # Returns val converted with the method named by #cast_method, or val
    # itself when no cast method applies.
    def cast(val)
      caster = cast_method
      return val unless caster
      val.send(caster)
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'mspire/obo'
2
+ require 'mspire/obo/hash_provider'
3
+
4
module Mspire
  class Obo
    # A Group bundles several Mspire::Obo objects and answers the same
    # make_*/bang lookup calls as a single Mspire::Obo, with each hash being
    # the merge of the corresponding hashes of every member ontology.
    #
    #     group = Mspire::Obo::Group.new([Mspire::Obo[:ms], Mspire::Obo[:uo]])
    #     hash = group.make_id_to_name
    #     hash["MS:1000001"]  # ids from any member ontology can be looked up
    #
    class Group
      include Mspire::Obo::HashProvider

      # the member Mspire::Obo objects (an Array)
      attr_accessor :obos

      def initialize(obos=[])
        @obos = obos
      end

      # merged id => name Hash across all members
      def make_id_to_name
        merge_hashes(:make_id_to_name)
      end

      # merged id => cast method Hash across all members
      def make_id_to_cast
        merge_hashes(:make_id_to_cast)
      end

      # merged id => stanza Hash across all members
      def make_id_to_stanza
        merge_hashes(:make_id_to_stanza)
      end

      # merged name => id Hash across all members
      def make_name_to_id
        merge_hashes(:make_name_to_id)
      end

      # Calls the method named by symbol on every member and folds the
      # resulting hashes into one new Hash (later members overwrite earlier
      # ones on duplicate keys).
      def merge_hashes(symbol)
        obos.each_with_object({}) do |obo, merged|
          merged.update(obo.public_send(symbol))
        end
      end

      # Returns a Hash from each member's namespace string to that member's
      # name_to_id hash.
      def name_to_id_by_namespace
        namespaces = obos.map(&:namespace)
        lookups = obos.map(&:make_name_to_id)
        Hash[namespaces.zip(lookups)]
      end

      # Without arguments: the merged @name_to_id hash (nil unless it has been
      # made). With a name: the id stored for that name in the merged hash.
      # With a namespace as well: the id is looked up only inside that
      # namespace's own hash, so equal names in other ontologies cannot
      # collide. The per-namespace hashes are built on first use and kept.
      def name_to_id(name=nil, namespace=nil)
        if namespace
          by_namespace = (@name_to_id_by_namespace ||= name_to_id_by_namespace)
          return by_namespace[namespace][name]
        end
        name ? @name_to_id[name] : @name_to_id
      end

      # a Group has no stanzas of its own, so the mixin's builder is removed
      undef_method(:build_hash)
    end
  end
end
@@ -0,0 +1,111 @@
1
module Mspire
  class Obo
    # Mixin that builds lookup hashes from a collection of stanzas. The
    # including class must provide a :stanzas method; every stanza must
    # respond to :tagvalues (and to :cast_method for the id_to_cast hash).
    #
    # The make_* methods return a fresh Hash each time. The bang methods
    # (e.g. id_to_name!) build the hash, store it on the object and return
    # self so that calls can be chained.
    module HashProvider

      attr_reader :id_to_stanza
      attr_reader :id_to_name
      attr_reader :id_to_cast
      attr_reader :name_to_id

      # builds and stores all four hashes for fast access; returns self
      def make_all!
        id_to_name!.id_to_cast!.id_to_stanza!.name_to_id!
      end

      ####################
      ## ID TO STANZA
      ####################

      # returns an id_to_stanza hash
      def make_id_to_stanza
        build_hash('id', nil)
      end

      # makes and sets the id_to_stanza hash and returns self
      def id_to_stanza!
        @id_to_stanza = make_id_to_stanza
        self
      end

      # returns the stanza stored for id (requires id_to_stanza! be called first)
      def stanza(id)
        @id_to_stanza[id]
      end

      ####################
      ## ID TO NAME
      ####################

      # returns an id to name Hash
      def make_id_to_name
        build_hash('id', 'name')
      end

      # builds the id_to_name hash and returns self for chaining
      def id_to_name!
        @id_to_name = make_id_to_name
        self
      end

      # returns the name stored for id (requires id_to_name! be called first)
      def name(id)
        @id_to_name[id]
      end

      ####################
      ## ID TO CAST
      ####################

      # returns an id to cast method Hash (values are whatever each stanza's
      # cast_method returns)
      def make_id_to_cast
        build_hash('id', :cast_method)
      end

      # makes and sets the id_to_cast hash and returns self
      def id_to_cast!
        @id_to_cast = make_id_to_cast
        self
      end

      # requires id_to_cast! be called first. If no val is given, returns
      # the stored cast method for id (e.g., :to_f). If given a val, returns
      # val cast with that method; when the id has no cast method (stored
      # value is false, or the id is unknown) val is returned unchanged
      # instead of raising.
      def cast(id, val=nil)
        cast_method = @id_to_cast[id]
        return cast_method unless val
        cast_method ? val.send(cast_method) : val
      end

      ####################
      ## NAME TO ID
      ####################

      # makes and sets the name_to_id hash and returns self
      def name_to_id!
        @name_to_id = make_name_to_id
        self
      end

      # returns a name_to_id Hash
      def make_name_to_id
        build_hash('name', 'id')
      end

      ####################
      ####################

      # Builds a Hash with one entry per stanza. The hash key is the first
      # value of the stanza's +key+ tag. The hash value depends on +val+:
      #
      # nil    :: the stanza itself
      # Symbol :: the result of calling that method on the stanza
      # other  :: the first value of the stanza's +val+ tag
      #
      # Later stanzas overwrite earlier ones that share the same key.
      def build_hash(key,val)
        stanzas.each_with_object({}) do |el, hash|
          tv = el.tagvalues
          hash[tv[key].first] =
            case val
            when nil then el
            when Symbol then el.send(val)
            else tv[val].first
            end
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'obo/parser'
2
+
3
module Mspire
  class Obo
    # An ::Obo::Parser that is not tied to one file: it only fetches the
    # first element of whichever file is handed to #header.
    class HeaderParser < ::Obo::Parser
      # Takes no arguments and does not call super; the filename is given
      # to #header instead.
      def initialize; end

      # Opens filename and returns the first element that #elements yields
      # for it (Mspire::Obo stores this as its header). The file is closed
      # when the block finishes.
      def header(filename)
        File.open(filename) { |io| elements(io).next }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Mspire
  class Obo
    # Version string of the mspire-obo gem. Frozen so that the shared
    # constant cannot be modified in place.
    VERSION = "0.0.3".freeze
  end
end
data/lib/mspire/obo.rb ADDED
@@ -0,0 +1,146 @@
1
+ require 'mspire/obo/version'
2
+ require 'mspire/obo/header_parser'
3
+ require 'mspire/obo/hash_provider'
4
+ require 'obo'
5
+ require 'ext/obo'
6
+ require 'andand'
7
+ require 'yaml'
8
+
9
module Enumerable
  # Returns a Hash that maps the block's result for each element to that
  # element. When several elements produce the same key, the last one is
  # kept. Called without a block, returns an enumerator for index_by.
  def index_by
    return to_enum(:index_by) unless block_given?

    each_with_object({}) { |elem, index| index[yield(elem)] = elem }
  end
end
18
+
19
module Mspire
  # This is the major class representing an ontology. Because there are
  # multiple ways to access the information, and fast access requires building
  # a hash, you will need to explicitly build any hashes you want to use.
  #
  #     Mspire::Obo.new(file).make_all!
  class Obo
    include Mspire::Obo::HashProvider

    # directory holding the bundled obo files and their metadata files
    DIR = File.expand_path(File.dirname(__FILE__) + '/../../obo')

    class << self
      # returns an array of hashes with each hash describing the available
      # obos (those in the Mspire::Obo::DIR directory) with these keys:
      #
      #     :full_name  # the generic name of the ontology
      #     :uri        # where the ontology may be downloaded
      #     :namespace  # namespace (String)
      #     :path       # the expanded path filename
      #     :version    # the ontology version (String)
      #     :key        # access symbol [typically namespace.downcase.to_sym] (Symbol)
      #
      # If index_by is given (e.g. :key), returns a Hash of those same
      # descriptions keyed by that entry instead of an Array.
      def available(index_by=nil)
        obos = []
        Dir.chdir(Mspire::Obo::DIR) do
          # assumes the sorted listing alternates metadata file, obo file
          Dir['*.*'].sort.each_slice(2) do |meta, obo|
            hash = Hash[YAML.load_file(meta).map {|k,v| [k.to_sym, v] }]
            hash[:path] = File.expand_path(obo)
            hash[:version] = version(hash[:path])
            hash[:key] = hash[:namespace].downcase.to_sym
            obos << hash
          end
        end
        if index_by
          obos.index_by {|info| info[index_by] }
        else
          obos
        end
      end

      # returns an array of Obo objects corresponding to all obos held. With
      # load_file false the objects carry only their metadata (no stanzas).
      def all(load_file=true)
        # one directory scan for the whole list, rather than one per key
        available(:key).values.map {|info| build_from_info(info, load_file) }
      end

      # determines the version of the obo by just reading the header
      def version(filename)
        self.new.set_version!(filename).version
      end

      # create an Mspire::Obo object from any obo file within the obo
      # directory using its :key. The key is the downcased symbol of the
      # namespace and can effortlessly be determined with
      # Mspire::Obo.available(). The key must be one of the available keys.
      # With load_file false the file itself is not parsed.
      def [](key, load_file=true)
        build_from_info(available(:key)[key], load_file)
      end

      private

      # builds an Obo from one description hash as returned by #available
      def build_from_info(info, load_file)
        self.new(
          load_file ? info[:path] : nil,
          uri: info[:uri],
          full_name: info[:full_name],
          version: info[:version],
          path: info[:path],
          namespace: info[:namespace],
        )
      end
    end


    # the first element read from the obo file
    attr_accessor :header
    # the remaining elements read from the obo file (an Array)
    attr_accessor :stanzas

    ## These are common attributes associated with typical usage of obo files
    ## (e.g. see mzML spec)

    # String specifying the namespace of the obo, e.g., 'UO' for unit
    # ontology, "IMS" for imaging mass spec. (necessary for name_to_id
    # collision resolution for Mspire::Obo::Group objects).
    attr_accessor :namespace
    # the uri of the obo file (required for most markup languages using
    # ontologies)
    attr_accessor :uri
    # the English name of the ontology (e.g., "Proteomics Ontology") (required
    # for most markup languages using ontologies)
    attr_accessor :full_name
    # the version of the file. This can be found dynamically if you have the
    # file (required for most markup languages using ontologies)
    attr_accessor :version
    # expanded path to the obo file (optional)
    attr_accessor :path

    # if given a filename, then the file will be read and relevant properties
    # will be set. Reading the file sets the version from the file's header,
    # replacing any version: argument.
    def initialize(filename=nil, uri: nil, full_name: nil, version: nil, path: nil, namespace: nil)
      @uri, @full_name, @version, @path, @namespace = uri, full_name, version, path, namespace
      from_file(filename) if filename
    end

    # parses the file, sets stanzas, header and version, and returns self
    # for chaining
    def from_file(filename)
      obo = ::Obo::Parser.new(filename)
      @stanzas = obo.elements.to_a
      # the first parsed element is the header; the rest are the stanzas
      @header = @stanzas.shift
      version_from_header!
      self
    end

    # sets the header attribute and returns self for chaining
    def set_header_from_file!(filename)
      @header = Mspire::Obo::HeaderParser.new.header(filename)
      self
    end

    # sets the version attribute from the header, returns self. The version
    # is the first of these that is present: the data-version tag, a
    # "version: X" found in a remark tag, the first word of the date tag.
    def version_from_header!
      @version = [header.tagvalues['data-version'].first,
        header.tagvalues['remark'].map {|str| str[/version\s*:\s*([^\s]+)/, 1] }.compact.first,
        header['date'].andand.split(' ').first
      ].compact.first
      self
    end

    # sets the version by just reading the header of the file. Returns self for
    # chaining.
    def set_version!(filename)
      set_header_from_file!(filename).version_from_header!
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'mspire/obo/version'
5
+
6
# Gem description for mspire-obo; the version comes from Mspire::Obo::VERSION.
Gem::Specification.new do |spec|
  spec.name          = "mspire-obo"
  spec.version       = Mspire::Obo::VERSION
  spec.authors       = ["John T. Prince"]
  spec.email         = ["jtprince@gmail.com"]
  spec.summary       = %q{simplified access for obo ontology files}
  spec.description   = %q{simplified access for obo ontology files.  Builds hashes for quick lookup of terms and finds version, etc.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # package exactly the files tracked by git
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files
  spec.executables   = tracked_files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = tracked_files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # runtime dependencies
  spec.add_dependency("obo", ">= 0.1.5")
  spec.add_dependency("andand", ">= 1.3.3")

  # development-only dependencies
  spec.add_development_dependency("bundler", "~> 1.6.2")
  spec.add_development_dependency("rake")
  spec.add_development_dependency("rspec", "~> 2.14.1")
  spec.add_development_dependency("rdoc", "~> 4.1.1")
  spec.add_development_dependency("simplecov", "~> 0.8.2")
end
data/obo/PSI-MOD.meta ADDED
@@ -0,0 +1,3 @@
1
+ full_name: protein modification ontology
2
+ uri: http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/mod/data/PSI-MOD.obo
3
+ namespace: MOD