obo_parser 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 mjy
1
+ Copyright (c) 2010 Matt Yoder
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -1,16 +1,44 @@
1
1
  = obo_parser
2
2
 
3
- A simple OBO file format parsing library.
3
+ A simple Ruby gem for parsing OBO formatted ontology files. Useful for reporting, comparing, and mapping data to other databases. There is presently no functionality for logical inference across the ontology.
4
4
 
5
- == Note on Patches/Pull Requests
5
+ == Installation
6
+
7
+ gem install obo_parser
8
+
9
+ == Use
10
+
11
+ require 'rubygems'
12
+ require 'obo_parser'
13
+ foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance. Targets OBO v. 1.4
14
+ first_term = foo.terms.first # => An OboParser#Term instance
15
+ d = first_term.def # => An OboParser#Tag instance
6
16
 
7
- * Fork the project.
8
- * Make your feature addition or bug fix.
9
- * Add tests for it. This is important so I don't break it in a
10
- future version unintentionally.
11
- * Commit, do not mess with rakefile, version, or history.
12
- (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
- * Send me a pull request. Bonus points for topic branches.
17
+ d.tag # => 'def'
18
+ d.value # => 'Some definition'
19
+ d.xrefs # => ['xref:123', 'xref:456']
20
+ d.comment # => 'Some comment'
21
+
22
+ t = first_term.name # => An OboParser#Tag instance
23
+ t.tag # => 'name'
24
+ t.value # => 'Some Term name'
25
+
26
+ o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
27
+ o.first # => An OboParser#Tag instance
28
+
29
+ first_typedef = foo.typedefs.first # => An OboParser#Typedef instance
30
+ first_typedef.id.value # => 'Some typedef id'
31
+ first_typedef.name.value # => 'Some typedef name'
32
+
33
+ foo.terms.first.tags_named('is_a') # => [OboParser#Tag, ... ]
34
+ foo.terms.first.tags_named('is_a').first.tag # => 'is_a'
35
+ foo.terms.first.tags_named('is_a').first.value # => 'Some Term id'
36
+
37
+ See also /test/test_obo_parser.rb
38
+
39
+ == Utilities
40
+
41
+ A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
14
42
 
15
43
  == Copyright
16
44
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.0
data/lib/lexer.rb CHANGED
@@ -56,4 +56,5 @@ class OboParser::Lexer
56
56
  return false
57
57
  end
58
58
  end
59
+
59
60
  end
data/lib/obo_parser.rb CHANGED
@@ -2,113 +2,159 @@
2
2
  # uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
3
3
  # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
4
4
 
5
- # outstanding issues:
5
+ #== Outstanding issues:
6
+ # * Better documentation
7
+ # * More tests
6
8
 
7
9
  module OboParser
8
10
 
9
- require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
10
- require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
11
- require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
12
- require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
11
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
12
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
13
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
14
+ require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
13
15
 
16
+ class OboParser
17
+ attr_accessor :terms, :typedefs
14
18
 
15
- class OboParser # Node
16
- attr_accessor :terms, :typedefs
19
+ def initialize
20
+ @terms = []
21
+ @typedefs = []
22
+ true
23
+ end
17
24
 
18
- def initialize # :yields: true
19
- @terms = []
20
- @typedefs = []
21
- true
22
- end
25
+ def term_strings # :yields: Array of Strings
26
+ @terms.collect{|t| t.name.value}.sort
27
+ end
23
28
 
24
- def term_strings # :yields: Array of Strings
25
- @terms.collect{|t| t.name}.sort
26
- end
29
+ # Warning! This assumes terms are unique, they are NOT required to be so in an OBO file.
30
+ def term_hash # :yields: Hash (String => String) (name => id)
31
+ @terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
32
+ end
27
33
 
28
- def term_hash # :yields: Hash (String => String) (name => id)
29
- # Warning! This assumes terms are unqiue, they are not required to be so.
30
- @terms.inject({}) {|sum, t| sum.update(t.name => t.id)}
31
- end
34
+ def id_hash # :yields: Hash (String => String (id => name))
35
+ @terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
36
+ end
32
37
 
33
- def id_hash # :yields: Hash (String => String (id => name)
34
- # ids are unique
35
- @terms.inject({}) {|sum, t| sum.update(t.id => t.name)}
36
- end
38
+ class Stanza
39
+ # Make special reference to several specific types of tags (:name, :id), subclasses will remove additional special types from :other_tags
40
+ attr_accessor :name, :id, :other_tags
41
+
42
+ def initialize(tags)
43
+ @other_tags = []
44
+
45
+ while tags.length != 0
46
+ t = tags.shift
47
+
48
+ new_tag = OboParser::Tag.new
49
+ new_tag.tag = t.tag
50
+ new_tag.value = t.value
51
+ new_tag.comment = t.comment
52
+ new_tag.xrefs = t.xrefs
53
+
54
+ case new_tag.tag
55
+ when 'id'
56
+ @id = new_tag
57
+ when 'name'
58
+ @name = new_tag
59
+ else
60
+ @other_tags.push(new_tag)
61
+ end
62
+ end
63
+ end
37
64
 
38
- class Stanza
39
- attr_accessor :name, :id, :tags
40
- # we can have only one of id, name, and some others (but this is a loose setup now)
41
- # can have many of some other things- put them in tags
42
-
43
- def initialize(tags)
44
- @tags = {}
45
- tags.each do |t|
46
- case t[0]
47
- when 'id'
48
- @id = t[1]
49
- when 'name'
50
- @name = t[1]
51
- else
52
- @tags[t[0]] = [] if !@tags[t[0]]
53
- @tags[t[0]].push t[1]
65
+ #=== Convenience methods
66
+
67
+ def tags_named(tag_name = nil)
68
+ return nil if tag_name.nil?
69
+ result = []
70
+ @other_tags.each do |t|
71
+ result.push t if t.tag == tag_name
54
72
  end
73
+ result
55
74
  end
75
+
56
76
  end
57
- end
58
77
 
59
- class Term < Stanza
60
- attr_accessor :def
61
- def initialize(tags)
62
- super
78
+ class Term < Stanza
79
+ attr_accessor :def
80
+ def initialize(tags)
81
+ super
82
+
83
+ anonymous_tags = []
84
+
85
+ # Loop through "unclaimed" tags and reference those specific to Term
86
+ while @other_tags.size != 0
87
+ t = @other_tags.shift
88
+ case t.tag
89
+ when 'def'
90
+ @def = t
91
+ else
92
+ anonymous_tags.push(t)
93
+ end
94
+ end
95
+ @other_tags = anonymous_tags
96
+ end
97
+ end
98
+
99
+ class Typedef < Stanza
100
+ def initialize(tags)
101
+ super
102
+ #anonymous_tags = []
103
+ ## Loop through "unclaimed" tags and reference those specific to Typedef
104
+ #while @other_tags.size != 0
105
+ # t = @other_tags.shift
106
+ # case t.tag
107
+ # when 'def'
108
+ # @def = t
109
+ # else
110
+ # anonymous_tags.push(t)
111
+ # end
112
+ # @other_tags = anonymous_tags
113
+ #end
114
+ end
63
115
  end
64
- end
65
116
 
66
- class Typedef < Stanza
67
- def initialize(tags)
68
- super
117
+ class Tag
118
+ attr_accessor :tag, :value, :xrefs, :comment
69
119
  end
120
+
70
121
  end
71
122
 
72
- end
123
+ class OboParserBuilder
124
+ def initialize
125
+ @of = OboParser.new
126
+ end
73
127
 
128
+ def add_term(tags)
129
+ @of.terms.push OboParser::Term.new(tags)
130
+ end
74
131
 
75
- class OboParserBuilder
76
- def initialize
77
- @of = OboParser.new
78
- end
132
+ def add_typedef(tags)
133
+ @of.typedefs.push OboParser::Typedef.new(tags)
134
+ end
79
135
 
80
- def add_term(tags)
81
- @of.terms.push OboParser::Term.new(tags)
82
- end
136
+ def obo_file
137
+ @of
138
+ end
83
139
 
84
- def add_typedef(tags)
85
- @of.typedefs.push OboParser::Typedef.new(tags)
86
140
  end
87
141
 
88
- def obo_file
89
- @of
142
+ class ParseError < StandardError
90
143
  end
91
144
 
92
- end
93
-
94
- class ParseError < StandardError
95
- end
96
-
97
145
  end # end module
98
146
 
99
- # the actual method
147
+ #= Implementation
148
+
100
149
  def parse_obo_file(input)
101
150
  @input = input
102
- raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
151
+ raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
152
+
153
+ # Comments are handled now.
154
+ # @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
103
155
 
104
- @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
105
-
106
156
  builder = OboParser::OboParserBuilder.new
107
157
  lexer = OboParser::Lexer.new(@input)
108
158
  OboParser::Parser.new(lexer, builder).parse_file
109
159
  return builder.obo_file
110
160
  end
111
-
112
-
113
-
114
-
data/lib/parser.rb CHANGED
@@ -5,21 +5,21 @@ class OboParser::Parser
5
5
  end
6
6
 
7
7
  def parse_file
8
- # toss everything right now, we just want the terms
8
+ # At present we ignore the header lines
9
9
  while !@lexer.peek(OboParser::Tokens::Term)
10
10
  @lexer.pop(OboParser::Tokens::TagValuePair)
11
11
  end
12
12
 
13
13
  i = 0
14
14
  while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
15
- raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000
15
+ raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000 # there aren't that many words!
16
16
  parse_term
17
17
  i += 1
18
18
  end
19
19
 
20
20
  i = 0
21
21
  while @lexer.peek(OboParser::Tokens::Typedef)
22
- raise OboParser::ParseError,"infinite loop in Terms" if i > 1000000 # there aren't that many words!
22
+ raise OboParser::ParseError,"infinite loop in Typedefs" if i > 1000000
23
23
  parse_typedef
24
24
  i += 1
25
25
  end
@@ -29,11 +29,11 @@ class OboParser::Parser
29
29
  t = @lexer.pop(OboParser::Tokens::Term)
30
30
  tags = []
31
31
  while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
32
- if @lexer.peek(OboParser::Tokens::TagValuePair)
32
+ begin
33
33
  t = @lexer.pop(OboParser::Tokens::TagValuePair)
34
- tags.push [t.tag, t.value]
35
- else
36
- raise(OboParser::ParseError, "Expected a tag-value pair, but did not get one following this tag/value: [#{t.tag} / #{t.value}]")
34
+ tags.push(t)
35
+ rescue
36
+ raise
37
37
  end
38
38
  end
39
39
  @builder.add_term(tags)
@@ -41,11 +41,14 @@ class OboParser::Parser
41
41
 
42
42
  def parse_typedef
43
43
  @lexer.pop(OboParser::Tokens::Typedef)
44
- # @t = @builder.stub_typdef
45
44
  tags = []
46
45
  while !@lexer.peek(OboParser::Tokens::Typedef) && @lexer.peek(OboParser::Tokens::TagValuePair)
47
- t = @lexer.pop(OboParser::Tokens::TagValuePair)
48
- tags.push [t.tag, t.value]
46
+ begin
47
+ t = @lexer.pop(OboParser::Tokens::TagValuePair)
48
+ tags.push(t)
49
+ rescue
50
+ raise
51
+ end
49
52
  end
50
53
  @builder.add_typedef(tags)
51
54
  end
data/lib/tokens.rb CHANGED
@@ -9,9 +9,6 @@ module OboParser::Tokens
9
9
  end
10
10
  end
11
11
 
12
- # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
13
- # moving along popping off
14
-
15
12
  class Term < Token
16
13
  @regexp = Regexp.new(/\A\s*(\[term\])\s*/i)
17
14
  end
@@ -20,19 +17,52 @@ module OboParser::Tokens
20
17
  @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
21
18
  end
22
19
 
23
-
24
20
  class TagValuePair < Token
25
- attr_reader :tag, :value
26
- @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
21
+ attr_reader :tag, :comment, :xrefs
22
+ @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
27
23
  def initialize(str)
28
24
  str.strip!
29
- str = str.split(':',2)
30
-
31
- str[1].strip!
32
- # strip trailing comments
25
+ tag, value = str.split(':',2)
26
+
27
+ value.strip!
28
+
29
+ # Handle comments
30
+ if value =~ /(!\s*.+)\Z/i
31
+ @comment = $1
32
+ value.gsub!(@comment, '')
33
+ @comment.gsub!(/\A!\s*/, '')
34
+ end
35
+
36
+ # Break out the xrefs, could be made more robust
37
+ # Assumes non-quoted comma delimited in format 'foo:bar, stuff:things'
38
+ if value =~ /(\s*\[.*\]\s*)/i
39
+ xref_list = $1
40
+ value.gsub!(xref_list, '')
41
+ xref_list.strip!
42
+ xref_list = xref_list[1..-2] # strip []
43
+ @xrefs = xref_list.split(",")
44
+ end
33
45
 
34
- @tag = str[0]
35
- @value = str[1]
46
+ @tag = tag.strip
47
+ @value = value.strip
48
+
49
+ @value = @value[1..-2] if @value[0..0] == "\"" # get rid of quote marks
50
+ @value = @value[1..-2] if @value[0..0] == "'" # get rid of quote marks
51
+
52
+ @tag = @tag.strip
53
+ @value = @value.strip
54
+ end
55
+ end
56
+
57
+ class XrefList < Token
58
+ @regexp = Regexp.new(/\A\s*\[(.+)\]\s*\n*/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
59
+ def initialize(str)
60
+ str.strip!
61
+ @value = {}
62
+ str.split(",").each do |s|
63
+ i = s.split(":")
64
+ @value.merge!(i[0].strip => i[1].strip)
65
+ end
36
66
  end
37
67
  end
38
68
 
@@ -56,8 +86,6 @@ module OboParser::Tokens
56
86
  end
57
87
  end
58
88
 
59
-
60
-
61
89
  # note we grab EOL and ; here
62
90
  class ValuePair < Token
63
91
  @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
@@ -71,90 +99,73 @@ module OboParser::Tokens
71
99
  end
72
100
  end
73
101
 
74
- class Matrix < Token
75
- @regexp = Regexp.new(/\A\s*(matrix)\s*/i)
76
- end
77
-
78
- class RowVec < Token
79
- @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
80
- def initialize(str)
81
- s = str.split(/\(|\)/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
82
- @value = s
83
- end
102
+ class EndOfFile < Token
103
+ @regexp = Regexp.new('\A(\s*\n*)\Z')
84
104
  end
85
105
 
86
-
87
-
88
106
  ## punctuation
89
107
 
90
108
  class LBracket < Token
91
109
  @regexp = Regexp.new('\A\s*(\[)\s*')
92
110
  end
93
111
 
94
- class RBracket < Token
95
- @regexp = Regexp.new('\A\s*(\])\s*')
96
- end
97
-
98
- class LParen < Token
99
- @regexp = Regexp.new('\A\s*(\()\s*')
100
- end
101
-
102
- class RParen < Token
103
- @regexp = Regexp.new('\A\s*(\))\s*')
104
- end
112
+ #class LParen < Token
113
+ # @regexp = Regexp.new('\A\s*(\()\s*')
114
+ #end
115
+
116
+ #class RBracket < Token
117
+ # @regexp = Regexp.new('\A\s*(\])\s*')
118
+ #end
119
+
120
+ #class RParen < Token
121
+ # @regexp = Regexp.new('\A\s*(\))\s*')
122
+ #end
123
+
124
+ #class Equals < Token
125
+ # @regexp = Regexp.new('\A\s*(=)\s*')
126
+ #end
127
+
128
+ #class BckSlash < Token
129
+ # @regexp = Regexp.new('\A\s*(\/)\s*')
130
+ #end
131
+
132
+ #class Colon < Token
133
+ # @regexp = Regexp.new('\A\s*(:)\s*')
134
+ #end
135
+
136
+ #class SemiColon < Token
137
+ # @regexp = Regexp.new('\A\s*(;)\s*')
138
+ #end
139
+
140
+ #class Comma < Token
141
+ # @regexp = Regexp.new('\A\s*(\,)\s*')
142
+ #end
105
143
 
106
- class Equals < Token
107
- @regexp = Regexp.new('\A\s*(=)\s*')
108
- end
109
-
110
- class BckSlash < Token
111
- @regexp = Regexp.new('\A\s*(\/)\s*')
112
- end
113
-
114
-
115
- class Colon < Token
116
- @regexp = Regexp.new('\A\s*(:)\s*')
117
- end
118
-
119
- class SemiColon < Token
120
- @regexp = Regexp.new('\A\s*(;)\s*')
121
- end
122
-
123
- class Comma < Token
124
- @regexp = Regexp.new('\A\s*(\,)\s*')
125
- end
126
-
127
- class EndOfFile < Token
128
- @regexp = Regexp.new('\A(\s*\n*)\Z')
129
- end
130
-
131
- class Number < Token
132
- @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
133
- def initialize(str)
134
- # a little oddness here, in some case we don't want to include the .0
135
- # see issues with numbers as labels
136
- if str =~ /\./
137
- @value = str.to_f
138
- else
139
- @value = str.to_i
140
- end
141
-
142
- end
143
- end
144
-
145
- # Tokens::NexusComment
146
-
147
- # this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
144
+ #class Number < Token
145
+ # @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
146
+ # def initialize(str)
147
+ # # a little oddness here, in some case we don't want to include the .0
148
+ # # see issues with numbers as labels
149
+ # if str =~ /\./
150
+ # @value = str.to_f
151
+ # else
152
+ # @value = str.to_i
153
+ # end
154
+ # end
155
+ #end
156
+
157
+ # This list defines inclusion and priority, i.e. if tokens have overlap then the earlier indexed token will match first
148
158
  def self.obo_file_token_list
149
159
  [
150
160
  OboParser::Tokens::Term,
151
161
  OboParser::Tokens::Typedef,
152
- OboParser::Tokens::TagValuePair,
153
- OboParser::Tokens::NameValuePair, # not implemented
154
- OboParser::Tokens::Dbxref, # not implemented
155
162
  OboParser::Tokens::LBracket,
163
+ OboParser::Tokens::TagValuePair,
164
+ OboParser::Tokens::XrefList,
156
165
  OboParser::Tokens::EndOfFile
166
+ # OboParser::Tokens::NameValuePair, # not implemented
167
+ # OboParser::Tokens::Dbxref, # not implemented
157
168
  ]
158
169
  end
159
-
170
+
160
171
  end
data/lib/utilities.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'ruby-debug'
3
- require 'obo_parser'
3
+ require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser'))
4
4
 
5
5
  module OboParser::Utilities
6
6
 
@@ -11,7 +11,6 @@ module OboParser::Utilities
11
11
  # of4 = File.read('hao4.obo')
12
12
  #
13
13
  # OboParser::Utilities::dump_comparison_by_id([of1, of2, of3, of4])
14
-
15
14
  def self.dump_comparison_by_id(files = []) # :yields: String
16
15
  of = []
17
16
  files.each_with_index do |f, i|
@@ -39,10 +38,9 @@ module OboParser::Utilities
39
38
  end
40
39
  end
41
40
 
41
+ # infile is a tab delimited 2 column file that contains IDs in the form FOO_1234
42
+ # The file is replicated to STDOUT replacing the ID with the Term
42
43
  def self.alignment_translate(infile = nil) # :yields: String
43
- # infile is a tab delimited 2 column file that contains IDs in the from FOO_1234
44
- # The file is replicated to STDOUT replacing the ID with the Term
45
-
46
44
  agreement = ARGV[0]
47
45
  raise "Provide a file with comparison." if agreement.nil?
48
46
  comparison = File.read(agreement)
@@ -70,22 +68,15 @@ module OboParser::Utilities
70
68
  "\t" +
71
69
  (identifiers[v2].nil? ? 'NOT FOUND' : identifiers[v2])
72
70
  end
73
-
74
71
  end
75
72
 
76
-
77
- def self.shared_labels(files = []) # :yields: String
78
-
79
73
  # Returns labels found in all passed ontologies
80
-
81
74
  # Usage:
82
-
83
- # of1 = File.read('fly_anatomy.obo')
84
- # of2 = File.read('hao.obo')
85
- # of3 = File.read('mosquito_anatomy.obo')
86
-
87
- # shared_labels([of1, of6])
88
-
75
+ # of1 = File.read('fly_anatomy.obo')
76
+ # of2 = File.read('hao.obo')
77
+ # of3 = File.read('mosquito_anatomy.obo')
78
+ # shared_labels([of1, of6])
79
+ def self.shared_labels(files = []) # :yields: String
89
80
  comparison = {}
90
81
 
91
82
  files.each do |f|
@@ -109,10 +100,8 @@ module OboParser::Utilities
109
100
  end
110
101
 
111
102
  puts match.sort.join("\n")
112
-
113
103
  puts "\n#{match.length} total."
114
104
 
115
105
  end
116
-
117
106
 
118
107
  end
data/obo_parser.gemspec CHANGED
@@ -5,23 +5,21 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.2.1"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-02-28}
12
+ s.date = %q{2011-04-05}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE",
17
- "README",
18
17
  "README.rdoc"
19
18
  ]
20
19
  s.files = [
21
20
  ".document",
22
21
  ".gitignore",
23
22
  "LICENSE",
24
- "README",
25
23
  "README.rdoc",
26
24
  "Rakefile",
27
25
  "VERSION",
@@ -43,17 +41,16 @@ Gem::Specification.new do |s|
43
41
  s.homepage = %q{http://github.com/mjy/obo_parser}
44
42
  s.rdoc_options = ["--charset=UTF-8"]
45
43
  s.require_paths = ["lib"]
46
- s.rubygems_version = %q{1.3.6}
44
+ s.rubygems_version = %q{1.5.3}
47
45
  s.summary = %q{A simple OBO file handler.}
48
46
  s.test_files = [
49
47
  "test/test_obo_parser.rb"
50
48
  ]
51
49
 
52
50
  if s.respond_to? :specification_version then
53
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
51
  s.specification_version = 3
55
52
 
56
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
53
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
57
54
  else
58
55
  end
59
56
  else
data/test/cell.obo CHANGED
@@ -5865,3 +5865,4 @@ is_a: CL:0000349 ! extraembryonic cell
5865
5865
  id: develops_from
5866
5866
  name: develops_from
5867
5867
  is_transitive: true
5868
+
@@ -2,7 +2,7 @@ require 'test/unit'
2
2
  require 'rubygems'
3
3
  require 'ruby-debug'
4
4
 
5
- require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
5
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
6
6
 
7
7
  class OboParserTest < Test::Unit::TestCase
8
8
  def test_truth
@@ -16,15 +16,12 @@ class Test_OboParserBuilder < Test::Unit::TestCase
16
16
  end
17
17
  end
18
18
 
19
-
20
19
  class Test_Regex < Test::Unit::TestCase
21
20
 
22
- def test_comment_stripping
23
- # hackish, likely will fail with complex combinations of "!"
24
- txt = "line without note\nBegin taxa; ! comment\n! not this line\n'this ok!'\n\"this too!!\""
25
- r2 = Regexp.new(/(\s*?![^!'"]*?\n)/i)
26
- assert_equal "line without note\nBegin taxa;\n\n'this ok!'\n\"this too!!\"" , txt.gsub(r2, "\n")
21
+ def test_some_regex
22
+ assert true
27
23
  end
24
+
28
25
  end
29
26
 
30
27
  class Test_Lexer < Test::Unit::TestCase
@@ -60,7 +57,8 @@ class Test_Lexer < Test::Unit::TestCase
60
57
 
61
58
  assert t = lexer.pop(OboParser::Tokens::TagValuePair)
62
59
  assert_equal 'def', t.tag
63
- assert_equal '"A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra." [PATOC:cjm]', t.value
60
+ assert_equal 'A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra.', t.value
61
+ assert_equal(['PATOC:cjm'], t.xrefs)
64
62
 
65
63
  assert t = lexer.pop(OboParser::Tokens::TagValuePair)
66
64
  assert_equal 'subset', t.tag
@@ -76,10 +74,27 @@ class Test_Lexer < Test::Unit::TestCase
76
74
  assert lexer.pop(OboParser::Tokens::Term)
77
75
  end
78
76
 
77
+ def test_xref_list
78
+ lexer = OboParser::Lexer.new("[foo:bar, stuff:things]")
79
+ assert t = lexer.pop(OboParser::Tokens::XrefList)
80
+ hsh = {'foo' => 'bar', 'stuff' => 'things'}
81
+ assert_equal hsh, t.value
82
+ end
83
+
79
84
  def test_tagvaluepair
80
85
  lexer = OboParser::Lexer.new("id: PATO:0000179")
81
86
  assert lexer.pop(OboParser::Tokens::TagValuePair)
82
87
  end
88
+
89
+ def test_tagvaluepair_with_comments_and_xrefs
90
+ lexer = OboParser::Lexer.new("def: \"The foo that is bar.\" [PATO:0000179] ! FOO! \n")
91
+ assert t = lexer.pop(OboParser::Tokens::TagValuePair)
92
+ assert_equal 'def', t.tag
93
+ assert_equal 'The foo that is bar.', t.value
94
+ assert_equal 'FOO!', t.comment
95
+ assert_equal(['PATO:0000179'], t.xrefs)
96
+ end
97
+
83
98
  end
84
99
 
85
100
  class Test_Parser < Test::Unit::TestCase
@@ -89,10 +104,13 @@ class Test_Parser < Test::Unit::TestCase
89
104
 
90
105
  def test_file_parsing
91
106
  foo = parse_obo_file(@of)
92
- assert_equal 'pato', foo.terms[0].name
93
- assert_equal 'quality', foo.terms[1].name
94
- assert_equal 'part_of', foo.typedefs.last.name
95
- assert_equal 'OBO_REL:part_of', foo.typedefs.last.id
107
+ assert_equal 'pato', foo.terms[0].name.value
108
+ assert_equal 'quality', foo.terms[1].name.value
109
+ assert_equal 'part_of', foo.typedefs.last.name.value
110
+ assert_equal 'OBO_REL:part_of', foo.typedefs.last.id.value
111
+ assert_equal(['PATOC:GVG'], foo.terms[1].def.xrefs)
112
+ assert_equal 'is_obsolete', foo.terms.first.tags_named('is_obsolete').first.tag
113
+ assert_equal 'true', foo.terms.first.tags_named('is_obsolete').first.value
96
114
  end
97
115
 
98
116
  def teardown
@@ -106,3 +124,9 @@ class Test_Parser < Test::Unit::TestCase
106
124
 
107
125
  end
108
126
 
127
+
128
+
129
+
130
+
131
+
132
+
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obo_parser
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 19
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
- - 2
8
- - 1
9
- version: 0.2.1
8
+ - 3
9
+ - 0
10
+ version: 0.3.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - mjy
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-02-28 00:00:00 -05:00
18
+ date: 2011-04-05 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies: []
20
21
 
@@ -26,13 +27,11 @@ extensions: []
26
27
 
27
28
  extra_rdoc_files:
28
29
  - LICENSE
29
- - README
30
30
  - README.rdoc
31
31
  files:
32
32
  - .document
33
33
  - .gitignore
34
34
  - LICENSE
35
- - README
36
35
  - README.rdoc
37
36
  - Rakefile
38
37
  - VERSION
@@ -60,23 +59,27 @@ rdoc_options:
60
59
  require_paths:
61
60
  - lib
62
61
  required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
63
  requirements:
64
64
  - - ">="
65
65
  - !ruby/object:Gem::Version
66
+ hash: 3
66
67
  segments:
67
68
  - 0
68
69
  version: "0"
69
70
  required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
70
72
  requirements:
71
73
  - - ">="
72
74
  - !ruby/object:Gem::Version
75
+ hash: 3
73
76
  segments:
74
77
  - 0
75
78
  version: "0"
76
79
  requirements: []
77
80
 
78
81
  rubyforge_project:
79
- rubygems_version: 1.3.6
82
+ rubygems_version: 1.5.3
80
83
  signing_key:
81
84
  specification_version: 3
82
85
  summary: A simple OBO file handler.
data/README DELETED
@@ -1,13 +0,0 @@
1
- NexusParser
2
- ===========
3
-
4
- Introduction goes here.
5
-
6
-
7
- Example
8
- =======
9
-
10
- Example goes here.
11
-
12
-
13
- Copyright (c) 2008 Matt Yoder, released under the MIT license