obo_parser 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 mjy
1
+ Copyright (c) 2010 Matt Yoder
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -1,16 +1,44 @@
1
1
  = obo_parser
2
2
 
3
- A simple OBO file format parsing library.
3
+ A simple Ruby gem for parsing OBO formatted ontology files. Useful for reporting, comparing, and mapping data to other databases. There is presently no functionality for logical inference across the ontology.
4
4
 
5
- == Note on Patches/Pull Requests
5
+ == Installation
6
+
7
+ gem install obo_parser
8
+
9
+ == Use
10
+
11
+ require 'rubygems'
12
+ require 'obo_parser'
13
+ foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance. Targets OBO v. 1.4
14
+ first_term = foo.terms.first # => An OboParser#Term instance
15
+ d = first_term.def # => An OboParser#Tag instance
6
16
 
7
- * Fork the project.
8
- * Make your feature addition or bug fix.
9
- * Add tests for it. This is important so I don't break it in a
10
- future version unintentionally.
11
- * Commit, do not mess with rakefile, version, or history.
12
- (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
- * Send me a pull request. Bonus points for topic branches.
17
+ d.tag # => 'def'
18
+ d.value # => 'Some definition'
19
+ d.xrefs # => ['xref:123', 'xref:456']
20
+ d.comment # => 'Some comment'
21
+
22
+ t = first_term.name # => An OboParser#Tag instance
23
+ t.tag # => 'name'
24
+ t.value # => 'Some Term name'
25
+
26
+ o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
27
+ o.first # => An OboParser#Tag instance
28
+
29
+ first_typedef = foo.typedefs.first # => An OboParser#Typedef instance
30
+ first_typedef.id.value # => 'Some typedef id'
31
+ first_typedef.name.value # => 'Some typedef name'
32
+
33
+ foo.terms.first.tags_named('is_a') # => [OboParser#Tag, ... ]
34
+ foo.terms.first.tags_named('is_a').first.tag # => 'is_a'
35
+ foo.terms.first.tags_named('is_a').first.value # => 'Some Term id'
36
+
37
+ See also /test/test_obo_parser.rb
38
+
39
+ == Utilities
40
+
41
+ A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
14
42
 
15
43
  == Copyright
16
44
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.0
data/lib/lexer.rb CHANGED
@@ -56,4 +56,5 @@ class OboParser::Lexer
56
56
  return false
57
57
  end
58
58
  end
59
+
59
60
  end
data/lib/obo_parser.rb CHANGED
@@ -2,113 +2,159 @@
2
2
  # uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
3
3
  # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
4
4
 
5
- # outstanding issues:
5
+ #== Outstanding issues:
6
+ # * Better documentation
7
+ # * More tests
6
8
 
7
9
  module OboParser
8
10
 
9
- require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
10
- require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
11
- require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
12
- require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
11
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
12
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
13
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
14
+ require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
13
15
 
16
+ class OboParser
17
+ attr_accessor :terms, :typedefs
14
18
 
15
- class OboParser # Node
16
- attr_accessor :terms, :typedefs
19
+ def initialize
20
+ @terms = []
21
+ @typedefs = []
22
+ true
23
+ end
17
24
 
18
- def initialize # :yields: true
19
- @terms = []
20
- @typedefs = []
21
- true
22
- end
25
+ def term_strings # :yields: Array of Strings
26
+ @terms.collect{|t| t.name.value}.sort
27
+ end
23
28
 
24
- def term_strings # :yields: Array of Strings
25
- @terms.collect{|t| t.name}.sort
26
- end
29
+ # Warning! This assumes terms are unique, they are NOT required to be so in an OBO file.
30
+ def term_hash # :yields: Hash (String => String) (name => id)
31
+ @terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
32
+ end
27
33
 
28
- def term_hash # :yields: Hash (String => String) (name => id)
29
- # Warning! This assumes terms are unqiue, they are not required to be so.
30
- @terms.inject({}) {|sum, t| sum.update(t.name => t.id)}
31
- end
34
+ def id_hash # :yields: Hash (String => String (id => name))
35
+ @terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
36
+ end
32
37
 
33
- def id_hash # :yields: Hash (String => String (id => name)
34
- # ids are unique
35
- @terms.inject({}) {|sum, t| sum.update(t.id => t.name)}
36
- end
38
+ class Stanza
39
+ # Make special reference to several specific types of tags (:name, :id), subclasses will remove additional special types from :other_tags
40
+ attr_accessor :name, :id, :other_tags
41
+
42
+ def initialize(tags)
43
+ @other_tags = []
44
+
45
+ while tags.length != 0
46
+ t = tags.shift
47
+
48
+ new_tag = OboParser::Tag.new
49
+ new_tag.tag = t.tag
50
+ new_tag.value = t.value
51
+ new_tag.comment = t.comment
52
+ new_tag.xrefs = t.xrefs
53
+
54
+ case new_tag.tag
55
+ when 'id'
56
+ @id = new_tag
57
+ when 'name'
58
+ @name = new_tag
59
+ else
60
+ @other_tags.push(new_tag)
61
+ end
62
+ end
63
+ end
37
64
 
38
- class Stanza
39
- attr_accessor :name, :id, :tags
40
- # we can have only one of id, name, and some others (but this is a loose setup now)
41
- # can have many of some other things- put them in tags
42
-
43
- def initialize(tags)
44
- @tags = {}
45
- tags.each do |t|
46
- case t[0]
47
- when 'id'
48
- @id = t[1]
49
- when 'name'
50
- @name = t[1]
51
- else
52
- @tags[t[0]] = [] if !@tags[t[0]]
53
- @tags[t[0]].push t[1]
65
+ #=== Convenience methods
66
+
67
+ def tags_named(tag_name = nil)
68
+ return nil if tag_name.nil?
69
+ result = []
70
+ @other_tags.each do |t|
71
+ result.push t if t.tag == tag_name
54
72
  end
73
+ result
55
74
  end
75
+
56
76
  end
57
- end
58
77
 
59
- class Term < Stanza
60
- attr_accessor :def
61
- def initialize(tags)
62
- super
78
+ class Term < Stanza
79
+ attr_accessor :def
80
+ def initialize(tags)
81
+ super
82
+
83
+ anonymous_tags = []
84
+
85
+ # Loop through "unclaimed" tags and reference those specific to Term
86
+ while @other_tags.size != 0
87
+ t = @other_tags.shift
88
+ case t.tag
89
+ when 'def'
90
+ @def = t
91
+ else
92
+ anonymous_tags.push(t)
93
+ end
94
+ end
95
+ @other_tags = anonymous_tags
96
+ end
97
+ end
98
+
99
+ class Typedef < Stanza
100
+ def initialize(tags)
101
+ super
102
+ #anonymous_tags = []
103
+ ## Loop through "unclaimed" tags and reference those specific to Typedef
104
+ #while @other_tags.size != 0
105
+ # t = @other_tags.shift
106
+ # case t.tag
107
+ # when 'def'
108
+ # @def = t
109
+ # else
110
+ # anonymous_tags.push(t)
111
+ # end
112
+ # @other_tags = anonymous_tags
113
+ #end
114
+ end
63
115
  end
64
- end
65
116
 
66
- class Typedef < Stanza
67
- def initialize(tags)
68
- super
117
+ class Tag
118
+ attr_accessor :tag, :value, :xrefs, :comment
69
119
  end
120
+
70
121
  end
71
122
 
72
- end
123
+ class OboParserBuilder
124
+ def initialize
125
+ @of = OboParser.new
126
+ end
73
127
 
128
+ def add_term(tags)
129
+ @of.terms.push OboParser::Term.new(tags)
130
+ end
74
131
 
75
- class OboParserBuilder
76
- def initialize
77
- @of = OboParser.new
78
- end
132
+ def add_typedef(tags)
133
+ @of.typedefs.push OboParser::Typedef.new(tags)
134
+ end
79
135
 
80
- def add_term(tags)
81
- @of.terms.push OboParser::Term.new(tags)
82
- end
136
+ def obo_file
137
+ @of
138
+ end
83
139
 
84
- def add_typedef(tags)
85
- @of.typedefs.push OboParser::Typedef.new(tags)
86
140
  end
87
141
 
88
- def obo_file
89
- @of
142
+ class ParseError < StandardError
90
143
  end
91
144
 
92
- end
93
-
94
- class ParseError < StandardError
95
- end
96
-
97
145
  end # end module
98
146
 
99
- # the actual method
147
+ #= Implementation
148
+
100
149
  def parse_obo_file(input)
101
150
  @input = input
102
- raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
151
+ raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
152
+
153
+ # Comments are handled now.
154
+ # @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
103
155
 
104
- @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
105
-
106
156
  builder = OboParser::OboParserBuilder.new
107
157
  lexer = OboParser::Lexer.new(@input)
108
158
  OboParser::Parser.new(lexer, builder).parse_file
109
159
  return builder.obo_file
110
160
  end
111
-
112
-
113
-
114
-
data/lib/parser.rb CHANGED
@@ -5,21 +5,21 @@ class OboParser::Parser
5
5
  end
6
6
 
7
7
  def parse_file
8
- # toss everything right now, we just want the terms
8
+ # At present we ignore the header lines
9
9
  while !@lexer.peek(OboParser::Tokens::Term)
10
10
  @lexer.pop(OboParser::Tokens::TagValuePair)
11
11
  end
12
12
 
13
13
  i = 0
14
14
  while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
15
- raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000
15
+ raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000 # there aren't that many words!
16
16
  parse_term
17
17
  i += 1
18
18
  end
19
19
 
20
20
  i = 0
21
21
  while @lexer.peek(OboParser::Tokens::Typedef)
22
- raise OboParser::ParseError,"infinite loop in Terms" if i > 1000000 # there aren't that many words!
22
+ raise OboParser::ParseError,"infinite loop in Typedefs" if i > 1000000
23
23
  parse_typedef
24
24
  i += 1
25
25
  end
@@ -29,11 +29,11 @@ class OboParser::Parser
29
29
  t = @lexer.pop(OboParser::Tokens::Term)
30
30
  tags = []
31
31
  while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
32
- if @lexer.peek(OboParser::Tokens::TagValuePair)
32
+ begin
33
33
  t = @lexer.pop(OboParser::Tokens::TagValuePair)
34
- tags.push [t.tag, t.value]
35
- else
36
- raise(OboParser::ParseError, "Expected a tag-value pair, but did not get one following this tag/value: [#{t.tag} / #{t.value}]")
34
+ tags.push(t)
35
+ rescue
36
+ raise
37
37
  end
38
38
  end
39
39
  @builder.add_term(tags)
@@ -41,11 +41,14 @@ class OboParser::Parser
41
41
 
42
42
  def parse_typedef
43
43
  @lexer.pop(OboParser::Tokens::Typedef)
44
- # @t = @builder.stub_typdef
45
44
  tags = []
46
45
  while !@lexer.peek(OboParser::Tokens::Typedef) && @lexer.peek(OboParser::Tokens::TagValuePair)
47
- t = @lexer.pop(OboParser::Tokens::TagValuePair)
48
- tags.push [t.tag, t.value]
46
+ begin
47
+ t = @lexer.pop(OboParser::Tokens::TagValuePair)
48
+ tags.push(t)
49
+ rescue
50
+ raise
51
+ end
49
52
  end
50
53
  @builder.add_typedef(tags)
51
54
  end
data/lib/tokens.rb CHANGED
@@ -9,9 +9,6 @@ module OboParser::Tokens
9
9
  end
10
10
  end
11
11
 
12
- # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
13
- # moving along popping off
14
-
15
12
  class Term < Token
16
13
  @regexp = Regexp.new(/\A\s*(\[term\])\s*/i)
17
14
  end
@@ -20,19 +17,52 @@ module OboParser::Tokens
20
17
  @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
21
18
  end
22
19
 
23
-
24
20
  class TagValuePair < Token
25
- attr_reader :tag, :value
26
- @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
21
+ attr_reader :tag, :comment, :xrefs
22
+ @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
27
23
  def initialize(str)
28
24
  str.strip!
29
- str = str.split(':',2)
30
-
31
- str[1].strip!
32
- # strip trailing comments
25
+ tag, value = str.split(':',2)
26
+
27
+ value.strip!
28
+
29
+ # Handle comments
30
+ if value =~ /(!\s*.+)\Z/i
31
+ @comment = $1
32
+ value.gsub!(@comment, '')
33
+ @comment.gsub!(/\A!\s*/, '')
34
+ end
35
+
36
+ # Break out the xrefs, could be made more robust
37
+ # Assumes non-quoted comma delimited in format 'foo:bar, stuff:things'
38
+ if value =~ /(\s*\[.*\]\s*)/i
39
+ xref_list = $1
40
+ value.gsub!(xref_list, '')
41
+ xref_list.strip!
42
+ xref_list = xref_list[1..-2] # strip []
43
+ @xrefs = xref_list.split(",")
44
+ end
33
45
 
34
- @tag = str[0]
35
- @value = str[1]
46
+ @tag = tag.strip
47
+ @value = value.strip
48
+
49
+ @value = @value[1..-2] if @value[0..0] == "\"" # get rid of quote marks
50
+ @value = @value[1..-2] if @value[0..0] == "'" # get rid of quote marks
51
+
52
+ @tag = @tag.strip
53
+ @value = @value.strip
54
+ end
55
+ end
56
+
57
+ class XrefList < Token
58
+ @regexp = Regexp.new(/\A\s*\[(.+)\]\s*\n*/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
59
+ def initialize(str)
60
+ str.strip!
61
+ @value = {}
62
+ str.split(",").each do |s|
63
+ i = s.split(":")
64
+ @value.merge!(i[0].strip => i[1].strip)
65
+ end
36
66
  end
37
67
  end
38
68
 
@@ -56,8 +86,6 @@ module OboParser::Tokens
56
86
  end
57
87
  end
58
88
 
59
-
60
-
61
89
  # note we grab EOL and ; here
62
90
  class ValuePair < Token
63
91
  @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
@@ -71,90 +99,73 @@ module OboParser::Tokens
71
99
  end
72
100
  end
73
101
 
74
- class Matrix < Token
75
- @regexp = Regexp.new(/\A\s*(matrix)\s*/i)
76
- end
77
-
78
- class RowVec < Token
79
- @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
80
- def initialize(str)
81
- s = str.split(/\(|\)/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
82
- @value = s
83
- end
102
+ class EndOfFile < Token
103
+ @regexp = Regexp.new('\A(\s*\n*)\Z')
84
104
  end
85
105
 
86
-
87
-
88
106
  ## punctuation
89
107
 
90
108
  class LBracket < Token
91
109
  @regexp = Regexp.new('\A\s*(\[)\s*')
92
110
  end
93
111
 
94
- class RBracket < Token
95
- @regexp = Regexp.new('\A\s*(\])\s*')
96
- end
97
-
98
- class LParen < Token
99
- @regexp = Regexp.new('\A\s*(\()\s*')
100
- end
101
-
102
- class RParen < Token
103
- @regexp = Regexp.new('\A\s*(\))\s*')
104
- end
112
+ #class LParen < Token
113
+ # @regexp = Regexp.new('\A\s*(\()\s*')
114
+ #end
115
+
116
+ #class RBracket < Token
117
+ # @regexp = Regexp.new('\A\s*(\])\s*')
118
+ #end
119
+
120
+ #class RParen < Token
121
+ # @regexp = Regexp.new('\A\s*(\))\s*')
122
+ #end
123
+
124
+ #class Equals < Token
125
+ # @regexp = Regexp.new('\A\s*(=)\s*')
126
+ #end
127
+
128
+ #class BckSlash < Token
129
+ # @regexp = Regexp.new('\A\s*(\/)\s*')
130
+ #end
131
+
132
+ #class Colon < Token
133
+ # @regexp = Regexp.new('\A\s*(:)\s*')
134
+ #end
135
+
136
+ #class SemiColon < Token
137
+ # @regexp = Regexp.new('\A\s*(;)\s*')
138
+ #end
139
+
140
+ #class Comma < Token
141
+ # @regexp = Regexp.new('\A\s*(\,)\s*')
142
+ #end
105
143
 
106
- class Equals < Token
107
- @regexp = Regexp.new('\A\s*(=)\s*')
108
- end
109
-
110
- class BckSlash < Token
111
- @regexp = Regexp.new('\A\s*(\/)\s*')
112
- end
113
-
114
-
115
- class Colon < Token
116
- @regexp = Regexp.new('\A\s*(:)\s*')
117
- end
118
-
119
- class SemiColon < Token
120
- @regexp = Regexp.new('\A\s*(;)\s*')
121
- end
122
-
123
- class Comma < Token
124
- @regexp = Regexp.new('\A\s*(\,)\s*')
125
- end
126
-
127
- class EndOfFile < Token
128
- @regexp = Regexp.new('\A(\s*\n*)\Z')
129
- end
130
-
131
- class Number < Token
132
- @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
133
- def initialize(str)
134
- # a little oddness here, in some case we don't want to include the .0
135
- # see issues with numbers as labels
136
- if str =~ /\./
137
- @value = str.to_f
138
- else
139
- @value = str.to_i
140
- end
141
-
142
- end
143
- end
144
-
145
- # Tokens::NexusComment
146
-
147
- # this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
144
+ #class Number < Token
145
+ # @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
146
+ # def initialize(str)
147
+ # # a little oddness here, in some case we don't want to include the .0
148
+ # # see issues with numbers as labels
149
+ # if str =~ /\./
150
+ # @value = str.to_f
151
+ # else
152
+ # @value = str.to_i
153
+ # end
154
+ # end
155
+ #end
156
+
157
+ # This list defines inclusion and priority, i.e. if tokens have overlap then the earlier indexed token will match first
148
158
  def self.obo_file_token_list
149
159
  [
150
160
  OboParser::Tokens::Term,
151
161
  OboParser::Tokens::Typedef,
152
- OboParser::Tokens::TagValuePair,
153
- OboParser::Tokens::NameValuePair, # not implemented
154
- OboParser::Tokens::Dbxref, # not implemented
155
162
  OboParser::Tokens::LBracket,
163
+ OboParser::Tokens::TagValuePair,
164
+ OboParser::Tokens::XrefList,
156
165
  OboParser::Tokens::EndOfFile
166
+ # OboParser::Tokens::NameValuePair, # not implemented
167
+ # OboParser::Tokens::Dbxref, # not implemented
157
168
  ]
158
169
  end
159
-
170
+
160
171
  end
data/lib/utilities.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'ruby-debug'
3
- require 'obo_parser'
3
+ require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser'))
4
4
 
5
5
  module OboParser::Utilities
6
6
 
@@ -11,7 +11,6 @@ module OboParser::Utilities
11
11
  # of4 = File.read('hao4.obo')
12
12
  #
13
13
  # OboParser::Utilities::dump_comparison_by_id([of1, of2, of3, of4])
14
-
15
14
  def self.dump_comparison_by_id(files = []) # :yields: String
16
15
  of = []
17
16
  files.each_with_index do |f, i|
@@ -39,10 +38,9 @@ module OboParser::Utilities
39
38
  end
40
39
  end
41
40
 
41
+ # infile is a tab delimited 2 column file that contains IDs in the form FOO_1234
42
+ # The file is replicated to STDOUT replacing the ID with the Term
42
43
  def self.alignment_translate(infile = nil) # :yields: String
43
- # infile is a tab delimited 2 column file that contains IDs in the from FOO_1234
44
- # The file is replicated to STDOUT replacing the ID with the Term
45
-
46
44
  agreement = ARGV[0]
47
45
  raise "Provide a file with comparison." if agreement.nil?
48
46
  comparison = File.read(agreement)
@@ -70,22 +68,15 @@ module OboParser::Utilities
70
68
  "\t" +
71
69
  (identifiers[v2].nil? ? 'NOT FOUND' : identifiers[v2])
72
70
  end
73
-
74
71
  end
75
72
 
76
-
77
- def self.shared_labels(files = []) # :yields: String
78
-
79
73
  # Returns labels found in all passed ontologies
80
-
81
74
  # Usage:
82
-
83
- # of1 = File.read('fly_anatomy.obo')
84
- # of2 = File.read('hao.obo')
85
- # of3 = File.read('mosquito_anatomy.obo')
86
-
87
- # shared_labels([of1, of6])
88
-
75
+ # of1 = File.read('fly_anatomy.obo')
76
+ # of2 = File.read('hao.obo')
77
+ # of3 = File.read('mosquito_anatomy.obo')
78
+ # shared_labels([of1, of2, of3])
79
+ def self.shared_labels(files = []) # :yields: String
89
80
  comparison = {}
90
81
 
91
82
  files.each do |f|
@@ -109,10 +100,8 @@ module OboParser::Utilities
109
100
  end
110
101
 
111
102
  puts match.sort.join("\n")
112
-
113
103
  puts "\n#{match.length} total."
114
104
 
115
105
  end
116
-
117
106
 
118
107
  end
data/obo_parser.gemspec CHANGED
@@ -5,23 +5,21 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.2.1"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-02-28}
12
+ s.date = %q{2011-04-05}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE",
17
- "README",
18
17
  "README.rdoc"
19
18
  ]
20
19
  s.files = [
21
20
  ".document",
22
21
  ".gitignore",
23
22
  "LICENSE",
24
- "README",
25
23
  "README.rdoc",
26
24
  "Rakefile",
27
25
  "VERSION",
@@ -43,17 +41,16 @@ Gem::Specification.new do |s|
43
41
  s.homepage = %q{http://github.com/mjy/obo_parser}
44
42
  s.rdoc_options = ["--charset=UTF-8"]
45
43
  s.require_paths = ["lib"]
46
- s.rubygems_version = %q{1.3.6}
44
+ s.rubygems_version = %q{1.5.3}
47
45
  s.summary = %q{A simple OBO file handler.}
48
46
  s.test_files = [
49
47
  "test/test_obo_parser.rb"
50
48
  ]
51
49
 
52
50
  if s.respond_to? :specification_version then
53
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
51
  s.specification_version = 3
55
52
 
56
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
53
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
57
54
  else
58
55
  end
59
56
  else
data/test/cell.obo CHANGED
@@ -5865,3 +5865,4 @@ is_a: CL:0000349 ! extraembryonic cell
5865
5865
  id: develops_from
5866
5866
  name: develops_from
5867
5867
  is_transitive: true
5868
+
@@ -2,7 +2,7 @@ require 'test/unit'
2
2
  require 'rubygems'
3
3
  require 'ruby-debug'
4
4
 
5
- require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
5
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
6
6
 
7
7
  class OboParserTest < Test::Unit::TestCase
8
8
  def test_truth
@@ -16,15 +16,12 @@ class Test_OboParserBuilder < Test::Unit::TestCase
16
16
  end
17
17
  end
18
18
 
19
-
20
19
  class Test_Regex < Test::Unit::TestCase
21
20
 
22
- def test_comment_stripping
23
- # hackish, likely will fail with complex combinations of "!"
24
- txt = "line without note\nBegin taxa; ! comment\n! not this line\n'this ok!'\n\"this too!!\""
25
- r2 = Regexp.new(/(\s*?![^!'"]*?\n)/i)
26
- assert_equal "line without note\nBegin taxa;\n\n'this ok!'\n\"this too!!\"" , txt.gsub(r2, "\n")
21
+ def test_some_regex
22
+ assert true
27
23
  end
24
+
28
25
  end
29
26
 
30
27
  class Test_Lexer < Test::Unit::TestCase
@@ -60,7 +57,8 @@ class Test_Lexer < Test::Unit::TestCase
60
57
 
61
58
  assert t = lexer.pop(OboParser::Tokens::TagValuePair)
62
59
  assert_equal 'def', t.tag
63
- assert_equal '"A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra." [PATOC:cjm]', t.value
60
+ assert_equal 'A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra.', t.value
61
+ assert_equal(['PATOC:cjm'], t.xrefs)
64
62
 
65
63
  assert t = lexer.pop(OboParser::Tokens::TagValuePair)
66
64
  assert_equal 'subset', t.tag
@@ -76,10 +74,27 @@ class Test_Lexer < Test::Unit::TestCase
76
74
  assert lexer.pop(OboParser::Tokens::Term)
77
75
  end
78
76
 
77
+ def test_xref_list
78
+ lexer = OboParser::Lexer.new("[foo:bar, stuff:things]")
79
+ assert t = lexer.pop(OboParser::Tokens::XrefList)
80
+ hsh = {'foo' => 'bar', 'stuff' => 'things'}
81
+ assert_equal hsh, t.value
82
+ end
83
+
79
84
  def test_tagvaluepair
80
85
  lexer = OboParser::Lexer.new("id: PATO:0000179")
81
86
  assert lexer.pop(OboParser::Tokens::TagValuePair)
82
87
  end
88
+
89
+ def test_tagvaluepair_with_comments_and_xrefs
90
+ lexer = OboParser::Lexer.new("def: \"The foo that is bar.\" [PATO:0000179] ! FOO! \n")
91
+ assert t = lexer.pop(OboParser::Tokens::TagValuePair)
92
+ assert_equal 'def', t.tag
93
+ assert_equal 'The foo that is bar.', t.value
94
+ assert_equal 'FOO!', t.comment
95
+ assert_equal(['PATO:0000179'], t.xrefs)
96
+ end
97
+
83
98
  end
84
99
 
85
100
  class Test_Parser < Test::Unit::TestCase
@@ -89,10 +104,13 @@ class Test_Parser < Test::Unit::TestCase
89
104
 
90
105
  def test_file_parsing
91
106
  foo = parse_obo_file(@of)
92
- assert_equal 'pato', foo.terms[0].name
93
- assert_equal 'quality', foo.terms[1].name
94
- assert_equal 'part_of', foo.typedefs.last.name
95
- assert_equal 'OBO_REL:part_of', foo.typedefs.last.id
107
+ assert_equal 'pato', foo.terms[0].name.value
108
+ assert_equal 'quality', foo.terms[1].name.value
109
+ assert_equal 'part_of', foo.typedefs.last.name.value
110
+ assert_equal 'OBO_REL:part_of', foo.typedefs.last.id.value
111
+ assert_equal(['PATOC:GVG'], foo.terms[1].def.xrefs)
112
+ assert_equal 'is_obsolete', foo.terms.first.tags_named('is_obsolete').first.tag
113
+ assert_equal 'true', foo.terms.first.tags_named('is_obsolete').first.value
96
114
  end
97
115
 
98
116
  def teardown
@@ -106,3 +124,9 @@ class Test_Parser < Test::Unit::TestCase
106
124
 
107
125
  end
108
126
 
127
+
128
+
129
+
130
+
131
+
132
+
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obo_parser
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 19
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
- - 2
8
- - 1
9
- version: 0.2.1
8
+ - 3
9
+ - 0
10
+ version: 0.3.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - mjy
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-02-28 00:00:00 -05:00
18
+ date: 2011-04-05 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies: []
20
21
 
@@ -26,13 +27,11 @@ extensions: []
26
27
 
27
28
  extra_rdoc_files:
28
29
  - LICENSE
29
- - README
30
30
  - README.rdoc
31
31
  files:
32
32
  - .document
33
33
  - .gitignore
34
34
  - LICENSE
35
- - README
36
35
  - README.rdoc
37
36
  - Rakefile
38
37
  - VERSION
@@ -60,23 +59,27 @@ rdoc_options:
60
59
  require_paths:
61
60
  - lib
62
61
  required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
63
  requirements:
64
64
  - - ">="
65
65
  - !ruby/object:Gem::Version
66
+ hash: 3
66
67
  segments:
67
68
  - 0
68
69
  version: "0"
69
70
  required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
70
72
  requirements:
71
73
  - - ">="
72
74
  - !ruby/object:Gem::Version
75
+ hash: 3
73
76
  segments:
74
77
  - 0
75
78
  version: "0"
76
79
  requirements: []
77
80
 
78
81
  rubyforge_project:
79
- rubygems_version: 1.3.6
82
+ rubygems_version: 1.5.3
80
83
  signing_key:
81
84
  specification_version: 3
82
85
  summary: A simple OBO file handler.
data/README DELETED
@@ -1,13 +0,0 @@
1
- NexusParser
2
- ===========
3
-
4
- Introduction goes here.
5
-
6
-
7
- Example
8
- =======
9
-
10
- Example goes here.
11
-
12
-
13
- Copyright (c) 2008 Matt Yoder, released under the MIT license