obo_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 mjy
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,13 @@
1
+ obo_parser
2
+ ==========
3
+
4
+ Introduction goes here.
5
+
6
+
7
+ Example
8
+ =======
9
+
10
+ Example goes here.
11
+
12
+
13
+ Copyright (c) 2008 Matt Yoder, released under the MIT license
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = obo_parser
2
+
3
+ A simple OBO file format parsing library.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 Matt Yoder. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'rake'

# Gem packaging and release tasks come from Jeweler. They are optional: when
# Jeweler cannot be loaded a hint is printed and the remaining tasks still work.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "obo_parser"
    gem.summary = %Q{A simple OBO file handler.}
    gem.description = %Q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
    gem.email = "diapriid@gmail.com"
    gem.homepage = "http://github.com/mjy/obo_parser"
    gem.authors = ["mjy"]
    # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage report; falls back to a task that explains how to install rcov.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# check_dependencies is defined by Jeweler. Only depend on it when it exists,
# otherwise `rake test` could not resolve its prerequisite without Jeweler.
task(:test => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Prefer the task class shipped with RDoc; fall back to the legacy location
# that older Rake versions provided.
rdoc_task_class =
  begin
    require 'rdoc/task'
    RDoc::Task
  rescue LoadError
    begin
      require 'rake/rdoctask'
      Rake::RDocTask
    rescue LoadError
      nil
    end
  end

if rdoc_task_class
  rdoc_task_class.new do |rdoc|
    # strip the trailing newline of the VERSION file so it does not end up in the title
    version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

    rdoc.rdoc_dir = 'rdoc'
    rdoc.title = "obo_parser #{version}"
    rdoc.rdoc_files.include('README*')
    rdoc.rdoc_files.include('lib/**/*.rb')
  end
else
  task :rdoc do
    abort "RDoc is not available. In order to build the documentation, you must: gem install rdoc"
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ # Include hook code here
data/install.rb ADDED
@@ -0,0 +1 @@
1
+ # Install hook code here
data/lib/lexer.rb ADDED
@@ -0,0 +1,59 @@
1
# Tokenizer over an OBO document held in a string. The unread remainder of the
# text is kept in @input and at most one token is buffered in @next_token.
class OboFile::Lexer
  attr_reader :input

  def initialize(input)
    @input = input
    @next_token = nil
  end

  # True when the upcoming token is an instance of exactly token_class.
  # The token stays buffered, so a later peek or pop sees the same one.
  def peek(token_class)
    read_next_token(token_class).class == token_class
  end

  # Hands out the upcoming token and empties the buffer. Raises
  # OboFile::ParseError when that token is not an instance of token_class.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil
    return token if token.class == token_class

    raise(OboFile::ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
  end

  private

  # Returns the buffered token, lexing a new one first when the buffer is empty.
  # The requested class is tried before the prioritised list of all tokens.
  # Returns nil when nothing matches and no input is left; raises
  # OboFile::ParseError when nothing matches but input remains.
  def read_next_token(token_class)
    return @next_token if @next_token
    return @next_token if match(token_class)

    OboFile::Tokens.obo_file_token_list.each do |candidate|
      return @next_token if match(candidate)
    end

    # no match, either end of string or lex-error
    return nil if @input == ''

    raise(OboFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
  end

  # Tries token_class's regexp against the remaining input. On success the
  # first capture group becomes the buffered token and the matched text is
  # dropped from @input.
  def match(token_class)
    found = token_class.regexp.match(@input)
    return false unless found

    @next_token = token_class.new(found[1])
    @input = @input[found.end(0)..-1]
    true
  end
end
data/lib/obo_file.rb ADDED
@@ -0,0 +1,106 @@
1
+
2
+ # uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
3
+ # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
4
+
5
+ # outstanding issues:
6
+
7
+ module OboFile
8
+
9
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
10
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
11
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
12
+
13
+
14
# In-memory result of parsing an OBO file: the term and typedef stanzas in the
# order in which they were added.
class OboFile # Node
  attr_accessor :terms, :typedefs

  def initialize
    @terms = []
    @typedefs = []
  end

  # Sorted names of all terms. Terms without a name are left out, because a
  # nil among the names would make the sort raise.
  def term_strings
    @terms.collect { |t| t.name }.compact.sort
  end

  # Hash of term name => term id. When two terms share a name the later one wins.
  def term_hash
    @terms.inject({}) { |sum, t| sum.update(t.name => t.id) }
  end

  # One stanza of the file, built from a list of [tag, value] pairs.
  class Stanza
    attr_accessor :name, :id, :tags
    # we can have only one of id, name, and some others (but this is a loose setup now)
    # can have many of some other things- put them in tags

    # tags - Array of [tag, value] pairs. 'id' and 'name' go to their own
    #        attributes (a repeated tag overwrites the earlier value); every
    #        other tag is collected in #tags as tag => Array of values.
    def initialize(tags)
      @tags = {}
      tags.each do |tag, value|
        case tag
        when 'id'
          @id = value
        when 'name'
          @name = value
        else
          (@tags[tag] ||= []).push value
        end
      end
    end
  end

  class Term < Stanza
    # Value of the first 'def' tag, or nil when the stanza has none. The raw
    # values also remain available through tags['def'].
    attr_accessor :def

    def initialize(tags)
      super
      definitions = @tags['def']
      @def = definitions.first if definitions
    end
  end

  class Typedef < Stanza
  end

end
66
+
67
+
68
# Receives stanzas from the parser one at a time and accumulates them in a
# single OboFile instance.
class OboFileBuilder
  # the OboFile assembled so far
  attr_reader :obo_file

  def initialize
    @obo_file = OboFile.new
  end

  # Wraps the [tag, value] pairs in a Term and appends it to the file's terms.
  def add_term(tags)
    obo_file.terms << OboFile::Term.new(tags)
  end

  # Wraps the [tag, value] pairs in a Typedef and appends it to the file's typedefs.
  def add_typedef(tags)
    obo_file.typedefs << OboFile::Typedef.new(tags)
  end

end
86
+
87
# Raised when the input cannot be lexed or parsed as an OBO file.
class ParseError < StandardError; end
89
+
90
+ end # end module
91
+
92
# Entry point of the library: parses OBO-formatted text and returns the object
# produced by the builder (see OboFile::OboFileBuilder#obo_file).
#
# input - String holding the contents of an OBO file. The string itself is
#         left untouched, so frozen strings are accepted as well.
#
# Raises OboFile::ParseError when input is nil or empty; errors raised while
# lexing or parsing propagate to the caller.
def parse_obo_file(input)
  raise(OboFile::ParseError, "Nothing passed to parse!") if !input || input.size == 0

  # Work on a copy (gsub, not gsub!) and keep it in a local instead of an
  # instance variable, so neither the caller's string nor self is modified.
  text = input.gsub(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!

  builder = OboFile::OboFileBuilder.new
  lexer = OboFile::Lexer.new(text)
  OboFile::Parser.new(lexer, builder).parse_file
  builder.obo_file
end
104
+
105
+
106
+
data/lib/parser.rb ADDED
@@ -0,0 +1,53 @@
1
module OboFile
  # Drives a lexer over an OBO document and reports every [Term] and [Typedef]
  # stanza to a builder as a list of [tag, value] pairs.
  #
  # Constants are referenced relative to this module (Tokens::Term, ParseError)
  # on purpose: inside module OboFile the name OboFile may also resolve to a
  # class of the same name nested in it, which does not contain the tokens.
  class Parser
    # lexer   - object responding to peek(token_class) and pop(token_class)
    # builder - object responding to add_term(tags) and add_typedef(tags)
    def initialize(lexer, builder)
      @lexer = lexer
      @builder = builder
    end

    # Parses the whole input: the header tag/value pairs are discarded, then
    # all term stanzas and finally all typedef stanzas are handed to the builder.
    # Raises ParseError (from here or from the lexer) on unexpected tokens.
    def parse_file
      # toss everything right now, we just want the terms
      while !@lexer.peek(Tokens::Term)
        @lexer.pop(Tokens::TagValuePair)
      end

      i = 0
      while !@lexer.peek(Tokens::Typedef) && !@lexer.peek(Tokens::EndOfFile)
        raise ParseError, "infinite loop in Terms" if i > 10000000
        parse_term
        i += 1
      end

      i = 0
      while @lexer.peek(Tokens::Typedef)
        raise ParseError, "infinite loop in Typedefs" if i > 1000000 # there aren't that many words!
        parse_typedef
        i += 1
      end
    end

    # Consumes one [Term] header plus the tag/value pairs that follow it, up to
    # the next [Term], [Typedef] or end of file, and passes them to the builder.
    def parse_term
      @lexer.pop(Tokens::Term)
      tags = []
      last_pair = nil # most recent tag/value pair, only used for error context
      while !@lexer.peek(Tokens::Term) && !@lexer.peek(Tokens::Typedef) && !@lexer.peek(Tokens::EndOfFile)
        unless @lexer.peek(Tokens::TagValuePair)
          # The [Term] token itself has no tag, so it must not be used to
          # build the message when the very first token is the bad one.
          context = last_pair ? "following this tag/value: [#{last_pair.tag} / #{last_pair.value}]" : "directly after the [Term] header"
          raise(ParseError, "Expected a tag-value pair, but did not get one #{context}")
        end
        last_pair = @lexer.pop(Tokens::TagValuePair)
        tags.push [last_pair.tag, last_pair.value]
      end
      @builder.add_term(tags)
    end

    # Consumes one [Typedef] header plus the tag/value pairs that follow it and
    # passes them to the builder. Stops at the first token that is not a
    # tag/value pair.
    def parse_typedef
      @lexer.pop(Tokens::Typedef)
      # @t = @builder.stub_typdef
      tags = []
      while !@lexer.peek(Tokens::Typedef) && @lexer.peek(Tokens::TagValuePair)
        t = @lexer.pop(Tokens::TagValuePair)
        tags.push [t.tag, t.value]
      end
      @builder.add_typedef(tags)
    end

  end
end
data/lib/tokens.rb ADDED
@@ -0,0 +1,160 @@
1
module OboFile
  # Token classes used by the lexer. Every class carries a class-level regexp
  # whose first capture group is handed to the token's constructor.
  #
  # Constants inside this module are referenced without the OboFile:: prefix on
  # purpose: inside module OboFile that name may also resolve to a class of the
  # same name nested in it, which does not contain the tokens.
  module Tokens

    # Regexp that can never match; used by tokens that are not implemented yet.
    NEVER_MATCH = /\A(?!)/

    class Token
      # this allows access to the class attribute regexp, without using a class variable
      class << self; attr_reader :regexp; end
      attr_reader :value
      def initialize(str)
        @value = str
      end
    end

    # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
    # moving along popping off

    class Term < Token
      @regexp = /\A\s*(\[term\])\s*/i
    end

    class Typedef < Token
      @regexp = /\A\s*(\[typedef\])\s*/i
    end

    # A 'tag: value' line. The text before the first colon becomes #tag, the
    # stripped remainder of the line becomes #value.
    class TagValuePair < Token
      attr_reader :tag
      @regexp = /\A\s*([^:]+:.+)\s*\n*/i
      def initialize(str)
        tag, value = str.strip.split(':', 2)
        @tag = tag
        @value = value.strip
      end
    end

    # not implemented: must never match, otherwise the lexer would skip input
    class NameValuePair < Token
      @regexp = NEVER_MATCH
    end

    # not implemented: must never match, otherwise the lexer would skip input
    class Dbxref < Token
      @regexp = NEVER_MATCH
    end

    # same as ID
    class Label < Token
      @regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*') # matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" "
      def initialize(str)
        str = str.strip
        str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
        str = str[1..-2] if str[0..0] == '"'
        @value = str.strip
      end
    end

    # note we grab EOL and ; here
    class ValuePair < Token
      @regexp = /\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i # value is a {:key => 'value'} hash for tokens like 'foo=bar' or foo = 'b a ar'
      def initialize(str)
        # split on the first '=' only, so a value may itself contain '='
        key, val = str.strip.split('=', 2)
        val = val.strip
        val = val[1..-2] if val[0..0] == "'"
        val = val[1..-2] if val[0..0] == "\""
        @value = { key.strip.downcase.to_sym => val.strip }
      end
    end

    class Matrix < Token
      @regexp = /\A\s*(matrix)\s*/i
    end

    class RowVec < Token
      @regexp = /\A\s*(.+)\s*\n/i
      def initialize(str)
        @value = str.split(/\(|\)/).collect { |part| part =~ /[\,|\s]/ ? part.split(/[\,|\s]/) : part }.inject([]) { |sum, x| x.class == Array ? sum << x.delete_if { |y| y == "" } : sum + x.strip.split(//) }
      end
    end

    ## punctuation

    class LBracket < Token
      @regexp = Regexp.new('\A\s*(\[)\s*')
    end

    class RBracket < Token
      @regexp = Regexp.new('\A\s*(\])\s*')
    end

    class LParen < Token
      @regexp = Regexp.new('\A\s*(\()\s*')
    end

    class RParen < Token
      @regexp = Regexp.new('\A\s*(\))\s*')
    end

    class Equals < Token
      @regexp = Regexp.new('\A\s*(=)\s*')
    end

    class BckSlash < Token
      @regexp = Regexp.new('\A\s*(\/)\s*')
    end

    class Colon < Token
      @regexp = Regexp.new('\A\s*(:)\s*')
    end

    class SemiColon < Token
      @regexp = Regexp.new('\A\s*(;)\s*')
    end

    class Comma < Token
      @regexp = Regexp.new('\A\s*(\,)\s*')
    end

    class EndOfFile < Token
      @regexp = Regexp.new('\A(\s*\n*)\Z')
    end

    class Number < Token
      @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
      def initialize(str)
        # a little oddness here, in some case we don't want to include the .0
        # see issues with numbers as labels
        # A fraction or an exponent makes it a Float ("1e5".to_i would be 1).
        if str =~ /[.eE]/
          @value = str.to_f
        else
          @value = str.to_i
        end
      end
    end

    # this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
    def self.obo_file_token_list
      [
        Term,
        Typedef,
        TagValuePair,
        NameValuePair, # not implemented
        Dbxref, # not implemented
        LBracket,
        EndOfFile
      ]
    end

  end
end
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem metadata for obo_parser 0.1.0 as generated from the Rakefile.
Gem::Specification.new do |s|
  s.name = %q{obo_parser}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["mjy"]
  s.date = %q{2010-03-17}
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
  s.email = %q{diapriid@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "init.rb",
    "install.rb",
    "lib/lexer.rb",
    "lib/obo_file.rb",
    "lib/parser.rb",
    "lib/tokens.rb",
    "obo_parser.gemspec",
    "tasks/obo_parser_tasks.rake",
    "test/cell.obo",
    "test/obo_1.0_test.txt",
    "test/obo_1.0_test_wo_typedefs.txt",
    "test/test_obo_parser.rb",
    "uninstall.rb"
  ]
  s.homepage = %q{http://github.com/mjy/obo_parser}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{A simple OBO file handler.}
  s.test_files = [
    "test/test_obo_parser.rb"
  ]

  # No dependencies are declared, so the generated RubyGems version check
  # (which read the legacy Gem::RubyGemsVersion constant and only guarded empty
  # branches) is not needed; only the specification version is set.
  s.specification_version = 3 if s.respond_to? :specification_version
end
61
+
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :foo_task do
3
+ # # Task goes here
4
+ # end