obo_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 mjy
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,13 @@
1
+ NexusParser
2
+ ===========
3
+
4
+ Introduction goes here.
5
+
6
+
7
+ Example
8
+ =======
9
+
10
+ Example goes here.
11
+
12
+
13
+ Copyright (c) 2008 Matt Yoder, released under the MIT license
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = obo_parser
2
+
3
+ A simple OBO file format parsing library.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine, but bump the version in a commit by itself so that I can ignore it when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 Matt Yoder. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks are provided by Jeweler. When it is not installed we
# only print a hint, so the remaining tasks in this file stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "obo_parser"
    gem.summary = %Q{A simple OBO file handler.}
    gem.description = %Q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
    gem.email = "diapriid@gmail.com"
    gem.homepage = "http://github.com/mjy/obo_parser"
    gem.authors = ["mjy"]
    # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage is optional: without rcov the :rcov task aborts with install instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (presumably it comes
# from Jeweler's tasks). Only depend on it when it exists, otherwise `rake test`
# would fail on an unknown prerequisite whenever Jeweler is unavailable.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Prefer RDoc's own task; fall back to the legacy Rake::RDocTask on old
# installations where 'rdoc/task' cannot be loaded.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "obo_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ # Include hook code here
data/install.rb ADDED
@@ -0,0 +1 @@
1
+ # Install hook code here
data/lib/lexer.rb ADDED
@@ -0,0 +1,59 @@
1
# Turns the raw text of an OBO file into tokens, one at a time.
#
# The lexer keeps the not-yet-consumed text in @input and caches at most one
# look-ahead token in @next_token. Token classes must respond to .regexp and
# are instantiated with the first capture group of their match.
class OboFile::Lexer
  attr_reader :input

  # input - the String to tokenize.
  def initialize(input)
    @input = input
    @next_token = nil
  end

  # Reports whether the upcoming token is exactly an instance of token_class.
  # The token is read (and cached) if necessary, but not consumed.
  def peek(token_class)
    read_next_token(token_class).class == token_class
  end

  # Consumes and returns the upcoming token.
  #
  # Raises OboFile::ParseError when that token is not an instance of
  # token_class; the look-ahead cache is cleared either way.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil
    return token if token.class == token_class

    raise(OboFile::ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
  end

  private

  # Returns the cached look-ahead token, reading a new one when the cache is
  # empty. The requested class gets the first chance to match; after that the
  # classes of OboFile::Tokens.obo_file_token_list are tried in order.
  #
  # Returns nil when nothing matched and the input is exhausted.
  # Raises OboFile::ParseError when nothing matched but input remains.
  def read_next_token(token_class)
    return @next_token if @next_token
    return @next_token if match(token_class)

    OboFile::Tokens.obo_file_token_list.each do |candidate|
      return @next_token if match(candidate)
    end

    # no match, either end of string or lex-error
    return nil if @input == ''

    raise(OboFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
  end

  # Tries token_class against the current input. On success the token is
  # cached, the matched text is dropped from the input, and true is returned;
  # otherwise false is returned and nothing changes.
  def match(token_class)
    found = token_class.regexp.match(@input)
    return false unless found

    @next_token = token_class.new(found[1])
    @input = @input[found.end(0)..-1]
    true
  end
end
data/lib/obo_file.rb ADDED
@@ -0,0 +1,106 @@
1
+
2
+ # uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
3
+ # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
4
+
5
+ # outstanding issues:
6
+
7
# Namespace for the OBO parser: the parsed-file model (OboFile::OboFile and its
# stanza classes), the builder the parser feeds, and the ParseError exception.
module OboFile

  # The sibling files reopen this namespace (OboFile::Tokens, OboFile::Parser,
  # OboFile::Lexer), so they are loaded once the module itself exists.
  %w[tokens parser lexer].each do |lib|
    require File.expand_path(File.join(File.dirname(__FILE__), lib))
  end

  # In-memory representation of one parsed OBO file.
  class OboFile # Node
    attr_accessor :terms, :typedefs

    def initialize
      @terms = []
      @typedefs = []
    end

    # Returns the names of all terms, sorted.
    def term_strings
      names = @terms.map { |term| term.name }
      names.sort
    end

    # Returns a Hash mapping each term's name to its id. When several terms
    # share a name, the one added last wins.
    def term_hash
      @terms.each_with_object({}) { |term, index| index[term.name] = term.id }
    end

    # Common base of the stanza types: holds the single-valued id and name
    # plus every other tag collected under its tag name.
    class Stanza
      attr_accessor :name, :id, :tags
      # we can have only one of id, name, and some others (but this is a loose setup now)
      # can have many of some other things- put them in tags

      # tags - a list of [tag, value] pairs in file order. 'id' and 'name' set
      #        the matching attributes (a later pair overwrites an earlier
      #        one); all other values are appended to @tags[tag].
      def initialize(tags)
        @tags = {}
        tags.each do |pair|
          key = pair[0]
          if 'id' == key
            @id = pair[1]
          elsif 'name' == key
            @name = pair[1]
          else
            (@tags[key] ||= []).push(pair[1])
          end
        end
      end
    end

    # A [Term] stanza.
    class Term < Stanza
      attr_accessor :def
    end

    # A [Typedef] stanza.
    class Typedef < Stanza
    end

  end

  # Collects stanzas into an OboFile instance as they are reported.
  class OboFileBuilder
    def initialize
      @of = OboFile.new
    end

    # Appends a Term built from the given [tag, value] pairs.
    def add_term(tags)
      @of.terms << OboFile::Term.new(tags)
    end

    # Appends a Typedef built from the given [tag, value] pairs.
    def add_typedef(tags)
      @of.typedefs << OboFile::Typedef.new(tags)
    end

    # Returns the OboFile assembled so far.
    def obo_file
      @of
    end

  end

  # Raised for lexing and parsing failures.
  class ParseError < StandardError
  end

end # end module
91
+
92
# Entry point: parses OBO-formatted text and returns the object assembled by
# OboFile::OboFileBuilder.
#
# input - a String with the contents of an OBO file. The string is left
#         untouched (so frozen strings are fine); comment stripping works on
#         a copy.
#
# Raises OboFile::ParseError when input is nil or empty. Errors raised by the
# lexer or parser propagate unchanged.
def parse_obo_file(input)
  raise(OboFile::ParseError, "Nothing passed to parse!") if !input || input.size == 0

  # strip out comments - this is a kludge, likely needs fixing!!
  # (gsub rather than gsub!, so the caller's string is never modified)
  text = input.gsub(/(\s*?![^!'"]*?\n)/i, "\n")

  builder = OboFile::OboFileBuilder.new
  lexer = OboFile::Lexer.new(text)
  OboFile::Parser.new(lexer, builder).parse_file
  builder.obo_file
end
104
+
105
+
106
+
data/lib/parser.rb ADDED
@@ -0,0 +1,53 @@
1
# Drives an OboFile::Lexer over an OBO file and reports every [Term] and
# [Typedef] stanza to a builder (add_term / add_typedef).
#
# Expected layout: header tag-value pairs, then [Term] stanzas, then [Typedef]
# stanzas. Parsing stops at the first token after the typedefs that is not a
# [Typedef].
class OboFile::Parser
  # lexer   - responds to peek(token_class) and pop(token_class).
  # builder - responds to add_term(tags) and add_typedef(tags).
  def initialize(lexer, builder)
    @lexer = lexer
    @builder = builder
  end

  # Parses the whole input.
  #
  # Raises OboFile::ParseError (from this class or from the lexer) when the
  # input does not have the expected structure.
  def parse_file
    # toss everything right now, we just want the terms
    while !@lexer.peek(OboFile::Tokens::Term)
      @lexer.pop(OboFile::Tokens::TagValuePair)
    end

    i = 0
    while !@lexer.peek(OboFile::Tokens::Typedef) && !@lexer.peek(OboFile::Tokens::EndOfFile)
      raise OboFile::ParseError, "infinite loop in Terms" if i > 10000000
      parse_term
      i += 1
    end

    i = 0
    while @lexer.peek(OboFile::Tokens::Typedef)
      raise OboFile::ParseError, "infinite loop in Typedefs" if i > 1000000
      parse_typedef
      i += 1
    end
  end

  # Consumes one [Term] header and the tag-value pairs up to the next [Term],
  # [Typedef] or end of file, then hands the pairs to the builder.
  #
  # Raises OboFile::ParseError when something other than a tag-value pair
  # shows up inside the stanza.
  def parse_term
    @lexer.pop(OboFile::Tokens::Term)
    tags = []
    last_pair = nil # most recent pair, only used to give error messages some context
    while !@lexer.peek(OboFile::Tokens::Term) && !@lexer.peek(OboFile::Tokens::Typedef) && !@lexer.peek(OboFile::Tokens::EndOfFile)
      unless @lexer.peek(OboFile::Tokens::TagValuePair)
        # There may be no previous pair yet (bad token right after the header),
        # so the message must not assume one exists.
        if last_pair
          raise(OboFile::ParseError, "Expected a tag-value pair, but did not get one following this tag/value: [#{last_pair.tag} / #{last_pair.value}]")
        else
          raise(OboFile::ParseError, "Expected a tag-value pair, but did not get one directly after a [Term] header")
        end
      end
      last_pair = @lexer.pop(OboFile::Tokens::TagValuePair)
      tags.push [last_pair.tag, last_pair.value]
    end
    @builder.add_term(tags)
  end

  # Consumes one [Typedef] header and the tag-value pairs that follow it, then
  # hands the pairs to the builder. The stanza ends at the first token that is
  # not a tag-value pair.
  def parse_typedef
    @lexer.pop(OboFile::Tokens::Typedef)
    # @t = @builder.stub_typdef
    tags = []
    while !@lexer.peek(OboFile::Tokens::Typedef) && @lexer.peek(OboFile::Tokens::TagValuePair)
      t = @lexer.pop(OboFile::Tokens::TagValuePair)
      tags.push [t.tag, t.value]
    end
    @builder.add_typedef(tags)
  end

end
data/lib/tokens.rb ADDED
@@ -0,0 +1,160 @@
1
# Token classes used by OboFile::Lexer.
#
# The lexer matches a class's regexp against the remaining input, builds the
# token from capture group 1, and drops the whole match from the input. Every
# pattern is therefore anchored with \A so it can only match at the current
# position.
module OboFile::Tokens

  # Base class. Subclasses store their pattern in the class-level instance
  # variable @regexp; the default constructor keeps the captured text as value.
  class Token
    # this allows access to the class attribute regexp, without using a class variable
    class << self; attr_reader :regexp; end
    attr_reader :value
    def initialize(str)
      @value = str
    end
  end

  # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
  # moving along popping off

  # The "[Term]" stanza header (case-insensitive).
  class Term < Token
    @regexp = Regexp.new(/\A\s*(\[term\])\s*/i)
  end

  # The "[Typedef]" stanza header (case-insensitive).
  class Typedef < Token
    @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
  end


  # A "tag: value" line, split at the first colon; tag and value are exposed
  # separately. Note that the tag part ([^:]+) is not limited to one line, so
  # this token must be tried after the stanza headers.
  class TagValuePair < Token
    attr_reader :tag, :value
    @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
    def initialize(str)
      str.strip!
      str = str.split(':',2)

      str[1].strip!
      # strip trailing comments

      @tag = str[0]
      @value = str[1]
    end
  end

  # Not implemented yet: the pattern can never match. (An ordinary placeholder
  # word would match that word anywhere in the remaining input, because it is
  # not anchored, and the lexer would then skip everything before it.)
  class NameValuePair < Token
    @regexp = /(?!)/
  end

  # Not implemented yet: the pattern can never match (see NameValuePair).
  class Dbxref < Token
    @regexp = /(?!)/
  end

  # same as ID
  class Label < Token
    @regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*') # matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" "
    def initialize(str)
      str.strip!
      str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
      str = str[1..-2] if str[0..0] == '"'
      str.strip!
      @value = str
    end
  end



  # note we grab EOL and ; here
  class ValuePair < Token
    @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
    def initialize(str)
      str.strip!
      # split at the first '=' only, so a value that itself contains '=' stays intact
      str = str.split('=', 2)
      str[1].strip!
      str[1] = str[1][1..-2] if str[1][0..0] == "'"
      str[1] = str[1][1..-2] if str[1][0..0] == "\""
      @value = {str[0].strip.downcase.to_sym => str[1].strip}
    end
  end

  # The keyword "matrix" (case-insensitive).
  class Matrix < Token
    @regexp = Regexp.new(/\A\s*(matrix)\s*/i)
  end

  # One line of text, broken into a list: parenthesised groups that contain a
  # comma, '|' or whitespace become sub-arrays of their parts, everything else
  # contributes its individual characters.
  class RowVec < Token
    @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
    def initialize(str)
      pieces = str.split(/\(|\)/).collect{|piece| piece =~ /[\,|\s]/ ? piece.split(/[\,|\s]/) : piece}
      @value = pieces.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
    end
  end



  ## punctuation

  class LBracket < Token
    @regexp = Regexp.new('\A\s*(\[)\s*')
  end

  class RBracket < Token
    @regexp = Regexp.new('\A\s*(\])\s*')
  end

  class LParen < Token
    @regexp = Regexp.new('\A\s*(\()\s*')
  end

  class RParen < Token
    @regexp = Regexp.new('\A\s*(\))\s*')
  end

  class Equals < Token
    @regexp = Regexp.new('\A\s*(=)\s*')
  end

  # NOTE: despite the name, the pattern matches a forward slash ("/").
  class BckSlash < Token
    @regexp = Regexp.new('\A\s*(\/)\s*')
  end


  class Colon < Token
    @regexp = Regexp.new('\A\s*(:)\s*')
  end

  class SemiColon < Token
    @regexp = Regexp.new('\A\s*(;)\s*')
  end

  class Comma < Token
    @regexp = Regexp.new('\A\s*(\,)\s*')
  end

  # Matches when only whitespace (or nothing at all) is left in the input.
  class EndOfFile < Token
    @regexp = Regexp.new('\A(\s*\n*)\Z')
  end

  # An integer or decimal literal, optionally signed and with an exponent.
  class Number < Token
    @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
    def initialize(str)
      # a little oddness here, in some case we don't want to include the .0
      # see issues with numbers as labels
      # A fraction or an exponent makes it a Float ("1e5".to_i would be 1).
      if str =~ /[.eE]/
        @value = str.to_f
      else
        @value = str.to_i
      end

    end
  end

  # Tokens::NexusComment

  # this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
  def self.obo_file_token_list
    [
      OboFile::Tokens::Term,
      OboFile::Tokens::Typedef,
      OboFile::Tokens::TagValuePair,
      OboFile::Tokens::NameValuePair, # not implemented
      OboFile::Tokens::Dbxref, # not implemented
      OboFile::Tokens::LBracket,
      OboFile::Tokens::EndOfFile
    ]
  end

end
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem metadata for obo_parser 0.1.0. No runtime or development dependencies
# are declared.
Gem::Specification.new do |s|
  s.name = %q{obo_parser}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["mjy"]
  s.date = %q{2010-03-17}
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
  s.email = %q{diapriid@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "init.rb",
    "install.rb",
    "lib/lexer.rb",
    "lib/obo_file.rb",
    "lib/parser.rb",
    "lib/tokens.rb",
    "obo_parser.gemspec",
    "tasks/obo_parser_tasks.rake",
    "test/cell.obo",
    "test/obo_1.0_test.txt",
    "test/obo_1.0_test_wo_typedefs.txt",
    "test/test_obo_parser.rb",
    "uninstall.rb"
  ]
  s.homepage = %q{http://github.com/mjy/obo_parser}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{A simple OBO file handler.}
  s.test_files = [
    "test/test_obo_parser.rb"
  ]

  # The generated RubyGems-version conditional that used to follow had two
  # empty branches, and merely evaluating it referenced Gem::RubyGemsVersion,
  # a constant that current RubyGems releases do not define. It is dropped so
  # the gemspec loads everywhere.
  s.specification_version = 3 if s.respond_to? :specification_version
end
61
+
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :foo_task do
3
+ # # Task goes here
4
+ # end