taxonifi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.document +5 -0
  2. data/Gemfile +18 -0
  3. data/Gemfile.lock +30 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +155 -0
  6. data/Rakefile +53 -0
  7. data/VERSION +1 -0
  8. data/lib/assessor/assessor.rb +31 -0
  9. data/lib/assessor/base.rb +17 -0
  10. data/lib/assessor/row_assessor.rb +131 -0
  11. data/lib/export/export.rb +9 -0
  12. data/lib/export/format/base.rb +43 -0
  13. data/lib/export/format/species_file.rb +341 -0
  14. data/lib/lumper/lumper.rb +334 -0
  15. data/lib/lumper/lumps/parent_child_name_collection.rb +84 -0
  16. data/lib/models/author_year.rb +39 -0
  17. data/lib/models/base.rb +73 -0
  18. data/lib/models/collection.rb +92 -0
  19. data/lib/models/generic_object.rb +15 -0
  20. data/lib/models/geog.rb +59 -0
  21. data/lib/models/geog_collection.rb +28 -0
  22. data/lib/models/name.rb +206 -0
  23. data/lib/models/name_collection.rb +149 -0
  24. data/lib/models/person.rb +49 -0
  25. data/lib/models/ref.rb +85 -0
  26. data/lib/models/ref_collection.rb +106 -0
  27. data/lib/models/species_name.rb +85 -0
  28. data/lib/splitter/builder.rb +26 -0
  29. data/lib/splitter/lexer.rb +70 -0
  30. data/lib/splitter/parser.rb +54 -0
  31. data/lib/splitter/splitter.rb +45 -0
  32. data/lib/splitter/tokens.rb +322 -0
  33. data/lib/taxonifi.rb +36 -0
  34. data/test/file_fixtures/Lygaeoidea.csv +801 -0
  35. data/test/helper.rb +38 -0
  36. data/test/test_exporter.rb +32 -0
  37. data/test/test_lumper_geogs.rb +59 -0
  38. data/test/test_lumper_hierarchical_collection.rb +88 -0
  39. data/test/test_lumper_names.rb +119 -0
  40. data/test/test_lumper_parent_child_name_collection.rb +41 -0
  41. data/test/test_lumper_refs.rb +91 -0
  42. data/test/test_parser.rb +34 -0
  43. data/test/test_splitter.rb +27 -0
  44. data/test/test_splitter_tokens.rb +403 -0
  45. data/test/test_taxonifi.rb +11 -0
  46. data/test/test_taxonifi_accessor.rb +61 -0
  47. data/test/test_taxonifi_geog.rb +51 -0
  48. data/test/test_taxonifi_name.rb +186 -0
  49. data/test/test_taxonifi_name_collection.rb +158 -0
  50. data/test/test_taxonifi_ref.rb +90 -0
  51. data/test/test_taxonifi_ref_collection.rb +69 -0
  52. data/test/test_taxonifi_species_name.rb +95 -0
  53. metadata +167 -0
module Taxonifi
  class NameCollectionError < StandardError; end
  module Model

    # A collection of taxonomic names.
    class NameCollection < Taxonifi::Model::Collection

      # Hash of hashes: rank => { name_string => [ids] }, e.g.
      # {'genus' => {'Foo' => [1, 2, 93]}}. Unranked names are keyed under 'unknown'.
      attr_accessor :by_name_index

      # A Taxonifi::Model::RefCollection built from, or assigned to, these names.
      attr_accessor :ref_collection

      def initialize(options = {})
        super
        @collection = []
        @by_name_index = {} # "foo => [1,2,3]"
        Taxonifi::RANKS.inject(@by_name_index){|hsh, v| hsh.merge!(v => {})}
        @by_name_index['unknown'] = {} # unranked names get dumped in here
        @ref_collection = nil
        true
      end

      # The class of object this collection manages.
      def object_class
        Taxonifi::Model::Name
      end

      # Return the highest RANK for which there is no
      # name in this collection.
      def encompassing_rank
        highest = RANKS.size
        @collection.each do |n|
          h = RANKS.index(n.rank)
          highest = h if h < highest
        end
        RANKS[highest - 1]
      end

      # The name objects in the collection at a given rank.
      # TODO: Should index this on add_object
      def names_at_rank(rank)
        # Raise with a message (still a RuntimeError, so rescue behavior is unchanged).
        raise "Unrecognized rank: #{rank}" if !RANKS.include?(rank)
        names = []
        @collection.each do |n|
          names << n if n.rank == rank
        end
        names
      end

      # Returns the id of a matching existing name,
      # or false if there is no match.
      # Matches against name (string) and parents ("identity").
      def name_exists?(name = Taxonifi::Model::Name)
        # Unranked names are indexed under 'unknown'. Guard the nil case first:
        # the original called name.rank.downcase before its nil fallback,
        # which raised NoMethodError for unranked names.
        rank = name.rank ? name.rank.downcase : 'unknown'
        if by_name_index[rank][name.name]
          # The name string exists; check whether the parent lineages match too.
          by_name_index[rank][name.name].each do |id|
            vector = parent_id_vector(id)
            vector.pop
            return id if vector == parent_id_vector(name.parent.id)
          end
        end
        false
      end

      # Add an individual name object, indexing it.
      def add_object(obj)
        super
        index_by_name(obj)
        obj
      end

      # Add an individual name object, without assigning a new id.
      def add_object_pre_indexed(obj)
        super
        index_by_name(obj)
        obj
      end

      # Add the names of a Taxonifi::Model::SpeciesName as individual objects,
      # re-using existing matching names where possible.
      # Returns the id of the last name created/matched.
      def add_species_name(sn)
        raise "Failed trying to load [#{sn.display_name}]. SpeciesName#genus#parent must be set before using add_species_name." if sn.genus.parent.nil?
        current_parent_id = sn.genus.parent.id
        sn.names.each do |o|
          o.parent = object_by_id(current_parent_id)
          if id = name_exists?(o)
            cp_id = id
          else
            add_object(o)
            cp_id = o.id
          end
          current_parent_id = cp_id
        end
        current_parent_id # return the id of the last name created
      end

      # As #add_species_name but do
      # not assign ids to the incoming names.
      # TODO: deprecate?
      def add_species_name_unindexed(sn)
        sn.names.each do |o|
          if !name_exists?(o)
            add_object(o)
          end
        end
      end

      # Take the author/years of these names and generate a reference collection.
      # Start the ids assigned to the references with initial_id.
      def generate_ref_collection(initial_id = 0)
        rc = Taxonifi::Model::RefCollection.new(:initial_id => initial_id)
        if collection.size > 0
          uniques = collection.inject({}){|hsh, n| hsh.merge!(n.author_year_string => nil)}.keys.compact
          if uniques.size > 0
            # Plain #each: the original's each_with_index index was unused.
            uniques.sort.each do |r|
              next if r.size == 0
              ref = Taxonifi::Model::Ref.new(:author_year => r)
              rc.add_object(ref)
            end
          end
        end
        @ref_collection = rc
      end

      # Assign a reference collection to this name collection.
      # !! Overwrites existing reference collection, including ones built
      # using generate_ref_collection.
      def ref_collection=(ref_collection)
        @ref_collection = ref_collection if ref_collection.class == Taxonifi::Model::RefCollection
      end

      protected

      # Index the object by name into the
      # @by_name_index variable (this looks like:
      # {"Foo bar" => [1,2,93]})
      def index_by_name(obj)
        rank = obj.rank
        rank ||= 'unknown'
        by_name_index[rank][obj.name] ||= []
        by_name_index[rank][obj.name].push obj.id
      end

    end
  end
end
require File.expand_path(File.join(File.dirname(__FILE__), "../models/base.rb"))

module Taxonifi
  module Model

    # Simple Person class.
    # You can store multiple initials and suffixes.
    class Person < Taxonifi::Model::Base
      ATTRIBUTES = [
        :first_name,
        :last_name,
        :initials, # an Array, no periods.
        :suffix    # an Array
      ]

      ATTRIBUTES.each do |a|
        attr_accessor a
      end

      def initialize(options = {})
        opts = {
        }.merge!(options)
        # Check for valid opts prior to building
        build(ATTRIBUTES, opts)
        true
      end

      # Returns a string with data delimited by pipes, downcased, whitespace removed.
      # Used in identity comparisons.
      def compact_string
        # Array#join flattens nested arrays (e.g. @initials) recursively with the
        # same separator, so the original's extra wrapping Array and unused local
        # are dropped; output is identical.
        ATTRIBUTES.sort.collect{|a| send(a)}.join("|").downcase.gsub(/\s/, '')
      end

      # Nothing fancy, just the data.
      def display_name
        [@last_name, @first_name, @initials, @suffix].compact.flatten.join(" ")
      end

      # Return a string representing the initials, periods added,
      # or nil when no initials are set.
      def initials_string
        if @initials.nil?
          nil
        else
          @initials.join(".") + "."
        end
      end
    end
  end
end
module Taxonifi
  class RefError < StandardError; end
  module Model

    # A basic reference (citation) object.
    class Ref < Taxonifi::Model::Base

      # These attributes are set automatically on #new()
      ATTRIBUTES = [
        :authors,
        :title,
        :year,
        :publication,
        :volume,
        :number,
        :pages,
        :pg_start,
        :pg_end,
        :cited_page,
        :full_citation
      ]

      # Array of Taxonifi::Model::Person
      attr_accessor :authors
      # String
      attr_accessor :title
      # String
      attr_accessor :year
      # String
      attr_accessor :publication
      # String
      attr_accessor :volume
      # String
      attr_accessor :number
      # String. Anything that doesn't fit in a page range.
      attr_accessor :pages
      # String
      attr_accessor :pg_start
      # String
      attr_accessor :pg_end
      # String. Some specific page(s) of note.
      attr_accessor :cited_page
      # String. The full text of the citation, as read from input or assigned, not computed from individual components.
      attr_accessor :full_citation

      # String. Computed index based on existing Ref#authors and Ref#year.
      # NOTE: the reader generated here is overridden by the memoizing
      # #author_year_index method below; the generated writer remains in use.
      attr_accessor :author_year_index

      # If :author_year is passed it is broken down into People + year.
      # Raises Taxonifi::RefError when :author_year is combined with
      # :authors or :year (they are mutually exclusive inputs).
      def initialize(options = {})
        opts = {
        }.merge!(options)
        @parent = nil
        build(ATTRIBUTES, opts)
        @authors = [] if @authors.nil?
        raise Taxonifi::RefError, 'If :author_year is provided then authors and year must not be.' if opts[:author_year] && (!opts[:year].nil? || !opts[:authors].nil?)
        add_author_year(opts[:author_year]) if !opts[:author_year].nil? && opts[:author_year].size > 0
        true
      end

      # Split an "author, year" string via the Splitter and assign
      # @year and @authors (Array of Person) from the result.
      def add_author_year(string)
        auth_yr = Taxonifi::Splitter::Builder.build_author_year(string)
        @year = auth_yr.year
        @authors = auth_yr.people
      end

      # Returns a pipe delimited representation of the reference,
      # downcased with whitespace removed. Used in identity comparisons.
      def compact_string
        s = [authors.collect{|a| a.compact_string}.join, year, self.title, publication, volume, number, pages, pg_start, pg_end, cited_page].join("|").downcase.gsub(/\s/, '')
        s
      end

      # Return the author_year index, generating (and caching) it on first access.
      def author_year_index
        @author_year_index ||= generate_author_year_index
      end

      # (re-) generate the author year index.
      def generate_author_year_index
        @author_year_index = Taxonifi::Model::AuthorYear.new(people: @authors, year: @year).compact_index
      end

    end
  end
end
module Taxonifi
  class RefCollectionError < StandardError; end

  module Model

    # A collection of references.
    class RefCollection < Taxonifi::Model::Collection

      # An optional index when there is one reference per row.
      attr_accessor :row_index

      # Points a Ref#id to an array of Person#ids.
      # Built on request via #build_author_index.
      attr_accessor :author_index

      def initialize(options = {})
        super
        @row_index = []
        @author_index = {}
        true
      end

      # The instance collection class.
      def object_class
        Taxonifi::Model::Ref
      end

      # The object at a given row.
      # TODO: inherit from Collection?
      def object_from_row(row_number)
        @row_index[row_number]
      end

      # Incrementally (re-)assigns the id of every associated author (Person).
      # This is only really useful if you assume every author is unique.
      def enumerate_authors(initial_id = 0)
        i = initial_id
        collection.each do |r|
          r.authors.each do |a|
            a.id = i
            i += 1
          end
        end
      end

      # Finds unique authors, and combines them, then
      # rebuilds author lists using references to the new unique set.
      def uniquify_authors(initial_id = 0)
        auth_index = {}
        unique_authors.each_with_index do |a, i|
          a.id = i + initial_id
          auth_index.merge!(a.compact_string => a)
        end

        collection.each do |r|
          # Rebuild by identity lookup; #collect replaces the original's
          # inject-into-an-external-array form with identical results.
          r.authors = r.authors.collect{|a| auth_index[a.compact_string]}
        end
        true
      end

      # Build the author index.
      # {Ref#id => [a1#id, ... an#id]}
      # Authors without an id are recorded as -1.
      def build_author_index
        collection.each do |r|
          @author_index.merge!(r.id => r.authors.collect{|a| a.id ? a.id : -1})
        end
      end

      # Return a sorted array of the unique author display strings in this collection.
      def unique_author_strings
        auths = {}
        collection.each do |r|
          r.authors.each do |a|
            auths.merge!(a.display_name => nil)
          end
        end
        auths.keys.sort
      end

      # Returns Array of Taxonifi::Model::Person, one clone per identity-unique author.
      # Will need better indexing on big lists?
      def unique_authors
        auths = []
        collection.each do |r|
          r.authors.each do |a|
            # The original set a found-flag and used `next` (which only skipped
            # one inner iteration, continuing a useless scan); Enumerable#any?
            # short-circuits as intended with the same result.
            auths.push(a.clone) unless auths.any?{|x| a.identical?(x)}
          end
        end
        auths
      end

    end
  end

end
module Taxonifi
  class SpeciesNameError < StandardError; end
  module Model

    # The species name model is just a pointer to (up to) 5 Taxonifi::Model::Names.
    # The various metadata (author, year, original combination) is stored with the individual
    # instances of those names.
    # Taxonifi::Model::Names have no ids!
    class SpeciesName < Taxonifi::Model::Base
      ATTRIBUTES = [:genus, :subgenus, :species, :subspecies, :parent]
      ATTRIBUTES.each do |a|
        attr_accessor a
      end

      def initialize(options = {})
        opts = {
        }.merge!(options)
        build(ATTRIBUTES, opts)
        true
      end

      # Set the genus name.
      def genus=(genus)
        @genus = genus
      end

      # Set the subgenus name. Requires genus to be set first;
      # wires the subgenus's parent to the genus.
      def subgenus=(subgenus)
        raise Taxonifi::SpeciesNameError, "Species name must have a Genus name before subgenus can be assigned" if @genus.nil?
        @subgenus = subgenus
        @subgenus.parent = @genus
      end

      # Set the species name. Parent is the subgenus when present, otherwise the genus.
      def species=(species)
        raise Taxonifi::SpeciesNameError, "Species name must have a Genus name before species can be assigned" if @genus.nil?
        @species = species
        @species.parent = (@subgenus ? @subgenus : @genus)
      end

      # Set the subspecies name. Requires species to be set first.
      def subspecies=(subspecies)
        # Fixed copy-pasted message tail: previously read "... before species can be assigned".
        raise Taxonifi::SpeciesNameError, "Subspecies name must have a species name before subspecies can be assigned" if @species.nil?
        @subspecies = subspecies
        @subspecies.parent = @species
      end

      # Set the parent name. Must be a Taxonifi::Model::Name
      # ranked strictly higher than genus.
      def parent=(parent)
        if parent.class != Taxonifi::Model::Name
          # Fully qualified for consistency with the other raises in this class
          # (resolves to the same constant via module nesting).
          raise Taxonifi::SpeciesNameError, "Parent is not a Taxonifi::Model::Name."
        end

        if parent.rank.nil? || (Taxonifi::RANKS.index('genus') <= Taxonifi::RANKS.index(parent.rank))
          raise Taxonifi::SpeciesNameError, "Parents of SpeciesNames must have rank higher than Genus."
        end

        @parent = parent
      end

      # Return an array of the Name objects that are set.
      # NOTE(review): ATTRIBUTES includes :parent, so a set parent is returned
      # last — confirm callers (#display_name, NameCollection#add_species_name)
      # expect that before changing.
      def names
        ATTRIBUTES.collect{|a| self.send(a)}.compact
      end

      # Return a string representation of the species name:
      # subgenus parenthesized, the last name's author/year appended.
      def display_name
        strs = []
        self.names.each do |n|
          case n.rank
          when 'subgenus'
            strs.push "(#{n.name})"
          else
            strs.push n.name
          end
        end
        strs.push self.names.last.author_year
        strs.compact.join(" ")
      end
    end
  end
end
# Builder functionality for parsing/lexing framework.
module Taxonifi::Splitter::Builder

  # Load all builders (= models)
  # TODO: perhaps use a different scope that doesn't require loading all at once
  Dir.glob( File.expand_path(File.join(File.dirname(__FILE__), "../models/*.rb") )) do |file|
    require file
  end

  # Build and return a Taxonifi::Model::AuthorYear from a string.
  def self.build_author_year(text)
    builder = Taxonifi::Model::AuthorYear.new
    Taxonifi::Splitter::Parser.new(Taxonifi::Splitter::Lexer.new(text), builder).parse_author_year
    builder
  end

  # Build and return a Taxonifi::Model::SpeciesName from a string.
  def self.build_species_name(text)
    builder = Taxonifi::Model::SpeciesName.new
    Taxonifi::Splitter::Parser.new(Taxonifi::Splitter::Lexer.new(text, :species_name), builder).parse_species_name
    builder
  end

end
#
# Lexer taken verbatim from OboParser and other mjy gems.
#
# Wraps an input string and matches tokens from a configured token list
# against the head of the string, consuming matched text as it goes.
class Taxonifi::Splitter::Lexer
  attr_reader :input, :token_list

  # input      - String to tokenize.
  # token_list - Symbol naming one of Taxonifi::Splitter::TOKEN_LISTS;
  #              defaults to :global_token_list.
  # Raises Taxonifi::Splitter::SplitterError for an unrecognized list.
  def initialize(input, token_list = nil)

    raise Taxonifi::Splitter::SplitterError, "Invalid token list passed to Lexer." if (!token_list.nil? && !Taxonifi::Splitter::TOKEN_LISTS.include?(token_list) )
    token_list = :global_token_list if token_list.nil?

    @input = input
    @token_list = token_list
    @next_token = nil # one-token lookahead buffer shared by #peek and #pop
  end

  # Checks whether the next token is of the specified class.
  # Does not consume the token.
  # NOTE(review): the token_list parameter is accepted but never used here;
  # kept for interface compatibility — confirm callers before removing.
  def peek(token_class, token_list = nil)
    token = read_next_token(token_class)
    return token.class == token_class
  end

  # Return (and delete) the next token from the input stream, or raise an exception
  # if the next token is not of the given class.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil # clear the lookahead buffer even on mismatch
    if token.class != token_class
      raise(Taxonifi::Splitter::SplitterError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
    else
      return token
    end
  end

  private

  # Read (and store) the next token from the input, if it has not already been read.
  # Returns nil at end of input; raises SplitterError when no token matches
  # remaining input.
  def read_next_token(token_class)
    if @next_token
      return @next_token
    else
      # check for a match on the specified class first
      if match(token_class)
        return @next_token
      else
        # now check all the tokens for a match
        Taxonifi::Splitter::Tokens.send(@token_list).each {|t|
          return @next_token if match(t)
        }
      end
      # no match, either end of string or lex-error
      if @input != ''
        raise(Taxonifi::Splitter::SplitterError, "Lexer Error, unknown token at |#{@input[0..20]}...", caller)
      else
        return nil
      end
    end
  end

  # Match a token to the input. On success stores the token in the lookahead
  # buffer, consumes the matched text from @input, and returns true.
  # Assumes each token regexp anchors at the string head and captures its
  # payload in group 1 — see tokens.rb; verify for new token classes.
  def match(token_class)
    if (m = token_class.regexp.match(@input))
      @next_token = token_class.new(m[1])
      @input = @input[m.end(0)..-1]
      return true
    else
      return false
    end
  end

end
#
# Parser pattern taken from OboParser and other mjy gems.
#
# The parser takes a builder and a lexer and does the actual breakdown.
#
class Taxonifi::Splitter::Parser
  # lexer   - a Taxonifi::Splitter::Lexer wrapping the text to parse.
  # builder - the model instance to populate (AuthorYear, SpeciesName, ...).
  def initialize(lexer, builder )
    @lexer = lexer
    @builder = builder
  end

  # Parse out an author year combination, populating the builder's
  # people, year and parens.
  # TODO: This is only indirectly tested in lumper code
  def parse_author_year
    t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)

    # Re-lex just the author fragment to split out the individual people.
    lexer = Taxonifi::Splitter::Lexer.new(t.authors)
    authors = lexer.pop(Taxonifi::Splitter::Tokens::Authors)

    # TODO: A people collection?
    authors.names.each do |a|
      n = Taxonifi::Model::Person.new()
      n.last_name = a[:last_name]
      n.initials = a[:initials]
      @builder.people.push n
    end

    @builder.year = t.year.to_i
    @builder.parens = t.parens
  end

  # Parse a species name: assign each rank present in the quadrinomial
  # to the builder, then attach any trailing author/year to the
  # lowest-ranked name. Returns the builder.
  def parse_species_name
    t = @lexer.pop(Taxonifi::Splitter::Tokens::Quadrinomial)
    # Dropped the original's dead locals: `last_parent` was never used, and the
    # `names` hash was built with the literal key :r (`names.merge!(r: nil)`,
    # a symbol-literal bug) and never read.
    %w{genus subgenus species subspecies}.each do |r|
      @builder.send("#{r}=", Taxonifi::Model::Name.new(:name => t.send(r), rank: r) ) if t.send(r)
    end

    if @lexer.peek(Taxonifi::Splitter::Tokens::AuthorYear)
      t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)
      @builder.names.last.author = t.authors
      @builder.names.last.year = t.year
      # NOTE(review): parens is inverted here (!t.parens), unlike
      # #parse_author_year which assigns t.parens directly — confirm intended.
      @builder.names.last.parens = !t.parens
      @builder.names.last.derive_authors_year
    end

    @builder
  end

end
module Taxonifi

  # An implementation of the parser/lexer/token pattern by Krishna Dole which in turn was based on
  # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library, which has evolved
  # into mjy's obo_parser/nexus_parser libraries.
  module Splitter

    # The token lists a Lexer may be restricted to.
    TOKEN_LISTS = [
      :global_token_list,
      :volume_number,
      :pages,
      :species_name
    ]

    class SplitterError < StandardError; end

    require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
    require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
    require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
    require File.expand_path(File.join(File.dirname(__FILE__), 'builder'))


    # stub, we might not need
    class Splitter
      def initialize
        true
      end
    end

  end # end Splitter module
end # Taxonifi module


#= Implementation

# NOTE(review): looks like leftover template/stub code —
# Taxonifi::Splitter::SplitterBuilder and Parser#foo are not defined in the
# code visible here; confirm before relying on this method.
def do_bar(input)
  @input = input
  raise(Taxonifi::Splitter::SplitterError, "Nothing passed to parse!") if !@input || @input.size == 0

  builder = Taxonifi::Splitter::SplitterBuilder.new
  lexer = Taxonifi::Splitter::Lexer.new(@input)
  # Fixed misspelled constant: was 'Taxonfi::Splitter::Parser', which would
  # raise NameError whenever this method ran.
  Taxonifi::Splitter::Parser.new(lexer, builder).foo
  return builder.bar
end