taxonifi 0.1.0

Files changed (53)
  1. data/.document +5 -0
  2. data/Gemfile +18 -0
  3. data/Gemfile.lock +30 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +155 -0
  6. data/Rakefile +53 -0
  7. data/VERSION +1 -0
  8. data/lib/assessor/assessor.rb +31 -0
  9. data/lib/assessor/base.rb +17 -0
  10. data/lib/assessor/row_assessor.rb +131 -0
  11. data/lib/export/export.rb +9 -0
  12. data/lib/export/format/base.rb +43 -0
  13. data/lib/export/format/species_file.rb +341 -0
  14. data/lib/lumper/lumper.rb +334 -0
  15. data/lib/lumper/lumps/parent_child_name_collection.rb +84 -0
  16. data/lib/models/author_year.rb +39 -0
  17. data/lib/models/base.rb +73 -0
  18. data/lib/models/collection.rb +92 -0
  19. data/lib/models/generic_object.rb +15 -0
  20. data/lib/models/geog.rb +59 -0
  21. data/lib/models/geog_collection.rb +28 -0
  22. data/lib/models/name.rb +206 -0
  23. data/lib/models/name_collection.rb +149 -0
  24. data/lib/models/person.rb +49 -0
  25. data/lib/models/ref.rb +85 -0
  26. data/lib/models/ref_collection.rb +106 -0
  27. data/lib/models/species_name.rb +85 -0
  28. data/lib/splitter/builder.rb +26 -0
  29. data/lib/splitter/lexer.rb +70 -0
  30. data/lib/splitter/parser.rb +54 -0
  31. data/lib/splitter/splitter.rb +45 -0
  32. data/lib/splitter/tokens.rb +322 -0
  33. data/lib/taxonifi.rb +36 -0
  34. data/test/file_fixtures/Lygaeoidea.csv +801 -0
  35. data/test/helper.rb +38 -0
  36. data/test/test_exporter.rb +32 -0
  37. data/test/test_lumper_geogs.rb +59 -0
  38. data/test/test_lumper_hierarchical_collection.rb +88 -0
  39. data/test/test_lumper_names.rb +119 -0
  40. data/test/test_lumper_parent_child_name_collection.rb +41 -0
  41. data/test/test_lumper_refs.rb +91 -0
  42. data/test/test_parser.rb +34 -0
  43. data/test/test_splitter.rb +27 -0
  44. data/test/test_splitter_tokens.rb +403 -0
  45. data/test/test_taxonifi.rb +11 -0
  46. data/test/test_taxonifi_accessor.rb +61 -0
  47. data/test/test_taxonifi_geog.rb +51 -0
  48. data/test/test_taxonifi_name.rb +186 -0
  49. data/test/test_taxonifi_name_collection.rb +158 -0
  50. data/test/test_taxonifi_ref.rb +90 -0
  51. data/test/test_taxonifi_ref_collection.rb +69 -0
  52. data/test/test_taxonifi_species_name.rb +95 -0
  53. metadata +167 -0
data/lib/models/name_collection.rb ADDED
@@ -0,0 +1,149 @@
+ module Taxonifi
+   class NameCollectionError < StandardError; end
+   module Model
+
+     # A collection of taxonomic names.
+     class NameCollection < Taxonifi::Model::Collection
+
+       attr_accessor :by_name_index
+       attr_accessor :ref_collection
+
+       def initialize(options = {})
+         super
+         @collection = []
+         @by_name_index = {} # "foo => [1,2,3]"
+         Taxonifi::RANKS.inject(@by_name_index){|hsh, v| hsh.merge!(v => {})}
+         @by_name_index['unknown'] = {} # unranked names get dumped in here
+         @ref_collection = nil
+         true
+       end
+
+       def object_class
+         Taxonifi::Model::Name
+       end
+
+       # Return the highest RANK for which there is no
+       # name in this collection.
+       def encompassing_rank
+         highest = RANKS.size
+         @collection.each do |n|
+           h = RANKS.index(n.rank)
+           highest = h if h < highest
+         end
+         RANKS[highest - 1]
+       end
+
+       # The name objects in the collection at a given rank.
+       # TODO: Should index this on add_object
+       def names_at_rank(rank)
+         raise if !RANKS.include?(rank)
+         names = []
+         @collection.each do |n|
+           names << n if n.rank == rank
+         end
+         names
+       end
+
+       # Returns the id of a matching existing name,
+       # or false if there is no match.
+       # Matches against name (string) and parents ("identity").
+       def name_exists?(name = Taxonifi::Model::Name)
+         # Does the name (string) exist?
+         rank = name.rank
+         rank = rank.nil? ? 'unknown' : rank.downcase
+         if by_name_index[rank][name.name]
+           # Yes, check to see if parents match
+           by_name_index[rank][name.name].each do |id|
+             vector = parent_id_vector(id)
+             vector.pop
+             if vector == parent_id_vector(name.parent.id)
+               return id
+             end
+           end
+         end
+         false
+       end
+
+       # Add an individual name object, indexing it.
+       def add_object(obj)
+         super
+         index_by_name(obj)
+         obj
+       end
+
+       # As #add_object, but for a pre-indexed object (its id is already assigned).
+       def add_object_pre_indexed(obj)
+         super
+         index_by_name(obj)
+         obj
+       end
+
+       # Add a Taxonifi::Model::SpeciesName object
+       # as individual Name objects.
+       def add_species_name(sn)
+         raise "Failed trying to load [#{sn.display_name}]. SpeciesName#genus#parent must be set before using add_species_name." if sn.genus.parent.nil?
+         current_parent_id = sn.genus.parent.id
+         sn.names.each do |o|
+           o.parent = object_by_id(current_parent_id)
+           if id = name_exists?(o)
+             cp_id = id
+           else
+             add_object(o)
+             cp_id = o.id
+           end
+           current_parent_id = cp_id
+         end
+         current_parent_id # return the id of the last name created
+       end
+
+       # As #add_species_name, but does not
+       # assign ids to the incoming names.
+       # TODO: deprecate?
+       def add_species_name_unindexed(sn)
+         sn.names.each do |o|
+           if !name_exists?(o)
+             add_object(o)
+           end
+         end
+       end
+
+       # Take the author/years of these names and generate a reference collection.
+       # Start the ids assigned to the references with initial_id.
+       def generate_ref_collection(initial_id = 0)
+         rc = Taxonifi::Model::RefCollection.new(:initial_id => initial_id)
+         if collection.size > 0
+           uniques = collection.inject({}){|hsh, n| hsh.merge!(n.author_year_string => nil)}.keys.compact
+           if uniques.size > 0
+             uniques.sort.each_with_index do |r, i|
+               next if r.size == 0
+               ref = Taxonifi::Model::Ref.new(:author_year => r)
+               rc.add_object(ref)
+             end
+           end
+         end
+         @ref_collection = rc
+       end
+
+       # Assign a reference collection to this name collection.
+       # !! Overwrites an existing reference collection, including ones built
+       # using generate_ref_collection.
+       def ref_collection=(ref_collection)
+         @ref_collection = ref_collection if ref_collection.class == Taxonifi::Model::RefCollection
+       end
+
+       protected
+
+       # Index the object by name into the
+       # @by_name_index variable (this looks like:
+       # {"Foo bar" => [1,2,93]})
+       def index_by_name(obj)
+         rank = obj.rank
+         rank ||= 'unknown'
+         by_name_index[rank][obj.name] ||= []
+         by_name_index[rank][obj.name].push obj.id
+       end
+
+     end
+   end
+ end
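The class above is easiest to see with a small usage sketch (illustrative only, not from the gem's docs; it assumes Taxonifi::Model::Name.new accepts :name and :rank options, as the splitter code later in this release suggests):

  nc  = Taxonifi::Model::NameCollection.new
  gen = Taxonifi::Model::Name.new(:name => 'Aus', :rank => 'genus')
  nc.add_object(gen)                  # assigns an id and indexes the name

  sp = Taxonifi::Model::Name.new(:name => 'bus', :rank => 'species')
  sp.parent = gen
  nc.add_object(sp)

  nc.names_at_rank('species')         # Name objects at a given rank
  nc.name_exists?(sp)                 # id of the matching name, or false
  nc.generate_ref_collection(1)       # build a RefCollection from the author/year strings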
data/lib/models/person.rb ADDED
@@ -0,0 +1,49 @@
+ require File.expand_path(File.join(File.dirname(__FILE__), "../models/base.rb"))
+
+ module Taxonifi
+   module Model
+
+     # Simple Person class.
+     # You can store multiple initials and suffixes.
+     class Person < Taxonifi::Model::Base
+       ATTRIBUTES = [
+         :first_name,
+         :last_name,
+         :initials, # an Array, no periods.
+         :suffix    # an Array
+       ]
+
+       ATTRIBUTES.each do |a|
+         attr_accessor a
+       end
+
+       def initialize(options = {})
+         opts = {
+         }.merge!(options)
+         # Check for valid opts prior to building
+         build(ATTRIBUTES, opts)
+         true
+       end
+
+       # Returns a string with the data delimited by pipes.
+       # Used in identity comparisons.
+       def compact_string
+         ATTRIBUTES.sort.collect{|a| send(a)}.join("|").downcase.gsub(/\s/, '')
+       end
+
+       # Nothing fancy, just the data.
+       def display_name
+         [@last_name, @first_name, @initials, @suffix].compact.flatten.join(" ")
+       end
+
+       # Return a string representing the initials, periods added.
+       def initials_string
+         if @initials.nil?
+           nil
+         else
+           @initials.join(".") + "."
+         end
+       end
+     end
+   end
+ end
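A brief illustrative sketch (hypothetical values; it assumes Base#build accepts these attribute keys as options, as the other models suggest):

  p = Taxonifi::Model::Person.new(:last_name => 'Smith', :initials => ['J', 'A'])
  p.initials_string   # => "J.A."
  p.display_name      # => "Smith J A"
  p.compact_string    # pipe-delimited, lowercased, whitespace stripped; used for identity comparisons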
data/lib/models/ref.rb ADDED
@@ -0,0 +1,85 @@
+ module Taxonifi
+   class RefError < StandardError; end
+   module Model
+
+     # A basic reference object.
+     class Ref < Taxonifi::Model::Base
+
+       # These attributes are set automatically on #new()
+       ATTRIBUTES = [
+         :authors,
+         :title,
+         :year,
+         :publication,
+         :volume,
+         :number,
+         :pages,
+         :pg_start,
+         :pg_end,
+         :cited_page,
+         :full_citation
+       ]
+
+       # Array of Taxonifi::Model::Person
+       attr_accessor :authors
+       # String
+       attr_accessor :title
+       # String
+       attr_accessor :year
+       # String
+       attr_accessor :publication
+       # String
+       attr_accessor :volume
+       # String
+       attr_accessor :number
+       # String. Anything that doesn't fit in a page range.
+       attr_accessor :pages
+       # String
+       attr_accessor :pg_start
+       # String
+       attr_accessor :pg_end
+       # String. Some specific page(s) of note.
+       attr_accessor :cited_page
+       # String. The full text of the citation, as read from input or assigned, not computed from individual components.
+       attr_accessor :full_citation
+
+       # String. Computed index based on existing Ref#authors and Ref#year.
+       attr_accessor :author_year_index
+
+       # If :author_year is passed it is broken down into People + year.
+       def initialize(options = {})
+         opts = {
+         }.merge!(options)
+         @parent = nil
+         build(ATTRIBUTES, opts)
+         @authors = [] if @authors.nil?
+         raise Taxonifi::RefError, 'If :author_year is provided then :authors and :year must not be.' if opts[:author_year] && (!opts[:year].nil? || !opts[:authors].nil?)
+         add_author_year(opts[:author_year]) if !opts[:author_year].nil? && opts[:author_year].size > 0
+         true
+       end
+
+       # Split an author/year string into @authors (People) and @year.
+       def add_author_year(string)
+         auth_yr = Taxonifi::Splitter::Builder.build_author_year(string)
+         @year = auth_yr.year
+         @authors = auth_yr.people
+       end
+
+       # Returns a pipe-delimited representation of the reference.
+       def compact_string
+         s = [authors.collect{|a| a.compact_string}.join, year, self.title, publication, volume, number, pages, pg_start, pg_end, cited_page].join("|").downcase.gsub(/\s/, '')
+         s
+       end
+
+       # Return the author_year index, generating it if necessary.
+       def author_year_index
+         @author_year_index ||= generate_author_year_index
+       end
+
+       # (Re-)generate the author/year index.
+       def generate_author_year_index
+         @author_year_index = Taxonifi::Model::AuthorYear.new(people: @authors, year: @year).compact_index
+       end
+
+     end
+   end
+ end
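An illustrative construction (hypothetical values) showing the :author_year convenience path handled in #initialize; it assumes the splitter recognizes the author/year form given:

  r = Taxonifi::Model::Ref.new(
    :author_year => 'Smith, 1920',    # split into People + year via Splitter::Builder.build_author_year
    :title       => 'On some bugs',
    :publication => 'J. Nat. Hist.'
  )
  r.year                # year parsed from the author/year string
  r.authors             # Array of Taxonifi::Model::Person
  r.author_year_index   # built lazily from @authors and @year

  # Passing :author_year together with :authors or :year raises Taxonifi::RefError.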
data/lib/models/ref_collection.rb ADDED
@@ -0,0 +1,106 @@
+ module Taxonifi
+   class RefCollectionError < StandardError; end
+
+   module Model
+
+     # A collection of references.
+     class RefCollection < Taxonifi::Model::Collection
+
+       # An optional index, used when there is one reference per row.
+       attr_accessor :row_index
+
+       # Points a Ref#id to an array of Person#ids.
+       # Built on request.
+       attr_accessor :author_index
+
+       def initialize(options = {})
+         super
+         @row_index = []
+         @author_index = {}
+         true
+       end
+
+       # The instance collection class.
+       def object_class
+         Taxonifi::Model::Ref
+       end
+
+       # The object at a given row.
+       # TODO: inherit from Collection?
+       def object_from_row(row_number)
+         @row_index[row_number]
+       end
+
+       # Incrementally (re-)assigns the id of every associated author (Person).
+       # This is only really useful if you assume every author is unique.
+       def enumerate_authors(initial_id = 0)
+         i = initial_id
+         collection.each do |r|
+           r.authors.each do |a|
+             a.id = i
+             i += 1
+           end
+         end
+       end
+
+       # Finds unique authors and combines them, then
+       # rebuilds author lists using references to the new unique set.
+       def uniquify_authors(initial_id = 0)
+         auth_index = {}
+         unique_authors.each_with_index do |a, i|
+           a.id = i + initial_id
+           auth_index.merge!(a.compact_string => a)
+         end
+
+         collection.each do |r|
+           new_authors = []
+           r.authors.inject(new_authors){|ary, a| ary.push(auth_index[a.compact_string])}
+           r.authors = new_authors
+         end
+         true
+       end
+
+       # Build the author index:
+       # {Ref#id => [a1#id, ... an#id]}
+       def build_author_index
+         collection.each do |r|
+           @author_index.merge!(r.id => r.authors.collect{|a| a.id ? a.id : -1})
+         end
+       end
+
+       # Return an array of the unique author strings in this collection.
+       def unique_author_strings
+         auths = {}
+         collection.each do |r|
+           r.authors.each do |a|
+             auths.merge!(a.display_name => nil)
+           end
+         end
+         auths.keys.sort
+       end
+
+       # Returns an Array of unique Taxonifi::Model::Person.
+       # Will need better indexing on big lists?
+       def unique_authors
+         auths = []
+         collection.each do |r|
+           r.authors.each do |a|
+             found = false
+             auths.each do |x|
+               if a.identical?(x)
+                 found = true
+                 break
+               end
+             end
+             auths.push a.clone if !found
+           end
+         end
+         auths
+       end
+
+     end
+   end
+
+ end
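A short sketch of the de-duplication workflow these methods support (illustrative values only):

  rc = Taxonifi::Model::RefCollection.new
  rc.add_object(Taxonifi::Model::Ref.new(:author_year => 'Smith, 1920'))
  rc.add_object(Taxonifi::Model::Ref.new(:author_year => 'Smith, 1922'))

  rc.unique_author_strings   # sorted display names of every author seen
  rc.uniquify_authors(1)     # collapse identical People and re-point each Ref#authors at them
  rc.build_author_index      # @author_index then maps Ref#id => [Person#id, ...]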
data/lib/models/species_name.rb ADDED
@@ -0,0 +1,85 @@
+ module Taxonifi
+   class SpeciesNameError < StandardError; end
+   module Model
+
+     # The species name model is just a pointer to (up to) 5 Taxonifi::Model::Names.
+     # The various metadata (author, year, original combination) are stored with the individual
+     # instances of those names.
+     # Taxonifi::Model::Names have no ids!
+
+     class SpeciesName < Taxonifi::Model::Base
+       ATTRIBUTES = [:genus, :subgenus, :species, :subspecies, :parent]
+       ATTRIBUTES.each do |a|
+         attr_accessor a
+       end
+
+       def initialize(options = {})
+         opts = {
+         }.merge!(options)
+         build(ATTRIBUTES, opts)
+         true
+       end
+
+       # Set the genus name.
+       def genus=(genus)
+         @genus = genus
+       end
+
+       # Set the subgenus name.
+       def subgenus=(subgenus)
+         raise Taxonifi::SpeciesNameError, "Species name must have a genus name before a subgenus can be assigned." if @genus.nil?
+         @subgenus = subgenus
+         @subgenus.parent = @genus
+       end
+
+       # Set the species name.
+       def species=(species)
+         raise Taxonifi::SpeciesNameError, "Species name must have a genus name before a species can be assigned." if @genus.nil?
+         @species = species
+         @species.parent = (@subgenus ? @subgenus : @genus)
+       end
+
+       # Set the subspecies name.
+       def subspecies=(subspecies)
+         raise Taxonifi::SpeciesNameError, "Species name must have a species name before a subspecies can be assigned." if @species.nil?
+         @subspecies = subspecies
+         @subspecies.parent = @species
+       end
+
+       # Set the parent name.
+       def parent=(parent)
+         if parent.class != Taxonifi::Model::Name
+           raise SpeciesNameError, "Parent is not a Taxonifi::Model::Name."
+         end
+
+         if parent.rank.nil? || (Taxonifi::RANKS.index('genus') <= Taxonifi::RANKS.index(parent.rank))
+           raise Taxonifi::SpeciesNameError, "Parents of SpeciesNames must have a rank higher than genus."
+         end
+
+         @parent = parent
+       end
+
+       # Return an array of the Name objects that have been set.
+       def names
+         ATTRIBUTES.collect{|a| self.send(a)}.compact
+       end
+
+       # Return a string representation of the species name.
+       def display_name
+         strs = []
+         self.names.each do |n|
+           case n.rank
+           when 'subgenus'
+             strs.push "(#{n.name})"
+           else
+             strs.push n.name
+           end
+         end
+         strs.push self.names.last.author_year
+         txt = strs.compact.join(" ")
+         txt
+       end
+     end
+   end
+ end
+
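A hand-built example (hypothetical names) of the setter ordering these methods enforce:

  sn = Taxonifi::Model::SpeciesName.new
  sn.genus   = Taxonifi::Model::Name.new(:name => 'Aus', :rank => 'genus')
  sn.species = Taxonifi::Model::Name.new(:name => 'bus', :rank => 'species')  # raises unless genus is already set
  sn.names          # the Name objects that have been set
  sn.display_name   # roughly "Aus bus" plus the last name's author/year, when present

  # sn.parent= accepts only a Taxonifi::Model::Name ranked above genus (e.g. a family).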
data/lib/splitter/builder.rb ADDED
@@ -0,0 +1,26 @@
+ # Builder functionality for the parsing/lexing framework.
+ module Taxonifi::Splitter::Builder
+
+   # Load all builders (= models).
+   # TODO: perhaps use a different scope that doesn't require loading all at once
+   Dir.glob( File.expand_path(File.join(File.dirname(__FILE__), "../models/*.rb") )) do |file|
+     require file
+   end
+
+   # Build and return a Taxonifi::Model::AuthorYear from a string.
+   def self.build_author_year(text)
+     lexer = Taxonifi::Splitter::Lexer.new(text)
+     builder = Taxonifi::Model::AuthorYear.new
+     Taxonifi::Splitter::Parser.new(lexer, builder).parse_author_year
+     builder
+   end
+
+   # Build and return a Taxonifi::Model::SpeciesName from a string.
+   def self.build_species_name(text)
+     lexer = Taxonifi::Splitter::Lexer.new(text, :species_name)
+     builder = Taxonifi::Model::SpeciesName.new
+     Taxonifi::Splitter::Parser.new(lexer, builder).parse_species_name
+     builder
+   end
+
+ end
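These two builders are the main entry points used elsewhere in the gem (Ref#add_author_year calls build_author_year). An illustrative call, assuming the token lists can lex the example strings:

  ay = Taxonifi::Splitter::Builder.build_author_year('Smith, 1920')
  ay.year     # Integer year
  ay.people   # Array of Taxonifi::Model::Person

  sn = Taxonifi::Splitter::Builder.build_species_name('Aus (Bus) cus Smith, 1920')
  sn.display_name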
data/lib/splitter/lexer.rb ADDED
@@ -0,0 +1,70 @@
+ #
+ # Lexer taken verbatim from OboParser and other mjy gems.
+ #
+ class Taxonifi::Splitter::Lexer
+   attr_reader :input, :token_list
+
+   def initialize(input, token_list = nil)
+     raise Taxonifi::Splitter::SplitterError, "Invalid token list passed to Lexer." if (!token_list.nil? && !Taxonifi::Splitter::TOKEN_LISTS.include?(token_list))
+     token_list = :global_token_list if token_list.nil?
+
+     @input = input
+     @token_list = token_list
+     @next_token = nil
+   end
+
+   # Checks whether the next token is of the specified class.
+   def peek(token_class, token_list = nil)
+     token = read_next_token(token_class)
+     return token.class == token_class
+   end
+
+   # Return (and delete) the next token from the input stream, or raise an exception
+   # if the next token is not of the given class.
+   def pop(token_class)
+     token = read_next_token(token_class)
+     @next_token = nil
+     if token.class != token_class
+       raise(Taxonifi::Splitter::SplitterError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
+     else
+       return token
+     end
+   end
+
+   private
+
+   # Read (and store) the next token from the input, if it has not already been read.
+   def read_next_token(token_class)
+     if @next_token
+       return @next_token
+     else
+       # check for a match on the specified class first
+       if match(token_class)
+         return @next_token
+       else
+         # now check all the tokens for a match
+         Taxonifi::Splitter::Tokens.send(@token_list).each {|t|
+           return @next_token if match(t)
+         }
+       end
+       # no match: either end of string or a lex error
+       if @input != ''
+         raise(Taxonifi::Splitter::SplitterError, "Lexer Error, unknown token at |#{@input[0..20]}...", caller)
+       else
+         return nil
+       end
+     end
+   end
+
+   # Match a token to the input.
+   def match(token_class)
+     if (m = token_class.regexp.match(@input))
+       @next_token = token_class.new(m[1])
+       @input = @input[m.end(0)..-1]
+       return true
+     else
+       return false
+     end
+   end
+
+ end
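A sketch of the peek/pop cycle against the token classes the parser below uses (token regexp coverage is assumed, not verified here):

  lexer = Taxonifi::Splitter::Lexer.new('Aus bus Smith, 1920', :species_name)
  if lexer.peek(Taxonifi::Splitter::Tokens::Quadrinomial)
    quad = lexer.pop(Taxonifi::Splitter::Tokens::Quadrinomial)   # consumes the name portion
  end
  lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)              # raises SplitterError if the next token differs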
data/lib/splitter/parser.rb ADDED
@@ -0,0 +1,54 @@
+ #
+ # Parser pattern taken from OboParser and other mjy gems.
+ #
+ # The parser takes a builder and a lexer and does the actual breakdown.
+ #
+ class Taxonifi::Splitter::Parser
+   def initialize(lexer, builder)
+     @lexer = lexer
+     @builder = builder
+   end
+
+   # Parse out an author/year combination.
+   # TODO: This is only indirectly tested in lumper code.
+   def parse_author_year
+     t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)
+
+     lexer = Taxonifi::Splitter::Lexer.new(t.authors)
+     authors = lexer.pop(Taxonifi::Splitter::Tokens::Authors)
+
+     # TODO: A people collection?
+     authors.names.each do |a|
+       n = Taxonifi::Model::Person.new()
+       n.last_name = a[:last_name]
+       n.initials = a[:initials]
+       @builder.people.push n
+     end
+
+     @builder.year = t.year.to_i
+     @builder.parens = t.parens
+   end
+
+   # Parse a species name.
+   def parse_species_name
+     t = @lexer.pop(Taxonifi::Splitter::Tokens::Quadrinomial)
+     ranks = %w{genus subgenus species subspecies}
+     names = {}
+     last_parent = nil
+     ranks.each do |r|
+       names.merge!(r => nil)
+       @builder.send("#{r}=", Taxonifi::Model::Name.new(:name => t.send(r), rank: r)) if t.send(r)
+     end
+
+     if @lexer.peek(Taxonifi::Splitter::Tokens::AuthorYear)
+       t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)
+       @builder.names.last.author = t.authors
+       @builder.names.last.year = t.year
+       @builder.names.last.parens = !t.parens
+       @builder.names.last.derive_authors_year
+     end
+
+     @builder
+   end
+
+ end
data/lib/splitter/splitter.rb ADDED
@@ -0,0 +1,45 @@
+ module Taxonifi
+
+   # An implementation of the parser/lexer/token pattern by Krishna Dole which in turn was based on
+   # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library, which has evolved
+   # into mjy's obo_parser/nexus_parser libraries.
+   module Splitter
+
+     TOKEN_LISTS = [
+       :global_token_list,
+       :volume_number,
+       :pages,
+       :species_name
+     ]
+
+     class SplitterError < StandardError; end
+
+     require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
+     require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
+     require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
+     require File.expand_path(File.join(File.dirname(__FILE__), 'builder'))
+
+     # stub, we might not need it
+     class Splitter
+       def initialize
+         true
+       end
+     end
+
+   end # end Splitter module
+ end # Taxonifi module
+
+ #= Implementation
+
+ def do_bar(input)
+   @input = input
+   raise(Taxonifi::Splitter::SplitterError, "Nothing passed to parse!") if !@input || @input.size == 0
+
+   builder = Taxonifi::Splitter::SplitterBuilder.new
+   lexer = Taxonifi::Splitter::Lexer.new(@input)
+   Taxonifi::Splitter::Parser.new(lexer, builder).foo
+   return builder.bar
+ end
+