taxonifi 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
 - data/Gemfile +18 -0
 - data/Gemfile.lock +30 -0
 - data/LICENSE.txt +20 -0
 - data/README.rdoc +155 -0
 - data/Rakefile +53 -0
 - data/VERSION +1 -0
 - data/lib/assessor/assessor.rb +31 -0
 - data/lib/assessor/base.rb +17 -0
 - data/lib/assessor/row_assessor.rb +131 -0
 - data/lib/export/export.rb +9 -0
 - data/lib/export/format/base.rb +43 -0
 - data/lib/export/format/species_file.rb +341 -0
 - data/lib/lumper/lumper.rb +334 -0
 - data/lib/lumper/lumps/parent_child_name_collection.rb +84 -0
 - data/lib/models/author_year.rb +39 -0
 - data/lib/models/base.rb +73 -0
 - data/lib/models/collection.rb +92 -0
 - data/lib/models/generic_object.rb +15 -0
 - data/lib/models/geog.rb +59 -0
 - data/lib/models/geog_collection.rb +28 -0
 - data/lib/models/name.rb +206 -0
 - data/lib/models/name_collection.rb +149 -0
 - data/lib/models/person.rb +49 -0
 - data/lib/models/ref.rb +85 -0
 - data/lib/models/ref_collection.rb +106 -0
 - data/lib/models/species_name.rb +85 -0
 - data/lib/splitter/builder.rb +26 -0
 - data/lib/splitter/lexer.rb +70 -0
 - data/lib/splitter/parser.rb +54 -0
 - data/lib/splitter/splitter.rb +45 -0
 - data/lib/splitter/tokens.rb +322 -0
 - data/lib/taxonifi.rb +36 -0
 - data/test/file_fixtures/Lygaeoidea.csv +801 -0
 - data/test/helper.rb +38 -0
 - data/test/test_exporter.rb +32 -0
 - data/test/test_lumper_geogs.rb +59 -0
 - data/test/test_lumper_hierarchical_collection.rb +88 -0
 - data/test/test_lumper_names.rb +119 -0
 - data/test/test_lumper_parent_child_name_collection.rb +41 -0
 - data/test/test_lumper_refs.rb +91 -0
 - data/test/test_parser.rb +34 -0
 - data/test/test_splitter.rb +27 -0
 - data/test/test_splitter_tokens.rb +403 -0
 - data/test/test_taxonifi.rb +11 -0
 - data/test/test_taxonifi_accessor.rb +61 -0
 - data/test/test_taxonifi_geog.rb +51 -0
 - data/test/test_taxonifi_name.rb +186 -0
 - data/test/test_taxonifi_name_collection.rb +158 -0
 - data/test/test_taxonifi_ref.rb +90 -0
 - data/test/test_taxonifi_ref_collection.rb +69 -0
 - data/test/test_taxonifi_species_name.rb +95 -0
 - metadata +167 -0
 
| 
         @@ -0,0 +1,149 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Taxonifi
         
     | 
| 
      
 2 
     | 
    
         
            +
              # StandardError subclass named for NameCollection failures.
              # NOTE(review): it is not raised anywhere in the code visible here.
              class NameCollectionError < StandardError
              end
         
     | 
| 
      
 3 
     | 
    
         
            +
              module Model
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                # A collection of taxonomic names. 
         
     | 
| 
      
 6 
     | 
    
         
            +
                class NameCollection < Taxonifi::Model::Collection
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                  # Hash of rank => { name string => [ids] }; filled in by #initialize
                  # and #index_by_name.
                  attr_accessor :by_name_index
                  # Reader only. The writer is the explicit, type-checking #ref_collection=
                  # defined in this class; generating one here as well would just be
                  # redefined (and warn under `ruby -w`).
                  attr_reader :ref_collection
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                  # Build an empty name collection. The arguments are forwarded
                  # unchanged to the superclass initializer by the bare `super`.
                  def initialize(options = {})
                    super
                    @collection = []
                    # rank => { name string => [ids] }, one bucket per known rank.
                    @by_name_index = {}
                    Taxonifi::RANKS.each { |rank| @by_name_index[rank] = {} }
                    # Unranked names get dumped in here.
                    @by_name_index['unknown'] = {}
                    @ref_collection = nil
                    true
                  end
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                  # The model class associated with this collection: Taxonifi::Model::Name.
                  def object_class
                    ::Taxonifi::Model::Name
                  end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                  # Return the rank that sits immediately before (in RANKS order) the
                  # earliest-listed rank used by any name in this collection.
                  # NOTE(review): this assumes RANKS is ordered from highest to lowest
                  # rank - confirm against the RANKS definition.
                  #
                  # * Names whose rank is nil or not present in RANKS are ignored
                  #   (previously they raised NoMethodError on `nil < highest`).
                  # * Returns the last entry of RANKS when no name has a recognized rank.
                  # * Returns nil when a name already occupies the first rank in RANKS
                  #   (previously RANKS[-1] wrapped around and returned the last rank).
                  def encompassing_rank
                    positions = @collection.map { |n| RANKS.index(n.rank) }.compact
                    highest = positions.min || RANKS.size
                    return nil if highest.zero?
                    RANKS[highest - 1]
                  end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                  # Return an Array of the name objects in the collection whose rank
                  # equals +rank+ (empty when there are none).
                  # Raises RuntimeError, now with a descriptive message, when +rank+ is
                  # not one of RANKS.
                  # TODO: Should index this on add_object
                  def names_at_rank(rank)
                    raise "Unknown rank [#{rank}]; expected one of: #{RANKS.join(', ')}." unless RANKS.include?(rank)
                    @collection.select { |n| n.rank == rank }
                  end
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
                  # Returns the id of a matching existing name, or false if there is no match.
                  # A match requires the same name string indexed under the same
                  # (downcased) rank, and an identical parent id vector ("identity").
                  #
                  # Names with a nil rank are looked up under 'unknown'; previously the
                  # fallback ran after `name.rank.downcase` and so could never apply.
                  # A rank with no bucket in by_name_index now yields false instead of
                  # raising NoMethodError.
                  def name_exists?(name = Taxonifi::Model::Name)
                    rank = name.rank
                    rank = rank ? rank.downcase : 'unknown'

                    # Does the name (string) exist at this rank?
                    candidate_ids = (by_name_index[rank] || {})[name.name]
                    return false unless candidate_ids

                    # Yes, check to see if parents match.
                    candidate_ids.each do |id|
                      vector = parent_id_vector(id)
                      # Drop the last element before comparing with the incoming name's
                      # parent vector (presumably the candidate's own id - verify against
                      # Collection#parent_id_vector).
                      vector.pop
                      return id if vector == parent_id_vector(name.parent.id)
                    end
                    false
                  end
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
                  # Add an individual name object, indexing it.
                  # Calls the superclass #add_object with the same argument, records
                  # the object in by_name_index, and returns the object.
                  def add_object(obj)
                    super
                    obj.tap { |added| index_by_name(added) }
                  end
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
                  # Add an individual name object through the superclass
                  # #add_object_pre_indexed, then record it in by_name_index and return it.
                  # NOTE(review): "pre indexed" presumably means the object already
                  # carries its id - confirm against the superclass implementation.
                  def add_object_pre_indexed(obj)
                    super
                    obj.tap { |added| index_by_name(added) }
                  end
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
                  # Add the names of a Taxonifi::Model::SpeciesName as individual objects.
                  #
                  # Walks sn.names in order. Each name is parented to the object whose id
                  # was produced by the previous step (starting from sn.genus.parent.id).
                  # A name that #name_exists? reports as present is not added again; its
                  # existing id is used for the next step instead.
                  #
                  # Returns the id produced by the last step (the starting parent id when
                  # sn.names is empty). Raises RuntimeError when sn.genus.parent is nil.
                  def add_species_name(sn)
                    if sn.genus.parent.nil?
                      raise "Failed trying to load [#{sn.display_name}]. SpeciesName#genus#parent must be set before using add_species_name."
                    end

                    starting_parent_id = sn.genus.parent.id
                    sn.names.inject(starting_parent_id) do |parent_id, name|
                      name.parent = object_by_id(parent_id)
                      existing_id = name_exists?(name)
                      if existing_id
                        existing_id
                      else
                        add_object(name)
                        name.id
                      end
                    end
                  end
         
     | 
| 
      
 99 
     | 
    
         
            +
             
     | 
| 
      
 100 
     | 
    
         
            +
                  # Variant of #add_species_name: passes each name in sn.names to
                  # #add_object unless #name_exists? reports a match. Unlike
                  # #add_species_name it does not assign parents to the incoming names.
                  # Returns sn.names.
                  # TODO: deprecate?
                  def add_species_name_unindexed(sn)
                    sn.names.each { |name| add_object(name) unless name_exists?(name) }
                  end
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
                  # Take the author/years of these names and generate a reference collection.
                  #
                  # One Taxonifi::Model::Ref is created per distinct, non-empty
                  # author_year_string, added in sorted order. The new collection is
                  # created with :initial_id => initial_id, stored in @ref_collection
                  # (replacing any previous value) and returned.
                  def generate_ref_collection(initial_id = 0)
                    rc = Taxonifi::Model::RefCollection.new(:initial_id => initial_id)
                    author_years = collection.map { |n| n.author_year_string }.compact.uniq.sort
                    author_years.each do |author_year|
                      next if author_year.size.zero?
                      rc.add_object(Taxonifi::Model::Ref.new(:author_year => author_year))
                    end
                    @ref_collection = rc
                  end
         
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
                  # Assign a reference collection to this name collection.
                  # !! Overwrites existing reference collection, including ones built
                  # using generate_ref_collection.
                  #
                  # Only a Taxonifi::Model::RefCollection (or, now, a subclass of it) is
                  # accepted; any other value is silently ignored and the current
                  # reference collection is kept.
                  def ref_collection=(ref_collection)
                    @ref_collection = ref_collection if ref_collection.is_a?(Taxonifi::Model::RefCollection)
                  end
         
     | 
| 
      
 134 
     | 
    
         
            +
             
     | 
| 
      
 135 
     | 
    
         
            +
                  protected
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
      
 137 
     | 
    
         
            +
                  # Index the object by name into by_name_index, which looks like
                  #   { rank => { "Foo bar" => [1, 2, 93] } }
                  #
                  # The rank is downcased so entries land in the same bucket that
                  # #name_exists? reads from; objects with a nil rank go under 'unknown'.
                  # Returns the Array of ids now stored for the name.
                  def index_by_name(obj)
                    rank = obj.rank
                    rank = rank ? rank.downcase : 'unknown'
                    ids = (by_name_index[rank][obj.name] ||= [])
                    ids.push obj.id
                  end
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
      
 147 
     | 
    
         
            +
                end
         
     | 
| 
      
 148 
     | 
    
         
            +
              end
         
     | 
| 
      
 149 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,49 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.expand_path(File.join(File.dirname(__FILE__), "../models/base.rb"))
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Taxonifi
         
     | 
| 
      
 4 
     | 
    
         
            +
              module Model
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
                # Simple Person class. 
         
     | 
| 
      
 7 
     | 
    
         
            +
                # You can store multiple initials and suffixes.
         
     | 
| 
      
 8 
     | 
    
         
            +
                class Person < Taxonifi::Model::Base
         
     | 
| 
      
 9 
     | 
    
         
            +
                  # Attribute names handed to #build by #initialize; each one also
                  # gets a reader and a writer.
                  ATTRIBUTES = [
                    :first_name,
                    :last_name,
                    :initials, # an Array, no periods.
                    :suffix    # an Array
                  ]

                  attr_accessor(*ATTRIBUTES)
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                  # Create a Person. A fresh Hash holding the entries of +options+ is
                  # passed, together with ATTRIBUTES, to #build (defined outside this class).
                  def initialize(options = {})
                    # NOTE(review): the original comment here read "Check for valid opts
                    # prior to building", but no such check is performed in this method.
                    build(ATTRIBUTES, {}.merge(options))
                    true
                  end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                  # Returns the attribute values, taken in sorted attribute-name order,
                  # joined with pipes, downcased and stripped of all whitespace.
                  # Array values are flattened into the pipe-delimited string and nil
                  # values become empty fields. Used in identity comparisons.
                  def compact_string
                    values = ATTRIBUTES.sort.map { |attribute| send(attribute) }
                    values.join('|').downcase.gsub(/\s/, '')
                  end
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                  # Plain rendering of the person: last name, first name, initials and
                  # suffix, in that order, separated by single spaces. Attributes that
                  # are nil are left out.
                  def display_name
                    present = [@last_name, @first_name, @initials, @suffix].compact
                    present.flatten.join(' ')
                  end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                  # Return a string representing the initials with periods added,
                  # e.g. ["J", "Q"] => "J.Q.".
                  # Returns nil when there are no initials: either @initials is nil or
                  # it is empty (an empty Array previously produced a lone ".").
                  def initials_string
                    return nil if @initials.nil? || @initials.empty?
                    @initials.join('.') + '.'
                  end
         
     | 
| 
      
 47 
     | 
    
         
            +
                end
         
     | 
| 
      
 48 
     | 
    
         
            +
              end
         
     | 
| 
      
 49 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/models/ref.rb
    ADDED
    
    | 
         @@ -0,0 +1,85 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Taxonifi
         
     | 
| 
      
 2 
     | 
    
         
            +
              # StandardError subclass raised by Taxonifi::Model::Ref#initialize when
              # :author_year is combined with :year or :authors.
              class RefError < StandardError
              end
         
     | 
| 
      
 3 
     | 
    
         
            +
              module Model
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                # A basic reference object.  
         
     | 
| 
      
 6 
     | 
    
         
            +
                class Ref < Taxonifi::Model::Base
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                  # These attributes are set automatically on #new() (the list is
                  # handed to #build by #initialize).
                  ATTRIBUTES = [
                    :authors, :title, :year, :publication, :volume, :number,
                    :pages, :pg_start, :pg_end, :cited_page, :full_citation
                  ]
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                  # Array of Taxonifi::Model::Person
                  attr_accessor :authors
                  # String
                  attr_accessor :title
                  # String
                  attr_accessor :year
                  # String
                  attr_accessor :publication
                  # String
                  attr_accessor :volume
                  # String
                  attr_accessor :number
                  # String.  Anything that doesn't fit in a page range.
                  attr_accessor :pages
                  # String
                  attr_accessor :pg_start
                  # String
                  attr_accessor :pg_end
                  # String.  Some specific page(s) of note.
                  attr_accessor :cited_page
                  # String. The full text of the citation, as read from input or assigned, not computed from individual components.
                  attr_accessor :full_citation

                  # String. Computed index based on existing Ref#authors and Ref#year.
                  # Writer only: the reader is the explicit, memoizing #author_year_index
                  # method of this class, so generating a reader here would just be
                  # redefined (and warn under `ruby -w`).
                  attr_writer :author_year_index
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
                  # Create a reference from an options hash.
                  #
                  # ATTRIBUTES and a copy of +options+ are passed to #build, and @authors
                  # defaults to an empty Array. If :author_year is passed (and is not
                  # empty) it is broken down into people + year via #add_author_year.
                  #
                  # Raises Taxonifi::RefError when :author_year is given together with
                  # :year or :authors.
                  def initialize(options = {})
                    opts = {}.merge(options)
                    @parent = nil
                    build(ATTRIBUTES, opts)
                    @authors = [] if @authors.nil?

                    author_year = opts[:author_year]
                    if author_year && !(opts[:year].nil? && opts[:authors].nil?)
                      raise Taxonifi::RefError, 'If :author_year is provided then authors and year must not be.'
                    end
                    add_author_year(author_year) unless author_year.nil? || author_year.size <= 0
                    true
                  end
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
                  # Parse +string+ with Taxonifi::Splitter::Builder.build_author_year and
                  # copy the result's year into @year and its people into @authors.
                  # Returns the people that were assigned.
                  def add_author_year(string)
                    parsed = Taxonifi::Splitter::Builder.build_author_year(string)
                    @year = parsed.year
                    @authors = parsed.people
                  end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                  # Returns a pipe delimited representation of the reference: the
                  # concatenated compact strings of the authors, followed by year, title,
                  # publication, volume, number, pages, pg_start, pg_end and cited_page.
                  # The result is downcased and stripped of all whitespace; nil fields
                  # become empty.
                  def compact_string
                    author_part = authors.map { |person| person.compact_string }.join
                    fields = [author_part, year, title, publication, volume, number, pages, pg_start, pg_end, cited_page]
                    fields.join('|').downcase.gsub(/\s/, '')
                  end
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                  # Returns the author/year index, generating and caching it on
                  # first use. Call generate_author_year_index to rebuild it.
                  def author_year_index
                    return @author_year_index if @author_year_index
                    @author_year_index = generate_author_year_index
                  end
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
                  # (Re-)generates the author year index from the current
                  # @authors and @year, stores it in @author_year_index and
                  # returns it.
                  def generate_author_year_index
                    author_year = Taxonifi::Model::AuthorYear.new(people: @authors, year: @year)
                    @author_year_index = author_year.compact_index
                  end
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
                end
         
     | 
| 
      
 84 
     | 
    
         
            +
              end
         
     | 
| 
      
 85 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,106 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Taxonifi
              # Error class for RefCollection.
              class RefCollectionError < StandardError; end

              module Model

                # A collection of references.
                class RefCollection < Taxonifi::Model::Collection

                  # An optional index when there is one reference per row.
                  attr_accessor :row_index

                  # Points a Ref#id to an array of Person#ids.
                  # Built on request (see #build_author_index).
                  attr_accessor :author_index

                  # Forwards +options+ unchanged to the superclass initializer,
                  # then starts with an empty row index and author index.
                  def initialize(options = {})
                    super
                    @row_index = []
                    @author_index = {}
                    true
                  end

                  # The instance collection class.
                  def object_class
                    Taxonifi::Model::Ref
                  end

                  # The object at a given row, i.e. whatever is stored in
                  # row_index at +row_number+.
                  # TODO: inherit from Collection?
                  def object_from_row(row_number)
                    @row_index[row_number]
                  end

                  # Incrementally (re-)assigns the id of every associated author (Person),
                  # starting at +initial_id+.
                  # This is only really useful if you assume every author is unique.
                  def enumerate_authors(initial_id = 0)
                    next_id = initial_id
                    collection.each do |ref|
                      ref.authors.each do |author|
                        author.id = next_id
                        next_id += 1
                      end
                    end
                  end

                  # Finds unique authors, and combines them, then
                  # rebuilds author lists using references to the new unique set.
                  # Unique authors are numbered from +initial_id+. Returns true.
                  def uniquify_authors(initial_id = 0)
                    # compact_string => the single shared Person for that string
                    auth_index = {}
                    unique_authors.each_with_index do |author, i|
                      author.id = i + initial_id
                      auth_index[author.compact_string] = author
                    end

                    collection.each do |ref|
                      ref.authors = ref.authors.map { |author| auth_index[author.compact_string] }
                    end
                    true
                  end

                  # Build the author index.
                  #   {Ref#id => [a1#id, ... an#id]}
                  # Authors without an id are recorded as -1.
                  def build_author_index
                    collection.each do |ref|
                      @author_index[ref.id] = ref.authors.map { |author| author.id || -1 }
                    end
                  end

                  # Return a sorted array of the unique author strings
                  # (Person#display_name) in this collection.
                  def unique_author_strings
                    seen = {}
                    collection.each do |ref|
                      ref.authors.each { |author| seen[author.display_name] = nil }
                    end
                    seen.keys.sort
                  end

                  # Returns Array of Taxonifi::Model::Person, one clone per distinct
                  # author as judged by Person#identical?.
                  # Will need better indexing on big lists?
                  def unique_authors
                    auths = []
                    collection.each do |ref|
                      ref.authors.each do |author|
                        # any? stops at the first match; the previous `next`
                        # inside the inner each kept scanning the whole list.
                        auths.push(author.clone) unless auths.any? { |known| author.identical?(known) }
                      end
                    end
                    auths
                  end

                end
              end

            end
         
     | 
| 
         @@ -0,0 +1,85 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Taxonifi
              # Error class raised by SpeciesName's writers.
              class SpeciesNameError < StandardError; end
              module Model

                # The species name model is just a pointer to 5 Taxonifi::Model::Names.
                # The various metadata (author, year, original combination) is stored with the individual
                # instances of those names.
                # Taxonifi::Model::Names have no ids!

                class SpeciesName < Taxonifi::Model::Base
                  ATTRIBUTES = [:genus, :subgenus, :species, :subspecies, :parent]

                  # Readers only: every attribute has a hand-written writer below,
                  # so generating writers too would just be redefined.
                  attr_reader(*ATTRIBUTES)

                  # Builds the name from +options+ via Base#build, keyed by ATTRIBUTES.
                  def initialize(options = {})
                    opts = {}.merge(options)
                    build(ATTRIBUTES, opts)
                    true
                  end

                  # Set the genus name.
                  def genus=(genus)
                    @genus = genus
                  end

                  # Set the subgenus name and point its parent at the genus.
                  # Raises Taxonifi::SpeciesNameError if no genus is set yet.
                  def subgenus=(subgenus)
                    raise Taxonifi::SpeciesNameError, "Species name must have a Genus name before subgenus can be assigned" if @genus.nil?
                    @subgenus = subgenus
                    @subgenus.parent = @genus
                  end

                  # Set the species name; its parent becomes the subgenus when one
                  # is set, otherwise the genus.
                  # Raises Taxonifi::SpeciesNameError if no genus is set yet.
                  def species=(species)
                    raise Taxonifi::SpeciesNameError, "Species name must have a Genus name before species can be assigned" if @genus.nil?
                    @species = species
                    @species.parent = (@subgenus ? @subgenus : @genus)
                  end

                  # Set the subspecies name and point its parent at the species.
                  # Raises Taxonifi::SpeciesNameError if no species is set yet.
                  def subspecies=(subspecies)
                    raise Taxonifi::SpeciesNameError, "Subspecies name must have a species name before subspecies can be assigned" if @species.nil?
                    @subspecies = subspecies
                    @subspecies.parent = @species
                  end

                  # Set the parent name.
                  # Raises Taxonifi::SpeciesNameError unless +parent+ is exactly a
                  # Taxonifi::Model::Name whose rank sits before 'genus' in
                  # Taxonifi::RANKS.
                  def parent=(parent)
                    if parent.class != Taxonifi::Model::Name
                      raise Taxonifi::SpeciesNameError, "Parent is not a Taxonifi::Model::Name."
                    end

                    # A rank missing from RANKS yields a nil index; treat it as
                    # invalid instead of letting `Integer <= nil` raise ArgumentError.
                    parent_index = parent.rank.nil? ? nil : Taxonifi::RANKS.index(parent.rank)
                    if parent_index.nil? || Taxonifi::RANKS.index('genus') <= parent_index
                      raise Taxonifi::SpeciesNameError, "Parents of SpeciesNames must have rank higher than Genus."
                    end

                    @parent = parent
                  end

                  # Return an array of the Name objects that are set, in
                  # ATTRIBUTES order.
                  def names
                    ATTRIBUTES.collect { |a| self.send(a) }.compact
                  end

                  # Return a string representation of the species name: the name of
                  # each set Name (subgenus in parentheses) followed by the
                  # author_year of the last one, space separated. Returns an empty
                  # string when no names are set.
                  # NOTE(review): #names includes :parent, which is last in
                  # ATTRIBUTES, so a set parent contributes the trailing name and
                  # the author_year - confirm this is intended.
                  def display_name
                    current = names
                    strs = current.collect do |n|
                      n.rank == 'subgenus' ? "(#{n.name})" : n.name
                    end
                    last_name = current.last
                    strs.push(last_name.author_year) if last_name
                    strs.compact.join(" ")
                  end
                end
              end
            end
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
         @@ -0,0 +1,26 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Builder functionality for parsing/lexing framework.
            # Each build_* method wires a Lexer and a fresh model instance into a
            # Parser, runs the parse, and returns the populated model.
            module Taxonifi::Splitter::Builder

                # Load all builders (= models)
                #  TODO: perhaps use a different scope that doesn't require loading all at once
                models_glob = File.expand_path(File.join(File.dirname(__FILE__), "../models/*.rb"))
                Dir.glob(models_glob) { |model_file| require model_file }

                # Build and return Taxonifi::Model::AuthorYear from a string.
                def self.build_author_year(text)
                  author_year = Taxonifi::Model::AuthorYear.new
                  source = Taxonifi::Splitter::Lexer.new(text)
                  Taxonifi::Splitter::Parser.new(source, author_year).parse_author_year
                  author_year
                end

                # Build and return Taxonifi::Model::SpeciesName from a string.
                # The lexer is restricted to the :species_name token list.
                def self.build_species_name(text)
                  species_name = Taxonifi::Model::SpeciesName.new
                  source = Taxonifi::Splitter::Lexer.new(text, :species_name)
                  Taxonifi::Splitter::Parser.new(source, species_name).parse_species_name
                  species_name
                end

            end
         
     | 
| 
         @@ -0,0 +1,70 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Lexer taken verbatim from OboParser and other mjy gems.  
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            class Taxonifi::Splitter::Lexer
         
     | 
| 
      
 5 
     | 
    
         
            +
              attr_reader :input, :token_list
         
     | 
| 
      
 6 
     | 
    
         
            +
              def initialize(input, token_list = nil)
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                raise Taxonifi::Splitter::SplitterError, "Invalid token list passed to Lexer." if (!token_list.nil? && !Taxonifi::Splitter::TOKEN_LISTS.include?(token_list)  )
         
     | 
| 
      
 9 
     | 
    
         
            +
                token_list = :global_token_list if token_list.nil?
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                @input = input
         
     | 
| 
      
 12 
     | 
    
         
            +
                @token_list = token_list 
         
     | 
| 
      
 13 
     | 
    
         
            +
                @next_token = nil
         
     | 
| 
      
 14 
     | 
    
         
            +
              end
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
              # Checks whether the next token is of the specified class. 
         
     | 
| 
      
 17 
     | 
    
         
            +
              def peek(token_class, token_list = nil)
         
     | 
| 
      
 18 
     | 
    
         
            +
                token = read_next_token(token_class)
         
     | 
| 
      
 19 
     | 
    
         
            +
                return token.class == token_class
         
     | 
| 
      
 20 
     | 
    
         
            +
              end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
              # Return (and delete) the next token from the input stream, or raise an exception
         
     | 
| 
      
 23 
     | 
    
         
            +
              # if the next token is not of the given class.
         
     | 
| 
      
 24 
     | 
    
         
            +
              def pop(token_class)
         
     | 
| 
      
 25 
     | 
    
         
            +
                token = read_next_token(token_class)
         
     | 
| 
      
 26 
     | 
    
         
            +
                @next_token = nil
         
     | 
| 
      
 27 
     | 
    
         
            +
                if token.class != token_class
         
     | 
| 
      
 28 
     | 
    
         
            +
                  raise(Taxonifi::Splitter::SplitterError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
         
     | 
| 
      
 29 
     | 
    
         
            +
                else
         
     | 
| 
      
 30 
     | 
    
         
            +
                  return token
         
     | 
| 
      
 31 
     | 
    
         
            +
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
              end
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
              private
         
     | 
| 
      
 35 
     | 
    
         
            +
              
         
     | 
| 
      
 36 
     | 
    
         
            +
              # Read (and store) the next token from the input, if it has not already been read.
         
     | 
| 
      
 37 
     | 
    
         
            +
              # Return the token waiting in @next_token, lexing a fresh one from
              # @input when nothing is pending.
              #
              # token_class - the token class the caller would like to see next; it is
              #               tried before any class from the configured token list.
              #
              # Returns the matched token, or nil once @input is the empty string.
              # Raises Taxonifi::Splitter::SplitterError when input remains but no
              # token class matches it.
              def read_next_token(token_class)
                # A token is already pending: hand it back without touching @input.
                return @next_token if @next_token

                # The requested class gets the first attempt.
                return @next_token if match(token_class)

                # Otherwise try every class in the list named by @token_list, in order.
                Taxonifi::Splitter::Tokens.send(@token_list).each do |candidate|
                  return @next_token if match(candidate)
                end

                # Nothing matched: either the input is exhausted or it is unlexable.
                return nil if @input == ''

                raise(Taxonifi::Splitter::SplitterError, "Lexer Error, unknown token at |#{@input[0..20]}...", caller)
              end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
              # Match a token to the input.
              #
              # Applies token_class.regexp to @input. On success the first capture
              # group is used to build the token stored in @next_token, and everything
              # up to the end of the whole match is dropped from the front of @input.
              #
              # Returns true when the token matched, false otherwise (in which case
              # neither @next_token nor @input is modified).
              def match(token_class)
                found = token_class.regexp.match(@input)
                return false unless found

                @next_token = token_class.new(found[1])
                # Keep only what follows the complete match (group 0), not just group 1.
                @input = @input[found.end(0)..-1]
                true
              end
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,54 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Parser pattern taken from OboParser and other mjy gems.  
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # The parser takes a builder and a lexer and does the actual breakdown.
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            class Taxonifi::Splitter::Parser
              # lexer   - token source; #pop and #peek are called on it.
              # builder - object that collects the parsed values.
              def initialize(lexer, builder)
                @lexer = lexer
                @builder = builder
              end

              # Parse out an author year combination.
              #
              # Pops an AuthorYear token, re-lexes its authors string as an Authors
              # token, and pushes one Taxonifi::Model::Person (last_name, initials) per
              # parsed name onto @builder.people. Also sets @builder.year (via #to_i)
              # and @builder.parens from the AuthorYear token.
              #
              # TODO: This is only indirectly tested in lumper code
              def parse_author_year
                t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)

                # The authors portion is a string that needs its own lexing pass.
                lexer = Taxonifi::Splitter::Lexer.new(t.authors)
                authors = lexer.pop(Taxonifi::Splitter::Tokens::Authors)

                # TODO: A people collection?
                authors.names.each do |a|
                  n = Taxonifi::Model::Person.new
                  n.last_name = a[:last_name]
                  n.initials = a[:initials]
                  @builder.people.push n
                end

                @builder.year   = t.year.to_i
                @builder.parens = t.parens
              end

              # Parse a species name.
              #
              # Pops a Quadrinomial token and, for each of genus, subgenus, species and
              # subspecies that the token provides, assigns a Taxonifi::Model::Name of
              # that rank to the builder. When an AuthorYear token follows, its authors,
              # year and parens are applied to the last name held by the builder.
              #
              # Returns the builder.
              def parse_species_name
                t = @lexer.pop(Taxonifi::Splitter::Tokens::Quadrinomial)

                %w{genus subgenus species subspecies}.each do |rank|
                  value = t.send(rank)
                  next unless value # rank absent from this name
                  @builder.send("#{rank}=", Taxonifi::Model::Name.new(:name => value, rank: rank))
                end

                if @lexer.peek(Taxonifi::Splitter::Tokens::AuthorYear)
                  t = @lexer.pop(Taxonifi::Splitter::Tokens::AuthorYear)
                  last_name = @builder.names.last
                  last_name.author = t.authors
                  last_name.year = t.year
                  # NOTE(review): the flag is negated here but assigned as-is in
                  # parse_author_year; confirm the inversion is what Name#parens expects.
                  last_name.parens = !t.parens
                  last_name.derive_authors_year
                end

                @builder
              end

            end
         
     | 
| 
         @@ -0,0 +1,45 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Taxonifi

              # An implementation of the parser/lexer/token pattern by Krishna Dole which in turn was based on
              # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library, which has evolved
              # into mjy's obo_parser/nexus_parser libraries.
              module Splitter

                # Names of the token lists defined for the splitter.
                TOKEN_LISTS = [:global_token_list, :volume_number, :pages, :species_name]

                # Error class raised by the splitter components.
                class SplitterError < StandardError; end

                # Load the sibling component files from this file's directory.
                # The original load order (tokens, parser, lexer, builder) is kept.
                %w{tokens parser lexer builder}.each do |component|
                  require File.expand_path(component, File.dirname(__FILE__))
                end

                # stub, we might not need
                class Splitter
                  def initialize
                    true
                  end
                end

              end # end Splitter module
            end # Taxonifi module
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
            #= Implementation
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
            # Run the splitter pipeline over +input+: build a SplitterBuilder and a
            # Lexer for the input, let a Parser fill the builder, and return the
            # builder's result.
            #
            # input - the text to parse; must be non-nil and non-empty.
            #
            # Returns whatever builder.bar yields once the parser has run.
            # Raises Taxonifi::Splitter::SplitterError when nothing was passed.
            #
            # NOTE(review): `foo`, `bar` and SplitterBuilder read like placeholder
            # names from the parser template; the Parser class in this gem does not
            # define `foo`. Confirm the intended entry points before relying on this.
            def do_bar(input)
              @input = input
              raise(Taxonifi::Splitter::SplitterError, "Nothing passed to parse!") if !@input || @input.size == 0

              builder = Taxonifi::Splitter::SplitterBuilder.new
              lexer = Taxonifi::Splitter::Lexer.new(@input)
              # The namespace was previously misspelled "Taxonfi", which raised
              # NameError before any parsing could happen.
              Taxonifi::Splitter::Parser.new(lexer, builder).foo
              builder.bar
            end
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     |