RubyGems - taxonifi - Versions diffs - 0.2.0 → 0.3.2 - Mend

taxonifi 0.2.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

checksums.yaml +7 -0
data/.gitignore +59 -0
data/.travis.yml +11 -0
data/Gemfile +5 -17
data/Gemfile.lock +22 -40
data/README.md +192 -0
data/Rakefile +35 -26
data/lib/export/format/base.rb +1 -1
data/lib/export/format/species_file.rb +154 -152
data/lib/lumper/clump.rb +1 -1
data/lib/lumper/lumper.rb +22 -18
data/lib/lumper/lumps/parent_child_name_collection.rb +1 -2
data/lib/lumper/name_index.rb +21 -0
data/lib/{models → model}/author_year.rb +2 -2
data/lib/{models → model}/base.rb +35 -5
data/lib/{models → model}/collection.rb +8 -1
data/lib/{models → model}/name.rb +128 -36
data/lib/{models → model}/name_collection.rb +134 -33
data/lib/{models → model}/person.rb +1 -1
data/lib/{models → model}/ref.rb +4 -2
data/lib/model/ref_collection.rb +171 -0
data/lib/{models → model}/species_name.rb +24 -3
data/lib/splitter/builder.rb +1 -1
data/lib/splitter/parser.rb +5 -0
data/lib/splitter/tokens.rb +54 -9
data/lib/taxonifi/version.rb +3 -0
data/lib/taxonifi.rb +5 -9
data/taxonifi.gemspec +29 -99
data/test/helper.rb +1 -1
data/test/test_exporter.rb +1 -1
data/test/test_lumper_names.rb +9 -9
data/test/test_lumper_refs.rb +4 -4
data/test/test_parser.rb +97 -26
data/test/test_splitter_tokens.rb +25 -4
data/test/test_taxonifi_base.rb +1 -1
data/test/test_taxonifi_geog.rb +1 -1
data/test/test_taxonifi_name.rb +13 -14
data/test/test_taxonifi_name_collection.rb +11 -5
data/test/test_taxonifi_ref.rb +1 -1
data/test/test_taxonifi_ref_collection.rb +40 -3
data/test/test_taxonifi_species_name.rb +51 -1
data/travis/before_install.sh +2 -0
metadata +96 -66
data/README.rdoc +0 -154
data/VERSION +0 -1
data/lib/models/ref_collection.rb +0 -107
/data/lib/{models → model}/generic_object.rb +0 -0
/data/lib/{models → model}/geog.rb +0 -0
/data/lib/{models → model}/geog_collection.rb +0 -0
/data/lib/{models → model}/shared_class_methods.rb +0 -0

data/lib/export/format/species_file.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module Taxonifi::Export
     # tblRanks 5/17/2012
     SPECIES_FILE_RANKS = {
+      'variety' =>                 5,  # there is no variety rank per se in SFs, they are handled this way according to DE
       'subspecies' =>              5,
       'species' =>                 10,
       'species subgroup' =>        11,
@@ -55,18 +56,17 @@ module Taxonifi::Export
     attr_accessor :name_collection
     attr_accessor :ref_collection
     attr_accessor :pub_collection
-    attr_accessor :author_index
     attr_accessor :genus_names, :species_names, :nomenclator
     attr_accessor :authorized_user_id, :time
-    attr_accessor :starting_ref_id
+    attr_accessor :built_nomenclators
     def initialize(options = {})
       opts = {
         :nc => Taxonifi::Model::NameCollection.new,
         :export_folder => 'species_file',
         :authorized_user_id => nil,
-        :starting_ref_id => 1,                              # should be configured elsewhere... but
-        :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites}
+        :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites tblTypeSpecies}
       }.merge!(options)
       @manifest = opts[:manifest]
@@ -77,9 +77,7 @@ module Taxonifi::Export
       @name_collection = opts[:nc]
       @pub_collection = {} # title => id
       @authorized_user_id = opts[:authorized_user_id]
-      @author_index = {}
-      @starting_ref_id = opts[:starting_ref_id]
       # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
       # because it's all internal.  There will is a strong potential for key collisions if this pipeline is modified to
       # include references external to the initialized name_collection.  See also export_references.
@@ -93,11 +91,6 @@ module Taxonifi::Export
       @empty_quotes = ""
     end
-    # Assumes names that are the same are the same person.
-    def build_author_index
-      @author_index = @name_collection.ref_collection.unique_authors.inject({}){|hsh, a| hsh.merge!(a.compact_string => a)}
-    end
     def export()
       super
       # You must have
@@ -109,9 +102,6 @@ module Taxonifi::Export
       # Give authors unique ids:
       # @name_collection.ref_collection.uniquify_authors(1)
-      if @name_collection.ref_collection
-        build_author_index
-      end
       # raise Taxonifi::Export::ExportError, 'NameCollection has no RefCollection, you might try @name_collection.generate_ref_collection(1), or alter the manifest: hash.' if ! @name_collection.ref_collection.nil?
@@ -122,14 +112,28 @@ module Taxonifi::Export
       @name_collection.names_at_rank('subgenus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
       @name_collection.names_at_rank('species').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
       @name_collection.names_at_rank('subspecies').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
+      @name_collection.names_at_rank('variety').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
+      # Add combinations of names from nomenclators/citations as well
+      @name_collection.nomenclators.keys.each do |k|
+        @genus_names.merge!(@name_collection.nomenclators[k][0] => nil)
+        @genus_names.merge!(@name_collection.nomenclators[k][1] => nil)
+        @species_names.merge!(@name_collection.nomenclators[k][2] => nil)
+        @species_names.merge!(@name_collection.nomenclators[k][3] => nil)
+        @species_names.merge!(@name_collection.nomenclators[k][4] => nil)
+      end
+      @genus_names.delete_if{|key,value| key.nil? || key.length == 0}
+      @species_names.delete_if{|key,value| key.nil? || key.length == 0}
       str = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
       @manifest.each do |f|
         str << send(f)
       end
       str << ['COMMIT', 'END TRY', 'BEGIN CATCH',
-        'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
-        'ROLLBACK', 'END CATCH']
+              'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
+              'ROLLBACK', 'END CATCH']
       write_file('everything.sql', str.join("\n\n"))
       true
     end
@@ -142,56 +146,56 @@ module Taxonifi::Export
     #   nc.ref_collection = Taxonifi::Model::RefCollection.new
     #   etc.
     def export_references(options = {})
-      raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest: to achieve a similar result.'
-     #opts = {
-     #  :starting_ref_id => 0,
-     #  :starting_author_id => 0
-     #}
-     #configure_folders
-     #build_author_index
-     ## order matters
-     #['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
-     #  write_file(t, send(t))
-     #end
-    end
+      raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest to achieve a similar result.'
+      #configure_folders
+   end
-    # Get's the reference for a name as referenced
-    # by .related[:link_to_ref_from_row]
+    # Gets the reference for a name as referenced
+    # by .properties[:link_to_ref_from_row]
     def get_ref(name)
-      if not name.related[:link_to_ref_from_row].nil?
-        return @name_collection.ref_collection.object_from_row(name.related[:link_to_ref_from_row])
-      end
-      nil
+#     if not name.properties[:link_to_ref_from_row].nil?
+#       return @name_collection.ref_collection.object_from_row(name.properties[:link_to_ref_from_row])
+#     end
+#     nil
+      name.original_description_reference ? name.original_description_reference : nil
     end
     def tblTaxa
-      @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
+      @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode Extinct NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
       sql = []
-      @name_collection.collection.each do |n|
-        $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
-        ref = get_ref(n)
-        cols = {
-          TaxonNameID: n.id,
-          TaxonNameStr: n.parent_ids_sf_style,        # closure -> ends with 1
-          RankID: SPECIES_FILE_RANKS[n.rank],
-          Name: n.name,
-          Parens: (n.parens ? 1 : 0),
-          AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id),   # !! SF folks like to pre-populate with zeros
-          RefID: (ref ? ref.id : 0),
-          DataFlags: 0,                                    # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
-          AccessCode: 0,
-          NameStatus: (n.related_name.nil? ? 0 : 7),                            # 0 :valid, 7: synonym)
-          StatusFlags: (n.related_name.nil? ? 0 : 262144),                      # 0 :valid, 262144: jr. synonym
-          OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0),      # SF must be pre-configured with 0 filler (this restriction needs to go)
-          LastUpdate: @time,
-          ModifiedBy: @authorized_user_id,
-        }
-        sql << sql_insert_statement('tblTaxa', cols)
+      sql_above = []
+      # Need to add by rank for FK constraint handling
+      Taxonifi::RANKS.each do |rank|
+        @name_collection.names_at_rank(rank).each do |n|
+          $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
+          # ref = get_ref(n)
+          cols = {
+            TaxonNameID: n.id,
+            TaxonNameStr: n.parent_ids_sf_style,                       # closure -> ends with 1
+            RankID: SPECIES_FILE_RANKS[n.rank],
+            Name: n.name,
+            Parens: (n.parens ? 1 : 0),
+            AboveID: 0,
+            RefID: (n.original_description_reference ? n.original_description_reference.id : 0),
+            DataFlags: 0,                  # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
+            AccessCode: 0,
+            Extinct: (n.properties && n.properties['extinct'] == 'true' ? 1 : 0),
+            NameStatus: (n.related_name.nil? ? 0 : 7),                            # 0 :valid, 7: synonym)
+            StatusFlags: (n.related_name.nil? ? 0 : 262144),                      # 0 :valid, 262144: jr. synonym
+            OriginalGenusID: (n.properties && !n.properties['original_genus_id'].nil? ? n.properties['original_genus_id'] : 0),      # SF must be pre-configured with 0 filler (this restriction needs to go)
+            LastUpdate: @time,
+            ModifiedBy: @authorized_user_id,
+          }
+          sql << sql_insert_statement('tblTaxa', cols)
+          above_id =  (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id)
+          sql_above.push "UPDATE tblTaxa SET AboveID = #{above_id} where TaxonNameID = #{n.id};"
+        end
       end
-      sql.join("\n")
+      sql.join("\n") +  sql_above.join("\n")
     end
     # Generate a tblRefs string.
@@ -202,6 +206,16 @@ module Taxonifi::Export
         # Assumes the 0 "null" pub id is there
         pub_id = @pub_collection[r.publication] ? @pub_collection[r.publication] : 0
+        # Build a note based on "unused" properties
+        note = []
+        if r.properties
+          r.properties.keys.each do |k|
+            note.push "#{k}: #{r.properties[k]}" if r.properties[k] && r.properties.length > 0
+          end
+        end
+        note = note.join("; ")
+        note = @empty_quotes if note.length == 0
         cols = {
           RefID: r.id,
           ContainingRefID: 0,
@@ -210,12 +224,12 @@ module Taxonifi::Export
           Series: @empty_quotes,
           Volume: (r.volume ? r.volume : @empty_quotes),
           Issue:  (r.number ? r.number : @empty_quotes),
-          RefPages: r.page_string, # always a string
+          RefPages: r.page_string, # always a strings
           ActualYear: (r.year ? r.year : @empty_quotes),
           StatedYear: @empty_quotes,
           AccessCode: 0,
           Flags: 0,
-          Note: @empty_quotes,
+          Note: note,
           LastUpdate: @time,
           LinkID: 0,
           ModifiedBy: @authorized_user_id,
@@ -231,7 +245,7 @@ module Taxonifi::Export
     def tblPubs
       sql = []
       @headers = %w{PubID PrefID PubType ShortName FullName Note LastUpdate ModifiedBy Publisher PlacePublished PubRegID Status StartYear EndYear BHL}
       # Hackish should build this elsewhere, but degrades OK
       pubs = @name_collection.ref_collection.collection.collect{|r| r.publication}.compact.uniq
@@ -263,9 +277,7 @@ module Taxonifi::Export
     def tblPeople
       @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
       sql = []
-      @author_index.keys.each_with_index do |k,i|
-        a = @author_index[k]
-        # a.id = i + 1
+      @name_collection.ref_collection.all_authors.each do |a|
         cols = {
           PersonID: a.id,
           FamilyName: (a.last_name.length > 0 ? a.last_name : "Unknown"),
@@ -287,12 +299,11 @@ module Taxonifi::Export
       sql = []
       @name_collection.ref_collection.collection.each do |r|
         r.authors.each_with_index do |x, i|
-          a = @author_index[x.compact_string]
           cols = {
             RefID: r.id,
-            PersonID: a.id,
+            PersonID: x.id,
             SeqNum: i + 1,
-            AuthorCount: r.authors.size,
+            AuthorCount: r.authors.size + 1,
             LastUpdate: @time,
             ModifiedBy: @authorized_user_id
           }
@@ -306,35 +317,62 @@ module Taxonifi::Export
     def tblCites
       @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
       sql = []
-      @name_collection.collection.each do |n|
-        next if @nomenclator[n.nomenclator_name].nil? # Only create nomenclator records if they are original citations, otherwise not !! Might need updating in future imports
-        ref = get_ref(n)
-        # ref = @by_author_reference_index[n.author_year_index]
-        next if ref.nil?
-        cols = {
-          TaxonNameID:       n.id,
-          SeqNum:            1,
-          RefID:             ref.id,
-          NomenclatorID:     @nomenclator[n.nomenclator_name],
-          LastUpdate:        @time,
-          ModifiedBy:        @authorized_user_id,
-          CitePages:         @empty_quotes,        # equates to "" in CSV speak
-          NewNameStatus:     0,
-          Note:              @empty_quotes,
-          TypeClarification: 0,     # We might derive more data from this
-          CurrentConcept:    1,        # Boolean, right?
-          ConceptChange:     0,         # Unspecified
-          InfoFlags:         0,             #
-          InfoFlagStatus:    1,        # 1 => needs review
-          PolynomialStatus:  0
-        }
-        sql << sql_insert_statement('tblCites', cols)
+      @name_collection.citations.keys.each do |name_id|
+        seq_num = 1
+        @name_collection.citations[name_id].each do |ref_id, nomenclator_index, properties|
+          cols = {
+            TaxonNameID:       name_id,
+            SeqNum:            seq_num,
+            RefID:             ref_id,
+            NomenclatorID:     nomenclator_index,
+            LastUpdate:        @time,
+            ModifiedBy:        @authorized_user_id,
+            CitePages:         (properties[:cite_pages] ? properties[:cite_pages] : @empty_quotes),
+            NewNameStatus:     0,
+            Note:              (properties[:note] ? properties[:note] : @empty_quotes),
+            TypeClarification: 0,     # We might derive more data from this
+            CurrentConcept:    (properties[:current_concept] == true ? 1 : 0),     # Boolean, right?
+            ConceptChange:     0,     # Unspecified
+            InfoFlags:         0,     #
+            InfoFlagStatus:    1,     # 1 => needs review
+            PolynomialStatus:  0
+          }
+          sql << sql_insert_statement('tblCites', cols)
+          seq_num += 1
+        end
       end
       sql.join("\n")
     end
+    # Generate tblTypeSpecies string.
+    def tblTypeSpecies
+      @headers = %w{GenusNameID SpeciesNameID Reason AuthorityRefID FirstFamGrpNameID LastUpdate ModifiedBy NewID}
+      sql = []
+      names = @name_collection.names_at_rank('genus') + @name_collection.names_at_rank('subgenus')
+      names.each do |n|
+        if n.properties[:type_species_id]
+          ref = get_ref(n)
+          # ref = @by_author_reference_index[n.author_year_index]
+          next if ref.nil?
+          cols = {
+            GenusNameID: n.id ,
+            SpeciesNameID: n.properties[:type_species_id],
+            Reason: 0            ,
+            AuthorityRefID: 0    ,
+            FirstFamGrpNameID: 0 ,
+            LastUpdate: @time    ,
+            ModifiedBy: @authorized_user_id   ,
+            NewID: 0 # What is this?
+          }
+          sql << sql_insert_statement('tblTypeSpecies', cols)
+        end
+      end
+      sql.join("\n")
+    end
     def tblGenusNames
       # TODO: SF tests catch unused names based on some names not being included in Nomeclator data.  We could optimize so that the work around is removed.
       # I.e., all the names get added here, not all the names get added to Nomclator/Cites because of citations which are not original combinations
@@ -374,78 +412,42 @@ module Taxonifi::Export
       @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
       sql = []
       i = 1
-      @name_collection.collection.each do |n|
-        gid, sgid = 0,0
-        sid = @species_names[n.parent_name_at_rank('species')] || 0
-        ssid = @species_names[n.parent_name_at_rank('subspecies')] || 0
-        if n.parens == false
-          gid = @genus_names[n.parent_name_at_rank('genus')] || 0
-          sgid = @genus_names[n.parent_name_at_rank('subgenus')] || 0
-        end
+      # Ugh, move build from here
+      @name_collection.nomenclators.keys.each do |i|
+        name =  @name_collection.nomenclators[i]
+        genus_id = @genus_names[name[0]]
+        genus_id ||= 0
+        subgenus_id = @genus_names[name[1]]
+        subgenus_id ||= 0
+        species_id = @species_names[name[2]]
+        species_id ||= 0
+        subspecies_id = @species_names[name[3]]
+        subspecies_id ||= 0
+        variety_id = @species_names[name[4]]
+        variety_id ||= 0
-        next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('subtribe')
-        ref = get_ref(n)
-        # debugger
-        # ref = @by_author_reference_index[n.author_year_index]
-        next if ref.nil?
         cols = {
           NomenclatorID: i,
-          GenusNameID: gid,
-          SubgenusNameID: sgid,
-          SpeciesNameID: sid,
-          SubspeciesNameID: ssid,
-          InfrasubspeciesNameID: 0,
-          InfrasubKind: 0,                          # this might be wrong
+          GenusNameID: genus_id,
+          SubgenusNameID: subgenus_id,
+          SpeciesNameID: species_id,
+          SubspeciesNameID: subspecies_id,
+          InfrasubspeciesNameID: variety_id,
+          InfrasubKind: (variety_id == 0 ? 0 : 2),
           LastUpdate: @time,
           ModifiedBy: @authorized_user_id,
-          SuitableForGenus: 0,                      # Set in SF
-          SuitableForSpecies: 0                     # Set in SF
+          SuitableForGenus: 0,                      # Set in SF w test
+          SuitableForSpecies: 0                     # Set in SF w test
         }
-        @nomenclator.merge!(n.nomenclator_name => i)
         i += 1
         sql << sql_insert_statement('tblNomenclator', cols)
       end
-      # TODO: DRY this up with above?!
-      @name_collection.combinations.each do |c|
-        gid, sgid = 0,0
-        sid = (c[2].nil? ? 0 : @species_names[c[2].name])
-        ssid = (c[3].nil? ? 0 : @species_names[c[3].name])
-        if c.compact.last.parens == false
-          gid = (c[0].nil? ? 0 : @genus_names[c[0].name])
-          sgid = (c[1].nil? ? 0 : @genus_names[c[1].name])
-        end
-        # ref = @by_author_reference_index[c.compact.last.author_year_index]
-        ref =  @name_collection.ref_collection.object_from_row(c.compact.last.related[:link_to_ref_from_row])
-        next if ref.nil?
-        cols = {
-          NomenclatorID: i,
-          GenusNameID: gid ,
-          SubgenusNameID: sgid ,
-          SpeciesNameID: sid ,
-          SubspeciesNameID: ssid ,
-          InfrasubspeciesNameID: 0,
-          InfrasubKind: 0,                          # this might be wrong
-          LastUpdate: @time,
-          ModifiedBy: @authorized_user_id,
-          SuitableForGenus: 0,                      # Set in SF
-          SuitableForSpecies: 0                     # Set in SF
-        }
-        # check!?
-        @nomenclator.merge!(c.compact.last.nomenclator_name => i)
-        sql << sql_insert_statement('tblNomenclator', cols)
-        i += 1
-      end
       sql.join("\n")
     end
   end # End class
 end # End module

data/lib/lumper/clump.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
-# A Clump is a "C"ollection of lump derivatives and the relatinoships between these derivatives!
+# A Clump is a "C"ollection of lump derivatives and the relationships between these derivatives!
 # It's used to define relationships among objects derived, for example, between single rows of data
 module Taxonifi::Lumper:Clumps

data/lib/lumper/lumper.rb CHANGED Viewed

@@ -63,7 +63,7 @@ module Taxonifi::Lumper
     opts = {
       :csv => [],
       :initial_id => 0,
-      :capture_related_fields => true   # Stores other column values in (column_header => value) pairs in Name.related
+      :capture_related_fields => true   # Stores other column values in (column_header => value) pairs in Name#properties
     }.merge!(options)
     csv = opts[:csv]
@@ -82,14 +82,14 @@ module Taxonifi::Lumper
     # :genus => {'Foo' => [0,2]}
     # This says that "Foo" is instantiated two times in the
     # name collection, with id 0, and id 2.
-    name_index = {}
+    name_index = {} # Taxonifi::Lumper::NameIndex.new # {}
     has_ref_fields = ([:citation_basic, :citation_small] & Taxonifi::Lumper.intersecting_lumps(csv.headers)).size > 0
     unused_fields = csv.headers - Taxonifi::Lumper::LUMPS[:names]
     # First pass, create and index names
     Taxonifi::Assessor::RowAssessor.rank_headers(csv.headers).each do |rank|
+      # name_index.new_rank(rank)
       name_index[rank] = {}
       csv.each_with_index do |row, i|
         shares_rank = (rank == Taxonifi::Assessor::RowAssessor.lump_name_rank(row).to_s)
@@ -99,9 +99,8 @@ module Taxonifi::Lumper
           n = nil         # a Name if necessary
           name_id = nil   # index the new or existing Name
+          exists = false
           if name_index[rank][name] # A matching name (String) has been previously added
-            exists = false
             name_index[rank][name].each do |id|
               # Compare vectors of parent_ids for name presence
               if nc.parent_id_vector(id) == row_index[i]
@@ -110,15 +109,12 @@ module Taxonifi::Lumper
                 break
               end
             end
-            if !exists # name (string) exists, but parents are different, create new name
-              n = Taxonifi::Model::Name.new()
-            end
-          else # no version of the name exists
-            n = Taxonifi::Model::Name.new()
           end # end name exists
+          n = Taxonifi::Model::Name.new() if !exists
+          unused_data = row.to_hash.select{|f| unused_fields.include?(f)}
+          row_identifier = (row['identifier'] ? row['identifier'] : i)
           # Populate the new name if created.  Previously matched names are not effected.
           if !n.nil?
@@ -134,13 +130,13 @@ module Taxonifi::Lumper
             if shares_rank
               if row['author_year']
                 builder = Taxonifi::Splitter::Builder.build_author_year(row['author_year'])
-                n.author               = builder.people
+                n.authors              = builder.people  # was author!?
                 n.year                 = builder.year
-                n.parens               = !builder.parens
+                n.parens               = builder.parens
               end
-              n.related.merge!(:link_to_ref_from_row => i) if has_ref_fields
-              n.related.merge!(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
+              n.add_property(:link_to_ref_from_row, i) if has_ref_fields # TODO: update this
+              n.add_properties(unused_data) if opts[:capture_related_fields]
             end
             name_id = nc.add_object(n).id
@@ -150,6 +146,14 @@ module Taxonifi::Lumper
             $DEBUG && $stderr.puts("added #{nc.collection.size - 1} | #{n.name} | #{n.rank} | #{n.parent ? n.parent.name : '-'} | #{n.parent ? n.parent.id : '-'}")
           else
             $DEBUG && $stderr.puts("already present #{rank} | #{name}")
+            if shares_rank
+              # original::
+              nc.add_duplicate_entry_metadata(name_id, row_identifier, unused_data)
+              # hack
+              # nc.add_duplicate_entry_metadata(name_id, row_identifier, row.to_hash)
+            end
           end
           # build a by row vector of parent child relationships
@@ -166,7 +170,7 @@ module Taxonifi::Lumper
     opts = {
       :csv => nil,
       :inital_id => 1,
-      :capture_related_fields => true   # Stores other column values in (column_header => value) pairs in Ref.related
+      :capture_related_fields => true   # Stores other column values in (column_header => value) pairs in Ref#related
     }.merge!(options)
     csv = opts[:csv]
@@ -219,7 +223,7 @@ module Taxonifi::Lumper
           end
         end
-        r.related.merge!(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
+        r.add_properties(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
         # Do some indexing.
         ref_str = r.compact_string

data/lib/lumper/lumps/parent_child_name_collection.rb CHANGED Viewed

@@ -56,7 +56,7 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
           n.rank = rank
           n.name = name
           n.row_number = i
-          n.related.merge!(:external_id => external_id)
+          n.add_property(:external_id, external_id)
           if parent = external_index[parent_id]
             n.parent = parent
@@ -134,7 +134,6 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
       # validation in general, something to look at, for now, throw up our hands and move on.
       return last_id if (real_genus.nil? || real_species.nil?)
-      # debugger if real_genus.id == 399
       real_subgenus = nil # revisit
       real_subspecies = nc.object_by_id(nc.name_exists?(tmp_subspecies))  if !tmp_subspecies.nil?

data/lib/lumper/name_index.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
+module Taxonifi::Lumper
+  class NameIndex
+    attr_accessor :index
+    def initialize
+      @index = {}
+    end
+    def new_rank(rank)
+      @index[rank] = {}
+    end
+    def name_exists_at_rank?(name, rank)
+      name_index[rank] && name_index[rank][name]
+    end
+  end
+end

data/lib/{models → model}/author_year.rb RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.expand_path(File.join(File.dirname(__FILE__), "../models/base.rb"))
+require File.expand_path(File.join(File.dirname(__FILE__), "../model/base.rb"))
 module Taxonifi
   module Model
@@ -28,7 +28,7 @@ module Taxonifi
       def compact_index
         index = [@year]
         @people.each do |a|
-          index.push a.compact_string
+          index.push(a.compact_string)
         end
         index.join("-")
       end