taxonifi 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +24 -7
- data/README.rdoc +5 -6
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/assessor/row_assessor.rb +25 -18
- data/lib/export/format/base.rb +96 -1
- data/lib/export/format/obo_nomenclature.rb +71 -0
- data/lib/export/format/prolog.rb +59 -0
- data/lib/export/format/species_file.rb +303 -193
- data/lib/lumper/clump.rb +112 -0
- data/lib/lumper/lumper.rb +71 -45
- data/lib/lumper/lumps/parent_child_name_collection.rb +79 -15
- data/lib/models/author_year.rb +1 -2
- data/lib/models/base.rb +56 -51
- data/lib/models/collection.rb +16 -1
- data/lib/models/name.rb +56 -15
- data/lib/models/name_collection.rb +70 -19
- data/lib/models/ref.rb +17 -0
- data/lib/models/ref_collection.rb +2 -1
- data/lib/models/shared_class_methods.rb +29 -0
- data/lib/models/species_name.rb +14 -12
- data/lib/splitter/parser.rb +1 -2
- data/lib/splitter/tokens.rb +1 -1
- data/lib/taxonifi.rb +12 -0
- data/lib/utils/array.rb +17 -0
- data/lib/utils/hash.rb +17 -0
- data/taxonifi.gemspec +116 -0
- data/test/file_fixtures/Fossil.csv +11 -0
- data/test/file_fixtures/Lygaeoidea.csv +1 -1
- data/test/file_fixtures/names.csv +1 -0
- data/test/helper.rb +14 -0
- data/test/test_export_prolog.rb +14 -0
- data/test/test_exporter.rb +23 -0
- data/test/test_lumper_clump.rb +75 -0
- data/test/test_lumper_names.rb +67 -9
- data/test/test_lumper_parent_child_name_collection.rb +47 -3
- data/test/test_lumper_refs.rb +22 -7
- data/test/test_obo_nomenclature.rb +14 -0
- data/test/test_parser.rb +4 -2
- data/test/test_splitter_tokens.rb +9 -0
- data/test/test_taxonifi_accessor.rb +21 -15
- data/test/test_taxonifi_base.rb +25 -0
- data/test/test_taxonifi_name.rb +41 -4
- data/test/test_taxonifi_name_collection.rb +54 -17
- data/test/test_taxonifi_species_name.rb +1 -1
- metadata +34 -5
| @@ -29,7 +29,7 @@ module Taxonifi::Export | |
| 29 29 | 
             
                  'superfamily group' =>       44,         
         | 
| 30 30 | 
             
                  'subinfraordinal group' =>   45,             
         | 
| 31 31 | 
             
                  'infraorder' =>              46,  
         | 
| 32 | 
            -
                  'suborder' =>                 | 
| 32 | 
            +
                  'suborder' =>                48,
         | 
| 33 33 | 
             
                  'order' =>                   50,
         | 
| 34 34 | 
             
                  'mirorder' =>                51,
         | 
| 35 35 | 
             
                  'superorder' =>              52,  
         | 
| @@ -54,288 +54,398 @@ module Taxonifi::Export | |
| 54 54 |  | 
| 55 55 | 
             
                attr_accessor :name_collection
         | 
| 56 56 | 
             
                attr_accessor :ref_collection
         | 
| 57 | 
            +
                attr_accessor :pub_collection
         | 
| 57 58 | 
             
                attr_accessor :author_index
         | 
| 58 59 | 
             
                attr_accessor :genus_names, :species_names, :nomenclator
         | 
| 59 60 | 
             
                attr_accessor :authorized_user_id, :time
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                # MANIFEST order is important
         | 
| 62 | 
            -
                MANIFEST = %w{tblTaxa tblRefs tblPeople tblRefAuthors tblGenusNames tblSpeciesNames tblNomenclator tblCites} 
         | 
| 61 | 
            +
                attr_accessor :starting_ref_id
         | 
| 63 62 |  | 
| 64 63 | 
             
                def initialize(options = {})
         | 
| 65 64 | 
             
                  opts = {
         | 
| 66 65 | 
             
                    :nc => Taxonifi::Model::NameCollection.new,
         | 
| 67 66 | 
             
                    :export_folder => 'species_file',
         | 
| 68 | 
            -
                    :authorized_user_id => nil
         | 
| 67 | 
            +
                    :authorized_user_id => nil,
         | 
| 68 | 
            +
                    :starting_ref_id => 1,                              # should be configured elsewhere... but
         | 
| 69 | 
            +
                    :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites} 
         | 
| 69 70 | 
             
                  }.merge!(options)
         | 
| 70 71 |  | 
| 72 | 
            +
                  @manifest = opts[:manifest]
         | 
| 73 | 
            +
             | 
| 71 74 | 
             
                  super(opts)
         | 
| 72 75 | 
             
                  raise Taxonifi::Export::ExportError, 'NameCollection not passed to SpeciesFile export.' if ! opts[:nc].class == Taxonifi::Model::NameCollection
         | 
| 73 76 | 
             
                  raise Taxonifi::Export::ExportError, 'You must provide authorized_user_id for species_file export initialization.' if opts[:authorized_user_id].nil?
         | 
| 74 77 | 
             
                  @name_collection = opts[:nc]
         | 
| 78 | 
            +
                  @pub_collection = {} # title => id
         | 
| 75 79 | 
             
                  @authorized_user_id = opts[:authorized_user_id]
         | 
| 76 80 | 
             
                  @author_index = {}
         | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 81 | 
            +
                  @starting_ref_id = opts[:starting_ref_id]
         | 
| 82 | 
            +
                
         | 
| 79 83 | 
             
                  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
         | 
| 80 84 | 
             
                  # because it's all internal.  There is a strong potential for key collisions if this pipeline is modified to 
         | 
| 81 85 | 
             
                  # include references external to the initialized name_collection.  See also export_references.
         | 
| 82 86 | 
             
                  #
         | 
| 83 | 
            -
                  @by_author_reference_index = {}
         | 
| 87 | 
            +
                  # @by_author_reference_index = {}
         | 
| 84 88 | 
             
                  @genus_names = {}
         | 
| 85 89 | 
             
                  @species_names = {}
         | 
| 86 90 | 
             
                  @nomenclator = {}
         | 
| 91 | 
            +
             | 
| 87 92 | 
             
                  @time = Time.now.strftime("%F %T") 
         | 
| 93 | 
            +
                  @empty_quotes = "" 
         | 
| 88 94 | 
             
                end 
         | 
| 89 95 |  | 
| 90 | 
            -
                # Export only the ref_collection. Sidesteps the main name-centric exports
         | 
| 91 | 
            -
                # Note that this still uses the base @name_collection object as a starting reference,
         | 
| 92 | 
            -
                # it just references @name_collection.ref_collection.  So you can do:
         | 
| 93 | 
            -
                #   nc = Taxonifi::Model::NameCollection.new
         | 
| 94 | 
            -
                #   nc.ref_collection = Taxonifi::Model::RefCollection.new
         | 
| 95 | 
            -
                #   etc.
         | 
| 96 | 
            -
                def export_references(options = {})
         | 
| 97 | 
            -
                  opts = {
         | 
| 98 | 
            -
                    :starting_ref_id => 0,
         | 
| 99 | 
            -
                    :starting_author_id => 0
         | 
| 100 | 
            -
                  }
         | 
| 101 | 
            -
             | 
| 102 | 
            -
                  configure_folders
         | 
| 103 | 
            -
                  build_author_index 
         | 
| 104 | 
            -
             | 
| 105 | 
            -
                  # order matters
         | 
| 106 | 
            -
                  ['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
         | 
| 107 | 
            -
                    write_file(t, send(t))
         | 
| 108 | 
            -
                  end
         | 
| 109 | 
            -
                end
         | 
| 110 | 
            -
             | 
| 111 96 | 
             
                # Assumes names that are the same are the same person. 
         | 
| 112 97 | 
             
                def build_author_index
         | 
| 113 98 | 
             
                  @author_index = @name_collection.ref_collection.unique_authors.inject({}){|hsh, a| hsh.merge!(a.compact_string => a)}
         | 
| 114 99 | 
             
                end
         | 
| 115 | 
            -
             | 
| 100 | 
            +
             | 
| 116 101 | 
             
                def export()
         | 
| 117 102 | 
             
                  super
         | 
| 118 | 
            -
                   | 
| 103 | 
            +
                  # You must have
         | 
| 104 | 
            +
                  # how to create and link the reference IDs.
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                  # Reference related approaches:
         | 
| 107 | 
            +
                  # 
         | 
| 108 | 
            +
                  # @name_collection.generate_ref_collection(1)
         | 
| 109 | 
            +
                  # Give authors unique ids:
         | 
| 110 | 
            +
                  # @name_collection.ref_collection.uniquify_authors(1) 
         | 
| 119 111 |  | 
| 120 | 
            -
                   | 
| 121 | 
            -
             | 
| 122 | 
            -
                   | 
| 112 | 
            +
                  if @name_collection.ref_collection 
         | 
| 113 | 
            +
                    build_author_index
         | 
| 114 | 
            +
                  end
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                  # raise Taxonifi::Export::ExportError, 'NameCollection has no RefCollection, you might try @name_collection.generate_ref_collection(1), or alter the manifest: hash.' if ! @name_collection.ref_collection.nil?
         | 
| 123 117 |  | 
| 124 118 | 
             
                  # See notes in #initialize re potential key collisions!
         | 
| 125 | 
            -
                  @by_author_reference_index =  @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}
         | 
| 126 | 
            -
             | 
| 119 | 
            +
                  # @by_author_reference_index =  @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}
         | 
| 120 | 
            +
             | 
| 127 121 | 
             
                  @name_collection.names_at_rank('genus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
         | 
| 128 122 | 
             
                  @name_collection.names_at_rank('subgenus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
         | 
| 129 123 | 
             
                  @name_collection.names_at_rank('species').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
         | 
| 130 124 | 
             
                  @name_collection.names_at_rank('subspecies').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
         | 
| 131 125 |  | 
| 132 | 
            -
                   | 
| 133 | 
            -
             | 
| 126 | 
            +
                  str = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
         | 
| 127 | 
            +
                  @manifest.each do |f|
         | 
| 128 | 
            +
                    str << send(f)
         | 
| 129 | 
            +
                  end
         | 
| 130 | 
            +
                  str << ['COMMIT', 'END TRY', 'BEGIN CATCH', 
         | 
| 131 | 
            +
                    'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;', 
         | 
| 132 | 
            +
                    'ROLLBACK', 'END CATCH']  
         | 
| 133 | 
            +
                  write_file('everything.sql', str.join("\n\n"))
         | 
| 134 | 
            +
                  true
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
             | 
| 137 | 
            +
                # Deprecated!
         | 
| 138 | 
            +
                # Export only the ref_collection. Sidesteps the main name-centric exports
         | 
| 139 | 
            +
                # Note that this still uses the base @name_collection object as a starting reference,
         | 
| 140 | 
            +
                # it just references @name_collection.ref_collection.  So you can do:
         | 
| 141 | 
            +
                #   nc = Taxonifi::Model::NameCollection.new
         | 
| 142 | 
            +
                #   nc.ref_collection = Taxonifi::Model::RefCollection.new
         | 
| 143 | 
            +
                #   etc.
         | 
| 144 | 
            +
                def export_references(options = {})
         | 
| 145 | 
            +
                  raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest: to achieve a similar result.'
         | 
| 146 | 
            +
                 #opts = {
         | 
| 147 | 
            +
                 #  :starting_ref_id => 0,
         | 
| 148 | 
            +
                 #  :starting_author_id => 0
         | 
| 149 | 
            +
                 #}
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                 #configure_folders
         | 
| 152 | 
            +
                 #build_author_index 
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                 ## order matters
         | 
| 155 | 
            +
                 #['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
         | 
| 156 | 
            +
                 #  write_file(t, send(t))
         | 
| 157 | 
            +
                 #end
         | 
| 158 | 
            +
                end
         | 
| 159 | 
            +
             | 
| 160 | 
            +
                # Gets the reference for a name as referenced
         | 
| 161 | 
            +
                # by .related[:link_to_ref_from_row]
         | 
| 162 | 
            +
                def get_ref(name)
         | 
| 163 | 
            +
                  if not name.related[:link_to_ref_from_row].nil?
         | 
| 164 | 
            +
                    return @name_collection.ref_collection.object_from_row(name.related[:link_to_ref_from_row])
         | 
| 134 165 | 
             
                  end
         | 
| 166 | 
            +
                  nil
         | 
| 135 167 | 
             
                end
         | 
| 136 168 |  | 
| 137 169 | 
             
                def tblTaxa
         | 
| 138 170 | 
             
                  @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
         | 
| 139 | 
            -
                   | 
| 140 | 
            -
             | 
| 141 | 
            -
                     | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
                       | 
| 159 | 
            -
             | 
| 160 | 
            -
                     | 
| 171 | 
            +
                  sql = []
         | 
| 172 | 
            +
                  @name_collection.collection.each do |n|
         | 
| 173 | 
            +
                    $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
         | 
| 174 | 
            +
                    
         | 
| 175 | 
            +
                    ref = get_ref(n) 
         | 
| 176 | 
            +
                    cols = {
         | 
| 177 | 
            +
                      TaxonNameID: n.id,
         | 
| 178 | 
            +
                      TaxonNameStr: n.parent_ids_sf_style,        # closure -> ends with 1 
         | 
| 179 | 
            +
                      RankID: SPECIES_FILE_RANKS[n.rank], 
         | 
| 180 | 
            +
                      Name: n.name,
         | 
| 181 | 
            +
                      Parens: (n.parens ? 1 : 0),
         | 
| 182 | 
            +
                      AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id),   # !! SF folks like to pre-populate with zeros
         | 
| 183 | 
            +
                      RefID: (ref ? ref.id : 0),
         | 
| 184 | 
            +
                      DataFlags: 0,                                    # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
         | 
| 185 | 
            +
                      AccessCode: 0,             
         | 
| 186 | 
            +
                      NameStatus: (n.related_name.nil? ? 0 : 7),                            # 0 :valid, 7: synonym)
         | 
| 187 | 
            +
                      StatusFlags: (n.related_name.nil? ? 0 : 262144),                      # 0 :valid, 262144: jr. synonym
         | 
| 188 | 
            +
                      OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0),      # SF must be pre-configured with 0 filler (this restriction needs to go)                
         | 
| 189 | 
            +
                      LastUpdate: @time, 
         | 
| 190 | 
            +
                      ModifiedBy: @authorized_user_id,
         | 
| 191 | 
            +
                    }
         | 
| 192 | 
            +
                    sql << sql_insert_statement('tblTaxa', cols) 
         | 
| 161 193 | 
             
                  end
         | 
| 162 | 
            -
             | 
| 194 | 
            +
                  sql.join("\n")
         | 
| 163 195 | 
             
                end
         | 
| 164 196 |  | 
| 165 197 | 
             
                # Generate a tblRefs string.
         | 
| 166 198 | 
             
                def tblRefs
         | 
| 167 | 
            -
                   | 
| 168 | 
            -
                  @ | 
| 169 | 
            -
             | 
| 170 | 
            -
                     | 
| 171 | 
            -
             | 
| 172 | 
            -
             | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 175 | 
            -
             | 
| 176 | 
            -
             | 
| 177 | 
            -
                       | 
| 178 | 
            -
                       | 
| 179 | 
            -
             | 
| 199 | 
            +
                  sql = []
         | 
| 200 | 
            +
                  @headers = %w{RefID ActualYear Title PubID Verbatim}
         | 
| 201 | 
            +
                  @name_collection.ref_collection.collection.each_with_index do |r,i|
         | 
| 202 | 
            +
                    # Assumes the 0 "null" pub id is there
         | 
| 203 | 
            +
                    pub_id = @pub_collection[r.publication] ? @pub_collection[r.publication] : 0
         | 
| 204 | 
            +
             | 
| 205 | 
            +
                    cols = {
         | 
| 206 | 
            +
                      RefID: r.id,
         | 
| 207 | 
            +
                      ContainingRefID: 0,
         | 
| 208 | 
            +
                      Title: (r.title.nil? ? @empty_quotes : r.title),
         | 
| 209 | 
            +
                      PubID: pub_id,  
         | 
| 210 | 
            +
                      Series: @empty_quotes,
         | 
| 211 | 
            +
                      Volume: (r.volume ? r.volume : @empty_quotes),
         | 
| 212 | 
            +
                      Issue:  (r.number ? r.number : @empty_quotes),
         | 
| 213 | 
            +
                      RefPages: r.page_string, # always a string
         | 
| 214 | 
            +
                      ActualYear: (r.year ? r.year : @empty_quotes),
         | 
| 215 | 
            +
                      StatedYear: @empty_quotes,
         | 
| 216 | 
            +
                      AccessCode: 0,
         | 
| 217 | 
            +
                      Flags: 0, 
         | 
| 218 | 
            +
                      Note: @empty_quotes,
         | 
| 219 | 
            +
                      LastUpdate: @time,
         | 
| 220 | 
            +
                      LinkID: 0,
         | 
| 221 | 
            +
                      ModifiedBy: @authorized_user_id,
         | 
| 222 | 
            +
                      CiteDataStatus: 0,
         | 
| 223 | 
            +
                      Verbatim: (r.full_citation ? r.full_citation : @empty_quotes)
         | 
| 224 | 
            +
                    }
         | 
| 225 | 
            +
                    sql << sql_insert_statement('tblRefs', cols) 
         | 
| 180 226 | 
             
                  end
         | 
| 181 | 
            -
                   | 
| 227 | 
            +
                  sql.join("\n")
         | 
| 182 228 | 
             
                end
         | 
| 183 229 |  | 
| 184 | 
            -
                #  | 
| 185 | 
            -
                def  | 
| 186 | 
            -
                  sql = [ | 
| 187 | 
            -
                  @headers = %w{ | 
| 188 | 
            -
                   | 
| 230 | 
            +
                # Generate tblPubs SQL
         | 
| 231 | 
            +
                def tblPubs
         | 
| 232 | 
            +
                  sql = []
         | 
| 233 | 
            +
                  @headers = %w{PubID PrefID PubType ShortName FullName Note LastUpdate ModifiedBy Publisher PlacePublished PubRegID Status StartYear EndYear BHL}
         | 
| 234 | 
            +
                  
         | 
| 235 | 
            +
                  # Hackish should build this elsewhere, but degrades OK
         | 
| 236 | 
            +
                  pubs = @name_collection.ref_collection.collection.collect{|r| r.publication}.compact.uniq
         | 
| 237 | 
            +
             | 
| 238 | 
            +
                  pubs.each_with_index do |p, i|
         | 
| 189 239 | 
             
                    cols = {
         | 
| 190 | 
            -
                       | 
| 191 | 
            -
                       | 
| 192 | 
            -
                       | 
| 193 | 
            -
                       | 
| 194 | 
            -
                       | 
| 240 | 
            +
                      PubID: i + 1,
         | 
| 241 | 
            +
                      PrefID: 0,
         | 
| 242 | 
            +
                      PubType: 1,
         | 
| 243 | 
            +
                      ShortName: "unknown_#{i}", # Unique constraint
         | 
| 244 | 
            +
                      FullName: p,
         | 
| 245 | 
            +
                      Note: @empty_quotes,
         | 
| 246 | 
            +
                      LastUpdate: @time, 
         | 
| 247 | 
            +
                      ModifiedBy: @authorized_user_id,
         | 
| 248 | 
            +
                      Publisher: @empty_quotes,
         | 
| 249 | 
            +
                      PlacePublished: @empty_quotes,
         | 
| 250 | 
            +
                      PubRegID: 0,
         | 
| 251 | 
            +
                      Status: 0, 
         | 
| 252 | 
            +
                      StartYear: 0, 
         | 
| 253 | 
            +
                      EndYear: 0, 
         | 
| 254 | 
            +
                      BHL: 0
         | 
| 195 255 | 
             
                    }
         | 
| 196 | 
            -
                     | 
| 256 | 
            +
                    @pub_collection.merge!(p => i + 1)
         | 
| 257 | 
            +
                    sql << sql_insert_statement('tblPubs', cols) 
         | 
| 197 258 | 
             
                  end
         | 
| 198 | 
            -
                  sql | 
| 199 | 
            -
                  sql.join("\n") 
         | 
| 259 | 
            +
                  sql.join("\n")
         | 
| 200 260 | 
             
                end
         | 
| 201 261 |  | 
| 202 262 | 
             
                # Generate tblPeople string.
         | 
| 203 263 | 
             
                def tblPeople
         | 
| 204 264 | 
             
                  @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
         | 
| 205 | 
            -
                   | 
| 206 | 
            -
             | 
| 207 | 
            -
                    @author_index | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
                       | 
| 211 | 
            -
             | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
| 218 | 
            -
             | 
| 219 | 
            -
             | 
| 220 | 
            -
                      csv <<  @headers.collect{|h| cols[h.to_sym]} 
         | 
| 221 | 
            -
                    end
         | 
| 265 | 
            +
                  sql = []   
         | 
| 266 | 
            +
                  @author_index.keys.each_with_index do |k,i|
         | 
| 267 | 
            +
                    a = @author_index[k] 
         | 
| 268 | 
            +
                    # a.id = i + 1
         | 
| 269 | 
            +
                    cols = {
         | 
| 270 | 
            +
                      PersonID: a.id,
         | 
| 271 | 
            +
                      FamilyName: (a.last_name.length > 0 ? a.last_name : "Unknown"),
         | 
| 272 | 
            +
                      GivenNames: a.first_name || @empty_quotes,
         | 
| 273 | 
            +
                      GivenInitials: a.initials_string || @empty_quotes,
         | 
| 274 | 
            +
                      Suffix: a.suffix || @empty_quotes,
         | 
| 275 | 
            +
                      Role: 1,                          # authors 
         | 
| 276 | 
            +
                      LastUpdate: @time,
         | 
| 277 | 
            +
                      ModifiedBy: @authorized_user_id
         | 
| 278 | 
            +
                    }
         | 
| 279 | 
            +
                    sql << sql_insert_statement('tblPeople', cols) 
         | 
| 222 280 | 
             
                  end
         | 
| 223 | 
            -
                   | 
| 281 | 
            +
                  sql.join("\n")
         | 
| 224 282 | 
             
                end
         | 
| 225 283 |  | 
| 226 284 | 
             
                # Generate tblRefAuthors string.
         | 
| 227 285 | 
             
                def tblRefAuthors 
         | 
| 228 286 | 
             
                  @headers = %w{RefID PersonID SeqNum AuthorCount LastUpdate ModifiedBy}
         | 
| 229 | 
            -
                   | 
| 230 | 
            -
             | 
| 231 | 
            -
                     | 
| 232 | 
            -
                       | 
| 233 | 
            -
             | 
| 234 | 
            -
                         | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 239 | 
            -
             | 
| 240 | 
            -
             | 
| 241 | 
            -
             | 
| 242 | 
            -
                        csv <<  @headers.collect{|h| cols[h.to_sym]} 
         | 
| 243 | 
            -
                      end
         | 
| 287 | 
            +
                  sql = []
         | 
| 288 | 
            +
                  @name_collection.ref_collection.collection.each do |r| 
         | 
| 289 | 
            +
                    r.authors.each_with_index do |x, i|
         | 
| 290 | 
            +
                      a = @author_index[x.compact_string] 
         | 
| 291 | 
            +
                      cols = {
         | 
| 292 | 
            +
                        RefID: r.id,
         | 
| 293 | 
            +
                        PersonID: a.id,
         | 
| 294 | 
            +
                        SeqNum: i + 1,
         | 
| 295 | 
            +
                        AuthorCount: r.authors.size,
         | 
| 296 | 
            +
                        LastUpdate: @time,
         | 
| 297 | 
            +
                        ModifiedBy: @authorized_user_id
         | 
| 298 | 
            +
                      }
         | 
| 299 | 
            +
                      sql << sql_insert_statement('tblRefAuthors', cols) 
         | 
| 244 300 | 
             
                    end
         | 
| 245 301 | 
             
                  end
         | 
| 246 | 
            -
                   | 
| 302 | 
            +
                  sql.join("\n")
         | 
| 247 303 | 
             
                end
         | 
| 248 304 |  | 
| 249 305 | 
             
                # Generate tblCites string.
         | 
| 250 306 | 
             
                def tblCites
         | 
| 251 307 | 
             
                  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
         | 
| 252 | 
            -
                   | 
| 253 | 
            -
             | 
| 254 | 
            -
             | 
| 255 | 
            -
             | 
| 256 | 
            -
             | 
| 257 | 
            -
             | 
| 258 | 
            -
             | 
| 259 | 
            -
             | 
| 260 | 
            -
             | 
| 261 | 
            -
             | 
| 262 | 
            -
                         | 
| 263 | 
            -
             | 
| 264 | 
            -
             | 
| 265 | 
            -
             | 
| 266 | 
            -
             | 
| 267 | 
            -
             | 
| 268 | 
            -
             | 
| 269 | 
            -
             | 
| 270 | 
            -
             | 
| 271 | 
            -
             | 
| 272 | 
            -
             | 
| 273 | 
            -
                       | 
| 274 | 
            -
                       | 
| 275 | 
            -
             | 
| 308 | 
            +
                  sql = []
         | 
| 309 | 
            +
                 
         | 
| 310 | 
            +
                  @name_collection.collection.each do |n|
         | 
| 311 | 
            +
                    next if @nomenclator[n.nomenclator_name].nil? # Only create nomenclator records if they are original citations, otherwise not !! Might need updating in future imports
         | 
| 312 | 
            +
                    ref = get_ref(n)
         | 
| 313 | 
            +
             | 
| 314 | 
            +
                    # ref = @by_author_reference_index[n.author_year_index]
         | 
| 315 | 
            +
                    next if ref.nil?
         | 
| 316 | 
            +
                    cols = {
         | 
| 317 | 
            +
                      TaxonNameID:       n.id,
         | 
| 318 | 
            +
                      SeqNum:            1,
         | 
| 319 | 
            +
                      RefID:             ref.id,
         | 
| 320 | 
            +
                      NomenclatorID:     @nomenclator[n.nomenclator_name], 
         | 
| 321 | 
            +
                      LastUpdate:        @time, 
         | 
| 322 | 
            +
                      ModifiedBy:        @authorized_user_id,
         | 
| 323 | 
            +
                      CitePages:         @empty_quotes,        # equates to "" in CSV speak
         | 
| 324 | 
            +
                      NewNameStatus:     0,
         | 
| 325 | 
            +
                      Note:              @empty_quotes,
         | 
| 326 | 
            +
                      TypeClarification: 0,     # We might derive more data from this
         | 
| 327 | 
            +
                      CurrentConcept:    1,        # Boolean, right?
         | 
| 328 | 
            +
                      ConceptChange:     0,         # Unspecified
         | 
| 329 | 
            +
                      InfoFlags:         0,             # 
         | 
| 330 | 
            +
                      InfoFlagStatus:    1,        # 1 => needs review
         | 
| 331 | 
            +
                      PolynomialStatus:  0
         | 
| 332 | 
            +
                    }
         | 
| 333 | 
            +
                    sql << sql_insert_statement('tblCites', cols) 
         | 
| 276 334 | 
             
                  end
         | 
| 277 | 
            -
                   | 
| 335 | 
            +
                  sql.join("\n")
         | 
| 278 336 | 
             
                end
         | 
| 279 337 |  | 
| 280 338 | 
             
                def tblGenusNames
         | 
| 281 | 
            -
                   | 
| 282 | 
            -
                   | 
| 339 | 
            +
                  # TODO: SF tests catch unused names based on some names not being included in Nomenclator data.  We could optimize so that the workaround is removed.
         | 
| 340 | 
            +
                  # I.e., all the names get added here, but not all the names get added to Nomenclator/Cites because of citations which are not original combinations
         | 
| 341 | 
            +
                  sql = sql_for_genus_and_species_names_tables('Genus')
         | 
| 342 | 
            +
                  sql 
         | 
| 283 343 | 
             
                end
         | 
| 284 344 |  | 
| 285 345 | 
             
                def tblSpeciesNames
         | 
| 286 | 
            -
                   | 
| 287 | 
            -
                   | 
| 346 | 
            +
                  # TODO: SF tests catch unused names based on some names not being included in Nomenclator data.  We could optimize so that the workaround is removed.
         | 
| 347 | 
            +
                  # I.e., all the names get added here, but not all the names get added to Nomenclator/Cites because of citations which are not original combinations
         | 
| 348 | 
            +
                  sql = sql_for_genus_and_species_names_tables('Species')
         | 
| 349 | 
            +
                  sql 
         | 
| 288 350 | 
             
                end
         | 
| 289 351 |  | 
| 290 | 
            -
                def  | 
| 352 | 
            +
                def sql_for_genus_and_species_names_tables(type)
         | 
| 353 | 
            +
                  sql = []
         | 
| 291 354 | 
             
                  col = "#{type}NameID"
         | 
| 292 355 | 
             
                  @headers = [col, "Name", "LastUpdate", "ModifiedBy", "Italicize"]
         | 
| 293 | 
            -
                   | 
| 294 | 
            -
             | 
| 295 | 
            -
                    var =  | 
| 296 | 
            -
                     | 
| 297 | 
            -
                       | 
| 298 | 
            -
                       | 
| 299 | 
            -
             | 
| 300 | 
            -
             | 
| 301 | 
            -
             | 
| 302 | 
            -
             | 
| 303 | 
            -
             | 
| 304 | 
            -
                      }
         | 
| 305 | 
            -
                      csv <<  @headers.collect{|h| cols[h.to_sym]} 
         | 
| 306 | 
            -
                    end
         | 
| 356 | 
            +
                  var = self.send("#{type.downcase}_names")
         | 
| 357 | 
            +
                  var.keys.each_with_index do |n,i|
         | 
| 358 | 
            +
                    var[n] = i + 1
         | 
| 359 | 
            +
                    cols = {
         | 
| 360 | 
            +
                      col.to_sym => i + 1,
         | 
| 361 | 
            +
                      Name: n,
         | 
| 362 | 
            +
                      LastUpdate: @time, 
         | 
| 363 | 
            +
                      ModifiedBy: @authorized_user_id,
         | 
| 364 | 
            +
                      Italicize: 1                              # always true for these data
         | 
| 365 | 
            +
                    }
         | 
| 366 | 
            +
                    sql << sql_insert_statement("tbl#{type}Names", cols) 
         | 
| 307 367 | 
             
                  end
         | 
| 308 | 
            -
                   | 
| 368 | 
            +
                  sql.join("\n")
         | 
| 309 369 | 
             
                end
         | 
| 310 370 |  | 
| 311 | 
            -
                #  | 
| 371 | 
            +
                # Must be called post tblGenusNames and tblSpeciesNames.
         | 
| 372 | 
            +
                # Some records are not used but can be cleaned by SF 
         | 
| 312 373 | 
             
                def tblNomenclator
         | 
| 313 374 | 
             
                  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
         | 
| 314 | 
            -
                   | 
| 315 | 
            -
             | 
| 316 | 
            -
             | 
| 317 | 
            -
                     | 
| 318 | 
            -
             | 
| 319 | 
            -
             | 
| 320 | 
            -
             | 
| 321 | 
            -
             | 
| 322 | 
            -
             | 
| 323 | 
            -
             | 
| 324 | 
            -
             | 
| 325 | 
            -
             | 
| 326 | 
            -
             | 
| 327 | 
            -
             | 
| 328 | 
            -
             | 
| 329 | 
            -
             | 
| 330 | 
            -
             | 
| 331 | 
            -
             | 
| 332 | 
            -
             | 
| 333 | 
            -
             | 
| 334 | 
            -
                       | 
| 335 | 
            -
             | 
| 375 | 
            +
                  sql = []   
         | 
| 376 | 
            +
                  i = 1
         | 
| 377 | 
            +
                  @name_collection.collection.each do |n|
         | 
| 378 | 
            +
                    gid, sgid = 0,0
         | 
| 379 | 
            +
                    sid = @species_names[n.parent_name_at_rank('species')] || 0
         | 
| 380 | 
            +
                    ssid = @species_names[n.parent_name_at_rank('subspecies')] || 0
         | 
| 381 | 
            +
             | 
| 382 | 
            +
                    if n.parens == false
         | 
| 383 | 
            +
                      gid = @genus_names[n.parent_name_at_rank('genus')] || 0
         | 
| 384 | 
            +
                      sgid = @genus_names[n.parent_name_at_rank('subgenus')] || 0
         | 
| 385 | 
            +
                    end 
         | 
| 386 | 
            +
             | 
| 387 | 
            +
                    next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('subtribe')
         | 
| 388 | 
            +
             | 
| 389 | 
            +
                    ref = get_ref(n)  
         | 
| 390 | 
            +
                    # debugger
         | 
| 391 | 
            +
                    # ref = @by_author_reference_index[n.author_year_index]
         | 
| 392 | 
            +
             | 
| 393 | 
            +
                    next if ref.nil?
         | 
| 394 | 
            +
                    cols = {
         | 
| 395 | 
            +
                      NomenclatorID: i,
         | 
| 396 | 
            +
                      GenusNameID: gid,
         | 
| 397 | 
            +
                      SubgenusNameID: sgid, 
         | 
| 398 | 
            +
                      SpeciesNameID: sid, 
         | 
| 399 | 
            +
                      SubspeciesNameID: ssid,
         | 
| 400 | 
            +
                      InfrasubspeciesNameID: 0,
         | 
| 401 | 
            +
                      InfrasubKind: 0,                          # this might be wrong
         | 
| 402 | 
            +
                      LastUpdate: @time,  
         | 
| 403 | 
            +
                      ModifiedBy: @authorized_user_id, 
         | 
| 404 | 
            +
                      SuitableForGenus: 0,                      # Set in SF 
         | 
| 405 | 
            +
                      SuitableForSpecies: 0                     # Set in SF
         | 
| 406 | 
            +
                    }
         | 
| 407 | 
            +
                    @nomenclator.merge!(n.nomenclator_name => i)
         | 
| 408 | 
            +
                    i += 1
         | 
| 409 | 
            +
             | 
| 410 | 
            +
                    sql << sql_insert_statement('tblNomenclator', cols) 
         | 
| 411 | 
            +
                  end
         | 
| 412 | 
            +
             | 
| 413 | 
            +
                  # TODO: DRY this up with above?!
         | 
| 414 | 
            +
                  @name_collection.combinations.each do |c|
         | 
| 415 | 
            +
                    gid, sgid = 0,0
         | 
| 416 | 
            +
                    sid = (c[2].nil? ? 0 : @species_names[c[2].name])
         | 
| 417 | 
            +
                    ssid = (c[3].nil? ? 0 : @species_names[c[3].name])
         | 
| 418 | 
            +
             | 
| 419 | 
            +
                    if c.compact.last.parens == false
         | 
| 420 | 
            +
                      gid = (c[0].nil? ? 0 : @genus_names[c[0].name])
         | 
| 421 | 
            +
                      sgid = (c[1].nil? ? 0 : @genus_names[c[1].name])
         | 
| 422 | 
            +
                    end 
         | 
| 423 | 
            +
             | 
| 424 | 
            +
                    # ref = @by_author_reference_index[c.compact.last.author_year_index]
         | 
| 425 | 
            +
                    ref =  @name_collection.ref_collection.object_from_row(c.compact.last.related[:link_to_ref_from_row]) 
         | 
| 426 | 
            +
             | 
| 427 | 
            +
                    next if ref.nil?
         | 
| 428 | 
            +
             | 
| 429 | 
            +
                    cols = {
         | 
| 430 | 
            +
                      NomenclatorID: i,
         | 
| 431 | 
            +
                      GenusNameID: gid ,
         | 
| 432 | 
            +
                      SubgenusNameID: sgid ,
         | 
| 433 | 
            +
                      SpeciesNameID: sid ,
         | 
| 434 | 
            +
                      SubspeciesNameID: ssid ,
         | 
| 435 | 
            +
                      InfrasubspeciesNameID: 0,
         | 
| 436 | 
            +
                      InfrasubKind: 0,                          # this might be wrong
         | 
| 437 | 
            +
                      LastUpdate: @time,  
         | 
| 438 | 
            +
                      ModifiedBy: @authorized_user_id, 
         | 
| 439 | 
            +
                      SuitableForGenus: 0,                      # Set in SF 
         | 
| 440 | 
            +
                      SuitableForSpecies: 0                     # Set in SF
         | 
| 441 | 
            +
                    }
         | 
| 442 | 
            +
                    # check!?
         | 
| 443 | 
            +
                    @nomenclator.merge!(c.compact.last.nomenclator_name => i)
         | 
| 444 | 
            +
                    sql << sql_insert_statement('tblNomenclator', cols) 
         | 
| 445 | 
            +
                    i += 1
         | 
| 336 446 | 
             
                  end
         | 
| 337 | 
            -
                   | 
| 447 | 
            +
                  sql.join("\n")
         | 
| 338 448 | 
             
                end
         | 
| 339 449 |  | 
| 340 | 
            -
              end
         | 
| 341 | 
            -
            end
         | 
| 450 | 
            +
              end # End class
         | 
| 451 | 
            +
            end # End module
         |