RubyGems - dwca_hunter - Versions diffs - 0.7.1 → 0.7.2 - Mend

dwca_hunter 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/.rubocop.yml +9 -1
data/.ruby-version +1 -1
data/Gemfile.lock +56 -27
data/dwca_hunter.gemspec +11 -9
data/exe/dwcahunter +0 -2
data/lib/dwca_hunter.rb +9 -7
data/lib/dwca_hunter/resource.rb +8 -3
data/lib/dwca_hunter/resources/arctos.rb +42 -45
data/lib/dwca_hunter/resources/ioc_word_bird.rb +105 -105
data/lib/dwca_hunter/resources/mammal_divdb.rb +76 -45
data/lib/dwca_hunter/resources/mcz.rb +1 -1
data/lib/dwca_hunter/resources/wikispecies.rb +65 -98
data/lib/dwca_hunter/version.rb +1 -1
metadata +48 -20

data/lib/dwca_hunter/resources/ioc_word_bird.rb CHANGED

@@ -3,14 +3,14 @@
 module DwcaHunter
   class ResourceIOCWorldBird < DwcaHunter::Resource
     def initialize(opts = {})
-      @command = "ioc-world-bird"
-      @title = "IOC World Bird List"
-      @url = "https://uofi.box.com/shared/static/fbpuk5ghh9083nbzjdeyqtoqsvdnro01.csv"
-      @UUID = "6421ffec-38e3-40fb-a6d9-af27238a47a1"
+      @command = 'ioc-world-bird'
+      @title = 'IOC World Bird List'
+      @url = 'https://uofi.box.com/shared/static/znsd734a78saq87hes979p5uspgkzy93.csv'
+      @UUID = '6421ffec-38e3-40fb-a6d9-af27238a47a1'
       @download_path = File.join(Dir.tmpdir,
-                                 "dwca_hunter",
-                                 "ioc-bird",
-                                 "data.csv")
+                                 'dwca_hunter',
+                                 'ioc-bird',
+                                 'data.csv')
       @synonyms = []
       @names = []
       @vernaculars = []
@@ -21,17 +21,17 @@ module DwcaHunter
     end
     def download
-      puts "Downloading cached and converted to csv version."
-      puts "CHECK FOR NEW VERSION at"
-      puts "https://www.worldbirdnames.org/ioc-lists/master-list-2/"
-      puts "Use libreoffice to convert to csv."
+      puts 'Downloading cached and converted to csv version.'
+      puts 'CHECK FOR NEW VERSION at'
+      puts 'https://www.worldbirdnames.org/ioc-lists/master-list-2/'
+      puts 'Use libreoffice to convert to csv.'
       `curl -s -L #{@url} -o #{@download_path}`
     end
     def unpack; end
     def make_dwca
-      DwcaHunter.logger_write(object_id, "Extracting data")
+      DwcaHunter.logger_write(object_id, 'Extracting data')
       get_names
       generate_dwca
     end
@@ -45,84 +45,84 @@ module DwcaHunter
     def collect_names
       @names_index = {}
-      file = CSV.open(File.join(@download_dir, "data.csv"),
+      file = CSV.open(File.join(@download_dir, 'data.csv'),
                       headers: true)
-      order = ""
-      family = ""
-      genus = ""
-      species = ""
+      order = ''
+      family = ''
+      genus = ''
+      species = ''
       count = 0
       file.each do |row|
-        order1 = row["Order"]
-        order = order1.capitalize if order1.to_s != ""
+        order1 = row['Order']
+        order = order1.capitalize if order1.to_s != ''
-        family1 = row["Family (Scientific)"]
-        family = family1.capitalize if family1.to_s != ""
+        family1 = row['Family (Scientific)']
+        family = family1.capitalize if family1.to_s != ''
-        genus1 = row["Genus"]
-        genus = genus1.capitalize if genus1.to_s != ""
+        genus1 = row['Genus']
+        genus = genus1.capitalize if genus1.to_s != ''
-        species1 = row["Species (Scientific)"]
-        species = species1 if species1.to_s != ""
+        species1 = row['Species (Scientific)']
+        species = species1 if species1.to_s != ''
-        subspecies = row["Subspecies"]
-        next if species.to_s == ""
+        subspecies = row['Subspecies']
+        next if species.to_s == ''
         count += 1
         taxon_id = "gn_#{count}"
         name = {
           taxon_id: taxon_id,
-          kingdom: "Animalia",
-          phylum: "Chordata",
-          klass: "Aves",
+          kingdom: 'Animalia',
+          phylum: 'Chordata',
+          klass: 'Aves',
           order: order,
           family: family,
           genus: genus,
-          code: "ICZN"
+          code: 'ICZN'
         }
-        if subspecies.to_s == ""
-          auth = row["Authority"].to_s
-          auth = DwcaHunter.normalize_authors(auth) if auth != ""
+        if subspecies.to_s == ''
+          auth = row['Authority'].to_s
+          auth = DwcaHunter.normalize_authors(auth) if auth != ''
           name[:name_string] = clean(
-            "#{genus} #{species} #{auth}".
-            strip
+            "#{genus} #{species} #{auth}"
+            .strip
           )
           @names << name
-          vernacular = row["Species (English)"]
-          if vernacular.to_s != ""
-            vernaclar = { taxon_id: taxon_id, vern: vernacular, lang: "en" }
+          vernacular = row['Species (English)']
+          if vernacular.to_s != ''
+            vernaclar = { taxon_id: taxon_id, vern: vernacular, lang: 'en' }
             @vernaculars << vernaclar
           end
-          species = ""
+          species = ''
         else
           name[:name_string] = clean(
-            "#{genus} #{species} #{subspecies} #{row['Authority']}".
-            strip
+            "#{genus} #{species} #{subspecies} #{row['Authority']}"
+            .strip
           )
           @names << name
-          species = ""
-          subspecies = ""
+          species = ''
+          subspecies = ''
         end
       end
     end
     def clean(n)
-      n = n.gsub(/†/, "")
-      n.gsub(/\s+/, " ")
+      n = n.gsub(/†/, '')
+      n.gsub(/\s+/, ' ')
     end
     def generate_dwca
       DwcaHunter.logger_write(object_id,
-                              "Creating DarwinCore Archive file")
-      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
-                "http://rs.tdwg.org/dwc/terms/scientificName",
-                "http://rs.tdwg.org/dwc/terms/kingdom",
-                "http://rs.tdwg.org/dwc/terms/phylum",
-                "http://rs.tdwg.org/dwc/terms/class",
-                "http://rs.tdwg.org/dwc/terms/order",
-                "http://rs.tdwg.org/dwc/terms/family",
-                "http://rs.tdwg.org/dwc/terms/genus",
-                "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
+                              'Creating DarwinCore Archive file')
+      @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
+                'http://rs.tdwg.org/dwc/terms/scientificName',
+                'http://rs.tdwg.org/dwc/terms/kingdom',
+                'http://rs.tdwg.org/dwc/terms/phylum',
+                'http://rs.tdwg.org/dwc/terms/class',
+                'http://rs.tdwg.org/dwc/terms/order',
+                'http://rs.tdwg.org/dwc/terms/family',
+                'http://rs.tdwg.org/dwc/terms/genus',
+                'http://rs.tdwg.org/dwc/terms/nomenclaturalCode']]
       @names.each do |n|
         @core << [n[:taxon_id], n[:name_string],
                   n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
@@ -130,12 +130,12 @@ module DwcaHunter
       end
       @extensions << {
         data: [[
-          "http://rs.tdwg.org/dwc/terms/taxonID",
-          "http://rs.tdwg.org/dwc/terms/vernacularName",
-          "http://purl.org/dc/terms/language"
+          'http://rs.tdwg.org/dwc/terms/taxonID',
+          'http://rs.tdwg.org/dwc/terms/vernacularName',
+          'http://purl.org/dc/terms/language'
         ]],
-        file_name: "vernacular_names.txt",
-        row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
+        file_name: 'vernacular_names.txt',
+        row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
       }
       @vernaculars.each do |v|
@@ -146,53 +146,53 @@ module DwcaHunter
         id: @uuid,
         title: @title,
         authors: [
-          { first_name: "Per",
-            last_name: "Alstrom" },
-          { first_name: "Mike",
-            last_name: "Blair" },
-          { first_name: "Rauri",
-            last_name: "Bowie" },
-          { first_name: "Nigel",
-            last_name: "Redman" },
-          { first_name: "Jon",
-            last_name: "Fjeldsa" },
-          { first_name: "Phil",
-            last_name: "Gregory" },
-          { first_name: "Leo",
-            last_name: "Joseph" },
-          { first_name: "Peter",
-            last_name: "Kovalik" },
-          { first_name: "Adolfo",
-            last_name: "Navarro-Siguenza" },
-          { first_name: "David",
-            last_name: "Parkin" },
-          { first_name: "Alan",
-            last_name: "Peterson" },
-          { first_name: "Douglas",
-            last_name: "Pratt" },
-          { first_name: "Pam",
-            last_name: "Rasmussen" },
-          { first_name: "Frank",
-            last_name: "Rheindt" },
-          { first_name: "Robert",
-            last_name: "Ridgely" },
-          { first_name: "Peter",
-            last_name: "Ryan" },
-          { first_name: "George",
-            last_name: "Sangster" },
-          { first_name: "Dick",
-            last_name: "Schodde" },
-          { first_name: "Minturn",
-            last_name: "Wright" }
+          { first_name: 'Per',
+            last_name: 'Alstrom' },
+          { first_name: 'Mike',
+            last_name: 'Blair' },
+          { first_name: 'Rauri',
+            last_name: 'Bowie' },
+          { first_name: 'Nigel',
+            last_name: 'Redman' },
+          { first_name: 'Jon',
+            last_name: 'Fjeldsa' },
+          { first_name: 'Phil',
+            last_name: 'Gregory' },
+          { first_name: 'Leo',
+            last_name: 'Joseph' },
+          { first_name: 'Peter',
+            last_name: 'Kovalik' },
+          { first_name: 'Adolfo',
+            last_name: 'Navarro-Siguenza' },
+          { first_name: 'David',
+            last_name: 'Parkin' },
+          { first_name: 'Alan',
+            last_name: 'Peterson' },
+          { first_name: 'Douglas',
+            last_name: 'Pratt' },
+          { first_name: 'Pam',
+            last_name: 'Rasmussen' },
+          { first_name: 'Frank',
+            last_name: 'Rheindt' },
+          { first_name: 'Robert',
+            last_name: 'Ridgely' },
+          { first_name: 'Peter',
+            last_name: 'Ryan' },
+          { first_name: 'George',
+            last_name: 'Sangster' },
+          { first_name: 'Dick',
+            last_name: 'Schodde' },
+          { first_name: 'Minturn',
+            last_name: 'Wright' }
         ],
         metadata_providers: [
-          { first_name: "Dmitry",
-            last_name: "Mozzherin",
-            email: "dmozzherin@gmail.com" }
+          { first_name: 'Dmitry',
+            last_name: 'Mozzherin',
+            email: 'dmozzherin@gmail.com' }
         ],
-        abstract: "The IOC World Bird List is an open access resource of " \
-                  "the international community of ornithologists.",
-        url: "https://www.worldbirdnames.org"
+        abstract: 'The IOC World Bird List is an open access resource of ' \
+                  'the international community of ornithologists.',
+        url: 'https://www.worldbirdnames.org'
       }
       super
     end

data/lib/dwca_hunter/resources/mammal_divdb.rb CHANGED

@@ -5,12 +5,12 @@ module DwcaHunter
     def initialize(opts = {})
       @command = "mammal-div-db"
       @title = "ASM Mammal Diversity Database"
-      @url = "https://mammaldiversity.org/species-account/api.php?q=*"
+      @url = "https://www.mammaldiversity.org/assets/data/MDD.zip"
       @UUID = "94270cdd-5424-4bb1-8324-46ccc5386dc7"
       @download_path = File.join(Dir.tmpdir,
                                  "dwca_hunter",
                                  "mammal-div-db",
-                                 "data.json")
+                                 "data.zip")
       @synonyms = []
       @names = []
       @vernaculars = []
@@ -25,7 +25,9 @@ module DwcaHunter
       `curl '#{@url}' -H 'User-Agent:' -o #{@download_path}`
     end
-    def unpack; end
+    def unpack
+      unpack_zip
+    end
     def make_dwca
       DwcaHunter.logger_write(object_id, "Extracting data")
@@ -40,49 +42,78 @@ module DwcaHunter
       collect_names
     end
+    def find_csv_file
+      Dir.chdir(@download_dir)
+      Dir.entries(".").each do |f|
+        return f if f[-4..-1] == ".csv"
+      end
+    end
+    def assemble_name(row)
+      name = row["sciName"].gsub("_", " ")
+      auth = "#{row['authoritySpeciesAuthor']} #{row['aurhoritySpeciesYear']}".
+        strip
+      auth = "(#{auth})" if row["authorityParentheses"] == 1
+      rank = "species"
+      rank = "subspecies" if (name.split(" ").size > 2)
+      name = "#{name} #{auth}".strip
+      [rank, name]
+    end
+    def assemble_synonym(row)
+      name = row["originalNameCombination"].gsub("_", " ")
+      auth = "#{row['authoritySpeciesAuthor']} #{row['aurhoritySpeciesYear']}".
+        strip
+      name = "#{name} #{auth}".strip
+      { taxon_id: row["id"], name_string: name, status: "synonym" }
+    end
+    def vernaculars(row)
+      id = row["id"]
+      res = []
+      vern = row["mainCommonName"].to_s
+      res << vern  if vern != ""
+      verns = row["otherCommonNames"].to_s
+      if verns != ""
+        verns = verns.split("|")
+        res += verns
+      end
+      res.map do |v|
+        { taxon_id: id, vern: v, lang: "en" }
+      end
+    end
     def collect_names
       @names_index = {}
-      decoder = HTMLEntities.new
-      data = File.read(File.join(@download_dir, "data.json"))
-      data = JSON.parse(data, symbolize_names: true)
-      data[:result].each_with_index do |e, _i|
-        e = e[1]
-        order = e[:dwc][:order].capitalize
-        order = nil if order.match(/incertae/)
-        family = e[:dwc][:family].capitalize
-        family = nil if family.match(/incertae/)
-        genus = e[:dwc][:genus].capitalize
-        genus = nil if genus.match(/incertae/)
-        name = {
-          taxon_id: e[:internal_id],
+      file = CSV.open(File.join(@download_dir, find_csv_file),
+                      headers: true)
+      file.each do |row|
+        order = row["order"].to_s.capitalize
+        order = nil if order.match(/incertae/) || order.empty?
+        family = row["family"].to_s.capitalize
+        family = nil if family.match(/incertae/) || family.empty?
+        genus = row["genus"].to_s.capitalize
+        genus = nil if genus.match(/incertae/) || genus.empty?
+        rank, name_string = assemble_name(row)
+        @names << {
+          taxon_id: row["id"],
           kingdom: "Animalia",
           phylum: "Chordata",
           klass: "Mammalia",
           order: order,
           family: family,
           genus: genus,
-          name_string: "#{e[:dwc][:scientificName]} " \
-          "#{e[:dwc][:scientificNameAuthorship][:species]}".strip,
-          rank: e[:dwc][:taxonRank],
-          status: e[:dwc][:taxonRank],
+          name_string: name_string,
+          rank: rank,
           code: "ICZN"
         }
-        if e[:dwc][:taxonomicStatus] == "accepted"
-          @names << name
-        else
-          @synonyms << name
+        if row["originalNameCombination"].to_s != ""
+          @synonyms << assemble_synonym(row)
+        end
+        vernaculars(row).each do |vern|
+          @vernaculars << vern
         end
-        vern = e[:dwc][:vernacularName]
-        next unless vern.to_s != ""
-        vern = decoder.decode(vern)
-        vernacular = {
-          taxon_id: e[:id],
-          vern: vern,
-          lang: "en"
-        }
-        @vernaculars << vernacular
       end
-      puts data[:result].size
     end
     def generate_dwca
@@ -96,11 +127,12 @@ module DwcaHunter
                 "http://rs.tdwg.org/dwc/terms/order",
                 "http://rs.tdwg.org/dwc/terms/family",
                 "http://rs.tdwg.org/dwc/terms/genus",
+                "http://rs.tdwg.org/dwc/terms/taxonRank",
                 "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
       @names.each do |n|
         @core << [n[:taxon_id], n[:name_string],
                   n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
-                  n[:genus], n[:code]]
+                  n[:genus], n[:rank], n[:code]]
       end
       @extensions << {
         data: [[
@@ -133,23 +165,22 @@ module DwcaHunter
         authors: [
           { first_name: "C. J.",
             last_name: "Burgin" },
-          { first_name: "J. P.",
-            last_name: "Colella" },
-          { first_name: "P. L.",
-            last_name: "Kahn" },
-          { first_name: "N. S.",
-            last_name: "Upham" }
+            { first_name: "J. P.",
+              last_name: "Colella" },
+              { first_name: "P. L.",
+                last_name: "Kahn" },
+                { first_name: "N. S.",
+                  last_name: "Upham" }
         ],
         metadata_providers: [
           { first_name: "Dmitry",
             last_name: "Mozzherin",
             email: "dmozzherin@gmail.com" }
         ],
-        abstract: "Mammal Diversity Database. 2020. www.mammaldiversity.org. " \
-        "American Society of Mammalogists. Accessed 2020-05-24 .",
-        url: @url
+        abstract: "Mammal Diversity Database. 2021. www.mammaldiversity.org. " \
+        "American Society of Mammalogists. Accessed 2021-01-28.", url: @url
       }
       super
+      end
     end
   end
-end