RubyGems - dwca_hunter - Versions diffs - 0.5.3 → 0.5.4 - Mend

dwca_hunter 0.5.3 → 0.5.4

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/.ruby-version +1 -1
data/Gemfile.lock +2 -2
data/lib/dwca_hunter/resources/arctos.rb +56 -85
data/lib/dwca_hunter/version.rb +1 -1
metadata +3 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c3fbae125e5aa2c8891a3fa045eb3c628c7c53b7b3957a5b0f8153e6d6b3cbbb
-  data.tar.gz: 71f2c5579faf193f9a2b81378b2a437c5e0b3a4f4152fa8496bc0233ed27f225
+  metadata.gz: 6c7057b88df4f16a74e5818f1f9966183968844624143c68d43715c8569adb4c
+  data.tar.gz: 47b5a3b28b22a18fb8dff65a095775e7535f098d7cebcc60a199b82d02f8b9dc
 SHA512:
-  metadata.gz: 371304bc1e3a0c5b2862b4213e494f713b27895237d5226430001a98b17c122b5924ad815c9b8c3e164d19ae22997e2c955e8812600c230c011bed484d1b4bd2
-  data.tar.gz: 0f97ed3b3230161bf03ad0976785eaded15e500fd2fa8443c85144a90d6439faaa1c33eaa5aa0aad4355aef8882ade0c661d1ee3f25144f82436926f9e3581a7
+  metadata.gz: b780161f3c024dfe6155028fc71c8762e83a95f8dd0f9158d5d387f0cbb77cd6525d5abb5137d8d73ed42093ccae897e38da8e8d2a13bf5b10bec1fae9f68424
+  data.tar.gz: 3c8cfa6603b2cc8bac0766568168ed6016e3d509e62e88b47704c4b0e2662f332de230da4cbe1c8ef0cb1467ce5b6e987260875e9b160efde1e2cff169cae263

data/.ruby-version CHANGED Viewed

@@ -1 +1 @@
-2.5.3
+2.6.4

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    dwca_hunter (0.5.2)
+    dwca_hunter (0.5.4)
       biodiversity (~> 3.5)
       dwc-archive (~> 1.0)
       gn_uuid (~> 0.5)
@@ -137,4 +137,4 @@ DEPENDENCIES
   solargraph (~> 0.23)
 BUNDLED WITH
-   2.0.1
+   2.0.2

data/lib/dwca_hunter/resources/arctos.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module DwcaHunter
     def initialize(opts = {})
       @command = 'arctos'
       @title = 'Arctos'
-      @url = 'https://www.dropbox.com/s/jo44d1vd9bkdwm8/arctos.zip?dl=1'
+      @url = 'https://www.dropbox.com/s/3rmny5d8cfm9mmp/arctos.tar.gz?dl=1'
       @UUID =  'eea8315d-a244-4625-859a-226675622312'
       @download_path = File.join(Dir.tmpdir,
                                  'dwca_hunter',
@@ -15,6 +15,8 @@ module DwcaHunter
       @names = []
       @vernaculars = []
       @extensions = []
+      @synonyms_hash = {}
+      @vernaculars_hash = {}
       super(opts)
     end
@@ -24,7 +26,7 @@ module DwcaHunter
     end
     def unpack
-      unpack_zip
+      unpack_tar
     end
     def make_dwca
@@ -37,92 +39,72 @@ module DwcaHunter
     def get_names
       Dir.chdir(@download_dir)
-      Dir.entries(@download_dir).grep(/zip$/).each do |file|
-        self.class.unzip(file) unless File.exists?(file.gsub(/zip$/,'csv'))
-      end
       collect_names
       collect_synonyms
       collect_vernaculars
     end
     def collect_vernaculars
-      file = open(File.join(@download_dir, 'flat_common_name.csv'))
-      fields = {}
+      file = CSV.open(File.join(@download_dir, 'common_name.csv'),
+        headers: true)
       file.each_with_index do |row, i|
-        if i == 0
-          fields = get_fields(row)
-          next
-        end
-        row = split_row(row)
-        taxon_id = row[fields[:taxon_name_id]]
-        vernacular_name_string = row[fields[:common_name]]
+        canonical = row['SCIENTIFIC_NAME']
+        vernacular_name_string = row['COMMON_NAME']
-        @vernaculars << {
-          taxon_id: taxon_id,
-          vernacular_name_string: vernacular_name_string
-        }
+        if @vernaculars_hash.has_key?(canonical)
+          @vernaculars_hash[canonical] << vernacular_name_string
+        else
+          @vernaculars_hash[canonical] = [vernacular_name_string]
+        end
         puts "Processed %s vernaculars" % i if i % 10000 == 0
       end
     end
     def collect_synonyms
-      file = open(File.join(@download_dir, 'flat_relationships.csv'))
-      fields = {}
+      file = CSV.open(File.join(@download_dir, 'relationships.csv'),
+       headers: true)
       file.each_with_index do |row, i|
-        if i == 0
-          fields = get_fields(row)
-          next
+        canonical = row['scientific_name']
+        if @synonyms_hash.has_key?(canonical)
+          @synonyms_hash[canonical] <<
+          { synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
+        else
+          @synonyms_hash[canonical] = [
+          { synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
+          ]
         end
-        row = split_row(row)
-        taxon_id = row[fields[:taxon_name_id]]
-        @synonyms << {
-          taxon_id: row[fields[:related_taxon_name_id]],
-          local_id: taxon_id,
-          name_string: @names_index[taxon_id],
-          #synonym_authority:      row[fields[:relation_authority]],
-          taxonomic_status:       row[fields[:taxon_relationship]],
-        }
         puts "Processed %s synonyms" % i if i % 10000 == 0
       end
     end
     def collect_names
       @names_index = {}
-      file = open(File.join(@download_dir, 'flat_classification.csv'))
-      fields = {}
+      file = CSV.open(File.join(@download_dir, 'classification.csv'),
+       headers: true)
       file.each_with_index do |row, i|
-        if i == 0
-          fields = get_fields(row)
-          next
-        end
-        next unless  row[fields[:display_name]]
-        row = split_row(row)
-        taxon_id = row[fields[:taxon_name_id]]
-        name_string = row[fields[:display_name]].gsub(/<\/?i>/,'')
-        kingdom = row[fields[:kingdom]]
-        phylum = row[fields[:phylum]]
-        klass = row[fields[:phylclass]]
-        subclass = row[fields[:subclass]]
-        order = row[fields[:phylorder]]
-        suborder = row[fields[:suborder]]
-        superfamily = row[fields[:superfamily]]
-        family = row[fields[:family]]
-        subfamily = row[fields[:subfamily]]
-        tribe = row[fields[:tribe]]
-        genus = row[fields[:genus]]
-        subgenus = row[fields[:subgenus]]
-        species = row[fields[:species]]
-        subspecies = row[fields[:subspecies]]
-        code = row[fields[:nomenclatural_code]]
+        next unless  row['display_name']
+        name_string = row['display_name'].gsub(/<\/?i>/,'')
+        canonical = row['scientific_name']
+        kingdom = row['kingdom']
+        phylum = row['phylum']
+        klass = row['phylclass']
+        subclass = row['subclass']
+        order = row['phylorder']
+        suborder = row['suborder']
+        superfamily = row['superfamily']
+        family = row['family']
+        subfamily = row['subfamily']
+        tribe = row['tribe']
+        genus = row['genus']
+        subgenus = row['subgenus']
+        species = row['species']
+        subspecies = row['subspecies']
+        code = row['nomenclatural_code']
+        taxon_id = "ARCT_#{i}"
         @names << { taxon_id: taxon_id,
-          local_id: taxon_id,
           name_string: name_string,
           kingdom: kingdom,
           phylum: phylum,
@@ -133,37 +115,26 @@ module DwcaHunter
           code: code,
         }
-        @names_index[taxon_id] = name_string
+        update_vernacular(taxon_id, canonical)
+        update_synonym(taxon_id, canonical)
         puts "Processed %s names" % i if i % 10000 == 0
       end
     end
-    def split_row(row)
-      row = row.strip.gsub(/^"/, '').gsub(/"$/, '')
-      row.split('","')
+    def update_vernacular(taxon_id, canonical)
+      return unless @vernaculars_hash.has_key?(canonical)
+      @vernaculars_hash[canonical].each do |vern|
+        @vernaculars << [taxon_id, vern, 'en']
+      end
     end
-    def get_fields(row)
-      row = row.split(",")
-      encoding_options = {
-        :invalid           => :replace,
-        :undef             => :replace,
-        :replace           => '',
-        :universal_newline => true
-      }
-      num_ary = (0...row.size).to_a
-      row = row.map do |f|
-        f = f.strip.downcase
-        f = f.encode ::Encoding.find('ASCII'), encoding_options
-        f.to_sym
+    def update_synonym(taxon_id, canonical)
+      return unless @synonyms_hash.has_key?(canonical)
+      @synonyms_hash[canonical].each do |syn|
+        @synonyms << [taxon_id, syn[:synonym], syn[:status]]
       end
-      res = Hash[row.zip(num_ary)]
-      require 'byebug'; byebug
-      puts ''
-      res
     end
     def generate_dwca
       DwcaHunter::logger_write(self.object_id,
                                'Creating DarwinCore Archive file')

data/lib/dwca_hunter/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module DwcaHunter
-  VERSION = "0.5.3"
+  VERSION = "0.5.4"
   def self.version
     VERSION

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dwca_hunter
 version: !ruby/object:Gem::Version
-  version: 0.5.3
+  version: 0.5.4
 platform: ruby
 authors:
 - Dmitry Mozzherin
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-11-12 00:00:00.000000000 Z
+date: 2019-11-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: biodiversity
@@ -284,8 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.6
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Converts a variety of available online resources to DarwinCore Archive files.