dwca_hunter 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3fbae125e5aa2c8891a3fa045eb3c628c7c53b7b3957a5b0f8153e6d6b3cbbb
4
- data.tar.gz: 71f2c5579faf193f9a2b81378b2a437c5e0b3a4f4152fa8496bc0233ed27f225
3
+ metadata.gz: 6c7057b88df4f16a74e5818f1f9966183968844624143c68d43715c8569adb4c
4
+ data.tar.gz: 47b5a3b28b22a18fb8dff65a095775e7535f098d7cebcc60a199b82d02f8b9dc
5
5
  SHA512:
6
- metadata.gz: 371304bc1e3a0c5b2862b4213e494f713b27895237d5226430001a98b17c122b5924ad815c9b8c3e164d19ae22997e2c955e8812600c230c011bed484d1b4bd2
7
- data.tar.gz: 0f97ed3b3230161bf03ad0976785eaded15e500fd2fa8443c85144a90d6439faaa1c33eaa5aa0aad4355aef8882ade0c661d1ee3f25144f82436926f9e3581a7
6
+ metadata.gz: b780161f3c024dfe6155028fc71c8762e83a95f8dd0f9158d5d387f0cbb77cd6525d5abb5137d8d73ed42093ccae897e38da8e8d2a13bf5b10bec1fae9f68424
7
+ data.tar.gz: 3c8cfa6603b2cc8bac0766568168ed6016e3d509e62e88b47704c4b0e2662f332de230da4cbe1c8ef0cb1467ce5b6e987260875e9b160efde1e2cff169cae263
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.5.3
1
+ 2.6.4
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.2)
4
+ dwca_hunter (0.5.4)
5
5
  biodiversity (~> 3.5)
6
6
  dwc-archive (~> 1.0)
7
7
  gn_uuid (~> 0.5)
@@ -137,4 +137,4 @@ DEPENDENCIES
137
137
  solargraph (~> 0.23)
138
138
 
139
139
  BUNDLED WITH
140
- 2.0.1
140
+ 2.0.2
@@ -5,7 +5,7 @@ module DwcaHunter
5
5
  def initialize(opts = {})
6
6
  @command = 'arctos'
7
7
  @title = 'Arctos'
8
- @url = 'https://www.dropbox.com/s/jo44d1vd9bkdwm8/arctos.zip?dl=1'
8
+ @url = 'https://www.dropbox.com/s/3rmny5d8cfm9mmp/arctos.tar.gz?dl=1'
9
9
  @UUID = 'eea8315d-a244-4625-859a-226675622312'
10
10
  @download_path = File.join(Dir.tmpdir,
11
11
  'dwca_hunter',
@@ -15,6 +15,8 @@ module DwcaHunter
15
15
  @names = []
16
16
  @vernaculars = []
17
17
  @extensions = []
18
+ @synonyms_hash = {}
19
+ @vernaculars_hash = {}
18
20
  super(opts)
19
21
  end
20
22
 
@@ -24,7 +26,7 @@ module DwcaHunter
24
26
  end
25
27
 
26
28
  def unpack
27
- unpack_zip
29
+ unpack_tar
28
30
  end
29
31
 
30
32
  def make_dwca
@@ -37,92 +39,72 @@ module DwcaHunter
37
39
 
38
40
  def get_names
39
41
  Dir.chdir(@download_dir)
40
- Dir.entries(@download_dir).grep(/zip$/).each do |file|
41
- self.class.unzip(file) unless File.exists?(file.gsub(/zip$/,'csv'))
42
- end
43
42
  collect_names
44
43
  collect_synonyms
45
44
  collect_vernaculars
46
45
  end
47
46
 
48
47
  def collect_vernaculars
49
- file = open(File.join(@download_dir, 'flat_common_name.csv'))
50
- fields = {}
48
+ file = CSV.open(File.join(@download_dir, 'common_name.csv'),
49
+ headers: true)
51
50
  file.each_with_index do |row, i|
52
51
 
53
- if i == 0
54
- fields = get_fields(row)
55
- next
56
- end
57
-
58
- row = split_row(row)
59
-
60
- taxon_id = row[fields[:taxon_name_id]]
61
- vernacular_name_string = row[fields[:common_name]]
52
+ canonical = row['SCIENTIFIC_NAME']
53
+ vernacular_name_string = row['COMMON_NAME']
62
54
 
63
- @vernaculars << {
64
- taxon_id: taxon_id,
65
- vernacular_name_string: vernacular_name_string
66
- }
55
+ if @vernaculars_hash.has_key?(canonical)
56
+ @vernaculars_hash[canonical] << vernacular_name_string
57
+ else
58
+ @vernaculars_hash[canonical] = [vernacular_name_string]
59
+ end
67
60
 
68
61
  puts "Processed %s vernaculars" % i if i % 10000 == 0
69
62
  end
70
63
  end
71
64
 
72
65
  def collect_synonyms
73
- file = open(File.join(@download_dir, 'flat_relationships.csv'))
74
- fields = {}
66
+ file = CSV.open(File.join(@download_dir, 'relationships.csv'),
67
+ headers: true)
75
68
  file.each_with_index do |row, i|
76
- if i == 0
77
- fields = get_fields(row)
78
- next
69
+ canonical = row['scientific_name']
70
+ if @synonyms_hash.has_key?(canonical)
71
+ @synonyms_hash[canonical] <<
72
+ { synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
73
+ else
74
+ @synonyms_hash[canonical] = [
75
+ { synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
76
+ ]
79
77
  end
80
-
81
- row = split_row(row)
82
- taxon_id = row[fields[:taxon_name_id]]
83
- @synonyms << {
84
- taxon_id: row[fields[:related_taxon_name_id]],
85
- local_id: taxon_id,
86
- name_string: @names_index[taxon_id],
87
- #synonym_authority: row[fields[:relation_authority]],
88
- taxonomic_status: row[fields[:taxon_relationship]],
89
- }
90
78
  puts "Processed %s synonyms" % i if i % 10000 == 0
91
79
  end
92
80
  end
93
81
 
94
82
  def collect_names
95
83
  @names_index = {}
96
- file = open(File.join(@download_dir, 'flat_classification.csv'))
97
- fields = {}
84
+ file = CSV.open(File.join(@download_dir, 'classification.csv'),
85
+ headers: true)
98
86
  file.each_with_index do |row, i|
99
- if i == 0
100
- fields = get_fields(row)
101
- next
102
- end
103
-
104
- next unless row[fields[:display_name]]
105
- row = split_row(row)
106
- taxon_id = row[fields[:taxon_name_id]]
107
- name_string = row[fields[:display_name]].gsub(/<\/?i>/,'')
108
- kingdom = row[fields[:kingdom]]
109
- phylum = row[fields[:phylum]]
110
- klass = row[fields[:phylclass]]
111
- subclass = row[fields[:subclass]]
112
- order = row[fields[:phylorder]]
113
- suborder = row[fields[:suborder]]
114
- superfamily = row[fields[:superfamily]]
115
- family = row[fields[:family]]
116
- subfamily = row[fields[:subfamily]]
117
- tribe = row[fields[:tribe]]
118
- genus = row[fields[:genus]]
119
- subgenus = row[fields[:subgenus]]
120
- species = row[fields[:species]]
121
- subspecies = row[fields[:subspecies]]
122
- code = row[fields[:nomenclatural_code]]
123
-
87
+ next unless row['display_name']
88
+ name_string = row['display_name'].gsub(/<\/?i>/,'')
89
+ canonical = row['scientific_name']
90
+ kingdom = row['kingdom']
91
+ phylum = row['phylum']
92
+ klass = row['phylclass']
93
+ subclass = row['subclass']
94
+ order = row['phylorder']
95
+ suborder = row['suborder']
96
+ superfamily = row['superfamily']
97
+ family = row['family']
98
+ subfamily = row['subfamily']
99
+ tribe = row['tribe']
100
+ genus = row['genus']
101
+ subgenus = row['subgenus']
102
+ species = row['species']
103
+ subspecies = row['subspecies']
104
+ code = row['nomenclatural_code']
105
+
106
+ taxon_id = "ARCT_#{i}"
124
107
  @names << { taxon_id: taxon_id,
125
- local_id: taxon_id,
126
108
  name_string: name_string,
127
109
  kingdom: kingdom,
128
110
  phylum: phylum,
@@ -133,37 +115,26 @@ module DwcaHunter
133
115
  code: code,
134
116
  }
135
117
 
136
- @names_index[taxon_id] = name_string
118
+ update_vernacular(taxon_id, canonical)
119
+ update_synonym(taxon_id, canonical)
137
120
  puts "Processed %s names" % i if i % 10000 == 0
138
121
  end
139
122
  end
140
123
 
141
- def split_row(row)
142
- row = row.strip.gsub(/^"/, '').gsub(/"$/, '')
143
- row.split('","')
124
+ def update_vernacular(taxon_id, canonical)
125
+ return unless @vernaculars_hash.has_key?(canonical)
126
+ @vernaculars_hash[canonical].each do |vern|
127
+ @vernaculars << [taxon_id, vern, 'en']
128
+ end
144
129
  end
145
130
 
146
- def get_fields(row)
147
- row = row.split(",")
148
- encoding_options = {
149
- :invalid => :replace,
150
- :undef => :replace,
151
- :replace => '',
152
- :universal_newline => true
153
- }
154
- num_ary = (0...row.size).to_a
155
- row = row.map do |f|
156
- f = f.strip.downcase
157
- f = f.encode ::Encoding.find('ASCII'), encoding_options
158
- f.to_sym
131
+ def update_synonym(taxon_id, canonical)
132
+ return unless @synonyms_hash.has_key?(canonical)
133
+ @synonyms_hash[canonical].each do |syn|
134
+ @synonyms << [taxon_id, syn[:synonym], syn[:status]]
159
135
  end
160
- res = Hash[row.zip(num_ary)]
161
- require 'byebug'; byebug
162
- puts ''
163
- res
164
136
  end
165
137
 
166
-
167
138
  def generate_dwca
168
139
  DwcaHunter::logger_write(self.object_id,
169
140
  'Creating DarwinCore Archive file')
@@ -1,5 +1,5 @@
1
1
  module DwcaHunter
2
- VERSION = "0.5.3"
2
+ VERSION = "0.5.4"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwca_hunter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-12 00:00:00.000000000 Z
11
+ date: 2019-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity
@@ -284,8 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
284
284
  - !ruby/object:Gem::Version
285
285
  version: '0'
286
286
  requirements: []
287
- rubyforge_project:
288
- rubygems_version: 2.7.6
287
+ rubygems_version: 3.0.3
289
288
  signing_key:
290
289
  specification_version: 4
291
290
  summary: Converts a variety of available online resources to DarwinCore Archive files.