dwca_hunter 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3fbae125e5aa2c8891a3fa045eb3c628c7c53b7b3957a5b0f8153e6d6b3cbbb
4
- data.tar.gz: 71f2c5579faf193f9a2b81378b2a437c5e0b3a4f4152fa8496bc0233ed27f225
3
+ metadata.gz: 6c7057b88df4f16a74e5818f1f9966183968844624143c68d43715c8569adb4c
4
+ data.tar.gz: 47b5a3b28b22a18fb8dff65a095775e7535f098d7cebcc60a199b82d02f8b9dc
5
5
  SHA512:
6
- metadata.gz: 371304bc1e3a0c5b2862b4213e494f713b27895237d5226430001a98b17c122b5924ad815c9b8c3e164d19ae22997e2c955e8812600c230c011bed484d1b4bd2
7
- data.tar.gz: 0f97ed3b3230161bf03ad0976785eaded15e500fd2fa8443c85144a90d6439faaa1c33eaa5aa0aad4355aef8882ade0c661d1ee3f25144f82436926f9e3581a7
6
+ metadata.gz: b780161f3c024dfe6155028fc71c8762e83a95f8dd0f9158d5d387f0cbb77cd6525d5abb5137d8d73ed42093ccae897e38da8e8d2a13bf5b10bec1fae9f68424
7
+ data.tar.gz: 3c8cfa6603b2cc8bac0766568168ed6016e3d509e62e88b47704c4b0e2662f332de230da4cbe1c8ef0cb1467ce5b6e987260875e9b160efde1e2cff169cae263
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.5.3
1
+ 2.6.4
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.2)
4
+ dwca_hunter (0.5.4)
5
5
  biodiversity (~> 3.5)
6
6
  dwc-archive (~> 1.0)
7
7
  gn_uuid (~> 0.5)
@@ -137,4 +137,4 @@ DEPENDENCIES
137
137
  solargraph (~> 0.23)
138
138
 
139
139
  BUNDLED WITH
140
- 2.0.1
140
+ 2.0.2
@@ -5,7 +5,7 @@ module DwcaHunter
5
5
  def initialize(opts = {})
6
6
  @command = 'arctos'
7
7
  @title = 'Arctos'
8
- @url = 'https://www.dropbox.com/s/jo44d1vd9bkdwm8/arctos.zip?dl=1'
8
+ @url = 'https://www.dropbox.com/s/3rmny5d8cfm9mmp/arctos.tar.gz?dl=1'
9
9
  @UUID = 'eea8315d-a244-4625-859a-226675622312'
10
10
  @download_path = File.join(Dir.tmpdir,
11
11
  'dwca_hunter',
@@ -15,6 +15,8 @@ module DwcaHunter
15
15
  @names = []
16
16
  @vernaculars = []
17
17
  @extensions = []
18
+ @synonyms_hash = {}
19
+ @vernaculars_hash = {}
18
20
  super(opts)
19
21
  end
20
22
 
@@ -24,7 +26,7 @@ module DwcaHunter
24
26
  end
25
27
 
26
28
  def unpack
27
- unpack_zip
29
+ unpack_tar
28
30
  end
29
31
 
30
32
  def make_dwca
@@ -37,92 +39,72 @@ module DwcaHunter
37
39
 
38
40
  def get_names
39
41
  Dir.chdir(@download_dir)
40
- Dir.entries(@download_dir).grep(/zip$/).each do |file|
41
- self.class.unzip(file) unless File.exists?(file.gsub(/zip$/,'csv'))
42
- end
43
42
  collect_names
44
43
  collect_synonyms
45
44
  collect_vernaculars
46
45
  end
47
46
 
48
47
  def collect_vernaculars
49
- file = open(File.join(@download_dir, 'flat_common_name.csv'))
50
- fields = {}
48
+ file = CSV.open(File.join(@download_dir, 'common_name.csv'),
49
+ headers: true)
51
50
  file.each_with_index do |row, i|
52
51
 
53
- if i == 0
54
- fields = get_fields(row)
55
- next
56
- end
57
-
58
- row = split_row(row)
59
-
60
- taxon_id = row[fields[:taxon_name_id]]
61
- vernacular_name_string = row[fields[:common_name]]
52
+ canonical = row['SCIENTIFIC_NAME']
53
+ vernacular_name_string = row['COMMON_NAME']
62
54
 
63
- @vernaculars << {
64
- taxon_id: taxon_id,
65
- vernacular_name_string: vernacular_name_string
66
- }
55
+ if @vernaculars_hash.has_key?(canonical)
56
+ @vernaculars_hash[canonical] << vernacular_name_string
57
+ else
58
+ @vernaculars_hash[canonical] = [vernacular_name_string]
59
+ end
67
60
 
68
61
  puts "Processed %s vernaculars" % i if i % 10000 == 0
69
62
  end
70
63
  end
71
64
 
72
65
  def collect_synonyms
73
- file = open(File.join(@download_dir, 'flat_relationships.csv'))
74
- fields = {}
66
+ file = CSV.open(File.join(@download_dir, 'relationships.csv'),
67
+ headers: true)
75
68
  file.each_with_index do |row, i|
76
- if i == 0
77
- fields = get_fields(row)
78
- next
69
+ canonical = row['scientific_name']
70
+ if @synonyms_hash.has_key?(canonical)
71
+ @synonyms_hash[canonical] <<
72
+ { synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
73
+ else
74
+ @synonyms_hash[canonical] = [
75
+ { synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
76
+ ]
79
77
  end
80
-
81
- row = split_row(row)
82
- taxon_id = row[fields[:taxon_name_id]]
83
- @synonyms << {
84
- taxon_id: row[fields[:related_taxon_name_id]],
85
- local_id: taxon_id,
86
- name_string: @names_index[taxon_id],
87
- #synonym_authority: row[fields[:relation_authority]],
88
- taxonomic_status: row[fields[:taxon_relationship]],
89
- }
90
78
  puts "Processed %s synonyms" % i if i % 10000 == 0
91
79
  end
92
80
  end
93
81
 
94
82
  def collect_names
95
83
  @names_index = {}
96
- file = open(File.join(@download_dir, 'flat_classification.csv'))
97
- fields = {}
84
+ file = CSV.open(File.join(@download_dir, 'classification.csv'),
85
+ headers: true)
98
86
  file.each_with_index do |row, i|
99
- if i == 0
100
- fields = get_fields(row)
101
- next
102
- end
103
-
104
- next unless row[fields[:display_name]]
105
- row = split_row(row)
106
- taxon_id = row[fields[:taxon_name_id]]
107
- name_string = row[fields[:display_name]].gsub(/<\/?i>/,'')
108
- kingdom = row[fields[:kingdom]]
109
- phylum = row[fields[:phylum]]
110
- klass = row[fields[:phylclass]]
111
- subclass = row[fields[:subclass]]
112
- order = row[fields[:phylorder]]
113
- suborder = row[fields[:suborder]]
114
- superfamily = row[fields[:superfamily]]
115
- family = row[fields[:family]]
116
- subfamily = row[fields[:subfamily]]
117
- tribe = row[fields[:tribe]]
118
- genus = row[fields[:genus]]
119
- subgenus = row[fields[:subgenus]]
120
- species = row[fields[:species]]
121
- subspecies = row[fields[:subspecies]]
122
- code = row[fields[:nomenclatural_code]]
123
-
87
+ next unless row['display_name']
88
+ name_string = row['display_name'].gsub(/<\/?i>/,'')
89
+ canonical = row['scientific_name']
90
+ kingdom = row['kingdom']
91
+ phylum = row['phylum']
92
+ klass = row['phylclass']
93
+ subclass = row['subclass']
94
+ order = row['phylorder']
95
+ suborder = row['suborder']
96
+ superfamily = row['superfamily']
97
+ family = row['family']
98
+ subfamily = row['subfamily']
99
+ tribe = row['tribe']
100
+ genus = row['genus']
101
+ subgenus = row['subgenus']
102
+ species = row['species']
103
+ subspecies = row['subspecies']
104
+ code = row['nomenclatural_code']
105
+
106
+ taxon_id = "ARCT_#{i}"
124
107
  @names << { taxon_id: taxon_id,
125
- local_id: taxon_id,
126
108
  name_string: name_string,
127
109
  kingdom: kingdom,
128
110
  phylum: phylum,
@@ -133,37 +115,26 @@ module DwcaHunter
133
115
  code: code,
134
116
  }
135
117
 
136
- @names_index[taxon_id] = name_string
118
+ update_vernacular(taxon_id, canonical)
119
+ update_synonym(taxon_id, canonical)
137
120
  puts "Processed %s names" % i if i % 10000 == 0
138
121
  end
139
122
  end
140
123
 
141
- def split_row(row)
142
- row = row.strip.gsub(/^"/, '').gsub(/"$/, '')
143
- row.split('","')
124
+ def update_vernacular(taxon_id, canonical)
125
+ return unless @vernaculars_hash.has_key?(canonical)
126
+ @vernaculars_hash[canonical].each do |vern|
127
+ @vernaculars << [taxon_id, vern, 'en']
128
+ end
144
129
  end
145
130
 
146
- def get_fields(row)
147
- row = row.split(",")
148
- encoding_options = {
149
- :invalid => :replace,
150
- :undef => :replace,
151
- :replace => '',
152
- :universal_newline => true
153
- }
154
- num_ary = (0...row.size).to_a
155
- row = row.map do |f|
156
- f = f.strip.downcase
157
- f = f.encode ::Encoding.find('ASCII'), encoding_options
158
- f.to_sym
131
+ def update_synonym(taxon_id, canonical)
132
+ return unless @synonyms_hash.has_key?(canonical)
133
+ @synonyms_hash[canonical].each do |syn|
134
+ @synonyms << [taxon_id, syn[:synonym], syn[:status]]
159
135
  end
160
- res = Hash[row.zip(num_ary)]
161
- require 'byebug'; byebug
162
- puts ''
163
- res
164
136
  end
165
137
 
166
-
167
138
  def generate_dwca
168
139
  DwcaHunter::logger_write(self.object_id,
169
140
  'Creating DarwinCore Archive file')
@@ -1,5 +1,5 @@
1
1
  module DwcaHunter
2
- VERSION = "0.5.3"
2
+ VERSION = "0.5.4"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwca_hunter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-12 00:00:00.000000000 Z
11
+ date: 2019-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity
@@ -284,8 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
284
284
  - !ruby/object:Gem::Version
285
285
  version: '0'
286
286
  requirements: []
287
- rubyforge_project:
288
- rubygems_version: 2.7.6
287
+ rubygems_version: 3.0.3
289
288
  signing_key:
290
289
  specification_version: 4
291
290
  summary: Converts a variety of available online resources to DarwinCore Archive files.