dwca_hunter 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.byebug_history +45 -0
- data/.gitignore +5 -0
- data/.rubocop.yml +3 -2
- data/.ruby-version +1 -1
- data/Gemfile.lock +61 -83
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/dwca_hunter.gemspec +9 -9
- data/exe/dwcahunter +1 -3
- data/lib/dwca_hunter.rb +39 -8
- data/lib/dwca_hunter/resource.rb +5 -0
- data/lib/dwca_hunter/resources/aos-birds.rb +143 -0
- data/lib/dwca_hunter/resources/arctos.rb +121 -145
- data/lib/dwca_hunter/resources/clements.rb +151 -0
- data/lib/dwca_hunter/resources/eol.rb +85 -0
- data/lib/dwca_hunter/resources/freebase.rb +51 -49
- data/lib/dwca_hunter/resources/how-moore-birds.rb +168 -0
- data/lib/dwca_hunter/resources/ioc_word_bird.rb +200 -0
- data/lib/dwca_hunter/resources/ipni.rb +111 -0
- data/lib/dwca_hunter/resources/itis.rb +99 -99
- data/lib/dwca_hunter/resources/mammal_divdb.rb +155 -0
- data/lib/dwca_hunter/resources/mammal_species.rb +9 -6
- data/lib/dwca_hunter/resources/mcz.rb +123 -0
- data/lib/dwca_hunter/resources/ncbi.rb +22 -23
- data/lib/dwca_hunter/resources/opentree.rb +5 -5
- data/lib/dwca_hunter/resources/paleobiodb.rb +193 -0
- data/lib/dwca_hunter/resources/paleodb_harvester.rb +140 -0
- data/lib/dwca_hunter/resources/sherborn.rb +91 -0
- data/lib/dwca_hunter/resources/wikispecies.rb +142 -129
- data/lib/dwca_hunter/version.rb +1 -1
- metadata +46 -40
- data/files/birdlife_7.csv +0 -11862
- data/files/fishbase_taxon_cache.tsv +0 -81000
- data/files/reptile_checklist_2014_12.csv +0 -15158
- data/files/species-black.txt +0 -251
@@ -0,0 +1,111 @@
|
|
1
|
+
require "xz"
|
2
|
+
|
3
|
+
module DwcaHunter
|
4
|
+
# Resource for The International Plant Names Index (IPNI)
|
5
|
+
class ResourceIPNI < DwcaHunter::Resource
|
6
|
+
attr_reader :title, :abbr
|
7
|
+
def initialize(opts = {}) #download: false, unpack: false})
|
8
|
+
@command = "ipni"
|
9
|
+
@title = "The International Plant Names Index"
|
10
|
+
@abbr = "IPNI"
|
11
|
+
@url = "https://uofi.box.com/shared/static/s0x4xjonxt54pi89n543gdmttrdqd6iv.xz"
|
12
|
+
@uuid = "6b3905ce-5025-49f3-9697-ddd5bdfb4ff0"
|
13
|
+
@download_path = File.join(Dir.tmpdir, "dwca_hunter", "ipni",
|
14
|
+
"ipni.csv.xz")
|
15
|
+
@extensions = []
|
16
|
+
super
|
17
|
+
end
|
18
|
+
|
19
|
+
def unpack
|
20
|
+
puts "Unpacking #{@download_path}"
|
21
|
+
XZ.decompress_file(@download_path, @download_path[0...-3] )
|
22
|
+
end
|
23
|
+
|
24
|
+
def download
|
25
|
+
puts "Download by hand from"
|
26
|
+
puts "https://storage.cloud.google.com/ipni-data/ipniWebName.csv.xz"
|
27
|
+
puts "and copy to given url"
|
28
|
+
`curl -s -L #{@url} -o #{@download_path}`
|
29
|
+
end
|
30
|
+
|
31
|
+
def make_dwca
|
32
|
+
organize_data
|
33
|
+
generate_dwca
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def organize_data
|
39
|
+
DwcaHunter::logger_write(self.object_id,
|
40
|
+
"Organizing data")
|
41
|
+
# snp = ScientificNameParser.new
|
42
|
+
@data = CSV.open(@download_path[0...-3],
|
43
|
+
col_sep: "|", quote_char: "щ", headers: true)
|
44
|
+
.each_with_object([]) do |row, data|
|
45
|
+
name = row['taxon_scientific_name_s_lower'].strip
|
46
|
+
au = row['authors_t'].to_s.strip
|
47
|
+
name = "#{name} #{au}" if au != ''
|
48
|
+
id = row["id"].split(":")[-1]
|
49
|
+
data << { taxon_id: id,
|
50
|
+
local_id: id,
|
51
|
+
family: row["family_s_lower"],
|
52
|
+
genus: row["genus_s_lower"],
|
53
|
+
scientific_name: name,
|
54
|
+
rank: row["rank_s_alphanum"]
|
55
|
+
}
|
56
|
+
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def generate_dwca
|
61
|
+
DwcaHunter::logger_write(self.object_id,
|
62
|
+
'Creating DarwinCore Archive file')
|
63
|
+
core_init
|
64
|
+
eml_init
|
65
|
+
DwcaHunter::logger_write(self.object_id, 'Assembling Core Data')
|
66
|
+
count = 0
|
67
|
+
@data.each do |d|
|
68
|
+
count += 1
|
69
|
+
if count % 10000 == 0
|
70
|
+
DwcaHunter::logger_write(self.object_id, "Core row #{count}")
|
71
|
+
end
|
72
|
+
@core << [d[:taxon_id], d[:local_id],
|
73
|
+
d[:scientific_name], d[:rank],
|
74
|
+
d[:family], d[:genus]]
|
75
|
+
end
|
76
|
+
super
|
77
|
+
end
|
78
|
+
|
79
|
+
def eml_init
|
80
|
+
@eml = {
|
81
|
+
id: @uuid,
|
82
|
+
title: @title,
|
83
|
+
authors: [],
|
84
|
+
metadata_providers: [
|
85
|
+
{ first_name: "Dmitry",
|
86
|
+
last_name: "Mozzherin",
|
87
|
+
}
|
88
|
+
],
|
89
|
+
abstract: "The International Plant Names Index (IPNI) is a database " \
|
90
|
+
"of the names and associated basic bibliographical " \
|
91
|
+
"details of seed plants, ferns and lycophytes. Its goal " \
|
92
|
+
"is to eliminate the need for repeated reference to " \
|
93
|
+
"primary sources for basic bibliographic information " \
|
94
|
+
"about plant names. The data are freely available and are " \
|
95
|
+
"gradually being standardized and checked. IPNI will be a " \
|
96
|
+
"dynamic resource, depending on direct contributions by " \
|
97
|
+
"all members of the botanical community.",
|
98
|
+
url: "http://www.ipni.org"
|
99
|
+
}
|
100
|
+
end
|
101
|
+
|
102
|
+
def core_init
|
103
|
+
@core = [["http://rs.tdwg.org/dwc/terms/taxonID",
|
104
|
+
"http://globalnames.org/terms/localID",
|
105
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
106
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank",
|
107
|
+
"http://rs.tdwg.org/dwc/terms/family",
|
108
|
+
"http://rs.tdwg.org/dwc/terms/genus"]]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -1,15 +1,16 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module DwcaHunter
|
3
4
|
class ResourceITIS < DwcaHunter::Resource
|
4
5
|
def initialize(opts = {})
|
5
|
-
@command =
|
6
|
-
@title =
|
7
|
-
@url =
|
8
|
-
@uuid =
|
6
|
+
@command = "itis"
|
7
|
+
@title = "ITIS"
|
8
|
+
@url = "https://www.itis.gov/downloads/itisMySQLTables.tar.gz"
|
9
|
+
@uuid = "5d066e84-e512-4a2f-875c-0a605d3d9f35"
|
9
10
|
@download_path = File.join(Dir.tmpdir,
|
10
|
-
|
11
|
-
|
12
|
-
|
11
|
+
"dwca_hunter",
|
12
|
+
"itis",
|
13
|
+
"data.tar.gz")
|
13
14
|
@ranks = {}
|
14
15
|
@kingdoms = {}
|
15
16
|
@authors = {}
|
@@ -19,20 +20,20 @@ module DwcaHunter
|
|
19
20
|
@names = {}
|
20
21
|
@extensions = []
|
21
22
|
super(opts)
|
22
|
-
@itis_dir = File.join(@download_dir,
|
23
|
+
@itis_dir = File.join(@download_dir, "itis")
|
23
24
|
end
|
24
25
|
|
25
26
|
def unpack
|
26
27
|
unpack_tar
|
27
|
-
dir = Dir.entries(@download_dir).select {|e| e.match(/itisMySQL/)}[0]
|
28
|
+
dir = Dir.entries(@download_dir).select { |e| e.match(/itisMySQL/) }[0]
|
28
29
|
FileUtils.mv(File.join(@download_dir, dir), @itis_dir)
|
29
30
|
|
30
31
|
# Create a file with the same name as the directory we extracted.
|
31
|
-
FileUtils.touch(File.join(@itis_dir,
|
32
|
+
FileUtils.touch(File.join(@itis_dir, "version_" + dir))
|
32
33
|
end
|
33
34
|
|
34
35
|
def make_dwca
|
35
|
-
DwcaHunter
|
36
|
+
DwcaHunter.logger_write(object_id, "Extracting data")
|
36
37
|
get_ranks
|
37
38
|
get_kingdoms
|
38
39
|
get_authors
|
@@ -42,7 +43,8 @@ module DwcaHunter
|
|
42
43
|
generate_dwca
|
43
44
|
end
|
44
45
|
|
45
|
-
|
46
|
+
private
|
47
|
+
|
46
48
|
def get_ranks
|
47
49
|
# 0 kingdom_id integer not null
|
48
50
|
# 1 rank_id smallint not null
|
@@ -50,15 +52,15 @@ module DwcaHunter
|
|
50
52
|
# 3 dir_parent_rank_id smallint not null
|
51
53
|
# 4 req_parent_rank_id smallint not null
|
52
54
|
# 5 update_date date not null
|
53
|
-
rank_file = File.join(@itis_dir,
|
54
|
-
f = open(rank_file,
|
55
|
+
rank_file = File.join(@itis_dir, "taxon_unit_types")
|
56
|
+
f = open(rank_file, "r:utf-8")
|
55
57
|
f.each do |l|
|
56
|
-
l.encode!(
|
57
|
-
|
58
|
+
l.encode!("UTF-8",
|
59
|
+
"ISO-8859-1",
|
58
60
|
invalid: :replace,
|
59
|
-
replace:
|
60
|
-
row = l.strip.split(
|
61
|
-
@ranks[row[0].strip +
|
61
|
+
replace: "?")
|
62
|
+
row = l.strip.split("|")
|
63
|
+
@ranks[row[0].strip + "/" + row[1].strip] = row[2].strip
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
@@ -67,9 +69,9 @@ module DwcaHunter
|
|
67
69
|
# 1 kingdom_name char(10) not null
|
68
70
|
# 2 update_date date not null
|
69
71
|
|
70
|
-
f = open(File.join(@itis_dir,
|
72
|
+
f = open(File.join(@itis_dir, "kingdoms"))
|
71
73
|
f.each do |l|
|
72
|
-
data = l.strip.split(
|
74
|
+
data = l.strip.split("|")
|
73
75
|
@kingdoms[data[0].strip] = data[1].strip
|
74
76
|
end
|
75
77
|
end
|
@@ -80,13 +82,13 @@ module DwcaHunter
|
|
80
82
|
# 2 update_date date not null
|
81
83
|
# 3 kingdom_id smallint not null
|
82
84
|
|
83
|
-
f = open(File.join(@itis_dir,
|
85
|
+
f = open(File.join(@itis_dir, "taxon_authors_lkp"))
|
84
86
|
f.each do |l|
|
85
|
-
l.encode!(
|
86
|
-
|
87
|
+
l.encode!("UTF-8",
|
88
|
+
"ISO-8859-1",
|
87
89
|
invalid: :replace,
|
88
|
-
replace:
|
89
|
-
data = l.strip.split(
|
90
|
+
replace: "?")
|
91
|
+
data = l.strip.split("|")
|
90
92
|
@authors[data[0].strip] = data[1].strip
|
91
93
|
end
|
92
94
|
end
|
@@ -100,22 +102,22 @@ module DwcaHunter
|
|
100
102
|
# 5 primary key (tsn,vernacular_name,language)
|
101
103
|
# constraint "itis".vernaculars_key
|
102
104
|
|
103
|
-
f = open(File.join(@itis_dir,
|
105
|
+
f = open(File.join(@itis_dir, "vernaculars"))
|
104
106
|
f.each_with_index do |l, i|
|
105
107
|
if i % BATCH_SIZE == 0
|
106
|
-
DwcaHunter
|
107
|
-
|
108
|
+
DwcaHunter.logger_write(object_id,
|
109
|
+
"Extracted %s vernacular names" % i)
|
108
110
|
end
|
109
|
-
l.encode!(
|
110
|
-
|
111
|
+
l.encode!("UTF-8",
|
112
|
+
"ISO-8859-1",
|
111
113
|
invalid: :replace,
|
112
|
-
replace:
|
113
|
-
data = l.split(
|
114
|
+
replace: "?")
|
115
|
+
data = l.split("|").map(&:strip)
|
114
116
|
name_tsn = data[0]
|
115
117
|
string = data[1]
|
116
118
|
language = data[2]
|
117
|
-
language =
|
118
|
-
@vernaculars[name_tsn] = { name:string, language:language }
|
119
|
+
language = "Common name" if language == "unspecified"
|
120
|
+
@vernaculars[name_tsn] = { name: string, language: language }
|
119
121
|
end
|
120
122
|
end
|
121
123
|
|
@@ -124,17 +126,17 @@ module DwcaHunter
|
|
124
126
|
# 1 tsn_accepted integer not null
|
125
127
|
# 2 update_date date not null
|
126
128
|
|
127
|
-
f = open(File.join(@itis_dir,
|
129
|
+
f = open(File.join(@itis_dir, "synonym_links"))
|
128
130
|
f.each_with_index do |l, i|
|
129
131
|
if i % BATCH_SIZE == 0
|
130
|
-
DwcaHunter
|
131
|
-
|
132
|
+
DwcaHunter.logger_write(object_id,
|
133
|
+
"Extracted %s synonyms" % i)
|
132
134
|
end
|
133
|
-
l.encode!(
|
134
|
-
|
135
|
+
l.encode!("UTF-8",
|
136
|
+
"ISO-8859-1",
|
135
137
|
invalid: :replace,
|
136
|
-
replace:
|
137
|
-
data = l.split(
|
138
|
+
replace: "?")
|
139
|
+
data = l.split("|").map(&:strip)
|
138
140
|
synonym_name_tsn = data[0]
|
139
141
|
accepted_name_tsn = data[1]
|
140
142
|
@synonyms[synonym_name_tsn] = accepted_name_tsn
|
@@ -167,19 +169,19 @@ module DwcaHunter
|
|
167
169
|
# 22 update_date date not null
|
168
170
|
# 23 uncertain_prnt_ind char(3)
|
169
171
|
|
170
|
-
f = open(File.join(@itis_dir,
|
172
|
+
f = open(File.join(@itis_dir, "taxonomic_units"))
|
171
173
|
f.each_with_index do |l, i|
|
172
174
|
if i % BATCH_SIZE == 0
|
173
|
-
DwcaHunter
|
174
|
-
|
175
|
+
DwcaHunter.logger_write(object_id,
|
176
|
+
"Extracted %s names" % i)
|
175
177
|
end
|
176
|
-
l.encode!(
|
177
|
-
|
178
|
+
l.encode!("UTF-8",
|
179
|
+
"ISO-8859-1",
|
178
180
|
invalid: :replace,
|
179
|
-
replace:
|
180
|
-
data = l.split("|").map
|
181
|
-
name_tsn
|
182
|
-
x1
|
181
|
+
replace: "?")
|
182
|
+
data = l.split("|").map(&:strip)
|
183
|
+
name_tsn = data[0]
|
184
|
+
x1 = data[1]
|
183
185
|
name_part1 = data[2]
|
184
186
|
x2 = data[3]
|
185
187
|
name_part2 = data[4]
|
@@ -193,16 +195,15 @@ module DwcaHunter
|
|
193
195
|
kingdom_id = data[20]
|
194
196
|
rank_id = data[21]
|
195
197
|
|
196
|
-
parent_tsn = nil if parent_tsn ==
|
198
|
+
parent_tsn = nil if parent_tsn == ""
|
197
199
|
name = [x1, name_part1, x2, name_part2,
|
198
200
|
sp_marker1, name_part3, sp_marker2, name_part4]
|
199
201
|
canonical_name = name.clone
|
200
202
|
name << @authors[author_id] if @authors[author_id]
|
201
|
-
name = name.join(
|
202
|
-
canonical_name = canonical_name.join(
|
203
|
-
rank = @ranks[kingdom_id +
|
204
|
-
|
205
|
-
''
|
203
|
+
name = name.join(" ").strip.gsub(/\s+/, " ")
|
204
|
+
canonical_name = canonical_name.join(" ").strip.gsub(/\s+/, " ")
|
205
|
+
rank = @ranks[kingdom_id + "/" + rank_id] ||
|
206
|
+
""
|
206
207
|
@names[name_tsn] = { name: name,
|
207
208
|
canonical_name: canonical_name,
|
208
209
|
status: status,
|
@@ -212,58 +213,57 @@ module DwcaHunter
|
|
212
213
|
end
|
213
214
|
|
214
215
|
def generate_dwca
|
215
|
-
DwcaHunter
|
216
|
-
|
217
|
-
@core = [[
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
@extensions << { data: [[
|
225
|
-
|
226
|
-
|
227
|
-
file_name:
|
228
|
-
row_type:
|
229
|
-
|
230
|
-
@names.keys.each_with_index do |k, i|
|
216
|
+
DwcaHunter.logger_write(object_id,
|
217
|
+
"Creating DarwinCore Archive file")
|
218
|
+
@core = [["http://rs.tdwg.org/dwc/terms/taxonID",
|
219
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
220
|
+
"http://rs.tdwg.org/dwc/terms/acceptedNameUsageID",
|
221
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
222
|
+
"http://rs.tdwg.org/ontology/voc/TaxonName#nameComplete",
|
223
|
+
"http://rs.tdwg.org/dwc/terms/taxonomicStatus",
|
224
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"]]
|
225
|
+
@extensions << { data: [["http://rs.tdwg.org/dwc/terms/taxonID",
|
226
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName",
|
227
|
+
"http://purl.org/dc/terms/language"]],
|
228
|
+
file_name: "vernacular_names.txt",
|
229
|
+
row_type: "http://rs.gbif.org/terms/1.0/VernacularName" }
|
230
|
+
@names.keys.each_with_index do |k, _i|
|
231
231
|
d = @names[k]
|
232
|
-
accepted_id = @synonyms[k]
|
232
|
+
accepted_id = @synonyms[k] || nil
|
233
233
|
parent_id = d[:parent_tsn].to_i == 0 ? nil : d[:parent_tsn]
|
234
234
|
row = [k, parent_id, accepted_id, d[:name], d[:canonical_name], d[:status], d[:rank]]
|
235
235
|
@core << row
|
236
236
|
end
|
237
237
|
|
238
|
-
@vernaculars.keys.each_with_index do |k,
|
238
|
+
@vernaculars.keys.each_with_index do |k, _i|
|
239
239
|
d = @vernaculars[k]
|
240
240
|
@extensions[0][:data] << [k, d[:name], d[:language]]
|
241
241
|
end
|
242
242
|
|
243
243
|
@eml = {
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
244
|
+
id: @uuid,
|
245
|
+
title: @title,
|
246
|
+
authors: [
|
247
|
+
{ email: "itiswebmaster@itis.gov" }
|
248
|
+
],
|
249
|
+
metadata_providers: [
|
250
|
+
{ first_name: "Dmitry",
|
251
|
+
last_name: "Mozzherin",
|
252
|
+
email: "dmozzherin@gmail.com" }
|
253
|
+
],
|
254
|
+
abstract: "The White House Subcommittee on Biodiversity and " \
|
255
|
+
"Ecosystem Dynamics has identified systematics as a " \
|
256
|
+
"research priority that is fundamental to ecosystem " \
|
257
|
+
"management and biodiversity conservation. This primary " \
|
258
|
+
"need identified by the Subcommittee requires " \
|
259
|
+
"improvements in the organization of, and access to, " \
|
260
|
+
"standardized nomenclature. ITIS (originally referred " \
|
261
|
+
"to as the Interagency Taxonomic Information System) " \
|
262
|
+
"was designed to fulfill these requirements. In the " \
|
263
|
+
"future, the ITIS will provide taxonomic data and a " \
|
264
|
+
"directory of taxonomic expertise that will support " \
|
265
|
+
"the system",
|
266
|
+
url: "http://www.itis.gov"
|
267
267
|
}
|
268
268
|
super
|
269
269
|
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DwcaHunter
|
4
|
+
class ResourceMammalDiversityDb < DwcaHunter::Resource
|
5
|
+
def initialize(opts = {})
|
6
|
+
@command = "mammal-div-db"
|
7
|
+
@title = "ASM Mammal Diversity Database"
|
8
|
+
@url = "https://mammaldiversity.org/species-account/api.php?q=*"
|
9
|
+
@uuid = "94270cdd-5424-4bb1-8324-46ccc5386dc7" # lower-case ivar: the EML hash reads @uuid, so @UUID left the dataset id nil
|
10
|
+
@download_path = File.join(Dir.tmpdir,
|
11
|
+
"dwca_hunter",
|
12
|
+
"mammal-div-db",
|
13
|
+
"data.json")
|
14
|
+
@synonyms = []
|
15
|
+
@names = []
|
16
|
+
@vernaculars = []
|
17
|
+
@extensions = []
|
18
|
+
@synonyms_hash = {}
|
19
|
+
@vernaculars_hash = {}
|
20
|
+
super(opts)
|
21
|
+
end
|
22
|
+
|
23
|
+
def download
|
24
|
+
DwcaHunter.logger_write(object_id, "Downloading")
|
25
|
+
`curl '#{@url}' -H 'User-Agent:' -o #{@download_path}`
|
26
|
+
end
|
27
|
+
|
28
|
+
def unpack; end
|
29
|
+
|
30
|
+
def make_dwca
|
31
|
+
DwcaHunter.logger_write(object_id, "Extracting data")
|
32
|
+
get_names
|
33
|
+
generate_dwca
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def get_names
|
39
|
+
Dir.chdir(@download_dir)
|
40
|
+
collect_names
|
41
|
+
end
|
42
|
+
|
43
|
+
def collect_names
|
44
|
+
@names_index = {}
|
45
|
+
decoder = HTMLEntities.new
|
46
|
+
data = File.read(File.join(@download_dir, "data.json"))
|
47
|
+
data = JSON.parse(data, symbolize_names: true)
|
48
|
+
data[:result].each_with_index do |e, _i|
|
49
|
+
e = e[1]
|
50
|
+
order = e[:dwc][:order].capitalize
|
51
|
+
order = nil if order.match(/incertae/)
|
52
|
+
family = e[:dwc][:family].capitalize
|
53
|
+
family = nil if family.match(/incertae/)
|
54
|
+
genus = e[:dwc][:genus].capitalize
|
55
|
+
genus = nil if genus.match(/incertae/)
|
56
|
+
name = {
|
57
|
+
taxon_id: e[:internal_id],
|
58
|
+
kingdom: "Animalia",
|
59
|
+
phylum: "Chordata",
|
60
|
+
klass: "Mammalia",
|
61
|
+
order: order,
|
62
|
+
family: family,
|
63
|
+
genus: genus,
|
64
|
+
name_string: "#{e[:dwc][:scientificName]} " \
|
65
|
+
"#{e[:dwc][:scientificNameAuthorship][:species]}".strip,
|
66
|
+
rank: e[:dwc][:taxonRank],
|
67
|
+
status: e[:dwc][:taxonRank],
|
68
|
+
code: "ICZN"
|
69
|
+
}
|
70
|
+
if e[:dwc][:taxonomicStatus] == "accepted"
|
71
|
+
@names << name
|
72
|
+
else
|
73
|
+
@synonyms << name
|
74
|
+
end
|
75
|
+
vern = e[:dwc][:vernacularName]
|
76
|
+
next unless vern.to_s != ""
|
77
|
+
vern = decoder.decode(vern)
|
78
|
+
vernacular = {
|
79
|
+
taxon_id: e[:id],
|
80
|
+
vern: vern,
|
81
|
+
lang: "en"
|
82
|
+
}
|
83
|
+
@vernaculars << vernacular
|
84
|
+
end
|
85
|
+
puts data[:result].size
|
86
|
+
end
|
87
|
+
|
88
|
+
def generate_dwca
|
89
|
+
DwcaHunter.logger_write(object_id,
|
90
|
+
"Creating DarwinCore Archive file")
|
91
|
+
@core = [["http://rs.tdwg.org/dwc/terms/taxonID",
|
92
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
93
|
+
"http://rs.tdwg.org/dwc/terms/kingdom",
|
94
|
+
"http://rs.tdwg.org/dwc/terms/phylum",
|
95
|
+
"http://rs.tdwg.org/dwc/terms/class",
|
96
|
+
"http://rs.tdwg.org/dwc/terms/order",
|
97
|
+
"http://rs.tdwg.org/dwc/terms/family",
|
98
|
+
"http://rs.tdwg.org/dwc/terms/genus",
|
99
|
+
"http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
|
100
|
+
@names.each do |n|
|
101
|
+
@core << [n[:taxon_id], n[:name_string],
|
102
|
+
n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
|
103
|
+
n[:genus], n[:code]]
|
104
|
+
end
|
105
|
+
@extensions << {
|
106
|
+
data: [[
|
107
|
+
"http://rs.tdwg.org/dwc/terms/taxonID",
|
108
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName",
|
109
|
+
"http://purl.org/dc/terms/language"
|
110
|
+
]],
|
111
|
+
file_name: "vernacular_names.txt",
|
112
|
+
row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
|
113
|
+
}
|
114
|
+
|
115
|
+
@vernaculars.each do |v|
|
116
|
+
@extensions[-1][:data] << [v[:taxon_id], v[:vern], v[:lang]]
|
117
|
+
end
|
118
|
+
|
119
|
+
@extensions << {
|
120
|
+
data: [[
|
121
|
+
"http://rs.tdwg.org/dwc/terms/taxonID",
|
122
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
123
|
+
"http://rs.tdwg.org/dwc/terms/taxonomicStatus"
|
124
|
+
]],
|
125
|
+
file_name: "synonyms.txt"
|
126
|
+
}
|
127
|
+
@synonyms.each do |s|
|
128
|
+
@extensions[-1][:data] << [s[:taxon_id], s[:name_string], s[:status]]
|
129
|
+
end
|
130
|
+
@eml = {
|
131
|
+
id: @uuid,
|
132
|
+
title: @title,
|
133
|
+
authors: [
|
134
|
+
{ first_name: "C. J.",
|
135
|
+
last_name: "Burgin" },
|
136
|
+
{ first_name: "J. P.",
|
137
|
+
last_name: "Colella" },
|
138
|
+
{ first_name: "P. L.",
|
139
|
+
last_name: "Kahn" },
|
140
|
+
{ first_name: "N. S.",
|
141
|
+
last_name: "Upham" }
|
142
|
+
],
|
143
|
+
metadata_providers: [
|
144
|
+
{ first_name: "Dmitry",
|
145
|
+
last_name: "Mozzherin",
|
146
|
+
email: "dmozzherin@gmail.com" }
|
147
|
+
],
|
148
|
+
abstract: "Mammal Diversity Database. 2020. www.mammaldiversity.org. " \
|
149
|
+
"American Society of Mammalogists. Accessed 2020-05-24 .",
|
150
|
+
url: @url
|
151
|
+
}
|
152
|
+
super
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|