dwca_hunter 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.byebug_history +45 -0
- data/.gitignore +5 -0
- data/.rubocop.yml +3 -2
- data/.ruby-version +1 -1
- data/Gemfile.lock +61 -83
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/dwca_hunter.gemspec +9 -9
- data/exe/dwcahunter +1 -3
- data/lib/dwca_hunter.rb +39 -8
- data/lib/dwca_hunter/resource.rb +5 -0
- data/lib/dwca_hunter/resources/aos-birds.rb +143 -0
- data/lib/dwca_hunter/resources/arctos.rb +121 -145
- data/lib/dwca_hunter/resources/clements.rb +151 -0
- data/lib/dwca_hunter/resources/eol.rb +85 -0
- data/lib/dwca_hunter/resources/freebase.rb +51 -49
- data/lib/dwca_hunter/resources/how-moore-birds.rb +168 -0
- data/lib/dwca_hunter/resources/ioc_word_bird.rb +200 -0
- data/lib/dwca_hunter/resources/ipni.rb +111 -0
- data/lib/dwca_hunter/resources/itis.rb +99 -99
- data/lib/dwca_hunter/resources/mammal_divdb.rb +155 -0
- data/lib/dwca_hunter/resources/mammal_species.rb +9 -6
- data/lib/dwca_hunter/resources/mcz.rb +123 -0
- data/lib/dwca_hunter/resources/ncbi.rb +22 -23
- data/lib/dwca_hunter/resources/opentree.rb +5 -5
- data/lib/dwca_hunter/resources/paleobiodb.rb +193 -0
- data/lib/dwca_hunter/resources/paleodb_harvester.rb +140 -0
- data/lib/dwca_hunter/resources/sherborn.rb +91 -0
- data/lib/dwca_hunter/resources/wikispecies.rb +142 -129
- data/lib/dwca_hunter/version.rb +1 -1
- metadata +46 -40
- data/files/birdlife_7.csv +0 -11862
- data/files/fishbase_taxon_cache.tsv +0 -81000
- data/files/reptile_checklist_2014_12.csv +0 -15158
- data/files/species-black.txt +0 -251
@@ -0,0 +1,85 @@
|
|
1
|
+
require "xz"
|
2
|
+
|
3
|
+
module DwcaHunter
|
4
|
+
# Resource for the Encyclopedia of Life (EOL).
#
# Reads EOL's gzipped "provider_ids.csv" dump and converts it into a
# DarwinCore Archive whose core rows are (taxonID, localID, scientificName).
# The EOL page id is used for both identifiers.
class ResourceEOL < DwcaHunter::Resource
  attr_reader :title, :abbr

  # @param opts [Hash] options passed through unchanged to
  #   DwcaHunter::Resource#initialize (presumably download/unpack flags --
  #   confirm against the parent class).
  def initialize(opts = {})
    @command = "eol"
    @title = "Encyclopedia of Life"
    @abbr = "EOL"
    @url = "https://eol.org/data/provider_ids.csv.gz"
    @uuid = "dba5f880-a40d-479b-a1ad-a646835edde4"
    @download_dir = File.join(Dir.tmpdir, "dwca_hunter", "eol")
    @download_path = File.join(@download_dir, "eol.csv.gz")
    @extensions = []
    super
  end

  # Decompresses the downloaded file via the inherited unpack_gzip helper.
  # organize_data expects the result at @download_path minus ".gz".
  def unpack
    unpack_gzip
  end

  # Builds the archive: load the CSV rows, then write core + EML.
  def make_dwca
    organize_data
    generate_dwca
  end

  private

  # Loads the unpacked CSV into @data as an array of hashes with the keys
  # :taxon_id, :local_id and :scientific_name.
  #
  # Blank CSV cells are parsed as nil, so values are coerced with to_s
  # before stripping. Rows lacking either a page id or a name cannot form
  # a usable core record and are skipped.
  #
  # @return [Array<Hash>] the collected rows (also stored in @data)
  def organize_data
    DwcaHunter.logger_write(object_id, "Organizing data")
    csv_path = @download_path.sub(/\.gz\z/, "")
    @data = []
    # CSV.foreach closes the file once iteration finishes.
    CSV.foreach(csv_path, col_sep: ",", headers: true) do |row|
      id = row["page_id"].to_s.strip
      name = row["preferred_canonical_for_page"].to_s.strip
      next if id.empty? || name.empty?

      @data << { taxon_id: id, local_id: id, scientific_name: name }
    end
    @data
  end

  # Fills @core from @data, prepares the EML metadata and hands over to the
  # parent implementation, which is expected to write the archive.
  def generate_dwca
    DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")
    core_init
    eml_init
    DwcaHunter.logger_write(object_id, "Assembling Core Data")
    @data.each.with_index(1) do |record, count|
      if (count % 10_000).zero?
        DwcaHunter.logger_write(object_id, "Core row #{count}")
      end
      @core << [record[:taxon_id], record[:local_id],
                record[:scientific_name]]
    end
    super
  end

  # Sets the EML metadata hash describing this dataset.
  def eml_init
    @eml = {
      id: @uuid,
      title: @title,
      authors: [],
      metadata_providers: [
        { first_name: "Dmitry",
          last_name: "Mozzherin" }
      ],
      abstract: "Global access to knowledge about life on Earth",
      url: "http://www.eol.org"
    }
  end

  # Starts @core with its header row of term URIs; data rows are appended
  # by generate_dwca in the same column order.
  def core_init
    @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
              "http://globalnames.org/terms/localID",
              "http://rs.tdwg.org/dwc/terms/scientificName"]]
  end
end
|
85
|
+
end
|
@@ -1,15 +1,15 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module DwcaHunter
|
4
4
|
class ResourceFreebase < DwcaHunter::Resource
|
5
5
|
def initialize(opts = {})
|
6
6
|
@command = "freebase"
|
7
|
-
@title =
|
8
|
-
@uuid =
|
7
|
+
@title = "Freebase"
|
8
|
+
@uuid = "bacd21f0-44e0-43e2-914c-70929916f257"
|
9
9
|
@download_path = File.join(Dir.tmpdir,
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
"dwca_hunter",
|
11
|
+
"freebase",
|
12
|
+
"data.json")
|
13
13
|
@data = []
|
14
14
|
@all_taxa = {}
|
15
15
|
@cleaned_taxa = {}
|
@@ -27,11 +27,11 @@ module DwcaHunter
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def download
|
30
|
-
DwcaHunter
|
31
|
-
|
30
|
+
DwcaHunter.logger_write(object_id,
|
31
|
+
"Querying freebase for species information...")
|
32
32
|
q = {
|
33
33
|
query: [{
|
34
|
-
type:
|
34
|
+
type: "/biology/organism_classification",
|
35
35
|
id: nil,
|
36
36
|
guid: nil,
|
37
37
|
name: nil,
|
@@ -41,16 +41,16 @@ module DwcaHunter
|
|
41
41
|
id: nil,
|
42
42
|
guid: nil,
|
43
43
|
scientific_name: nil,
|
44
|
-
optional: true
|
45
|
-
}
|
44
|
+
optional: true
|
45
|
+
}
|
46
46
|
}],
|
47
|
-
cursor: true
|
47
|
+
cursor: true
|
48
48
|
}
|
49
49
|
|
50
50
|
run_query(q)
|
51
51
|
|
52
52
|
data = JSON.pretty_generate @data
|
53
|
-
f = open(@download_path,
|
53
|
+
f = open(@download_path, "w:utf-8")
|
54
54
|
f.write(data)
|
55
55
|
f.close
|
56
56
|
end
|
@@ -60,31 +60,32 @@ module DwcaHunter
|
|
60
60
|
# Pages through the Freebase MQL read service, appending every returned
# record to @data.
#
# The query envelope +q+ is mutated: after each page its :cursor entry is
# replaced with the cursor returned by the service so that the next request
# continues where the previous one stopped.
#
# @param q [Hash] MQL envelope (:query plus :cursor)
# @return [nil]
def run_query(q)
  count = 0
  requests_num = 0
  loop do
    # URI.encode was removed in Ruby 3.0; encode_www_form_component escapes
    # the JSON so it is safe inside a query string.
    freebase_url = "http://api.freebase.com/api/service/mqlread?query=%s" %
                   URI.encode_www_form_component(q.to_json)
    # JSON.parse (not JSON.load) because the payload comes from the network.
    res = JSON.parse(RestClient.get(freebase_url).to_s)
    requests_num += 1
    results = res["result"]
    break if results.nil? || results.empty?

    if (requests_num % 10).zero?
      DwcaHunter.logger_write(object_id, "Received %s names" % count)
    end
    count += results.size
    @data.concat(results)
    q[:cursor] = res["cursor"]
    # A falsy cursor means there is nothing left to fetch; sending it back
    # would restart from the first page and loop indefinitely.
    break unless q[:cursor]
  end
end
|
78
79
|
|
79
80
|
def organize_data
|
80
|
-
@data = JSON.load(open(@download_path,
|
81
|
+
@data = JSON.load(open(@download_path, "r:utf-8").read)
|
81
82
|
@data.each do |d|
|
82
|
-
scientific_name = d[
|
83
|
+
scientific_name = d["scientific_name"].to_s
|
83
84
|
id = d["id"]
|
84
|
-
parent_id = d[
|
85
|
-
d[
|
85
|
+
parent_id = d["higher_classification"] ?
|
86
|
+
d["higher_classification"]["id"] :
|
86
87
|
nil
|
87
|
-
synonyms = d[
|
88
|
+
synonyms = d["synonym_scientific_name"]
|
88
89
|
@all_taxa[id] = { id: id,
|
89
90
|
parent_id: parent_id,
|
90
91
|
scientific_name: scientific_name,
|
@@ -93,6 +94,7 @@ module DwcaHunter
|
|
93
94
|
|
94
95
|
@all_taxa.each do |k, v|
|
95
96
|
next unless v[:scientific_name] && v[:scientific_name].strip != ""
|
97
|
+
|
96
98
|
parent_id = v[:parent_id]
|
97
99
|
until (@all_taxa[parent_id] &&
|
98
100
|
@all_taxa[parent_id][:scientific_name]) || parent_id.nil?
|
@@ -103,29 +105,28 @@ module DwcaHunter
|
|
103
105
|
v[:parent_id] = parent_id
|
104
106
|
@cleaned_taxa[k] = v
|
105
107
|
end
|
106
|
-
|
107
108
|
end
|
108
109
|
|
109
110
|
def generate_dwca
|
110
|
-
DwcaHunter
|
111
|
-
|
112
|
-
@core = [[
|
113
|
-
|
114
|
-
|
111
|
+
DwcaHunter.logger_write(object_id,
|
112
|
+
"Creating DarwinCore Archive file")
|
113
|
+
@core = [["http://rs.tdwg.org/dwc/terms/taxonID",
|
114
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
115
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID"]]
|
115
116
|
|
116
117
|
@extensions << { data: [[
|
117
|
-
|
118
|
-
|
119
|
-
]], file_name:
|
120
|
-
DwcaHunter
|
121
|
-
|
118
|
+
"http://rs.tdwg.org/dwc/terms/TaxonID",
|
119
|
+
"http://rs.tdwg.org/dwc/terms/scientificName"
|
120
|
+
]], file_name: "synonyms.txt" }
|
121
|
+
DwcaHunter.logger_write(object_id,
|
122
|
+
"Creating synonyms extension for DarwinCore Archive file")
|
122
123
|
count = 0
|
123
|
-
@cleaned_taxa.each do |
|
124
|
+
@cleaned_taxa.each do |_key, taxon|
|
124
125
|
count += 1
|
125
126
|
@core << [taxon[:id], taxon[:scientific_name], taxon[:parent_id]]
|
126
127
|
if count % BATCH_SIZE == 0
|
127
|
-
DwcaHunter
|
128
|
-
|
128
|
+
DwcaHunter.logger_write(object_id,
|
129
|
+
"Traversing %s extension data record" % count)
|
129
130
|
end
|
130
131
|
taxon[:synonyms].each do |name|
|
131
132
|
@extensions[-1][:data] << [taxon[:id], name]
|
@@ -134,19 +135,20 @@ module DwcaHunter
|
|
134
135
|
@eml = {
|
135
136
|
id: @uuid,
|
136
137
|
title: @title,
|
137
|
-
license:
|
138
|
+
license: "http://creativecommons.org/licenses/by-sa/3.0/",
|
138
139
|
authors: [
|
139
|
-
{ url:
|
140
|
-
|
141
|
-
|
140
|
+
{ url: "http://www.freebase.com/home" }
|
141
|
+
],
|
142
|
+
abstract: "An entity graph of people, places and things, " \
|
143
|
+
"built by a community that loves open data.",
|
142
144
|
metadata_providers: [
|
143
|
-
{ first_name:
|
144
|
-
last_name:
|
145
|
-
email:
|
146
|
-
|
145
|
+
{ first_name: "Dmitry",
|
146
|
+
last_name: "Mozzherin",
|
147
|
+
email: "dmozzherin@mbl.edu" }
|
148
|
+
],
|
149
|
+
url: "http://www.freebase.com/home"
|
147
150
|
}
|
148
151
|
super
|
149
152
|
end
|
150
|
-
|
151
153
|
end
|
152
154
|
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DwcaHunter
|
4
|
+
# Resource for the Howard and Moore Complete Checklist of the Birds of the
# World.
#
# Fetches a cached CSV copy of the checklist and converts it into a
# DarwinCore Archive: a core of names with a fixed Animalia/Chordata/Aves
# classification plus a vernacular-names extension.
class ResourceHowardMoore < DwcaHunter::Resource
  # @param opts [Hash] options passed to DwcaHunter::Resource#initialize
  def initialize(opts = {})
    @command = "how-moore-birds"
    @title = "Howard and Moore Complete Checklist of the Birds of the World"
    @url = "https://uofi.box.com/shared/static/m71m541dr5unc41xzg4y51d92b7wiy2k.csv"
    # Must be @uuid (not @UUID): generate_dwca reads @uuid for the EML id.
    @uuid = "85023fe5-bf2a-486b-bdae-3e61cefd41fd"
    @download_path = File.join(Dir.tmpdir,
                               "dwca_hunter",
                               "how-moore-birds",
                               "data.csv")
    @synonyms = []
    @names = []
    @vernaculars = []
    @extensions = []
    @synonyms_hash = {}
    @vernaculars_hash = {}
    super(opts)
  end

  # Downloads the cached CSV with curl into @download_path.
  #
  # curl is invoked with an argument vector (no shell), so a temp directory
  # or URL containing spaces or shell metacharacters is passed verbatim.
  #
  # @return [String] whatever curl wrote to stdout (normally empty)
  def download
    puts "Downloading cached version of the file."
    puts "Check https://www.howardandmoore.org/howard-and-moore-database/"
    puts "If there is a more recent edition"
    IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
  end

  # The download is already a plain CSV; nothing to unpack.
  def unpack; end

  # Builds the archive: parse the CSV, then write core, extension and EML.
  def make_dwca
    DwcaHunter.logger_write(object_id, "Extracting data")
    get_names
    generate_dwca
  end

  private

  # Switches the working directory to @download_dir and parses the CSV.
  # NOTE(review): @download_dir is not assigned in this class; presumably
  # the parent derives it from @download_path -- confirm.
  def get_names
    Dir.chdir(@download_dir)
    collect_names
  end

  # Reads data.csv and fills @names and @vernaculars.
  #
  # Taxon ids are synthetic ("gn_<row number>", starting at 1). Blank CSV
  # cells arrive as nil, hence the to_s coercions before string operations.
  def collect_names
    path = File.join(@download_dir, "data.csv")
    # CSV.foreach closes the file once iteration finishes.
    CSV.foreach(path, headers: true).with_index(1) do |row, row_number|
      taxon_id = "gn_#{row_number}"
      species = row["SPECIES_NAME"].to_s
      subspecies = row["SUB_SPECIES_NAME"].to_s
      species_au =
        "#{row['species_author']} #{row['species_rec_year']}".strip
      subspecies_au =
        "#{row['subspecies_author']} #{row['subspecies_rec_year']}".strip

      # Use the species authorship when there is no subspecies or when the
      # subspecies text already occurs inside the species name; otherwise
      # append the subspecies and its authorship.
      name_string = if subspecies.empty? || species.include?(subspecies)
                      "#{species} #{species_au}".strip
                    else
                      "#{species} #{subspecies} #{subspecies_au}".strip
                    end

      @names << { taxon_id: taxon_id,
                  name_string: name_string,
                  kingdom: "Animalia",
                  phylum: "Chordata",
                  klass: "Aves",
                  family: row["FAMILY_NAME"].to_s.capitalize,
                  genus: row["GENERA_NAME"].to_s.capitalize,
                  code: "ICZN" }

      %w[species_english_name species_english_name2].each do |column|
        vernacular = row[column].to_s
        next if vernacular.empty?

        @vernaculars << { taxon_id: taxon_id, vern: row[column], lang: "en" }
      end

      if (row_number % 10_000).zero?
        puts "Processed %s names" % row_number
      end
    end
  end

  # Copies vernaculars registered under +canonical+ in @vernaculars_hash to
  # @vernaculars for +taxon_id+. Not called anywhere within this class.
  def update_vernacular(taxon_id, canonical)
    return unless @vernaculars_hash.key?(canonical)

    @vernaculars_hash[canonical].each do |vern|
      @vernaculars << { taxon_id: taxon_id, vern: vern }
    end
  end

  # Copies synonyms registered under +canonical+ in @synonyms_hash to
  # @synonyms for +taxon_id+. Not called anywhere within this class.
  def update_synonym(taxon_id, canonical)
    return unless @synonyms_hash.key?(canonical)

    @synonyms_hash[canonical].each do |syn|
      @synonyms << { taxon_id: taxon_id, name_string: syn[:name_string],
                     status: syn[:status] }
    end
  end

  # Assembles @core, the vernacular extension and @eml, then hands over to
  # the parent implementation, which is expected to write the archive.
  def generate_dwca
    DwcaHunter.logger_write(object_id,
                            "Creating DarwinCore Archive file")
    @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
              "http://rs.tdwg.org/dwc/terms/scientificName",
              "http://rs.tdwg.org/dwc/terms/kingdom",
              "http://rs.tdwg.org/dwc/terms/phylum",
              "http://rs.tdwg.org/dwc/terms/class",
              "http://rs.tdwg.org/dwc/terms/family",
              "http://rs.tdwg.org/dwc/terms/genus",
              "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
    @names.each do |n|
      @core << [n[:taxon_id], n[:name_string],
                n[:kingdom], n[:phylum], n[:klass], n[:family],
                n[:genus], n[:code]]
    end

    vernacular_rows = [[
      "http://rs.tdwg.org/dwc/terms/taxonID",
      "http://rs.tdwg.org/dwc/terms/vernacularName",
      "http://purl.org/dc/terms/language"
    ]]
    @vernaculars.each do |v|
      vernacular_rows << [v[:taxon_id], v[:vern], v[:lang]]
    end
    @extensions << {
      data: vernacular_rows,
      file_name: "vernacular_names.txt",
      row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
    }

    @eml = {
      id: @uuid,
      title: @title,
      authors: [
        { last_name: "Christidis" }
      ],
      metadata_providers: [
        { first_name: "Dmitry",
          last_name: "Mozzherin",
          email: "dmozzherin@gmail.com" }
      ],
      abstract: "Christidis et al. 2018. The Howard and Moore Complete " \
                "Checklist of the Birds of the World, version 4.1 " \
                "(Downloadable checklist). " \
                "Accessed from https://www.howardandmoore.org.",
      url: @url
    }
    super
  end
end
|
168
|
+
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DwcaHunter
|
4
|
+
# Resource for the IOC World Bird List.
#
# Fetches a cached CSV conversion of the IOC master list and converts it
# into a DarwinCore Archive: a core of names with an
# Animalia/Chordata/Aves classification plus a vernacular-names extension.
class ResourceIOCWorldBird < DwcaHunter::Resource
  # @param opts [Hash] options passed to DwcaHunter::Resource#initialize
  def initialize(opts = {})
    @command = "ioc-world-bird"
    @title = "IOC World Bird List"
    @url = "https://uofi.box.com/shared/static/fbpuk5ghh9083nbzjdeyqtoqsvdnro01.csv"
    # Must be @uuid (not @UUID): generate_dwca reads @uuid for the EML id.
    @uuid = "6421ffec-38e3-40fb-a6d9-af27238a47a1"
    @download_path = File.join(Dir.tmpdir,
                               "dwca_hunter",
                               "ioc-bird",
                               "data.csv")
    @synonyms = []
    @names = []
    @vernaculars = []
    @extensions = []
    @synonyms_hash = {}
    @vernaculars_hash = {}
    super(opts)
  end

  # Downloads the cached CSV with curl into @download_path.
  #
  # curl is invoked with an argument vector (no shell), so a temp directory
  # or URL containing spaces or shell metacharacters is passed verbatim.
  #
  # @return [String] whatever curl wrote to stdout (normally empty)
  def download
    puts "Downloading cached and converted to csv version."
    puts "CHECK FOR NEW VERSION at"
    puts "https://www.worldbirdnames.org/ioc-lists/master-list-2/"
    puts "Use libreoffice to convert to csv."
    IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
  end

  # The download is already a plain CSV; nothing to unpack.
  def unpack; end

  # Builds the archive: parse the CSV, then write core, extension and EML.
  def make_dwca
    DwcaHunter.logger_write(object_id, "Extracting data")
    get_names
    generate_dwca
  end

  private

  # Switches the working directory to @download_dir and parses the CSV.
  # NOTE(review): @download_dir is not assigned in this class; presumably
  # the parent derives it from @download_path -- confirm.
  def get_names
    Dir.chdir(@download_dir)
    collect_names
  end

  # Reads data.csv and fills @names and @vernaculars.
  #
  # Order, family, genus and species are carried over from earlier rows
  # whenever the current row leaves the column blank. The carried species
  # is cleared after every emitted name, so a row is emitted only when a
  # species value is available for it. Taxon ids are synthetic
  # ("gn_<n>", counting emitted names from 1).
  def collect_names
    @names_index = {} # not read anywhere in this class
    order = ""
    family = ""
    genus = ""
    species = ""
    count = 0
    # CSV.foreach closes the file once iteration finishes.
    CSV.foreach(File.join(@download_dir, "data.csv"), headers: true) do |row|
      order = capitalized_or(row["Order"], order)
      family = capitalized_or(row["Family (Scientific)"], family)
      genus = capitalized_or(row["Genus"], genus)
      species_cell = row["Species (Scientific)"]
      species = species_cell unless species_cell.to_s.empty?
      next if species.to_s.empty?

      count += 1
      taxon_id = "gn_#{count}"
      name = {
        taxon_id: taxon_id,
        kingdom: "Animalia",
        phylum: "Chordata",
        klass: "Aves",
        order: order,
        family: family,
        genus: genus,
        code: "ICZN"
      }
      subspecies = row["Subspecies"].to_s
      authority = row["Authority"].to_s

      if subspecies.empty?
        authority = DwcaHunter.normalize_authors(authority) unless authority.empty?
        name[:name_string] = clean("#{genus} #{species} #{authority}")
        @names << name
        english_name = row["Species (English)"]
        unless english_name.to_s.empty?
          @vernaculars << { taxon_id: taxon_id, vern: english_name, lang: "en" }
        end
      else
        # NOTE(review): the authority is normalised only in the species
        # branch above; confirm whether subspecies should be treated alike.
        name[:name_string] =
          clean("#{genus} #{species} #{subspecies} #{authority}")
        @names << name
      end
      species = ""
    end
  end

  # Returns the capitalised +cell+ when it is non-blank, else +current+.
  def capitalized_or(cell, current)
    cell.to_s.empty? ? current : cell.capitalize
  end

  # Normalises a name string: removes dagger marks ("†"), collapses runs of
  # whitespace to single spaces and trims the ends, so removing a leading
  # or trailing dagger cannot leave stray whitespace behind.
  #
  # @param n [String] raw name string
  # @return [String] cleaned name string
  def clean(n)
    n.gsub(/†/, "").gsub(/\s+/, " ").strip
  end

  # Assembles @core, the vernacular extension and @eml, then hands over to
  # the parent implementation, which is expected to write the archive.
  def generate_dwca
    DwcaHunter.logger_write(object_id,
                            "Creating DarwinCore Archive file")
    @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
              "http://rs.tdwg.org/dwc/terms/scientificName",
              "http://rs.tdwg.org/dwc/terms/kingdom",
              "http://rs.tdwg.org/dwc/terms/phylum",
              "http://rs.tdwg.org/dwc/terms/class",
              "http://rs.tdwg.org/dwc/terms/order",
              "http://rs.tdwg.org/dwc/terms/family",
              "http://rs.tdwg.org/dwc/terms/genus",
              "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
    @names.each do |n|
      @core << [n[:taxon_id], n[:name_string],
                n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
                n[:genus], n[:code]]
    end

    vernacular_rows = [[
      "http://rs.tdwg.org/dwc/terms/taxonID",
      "http://rs.tdwg.org/dwc/terms/vernacularName",
      "http://purl.org/dc/terms/language"
    ]]
    @vernaculars.each do |v|
      vernacular_rows << [v[:taxon_id], v[:vern], v[:lang]]
    end
    @extensions << {
      data: vernacular_rows,
      file_name: "vernacular_names.txt",
      row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
    }

    authors = [
      %w[Per Alstrom], %w[Mike Blair], %w[Rauri Bowie], %w[Nigel Redman],
      %w[Jon Fjeldsa], %w[Phil Gregory], %w[Leo Joseph], %w[Peter Kovalik],
      %w[Adolfo Navarro-Siguenza], %w[David Parkin], %w[Alan Peterson],
      %w[Douglas Pratt], %w[Pam Rasmussen], %w[Frank Rheindt],
      %w[Robert Ridgely], %w[Peter Ryan], %w[George Sangster],
      %w[Dick Schodde], %w[Minturn Wright]
    ].map { |first, last| { first_name: first, last_name: last } }

    @eml = {
      id: @uuid,
      title: @title,
      authors: authors,
      metadata_providers: [
        { first_name: "Dmitry",
          last_name: "Mozzherin",
          email: "dmozzherin@gmail.com" }
      ],
      abstract: "The IOC World Bird List is an open access resource of " \
                "the international community of ornithologists.",
      url: "https://www.worldbirdnames.org"
    }
    super
  end
end
|
200
|
+
end
|