dwca_hunter 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,152 @@
1
+ # encoding: utf-8
2
+
3
module DwcaHunter
  # Resource that queries the Freebase MQL read service for organism
  # classifications and turns the result into a DarwinCore Archive.
  #
  # #download stores the raw query results as JSON in @download_path;
  # #make_dwca reads that file back, resolves the parent links and builds the
  # core and synonym tables handed to the superclass.
  class ResourceFreebase < DwcaHunter::Resource
    # @param opts [Hash] passed through unchanged to DwcaHunter::Resource
    def initialize(opts = {})
      @command = "freebase"
      @title = 'Freebase'
      @uuid = 'bacd21f0-44e0-43e2-914c-70929916f257'
      @download_path = File.join(Dir.tmpdir,
                                 'dwca_hunter',
                                 'freebase',
                                 'data.json')
      @data = []
      @all_taxa = {}
      @cleaned_taxa = {}
      @extensions = []
      super
    end

    # The download is a single JSON file, so there is nothing to unpack.
    def needs_unpack?
      false
    end

    # Builds the archive from the previously downloaded JSON file.
    def make_dwca
      organize_data
      generate_dwca
    end

    # Runs the paged MQL query and writes all collected records to
    # @download_path as pretty-printed JSON.
    def download
      DwcaHunter::logger_write(self.object_id,
                               'Querying freebase for species information...')
      q = {
        query: [{
          type: '/biology/organism_classification',
          id: nil,
          guid: nil,
          name: nil,
          scientific_name: nil,
          synonym_scientific_name: [],
          higher_classification: {
            id: nil,
            guid: nil,
            scientific_name: nil,
            optional: true,
          },
        }],
        cursor: true,
      }

      run_query(q)

      # Block form guarantees the handle is closed even if the write fails.
      File.open(@download_path, 'w:utf-8') do |f|
        f.write(JSON.pretty_generate(@data))
      end
    end

    private

    # Repeats the query, following the cursor returned by each response, and
    # appends every result record to @data.
    #
    # @param q [Hash] MQL envelope; its :cursor entry is updated in place
    def run_query(q)
      count = 0
      requests_num = 0
      loop do
        # URI.encode was removed in Ruby 3.0 and left reserved characters
        # (&, +, #) unescaped; the form-component encoder escapes them all.
        freebase_url = "http://api.freebase.com/api/service/mqlread?query=%s" %
          URI.encode_www_form_component(q.to_json)
        res = JSON.load RestClient.get(freebase_url)
        requests_num += 1
        results = res['result']
        break if results.nil? || results.empty?
        @data.concat(results)
        count += results.size
        if requests_num % 10 == 0
          DwcaHunter::logger_write(self.object_id,
                                   "Received %s names" % count)
        end
        # NOTE(review): a missing/false cursor is taken to mean "no more
        # pages"; re-sending it would restart or repeat the query.
        break unless res['cursor']
        q[:cursor] = res['cursor']
      end
    end

    # Loads the cached JSON, indexes every record by id in @all_taxa and fills
    # @cleaned_taxa with the records that have a non-blank scientific name.
    # Each kept record's :parent_id is re-pointed at its nearest ancestor that
    # is itself kept, or nil when there is none.
    def organize_data
      @data = JSON.parse(File.read(@download_path, encoding: 'utf-8'))
      @data.each do |d|
        id = d['id']
        parent = d['higher_classification']
        @all_taxa[id] = { id: id,
                          parent_id: parent && parent['id'],
                          scientific_name: d['scientific_name'].to_s,
                          # nil-safe: generate_dwca iterates over this value
                          synonyms: Array(d['synonym_scientific_name']) }
      end

      @all_taxa.each do |id, taxon|
        next if taxon[:scientific_name].strip.empty?
        taxon[:parent_id] = nearest_named_ancestor(taxon)
        @cleaned_taxa[id] = taxon
      end
    end

    # Walks up the parent chain of +taxon+ and returns the id of the first
    # ancestor that has a non-blank scientific name. Returns nil when the
    # chain ends, refers to an unknown id, or loops back on itself.
    def nearest_named_ancestor(taxon)
      visited = { taxon[:id] => true }
      parent_id = taxon[:parent_id]
      while parent_id && !visited[parent_id]
        visited[parent_id] = true
        parent = @all_taxa[parent_id]
        unless parent
          DwcaHunter::logger_write(self.object_id,
                                   "did not find parent %s" % parent_id)
          return nil
        end
        return parent_id unless parent[:scientific_name].strip.empty?
        parent_id = parent[:parent_id]
      end
      nil
    end

    # Fills @core, the synonyms extension and @eml from @cleaned_taxa, then
    # delegates to the superclass implementation.
    def generate_dwca
      DwcaHunter::logger_write(self.object_id,
                               'Creating DarwinCore Archive file')
      @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
                'http://rs.tdwg.org/dwc/terms/scientificName',
                'http://rs.tdwg.org/dwc/terms/parentNameUsageID']]

      # The Darwin Core term is "taxonID" (lower-case t), matching the core.
      synonym_rows = [[
        'http://rs.tdwg.org/dwc/terms/taxonID',
        'http://rs.tdwg.org/dwc/terms/scientificName',
      ]]
      @extensions << { data: synonym_rows, file_name: 'synonyms.txt' }
      DwcaHunter::logger_write(self.object_id,
        'Creating synonyms extension for DarwinCore Archive file')
      count = 0
      @cleaned_taxa.each_value do |taxon|
        count += 1
        @core << [taxon[:id], taxon[:scientific_name], taxon[:parent_id]]
        if count % BATCH_SIZE == 0
          DwcaHunter::logger_write(self.object_id,
            "Traversing %s extension data record" % count)
        end
        Array(taxon[:synonyms]).each do |name|
          synonym_rows << [taxon[:id], name]
        end
      end
      @eml = {
        id: @uuid,
        title: @title,
        license: 'http://creativecommons.org/licenses/by-sa/3.0/',
        authors: [
          { url: 'http://www.freebase.com/home' }],
        abstract: 'An entity graph of people, places and things, ' +
          'built by a community that loves open data.',
        metadata_providers: [
          { first_name: 'Dmitry',
            last_name: 'Mozzherin',
            email: 'dmozzherin@mbl.edu' }],
        url: 'http://www.freebase.com/home'
      }
      super
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # encoding: utf-8
2
module DwcaHunter
  # Resource that converts the GNUB name export into a DarwinCore Archive.
  #
  # NOTE(review): @download_path ends in .tar.gz while @url points at a .zip
  # and #unpack calls unpack_zip -- confirm the file name is only cosmetic.
  class ResourceGNUB < DwcaHunter::Resource
    # @param opts [Hash] passed through to DwcaHunter::Resource
    def initialize(opts = {})
      @command = 'gnub'
      @title = 'GNUB'
      @url = 'http://gnub.org/datadump/gni_export.zip'
      # Must be @uuid (not @UUID): generate_dwca reads @uuid for the EML id.
      @uuid = 'd34ed224-78e7-485d-a478-adc2558a0f68'
      @download_path = File.join(Dir.tmpdir,
                                 'dwca_hunter',
                                 'gnub',
                                 'data.tar.gz')
      @ranks = {}
      @kingdoms = {}
      @authors = {}
      @vernaculars = {}
      @synonyms = {}
      @synonym_of = {}
      @names = []
      @extensions = []
      super(opts)
      @gnub_dir = File.join(@download_dir, 'gnub')
    end

    # Extracts the downloaded archive with the inherited zip unpacker.
    def unpack
      unpack_zip
    end

    # Parses the extracted export and builds the archive.
    def make_dwca
      DwcaHunter::logger_write(self.object_id, 'Extracting data')
      get_names
      generate_dwca
    end

    private

    # Reads the first *.txt file found in @download_dir (tab separated, first
    # line is a header) and appends one hash per data row to @names.
    #
    # Columns used: 0 protolog, 1 protolog path, 2 name string, 3 rank,
    # 4 numeric id of the nomenclatural code.
    def get_names
      codes = get_codes
      file = Dir.entries(@download_dir).grep(/txt$/).first
      raise "No .txt file found in #{@download_dir}" unless file
      # File.foreach with a block closes the file when iteration ends.
      File.foreach(File.join(@download_dir, file)).with_index do |line, i|
        next if i == 0 || (data = line.strip) == ''
        data = data.split("\t")
        # String#split drops trailing empty columns, so later fields may be
        # nil; treat them as empty strings instead of crashing.
        protolog = data[0].to_s.downcase
        protolog_path = data[1].to_s.downcase
        name_string = data[2].to_s
        rank = data[3].to_s
        code = codes[data[4].to_i]
        taxon_id = UUID.create_v5(name_string +
                                  protolog_path +
                                  rank, GNA_NAMESPACE)
        @names << { taxon_id: taxon_id,
                    name_string: name_string,
                    protolog: protolog,
                    protolog_path: protolog_path,
                    code: code,
                    rank: rank }
      end
    end

    # Fetches the list of nomenclatural codes from the resolver service.
    #
    # @return [Hash] code id => code label
    def get_codes
      codes_url = 'http://resolver.globalnames.org/nomenclatural_codes.json'
      payload = RestClient.get(codes_url)
      JSON.parse(payload, symbolize_names: true).
        each_with_object({}) { |c, res| res[c[:id]] = c[:code] }
    end

    # Fills @core and @eml from @names, then delegates to the superclass.
    def generate_dwca
      DwcaHunter::logger_write(self.object_id,
                               'Creating DarwinCore Archive file')
      @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
                'http://rs.tdwg.org/dwc/terms/originalNameUsageID',
                'http://globalnames.org/terms/originalNameUsageIDPath',
                'http://rs.tdwg.org/dwc/terms/scientificName',
                'http://rs.tdwg.org/dwc/terms/nomenclaturalCode',
                'http://rs.tdwg.org/dwc/terms/taxonRank']]
      @names.each do |n|
        # Value order must follow the header above: the protolog path goes
        # before the name string.
        @core << [n[:taxon_id], n[:protolog], n[:protolog_path],
                  n[:name_string], n[:code], n[:rank]]
      end
      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { email: 'deepreef@bishopmuseum.org' }
        ],
        metadata_providers: [
          { first_name: 'Dmitry',
            last_name: 'Mozzherin',
            email: 'dmozzherin@gmail.com' }
        ],
        abstract: 'Global Names Usage Bank',
        url: 'http://www.zoobank.org'
      }
      super
    end
  end
end
101
+
@@ -0,0 +1,271 @@
1
+ # encoding: utf-8
2
module DwcaHunter
  # Resource that converts the ITIS table dump (pipe-separated text files)
  # into a DarwinCore Archive with a vernacular-names extension.
  class ResourceITIS < DwcaHunter::Resource
    # @param opts [Hash] passed through to DwcaHunter::Resource
    def initialize(opts = {})
      @command = 'itis'
      @title = 'ITIS'
      @url = 'https://www.itis.gov/downloads/itisMySQLTables.tar.gz'
      @uuid = '5d066e84-e512-4a2f-875c-0a605d3d9f35'
      @download_path = File.join(Dir.tmpdir,
                                 'dwca_hunter',
                                 'itis',
                                 'data.tar.gz')
      @ranks = {}
      @kingdoms = {}
      @authors = {}
      @vernaculars = {}
      @synonyms = {}
      @synonym_of = {}
      @names = {}
      @extensions = []
      super(opts)
      @itis_dir = File.join(@download_dir, 'itis')
    end

    # Extracts the tarball and renames the extracted itisMySQL* directory to
    # @itis_dir.
    def unpack
      unpack_tar
      dir = Dir.entries(@download_dir).find { |e| e.match(/itisMySQL/) }
      raise "No itisMySQL* directory found in #{@download_dir}" unless dir
      FileUtils.mv(File.join(@download_dir, dir), @itis_dir)

      # Create a file with the same name as the directory we extracted.
      FileUtils.touch(File.join(@itis_dir, 'version_' + dir))
    end

    # Loads every lookup table, then the names, then builds the archive.
    def make_dwca
      DwcaHunter::logger_write(self.object_id, 'Extracting data')
      get_ranks
      get_kingdoms
      get_authors
      get_vernaculars
      get_synonyms
      get_names
      generate_dwca
    end

    private

    # Yields the stripped, pipe-separated fields of every non-blank line of
    # the table file +table+ inside @itis_dir. Lines are transcoded from
    # ISO-8859-1 to UTF-8 first. When +progress_label+ is given, a progress
    # message is logged every BATCH_SIZE lines. The file is closed when the
    # iteration finishes or raises.
    def each_table_row(table, progress_label = nil)
      File.open(File.join(@itis_dir, table), 'rb') do |f|
        f.each_line.with_index do |line, i|
          if progress_label && i % BATCH_SIZE == 0
            DwcaHunter::logger_write(self.object_id,
                                     "Extracted #{i} #{progress_label}")
          end
          line.encode!('UTF-8',
                       'ISO-8859-1',
                       invalid: :replace,
                       replace: '?')
          fields = line.split('|').map { |d| d.strip }
          next if fields.all? { |d| d.empty? }
          yield fields
        end
      end
    end

    # Fills @ranks with "kingdom_id/rank_id" => rank name.
    def get_ranks
      # 0 kingdom_id integer not null
      # 1 rank_id smallint not null
      # 2 rank_name char(15) not null
      # 3 dir_parent_rank_id smallint not null
      # 4 req_parent_rank_id smallint not null
      # 5 update_date date not null
      each_table_row('taxon_unit_types') do |row|
        @ranks["#{row[0]}/#{row[1]}"] = row[2].to_s
      end
    end

    # Fills @kingdoms with kingdom id => kingdom name.
    def get_kingdoms
      # 0 kingdom_id serial not null
      # 1 kingdom_name char(10) not null
      # 2 update_date date not null
      each_table_row('kingdoms') do |row|
        @kingdoms[row[0]] = row[1].to_s
      end
    end

    # Fills @authors with author id => authorship string.
    def get_authors
      # 0 taxon_author_id serial not null
      # 1 taxon_author varchar(100,30) not null
      # 2 update_date date not null
      # 3 kingdom_id smallint not null
      each_table_row('taxon_authors_lkp') do |row|
        @authors[row[0]] = row[1].to_s
      end
    end

    # Fills @vernaculars with tsn => array of { name:, language: } hashes.
    # A list is kept per TSN because the table key is
    # (tsn, vernacular_name, language); assigning a single hash per TSN would
    # drop all but the last common name of a taxon.
    def get_vernaculars
      # 0 tsn integer not null
      # 1 vernacular_name varchar(80,5) not null
      # 2 language varchar(15) not null
      # 3 approved_ind char(1)
      # 4 update_date date not null
      # 5 primary key (tsn,vernacular_name,language)
      #   constraint "itis".vernaculars_key
      each_table_row('vernaculars', 'vernacular names') do |row|
        name_tsn, string, language = row
        language = 'Common name' if language == 'unspecified'
        (@vernaculars[name_tsn] ||= []) << { name: string,
                                             language: language }
      end
    end

    # Fills @synonyms with synonym tsn => accepted tsn.
    def get_synonyms
      # 0 tsn integer not null
      # 1 tsn_accepted integer not null
      # 2 update_date date not null
      each_table_row('synonym_links', 'synonyms') do |row|
        synonym_name_tsn, accepted_name_tsn = row
        @synonyms[synonym_name_tsn] = accepted_name_tsn
      end
    end

    # Fills @names with tsn => { name:, canonical_name:, status:,
    # parent_tsn:, rank: }. :name is the canonical name followed by the
    # authorship looked up in @authors, when there is one.
    def get_names
      # 0 tsn serial not null
      # 1 unit_ind1 char(1)
      # 2 unit_name1 char(35) not null
      # 3 unit_ind2 char(1)
      # 4 unit_name2 varchar(35)
      # 5 unit_ind3 varchar(7)
      # 6 unit_name3 varchar(35)
      # 7 unit_ind4 varchar(7)
      # 8 unit_name4 varchar(35)
      # 9 unnamed_taxon_ind char(1)
      # 10 usage varchar(12,5) not null
      # 11 unaccept_reason varchar(50,9)
      # 12 credibility_rtng varchar(40,17) not null
      # 13 completeness_rtng char(10)
      # 14 currency_rating char(7)
      # 15 phylo_sort_seq smallint
      # 16 initial_time_stamp datetime year to second not null
      # 17 parent_tsn integer
      # 18 taxon_author_id integer
      # 19 hybrid_author_id integer
      # 20 kingdom_id smallint not null
      # 21 rank_id smallint not null
      # 22 update_date date not null
      # 23 uncertain_prnt_ind char(3)
      each_table_row('taxonomic_units', 'names') do |data|
        name_tsn = data[0]
        name_parts = data[1..8]
        status = data[10]
        parent_tsn = data[17]
        author = @authors[data[18]]
        rank_key = "#{data[20]}/#{data[21]}"

        parent_tsn = nil if parent_tsn == ''
        canonical_name = name_parts.join(' ').strip.gsub(/\s+/, ' ')
        full_parts = author ? name_parts + [author] : name_parts
        name = full_parts.join(' ').strip.gsub(/\s+/, ' ')
        @names[name_tsn] = { name: name,
                             canonical_name: canonical_name,
                             status: status,
                             parent_tsn: parent_tsn,
                             rank: @ranks[rank_key] || '' }
      end
    end

    # Fills @core, the vernacular extension and @eml from the loaded tables,
    # then delegates to the superclass.
    def generate_dwca
      DwcaHunter::logger_write(self.object_id,
                               'Creating DarwinCore Archive file')
      @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
                'http://rs.tdwg.org/dwc/terms/parentNameUsageID',
                'http://rs.tdwg.org/dwc/terms/acceptedNameUsageID',
                'http://rs.tdwg.org/dwc/terms/scientificName',
                'http://rs.tdwg.org/ontology/voc/TaxonName#nameComplete',
                'http://rs.tdwg.org/dwc/terms/taxonomicStatus',
                'http://rs.tdwg.org/dwc/terms/taxonRank']]
      vernacular_rows = [['http://rs.tdwg.org/dwc/terms/taxonID',
                          'http://rs.tdwg.org/dwc/terms/vernacularName',
                          'http://purl.org/dc/terms/language']]
      @extensions << { data: vernacular_rows,
                       file_name: 'vernacular_names.txt',
                       row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
                     }
      @names.each do |tsn, d|
        # A parent tsn of 0 (or a non-numeric value) means "no parent".
        parent_id = d[:parent_tsn].to_i == 0 ? nil : d[:parent_tsn]
        @core << [tsn, parent_id, @synonyms[tsn], d[:name],
                  d[:canonical_name], d[:status], d[:rank]]
      end

      @vernaculars.each do |tsn, entries|
        entries.each do |v|
          vernacular_rows << [tsn, v[:name], v[:language]]
        end
      end

      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { email: 'itiswebmaster@itis.gov' }
        ],
        metadata_providers: [
          { first_name: 'Dmitry',
            last_name: 'Mozzherin',
            email: 'dmozzherin@gmail.com' }
        ],
        abstract: 'The White House Subcommittee on Biodiversity and ' +
          'Ecosystem Dynamics has identified systematics as a ' +
          'research priority that is fundamental to ecosystem ' +
          'management and biodiversity conservation. This primary ' +
          'need identified by the Subcommittee requires ' +
          'improvements in the organization of, and access to, ' +
          'standardized nomenclature. ITIS (originally referred ' +
          'to as the Interagency Taxonomic Information System) ' +
          'was designed to fulfill these requirements. In the ' +
          'future, the ITIS will provide taxonomic data and a ' +
          'directory of taxonomic expertise that will support ' +
          'the system',
        url: 'http://www.itis.gov'
      }
      super
    end
  end
end
@@ -0,0 +1,179 @@
1
module DwcaHunter
  # Resource that converts the "Mammal Species of the World" CSV export into
  # a DarwinCore Archive with a vernacular-names extension.
  #
  # Taxon ids are generated locally from the running counter @count; id 1 is
  # reserved for the root class Mammalia.
  class ResourceMammalSpecies < DwcaHunter::Resource
    # CSV columns holding the classification, from highest to lowest rank.
    HIERARCHY_RANKS = [:order, :suborder, :infraorder, :superfamily, :family,
                       :subfamily, :tribe, :genus, :subgenus,
                       :species, :subspecies].freeze

    # @param opts [Hash] passed through unchanged to DwcaHunter::Resource
    def initialize(opts = {})
      @command = "mammal-species"
      @title = "The Mammal Species of The World"
      @uuid = "464dafec-1037-432d-8449-c0b309e0a030"
      @data = []
      @extensions = []
      @count = 1
      @clades = {"Mammalia" => { rank: "class", id: @count}}
      @url = "http://www.departments.bucknell.edu"\
             "/biology/resources/msw3/export.asp"
      @download_path = File.join(Dir.tmpdir, "dwca_hunter",
                                 "mammalsp", "msw3-all.csv")
      super
    end

    # The download is a plain CSV file, so there is nothing to unpack.
    def needs_unpack?
      false
    end

    # Re-encodes the CSV, loads it and builds the archive.
    def make_dwca
      DwcaHunter::logger_write(self.object_id, "Extracting data")
      encode
      collect_data
      generate_dwca
    end

    # Fetches the CSV export into @download_path.
    def download
      DwcaHunter::logger_write(self.object_id, "Downloading file -- "\
                               "it will take some time...")
      # Read the ivar set in #initialize so this does not depend on an
      # accessor defined outside this file.
      dlr = DwcaHunter::Downloader.new(@url, @download_path)
      dlr.download
    end

    private

    # Converts the downloaded file from Latin-1 to UTF-8.
    # NOTE(review): collect_data expects the result at
    # @download_path + ".utf_8" -- confirm against DwcaHunter::Encoding.
    def encode
      DwcaHunter::Encoding.latin1_to_utf8(@download_path)
    end

    # Loads every CSV row into @data as a hash keyed by symbolized header.
    def collect_data
      # CSV.foreach takes the options as keywords (a positional hash is
      # rejected on Ruby 3) and closes the file when done.
      CSV.foreach(@download_path + ".utf_8",
                  headers: true, header_converters: :symbol) do |row|
        @data << row.to_hash
      end
    end

    # Fills @core, the vernacular extension and @eml from @data, then
    # delegates to the superclass.
    def generate_dwca
      DwcaHunter::logger_write(self.object_id,
                               'Creating DarwinCore Archive file')
      core_init
      extensions_init
      eml_init
      @data.each do |rec|
        taxon = process_hierarchy(rec)
        # nil means the record's own taxon could not be placed.
        next unless taxon
        process_vernaculars(rec, taxon)
        process_synonyms(rec, taxon)
      end
      super
    end

    # Adds the record's common name, if any, to the vernacular extension.
    #
    # @param rec [Hash] CSV record
    # @param taxon [Array] core row of the record's taxon
    def process_vernaculars(rec, taxon)
      return if rec[:commonname].to_s == ""
      taxon_id = taxon[0]
      lang = "en"
      # U+0092 is replaced with a plain apostrophe.
      name = rec[:commonname].gsub("\u{0092}", "'")
      @extensions[0][:data] << [taxon_id, name, lang]
    end

    # Adds one "synonym" core row per entry of the record's semicolon
    # separated :synonyms field. Only species and subspecies are handled.
    # Entries starting with a lower-case letter are prefixed with the
    # record's genus; entries containing <u> markup are skipped.
    #
    # @param rec [Hash] CSV record
    # @param taxon [Array] core row of the accepted taxon
    def process_synonyms(rec, taxon)
      accepted_id = taxon[0]
      parent_id = taxon[2]
      rank = taxon[-1]
      return unless ['species', 'subspecies'].include? rank
      # to_s: an empty CSV field is parsed as nil.
      synonyms = rec[:synonyms].to_s.gsub(/\.$/, "").
        gsub(/<[\/ib]+>/, "").gsub(/[\s]+/, " ").split(";")
      synonyms.each do |s|
        s = s.strip
        # Blank fragments (e.g. from ";;") must not become nameless rows.
        next if s.empty? || s.match(/<u>/)
        s = rec[:genus] + " " + s if s.match(/^[a-z]/)
        @count += 1
        @core << [@count, nil, parent_id, accepted_id, s, "synonym", rank]
      end
    end

    # Appends the record's author and date to the scientific name of the most
    # recently added core row and stores the record's id as its local id.
    # +rank+ is accepted for interface compatibility and not used.
    def process_name(rec, rank)
      row = @core.last
      name = [row[4], rec[:author], rec[:date]]
      row[4] = name.join(" ").gsub(/[\s]+/, " ").strip
      row[1] = rec[:id]
    end

    # Walks the record's classification from order down to subspecies,
    # creating a core row for every clade not seen before.
    #
    # @param rec [Hash] CSV record
    # @return [Array, nil] the core row of the record's own taxon (the clade
    #   at the rank named by rec[:taxonlevel]), whether it was just created
    #   or already existed; nil when no such clade is present in the record
    def process_hierarchy(rec)
      parent_id = @clades["Mammalia"][:id]
      row_rank = rec[:taxonlevel].to_s.downcase.to_sym
      is_row_rank = false
      HIERARCHY_RANKS.each do |rank|
        is_row_rank = true if rank == row_rank
        # to_s: an empty CSV field is parsed as nil.
        clade = rec[rank].to_s
        clade = clade.capitalize if clade.match(/^[A-Z]+$/)
        next if clade == ""
        clade = adjust_clade(rec, rank, clade)
        known = @clades[clade]
        if known
          clade_id = known[:id]
          if rank == row_rank
            return known[:row] || @core.find { |row| row[0] == clade_id }
          end
        else
          @count += 1
          clade_id = @count
          @core << [clade_id, nil, parent_id, clade_id, clade, nil, rank.to_s]
          # :row lets a later duplicate of this taxon find its core row.
          @clades[clade] = { id: clade_id, rank: rank, row: @core.last }
          if is_row_rank
            process_name(rec, rank)
            return @core.last
          end
        end
        parent_id = clade_id
      end
      nil
    end

    # Returns the clade name to use for +rank+: for species and subspecies
    # the genus (and species) epithets are prepended; other ranks are
    # returned unchanged.
    def adjust_clade(rec, rank, clade)
      return clade unless [:species, :subspecies].include? rank
      parts = [rec[:genus], rec[:species]]
      parts << rec[:subspecies] if rank == :subspecies
      parts.join(" ").gsub(/[\s]+/, " ").strip
    end

    # Sets @eml, the metadata describing the data source.
    def eml_init
      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "Don",
            last_name: "Wilson" },
          { first_name: "DeeAnn",
            last_name: "Reeder" },
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "Mammal Species of the World, 3rd edition (MSW3) is "\
          "a database of mammalian taxonomy, based upon the 2005 book "\
          "Mammal Species of the World. A Taxonomic and Geographic Reference "\
          "(3rd ed). Don E. Wilson & DeeAnn M. Reeder (editors).",
        url: "http://www.vertebrates.si.edu/msw/mswcfapp/msw/index.cfm"
      }
    end

    # Sets @core to the header row plus the root row for Mammalia.
    def core_init
      @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
                'http://globalnames.org/terms/localID',
                'http://rs.tdwg.org/dwc/terms/parentNameUsageID',
                'http://rs.tdwg.org/dwc/terms/acceptedNameUsageID',
                'http://rs.tdwg.org/dwc/terms/scientificName',
                'http://rs.tdwg.org/dwc/terms/taxonomicStatus',
                'http://rs.tdwg.org/dwc/terms/taxonRank']]
      m = @clades["Mammalia"]
      @core << [m[:id], nil, nil, m[:id], "Mammalia", nil, "class"]
    end

    # Appends the (initially header-only) vernacular-names extension.
    def extensions_init
      @extensions << { data: [['http://rs.tdwg.org/dwc/terms/taxonID',
                               'http://rs.tdwg.org/dwc/terms/vernacularName',
                               'http://purl.org/dc/terms/language']],
                       file_name: 'vernacular_names.txt',
                       row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
                     }
    end
  end
end