dwca_hunter 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,152 @@
1
+ # encoding: utf-8
2
+
3
+ module DwcaHunter
4
+ class ResourceFreebase < DwcaHunter::Resource
5
+ def initialize(opts = {})
6
+ @command = "freebase"
7
+ @title = 'Freebase'
8
+ @uuid = 'bacd21f0-44e0-43e2-914c-70929916f257'
9
+ @download_path = File.join(Dir.tmpdir,
10
+ 'dwca_hunter',
11
+ 'freebase',
12
+ 'data.json')
13
+ @data = []
14
+ @all_taxa = {}
15
+ @cleaned_taxa = {}
16
+ @extensions = []
17
+ super
18
+ end
19
+
20
+ def needs_unpack?
21
+ false
22
+ end
23
+
24
+ def make_dwca
25
+ organize_data
26
+ generate_dwca
27
+ end
28
+
29
+ def download
30
+ DwcaHunter::logger_write(self.object_id,
31
+ 'Querying freebase for species information...')
32
+ q = {
33
+ query: [{
34
+ type: '/biology/organism_classification',
35
+ id: nil,
36
+ guid: nil,
37
+ name: nil,
38
+ scientific_name: nil,
39
+ synonym_scientific_name: [],
40
+ higher_classification: {
41
+ id: nil,
42
+ guid: nil,
43
+ scientific_name: nil,
44
+ optional: true,
45
+ },
46
+ }],
47
+ cursor: true,
48
+ }
49
+
50
+ run_query(q)
51
+
52
+ data = JSON.pretty_generate @data
53
+ f = open(@download_path, 'w:utf-8')
54
+ f.write(data)
55
+ f.close
56
+ end
57
+
58
+ private
59
+
60
+ def run_query(q)
61
+ count = 0
62
+ requests_num = 0
63
+ while true
64
+ freebase_url = "http://api.freebase.com/api/service/mqlread?query=%s" %
65
+ URI.encode(q.to_json)
66
+ res = JSON.load RestClient.get(freebase_url)
67
+ requests_num += 1
68
+ break if res['result'] == nil || res['result'].empty?
69
+ if requests_num % 10 == 0
70
+ DwcaHunter::logger_write(self.object_id,
71
+ "Received %s names" % count)
72
+ end
73
+ count += res['result'].size
74
+ res['result'].each { |d| @data << d }
75
+ q[:cursor] = res['cursor']
76
+ end
77
+ end
78
+
79
+ def organize_data
80
+ @data = JSON.load(open(@download_path, 'r:utf-8').read)
81
+ @data.each do |d|
82
+ scientific_name = d['scientific_name'].to_s
83
+ id = d["id"]
84
+ parent_id = d['higher_classification'] ?
85
+ d['higher_classification']["id"] :
86
+ nil
87
+ synonyms = d['synonym_scientific_name']
88
+ @all_taxa[id] = { id: id,
89
+ parent_id: parent_id,
90
+ scientific_name: scientific_name,
91
+ synonyms: synonyms }
92
+ end
93
+
94
+ @all_taxa.each do |k, v|
95
+ next unless v[:scientific_name] && v[:scientific_name].strip != ""
96
+ parent_id = v[:parent_id]
97
+ until (@all_taxa[parent_id] &&
98
+ @all_taxa[parent_id][:scientific_name]) || parent_id.nil?
99
+ puts "did not find parent %s" % parent_id
100
+ parent_id = @all_taxa[parent_id]
101
+ end
102
+ parent_id = nil if v[:id] == parent_id
103
+ v[:parent_id] = parent_id
104
+ @cleaned_taxa[k] = v
105
+ end
106
+
107
+ end
108
+
109
+ def generate_dwca
110
+ DwcaHunter::logger_write(self.object_id,
111
+ 'Creating DarwinCore Archive file')
112
+ @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
113
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
114
+ 'http://rs.tdwg.org/dwc/terms/parentNameUsageID']]
115
+
116
+ @extensions << { data: [[
117
+ 'http://rs.tdwg.org/dwc/terms/TaxonID',
118
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
119
+ ]], file_name: 'synonyms.txt' }
120
+ DwcaHunter::logger_write(self.object_id,
121
+ 'Creating synonyms extension for DarwinCore Archive file')
122
+ count = 0
123
+ @cleaned_taxa.each do |key, taxon|
124
+ count += 1
125
+ @core << [taxon[:id], taxon[:scientific_name], taxon[:parent_id]]
126
+ if count % BATCH_SIZE == 0
127
+ DwcaHunter::logger_write(self.object_id,
128
+ "Traversing %s extension data record" % count)
129
+ end
130
+ taxon[:synonyms].each do |name|
131
+ @extensions[-1][:data] << [taxon[:id], name]
132
+ end
133
+ end
134
+ @eml = {
135
+ id: @uuid,
136
+ title: @title,
137
+ license: 'http://creativecommons.org/licenses/by-sa/3.0/',
138
+ authors: [
139
+ { url: 'http://www.freebase.com/home' }],
140
+ abstract: 'An entity graph of people, places and things, ' +
141
+ 'built by a community that loves open data.',
142
+ metadata_providers: [
143
+ { first_name: 'Dmitry',
144
+ last_name: 'Mozzherin',
145
+ email: 'dmozzherin@mbl.edu' }],
146
+ url: 'http://www.freebase.com/home'
147
+ }
148
+ super
149
+ end
150
+
151
+ end
152
+ end
@@ -0,0 +1,101 @@
1
+ # encoding: utf-8
2
+ module DwcaHunter
3
+ class ResourceGNUB < DwcaHunter::Resource
4
+ def initialize(opts = {})
5
+ @command = 'gnub'
6
+ @title = 'GNUB'
7
+ @url = 'http://gnub.org/datadump/gni_export.zip'
8
+ @UUID = 'd34ed224-78e7-485d-a478-adc2558a0f68'
9
+ @download_path = File.join(Dir.tmpdir,
10
+ 'dwca_hunter',
11
+ 'gnub',
12
+ 'data.tar.gz')
13
+ @ranks = {}
14
+ @kingdoms = {}
15
+ @authors = {}
16
+ @vernaculars = {}
17
+ @synonyms = {}
18
+ @synonym_of = {}
19
+ @names = []
20
+ @extensions = []
21
+ super(opts)
22
+ @gnub_dir = File.join(@download_dir, 'gnub')
23
+ end
24
+
25
+ def unpack
26
+ unpack_zip
27
+ end
28
+
29
+ def make_dwca
30
+ DwcaHunter::logger_write(self.object_id, 'Extracting data')
31
+ get_names
32
+ generate_dwca
33
+ end
34
+
35
+ private
36
+
37
+ def get_names
38
+ codes = get_codes
39
+ file = Dir.entries(@download_dir).grep(/txt$/).first
40
+ open(File.join(@download_dir, file)).each_with_index do |line, i|
41
+ next if i == 0 || (data = line.strip) == ''
42
+ data = data.split("\t")
43
+ protolog = data[0].downcase
44
+ protolog_path = data[1].downcase
45
+ name_string = data[2]
46
+ rank = data[3]
47
+ code = codes[data[4].to_i]
48
+ taxon_id = UUID.create_v5(name_string +
49
+ protolog_path +
50
+ rank, GNA_NAMESPACE)
51
+ @names << { taxon_id: taxon_id,
52
+ name_string: name_string,
53
+ protolog: protolog,
54
+ protolog_path: protolog_path,
55
+ code: code,
56
+ rank: rank }
57
+ end
58
+ end
59
+
60
+ def get_codes
61
+ codes_url = 'http://resolver.globalnames.org/nomenclatural_codes.json'
62
+ codes = RestClient.get(codes_url)
63
+ codes = JSON.parse(codes, symbolize_names: true)
64
+ codes.inject({}) do |res, c|
65
+ res[c[:id]] = c[:code]
66
+ res
67
+ end
68
+ end
69
+
70
+ def generate_dwca
71
+ DwcaHunter::logger_write(self.object_id,
72
+ 'Creating DarwinCore Archive file')
73
+ @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
74
+ 'http://rs.tdwg.org/dwc/terms/originalNameUsageID',
75
+ 'http://globalnames.org/terms/originalNameUsageIDPath',
76
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
77
+ 'http://rs.tdwg.org/dwc/terms/nomenclaturalCode',
78
+ 'http://rs.tdwg.org/dwc/terms/taxonRank']]
79
+ @names.each do |n|
80
+ @core << [n[:taxon_id], n[:protolog], n[:name_string],
81
+ n[:protolog_path], n[:code], n[:rank]]
82
+ end
83
+ @eml = {
84
+ id: @uuid,
85
+ title: @title,
86
+ authors: [
87
+ {email: 'deepreef@bishopmuseum.org'}
88
+ ],
89
+ metadata_providers: [
90
+ { first_name: 'Dmitry',
91
+ last_name: 'Mozzherin',
92
+ email: 'dmozzherin@gmail.com' }
93
+ ],
94
+ abstract: 'Global Names Usage Bank',
95
+ url: 'http://www.zoobank.org'
96
+ }
97
+ super
98
+ end
99
+ end
100
+ end
101
+
@@ -0,0 +1,271 @@
1
+ # encoding: utf-8
2
+ module DwcaHunter
3
+ class ResourceITIS < DwcaHunter::Resource
4
+ def initialize(opts = {})
5
+ @command = 'itis'
6
+ @title = 'ITIS'
7
+ @url = 'https://www.itis.gov/downloads/itisMySQLTables.tar.gz'
8
+ @uuid = '5d066e84-e512-4a2f-875c-0a605d3d9f35'
9
+ @download_path = File.join(Dir.tmpdir,
10
+ 'dwca_hunter',
11
+ 'itis',
12
+ 'data.tar.gz')
13
+ @ranks = {}
14
+ @kingdoms = {}
15
+ @authors = {}
16
+ @vernaculars = {}
17
+ @synonyms = {}
18
+ @synonym_of = {}
19
+ @names = {}
20
+ @extensions = []
21
+ super(opts)
22
+ @itis_dir = File.join(@download_dir, 'itis')
23
+ end
24
+
25
+ def unpack
26
+ unpack_tar
27
+ dir = Dir.entries(@download_dir).select {|e| e.match(/itisMySQL/)}[0]
28
+ FileUtils.mv(File.join(@download_dir, dir), @itis_dir)
29
+
30
+ # Create a file with the same name as the directory we extracted.
31
+ FileUtils.touch(File.join(@itis_dir, 'version_' + dir))
32
+ end
33
+
34
+ def make_dwca
35
+ DwcaHunter::logger_write(self.object_id, 'Extracting data')
36
+ get_ranks
37
+ get_kingdoms
38
+ get_authors
39
+ get_vernaculars
40
+ get_synonyms
41
+ get_names
42
+ generate_dwca
43
+ end
44
+
45
+ private
46
+ def get_ranks
47
+ # 0 kingdom_id integer not null
48
+ # 1 rank_id smallint not null
49
+ # 2 rank_name char(15) not null
50
+ # 3 dir_parent_rank_id smallint not null
51
+ # 4 req_parent_rank_id smallint not null
52
+ # 5 update_date date not null
53
+ rank_file = File.join(@itis_dir, 'taxon_unit_types')
54
+ f = open(rank_file, 'r:utf-8')
55
+ f.each do |l|
56
+ l.encode!('UTF-8',
57
+ 'ISO-8859-1',
58
+ invalid: :replace,
59
+ replace: '?')
60
+ row = l.strip.split('|')
61
+ @ranks[row[0].strip + '/' + row[1].strip] = row[2].strip
62
+ end
63
+ end
64
+
65
+ def get_kingdoms
66
+ # 0 kingdom_id serial not null
67
+ # 1 kingdom_name char(10) not null
68
+ # 2 update_date date not null
69
+
70
+ f = open(File.join(@itis_dir, 'kingdoms'))
71
+ f.each do |l|
72
+ data = l.strip.split('|')
73
+ @kingdoms[data[0].strip] = data[1].strip
74
+ end
75
+ end
76
+
77
+ def get_authors
78
+ # 0 taxon_author_id serial not null
79
+ # 1 taxon_author varchar(100,30) not null
80
+ # 2 update_date date not null
81
+ # 3 kingdom_id smallint not null
82
+
83
+ f = open(File.join(@itis_dir, 'taxon_authors_lkp'))
84
+ f.each do |l|
85
+ l.encode!('UTF-8',
86
+ 'ISO-8859-1',
87
+ invalid: :replace,
88
+ replace: '?')
89
+ data = l.strip.split('|')
90
+ @authors[data[0].strip] = data[1].strip
91
+ end
92
+ end
93
+
94
+ def get_vernaculars
95
+ # 0 tsn integer not null
96
+ # 1 vernacular_name varchar(80,5) not null
97
+ # 2 language varchar(15) not null
98
+ # 3 approved_ind char(1)
99
+ # 4 update_date date not null
100
+ # 5 primary key (tsn,vernacular_name,language)
101
+ # constraint "itis".vernaculars_key
102
+
103
+ f = open(File.join(@itis_dir, 'vernaculars'))
104
+ f.each_with_index do |l, i|
105
+ if i % BATCH_SIZE == 0
106
+ DwcaHunter::logger_write(self.object_id,
107
+ "Extracted %s vernacular names" % i)
108
+ end
109
+ l.encode!('UTF-8',
110
+ 'ISO-8859-1',
111
+ invalid: :replace,
112
+ replace: '?')
113
+ data = l.split('|').map { |d| d.strip }
114
+ name_tsn = data[0]
115
+ string = data[1]
116
+ language = data[2]
117
+ language = 'Common name' if language == 'unspecified'
118
+ @vernaculars[name_tsn] = { name:string, language:language }
119
+ end
120
+ end
121
+
122
+ def get_synonyms
123
+ # 0 tsn integer not null
124
+ # 1 tsn_accepted integer not null
125
+ # 2 update_date date not null
126
+
127
+ f = open(File.join(@itis_dir, 'synonym_links'))
128
+ f.each_with_index do |l, i|
129
+ if i % BATCH_SIZE == 0
130
+ DwcaHunter::logger_write(self.object_id,
131
+ "Extracted %s synonyms" % i)
132
+ end
133
+ l.encode!('UTF-8',
134
+ 'ISO-8859-1',
135
+ invalid: :replace,
136
+ replace: '?')
137
+ data = l.split('|').map { |d| d.strip }
138
+ synonym_name_tsn = data[0]
139
+ accepted_name_tsn = data[1]
140
+ @synonyms[synonym_name_tsn] = accepted_name_tsn
141
+ end
142
+ end
143
+
144
+ def get_names
145
+ # 0 tsn serial not null
146
+ # 1 unit_ind1 char(1)
147
+ # 2 unit_name1 char(35) not null
148
+ # 3 unit_ind2 char(1)
149
+ # 4 unit_name2 varchar(35)
150
+ # 5 unit_ind3 varchar(7)
151
+ # 6 unit_name3 varchar(35)
152
+ # 7 unit_ind4 varchar(7)
153
+ # 8 unit_name4 varchar(35)
154
+ # 9 unnamed_taxon_ind char(1)
155
+ # 10 usage varchar(12,5) not null
156
+ # 11 unaccept_reason varchar(50,9)
157
+ # 12 credibility_rtng varchar(40,17) not null
158
+ # 13 completeness_rtng char(10)
159
+ # 14 currency_rating char(7)
160
+ # 15 phylo_sort_seq smallint
161
+ # 16 initial_time_stamp datetime year to second not null
162
+ # 17 parent_tsn integer
163
+ # 18 taxon_author_id integer
164
+ # 19 hybrid_author_id integer
165
+ # 20 kingdom_id smallint not null
166
+ # 21 rank_id smallint not null
167
+ # 22 update_date date not null
168
+ # 23 uncertain_prnt_ind char(3)
169
+
170
+ f = open(File.join(@itis_dir, 'taxonomic_units'))
171
+ f.each_with_index do |l, i|
172
+ if i % BATCH_SIZE == 0
173
+ DwcaHunter::logger_write(self.object_id,
174
+ "Extracted %s names" % i)
175
+ end
176
+ l.encode!('UTF-8',
177
+ 'ISO-8859-1',
178
+ invalid: :replace,
179
+ replace: '?')
180
+ data = l.split("|").map { |d| d.strip }
181
+ name_tsn = data[0]
182
+ x1 = data[1]
183
+ name_part1 = data[2]
184
+ x2 = data[3]
185
+ name_part2 = data[4]
186
+ sp_marker1 = data[5]
187
+ name_part3 = data[6]
188
+ sp_marker2 = data[7]
189
+ name_part4 = data[8]
190
+ status = data[10]
191
+ parent_tsn = data[17]
192
+ author_id = data[18]
193
+ kingdom_id = data[20]
194
+ rank_id = data[21]
195
+
196
+ parent_tsn = nil if parent_tsn == ''
197
+ name = [x1, name_part1, x2, name_part2,
198
+ sp_marker1, name_part3, sp_marker2, name_part4]
199
+ canonical_name = name.clone
200
+ name << @authors[author_id] if @authors[author_id]
201
+ name = name.join(' ').strip.gsub(/\s+/, ' ')
202
+ canonical_name = canonical_name.join(' ').strip.gsub(/\s+/, ' ')
203
+ rank = @ranks[kingdom_id + '/' + rank_id] ?
204
+ @ranks[kingdom_id + '/' + rank_id] :
205
+ ''
206
+ @names[name_tsn] = { name: name,
207
+ canonical_name: canonical_name,
208
+ status: status,
209
+ parent_tsn: parent_tsn,
210
+ rank: rank }
211
+ end
212
+ end
213
+
214
+ def generate_dwca
215
+ DwcaHunter::logger_write(self.object_id,
216
+ 'Creating DarwinCore Archive file')
217
+ @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
218
+ 'http://rs.tdwg.org/dwc/terms/parentNameUsageID',
219
+ 'http://rs.tdwg.org/dwc/terms/acceptedNameUsageID',
220
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
221
+ 'http://rs.tdwg.org/ontology/voc/TaxonName#nameComplete',
222
+ 'http://rs.tdwg.org/dwc/terms/taxonomicStatus',
223
+ 'http://rs.tdwg.org/dwc/terms/taxonRank']]
224
+ @extensions << { data: [['http://rs.tdwg.org/dwc/terms/taxonID',
225
+ 'http://rs.tdwg.org/dwc/terms/vernacularName',
226
+ 'http://purl.org/dc/terms/language']],
227
+ file_name: 'vernacular_names.txt',
228
+ row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
229
+ }
230
+ @names.keys.each_with_index do |k, i|
231
+ d = @names[k]
232
+ accepted_id = @synonyms[k] ? @synonyms[k] : nil
233
+ parent_id = d[:parent_tsn].to_i == 0 ? nil : d[:parent_tsn]
234
+ row = [k, parent_id, accepted_id, d[:name], d[:canonical_name], d[:status], d[:rank]]
235
+ @core << row
236
+ end
237
+
238
+ @vernaculars.keys.each_with_index do |k, i|
239
+ d = @vernaculars[k]
240
+ @extensions[0][:data] << [k, d[:name], d[:language]]
241
+ end
242
+
243
+ @eml = {
244
+ id: @uuid,
245
+ title: @title,
246
+ authors: [
247
+ {email: 'itiswebmaster@itis.gov'}
248
+ ],
249
+ metadata_providers: [
250
+ { first_name: 'Dmitry',
251
+ last_name: 'Mozzherin',
252
+ email: 'dmozzherin@gmail.com' }
253
+ ],
254
+ abstract: 'The White House Subcommittee on Biodiversity and ' +
255
+ 'Ecosystem Dynamics has identified systematics as a ' +
256
+ 'research priority that is fundamental to ecosystem ' +
257
+ 'management and biodiversity conservation. This primary ' +
258
+ 'need identified by the Subcommittee requires ' +
259
+ 'improvements in the organization of, and access to, ' +
260
+ 'standardized nomenclature. ITIS (originally referred ' +
261
+ 'to as the Interagency Taxonomic Information System) ' +
262
+ 'was designed to fulfill these requirements. In the ' +
263
+ 'future, the ITIS will provide taxonomic data and a ' +
264
+ 'directory of taxonomic expertise that will support ' +
265
+ 'the system',
266
+ url: 'http://www.itis.gov'
267
+ }
268
+ super
269
+ end
270
+ end
271
+ end
@@ -0,0 +1,179 @@
1
+ module DwcaHunter
2
+ class ResourceMammalSpecies < DwcaHunter::Resource
3
+ def initialize(opts = {})
4
+ @command = "mammal-species"
5
+ @title = "The Mammal Species of The World"
6
+ @uuid = "464dafec-1037-432d-8449-c0b309e0a030"
7
+ @data = []
8
+ @extensions = []
9
+ @count = 1
10
+ @clades = {"Mammalia" => { rank: "class", id: @count}}
11
+ @url = "http://www.departments.bucknell.edu"\
12
+ "/biology/resources/msw3/export.asp"
13
+ @download_path = File.join(Dir.tmpdir, "dwca_hunter",
14
+ "mammalsp", "msw3-all.csv")
15
+ super
16
+ end
17
+
18
+ def needs_unpack?
19
+ false
20
+ end
21
+
22
+ def make_dwca
23
+ DwcaHunter::logger_write(self.object_id, "Extracting data")
24
+ encode
25
+ collect_data
26
+ generate_dwca
27
+ end
28
+
29
+ def download
30
+ DwcaHunter::logger_write(self.object_id, "Downloading file -- "\
31
+ "it will take some time...")
32
+ dlr = DwcaHunter::Downloader.new(url, @download_path)
33
+ dlr.download
34
+ end
35
+
36
+ private
37
+
38
+ def encode
39
+ DwcaHunter::Encoding.latin1_to_utf8(@download_path)
40
+ end
41
+
42
+ def collect_data
43
+ opts = { headers: true, header_converters: :symbol }
44
+ CSV.open(@download_path + ".utf_8", opts).each do |row|
45
+ @data << row.to_hash
46
+ end
47
+ end
48
+
49
+ def generate_dwca
50
+ DwcaHunter::logger_write(self.object_id,
51
+ 'Creating DarwinCore Archive file')
52
+ core_init
53
+ extensions_init
54
+ eml_init
55
+ @data.each do |rec|
56
+ taxon = process_hierarchy(rec)
57
+ process_vernaculars(rec, taxon)
58
+ process_synonyms(rec, taxon)
59
+ end
60
+ super
61
+ end
62
+
63
+ def process_vernaculars(rec, taxon)
64
+ return if rec[:commonname].to_s == ""
65
+ taxon_id = taxon[0]
66
+ lang = "en"
67
+ name = rec[:commonname].gsub("\u{0092}", "'")
68
+ @extensions[0][:data] << [taxon_id, name, lang]
69
+
70
+ end
71
+
72
+ def process_synonyms(rec, taxon)
73
+ accepted_id = taxon[0]
74
+ parent_id = taxon[2]
75
+ rank = taxon[-1]
76
+ return unless ['species', 'subspecies'].include? rank
77
+ synonyms = rec[:synonyms].gsub(/\.$/, "").
78
+ gsub(/<[\/ib]+>/, "").gsub(/[\s]+/, " ").split(";")
79
+ synonyms = synonyms.map(&:strip)
80
+ synonyms = synonyms.map do |s|
81
+ next if s.match(/<u>/)
82
+ if s.match(/^[a-z]/)
83
+ s = rec[:genus] + " " + s
84
+ end
85
+ @count += 1
86
+ id = @count
87
+ @core << [id, nil, parent_id, accepted_id, s, "synonym", rank]
88
+ end
89
+ end
90
+
91
+ def process_name(rec, rank)
92
+ name =[@core.last[4], rec[:author], rec[:date]]
93
+ @core.last[4] = name.join(" ").gsub(/[\s]+/, " ").strip
94
+ @core.last[1] = rec[:id]
95
+ end
96
+
97
+ def process_hierarchy(rec)
98
+ parent_id = @clades["Mammalia"][:id]
99
+ is_row_rank = false
100
+ [:order, :suborder, :infraorder, :superfamily, :family,
101
+ :subfamily, :tribe, :genus, :subgenus,
102
+ :species, :subspecies].each do |rank|
103
+ is_row_rank = true if rank == rec[:taxonlevel].downcase.to_sym
104
+ clade = rec[rank]
105
+ clade = clade.capitalize if clade.match(/^[A-Z]+$/)
106
+ next if clade.to_s == ""
107
+ clade_id = nil
108
+ clade = adjust_clade(rec, rank, clade)
109
+ if @clades.key?(clade)
110
+ clade_id = @clades[clade][:id]
111
+ else
112
+ @count += 1
113
+ clade_id = @count
114
+ @clades[clade] = { id: clade_id, rank: rank }
115
+ @core << [clade_id, nil, parent_id, clade_id, clade, nil, rank.to_s]
116
+ if is_row_rank
117
+ process_name(rec, rank)
118
+ return @core.last
119
+ end
120
+ end
121
+ parent_id = clade_id
122
+ end
123
+ end
124
+
125
+ def adjust_clade(rec, rank, clade)
126
+ if [:species, :subspecies].include? rank
127
+ clade = [rec[:genus], rec[:species]]
128
+ clade << rec[:subspecies] if rank == :subspecies
129
+ clade.join(" ").gsub(/[\s]+/, " ").strip
130
+ else
131
+ clade
132
+ end
133
+ end
134
+
135
+ def eml_init
136
+ @eml = {
137
+ id: @uuid,
138
+ title: @title,
139
+ authors: [
140
+ { first_name: "Don",
141
+ last_name: "Wilson" },
142
+ { first_name: "DeeAnn",
143
+ last_name: "Reader" },
144
+ ],
145
+ metadata_providers: [
146
+ { first_name: "Dmitry",
147
+ last_name: "Mozzherin",
148
+ email: "dmozzherin@gmail.com" }
149
+ ],
150
+ abstract: "Mammal Species of the World, 3rd edition (MSW3) is "\
151
+ "a database of mammalian taxonomy, based upon the 2005 book "\
152
+ "Mammal Species of the World. A Taxonomic and Geographic Reference "\
153
+ "(3rd ed). Don E. Wilson & DeeAnn M. Reeder (editors).",
154
+ url: "http://www.vertebrates.si.edu/msw/mswcfapp/msw/index.cfm"
155
+ }
156
+ end
157
+
158
+ def core_init
159
+ @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
160
+ 'http://globalnames.org/terms/localID',
161
+ 'http://rs.tdwg.org/dwc/terms/parentNameUsageID',
162
+ 'http://rs.tdwg.org/dwc/terms/acceptedNameUsageID',
163
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
164
+ 'http://rs.tdwg.org/dwc/terms/taxonomicStatus',
165
+ 'http://rs.tdwg.org/dwc/terms/taxonRank']]
166
+ m = @clades["Mammalia"]
167
+ @core << [m[:id], nil, nil, m[:id], "Mammalia", nil, "class"]
168
+ end
169
+
170
+ def extensions_init
171
+ @extensions << { data: [['http://rs.tdwg.org/dwc/terms/taxonID',
172
+ 'http://rs.tdwg.org/dwc/terms/vernacularName',
173
+ 'http://purl.org/dc/terms/language']],
174
+ file_name: 'vernacular_names.txt',
175
+ row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
176
+ }
177
+ end
178
+ end
179
+ end