dwca_hunter 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.byebug_history +45 -0
  3. data/.gitignore +5 -0
  4. data/.rubocop.yml +3 -2
  5. data/.ruby-version +1 -1
  6. data/Gemfile.lock +61 -83
  7. data/LICENSE.txt +1 -1
  8. data/README.md +1 -1
  9. data/dwca_hunter.gemspec +9 -9
  10. data/exe/dwcahunter +1 -3
  11. data/lib/dwca_hunter.rb +39 -8
  12. data/lib/dwca_hunter/resource.rb +5 -0
  13. data/lib/dwca_hunter/resources/aos-birds.rb +143 -0
  14. data/lib/dwca_hunter/resources/arctos.rb +121 -145
  15. data/lib/dwca_hunter/resources/clements.rb +151 -0
  16. data/lib/dwca_hunter/resources/eol.rb +85 -0
  17. data/lib/dwca_hunter/resources/freebase.rb +51 -49
  18. data/lib/dwca_hunter/resources/how-moore-birds.rb +168 -0
  19. data/lib/dwca_hunter/resources/ioc_word_bird.rb +200 -0
  20. data/lib/dwca_hunter/resources/ipni.rb +111 -0
  21. data/lib/dwca_hunter/resources/itis.rb +99 -99
  22. data/lib/dwca_hunter/resources/mammal_divdb.rb +155 -0
  23. data/lib/dwca_hunter/resources/mammal_species.rb +9 -6
  24. data/lib/dwca_hunter/resources/mcz.rb +123 -0
  25. data/lib/dwca_hunter/resources/ncbi.rb +22 -23
  26. data/lib/dwca_hunter/resources/opentree.rb +5 -5
  27. data/lib/dwca_hunter/resources/paleobiodb.rb +193 -0
  28. data/lib/dwca_hunter/resources/paleodb_harvester.rb +140 -0
  29. data/lib/dwca_hunter/resources/sherborn.rb +91 -0
  30. data/lib/dwca_hunter/resources/wikispecies.rb +142 -129
  31. data/lib/dwca_hunter/version.rb +1 -1
  32. metadata +46 -40
  33. data/files/birdlife_7.csv +0 -11862
  34. data/files/fishbase_taxon_cache.tsv +0 -81000
  35. data/files/reptile_checklist_2014_12.csv +0 -15158
  36. data/files/species-black.txt +0 -251
@@ -0,0 +1,111 @@
1
+ require "xz"
2
+
module DwcaHunter
  # Resource for the International Plant Names Index (IPNI).
  #
  # Fetches an xz-compressed, pipe-delimited dump of IPNI names, unpacks it
  # and turns it into a DarwinCore Archive with a single core file.
  class ResourceIPNI < DwcaHunter::Resource
    attr_reader :title, :abbr

    # @param opts [Hash] options passed on unchanged to the parent initializer
    def initialize(opts = {})
      @command = "ipni"
      @title = "The International Plant Names Index"
      @abbr = "IPNI"
      @url = "https://uofi.box.com/shared/static/s0x4xjonxt54pi89n543gdmttrdqd6iv.xz"
      @uuid = "6b3905ce-5025-49f3-9697-ddd5bdfb4ff0"
      @download_path = File.join(Dir.tmpdir, "dwca_hunter", "ipni",
                                 "ipni.csv.xz")
      @extensions = []
      super
    end

    # Decompresses the downloaded .xz file next to itself (same path without
    # the ".xz" suffix).
    def unpack
      puts "Unpacking #{@download_path}"
      XZ.decompress_file(@download_path, csv_path)
    end

    # Prints where the upstream dump has to be fetched by hand and then
    # downloads the mirrored copy from @url into @download_path.
    #
    # curl is started with an argument vector (no shell), so neither the URL
    # nor the path can be reinterpreted by a shell.
    #
    # @return [String] whatever curl wrote to stdout
    def download
      puts "Download by hand from"
      puts "https://storage.cloud.google.com/ipni-data/ipniWebName.csv.xz"
      puts "and copy to given url"
      IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
    end

    # Reads the unpacked CSV and writes the DarwinCore Archive.
    def make_dwca
      organize_data
      generate_dwca
    end

    private

    # Path of the unpacked CSV: the download path without its ".xz" suffix.
    def csv_path
      @download_path.sub(/\.xz\z/, "")
    end

    # Loads the unpacked CSV into @data as an array of hashes.
    #
    # CSV.foreach closes the file when iteration ends. The quote character is
    # set to "щ" as in the original code; presumably it was picked as a
    # character that does not occur in the dump so that literal double quotes
    # are read as data -- TODO confirm against the dump.
    #
    # @return [Array<Hash>] the collected records (also stored in @data)
    def organize_data
      DwcaHunter.logger_write(object_id, "Organizing data")
      @data = []
      CSV.foreach(csv_path,
                  col_sep: "|", quote_char: "щ", headers: true) do |row|
        record = build_record(row)
        @data << record if record
      end
      @data
    end

    # Converts one CSV row into a record hash.
    #
    # @param row [CSV::Row]
    # @return [Hash, nil] nil when the row has no id or no scientific name;
    #   such rows are skipped instead of raising NoMethodError on nil.
    def build_record(row)
      id = row["id"].to_s.split(":").last
      name = row["taxon_scientific_name_s_lower"].to_s.strip
      return nil if id.nil? || name.empty?

      authors = row["authors_t"].to_s.strip
      name = "#{name} #{authors}" unless authors.empty?
      { taxon_id: id,
        local_id: id,
        family: row["family_s_lower"],
        genus: row["genus_s_lower"],
        scientific_name: name,
        rank: row["rank_s_alphanum"] }
    end

    # Fills @core and @eml from @data, then hands over to the parent
    # implementation via super.
    def generate_dwca
      DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")
      core_init
      eml_init
      DwcaHunter.logger_write(object_id, "Assembling Core Data")
      @data.each.with_index(1) do |d, count|
        if (count % 10_000).zero?
          DwcaHunter.logger_write(object_id, "Core row #{count}")
        end
        @core << [d[:taxon_id], d[:local_id],
                  d[:scientific_name], d[:rank],
                  d[:family], d[:genus]]
      end
      super
    end

    # Sets @eml, the dataset-level metadata hash.
    def eml_init
      @eml = {
        id: @uuid,
        title: @title,
        authors: [],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin" }
        ],
        abstract: "The International Plant Names Index (IPNI) is a database " \
                  "of the names and associated basic bibliographical " \
                  "details of seed plants, ferns and lycophytes. Its goal " \
                  "is to eliminate the need for repeated reference to " \
                  "primary sources for basic bibliographic information " \
                  "about plant names. The data are freely available and are " \
                  "gradually being standardized and checked. IPNI will be a " \
                  "dynamic resource, depending on direct contributions by " \
                  "all members of the botanical community.",
        url: "http://www.ipni.org"
      }
    end

    # Sets @core to an array holding only the header row; the column order
    # must match the rows appended in generate_dwca.
    def core_init
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://globalnames.org/terms/localID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/taxonRank",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus"]]
    end
  end
end
@@ -1,15 +1,16 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module DwcaHunter
3
4
  class ResourceITIS < DwcaHunter::Resource
4
5
  def initialize(opts = {})
5
- @command = 'itis'
6
- @title = 'ITIS'
7
- @url = 'https://www.itis.gov/downloads/itisMySQLTables.tar.gz'
8
- @uuid = '5d066e84-e512-4a2f-875c-0a605d3d9f35'
6
+ @command = "itis"
7
+ @title = "ITIS"
8
+ @url = "https://www.itis.gov/downloads/itisMySQLTables.tar.gz"
9
+ @uuid = "5d066e84-e512-4a2f-875c-0a605d3d9f35"
9
10
  @download_path = File.join(Dir.tmpdir,
10
- 'dwca_hunter',
11
- 'itis',
12
- 'data.tar.gz')
11
+ "dwca_hunter",
12
+ "itis",
13
+ "data.tar.gz")
13
14
  @ranks = {}
14
15
  @kingdoms = {}
15
16
  @authors = {}
@@ -19,20 +20,20 @@ module DwcaHunter
19
20
  @names = {}
20
21
  @extensions = []
21
22
  super(opts)
22
- @itis_dir = File.join(@download_dir, 'itis')
23
+ @itis_dir = File.join(@download_dir, "itis")
23
24
  end
24
25
 
25
26
  def unpack
26
27
  unpack_tar
27
- dir = Dir.entries(@download_dir).select {|e| e.match(/itisMySQL/)}[0]
28
+ dir = Dir.entries(@download_dir).select { |e| e.match(/itisMySQL/) }[0]
28
29
  FileUtils.mv(File.join(@download_dir, dir), @itis_dir)
29
30
 
30
31
  # Create a file with the same name as the directory we extracted.
31
- FileUtils.touch(File.join(@itis_dir, 'version_' + dir))
32
+ FileUtils.touch(File.join(@itis_dir, "version_" + dir))
32
33
  end
33
34
 
34
35
  def make_dwca
35
- DwcaHunter::logger_write(self.object_id, 'Extracting data')
36
+ DwcaHunter.logger_write(object_id, "Extracting data")
36
37
  get_ranks
37
38
  get_kingdoms
38
39
  get_authors
@@ -42,7 +43,8 @@ module DwcaHunter
42
43
  generate_dwca
43
44
  end
44
45
 
45
- private
46
+ private
47
+
46
48
  def get_ranks
47
49
  # 0 kingdom_id integer not null
48
50
  # 1 rank_id smallint not null
@@ -50,15 +52,15 @@ module DwcaHunter
50
52
  # 3 dir_parent_rank_id smallint not null
51
53
  # 4 req_parent_rank_id smallint not null
52
54
  # 5 update_date date not null
53
- rank_file = File.join(@itis_dir, 'taxon_unit_types')
54
- f = open(rank_file, 'r:utf-8')
55
+ rank_file = File.join(@itis_dir, "taxon_unit_types")
56
+ f = open(rank_file, "r:utf-8")
55
57
  f.each do |l|
56
- l.encode!('UTF-8',
57
- 'ISO-8859-1',
58
+ l.encode!("UTF-8",
59
+ "ISO-8859-1",
58
60
  invalid: :replace,
59
- replace: '?')
60
- row = l.strip.split('|')
61
- @ranks[row[0].strip + '/' + row[1].strip] = row[2].strip
61
+ replace: "?")
62
+ row = l.strip.split("|")
63
+ @ranks[row[0].strip + "/" + row[1].strip] = row[2].strip
62
64
  end
63
65
  end
64
66
 
@@ -67,9 +69,9 @@ module DwcaHunter
67
69
  # 1 kingdom_name char(10) not null
68
70
  # 2 update_date date not null
69
71
 
70
- f = open(File.join(@itis_dir, 'kingdoms'))
72
+ f = open(File.join(@itis_dir, "kingdoms"))
71
73
  f.each do |l|
72
- data = l.strip.split('|')
74
+ data = l.strip.split("|")
73
75
  @kingdoms[data[0].strip] = data[1].strip
74
76
  end
75
77
  end
@@ -80,13 +82,13 @@ module DwcaHunter
80
82
  # 2 update_date date not null
81
83
  # 3 kingdom_id smallint not null
82
84
 
83
- f = open(File.join(@itis_dir, 'taxon_authors_lkp'))
85
+ f = open(File.join(@itis_dir, "taxon_authors_lkp"))
84
86
  f.each do |l|
85
- l.encode!('UTF-8',
86
- 'ISO-8859-1',
87
+ l.encode!("UTF-8",
88
+ "ISO-8859-1",
87
89
  invalid: :replace,
88
- replace: '?')
89
- data = l.strip.split('|')
90
+ replace: "?")
91
+ data = l.strip.split("|")
90
92
  @authors[data[0].strip] = data[1].strip
91
93
  end
92
94
  end
@@ -100,22 +102,22 @@ module DwcaHunter
100
102
  # 5 primary key (tsn,vernacular_name,language)
101
103
  # constraint "itis".vernaculars_key
102
104
 
103
- f = open(File.join(@itis_dir, 'vernaculars'))
105
+ f = open(File.join(@itis_dir, "vernaculars"))
104
106
  f.each_with_index do |l, i|
105
107
  if i % BATCH_SIZE == 0
106
- DwcaHunter::logger_write(self.object_id,
107
- "Extracted %s vernacular names" % i)
108
+ DwcaHunter.logger_write(object_id,
109
+ "Extracted %s vernacular names" % i)
108
110
  end
109
- l.encode!('UTF-8',
110
- 'ISO-8859-1',
111
+ l.encode!("UTF-8",
112
+ "ISO-8859-1",
111
113
  invalid: :replace,
112
- replace: '?')
113
- data = l.split('|').map { |d| d.strip }
114
+ replace: "?")
115
+ data = l.split("|").map(&:strip)
114
116
  name_tsn = data[0]
115
117
  string = data[1]
116
118
  language = data[2]
117
- language = 'Common name' if language == 'unspecified'
118
- @vernaculars[name_tsn] = { name:string, language:language }
119
+ language = "Common name" if language == "unspecified"
120
+ @vernaculars[name_tsn] = { name: string, language: language }
119
121
  end
120
122
  end
121
123
 
@@ -124,17 +126,17 @@ module DwcaHunter
124
126
  # 1 tsn_accepted integer not null
125
127
  # 2 update_date date not null
126
128
 
127
- f = open(File.join(@itis_dir, 'synonym_links'))
129
+ f = open(File.join(@itis_dir, "synonym_links"))
128
130
  f.each_with_index do |l, i|
129
131
  if i % BATCH_SIZE == 0
130
- DwcaHunter::logger_write(self.object_id,
131
- "Extracted %s synonyms" % i)
132
+ DwcaHunter.logger_write(object_id,
133
+ "Extracted %s synonyms" % i)
132
134
  end
133
- l.encode!('UTF-8',
134
- 'ISO-8859-1',
135
+ l.encode!("UTF-8",
136
+ "ISO-8859-1",
135
137
  invalid: :replace,
136
- replace: '?')
137
- data = l.split('|').map { |d| d.strip }
138
+ replace: "?")
139
+ data = l.split("|").map(&:strip)
138
140
  synonym_name_tsn = data[0]
139
141
  accepted_name_tsn = data[1]
140
142
  @synonyms[synonym_name_tsn] = accepted_name_tsn
@@ -167,19 +169,19 @@ module DwcaHunter
167
169
  # 22 update_date date not null
168
170
  # 23 uncertain_prnt_ind char(3)
169
171
 
170
- f = open(File.join(@itis_dir, 'taxonomic_units'))
172
+ f = open(File.join(@itis_dir, "taxonomic_units"))
171
173
  f.each_with_index do |l, i|
172
174
  if i % BATCH_SIZE == 0
173
- DwcaHunter::logger_write(self.object_id,
174
- "Extracted %s names" % i)
175
+ DwcaHunter.logger_write(object_id,
176
+ "Extracted %s names" % i)
175
177
  end
176
- l.encode!('UTF-8',
177
- 'ISO-8859-1',
178
+ l.encode!("UTF-8",
179
+ "ISO-8859-1",
178
180
  invalid: :replace,
179
- replace: '?')
180
- data = l.split("|").map { |d| d.strip }
181
- name_tsn = data[0]
182
- x1 = data[1]
181
+ replace: "?")
182
+ data = l.split("|").map(&:strip)
183
+ name_tsn = data[0]
184
+ x1 = data[1]
183
185
  name_part1 = data[2]
184
186
  x2 = data[3]
185
187
  name_part2 = data[4]
@@ -193,16 +195,15 @@ module DwcaHunter
193
195
  kingdom_id = data[20]
194
196
  rank_id = data[21]
195
197
 
196
- parent_tsn = nil if parent_tsn == ''
198
+ parent_tsn = nil if parent_tsn == ""
197
199
  name = [x1, name_part1, x2, name_part2,
198
200
  sp_marker1, name_part3, sp_marker2, name_part4]
199
201
  canonical_name = name.clone
200
202
  name << @authors[author_id] if @authors[author_id]
201
- name = name.join(' ').strip.gsub(/\s+/, ' ')
202
- canonical_name = canonical_name.join(' ').strip.gsub(/\s+/, ' ')
203
- rank = @ranks[kingdom_id + '/' + rank_id] ?
204
- @ranks[kingdom_id + '/' + rank_id] :
205
- ''
203
+ name = name.join(" ").strip.gsub(/\s+/, " ")
204
+ canonical_name = canonical_name.join(" ").strip.gsub(/\s+/, " ")
205
+ rank = @ranks[kingdom_id + "/" + rank_id] ||
206
+ ""
206
207
  @names[name_tsn] = { name: name,
207
208
  canonical_name: canonical_name,
208
209
  status: status,
@@ -212,58 +213,57 @@ module DwcaHunter
212
213
  end
213
214
 
214
215
  def generate_dwca
215
- DwcaHunter::logger_write(self.object_id,
216
- 'Creating DarwinCore Archive file')
217
- @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
218
- 'http://rs.tdwg.org/dwc/terms/parentNameUsageID',
219
- 'http://rs.tdwg.org/dwc/terms/acceptedNameUsageID',
220
- 'http://rs.tdwg.org/dwc/terms/scientificName',
221
- 'http://rs.tdwg.org/ontology/voc/TaxonName#nameComplete',
222
- 'http://rs.tdwg.org/dwc/terms/taxonomicStatus',
223
- 'http://rs.tdwg.org/dwc/terms/taxonRank']]
224
- @extensions << { data: [['http://rs.tdwg.org/dwc/terms/taxonID',
225
- 'http://rs.tdwg.org/dwc/terms/vernacularName',
226
- 'http://purl.org/dc/terms/language']],
227
- file_name: 'vernacular_names.txt',
228
- row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
229
- }
230
- @names.keys.each_with_index do |k, i|
216
+ DwcaHunter.logger_write(object_id,
217
+ "Creating DarwinCore Archive file")
218
+ @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
219
+ "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
220
+ "http://rs.tdwg.org/dwc/terms/acceptedNameUsageID",
221
+ "http://rs.tdwg.org/dwc/terms/scientificName",
222
+ "http://rs.tdwg.org/ontology/voc/TaxonName#nameComplete",
223
+ "http://rs.tdwg.org/dwc/terms/taxonomicStatus",
224
+ "http://rs.tdwg.org/dwc/terms/taxonRank"]]
225
+ @extensions << { data: [["http://rs.tdwg.org/dwc/terms/taxonID",
226
+ "http://rs.tdwg.org/dwc/terms/vernacularName",
227
+ "http://purl.org/dc/terms/language"]],
228
+ file_name: "vernacular_names.txt",
229
+ row_type: "http://rs.gbif.org/terms/1.0/VernacularName" }
230
+ @names.keys.each_with_index do |k, _i|
231
231
  d = @names[k]
232
- accepted_id = @synonyms[k] ? @synonyms[k] : nil
232
+ accepted_id = @synonyms[k] || nil
233
233
  parent_id = d[:parent_tsn].to_i == 0 ? nil : d[:parent_tsn]
234
234
  row = [k, parent_id, accepted_id, d[:name], d[:canonical_name], d[:status], d[:rank]]
235
235
  @core << row
236
236
  end
237
237
 
238
- @vernaculars.keys.each_with_index do |k, i|
238
+ @vernaculars.keys.each_with_index do |k, _i|
239
239
  d = @vernaculars[k]
240
240
  @extensions[0][:data] << [k, d[:name], d[:language]]
241
241
  end
242
242
 
243
243
  @eml = {
244
- id: @uuid,
245
- title: @title,
246
- authors: [
247
- {email: 'itiswebmaster@itis.gov'}
248
- ],
249
- metadata_providers: [
250
- { first_name: 'Dmitry',
251
- last_name: 'Mozzherin',
252
- email: 'dmozzherin@gmail.com' }
253
- ],
254
- abstract: 'The White House Subcommittee on Biodiversity and ' +
255
- 'Ecosystem Dynamics has identified systematics as a ' +
256
- 'research priority that is fundamental to ecosystem ' +
257
- 'management and biodiversity conservation. This primary ' +
258
- 'need identified by the Subcommittee requires ' +
259
- 'improvements in the organization of, and access to, ' +
260
- 'standardized nomenclature. ITIS (originally referred ' +
261
- 'to as the Interagency Taxonomic Information System) ' +
262
- 'was designed to fulfill these requirements. In the ' +
263
- 'future, the ITIS will provide taxonomic data and a ' +
264
- 'directory of taxonomic expertise that will support ' +
265
- 'the system',
266
- url: 'http://www.itis.gov'
244
+ id: @uuid,
245
+ title: @title,
246
+ authors: [
247
+ { email: "itiswebmaster@itis.gov" }
248
+ ],
249
+ metadata_providers: [
250
+ { first_name: "Dmitry",
251
+ last_name: "Mozzherin",
252
+ email: "dmozzherin@gmail.com" }
253
+ ],
254
+ abstract: "The White House Subcommittee on Biodiversity and " \
255
+ "Ecosystem Dynamics has identified systematics as a " \
256
+ "research priority that is fundamental to ecosystem " \
257
+ "management and biodiversity conservation. This primary " \
258
+ "need identified by the Subcommittee requires " \
259
+ "improvements in the organization of, and access to, " \
260
+ "standardized nomenclature. ITIS (originally referred " \
261
+ "to as the Interagency Taxonomic Information System) " \
262
+ "was designed to fulfill these requirements. In the " \
263
+ "future, the ITIS will provide taxonomic data and a " \
264
+ "directory of taxonomic expertise that will support " \
265
+ "the system",
266
+ url: "http://www.itis.gov"
267
267
  }
268
268
  super
269
269
  end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
module DwcaHunter
  # Resource for the ASM Mammal Diversity Database.
  #
  # Downloads the database as one JSON document and converts it into a
  # DarwinCore Archive: accepted names go to the core file, vernacular names
  # and non-accepted names go to two extension files.
  class ResourceMammalDiversityDb < DwcaHunter::Resource
    # @param opts [Hash] options passed on to the parent initializer
    def initialize(opts = {})
      @command = "mammal-div-db"
      @title = "ASM Mammal Diversity Database"
      @url = "https://mammaldiversity.org/species-account/api.php?q=*"
      # Must be @uuid (lower case): that is the variable the EML section in
      # generate_dwca reads.
      @uuid = "94270cdd-5424-4bb1-8324-46ccc5386dc7"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "mammal-div-db",
                                 "data.json")
      @synonyms = []
      @names = []
      @vernaculars = []
      @extensions = []
      @synonyms_hash = {}
      @vernaculars_hash = {}
      super(opts)
    end

    # Downloads the JSON document from @url into @download_path.
    #
    # curl is started with an argument vector (no shell), so the "*" and "?"
    # in the URL and the output path need no quoting. "User-Agent:" with an
    # empty value makes curl send no User-Agent header.
    #
    # @return [String] whatever curl wrote to stdout
    def download
      DwcaHunter.logger_write(object_id, "Downloading")
      IO.popen(["curl", @url, "-H", "User-Agent:", "-o", @download_path],
               &:read)
    end

    # Nothing to unpack: the download is a plain JSON file.
    def unpack; end

    # Parses the downloaded JSON and writes the DarwinCore Archive.
    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    # Switches the working directory to @download_dir and collects names.
    # NOTE(review): collect_names builds its path from @download_dir itself;
    # the chdir is kept in case later steps rely on the working directory.
    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    # Reads data.json from @download_dir and fills @names (accepted),
    # @synonyms (everything else) and @vernaculars.
    def collect_names
      @names_index = {}
      decoder = HTMLEntities.new
      raw = File.read(File.join(@download_dir, "data.json"))
      data = JSON.parse(raw, symbolize_names: true)
      # Each entry of :result is a (key, record) pair; only the record is used.
      data[:result].each do |_key, record|
        dwc = record[:dwc]
        name = build_name(record)
        if dwc[:taxonomicStatus] == "accepted"
          @names << name
        else
          @synonyms << name
        end

        vern = dwc[:vernacularName].to_s
        next if vern.empty?

        # Use the same id as the name row so that the vernacular extension
        # row links to the corresponding core row.
        @vernaculars << { taxon_id: name[:taxon_id],
                          vern: decoder.decode(vern),
                          lang: "en" }
      end
      DwcaHunter.logger_write(object_id,
                              "Collected #{data[:result].size} records")
    end

    # Builds the name hash for one record of the JSON document.
    #
    # @param record [Hash] a record with :internal_id and a :dwc sub-hash
    # @return [Hash]
    def build_name(record)
      dwc = record[:dwc]
      authorship = dwc.dig(:scientificNameAuthorship, :species)
      {
        taxon_id: record[:internal_id],
        kingdom: "Animalia",
        phylum: "Chordata",
        klass: "Mammalia",
        order: normalize_taxon(dwc[:order]),
        family: normalize_taxon(dwc[:family]),
        genus: normalize_taxon(dwc[:genus]),
        name_string: "#{dwc[:scientificName]} #{authorship}".strip,
        rank: dwc[:taxonRank],
        # taxonomicStatus, not taxonRank: this value is written to the
        # taxonomicStatus column of synonyms.txt.
        status: dwc[:taxonomicStatus],
        code: "ICZN"
      }
    end

    # Capitalizes a higher-taxon name and blanks out "incertae sedis"
    # placeholders.
    #
    # The match is case-insensitive because capitalize turns a leading
    # "incertae" into "Incertae", which a case-sensitive /incertae/ misses.
    #
    # @param value [String, nil]
    # @return [String, nil] nil for an "incertae ..." placeholder
    def normalize_taxon(value)
      taxon = value.to_s.capitalize
      taxon.match?(/incertae/i) ? nil : taxon
    end

    # Fills @core, @extensions and @eml from the collected data, then hands
    # over to the parent implementation via super.
    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/kingdom",
                "http://rs.tdwg.org/dwc/terms/phylum",
                "http://rs.tdwg.org/dwc/terms/class",
                "http://rs.tdwg.org/dwc/terms/order",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus",
                "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string],
                  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
                  n[:genus], n[:code]]
      end

      # Extension 1: vernacular names.
      vernacular_rows = [[
        "http://rs.tdwg.org/dwc/terms/taxonID",
        "http://rs.tdwg.org/dwc/terms/vernacularName",
        "http://purl.org/dc/terms/language"
      ]]
      @vernaculars.each do |v|
        vernacular_rows << [v[:taxon_id], v[:vern], v[:lang]]
      end
      @extensions << {
        data: vernacular_rows,
        file_name: "vernacular_names.txt",
        row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
      }

      # Extension 2: names whose taxonomicStatus is not "accepted".
      synonym_rows = [[
        "http://rs.tdwg.org/dwc/terms/taxonID",
        "http://rs.tdwg.org/dwc/terms/scientificName",
        "http://rs.tdwg.org/dwc/terms/taxonomicStatus"
      ]]
      @synonyms.each do |s|
        synonym_rows << [s[:taxon_id], s[:name_string], s[:status]]
      end
      @extensions << {
        data: synonym_rows,
        file_name: "synonyms.txt"
      }

      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "C. J.",
            last_name: "Burgin" },
          { first_name: "J. P.",
            last_name: "Colella" },
          { first_name: "P. L.",
            last_name: "Kahn" },
          { first_name: "N. S.",
            last_name: "Upham" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "Mammal Diversity Database. 2020. www.mammaldiversity.org. " \
                  "American Society of Mammalogists. Accessed 2020-05-24 .",
        url: @url
      }
      super
    end
  end
end