dwca_hunter 0.7.1 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,14 +3,14 @@
3
3
  module DwcaHunter
4
4
  class ResourceIOCWorldBird < DwcaHunter::Resource
5
5
  def initialize(opts = {})
6
- @command = "ioc-world-bird"
7
- @title = "IOC World Bird List"
8
- @url = "https://uofi.box.com/shared/static/fbpuk5ghh9083nbzjdeyqtoqsvdnro01.csv"
9
- @UUID = "6421ffec-38e3-40fb-a6d9-af27238a47a1"
6
+ @command = 'ioc-world-bird'
7
+ @title = 'IOC World Bird List'
8
+ @url = 'https://uofi.box.com/shared/static/znsd734a78saq87hes979p5uspgkzy93.csv'
9
+ @UUID = '6421ffec-38e3-40fb-a6d9-af27238a47a1'
10
10
  @download_path = File.join(Dir.tmpdir,
11
- "dwca_hunter",
12
- "ioc-bird",
13
- "data.csv")
11
+ 'dwca_hunter',
12
+ 'ioc-bird',
13
+ 'data.csv')
14
14
  @synonyms = []
15
15
  @names = []
16
16
  @vernaculars = []
@@ -21,17 +21,17 @@ module DwcaHunter
21
21
  end
22
22
 
23
23
  def download
24
- puts "Downloading cached and converted to csv version."
25
- puts "CHECK FOR NEW VERSION at"
26
- puts "https://www.worldbirdnames.org/ioc-lists/master-list-2/"
27
- puts "Use libreoffice to convert to csv."
24
+ puts 'Downloading cached and converted to csv version.'
25
+ puts 'CHECK FOR NEW VERSION at'
26
+ puts 'https://www.worldbirdnames.org/ioc-lists/master-list-2/'
27
+ puts 'Use libreoffice to convert to csv.'
28
28
  `curl -s -L #{@url} -o #{@download_path}`
29
29
  end
30
30
 
31
31
  def unpack; end
32
32
 
33
33
  def make_dwca
34
- DwcaHunter.logger_write(object_id, "Extracting data")
34
+ DwcaHunter.logger_write(object_id, 'Extracting data')
35
35
  get_names
36
36
  generate_dwca
37
37
  end
@@ -45,84 +45,84 @@ module DwcaHunter
45
45
 
46
46
  def collect_names
47
47
  @names_index = {}
48
- file = CSV.open(File.join(@download_dir, "data.csv"),
48
+ file = CSV.open(File.join(@download_dir, 'data.csv'),
49
49
  headers: true)
50
- order = ""
51
- family = ""
52
- genus = ""
53
- species = ""
50
+ order = ''
51
+ family = ''
52
+ genus = ''
53
+ species = ''
54
54
  count = 0
55
55
  file.each do |row|
56
- order1 = row["Order"]
57
- order = order1.capitalize if order1.to_s != ""
56
+ order1 = row['Order']
57
+ order = order1.capitalize if order1.to_s != ''
58
58
 
59
- family1 = row["Family (Scientific)"]
60
- family = family1.capitalize if family1.to_s != ""
59
+ family1 = row['Family (Scientific)']
60
+ family = family1.capitalize if family1.to_s != ''
61
61
 
62
- genus1 = row["Genus"]
63
- genus = genus1.capitalize if genus1.to_s != ""
62
+ genus1 = row['Genus']
63
+ genus = genus1.capitalize if genus1.to_s != ''
64
64
 
65
- species1 = row["Species (Scientific)"]
66
- species = species1 if species1.to_s != ""
65
+ species1 = row['Species (Scientific)']
66
+ species = species1 if species1.to_s != ''
67
67
 
68
- subspecies = row["Subspecies"]
69
- next if species.to_s == ""
68
+ subspecies = row['Subspecies']
69
+ next if species.to_s == ''
70
70
 
71
71
  count += 1
72
72
  taxon_id = "gn_#{count}"
73
73
  name = {
74
74
  taxon_id: taxon_id,
75
- kingdom: "Animalia",
76
- phylum: "Chordata",
77
- klass: "Aves",
75
+ kingdom: 'Animalia',
76
+ phylum: 'Chordata',
77
+ klass: 'Aves',
78
78
  order: order,
79
79
  family: family,
80
80
  genus: genus,
81
- code: "ICZN"
81
+ code: 'ICZN'
82
82
  }
83
- if subspecies.to_s == ""
84
- auth = row["Authority"].to_s
85
- auth = DwcaHunter.normalize_authors(auth) if auth != ""
83
+ if subspecies.to_s == ''
84
+ auth = row['Authority'].to_s
85
+ auth = DwcaHunter.normalize_authors(auth) if auth != ''
86
86
  name[:name_string] = clean(
87
- "#{genus} #{species} #{auth}".
88
- strip
87
+ "#{genus} #{species} #{auth}"
88
+ .strip
89
89
  )
90
90
  @names << name
91
- vernacular = row["Species (English)"]
92
- if vernacular.to_s != ""
93
- vernaclar = { taxon_id: taxon_id, vern: vernacular, lang: "en" }
91
+ vernacular = row['Species (English)']
92
+ if vernacular.to_s != ''
93
+ vernaclar = { taxon_id: taxon_id, vern: vernacular, lang: 'en' }
94
94
  @vernaculars << vernaclar
95
95
  end
96
- species = ""
96
+ species = ''
97
97
  else
98
98
  name[:name_string] = clean(
99
- "#{genus} #{species} #{subspecies} #{row['Authority']}".
100
- strip
99
+ "#{genus} #{species} #{subspecies} #{row['Authority']}"
100
+ .strip
101
101
  )
102
102
  @names << name
103
- species = ""
104
- subspecies = ""
103
+ species = ''
104
+ subspecies = ''
105
105
  end
106
106
  end
107
107
  end
108
108
 
109
109
  def clean(n)
110
- n = n.gsub(/†/, "")
111
- n.gsub(/\s+/, " ")
110
+ n = n.gsub(/†/, '')
111
+ n.gsub(/\s+/, ' ')
112
112
  end
113
113
 
114
114
  def generate_dwca
115
115
  DwcaHunter.logger_write(object_id,
116
- "Creating DarwinCore Archive file")
117
- @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
118
- "http://rs.tdwg.org/dwc/terms/scientificName",
119
- "http://rs.tdwg.org/dwc/terms/kingdom",
120
- "http://rs.tdwg.org/dwc/terms/phylum",
121
- "http://rs.tdwg.org/dwc/terms/class",
122
- "http://rs.tdwg.org/dwc/terms/order",
123
- "http://rs.tdwg.org/dwc/terms/family",
124
- "http://rs.tdwg.org/dwc/terms/genus",
125
- "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
116
+ 'Creating DarwinCore Archive file')
117
+ @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
118
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
119
+ 'http://rs.tdwg.org/dwc/terms/kingdom',
120
+ 'http://rs.tdwg.org/dwc/terms/phylum',
121
+ 'http://rs.tdwg.org/dwc/terms/class',
122
+ 'http://rs.tdwg.org/dwc/terms/order',
123
+ 'http://rs.tdwg.org/dwc/terms/family',
124
+ 'http://rs.tdwg.org/dwc/terms/genus',
125
+ 'http://rs.tdwg.org/dwc/terms/nomenclaturalCode']]
126
126
  @names.each do |n|
127
127
  @core << [n[:taxon_id], n[:name_string],
128
128
  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
@@ -130,12 +130,12 @@ module DwcaHunter
130
130
  end
131
131
  @extensions << {
132
132
  data: [[
133
- "http://rs.tdwg.org/dwc/terms/taxonID",
134
- "http://rs.tdwg.org/dwc/terms/vernacularName",
135
- "http://purl.org/dc/terms/language"
133
+ 'http://rs.tdwg.org/dwc/terms/taxonID',
134
+ 'http://rs.tdwg.org/dwc/terms/vernacularName',
135
+ 'http://purl.org/dc/terms/language'
136
136
  ]],
137
- file_name: "vernacular_names.txt",
138
- row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
137
+ file_name: 'vernacular_names.txt',
138
+ row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
139
139
  }
140
140
 
141
141
  @vernaculars.each do |v|
@@ -146,53 +146,53 @@ module DwcaHunter
146
146
  id: @uuid,
147
147
  title: @title,
148
148
  authors: [
149
- { first_name: "Per",
150
- last_name: "Alstrom" },
151
- { first_name: "Mike",
152
- last_name: "Blair" },
153
- { first_name: "Rauri",
154
- last_name: "Bowie" },
155
- { first_name: "Nigel",
156
- last_name: "Redman" },
157
- { first_name: "Jon",
158
- last_name: "Fjeldsa" },
159
- { first_name: "Phil",
160
- last_name: "Gregory" },
161
- { first_name: "Leo",
162
- last_name: "Joseph" },
163
- { first_name: "Peter",
164
- last_name: "Kovalik" },
165
- { first_name: "Adolfo",
166
- last_name: "Navarro-Siguenza" },
167
- { first_name: "David",
168
- last_name: "Parkin" },
169
- { first_name: "Alan",
170
- last_name: "Peterson" },
171
- { first_name: "Douglas",
172
- last_name: "Pratt" },
173
- { first_name: "Pam",
174
- last_name: "Rasmussen" },
175
- { first_name: "Frank",
176
- last_name: "Rheindt" },
177
- { first_name: "Robert",
178
- last_name: "Ridgely" },
179
- { first_name: "Peter",
180
- last_name: "Ryan" },
181
- { first_name: "George",
182
- last_name: "Sangster" },
183
- { first_name: "Dick",
184
- last_name: "Schodde" },
185
- { first_name: "Minturn",
186
- last_name: "Wright" }
149
+ { first_name: 'Per',
150
+ last_name: 'Alstrom' },
151
+ { first_name: 'Mike',
152
+ last_name: 'Blair' },
153
+ { first_name: 'Rauri',
154
+ last_name: 'Bowie' },
155
+ { first_name: 'Nigel',
156
+ last_name: 'Redman' },
157
+ { first_name: 'Jon',
158
+ last_name: 'Fjeldsa' },
159
+ { first_name: 'Phil',
160
+ last_name: 'Gregory' },
161
+ { first_name: 'Leo',
162
+ last_name: 'Joseph' },
163
+ { first_name: 'Peter',
164
+ last_name: 'Kovalik' },
165
+ { first_name: 'Adolfo',
166
+ last_name: 'Navarro-Siguenza' },
167
+ { first_name: 'David',
168
+ last_name: 'Parkin' },
169
+ { first_name: 'Alan',
170
+ last_name: 'Peterson' },
171
+ { first_name: 'Douglas',
172
+ last_name: 'Pratt' },
173
+ { first_name: 'Pam',
174
+ last_name: 'Rasmussen' },
175
+ { first_name: 'Frank',
176
+ last_name: 'Rheindt' },
177
+ { first_name: 'Robert',
178
+ last_name: 'Ridgely' },
179
+ { first_name: 'Peter',
180
+ last_name: 'Ryan' },
181
+ { first_name: 'George',
182
+ last_name: 'Sangster' },
183
+ { first_name: 'Dick',
184
+ last_name: 'Schodde' },
185
+ { first_name: 'Minturn',
186
+ last_name: 'Wright' }
187
187
  ],
188
188
  metadata_providers: [
189
- { first_name: "Dmitry",
190
- last_name: "Mozzherin",
191
- email: "dmozzherin@gmail.com" }
189
+ { first_name: 'Dmitry',
190
+ last_name: 'Mozzherin',
191
+ email: 'dmozzherin@gmail.com' }
192
192
  ],
193
- abstract: "The IOC World Bird List is an open access resource of " \
194
- "the international community of ornithologists.",
195
- url: "https://www.worldbirdnames.org"
193
+ abstract: 'The IOC World Bird List is an open access resource of ' \
194
+ 'the international community of ornithologists.',
195
+ url: 'https://www.worldbirdnames.org'
196
196
  }
197
197
  super
198
198
  end
@@ -5,12 +5,12 @@ module DwcaHunter
5
5
  def initialize(opts = {})
6
6
  @command = "mammal-div-db"
7
7
  @title = "ASM Mammal Diversity Database"
8
- @url = "https://mammaldiversity.org/species-account/api.php?q=*"
8
+ @url = "https://www.mammaldiversity.org/assets/data/MDD.zip"
9
9
  @UUID = "94270cdd-5424-4bb1-8324-46ccc5386dc7"
10
10
  @download_path = File.join(Dir.tmpdir,
11
11
  "dwca_hunter",
12
12
  "mammal-div-db",
13
- "data.json")
13
+ "data.zip")
14
14
  @synonyms = []
15
15
  @names = []
16
16
  @vernaculars = []
@@ -25,7 +25,9 @@ module DwcaHunter
25
25
  `curl '#{@url}' -H 'User-Agent:' -o #{@download_path}`
26
26
  end
27
27
 
28
- def unpack; end
28
+ def unpack
29
+ unpack_zip
30
+ end
29
31
 
30
32
  def make_dwca
31
33
  DwcaHunter.logger_write(object_id, "Extracting data")
@@ -40,49 +42,78 @@ module DwcaHunter
40
42
  collect_names
41
43
  end
42
44
 
45
# Locates the CSV data file inside the download directory.
#
# Lists +@download_dir+ and returns the bare file name (not a full path) of
# the first entry, in directory-listing order, whose name ends in ".csv".
#
# Unlike the earlier version this does not call Dir.chdir, so the process
# working directory is left untouched, and it returns +nil+ (rather than the
# whole directory listing) when no CSV file is present.
#
# @return [String, nil] CSV file name, or nil when none exists
def find_csv_file
  Dir.entries(@download_dir).find { |entry| entry.end_with?(".csv") }
end
51
+
52
# Builds the rank and full scientific name string for one data row.
#
# The name comes from the "sciName" field with underscores turned into
# spaces. Authorship is "<authoritySpeciesAuthor> <year>", wrapped in
# parentheses when "authorityParentheses" is set, and appended to the name.
# A name of more than two words is ranked "subspecies", otherwise "species".
#
# @param row [#[]] a CSV row (or any object answering +[]+ with header names)
# @return [Array(String, String)] +[rank, name_string]+
def assemble_name(row)
  name = row["sciName"].tr("_", " ")
  # The year key was previously misspelled ("aurhority..."), next to the
  # correctly spelled author key. The old spelling is kept as a fallback in
  # case some data file really uses it.
  year = row["authoritySpeciesYear"] || row["aurhoritySpeciesYear"]
  auth = "#{row['authoritySpeciesAuthor']} #{year}".strip
  # Compare as text: without CSV converters the field is the String "1",
  # which never equals the Integer 1. Skip empty authorship to avoid "()".
  parenthesized = row["authorityParentheses"].to_s == "1"
  auth = "(#{auth})" if parenthesized && !auth.empty?
  rank = name.split(" ").size > 2 ? "subspecies" : "species"
  [rank, "#{name} #{auth}".strip]
end
62
+
63
# Builds a synonym record from a row's original name combination.
#
# The "originalNameCombination" field (underscores turned into spaces) is
# combined with "<authoritySpeciesAuthor> <year>" to form the name string.
# The caller is expected to check that the field is present; a nil value
# raises NoMethodError here, as before.
#
# @param row [#[]] a CSV row (or any object answering +[]+ with header names)
# @return [Hash] +{ taxon_id:, name_string:, status: "synonym" }+
def assemble_synonym(row)
  name = row["originalNameCombination"].tr("_", " ")
  # The year key was previously misspelled ("aurhority..."); the old spelling
  # is kept as a fallback in case some data file really uses it.
  year = row["authoritySpeciesYear"] || row["aurhoritySpeciesYear"]
  auth = "#{row['authoritySpeciesAuthor']} #{year}".strip
  { taxon_id: row["id"], name_string: "#{name} #{auth}".strip, status: "synonym" }
end
70
+
71
# Collects the English vernacular-name records for one data row.
#
# Takes "mainCommonName" first, followed by the pipe-separated entries of
# "otherCommonNames". Each name is trimmed, and blank fragments (from a
# missing field, "A||B", or a trailing "|") are dropped so that no empty
# vernacular records are produced.
#
# @param row [#[]] a CSV row (or any object answering +[]+ with header names)
# @return [Array<Hash>] one +{ taxon_id:, vern:, lang: "en" }+ per name
def vernaculars(row)
  id = row["id"]
  candidates = [row["mainCommonName"].to_s] + row["otherCommonNames"].to_s.split("|")
  candidates.map(&:strip).reject(&:empty?).map do |v|
    { taxon_id: id, vern: v, lang: "en" }
  end
end
85
+
43
86
  def collect_names
44
87
  @names_index = {}
45
- decoder = HTMLEntities.new
46
- data = File.read(File.join(@download_dir, "data.json"))
47
- data = JSON.parse(data, symbolize_names: true)
48
- data[:result].each_with_index do |e, _i|
49
- e = e[1]
50
- order = e[:dwc][:order].capitalize
51
- order = nil if order.match(/incertae/)
52
- family = e[:dwc][:family].capitalize
53
- family = nil if family.match(/incertae/)
54
- genus = e[:dwc][:genus].capitalize
55
- genus = nil if genus.match(/incertae/)
56
- name = {
57
- taxon_id: e[:internal_id],
88
+ file = CSV.open(File.join(@download_dir, find_csv_file),
89
+ headers: true)
90
+ file.each do |row|
91
+ order = row["order"].to_s.capitalize
92
+ order = nil if order.match(/incertae/) || order.empty?
93
+ family = row["family"].to_s.capitalize
94
+ family = nil if family.match(/incertae/) || family.empty?
95
+ genus = row["genus"].to_s.capitalize
96
+ genus = nil if genus.match(/incertae/) || genus.empty?
97
+ rank, name_string = assemble_name(row)
98
+ @names << {
99
+ taxon_id: row["id"],
58
100
  kingdom: "Animalia",
59
101
  phylum: "Chordata",
60
102
  klass: "Mammalia",
61
103
  order: order,
62
104
  family: family,
63
105
  genus: genus,
64
- name_string: "#{e[:dwc][:scientificName]} " \
65
- "#{e[:dwc][:scientificNameAuthorship][:species]}".strip,
66
- rank: e[:dwc][:taxonRank],
67
- status: e[:dwc][:taxonRank],
106
+ name_string: name_string,
107
+ rank: rank,
68
108
  code: "ICZN"
69
109
  }
70
- if e[:dwc][:taxonomicStatus] == "accepted"
71
- @names << name
72
- else
73
- @synonyms << name
110
+ if row["originalNameCombination"].to_s != ""
111
+ @synonyms << assemble_synonym(row)
112
+ end
113
+ vernaculars(row).each do |vern|
114
+ @vernaculars << vern
74
115
  end
75
- vern = e[:dwc][:vernacularName]
76
- next unless vern.to_s != ""
77
- vern = decoder.decode(vern)
78
- vernacular = {
79
- taxon_id: e[:id],
80
- vern: vern,
81
- lang: "en"
82
- }
83
- @vernaculars << vernacular
84
116
  end
85
- puts data[:result].size
86
117
  end
87
118
 
88
119
  def generate_dwca
@@ -96,11 +127,12 @@ module DwcaHunter
96
127
  "http://rs.tdwg.org/dwc/terms/order",
97
128
  "http://rs.tdwg.org/dwc/terms/family",
98
129
  "http://rs.tdwg.org/dwc/terms/genus",
130
+ "http://rs.tdwg.org/dwc/terms/taxonRank",
99
131
  "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
100
132
  @names.each do |n|
101
133
  @core << [n[:taxon_id], n[:name_string],
102
134
  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
103
- n[:genus], n[:code]]
135
+ n[:genus], n[:rank], n[:code]]
104
136
  end
105
137
  @extensions << {
106
138
  data: [[
@@ -133,23 +165,22 @@ module DwcaHunter
133
165
  authors: [
134
166
  { first_name: "C. J.",
135
167
  last_name: "Burgin" },
136
- { first_name: "J. P.",
137
- last_name: "Colella" },
138
- { first_name: "P. L.",
139
- last_name: "Kahn" },
140
- { first_name: "N. S.",
141
- last_name: "Upham" }
168
+ { first_name: "J. P.",
169
+ last_name: "Colella" },
170
+ { first_name: "P. L.",
171
+ last_name: "Kahn" },
172
+ { first_name: "N. S.",
173
+ last_name: "Upham" }
142
174
  ],
143
175
  metadata_providers: [
144
176
  { first_name: "Dmitry",
145
177
  last_name: "Mozzherin",
146
178
  email: "dmozzherin@gmail.com" }
147
179
  ],
148
- abstract: "Mammal Diversity Database. 2020. www.mammaldiversity.org. " \
149
- "American Society of Mammalogists. Accessed 2020-05-24 .",
150
- url: @url
180
+ abstract: "Mammal Diversity Database. 2021. www.mammaldiversity.org. " \
181
+ "American Society of Mammalogists. Accessed 2021-01-28.", url: @url
151
182
  }
152
183
  super
184
+ end
153
185
  end
154
186
  end
155
- end