dwca_hunter 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,14 @@
3
3
  module DwcaHunter
4
4
  class ResourceIOCWorldBird < DwcaHunter::Resource
5
5
  def initialize(opts = {})
6
- @command = "ioc-world-bird"
7
- @title = "IOC World Bird List"
8
- @url = "https://uofi.box.com/shared/static/fbpuk5ghh9083nbzjdeyqtoqsvdnro01.csv"
9
- @UUID = "6421ffec-38e3-40fb-a6d9-af27238a47a1"
6
+ @command = 'ioc-world-bird'
7
+ @title = 'IOC World Bird List'
8
+ @url = 'https://uofi.box.com/shared/static/znsd734a78saq87hes979p5uspgkzy93.csv'
9
+ @UUID = '6421ffec-38e3-40fb-a6d9-af27238a47a1'
10
10
  @download_path = File.join(Dir.tmpdir,
11
- "dwca_hunter",
12
- "ioc-bird",
13
- "data.csv")
11
+ 'dwca_hunter',
12
+ 'ioc-bird',
13
+ 'data.csv')
14
14
  @synonyms = []
15
15
  @names = []
16
16
  @vernaculars = []
@@ -21,17 +21,17 @@ module DwcaHunter
21
21
  end
22
22
 
23
23
  def download
24
- puts "Downloading cached and converted to csv version."
25
- puts "CHECK FOR NEW VERSION at"
26
- puts "https://www.worldbirdnames.org/ioc-lists/master-list-2/"
27
- puts "Use libreoffice to convert to csv."
24
+ puts 'Downloading cached and converted to csv version.'
25
+ puts 'CHECK FOR NEW VERSION at'
26
+ puts 'https://www.worldbirdnames.org/ioc-lists/master-list-2/'
27
+ puts 'Use libreoffice to convert to csv.'
28
28
  `curl -s -L #{@url} -o #{@download_path}`
29
29
  end
30
30
 
31
31
  def unpack; end
32
32
 
33
33
  def make_dwca
34
- DwcaHunter.logger_write(object_id, "Extracting data")
34
+ DwcaHunter.logger_write(object_id, 'Extracting data')
35
35
  get_names
36
36
  generate_dwca
37
37
  end
@@ -45,84 +45,84 @@ module DwcaHunter
45
45
 
46
46
  def collect_names
47
47
  @names_index = {}
48
- file = CSV.open(File.join(@download_dir, "data.csv"),
48
+ file = CSV.open(File.join(@download_dir, 'data.csv'),
49
49
  headers: true)
50
- order = ""
51
- family = ""
52
- genus = ""
53
- species = ""
50
+ order = ''
51
+ family = ''
52
+ genus = ''
53
+ species = ''
54
54
  count = 0
55
55
  file.each do |row|
56
- order1 = row["Order"]
57
- order = order1.capitalize if order1.to_s != ""
56
+ order1 = row['Order']
57
+ order = order1.capitalize if order1.to_s != ''
58
58
 
59
- family1 = row["Family (Scientific)"]
60
- family = family1.capitalize if family1.to_s != ""
59
+ family1 = row['Family (Scientific)']
60
+ family = family1.capitalize if family1.to_s != ''
61
61
 
62
- genus1 = row["Genus"]
63
- genus = genus1.capitalize if genus1.to_s != ""
62
+ genus1 = row['Genus']
63
+ genus = genus1.capitalize if genus1.to_s != ''
64
64
 
65
- species1 = row["Species (Scientific)"]
66
- species = species1 if species1.to_s != ""
65
+ species1 = row['Species (Scientific)']
66
+ species = species1 if species1.to_s != ''
67
67
 
68
- subspecies = row["Subspecies"]
69
- next if species.to_s == ""
68
+ subspecies = row['Subspecies']
69
+ next if species.to_s == ''
70
70
 
71
71
  count += 1
72
72
  taxon_id = "gn_#{count}"
73
73
  name = {
74
74
  taxon_id: taxon_id,
75
- kingdom: "Animalia",
76
- phylum: "Chordata",
77
- klass: "Aves",
75
+ kingdom: 'Animalia',
76
+ phylum: 'Chordata',
77
+ klass: 'Aves',
78
78
  order: order,
79
79
  family: family,
80
80
  genus: genus,
81
- code: "ICZN"
81
+ code: 'ICZN'
82
82
  }
83
- if subspecies.to_s == ""
84
- auth = row["Authority"].to_s
85
- auth = DwcaHunter.normalize_authors(auth) if auth != ""
83
+ if subspecies.to_s == ''
84
+ auth = row['Authority'].to_s
85
+ auth = DwcaHunter.normalize_authors(auth) if auth != ''
86
86
  name[:name_string] = clean(
87
- "#{genus} #{species} #{auth}".
88
- strip
87
+ "#{genus} #{species} #{auth}"
88
+ .strip
89
89
  )
90
90
  @names << name
91
- vernacular = row["Species (English)"]
92
- if vernacular.to_s != ""
93
- vernaclar = { taxon_id: taxon_id, vern: vernacular, lang: "en" }
91
+ vernacular = row['Species (English)']
92
+ if vernacular.to_s != ''
93
+ vernaclar = { taxon_id: taxon_id, vern: vernacular, lang: 'en' }
94
94
  @vernaculars << vernaclar
95
95
  end
96
- species = ""
96
+ species = ''
97
97
  else
98
98
  name[:name_string] = clean(
99
- "#{genus} #{species} #{subspecies} #{row['Authority']}".
100
- strip
99
+ "#{genus} #{species} #{subspecies} #{row['Authority']}"
100
+ .strip
101
101
  )
102
102
  @names << name
103
- species = ""
104
- subspecies = ""
103
+ species = ''
104
+ subspecies = ''
105
105
  end
106
106
  end
107
107
  end
108
108
 
109
109
  def clean(n)
110
- n = n.gsub(/†/, "")
111
- n.gsub(/\s+/, " ")
110
+ n = n.gsub(/†/, '')
111
+ n.gsub(/\s+/, ' ')
112
112
  end
113
113
 
114
114
  def generate_dwca
115
115
  DwcaHunter.logger_write(object_id,
116
- "Creating DarwinCore Archive file")
117
- @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
118
- "http://rs.tdwg.org/dwc/terms/scientificName",
119
- "http://rs.tdwg.org/dwc/terms/kingdom",
120
- "http://rs.tdwg.org/dwc/terms/phylum",
121
- "http://rs.tdwg.org/dwc/terms/class",
122
- "http://rs.tdwg.org/dwc/terms/order",
123
- "http://rs.tdwg.org/dwc/terms/family",
124
- "http://rs.tdwg.org/dwc/terms/genus",
125
- "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
116
+ 'Creating DarwinCore Archive file')
117
+ @core = [['http://rs.tdwg.org/dwc/terms/taxonID',
118
+ 'http://rs.tdwg.org/dwc/terms/scientificName',
119
+ 'http://rs.tdwg.org/dwc/terms/kingdom',
120
+ 'http://rs.tdwg.org/dwc/terms/phylum',
121
+ 'http://rs.tdwg.org/dwc/terms/class',
122
+ 'http://rs.tdwg.org/dwc/terms/order',
123
+ 'http://rs.tdwg.org/dwc/terms/family',
124
+ 'http://rs.tdwg.org/dwc/terms/genus',
125
+ 'http://rs.tdwg.org/dwc/terms/nomenclaturalCode']]
126
126
  @names.each do |n|
127
127
  @core << [n[:taxon_id], n[:name_string],
128
128
  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
@@ -130,12 +130,12 @@ module DwcaHunter
130
130
  end
131
131
  @extensions << {
132
132
  data: [[
133
- "http://rs.tdwg.org/dwc/terms/taxonID",
134
- "http://rs.tdwg.org/dwc/terms/vernacularName",
135
- "http://purl.org/dc/terms/language"
133
+ 'http://rs.tdwg.org/dwc/terms/taxonID',
134
+ 'http://rs.tdwg.org/dwc/terms/vernacularName',
135
+ 'http://purl.org/dc/terms/language'
136
136
  ]],
137
- file_name: "vernacular_names.txt",
138
- row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
137
+ file_name: 'vernacular_names.txt',
138
+ row_type: 'http://rs.gbif.org/terms/1.0/VernacularName'
139
139
  }
140
140
 
141
141
  @vernaculars.each do |v|
@@ -146,53 +146,53 @@ module DwcaHunter
146
146
  id: @uuid,
147
147
  title: @title,
148
148
  authors: [
149
- { first_name: "Per",
150
- last_name: "Alstrom" },
151
- { first_name: "Mike",
152
- last_name: "Blair" },
153
- { first_name: "Rauri",
154
- last_name: "Bowie" },
155
- { first_name: "Nigel",
156
- last_name: "Redman" },
157
- { first_name: "Jon",
158
- last_name: "Fjeldsa" },
159
- { first_name: "Phil",
160
- last_name: "Gregory" },
161
- { first_name: "Leo",
162
- last_name: "Joseph" },
163
- { first_name: "Peter",
164
- last_name: "Kovalik" },
165
- { first_name: "Adolfo",
166
- last_name: "Navarro-Siguenza" },
167
- { first_name: "David",
168
- last_name: "Parkin" },
169
- { first_name: "Alan",
170
- last_name: "Peterson" },
171
- { first_name: "Douglas",
172
- last_name: "Pratt" },
173
- { first_name: "Pam",
174
- last_name: "Rasmussen" },
175
- { first_name: "Frank",
176
- last_name: "Rheindt" },
177
- { first_name: "Robert",
178
- last_name: "Ridgely" },
179
- { first_name: "Peter",
180
- last_name: "Ryan" },
181
- { first_name: "George",
182
- last_name: "Sangster" },
183
- { first_name: "Dick",
184
- last_name: "Schodde" },
185
- { first_name: "Minturn",
186
- last_name: "Wright" }
149
+ { first_name: 'Per',
150
+ last_name: 'Alstrom' },
151
+ { first_name: 'Mike',
152
+ last_name: 'Blair' },
153
+ { first_name: 'Rauri',
154
+ last_name: 'Bowie' },
155
+ { first_name: 'Nigel',
156
+ last_name: 'Redman' },
157
+ { first_name: 'Jon',
158
+ last_name: 'Fjeldsa' },
159
+ { first_name: 'Phil',
160
+ last_name: 'Gregory' },
161
+ { first_name: 'Leo',
162
+ last_name: 'Joseph' },
163
+ { first_name: 'Peter',
164
+ last_name: 'Kovalik' },
165
+ { first_name: 'Adolfo',
166
+ last_name: 'Navarro-Siguenza' },
167
+ { first_name: 'David',
168
+ last_name: 'Parkin' },
169
+ { first_name: 'Alan',
170
+ last_name: 'Peterson' },
171
+ { first_name: 'Douglas',
172
+ last_name: 'Pratt' },
173
+ { first_name: 'Pam',
174
+ last_name: 'Rasmussen' },
175
+ { first_name: 'Frank',
176
+ last_name: 'Rheindt' },
177
+ { first_name: 'Robert',
178
+ last_name: 'Ridgely' },
179
+ { first_name: 'Peter',
180
+ last_name: 'Ryan' },
181
+ { first_name: 'George',
182
+ last_name: 'Sangster' },
183
+ { first_name: 'Dick',
184
+ last_name: 'Schodde' },
185
+ { first_name: 'Minturn',
186
+ last_name: 'Wright' }
187
187
  ],
188
188
  metadata_providers: [
189
- { first_name: "Dmitry",
190
- last_name: "Mozzherin",
191
- email: "dmozzherin@gmail.com" }
189
+ { first_name: 'Dmitry',
190
+ last_name: 'Mozzherin',
191
+ email: 'dmozzherin@gmail.com' }
192
192
  ],
193
- abstract: "The IOC World Bird List is an open access resource of " \
194
- "the international community of ornithologists.",
195
- url: "https://www.worldbirdnames.org"
193
+ abstract: 'The IOC World Bird List is an open access resource of ' \
194
+ 'the international community of ornithologists.',
195
+ url: 'https://www.worldbirdnames.org'
196
196
  }
197
197
  super
198
198
  end
@@ -5,12 +5,12 @@ module DwcaHunter
5
5
  def initialize(opts = {})
6
6
  @command = "mammal-div-db"
7
7
  @title = "ASM Mammal Diversity Database"
8
- @url = "https://mammaldiversity.org/species-account/api.php?q=*"
8
+ @url = "https://www.mammaldiversity.org/assets/data/MDD.zip"
9
9
  @UUID = "94270cdd-5424-4bb1-8324-46ccc5386dc7"
10
10
  @download_path = File.join(Dir.tmpdir,
11
11
  "dwca_hunter",
12
12
  "mammal-div-db",
13
- "data.json")
13
+ "data.zip")
14
14
  @synonyms = []
15
15
  @names = []
16
16
  @vernaculars = []
@@ -25,7 +25,9 @@ module DwcaHunter
25
25
  `curl '#{@url}' -H 'User-Agent:' -o #{@download_path}`
26
26
  end
27
27
 
28
- def unpack; end
28
+ def unpack
29
+ unpack_zip
30
+ end
29
31
 
30
32
  def make_dwca
31
33
  DwcaHunter.logger_write(object_id, "Extracting data")
@@ -40,49 +42,78 @@ module DwcaHunter
40
42
  collect_names
41
43
  end
42
44
 
45
# Locates the CSV file among the entries of @download_dir.
#
# Scans @download_dir (non-recursively) and returns the bare file name of the
# first entry whose name ends in ".csv". collect_names joins the result with
# @download_dir, so a file name (not a path) is what callers expect.
#
# The directory is listed directly instead of via Dir.chdir, so the
# process-wide working directory is no longer changed as a side effect.
#
# @return [String] file name of the first ".csv" entry found
# @raise [Errno::ENOENT] if @download_dir contains no ".csv" entry
def find_csv_file
  csv = Dir.entries(@download_dir).find { |f| f.end_with?(".csv") }
  # Fail loudly: the previous loop fell through and returned the whole
  # entries array, which File.join then flattened into a bogus path.
  raise Errno::ENOENT, "no .csv file in #{@download_dir}" unless csv

  csv
end
51
+
52
# Builds the scientific name string (with authorship) and a rank for one row.
#
# The name comes from the "sciName" column with underscores turned into
# spaces. Authorship is "<author> <year>", wrapped in parentheses when the
# "authorityParentheses" column is 1. A name of more than two words is
# ranked "subspecies", anything else "species".
#
# @param row [#[]] CSV row (or Hash) keyed by column header
# @return [Array(String, String)] [rank, name_string]
def assemble_name(row)
  name = row["sciName"].to_s.tr("_", " ")
  # The year key was misspelled ("aurhority..."), unlike the sibling
  # "authority..." columns; the misspelled key is kept only as a fallback.
  # NOTE(review): confirm the exact header against the downloaded CSV.
  year = row["authoritySpeciesYear"] || row["aurhoritySpeciesYear"]
  auth = "#{row['authoritySpeciesAuthor']} #{year}".strip
  # CSV fields are Strings unless converters are configured, so compare the
  # textual form; this accepts both "1" and an Integer 1.
  parens = row["authorityParentheses"].to_s.strip == "1"
  auth = "(#{auth})" if parens && !auth.empty?
  rank = name.split.size > 2 ? "subspecies" : "species"
  [rank, "#{name} #{auth}".strip]
end
62
+
63
# Builds a synonym record from a row's original name combination.
#
# The name is the "originalNameCombination" column with underscores turned
# into spaces, followed by "<author> <year>". The record carries the row's
# own id as taxon_id.
#
# @param row [#[]] CSV row (or Hash) keyed by column header
# @return [Hash] { taxon_id:, name_string:, status: "synonym" }
def assemble_synonym(row)
  name = row["originalNameCombination"].to_s.tr("_", " ")
  # The year key was misspelled ("aurhority..."), unlike the sibling
  # "authority..." columns; the misspelled key is kept only as a fallback.
  # NOTE(review): confirm the exact header against the downloaded CSV.
  year = row["authoritySpeciesYear"] || row["aurhoritySpeciesYear"]
  auth = "#{row['authoritySpeciesAuthor']} #{year}".strip
  { taxon_id: row["id"], name_string: "#{name} #{auth}".strip, status: "synonym" }
end
70
+
71
# Collects the English vernacular-name records for one row.
#
# Takes the "mainCommonName" column first, then the "|"-separated entries of
# "otherCommonNames". Each name is trimmed and blank entries are dropped, so
# input such as "a| b ||" does not produce empty or padded names.
#
# @param row [#[]] CSV row (or Hash) keyed by column header
# @return [Array<Hash>] one { taxon_id:, vern:, lang: "en" } hash per name;
#   empty when the row has no common names
def vernaculars(row)
  id = row["id"]
  main = row["mainCommonName"].to_s
  others = row["otherCommonNames"].to_s.split("|")
  [main, *others].map(&:strip).reject(&:empty?).map do |v|
    { taxon_id: id, vern: v, lang: "en" }
  end
end
85
+
43
86
  def collect_names
44
87
  @names_index = {}
45
- decoder = HTMLEntities.new
46
- data = File.read(File.join(@download_dir, "data.json"))
47
- data = JSON.parse(data, symbolize_names: true)
48
- data[:result].each_with_index do |e, _i|
49
- e = e[1]
50
- order = e[:dwc][:order].capitalize
51
- order = nil if order.match(/incertae/)
52
- family = e[:dwc][:family].capitalize
53
- family = nil if family.match(/incertae/)
54
- genus = e[:dwc][:genus].capitalize
55
- genus = nil if genus.match(/incertae/)
56
- name = {
57
- taxon_id: e[:internal_id],
88
+ file = CSV.open(File.join(@download_dir, find_csv_file),
89
+ headers: true)
90
+ file.each do |row|
91
+ order = row["order"].to_s.capitalize
92
+ order = nil if order.match(/incertae/) || order.empty?
93
+ family = row["family"].to_s.capitalize
94
+ family = nil if family.match(/incertae/) || family.empty?
95
+ genus = row["genus"].to_s.capitalize
96
+ genus = nil if genus.match(/incertae/) || genus.empty?
97
+ rank, name_string = assemble_name(row)
98
+ @names << {
99
+ taxon_id: row["id"],
58
100
  kingdom: "Animalia",
59
101
  phylum: "Chordata",
60
102
  klass: "Mammalia",
61
103
  order: order,
62
104
  family: family,
63
105
  genus: genus,
64
- name_string: "#{e[:dwc][:scientificName]} " \
65
- "#{e[:dwc][:scientificNameAuthorship][:species]}".strip,
66
- rank: e[:dwc][:taxonRank],
67
- status: e[:dwc][:taxonRank],
106
+ name_string: name_string,
107
+ rank: rank,
68
108
  code: "ICZN"
69
109
  }
70
- if e[:dwc][:taxonomicStatus] == "accepted"
71
- @names << name
72
- else
73
- @synonyms << name
110
+ if row["originalNameCombination"].to_s != ""
111
+ @synonyms << assemble_synonym(row)
112
+ end
113
+ vernaculars(row).each do |vern|
114
+ @vernaculars << vern
74
115
  end
75
- vern = e[:dwc][:vernacularName]
76
- next unless vern.to_s != ""
77
- vern = decoder.decode(vern)
78
- vernacular = {
79
- taxon_id: e[:id],
80
- vern: vern,
81
- lang: "en"
82
- }
83
- @vernaculars << vernacular
84
116
  end
85
- puts data[:result].size
86
117
  end
87
118
 
88
119
  def generate_dwca
@@ -96,11 +127,12 @@ module DwcaHunter
96
127
  "http://rs.tdwg.org/dwc/terms/order",
97
128
  "http://rs.tdwg.org/dwc/terms/family",
98
129
  "http://rs.tdwg.org/dwc/terms/genus",
130
+ "http://rs.tdwg.org/dwc/terms/taxonRank",
99
131
  "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
100
132
  @names.each do |n|
101
133
  @core << [n[:taxon_id], n[:name_string],
102
134
  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
103
- n[:genus], n[:code]]
135
+ n[:genus], n[:rank], n[:code]]
104
136
  end
105
137
  @extensions << {
106
138
  data: [[
@@ -133,23 +165,22 @@ module DwcaHunter
133
165
  authors: [
134
166
  { first_name: "C. J.",
135
167
  last_name: "Burgin" },
136
- { first_name: "J. P.",
137
- last_name: "Colella" },
138
- { first_name: "P. L.",
139
- last_name: "Kahn" },
140
- { first_name: "N. S.",
141
- last_name: "Upham" }
168
+ { first_name: "J. P.",
169
+ last_name: "Colella" },
170
+ { first_name: "P. L.",
171
+ last_name: "Kahn" },
172
+ { first_name: "N. S.",
173
+ last_name: "Upham" }
142
174
  ],
143
175
  metadata_providers: [
144
176
  { first_name: "Dmitry",
145
177
  last_name: "Mozzherin",
146
178
  email: "dmozzherin@gmail.com" }
147
179
  ],
148
- abstract: "Mammal Diversity Database. 2020. www.mammaldiversity.org. " \
149
- "American Society of Mammalogists. Accessed 2020-05-24 .",
150
- url: @url
180
+ abstract: "Mammal Diversity Database. 2021. www.mammaldiversity.org. " \
181
+ "American Society of Mammalogists. Accessed 2021-01-28.", url: @url
151
182
  }
152
183
  super
184
+ end
153
185
  end
154
186
  end
155
- end