dwca_hunter 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +2 -2
- data/lib/dwca_hunter/resources/arctos.rb +56 -85
- data/lib/dwca_hunter/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c7057b88df4f16a74e5818f1f9966183968844624143c68d43715c8569adb4c
|
4
|
+
data.tar.gz: 47b5a3b28b22a18fb8dff65a095775e7535f098d7cebcc60a199b82d02f8b9dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b780161f3c024dfe6155028fc71c8762e83a95f8dd0f9158d5d387f0cbb77cd6525d5abb5137d8d73ed42093ccae897e38da8e8d2a13bf5b10bec1fae9f68424
|
7
|
+
data.tar.gz: 3c8cfa6603b2cc8bac0766568168ed6016e3d509e62e88b47704c4b0e2662f332de230da4cbe1c8ef0cb1467ce5b6e987260875e9b160efde1e2cff169cae263
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.6.4
|
data/Gemfile.lock
CHANGED
data/lib/dwca_hunter/resources/arctos.rb
CHANGED
@@ -5,7 +5,7 @@ module DwcaHunter
|
|
5
5
|
def initialize(opts = {})
|
6
6
|
@command = 'arctos'
|
7
7
|
@title = 'Arctos'
|
8
|
-
@url = 'https://www.dropbox.com/s/
|
8
|
+
@url = 'https://www.dropbox.com/s/3rmny5d8cfm9mmp/arctos.tar.gz?dl=1'
|
9
9
|
@UUID = 'eea8315d-a244-4625-859a-226675622312'
|
10
10
|
@download_path = File.join(Dir.tmpdir,
|
11
11
|
'dwca_hunter',
|
@@ -15,6 +15,8 @@ module DwcaHunter
|
|
15
15
|
@names = []
|
16
16
|
@vernaculars = []
|
17
17
|
@extensions = []
|
18
|
+
@synonyms_hash = {}
|
19
|
+
@vernaculars_hash = {}
|
18
20
|
super(opts)
|
19
21
|
end
|
20
22
|
|
@@ -24,7 +26,7 @@ module DwcaHunter
|
|
24
26
|
end
|
25
27
|
|
26
28
|
def unpack
|
27
|
-
|
29
|
+
unpack_tar
|
28
30
|
end
|
29
31
|
|
30
32
|
def make_dwca
|
@@ -37,92 +39,72 @@ module DwcaHunter
|
|
37
39
|
|
38
40
|
def get_names
|
39
41
|
Dir.chdir(@download_dir)
|
40
|
-
Dir.entries(@download_dir).grep(/zip$/).each do |file|
|
41
|
-
self.class.unzip(file) unless File.exists?(file.gsub(/zip$/,'csv'))
|
42
|
-
end
|
43
42
|
collect_names
|
44
43
|
collect_synonyms
|
45
44
|
collect_vernaculars
|
46
45
|
end
|
47
46
|
|
48
47
|
def collect_vernaculars
|
49
|
-
file = open(File.join(@download_dir, '
|
50
|
-
|
48
|
+
file = CSV.open(File.join(@download_dir, 'common_name.csv'),
|
49
|
+
headers: true)
|
51
50
|
file.each_with_index do |row, i|
|
52
51
|
|
53
|
-
|
54
|
-
|
55
|
-
next
|
56
|
-
end
|
57
|
-
|
58
|
-
row = split_row(row)
|
59
|
-
|
60
|
-
taxon_id = row[fields[:taxon_name_id]]
|
61
|
-
vernacular_name_string = row[fields[:common_name]]
|
52
|
+
canonical = row['SCIENTIFIC_NAME']
|
53
|
+
vernacular_name_string = row['COMMON_NAME']
|
62
54
|
|
63
|
-
@
|
64
|
-
|
65
|
-
|
66
|
-
|
55
|
+
if @vernaculars_hash.has_key?(canonical)
|
56
|
+
@vernaculars_hash[canonical] << vernacular_name_string
|
57
|
+
else
|
58
|
+
@vernaculars_hash[canonical] = [vernacular_name_string]
|
59
|
+
end
|
67
60
|
|
68
61
|
puts "Processed %s vernaculars" % i if i % 10000 == 0
|
69
62
|
end
|
70
63
|
end
|
71
64
|
|
72
65
|
def collect_synonyms
|
73
|
-
file = open(File.join(@download_dir, '
|
74
|
-
|
66
|
+
file = CSV.open(File.join(@download_dir, 'relationships.csv'),
|
67
|
+
headers: true)
|
75
68
|
file.each_with_index do |row, i|
|
76
|
-
|
77
|
-
|
78
|
-
|
69
|
+
canonical = row['scientific_name']
|
70
|
+
if @synonyms_hash.has_key?(canonical)
|
71
|
+
@synonyms_hash[canonical] <<
|
72
|
+
{ synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
|
73
|
+
else
|
74
|
+
@synonyms_hash[canonical] = [
|
75
|
+
{ synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
|
76
|
+
]
|
79
77
|
end
|
80
|
-
|
81
|
-
row = split_row(row)
|
82
|
-
taxon_id = row[fields[:taxon_name_id]]
|
83
|
-
@synonyms << {
|
84
|
-
taxon_id: row[fields[:related_taxon_name_id]],
|
85
|
-
local_id: taxon_id,
|
86
|
-
name_string: @names_index[taxon_id],
|
87
|
-
#synonym_authority: row[fields[:relation_authority]],
|
88
|
-
taxonomic_status: row[fields[:taxon_relationship]],
|
89
|
-
}
|
90
78
|
puts "Processed %s synonyms" % i if i % 10000 == 0
|
91
79
|
end
|
92
80
|
end
|
93
81
|
|
94
82
|
def collect_names
|
95
83
|
@names_index = {}
|
96
|
-
file = open(File.join(@download_dir, '
|
97
|
-
|
84
|
+
file = CSV.open(File.join(@download_dir, 'classification.csv'),
|
85
|
+
headers: true)
|
98
86
|
file.each_with_index do |row, i|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
subgenus = row[fields[:subgenus]]
|
120
|
-
species = row[fields[:species]]
|
121
|
-
subspecies = row[fields[:subspecies]]
|
122
|
-
code = row[fields[:nomenclatural_code]]
|
123
|
-
|
87
|
+
next unless row['display_name']
|
88
|
+
name_string = row['display_name'].gsub(/<\/?i>/,'')
|
89
|
+
canonical = row['scientific_name']
|
90
|
+
kingdom = row['kingdom']
|
91
|
+
phylum = row['phylum']
|
92
|
+
klass = row['phylclass']
|
93
|
+
subclass = row['subclass']
|
94
|
+
order = row['phylorder']
|
95
|
+
suborder = row['suborder']
|
96
|
+
superfamily = row['superfamily']
|
97
|
+
family = row['family']
|
98
|
+
subfamily = row['subfamily']
|
99
|
+
tribe = row['tribe']
|
100
|
+
genus = row['genus']
|
101
|
+
subgenus = row['subgenus']
|
102
|
+
species = row['species']
|
103
|
+
subspecies = row['subspecies']
|
104
|
+
code = row['nomenclatural_code']
|
105
|
+
|
106
|
+
taxon_id = "ARCT_#{i}"
|
124
107
|
@names << { taxon_id: taxon_id,
|
125
|
-
local_id: taxon_id,
|
126
108
|
name_string: name_string,
|
127
109
|
kingdom: kingdom,
|
128
110
|
phylum: phylum,
|
@@ -133,37 +115,26 @@ module DwcaHunter
|
|
133
115
|
code: code,
|
134
116
|
}
|
135
117
|
|
136
|
-
|
118
|
+
update_vernacular(taxon_id, canonical)
|
119
|
+
update_synonym(taxon_id, canonical)
|
137
120
|
puts "Processed %s names" % i if i % 10000 == 0
|
138
121
|
end
|
139
122
|
end
|
140
123
|
|
141
|
-
def
|
142
|
-
|
143
|
-
|
124
|
+
def update_vernacular(taxon_id, canonical)
|
125
|
+
return unless @vernaculars_hash.has_key?(canonical)
|
126
|
+
@vernaculars_hash[canonical].each do |vern|
|
127
|
+
@vernaculars << [taxon_id, vern, 'en']
|
128
|
+
end
|
144
129
|
end
|
145
130
|
|
146
|
-
def
|
147
|
-
|
148
|
-
|
149
|
-
:
|
150
|
-
:undef => :replace,
|
151
|
-
:replace => '',
|
152
|
-
:universal_newline => true
|
153
|
-
}
|
154
|
-
num_ary = (0...row.size).to_a
|
155
|
-
row = row.map do |f|
|
156
|
-
f = f.strip.downcase
|
157
|
-
f = f.encode ::Encoding.find('ASCII'), encoding_options
|
158
|
-
f.to_sym
|
131
|
+
def update_synonym(taxon_id, canonical)
|
132
|
+
return unless @synonyms_hash.has_key?(canonical)
|
133
|
+
@synonyms_hash[canonical].each do |syn|
|
134
|
+
@synonyms << [taxon_id, syn[:synonym], syn[:status]]
|
159
135
|
end
|
160
|
-
res = Hash[row.zip(num_ary)]
|
161
|
-
require 'byebug'; byebug
|
162
|
-
puts ''
|
163
|
-
res
|
164
136
|
end
|
165
137
|
|
166
|
-
|
167
138
|
def generate_dwca
|
168
139
|
DwcaHunter::logger_write(self.object_id,
|
169
140
|
'Creating DarwinCore Archive file')
|
data/lib/dwca_hunter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwca_hunter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|
@@ -284,8 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
284
284
|
- !ruby/object:Gem::Version
|
285
285
|
version: '0'
|
286
286
|
requirements: []
|
287
|
-
|
288
|
-
rubygems_version: 2.7.6
|
287
|
+
rubygems_version: 3.0.3
|
289
288
|
signing_key:
|
290
289
|
specification_version: 4
|
291
290
|
summary: Converts a variety of available online resources to DarwinCore Archive files.
|