dwca_hunter 0.5.3 → 0.5.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +2 -2
- data/lib/dwca_hunter/resources/arctos.rb +56 -85
- data/lib/dwca_hunter/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c7057b88df4f16a74e5818f1f9966183968844624143c68d43715c8569adb4c
|
4
|
+
data.tar.gz: 47b5a3b28b22a18fb8dff65a095775e7535f098d7cebcc60a199b82d02f8b9dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b780161f3c024dfe6155028fc71c8762e83a95f8dd0f9158d5d387f0cbb77cd6525d5abb5137d8d73ed42093ccae897e38da8e8d2a13bf5b10bec1fae9f68424
|
7
|
+
data.tar.gz: 3c8cfa6603b2cc8bac0766568168ed6016e3d509e62e88b47704c4b0e2662f332de230da4cbe1c8ef0cb1467ce5b6e987260875e9b160efde1e2cff169cae263
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.6.4
|
data/Gemfile.lock
CHANGED
@@ -5,7 +5,7 @@ module DwcaHunter
|
|
5
5
|
def initialize(opts = {})
|
6
6
|
@command = 'arctos'
|
7
7
|
@title = 'Arctos'
|
8
|
-
@url = 'https://www.dropbox.com/s/
|
8
|
+
@url = 'https://www.dropbox.com/s/3rmny5d8cfm9mmp/arctos.tar.gz?dl=1'
|
9
9
|
@UUID = 'eea8315d-a244-4625-859a-226675622312'
|
10
10
|
@download_path = File.join(Dir.tmpdir,
|
11
11
|
'dwca_hunter',
|
@@ -15,6 +15,8 @@ module DwcaHunter
|
|
15
15
|
@names = []
|
16
16
|
@vernaculars = []
|
17
17
|
@extensions = []
|
18
|
+
@synonyms_hash = {}
|
19
|
+
@vernaculars_hash = {}
|
18
20
|
super(opts)
|
19
21
|
end
|
20
22
|
|
@@ -24,7 +26,7 @@ module DwcaHunter
|
|
24
26
|
end
|
25
27
|
|
26
28
|
def unpack
|
27
|
-
|
29
|
+
unpack_tar
|
28
30
|
end
|
29
31
|
|
30
32
|
def make_dwca
|
@@ -37,92 +39,72 @@ module DwcaHunter
|
|
37
39
|
|
38
40
|
def get_names
|
39
41
|
Dir.chdir(@download_dir)
|
40
|
-
Dir.entries(@download_dir).grep(/zip$/).each do |file|
|
41
|
-
self.class.unzip(file) unless File.exists?(file.gsub(/zip$/,'csv'))
|
42
|
-
end
|
43
42
|
collect_names
|
44
43
|
collect_synonyms
|
45
44
|
collect_vernaculars
|
46
45
|
end
|
47
46
|
|
48
47
|
def collect_vernaculars
|
49
|
-
file = open(File.join(@download_dir, '
|
50
|
-
|
48
|
+
file = CSV.open(File.join(@download_dir, 'common_name.csv'),
|
49
|
+
headers: true)
|
51
50
|
file.each_with_index do |row, i|
|
52
51
|
|
53
|
-
|
54
|
-
|
55
|
-
next
|
56
|
-
end
|
57
|
-
|
58
|
-
row = split_row(row)
|
59
|
-
|
60
|
-
taxon_id = row[fields[:taxon_name_id]]
|
61
|
-
vernacular_name_string = row[fields[:common_name]]
|
52
|
+
canonical = row['SCIENTIFIC_NAME']
|
53
|
+
vernacular_name_string = row['COMMON_NAME']
|
62
54
|
|
63
|
-
@
|
64
|
-
|
65
|
-
|
66
|
-
|
55
|
+
if @vernaculars_hash.has_key?(canonical)
|
56
|
+
@vernaculars_hash[canonical] << vernacular_name_string
|
57
|
+
else
|
58
|
+
@vernaculars_hash[canonical] = [vernacular_name_string]
|
59
|
+
end
|
67
60
|
|
68
61
|
puts "Processed %s vernaculars" % i if i % 10000 == 0
|
69
62
|
end
|
70
63
|
end
|
71
64
|
|
72
65
|
def collect_synonyms
|
73
|
-
file = open(File.join(@download_dir, '
|
74
|
-
|
66
|
+
file = CSV.open(File.join(@download_dir, 'relationships.csv'),
|
67
|
+
headers: true)
|
75
68
|
file.each_with_index do |row, i|
|
76
|
-
|
77
|
-
|
78
|
-
|
69
|
+
canonical = row['scientific_name']
|
70
|
+
if @synonyms_hash.has_key?(canonical)
|
71
|
+
@synonyms_hash[canonical] <<
|
72
|
+
{ synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
|
73
|
+
else
|
74
|
+
@synonyms_hash[canonical] = [
|
75
|
+
{ synonym: row['related_name'], status: row['TAXON_RELATIONSHIP']}
|
76
|
+
]
|
79
77
|
end
|
80
|
-
|
81
|
-
row = split_row(row)
|
82
|
-
taxon_id = row[fields[:taxon_name_id]]
|
83
|
-
@synonyms << {
|
84
|
-
taxon_id: row[fields[:related_taxon_name_id]],
|
85
|
-
local_id: taxon_id,
|
86
|
-
name_string: @names_index[taxon_id],
|
87
|
-
#synonym_authority: row[fields[:relation_authority]],
|
88
|
-
taxonomic_status: row[fields[:taxon_relationship]],
|
89
|
-
}
|
90
78
|
puts "Processed %s synonyms" % i if i % 10000 == 0
|
91
79
|
end
|
92
80
|
end
|
93
81
|
|
94
82
|
def collect_names
|
95
83
|
@names_index = {}
|
96
|
-
file = open(File.join(@download_dir, '
|
97
|
-
|
84
|
+
file = CSV.open(File.join(@download_dir, 'classification.csv'),
|
85
|
+
headers: true)
|
98
86
|
file.each_with_index do |row, i|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
subgenus = row[fields[:subgenus]]
|
120
|
-
species = row[fields[:species]]
|
121
|
-
subspecies = row[fields[:subspecies]]
|
122
|
-
code = row[fields[:nomenclatural_code]]
|
123
|
-
|
87
|
+
next unless row['display_name']
|
88
|
+
name_string = row['display_name'].gsub(/<\/?i>/,'')
|
89
|
+
canonical = row['scientific_name']
|
90
|
+
kingdom = row['kingdom']
|
91
|
+
phylum = row['phylum']
|
92
|
+
klass = row['phylclass']
|
93
|
+
subclass = row['subclass']
|
94
|
+
order = row['phylorder']
|
95
|
+
suborder = row['suborder']
|
96
|
+
superfamily = row['superfamily']
|
97
|
+
family = row['family']
|
98
|
+
subfamily = row['subfamily']
|
99
|
+
tribe = row['tribe']
|
100
|
+
genus = row['genus']
|
101
|
+
subgenus = row['subgenus']
|
102
|
+
species = row['species']
|
103
|
+
subspecies = row['subspecies']
|
104
|
+
code = row['nomenclatural_code']
|
105
|
+
|
106
|
+
taxon_id = "ARCT_#{i}"
|
124
107
|
@names << { taxon_id: taxon_id,
|
125
|
-
local_id: taxon_id,
|
126
108
|
name_string: name_string,
|
127
109
|
kingdom: kingdom,
|
128
110
|
phylum: phylum,
|
@@ -133,37 +115,26 @@ module DwcaHunter
|
|
133
115
|
code: code,
|
134
116
|
}
|
135
117
|
|
136
|
-
|
118
|
+
update_vernacular(taxon_id, canonical)
|
119
|
+
update_synonym(taxon_id, canonical)
|
137
120
|
puts "Processed %s names" % i if i % 10000 == 0
|
138
121
|
end
|
139
122
|
end
|
140
123
|
|
141
|
-
def
|
142
|
-
|
143
|
-
|
124
|
+
def update_vernacular(taxon_id, canonical)
|
125
|
+
return unless @vernaculars_hash.has_key?(canonical)
|
126
|
+
@vernaculars_hash[canonical].each do |vern|
|
127
|
+
@vernaculars << [taxon_id, vern, 'en']
|
128
|
+
end
|
144
129
|
end
|
145
130
|
|
146
|
-
def
|
147
|
-
|
148
|
-
|
149
|
-
:
|
150
|
-
:undef => :replace,
|
151
|
-
:replace => '',
|
152
|
-
:universal_newline => true
|
153
|
-
}
|
154
|
-
num_ary = (0...row.size).to_a
|
155
|
-
row = row.map do |f|
|
156
|
-
f = f.strip.downcase
|
157
|
-
f = f.encode ::Encoding.find('ASCII'), encoding_options
|
158
|
-
f.to_sym
|
131
|
+
def update_synonym(taxon_id, canonical)
|
132
|
+
return unless @synonyms_hash.has_key?(canonical)
|
133
|
+
@synonyms_hash[canonical].each do |syn|
|
134
|
+
@synonyms << [taxon_id, syn[:synonym], syn[:status]]
|
159
135
|
end
|
160
|
-
res = Hash[row.zip(num_ary)]
|
161
|
-
require 'byebug'; byebug
|
162
|
-
puts ''
|
163
|
-
res
|
164
136
|
end
|
165
137
|
|
166
|
-
|
167
138
|
def generate_dwca
|
168
139
|
DwcaHunter::logger_write(self.object_id,
|
169
140
|
'Creating DarwinCore Archive file')
|
data/lib/dwca_hunter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwca_hunter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|
@@ -284,8 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
284
284
|
- !ruby/object:Gem::Version
|
285
285
|
version: '0'
|
286
286
|
requirements: []
|
287
|
-
|
288
|
-
rubygems_version: 2.7.6
|
287
|
+
rubygems_version: 3.0.3
|
289
288
|
signing_key:
|
290
289
|
specification_version: 4
|
291
290
|
summary: Converts a variety of available online resources to DarwinCore Archive files.
|