dwca_hunter 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.byebug_history +8 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +101 -43
- data/dwca_hunter.gemspec +2 -1
- data/ipni.csv.gz +0 -0
- data/ipniWebName.csv.xz?dl=1 +0 -0
- data/lib/dwca_hunter/resources/ipni.rb +110 -0
- data/lib/dwca_hunter/version.rb +1 -1
- metadata +21 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb05e834a8403ae6b6cce3fc9c913b38d7111808ec355f4ad659d74b3960697f
|
4
|
+
data.tar.gz: cb30906b445212fa52d4ab536610d77050bcfa417fdbb0546c93d5f004a266d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ee016bf36ca9bab6ed6d65475b52e231c13cae276482ac34787b5680d3953ca5672b73428b33e1834823ad90411464567617b9a7bf25f359a09e5f6f7a8122c
|
7
|
+
data.tar.gz: 233dd03050a99fc016e1c78326d76476f0fbf3f81743a28c1ec842e4b5755a2162eb1951035192149b9da537811dedad20c046b31e962eeeabd89550b146cec5
|
data/.byebug_history
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.5.
|
1
|
+
2.5.3
|
data/Gemfile.lock
CHANGED
@@ -1,128 +1,186 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
dwca_hunter (0.5.1)
|
4
|
+
dwca_hunter (0.5.2)
|
5
5
|
biodiversity (~> 3.5)
|
6
6
|
dwc-archive (~> 1.0)
|
7
7
|
gn_uuid (~> 0.5)
|
8
|
+
google-cloud-storage (~> 1.23)
|
8
9
|
htmlentities (~> 4.3)
|
9
10
|
nokogiri (~> 1.8)
|
10
11
|
rest-client (~> 2.0)
|
12
|
+
ruby-xz (~> 1.0)
|
11
13
|
thor (~> 0.19)
|
12
14
|
|
13
15
|
GEM
|
14
16
|
remote: http://rubygems.org/
|
15
17
|
specs:
|
18
|
+
addressable (2.7.0)
|
19
|
+
public_suffix (>= 2.0.2, < 5.0)
|
16
20
|
ast (2.4.0)
|
17
|
-
|
21
|
+
backport (0.3.0)
|
22
|
+
biodiversity (3.5.1)
|
18
23
|
gn_uuid (~> 0.5)
|
19
24
|
parallel (~> 1.12)
|
20
25
|
treetop (~> 1.6)
|
21
26
|
unicode_utils (~> 1.4)
|
22
27
|
byebug (10.0.2)
|
23
|
-
coderay (1.1.2)
|
24
28
|
coveralls (0.8.22)
|
25
29
|
json (>= 1.8, < 3)
|
26
30
|
simplecov (~> 0.16.1)
|
27
31
|
term-ansicolor (~> 1.3)
|
28
32
|
thor (~> 0.19.4)
|
29
33
|
tins (~> 1.6)
|
34
|
+
declarative (0.0.10)
|
35
|
+
declarative-option (0.1.0)
|
30
36
|
diff-lcs (1.3)
|
37
|
+
digest-crc (0.4.1)
|
31
38
|
docile (1.3.1)
|
32
|
-
domain_name (0.5.
|
39
|
+
domain_name (0.5.20190701)
|
33
40
|
unf (>= 0.0.5, < 1.0.0)
|
34
41
|
dwc-archive (1.0.1)
|
35
42
|
nokogiri (~> 1.8)
|
36
43
|
parsley-store (~> 0.3)
|
37
|
-
|
38
|
-
|
44
|
+
faraday (0.17.0)
|
45
|
+
multipart-post (>= 1.2, < 3)
|
46
|
+
gn_uuid (0.5.1)
|
47
|
+
google-api-client (0.34.1)
|
48
|
+
addressable (~> 2.5, >= 2.5.1)
|
49
|
+
googleauth (~> 0.9)
|
50
|
+
httpclient (>= 2.8.1, < 3.0)
|
51
|
+
mini_mime (~> 1.0)
|
52
|
+
representable (~> 3.0)
|
53
|
+
retriable (>= 2.0, < 4.0)
|
54
|
+
signet (~> 0.12)
|
55
|
+
google-cloud-core (1.4.1)
|
56
|
+
google-cloud-env (~> 1.0)
|
57
|
+
google-cloud-env (1.3.0)
|
58
|
+
faraday (~> 0.11)
|
59
|
+
google-cloud-storage (1.23.0)
|
60
|
+
addressable (~> 2.5)
|
61
|
+
digest-crc (~> 0.4)
|
62
|
+
google-api-client (~> 0.33)
|
63
|
+
google-cloud-core (~> 1.2)
|
64
|
+
googleauth (~> 0.9)
|
65
|
+
mini_mime (~> 1.0)
|
66
|
+
googleauth (0.10.0)
|
67
|
+
faraday (~> 0.12)
|
68
|
+
jwt (>= 1.4, < 3.0)
|
69
|
+
memoist (~> 0.16)
|
70
|
+
multi_json (~> 1.11)
|
71
|
+
os (>= 0.9, < 2.0)
|
72
|
+
signet (~> 0.12)
|
39
73
|
htmlentities (4.3.4)
|
74
|
+
http-accept (1.7.0)
|
40
75
|
http-cookie (1.0.3)
|
41
76
|
domain_name (~> 0.5)
|
42
|
-
|
43
|
-
|
77
|
+
httpclient (2.8.3)
|
78
|
+
jaro_winkler (1.5.2)
|
79
|
+
json (2.2.0)
|
80
|
+
jwt (2.2.1)
|
44
81
|
kramdown (1.17.0)
|
45
|
-
|
82
|
+
memoist (0.16.1)
|
83
|
+
mime-types (3.3)
|
46
84
|
mime-types-data (~> 3.2015)
|
47
|
-
mime-types-data (3.
|
48
|
-
|
85
|
+
mime-types-data (3.2019.1009)
|
86
|
+
mini_mime (1.0.2)
|
87
|
+
mini_portile2 (2.4.0)
|
88
|
+
multi_json (1.14.1)
|
89
|
+
multipart-post (2.1.1)
|
49
90
|
netrc (0.11.0)
|
50
|
-
nokogiri (1.
|
51
|
-
mini_portile2 (~> 2.
|
52
|
-
|
53
|
-
|
91
|
+
nokogiri (1.10.1)
|
92
|
+
mini_portile2 (~> 2.4.0)
|
93
|
+
os (1.0.1)
|
94
|
+
parallel (1.14.0)
|
95
|
+
parser (2.6.0.0)
|
54
96
|
ast (~> 2.4.0)
|
55
97
|
parsley-store (0.3.6)
|
56
98
|
biodiversity (~> 3.1)
|
57
99
|
redis (~> 3.0)
|
58
100
|
polyglot (0.3.5)
|
59
101
|
powerpack (0.1.2)
|
102
|
+
psych (3.1.0)
|
103
|
+
public_suffix (4.0.1)
|
60
104
|
rainbow (3.0.0)
|
61
|
-
rake (12.3.
|
105
|
+
rake (12.3.2)
|
62
106
|
redis (3.3.5)
|
63
|
-
|
107
|
+
representable (3.0.4)
|
108
|
+
declarative (< 0.1.0)
|
109
|
+
declarative-option (< 0.2.0)
|
110
|
+
uber (< 0.2.0)
|
111
|
+
rest-client (2.1.0)
|
112
|
+
http-accept (>= 1.7.0, < 2.0)
|
64
113
|
http-cookie (>= 1.0.2, < 2.0)
|
65
114
|
mime-types (>= 1.16, < 4.0)
|
66
115
|
netrc (~> 0.8)
|
116
|
+
retriable (3.1.2)
|
67
117
|
reverse_markdown (1.1.0)
|
68
118
|
nokogiri
|
69
|
-
rspec (3.
|
70
|
-
rspec-core (~> 3.
|
71
|
-
rspec-expectations (~> 3.
|
72
|
-
rspec-mocks (~> 3.
|
73
|
-
rspec-core (3.
|
74
|
-
rspec-support (~> 3.
|
75
|
-
rspec-expectations (3.
|
119
|
+
rspec (3.8.0)
|
120
|
+
rspec-core (~> 3.8.0)
|
121
|
+
rspec-expectations (~> 3.8.0)
|
122
|
+
rspec-mocks (~> 3.8.0)
|
123
|
+
rspec-core (3.8.0)
|
124
|
+
rspec-support (~> 3.8.0)
|
125
|
+
rspec-expectations (3.8.2)
|
76
126
|
diff-lcs (>= 1.2.0, < 2.0)
|
77
|
-
rspec-support (~> 3.
|
78
|
-
rspec-mocks (3.
|
127
|
+
rspec-support (~> 3.8.0)
|
128
|
+
rspec-mocks (3.8.0)
|
79
129
|
diff-lcs (>= 1.2.0, < 2.0)
|
80
|
-
rspec-support (~> 3.
|
81
|
-
rspec-support (3.
|
82
|
-
rubocop (0.
|
130
|
+
rspec-support (~> 3.8.0)
|
131
|
+
rspec-support (3.8.0)
|
132
|
+
rubocop (0.65.0)
|
83
133
|
jaro_winkler (~> 1.5.1)
|
84
134
|
parallel (~> 1.10)
|
85
135
|
parser (>= 2.5, != 2.5.1.1)
|
86
136
|
powerpack (~> 0.1)
|
137
|
+
psych (>= 3.1.0)
|
87
138
|
rainbow (>= 2.2.2, < 4.0)
|
88
139
|
ruby-progressbar (~> 1.7)
|
89
|
-
unicode-display_width (~> 1.
|
90
|
-
ruby-progressbar (1.
|
140
|
+
unicode-display_width (~> 1.4.0)
|
141
|
+
ruby-progressbar (1.10.0)
|
142
|
+
ruby-xz (1.0.0)
|
143
|
+
signet (0.12.0)
|
144
|
+
addressable (~> 2.3)
|
145
|
+
faraday (~> 0.9)
|
146
|
+
jwt (>= 1.5, < 3.0)
|
147
|
+
multi_json (~> 1.10)
|
91
148
|
simplecov (0.16.1)
|
92
149
|
docile (~> 1.1)
|
93
150
|
json (>= 1.8, < 3)
|
94
151
|
simplecov-html (~> 0.10.0)
|
95
152
|
simplecov-html (0.10.2)
|
96
|
-
solargraph (0.
|
97
|
-
|
98
|
-
eventmachine (~> 1.2, >= 1.2.5)
|
153
|
+
solargraph (0.31.3)
|
154
|
+
backport (~> 0.3)
|
99
155
|
htmlentities (~> 4.3, >= 4.3.4)
|
156
|
+
jaro_winkler (~> 1.5)
|
100
157
|
kramdown (~> 1.16)
|
101
|
-
parser (~> 2.
|
158
|
+
parser (~> 2.3)
|
102
159
|
reverse_markdown (~> 1.0, >= 1.0.5)
|
103
160
|
rubocop (~> 0.52)
|
104
161
|
thor (~> 0.19, >= 0.19.4)
|
105
162
|
tilt (~> 2.0)
|
106
163
|
yard (~> 0.9)
|
107
|
-
term-ansicolor (1.
|
164
|
+
term-ansicolor (1.7.1)
|
108
165
|
tins (~> 1.0)
|
109
166
|
thor (0.19.4)
|
110
|
-
tilt (2.0.
|
111
|
-
tins (1.
|
167
|
+
tilt (2.0.9)
|
168
|
+
tins (1.20.2)
|
112
169
|
treetop (1.6.10)
|
113
170
|
polyglot (~> 0.3)
|
171
|
+
uber (0.1.0)
|
114
172
|
unf (0.1.4)
|
115
173
|
unf_ext
|
116
|
-
unf_ext (0.0.7.
|
117
|
-
unicode-display_width (1.4.
|
174
|
+
unf_ext (0.0.7.6)
|
175
|
+
unicode-display_width (1.4.1)
|
118
176
|
unicode_utils (1.4.0)
|
119
|
-
yard (0.9.
|
177
|
+
yard (0.9.18)
|
120
178
|
|
121
179
|
PLATFORMS
|
122
180
|
ruby
|
123
181
|
|
124
182
|
DEPENDENCIES
|
125
|
-
bundler (~>
|
183
|
+
bundler (~> 2.0)
|
126
184
|
byebug (~> 10.0)
|
127
185
|
coveralls (~> 0.8)
|
128
186
|
dwca_hunter!
|
@@ -132,4 +190,4 @@ DEPENDENCIES
|
|
132
190
|
solargraph (~> 0.23)
|
133
191
|
|
134
192
|
BUNDLED WITH
|
135
|
-
|
193
|
+
2.0.1
|
data/dwca_hunter.gemspec
CHANGED
@@ -32,9 +32,10 @@ Gem::Specification.new do |gem|
|
|
32
32
|
gem.add_dependency "htmlentities", "~> 4.3"
|
33
33
|
gem.add_dependency "nokogiri", "~> 1.8"
|
34
34
|
gem.add_dependency "rest-client", "~> 2.0"
|
35
|
+
gem.add_dependency "ruby-xz", "~> 1.0"
|
35
36
|
gem.add_dependency "thor", "~> 0.19"
|
36
37
|
|
37
|
-
gem.add_development_dependency "bundler", "~>
|
38
|
+
gem.add_development_dependency "bundler", "~> 2.0"
|
38
39
|
gem.add_development_dependency "byebug", "~> 10.0"
|
39
40
|
gem.add_development_dependency "coveralls", "~> 0.8"
|
40
41
|
gem.add_development_dependency "rake", "~> 12.3"
|
data/ipni.csv.gz
ADDED
Binary file
|
Binary file
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require "xz"
|
2
|
+
|
3
|
+
module DwcaHunter
  # Resource for The International Plant Names Index (IPNI).
  #
  # Fetches an xz-compressed, pipe-delimited CSV dump of IPNI names,
  # decompresses it next to the download, and turns every usable row into a
  # line of a DarwinCore Archive core file.
  class ResourceIPNI < DwcaHunter::Resource
    attr_reader :title, :abbr

    # Sets the IPNI-specific settings and then hands control to the parent
    # initializer.
    #
    # opts - Hash of options, forwarded untouched to the parent class by the
    #        bare `super`. NOTE(review): the original inline hint mentions
    #        `download:` and `unpack:` keys; confirm against
    #        DwcaHunter::Resource.
    def initialize(opts = {})
      @command = "ipni"
      @title = "The International Plant Names Index"
      @abbr = "IPNI"
      @url = "https://www.dropbox.com/s/1n0sn80vkdir5nu/ipniWebName.csv.xz"
      @uuid = "6b3905ce-5025-49f3-9697-ddd5bdfb4ff0"
      @download_path = File.join(Dir.tmpdir, "dwca_hunter", "ipni",
                                 "ipni.csv.xz")
      @extensions = []
      super
    end

    # Decompresses the downloaded .xz file into a sibling file without the
    # ".xz" suffix.
    def unpack
      puts "Unpacking #{@download_path}"
      XZ.decompress_file(@download_path, csv_path)
    end

    # Downloads the cached IPNI dump from @url to @download_path with curl.
    #
    # The command is passed to IO.popen as an argument array, so no shell is
    # involved and paths or URLs containing spaces or shell metacharacters
    # are handled safely. Like the backtick form it replaces, the method
    # returns curl's standard output as a String.
    def download
      puts "Downloading cached verion of the file. Get daily updated one from"
      puts "https://storage.cloud.google.com/ipni-data/ipniWebName.csv.xz"
      IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
    end

    # Builds the DarwinCore Archive: reads the CSV into @data, then writes
    # the archive through generate_dwca.
    def make_dwca
      organize_data
      generate_dwca
    end

    private

    # Path of the decompressed CSV: @download_path minus its trailing ".xz"
    # (the last three characters).
    def csv_path
      @download_path[0...-3]
    end

    # Reads the decompressed CSV and stores one Hash per usable row in @data.
    #
    # CSV.foreach is used so the file handle is closed once iteration ends.
    # Rows lacking an id or a scientific name (for example truncated lines)
    # are skipped and counted instead of aborting the run with a
    # NoMethodError on nil.
    def organize_data
      DwcaHunter.logger_write(object_id, "Organizing data")
      skipped = 0
      # NOTE(review): "щ" as quote_char presumably never occurs in the data,
      # which would effectively disable quote handling - confirm.
      rows = CSV.foreach(csv_path,
                         col_sep: "|", quote_char: "щ", headers: true)
      @data = rows.each_with_object([]) do |row, data|
        name = row["taxon_scientific_name_s_lower"].to_s.strip
        # The identifier is the part after the last ":" of the "id" column.
        id = row["id"].to_s.split(":")[-1]
        if id.nil? || name.empty?
          skipped += 1
          next
        end
        au = row["authors_t"].to_s.strip
        name = "#{name} #{au}" if au != ""
        data << { taxon_id: id,
                  local_id: id,
                  family: row["family_s_lower"],
                  genus: row["genus_s_lower"],
                  scientific_name: name,
                  rank: row["rank_s_alphanum"] }
      end
      if skipped > 0
        DwcaHunter.logger_write(object_id,
                                "Skipped #{skipped} rows without id or name")
      end
      @data
    end

    # Fills @core and @eml, then delegates the archive creation to the
    # parent class implementation via `super`.
    def generate_dwca
      DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")
      core_init
      eml_init
      DwcaHunter.logger_write(object_id, "Assembling Core Data")
      @data.each.with_index(1) do |d, count|
        # Progress message every 10,000 rows.
        if (count % 10_000).zero?
          DwcaHunter.logger_write(object_id, "Core row #{count}")
        end
        # Column order must match the term list set up in core_init.
        @core << d.values_at(:taxon_id, :local_id, :scientific_name,
                             :rank, :family, :genus)
      end
      super
    end

    # Prepares the EML metadata Hash (@eml) describing the IPNI data set.
    def eml_init
      @eml = {
        id: @uuid,
        title: @title,
        authors: [],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin" }
        ],
        abstract: "The International Plant Names Index (IPNI) is a database " \
                  "of the names and associated basic bibliographical " \
                  "details of seed plants, ferns and lycophytes. Its goal " \
                  "is to eliminate the need for repeated reference to " \
                  "primary sources for basic bibliographic information " \
                  "about plant names. The data are freely available and are " \
                  "gradually being standardized and checked. IPNI will be a " \
                  "dynamic resource, depending on direct contributions by " \
                  "all members of the botanical community.",
        url: "http://www.ipni.org"
      }
    end

    # Starts @core with its header row: the term URI of every core column.
    def core_init
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://globalnames.org/terms/localID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/taxonRank",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus"]]
    end
  end
end
|
data/lib/dwca_hunter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwca_hunter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '2.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-xz
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: thor
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,14 +128,14 @@ dependencies:
|
|
114
128
|
requirements:
|
115
129
|
- - "~>"
|
116
130
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
131
|
+
version: '2.0'
|
118
132
|
type: :development
|
119
133
|
prerelease: false
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
121
135
|
requirements:
|
122
136
|
- - "~>"
|
123
137
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
138
|
+
version: '2.0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: byebug
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -233,6 +247,8 @@ files:
|
|
233
247
|
- files/fishbase_taxon_cache.tsv
|
234
248
|
- files/reptile_checklist_2014_12.csv
|
235
249
|
- files/species-black.txt
|
250
|
+
- ipni.csv.gz
|
251
|
+
- ipniWebName.csv.xz?dl=1
|
236
252
|
- lib/dwca_hunter.rb
|
237
253
|
- lib/dwca_hunter/downloader.rb
|
238
254
|
- lib/dwca_hunter/encoding.rb
|
@@ -242,6 +258,7 @@ files:
|
|
242
258
|
- lib/dwca_hunter/resources/fishbase.rb
|
243
259
|
- lib/dwca_hunter/resources/freebase.rb
|
244
260
|
- lib/dwca_hunter/resources/gnub.rb
|
261
|
+
- lib/dwca_hunter/resources/ipni.rb
|
245
262
|
- lib/dwca_hunter/resources/itis.rb
|
246
263
|
- lib/dwca_hunter/resources/mammal_species.rb
|
247
264
|
- lib/dwca_hunter/resources/ncbi.rb
|