dwca_hunter 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e30e9b34ea1c46b021bd3c2ec66ccad4996d4a921c7ce78791b84940bd239f05
4
- data.tar.gz: 1be0e7119fd38094f94a53d71460a8f97a59f2e46a5e9740b814f5dcc97b42cd
3
+ metadata.gz: fb05e834a8403ae6b6cce3fc9c913b38d7111808ec355f4ad659d74b3960697f
4
+ data.tar.gz: cb30906b445212fa52d4ab536610d77050bcfa417fdbb0546c93d5f004a266d9
5
5
  SHA512:
6
- metadata.gz: df1d9bebe191ebf8ae72d601f05374edeaeffbb627d08d7981da582559439dd9ad173656d78a7c88b3a9765562efa8a7095c4eaa5ed0f1f3ef856be94f990b63
7
- data.tar.gz: 703bbf2d197a55a8d4e8510e940f77562540983bb5eddc7a7496ed77af5e610ce0ce5d95a1faa25878a8f8b12a17613808898453a0bd6b7aa19a87dc7c5f000e
6
+ metadata.gz: 8ee016bf36ca9bab6ed6d65475b52e231c13cae276482ac34787b5680d3953ca5672b73428b33e1834823ad90411464567617b9a7bf25f359a09e5f6f7a8122c
7
+ data.tar.gz: 233dd03050a99fc016e1c78326d76476f0fbf3f81743a28c1ec842e4b5755a2162eb1951035192149b9da537811dedad20c046b31e962eeeabd89550b146cec5
@@ -1,3 +1,11 @@
1
+ exit
2
+ pp row
3
+ require "pp"
4
+ pp row
5
+ row["id"]
6
+ row.id
7
+ row
8
+ data
1
9
  q
2
10
  c
3
11
  q
@@ -1 +1 @@
1
- 2.5.1
1
+ 2.5.3
@@ -1,128 +1,186 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.1)
4
+ dwca_hunter (0.5.2)
5
5
  biodiversity (~> 3.5)
6
6
  dwc-archive (~> 1.0)
7
7
  gn_uuid (~> 0.5)
8
+ google-cloud-storage (~> 1.23)
8
9
  htmlentities (~> 4.3)
9
10
  nokogiri (~> 1.8)
10
11
  rest-client (~> 2.0)
12
+ ruby-xz (~> 1.0)
11
13
  thor (~> 0.19)
12
14
 
13
15
  GEM
14
16
  remote: http://rubygems.org/
15
17
  specs:
18
+ addressable (2.7.0)
19
+ public_suffix (>= 2.0.2, < 5.0)
16
20
  ast (2.4.0)
17
- biodiversity (3.5.0)
21
+ backport (0.3.0)
22
+ biodiversity (3.5.1)
18
23
  gn_uuid (~> 0.5)
19
24
  parallel (~> 1.12)
20
25
  treetop (~> 1.6)
21
26
  unicode_utils (~> 1.4)
22
27
  byebug (10.0.2)
23
- coderay (1.1.2)
24
28
  coveralls (0.8.22)
25
29
  json (>= 1.8, < 3)
26
30
  simplecov (~> 0.16.1)
27
31
  term-ansicolor (~> 1.3)
28
32
  thor (~> 0.19.4)
29
33
  tins (~> 1.6)
34
+ declarative (0.0.10)
35
+ declarative-option (0.1.0)
30
36
  diff-lcs (1.3)
37
+ digest-crc (0.4.1)
31
38
  docile (1.3.1)
32
- domain_name (0.5.20180417)
39
+ domain_name (0.5.20190701)
33
40
  unf (>= 0.0.5, < 1.0.0)
34
41
  dwc-archive (1.0.1)
35
42
  nokogiri (~> 1.8)
36
43
  parsley-store (~> 0.3)
37
- eventmachine (1.2.7)
38
- gn_uuid (0.5.0)
44
+ faraday (0.17.0)
45
+ multipart-post (>= 1.2, < 3)
46
+ gn_uuid (0.5.1)
47
+ google-api-client (0.34.1)
48
+ addressable (~> 2.5, >= 2.5.1)
49
+ googleauth (~> 0.9)
50
+ httpclient (>= 2.8.1, < 3.0)
51
+ mini_mime (~> 1.0)
52
+ representable (~> 3.0)
53
+ retriable (>= 2.0, < 4.0)
54
+ signet (~> 0.12)
55
+ google-cloud-core (1.4.1)
56
+ google-cloud-env (~> 1.0)
57
+ google-cloud-env (1.3.0)
58
+ faraday (~> 0.11)
59
+ google-cloud-storage (1.23.0)
60
+ addressable (~> 2.5)
61
+ digest-crc (~> 0.4)
62
+ google-api-client (~> 0.33)
63
+ google-cloud-core (~> 1.2)
64
+ googleauth (~> 0.9)
65
+ mini_mime (~> 1.0)
66
+ googleauth (0.10.0)
67
+ faraday (~> 0.12)
68
+ jwt (>= 1.4, < 3.0)
69
+ memoist (~> 0.16)
70
+ multi_json (~> 1.11)
71
+ os (>= 0.9, < 2.0)
72
+ signet (~> 0.12)
39
73
  htmlentities (4.3.4)
74
+ http-accept (1.7.0)
40
75
  http-cookie (1.0.3)
41
76
  domain_name (~> 0.5)
42
- jaro_winkler (1.5.1)
43
- json (2.1.0)
77
+ httpclient (2.8.3)
78
+ jaro_winkler (1.5.2)
79
+ json (2.2.0)
80
+ jwt (2.2.1)
44
81
  kramdown (1.17.0)
45
- mime-types (3.1)
82
+ memoist (0.16.1)
83
+ mime-types (3.3)
46
84
  mime-types-data (~> 3.2015)
47
- mime-types-data (3.2016.0521)
48
- mini_portile2 (2.3.0)
85
+ mime-types-data (3.2019.1009)
86
+ mini_mime (1.0.2)
87
+ mini_portile2 (2.4.0)
88
+ multi_json (1.14.1)
89
+ multipart-post (2.1.1)
49
90
  netrc (0.11.0)
50
- nokogiri (1.8.4)
51
- mini_portile2 (~> 2.3.0)
52
- parallel (1.12.1)
53
- parser (2.5.1.2)
91
+ nokogiri (1.10.1)
92
+ mini_portile2 (~> 2.4.0)
93
+ os (1.0.1)
94
+ parallel (1.14.0)
95
+ parser (2.6.0.0)
54
96
  ast (~> 2.4.0)
55
97
  parsley-store (0.3.6)
56
98
  biodiversity (~> 3.1)
57
99
  redis (~> 3.0)
58
100
  polyglot (0.3.5)
59
101
  powerpack (0.1.2)
102
+ psych (3.1.0)
103
+ public_suffix (4.0.1)
60
104
  rainbow (3.0.0)
61
- rake (12.3.1)
105
+ rake (12.3.2)
62
106
  redis (3.3.5)
63
- rest-client (2.0.2)
107
+ representable (3.0.4)
108
+ declarative (< 0.1.0)
109
+ declarative-option (< 0.2.0)
110
+ uber (< 0.2.0)
111
+ rest-client (2.1.0)
112
+ http-accept (>= 1.7.0, < 2.0)
64
113
  http-cookie (>= 1.0.2, < 2.0)
65
114
  mime-types (>= 1.16, < 4.0)
66
115
  netrc (~> 0.8)
116
+ retriable (3.1.2)
67
117
  reverse_markdown (1.1.0)
68
118
  nokogiri
69
- rspec (3.7.0)
70
- rspec-core (~> 3.7.0)
71
- rspec-expectations (~> 3.7.0)
72
- rspec-mocks (~> 3.7.0)
73
- rspec-core (3.7.1)
74
- rspec-support (~> 3.7.0)
75
- rspec-expectations (3.7.0)
119
+ rspec (3.8.0)
120
+ rspec-core (~> 3.8.0)
121
+ rspec-expectations (~> 3.8.0)
122
+ rspec-mocks (~> 3.8.0)
123
+ rspec-core (3.8.0)
124
+ rspec-support (~> 3.8.0)
125
+ rspec-expectations (3.8.2)
76
126
  diff-lcs (>= 1.2.0, < 2.0)
77
- rspec-support (~> 3.7.0)
78
- rspec-mocks (3.7.0)
127
+ rspec-support (~> 3.8.0)
128
+ rspec-mocks (3.8.0)
79
129
  diff-lcs (>= 1.2.0, < 2.0)
80
- rspec-support (~> 3.7.0)
81
- rspec-support (3.7.1)
82
- rubocop (0.58.1)
130
+ rspec-support (~> 3.8.0)
131
+ rspec-support (3.8.0)
132
+ rubocop (0.65.0)
83
133
  jaro_winkler (~> 1.5.1)
84
134
  parallel (~> 1.10)
85
135
  parser (>= 2.5, != 2.5.1.1)
86
136
  powerpack (~> 0.1)
137
+ psych (>= 3.1.0)
87
138
  rainbow (>= 2.2.2, < 4.0)
88
139
  ruby-progressbar (~> 1.7)
89
- unicode-display_width (~> 1.0, >= 1.0.1)
90
- ruby-progressbar (1.9.0)
140
+ unicode-display_width (~> 1.4.0)
141
+ ruby-progressbar (1.10.0)
142
+ ruby-xz (1.0.0)
143
+ signet (0.12.0)
144
+ addressable (~> 2.3)
145
+ faraday (~> 0.9)
146
+ jwt (>= 1.5, < 3.0)
147
+ multi_json (~> 1.10)
91
148
  simplecov (0.16.1)
92
149
  docile (~> 1.1)
93
150
  json (>= 1.8, < 3)
94
151
  simplecov-html (~> 0.10.0)
95
152
  simplecov-html (0.10.2)
96
- solargraph (0.23.5)
97
- coderay (~> 1.1)
98
- eventmachine (~> 1.2, >= 1.2.5)
153
+ solargraph (0.31.3)
154
+ backport (~> 0.3)
99
155
  htmlentities (~> 4.3, >= 4.3.4)
156
+ jaro_winkler (~> 1.5)
100
157
  kramdown (~> 1.16)
101
- parser (~> 2.4)
158
+ parser (~> 2.3)
102
159
  reverse_markdown (~> 1.0, >= 1.0.5)
103
160
  rubocop (~> 0.52)
104
161
  thor (~> 0.19, >= 0.19.4)
105
162
  tilt (~> 2.0)
106
163
  yard (~> 0.9)
107
- term-ansicolor (1.6.0)
164
+ term-ansicolor (1.7.1)
108
165
  tins (~> 1.0)
109
166
  thor (0.19.4)
110
- tilt (2.0.8)
111
- tins (1.16.3)
167
+ tilt (2.0.9)
168
+ tins (1.20.2)
112
169
  treetop (1.6.10)
113
170
  polyglot (~> 0.3)
171
+ uber (0.1.0)
114
172
  unf (0.1.4)
115
173
  unf_ext
116
- unf_ext (0.0.7.5)
117
- unicode-display_width (1.4.0)
174
+ unf_ext (0.0.7.6)
175
+ unicode-display_width (1.4.1)
118
176
  unicode_utils (1.4.0)
119
- yard (0.9.15)
177
+ yard (0.9.18)
120
178
 
121
179
  PLATFORMS
122
180
  ruby
123
181
 
124
182
  DEPENDENCIES
125
- bundler (~> 1.16)
183
+ bundler (~> 2.0)
126
184
  byebug (~> 10.0)
127
185
  coveralls (~> 0.8)
128
186
  dwca_hunter!
@@ -132,4 +190,4 @@ DEPENDENCIES
132
190
  solargraph (~> 0.23)
133
191
 
134
192
  BUNDLED WITH
135
- 1.16.3
193
+ 2.0.1
@@ -32,9 +32,10 @@ Gem::Specification.new do |gem|
32
32
  gem.add_dependency "htmlentities", "~> 4.3"
33
33
  gem.add_dependency "nokogiri", "~> 1.8"
34
34
  gem.add_dependency "rest-client", "~> 2.0"
35
+ gem.add_dependency "ruby-xz", "~> 1.0"
35
36
  gem.add_dependency "thor", "~> 0.19"
36
37
 
37
- gem.add_development_dependency "bundler", "~> 1.16"
38
+ gem.add_development_dependency "bundler", "~> 2.0"
38
39
  gem.add_development_dependency "byebug", "~> 10.0"
39
40
  gem.add_development_dependency "coveralls", "~> 0.8"
40
41
  gem.add_development_dependency "rake", "~> 12.3"
Binary file
Binary file
@@ -0,0 +1,110 @@
1
+ require "xz"
2
+
3
module DwcaHunter
  # Resource for The International Plant Names Index (IPNI).
  #
  # Fetches an xz-compressed, pipe-delimited CSV dump of IPNI names,
  # decompresses it and converts its rows into a DarwinCore Archive.
  class ResourceIPNI < DwcaHunter::Resource
    attr_reader :title, :abbr

    # Sets the IPNI-specific settings and then hands +opts+ unchanged to
    # DwcaHunter::Resource#initialize.
    #
    # opts - options Hash forwarded to the parent class (presumably flags
    #        such as :download and :unpack; confirm against
    #        DwcaHunter::Resource).
    #
    # NOTE(review): Dropbox share links usually need "?dl=1" to return the
    # raw file instead of an HTML page; confirm that @url yields the .xz
    # payload when fetched with curl.
    def initialize(opts = {})
      @command = "ipni"
      @title = "The International Plant Names Index"
      @abbr = "IPNI"
      @url = "https://www.dropbox.com/s/1n0sn80vkdir5nu/ipniWebName.csv.xz"
      @uuid = "6b3905ce-5025-49f3-9697-ddd5bdfb4ff0"
      @download_path = File.join(Dir.tmpdir, "dwca_hunter", "ipni",
                                 "ipni.csv.xz")
      @extensions = []
      super
    end

    # Decompresses the downloaded .xz file into csv_path (the download path
    # without its last three characters, i.e. without ".xz").
    def unpack
      puts "Unpacking #{@download_path}"
      XZ.decompress_file(@download_path, csv_path)
    end

    # Downloads the cached IPNI dump from @url to @download_path with curl.
    #
    # Raises RuntimeError when curl cannot be started or exits with a
    # non-zero status, so a failed download is reported here rather than
    # surfacing later as an unpack error.
    def download
      puts "Downloading cached version of the file. Get daily updated one from"
      puts "https://storage.cloud.google.com/ipni-data/ipniWebName.csv.xz"
      # The argument-list form of system bypasses the shell, so neither the
      # URL nor the path is word-split or interpreted.
      return if system("curl", "-s", "-L", @url, "-o", @download_path)

      raise "curl failed to download #{@url} to #{@download_path}"
    end

    # Builds the DarwinCore Archive from the unpacked CSV file.
    def make_dwca
      organize_data
      generate_dwca
    end

    private

    # Path of the decompressed CSV: @download_path minus the trailing ".xz".
    def csv_path
      @download_path[0...-3]
    end

    # Reads the unpacked CSV and stores one Hash per usable row in @data.
    #
    # Rows without a scientific name or without an id are skipped; they
    # cannot be turned into a core record.
    #
    # Returns the Array assigned to @data.
    def organize_data
      DwcaHunter.logger_write(object_id, "Organizing data")
      records = []
      # quote_char is set to a character presumably absent from the data so
      # that ordinary double quotes are read literally (TODO confirm).
      # CSV.foreach closes the file once iteration finishes.
      CSV.foreach(csv_path,
                  col_sep: "|", quote_char: "щ", headers: true) do |row|
        raw_name = row["taxon_scientific_name_s_lower"]
        raw_id = row["id"]
        next if raw_name.nil? || raw_id.nil?

        name = raw_name.strip
        authors = row["authors_t"].to_s.strip
        name = "#{name} #{authors}" unless authors == ""
        # Only the segment after the last ":" of the id column is kept.
        id = raw_id.split(":")[-1]
        records << { taxon_id: id,
                     local_id: id,
                     family: row["family_s_lower"],
                     genus: row["genus_s_lower"],
                     scientific_name: name,
                     rank: row["rank_s_alphanum"] }
      end
      @data = records
    end

    # Fills @core and @eml from @data, then delegates to the parent class
    # implementation of generate_dwca.
    def generate_dwca
      DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")
      core_init
      eml_init
      DwcaHunter.logger_write(object_id, "Assembling Core Data")
      @data.each.with_index(1) do |record, count|
        # Progress message every 10,000 rows.
        if (count % 10_000).zero?
          DwcaHunter.logger_write(object_id, "Core row #{count}")
        end
        # Field order must match the header row built in core_init.
        @core << [record[:taxon_id], record[:local_id],
                  record[:scientific_name], record[:rank],
                  record[:family], record[:genus]]
      end
      super
    end

    # Prepares the EML metadata Hash describing the IPNI dataset.
    def eml_init
      @eml = {
        id: @uuid,
        title: @title,
        authors: [],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin" }
        ],
        abstract: "The International Plant Names Index (IPNI) is a database " \
                  "of the names and associated basic bibliographical " \
                  "details of seed plants, ferns and lycophytes. Its goal " \
                  "is to eliminate the need for repeated reference to " \
                  "primary sources for basic bibliographic information " \
                  "about plant names. The data are freely available and are " \
                  "gradually being standardized and checked. IPNI will be a " \
                  "dynamic resource, depending on direct contributions by " \
                  "all members of the botanical community.",
        url: "http://www.ipni.org"
      }
    end

    # Starts @core with its header row of term URIs.
    def core_init
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://globalnames.org/terms/localID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/taxonRank",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus"]]
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module DwcaHunter
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwca_hunter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-08-04 00:00:00.000000000 Z
11
+ date: 2019-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '2.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-xz
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: thor
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -114,14 +128,14 @@ dependencies:
114
128
  requirements:
115
129
  - - "~>"
116
130
  - !ruby/object:Gem::Version
117
- version: '1.16'
131
+ version: '2.0'
118
132
  type: :development
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
- version: '1.16'
138
+ version: '2.0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: byebug
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -233,6 +247,8 @@ files:
233
247
  - files/fishbase_taxon_cache.tsv
234
248
  - files/reptile_checklist_2014_12.csv
235
249
  - files/species-black.txt
250
+ - ipni.csv.gz
251
+ - ipniWebName.csv.xz?dl=1
236
252
  - lib/dwca_hunter.rb
237
253
  - lib/dwca_hunter/downloader.rb
238
254
  - lib/dwca_hunter/encoding.rb
@@ -242,6 +258,7 @@ files:
242
258
  - lib/dwca_hunter/resources/fishbase.rb
243
259
  - lib/dwca_hunter/resources/freebase.rb
244
260
  - lib/dwca_hunter/resources/gnub.rb
261
+ - lib/dwca_hunter/resources/ipni.rb
245
262
  - lib/dwca_hunter/resources/itis.rb
246
263
  - lib/dwca_hunter/resources/mammal_species.rb
247
264
  - lib/dwca_hunter/resources/ncbi.rb