dwca_hunter 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e30e9b34ea1c46b021bd3c2ec66ccad4996d4a921c7ce78791b84940bd239f05
4
- data.tar.gz: 1be0e7119fd38094f94a53d71460a8f97a59f2e46a5e9740b814f5dcc97b42cd
3
+ metadata.gz: fb05e834a8403ae6b6cce3fc9c913b38d7111808ec355f4ad659d74b3960697f
4
+ data.tar.gz: cb30906b445212fa52d4ab536610d77050bcfa417fdbb0546c93d5f004a266d9
5
5
  SHA512:
6
- metadata.gz: df1d9bebe191ebf8ae72d601f05374edeaeffbb627d08d7981da582559439dd9ad173656d78a7c88b3a9765562efa8a7095c4eaa5ed0f1f3ef856be94f990b63
7
- data.tar.gz: 703bbf2d197a55a8d4e8510e940f77562540983bb5eddc7a7496ed77af5e610ce0ce5d95a1faa25878a8f8b12a17613808898453a0bd6b7aa19a87dc7c5f000e
6
+ metadata.gz: 8ee016bf36ca9bab6ed6d65475b52e231c13cae276482ac34787b5680d3953ca5672b73428b33e1834823ad90411464567617b9a7bf25f359a09e5f6f7a8122c
7
+ data.tar.gz: 233dd03050a99fc016e1c78326d76476f0fbf3f81743a28c1ec842e4b5755a2162eb1951035192149b9da537811dedad20c046b31e962eeeabd89550b146cec5
@@ -1,3 +1,11 @@
1
+ exit
2
+ pp row
3
+ require "pp"
4
+ pp row
5
+ row["id"]
6
+ row.id
7
+ row
8
+ data
1
9
  q
2
10
  c
3
11
  q
@@ -1 +1 @@
1
- 2.5.1
1
+ 2.5.3
@@ -1,128 +1,186 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.1)
4
+ dwca_hunter (0.5.2)
5
5
  biodiversity (~> 3.5)
6
6
  dwc-archive (~> 1.0)
7
7
  gn_uuid (~> 0.5)
8
+ google-cloud-storage (~> 1.23)
8
9
  htmlentities (~> 4.3)
9
10
  nokogiri (~> 1.8)
10
11
  rest-client (~> 2.0)
12
+ ruby-xz (~> 1.0)
11
13
  thor (~> 0.19)
12
14
 
13
15
  GEM
14
16
  remote: http://rubygems.org/
15
17
  specs:
18
+ addressable (2.7.0)
19
+ public_suffix (>= 2.0.2, < 5.0)
16
20
  ast (2.4.0)
17
- biodiversity (3.5.0)
21
+ backport (0.3.0)
22
+ biodiversity (3.5.1)
18
23
  gn_uuid (~> 0.5)
19
24
  parallel (~> 1.12)
20
25
  treetop (~> 1.6)
21
26
  unicode_utils (~> 1.4)
22
27
  byebug (10.0.2)
23
- coderay (1.1.2)
24
28
  coveralls (0.8.22)
25
29
  json (>= 1.8, < 3)
26
30
  simplecov (~> 0.16.1)
27
31
  term-ansicolor (~> 1.3)
28
32
  thor (~> 0.19.4)
29
33
  tins (~> 1.6)
34
+ declarative (0.0.10)
35
+ declarative-option (0.1.0)
30
36
  diff-lcs (1.3)
37
+ digest-crc (0.4.1)
31
38
  docile (1.3.1)
32
- domain_name (0.5.20180417)
39
+ domain_name (0.5.20190701)
33
40
  unf (>= 0.0.5, < 1.0.0)
34
41
  dwc-archive (1.0.1)
35
42
  nokogiri (~> 1.8)
36
43
  parsley-store (~> 0.3)
37
- eventmachine (1.2.7)
38
- gn_uuid (0.5.0)
44
+ faraday (0.17.0)
45
+ multipart-post (>= 1.2, < 3)
46
+ gn_uuid (0.5.1)
47
+ google-api-client (0.34.1)
48
+ addressable (~> 2.5, >= 2.5.1)
49
+ googleauth (~> 0.9)
50
+ httpclient (>= 2.8.1, < 3.0)
51
+ mini_mime (~> 1.0)
52
+ representable (~> 3.0)
53
+ retriable (>= 2.0, < 4.0)
54
+ signet (~> 0.12)
55
+ google-cloud-core (1.4.1)
56
+ google-cloud-env (~> 1.0)
57
+ google-cloud-env (1.3.0)
58
+ faraday (~> 0.11)
59
+ google-cloud-storage (1.23.0)
60
+ addressable (~> 2.5)
61
+ digest-crc (~> 0.4)
62
+ google-api-client (~> 0.33)
63
+ google-cloud-core (~> 1.2)
64
+ googleauth (~> 0.9)
65
+ mini_mime (~> 1.0)
66
+ googleauth (0.10.0)
67
+ faraday (~> 0.12)
68
+ jwt (>= 1.4, < 3.0)
69
+ memoist (~> 0.16)
70
+ multi_json (~> 1.11)
71
+ os (>= 0.9, < 2.0)
72
+ signet (~> 0.12)
39
73
  htmlentities (4.3.4)
74
+ http-accept (1.7.0)
40
75
  http-cookie (1.0.3)
41
76
  domain_name (~> 0.5)
42
- jaro_winkler (1.5.1)
43
- json (2.1.0)
77
+ httpclient (2.8.3)
78
+ jaro_winkler (1.5.2)
79
+ json (2.2.0)
80
+ jwt (2.2.1)
44
81
  kramdown (1.17.0)
45
- mime-types (3.1)
82
+ memoist (0.16.1)
83
+ mime-types (3.3)
46
84
  mime-types-data (~> 3.2015)
47
- mime-types-data (3.2016.0521)
48
- mini_portile2 (2.3.0)
85
+ mime-types-data (3.2019.1009)
86
+ mini_mime (1.0.2)
87
+ mini_portile2 (2.4.0)
88
+ multi_json (1.14.1)
89
+ multipart-post (2.1.1)
49
90
  netrc (0.11.0)
50
- nokogiri (1.8.4)
51
- mini_portile2 (~> 2.3.0)
52
- parallel (1.12.1)
53
- parser (2.5.1.2)
91
+ nokogiri (1.10.1)
92
+ mini_portile2 (~> 2.4.0)
93
+ os (1.0.1)
94
+ parallel (1.14.0)
95
+ parser (2.6.0.0)
54
96
  ast (~> 2.4.0)
55
97
  parsley-store (0.3.6)
56
98
  biodiversity (~> 3.1)
57
99
  redis (~> 3.0)
58
100
  polyglot (0.3.5)
59
101
  powerpack (0.1.2)
102
+ psych (3.1.0)
103
+ public_suffix (4.0.1)
60
104
  rainbow (3.0.0)
61
- rake (12.3.1)
105
+ rake (12.3.2)
62
106
  redis (3.3.5)
63
- rest-client (2.0.2)
107
+ representable (3.0.4)
108
+ declarative (< 0.1.0)
109
+ declarative-option (< 0.2.0)
110
+ uber (< 0.2.0)
111
+ rest-client (2.1.0)
112
+ http-accept (>= 1.7.0, < 2.0)
64
113
  http-cookie (>= 1.0.2, < 2.0)
65
114
  mime-types (>= 1.16, < 4.0)
66
115
  netrc (~> 0.8)
116
+ retriable (3.1.2)
67
117
  reverse_markdown (1.1.0)
68
118
  nokogiri
69
- rspec (3.7.0)
70
- rspec-core (~> 3.7.0)
71
- rspec-expectations (~> 3.7.0)
72
- rspec-mocks (~> 3.7.0)
73
- rspec-core (3.7.1)
74
- rspec-support (~> 3.7.0)
75
- rspec-expectations (3.7.0)
119
+ rspec (3.8.0)
120
+ rspec-core (~> 3.8.0)
121
+ rspec-expectations (~> 3.8.0)
122
+ rspec-mocks (~> 3.8.0)
123
+ rspec-core (3.8.0)
124
+ rspec-support (~> 3.8.0)
125
+ rspec-expectations (3.8.2)
76
126
  diff-lcs (>= 1.2.0, < 2.0)
77
- rspec-support (~> 3.7.0)
78
- rspec-mocks (3.7.0)
127
+ rspec-support (~> 3.8.0)
128
+ rspec-mocks (3.8.0)
79
129
  diff-lcs (>= 1.2.0, < 2.0)
80
- rspec-support (~> 3.7.0)
81
- rspec-support (3.7.1)
82
- rubocop (0.58.1)
130
+ rspec-support (~> 3.8.0)
131
+ rspec-support (3.8.0)
132
+ rubocop (0.65.0)
83
133
  jaro_winkler (~> 1.5.1)
84
134
  parallel (~> 1.10)
85
135
  parser (>= 2.5, != 2.5.1.1)
86
136
  powerpack (~> 0.1)
137
+ psych (>= 3.1.0)
87
138
  rainbow (>= 2.2.2, < 4.0)
88
139
  ruby-progressbar (~> 1.7)
89
- unicode-display_width (~> 1.0, >= 1.0.1)
90
- ruby-progressbar (1.9.0)
140
+ unicode-display_width (~> 1.4.0)
141
+ ruby-progressbar (1.10.0)
142
+ ruby-xz (1.0.0)
143
+ signet (0.12.0)
144
+ addressable (~> 2.3)
145
+ faraday (~> 0.9)
146
+ jwt (>= 1.5, < 3.0)
147
+ multi_json (~> 1.10)
91
148
  simplecov (0.16.1)
92
149
  docile (~> 1.1)
93
150
  json (>= 1.8, < 3)
94
151
  simplecov-html (~> 0.10.0)
95
152
  simplecov-html (0.10.2)
96
- solargraph (0.23.5)
97
- coderay (~> 1.1)
98
- eventmachine (~> 1.2, >= 1.2.5)
153
+ solargraph (0.31.3)
154
+ backport (~> 0.3)
99
155
  htmlentities (~> 4.3, >= 4.3.4)
156
+ jaro_winkler (~> 1.5)
100
157
  kramdown (~> 1.16)
101
- parser (~> 2.4)
158
+ parser (~> 2.3)
102
159
  reverse_markdown (~> 1.0, >= 1.0.5)
103
160
  rubocop (~> 0.52)
104
161
  thor (~> 0.19, >= 0.19.4)
105
162
  tilt (~> 2.0)
106
163
  yard (~> 0.9)
107
- term-ansicolor (1.6.0)
164
+ term-ansicolor (1.7.1)
108
165
  tins (~> 1.0)
109
166
  thor (0.19.4)
110
- tilt (2.0.8)
111
- tins (1.16.3)
167
+ tilt (2.0.9)
168
+ tins (1.20.2)
112
169
  treetop (1.6.10)
113
170
  polyglot (~> 0.3)
171
+ uber (0.1.0)
114
172
  unf (0.1.4)
115
173
  unf_ext
116
- unf_ext (0.0.7.5)
117
- unicode-display_width (1.4.0)
174
+ unf_ext (0.0.7.6)
175
+ unicode-display_width (1.4.1)
118
176
  unicode_utils (1.4.0)
119
- yard (0.9.15)
177
+ yard (0.9.18)
120
178
 
121
179
  PLATFORMS
122
180
  ruby
123
181
 
124
182
  DEPENDENCIES
125
- bundler (~> 1.16)
183
+ bundler (~> 2.0)
126
184
  byebug (~> 10.0)
127
185
  coveralls (~> 0.8)
128
186
  dwca_hunter!
@@ -132,4 +190,4 @@ DEPENDENCIES
132
190
  solargraph (~> 0.23)
133
191
 
134
192
  BUNDLED WITH
135
- 1.16.3
193
+ 2.0.1
@@ -32,9 +32,10 @@ Gem::Specification.new do |gem|
32
32
  gem.add_dependency "htmlentities", "~> 4.3"
33
33
  gem.add_dependency "nokogiri", "~> 1.8"
34
34
  gem.add_dependency "rest-client", "~> 2.0"
35
+ gem.add_dependency "ruby-xz", "~> 1.0"
35
36
  gem.add_dependency "thor", "~> 0.19"
36
37
 
37
- gem.add_development_dependency "bundler", "~> 1.16"
38
+ gem.add_development_dependency "bundler", "~> 2.0"
38
39
  gem.add_development_dependency "byebug", "~> 10.0"
39
40
  gem.add_development_dependency "coveralls", "~> 0.8"
40
41
  gem.add_development_dependency "rake", "~> 12.3"
Binary file
Binary file
@@ -0,0 +1,110 @@
1
+ require "xz"
2
+
3
module DwcaHunter
  # Resource for the International Plant Names Index (IPNI).
  #
  # Downloads an xz-compressed, pipe-delimited CSV dump of IPNI names,
  # unpacks it next to the download and converts it into a DarwinCore
  # Archive with a single core file (no extensions).
  class ResourceIPNI < DwcaHunter::Resource
    attr_reader :title, :abbr

    # opts - options Hash handed over unchanged to DwcaHunter::Resource
    #        (the original inline note lists download: and unpack: flags).
    def initialize(opts = {})
      @command = "ipni"
      @title = "The International Plant Names Index"
      @abbr = "IPNI"
      # NOTE(review): a Dropbox share link may need "?dl=1" to serve the raw
      # file instead of an HTML preview page -- confirm before relying on it.
      @url = "https://www.dropbox.com/s/1n0sn80vkdir5nu/ipniWebName.csv.xz"
      @uuid = "6b3905ce-5025-49f3-9697-ddd5bdfb4ff0"
      @download_path = File.join(Dir.tmpdir, "dwca_hunter", "ipni",
                                 "ipni.csv.xz")
      @extensions = []
      super
    end

    # Decompresses the downloaded .xz file into #unpacked_path.
    def unpack
      puts "Unpacking #{@download_path}"
      XZ.decompress_file(@download_path, unpacked_path)
    end

    # Fetches the cached IPNI dump with curl and stores it at @download_path.
    #
    # The command is passed as an argument vector, so no shell is involved
    # and paths containing spaces or shell metacharacters stay intact.
    # Returns curl's standard output as a String, as the backtick form did.
    def download
      puts "Downloading cached version of the file. Get daily updated one from"
      puts "https://storage.cloud.google.com/ipni-data/ipniWebName.csv.xz"
      IO.popen(["curl", "-s", "-L", @url, "-o", @download_path], &:read)
    end

    # Builds the archive: reads the unpacked CSV, then writes the DwCA.
    def make_dwca
      organize_data
      generate_dwca
    end

    private

    # Path of the decompressed CSV: @download_path minus its last three
    # characters (the ".xz" suffix of the path set in #initialize).
    def unpacked_path
      @download_path[0...-3]
    end

    # Reads the unpacked CSV into @data, an Array of Hashes with the keys
    # :taxon_id, :local_id, :family, :genus, :scientific_name and :rank.
    #
    # The file is opened in block form so the handle is closed as soon as
    # reading finishes. Every row is expected to carry an "id" value; its
    # last colon-separated segment becomes both taxon_id and local_id.
    def organize_data
      DwcaHunter.logger_write(object_id, "Organizing data")
      # quote_char is set to a character presumably absent from the data, so
      # that ordinary quotes inside fields are read literally -- TODO confirm.
      csv_opts = { col_sep: "|", quote_char: "щ", headers: true }
      @data = CSV.open(unpacked_path, csv_opts) do |csv|
        csv.each_with_object([]) do |row, data|
          # .to_s guards against empty cells, matching the authors column.
          name = row["taxon_scientific_name_s_lower"].to_s.strip
          au = row["authors_t"].to_s.strip
          name = "#{name} #{au}" unless au.empty?
          id = row["id"].split(":")[-1]
          data << { taxon_id: id,
                    local_id: id,
                    family: row["family_s_lower"],
                    genus: row["genus_s_lower"],
                    scientific_name: name,
                    rank: row["rank_s_alphanum"] }
        end
      end
    end

    # Fills @core (header row plus one row per record in @data) and @eml,
    # then hands over to the parent class implementation.
    def generate_dwca
      DwcaHunter.logger_write(object_id, "Creating DarwinCore Archive file")
      core_init
      eml_init
      DwcaHunter.logger_write(object_id, "Assembling Core Data")
      @data.each.with_index(1) do |d, count|
        # Progress message every 10,000 rows.
        if (count % 10_000).zero?
          DwcaHunter.logger_write(object_id, "Core row #{count}")
        end
        @core << [d[:taxon_id], d[:local_id],
                  d[:scientific_name], d[:rank],
                  d[:family], d[:genus]]
      end
      super
    end

    # Sets @eml, the metadata Hash describing this resource.
    def eml_init
      @eml = {
        id: @uuid,
        title: @title,
        authors: [],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin" }
        ],
        abstract: "The International Plant Names Index (IPNI) is a database " \
                  "of the names and associated basic bibliographical " \
                  "details of seed plants, ferns and lycophytes. Its goal " \
                  "is to eliminate the need for repeated reference to " \
                  "primary sources for basic bibliographic information " \
                  "about plant names. The data are freely available and are " \
                  "gradually being standardized and checked. IPNI will be a " \
                  "dynamic resource, depending on direct contributions by " \
                  "all members of the botanical community.",
        url: "http://www.ipni.org"
      }
    end

    # Starts @core with the header row of term URIs; the column order must
    # match the rows appended in #generate_dwca.
    def core_init
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://globalnames.org/terms/localID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/taxonRank",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus"]]
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module DwcaHunter
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwca_hunter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-08-04 00:00:00.000000000 Z
11
+ date: 2019-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '2.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-xz
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: thor
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -114,14 +128,14 @@ dependencies:
114
128
  requirements:
115
129
  - - "~>"
116
130
  - !ruby/object:Gem::Version
117
- version: '1.16'
131
+ version: '2.0'
118
132
  type: :development
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
- version: '1.16'
138
+ version: '2.0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: byebug
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -233,6 +247,8 @@ files:
233
247
  - files/fishbase_taxon_cache.tsv
234
248
  - files/reptile_checklist_2014_12.csv
235
249
  - files/species-black.txt
250
+ - ipni.csv.gz
251
+ - ipniWebName.csv.xz?dl=1
236
252
  - lib/dwca_hunter.rb
237
253
  - lib/dwca_hunter/downloader.rb
238
254
  - lib/dwca_hunter/encoding.rb
@@ -242,6 +258,7 @@ files:
242
258
  - lib/dwca_hunter/resources/fishbase.rb
243
259
  - lib/dwca_hunter/resources/freebase.rb
244
260
  - lib/dwca_hunter/resources/gnub.rb
261
+ - lib/dwca_hunter/resources/ipni.rb
245
262
  - lib/dwca_hunter/resources/itis.rb
246
263
  - lib/dwca_hunter/resources/mammal_species.rb
247
264
  - lib/dwca_hunter/resources/ncbi.rb