dwca_hunter 0.5.2 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.byebug_history +37 -0
  3. data/.gitignore +5 -0
  4. data/.rubocop.yml +3 -2
  5. data/.ruby-version +1 -1
  6. data/Gemfile.lock +59 -135
  7. data/LICENSE.txt +1 -1
  8. data/README.md +1 -1
  9. data/dwca_hunter.gemspec +7 -8
  10. data/exe/dwcahunter +1 -3
  11. data/lib/dwca_hunter.rb +39 -8
  12. data/lib/dwca_hunter/resource.rb +5 -0
  13. data/lib/dwca_hunter/resources/aos-birds.rb +143 -0
  14. data/lib/dwca_hunter/resources/arctos.rb +121 -145
  15. data/lib/dwca_hunter/resources/clements.rb +151 -0
  16. data/lib/dwca_hunter/resources/eol.rb +85 -0
  17. data/lib/dwca_hunter/resources/freebase.rb +51 -49
  18. data/lib/dwca_hunter/resources/how-moore-birds.rb +168 -0
  19. data/lib/dwca_hunter/resources/index-fungorum.rb +131 -0
  20. data/lib/dwca_hunter/resources/ioc_word_bird.rb +200 -0
  21. data/lib/dwca_hunter/resources/ion.rb +98 -0
  22. data/lib/dwca_hunter/resources/ipni.rb +3 -2
  23. data/lib/dwca_hunter/resources/itis.rb +99 -99
  24. data/lib/dwca_hunter/resources/mammal_divdb.rb +155 -0
  25. data/lib/dwca_hunter/resources/mammal_species.rb +9 -6
  26. data/lib/dwca_hunter/resources/mcz.rb +123 -0
  27. data/lib/dwca_hunter/resources/ncbi.rb +22 -23
  28. data/lib/dwca_hunter/resources/opentree.rb +5 -5
  29. data/lib/dwca_hunter/resources/paleobiodb.rb +193 -0
  30. data/lib/dwca_hunter/resources/paleodb_harvester.rb +140 -0
  31. data/lib/dwca_hunter/resources/sherborn.rb +91 -0
  32. data/lib/dwca_hunter/resources/wikispecies.rb +142 -129
  33. data/lib/dwca_hunter/version.rb +1 -1
  34. metadata +31 -40
  35. data/files/birdlife_7.csv +0 -11862
  36. data/files/fishbase_taxon_cache.tsv +0 -81000
  37. data/files/reptile_checklist_2014_12.csv +0 -15158
  38. data/files/species-black.txt +0 -251
  39. data/ipni.csv.gz +0 -0
  40. data/ipniWebName.csv.xz?dl=1 +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb05e834a8403ae6b6cce3fc9c913b38d7111808ec355f4ad659d74b3960697f
4
- data.tar.gz: cb30906b445212fa52d4ab536610d77050bcfa417fdbb0546c93d5f004a266d9
3
+ metadata.gz: 60327e105c53c226f322e3a7272bdc5747d73fac0124887b024f99e3c39c985b
4
+ data.tar.gz: '09660f8b5feccfaf4caeaec277db4dc4729a973196a77bb02860947ef55bd272'
5
5
  SHA512:
6
- metadata.gz: 8ee016bf36ca9bab6ed6d65475b52e231c13cae276482ac34787b5680d3953ca5672b73428b33e1834823ad90411464567617b9a7bf25f359a09e5f6f7a8122c
7
- data.tar.gz: 233dd03050a99fc016e1c78326d76476f0fbf3f81743a28c1ec842e4b5755a2162eb1951035192149b9da537811dedad20c046b31e962eeeabd89550b146cec5
6
+ metadata.gz: 9b0a621f85535f421eef5a8550ce653c4f3483f563c7b19934a76e8f30b0cdf17e7a8c59945ea31455c57a350a14d345993f5fe6b91d656f5eb40317da6b1af9
7
+ data.tar.gz: 00a54b23a8588e6d304d35bb8756f633fb409777f07908525d257f4b1a23c5956a9683863b491f50e8d772c8b41343f909e6cb252a326d4f2e662f96f37826ed
@@ -1,3 +1,40 @@
1
+ q
2
+ row
3
+ data
4
+ cc
5
+ c
6
+ data
7
+ c
8
+ data
9
+ c
10
+ data
11
+ c
12
+ data
13
+ c
14
+ data
15
+ q
16
+ p n
17
+ n
18
+ q
19
+ @name_id_json[n]
20
+ n = row[:acc_name].to_sym
21
+ row[:acc_name].to_sym
22
+ row[:acc_name]
23
+ acc_id
24
+ vi acc_id
25
+ q
26
+ @name_id
27
+ acc_id
28
+ id
29
+ q
30
+ DwcaHunter.resources
31
+ q
32
+ DwcaHunter.resources
33
+ q
34
+ res.size
35
+ res.len
36
+ res
37
+ trd
1
38
  exit
2
39
  pp row
3
40
  require "pp"
data/.gitignore CHANGED
@@ -56,3 +56,8 @@ tags
56
56
 
57
57
  # vscode
58
58
  .vs
59
+
60
+ # misc
61
+ *.json
62
+ *.csv
63
+ *.txt
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ NewCops: disable
3
+ TargetRubyVersion: 2.6.6
3
4
  Exclude:
4
5
  - bin/**/*
5
6
  - db/**/*
@@ -30,4 +31,4 @@ Metrics/MethodLength:
30
31
  Naming/FileName:
31
32
  Exclude:
32
33
  - Gemfile
33
- - Rakefile
34
+ - Rakefile
@@ -1 +1 @@
1
- 2.5.3
1
+ 2.6.6
@@ -1,13 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.2)
5
- biodiversity (~> 3.5)
6
- dwc-archive (~> 1.0)
4
+ dwca_hunter (0.7.0)
5
+ biodiversity (~> 4)
6
+ dwc-archive (~> 1.1.1)
7
7
  gn_uuid (~> 0.5)
8
- google-cloud-storage (~> 1.23)
9
8
  htmlentities (~> 4.3)
10
- nokogiri (~> 1.8)
9
+ nokogiri (~> 1.11)
11
10
  rest-client (~> 2.0)
12
11
  ruby-xz (~> 1.0)
13
12
  thor (~> 0.19)
@@ -15,166 +14,92 @@ PATH
15
14
  GEM
16
15
  remote: http://rubygems.org/
17
16
  specs:
18
- addressable (2.7.0)
19
- public_suffix (>= 2.0.2, < 5.0)
20
- ast (2.4.0)
21
- backport (0.3.0)
22
- biodiversity (3.5.1)
23
- gn_uuid (~> 0.5)
24
- parallel (~> 1.12)
25
- treetop (~> 1.6)
26
- unicode_utils (~> 1.4)
17
+ ast (2.4.1)
18
+ biodiversity (4.1.0)
19
+ ffi (~> 1.11)
27
20
  byebug (10.0.2)
28
- coveralls (0.8.22)
21
+ coveralls (0.8.23)
29
22
  json (>= 1.8, < 3)
30
23
  simplecov (~> 0.16.1)
31
24
  term-ansicolor (~> 1.3)
32
- thor (~> 0.19.4)
25
+ thor (>= 0.19.4, < 2.0)
33
26
  tins (~> 1.6)
34
- declarative (0.0.10)
35
- declarative-option (0.1.0)
36
- diff-lcs (1.3)
37
- digest-crc (0.4.1)
38
- docile (1.3.1)
27
+ diff-lcs (1.4.4)
28
+ docile (1.3.4)
39
29
  domain_name (0.5.20190701)
40
30
  unf (>= 0.0.5, < 1.0.0)
41
- dwc-archive (1.0.1)
42
- nokogiri (~> 1.8)
43
- parsley-store (~> 0.3)
44
- faraday (0.17.0)
45
- multipart-post (>= 1.2, < 3)
31
+ dwc-archive (1.1.1)
32
+ biodiversity (~> 4)
33
+ nokogiri (~> 1.10)
34
+ ffi (1.14.2)
46
35
  gn_uuid (0.5.1)
47
- google-api-client (0.34.1)
48
- addressable (~> 2.5, >= 2.5.1)
49
- googleauth (~> 0.9)
50
- httpclient (>= 2.8.1, < 3.0)
51
- mini_mime (~> 1.0)
52
- representable (~> 3.0)
53
- retriable (>= 2.0, < 4.0)
54
- signet (~> 0.12)
55
- google-cloud-core (1.4.1)
56
- google-cloud-env (~> 1.0)
57
- google-cloud-env (1.3.0)
58
- faraday (~> 0.11)
59
- google-cloud-storage (1.23.0)
60
- addressable (~> 2.5)
61
- digest-crc (~> 0.4)
62
- google-api-client (~> 0.33)
63
- google-cloud-core (~> 1.2)
64
- googleauth (~> 0.9)
65
- mini_mime (~> 1.0)
66
- googleauth (0.10.0)
67
- faraday (~> 0.12)
68
- jwt (>= 1.4, < 3.0)
69
- memoist (~> 0.16)
70
- multi_json (~> 1.11)
71
- os (>= 0.9, < 2.0)
72
- signet (~> 0.12)
73
36
  htmlentities (4.3.4)
74
37
  http-accept (1.7.0)
75
38
  http-cookie (1.0.3)
76
39
  domain_name (~> 0.5)
77
- httpclient (2.8.3)
78
- jaro_winkler (1.5.2)
79
- json (2.2.0)
80
- jwt (2.2.1)
81
- kramdown (1.17.0)
82
- memoist (0.16.1)
83
- mime-types (3.3)
40
+ json (2.5.1)
41
+ mime-types (3.3.1)
84
42
  mime-types-data (~> 3.2015)
85
- mime-types-data (3.2019.1009)
86
- mini_mime (1.0.2)
87
- mini_portile2 (2.4.0)
88
- multi_json (1.14.1)
89
- multipart-post (2.1.1)
43
+ mime-types-data (3.2020.1104)
44
+ mini_portile2 (2.5.0)
90
45
  netrc (0.11.0)
91
- nokogiri (1.10.1)
92
- mini_portile2 (~> 2.4.0)
93
- os (1.0.1)
94
- parallel (1.14.0)
95
- parser (2.6.0.0)
96
- ast (~> 2.4.0)
97
- parsley-store (0.3.6)
98
- biodiversity (~> 3.1)
99
- redis (~> 3.0)
100
- polyglot (0.3.5)
101
- powerpack (0.1.2)
102
- psych (3.1.0)
103
- public_suffix (4.0.1)
46
+ nokogiri (1.11.1)
47
+ mini_portile2 (~> 2.5.0)
48
+ racc (~> 1.4)
49
+ parallel (1.20.1)
50
+ parser (3.0.0.0)
51
+ ast (~> 2.4.1)
52
+ racc (1.5.2)
104
53
  rainbow (3.0.0)
105
- rake (12.3.2)
106
- redis (3.3.5)
107
- representable (3.0.4)
108
- declarative (< 0.1.0)
109
- declarative-option (< 0.2.0)
110
- uber (< 0.2.0)
54
+ rake (13.0.3)
55
+ regexp_parser (2.0.3)
111
56
  rest-client (2.1.0)
112
57
  http-accept (>= 1.7.0, < 2.0)
113
58
  http-cookie (>= 1.0.2, < 2.0)
114
59
  mime-types (>= 1.16, < 4.0)
115
60
  netrc (~> 0.8)
116
- retriable (3.1.2)
117
- reverse_markdown (1.1.0)
118
- nokogiri
119
- rspec (3.8.0)
120
- rspec-core (~> 3.8.0)
121
- rspec-expectations (~> 3.8.0)
122
- rspec-mocks (~> 3.8.0)
123
- rspec-core (3.8.0)
124
- rspec-support (~> 3.8.0)
125
- rspec-expectations (3.8.2)
61
+ rexml (3.2.4)
62
+ rspec (3.10.0)
63
+ rspec-core (~> 3.10.0)
64
+ rspec-expectations (~> 3.10.0)
65
+ rspec-mocks (~> 3.10.0)
66
+ rspec-core (3.10.1)
67
+ rspec-support (~> 3.10.0)
68
+ rspec-expectations (3.10.1)
126
69
  diff-lcs (>= 1.2.0, < 2.0)
127
- rspec-support (~> 3.8.0)
128
- rspec-mocks (3.8.0)
70
+ rspec-support (~> 3.10.0)
71
+ rspec-mocks (3.10.1)
129
72
  diff-lcs (>= 1.2.0, < 2.0)
130
- rspec-support (~> 3.8.0)
131
- rspec-support (3.8.0)
132
- rubocop (0.65.0)
133
- jaro_winkler (~> 1.5.1)
73
+ rspec-support (~> 3.10.0)
74
+ rspec-support (3.10.1)
75
+ rubocop (0.93.1)
134
76
  parallel (~> 1.10)
135
- parser (>= 2.5, != 2.5.1.1)
136
- powerpack (~> 0.1)
137
- psych (>= 3.1.0)
77
+ parser (>= 2.7.1.5)
138
78
  rainbow (>= 2.2.2, < 4.0)
79
+ regexp_parser (>= 1.8)
80
+ rexml
81
+ rubocop-ast (>= 0.6.0)
139
82
  ruby-progressbar (~> 1.7)
140
- unicode-display_width (~> 1.4.0)
141
- ruby-progressbar (1.10.0)
83
+ unicode-display_width (>= 1.4.0, < 2.0)
84
+ rubocop-ast (1.4.0)
85
+ parser (>= 2.7.1.5)
86
+ ruby-progressbar (1.11.0)
142
87
  ruby-xz (1.0.0)
143
- signet (0.12.0)
144
- addressable (~> 2.3)
145
- faraday (~> 0.9)
146
- jwt (>= 1.5, < 3.0)
147
- multi_json (~> 1.10)
148
88
  simplecov (0.16.1)
149
89
  docile (~> 1.1)
150
90
  json (>= 1.8, < 3)
151
91
  simplecov-html (~> 0.10.0)
152
92
  simplecov-html (0.10.2)
153
- solargraph (0.31.3)
154
- backport (~> 0.3)
155
- htmlentities (~> 4.3, >= 4.3.4)
156
- jaro_winkler (~> 1.5)
157
- kramdown (~> 1.16)
158
- parser (~> 2.3)
159
- reverse_markdown (~> 1.0, >= 1.0.5)
160
- rubocop (~> 0.52)
161
- thor (~> 0.19, >= 0.19.4)
162
- tilt (~> 2.0)
163
- yard (~> 0.9)
93
+ sync (0.5.0)
164
94
  term-ansicolor (1.7.1)
165
95
  tins (~> 1.0)
166
- thor (0.19.4)
167
- tilt (2.0.9)
168
- tins (1.20.2)
169
- treetop (1.6.10)
170
- polyglot (~> 0.3)
171
- uber (0.1.0)
96
+ thor (0.20.3)
97
+ tins (1.26.0)
98
+ sync
172
99
  unf (0.1.4)
173
100
  unf_ext
174
- unf_ext (0.0.7.6)
175
- unicode-display_width (1.4.1)
176
- unicode_utils (1.4.0)
177
- yard (0.9.18)
101
+ unf_ext (0.0.7.7)
102
+ unicode-display_width (1.7.0)
178
103
 
179
104
  PLATFORMS
180
105
  ruby
@@ -184,10 +109,9 @@ DEPENDENCIES
184
109
  byebug (~> 10.0)
185
110
  coveralls (~> 0.8)
186
111
  dwca_hunter!
187
- rake (~> 12.3)
188
- rspec (~> 3.7)
189
- rubocop (~> 0.58)
190
- solargraph (~> 0.23)
112
+ rake (~> 13.0)
113
+ rspec (~> 3.9)
114
+ rubocop (~> 0.84)
191
115
 
192
116
  BUNDLED WITH
193
- 2.0.1
117
+ 2.1.4
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011 Marine Biological Laboratory
1
+ Copyright (c) 2011-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -32,7 +32,7 @@ See dwca_hunter/bin/dwca-hunt.rb as a example how to use the code.
32
32
 
33
33
  ## Copyright
34
34
 
35
- Copyright (c) 2011-2016 Dmitry Mozzherin. See LICENSE.txt for further details.
35
+ Copyright (c) 2011-2020 Dmitry Mozzherin. See LICENSE.txt for further details.
36
36
 
37
37
  [code-climate-img]: https://codeclimate.com/badge.png
38
38
  [code-climate]: https://codeclimate.com/github/GlobalNamesArchitecture/dwca_hunter
@@ -6,7 +6,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
6
6
  require "dwca_hunter/version"
7
7
 
8
8
  Gem::Specification.new do |gem|
9
- gem.required_ruby_version = ">= 2.4"
9
+ gem.required_ruby_version = ">= 2.6.6"
10
10
  gem.name = "dwca_hunter"
11
11
  gem.version = DwcaHunter.version
12
12
  gem.license = "MIT"
@@ -26,11 +26,11 @@ Gem::Specification.new do |gem|
26
26
  gem.executables = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
27
  gem.require_paths = ["lib"]
28
28
 
29
- gem.add_dependency "biodiversity", "~> 3.5"
30
- gem.add_dependency "dwc-archive", "~> 1.0"
29
+ gem.add_dependency "biodiversity", "~> 4"
30
+ gem.add_dependency "dwc-archive", "~> 1.1.1"
31
31
  gem.add_dependency "gn_uuid", "~> 0.5"
32
32
  gem.add_dependency "htmlentities", "~> 4.3"
33
- gem.add_dependency "nokogiri", "~> 1.8"
33
+ gem.add_dependency "nokogiri", "~> 1.11"
34
34
  gem.add_dependency "rest-client", "~> 2.0"
35
35
  gem.add_dependency "ruby-xz", "~> 1.0"
36
36
  gem.add_dependency "thor", "~> 0.19"
@@ -38,8 +38,7 @@ Gem::Specification.new do |gem|
38
38
  gem.add_development_dependency "bundler", "~> 2.0"
39
39
  gem.add_development_dependency "byebug", "~> 10.0"
40
40
  gem.add_development_dependency "coveralls", "~> 0.8"
41
- gem.add_development_dependency "rake", "~> 12.3"
42
- gem.add_development_dependency "rspec", "~> 3.7"
43
- gem.add_development_dependency "rubocop", "~> 0.58"
44
- gem.add_development_dependency "solargraph", "~> 0.23"
41
+ gem.add_development_dependency "rake", "~> 13.0"
42
+ gem.add_development_dependency "rspec", "~> 3.9"
43
+ gem.add_development_dependency "rubocop", "~> 0.84"
45
44
  end
@@ -37,9 +37,7 @@ class DwcaHunterCLI < Thor
37
37
  res = []
38
38
  DwcaHunter.resources.each do |resource|
39
39
  r = resource.new
40
- if add_resource?(search, r)
41
- res << { n: nil, command: r.command, resource: r.title }
42
- end
40
+ res << { n: nil, command: r.command, resource: r.title } if add_resource?(search, r)
43
41
  end
44
42
  res.sort_by { |r| r[:command] }.each_with_object([]) do |r, a|
45
43
  r[:n] = a.size + 1
@@ -1,17 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "base64"
3
4
  require "biodiversity"
4
- require "logger"
5
- require "fileutils"
6
- require "uri"
7
- require "tmpdir"
8
- require "net/http"
9
- require "json"
10
- require "htmlentities"
11
5
  require "dwc_archive"
12
6
  require "dwca_hunter/resource"
7
+ require "fileutils"
8
+ require "htmlentities"
9
+ require "json"
10
+ require "logger"
11
+ require "net/http"
13
12
  require "rest_client"
14
- require "base64"
13
+ require "tmpdir"
14
+ require "uri"
15
15
 
16
16
  Dir[File.join(__dir__, "dwca_hunter", "*.rb")].
17
17
  each { |f| require f }
@@ -51,5 +51,36 @@ module DwcaHunter
51
51
  c < Resource
52
52
  end
53
53
  end
54
+
55
# Rewrites an authorship string so that initials written after a surname
# are moved in front of it, e.g. "Smith, AB & Jones, C, 1900" becomes
# "A. B. Smith & C. Jones, 1900". An optional opening parenthesis and the
# trailing part that contains no upper-case letters (typically the year
# and a closing parenthesis) are kept unchanged.
#
# @param auth [String] raw authorship string
# @return [String] the normalized string; when the names part holds
#   fewer than two entries only the "duPont" -> "du Pont" substitution
#   is applied
def normalize_authors(auth)
  auth = auth.gsub(/duPont/, "du Pont")
  # Capture 1: optional "("; capture 2: the names (lazy); capture 3: a
  # tail starting with whitespace, "," or ")" that has no upper-case
  # letters up to the end of the line.
  match = /^([\(]?)(.*?)(([\s,\)][^[:upper:]]*)?$)/.match(auth)
  return auth if match.nil?

  opening, names, tail = match[1..3]
  # "&" and "," both separate entries. Blank entries (produced by input
  # such as "Smith, Jones, & Brown") are dropped so that no empty author
  # ends up in the result.
  parts = names.tr("&", ",").split(",").map(&:strip).reject(&:empty?)
  names = move_initials(parts) if parts.size > 1
  "#{opening}#{names}#{tail}"
end

# Joins name parts into one authors string. A part made only of 1-4
# upper-case letters (optionally followed by " Jr") is treated as the
# initials of the preceding entry: the letters are dotted and placed in
# front of that entry, and the " Jr" suffix is appended after it. The
# first part is never treated as initials because nothing precedes it.
#
# @param ary [Array<String>] stripped name parts
# @return [String] entries joined with ", " and a final " & "; a single
#   entry is returned as is, and an empty list gives an empty string
def move_initials(ary)
  res = []
  ary.each do |part|
    match = res.empty? ? nil : /^([[:upper:]]{1,4})(\sJr)?$/.match(part)
    if match
      initials = match[1].chars.join(". ")
      res[-1] = "#{initials}. #{res[-1]}#{match[2]}"
    else
      res << part
    end
  end
  # Zero or one entry: nothing to join (avoids returning a bare " & ").
  return res.first.to_s if res.size < 2

  "#{res[0..-2].join(', ')} & #{res[-1]}"
end
54
84
  end
55
85
  end
86
+