dwca_hunter 0.5.2 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.byebug_history +37 -0
  3. data/.gitignore +5 -0
  4. data/.rubocop.yml +3 -2
  5. data/.ruby-version +1 -1
  6. data/Gemfile.lock +59 -135
  7. data/LICENSE.txt +1 -1
  8. data/README.md +1 -1
  9. data/dwca_hunter.gemspec +7 -8
  10. data/exe/dwcahunter +1 -3
  11. data/lib/dwca_hunter.rb +39 -8
  12. data/lib/dwca_hunter/resource.rb +5 -0
  13. data/lib/dwca_hunter/resources/aos-birds.rb +143 -0
  14. data/lib/dwca_hunter/resources/arctos.rb +121 -145
  15. data/lib/dwca_hunter/resources/clements.rb +151 -0
  16. data/lib/dwca_hunter/resources/eol.rb +85 -0
  17. data/lib/dwca_hunter/resources/freebase.rb +51 -49
  18. data/lib/dwca_hunter/resources/how-moore-birds.rb +168 -0
  19. data/lib/dwca_hunter/resources/index-fungorum.rb +131 -0
  20. data/lib/dwca_hunter/resources/ioc_word_bird.rb +200 -0
  21. data/lib/dwca_hunter/resources/ion.rb +98 -0
  22. data/lib/dwca_hunter/resources/ipni.rb +3 -2
  23. data/lib/dwca_hunter/resources/itis.rb +99 -99
  24. data/lib/dwca_hunter/resources/mammal_divdb.rb +155 -0
  25. data/lib/dwca_hunter/resources/mammal_species.rb +9 -6
  26. data/lib/dwca_hunter/resources/mcz.rb +123 -0
  27. data/lib/dwca_hunter/resources/ncbi.rb +22 -23
  28. data/lib/dwca_hunter/resources/opentree.rb +5 -5
  29. data/lib/dwca_hunter/resources/paleobiodb.rb +193 -0
  30. data/lib/dwca_hunter/resources/paleodb_harvester.rb +140 -0
  31. data/lib/dwca_hunter/resources/sherborn.rb +91 -0
  32. data/lib/dwca_hunter/resources/wikispecies.rb +142 -129
  33. data/lib/dwca_hunter/version.rb +1 -1
  34. metadata +31 -40
  35. data/files/birdlife_7.csv +0 -11862
  36. data/files/fishbase_taxon_cache.tsv +0 -81000
  37. data/files/reptile_checklist_2014_12.csv +0 -15158
  38. data/files/species-black.txt +0 -251
  39. data/ipni.csv.gz +0 -0
  40. data/ipniWebName.csv.xz?dl=1 +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb05e834a8403ae6b6cce3fc9c913b38d7111808ec355f4ad659d74b3960697f
4
- data.tar.gz: cb30906b445212fa52d4ab536610d77050bcfa417fdbb0546c93d5f004a266d9
3
+ metadata.gz: 60327e105c53c226f322e3a7272bdc5747d73fac0124887b024f99e3c39c985b
4
+ data.tar.gz: '09660f8b5feccfaf4caeaec277db4dc4729a973196a77bb02860947ef55bd272'
5
5
  SHA512:
6
- metadata.gz: 8ee016bf36ca9bab6ed6d65475b52e231c13cae276482ac34787b5680d3953ca5672b73428b33e1834823ad90411464567617b9a7bf25f359a09e5f6f7a8122c
7
- data.tar.gz: 233dd03050a99fc016e1c78326d76476f0fbf3f81743a28c1ec842e4b5755a2162eb1951035192149b9da537811dedad20c046b31e962eeeabd89550b146cec5
6
+ metadata.gz: 9b0a621f85535f421eef5a8550ce653c4f3483f563c7b19934a76e8f30b0cdf17e7a8c59945ea31455c57a350a14d345993f5fe6b91d656f5eb40317da6b1af9
7
+ data.tar.gz: 00a54b23a8588e6d304d35bb8756f633fb409777f07908525d257f4b1a23c5956a9683863b491f50e8d772c8b41343f909e6cb252a326d4f2e662f96f37826ed
data/.byebug_history CHANGED
@@ -1,3 +1,40 @@
1
+ q
2
+ row
3
+ data
4
+ cc
5
+ c
6
+ data
7
+ c
8
+ data
9
+ c
10
+ data
11
+ c
12
+ data
13
+ c
14
+ data
15
+ q
16
+ p n
17
+ n
18
+ q
19
+ @name_id_json[n]
20
+ n = row[:acc_name].to_sym
21
+ row[:acc_name].to_sym
22
+ row[:acc_name]
23
+ acc_id
24
+ vi acc_id
25
+ q
26
+ @name_id
27
+ acc_id
28
+ id
29
+ q
30
+ DwcaHunter.resources
31
+ q
32
+ DwcaHunter.resources
33
+ q
34
+ res.size
35
+ res.len
36
+ res
37
+ trd
1
38
  exit
2
39
  pp row
3
40
  require "pp"
data/.gitignore CHANGED
@@ -56,3 +56,8 @@ tags
56
56
 
57
57
  # vscode
58
58
  .vs
59
+
60
+ # misc
61
+ *.json
62
+ *.csv
63
+ *.txt
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ NewCops: disable
3
+ TargetRubyVersion: 2.6.6
3
4
  Exclude:
4
5
  - bin/**/*
5
6
  - db/**/*
@@ -30,4 +31,4 @@ Metrics/MethodLength:
30
31
  Naming/FileName:
31
32
  Exclude:
32
33
  - Gemfile
33
- - Rakefile
34
+ - Rakefile
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.5.3
1
+ 2.6.6
data/Gemfile.lock CHANGED
@@ -1,13 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.2)
5
- biodiversity (~> 3.5)
6
- dwc-archive (~> 1.0)
4
+ dwca_hunter (0.7.0)
5
+ biodiversity (~> 4)
6
+ dwc-archive (~> 1.1.1)
7
7
  gn_uuid (~> 0.5)
8
- google-cloud-storage (~> 1.23)
9
8
  htmlentities (~> 4.3)
10
- nokogiri (~> 1.8)
9
+ nokogiri (~> 1.11)
11
10
  rest-client (~> 2.0)
12
11
  ruby-xz (~> 1.0)
13
12
  thor (~> 0.19)
@@ -15,166 +14,92 @@ PATH
15
14
  GEM
16
15
  remote: http://rubygems.org/
17
16
  specs:
18
- addressable (2.7.0)
19
- public_suffix (>= 2.0.2, < 5.0)
20
- ast (2.4.0)
21
- backport (0.3.0)
22
- biodiversity (3.5.1)
23
- gn_uuid (~> 0.5)
24
- parallel (~> 1.12)
25
- treetop (~> 1.6)
26
- unicode_utils (~> 1.4)
17
+ ast (2.4.1)
18
+ biodiversity (4.1.0)
19
+ ffi (~> 1.11)
27
20
  byebug (10.0.2)
28
- coveralls (0.8.22)
21
+ coveralls (0.8.23)
29
22
  json (>= 1.8, < 3)
30
23
  simplecov (~> 0.16.1)
31
24
  term-ansicolor (~> 1.3)
32
- thor (~> 0.19.4)
25
+ thor (>= 0.19.4, < 2.0)
33
26
  tins (~> 1.6)
34
- declarative (0.0.10)
35
- declarative-option (0.1.0)
36
- diff-lcs (1.3)
37
- digest-crc (0.4.1)
38
- docile (1.3.1)
27
+ diff-lcs (1.4.4)
28
+ docile (1.3.4)
39
29
  domain_name (0.5.20190701)
40
30
  unf (>= 0.0.5, < 1.0.0)
41
- dwc-archive (1.0.1)
42
- nokogiri (~> 1.8)
43
- parsley-store (~> 0.3)
44
- faraday (0.17.0)
45
- multipart-post (>= 1.2, < 3)
31
+ dwc-archive (1.1.1)
32
+ biodiversity (~> 4)
33
+ nokogiri (~> 1.10)
34
+ ffi (1.14.2)
46
35
  gn_uuid (0.5.1)
47
- google-api-client (0.34.1)
48
- addressable (~> 2.5, >= 2.5.1)
49
- googleauth (~> 0.9)
50
- httpclient (>= 2.8.1, < 3.0)
51
- mini_mime (~> 1.0)
52
- representable (~> 3.0)
53
- retriable (>= 2.0, < 4.0)
54
- signet (~> 0.12)
55
- google-cloud-core (1.4.1)
56
- google-cloud-env (~> 1.0)
57
- google-cloud-env (1.3.0)
58
- faraday (~> 0.11)
59
- google-cloud-storage (1.23.0)
60
- addressable (~> 2.5)
61
- digest-crc (~> 0.4)
62
- google-api-client (~> 0.33)
63
- google-cloud-core (~> 1.2)
64
- googleauth (~> 0.9)
65
- mini_mime (~> 1.0)
66
- googleauth (0.10.0)
67
- faraday (~> 0.12)
68
- jwt (>= 1.4, < 3.0)
69
- memoist (~> 0.16)
70
- multi_json (~> 1.11)
71
- os (>= 0.9, < 2.0)
72
- signet (~> 0.12)
73
36
  htmlentities (4.3.4)
74
37
  http-accept (1.7.0)
75
38
  http-cookie (1.0.3)
76
39
  domain_name (~> 0.5)
77
- httpclient (2.8.3)
78
- jaro_winkler (1.5.2)
79
- json (2.2.0)
80
- jwt (2.2.1)
81
- kramdown (1.17.0)
82
- memoist (0.16.1)
83
- mime-types (3.3)
40
+ json (2.5.1)
41
+ mime-types (3.3.1)
84
42
  mime-types-data (~> 3.2015)
85
- mime-types-data (3.2019.1009)
86
- mini_mime (1.0.2)
87
- mini_portile2 (2.4.0)
88
- multi_json (1.14.1)
89
- multipart-post (2.1.1)
43
+ mime-types-data (3.2020.1104)
44
+ mini_portile2 (2.5.0)
90
45
  netrc (0.11.0)
91
- nokogiri (1.10.1)
92
- mini_portile2 (~> 2.4.0)
93
- os (1.0.1)
94
- parallel (1.14.0)
95
- parser (2.6.0.0)
96
- ast (~> 2.4.0)
97
- parsley-store (0.3.6)
98
- biodiversity (~> 3.1)
99
- redis (~> 3.0)
100
- polyglot (0.3.5)
101
- powerpack (0.1.2)
102
- psych (3.1.0)
103
- public_suffix (4.0.1)
46
+ nokogiri (1.11.1)
47
+ mini_portile2 (~> 2.5.0)
48
+ racc (~> 1.4)
49
+ parallel (1.20.1)
50
+ parser (3.0.0.0)
51
+ ast (~> 2.4.1)
52
+ racc (1.5.2)
104
53
  rainbow (3.0.0)
105
- rake (12.3.2)
106
- redis (3.3.5)
107
- representable (3.0.4)
108
- declarative (< 0.1.0)
109
- declarative-option (< 0.2.0)
110
- uber (< 0.2.0)
54
+ rake (13.0.3)
55
+ regexp_parser (2.0.3)
111
56
  rest-client (2.1.0)
112
57
  http-accept (>= 1.7.0, < 2.0)
113
58
  http-cookie (>= 1.0.2, < 2.0)
114
59
  mime-types (>= 1.16, < 4.0)
115
60
  netrc (~> 0.8)
116
- retriable (3.1.2)
117
- reverse_markdown (1.1.0)
118
- nokogiri
119
- rspec (3.8.0)
120
- rspec-core (~> 3.8.0)
121
- rspec-expectations (~> 3.8.0)
122
- rspec-mocks (~> 3.8.0)
123
- rspec-core (3.8.0)
124
- rspec-support (~> 3.8.0)
125
- rspec-expectations (3.8.2)
61
+ rexml (3.2.4)
62
+ rspec (3.10.0)
63
+ rspec-core (~> 3.10.0)
64
+ rspec-expectations (~> 3.10.0)
65
+ rspec-mocks (~> 3.10.0)
66
+ rspec-core (3.10.1)
67
+ rspec-support (~> 3.10.0)
68
+ rspec-expectations (3.10.1)
126
69
  diff-lcs (>= 1.2.0, < 2.0)
127
- rspec-support (~> 3.8.0)
128
- rspec-mocks (3.8.0)
70
+ rspec-support (~> 3.10.0)
71
+ rspec-mocks (3.10.1)
129
72
  diff-lcs (>= 1.2.0, < 2.0)
130
- rspec-support (~> 3.8.0)
131
- rspec-support (3.8.0)
132
- rubocop (0.65.0)
133
- jaro_winkler (~> 1.5.1)
73
+ rspec-support (~> 3.10.0)
74
+ rspec-support (3.10.1)
75
+ rubocop (0.93.1)
134
76
  parallel (~> 1.10)
135
- parser (>= 2.5, != 2.5.1.1)
136
- powerpack (~> 0.1)
137
- psych (>= 3.1.0)
77
+ parser (>= 2.7.1.5)
138
78
  rainbow (>= 2.2.2, < 4.0)
79
+ regexp_parser (>= 1.8)
80
+ rexml
81
+ rubocop-ast (>= 0.6.0)
139
82
  ruby-progressbar (~> 1.7)
140
- unicode-display_width (~> 1.4.0)
141
- ruby-progressbar (1.10.0)
83
+ unicode-display_width (>= 1.4.0, < 2.0)
84
+ rubocop-ast (1.4.0)
85
+ parser (>= 2.7.1.5)
86
+ ruby-progressbar (1.11.0)
142
87
  ruby-xz (1.0.0)
143
- signet (0.12.0)
144
- addressable (~> 2.3)
145
- faraday (~> 0.9)
146
- jwt (>= 1.5, < 3.0)
147
- multi_json (~> 1.10)
148
88
  simplecov (0.16.1)
149
89
  docile (~> 1.1)
150
90
  json (>= 1.8, < 3)
151
91
  simplecov-html (~> 0.10.0)
152
92
  simplecov-html (0.10.2)
153
- solargraph (0.31.3)
154
- backport (~> 0.3)
155
- htmlentities (~> 4.3, >= 4.3.4)
156
- jaro_winkler (~> 1.5)
157
- kramdown (~> 1.16)
158
- parser (~> 2.3)
159
- reverse_markdown (~> 1.0, >= 1.0.5)
160
- rubocop (~> 0.52)
161
- thor (~> 0.19, >= 0.19.4)
162
- tilt (~> 2.0)
163
- yard (~> 0.9)
93
+ sync (0.5.0)
164
94
  term-ansicolor (1.7.1)
165
95
  tins (~> 1.0)
166
- thor (0.19.4)
167
- tilt (2.0.9)
168
- tins (1.20.2)
169
- treetop (1.6.10)
170
- polyglot (~> 0.3)
171
- uber (0.1.0)
96
+ thor (0.20.3)
97
+ tins (1.26.0)
98
+ sync
172
99
  unf (0.1.4)
173
100
  unf_ext
174
- unf_ext (0.0.7.6)
175
- unicode-display_width (1.4.1)
176
- unicode_utils (1.4.0)
177
- yard (0.9.18)
101
+ unf_ext (0.0.7.7)
102
+ unicode-display_width (1.7.0)
178
103
 
179
104
  PLATFORMS
180
105
  ruby
@@ -184,10 +109,9 @@ DEPENDENCIES
184
109
  byebug (~> 10.0)
185
110
  coveralls (~> 0.8)
186
111
  dwca_hunter!
187
- rake (~> 12.3)
188
- rspec (~> 3.7)
189
- rubocop (~> 0.58)
190
- solargraph (~> 0.23)
112
+ rake (~> 13.0)
113
+ rspec (~> 3.9)
114
+ rubocop (~> 0.84)
191
115
 
192
116
  BUNDLED WITH
193
- 2.0.1
117
+ 2.1.4
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011 Marine Biological Laboratory
1
+ Copyright (c) 2011-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -32,7 +32,7 @@ See dwca_hunter/bin/dwca-hunt.rb as a example how to use the code.
32
32
 
33
33
  ## Copyright
34
34
 
35
- Copyright (c) 2011-2016 Dmitry Mozzherin. See LICENSE.txt for further details.
35
+ Copyright (c) 2011-2020 Dmitry Mozzherin. See LICENSE.txt for further details.
36
36
 
37
37
  [code-climate-img]: https://codeclimate.com/badge.png
38
38
  [code-climate]: https://codeclimate.com/github/GlobalNamesArchitecture/dwca_hunter
data/dwca_hunter.gemspec CHANGED
@@ -6,7 +6,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
6
6
  require "dwca_hunter/version"
7
7
 
8
8
  Gem::Specification.new do |gem|
9
- gem.required_ruby_version = ">= 2.4"
9
+ gem.required_ruby_version = ">= 2.6.6"
10
10
  gem.name = "dwca_hunter"
11
11
  gem.version = DwcaHunter.version
12
12
  gem.license = "MIT"
@@ -26,11 +26,11 @@ Gem::Specification.new do |gem|
26
26
  gem.executables = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
27
  gem.require_paths = ["lib"]
28
28
 
29
- gem.add_dependency "biodiversity", "~> 3.5"
30
- gem.add_dependency "dwc-archive", "~> 1.0"
29
+ gem.add_dependency "biodiversity", "~> 4"
30
+ gem.add_dependency "dwc-archive", "~> 1.1.1"
31
31
  gem.add_dependency "gn_uuid", "~> 0.5"
32
32
  gem.add_dependency "htmlentities", "~> 4.3"
33
- gem.add_dependency "nokogiri", "~> 1.8"
33
+ gem.add_dependency "nokogiri", "~> 1.11"
34
34
  gem.add_dependency "rest-client", "~> 2.0"
35
35
  gem.add_dependency "ruby-xz", "~> 1.0"
36
36
  gem.add_dependency "thor", "~> 0.19"
@@ -38,8 +38,7 @@ Gem::Specification.new do |gem|
38
38
  gem.add_development_dependency "bundler", "~> 2.0"
39
39
  gem.add_development_dependency "byebug", "~> 10.0"
40
40
  gem.add_development_dependency "coveralls", "~> 0.8"
41
- gem.add_development_dependency "rake", "~> 12.3"
42
- gem.add_development_dependency "rspec", "~> 3.7"
43
- gem.add_development_dependency "rubocop", "~> 0.58"
44
- gem.add_development_dependency "solargraph", "~> 0.23"
41
+ gem.add_development_dependency "rake", "~> 13.0"
42
+ gem.add_development_dependency "rspec", "~> 3.9"
43
+ gem.add_development_dependency "rubocop", "~> 0.84"
45
44
  end
data/exe/dwcahunter CHANGED
@@ -37,9 +37,7 @@ class DwcaHunterCLI < Thor
37
37
  res = []
38
38
  DwcaHunter.resources.each do |resource|
39
39
  r = resource.new
40
- if add_resource?(search, r)
41
- res << { n: nil, command: r.command, resource: r.title }
42
- end
40
+ res << { n: nil, command: r.command, resource: r.title } if add_resource?(search, r)
43
41
  end
44
42
  res.sort_by { |r| r[:command] }.each_with_object([]) do |r, a|
45
43
  r[:n] = a.size + 1
data/lib/dwca_hunter.rb CHANGED
@@ -1,17 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "base64"
3
4
  require "biodiversity"
4
- require "logger"
5
- require "fileutils"
6
- require "uri"
7
- require "tmpdir"
8
- require "net/http"
9
- require "json"
10
- require "htmlentities"
11
5
  require "dwc_archive"
12
6
  require "dwca_hunter/resource"
7
+ require "fileutils"
8
+ require "htmlentities"
9
+ require "json"
10
+ require "logger"
11
+ require "net/http"
13
12
  require "rest_client"
14
- require "base64"
13
+ require "tmpdir"
14
+ require "uri"
15
15
 
16
16
  Dir[File.join(__dir__, "dwca_hunter", "*.rb")].
17
17
  each { |f| require f }
@@ -51,5 +51,36 @@ module DwcaHunter
51
51
  c < Resource
52
52
  end
53
53
  end
54
+
55
# Normalizes an authorship string so that initials written after a surname
# ("Smith, AB & Jones, C") are moved in front of it ("A. B. Smith & C. Jones").
#
# The string is split into three parts:
#   1. an optional opening parenthesis,
#   2. the author names,
#   3. an optional tail that starts with whitespace, a comma or ")" and
#      contains no uppercase letters (typically the year and closing paren).
# Only the names part is rewritten; the other two are re-attached verbatim.
#
# @param auth [String] raw authorship string
# @return [String] the normalized authorship; when the string does not fit
#   the pattern above (e.g. it spans several lines) it is returned with only
#   the "duPont" -> "du Pont" replacement applied
def normalize_authors(auth)
  auth = auth.gsub(/duPont/, "du Pont")
  # \A and \z anchor the whole string. With ^ and $ a multi-line value
  # matched on its first line only and everything after it was dropped.
  match = /\A([\(]?)(.*?)(([\s,\)][^[:upper:]]*)?\z)/.match(auth)
  return auth if match.nil?

  opening, names, tail = match[1..3]
  # "&" and "," both separate authors (or a surname from its initials).
  parts = names.tr("&", ",").split(",").map(&:strip)
  # A single element has nothing to reorder, so keep the text as written.
  names = move_initials(parts) if parts.size > 1
  "#{opening}#{names}#{tail}"
end
66
+
67
# Joins a list of author name parts into one string, moving initials that
# follow a surname in front of that surname.
#
# An element counts as initials when it consists of 1-4 uppercase letters,
# optionally followed by " Jr". Such an element is merged into the previous
# entry as "A. B. Surname" (with " Jr" appended after the surname). The
# first kept element is never treated as initials because there is no
# preceding surname to attach it to.
#
# @param ary [Array<String>] name parts, e.g. ["Smith", "AB", "Jones", "C"]
# @return [String] authors joined with ", " and a final " & ", e.g.
#   "A. B. Smith & C. Jones"; "" when no non-empty parts are given
def move_initials(ary)
  res = []
  ary.each do |a|
    # Blank parts come from inputs such as "Smith, & Jones"; keeping them
    # would produce output like "Smith,  & Jones".
    next if a.empty?

    # \A and \z so that only a part consisting entirely of initials matches.
    match = res.empty? ? nil : /\A([[:upper:]]{1,4})(\sJr)?\z/.match(a)
    if match
      initials = match[1].chars.join(". ")
      res[-1] = "#{initials}. #{res[-1]}#{match[2]}"
    else
      res << a
    end
  end
  # Zero or one author: nothing to join (previously [] produced " & ").
  return res.first.to_s if res.size < 2

  "#{res[0..-2].join(', ')} & #{res[-1]}"
end
54
84
  end
55
85
  end
86
+