dwca_hunter 0.5.5 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 200857d1971d198dfe5a7f048a1eab71cfb295f96cff9e3164d5d62486e26431
4
- data.tar.gz: 87a07aa3c9006857c1876fa017436921621889ff833e2eb8d499d853dbe243d1
3
+ metadata.gz: 76fb831d8f9d33d8cfa32221ce4a5a0e82ee8090ed427c6d2eebc96e267b8151
4
+ data.tar.gz: 33c8040ec605718963ee46eef4a2a4c27770ce3cfb53baf7486047a3d0528410
5
5
  SHA512:
6
- metadata.gz: 5aeef02ffb2e3f366aef826c7492c5acb23a7aa4fd57ec3dd8fec513176d919c306e289aaa3e678a8b629a81630da2dcd3c3b42b6259fba3aa04b971b0b6e489
7
- data.tar.gz: c20a2dd504367154582297754fcb594a372484aa67944c8cff25bf56f1df571069fd634169d9f9ef7a3c4a708bca276dbcd413786b1de663ea0fbfc36078c7b0
6
+ metadata.gz: 380a22edce284de5f1e836b8b4ae236b8d43fecb7b72032d4e4d3c071d77da05bb87498f6e2f868b071f935983e64861cd8ce549d6b9ed8d672b8ab7d53da32e
7
+ data.tar.gz: 97a397e0fc24f5705e46287178f4dd1560621995a19b85007609eba4819127b0a98c49a9dd9ea2d946ead54bf5c4f17e2d85cea738f3c9fdf7ba3de3a5c92f93
@@ -1,3 +1,40 @@
1
+ q
2
+ row
3
+ data
4
+ cc
5
+ c
6
+ data
7
+ c
8
+ data
9
+ c
10
+ data
11
+ c
12
+ data
13
+ c
14
+ data
15
+ q
16
+ p n
17
+ n
18
+ q
19
+ @name_id_json[n]
20
+ n = row[:acc_name].to_sym
21
+ row[:acc_name].to_sym
22
+ row[:acc_name]
23
+ acc_id
24
+ vi acc_id
25
+ q
26
+ @name_id
27
+ acc_id
28
+ id
29
+ q
30
+ DwcaHunter.resources
31
+ q
32
+ DwcaHunter.resources
33
+ q
34
+ res.size
35
+ res.len
36
+ res
37
+ trd
1
38
  exit
2
39
  pp row
3
40
  require "pp"
data/.gitignore CHANGED
@@ -56,3 +56,8 @@ tags
56
56
 
57
57
  # vscode
58
58
  .vs
59
+
60
+ # misc
61
+ *.json
62
+ *.csv
63
+ *.txt
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ NewCops: disable
3
+ TargetRubyVersion: 2.6.6
3
4
  Exclude:
4
5
  - bin/**/*
5
6
  - db/**/*
@@ -30,4 +31,4 @@ Metrics/MethodLength:
30
31
  Naming/FileName:
31
32
  Exclude:
32
33
  - Gemfile
33
- - Rakefile
34
+ - Rakefile
@@ -1 +1 @@
1
- 2.6.5
1
+ 2.6.6
@@ -1,12 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dwca_hunter (0.5.5)
5
- biodiversity (~> 3.5)
6
- dwc-archive (~> 1.0)
4
+ dwca_hunter (0.7.0)
5
+ biodiversity (~> 4)
6
+ dwc-archive (~> 1.1.1)
7
7
  gn_uuid (~> 0.5)
8
8
  htmlentities (~> 4.3)
9
- nokogiri (~> 1.8)
9
+ nokogiri (~> 1.10)
10
10
  rest-client (~> 2.0)
11
11
  ruby-xz (~> 1.0)
12
12
  thor (~> 0.19)
@@ -15,113 +15,87 @@ GEM
15
15
  remote: http://rubygems.org/
16
16
  specs:
17
17
  ast (2.4.0)
18
- backport (0.3.0)
19
- biodiversity (3.5.1)
20
- gn_uuid (~> 0.5)
21
- parallel (~> 1.12)
22
- treetop (~> 1.6)
23
- unicode_utils (~> 1.4)
18
+ biodiversity (4.1.0)
19
+ ffi (~> 1.11)
24
20
  byebug (10.0.2)
25
- coveralls (0.8.22)
21
+ coveralls (0.8.23)
26
22
  json (>= 1.8, < 3)
27
23
  simplecov (~> 0.16.1)
28
24
  term-ansicolor (~> 1.3)
29
- thor (~> 0.19.4)
25
+ thor (>= 0.19.4, < 2.0)
30
26
  tins (~> 1.6)
31
27
  diff-lcs (1.3)
32
- docile (1.3.1)
28
+ docile (1.3.2)
33
29
  domain_name (0.5.20190701)
34
30
  unf (>= 0.0.5, < 1.0.0)
35
- dwc-archive (1.0.1)
36
- nokogiri (~> 1.8)
37
- parsley-store (~> 0.3)
31
+ dwc-archive (1.1.1)
32
+ biodiversity (~> 4)
33
+ nokogiri (~> 1.10)
34
+ ffi (1.13.1)
38
35
  gn_uuid (0.5.1)
39
36
  htmlentities (4.3.4)
40
37
  http-accept (1.7.0)
41
38
  http-cookie (1.0.3)
42
39
  domain_name (~> 0.5)
43
- jaro_winkler (1.5.2)
44
- json (2.2.0)
45
- kramdown (1.17.0)
46
- mime-types (3.3)
40
+ json (2.3.0)
41
+ mime-types (3.3.1)
47
42
  mime-types-data (~> 3.2015)
48
- mime-types-data (3.2019.1009)
43
+ mime-types-data (3.2020.0512)
49
44
  mini_portile2 (2.4.0)
50
45
  netrc (0.11.0)
51
- nokogiri (1.10.5)
46
+ nokogiri (1.10.9)
52
47
  mini_portile2 (~> 2.4.0)
53
- parallel (1.14.0)
54
- parser (2.6.0.0)
48
+ parallel (1.19.1)
49
+ parser (2.7.1.3)
55
50
  ast (~> 2.4.0)
56
- parsley-store (0.3.6)
57
- biodiversity (~> 3.1)
58
- redis (~> 3.0)
59
- polyglot (0.3.5)
60
- powerpack (0.1.2)
61
- psych (3.1.0)
62
51
  rainbow (3.0.0)
63
- rake (12.3.2)
64
- redis (3.3.5)
52
+ rake (13.0.1)
65
53
  rest-client (2.1.0)
66
54
  http-accept (>= 1.7.0, < 2.0)
67
55
  http-cookie (>= 1.0.2, < 2.0)
68
56
  mime-types (>= 1.16, < 4.0)
69
57
  netrc (~> 0.8)
70
- reverse_markdown (1.1.0)
71
- nokogiri
72
- rspec (3.8.0)
73
- rspec-core (~> 3.8.0)
74
- rspec-expectations (~> 3.8.0)
75
- rspec-mocks (~> 3.8.0)
76
- rspec-core (3.8.0)
77
- rspec-support (~> 3.8.0)
78
- rspec-expectations (3.8.2)
58
+ rexml (3.2.4)
59
+ rspec (3.9.0)
60
+ rspec-core (~> 3.9.0)
61
+ rspec-expectations (~> 3.9.0)
62
+ rspec-mocks (~> 3.9.0)
63
+ rspec-core (3.9.2)
64
+ rspec-support (~> 3.9.3)
65
+ rspec-expectations (3.9.2)
79
66
  diff-lcs (>= 1.2.0, < 2.0)
80
- rspec-support (~> 3.8.0)
81
- rspec-mocks (3.8.0)
67
+ rspec-support (~> 3.9.0)
68
+ rspec-mocks (3.9.1)
82
69
  diff-lcs (>= 1.2.0, < 2.0)
83
- rspec-support (~> 3.8.0)
84
- rspec-support (3.8.0)
85
- rubocop (0.65.0)
86
- jaro_winkler (~> 1.5.1)
70
+ rspec-support (~> 3.9.0)
71
+ rspec-support (3.9.3)
72
+ rubocop (0.84.0)
87
73
  parallel (~> 1.10)
88
- parser (>= 2.5, != 2.5.1.1)
89
- powerpack (~> 0.1)
90
- psych (>= 3.1.0)
74
+ parser (>= 2.7.0.1)
91
75
  rainbow (>= 2.2.2, < 4.0)
76
+ rexml
77
+ rubocop-ast (>= 0.0.3)
92
78
  ruby-progressbar (~> 1.7)
93
- unicode-display_width (~> 1.4.0)
94
- ruby-progressbar (1.10.0)
79
+ unicode-display_width (>= 1.4.0, < 2.0)
80
+ rubocop-ast (0.0.3)
81
+ parser (>= 2.7.0.1)
82
+ ruby-progressbar (1.10.1)
95
83
  ruby-xz (1.0.0)
96
84
  simplecov (0.16.1)
97
85
  docile (~> 1.1)
98
86
  json (>= 1.8, < 3)
99
87
  simplecov-html (~> 0.10.0)
100
88
  simplecov-html (0.10.2)
101
- solargraph (0.31.3)
102
- backport (~> 0.3)
103
- htmlentities (~> 4.3, >= 4.3.4)
104
- jaro_winkler (~> 1.5)
105
- kramdown (~> 1.16)
106
- parser (~> 2.3)
107
- reverse_markdown (~> 1.0, >= 1.0.5)
108
- rubocop (~> 0.52)
109
- thor (~> 0.19, >= 0.19.4)
110
- tilt (~> 2.0)
111
- yard (~> 0.9)
89
+ sync (0.5.0)
112
90
  term-ansicolor (1.7.1)
113
91
  tins (~> 1.0)
114
- thor (0.19.4)
115
- tilt (2.0.9)
116
- tins (1.20.2)
117
- treetop (1.6.10)
118
- polyglot (~> 0.3)
92
+ thor (0.20.3)
93
+ tins (1.25.0)
94
+ sync
119
95
  unf (0.1.4)
120
96
  unf_ext
121
- unf_ext (0.0.7.6)
122
- unicode-display_width (1.4.1)
123
- unicode_utils (1.4.0)
124
- yard (0.9.20)
97
+ unf_ext (0.0.7.7)
98
+ unicode-display_width (1.7.0)
125
99
 
126
100
  PLATFORMS
127
101
  ruby
@@ -131,10 +105,9 @@ DEPENDENCIES
131
105
  byebug (~> 10.0)
132
106
  coveralls (~> 0.8)
133
107
  dwca_hunter!
134
- rake (~> 12.3)
135
- rspec (~> 3.7)
136
- rubocop (~> 0.58)
137
- solargraph (~> 0.23)
108
+ rake (~> 13.0)
109
+ rspec (~> 3.9)
110
+ rubocop (~> 0.84)
138
111
 
139
112
  BUNDLED WITH
140
- 2.0.2
113
+ 2.1.4
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011 Marine Biological Laboratory
1
+ Copyright (c) 2011-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -32,7 +32,7 @@ See dwca_hunter/bin/dwca-hunt.rb as a example how to use the code.
32
32
 
33
33
  ## Copyright
34
34
 
35
- Copyright (c) 2011-2016 Dmitry Mozzherin. See LICENSE.txt for further details.
35
+ Copyright (c) 2011-2020 Dmitry Mozzherin. See LICENSE.txt for further details.
36
36
 
37
37
  [code-climate-img]: https://codeclimate.com/badge.png
38
38
  [code-climate]: https://codeclimate.com/github/GlobalNamesArchitecture/dwca_hunter
@@ -6,7 +6,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
6
6
  require "dwca_hunter/version"
7
7
 
8
8
  Gem::Specification.new do |gem|
9
- gem.required_ruby_version = ">= 2.4"
9
+ gem.required_ruby_version = ">= 2.6.6"
10
10
  gem.name = "dwca_hunter"
11
11
  gem.version = DwcaHunter.version
12
12
  gem.license = "MIT"
@@ -26,11 +26,11 @@ Gem::Specification.new do |gem|
26
26
  gem.executables = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
27
  gem.require_paths = ["lib"]
28
28
 
29
- gem.add_dependency "biodiversity", "~> 3.5"
30
- gem.add_dependency "dwc-archive", "~> 1.0"
29
+ gem.add_dependency "biodiversity", "~> 4"
30
+ gem.add_dependency "dwc-archive", "~> 1.1.1"
31
31
  gem.add_dependency "gn_uuid", "~> 0.5"
32
32
  gem.add_dependency "htmlentities", "~> 4.3"
33
- gem.add_dependency "nokogiri", "~> 1.8"
33
+ gem.add_dependency "nokogiri", "~> 1.10"
34
34
  gem.add_dependency "rest-client", "~> 2.0"
35
35
  gem.add_dependency "ruby-xz", "~> 1.0"
36
36
  gem.add_dependency "thor", "~> 0.19"
@@ -38,8 +38,7 @@ Gem::Specification.new do |gem|
38
38
  gem.add_development_dependency "bundler", "~> 2.0"
39
39
  gem.add_development_dependency "byebug", "~> 10.0"
40
40
  gem.add_development_dependency "coveralls", "~> 0.8"
41
- gem.add_development_dependency "rake", "~> 12.3"
42
- gem.add_development_dependency "rspec", "~> 3.7"
43
- gem.add_development_dependency "rubocop", "~> 0.58"
44
- gem.add_development_dependency "solargraph", "~> 0.23"
41
+ gem.add_development_dependency "rake", "~> 13.0"
42
+ gem.add_development_dependency "rspec", "~> 3.9"
43
+ gem.add_development_dependency "rubocop", "~> 0.84"
45
44
  end
@@ -37,9 +37,7 @@ class DwcaHunterCLI < Thor
37
37
  res = []
38
38
  DwcaHunter.resources.each do |resource|
39
39
  r = resource.new
40
- if add_resource?(search, r)
41
- res << { n: nil, command: r.command, resource: r.title }
42
- end
40
+ res << { n: nil, command: r.command, resource: r.title } if add_resource?(search, r)
43
41
  end
44
42
  res.sort_by { |r| r[:command] }.each_with_object([]) do |r, a|
45
43
  r[:n] = a.size + 1
@@ -51,5 +51,36 @@ module DwcaHunter
51
51
  c < Resource
52
52
  end
53
53
  end
54
+
55
# Normalizes an authorship string so that initials listed after a surname
# (e.g. "Smith, J") are moved in front of it.
#
# The string is split into three parts: an optional opening parenthesis,
# the author list, and an optional tail that starts with whitespace, a
# comma or a closing parenthesis and contains no uppercase letters (for
# example ", 1900" or ")"). Only the author list is rewritten; the other
# two parts are re-attached unchanged.
#
# @param auth [String] raw authorship string
# @return [String] the normalized authorship, or the input (with "duPont"
#   expanded to "du Pont") when it does not fit the expected shape
def normalize_authors(auth)
  auth = auth.gsub(/duPont/, "du Pont")
  # \A and \z anchor the whole string. Line anchors (^ and $) would match
  # only the first line of a multi-line value and silently drop the rest.
  match = /\A(\(?)(.*?)(([\s,\)][^[:upper:]]*)?\z)/.match(auth)
  return auth if match.nil?

  opening, authors, tail = match[1..3]
  # "&" and "," are both treated as separators between list elements.
  parts = authors.tr("&", ",").split(",").map(&:strip)
  authors = move_initials(parts) if parts.size > 1
  "#{opening}#{authors}#{tail}"
end
66
+
67
# Joins a list of author fragments into one string, moving fragments that
# look like initials in front of the preceding author.
#
# A fragment is treated as initials when it consists of 1 to 4 uppercase
# letters, optionally followed by " Jr". Such a fragment is expanded to
# dotted initials ("JV" becomes "J. V.") and prepended to the previous
# element; an accompanying " Jr" is appended after that element. The
# first fragment is always kept as is.
#
# @param ary [Array<String>] author fragments, already stripped
# @return [String] a single author, or "A, B & C" for several authors;
#   an empty string for an empty list
def move_initials(ary)
  # Without this guard an empty list would be rendered as " & ".
  return "" if ary.empty?

  res = ary.drop(1).each_with_object([ary.first]) do |fragment, acc|
    match = /\A([[:upper:]]{1,4})(\sJr)?\z/.match(fragment)
    if match
      initials = match[1].chars.join(". ")
      # match[2] is nil when there is no " Jr"; it interpolates as "".
      acc[-1] = "#{initials}. #{acc[-1]}#{match[2]}"
    else
      acc << fragment
    end
  end
  res.size == 1 ? res.first : "#{res[0..-2].join(', ')} & #{res.last}"
end
54
84
  end
55
85
  end
86
+
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
module DwcaHunter
  # Resource for the American Ornithological Society (AOS) bird checklist.
  # Downloads the checklist CSV and turns it into a DarwinCore Archive with
  # a taxon core file and a vernacular-names extension (English and French).
  class ResourceAOS < DwcaHunter::Resource
    # @param opts [Hash] options passed through to DwcaHunter::Resource
    def initialize(opts = {})
      @command = "aos-birds"
      @title = "American Ornithological Society"
      @url = "http://checklist.americanornithology.org/taxa.csv"
      # Must be @uuid (not @UUID): generate_dwca reads @uuid for the EML id.
      @uuid = "91d38806-8435-479f-a18d-705e5cb0767c"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "aos",
                                 "data.csv")
      @synonyms = []
      @names = []
      @vernaculars = []
      @extensions = []
      @synonyms_hash = {}
      @vernaculars_hash = {}
      # NOTE(review): @download_dir is used below but never assigned in this
      # class; presumably the parent initializer derives it from
      # @download_path -- confirm against DwcaHunter::Resource.
      super(opts)
    end

    # Fetches the checklist CSV with curl and stores it at @download_path.
    def download
      puts "Downloading csv from remote"
      # NOTE(review): values are interpolated into the shell command
      # unquoted; a temp directory containing spaces would break this.
      `curl -s -L #{@url} -o #{@download_path}`
    end

    # The download is a plain CSV file, so there is nothing to unpack.
    def unpack; end

    # Reads the downloaded CSV and generates the DarwinCore Archive.
    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    # Switches the working directory to the download directory and collects
    # names from the CSV file.
    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    # Fills @names and @vernaculars from every row of data.csv.
    # CSV.foreach closes the file when iteration finishes.
    def collect_names
      @names_index = {}
      csv_path = File.join(@download_dir, "data.csv")
      CSV.foreach(csv_path, headers: true) do |row|
        taxon_id = row["id"]
        @names << {
          taxon_id: taxon_id,
          name_string: row["species"],
          kingdom: "Animalia",
          phylum: "Chordata",
          klass: "Aves",
          order: row["order"],
          family: row["family"],
          genus: row["genus"],
          code: "ICZN"
        }
        add_vernacular(taxon_id, row["common_name"], "en")
        add_vernacular(taxon_id, row["french_name"], "fr")
      end
    end

    # Records a vernacular name for a taxon; blank or missing names are
    # skipped.
    def add_vernacular(taxon_id, name, lang)
      return if name.to_s.empty?

      @vernaculars << { taxon_id: taxon_id, vern: name, lang: lang }
    end

    # Builds the core table, the vernacular extension and the EML metadata,
    # then delegates to the parent implementation via super.
    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      # The first row of the core table holds the DarwinCore term URIs;
      # the data rows below follow the same column order.
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/kingdom",
                "http://rs.tdwg.org/dwc/terms/phylum",
                "http://rs.tdwg.org/dwc/terms/class",
                "http://rs.tdwg.org/dwc/terms/order",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus",
                "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string],
                  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
                  n[:genus], n[:code]]
      end
      @extensions << {
        data: [[
          "http://rs.tdwg.org/dwc/terms/taxonID",
          "http://rs.tdwg.org/dwc/terms/vernacularName",
          "http://purl.org/dc/terms/language"
        ]],
        file_name: "vernacular_names.txt",
        row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
      }

      # Vernacular rows go into the extension that was just appended.
      @vernaculars.each do |v|
        @extensions[-1][:data] << [v[:taxon_id], v[:vern], v[:lang]]
      end
      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "R. T.",
            last_name: "Chesser" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "The American Ornithological Society's (AOS) Checklist is " \
                  "the official source on the taxonomy of birds found in North and " \
                  "Middle America, including adjacent islands. This list is produced " \
                  "by the North American Classification and Nomenclature Committee " \
                  "(NACC) of the AOS.\n\n" \
                  "Recommended citation: Chesser, R. T., K. J. Burns, C. Cicero, " \
                  "J. L. Dunn, A. W. Kratter, I. J. Lovette, P. C. Rasmussen, " \
                  "J. V. Remsen, Jr., D. F. Stotz, and K. Winker. 2019. Check-list " \
                  "of North American Birds (online). American Ornithological Society. " \
                  "http://checklist.aou.org/taxa",
        url: @url
      }
      super
    end
  end
end