dwca_hunter 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 76fb831d8f9d33d8cfa32221ce4a5a0e82ee8090ed427c6d2eebc96e267b8151
4
- data.tar.gz: 33c8040ec605718963ee46eef4a2a4c27770ce3cfb53baf7486047a3d0528410
3
+ metadata.gz: 60327e105c53c226f322e3a7272bdc5747d73fac0124887b024f99e3c39c985b
4
+ data.tar.gz: '09660f8b5feccfaf4caeaec277db4dc4729a973196a77bb02860947ef55bd272'
5
5
  SHA512:
6
- metadata.gz: 380a22edce284de5f1e836b8b4ae236b8d43fecb7b72032d4e4d3c071d77da05bb87498f6e2f868b071f935983e64861cd8ce549d6b9ed8d672b8ab7d53da32e
7
- data.tar.gz: 97a397e0fc24f5705e46287178f4dd1560621995a19b85007609eba4819127b0a98c49a9dd9ea2d946ead54bf5c4f17e2d85cea738f3c9fdf7ba3de3a5c92f93
6
+ metadata.gz: 9b0a621f85535f421eef5a8550ce653c4f3483f563c7b19934a76e8f30b0cdf17e7a8c59945ea31455c57a350a14d345993f5fe6b91d656f5eb40317da6b1af9
7
+ data.tar.gz: 00a54b23a8588e6d304d35bb8756f633fb409777f07908525d257f4b1a23c5956a9683863b491f50e8d772c8b41343f909e6cb252a326d4f2e662f96f37826ed
@@ -6,7 +6,7 @@ PATH
6
6
  dwc-archive (~> 1.1.1)
7
7
  gn_uuid (~> 0.5)
8
8
  htmlentities (~> 4.3)
9
- nokogiri (~> 1.10)
9
+ nokogiri (~> 1.11)
10
10
  rest-client (~> 2.0)
11
11
  ruby-xz (~> 1.0)
12
12
  thor (~> 0.19)
@@ -14,7 +14,7 @@ PATH
14
14
  GEM
15
15
  remote: http://rubygems.org/
16
16
  specs:
17
- ast (2.4.0)
17
+ ast (2.4.1)
18
18
  biodiversity (4.1.0)
19
19
  ffi (~> 1.11)
20
20
  byebug (10.0.2)
@@ -24,62 +24,66 @@ GEM
24
24
  term-ansicolor (~> 1.3)
25
25
  thor (>= 0.19.4, < 2.0)
26
26
  tins (~> 1.6)
27
- diff-lcs (1.3)
28
- docile (1.3.2)
27
+ diff-lcs (1.4.4)
28
+ docile (1.3.4)
29
29
  domain_name (0.5.20190701)
30
30
  unf (>= 0.0.5, < 1.0.0)
31
31
  dwc-archive (1.1.1)
32
32
  biodiversity (~> 4)
33
33
  nokogiri (~> 1.10)
34
- ffi (1.13.1)
34
+ ffi (1.14.2)
35
35
  gn_uuid (0.5.1)
36
36
  htmlentities (4.3.4)
37
37
  http-accept (1.7.0)
38
38
  http-cookie (1.0.3)
39
39
  domain_name (~> 0.5)
40
- json (2.3.0)
40
+ json (2.5.1)
41
41
  mime-types (3.3.1)
42
42
  mime-types-data (~> 3.2015)
43
- mime-types-data (3.2020.0512)
44
- mini_portile2 (2.4.0)
43
+ mime-types-data (3.2020.1104)
44
+ mini_portile2 (2.5.0)
45
45
  netrc (0.11.0)
46
- nokogiri (1.10.9)
47
- mini_portile2 (~> 2.4.0)
48
- parallel (1.19.1)
49
- parser (2.7.1.3)
50
- ast (~> 2.4.0)
46
+ nokogiri (1.11.1)
47
+ mini_portile2 (~> 2.5.0)
48
+ racc (~> 1.4)
49
+ parallel (1.20.1)
50
+ parser (3.0.0.0)
51
+ ast (~> 2.4.1)
52
+ racc (1.5.2)
51
53
  rainbow (3.0.0)
52
- rake (13.0.1)
54
+ rake (13.0.3)
55
+ regexp_parser (2.0.3)
53
56
  rest-client (2.1.0)
54
57
  http-accept (>= 1.7.0, < 2.0)
55
58
  http-cookie (>= 1.0.2, < 2.0)
56
59
  mime-types (>= 1.16, < 4.0)
57
60
  netrc (~> 0.8)
58
61
  rexml (3.2.4)
59
- rspec (3.9.0)
60
- rspec-core (~> 3.9.0)
61
- rspec-expectations (~> 3.9.0)
62
- rspec-mocks (~> 3.9.0)
63
- rspec-core (3.9.2)
64
- rspec-support (~> 3.9.3)
65
- rspec-expectations (3.9.2)
62
+ rspec (3.10.0)
63
+ rspec-core (~> 3.10.0)
64
+ rspec-expectations (~> 3.10.0)
65
+ rspec-mocks (~> 3.10.0)
66
+ rspec-core (3.10.1)
67
+ rspec-support (~> 3.10.0)
68
+ rspec-expectations (3.10.1)
66
69
  diff-lcs (>= 1.2.0, < 2.0)
67
- rspec-support (~> 3.9.0)
68
- rspec-mocks (3.9.1)
70
+ rspec-support (~> 3.10.0)
71
+ rspec-mocks (3.10.1)
69
72
  diff-lcs (>= 1.2.0, < 2.0)
70
- rspec-support (~> 3.9.0)
71
- rspec-support (3.9.3)
72
- rubocop (0.84.0)
73
+ rspec-support (~> 3.10.0)
74
+ rspec-support (3.10.1)
75
+ rubocop (0.93.1)
73
76
  parallel (~> 1.10)
74
- parser (>= 2.7.0.1)
77
+ parser (>= 2.7.1.5)
75
78
  rainbow (>= 2.2.2, < 4.0)
79
+ regexp_parser (>= 1.8)
76
80
  rexml
77
- rubocop-ast (>= 0.0.3)
81
+ rubocop-ast (>= 0.6.0)
78
82
  ruby-progressbar (~> 1.7)
79
83
  unicode-display_width (>= 1.4.0, < 2.0)
80
- rubocop-ast (0.0.3)
81
- parser (>= 2.7.0.1)
82
- ruby-progressbar (1.10.1)
84
+ rubocop-ast (1.4.0)
85
+ parser (>= 2.7.1.5)
86
+ ruby-progressbar (1.11.0)
83
87
  ruby-xz (1.0.0)
84
88
  simplecov (0.16.1)
85
89
  docile (~> 1.1)
@@ -90,7 +94,7 @@ GEM
90
94
  term-ansicolor (1.7.1)
91
95
  tins (~> 1.0)
92
96
  thor (0.20.3)
93
- tins (1.25.0)
97
+ tins (1.26.0)
94
98
  sync
95
99
  unf (0.1.4)
96
100
  unf_ext
@@ -30,7 +30,7 @@ Gem::Specification.new do |gem|
30
30
  gem.add_dependency "dwc-archive", "~> 1.1.1"
31
31
  gem.add_dependency "gn_uuid", "~> 0.5"
32
32
  gem.add_dependency "htmlentities", "~> 4.3"
33
- gem.add_dependency "nokogiri", "~> 1.10"
33
+ gem.add_dependency "nokogiri", "~> 1.11"
34
34
  gem.add_dependency "rest-client", "~> 2.0"
35
35
  gem.add_dependency "ruby-xz", "~> 1.0"
36
36
  gem.add_dependency "thor", "~> 0.19"
@@ -0,0 +1,131 @@
# frozen_string_literal: true

module DwcaHunter
  # Converts the Index Fungorum (Species Fungorum) CSV dump into a
  # DarwinCore Archive.
  #
  # NOTE(review): the class is called ResourceAOS although it handles
  # Index Fungorum. If another resource file also defines ResourceAOS, this
  # file reopens that class and overrides its methods -- confirm, and rename
  # together with whatever registers the resource.
  class ResourceAOS < DwcaHunter::Resource
    # Sets the resource constants (command name, title, source URL, UUID and
    # local download path) and then hands +opts+ to DwcaHunter::Resource.
    #
    # @param opts [Hash] options forwarded unchanged to the superclass
    def initialize(opts = {})
      @command = "index-fungorum"
      @title = "Index Fungorum (Species Fungorum)"
      @url = "https://uofi.box.com/shared/static/54l3b7h4q4pwqq4fgqvx42h3d328fl1c.csv"
      # Lower-case @uuid on purpose: generate_dwca reads @uuid for the EML id,
      # so assigning @UUID left the id nil.
      @uuid = "af06816a-0b28-4a09-8219-bd1d63289858"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "index-fungorum",
                                 "data.csv")
      @synonyms = []
      @names = []
      @extensions = []
      @synonyms_hash = {}
      super(opts)
    end

    # Fetches the CSV from @url into @download_path using curl.
    def download
      puts "Downloading csv from remote"
      `curl -s -L #{@url} -o #{@download_path}`
    end

    # The download is a plain CSV file, so there is nothing to unpack.
    def unpack; end

    # Reads the downloaded CSV and builds the DarwinCore Archive from it.
    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    # Switches the working directory to @download_dir and loads the names.
    # @download_dir is not assigned in this class -- presumably the
    # superclass derives it from @download_path; verify.
    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    # Appends one hash per CSV row to @names. CSV.foreach closes the file
    # once iteration finishes, so no file handle is left open.
    def collect_names
      @names_index = {}
      csv_path = File.join(@download_dir, "data.csv")
      CSV.foreach(csv_path, headers: true) do |row|
        @names << {
          taxon_id: row["RECORD NUMBER"],
          name_string: name_with_authorship(row),
          current_id: row["CURRENT NAME RECORD NUMBER"],
          kingdom: row["Kingdom name"],
          phylum: row["Phylum name"],
          klass: row["Class name"],
          order: row["Order name"],
          family: row["Family name"],
          code: "ICN"
        }
      end
    end

    # Joins name, authors and publication year with single spaces, skipping
    # parts that are missing or blank so the result carries no stray or
    # trailing whitespace.
    #
    # @param row [CSV::Row] one record of the Index Fungorum dump
    # @return [String]
    def name_with_authorship(row)
      [row["NAME OF FUNGUS"], row["AUTHORS"], row["YEAR OF PUBLICATION"]]
        .map { |part| part.to_s.strip }
        .reject(&:empty?)
        .join(" ")
    end

    # Fills @core (a header row of DarwinCore term URIs followed by one row
    # per collected name) and @eml (dataset metadata), then calls the
    # superclass implementation.
    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/acceptedNameUsageID",
                "http://rs.tdwg.org/dwc/terms/kingdom",
                "http://rs.tdwg.org/dwc/terms/phylum",
                "http://rs.tdwg.org/dwc/terms/class",
                "http://rs.tdwg.org/dwc/terms/order",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
      # Column order must match the header row above.
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string], n[:current_id],
                  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
                  n[:code]]
      end

      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "Paul",
            last_name: "Kirk" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "The Index Fungorum, the global fungal nomenclator " \
                  "coordinated and supported by the Index Fungorum Partnership, " \
                  "contains names of fungi (including yeasts, lichens, chromistan " \
                  "fungal analogues, protozoan fungal analogues and fossil forms) " \
                  "at all ranks.\n\n" \
                  "As a result of changes to the ICN (previously ICBN) relating to " \
                  "registration of names and following the lead taken by MycoBank, " \
                  "Index Fungorum now provides a mechanism to register names of " \
                  "new taxa, new names, new combinations and new typifications — no " \
                  "login is required. Names registered at Index Fungorum can be " \
                  "published immediately through the Index Fungorum e-Publication " \
                  "facility — an authorized login is required for this.\n\n" \
                  "Species Fungorum is currently an RBG Kew coordinated initiative " \
                  "to compile a global checklist of the fungi. You may search " \
                  "systematically defined and taxonomically complete datasets - " \
                  "global species databases - or the entire Species Fungorum. " \
                  "Species Fungorum contributes the fungal component to the Species " \
                  "2000 project and, in partnership with ITIS, to the Catalogue " \
                  "of Life (currently used in the GBIF and EoL portal); for more " \
                  "information regarding these global initiative visit their " \
                  "websites. Please contact Paul Kirk if you you would like to " \
                  "contribute to Species Fungorum.",
        url: @url
      }
      super
    end
  end
end
@@ -0,0 +1,98 @@
# frozen_string_literal: true

module DwcaHunter
  # Converts a cached Index to Organism Names (ION) dump into a
  # DarwinCore Archive.
  class ResourceION < DwcaHunter::Resource
    # Sets the resource constants (command name, title, source URL, UUID and
    # local download path) and then hands +opts+ to DwcaHunter::Resource.
    #
    # @param opts [Hash] options forwarded unchanged to the superclass
    def initialize(opts = {})
      @command = "ion"
      @title = "Index to Organism Names"
      @url = "https://uofi.box.com/shared/static/tklh8i6q2kb33g6ki33k6s3is06lo9np.gz"
      # Lower-case @uuid on purpose: generate_dwca reads @uuid for the EML id,
      # so assigning @UUID left the id nil.
      @uuid = "1137dfa3-5b8c-487d-b497-dc0938605864"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "ion",
                                 "data.tar.gz")
      @names = []
      @extensions = []
      super(opts)
    end

    # Fetches the cached archive from @url into @download_path using curl.
    def download
      # NOTE(review): "verion" in the message below looks like a typo for
      # "version"; the text is left untouched here.
      puts "Downloading cached verion of the file. Ask Rod Page to make new."
      `curl -s -L #{@url} -o #{@download_path}`
    end

    # Extracts the downloaded tarball; unpack_tar is not defined in this
    # class (presumably provided by DwcaHunter::Resource -- verify).
    def unpack
      unpack_tar
    end

    # Reads the extracted TSV and builds the DarwinCore Archive from it.
    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    # Switches the working directory to @download_dir and loads the names.
    # @download_dir is not assigned in this class -- presumably the
    # superclass derives it from @download_path; verify.
    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    # Appends one hash per row of ion.tsv to @names and prints a progress
    # line every 10,000 rows. The block form of CSV.open closes the file
    # when the block returns, so no file handle is left open.
    def collect_names
      tsv_path = File.join(@download_dir, "ion.tsv")
      # quote_char is a Cyrillic letter -- presumably chosen because it does
      # not occur in the data, so that double quotes inside fields are read
      # literally; confirm against the dump.
      CSV.open(tsv_path, headers: true, col_sep: "\t", quote_char: "щ") do |tsv|
        tsv.each_with_index do |row, i|
          @names << { taxon_id: row["id"],
                      name_string: row["nameComplete"],
                      auth: row["taxonAuthor"] }

          puts format("Processed %s names", i) if (i % 10_000).zero?
        end
      end
    end

    # Fills @core (a header row of DarwinCore term URIs followed by one row
    # per collected name) and @eml (dataset metadata), then calls the
    # superclass implementation.
    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"]]
      # Column order must match the header row above.
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string], n[:auth]]
      end

      # NOTE(review): "approximately ,000" in the abstract looks truncated;
      # the intended figure is not known here, so the text is left as is.
      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "Nigel",
            last_name: "Robinson",
            email: "nigel.robinson@thomsonreuters.com" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "ION contains millions of animal names, both fossil and " \
                  "recent, at all taxonomic ranks, reported from the scientific " \
                  "literature. (Bacteria, plant and virus names will be added soon)." \
                  "\n\n" \
                  "These names are derived from premier Clarivate databases: " \
                  "Zoological Record®, BIOSIS Previews®, and Biological Abstracts®. " \
                  "All names are tied to at least one published article. Together, " \
                  "these resources cover every aspect of the life sciences - " \
                  "providing names from over 30 million scientific records, " \
                  "including approximately ,000 international journals, patents, " \
                  "books, and conference proceedings. They provide a powerful " \
                  "foundation for the most complete collection of organism names " \
                  "available today.",
        url: @url
      }
      super
    end
  end
end
@@ -4,7 +4,7 @@ module DwcaHunter
4
4
  class ResourceITIS < DwcaHunter::Resource
5
5
  def initialize(opts = {})
6
6
  @command = "itis"
7
- @title = "ITIS"
7
+ @title = "Integrated Taxonomic Information SystemITIS"
8
8
  @url = "https://www.itis.gov/downloads/itisMySQLTables.tar.gz"
9
9
  @uuid = "5d066e84-e512-4a2f-875c-0a605d3d9f35"
10
10
  @download_path = File.join(Dir.tmpdir,
@@ -4,7 +4,7 @@ module DwcaHunter
4
4
  class ResourceNCBI < DwcaHunter::Resource
5
5
  def initialize(opts = {})
6
6
  @command = "ncbi"
7
- @title = "NCBI"
7
+ @title = "National Center for Biotechnology Information"
8
8
  @url = "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz"
9
9
  @uuid = "97d7633b-5f79-4307-a397-3c29402d9311"
10
10
  @download_path = File.join(Dir.tmpdir,
@@ -1,5 +1,5 @@
1
1
  module DwcaHunter
2
- VERSION = "0.7.0"
2
+ VERSION = "0.7.1"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwca_hunter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-15 00:00:00.000000000 Z
11
+ date: 2021-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.10'
75
+ version: '1.11'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.10'
82
+ version: '1.11'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rest-client
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -242,7 +242,9 @@ files:
242
242
  - lib/dwca_hunter/resources/freebase.rb
243
243
  - lib/dwca_hunter/resources/gnub.rb
244
244
  - lib/dwca_hunter/resources/how-moore-birds.rb
245
+ - lib/dwca_hunter/resources/index-fungorum.rb
245
246
  - lib/dwca_hunter/resources/ioc_word_bird.rb
247
+ - lib/dwca_hunter/resources/ion.rb
246
248
  - lib/dwca_hunter/resources/ipni.rb
247
249
  - lib/dwca_hunter/resources/itis.rb
248
250
  - lib/dwca_hunter/resources/mammal_divdb.rb