dwca_hunter 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +36 -32
- data/dwca_hunter.gemspec +1 -1
- data/lib/dwca_hunter/resources/index-fungorum.rb +131 -0
- data/lib/dwca_hunter/resources/ion.rb +98 -0
- data/lib/dwca_hunter/resources/itis.rb +1 -1
- data/lib/dwca_hunter/resources/ncbi.rb +1 -1
- data/lib/dwca_hunter/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60327e105c53c226f322e3a7272bdc5747d73fac0124887b024f99e3c39c985b
|
4
|
+
data.tar.gz: '09660f8b5feccfaf4caeaec277db4dc4729a973196a77bb02860947ef55bd272'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9b0a621f85535f421eef5a8550ce653c4f3483f563c7b19934a76e8f30b0cdf17e7a8c59945ea31455c57a350a14d345993f5fe6b91d656f5eb40317da6b1af9
|
7
|
+
data.tar.gz: 00a54b23a8588e6d304d35bb8756f633fb409777f07908525d257f4b1a23c5956a9683863b491f50e8d772c8b41343f909e6cb252a326d4f2e662f96f37826ed
|
data/Gemfile.lock
CHANGED
@@ -6,7 +6,7 @@ PATH
|
|
6
6
|
dwc-archive (~> 1.1.1)
|
7
7
|
gn_uuid (~> 0.5)
|
8
8
|
htmlentities (~> 4.3)
|
9
|
-
nokogiri (~> 1.
|
9
|
+
nokogiri (~> 1.11)
|
10
10
|
rest-client (~> 2.0)
|
11
11
|
ruby-xz (~> 1.0)
|
12
12
|
thor (~> 0.19)
|
@@ -14,7 +14,7 @@ PATH
|
|
14
14
|
GEM
|
15
15
|
remote: http://rubygems.org/
|
16
16
|
specs:
|
17
|
-
ast (2.4.
|
17
|
+
ast (2.4.1)
|
18
18
|
biodiversity (4.1.0)
|
19
19
|
ffi (~> 1.11)
|
20
20
|
byebug (10.0.2)
|
@@ -24,62 +24,66 @@ GEM
|
|
24
24
|
term-ansicolor (~> 1.3)
|
25
25
|
thor (>= 0.19.4, < 2.0)
|
26
26
|
tins (~> 1.6)
|
27
|
-
diff-lcs (1.
|
28
|
-
docile (1.3.
|
27
|
+
diff-lcs (1.4.4)
|
28
|
+
docile (1.3.4)
|
29
29
|
domain_name (0.5.20190701)
|
30
30
|
unf (>= 0.0.5, < 1.0.0)
|
31
31
|
dwc-archive (1.1.1)
|
32
32
|
biodiversity (~> 4)
|
33
33
|
nokogiri (~> 1.10)
|
34
|
-
ffi (1.
|
34
|
+
ffi (1.14.2)
|
35
35
|
gn_uuid (0.5.1)
|
36
36
|
htmlentities (4.3.4)
|
37
37
|
http-accept (1.7.0)
|
38
38
|
http-cookie (1.0.3)
|
39
39
|
domain_name (~> 0.5)
|
40
|
-
json (2.
|
40
|
+
json (2.5.1)
|
41
41
|
mime-types (3.3.1)
|
42
42
|
mime-types-data (~> 3.2015)
|
43
|
-
mime-types-data (3.2020.
|
44
|
-
mini_portile2 (2.
|
43
|
+
mime-types-data (3.2020.1104)
|
44
|
+
mini_portile2 (2.5.0)
|
45
45
|
netrc (0.11.0)
|
46
|
-
nokogiri (1.
|
47
|
-
mini_portile2 (~> 2.
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
nokogiri (1.11.1)
|
47
|
+
mini_portile2 (~> 2.5.0)
|
48
|
+
racc (~> 1.4)
|
49
|
+
parallel (1.20.1)
|
50
|
+
parser (3.0.0.0)
|
51
|
+
ast (~> 2.4.1)
|
52
|
+
racc (1.5.2)
|
51
53
|
rainbow (3.0.0)
|
52
|
-
rake (13.0.
|
54
|
+
rake (13.0.3)
|
55
|
+
regexp_parser (2.0.3)
|
53
56
|
rest-client (2.1.0)
|
54
57
|
http-accept (>= 1.7.0, < 2.0)
|
55
58
|
http-cookie (>= 1.0.2, < 2.0)
|
56
59
|
mime-types (>= 1.16, < 4.0)
|
57
60
|
netrc (~> 0.8)
|
58
61
|
rexml (3.2.4)
|
59
|
-
rspec (3.
|
60
|
-
rspec-core (~> 3.
|
61
|
-
rspec-expectations (~> 3.
|
62
|
-
rspec-mocks (~> 3.
|
63
|
-
rspec-core (3.
|
64
|
-
rspec-support (~> 3.
|
65
|
-
rspec-expectations (3.
|
62
|
+
rspec (3.10.0)
|
63
|
+
rspec-core (~> 3.10.0)
|
64
|
+
rspec-expectations (~> 3.10.0)
|
65
|
+
rspec-mocks (~> 3.10.0)
|
66
|
+
rspec-core (3.10.1)
|
67
|
+
rspec-support (~> 3.10.0)
|
68
|
+
rspec-expectations (3.10.1)
|
66
69
|
diff-lcs (>= 1.2.0, < 2.0)
|
67
|
-
rspec-support (~> 3.
|
68
|
-
rspec-mocks (3.
|
70
|
+
rspec-support (~> 3.10.0)
|
71
|
+
rspec-mocks (3.10.1)
|
69
72
|
diff-lcs (>= 1.2.0, < 2.0)
|
70
|
-
rspec-support (~> 3.
|
71
|
-
rspec-support (3.
|
72
|
-
rubocop (0.
|
73
|
+
rspec-support (~> 3.10.0)
|
74
|
+
rspec-support (3.10.1)
|
75
|
+
rubocop (0.93.1)
|
73
76
|
parallel (~> 1.10)
|
74
|
-
parser (>= 2.7.
|
77
|
+
parser (>= 2.7.1.5)
|
75
78
|
rainbow (>= 2.2.2, < 4.0)
|
79
|
+
regexp_parser (>= 1.8)
|
76
80
|
rexml
|
77
|
-
rubocop-ast (>= 0.0
|
81
|
+
rubocop-ast (>= 0.6.0)
|
78
82
|
ruby-progressbar (~> 1.7)
|
79
83
|
unicode-display_width (>= 1.4.0, < 2.0)
|
80
|
-
rubocop-ast (
|
81
|
-
parser (>= 2.7.
|
82
|
-
ruby-progressbar (1.
|
84
|
+
rubocop-ast (1.4.0)
|
85
|
+
parser (>= 2.7.1.5)
|
86
|
+
ruby-progressbar (1.11.0)
|
83
87
|
ruby-xz (1.0.0)
|
84
88
|
simplecov (0.16.1)
|
85
89
|
docile (~> 1.1)
|
@@ -90,7 +94,7 @@ GEM
|
|
90
94
|
term-ansicolor (1.7.1)
|
91
95
|
tins (~> 1.0)
|
92
96
|
thor (0.20.3)
|
93
|
-
tins (1.
|
97
|
+
tins (1.26.0)
|
94
98
|
sync
|
95
99
|
unf (0.1.4)
|
96
100
|
unf_ext
|
data/dwca_hunter.gemspec
CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |gem|
|
|
30
30
|
gem.add_dependency "dwc-archive", "~> 1.1.1"
|
31
31
|
gem.add_dependency "gn_uuid", "~> 0.5"
|
32
32
|
gem.add_dependency "htmlentities", "~> 4.3"
|
33
|
-
gem.add_dependency "nokogiri", "~> 1.
|
33
|
+
gem.add_dependency "nokogiri", "~> 1.11"
|
34
34
|
gem.add_dependency "rest-client", "~> 2.0"
|
35
35
|
gem.add_dependency "ruby-xz", "~> 1.0"
|
36
36
|
gem.add_dependency "thor", "~> 0.19"
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DwcaHunter
  # Converts the Index Fungorum (Species Fungorum) CSV dump into a
  # DarwinCore Archive.
  #
  # NOTE(review): the class is called ResourceAOS although this file is
  # index-fungorum.rb and the command is "index-fungorum". This looks like a
  # copy-paste leftover; if another resource also defines ResourceAOS this
  # definition would reopen it. Confirm and rename together with all callers.
  class ResourceAOS < DwcaHunter::Resource
    # Sets the resource metadata and the download location, then delegates
    # to DwcaHunter::Resource#initialize.
    #
    # @param opts [Hash] options passed through unchanged to the superclass
    def initialize(opts = {})
      @command = "index-fungorum"
      @title = "Index Fungorum (Species Fungorum)"
      @url = "https://uofi.box.com/shared/static/54l3b7h4q4pwqq4fgqvx42h3d328fl1c.csv"
      # Must be @uuid (lowercase): generate_dwca reads @uuid for the EML id.
      @uuid = "af06816a-0b28-4a09-8219-bd1d63289858"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "index-fungorum",
                                 "data.csv")
      @synonyms = []
      @names = []
      @extensions = []
      @synonyms_hash = {}
      super(opts)
    end

    # Fetches the CSV from @url into @download_path using curl.
    def download
      puts "Downloading csv from remote"
      `curl -s -L #{@url} -o #{@download_path}`
    end

    # Nothing to unpack: the download is already a plain CSV file.
    def unpack; end

    # Reads the downloaded CSV and builds the DarwinCore Archive.
    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    # Switches to the download directory and collects names from the CSV.
    # @download_dir is not assigned in this class; presumably it is set by
    # DwcaHunter::Resource -- TODO confirm.
    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    # Appends one hash per CSV row to @names.
    def collect_names
      @names_index = {}
      csv_path = File.join(@download_dir, "data.csv")
      # Block form guarantees the file handle is closed after reading.
      CSV.open(csv_path, headers: true) do |csv|
        csv.each do |row|
          @names << {
            taxon_id: row["RECORD NUMBER"],
            name_string: full_name(row),
            current_id: row["CURRENT NAME RECORD NUMBER"],
            kingdom: row["Kingdom name"],
            phylum: row["Phylum name"],
            klass: row["Class name"],
            order: row["Order name"],
            family: row["Family name"],
            code: "ICN"
          }
        end
      end
    end

    # Joins name, authors and year with single spaces, skipping blank parts
    # so a missing author or year does not leave stray whitespace.
    def full_name(row)
      parts = [row["NAME OF FUNGUS"], row["AUTHORS"],
               row["YEAR OF PUBLICATION"]]
      parts.reject { |part| part.nil? || part.empty? }.join(" ")
    end

    # Fills @core (header row plus one row per name) and @eml, then calls
    # the superclass implementation.
    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/acceptedNameUsageID",
                "http://rs.tdwg.org/dwc/terms/kingdom",
                "http://rs.tdwg.org/dwc/terms/phylum",
                "http://rs.tdwg.org/dwc/terms/class",
                "http://rs.tdwg.org/dwc/terms/order",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
      # Column order must match the header row above.
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string], n[:current_id],
                  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
                  n[:code]]
      end

      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "Paul",
            last_name: "Kirk" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "The Index Fungorum, the global fungal nomenclator " \
        "coordinated and supported by the Index Fungorum Partnership, " \
        "contains names of fungi (including yeasts, lichens, chromistan " \
        "fungal analogues, protozoan fungal analogues and fossil forms) " \
        "at all ranks.\n\n" \
        "As a result of changes to the ICN (previously ICBN) relating to " \
        "registration of names and following the lead taken by MycoBank, " \
        "Index Fungorum now provides a mechanism to register names of " \
        "new taxa, new names, new combinations and new typifications — no " \
        "login is required. Names registered at Index Fungorum can be " \
        "published immediately through the Index Fungorum e-Publication " \
        "facility — an authorized login is required for this.\n\n" \
        "Species Fungorum is currently an RBG Kew coordinated initiative " \
        "to compile a global checklist of the fungi. You may search " \
        "systematically defined and taxonomically complete datasets - " \
        "global species databases - or the entire Species Fungorum. " \
        "Species Fungorum contributes the fungal component to the Species " \
        "2000 project and, in partnership with ITIS, to the Catalogue " \
        "of Life (currently used in the GBIF and EoL portal); for more " \
        "information regarding these global initiative visit their " \
        "websites. Please contact Paul Kirk if you you would like to " \
        "contribute to Species Fungorum.",
        url: @url
      }
      super
    end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DwcaHunter
  # Converts a cached Index to Organism Names (ION) dump into a
  # DarwinCore Archive.
  class ResourceION < DwcaHunter::Resource
    # Sets the resource metadata and the download location, then delegates
    # to DwcaHunter::Resource#initialize.
    #
    # @param opts [Hash] options passed through unchanged to the superclass
    def initialize(opts = {})
      @command = "ion"
      @title = "Index to Organism Names"
      @url = "https://uofi.box.com/shared/static/tklh8i6q2kb33g6ki33k6s3is06lo9np.gz"
      # Must be @uuid (lowercase): generate_dwca reads @uuid for the EML id.
      @uuid = "1137dfa3-5b8c-487d-b497-dc0938605864"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "ion",
                                 "data.tar.gz")
      @names = []
      @extensions = []
      super(opts)
    end

    # Fetches the cached archive from @url into @download_path using curl.
    def download
      puts "Downloading cached verion of the file. Ask Rod Page to make new."
      `curl -s -L #{@url} -o #{@download_path}`
    end

    # Extracts the downloaded archive via unpack_tar, which is not defined
    # in this class (presumably inherited from DwcaHunter::Resource).
    def unpack
      unpack_tar
    end

    # Reads the extracted TSV and builds the DarwinCore Archive.
    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    # Switches to the download directory and collects names from the TSV.
    # @download_dir is not assigned in this class; presumably it is set by
    # DwcaHunter::Resource -- TODO confirm.
    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    # Appends one hash per row of ion.tsv to @names and prints progress
    # every 10,000 rows.
    def collect_names
      tsv_path = File.join(@download_dir, "ion.tsv")
      # quote_char is set to a Cyrillic letter; presumably one that never
      # occurs in the data, so ordinary quotes are read literally -- verify.
      # Block form guarantees the file handle is closed after reading.
      CSV.open(tsv_path, headers: true, col_sep: "\t",
                         quote_char: "щ") do |tsv|
        tsv.each_with_index do |row, i|
          @names << { taxon_id: row["id"],
                      name_string: row["nameComplete"],
                      auth: row["taxonAuthor"] }

          puts format("Processed %s names", i) if (i % 10_000).zero?
        end
      end
    end

    # Fills @core (header row plus one row per name) and @eml, then calls
    # the superclass implementation.
    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"]]
      # Column order must match the header row above.
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string], n[:auth]]
      end

      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "Nigel",
            last_name: "Robinson",
            email: "nigel.robinson@thomsonreuters.com" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        # NOTE(review): "approximately ,000" below appears to have lost its
        # leading digits; restore the figure from the ION source text.
        abstract: "ION contains millions of animal names, both fossil and " \
        "recent, at all taxonomic ranks, reported from the scientific " \
        "literature. (Bacteria, plant and virus names will be added soon)." \
        "\n\n" \
        "These names are derived from premier Clarivate databases: " \
        "Zoological Record®, BIOSIS Previews®, and Biological Abstracts®. " \
        "All names are tied to at least one published article. Together, " \
        "these resources cover every aspect of the life sciences - " \
        "providing names from over 30 million scientific records, " \
        "including approximately ,000 international journals, patents, " \
        "books, and conference proceedings. They provide a powerful " \
        "foundation for the most complete collection of organism names " \
        "available today.",
        url: @url
      }
      super
    end
  end
end
|
@@ -4,7 +4,7 @@ module DwcaHunter
|
|
4
4
|
class ResourceITIS < DwcaHunter::Resource
|
5
5
|
def initialize(opts = {})
|
6
6
|
@command = "itis"
|
7
|
-
@title = "
|
7
|
+
@title = "Integrated Taxonomic Information SystemITIS"
|
8
8
|
@url = "https://www.itis.gov/downloads/itisMySQLTables.tar.gz"
|
9
9
|
@uuid = "5d066e84-e512-4a2f-875c-0a605d3d9f35"
|
10
10
|
@download_path = File.join(Dir.tmpdir,
|
@@ -4,7 +4,7 @@ module DwcaHunter
|
|
4
4
|
class ResourceNCBI < DwcaHunter::Resource
|
5
5
|
def initialize(opts = {})
|
6
6
|
@command = "ncbi"
|
7
|
-
@title = "
|
7
|
+
@title = "National Center for Biotechnology Information"
|
8
8
|
@url = "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz"
|
9
9
|
@uuid = "97d7633b-5f79-4307-a397-3c29402d9311"
|
10
10
|
@download_path = File.join(Dir.tmpdir,
|
data/lib/dwca_hunter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwca_hunter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1.
|
75
|
+
version: '1.11'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1.
|
82
|
+
version: '1.11'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rest-client
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -242,7 +242,9 @@ files:
|
|
242
242
|
- lib/dwca_hunter/resources/freebase.rb
|
243
243
|
- lib/dwca_hunter/resources/gnub.rb
|
244
244
|
- lib/dwca_hunter/resources/how-moore-birds.rb
|
245
|
+
- lib/dwca_hunter/resources/index-fungorum.rb
|
245
246
|
- lib/dwca_hunter/resources/ioc_word_bird.rb
|
247
|
+
- lib/dwca_hunter/resources/ion.rb
|
246
248
|
- lib/dwca_hunter/resources/ipni.rb
|
247
249
|
- lib/dwca_hunter/resources/itis.rb
|
248
250
|
- lib/dwca_hunter/resources/mammal_divdb.rb
|