dwc-archive 0.9.5 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +31 -0
- data/.rspec +3 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -4
- data/CHANGELOG +17 -5
- data/Gemfile +3 -15
- data/LICENSE +1 -1
- data/README.md +143 -111
- data/Rakefile +13 -49
- data/dwc-archive.gemspec +37 -0
- data/features/step_definitions/dwc-creator_steps.rb +5 -5
- data/features/step_definitions/dwc-reader_steps.rb +47 -28
- data/features/support/env.rb +1 -1
- data/lib/dwc_archive.rb +121 -0
- data/lib/dwc_archive/archive.rb +59 -0
- data/lib/dwc_archive/classification_normalizer.rb +394 -0
- data/lib/dwc_archive/core.rb +25 -0
- data/lib/{dwc-archive → dwc_archive}/errors.rb +2 -0
- data/lib/dwc_archive/expander.rb +85 -0
- data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
- data/lib/dwc_archive/generator.rb +90 -0
- data/lib/dwc_archive/generator_eml_xml.rb +116 -0
- data/lib/dwc_archive/generator_meta_xml.rb +72 -0
- data/lib/dwc_archive/gnub_taxon.rb +14 -0
- data/lib/dwc_archive/ingester.rb +106 -0
- data/lib/dwc_archive/metadata.rb +56 -0
- data/lib/dwc_archive/taxon_normalized.rb +23 -0
- data/lib/dwc_archive/version.rb +6 -0
- data/lib/dwc_archive/xml_reader.rb +89 -0
- data/spec/files/file with characters(3).gz b/data/spec/files/file with → characters(3).tar.gz +0 -0
- data/spec/files/generator_eml.xml +47 -0
- data/spec/files/generator_meta.xml +19 -0
- data/spec/files/gnub.tar.gz +0 -0
- data/spec/lib/classification_normalizer_spec.rb +214 -0
- data/spec/lib/core_spec.rb +100 -0
- data/spec/lib/darwin_core_spec.rb +249 -0
- data/spec/lib/generator_eml_xml_spec.rb +22 -0
- data/spec/lib/generator_meta_xml_spec.rb +22 -0
- data/spec/lib/generator_spec.rb +124 -0
- data/spec/lib/gnub_taxon_spec.rb +32 -0
- data/spec/lib/metadata_spec.rb +89 -0
- data/spec/lib/taxon_normalized_spec.rb +142 -0
- data/spec/lib/xml_reader_spec.rb +11 -11
- data/spec/spec_helper.rb +78 -7
- metadata +181 -92
- data/.rvmrc +0 -1
- data/Gemfile.lock +0 -155
- data/VERSION +0 -1
- data/lib/dwc-archive.rb +0 -95
- data/lib/dwc-archive/.expander.rb.swo +0 -0
- data/lib/dwc-archive/archive.rb +0 -37
- data/lib/dwc-archive/classification_normalizer.rb +0 -332
- data/lib/dwc-archive/core.rb +0 -17
- data/lib/dwc-archive/expander.rb +0 -80
- data/lib/dwc-archive/generator.rb +0 -75
- data/lib/dwc-archive/generator_eml_xml.rb +0 -84
- data/lib/dwc-archive/generator_meta_xml.rb +0 -50
- data/lib/dwc-archive/ingester.rb +0 -101
- data/lib/dwc-archive/metadata.rb +0 -42
- data/lib/dwc-archive/utf_regex_ruby18.rb +0 -10
- data/lib/dwc-archive/xml_reader.rb +0 -64
- data/spec/lib/dwc-archive_spec.rb +0 -236
- data/spec/spec.opts +0 -1
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DarwinCore
  # Represents data from EML (Ecological Metadata Language) file.
  #
  # Wraps the value returned by `archive.eml`, which is read here as a nested
  # Hash with Symbol keys, and exposes a handful of fields from it. Every
  # accessor returns nil when any step of its lookup path is missing.
  class Metadata
    attr_reader :metadata
    alias data metadata

    # archive - an object responding to #eml. When it is nil (the default),
    #           metadata is nil and every accessor returns nil.
    def initialize(archive = nil)
      @archive = archive
      @metadata = @archive&.eml
    end

    # Returns the `id` attribute of the dataset element, or nil.
    def id
      lookup(:eml, :dataset, :attributes, :id)
    end

    # Returns the `packageId` attribute of the root eml element, or nil.
    #
    # The lookup goes straight into the metadata hash, like every other
    # accessor in this class. Going through `@metadata.data` (as an earlier
    # version did) calls a method the hash does not have, which made this
    # accessor always return nil.
    def package_id
      lookup(:eml, :attributes, :packageId)
    end

    # Returns the dataset title, or nil.
    def title
      lookup(:eml, :dataset, :title)
    end

    # Returns an Array of hashes with :first_name, :last_name and :email keys,
    # one per dataset creator, or nil when no creator is present.
    # Fields missing from a creator entry are returned as nil.
    def authors
      creators = lookup(:eml, :dataset, :creator)
      return nil unless creators

      # A single creator arrives as one Hash, several as an Array of hashes;
      # wrap-and-flatten handles both (Array() would split a Hash into pairs).
      [creators].flatten.map do |creator|
        { first_name: creator.dig(:individualName, :givenName),
          last_name: creator.dig(:individualName, :surName),
          email: creator[:electronicMailAddress] }
      end
    end

    # Returns the dataset abstract as stored in the metadata hash, or nil.
    def abstract
      lookup(:eml, :dataset, :abstract)
    end

    # Returns the citation stored under additionalMetadata/metadata, or nil.
    def citation
      lookup(:eml, :additionalMetadata, :metadata, :citation)
    end

    # Returns the online distribution url of the dataset, or nil.
    def url
      lookup(:eml, :dataset, :distribution, :online, :url)
    end

    private

    # Walks the metadata hash along the given keys; nil as soon as one
    # of them is absent (or when there is no metadata at all).
    def lookup(*path)
      @metadata&.dig(*path)
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DarwinCore
  # Plain value holder for one taxon after normalization.
  # All fields are readable and writable; a new instance starts with nil
  # identifiers/rank/status, empty strings for names and ids kept as text,
  # and empty arrays for every collection field.
  class TaxonNormalized
    attr_accessor :id, :local_id, :global_id, :source, :parent_id
    attr_accessor :classification_path_id, :classification_path
    attr_accessor :linnean_classification_path
    attr_accessor :current_name, :current_name_canonical
    attr_accessor :synonyms, :vernacular_names
    attr_accessor :rank, :status

    def initialize
      # fields that start out unset
      @id = nil
      @parent_id = nil
      @rank = nil
      @status = nil

      # textual fields start out as empty strings
      @current_name = ""
      @current_name_canonical = ""
      @source = ""
      @local_id = ""
      @global_id = ""

      # collection fields each get their own empty array
      @classification_path = []
      @classification_path_id = []
      @linnean_classification_path = []
      @synonyms = []
      @vernacular_names = []
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DarwinCore
  # Converts an XML document into a nested Hash with Symbol keys.
  #
  # USAGE: DarwinCore::XmlReader.from_xml(YOUR_XML_STRING)
  #
  # modified from
  # http://stackoverflow.com/questions/1230741/
  # convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
  module XmlReader
    # Parses xml_io with Nokogiri and returns a one-entry Hash:
    # { root_element_name_as_symbol => converted content of the root }.
    def self.from_xml(xml_io)
      result = Nokogiri::XML(xml_io)
      { result.root.name.to_sym => self::Node.new(result.root).value }
    end

    # Node is a helper class to parse xml into hash
    class Node
      # node - an XML node object (element, text, ...) to convert.
      def initialize(node)
        @node = node
        @val = {}
      end

      # Returns the converted node: for an element, a Hash of its
      # attributes (under :attributes) and children, or a bare scalar when
      # the element holds nothing but a single text node; for any other
      # node, its content, converted to Integer when it is a plain
      # integer string.
      def value
        if @node.element?
          prepare_node_element
        else
          prepare(@node.content.to_s)
        end
      end

      private

      def prepare_node_element
        add_attributes
        add_children
        @val
      end

      # Turns strings that round-trip through to_i ("42", "-7") into
      # Integers; everything else ("007", "4.2", hashes, ...) is untouched.
      def prepare(data)
        data.is_a?(String) && data.to_i.to_s == data ? data.to_i : data
      end

      def add_attributes
        return if @node.attributes.empty?

        @val[:attributes] = {}
        @node.attributes.each_value do |attribute|
          add_attribute(@val[:attributes], attribute)
        end
      end

      def add_attribute(attributes, attribute)
        attributes[attribute.name.to_sym] = prepare(attribute.value)
      end

      def add_children
        @node.children.each { |child| process_child(child) }
      end

      def process_child(child)
        value = DarwinCore::XmlReader::Node.new(child).value
        # Decide by node type, not by name: an element that happens to be
        # called <text> must be stored as a child, not treated as raw text.
        if child.text?
          handle_text(child, value)
        else
          add_child_to_value(child, value)
        end
      end

      def add_child_to_value(child, value)
        key = child.name.to_sym
        if @val[key]
          handle_child_node(key, value)
        else
          @val[key] = prepare(value)
        end
      end

      # A repeated child name collects its values into an Array.
      def handle_child_node(child, val)
        if @val[child].is_a?(::Array)
          @val[child] << prepare(val)
        else
          @val[child] = [@val[child], prepare(val)]
        end
      end

      # A text node replaces the whole value only when it is the sole child;
      # text that sits between sibling nodes is ignored.
      def handle_text(child, val)
        @val = prepare(val) unless child.next_sibling || child.previous_sibling
      end
    end
  end
end
|
data/spec/files/file with characters(3).gz b/data/spec/files/file with → characters(3).tar.gz
RENAMED
File without changes
|
@@ -0,0 +1,47 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd">
|
3
|
+
<dataset id="1234">
|
4
|
+
<title>Test Classification</title>
|
5
|
+
<license>http://creativecommons.org/licenses/by-sa/3.0/</license>
|
6
|
+
<creator id="1" scope="document">
|
7
|
+
<individualName>
|
8
|
+
<givenName>John</givenName>
|
9
|
+
<surName>Doe</surName>
|
10
|
+
</individualName>
|
11
|
+
<organizationName>Example</organizationName>
|
12
|
+
<positionName>Assistant Professor</positionName>
|
13
|
+
<onlineUrl>http://example.org</onlineUrl>
|
14
|
+
<electronicMailAddress>jdoe@example.com</electronicMailAddress>
|
15
|
+
</creator>
|
16
|
+
<creator id="2" scope="document">
|
17
|
+
<individualName>
|
18
|
+
<givenName>Jane</givenName>
|
19
|
+
<surName>Doe</surName>
|
20
|
+
</individualName>
|
21
|
+
<electronicMailAddress>jane@example.com</electronicMailAddress>
|
22
|
+
</creator>
|
23
|
+
<metadataProvider>
|
24
|
+
<individualName>
|
25
|
+
<givenName>Jim</givenName>
|
26
|
+
<surName>Doe</surName>
|
27
|
+
</individualName>
|
28
|
+
<onlineUrl>http://aggregator.example.org</onlineUrl>
|
29
|
+
<electronicMailAddress>jimdoe@example.com</electronicMailAddress>
|
30
|
+
</metadataProvider>
|
31
|
+
<pubDate>2013-12-30 14:45:33 -0500</pubDate>
|
32
|
+
<abstract>
|
33
|
+
<para>test classification</para>
|
34
|
+
</abstract>
|
35
|
+
<contact>
|
36
|
+
<references>1</references>
|
37
|
+
</contact>
|
38
|
+
<contact>
|
39
|
+
<references>2</references>
|
40
|
+
</contact>
|
41
|
+
</dataset>
|
42
|
+
<additionalMetadata>
|
43
|
+
<metadata>
|
44
|
+
<citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
|
45
|
+
</metadata>
|
46
|
+
</additionalMetadata>
|
47
|
+
</eml:eml>
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
|
3
|
+
<core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
|
4
|
+
<files>
|
5
|
+
<location>core.csv</location>
|
6
|
+
</files>
|
7
|
+
<id index="0"/>
|
8
|
+
<field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
|
9
|
+
<field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
|
10
|
+
<field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
|
11
|
+
</core>
|
12
|
+
<extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
|
13
|
+
<files>
|
14
|
+
<location>vern.csv</location>
|
15
|
+
</files>
|
16
|
+
<coreid index="0"/>
|
17
|
+
<field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
|
18
|
+
</extension>
|
19
|
+
</archive>
|
Binary file
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Specs for DarwinCore::ClassificationNormalizer.
#
# Each example builds a DarwinCore object from an archive located in the
# "files" directory next to this spec's parent directory and feeds it to the
# normalizer. Contexts pick the archive by overriding `file_name` (or
# `file_path` directly).
describe DarwinCore::ClassificationNormalizer do
  # Only one subject per scope: the object under test. Supporting objects
  # are plain `let` helpers so they do not silently replace `subject`.
  subject(:normalizer) { DarwinCore::ClassificationNormalizer.new(dwca) }

  let(:dwca) { DarwinCore.new(file_path) }
  let(:file_dir) { File.expand_path("../files", __dir__) }
  let(:file_path) { File.join(file_dir, file_name) }

  describe ".new" do
    let(:file_path) { File.join(file_dir, "data.tar.gz") }

    it do
      expect(normalizer).to be_a DarwinCore::ClassificationNormalizer
    end
  end

  describe "#normalize" do
    let(:file_name) { "data.tar.gz" }

    it "returns normalized data" do
      res = normalizer.normalize
      expect(res).to be normalizer.normalized_data
    end

    context "flat list" do
      let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }

      it "returns flat list" do
        normalizer.normalize
        expect(normalizer.normalized_data).to be_kind_of Hash
        expect(normalizer.normalized_data.size).to be > 0
      end
    end

    context "synonyms from core" do
      let(:file_name) { "synonyms_in_core_accepted_name_field.tar.gz" }

      it "ingests synonyms using accepted_name field" do
        res = normalizer.normalize
        syn = res.reject { |_, v| v.synonyms.empty? }.values
        expect(syn.size).to be > 0
        expect(syn[0].synonyms[0]).to be_kind_of DarwinCore::SynonymNormalized
      end
    end

    context "synonyms from extension" do
      let(:file_name) { "synonyms_in_extension.tar.gz" }

      it "ingests synonyms from extension" do
        res = normalizer.normalize
        syn = res.reject { |_, v| v.synonyms.empty? }.values
        expect(syn.size).to be > 0
        expect(syn[0].synonyms[0]).to be_kind_of DarwinCore::SynonymNormalized
      end
    end

    context "synonyms are not extensions" do
      let(:file_name) { "not_synonym_in_extension.tar.gz" }

      it "does not ingest synonyms" do
        res = normalizer.normalize
        syn = res.reject { |_, v| v.synonyms.empty? }.values
        expect(syn).to be_empty
      end
    end

    context "with_extensions flag set on false" do
      let(:file_name) { "synonyms_in_extension.tar.gz" }

      it "should not harvest extensions" do
        res = normalizer.normalize(with_extensions: false)
        syn = res.reject { |_, v| v.synonyms.empty? }.values
        expect(syn).to be_empty
      end
    end

    context "linnean classification in file (class, order etc fields)" do
      let(:file_name) { "linnean.tar.gz" }

      it "assembles classification" do
        res = normalizer.normalize
        expect(res.first[1]).to be_kind_of DarwinCore::TaxonNormalized
        expect(res.first[1].linnean_classification_path).
          to eq [["Animalia", :kingdom],
                 ["Arthropoda", :phylum],
                 ["Insecta", :class],
                 ["Diptera", :order],
                 ["Cecidomyiidae", :family],
                 ["Resseliella", :genus]]
      end
    end

    # Uses the file_name of the enclosing "#normalize" group (data.tar.gz).
    context "no linnean fields are given" do
      it "returns empty linnean classification" do
        res = normalizer.normalize
        expect(res.first[1]).to be_kind_of DarwinCore::TaxonNormalized
        expect(res.first[1].linnean_classification_path).to be_empty
      end
    end

    context "in the presence of scientificNameAuthorship field" do
      let(:file_name) { "sci_name_authorship.tar.gz" }

      it "returns normalized data" do
        normalizer.normalize
        expect(normalizer.darwin_core.file_name).
          to eq "sci_name_authorship.tar.gz"
        expect(normalizer.normalized_data).to be_kind_of Hash
        expect(normalizer.normalized_data.size).to be > 0
        tn = normalizer.normalized_data["leptogastrinae:tid:2688"]
        expect(tn.current_name).to eq "Leptogaster fornicata Martin, 1957"
        expect(tn.current_name_canonical).to eq "Leptogaster fornicata"
      end
    end

    context "when scientificNameAuthorship duplicates author info" do
      let(:file_name) { "sci_name_authorship_dup.tar.gz" }

      it "returns normalized data" do
        normalizer.normalize
        expect(normalizer.darwin_core.file_name).
          to eq "sci_name_authorship_dup.tar.gz"
        expect(normalizer.normalized_data).to be_kind_of Hash
        expect(normalizer.normalized_data.size).to be > 0
        tn = normalizer.normalized_data["leptogastrinae:tid:2688"]
        expect(tn.current_name).to eq "Leptogaster fornicata Martin, 1957"
        expect(tn.current_name_canonical).to eq "Leptogaster fornicata"
      end
    end

    context "coreid is empty" do
      let(:file_name) { "empty_coreid.tar.gz" }

      it "should ingest information" do
        res = normalizer.normalize
        expect(normalizer.darwin_core.file_name).
          to eq "empty_coreid.tar.gz"
        tn = res["Taxon9"]
        expect(tn.current_name).to eq "Amanita phalloides"
      end
    end

    context "vernacular locality info" do
      let(:file_name) { "language_locality.tar.gz" }

      it "should ingest locality and language" do
        res = normalizer.normalize
        tn = res["leptogastrinae:tid:42"]
        vn = tn.vernacular_names[0]
        expect(vn.language).to eq "en"
        expect(vn.locality).to eq "New England"
      end
    end
  end

  describe "#name_strings" do
    let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }

    context "before running #normalize" do
      it "is empty" do
        expect(normalizer.name_strings).to be_empty
      end
    end

    context "after running #normalize" do
      let(:normalized) { normalizer.tap(&:normalize) }

      context "default attributes" do
        it "returns array" do
          expect(normalized.name_strings).to be_kind_of Array
          expect(normalized.name_strings.size).to be > 1
        end
      end

      context "with_hash attribute" do
        it "returns hash" do
          strings = normalized.name_strings(with_hash: true)
          expect(strings).to be_kind_of Hash
          expect(strings.size).to be > 1
          expect(strings.values.uniq).to eq [1]
        end
      end
    end
  end

  describe "#vernacular_name_strings" do
    let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }

    context "before running #normalize" do
      let(:vern) { normalizer.vernacular_name_strings }

      it "is empty" do
        expect(vern).to be_empty
      end
    end

    context "after running #normalize" do
      let(:normalized) { normalizer.tap(&:normalize) }
      let(:vern) { normalized.vernacular_name_strings }
      let(:vern_w_hash) do
        normalized.vernacular_name_strings(with_hash: true)
      end

      context "default attributes" do
        it "returns array" do
          expect(vern).to be_kind_of Array
          expect(vern.size).to be > 0
        end
      end

      context "with_hash attribute" do
        it "returns hash" do
          expect(vern_w_hash).to be_kind_of Hash
          expect(vern_w_hash.size).to be > 0
          expect(vern_w_hash.values.uniq).to eq [1]
        end
      end
    end
  end
end
|