dwc-archive 0.9.10 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -1
  4. data/.rubocop.yml +23 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -7
  7. data/CHANGELOG +14 -8
  8. data/Gemfile +3 -1
  9. data/LICENSE +1 -1
  10. data/README.md +119 -107
  11. data/Rakefile +13 -36
  12. data/dwc-archive.gemspec +23 -19
  13. data/features/step_definitions/dwc-creator_steps.rb +5 -5
  14. data/features/step_definitions/dwc-reader_steps.rb +47 -28
  15. data/features/support/env.rb +1 -1
  16. data/lib/dwc_archive.rb +124 -0
  17. data/lib/dwc_archive/archive.rb +60 -0
  18. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  19. data/lib/dwc_archive/core.rb +25 -0
  20. data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
  21. data/lib/dwc_archive/expander.rb +88 -0
  22. data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
  23. data/lib/dwc_archive/generator.rb +91 -0
  24. data/lib/dwc_archive/generator_eml_xml.rb +116 -0
  25. data/lib/dwc_archive/generator_meta_xml.rb +72 -0
  26. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  27. data/lib/dwc_archive/ingester.rb +106 -0
  28. data/lib/dwc_archive/metadata.rb +57 -0
  29. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  30. data/lib/dwc_archive/version.rb +6 -0
  31. data/lib/dwc_archive/xml_reader.rb +90 -0
  32. data/spec/files/file with characters(3).gz → data/spec/files/file with characters(3).tar.gz +0 -0
  33. data/spec/files/generator_eml.xml +47 -0
  34. data/spec/files/generator_meta.xml +19 -0
  35. data/spec/lib/classification_normalizer_spec.rb +96 -105
  36. data/spec/lib/core_spec.rb +43 -41
  37. data/spec/lib/darwin_core_spec.rb +108 -138
  38. data/spec/lib/generator_eml_xml_spec.rb +12 -11
  39. data/spec/lib/generator_meta_xml_spec.rb +12 -11
  40. data/spec/lib/generator_spec.rb +77 -69
  41. data/spec/lib/gnub_taxon_spec.rb +15 -17
  42. data/spec/lib/metadata_spec.rb +50 -41
  43. data/spec/lib/taxon_normalized_spec.rb +62 -65
  44. data/spec/lib/xml_reader_spec.rb +9 -12
  45. data/spec/spec_helper.rb +54 -51
  46. metadata +105 -88
  47. data/.rvmrc +0 -1
  48. data/] +0 -40
  49. data/lib/dwc-archive.rb +0 -107
  50. data/lib/dwc-archive/archive.rb +0 -40
  51. data/lib/dwc-archive/classification_normalizer.rb +0 -428
  52. data/lib/dwc-archive/core.rb +0 -17
  53. data/lib/dwc-archive/expander.rb +0 -84
  54. data/lib/dwc-archive/generator.rb +0 -85
  55. data/lib/dwc-archive/generator_eml_xml.rb +0 -86
  56. data/lib/dwc-archive/generator_meta_xml.rb +0 -58
  57. data/lib/dwc-archive/ingester.rb +0 -101
  58. data/lib/dwc-archive/metadata.rb +0 -48
  59. data/lib/dwc-archive/version.rb +0 -3
  60. data/lib/dwc-archive/xml_reader.rb +0 -64
@@ -0,0 +1,106 @@
# encoding: utf-8

require "shellwords"

class DarwinCore
  # This module abstracts information for reading a csv data file
  # (core or extension) of a DarwinCore archive. Including classes must
  # set @data, @dwc and @path before #init_attributes / #read are called.
  module Ingester
    attr_reader :data, :properties, :encoding
    attr_reader :file_path, :fields, :line_separator, :quote_character,
                :ignore_headers

    # Field separator used when parsing the csv file.
    # NOTE(review): the original declared `attr_reader :fields_separator`,
    # but the instance variable is @field_separator, so the reader always
    # returned nil. This method keeps the public name and returns the value.
    def fields_separator
      @field_separator
    end

    # Number of data lines in the file (memoized).
    def size
      @size ||= init_size
    end

    # Reads the csv file in batches of +batch_size+ rows.
    # Yields [valid_rows, error_rows] after each full batch when a block
    # is given; always returns the final [valid_rows, error_rows] pair.
    def read(batch_size = 10_000)
      DarwinCore.logger_write(@dwc.object_id, "Reading #{name} data")
      res = []
      errors = []
      args = define_csv_args
      # A valid row must have at least max(field index) + 1 columns.
      # (The original wrote `f[:index].to_i || 0`; `to_i` never returns
      # nil, so the `|| 0` was dead code.)
      min_size = @fields.map { |f| f[:index].to_i }.max + 1
      # File.open instead of Kernel#open: a path starting with "|" must
      # never be treated as a command.
      csv = CSV.new(File.open(@file_path), **args)
      csv.each_with_index do |r, i|
        next if @ignore_headers && i.zero?
        min_size > r.size ? errors << r : process_csv_row(res, errors, r)
        next if i.zero? || i % batch_size != 0
        DarwinCore.logger_write(@dwc.object_id,
                                format("Ingested %s records from %s",
                                       i, name))
        next unless block_given?
        yield [res, errors]
        res = []
        errors = []
      end
      yield [res, errors] if block_given?
      [res, errors]
    end

    private

    # CSV options; an empty quote character is replaced with "\b"
    # (a character that cannot appear in data) to disable quoting.
    def define_csv_args
      args = { col_sep: @field_separator }
      @quote_character = "\b" if @quote_character.empty?
      args.merge(quote_char: @quote_character)
    end

    # Short lower-case name of the including class, e.g. "core".
    def name
      self.class.to_s.split("::")[-1].downcase
    end

    # Keeps the row when its joined content is valid UTF-8,
    # otherwise records the whole row as an error.
    def process_csv_row(result, errors, row)
      str = row.join("")
      str = str.force_encoding("utf-8")
      if str.encoding.name == "UTF-8" && str.valid_encoding?
        result << row.map { |f| f.nil? ? nil : f.force_encoding("utf-8") }
      else
        errors << row
      end
    end

    def init_attributes
      @properties = @data[:attributes]
      init_encoding
      @field_separator = init_field_separator
      @quote_character = @properties[:fieldsEnclosedBy] || ""
      @line_separator = @properties[:linesTerminatedBy] || "\n"
      @ignore_headers = @properties[:ignoreHeaderLines] &&
                        [1, true].include?(@properties[:ignoreHeaderLines])
      init_file_path
      init_fields
    end

    # Only utf-8 and utf-16 (and their alias spellings) are supported.
    def init_encoding
      @encoding = @properties[:encoding] || "UTF-8"
      accepted_encoding = ["utf-8", "utf8", "utf-16", "utf16"].
                          include?(@encoding.downcase)
      fail(
        DarwinCore::EncodingError,
        "No support for encodings other than utf-8 or utf-16 at the moment"
      ) unless accepted_encoding
    end

    # NOTE(review): the original checked `unless @file_path` AFTER
    # File.join, which never returns nil (and raises TypeError when
    # `file` is nil) — the guard now checks `file` itself.
    def init_file_path
      file = @data[:location] ||
             @data[:attributes][:location] ||
             @data[:files][:location]
      fail DarwinCore::FileNotFoundError, "No file data" unless file
      @file_path = File.join(@path, file)
    end

    def init_fields
      @data[:field] = [data[:field]] if data[:field].class != Array
      @fields = @data[:field].map { |f| f[:attributes] }
      fail DarwinCore::InvalidArchiveError,
           "No data fields are found" if @fields.empty?
    end

    # The meta.xml file spells a tab separator as the literal "\t".
    def init_field_separator
      res = @properties[:fieldsTerminatedBy] || ","
      res = "\t" if res == "\\t"
      res
    end

    # Line count via `wc -l`; fast for large files but unix-only.
    # The path is shell-escaped so spaces/metacharacters cannot break
    # (or inject into) the command.
    def init_size
      `wc -l #{Shellwords.escape(@file_path)}`.
        match(/^\s*([\d]+)\s/)[1].to_i
    end
  end
end
@@ -0,0 +1,57 @@
# frozen_string_literal: true

class DarwinCore
  # Represents data from EML (Ecological Metadata Language) file.
  # All readers return nil when the corresponding element is absent.
  class Metadata
    attr_reader :metadata
    alias data metadata

    # @param archive [DarwinCore] archive whose #eml hash this wraps
    def initialize(archive = nil)
      @archive = archive
      @metadata = @archive.eml
    end

    # Dataset id attribute, or nil.
    def id
      fix_nil { @metadata[:eml][:dataset][:attributes][:id] }
    end

    # Package id of the eml document, or nil.
    # NOTE(review): the original read `@metadata.data[:eml]...`, but
    # @metadata is a plain Hash — the call always raised NoMethodError
    # and so package_id always returned nil.
    def package_id
      fix_nil { @metadata[:eml][:attributes][:packageId] }
    end

    # Dataset title, or nil.
    def title
      fix_nil { @metadata[:eml][:dataset][:title] }
    end

    # Array of { first_name:, last_name:, email: } hashes, or nil when
    # no creator entry exists.
    # NOTE(review): the original guarded with `defined?(...)`, which is
    # always truthy for a method call, so a missing creator crashed with
    # NoMethodError inside the map; it now returns nil instead.
    def authors
      creators = fix_nil { [@metadata[:eml][:dataset][:creator]].flatten.compact }
      return nil if creators.nil? || creators.empty?

      fix_nil do
        creators.map do |au|
          { first_name: au[:individualName][:givenName],
            last_name: au[:individualName][:surName],
            email: au[:electronicMailAddress] }
        end
      end
    end

    # Dataset abstract, or nil.
    def abstract
      fix_nil { @metadata[:eml][:dataset][:abstract] }
    end

    # Citation from additional metadata, or nil.
    def citation
      fix_nil { @metadata[:eml][:additionalMetadata][:metadata][:citation] }
    end

    # Online distribution url, or nil.
    def url
      fix_nil { @metadata[:eml][:dataset][:distribution][:online][:url] }
    end

    private

    # Turns NoMethodError from a missing nested key (nil[...]) into nil.
    def fix_nil
      yield
    rescue NoMethodError
      nil
    end
  end
end
@@ -0,0 +1,23 @@
# frozen_string_literal: true

class DarwinCore
  # Plain data holder for one taxon produced by the classification
  # normalizer: identifiers, names, classification paths, synonyms
  # and vernacular names.
  class TaxonNormalized
    attr_accessor :id, :local_id, :global_id, :source, :parent_id,
                  :classification_path_id, :classification_path,
                  :linnean_classification_path, :current_name,
                  :current_name_canonical, :synonyms, :vernacular_names,
                  :rank, :status

    # Every attribute starts "empty": nil for ids/rank/status,
    # "" for name and source strings, [] for the list-valued fields.
    def initialize
      @id = nil
      @parent_id = nil
      @rank = nil
      @status = nil
      @current_name = ""
      @current_name_canonical = ""
      @source = ""
      @local_id = ""
      @global_id = ""
      @classification_path = []
      @classification_path_id = []
      @synonyms = []
      @vernacular_names = []
      @linnean_classification_path = []
    end
  end
end
@@ -0,0 +1,6 @@
# frozen_string_literal: true

# DarwinCore carries the gem's semantic version string.
class DarwinCore
  VERSION = "1.1.2"
end
@@ -0,0 +1,90 @@
# frozen_string_literal: true

class DarwinCore
  # Converts an xml document into a nested Ruby hash.
  # USAGE: DarwinCore::XmlReader.from_xml(YOUR_XML_STRING)
  # adapted from
  # http://stackoverflow.com/questions/1230741/
  # convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
  module XmlReader
    # Parses xml and returns { root_element_name: <converted content> }.
    def self.from_xml(xml_io)
      doc = Nokogiri::XML(xml_io)
      { doc.root.name.to_sym => Node.new(doc.root).value }
    end

    # Node converts a single Nokogiri node (and its subtree) into a
    # hash, array, string or integer.
    class Node
      def initialize(node)
        @node = node
        @val = {}
      end

      # Value of the node: a hash for element nodes, a scalar for
      # text/other nodes.
      def value
        return prepare(@node.content.to_s) unless @node.element?

        prepare_node_element
      end

      private

      def prepare_node_element
        add_attributes
        add_children unless @node.children.empty?
        @val
      end

      # Numeric-looking strings become Integers; anything else passes
      # through untouched.
      def prepare(data)
        if data.instance_of?(String) && data.to_i.to_s == data
          data.to_i
        else
          data
        end
      end

      # Collects element attributes under the :attributes key.
      def add_attributes
        return if @node.attributes.empty?

        attrs = @val[:attributes] = {}
        @node.attributes.each_value do |attribute|
          add_attribute(attrs, attribute)
        end
      end

      def add_attribute(attributes, attribute)
        attributes[attribute.name.to_sym] = prepare(attribute.value)
      end

      def add_children
        @node.children.each { |child| process_child(child) }
      end

      def process_child(child)
        child_value = Node.new(child).value
        if child.name == "text"
          handle_text(child, child_value)
        else
          add_child_to_value(child, child_value)
        end
      end

      def add_child_to_value(child, value)
        key = child.name.to_sym
        if @val[key]
          handle_child_node(key, value)
        else
          @val[key] = prepare(value)
        end
      end

      # Repeated sibling elements collapse into an array under one key.
      def handle_child_node(child, val)
        if @val[child].is_a?(Array)
          @val[child] << prepare(val)
        else
          @val[child] = [@val[child], prepare(val)]
        end
      end

      # A lone text child replaces the element hash with its scalar
      # content; surrounded text (mixed content) is dropped.
      def handle_text(child, val)
        @val = prepare(val) unless child.next_sibling || child.previous_sibling
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ <?xml version="1.0"?>
2
+ <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd">
3
+ <dataset id="1234">
4
+ <title>Test Classification</title>
5
+ <license>http://creativecommons.org/licenses/by-sa/3.0/</license>
6
+ <creator id="1" scope="document">
7
+ <individualName>
8
+ <givenName>John</givenName>
9
+ <surName>Doe</surName>
10
+ </individualName>
11
+ <organizationName>Example</organizationName>
12
+ <positionName>Assistant Professor</positionName>
13
+ <onlineUrl>http://example.org</onlineUrl>
14
+ <electronicMailAddress>jdoe@example.com</electronicMailAddress>
15
+ </creator>
16
+ <creator id="2" scope="document">
17
+ <individualName>
18
+ <givenName>Jane</givenName>
19
+ <surName>Doe</surName>
20
+ </individualName>
21
+ <electronicMailAddress>jane@example.com</electronicMailAddress>
22
+ </creator>
23
+ <metadataProvider>
24
+ <individualName>
25
+ <givenName>Jim</givenName>
26
+ <surName>Doe</surName>
27
+ </individualName>
28
+ <onlineUrl>http://aggregator.example.org</onlineUrl>
29
+ <electronicMailAddress>jimdoe@example.com</electronicMailAddress>
30
+ </metadataProvider>
31
+ <pubDate>2013-12-30 14:45:33 -0500</pubDate>
32
+ <abstract>
33
+ <para>test classification</para>
34
+ </abstract>
35
+ <contact>
36
+ <references>1</references>
37
+ </contact>
38
+ <contact>
39
+ <references>2</references>
40
+ </contact>
41
+ </dataset>
42
+ <additionalMetadata>
43
+ <metadata>
44
+ <citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
45
+ </metadata>
46
+ </additionalMetadata>
47
+ </eml:eml>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0"?>
2
+ <archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
3
+ <core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
4
+ <files>
5
+ <location>core.csv</location>
6
+ </files>
7
+ <id index="0"/>
8
+ <field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
9
+ <field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
10
+ <field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
11
+ </core>
12
+ <extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
13
+ <files>
14
+ <location>vern.csv</location>
15
+ </files>
16
+ <coreid index="0"/>
17
+ <field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
18
+ </extension>
19
+ </archive>
@@ -1,223 +1,214 @@
1
- require_relative '../spec_helper'
2
- # encoding: utf-8
1
+ # frozen_string_literal: true
3
2
 
4
3
  describe DarwinCore::ClassificationNormalizer do
5
-
6
4
  subject(:dwca) { DarwinCore.new(file_path) }
7
5
  subject(:normalizer) { DarwinCore::ClassificationNormalizer.new(dwca) }
8
-
9
- let(:file_dir) { File.expand_path('../../files', __FILE__) }
6
+
7
+ let(:file_dir) { File.expand_path("../files", __dir__) }
10
8
  let(:file_path) { File.join(file_dir, file_name) }
11
9
 
12
- describe '.new' do
13
- let(:file_path) { File.join(file_dir, 'data.tar.gz') }
14
- it { expect(normalizer.is_a? DarwinCore::ClassificationNormalizer).
15
- to be_true }
16
- end
10
+ describe ".new" do
11
+ let(:file_path) { File.join(file_dir, "data.tar.gz") }
12
+ it do
13
+ expect(normalizer.is_a?(DarwinCore::ClassificationNormalizer)).to be true
14
+ end
15
+ end
17
16
 
18
- describe '#normalize' do
19
- let(:file_name) { 'data.tar.gz' }
17
+ describe "#normalize" do
18
+ let(:file_name) { "data.tar.gz" }
20
19
 
21
- it 'returns normalized data' do
20
+ it "returns normalized data" do
22
21
  res = normalizer.normalize
23
22
  expect(res).to be normalizer.normalized_data
24
23
  end
25
24
 
25
+ context "flat list" do
26
+ let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }
26
27
 
27
- context 'flat list' do
28
- let(:file_path) { File.join(file_dir, 'flat_list.tar.gz') }
29
-
30
- it 'returns flat list' do
28
+ it "returns flat list" do
31
29
  normalizer.normalize
32
30
  expect(normalizer.normalized_data).to be_kind_of Hash
33
31
  expect(normalizer.normalized_data.size).to be > 0
34
32
  end
35
33
  end
36
34
 
37
- context 'synonyms from core' do
38
- let(:file_name) { 'synonyms_in_core_accepted_name_field.tar.gz' }
35
+ context "synonyms from core" do
36
+ let(:file_name) { "synonyms_in_core_accepted_name_field.tar.gz" }
39
37
 
40
- it 'ingests synonyms using accepted_name field' do
38
+ it "ingests synonyms using accepted_name field" do
41
39
  res = normalizer.normalize
42
- syn = res.select { |k,v| !v.synonyms.empty? }.
43
- map { |k,v| v }
40
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
44
41
  expect(syn.size).to be > 0
45
42
  expect(syn[0].synonyms[0]).to be_kind_of DarwinCore::SynonymNormalized
46
43
  end
47
44
  end
48
45
 
49
- context 'synonyms from extension' do
50
- let(:file_name) { 'synonyms_in_extension.tar.gz' }
51
- it 'ingests synonyms from extension' do
46
+ context "synonyms from extension" do
47
+ let(:file_name) { "synonyms_in_extension.tar.gz" }
48
+ it "ingests synonyms from extension" do
52
49
  res = normalizer.normalize
53
- syn = res.select { |k,v| !v.synonyms.empty? }.
54
- map { |k,v| v }
50
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
55
51
  expect(syn.size).to be > 0
56
52
  expect(syn[0].synonyms[0]).to be_kind_of DarwinCore::SynonymNormalized
57
53
  end
58
54
  end
59
55
 
60
- context 'synonyms are not extensions' do
61
- let(:file_name) { 'not_synonym_in_extension.tar.gz' }
56
+ context "synonyms are not extensions" do
57
+ let(:file_name) { "not_synonym_in_extension.tar.gz" }
62
58
 
63
- it 'does not ingest synonyms' do
59
+ it "does not ingest synonyms" do
64
60
  res = normalizer.normalize
65
- syn = res.select { |k,v| !v.synonyms.empty? }.
66
- map { |k,v| v }
61
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
67
62
  expect(syn).to be_empty
68
63
  end
69
64
  end
70
65
 
71
- context 'with_extensions flag set on false' do
72
- let(:file_name) { 'synonyms_in_extension.tar.gz' }
73
- it 'should not harvest extensions' do
66
+ context "with_extensions flag set on false" do
67
+ let(:file_name) { "synonyms_in_extension.tar.gz" }
68
+ it "should not harvest extensions" do
74
69
  res = normalizer.normalize(with_extensions: false)
75
- syn = res.select { |k,v| !v.synonyms.empty? }.
76
- map { |k,v| v }
70
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
77
71
  expect(syn).to be_empty
78
72
  end
79
73
  end
80
74
 
81
- context 'linnean classification in file (class, order etc fields)' do
82
- let(:file_name) { 'linnean.tar.gz' }
75
+ context "linnean classification in file (class, order etc fields)" do
76
+ let(:file_name) { "linnean.tar.gz" }
83
77
 
84
- it 'assembles classification' do
78
+ it "assembles classification" do
85
79
  res = normalizer.normalize
86
80
  expect(res.first[1]).to be_kind_of DarwinCore::TaxonNormalized
87
81
  expect(res.first[1].linnean_classification_path).
88
- to eq [["Animalia", :kingdom],
89
- ["Arthropoda", :phylum],
90
- ["Insecta", :class],
91
- ["Diptera", :order],
92
- ["Cecidomyiidae", :family],
82
+ to eq [["Animalia", :kingdom],
83
+ ["Arthropoda", :phylum],
84
+ ["Insecta", :class],
85
+ ["Diptera", :order],
86
+ ["Cecidomyiidae", :family],
93
87
  ["Resseliella", :genus]]
94
-
95
88
  end
96
89
  end
97
90
 
98
- context 'no linnean fields are given' do
99
- it 'returns empty linnean classification' do
91
+ context "no linnean fields are given" do
92
+ it "returns empty linnean classification" do
100
93
  res = normalizer.normalize
101
94
  expect(res.first[1]).to be_kind_of DarwinCore::TaxonNormalized
102
95
  expect(res.first[1].linnean_classification_path).to be_empty
103
96
  end
104
97
  end
105
98
 
106
- context 'in the presence of scientificNameAuthorship field' do
107
- let(:file_name) { 'sci_name_authorship.tar.gz' }
108
- it 'returns normalized data' do
99
+ context "in the presence of scientificNameAuthorship field" do
100
+ let(:file_name) { "sci_name_authorship.tar.gz" }
101
+ it "returns normalized data" do
109
102
  normalizer.normalize
110
103
  expect(normalizer.darwin_core.file_name).
111
- to eq 'sci_name_authorship.tar.gz'
104
+ to eq "sci_name_authorship.tar.gz"
112
105
  expect(normalizer.normalized_data).to be_kind_of Hash
113
106
  expect(normalizer.normalized_data.size).to be > 0
114
- tn = normalizer.normalized_data['leptogastrinae:tid:2688']
115
- expect(tn.current_name).to eq 'Leptogaster fornicata Martin, 1957'
116
- expect(tn.current_name_canonical).to eq 'Leptogaster fornicata'
107
+ tn = normalizer.normalized_data["leptogastrinae:tid:2688"]
108
+ expect(tn.current_name).to eq "Leptogaster fornicata Martin, 1957"
109
+ expect(tn.current_name_canonical).to eq "Leptogaster fornicata"
117
110
  end
118
111
  end
119
112
 
120
- context 'when scientificNameAuthorship duplicates author info' do
121
- let(:file_name) { 'sci_name_authorship_dup.tar.gz' }
122
- it 'returns normalized data' do
113
+ context "when scientificNameAuthorship duplicates author info" do
114
+ let(:file_name) { "sci_name_authorship_dup.tar.gz" }
115
+ it "returns normalized data" do
123
116
  normalizer.normalize
124
117
  expect(normalizer.darwin_core.file_name).
125
- to eq 'sci_name_authorship_dup.tar.gz'
118
+ to eq "sci_name_authorship_dup.tar.gz"
126
119
  expect(normalizer.normalized_data).to be_kind_of Hash
127
120
  expect(normalizer.normalized_data.size).to be > 0
128
- tn = normalizer.normalized_data['leptogastrinae:tid:2688']
129
- expect(tn.current_name).to eq 'Leptogaster fornicata Martin, 1957'
130
- expect(tn.current_name_canonical).to eq 'Leptogaster fornicata'
121
+ tn = normalizer.normalized_data["leptogastrinae:tid:2688"]
122
+ expect(tn.current_name).to eq "Leptogaster fornicata Martin, 1957"
123
+ expect(tn.current_name_canonical).to eq "Leptogaster fornicata"
131
124
  end
132
125
  end
133
126
 
134
- context 'coreid is empty' do
135
- let(:file_name) { 'empty_coreid.tar.gz' }
136
- it 'should ingest information' do
127
+ context "coreid is empty" do
128
+ let(:file_name) { "empty_coreid.tar.gz" }
129
+ it "should ingest information" do
137
130
  res = normalizer.normalize
138
131
  expect(normalizer.darwin_core.file_name).
139
- to eq 'empty_coreid.tar.gz'
140
- tn = res['Taxon9']
141
- expect(tn.current_name).to eq 'Amanita phalloides'
132
+ to eq "empty_coreid.tar.gz"
133
+ tn = res["Taxon9"]
134
+ expect(tn.current_name).to eq "Amanita phalloides"
142
135
  end
143
136
  end
144
137
 
145
- context 'vernacular locality info' do
146
- let(:file_name) { 'language_locality.tar.gz' }
147
- it 'should ingest locality and language' do
138
+ context "vernacular locality info" do
139
+ let(:file_name) { "language_locality.tar.gz" }
140
+ it "should ingest locality and language" do
148
141
  res = normalizer.normalize
149
- tn = res['leptogastrinae:tid:42']
142
+ tn = res["leptogastrinae:tid:42"]
150
143
  vn = tn.vernacular_names[0]
151
- expect(vn.language).to eq 'en'
152
- expect(vn.locality).to eq 'New England'
144
+ expect(vn.language).to eq "en"
145
+ expect(vn.locality).to eq "New England"
153
146
  end
154
147
  end
155
148
  end
156
149
 
157
- describe '#name_strings' do
158
- let(:file_path) { File.join(file_dir, 'flat_list.tar.gz') }
150
+ describe "#name_strings" do
151
+ let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }
159
152
 
160
- context 'before running #normalize' do
161
- it 'is empty' do
153
+ context "before running #normalize" do
154
+ it "is empty" do
162
155
  expect(normalizer.name_strings).to be_empty
163
156
  end
164
157
  end
165
-
166
- context 'after running #normalize' do
167
- let(:normalized) { normalizer.tap { |n| n.normalize } }
168
158
 
169
- context 'default attibutes' do
170
- it 'returns array' do
159
+ context "after running #normalize" do
160
+ let(:normalized) { normalizer.tap(&:normalize) }
161
+
162
+ context "default attibutes" do
163
+ it "returns array" do
171
164
  expect(normalized.name_strings).to be_kind_of Array
172
165
  expect(normalized.name_strings.size).to be > 1
173
166
  end
174
167
  end
175
168
 
176
- context 'with_hash attribute' do
177
- it 'returns hash' do
178
- strings = normalized.name_strings(with_hash:true)
169
+ context "with_hash attribute" do
170
+ it "returns hash" do
171
+ strings = normalized.name_strings(with_hash: true)
179
172
  expect(strings).to be_kind_of Hash
180
173
  expect(strings.size).to be > 1
181
174
  expect(strings.values.uniq).to eq [1]
182
175
  end
183
176
  end
184
177
  end
185
-
186
178
  end
187
179
 
188
- describe '#vernacular_name_strings' do
189
- let(:file_path) { File.join(file_dir, 'flat_list.tar.gz') }
180
+ describe "#vernacular_name_strings" do
181
+ let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }
190
182
 
191
- context 'before running #normalize' do
183
+ context "before running #normalize" do
192
184
  subject(:vern) { normalizer.vernacular_name_strings }
193
- it 'is empty' do
185
+
186
+ it "is empty" do
194
187
  expect(vern).to be_empty
195
188
  end
196
189
  end
197
-
198
- context 'after running #normalize' do
199
- let(:normalized) { normalizer.tap { |n| n.normalize } }
190
+
191
+ context "after running #normalize" do
192
+ let(:normalized) { normalizer.tap(&:normalize) }
200
193
  subject(:vern) { normalized.vernacular_name_strings }
201
- subject(:vern_w_hash) { normalized.
202
- vernacular_name_strings(with_hash: true) }
194
+ subject(:vern_w_hash) do
195
+ normalized.vernacular_name_strings(with_hash: true)
196
+ end
203
197
 
204
- context 'default attibutes' do
205
- it 'returns array' do
198
+ context "default attibutes" do
199
+ it "returns array" do
206
200
  expect(vern).to be_kind_of Array
207
- expect(vern.size).to be > 0
201
+ expect(vern.size).to be > 0
208
202
  end
209
203
  end
210
204
 
211
- context 'with_hash attribute' do
212
- it 'returns hash' do
205
+ context "with_hash attribute" do
206
+ it "returns hash" do
213
207
  expect(vern_w_hash).to be_kind_of Hash
214
- expect(vern_w_hash.size).to be > 0
208
+ expect(vern_w_hash.size).to be > 0
215
209
  expect(vern_w_hash.values.uniq).to eq [1]
216
210
  end
217
211
  end
218
-
219
212
  end
220
-
221
213
  end
222
-
223
214
  end