dwc-archive 0.9.10 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -1
  4. data/.rubocop.yml +23 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -7
  7. data/CHANGELOG +14 -8
  8. data/Gemfile +3 -1
  9. data/LICENSE +1 -1
  10. data/README.md +119 -107
  11. data/Rakefile +13 -36
  12. data/dwc-archive.gemspec +23 -19
  13. data/features/step_definitions/dwc-creator_steps.rb +5 -5
  14. data/features/step_definitions/dwc-reader_steps.rb +47 -28
  15. data/features/support/env.rb +1 -1
  16. data/lib/dwc_archive.rb +124 -0
  17. data/lib/dwc_archive/archive.rb +60 -0
  18. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  19. data/lib/dwc_archive/core.rb +25 -0
  20. data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
  21. data/lib/dwc_archive/expander.rb +88 -0
  22. data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
  23. data/lib/dwc_archive/generator.rb +91 -0
  24. data/lib/dwc_archive/generator_eml_xml.rb +116 -0
  25. data/lib/dwc_archive/generator_meta_xml.rb +72 -0
  26. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  27. data/lib/dwc_archive/ingester.rb +106 -0
  28. data/lib/dwc_archive/metadata.rb +57 -0
  29. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  30. data/lib/dwc_archive/version.rb +6 -0
  31. data/lib/dwc_archive/xml_reader.rb +90 -0
  32. data/spec/files/file with characters(3).gz → data/spec/files/file with characters(3).tar.gz +0 -0
  33. data/spec/files/generator_eml.xml +47 -0
  34. data/spec/files/generator_meta.xml +19 -0
  35. data/spec/lib/classification_normalizer_spec.rb +96 -105
  36. data/spec/lib/core_spec.rb +43 -41
  37. data/spec/lib/darwin_core_spec.rb +108 -138
  38. data/spec/lib/generator_eml_xml_spec.rb +12 -11
  39. data/spec/lib/generator_meta_xml_spec.rb +12 -11
  40. data/spec/lib/generator_spec.rb +77 -69
  41. data/spec/lib/gnub_taxon_spec.rb +15 -17
  42. data/spec/lib/metadata_spec.rb +50 -41
  43. data/spec/lib/taxon_normalized_spec.rb +62 -65
  44. data/spec/lib/xml_reader_spec.rb +9 -12
  45. data/spec/spec_helper.rb +54 -51
  46. metadata +105 -88
  47. data/.rvmrc +0 -1
  48. data/] +0 -40
  49. data/lib/dwc-archive.rb +0 -107
  50. data/lib/dwc-archive/archive.rb +0 -40
  51. data/lib/dwc-archive/classification_normalizer.rb +0 -428
  52. data/lib/dwc-archive/core.rb +0 -17
  53. data/lib/dwc-archive/expander.rb +0 -84
  54. data/lib/dwc-archive/generator.rb +0 -85
  55. data/lib/dwc-archive/generator_eml_xml.rb +0 -86
  56. data/lib/dwc-archive/generator_meta_xml.rb +0 -58
  57. data/lib/dwc-archive/ingester.rb +0 -101
  58. data/lib/dwc-archive/metadata.rb +0 -48
  59. data/lib/dwc-archive/version.rb +0 -3
  60. data/lib/dwc-archive/xml_reader.rb +0 -64
@@ -0,0 +1,106 @@
1
# encoding: utf-8
class DarwinCore
  # This module abstracts information for reading a csv file to be used
  # in several classes which need such functionality
  module Ingester
    attr_reader :data, :properties, :encoding, :size
    attr_reader :file_path, :fields, :line_separator, :quote_character,
                :ignore_headers

    # The field separator detected from the archive's meta file.
    # FIX: the original declared `attr_reader :fields_separator`, which read
    # the never-assigned @fields_separator ivar and always returned nil
    # (the real ivar is @field_separator). Keep the public name, return the
    # actual value.
    def fields_separator
      @field_separator
    end

    # Number of lines in the data file (memoized).
    def size
      @size ||= init_size
    end

    # Reads the csv data in batches of batch_size rows.
    # Yields [results, errors] per batch when a block is given; always
    # returns the final [results, errors] pair.
    def read(batch_size = 10_000)
      DarwinCore.logger_write(@dwc.object_id, "Reading #{name} data")
      res = []
      errors = []
      args = define_csv_args
      # A valid row must have at least max(field index) + 1 columns.
      # FIX: `f[:index].to_i || 0` — to_i never returns nil, so `|| 0` was
      # dead code (nil.to_i is already 0); `.sort[-1]` replaced by `.max`.
      min_size = @fields.map { |f| f[:index].to_i }.max + 1
      # FIX: Kernel#open can spawn a subprocess when the path starts with
      # "|"; File.open only opens files.
      csv = CSV.new(File.open(@file_path), args)
      csv.each_with_index do |r, i|
        next if @ignore_headers && i == 0
        min_size > r.size ? errors << r : process_csv_row(res, errors, r)
        next if i == 0 || i % batch_size != 0
        DarwinCore.logger_write(@dwc.object_id,
                                format("Ingested %s records from %s",
                                       i, name))
        next unless block_given?
        yield [res, errors]
        res = []
        errors = []
      end
      yield [res, errors] if block_given?
      [res, errors]
    end

    private

    # Builds the option hash handed to CSV.new.
    def define_csv_args
      args = { col_sep: @field_separator }
      # CSV refuses an empty quote char; "\b" is vanishingly unlikely to
      # appear in real data, so it effectively disables quoting.
      @quote_character = "\b" if @quote_character.empty?
      args.merge(quote_char: @quote_character)
    end

    # Short lowercase name of the including class (e.g. "core", "extension").
    def name
      self.class.to_s.split("::")[-1].downcase
    end

    # Accepts a row when its joined content is valid utf-8; otherwise the
    # raw row is collected into errors.
    def process_csv_row(result, errors, row)
      str = row.join("")
      str = str.force_encoding("utf-8")
      if str.encoding.name == "UTF-8" && str.valid_encoding?
        result << row.map { |f| f.nil? ? nil : f.force_encoding("utf-8") }
      else
        errors << row
      end
    end

    # Populates all reader state from the parsed meta.xml attributes.
    def init_attributes
      @properties = @data[:attributes]
      init_encoding
      @field_separator = init_field_separator
      @quote_character = @properties[:fieldsEnclosedBy] || ""
      @line_separator = @properties[:linesTerminatedBy] || "\n"
      @ignore_headers = @properties[:ignoreHeaderLines] &&
                        [1, true].include?(@properties[:ignoreHeaderLines])
      init_file_path
      init_fields
    end

    # Only utf-8/utf-16 (with or without dash) are supported.
    def init_encoding
      @encoding = @properties[:encoding] || "UTF-8"
      accepted_encoding = ["utf-8", "utf8", "utf-16", "utf16"].
                          include?(@encoding.downcase)
      fail(
        DarwinCore::EncodingError,
        "No support for encodings other than utf-8 or utf-16 at the moment"
      ) unless accepted_encoding
    end

    # Resolves the data file location from the several places meta.xml may
    # put it.
    def init_file_path
      file = @data[:location] ||
             @data[:attributes][:location] ||
             @data[:files][:location]
      @file_path = File.join(@path, file)
      fail DarwinCore::FileNotFoundError, "No file data" unless @file_path
    end

    # Normalizes a single <field> entry to an array and extracts attributes.
    def init_fields
      @data[:field] = [data[:field]] if data[:field].class != Array
      @fields = @data[:field].map { |f| f[:attributes] }
      fail DarwinCore::InvalidArchiveError,
           "No data fields are found" if @fields.empty?
    end

    # "\t" arrives escaped from xml as the two characters "\\t".
    def init_field_separator
      res = @properties[:fieldsTerminatedBy] || ","
      res = "\t" if res == "\\t"
      res
    end

    # NOTE(review): shells out to `wc -l`, which assumes a unix environment
    # and a shell-safe @file_path — confirm, or count with File.foreach.
    def init_size
      `wc -l #{@file_path}`.match(/^\s*([\d]+)\s/)[1].to_i
    end
  end
end
@@ -0,0 +1,57 @@
1
# frozen_string_literal: true

class DarwinCore
  # Represents data from EML (Ecological Metadata Language) file
  class Metadata
    attr_reader :metadata
    alias data metadata

    # @param archive [#eml] object exposing the parsed eml hash
    def initialize(archive = nil)
      @archive = archive
      @metadata = @archive.eml
    end

    # Dataset id attribute, or nil when absent.
    def id
      fix_nil { @metadata[:eml][:dataset][:attributes][:id] }
    end

    # Package id of the whole eml document, or nil when absent.
    # FIX: previously written as `@metadata.data[:eml]...` — @metadata is a
    # Hash with no #data method, so the NoMethodError was silently swallowed
    # by fix_nil and this method always returned nil.
    def package_id
      fix_nil { @metadata[:eml][:attributes][:packageId] }
    end

    # Dataset title, or nil when absent.
    def title
      fix_nil { @metadata[:eml][:dataset][:title] }
    end

    # Array of { first_name:, last_name:, email: } hashes for each dataset
    # creator, or nil when no creators are present. Individual fields that
    # are missing come back as nil instead of raising.
    def authors
      creators = fix_nil { @metadata[:eml][:dataset][:creator] }
      return nil unless creators

      [creators].flatten.map do |au|
        { first_name: fix_nil { au[:individualName][:givenName] },
          last_name: fix_nil { au[:individualName][:surName] },
          email: au[:electronicMailAddress] }
      end
    end

    # Dataset abstract, or nil when absent.
    def abstract
      fix_nil { @metadata[:eml][:dataset][:abstract] }
    end

    # Citation from additional metadata, or nil when absent.
    def citation
      fix_nil { @metadata[:eml][:additionalMetadata][:metadata][:citation] }
    end

    # Online distribution url, or nil when absent.
    def url
      fix_nil { @metadata[:eml][:dataset][:distribution][:online][:url] }
    end

    private

    # Turns a NoMethodError from a missing nested key into nil.
    def fix_nil
      yield
    rescue NoMethodError
      nil
    end
  end
end
@@ -0,0 +1,23 @@
1
# frozen_string_literal: true

class DarwinCore
  # Describes normalized taxon
  class TaxonNormalized
    attr_accessor :id, :local_id, :global_id, :source, :parent_id,
                  :classification_path_id, :classification_path,
                  :linnean_classification_path, :current_name,
                  :current_name_canonical, :synonyms, :vernacular_names,
                  :rank, :status

    # Builds an empty taxon record: identifiers, rank and status start as
    # nil; name and source fields as empty strings; every collection
    # attribute as an empty array.
    def initialize
      @id = nil
      @parent_id = nil
      @rank = nil
      @status = nil
      @current_name = ""
      @current_name_canonical = ""
      @source = ""
      @local_id = ""
      @global_id = ""
      @classification_path = []
      @classification_path_id = []
      @synonyms = []
      @vernacular_names = []
      @linnean_classification_path = []
    end
  end
end
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true

# Reopens DarwinCore to declare the gem's version constant.
class DarwinCore
  # Current dwc-archive gem version string.
  VERSION = "1.1.2"
end
@@ -0,0 +1,90 @@
1
# frozen_string_literal: true

class DarwinCore
  # USAGE: Hash.from_xml:(YOUR_XML_STRING)
  # modified from
  # http://stackoverflow.com/questions/1230741/
  # convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
  module XmlReader
    # Parses xml (string or IO) with Nokogiri and returns a hash keyed by
    # the root element's name.
    def self.from_xml(xml_io)
      result = Nokogiri::XML(xml_io)
      { result.root.name.to_sym => self::Node.new(result.root).value }
    end

    # Node is a helper class to parse xml into hash
    class Node
      def initialize(node)
        @node = node
        @val = {}
      end

      # Hash representation of an element node, or the prepared scalar
      # content of a text node.
      def value
        if @node.element?
          prepare_node_element
        else
          prepare(@node.content.to_s)
        end
      end

      private

      # Collects attributes and children into @val.
      def prepare_node_element
        add_attributes
        add_children if @node.children.size.positive?
        @val
      end

      # Converts integer-looking strings to Integer; everything else is
      # passed through untouched.
      def prepare(data)
        data.instance_of?(String) && data.to_i.to_s == data ? data.to_i : data
      end

      # Stores the node's xml attributes under the :attributes key.
      def add_attributes
        return if @node.attributes.empty?

        @val[:attributes] = {}
        @node.attributes.each_key do |key|
          add_attribute(@val[:attributes], @node.attributes[key])
        end
      end

      def add_attribute(attributes, attribute)
        attributes[attribute.name.to_sym] = prepare(attribute.value)
      end

      def add_children
        @node.children.each do |child|
          process_child(child)
        end
      end

      # Text children may collapse the node to a scalar; element children
      # are merged into the hash.
      def process_child(child)
        value = DarwinCore::XmlReader::Node.new(child).value
        if child.name == "text"
          handle_text(child, value)
        else
          add_child_to_value(child, value)
        end
      end

      def add_child_to_value(child, value)
        if @val[child.name.to_sym]
          handle_child_node(child.name.to_sym, value)
        else
          @val[child.name.to_sym] = prepare(value)
        end
      end

      # Appends val under key, promoting an existing single value to an
      # array when the element repeats.
      # FIX: parameter was named `child` although it holds a Symbol key, and
      # the class check used `Object::Array`, which is simply `Array`.
      def handle_child_node(key, val)
        if @val[key].is_a?(Array)
          @val[key] << prepare(val)
        else
          @val[key] = [@val[key], prepare(val)]
        end
      end

      # A lone text child collapses the whole node to its scalar content.
      def handle_text(child, val)
        @val = prepare(val) unless child.next_sibling || child.previous_sibling
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ <?xml version="1.0"?>
2
+ <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd">
3
+ <dataset id="1234">
4
+ <title>Test Classification</title>
5
+ <license>http://creativecommons.org/licenses/by-sa/3.0/</license>
6
+ <creator id="1" scope="document">
7
+ <individualName>
8
+ <givenName>John</givenName>
9
+ <surName>Doe</surName>
10
+ </individualName>
11
+ <organizationName>Example</organizationName>
12
+ <positionName>Assistant Professor</positionName>
13
+ <onlineUrl>http://example.org</onlineUrl>
14
+ <electronicMailAddress>jdoe@example.com</electronicMailAddress>
15
+ </creator>
16
+ <creator id="2" scope="document">
17
+ <individualName>
18
+ <givenName>Jane</givenName>
19
+ <surName>Doe</surName>
20
+ </individualName>
21
+ <electronicMailAddress>jane@example.com</electronicMailAddress>
22
+ </creator>
23
+ <metadataProvider>
24
+ <individualName>
25
+ <givenName>Jim</givenName>
26
+ <surName>Doe</surName>
27
+ </individualName>
28
+ <onlineUrl>http://aggregator.example.org</onlineUrl>
29
+ <electronicMailAddress>jimdoe@example.com</electronicMailAddress>
30
+ </metadataProvider>
31
+ <pubDate>2013-12-30 14:45:33 -0500</pubDate>
32
+ <abstract>
33
+ <para>test classification</para>
34
+ </abstract>
35
+ <contact>
36
+ <references>1</references>
37
+ </contact>
38
+ <contact>
39
+ <references>2</references>
40
+ </contact>
41
+ </dataset>
42
+ <additionalMetadata>
43
+ <metadata>
44
+ <citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
45
+ </metadata>
46
+ </additionalMetadata>
47
+ </eml:eml>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0"?>
2
+ <archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
3
+ <core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
4
+ <files>
5
+ <location>core.csv</location>
6
+ </files>
7
+ <id index="0"/>
8
+ <field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
9
+ <field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
10
+ <field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
11
+ </core>
12
+ <extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
13
+ <files>
14
+ <location>vern.csv</location>
15
+ </files>
16
+ <coreid index="0"/>
17
+ <field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
18
+ </extension>
19
+ </archive>
@@ -1,223 +1,214 @@
1
- require_relative '../spec_helper'
2
- # encoding: utf-8
1
+ # frozen_string_literal: true
3
2
 
4
3
  describe DarwinCore::ClassificationNormalizer do
5
-
6
4
  subject(:dwca) { DarwinCore.new(file_path) }
7
5
  subject(:normalizer) { DarwinCore::ClassificationNormalizer.new(dwca) }
8
-
9
- let(:file_dir) { File.expand_path('../../files', __FILE__) }
6
+
7
+ let(:file_dir) { File.expand_path("../files", __dir__) }
10
8
  let(:file_path) { File.join(file_dir, file_name) }
11
9
 
12
- describe '.new' do
13
- let(:file_path) { File.join(file_dir, 'data.tar.gz') }
14
- it { expect(normalizer.is_a? DarwinCore::ClassificationNormalizer).
15
- to be_true }
16
- end
10
+ describe ".new" do
11
+ let(:file_path) { File.join(file_dir, "data.tar.gz") }
12
+ it do
13
+ expect(normalizer.is_a?(DarwinCore::ClassificationNormalizer)).to be true
14
+ end
15
+ end
17
16
 
18
- describe '#normalize' do
19
- let(:file_name) { 'data.tar.gz' }
17
+ describe "#normalize" do
18
+ let(:file_name) { "data.tar.gz" }
20
19
 
21
- it 'returns normalized data' do
20
+ it "returns normalized data" do
22
21
  res = normalizer.normalize
23
22
  expect(res).to be normalizer.normalized_data
24
23
  end
25
24
 
25
+ context "flat list" do
26
+ let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }
26
27
 
27
- context 'flat list' do
28
- let(:file_path) { File.join(file_dir, 'flat_list.tar.gz') }
29
-
30
- it 'returns flat list' do
28
+ it "returns flat list" do
31
29
  normalizer.normalize
32
30
  expect(normalizer.normalized_data).to be_kind_of Hash
33
31
  expect(normalizer.normalized_data.size).to be > 0
34
32
  end
35
33
  end
36
34
 
37
- context 'synonyms from core' do
38
- let(:file_name) { 'synonyms_in_core_accepted_name_field.tar.gz' }
35
+ context "synonyms from core" do
36
+ let(:file_name) { "synonyms_in_core_accepted_name_field.tar.gz" }
39
37
 
40
- it 'ingests synonyms using accepted_name field' do
38
+ it "ingests synonyms using accepted_name field" do
41
39
  res = normalizer.normalize
42
- syn = res.select { |k,v| !v.synonyms.empty? }.
43
- map { |k,v| v }
40
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
44
41
  expect(syn.size).to be > 0
45
42
  expect(syn[0].synonyms[0]).to be_kind_of DarwinCore::SynonymNormalized
46
43
  end
47
44
  end
48
45
 
49
- context 'synonyms from extension' do
50
- let(:file_name) { 'synonyms_in_extension.tar.gz' }
51
- it 'ingests synonyms from extension' do
46
+ context "synonyms from extension" do
47
+ let(:file_name) { "synonyms_in_extension.tar.gz" }
48
+ it "ingests synonyms from extension" do
52
49
  res = normalizer.normalize
53
- syn = res.select { |k,v| !v.synonyms.empty? }.
54
- map { |k,v| v }
50
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
55
51
  expect(syn.size).to be > 0
56
52
  expect(syn[0].synonyms[0]).to be_kind_of DarwinCore::SynonymNormalized
57
53
  end
58
54
  end
59
55
 
60
- context 'synonyms are not extensions' do
61
- let(:file_name) { 'not_synonym_in_extension.tar.gz' }
56
+ context "synonyms are not extensions" do
57
+ let(:file_name) { "not_synonym_in_extension.tar.gz" }
62
58
 
63
- it 'does not ingest synonyms' do
59
+ it "does not ingest synonyms" do
64
60
  res = normalizer.normalize
65
- syn = res.select { |k,v| !v.synonyms.empty? }.
66
- map { |k,v| v }
61
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
67
62
  expect(syn).to be_empty
68
63
  end
69
64
  end
70
65
 
71
- context 'with_extensions flag set on false' do
72
- let(:file_name) { 'synonyms_in_extension.tar.gz' }
73
- it 'should not harvest extensions' do
66
+ context "with_extensions flag set on false" do
67
+ let(:file_name) { "synonyms_in_extension.tar.gz" }
68
+ it "should not harvest extensions" do
74
69
  res = normalizer.normalize(with_extensions: false)
75
- syn = res.select { |k,v| !v.synonyms.empty? }.
76
- map { |k,v| v }
70
+ syn = res.reject { |_, v| v.synonyms.empty? }.values
77
71
  expect(syn).to be_empty
78
72
  end
79
73
  end
80
74
 
81
- context 'linnean classification in file (class, order etc fields)' do
82
- let(:file_name) { 'linnean.tar.gz' }
75
+ context "linnean classification in file (class, order etc fields)" do
76
+ let(:file_name) { "linnean.tar.gz" }
83
77
 
84
- it 'assembles classification' do
78
+ it "assembles classification" do
85
79
  res = normalizer.normalize
86
80
  expect(res.first[1]).to be_kind_of DarwinCore::TaxonNormalized
87
81
  expect(res.first[1].linnean_classification_path).
88
- to eq [["Animalia", :kingdom],
89
- ["Arthropoda", :phylum],
90
- ["Insecta", :class],
91
- ["Diptera", :order],
92
- ["Cecidomyiidae", :family],
82
+ to eq [["Animalia", :kingdom],
83
+ ["Arthropoda", :phylum],
84
+ ["Insecta", :class],
85
+ ["Diptera", :order],
86
+ ["Cecidomyiidae", :family],
93
87
  ["Resseliella", :genus]]
94
-
95
88
  end
96
89
  end
97
90
 
98
- context 'no linnean fields are given' do
99
- it 'returns empty linnean classification' do
91
+ context "no linnean fields are given" do
92
+ it "returns empty linnean classification" do
100
93
  res = normalizer.normalize
101
94
  expect(res.first[1]).to be_kind_of DarwinCore::TaxonNormalized
102
95
  expect(res.first[1].linnean_classification_path).to be_empty
103
96
  end
104
97
  end
105
98
 
106
- context 'in the presence of scientificNameAuthorship field' do
107
- let(:file_name) { 'sci_name_authorship.tar.gz' }
108
- it 'returns normalized data' do
99
+ context "in the presence of scientificNameAuthorship field" do
100
+ let(:file_name) { "sci_name_authorship.tar.gz" }
101
+ it "returns normalized data" do
109
102
  normalizer.normalize
110
103
  expect(normalizer.darwin_core.file_name).
111
- to eq 'sci_name_authorship.tar.gz'
104
+ to eq "sci_name_authorship.tar.gz"
112
105
  expect(normalizer.normalized_data).to be_kind_of Hash
113
106
  expect(normalizer.normalized_data.size).to be > 0
114
- tn = normalizer.normalized_data['leptogastrinae:tid:2688']
115
- expect(tn.current_name).to eq 'Leptogaster fornicata Martin, 1957'
116
- expect(tn.current_name_canonical).to eq 'Leptogaster fornicata'
107
+ tn = normalizer.normalized_data["leptogastrinae:tid:2688"]
108
+ expect(tn.current_name).to eq "Leptogaster fornicata Martin, 1957"
109
+ expect(tn.current_name_canonical).to eq "Leptogaster fornicata"
117
110
  end
118
111
  end
119
112
 
120
- context 'when scientificNameAuthorship duplicates author info' do
121
- let(:file_name) { 'sci_name_authorship_dup.tar.gz' }
122
- it 'returns normalized data' do
113
+ context "when scientificNameAuthorship duplicates author info" do
114
+ let(:file_name) { "sci_name_authorship_dup.tar.gz" }
115
+ it "returns normalized data" do
123
116
  normalizer.normalize
124
117
  expect(normalizer.darwin_core.file_name).
125
- to eq 'sci_name_authorship_dup.tar.gz'
118
+ to eq "sci_name_authorship_dup.tar.gz"
126
119
  expect(normalizer.normalized_data).to be_kind_of Hash
127
120
  expect(normalizer.normalized_data.size).to be > 0
128
- tn = normalizer.normalized_data['leptogastrinae:tid:2688']
129
- expect(tn.current_name).to eq 'Leptogaster fornicata Martin, 1957'
130
- expect(tn.current_name_canonical).to eq 'Leptogaster fornicata'
121
+ tn = normalizer.normalized_data["leptogastrinae:tid:2688"]
122
+ expect(tn.current_name).to eq "Leptogaster fornicata Martin, 1957"
123
+ expect(tn.current_name_canonical).to eq "Leptogaster fornicata"
131
124
  end
132
125
  end
133
126
 
134
- context 'coreid is empty' do
135
- let(:file_name) { 'empty_coreid.tar.gz' }
136
- it 'should ingest information' do
127
+ context "coreid is empty" do
128
+ let(:file_name) { "empty_coreid.tar.gz" }
129
+ it "should ingest information" do
137
130
  res = normalizer.normalize
138
131
  expect(normalizer.darwin_core.file_name).
139
- to eq 'empty_coreid.tar.gz'
140
- tn = res['Taxon9']
141
- expect(tn.current_name).to eq 'Amanita phalloides'
132
+ to eq "empty_coreid.tar.gz"
133
+ tn = res["Taxon9"]
134
+ expect(tn.current_name).to eq "Amanita phalloides"
142
135
  end
143
136
  end
144
137
 
145
- context 'vernacular locality info' do
146
- let(:file_name) { 'language_locality.tar.gz' }
147
- it 'should ingest locality and language' do
138
+ context "vernacular locality info" do
139
+ let(:file_name) { "language_locality.tar.gz" }
140
+ it "should ingest locality and language" do
148
141
  res = normalizer.normalize
149
- tn = res['leptogastrinae:tid:42']
142
+ tn = res["leptogastrinae:tid:42"]
150
143
  vn = tn.vernacular_names[0]
151
- expect(vn.language).to eq 'en'
152
- expect(vn.locality).to eq 'New England'
144
+ expect(vn.language).to eq "en"
145
+ expect(vn.locality).to eq "New England"
153
146
  end
154
147
  end
155
148
  end
156
149
 
157
- describe '#name_strings' do
158
- let(:file_path) { File.join(file_dir, 'flat_list.tar.gz') }
150
+ describe "#name_strings" do
151
+ let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }
159
152
 
160
- context 'before running #normalize' do
161
- it 'is empty' do
153
+ context "before running #normalize" do
154
+ it "is empty" do
162
155
  expect(normalizer.name_strings).to be_empty
163
156
  end
164
157
  end
165
-
166
- context 'after running #normalize' do
167
- let(:normalized) { normalizer.tap { |n| n.normalize } }
168
158
 
169
- context 'default attibutes' do
170
- it 'returns array' do
159
+ context "after running #normalize" do
160
+ let(:normalized) { normalizer.tap(&:normalize) }
161
+
162
+ context "default attibutes" do
163
+ it "returns array" do
171
164
  expect(normalized.name_strings).to be_kind_of Array
172
165
  expect(normalized.name_strings.size).to be > 1
173
166
  end
174
167
  end
175
168
 
176
- context 'with_hash attribute' do
177
- it 'returns hash' do
178
- strings = normalized.name_strings(with_hash:true)
169
+ context "with_hash attribute" do
170
+ it "returns hash" do
171
+ strings = normalized.name_strings(with_hash: true)
179
172
  expect(strings).to be_kind_of Hash
180
173
  expect(strings.size).to be > 1
181
174
  expect(strings.values.uniq).to eq [1]
182
175
  end
183
176
  end
184
177
  end
185
-
186
178
  end
187
179
 
188
- describe '#vernacular_name_strings' do
189
- let(:file_path) { File.join(file_dir, 'flat_list.tar.gz') }
180
+ describe "#vernacular_name_strings" do
181
+ let(:file_path) { File.join(file_dir, "flat_list.tar.gz") }
190
182
 
191
- context 'before running #normalize' do
183
+ context "before running #normalize" do
192
184
  subject(:vern) { normalizer.vernacular_name_strings }
193
- it 'is empty' do
185
+
186
+ it "is empty" do
194
187
  expect(vern).to be_empty
195
188
  end
196
189
  end
197
-
198
- context 'after running #normalize' do
199
- let(:normalized) { normalizer.tap { |n| n.normalize } }
190
+
191
+ context "after running #normalize" do
192
+ let(:normalized) { normalizer.tap(&:normalize) }
200
193
  subject(:vern) { normalized.vernacular_name_strings }
201
- subject(:vern_w_hash) { normalized.
202
- vernacular_name_strings(with_hash: true) }
194
+ subject(:vern_w_hash) do
195
+ normalized.vernacular_name_strings(with_hash: true)
196
+ end
203
197
 
204
- context 'default attibutes' do
205
- it 'returns array' do
198
+ context "default attibutes" do
199
+ it "returns array" do
206
200
  expect(vern).to be_kind_of Array
207
- expect(vern.size).to be > 0
201
+ expect(vern.size).to be > 0
208
202
  end
209
203
  end
210
204
 
211
- context 'with_hash attribute' do
212
- it 'returns hash' do
205
+ context "with_hash attribute" do
206
+ it "returns hash" do
213
207
  expect(vern_w_hash).to be_kind_of Hash
214
- expect(vern_w_hash.size).to be > 0
208
+ expect(vern_w_hash.size).to be > 0
215
209
  expect(vern_w_hash.values.uniq).to eq [1]
216
210
  end
217
211
  end
218
-
219
212
  end
220
-
221
213
  end
222
-
223
214
  end