dwc-archive 0.9.10 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8e8c929203d1b652f8ba345b0c4c39cfc87a0369
4
- data.tar.gz: 739a064221bf52523990bfea38749f11b4d986c0
3
+ metadata.gz: 5c6c5b2a4de324abded5b3adb81d6bcd9603965f
4
+ data.tar.gz: ac00e15f95766838ff42a9bdc3f1a682c4a8e9ff
5
5
  SHA512:
6
- metadata.gz: adfd46bea84e301ceca6de355f189b884bc6a9a22eb8d98e3976f17e3407d2de7f15963ec0d6f29958de057de9d579c81d8007d72c38fc1b9f9ada9295381152
7
- data.tar.gz: 2e188f828d0bbe28baf5f1d8aa7aab12ca330000bf3f5ee145501d78961f9ca5e696be1171998a433952cf84546a8d5fd61c6cf45958576a7e0800fbf4542dae
6
+ metadata.gz: d84f2974ed7bcbabc62d2d3b5c5b15dd8d6d2aa38836a955c12896ada2391e978ca2d420d86bf59db2ab61cc801c9c43036be13699ba7b3902d8daa4f366d45b
7
+ data.tar.gz: a07a91365cba60fa418b85d7b516781c9209ea607dbe5ec45da86febd14bfa104cc16e4b55619a157f5bbb487dff2fd7e318399d815a63372197a9732b3820fb
@@ -1,5 +1,5 @@
1
1
  rvm:
2
- - 1.9.3-p448
2
+ - 1.9.3-p484
3
3
  - 2.0.0-p353
4
4
  before_install:
5
5
  - sudo apt-get update
data/CHANGELOG CHANGED
@@ -1,15 +1,17 @@
1
- 0.9.7 Refactoring and tests improvements
1
+ 0.9.11 Removed VERSION duplicate
2
2
 
3
- 0.9.6 Added support for GNUB DwCA files
3
+ 0.9.7 Refactoring and tests improvements
4
4
 
5
- 0.9.4 Gem dependencies updated, added travis support
5
+ 0.9.6 Added support for GNUB DwCA files
6
6
 
7
- 0.9.0 Migrated code to ruby 1.9.3
7
+ 0.9.4 Gem dependencies updated, added travis support
8
8
 
9
- 0.8.3 Updated outdated exception raising
9
+ 0.9.0 Migrated code to ruby 1.9.3
10
10
 
11
- 0.8.2 Removed species info from linnean classification path
11
+ 0.8.3 Updated outdated exception raising
12
12
 
13
- 0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
13
+ 0.8.2 Removed species info from linnean classification path
14
14
 
15
- 0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
15
+ 0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
16
+
17
+ 0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
data/README.md CHANGED
@@ -175,7 +175,11 @@ Note on Patches/Pull Requests
175
175
  Copyright
176
176
  ---------
177
177
 
178
- Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
178
+ Author -- [Dmitry Mozzherin][13]
179
+
180
+ Contributors -- [Matt Yoder][14]
181
+
182
+ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
179
183
 
180
184
  [1]: https://badge.fury.io/rb/dwc-archive.png
181
185
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -189,3 +193,6 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
189
193
  [10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
190
194
  [11]: http://bit.ly/2IxcBA
191
195
  [12]: http://redis.io/topics/quickstart
196
+ [13]: https://github.com/dimus
197
+ [14]: https://github.com/mjy
198
+ [15]: http://mbl.edu
@@ -8,6 +8,7 @@ require 'ostruct'
8
8
  require 'digest'
9
9
  require 'csv'
10
10
  require 'logger'
11
+ require 'nokogiri'
11
12
  require_relative 'dwc-archive/xml_reader'
12
13
  require_relative 'dwc-archive/ingester'
13
14
  require_relative 'dwc-archive/errors'
@@ -24,7 +25,6 @@ require_relative 'dwc-archive/version'
24
25
 
25
26
  class DarwinCore
26
27
 
27
- VERSION = DarwinCore::VERSION
28
28
  DEFAULT_TMP_DIR = "/tmp"
29
29
 
30
30
  attr_reader :archive, :core, :metadata, :extensions,
@@ -1,7 +1,7 @@
1
- require 'nokogiri'
2
1
  class DarwinCore
3
2
  class Archive
4
3
  attr_reader :meta, :eml
4
+
5
5
  def initialize(archive_path, tmp_dir)
6
6
  @archive_path = archive_path
7
7
  @tmp_dir = tmp_dir
@@ -72,21 +72,11 @@ class DarwinCore
72
72
  end
73
73
 
74
74
  def name_strings(opts = {})
75
- opts = { with_hash: false }.merge(opts)
76
- if !!opts[:with_hash]
77
- @name_strings
78
- else
79
- @name_strings.keys
80
- end
75
+ process_strings(@name_strings, opts)
81
76
  end
82
77
 
83
78
  def vernacular_name_strings(opts = {})
84
- opts = { with_hash: false }.merge(opts)
85
- if !!opts[:with_hash]
86
- @vernacular_name_strings
87
- else
88
- @vernacular_name_strings.keys
89
- end
79
+ process_strings(@vernacular_name_strings, opts)
90
80
  end
91
81
 
92
82
  def normalize(opts = {})
@@ -110,6 +100,15 @@ class DarwinCore
110
100
 
111
101
  private
112
102
 
103
+ def process_strings(strings, opts)
104
+ opts = { with_hash: false }.merge(opts)
105
+ if !!opts[:with_hash]
106
+ strings
107
+ else
108
+ strings.keys
109
+ end
110
+ end
111
+
113
112
  def get_canonical_name(a_scientific_name)
114
113
  if @with_canonical_names
115
114
  canonical_name = @parser.parse(a_scientific_name,
@@ -2,6 +2,7 @@ class DarwinCore
2
2
  class Core
3
3
  include DarwinCore::Ingester
4
4
  attr_reader :id
5
+
5
6
  def initialize(dwc)
6
7
  @dwc = dwc
7
8
  @archive = @dwc.archive
@@ -9,9 +10,10 @@ class DarwinCore
9
10
  root_key = @archive.meta.keys[0]
10
11
  @data = @archive.meta[root_key][:core]
11
12
  raise DarwinCore::CoreFileError.
12
- new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
13
+ new('Cannot find core in meta.xml, is meta.xml valid?') unless @data
13
14
  @id = @data[:id][:attributes]
14
15
  get_attributes(DarwinCore::CoreFileError)
15
16
  end
17
+
16
18
  end
17
19
  end
@@ -1,5 +1,6 @@
1
1
  class DarwinCore
2
2
  class Expander
3
+
3
4
  def initialize(archive_path, tmp_dir)
4
5
  @archive_path = archive_path
5
6
  @tmp_dir = tmp_dir
@@ -21,12 +21,7 @@ class DarwinCore
21
21
  def add_core(data, file_name, keep_headers = true)
22
22
  c = CSV.open(File.join(@path,file_name), @write)
23
23
  header = data.shift
24
- fields = header.map do |f|
25
- f.strip!
26
- err = 'No header in core data, or header fields are not urls'
27
- raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
28
- f.split('/')[-1]
29
- end
24
+ fields = get_fields(header, 'core')
30
25
  data.unshift(fields) if keep_headers
31
26
  ignore_header_lines = keep_headers ? 1 : 0
32
27
  @meta_xml_data[:core] = { fields: header,
@@ -41,12 +36,7 @@ class DarwinCore
41
36
  row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
42
37
  c = CSV.open(File.join(@path,file_name), @write)
43
38
  header = data.shift
44
- fields = header.map do |f|
45
- f.strip!
46
- err = 'No header in core data, or header fields are not urls'
47
- raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
48
- f.split('/')[-1]
49
- end
39
+ fields = get_fields(header, 'extension')
50
40
  data.unshift(fields) if keep_headers
51
41
  ignore_header_lines = keep_headers ? 1 : 0
52
42
  @meta_xml_data[:extensions] << { fields: header,
@@ -81,5 +71,16 @@ class DarwinCore
81
71
  a = "cd #{@path}; tar -zcf #{@dwc_path} *"
82
72
  system(a)
83
73
  end
74
+
75
+ private
76
+
77
+ def get_fields(header, file_type)
78
+ header.map do |f|
79
+ f.strip!
80
+ err = "No header in %s data, or header fields are not urls" % file_type
81
+ raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
82
+ f.split('/')[-1]
83
+ end
84
+ end
84
85
  end
85
86
  end
@@ -22,51 +22,9 @@ class DarwinCore
22
22
  :'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
23
23
  :'xmlns:dc' => 'http://purl.org/dc/terms/',
24
24
  :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
25
- :'xsi:schemaLocation' => 'eml_uri') do
26
- xml.dataset(id: @data[:id]) do
27
- xml.title(@data[:title])
28
- xml.license(@data[:license])
29
- contacts = []
30
- @data[:authors].each_with_index do |a, i|
31
- creator_id = i + 1
32
- contacts << creator_id
33
- xml.creator(id: creator_id, scope: 'document') do
34
- xml.individualName do
35
- xml.givenName(a[:first_name])
36
- xml.surName(a[:last_name])
37
- end
38
- xml.organizationName(a[:organization]) if a[:organization]
39
- xml.positionName(a[:position]) if a[:position]
40
- xml.onlineUrl(a[:url]) if a[:url]
41
- xml.electronicMailAddress(a[:email])
42
- end
43
- end
44
- @data[:metadata_providers].each_with_index do |a, i|
45
- xml.metadataProvider do
46
- xml.individualName do
47
- xml.givenName(a[:first_name])
48
- xml.surName(a[:last_name])
49
- end
50
- xml.organizationName(a[:organization]) if a[:organization]
51
- xml.positionName(a[:position]) if a[:position]
52
- xml.onlineUrl(a[:url]) if a[:url]
53
- xml.electronicMailAddress(a[:email])
54
- end
55
- end if @data[:metadata_providers]
56
- xml.pubDate(Time.now.to_s)
57
- xml.abstract() do
58
- xml.para(@data[:abstract])
59
- end
60
- contacts.each do |contact|
61
- xml.contact { xml.references(contact) }
62
- end
63
- end
64
- xml.additionalMetadata do
65
- xml.metadata do
66
- xml.citation(@data[:citation])
67
- xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
68
- end
69
- end
25
+ :'xsi:schemaLocation' => 'eml_uri') do
26
+ build_dataset(xml)
27
+ build_additional_metadata(xml)
70
28
  xml.parent.namespace = xml.parent.namespace_definitions.first
71
29
  end
72
30
  end
@@ -77,6 +35,71 @@ class DarwinCore
77
35
  end
78
36
 
79
37
  private
38
+
39
+ def build_dataset(xml)
40
+ xml.dataset(id: @data[:id]) do
41
+ xml.title(@data[:title])
42
+ xml.license(@data[:license])
43
+ contacts = []
44
+ build_authors(xml, contacts)
45
+ build_metadata_providers(xml)
46
+ xml.pubDate(Time.now.to_s)
47
+ build_abstract(xml)
48
+ build_contacts(xml, contacts)
49
+ end
50
+ end
51
+
52
+ def build_abstract(xml)
53
+ xml.abstract() do
54
+ xml.para(@data[:abstract])
55
+ end
56
+ end
57
+
58
+ def build_contacts(xml, contacts)
59
+ contacts.each do |contact|
60
+ xml.contact { xml.references(contact) }
61
+ end
62
+ end
63
+
64
+ def build_metadata_providers(xml)
65
+ @data[:metadata_providers].each_with_index do |a, i|
66
+ xml.metadataProvider do
67
+ build_person(xml, a)
68
+ end
69
+ end if @data[:metadata_providers]
70
+ end
71
+
72
+ def build_authors(xml, contacts)
73
+ @data[:authors].each_with_index do |a, i|
74
+ creator_id = i + 1
75
+ contacts << creator_id
76
+ xml.creator(id: creator_id, scope: 'document') do
77
+ build_person(xml, a)
78
+ end
79
+ end
80
+ end
81
+
82
+ def build_additional_metadata(xml)
83
+ xml.additionalMetadata do
84
+ xml.metadata do
85
+ xml.citation(@data[:citation])
86
+ xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
87
+ end
88
+ end
89
+ end
90
+
91
+ def build_person(xml, data)
92
+ a = data
93
+ xml.individualName do
94
+ xml.givenName(a[:first_name])
95
+ xml.surName(a[:last_name])
96
+ end
97
+ xml.organizationName(a[:organization]) if a[:organization]
98
+ xml.positionName(a[:position]) if a[:position]
99
+ xml.onlineUrl(a[:url]) if a[:url]
100
+ xml.electronicMailAddress(a[:email])
101
+ end
102
+
80
103
  def timestamp
81
104
  t = Time.now.getutc.to_a[0..5].reverse
82
105
  t[0..2].join('-') + '::' + t[-3..-1].join(':')
@@ -16,26 +16,7 @@ class DarwinCore
16
16
  fieldsEnclosedBy: '"',
17
17
  linesTerminatedBy: "\n",
18
18
  rowType: 'http://rs.tdwg.org/dwc/terms/Taxon' }
19
- xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
20
- :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
21
- :'xsi:schemaLocation' => schema_uri) do
22
- xml.core(opts.merge(ignoreHeaderLines:
23
- @data[:core][:ignoreHeaderLines])) do
24
- xml.files { xml.location(@data[:core][:location]) }
25
- taxon_id, fields = find_taxon_id(@data[:core][:fields])
26
- xml.id_(index: taxon_id[1])
27
- fields.each { |f| xml.field(term: f[0], index: f[1]) }
28
- end
29
- @data[:extensions].each do |e|
30
- xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
31
- rowType: e[:rowType])) do
32
- xml.files { xml.location(e[:location]) }
33
- taxon_id, fields = find_taxon_id(e[:fields])
34
- xml.coreid(index: taxon_id[1])
35
- fields.each { |f| xml.field(term: f[0], index: f[1]) }
36
- end
37
- end
38
- end
19
+ build_archive(xml, opts, schema_uri)
39
20
  end
40
21
  meta_xml_data = builder.to_xml
41
22
  meta_file = open(File.join(@path, 'meta.xml'), @write)
@@ -44,6 +25,38 @@ class DarwinCore
44
25
  end
45
26
 
46
27
  private
28
+
29
+ def build_archive(xml, opts, schema_uri)
30
+ xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
31
+ :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
32
+ :'xsi:schemaLocation' => schema_uri) do
33
+ build_core(xml, opts)
34
+ build_extensions(xml, opts)
35
+ end
36
+ end
37
+
38
+ def build_core(xml, opts)
39
+ xml.core(opts.merge(ignoreHeaderLines:
40
+ @data[:core][:ignoreHeaderLines])) do
41
+ xml.files { xml.location(@data[:core][:location]) }
42
+ taxon_id, fields = find_taxon_id(@data[:core][:fields])
43
+ xml.id_(index: taxon_id[1])
44
+ fields.each { |f| xml.field(term: f[0], index: f[1]) }
45
+ end
46
+ end
47
+
48
+ def build_extensions(xml, opts)
49
+ @data[:extensions].each do |e|
50
+ xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
51
+ rowType: e[:rowType])) do
52
+ xml.files { xml.location(e[:location]) }
53
+ taxon_id, fields = find_taxon_id(e[:fields])
54
+ xml.coreid(index: taxon_id[1])
55
+ fields.each { |f| xml.field(term: f[0], index: f[1]) }
56
+ end
57
+ end
58
+ end
59
+
47
60
  def find_taxon_id(data)
48
61
  fields = []
49
62
  data.each_with_index { |f, i| fields << [f.strip, i] }
@@ -1,3 +1,3 @@
1
1
  class DarwinCore
2
- VERSION = "0.9.10"
2
+ VERSION = "0.9.11"
3
3
  end
@@ -1,64 +1,80 @@
1
1
  # USAGE: Hash.from_xml:(YOUR_XML_STRING)
2
- require 'nokogiri'
3
2
  # modified from
4
3
  # http://stackoverflow.com/questions/1230741/
5
4
  # convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
6
5
  class DarwinCore
7
6
  module XmlReader
8
7
  class << self
8
+
9
9
  def from_xml(xml_io)
10
10
  result = Nokogiri::XML(xml_io)
11
11
  return { result.root.name.to_sym => xml_node_to_hash(result.root)}
12
12
  end
13
13
 
14
14
  private
15
-
16
15
  def xml_node_to_hash(node)
17
16
  # If we are at the root of the document, start the hash
18
17
  if node.element?
19
- result_hash = {}
20
- if node.attributes != {}
21
- result_hash[:attributes] = {}
22
- node.attributes.keys.each do |key|
23
- result_hash[:attributes][node.attributes[key].
24
- name.to_sym] = prepare(node.attributes[key].value)
25
- end
26
- end
27
- if node.children.size > 0
28
- node.children.each do |child|
29
- result = xml_node_to_hash(child)
30
-
31
- if child.name == "text"
32
- unless child.next_sibling || child.previous_sibling
33
- return prepare(result)
34
- end
35
- elsif result_hash[child.name.to_sym]
36
- if result_hash[child.name.to_sym].is_a?(Object::Array)
37
- result_hash[child.name.to_sym] << prepare(result)
38
- else
39
- result_hash[child.name.to_sym] =
40
- [result_hash[child.name.to_sym]] << prepare(result)
41
- end
42
- else
43
- result_hash[child.name.to_sym] = prepare(result)
44
- end
45
- end
46
-
47
- return result_hash
48
- else
49
- return result_hash
50
- end
18
+ prepare_node_element(node)
51
19
  else
52
20
  return prepare(node.content.to_s)
53
21
  end
54
22
  end
23
+
24
+ def add_attributes(node, result_hash)
25
+ if node.attributes != {}
26
+ result_hash[:attributes] = {}
27
+ node.attributes.keys.each do |key|
28
+ result_hash[:attributes][node.attributes[key].name.to_sym] =
29
+ prepare(node.attributes[key].value)
30
+ end
31
+ end
32
+ end
33
+
34
+ def prepare_node_element(node)
35
+ result_hash = {}
36
+ add_attributes(node, result_hash)
37
+ if node.children.size > 0
38
+ result_hash = add_children(node, result_hash)
39
+ end
40
+ result_hash
41
+ end
42
+
43
+ def add_children(node, result_hash)
44
+ node.children.each do |child|
45
+ result = xml_node_to_hash(child)
46
+
47
+ if child.name == "text"
48
+ text = handle_text(child, result)
49
+ return text if text
50
+ elsif result_hash[child.name.to_sym]
51
+ handle_child_node(child, result_hash, result)
52
+ else
53
+ result_hash[child.name.to_sym] = prepare(result)
54
+ end
55
+ end
56
+ result_hash
57
+ end
58
+
59
+ def handle_child_node(child, result_hash, result)
60
+ if result_hash[child.name.to_sym].is_a?(Object::Array)
61
+ result_hash[child.name.to_sym] << prepare(result)
62
+ else
63
+ result_hash[child.name.to_sym] =
64
+ [result_hash[child.name.to_sym]] << prepare(result)
65
+ end
66
+ end
67
+
68
+ def handle_text(child, result)
69
+ unless child.next_sibling || child.previous_sibling
70
+ prepare(result)
71
+ end
72
+ end
55
73
 
56
74
  def prepare(data)
57
- return data if data.class != String
58
- return true if data.strip == "true"
59
- return false if data.strip == "false"
60
- data.to_i.to_s == data ? data.to_i : data
75
+ (data.class == String && data.to_i.to_s == data) ? data.to_i : data
61
76
  end
77
+
62
78
  end
63
79
  end
64
80
  end
@@ -0,0 +1,47 @@
1
+ <?xml version="1.0"?>
2
+ <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml_uri">
3
+ <dataset id="1234">
4
+ <title>Test Classification</title>
5
+ <license>http://creativecommons.org/licenses/by-sa/3.0/</license>
6
+ <creator id="1" scope="document">
7
+ <individualName>
8
+ <givenName>John</givenName>
9
+ <surName>Doe</surName>
10
+ </individualName>
11
+ <organizationName>Example</organizationName>
12
+ <positionName>Assistant Professor</positionName>
13
+ <onlineUrl>http://example.org</onlineUrl>
14
+ <electronicMailAddress>jdoe@example.com</electronicMailAddress>
15
+ </creator>
16
+ <creator id="2" scope="document">
17
+ <individualName>
18
+ <givenName>Jane</givenName>
19
+ <surName>Doe</surName>
20
+ </individualName>
21
+ <electronicMailAddress>jane@example.com</electronicMailAddress>
22
+ </creator>
23
+ <metadataProvider>
24
+ <individualName>
25
+ <givenName>Jim</givenName>
26
+ <surName>Doe</surName>
27
+ </individualName>
28
+ <onlineUrl>http://aggregator.example.org</onlineUrl>
29
+ <electronicMailAddress>jimdoe@example.com</electronicMailAddress>
30
+ </metadataProvider>
31
+ <pubDate>2013-12-30 14:45:33 -0500</pubDate>
32
+ <abstract>
33
+ <para>test classification</para>
34
+ </abstract>
35
+ <contact>
36
+ <references>1</references>
37
+ </contact>
38
+ <contact>
39
+ <references>2</references>
40
+ </contact>
41
+ </dataset>
42
+ <additionalMetadata>
43
+ <metadata>
44
+ <citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
45
+ </metadata>
46
+ </additionalMetadata>
47
+ </eml:eml>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0"?>
2
+ <archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
3
+ <core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
4
+ <files>
5
+ <location>core.csv</location>
6
+ </files>
7
+ <id index="0"/>
8
+ <field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
9
+ <field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
10
+ <field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
11
+ </core>
12
+ <extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
13
+ <files>
14
+ <location>vern.csv</location>
15
+ </files>
16
+ <coreid index="0"/>
17
+ <field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
18
+ </extension>
19
+ </archive>
@@ -50,16 +50,25 @@ describe DarwinCore::Generator do
50
50
  'http://rs.gbif.org/terms/1.0/VernacularName')
51
51
 
52
52
  gen.add_meta_xml
53
- meta = File.read(File.join(gen.path, 'meta.xml'))
54
- expect(meta).to match %r|<location>core.csv</location>|
53
+ meta = File.read(File.join(gen.path, 'meta.xml')).strip
54
+ meta_from_file= File.read(File.expand_path(
55
+ '../../files/generator_meta.xml',
56
+ __FILE__)).strip
57
+ expect(meta).to eq meta_from_file
55
58
  end
56
59
  end
57
60
 
58
61
  describe '#add_eml_data' do
59
62
  it 'adds eml data' do
60
63
  gen.add_eml_xml(EML_DATA)
61
- eml = File.read(File.join(gen.path, 'eml.xml'))
62
- expect(eml).to match /jdoe@example.com/
64
+ eml = File.read(File.join(gen.path, 'eml.xml')).strip
65
+ eml.gsub!(%r|(<pubDate>).*?(</pubDate>)|, '\12013-12-30 14:45:33 -0500\2')
66
+ eml.gsub!(/(packageId=").*?"/, '\11234/2013-12-30::19:45:33"')
67
+
68
+ eml_from_file = File.read(File.expand_path(
69
+ '../../files/generator_eml.xml',
70
+ __FILE__)).strip
71
+ expect(eml.strip).to eq eml_from_file.strip
63
72
  end
64
73
  end
65
74
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.10
4
+ version: 0.9.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-27 00:00:00.000000000 Z
11
+ date: 2014-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -171,7 +171,6 @@ files:
171
171
  - LICENSE
172
172
  - README.md
173
173
  - Rakefile
174
- - ']'
175
174
  - dwc-archive.gemspec
176
175
  - features/dwca-creator.feature
177
176
  - features/dwca-reader.feature
@@ -200,6 +199,8 @@ files:
200
199
  - spec/files/empty_coreid.tar.gz
201
200
  - spec/files/file with characters(3).gz
202
201
  - spec/files/flat_list.tar.gz
202
+ - spec/files/generator_eml.xml
203
+ - spec/files/generator_meta.xml
203
204
  - spec/files/gnub.tar.gz
204
205
  - spec/files/invalid.tar.gz
205
206
  - spec/files/junk_dir_inside.zip
@@ -263,6 +264,8 @@ test_files:
263
264
  - spec/files/empty_coreid.tar.gz
264
265
  - spec/files/file with characters(3).gz
265
266
  - spec/files/flat_list.tar.gz
267
+ - spec/files/generator_eml.xml
268
+ - spec/files/generator_meta.xml
266
269
  - spec/files/gnub.tar.gz
267
270
  - spec/files/invalid.tar.gz
268
271
  - spec/files/junk_dir_inside.zip
data/] DELETED
@@ -1,40 +0,0 @@
1
- require_relative '../spec_helper'
2
-
3
- describe DarwinCore::Core do
4
- subject(:dwca) { DarwinCore.new(file_path) }
5
- subject(:core) { DarwinCore::Core.new(dwca) }
6
- let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
7
- file_name) }
8
- let(:file_name) { 'data.tar.gz' }
9
-
10
-
11
- describe '.new' do
12
- it 'creates new core' do
13
- expect(core).to be_kind_of DarwinCore::Core
14
- end
15
- end
16
-
17
- describe '#id' do
18
-
19
- it 'returns core id' do
20
- expect(core.id[:index]).to eq 0
21
- expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
22
- end
23
-
24
- context 'no coreid' do
25
- let(:file_name) { 'empty_coreid.tar.gz' }
26
-
27
- it 'does not return coreid' do
28
- expect(core.id[:index]).to eq 0
29
- expect(core.id[:term]).to be_nil
30
- end
31
- end
32
- end
33
-
34
- it 'reads core file from archive' do
35
-
36
- core.read
37
-
38
- end
39
-
40
- end