dwc-archive 0.9.10 → 0.9.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8e8c929203d1b652f8ba345b0c4c39cfc87a0369
4
- data.tar.gz: 739a064221bf52523990bfea38749f11b4d986c0
3
+ metadata.gz: 5c6c5b2a4de324abded5b3adb81d6bcd9603965f
4
+ data.tar.gz: ac00e15f95766838ff42a9bdc3f1a682c4a8e9ff
5
5
  SHA512:
6
- metadata.gz: adfd46bea84e301ceca6de355f189b884bc6a9a22eb8d98e3976f17e3407d2de7f15963ec0d6f29958de057de9d579c81d8007d72c38fc1b9f9ada9295381152
7
- data.tar.gz: 2e188f828d0bbe28baf5f1d8aa7aab12ca330000bf3f5ee145501d78961f9ca5e696be1171998a433952cf84546a8d5fd61c6cf45958576a7e0800fbf4542dae
6
+ metadata.gz: d84f2974ed7bcbabc62d2d3b5c5b15dd8d6d2aa38836a955c12896ada2391e978ca2d420d86bf59db2ab61cc801c9c43036be13699ba7b3902d8daa4f366d45b
7
+ data.tar.gz: a07a91365cba60fa418b85d7b516781c9209ea607dbe5ec45da86febd14bfa104cc16e4b55619a157f5bbb487dff2fd7e318399d815a63372197a9732b3820fb
@@ -1,5 +1,5 @@
1
1
  rvm:
2
- - 1.9.3-p448
2
+ - 1.9.3-p484
3
3
  - 2.0.0-p353
4
4
  before_install:
5
5
  - sudo apt-get update
data/CHANGELOG CHANGED
@@ -1,15 +1,17 @@
1
- 0.9.7 Refactoring and tests improvements
1
+ 0.9.11 Removed VERSION duplicate
2
2
 
3
- 0.9.6 Added support for GNUB DwCA files
3
+ 0.9.7 Refactoring and tests improvements
4
4
 
5
- 0.9.4 Gem dependencies updated, added travis support
5
+ 0.9.6 Added support for GNUB DwCA files
6
6
 
7
- 0.9.0 Migrated code to ruby 1.9.3
7
+ 0.9.4 Gem dependencies updated, added travis support
8
8
 
9
- 0.8.3 Updated outdated exception raising
9
+ 0.9.0 Migrated code to ruby 1.9.3
10
10
 
11
- 0.8.2 Removed species info from linnean classification path
11
+ 0.8.3 Updated outdated exception raising
12
12
 
13
- 0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
13
+ 0.8.2 Removed species info from linnean classification path
14
14
 
15
- 0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
15
+ 0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
16
+
17
+ 0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
data/README.md CHANGED
@@ -175,7 +175,11 @@ Note on Patches/Pull Requests
175
175
  Copyright
176
176
  ---------
177
177
 
178
- Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
178
+ Author -- [Dmitry Mozzherin][13]
179
+
180
+ Contributors -- [Matt Yoder][14]
181
+
182
+ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
179
183
 
180
184
  [1]: https://badge.fury.io/rb/dwc-archive.png
181
185
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -189,3 +193,6 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
189
193
  [10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
190
194
  [11]: http://bit.ly/2IxcBA
191
195
  [12]: http://redis.io/topics/quickstart
196
+ [13]: https://github.com/dimus
197
+ [14]: https://github.com/mjy
198
+ [15]: http://mbl.edu
@@ -8,6 +8,7 @@ require 'ostruct'
8
8
  require 'digest'
9
9
  require 'csv'
10
10
  require 'logger'
11
+ require 'nokogiri'
11
12
  require_relative 'dwc-archive/xml_reader'
12
13
  require_relative 'dwc-archive/ingester'
13
14
  require_relative 'dwc-archive/errors'
@@ -24,7 +25,6 @@ require_relative 'dwc-archive/version'
24
25
 
25
26
  class DarwinCore
26
27
 
27
- VERSION = DarwinCore::VERSION
28
28
  DEFAULT_TMP_DIR = "/tmp"
29
29
 
30
30
  attr_reader :archive, :core, :metadata, :extensions,
@@ -1,7 +1,7 @@
1
- require 'nokogiri'
2
1
  class DarwinCore
3
2
  class Archive
4
3
  attr_reader :meta, :eml
4
+
5
5
  def initialize(archive_path, tmp_dir)
6
6
  @archive_path = archive_path
7
7
  @tmp_dir = tmp_dir
@@ -72,21 +72,11 @@ class DarwinCore
72
72
  end
73
73
 
74
74
  def name_strings(opts = {})
75
- opts = { with_hash: false }.merge(opts)
76
- if !!opts[:with_hash]
77
- @name_strings
78
- else
79
- @name_strings.keys
80
- end
75
+ process_strings(@name_strings, opts)
81
76
  end
82
77
 
83
78
  def vernacular_name_strings(opts = {})
84
- opts = { with_hash: false }.merge(opts)
85
- if !!opts[:with_hash]
86
- @vernacular_name_strings
87
- else
88
- @vernacular_name_strings.keys
89
- end
79
+ process_strings(@vernacular_name_strings, opts)
90
80
  end
91
81
 
92
82
  def normalize(opts = {})
@@ -110,6 +100,15 @@ class DarwinCore
110
100
 
111
101
  private
112
102
 
103
+ def process_strings(strings, opts)
104
+ opts = { with_hash: false }.merge(opts)
105
+ if !!opts[:with_hash]
106
+ strings
107
+ else
108
+ strings.keys
109
+ end
110
+ end
111
+
113
112
  def get_canonical_name(a_scientific_name)
114
113
  if @with_canonical_names
115
114
  canonical_name = @parser.parse(a_scientific_name,
@@ -2,6 +2,7 @@ class DarwinCore
2
2
  class Core
3
3
  include DarwinCore::Ingester
4
4
  attr_reader :id
5
+
5
6
  def initialize(dwc)
6
7
  @dwc = dwc
7
8
  @archive = @dwc.archive
@@ -9,9 +10,10 @@ class DarwinCore
9
10
  root_key = @archive.meta.keys[0]
10
11
  @data = @archive.meta[root_key][:core]
11
12
  raise DarwinCore::CoreFileError.
12
- new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
13
+ new('Cannot find core in meta.xml, is meta.xml valid?') unless @data
13
14
  @id = @data[:id][:attributes]
14
15
  get_attributes(DarwinCore::CoreFileError)
15
16
  end
17
+
16
18
  end
17
19
  end
@@ -1,5 +1,6 @@
1
1
  class DarwinCore
2
2
  class Expander
3
+
3
4
  def initialize(archive_path, tmp_dir)
4
5
  @archive_path = archive_path
5
6
  @tmp_dir = tmp_dir
@@ -21,12 +21,7 @@ class DarwinCore
21
21
  def add_core(data, file_name, keep_headers = true)
22
22
  c = CSV.open(File.join(@path,file_name), @write)
23
23
  header = data.shift
24
- fields = header.map do |f|
25
- f.strip!
26
- err = 'No header in core data, or header fields are not urls'
27
- raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
28
- f.split('/')[-1]
29
- end
24
+ fields = get_fields(header, 'core')
30
25
  data.unshift(fields) if keep_headers
31
26
  ignore_header_lines = keep_headers ? 1 : 0
32
27
  @meta_xml_data[:core] = { fields: header,
@@ -41,12 +36,7 @@ class DarwinCore
41
36
  row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
42
37
  c = CSV.open(File.join(@path,file_name), @write)
43
38
  header = data.shift
44
- fields = header.map do |f|
45
- f.strip!
46
- err = 'No header in core data, or header fields are not urls'
47
- raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
48
- f.split('/')[-1]
49
- end
39
+ fields = get_fields(header, 'extension')
50
40
  data.unshift(fields) if keep_headers
51
41
  ignore_header_lines = keep_headers ? 1 : 0
52
42
  @meta_xml_data[:extensions] << { fields: header,
@@ -81,5 +71,16 @@ class DarwinCore
81
71
  a = "cd #{@path}; tar -zcf #{@dwc_path} *"
82
72
  system(a)
83
73
  end
74
+
75
+ private
76
+
77
+ def get_fields(header, file_type)
78
+ header.map do |f|
79
+ f.strip!
80
+ err = "No header in %s data, or header fields are not urls" % file_type
81
+ raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
82
+ f.split('/')[-1]
83
+ end
84
+ end
84
85
  end
85
86
  end
@@ -22,51 +22,9 @@ class DarwinCore
22
22
  :'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
23
23
  :'xmlns:dc' => 'http://purl.org/dc/terms/',
24
24
  :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
25
- :'xsi:schemaLocation' => 'eml_uri') do
26
- xml.dataset(id: @data[:id]) do
27
- xml.title(@data[:title])
28
- xml.license(@data[:license])
29
- contacts = []
30
- @data[:authors].each_with_index do |a, i|
31
- creator_id = i + 1
32
- contacts << creator_id
33
- xml.creator(id: creator_id, scope: 'document') do
34
- xml.individualName do
35
- xml.givenName(a[:first_name])
36
- xml.surName(a[:last_name])
37
- end
38
- xml.organizationName(a[:organization]) if a[:organization]
39
- xml.positionName(a[:position]) if a[:position]
40
- xml.onlineUrl(a[:url]) if a[:url]
41
- xml.electronicMailAddress(a[:email])
42
- end
43
- end
44
- @data[:metadata_providers].each_with_index do |a, i|
45
- xml.metadataProvider do
46
- xml.individualName do
47
- xml.givenName(a[:first_name])
48
- xml.surName(a[:last_name])
49
- end
50
- xml.organizationName(a[:organization]) if a[:organization]
51
- xml.positionName(a[:position]) if a[:position]
52
- xml.onlineUrl(a[:url]) if a[:url]
53
- xml.electronicMailAddress(a[:email])
54
- end
55
- end if @data[:metadata_providers]
56
- xml.pubDate(Time.now.to_s)
57
- xml.abstract() do
58
- xml.para(@data[:abstract])
59
- end
60
- contacts.each do |contact|
61
- xml.contact { xml.references(contact) }
62
- end
63
- end
64
- xml.additionalMetadata do
65
- xml.metadata do
66
- xml.citation(@data[:citation])
67
- xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
68
- end
69
- end
25
+ :'xsi:schemaLocation' => 'eml_uri') do
26
+ build_dataset(xml)
27
+ build_additional_metadata(xml)
70
28
  xml.parent.namespace = xml.parent.namespace_definitions.first
71
29
  end
72
30
  end
@@ -77,6 +35,71 @@ class DarwinCore
77
35
  end
78
36
 
79
37
  private
38
+
39
+ def build_dataset(xml)
40
+ xml.dataset(id: @data[:id]) do
41
+ xml.title(@data[:title])
42
+ xml.license(@data[:license])
43
+ contacts = []
44
+ build_authors(xml, contacts)
45
+ build_metadata_providers(xml)
46
+ xml.pubDate(Time.now.to_s)
47
+ build_abstract(xml)
48
+ build_contacts(xml, contacts)
49
+ end
50
+ end
51
+
52
+ def build_abstract(xml)
53
+ xml.abstract() do
54
+ xml.para(@data[:abstract])
55
+ end
56
+ end
57
+
58
+ def build_contacts(xml, contacts)
59
+ contacts.each do |contact|
60
+ xml.contact { xml.references(contact) }
61
+ end
62
+ end
63
+
64
+ def build_metadata_providers(xml)
65
+ @data[:metadata_providers].each_with_index do |a, i|
66
+ xml.metadataProvider do
67
+ build_person(xml, a)
68
+ end
69
+ end if @data[:metadata_providers]
70
+ end
71
+
72
+ def build_authors(xml, contacts)
73
+ @data[:authors].each_with_index do |a, i|
74
+ creator_id = i + 1
75
+ contacts << creator_id
76
+ xml.creator(id: creator_id, scope: 'document') do
77
+ build_person(xml, a)
78
+ end
79
+ end
80
+ end
81
+
82
+ def build_additional_metadata(xml)
83
+ xml.additionalMetadata do
84
+ xml.metadata do
85
+ xml.citation(@data[:citation])
86
+ xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
87
+ end
88
+ end
89
+ end
90
+
91
+ def build_person(xml, data)
92
+ a = data
93
+ xml.individualName do
94
+ xml.givenName(a[:first_name])
95
+ xml.surName(a[:last_name])
96
+ end
97
+ xml.organizationName(a[:organization]) if a[:organization]
98
+ xml.positionName(a[:position]) if a[:position]
99
+ xml.onlineUrl(a[:url]) if a[:url]
100
+ xml.electronicMailAddress(a[:email])
101
+ end
102
+
80
103
  def timestamp
81
104
  t = Time.now.getutc.to_a[0..5].reverse
82
105
  t[0..2].join('-') + '::' + t[-3..-1].join(':')
@@ -16,26 +16,7 @@ class DarwinCore
16
16
  fieldsEnclosedBy: '"',
17
17
  linesTerminatedBy: "\n",
18
18
  rowType: 'http://rs.tdwg.org/dwc/terms/Taxon' }
19
- xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
20
- :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
21
- :'xsi:schemaLocation' => schema_uri) do
22
- xml.core(opts.merge(ignoreHeaderLines:
23
- @data[:core][:ignoreHeaderLines])) do
24
- xml.files { xml.location(@data[:core][:location]) }
25
- taxon_id, fields = find_taxon_id(@data[:core][:fields])
26
- xml.id_(index: taxon_id[1])
27
- fields.each { |f| xml.field(term: f[0], index: f[1]) }
28
- end
29
- @data[:extensions].each do |e|
30
- xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
31
- rowType: e[:rowType])) do
32
- xml.files { xml.location(e[:location]) }
33
- taxon_id, fields = find_taxon_id(e[:fields])
34
- xml.coreid(index: taxon_id[1])
35
- fields.each { |f| xml.field(term: f[0], index: f[1]) }
36
- end
37
- end
38
- end
19
+ build_archive(xml, opts, schema_uri)
39
20
  end
40
21
  meta_xml_data = builder.to_xml
41
22
  meta_file = open(File.join(@path, 'meta.xml'), @write)
@@ -44,6 +25,38 @@ class DarwinCore
44
25
  end
45
26
 
46
27
  private
28
+
29
+ def build_archive(xml, opts, schema_uri)
30
+ xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
31
+ :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
32
+ :'xsi:schemaLocation' => schema_uri) do
33
+ build_core(xml, opts)
34
+ build_extensions(xml, opts)
35
+ end
36
+ end
37
+
38
+ def build_core(xml, opts)
39
+ xml.core(opts.merge(ignoreHeaderLines:
40
+ @data[:core][:ignoreHeaderLines])) do
41
+ xml.files { xml.location(@data[:core][:location]) }
42
+ taxon_id, fields = find_taxon_id(@data[:core][:fields])
43
+ xml.id_(index: taxon_id[1])
44
+ fields.each { |f| xml.field(term: f[0], index: f[1]) }
45
+ end
46
+ end
47
+
48
+ def build_extensions(xml, opts)
49
+ @data[:extensions].each do |e|
50
+ xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
51
+ rowType: e[:rowType])) do
52
+ xml.files { xml.location(e[:location]) }
53
+ taxon_id, fields = find_taxon_id(e[:fields])
54
+ xml.coreid(index: taxon_id[1])
55
+ fields.each { |f| xml.field(term: f[0], index: f[1]) }
56
+ end
57
+ end
58
+ end
59
+
47
60
  def find_taxon_id(data)
48
61
  fields = []
49
62
  data.each_with_index { |f, i| fields << [f.strip, i] }
@@ -1,3 +1,3 @@
1
1
  class DarwinCore
2
- VERSION = "0.9.10"
2
+ VERSION = "0.9.11"
3
3
  end
@@ -1,64 +1,80 @@
1
1
  # USAGE: DarwinCore::XmlReader.from_xml(YOUR_XML_STRING)
2
- require 'nokogiri'
3
2
  # modified from
4
3
  # http://stackoverflow.com/questions/1230741/
5
4
  # convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
6
5
  class DarwinCore
7
6
  module XmlReader
8
7
  class << self
8
+
9
9
  def from_xml(xml_io)
10
10
  result = Nokogiri::XML(xml_io)
11
11
  return { result.root.name.to_sym => xml_node_to_hash(result.root)}
12
12
  end
13
13
 
14
14
  private
15
-
16
15
  def xml_node_to_hash(node)
17
16
  # If we are at the root of the document, start the hash
18
17
  if node.element?
19
- result_hash = {}
20
- if node.attributes != {}
21
- result_hash[:attributes] = {}
22
- node.attributes.keys.each do |key|
23
- result_hash[:attributes][node.attributes[key].
24
- name.to_sym] = prepare(node.attributes[key].value)
25
- end
26
- end
27
- if node.children.size > 0
28
- node.children.each do |child|
29
- result = xml_node_to_hash(child)
30
-
31
- if child.name == "text"
32
- unless child.next_sibling || child.previous_sibling
33
- return prepare(result)
34
- end
35
- elsif result_hash[child.name.to_sym]
36
- if result_hash[child.name.to_sym].is_a?(Object::Array)
37
- result_hash[child.name.to_sym] << prepare(result)
38
- else
39
- result_hash[child.name.to_sym] =
40
- [result_hash[child.name.to_sym]] << prepare(result)
41
- end
42
- else
43
- result_hash[child.name.to_sym] = prepare(result)
44
- end
45
- end
46
-
47
- return result_hash
48
- else
49
- return result_hash
50
- end
18
+ prepare_node_element(node)
51
19
  else
52
20
  return prepare(node.content.to_s)
53
21
  end
54
22
  end
23
+
24
+ def add_attributes(node, result_hash)
25
+ if node.attributes != {}
26
+ result_hash[:attributes] = {}
27
+ node.attributes.keys.each do |key|
28
+ result_hash[:attributes][node.attributes[key].name.to_sym] =
29
+ prepare(node.attributes[key].value)
30
+ end
31
+ end
32
+ end
33
+
34
+ def prepare_node_element(node)
35
+ result_hash = {}
36
+ add_attributes(node, result_hash)
37
+ if node.children.size > 0
38
+ result_hash = add_children(node, result_hash)
39
+ end
40
+ result_hash
41
+ end
42
+
43
+ def add_children(node, result_hash)
44
+ node.children.each do |child|
45
+ result = xml_node_to_hash(child)
46
+
47
+ if child.name == "text"
48
+ text = handle_text(child, result)
49
+ return text if text
50
+ elsif result_hash[child.name.to_sym]
51
+ handle_child_node(child, result_hash, result)
52
+ else
53
+ result_hash[child.name.to_sym] = prepare(result)
54
+ end
55
+ end
56
+ result_hash
57
+ end
58
+
59
+ def handle_child_node(child, result_hash, result)
60
+ if result_hash[child.name.to_sym].is_a?(Object::Array)
61
+ result_hash[child.name.to_sym] << prepare(result)
62
+ else
63
+ result_hash[child.name.to_sym] =
64
+ [result_hash[child.name.to_sym]] << prepare(result)
65
+ end
66
+ end
67
+
68
+ def handle_text(child, result)
69
+ unless child.next_sibling || child.previous_sibling
70
+ prepare(result)
71
+ end
72
+ end
55
73
 
56
74
  def prepare(data)
57
- return data if data.class != String
58
- return true if data.strip == "true"
59
- return false if data.strip == "false"
60
- data.to_i.to_s == data ? data.to_i : data
75
+ (data.class == String && data.to_i.to_s == data) ? data.to_i : data
61
76
  end
77
+
62
78
  end
63
79
  end
64
80
  end
@@ -0,0 +1,47 @@
1
+ <?xml version="1.0"?>
2
+ <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml_uri">
3
+ <dataset id="1234">
4
+ <title>Test Classification</title>
5
+ <license>http://creativecommons.org/licenses/by-sa/3.0/</license>
6
+ <creator id="1" scope="document">
7
+ <individualName>
8
+ <givenName>John</givenName>
9
+ <surName>Doe</surName>
10
+ </individualName>
11
+ <organizationName>Example</organizationName>
12
+ <positionName>Assistant Professor</positionName>
13
+ <onlineUrl>http://example.org</onlineUrl>
14
+ <electronicMailAddress>jdoe@example.com</electronicMailAddress>
15
+ </creator>
16
+ <creator id="2" scope="document">
17
+ <individualName>
18
+ <givenName>Jane</givenName>
19
+ <surName>Doe</surName>
20
+ </individualName>
21
+ <electronicMailAddress>jane@example.com</electronicMailAddress>
22
+ </creator>
23
+ <metadataProvider>
24
+ <individualName>
25
+ <givenName>Jim</givenName>
26
+ <surName>Doe</surName>
27
+ </individualName>
28
+ <onlineUrl>http://aggregator.example.org</onlineUrl>
29
+ <electronicMailAddress>jimdoe@example.com</electronicMailAddress>
30
+ </metadataProvider>
31
+ <pubDate>2013-12-30 14:45:33 -0500</pubDate>
32
+ <abstract>
33
+ <para>test classification</para>
34
+ </abstract>
35
+ <contact>
36
+ <references>1</references>
37
+ </contact>
38
+ <contact>
39
+ <references>2</references>
40
+ </contact>
41
+ </dataset>
42
+ <additionalMetadata>
43
+ <metadata>
44
+ <citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
45
+ </metadata>
46
+ </additionalMetadata>
47
+ </eml:eml>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0"?>
2
+ <archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
3
+ <core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
4
+ <files>
5
+ <location>core.csv</location>
6
+ </files>
7
+ <id index="0"/>
8
+ <field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
9
+ <field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
10
+ <field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
11
+ </core>
12
+ <extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
13
+ <files>
14
+ <location>vern.csv</location>
15
+ </files>
16
+ <coreid index="0"/>
17
+ <field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
18
+ </extension>
19
+ </archive>
@@ -50,16 +50,25 @@ describe DarwinCore::Generator do
50
50
  'http://rs.gbif.org/terms/1.0/VernacularName')
51
51
 
52
52
  gen.add_meta_xml
53
- meta = File.read(File.join(gen.path, 'meta.xml'))
54
- expect(meta).to match %r|<location>core.csv</location>|
53
+ meta = File.read(File.join(gen.path, 'meta.xml')).strip
54
+ meta_from_file= File.read(File.expand_path(
55
+ '../../files/generator_meta.xml',
56
+ __FILE__)).strip
57
+ expect(meta).to eq meta_from_file
55
58
  end
56
59
  end
57
60
 
58
61
  describe '#add_eml_data' do
59
62
  it 'adds eml data' do
60
63
  gen.add_eml_xml(EML_DATA)
61
- eml = File.read(File.join(gen.path, 'eml.xml'))
62
- expect(eml).to match /jdoe@example.com/
64
+ eml = File.read(File.join(gen.path, 'eml.xml')).strip
65
+ eml.gsub!(%r|(<pubDate>).*?(</pubDate>)|, '\12013-12-30 14:45:33 -0500\2')
66
+ eml.gsub!(/(packageId=").*?"/, '\11234/2013-12-30::19:45:33"')
67
+
68
+ eml_from_file = File.read(File.expand_path(
69
+ '../../files/generator_eml.xml',
70
+ __FILE__)).strip
71
+ expect(eml.strip).to eq eml_from_file.strip
63
72
  end
64
73
  end
65
74
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.10
4
+ version: 0.9.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-27 00:00:00.000000000 Z
11
+ date: 2014-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -171,7 +171,6 @@ files:
171
171
  - LICENSE
172
172
  - README.md
173
173
  - Rakefile
174
- - ']'
175
174
  - dwc-archive.gemspec
176
175
  - features/dwca-creator.feature
177
176
  - features/dwca-reader.feature
@@ -200,6 +199,8 @@ files:
200
199
  - spec/files/empty_coreid.tar.gz
201
200
  - spec/files/file with characters(3).gz
202
201
  - spec/files/flat_list.tar.gz
202
+ - spec/files/generator_eml.xml
203
+ - spec/files/generator_meta.xml
203
204
  - spec/files/gnub.tar.gz
204
205
  - spec/files/invalid.tar.gz
205
206
  - spec/files/junk_dir_inside.zip
@@ -263,6 +264,8 @@ test_files:
263
264
  - spec/files/empty_coreid.tar.gz
264
265
  - spec/files/file with characters(3).gz
265
266
  - spec/files/flat_list.tar.gz
267
+ - spec/files/generator_eml.xml
268
+ - spec/files/generator_meta.xml
266
269
  - spec/files/gnub.tar.gz
267
270
  - spec/files/invalid.tar.gz
268
271
  - spec/files/junk_dir_inside.zip
data/] DELETED
@@ -1,40 +0,0 @@
1
- require_relative '../spec_helper'
2
-
3
- describe DarwinCore::Core do
4
- subject(:dwca) { DarwinCore.new(file_path) }
5
- subject(:core) { DarwinCore::Core.new(dwca) }
6
- let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
7
- file_name) }
8
- let(:file_name) { 'data.tar.gz' }
9
-
10
-
11
- describe '.new' do
12
- it 'creates new core' do
13
- expect(core).to be_kind_of DarwinCore::Core
14
- end
15
- end
16
-
17
- describe '#id' do
18
-
19
- it 'returns core id' do
20
- expect(core.id[:index]).to eq 0
21
- expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
22
- end
23
-
24
- context 'no coreid' do
25
- let(:file_name) { 'empty_coreid.tar.gz' }
26
-
27
- it 'does not return coreid' do
28
- expect(core.id[:index]).to eq 0
29
- expect(core.id[:term]).to be_nil
30
- end
31
- end
32
- end
33
-
34
- it 'reads core file from archive' do
35
-
36
- core.read
37
-
38
- end
39
-
40
- end