dwc-archive 0.9.10 → 0.9.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/CHANGELOG +10 -8
- data/README.md +8 -1
- data/lib/dwc-archive.rb +1 -1
- data/lib/dwc-archive/archive.rb +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +11 -12
- data/lib/dwc-archive/core.rb +3 -1
- data/lib/dwc-archive/expander.rb +1 -0
- data/lib/dwc-archive/generator.rb +13 -12
- data/lib/dwc-archive/generator_eml_xml.rb +68 -45
- data/lib/dwc-archive/generator_meta_xml.rb +33 -20
- data/lib/dwc-archive/version.rb +1 -1
- data/lib/dwc-archive/xml_reader.rb +54 -38
- data/spec/files/generator_eml.xml +47 -0
- data/spec/files/generator_meta.xml +19 -0
- data/spec/lib/generator_spec.rb +13 -4
- metadata +6 -3
- data/] +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c6c5b2a4de324abded5b3adb81d6bcd9603965f
|
4
|
+
data.tar.gz: ac00e15f95766838ff42a9bdc3f1a682c4a8e9ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d84f2974ed7bcbabc62d2d3b5c5b15dd8d6d2aa38836a955c12896ada2391e978ca2d420d86bf59db2ab61cc801c9c43036be13699ba7b3902d8daa4f366d45b
|
7
|
+
data.tar.gz: a07a91365cba60fa418b85d7b516781c9209ea607dbe5ec45da86febd14bfa104cc16e4b55619a157f5bbb487dff2fd7e318399d815a63372197a9732b3820fb
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -1,15 +1,17 @@
|
|
1
|
-
0.9.
|
1
|
+
0.9.11 Removed VERSION duplicate
|
2
2
|
|
3
|
-
0.9.
|
3
|
+
0.9.7 Refactoring and tests improvements
|
4
4
|
|
5
|
-
0.9.
|
5
|
+
0.9.6 Added support for GNUB DwCA files
|
6
6
|
|
7
|
-
0.9.
|
7
|
+
0.9.4 Gem dependencies updated, added travis support
|
8
8
|
|
9
|
-
0.
|
9
|
+
0.9.0 Migrated code to ruby 1.9.3
|
10
10
|
|
11
|
-
0.8.
|
11
|
+
0.8.3 Updated outdated exception rasing
|
12
12
|
|
13
|
-
0.8.
|
13
|
+
0.8.2 Removed species info from linnean classification path
|
14
14
|
|
15
|
-
0.8.
|
15
|
+
0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
|
16
|
+
|
17
|
+
0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
|
data/README.md
CHANGED
@@ -175,7 +175,11 @@ Note on Patches/Pull Requests
|
|
175
175
|
Copyright
|
176
176
|
---------
|
177
177
|
|
178
|
-
|
178
|
+
Author -- [Dmitry Mozzherin][13]
|
179
|
+
|
180
|
+
Contributors -- [Matt Yoder][14]
|
181
|
+
|
182
|
+
Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
|
179
183
|
|
180
184
|
[1]: https://badge.fury.io/rb/dwc-archive.png
|
181
185
|
[2]: http://badge.fury.io/rb/dwc-archive
|
@@ -189,3 +193,6 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
|
|
189
193
|
[10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
|
190
194
|
[11]: http://bit.ly/2IxcBA
|
191
195
|
[12]: http://redis.io/topics/quickstart
|
196
|
+
[13]: https://github.com/dimus
|
197
|
+
[14]: https://github.com/mjy
|
198
|
+
[15]: http://mbl.edu
|
data/lib/dwc-archive.rb
CHANGED
@@ -8,6 +8,7 @@ require 'ostruct'
|
|
8
8
|
require 'digest'
|
9
9
|
require 'csv'
|
10
10
|
require 'logger'
|
11
|
+
require 'nokogiri'
|
11
12
|
require_relative 'dwc-archive/xml_reader'
|
12
13
|
require_relative 'dwc-archive/ingester'
|
13
14
|
require_relative 'dwc-archive/errors'
|
@@ -24,7 +25,6 @@ require_relative 'dwc-archive/version'
|
|
24
25
|
|
25
26
|
class DarwinCore
|
26
27
|
|
27
|
-
VERSION = DarwinCore::VERSION
|
28
28
|
DEFAULT_TMP_DIR = "/tmp"
|
29
29
|
|
30
30
|
attr_reader :archive, :core, :metadata, :extensions,
|
data/lib/dwc-archive/archive.rb
CHANGED
@@ -72,21 +72,11 @@ class DarwinCore
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def name_strings(opts = {})
|
75
|
-
|
76
|
-
if !!opts[:with_hash]
|
77
|
-
@name_strings
|
78
|
-
else
|
79
|
-
@name_strings.keys
|
80
|
-
end
|
75
|
+
process_strings(@name_strings, opts)
|
81
76
|
end
|
82
77
|
|
83
78
|
def vernacular_name_strings(opts = {})
|
84
|
-
|
85
|
-
if !!opts[:with_hash]
|
86
|
-
@vernacular_name_strings
|
87
|
-
else
|
88
|
-
@vernacular_name_strings.keys
|
89
|
-
end
|
79
|
+
process_strings(@vernacular_name_strings, opts)
|
90
80
|
end
|
91
81
|
|
92
82
|
def normalize(opts = {})
|
@@ -110,6 +100,15 @@ class DarwinCore
|
|
110
100
|
|
111
101
|
private
|
112
102
|
|
103
|
+
def process_strings(strings, opts)
|
104
|
+
opts = { with_hash: false }.merge(opts)
|
105
|
+
if !!opts[:with_hash]
|
106
|
+
strings
|
107
|
+
else
|
108
|
+
strings.keys
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
113
112
|
def get_canonical_name(a_scientific_name)
|
114
113
|
if @with_canonical_names
|
115
114
|
canonical_name = @parser.parse(a_scientific_name,
|
data/lib/dwc-archive/core.rb
CHANGED
@@ -2,6 +2,7 @@ class DarwinCore
|
|
2
2
|
class Core
|
3
3
|
include DarwinCore::Ingester
|
4
4
|
attr_reader :id
|
5
|
+
|
5
6
|
def initialize(dwc)
|
6
7
|
@dwc = dwc
|
7
8
|
@archive = @dwc.archive
|
@@ -9,9 +10,10 @@ class DarwinCore
|
|
9
10
|
root_key = @archive.meta.keys[0]
|
10
11
|
@data = @archive.meta[root_key][:core]
|
11
12
|
raise DarwinCore::CoreFileError.
|
12
|
-
new(
|
13
|
+
new('Cannot find core in meta.xml, is meta.xml valid?') unless @data
|
13
14
|
@id = @data[:id][:attributes]
|
14
15
|
get_attributes(DarwinCore::CoreFileError)
|
15
16
|
end
|
17
|
+
|
16
18
|
end
|
17
19
|
end
|
data/lib/dwc-archive/expander.rb
CHANGED
@@ -21,12 +21,7 @@ class DarwinCore
|
|
21
21
|
def add_core(data, file_name, keep_headers = true)
|
22
22
|
c = CSV.open(File.join(@path,file_name), @write)
|
23
23
|
header = data.shift
|
24
|
-
fields = header
|
25
|
-
f.strip!
|
26
|
-
err = 'No header in core data, or header fields are not urls'
|
27
|
-
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
|
28
|
-
f.split('/')[-1]
|
29
|
-
end
|
24
|
+
fields = get_fields(header, 'core')
|
30
25
|
data.unshift(fields) if keep_headers
|
31
26
|
ignore_header_lines = keep_headers ? 1 : 0
|
32
27
|
@meta_xml_data[:core] = { fields: header,
|
@@ -41,12 +36,7 @@ class DarwinCore
|
|
41
36
|
row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
|
42
37
|
c = CSV.open(File.join(@path,file_name), @write)
|
43
38
|
header = data.shift
|
44
|
-
fields = header
|
45
|
-
f.strip!
|
46
|
-
err = 'No header in core data, or header fields are not urls'
|
47
|
-
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
|
48
|
-
f.split('/')[-1]
|
49
|
-
end
|
39
|
+
fields = get_fields(header, 'extension')
|
50
40
|
data.unshift(fields) if keep_headers
|
51
41
|
ignore_header_lines = keep_headers ? 1 : 0
|
52
42
|
@meta_xml_data[:extensions] << { fields: header,
|
@@ -81,5 +71,16 @@ class DarwinCore
|
|
81
71
|
a = "cd #{@path}; tar -zcf #{@dwc_path} *"
|
82
72
|
system(a)
|
83
73
|
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def get_fields(header, file_type)
|
78
|
+
header.map do |f|
|
79
|
+
f.strip!
|
80
|
+
err = "No header in %s data, or header fields are not urls" % file_type
|
81
|
+
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
|
82
|
+
f.split('/')[-1]
|
83
|
+
end
|
84
|
+
end
|
84
85
|
end
|
85
86
|
end
|
@@ -22,51 +22,9 @@ class DarwinCore
|
|
22
22
|
:'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
|
23
23
|
:'xmlns:dc' => 'http://purl.org/dc/terms/',
|
24
24
|
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
25
|
-
:'xsi:schemaLocation' => 'eml_uri')
|
26
|
-
xml
|
27
|
-
|
28
|
-
xml.license(@data[:license])
|
29
|
-
contacts = []
|
30
|
-
@data[:authors].each_with_index do |a, i|
|
31
|
-
creator_id = i + 1
|
32
|
-
contacts << creator_id
|
33
|
-
xml.creator(id: creator_id, scope: 'document') do
|
34
|
-
xml.individualName do
|
35
|
-
xml.givenName(a[:first_name])
|
36
|
-
xml.surName(a[:last_name])
|
37
|
-
end
|
38
|
-
xml.organizationName(a[:organization]) if a[:organization]
|
39
|
-
xml.positionName(a[:position]) if a[:position]
|
40
|
-
xml.onlineUrl(a[:url]) if a[:url]
|
41
|
-
xml.electronicMailAddress(a[:email])
|
42
|
-
end
|
43
|
-
end
|
44
|
-
@data[:metadata_providers].each_with_index do |a, i|
|
45
|
-
xml.metadataProvider do
|
46
|
-
xml.individualName do
|
47
|
-
xml.givenName(a[:first_name])
|
48
|
-
xml.surName(a[:last_name])
|
49
|
-
end
|
50
|
-
xml.organizationName(a[:organization]) if a[:organization]
|
51
|
-
xml.positionName(a[:position]) if a[:position]
|
52
|
-
xml.onlineUrl(a[:url]) if a[:url]
|
53
|
-
xml.electronicMailAddress(a[:email])
|
54
|
-
end
|
55
|
-
end if @data[:metadata_providers]
|
56
|
-
xml.pubDate(Time.now.to_s)
|
57
|
-
xml.abstract() do
|
58
|
-
xml.para(@data[:abstract])
|
59
|
-
end
|
60
|
-
contacts.each do |contact|
|
61
|
-
xml.contact { xml.references(contact) }
|
62
|
-
end
|
63
|
-
end
|
64
|
-
xml.additionalMetadata do
|
65
|
-
xml.metadata do
|
66
|
-
xml.citation(@data[:citation])
|
67
|
-
xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
|
68
|
-
end
|
69
|
-
end
|
25
|
+
:'xsi:schemaLocation' => 'eml_uri') do
|
26
|
+
build_dataset(xml)
|
27
|
+
build_additional_metadata(xml)
|
70
28
|
xml.parent.namespace = xml.parent.namespace_definitions.first
|
71
29
|
end
|
72
30
|
end
|
@@ -77,6 +35,71 @@ class DarwinCore
|
|
77
35
|
end
|
78
36
|
|
79
37
|
private
|
38
|
+
|
39
|
+
def build_dataset(xml)
|
40
|
+
xml.dataset(id: @data[:id]) do
|
41
|
+
xml.title(@data[:title])
|
42
|
+
xml.license(@data[:license])
|
43
|
+
contacts = []
|
44
|
+
build_authors(xml, contacts)
|
45
|
+
build_metadata_providers(xml)
|
46
|
+
xml.pubDate(Time.now.to_s)
|
47
|
+
build_abstract(xml)
|
48
|
+
build_contacts(xml, contacts)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def build_abstract(xml)
|
53
|
+
xml.abstract() do
|
54
|
+
xml.para(@data[:abstract])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def build_contacts(xml, contacts)
|
59
|
+
contacts.each do |contact|
|
60
|
+
xml.contact { xml.references(contact) }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def build_metadata_providers(xml)
|
65
|
+
@data[:metadata_providers].each_with_index do |a, i|
|
66
|
+
xml.metadataProvider do
|
67
|
+
build_person(xml, a)
|
68
|
+
end
|
69
|
+
end if @data[:metadata_providers]
|
70
|
+
end
|
71
|
+
|
72
|
+
def build_authors(xml, contacts)
|
73
|
+
@data[:authors].each_with_index do |a, i|
|
74
|
+
creator_id = i + 1
|
75
|
+
contacts << creator_id
|
76
|
+
xml.creator(id: creator_id, scope: 'document') do
|
77
|
+
build_person(xml, a)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def build_additional_metadata(xml)
|
83
|
+
xml.additionalMetadata do
|
84
|
+
xml.metadata do
|
85
|
+
xml.citation(@data[:citation])
|
86
|
+
xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def build_person(xml, data)
|
92
|
+
a = data
|
93
|
+
xml.individualName do
|
94
|
+
xml.givenName(a[:first_name])
|
95
|
+
xml.surName(a[:last_name])
|
96
|
+
end
|
97
|
+
xml.organizationName(a[:organization]) if a[:organization]
|
98
|
+
xml.positionName(a[:position]) if a[:position]
|
99
|
+
xml.onlineUrl(a[:url]) if a[:url]
|
100
|
+
xml.electronicMailAddress(a[:email])
|
101
|
+
end
|
102
|
+
|
80
103
|
def timestamp
|
81
104
|
t = Time.now.getutc.to_a[0..5].reverse
|
82
105
|
t[0..2].join('-') + '::' + t[-3..-1].join(':')
|
@@ -16,26 +16,7 @@ class DarwinCore
|
|
16
16
|
fieldsEnclosedBy: '"',
|
17
17
|
linesTerminatedBy: "\n",
|
18
18
|
rowType: 'http://rs.tdwg.org/dwc/terms/Taxon' }
|
19
|
-
xml
|
20
|
-
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
21
|
-
:'xsi:schemaLocation' => schema_uri) do
|
22
|
-
xml.core(opts.merge(ignoreHeaderLines:
|
23
|
-
@data[:core][:ignoreHeaderLines])) do
|
24
|
-
xml.files { xml.location(@data[:core][:location]) }
|
25
|
-
taxon_id, fields = find_taxon_id(@data[:core][:fields])
|
26
|
-
xml.id_(index: taxon_id[1])
|
27
|
-
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
28
|
-
end
|
29
|
-
@data[:extensions].each do |e|
|
30
|
-
xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
|
31
|
-
rowType: e[:rowType])) do
|
32
|
-
xml.files { xml.location(e[:location]) }
|
33
|
-
taxon_id, fields = find_taxon_id(e[:fields])
|
34
|
-
xml.coreid(index: taxon_id[1])
|
35
|
-
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
19
|
+
build_archive(xml, opts, schema_uri)
|
39
20
|
end
|
40
21
|
meta_xml_data = builder.to_xml
|
41
22
|
meta_file = open(File.join(@path, 'meta.xml'), @write)
|
@@ -44,6 +25,38 @@ class DarwinCore
|
|
44
25
|
end
|
45
26
|
|
46
27
|
private
|
28
|
+
|
29
|
+
def build_archive(xml, opts, schema_uri)
|
30
|
+
xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
|
31
|
+
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
32
|
+
:'xsi:schemaLocation' => schema_uri) do
|
33
|
+
build_core(xml, opts)
|
34
|
+
build_extensions(xml, opts)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def build_core(xml, opts)
|
39
|
+
xml.core(opts.merge(ignoreHeaderLines:
|
40
|
+
@data[:core][:ignoreHeaderLines])) do
|
41
|
+
xml.files { xml.location(@data[:core][:location]) }
|
42
|
+
taxon_id, fields = find_taxon_id(@data[:core][:fields])
|
43
|
+
xml.id_(index: taxon_id[1])
|
44
|
+
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def build_extensions(xml, opts)
|
49
|
+
@data[:extensions].each do |e|
|
50
|
+
xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
|
51
|
+
rowType: e[:rowType])) do
|
52
|
+
xml.files { xml.location(e[:location]) }
|
53
|
+
taxon_id, fields = find_taxon_id(e[:fields])
|
54
|
+
xml.coreid(index: taxon_id[1])
|
55
|
+
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
47
60
|
def find_taxon_id(data)
|
48
61
|
fields = []
|
49
62
|
data.each_with_index { |f, i| fields << [f.strip, i] }
|
data/lib/dwc-archive/version.rb
CHANGED
@@ -1,64 +1,80 @@
|
|
1
1
|
# USAGE: Hash.from_xml:(YOUR_XML_STRING)
|
2
|
-
require 'nokogiri'
|
3
2
|
# modified from
|
4
3
|
# http://stackoverflow.com/questions/1230741/
|
5
4
|
# convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
|
6
5
|
class DarwinCore
|
7
6
|
module XmlReader
|
8
7
|
class << self
|
8
|
+
|
9
9
|
def from_xml(xml_io)
|
10
10
|
result = Nokogiri::XML(xml_io)
|
11
11
|
return { result.root.name.to_sym => xml_node_to_hash(result.root)}
|
12
12
|
end
|
13
13
|
|
14
14
|
private
|
15
|
-
|
16
15
|
def xml_node_to_hash(node)
|
17
16
|
# If we are at the root of the document, start the hash
|
18
17
|
if node.element?
|
19
|
-
|
20
|
-
if node.attributes != {}
|
21
|
-
result_hash[:attributes] = {}
|
22
|
-
node.attributes.keys.each do |key|
|
23
|
-
result_hash[:attributes][node.attributes[key].
|
24
|
-
name.to_sym] = prepare(node.attributes[key].value)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
if node.children.size > 0
|
28
|
-
node.children.each do |child|
|
29
|
-
result = xml_node_to_hash(child)
|
30
|
-
|
31
|
-
if child.name == "text"
|
32
|
-
unless child.next_sibling || child.previous_sibling
|
33
|
-
return prepare(result)
|
34
|
-
end
|
35
|
-
elsif result_hash[child.name.to_sym]
|
36
|
-
if result_hash[child.name.to_sym].is_a?(Object::Array)
|
37
|
-
result_hash[child.name.to_sym] << prepare(result)
|
38
|
-
else
|
39
|
-
result_hash[child.name.to_sym] =
|
40
|
-
[result_hash[child.name.to_sym]] << prepare(result)
|
41
|
-
end
|
42
|
-
else
|
43
|
-
result_hash[child.name.to_sym] = prepare(result)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
return result_hash
|
48
|
-
else
|
49
|
-
return result_hash
|
50
|
-
end
|
18
|
+
prepare_node_element(node)
|
51
19
|
else
|
52
20
|
return prepare(node.content.to_s)
|
53
21
|
end
|
54
22
|
end
|
23
|
+
|
24
|
+
def add_attributes(node, result_hash)
|
25
|
+
if node.attributes != {}
|
26
|
+
result_hash[:attributes] = {}
|
27
|
+
node.attributes.keys.each do |key|
|
28
|
+
result_hash[:attributes][node.attributes[key].name.to_sym] =
|
29
|
+
prepare(node.attributes[key].value)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def prepare_node_element(node)
|
35
|
+
result_hash = {}
|
36
|
+
add_attributes(node, result_hash)
|
37
|
+
if node.children.size > 0
|
38
|
+
result_hash = add_children(node, result_hash)
|
39
|
+
end
|
40
|
+
result_hash
|
41
|
+
end
|
42
|
+
|
43
|
+
def add_children(node, result_hash)
|
44
|
+
node.children.each do |child|
|
45
|
+
result = xml_node_to_hash(child)
|
46
|
+
|
47
|
+
if child.name == "text"
|
48
|
+
text = handle_text(child, result)
|
49
|
+
return text if text
|
50
|
+
elsif result_hash[child.name.to_sym]
|
51
|
+
handle_child_node(child, result_hash, result)
|
52
|
+
else
|
53
|
+
result_hash[child.name.to_sym] = prepare(result)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
result_hash
|
57
|
+
end
|
58
|
+
|
59
|
+
def handle_child_node(child, result_hash, result)
|
60
|
+
if result_hash[child.name.to_sym].is_a?(Object::Array)
|
61
|
+
result_hash[child.name.to_sym] << prepare(result)
|
62
|
+
else
|
63
|
+
result_hash[child.name.to_sym] =
|
64
|
+
[result_hash[child.name.to_sym]] << prepare(result)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def handle_text(child, result)
|
69
|
+
unless child.next_sibling || child.previous_sibling
|
70
|
+
prepare(result)
|
71
|
+
end
|
72
|
+
end
|
55
73
|
|
56
74
|
def prepare(data)
|
57
|
-
|
58
|
-
return true if data.strip == "true"
|
59
|
-
return false if data.strip == "false"
|
60
|
-
data.to_i.to_s == data ? data.to_i : data
|
75
|
+
(data.class == String && data.to_i.to_s == data) ? data.to_i : data
|
61
76
|
end
|
77
|
+
|
62
78
|
end
|
63
79
|
end
|
64
80
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml_uri">
|
3
|
+
<dataset id="1234">
|
4
|
+
<title>Test Classification</title>
|
5
|
+
<license>http://creativecommons.org/licenses/by-sa/3.0/</license>
|
6
|
+
<creator id="1" scope="document">
|
7
|
+
<individualName>
|
8
|
+
<givenName>John</givenName>
|
9
|
+
<surName>Doe</surName>
|
10
|
+
</individualName>
|
11
|
+
<organizationName>Example</organizationName>
|
12
|
+
<positionName>Assistant Professor</positionName>
|
13
|
+
<onlineUrl>http://example.org</onlineUrl>
|
14
|
+
<electronicMailAddress>jdoe@example.com</electronicMailAddress>
|
15
|
+
</creator>
|
16
|
+
<creator id="2" scope="document">
|
17
|
+
<individualName>
|
18
|
+
<givenName>Jane</givenName>
|
19
|
+
<surName>Doe</surName>
|
20
|
+
</individualName>
|
21
|
+
<electronicMailAddress>jane@example.com</electronicMailAddress>
|
22
|
+
</creator>
|
23
|
+
<metadataProvider>
|
24
|
+
<individualName>
|
25
|
+
<givenName>Jim</givenName>
|
26
|
+
<surName>Doe</surName>
|
27
|
+
</individualName>
|
28
|
+
<onlineUrl>http://aggregator.example.org</onlineUrl>
|
29
|
+
<electronicMailAddress>jimdoe@example.com</electronicMailAddress>
|
30
|
+
</metadataProvider>
|
31
|
+
<pubDate>2013-12-30 14:45:33 -0500</pubDate>
|
32
|
+
<abstract>
|
33
|
+
<para>test classification</para>
|
34
|
+
</abstract>
|
35
|
+
<contact>
|
36
|
+
<references>1</references>
|
37
|
+
</contact>
|
38
|
+
<contact>
|
39
|
+
<references>2</references>
|
40
|
+
</contact>
|
41
|
+
</dataset>
|
42
|
+
<additionalMetadata>
|
43
|
+
<metadata>
|
44
|
+
<citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
|
45
|
+
</metadata>
|
46
|
+
</additionalMetadata>
|
47
|
+
</eml:eml>
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
|
3
|
+
<core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
|
4
|
+
<files>
|
5
|
+
<location>core.csv</location>
|
6
|
+
</files>
|
7
|
+
<id index="0"/>
|
8
|
+
<field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
|
9
|
+
<field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
|
10
|
+
<field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
|
11
|
+
</core>
|
12
|
+
<extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy="&quot;" linesTerminatedBy="&#10;" rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
|
13
|
+
<files>
|
14
|
+
<location>vern.csv</location>
|
15
|
+
</files>
|
16
|
+
<coreid index="0"/>
|
17
|
+
<field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
|
18
|
+
</extension>
|
19
|
+
</archive>
|
data/spec/lib/generator_spec.rb
CHANGED
@@ -50,16 +50,25 @@ describe DarwinCore::Generator do
|
|
50
50
|
'http://rs.gbif.org/terms/1.0/VernacularName')
|
51
51
|
|
52
52
|
gen.add_meta_xml
|
53
|
-
meta = File.read(File.join(gen.path, 'meta.xml'))
|
54
|
-
|
53
|
+
meta = File.read(File.join(gen.path, 'meta.xml')).strip
|
54
|
+
meta_from_file= File.read(File.expand_path(
|
55
|
+
'../../files/generator_meta.xml',
|
56
|
+
__FILE__)).strip
|
57
|
+
expect(meta).to eq meta_from_file
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
58
61
|
describe '#add_eml_data' do
|
59
62
|
it 'adds eml data' do
|
60
63
|
gen.add_eml_xml(EML_DATA)
|
61
|
-
eml = File.read(File.join(gen.path, 'eml.xml'))
|
62
|
-
|
64
|
+
eml = File.read(File.join(gen.path, 'eml.xml')).strip
|
65
|
+
eml.gsub!(%r|(<pubDate>).*?(</pubDate>)|, '\12013-12-30 14:45:33 -0500\2')
|
66
|
+
eml.gsub!(/(packageId=").*?"/, '\11234/2013-12-30::19:45:33"')
|
67
|
+
|
68
|
+
eml_from_file = File.read(File.expand_path(
|
69
|
+
'../../files/generator_eml.xml',
|
70
|
+
__FILE__)).strip
|
71
|
+
expect(eml.strip).to eq eml_from_file.strip
|
63
72
|
end
|
64
73
|
end
|
65
74
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -171,7 +171,6 @@ files:
|
|
171
171
|
- LICENSE
|
172
172
|
- README.md
|
173
173
|
- Rakefile
|
174
|
-
- ']'
|
175
174
|
- dwc-archive.gemspec
|
176
175
|
- features/dwca-creator.feature
|
177
176
|
- features/dwca-reader.feature
|
@@ -200,6 +199,8 @@ files:
|
|
200
199
|
- spec/files/empty_coreid.tar.gz
|
201
200
|
- spec/files/file with characters(3).gz
|
202
201
|
- spec/files/flat_list.tar.gz
|
202
|
+
- spec/files/generator_eml.xml
|
203
|
+
- spec/files/generator_meta.xml
|
203
204
|
- spec/files/gnub.tar.gz
|
204
205
|
- spec/files/invalid.tar.gz
|
205
206
|
- spec/files/junk_dir_inside.zip
|
@@ -263,6 +264,8 @@ test_files:
|
|
263
264
|
- spec/files/empty_coreid.tar.gz
|
264
265
|
- spec/files/file with characters(3).gz
|
265
266
|
- spec/files/flat_list.tar.gz
|
267
|
+
- spec/files/generator_eml.xml
|
268
|
+
- spec/files/generator_meta.xml
|
266
269
|
- spec/files/gnub.tar.gz
|
267
270
|
- spec/files/invalid.tar.gz
|
268
271
|
- spec/files/junk_dir_inside.zip
|
data/]
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
|
3
|
-
describe DarwinCore::Core do
|
4
|
-
subject(:dwca) { DarwinCore.new(file_path) }
|
5
|
-
subject(:core) { DarwinCore::Core.new(dwca) }
|
6
|
-
let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
|
7
|
-
file_name) }
|
8
|
-
let(:file_name) { 'data.tar.gz' }
|
9
|
-
|
10
|
-
|
11
|
-
describe '.new' do
|
12
|
-
it 'creates new core' do
|
13
|
-
expect(core).to be_kind_of DarwinCore::Core
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
describe '#id' do
|
18
|
-
|
19
|
-
it 'returns core id' do
|
20
|
-
expect(core.id[:index]).to eq 0
|
21
|
-
expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
|
22
|
-
end
|
23
|
-
|
24
|
-
context 'no coreid' do
|
25
|
-
let(:file_name) { 'empty_coreid.tar.gz' }
|
26
|
-
|
27
|
-
it 'does not return coreid' do
|
28
|
-
expect(core.id[:index]).to eq 0
|
29
|
-
expect(core.id[:term]).to be_nil
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'reads core file from archive' do
|
35
|
-
|
36
|
-
core.read
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|