dwc-archive 0.9.10 → 0.9.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/CHANGELOG +10 -8
- data/README.md +8 -1
- data/lib/dwc-archive.rb +1 -1
- data/lib/dwc-archive/archive.rb +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +11 -12
- data/lib/dwc-archive/core.rb +3 -1
- data/lib/dwc-archive/expander.rb +1 -0
- data/lib/dwc-archive/generator.rb +13 -12
- data/lib/dwc-archive/generator_eml_xml.rb +68 -45
- data/lib/dwc-archive/generator_meta_xml.rb +33 -20
- data/lib/dwc-archive/version.rb +1 -1
- data/lib/dwc-archive/xml_reader.rb +54 -38
- data/spec/files/generator_eml.xml +47 -0
- data/spec/files/generator_meta.xml +19 -0
- data/spec/lib/generator_spec.rb +13 -4
- metadata +6 -3
- data/] +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c6c5b2a4de324abded5b3adb81d6bcd9603965f
|
4
|
+
data.tar.gz: ac00e15f95766838ff42a9bdc3f1a682c4a8e9ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d84f2974ed7bcbabc62d2d3b5c5b15dd8d6d2aa38836a955c12896ada2391e978ca2d420d86bf59db2ab61cc801c9c43036be13699ba7b3902d8daa4f366d45b
|
7
|
+
data.tar.gz: a07a91365cba60fa418b85d7b516781c9209ea607dbe5ec45da86febd14bfa104cc16e4b55619a157f5bbb487dff2fd7e318399d815a63372197a9732b3820fb
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -1,15 +1,17 @@
|
|
1
|
-
0.9.
|
1
|
+
0.9.11 Removed VERSION duplicate
|
2
2
|
|
3
|
-
0.9.
|
3
|
+
0.9.7 Refactoring and tests improvements
|
4
4
|
|
5
|
-
0.9.
|
5
|
+
0.9.6 Added support for GNUB DwCA files
|
6
6
|
|
7
|
-
0.9.
|
7
|
+
0.9.4 Gem dependencies updated, added travis support
|
8
8
|
|
9
|
-
0.
|
9
|
+
0.9.0 Migrated code to ruby 1.9.3
|
10
10
|
|
11
|
-
0.8.
|
11
|
+
0.8.3 Updated outdated exception raising
|
12
12
|
|
13
|
-
0.8.
|
13
|
+
0.8.2 Removed species info from linnean classification path
|
14
14
|
|
15
|
-
0.8.
|
15
|
+
0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
|
16
|
+
|
17
|
+
0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
|
data/README.md
CHANGED
@@ -175,7 +175,11 @@ Note on Patches/Pull Requests
|
|
175
175
|
Copyright
|
176
176
|
---------
|
177
177
|
|
178
|
-
|
178
|
+
Author -- [Dmitry Mozzherin][13]
|
179
|
+
|
180
|
+
Contributors -- [Matt Yoder][14]
|
181
|
+
|
182
|
+
Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
|
179
183
|
|
180
184
|
[1]: https://badge.fury.io/rb/dwc-archive.png
|
181
185
|
[2]: http://badge.fury.io/rb/dwc-archive
|
@@ -189,3 +193,6 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
|
|
189
193
|
[10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
|
190
194
|
[11]: http://bit.ly/2IxcBA
|
191
195
|
[12]: http://redis.io/topics/quickstart
|
196
|
+
[13]: https://github.com/dimus
|
197
|
+
[14]: https://github.com/mjy
|
198
|
+
[15]: http://mbl.edu
|
data/lib/dwc-archive.rb
CHANGED
@@ -8,6 +8,7 @@ require 'ostruct'
|
|
8
8
|
require 'digest'
|
9
9
|
require 'csv'
|
10
10
|
require 'logger'
|
11
|
+
require 'nokogiri'
|
11
12
|
require_relative 'dwc-archive/xml_reader'
|
12
13
|
require_relative 'dwc-archive/ingester'
|
13
14
|
require_relative 'dwc-archive/errors'
|
@@ -24,7 +25,6 @@ require_relative 'dwc-archive/version'
|
|
24
25
|
|
25
26
|
class DarwinCore
|
26
27
|
|
27
|
-
VERSION = DarwinCore::VERSION
|
28
28
|
DEFAULT_TMP_DIR = "/tmp"
|
29
29
|
|
30
30
|
attr_reader :archive, :core, :metadata, :extensions,
|
data/lib/dwc-archive/archive.rb
CHANGED
@@ -72,21 +72,11 @@ class DarwinCore
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def name_strings(opts = {})
|
75
|
-
|
76
|
-
if !!opts[:with_hash]
|
77
|
-
@name_strings
|
78
|
-
else
|
79
|
-
@name_strings.keys
|
80
|
-
end
|
75
|
+
process_strings(@name_strings, opts)
|
81
76
|
end
|
82
77
|
|
83
78
|
def vernacular_name_strings(opts = {})
|
84
|
-
|
85
|
-
if !!opts[:with_hash]
|
86
|
-
@vernacular_name_strings
|
87
|
-
else
|
88
|
-
@vernacular_name_strings.keys
|
89
|
-
end
|
79
|
+
process_strings(@vernacular_name_strings, opts)
|
90
80
|
end
|
91
81
|
|
92
82
|
def normalize(opts = {})
|
@@ -110,6 +100,15 @@ class DarwinCore
|
|
110
100
|
|
111
101
|
private
|
112
102
|
|
103
|
+
def process_strings(strings, opts)
|
104
|
+
opts = { with_hash: false }.merge(opts)
|
105
|
+
if !!opts[:with_hash]
|
106
|
+
strings
|
107
|
+
else
|
108
|
+
strings.keys
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
113
112
|
def get_canonical_name(a_scientific_name)
|
114
113
|
if @with_canonical_names
|
115
114
|
canonical_name = @parser.parse(a_scientific_name,
|
data/lib/dwc-archive/core.rb
CHANGED
@@ -2,6 +2,7 @@ class DarwinCore
|
|
2
2
|
class Core
|
3
3
|
include DarwinCore::Ingester
|
4
4
|
attr_reader :id
|
5
|
+
|
5
6
|
def initialize(dwc)
|
6
7
|
@dwc = dwc
|
7
8
|
@archive = @dwc.archive
|
@@ -9,9 +10,10 @@ class DarwinCore
|
|
9
10
|
root_key = @archive.meta.keys[0]
|
10
11
|
@data = @archive.meta[root_key][:core]
|
11
12
|
raise DarwinCore::CoreFileError.
|
12
|
-
new(
|
13
|
+
new('Cannot find core in meta.xml, is meta.xml valid?') unless @data
|
13
14
|
@id = @data[:id][:attributes]
|
14
15
|
get_attributes(DarwinCore::CoreFileError)
|
15
16
|
end
|
17
|
+
|
16
18
|
end
|
17
19
|
end
|
data/lib/dwc-archive/expander.rb
CHANGED
@@ -21,12 +21,7 @@ class DarwinCore
|
|
21
21
|
def add_core(data, file_name, keep_headers = true)
|
22
22
|
c = CSV.open(File.join(@path,file_name), @write)
|
23
23
|
header = data.shift
|
24
|
-
fields = header
|
25
|
-
f.strip!
|
26
|
-
err = 'No header in core data, or header fields are not urls'
|
27
|
-
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
|
28
|
-
f.split('/')[-1]
|
29
|
-
end
|
24
|
+
fields = get_fields(header, 'core')
|
30
25
|
data.unshift(fields) if keep_headers
|
31
26
|
ignore_header_lines = keep_headers ? 1 : 0
|
32
27
|
@meta_xml_data[:core] = { fields: header,
|
@@ -41,12 +36,7 @@ class DarwinCore
|
|
41
36
|
row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
|
42
37
|
c = CSV.open(File.join(@path,file_name), @write)
|
43
38
|
header = data.shift
|
44
|
-
fields = header
|
45
|
-
f.strip!
|
46
|
-
err = 'No header in core data, or header fields are not urls'
|
47
|
-
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
|
48
|
-
f.split('/')[-1]
|
49
|
-
end
|
39
|
+
fields = get_fields(header, 'extension')
|
50
40
|
data.unshift(fields) if keep_headers
|
51
41
|
ignore_header_lines = keep_headers ? 1 : 0
|
52
42
|
@meta_xml_data[:extensions] << { fields: header,
|
@@ -81,5 +71,16 @@ class DarwinCore
|
|
81
71
|
a = "cd #{@path}; tar -zcf #{@dwc_path} *"
|
82
72
|
system(a)
|
83
73
|
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def get_fields(header, file_type)
|
78
|
+
header.map do |f|
|
79
|
+
f.strip!
|
80
|
+
err = "No header in %s data, or header fields are not urls" % file_type
|
81
|
+
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
|
82
|
+
f.split('/')[-1]
|
83
|
+
end
|
84
|
+
end
|
84
85
|
end
|
85
86
|
end
|
@@ -22,51 +22,9 @@ class DarwinCore
|
|
22
22
|
:'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
|
23
23
|
:'xmlns:dc' => 'http://purl.org/dc/terms/',
|
24
24
|
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
25
|
-
:'xsi:schemaLocation' => 'eml_uri')
|
26
|
-
xml
|
27
|
-
|
28
|
-
xml.license(@data[:license])
|
29
|
-
contacts = []
|
30
|
-
@data[:authors].each_with_index do |a, i|
|
31
|
-
creator_id = i + 1
|
32
|
-
contacts << creator_id
|
33
|
-
xml.creator(id: creator_id, scope: 'document') do
|
34
|
-
xml.individualName do
|
35
|
-
xml.givenName(a[:first_name])
|
36
|
-
xml.surName(a[:last_name])
|
37
|
-
end
|
38
|
-
xml.organizationName(a[:organization]) if a[:organization]
|
39
|
-
xml.positionName(a[:position]) if a[:position]
|
40
|
-
xml.onlineUrl(a[:url]) if a[:url]
|
41
|
-
xml.electronicMailAddress(a[:email])
|
42
|
-
end
|
43
|
-
end
|
44
|
-
@data[:metadata_providers].each_with_index do |a, i|
|
45
|
-
xml.metadataProvider do
|
46
|
-
xml.individualName do
|
47
|
-
xml.givenName(a[:first_name])
|
48
|
-
xml.surName(a[:last_name])
|
49
|
-
end
|
50
|
-
xml.organizationName(a[:organization]) if a[:organization]
|
51
|
-
xml.positionName(a[:position]) if a[:position]
|
52
|
-
xml.onlineUrl(a[:url]) if a[:url]
|
53
|
-
xml.electronicMailAddress(a[:email])
|
54
|
-
end
|
55
|
-
end if @data[:metadata_providers]
|
56
|
-
xml.pubDate(Time.now.to_s)
|
57
|
-
xml.abstract() do
|
58
|
-
xml.para(@data[:abstract])
|
59
|
-
end
|
60
|
-
contacts.each do |contact|
|
61
|
-
xml.contact { xml.references(contact) }
|
62
|
-
end
|
63
|
-
end
|
64
|
-
xml.additionalMetadata do
|
65
|
-
xml.metadata do
|
66
|
-
xml.citation(@data[:citation])
|
67
|
-
xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
|
68
|
-
end
|
69
|
-
end
|
25
|
+
:'xsi:schemaLocation' => 'eml_uri') do
|
26
|
+
build_dataset(xml)
|
27
|
+
build_additional_metadata(xml)
|
70
28
|
xml.parent.namespace = xml.parent.namespace_definitions.first
|
71
29
|
end
|
72
30
|
end
|
@@ -77,6 +35,71 @@ class DarwinCore
|
|
77
35
|
end
|
78
36
|
|
79
37
|
private
|
38
|
+
|
39
|
+
def build_dataset(xml)
|
40
|
+
xml.dataset(id: @data[:id]) do
|
41
|
+
xml.title(@data[:title])
|
42
|
+
xml.license(@data[:license])
|
43
|
+
contacts = []
|
44
|
+
build_authors(xml, contacts)
|
45
|
+
build_metadata_providers(xml)
|
46
|
+
xml.pubDate(Time.now.to_s)
|
47
|
+
build_abstract(xml)
|
48
|
+
build_contacts(xml, contacts)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def build_abstract(xml)
|
53
|
+
xml.abstract() do
|
54
|
+
xml.para(@data[:abstract])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def build_contacts(xml, contacts)
|
59
|
+
contacts.each do |contact|
|
60
|
+
xml.contact { xml.references(contact) }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def build_metadata_providers(xml)
|
65
|
+
@data[:metadata_providers].each_with_index do |a, i|
|
66
|
+
xml.metadataProvider do
|
67
|
+
build_person(xml, a)
|
68
|
+
end
|
69
|
+
end if @data[:metadata_providers]
|
70
|
+
end
|
71
|
+
|
72
|
+
def build_authors(xml, contacts)
|
73
|
+
@data[:authors].each_with_index do |a, i|
|
74
|
+
creator_id = i + 1
|
75
|
+
contacts << creator_id
|
76
|
+
xml.creator(id: creator_id, scope: 'document') do
|
77
|
+
build_person(xml, a)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def build_additional_metadata(xml)
|
83
|
+
xml.additionalMetadata do
|
84
|
+
xml.metadata do
|
85
|
+
xml.citation(@data[:citation])
|
86
|
+
xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def build_person(xml, data)
|
92
|
+
a = data
|
93
|
+
xml.individualName do
|
94
|
+
xml.givenName(a[:first_name])
|
95
|
+
xml.surName(a[:last_name])
|
96
|
+
end
|
97
|
+
xml.organizationName(a[:organization]) if a[:organization]
|
98
|
+
xml.positionName(a[:position]) if a[:position]
|
99
|
+
xml.onlineUrl(a[:url]) if a[:url]
|
100
|
+
xml.electronicMailAddress(a[:email])
|
101
|
+
end
|
102
|
+
|
80
103
|
def timestamp
|
81
104
|
t = Time.now.getutc.to_a[0..5].reverse
|
82
105
|
t[0..2].join('-') + '::' + t[-3..-1].join(':')
|
@@ -16,26 +16,7 @@ class DarwinCore
|
|
16
16
|
fieldsEnclosedBy: '"',
|
17
17
|
linesTerminatedBy: "\n",
|
18
18
|
rowType: 'http://rs.tdwg.org/dwc/terms/Taxon' }
|
19
|
-
xml
|
20
|
-
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
21
|
-
:'xsi:schemaLocation' => schema_uri) do
|
22
|
-
xml.core(opts.merge(ignoreHeaderLines:
|
23
|
-
@data[:core][:ignoreHeaderLines])) do
|
24
|
-
xml.files { xml.location(@data[:core][:location]) }
|
25
|
-
taxon_id, fields = find_taxon_id(@data[:core][:fields])
|
26
|
-
xml.id_(index: taxon_id[1])
|
27
|
-
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
28
|
-
end
|
29
|
-
@data[:extensions].each do |e|
|
30
|
-
xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
|
31
|
-
rowType: e[:rowType])) do
|
32
|
-
xml.files { xml.location(e[:location]) }
|
33
|
-
taxon_id, fields = find_taxon_id(e[:fields])
|
34
|
-
xml.coreid(index: taxon_id[1])
|
35
|
-
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
19
|
+
build_archive(xml, opts, schema_uri)
|
39
20
|
end
|
40
21
|
meta_xml_data = builder.to_xml
|
41
22
|
meta_file = open(File.join(@path, 'meta.xml'), @write)
|
@@ -44,6 +25,38 @@ class DarwinCore
|
|
44
25
|
end
|
45
26
|
|
46
27
|
private
|
28
|
+
|
29
|
+
def build_archive(xml, opts, schema_uri)
|
30
|
+
xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
|
31
|
+
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
32
|
+
:'xsi:schemaLocation' => schema_uri) do
|
33
|
+
build_core(xml, opts)
|
34
|
+
build_extensions(xml, opts)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def build_core(xml, opts)
|
39
|
+
xml.core(opts.merge(ignoreHeaderLines:
|
40
|
+
@data[:core][:ignoreHeaderLines])) do
|
41
|
+
xml.files { xml.location(@data[:core][:location]) }
|
42
|
+
taxon_id, fields = find_taxon_id(@data[:core][:fields])
|
43
|
+
xml.id_(index: taxon_id[1])
|
44
|
+
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def build_extensions(xml, opts)
|
49
|
+
@data[:extensions].each do |e|
|
50
|
+
xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
|
51
|
+
rowType: e[:rowType])) do
|
52
|
+
xml.files { xml.location(e[:location]) }
|
53
|
+
taxon_id, fields = find_taxon_id(e[:fields])
|
54
|
+
xml.coreid(index: taxon_id[1])
|
55
|
+
fields.each { |f| xml.field(term: f[0], index: f[1]) }
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
47
60
|
def find_taxon_id(data)
|
48
61
|
fields = []
|
49
62
|
data.each_with_index { |f, i| fields << [f.strip, i] }
|
data/lib/dwc-archive/version.rb
CHANGED
@@ -1,64 +1,80 @@
|
|
1
1
|
# USAGE: Hash.from_xml:(YOUR_XML_STRING)
|
2
|
-
require 'nokogiri'
|
3
2
|
# modified from
|
4
3
|
# http://stackoverflow.com/questions/1230741/
|
5
4
|
# convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
|
6
5
|
class DarwinCore
|
7
6
|
module XmlReader
|
8
7
|
class << self
|
8
|
+
|
9
9
|
def from_xml(xml_io)
|
10
10
|
result = Nokogiri::XML(xml_io)
|
11
11
|
return { result.root.name.to_sym => xml_node_to_hash(result.root)}
|
12
12
|
end
|
13
13
|
|
14
14
|
private
|
15
|
-
|
16
15
|
def xml_node_to_hash(node)
|
17
16
|
# If we are at the root of the document, start the hash
|
18
17
|
if node.element?
|
19
|
-
|
20
|
-
if node.attributes != {}
|
21
|
-
result_hash[:attributes] = {}
|
22
|
-
node.attributes.keys.each do |key|
|
23
|
-
result_hash[:attributes][node.attributes[key].
|
24
|
-
name.to_sym] = prepare(node.attributes[key].value)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
if node.children.size > 0
|
28
|
-
node.children.each do |child|
|
29
|
-
result = xml_node_to_hash(child)
|
30
|
-
|
31
|
-
if child.name == "text"
|
32
|
-
unless child.next_sibling || child.previous_sibling
|
33
|
-
return prepare(result)
|
34
|
-
end
|
35
|
-
elsif result_hash[child.name.to_sym]
|
36
|
-
if result_hash[child.name.to_sym].is_a?(Object::Array)
|
37
|
-
result_hash[child.name.to_sym] << prepare(result)
|
38
|
-
else
|
39
|
-
result_hash[child.name.to_sym] =
|
40
|
-
[result_hash[child.name.to_sym]] << prepare(result)
|
41
|
-
end
|
42
|
-
else
|
43
|
-
result_hash[child.name.to_sym] = prepare(result)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
return result_hash
|
48
|
-
else
|
49
|
-
return result_hash
|
50
|
-
end
|
18
|
+
prepare_node_element(node)
|
51
19
|
else
|
52
20
|
return prepare(node.content.to_s)
|
53
21
|
end
|
54
22
|
end
|
23
|
+
|
24
|
+
def add_attributes(node, result_hash)
|
25
|
+
if node.attributes != {}
|
26
|
+
result_hash[:attributes] = {}
|
27
|
+
node.attributes.keys.each do |key|
|
28
|
+
result_hash[:attributes][node.attributes[key].name.to_sym] =
|
29
|
+
prepare(node.attributes[key].value)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def prepare_node_element(node)
|
35
|
+
result_hash = {}
|
36
|
+
add_attributes(node, result_hash)
|
37
|
+
if node.children.size > 0
|
38
|
+
result_hash = add_children(node, result_hash)
|
39
|
+
end
|
40
|
+
result_hash
|
41
|
+
end
|
42
|
+
|
43
|
+
def add_children(node, result_hash)
|
44
|
+
node.children.each do |child|
|
45
|
+
result = xml_node_to_hash(child)
|
46
|
+
|
47
|
+
if child.name == "text"
|
48
|
+
text = handle_text(child, result)
|
49
|
+
return text if text
|
50
|
+
elsif result_hash[child.name.to_sym]
|
51
|
+
handle_child_node(child, result_hash, result)
|
52
|
+
else
|
53
|
+
result_hash[child.name.to_sym] = prepare(result)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
result_hash
|
57
|
+
end
|
58
|
+
|
59
|
+
def handle_child_node(child, result_hash, result)
|
60
|
+
if result_hash[child.name.to_sym].is_a?(Object::Array)
|
61
|
+
result_hash[child.name.to_sym] << prepare(result)
|
62
|
+
else
|
63
|
+
result_hash[child.name.to_sym] =
|
64
|
+
[result_hash[child.name.to_sym]] << prepare(result)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def handle_text(child, result)
|
69
|
+
unless child.next_sibling || child.previous_sibling
|
70
|
+
prepare(result)
|
71
|
+
end
|
72
|
+
end
|
55
73
|
|
56
74
|
def prepare(data)
|
57
|
-
|
58
|
-
return true if data.strip == "true"
|
59
|
-
return false if data.strip == "false"
|
60
|
-
data.to_i.to_s == data ? data.to_i : data
|
75
|
+
(data.class == String && data.to_i.to_s == data) ? data.to_i : data
|
61
76
|
end
|
77
|
+
|
62
78
|
end
|
63
79
|
end
|
64
80
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:md="eml://ecoinformatics.org/methods-2.1.1" xmlns:proj="eml://ecoinformatics.org/project-2.1.1" xmlns:d="eml://ecoinformatics.org/dataset-2.1.1" xmlns:res="eml://ecoinformatics.org/resource-2.1.1" xmlns:dc="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="1234/2013-12-30::19:45:33" system="http://globalnames.org" xml:lang="en" xsi:schemaLocation="eml_uri">
|
3
|
+
<dataset id="1234">
|
4
|
+
<title>Test Classification</title>
|
5
|
+
<license>http://creativecommons.org/licenses/by-sa/3.0/</license>
|
6
|
+
<creator id="1" scope="document">
|
7
|
+
<individualName>
|
8
|
+
<givenName>John</givenName>
|
9
|
+
<surName>Doe</surName>
|
10
|
+
</individualName>
|
11
|
+
<organizationName>Example</organizationName>
|
12
|
+
<positionName>Assistant Professor</positionName>
|
13
|
+
<onlineUrl>http://example.org</onlineUrl>
|
14
|
+
<electronicMailAddress>jdoe@example.com</electronicMailAddress>
|
15
|
+
</creator>
|
16
|
+
<creator id="2" scope="document">
|
17
|
+
<individualName>
|
18
|
+
<givenName>Jane</givenName>
|
19
|
+
<surName>Doe</surName>
|
20
|
+
</individualName>
|
21
|
+
<electronicMailAddress>jane@example.com</electronicMailAddress>
|
22
|
+
</creator>
|
23
|
+
<metadataProvider>
|
24
|
+
<individualName>
|
25
|
+
<givenName>Jim</givenName>
|
26
|
+
<surName>Doe</surName>
|
27
|
+
</individualName>
|
28
|
+
<onlineUrl>http://aggregator.example.org</onlineUrl>
|
29
|
+
<electronicMailAddress>jimdoe@example.com</electronicMailAddress>
|
30
|
+
</metadataProvider>
|
31
|
+
<pubDate>2013-12-30 14:45:33 -0500</pubDate>
|
32
|
+
<abstract>
|
33
|
+
<para>test classification</para>
|
34
|
+
</abstract>
|
35
|
+
<contact>
|
36
|
+
<references>1</references>
|
37
|
+
</contact>
|
38
|
+
<contact>
|
39
|
+
<references>2</references>
|
40
|
+
</contact>
|
41
|
+
</dataset>
|
42
|
+
<additionalMetadata>
|
43
|
+
<metadata>
|
44
|
+
<citation>Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010</citation>
|
45
|
+
</metadata>
|
46
|
+
</additionalMetadata>
|
47
|
+
</eml:eml>
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<archive xmlns="http://rs.tdwg.org/dwc/text/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd">
|
3
|
+
<core encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy=""" linesTerminatedBy=" " rowType="http://rs.tdwg.org/dwc/terms/Taxon" ignoreHeaderLines="1">
|
4
|
+
<files>
|
5
|
+
<location>core.csv</location>
|
6
|
+
</files>
|
7
|
+
<id index="0"/>
|
8
|
+
<field term="http://rs.tdwg.org/dwc/terms/parentNameUsageID" index="1"/>
|
9
|
+
<field term="http://rs.tdwg.org/dwc/terms/scientificName" index="2"/>
|
10
|
+
<field term="http://rs.tdwg.org/dwc/terms/taxonRank" index="3"/>
|
11
|
+
</core>
|
12
|
+
<extension encoding="UTF-8" fieldsTerminatedBy="," fieldsEnclosedBy=""" linesTerminatedBy=" " rowType="http://rs.gbif.org/terms/1.0/VernacularName" ignoreHeaderLines="1">
|
13
|
+
<files>
|
14
|
+
<location>vern.csv</location>
|
15
|
+
</files>
|
16
|
+
<coreid index="0"/>
|
17
|
+
<field term="http://rs.tdwg.org/dwc/terms/vernacularName" index="1"/>
|
18
|
+
</extension>
|
19
|
+
</archive>
|
data/spec/lib/generator_spec.rb
CHANGED
@@ -50,16 +50,25 @@ describe DarwinCore::Generator do
|
|
50
50
|
'http://rs.gbif.org/terms/1.0/VernacularName')
|
51
51
|
|
52
52
|
gen.add_meta_xml
|
53
|
-
meta = File.read(File.join(gen.path, 'meta.xml'))
|
54
|
-
|
53
|
+
meta = File.read(File.join(gen.path, 'meta.xml')).strip
|
54
|
+
meta_from_file= File.read(File.expand_path(
|
55
|
+
'../../files/generator_meta.xml',
|
56
|
+
__FILE__)).strip
|
57
|
+
expect(meta).to eq meta_from_file
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
58
61
|
describe '#add_eml_data' do
|
59
62
|
it 'adds eml data' do
|
60
63
|
gen.add_eml_xml(EML_DATA)
|
61
|
-
eml = File.read(File.join(gen.path, 'eml.xml'))
|
62
|
-
|
64
|
+
eml = File.read(File.join(gen.path, 'eml.xml')).strip
|
65
|
+
eml.gsub!(%r|(<pubDate>).*?(</pubDate>)|, '\12013-12-30 14:45:33 -0500\2')
|
66
|
+
eml.gsub!(/(packageId=").*?"/, '\11234/2013-12-30::19:45:33"')
|
67
|
+
|
68
|
+
eml_from_file = File.read(File.expand_path(
|
69
|
+
'../../files/generator_eml.xml',
|
70
|
+
__FILE__)).strip
|
71
|
+
expect(eml.strip).to eq eml_from_file.strip
|
63
72
|
end
|
64
73
|
end
|
65
74
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -171,7 +171,6 @@ files:
|
|
171
171
|
- LICENSE
|
172
172
|
- README.md
|
173
173
|
- Rakefile
|
174
|
-
- ']'
|
175
174
|
- dwc-archive.gemspec
|
176
175
|
- features/dwca-creator.feature
|
177
176
|
- features/dwca-reader.feature
|
@@ -200,6 +199,8 @@ files:
|
|
200
199
|
- spec/files/empty_coreid.tar.gz
|
201
200
|
- spec/files/file with characters(3).gz
|
202
201
|
- spec/files/flat_list.tar.gz
|
202
|
+
- spec/files/generator_eml.xml
|
203
|
+
- spec/files/generator_meta.xml
|
203
204
|
- spec/files/gnub.tar.gz
|
204
205
|
- spec/files/invalid.tar.gz
|
205
206
|
- spec/files/junk_dir_inside.zip
|
@@ -263,6 +264,8 @@ test_files:
|
|
263
264
|
- spec/files/empty_coreid.tar.gz
|
264
265
|
- spec/files/file with characters(3).gz
|
265
266
|
- spec/files/flat_list.tar.gz
|
267
|
+
- spec/files/generator_eml.xml
|
268
|
+
- spec/files/generator_meta.xml
|
266
269
|
- spec/files/gnub.tar.gz
|
267
270
|
- spec/files/invalid.tar.gz
|
268
271
|
- spec/files/junk_dir_inside.zip
|
data/]
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
|
3
|
-
describe DarwinCore::Core do
|
4
|
-
subject(:dwca) { DarwinCore.new(file_path) }
|
5
|
-
subject(:core) { DarwinCore::Core.new(dwca) }
|
6
|
-
let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
|
7
|
-
file_name) }
|
8
|
-
let(:file_name) { 'data.tar.gz' }
|
9
|
-
|
10
|
-
|
11
|
-
describe '.new' do
|
12
|
-
it 'creates new core' do
|
13
|
-
expect(core).to be_kind_of DarwinCore::Core
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
describe '#id' do
|
18
|
-
|
19
|
-
it 'returns core id' do
|
20
|
-
expect(core.id[:index]).to eq 0
|
21
|
-
expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
|
22
|
-
end
|
23
|
-
|
24
|
-
context 'no coreid' do
|
25
|
-
let(:file_name) { 'empty_coreid.tar.gz' }
|
26
|
-
|
27
|
-
it 'does not return coreid' do
|
28
|
-
expect(core.id[:index]).to eq 0
|
29
|
-
expect(core.id[:term]).to be_nil
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'reads core file from archive' do
|
35
|
-
|
36
|
-
core.read
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|