research_metadata 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +51 -0
- data/Rakefile +1 -0
- data/lib/research_metadata/transformer/dataset.rb +233 -0
- data/lib/research_metadata/version.rb +5 -0
- data/lib/research_metadata.rb +4 -0
- data/research_metadata.gemspec +23 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/transformer/dataset_spec.rb +39 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 44b8ec73389b2fb8f75aa96fca6213b0fbe7fb1e
|
4
|
+
data.tar.gz: 5dcdf92b3c6437da322181ae345b418e37e0312c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 95c845a1a08d31f1d63085382598be3011230a400bfd76ce31c5604c0e03bc1678b425974a996e0b014e319d4ec10f42c7895738b406c8ab0e9158b42832a58f
|
7
|
+
data.tar.gz: def6a7e3e6078b1974ed06f8156908316acfdb6a58bdc9164bf49c42bcc8c5598c46bfe335a28652a611f60d23b5e3e12210c4704de126cb8675eab248c7d24b
|
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# Change Log
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
|
+
|
5
|
+
## Unreleased
|
6
|
+
|
7
|
+
## 0.1.0 - 2017-02-03
|
8
|
+
### Added
|
9
|
+
- Working product supports datasets.
|
10
|
+
- RSpec tests for datasets.
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2017 Adrian Albin-Clark
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# ResearchMetadata [![Gem Version](https://badge.fury.io/rb/research_metadata.svg)](https://badge.fury.io/rb/research_metadata)
|
2
|
+
|
3
|
+
Extraction and Transformation for Loading by DataCite's API.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'research_metadata'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install research_metadata
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
### Configuration
|
22
|
+
Configure Purée.
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
Puree.base_url = ENV['PURE_BASE_URL']
|
26
|
+
Puree.username = ENV['PURE_USERNAME']
|
27
|
+
Puree.password = ENV['PURE_PASSWORD']
|
28
|
+
Puree.basic_auth = true
|
29
|
+
```
|
30
|
+
|
31
|
+
### Transformation
|
32
|
+
|
33
|
+
Create a metadata transformer for a Pure dataset...
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
transformer = ResearchMetadata::Transformer::Dataset.new
|
37
|
+
```
|
38
|
+
|
39
|
+
...and give it a Pure identifier and a DOI...
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
metadata = transformer.transform uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
|
43
|
+
doi: '10.1234/foo/bar/1'
|
44
|
+
```
|
45
|
+
|
46
|
+
...to get DataCite-ready metadata.
|
47
|
+
|
48
|
+
## Documentation
|
49
|
+
[API in YARD](http://www.rubydoc.info/gems/research_metadata)
|
50
|
+
|
51
|
+
[Detailed usage in GitBook](https://aalbinclark.gitbooks.io/research_metadata)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/lib/research_metadata/transformer/dataset.rb
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
# Extraction (from Pure) and Transformation for Loading by DataCite's API
|
2
|
+
#
|
3
|
+
module ResearchMetadata
|
4
|
+
|
5
|
+
# Transformer
|
6
|
+
#
|
7
|
+
module Transformer
|
8
|
+
|
9
|
+
# Dataset
|
10
|
+
#
|
11
|
+
class Dataset
|
12
|
+
|
13
|
+
# Dataset transformation
#
# Extracts a dataset (lookup is delegated to #extract, which receives both
# uuid and id), keeps it in @dataset for the private builder methods, and
# assembles a Datacite::Mapping::Resource from their results.
#
# @param id [String]
# @param uuid [String]
# @param doi [String] passed to #identifier
# @return [String] whatever Datacite::Mapping::Resource#write_xml returns
# @raise [RuntimeError] 'No metadata to transform' when the extracted
#   dataset reports empty metadata
def transform(id: nil, uuid: nil, doi: nil)
  @dataset = extract uuid: uuid, id: id
  raise 'No metadata to transform' if @dataset.metadata.empty?

  people = person
  files = file
  # Entries are evaluated top to bottom, in the same order as the keyword
  # arguments they become.
  attributes = {
    identifier: identifier(doi),
    creators: people['creator'],
    titles: [title],
    publisher: publisher,
    publication_year: publication_year,
    subjects: subjects,
    contributors: people['contributor'],
    dates: dates,
    language: language,
    resource_type: resource_type,
    related_identifiers: related_identifiers,
    sizes: files.map { |entry| entry['size'] },
    formats: files.map { |entry| entry['mime'] },
    rights_list: files.map { |entry| entry['license']['name'] },
    descriptions: description,
    geo_locations: spatial
  }
  ::Datacite::Mapping::Resource.new(**attributes).write_xml
end
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
# Names of the affiliations held by a person.
#
# @param person [#affiliation] object whose affiliation entries are indexed
#   with the 'name' key
# @return [Array] the 'name' value of every affiliation entry
def affiliations(person)
  entries = person.affiliation
  entries.map { |entry| entry['name'] }
end
|
50
|
+
|
51
|
+
# Dates for the resource.
#
# Always contains an AVAILABLE date built from @dataset.available. When the
# temporal start year is non-empty a COLLECTED date is added whose value is
# the start date, followed by '/' and the end date when the end year is
# non-empty too.
#
# @return [Array<Datacite::Mapping::Date>]
def dates
  result = [
    ::Datacite::Mapping::Date.new(value: Puree::Date.iso(@dataset.available),
                                  type: ::Datacite::Mapping::DateType::AVAILABLE)
  ]

  period = @dataset.temporal
  return result if period['start']['year'].empty?

  span = ''.dup
  span << Puree::Date.iso(period['start'])
  span << '/' << Puree::Date.iso(period['end']) unless period['end']['year'].empty?

  unless span.empty?
    result << ::Datacite::Mapping::Date.new(value: span,
                                            type: ::Datacite::Mapping::DateType::COLLECTED)
  end
  result
end
|
74
|
+
|
75
|
+
# Descriptions for the resource.
#
# @return [Array] a single ABSTRACT description wrapping @dataset.description,
#   or an empty array when that description is empty
def description
  text = @dataset.description
  return [] if text.empty?

  abstract = ::Datacite::Mapping::Description.new(
    value: text,
    type: ::Datacite::Mapping::DescriptionType::ABSTRACT
  )
  [abstract]
end
|
85
|
+
|
86
|
+
# Creates a Puree::Dataset and asks it to find one record.
#
# The lookup uses uuid whenever it is not nil; otherwise id is used (even
# when id is nil as well).
#
# @param uuid [String]
# @param id [String]
# @return [Puree::Dataset] the dataset object after #find has been called
def extract(uuid: nil, id: nil)
  Puree::Dataset.new.tap do |dataset|
    if uuid.nil?
      dataset.find id: id
    else
      dataset.find uuid: uuid
    end
  end
end
|
95
|
+
|
96
|
+
# File entries of the extracted dataset, exactly as @dataset.file returns them.
#
# @return [Object] the unmodified result of @dataset.file
def file
  @dataset.file
end
|
99
|
+
|
100
|
+
# Wraps a DOI in a DataCite identifier object.
#
# @param doi [String]
# @return [Datacite::Mapping::Identifier]
def identifier(doi)
  ::Datacite::Mapping::Identifier.new value: doi
end
|
103
|
+
|
104
|
+
# Language of the resource, taken from the dataset's locale.
#
# @return [Object] the unmodified result of @dataset.locale
def language
  @dataset.locale
end
|
107
|
+
|
108
|
+
# Builds an ORCID name identifier for a person.
#
# @param person [#orcid]
# @return [Datacite::Mapping::NameIdentifier, nil] nil when the person's
#   orcid is empty
def name_identifier_orcid(person)
  return nil if person.orcid.empty?

  ::Datacite::Mapping::NameIdentifier.new(
    scheme: 'ORCID',
    scheme_uri: URI('http://orcid.org/'),
    value: person.orcid
  )
end
|
117
|
+
|
118
|
+
# Groups the dataset's people into DataCite creators and contributors.
#
# People are read from the 'internal', 'external' and 'other' lists of
# @dataset.person, in that order. Whitespace is removed from each role before
# it is interpreted: 'Creator' yields a Datacite::Mapping::Creator; any other
# role is looked up as a ContributorType ('Contributor' is looked up as
# 'Other') and the person is skipped when no type is found.
#
# When the entry carries a non-empty uuid, the matching Puree::Person is
# fetched and, if it has metadata, its ORCID identifier and affiliation names
# are attached.
#
# @return [Hash] with keys 'creator' and 'contributor', each an Array
def person
  people = { 'creator' => [], 'contributor' => [] }

  %w(internal external other).each do |person_type|
    @dataset.person[person_type].each do |dataset_person|
      pure_role = dataset_person['role'].gsub(/\s+/, '')
      # One classification drives both the object built and the list it is
      # stored in, so a role that only differs by whitespace cannot end up as
      # a Creator object inside the contributor list.
      creator = pure_role == 'Creator'
      name = "#{dataset_person['name']['last']}, #{dataset_person['name']['first']}"

      if creator
        human = ::Datacite::Mapping::Creator.new name: name
      else
        pure_role = 'Other' if pure_role == 'Contributor'
        contributor_type = ::Datacite::Mapping::ContributorType.find_by_value pure_role
        # Roles without a DataCite contributor type are left out
        next unless contributor_type

        human = ::Datacite::Mapping::Contributor.new name: name,
                                                     type: contributor_type
      end

      unless dataset_person['uuid'].empty?
        pure_person = Puree::Person.new
        pure_person.find uuid: dataset_person['uuid']
        unless pure_person.metadata.empty?
          orcid = name_identifier_orcid(pure_person)
          human.identifier = orcid unless orcid.nil?

          affiliation_names = affiliations(pure_person)
          human.affiliations = affiliation_names unless affiliation_names.empty?
        end
      end

      people[creator ? 'creator' : 'contributor'] << human
    end
  end

  people
end
|
159
|
+
|
160
|
+
# Publication year of the resource: the 'year' entry of @dataset.available.
#
# @return [Object] the value stored under 'year'
def publication_year
  availability = @dataset.available
  availability['year']
end
|
163
|
+
|
164
|
+
# Publisher of the resource, exactly as @dataset.publisher returns it.
#
# @return [Object] the unmodified result of @dataset.publisher
def publisher
  @dataset.publisher
end
|
167
|
+
|
168
|
+
# Related identifiers derived from the dataset's associated publications.
#
# Each entry of @dataset.publication whose 'type' is not 'Dataset' is fetched
# as a Puree::Publication. When that publication has a non-empty doi, a DOI
# related identifier with relation IS_REFERENCED_BY is produced from it.
#
# @return [Array<Datacite::Mapping::RelatedIdentifier>]
def related_identifiers
  doi_prefix = 'http://dx.doi.org/'

  @dataset.publication.each_with_object([]) do |entry, related|
    # Do nothing for datasets as the relationship cannot currently be determined
    next if entry['type'] == 'Dataset'

    publication = Puree::Publication.new
    publication.find uuid: entry['uuid']
    doi = publication.doi
    next unless doi && !doi.empty?

    # NOTE(review): after the resolver prefix is dropped, every '/' is turned
    # into '-', so the value no longer has the prefix/suffix DOI shape —
    # confirm this is intended.
    related << ::Datacite::Mapping::RelatedIdentifier.new(
      value: doi.gsub(doi_prefix, '').tr('/', '-'),
      identifier_type: ::Datacite::Mapping::RelatedIdentifierType::DOI,
      relation_type: ::Datacite::Mapping::RelationType::IS_REFERENCED_BY
    )
  end
end
|
195
|
+
|
196
|
+
# Resource type of the output: general type DATASET with the value 'Dataset'.
#
# @return [Datacite::Mapping::ResourceType]
def resource_type
  general = ::Datacite::Mapping::ResourceTypeGeneral::DATASET
  ::Datacite::Mapping::ResourceType.new resource_type_general: general,
                                        value: 'Dataset'
end
|
202
|
+
|
203
|
+
# Geo locations for the resource.
#
# Pure has free text to list place names and does not allow a point to be
# associated with a specific place, so place names and the point become
# separate locations: one per entry of @dataset.spatial, followed by one
# for @dataset.spatial_point when that is not empty.
#
# @return [Array<Datacite::Mapping::GeoLocation>]
def spatial
  # Place names
  locations = @dataset.spatial.map do |place_name|
    ::Datacite::Mapping::GeoLocation.new place: place_name
  end

  # Lat Long point
  coordinates = @dataset.spatial_point
  return locations if coordinates.empty?

  point = ::Datacite::Mapping::GeoLocationPoint.new(
    latitude: coordinates['latitude'],
    longitude: coordinates['longitude']
  )
  locations << ::Datacite::Mapping::GeoLocation.new(point: point)
end
|
220
|
+
|
221
|
+
# Subjects for the resource, one per dataset keyword.
#
# @return [Array<Datacite::Mapping::Subject>]
def subjects
  @dataset.keyword.map do |keyword|
    ::Datacite::Mapping::Subject.new value: keyword
  end
end
|
224
|
+
|
225
|
+
# Title object for the resource, wrapping @dataset.title.
#
# @return [Datacite::Mapping::Title]
def title
  ::Datacite::Mapping::Title.new(value: @dataset.title)
end
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
end
|
232
|
+
|
233
|
+
end
|
data/research_metadata.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for research_metadata. The version constant is loaded
# from lib/, so lib/ is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'research_metadata/version'

Gem::Specification.new do |gem|
  gem.name        = 'research_metadata'
  gem.version     = ResearchMetadata::VERSION
  gem.authors     = ['Adrian Albin-Clark']
  gem.email       = ['a.albin-clark@lancaster.ac.uk']
  gem.summary     = 'Extraction and Transformation for Loading by the DataCite API.'
  gem.description = 'Extraction and Transformation for Loading by the DataCite API.'
  gem.homepage    = 'https://aalbinclark.gitbooks.io/research_metadata'
  gem.license     = 'MIT'

  # Packaged files are whatever git tracks; executables and test files are
  # then picked out of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_runtime_dependency 'puree', '~> 0.20'
  gem.add_runtime_dependency 'datacite-mapping', '~> 0.2'
end
|
data/spec/spec_helper.rb
ADDED
data/spec/transformer/dataset_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'

# Specs for ResearchMetadata::Transformer::Dataset.
#
# The transformation examples configure Puree from the PURE_BASE_URL,
# PURE_USERNAME and PURE_PASSWORD environment variables and look datasets up
# with PURE_DATASET_UUID / PURE_DATASET_ID.
# NOTE(review): presumably these need a reachable Pure server — confirm.
describe 'Dataset' do

  it '#new' do
    transformer = ResearchMetadata::Transformer::Dataset.new
    expect(transformer).to be_an_instance_of ResearchMetadata::Transformer::Dataset
  end

  describe 'data transformation' do
    before(:all) do
      Puree.base_url = ENV['PURE_BASE_URL']
      Puree.username = ENV['PURE_USERNAME']
      Puree.password = ENV['PURE_PASSWORD']
      Puree.basic_auth = true
      @t = ResearchMetadata::Transformer::Dataset.new
    end

    it '#transform with valid UUID' do
      metadata = @t.transform uuid: ENV['PURE_DATASET_UUID'],
                              doi: '10.1234/foo/bar/1'
      # Matching on the string itself shows the actual output on failure
      expect(metadata.downcase).to start_with('<resource')
    end

    it '#transform with valid ID' do
      metadata = @t.transform id: ENV['PURE_DATASET_ID'],
                              doi: '10.1234/foo/bar/1'
      expect(metadata.downcase).to start_with('<resource')
    end

  end

end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: research_metadata
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Adrian Albin-Clark
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-02-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: puree
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.20'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.20'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: datacite-mapping
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.2'
|
41
|
+
description: Extraction and Transformation for Loading by the DataCite API.
|
42
|
+
email:
|
43
|
+
- a.albin-clark@lancaster.ac.uk
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- ".gitignore"
|
49
|
+
- CHANGELOG.md
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- lib/research_metadata.rb
|
55
|
+
- lib/research_metadata/transformer/dataset.rb
|
56
|
+
- lib/research_metadata/version.rb
|
57
|
+
- research_metadata.gemspec
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
- spec/transformer/dataset_spec.rb
|
60
|
+
homepage: https://aalbinclark.gitbooks.io/research_metadata
|
61
|
+
licenses:
|
62
|
+
- MIT
|
63
|
+
metadata: {}
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 2.2.2
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: Extraction and Transformation for Loading by the DataCite API.
|
84
|
+
test_files:
|
85
|
+
- spec/spec_helper.rb
|
86
|
+
- spec/transformer/dataset_spec.rb
|
87
|
+
has_rdoc:
|