research_metadata 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 44b8ec73389b2fb8f75aa96fca6213b0fbe7fb1e
4
+ data.tar.gz: 5dcdf92b3c6437da322181ae345b418e37e0312c
5
+ SHA512:
6
+ metadata.gz: 95c845a1a08d31f1d63085382598be3011230a400bfd76ce31c5604c0e03bc1678b425974a996e0b014e319d4ec10f42c7895738b406c8ab0e9158b42832a58f
7
+ data.tar.gz: def6a7e3e6078b1974ed06f8156908316acfdb6a58bdc9164bf49c42bcc8c5598c46bfe335a28652a611f60d23b5e3e12210c4704de126cb8675eab248c7d24b
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
data/CHANGELOG.md ADDED
@@ -0,0 +1,10 @@
1
+ # Change Log
2
+ All notable changes to this project will be documented in this file.
3
+ This project adheres to [Semantic Versioning](http://semver.org/).
4
+
5
+ ## Unreleased
6
+
7
+ ## 0.1.0 - 2017-02-03
8
+ ### Added
9
+ - Working product supports datasets.
10
+ - RSpec tests for datasets.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in research_metadata.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2017 Adrian Albin-Clark
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # ResearchMetadata [![Gem Version](https://badge.fury.io/rb/research_metadata.svg)](https://badge.fury.io/rb/research_metadata)
2
+
3
+ Extraction of research metadata from Pure and Transformation for Loading by DataCite's API.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'research_metadata'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install research_metadata
18
+
19
+ ## Usage
20
+
21
+ ### Configuration
22
+ Configure Purée.
23
+
24
+ ```ruby
25
+ Puree.base_url = ENV['PURE_BASE_URL']
26
+ Puree.username = ENV['PURE_USERNAME']
27
+ Puree.password = ENV['PURE_PASSWORD']
28
+ Puree.basic_auth = true
29
+ ```
30
+
31
+ ### Transformation
32
+
33
+ Create a metadata transformer for a Pure dataset...
34
+
35
+ ```ruby
36
+ transformer = ResearchMetadata::Transformer::Dataset.new
37
+ ```
38
+
39
+ ...and give it a Pure identifier and a DOI...
40
+
41
+ ```ruby
42
+ metadata = transformer.transform uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
43
+ doi: '10.1234/foo/bar/1'
44
+ ```
45
+
46
+ ...to get DataCite-ready metadata.
47
+
48
+ ## Documentation
49
+ [API in YARD](http://www.rubydoc.info/gems/research_metadata)
50
+
51
+ [Detailed usage in GitBook](https://aalbinclark.gitbooks.io/research_metadata)
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,233 @@
# Extraction (from Pure) and Transformation for Loading by DataCite's API
#
module ResearchMetadata

  # Transformer
  #
  module Transformer

    # Dataset
    #
    # Fetches one dataset through Puree and maps its metadata onto a
    # ::Datacite::Mapping::Resource.
    #
    class Dataset

      # Dataset transformation
      #
      # The dataset is looked up by uuid when one is given, otherwise by id.
      #
      # @param id [String] Pure identifier, used only when uuid is nil
      # @param uuid [String] Pure UUID
      # @param doi [String] DOI used as the resource identifier
      # @return [String] the value returned by Resource#write_xml
      # @raise [RuntimeError] if the extracted dataset has no metadata
      def transform(id: nil, uuid: nil, doi: nil)
        @dataset = extract uuid: uuid, id: id
        raise 'No metadata to transform' if @dataset.metadata.empty?
        # Build these once: person may trigger one Puree lookup per person.
        people = person
        files = file
        resource = ::Datacite::Mapping::Resource.new(
          identifier: identifier(doi),
          creators: people['creator'],
          titles: [title],
          publisher: publisher,
          publication_year: publication_year,
          subjects: subjects,
          contributors: people['contributor'],
          dates: dates,
          language: language,
          resource_type: resource_type,
          related_identifiers: related_identifiers,
          sizes: files.map { |i| i['size'] },
          formats: files.map { |i| i['mime'] },
          # NOTE(review): plain licence names are passed here; confirm whether
          # datacite-mapping expects Rights objects instead.
          rights_list: files.map { |i| i['license']['name'] },
          descriptions: description,
          geo_locations: spatial
        )
        resource.write_xml
      end

      private

      # Names of every affiliation held by a Puree person.
      def affiliations(person)
        person.affiliation.map { |i| i['name'] }
      end

      # Copies ORCID and affiliations from the Pure person record onto a
      # creator/contributor. Does nothing when there is no person UUID or the
      # lookup yields no metadata.
      def add_person_details(human, uuid)
        return if uuid.empty?
        pure_person = Puree::Person.new
        pure_person.find uuid: uuid
        return if pure_person.metadata.empty?

        orcid = name_identifier_orcid(pure_person)
        human.identifier = orcid unless orcid.nil?

        names = affiliations(pure_person)
        human.affiliations = names unless names.empty?
      end

      # Builds the date list: always an AVAILABLE date, plus a COLLECTED date
      # ("start" or "start/end") when the temporal coverage has a start year.
      def dates
        a = []
        a << ::Datacite::Mapping::Date.new(
          value: Puree::Date.iso(@dataset.available),
          type: ::Datacite::Mapping::DateType::AVAILABLE
        )

        temporal = @dataset.temporal
        unless temporal['start']['year'].empty?
          range_parts = [Puree::Date.iso(temporal['start'])]
          unless temporal['end']['year'].empty?
            range_parts << Puree::Date.iso(temporal['end'])
          end
          temporal_range = range_parts.join('/')
          unless temporal_range.empty?
            a << ::Datacite::Mapping::Date.new(
              value: temporal_range,
              type: ::Datacite::Mapping::DateType::COLLECTED
            )
          end
        end

        a
      end

      # Wraps the dataset description as a single ABSTRACT description, or
      # returns an empty list when there is no description.
      def description
        desc = @dataset.description
        return [] if desc.empty?
        [
          ::Datacite::Mapping::Description.new(
            value: desc,
            type: ::Datacite::Mapping::DescriptionType::ABSTRACT
          )
        ]
      end

      # Finds the dataset in Pure, preferring uuid over id.
      def extract(uuid: nil, id: nil)
        d = Puree::Dataset.new
        if uuid.nil?
          d.find id: id
        else
          d.find uuid: uuid
        end
        d
      end

      def file
        @dataset.file
      end

      def identifier(doi)
        ::Datacite::Mapping::Identifier.new(value: doi)
      end

      def language
        @dataset.locale
      end

      # ORCID name identifier for a Puree person, or nil when the person has
      # no ORCID.
      def name_identifier_orcid(person)
        return nil if person.orcid.empty?
        ::Datacite::Mapping::NameIdentifier.new(
          scheme: 'ORCID',
          scheme_uri: URI('http://orcid.org/'),
          value: person.orcid
        )
      end

      # Splits the dataset's people into creators and contributors.
      #
      # Whitespace is removed from the Pure role before it is matched. People
      # whose role is neither 'Creator' nor resolvable by
      # ContributorType.find_by_value are left out.
      #
      # @return [Hash] with 'creator' and 'contributor' keys, each an Array
      def person
        o = { 'creator' => [], 'contributor' => [] }
        %w(internal external other).each do |person_type|
          @dataset.person[person_type].each do |dataset_person|
            pure_role = dataset_person['role'].gsub(/\s+/, '')
            name = "#{dataset_person['name']['last']}, #{dataset_person['name']['first']}"
            # Decide once, from the normalized role, so that the kind of
            # object built and the list it is filed under cannot disagree.
            creator = pure_role == 'Creator'
            if creator
              human = ::Datacite::Mapping::Creator.new name: name
            else
              pure_role = 'Other' if pure_role == 'Contributor'
              contributor_type = ::Datacite::Mapping::ContributorType.find_by_value pure_role
              next unless contributor_type
              human = ::Datacite::Mapping::Contributor.new name: name,
                                                           type: contributor_type
            end
            add_person_details(human, dataset_person['uuid'])
            o[creator ? 'creator' : 'contributor'] << human
          end
        end
        o
      end

      def publication_year
        @dataset.available['year']
      end

      def publisher
        @dataset.publisher
      end

      # One IS_REFERENCED_BY related identifier per associated publication
      # that has a DOI. Associated datasets are skipped because the
      # relationship cannot currently be determined.
      def related_identifiers
        data = []
        @dataset.publication.each do |i|
          next if i['type'] == 'Dataset'
          pub = Puree::Publication.new
          pub.find uuid: i['uuid']
          doi = pub.doi
          next unless doi && !doi.empty?
          # NOTE(review): slashes in the DOI are replaced with dashes, which
          # no longer reads as a DOI - confirm this is intended.
          doi_short = doi.gsub('http://dx.doi.org/', '').gsub('/', '-')
          data << ::Datacite::Mapping::RelatedIdentifier.new(
            value: doi_short,
            identifier_type: ::Datacite::Mapping::RelatedIdentifierType::DOI,
            relation_type: ::Datacite::Mapping::RelationType::IS_REFERENCED_BY
          )
        end
        data
      end

      def resource_type
        ::Datacite::Mapping::ResourceType.new(
          resource_type_general: ::Datacite::Mapping::ResourceTypeGeneral::DATASET,
          value: 'Dataset'
        )
      end

      # Geo locations: one per place name, plus one for the lat/long point
      # when present.
      def spatial
        # Pure has free text to list place names and does not allow a point to
        # be associated with a specific place

        # Place names
        arr = @dataset.spatial.map { |i| ::Datacite::Mapping::GeoLocation.new place: i }

        # Lat Long point
        spatial_point = @dataset.spatial_point
        unless spatial_point.empty?
          point = ::Datacite::Mapping::GeoLocationPoint.new(
            latitude: spatial_point['latitude'],
            longitude: spatial_point['longitude']
          )
          arr << ::Datacite::Mapping::GeoLocation.new(point: point)
        end
        arr
      end

      def subjects
        @dataset.keyword.map { |i| ::Datacite::Mapping::Subject.new value: i }
      end

      def title
        ::Datacite::Mapping::Title.new value: @dataset.title
      end

    end

  end

end
@@ -0,0 +1,5 @@
module ResearchMetadata
  # Semantic version number
  #
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,4 @@
1
+ require 'puree'
2
+ require 'datacite/mapping'
3
+ require 'research_metadata/transformer/dataset'
4
+ require 'research_metadata/version'
@@ -0,0 +1,23 @@
# coding: utf-8
# Gem specification for research_metadata.
lib = File.expand_path('../lib', __FILE__)
# Put lib/ on the load path so the version file can be required below.
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'research_metadata/version'

Gem::Specification.new do |spec|
  spec.name          = "research_metadata"
  spec.version       = ResearchMetadata::VERSION
  spec.authors       = ["Adrian Albin-Clark"]
  spec.email         = ["a.albin-clark@lancaster.ac.uk"]
  spec.summary       = %q{Extraction and Transformation for Loading by the DataCite API.}
  spec.description   = %q{Extraction and Transformation for Loading by the DataCite API.}
  spec.homepage      = "https://aalbinclark.gitbooks.io/research_metadata"
  spec.license       = "MIT"

  # Package every file tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "puree", "~> 0.20"
  spec.add_runtime_dependency "datacite-mapping", "~> 0.2"

  # The Gemfile delegates to this gemspec, so the test framework used by the
  # spec/ directory has to be declared here for Bundler to install it.
  spec.add_development_dependency "rspec", "~> 3.0"
end
@@ -0,0 +1,4 @@
1
+ require 'puree'
2
+ require 'datacite/mapping'
3
+ require 'research_metadata/transformer/dataset'
4
+ require 'research_metadata/version'
@@ -0,0 +1,39 @@
require 'spec_helper'

# Integration examples: the transformation examples read the Pure connection
# details and the dataset identifiers from environment variables.
describe 'Dataset' do

  # Configures Puree from the PURE_* environment variables and builds the
  # transformer shared by the data transformation examples.
  def setup
    Puree.base_url   = ENV['PURE_BASE_URL']
    Puree.username   = ENV['PURE_USERNAME']
    Puree.password   = ENV['PURE_PASSWORD']
    Puree.basic_auth = true
    @t = ResearchMetadata::Transformer::Dataset.new
  end

  it '#new' do
    t = ResearchMetadata::Transformer::Dataset.new
    expect(t).to be_an_instance_of ResearchMetadata::Transformer::Dataset
  end

  describe 'data transformation' do
    before(:all) do
      setup
    end

    it '#transform with valid UUID' do
      metadata = @t.transform uuid: ENV['PURE_DATASET_UUID'],
                              doi: '10.1234/foo/bar/1'
      # Match on the output itself (case-insensitively, anchored at the
      # start) so a failure shows the offending text rather than `false`.
      expect(metadata).to match(/\A<resource/i)
    end

    it '#transform with valid ID' do
      metadata = @t.transform id: ENV['PURE_DATASET_ID'],
                              doi: '10.1234/foo/bar/1'
      expect(metadata).to match(/\A<resource/i)
    end

  end

end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: research_metadata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Adrian Albin-Clark
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: puree
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.20'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.20'
27
+ - !ruby/object:Gem::Dependency
28
+ name: datacite-mapping
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.2'
41
+ description: Extraction and Transformation for Loading by the DataCite API.
42
+ email:
43
+ - a.albin-clark@lancaster.ac.uk
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - CHANGELOG.md
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - lib/research_metadata.rb
55
+ - lib/research_metadata/transformer/dataset.rb
56
+ - lib/research_metadata/version.rb
57
+ - research_metadata.gemspec
58
+ - spec/spec_helper.rb
59
+ - spec/transformer/dataset_spec.rb
60
+ homepage: https://aalbinclark.gitbooks.io/research_metadata
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.2.2
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Extraction and Transformation for Loading by the DataCite API.
84
+ test_files:
85
+ - spec/spec_helper.rb
86
+ - spec/transformer/dataset_spec.rb
87
+ has_rdoc: