research_metadata 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 44b8ec73389b2fb8f75aa96fca6213b0fbe7fb1e
4
+ data.tar.gz: 5dcdf92b3c6437da322181ae345b418e37e0312c
5
+ SHA512:
6
+ metadata.gz: 95c845a1a08d31f1d63085382598be3011230a400bfd76ce31c5604c0e03bc1678b425974a996e0b014e319d4ec10f42c7895738b406c8ab0e9158b42832a58f
7
+ data.tar.gz: def6a7e3e6078b1974ed06f8156908316acfdb6a58bdc9164bf49c42bcc8c5598c46bfe335a28652a611f60d23b5e3e12210c4704de126cb8675eab248c7d24b
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
data/CHANGELOG.md ADDED
@@ -0,0 +1,10 @@
1
+ # Change Log
2
+ All notable changes to this project will be documented in this file.
3
+ This project adheres to [Semantic Versioning](http://semver.org/).
4
+
5
+ ## Unreleased
6
+
7
+ ## 0.1.0 - 2017-02-03
8
+ ### Added
9
+ - Working product supports datasets.
10
+ - RSpec tests for datasets.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in research_metadata.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2017 Adrian Albin-Clark
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # ResearchMetadata [![Gem Version](https://badge.fury.io/rb/research_metadata.svg)](https://badge.fury.io/rb/research_metadata)
2
+
3
+ Extraction of metadata from Pure and its transformation for loading by DataCite's API.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'research_metadata'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install research_metadata
18
+
19
+ ## Usage
20
+
21
+ ### Configuration
22
+ Configure Purée.
23
+
24
+ ```ruby
25
+ Puree.base_url = ENV['PURE_BASE_URL']
26
+ Puree.username = ENV['PURE_USERNAME']
27
+ Puree.password = ENV['PURE_PASSWORD']
28
+ Puree.basic_auth = true
29
+ ```
30
+
31
+ ### Transformation
32
+
33
+ Create a metadata transformer for a Pure dataset...
34
+
35
+ ```ruby
36
+ transformer = ResearchMetadata::Transformer::Dataset.new
37
+ ```
38
+
39
+ ...and give it a Pure identifier and a DOI...
40
+
41
+ ```ruby
42
+ metadata = transformer.transform uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
43
+ doi: '10.1234/foo/bar/1'
44
+ ```
45
+
46
+ ...to get DataCite-ready metadata.
47
+
48
+ ## Documentation
49
+ [API in YARD](http://www.rubydoc.info/gems/research_metadata)
50
+
51
+ [Detailed usage in GitBook](https://aalbinclark.gitbooks.io/research_metadata)
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,233 @@
1
+ # Extraction (from Pure) and Transformation for Loading by DataCite's API
2
+ #
3
+ module ResearchMetadata
4
+
5
+ # Transformer
6
+ #
7
+ module Transformer
8
+
9
+ # Dataset
10
+ #
11
+ class Dataset
12
+
13
+ # Dataset transformation
14
+ #
15
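+ # @param id [String] Pure ID of the dataset (used only when no uuid is given)
16
+ # @param uuid [String] Pure UUID of the dataset (takes precedence over id)
17
+ # @param doi [String] DOI used as the DataCite identifier
18
+ # @return [String] DataCite metadata as XML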
19
+ def transform(id: nil, uuid: nil, doi: nil)
20
+ @dataset = extract uuid: uuid, id: id
21
+ raise 'No metadata to transform' if @dataset.metadata.empty?
22
+ person_o = person
23
+ file_o = file
24
+ resource = ::Datacite::Mapping::Resource.new(
25
+ identifier: identifier(doi),
26
+ creators: person_o['creator'],
27
+ titles: [ title ],
28
+ publisher: publisher,
29
+ publication_year: publication_year,
30
+ subjects: subjects,
31
+ contributors: person_o['contributor'],
32
+ dates: dates,
33
+ language: language,
34
+ resource_type: resource_type,
35
+ related_identifiers: related_identifiers,
36
+ sizes: file_o.map { |i| i['size'] },
37
+ formats: file_o.map { |i| i['mime'] },
38
+ rights_list: file_o.map { |i| i['license']['name'] },
39
+ descriptions: description,
40
+ geo_locations: spatial
41
+ )
42
+ resource.write_xml
43
+ end
44
+
45
+ private
46
+
47
+ def affiliations(person)
48
+ person.affiliation.map { |i| i['name'] }
49
+ end
50
+
51
+ def dates
52
+ a = []
53
+ available = ::Datacite::Mapping::Date.new value: Puree::Date.iso(@dataset.available),
54
+ type: ::Datacite::Mapping::DateType::AVAILABLE
55
+ a << available
56
+
57
+ temporal = @dataset.temporal
58
+ temporal_range = ''
59
+ if !temporal['start']['year'].empty?
60
+ temporal_range << Puree::Date.iso(temporal['start'])
61
+ if !temporal['end']['year'].empty?
62
+ temporal_range << '/'
63
+ temporal_range << Puree::Date.iso(temporal['end'])
64
+ end
65
+ if !temporal_range.empty?
66
+ collected = ::Datacite::Mapping::Date.new value: temporal_range,
67
+ type: ::Datacite::Mapping::DateType::COLLECTED
68
+ a << collected
69
+ end
70
+ end
71
+
72
+ a
73
+ end
74
+
75
+ def description
76
+ desc = @dataset.description
77
+ if !desc.empty?
78
+ d = ::Datacite::Mapping::Description.new value: desc,
79
+ type: ::Datacite::Mapping::DescriptionType::ABSTRACT
80
+ [d]
81
+ else
82
+ []
83
+ end
84
+ end
85
+
86
+ def extract(uuid: nil, id: nil)
87
+ d = Puree::Dataset.new
88
+ if !uuid.nil?
89
+ d.find uuid: uuid
90
+ else
91
+ d.find id: id
92
+ end
93
+ d
94
+ end
95
+
96
+ def file
97
+ @dataset.file
98
+ end
99
+
100
+ def identifier(doi)
101
+ ::Datacite::Mapping::Identifier.new(value: doi)
102
+ end
103
+
104
+ def language
105
+ @dataset.locale
106
+ end
107
+
108
+ def name_identifier_orcid(person)
109
+ name_identifier = nil
110
+ if !person.orcid.empty?
111
+ name_identifier = ::Datacite::Mapping::NameIdentifier.new scheme: 'ORCID',
112
+ scheme_uri: URI('http://orcid.org/'),
113
+ value: person.orcid
114
+ end
115
+ name_identifier
116
+ end
117
+
118
+ def person
119
+ o = {}
120
+ o['creator'] = []
121
+ o['contributor'] = []
122
+ person_types = %w(internal external other)
123
+ person_types.each do |person_type|
124
+ @dataset.person[person_type].each do |dataset_person|
125
+ pure_role = dataset_person['role'].gsub(/\s+/, '')
126
+ name = "#{dataset_person['name']['last']}, #{dataset_person['name']['first']}"
127
+ if pure_role == 'Creator'
128
+ human = ::Datacite::Mapping::Creator.new name: name
129
+ else
130
+ pure_role = 'Other' if pure_role === 'Contributor'
131
+ contributor_type = ::Datacite::Mapping::ContributorType.find_by_value pure_role
132
+ if contributor_type
133
+ human = ::Datacite::Mapping::Contributor.new name: name,
134
+ type: contributor_type
135
+ end
136
+ end
137
+ if human
138
+ if !dataset_person['uuid'].empty?
139
+ person = Puree::Person.new
140
+ person.find uuid: dataset_person['uuid']
141
+ if !person.metadata.empty?
142
+ identifier = name_identifier_orcid(person)
143
+ human.identifier = identifier if !identifier.nil?
144
+
145
+ affiliation = affiliations(person)
146
+ human.affiliations = affiliation if !affiliation.empty?
147
+ end
148
+ end
149
+ if dataset_person['role'] == 'Creator'
150
+ o['creator'] << human
151
+ else
152
+ o['contributor'] << human
153
+ end
154
+ end
155
+ end
156
+ end
157
+ o
158
+ end
159
+
160
+ def publication_year
161
+ @dataset.available['year']
162
+ end
163
+
164
+ def publisher
165
+ @dataset.publisher
166
+ end
167
+
168
+ def related_identifiers
169
+ publications = @dataset.publication
170
+ data = []
171
+ publications.each do |i|
172
+ if i['type'] === 'Dataset'
173
+ # Do nothing as the relationship cannot currently be determined
174
+ # pub = Puree::Dataset.new
175
+ next
176
+ else
177
+ pub = Puree::Publication.new
178
+ end
179
+ pub.find uuid: i['uuid']
180
+ doi = pub.doi
181
+ if doi && !doi.empty?
182
+ doi_part_to_remove = 'http://dx.doi.org/'
183
+ doi_short = doi.gsub(doi_part_to_remove, '')
184
+ doi_short.gsub!('/', '-')
185
+ related_identifier =
186
+ ::Datacite::Mapping::RelatedIdentifier.new(
187
+ value: doi_short,
188
+ identifier_type: ::Datacite::Mapping::RelatedIdentifierType::DOI,
189
+ relation_type: ::Datacite::Mapping::RelationType::IS_REFERENCED_BY)
190
+ data << related_identifier
191
+ end
192
+ end
193
+ data
194
+ end
195
+
196
+ def resource_type
197
+ ::Datacite::Mapping::ResourceType.new(
198
+ resource_type_general: ::Datacite::Mapping::ResourceTypeGeneral::DATASET,
199
+ value: 'Dataset'
200
+ )
201
+ end
202
+
203
+ def spatial
204
+ # Pure stores place names as free text and does not allow a point to
205
+ # be associated with a specific place
206
+
207
+ # Place names
208
+ arr = @dataset.spatial.map { |i| ::Datacite::Mapping::GeoLocation.new place: i }
209
+
210
+ # Lat Long point
211
+ spatial_point = @dataset.spatial_point
212
+ if !spatial_point.empty?
213
+ point = ::Datacite::Mapping::GeoLocationPoint.new latitude: spatial_point['latitude'],
214
+ longitude: spatial_point['longitude']
215
+ geolocation = ::Datacite::Mapping::GeoLocation.new point: point
216
+ arr << geolocation
217
+ end
218
+ arr
219
+ end
220
+
221
+ def subjects
222
+ @dataset.keyword.map { |i| ::Datacite::Mapping::Subject.new value: i }
223
+ end
224
+
225
+ def title
226
+ ::Datacite::Mapping::Title.new value: @dataset.title
227
+ end
228
+
229
+ end
230
+
231
+ end
232
+
233
+ end
@@ -0,0 +1,5 @@
1
+ module ResearchMetadata
2
+ # Semantic version number
3
+ #
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,4 @@
1
+ require 'puree'
2
+ require 'datacite/mapping'
3
+ require 'research_metadata/transformer/dataset'
4
+ require 'research_metadata/version'
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'research_metadata/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "research_metadata"
8
+ spec.version = ResearchMetadata::VERSION
9
+ spec.authors = ["Adrian Albin-Clark"]
10
+ spec.email = ["a.albin-clark@lancaster.ac.uk"]
11
+ spec.summary = %q{Extraction and Transformation for Loading by the DataCite API.}
12
+ spec.description = %q{Extraction and Transformation for Loading by the DataCite API.}
13
+ spec.homepage = "https://aalbinclark.gitbooks.io/research_metadata"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency "puree", "~> 0.20"
22
+ spec.add_runtime_dependency "datacite-mapping", "~> 0.2"
23
+ end
@@ -0,0 +1,4 @@
1
+ require 'puree'
2
+ require 'datacite/mapping'
3
+ require 'research_metadata/transformer/dataset'
4
+ require 'research_metadata/version'
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Dataset' do
4
+
5
+ def setup
6
+ Puree.base_url = ENV['PURE_BASE_URL']
7
+ Puree.username = ENV['PURE_USERNAME']
8
+ Puree.password = ENV['PURE_PASSWORD']
9
+ Puree.basic_auth = true
10
+ @t = ResearchMetadata::Transformer::Dataset.new
11
+ end
12
+
13
+ it '#new' do
14
+ t = ResearchMetadata::Transformer::Dataset.new
15
+ expect(t).to be_an_instance_of ResearchMetadata::Transformer::Dataset
16
+ end
17
+
18
+ describe 'data transformation' do
19
+ before(:all) do
20
+ setup
21
+ end
22
+
23
+ it '#transform with valid UUID' do
24
+ metadata = @t.transform uuid: ENV['PURE_DATASET_UUID'],
25
+ doi: '10.1234/foo/bar/1'
26
+ is_xml = metadata.downcase.start_with?('<resource')
27
+ expect(is_xml).to match(true)
28
+ end
29
+
30
+ it '#transform with valid ID' do
31
+ metadata = @t.transform id: ENV['PURE_DATASET_ID'],
32
+ doi: '10.1234/foo/bar/1'
33
+ is_xml = metadata.downcase.start_with?('<resource')
34
+ expect(is_xml).to match(true)
35
+ end
36
+
37
+ end
38
+
39
+ end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: research_metadata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Adrian Albin-Clark
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: puree
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.20'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.20'
27
+ - !ruby/object:Gem::Dependency
28
+ name: datacite-mapping
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.2'
41
+ description: Extraction and Transformation for Loading by the DataCite API.
42
+ email:
43
+ - a.albin-clark@lancaster.ac.uk
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - CHANGELOG.md
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - lib/research_metadata.rb
55
+ - lib/research_metadata/transformer/dataset.rb
56
+ - lib/research_metadata/version.rb
57
+ - research_metadata.gemspec
58
+ - spec/spec_helper.rb
59
+ - spec/transformer/dataset_spec.rb
60
+ homepage: https://aalbinclark.gitbooks.io/research_metadata
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.2.2
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Extraction and Transformation for Loading by the DataCite API.
84
+ test_files:
85
+ - spec/spec_helper.rb
86
+ - spec/transformer/dataset_spec.rb
87
+ has_rdoc: