puree 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +64 -5
- data/lib/puree/dataset.rb +140 -57
- data/lib/puree/publication.rb +4 -4
- data/lib/puree/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 098c3fc6fe0dc9773261d20e4f2f8e021e9f8255
|
4
|
+
data.tar.gz: 49b91baf4a68f02a337b986c15f6fe35c67b4396
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3670c89f2adb4ff43f1c262ab10dbb140b4ab5438103a267f63524894292a760e41fc4af0839243cb61688608375b1c996a0960fe31e902b483861dddd1c723
|
7
|
+
data.tar.gz: c962958c92997235fc5730e310e15593cdd9d3ad6c7ab9db7c60db7ef61577c0abac86cd3ee359385354753a4ff899a2dd46d397413e0e624fc56f53bfc8d6b1
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 0.10.0 - 2016-05-17
|
6
|
+
### Added
|
7
|
+
- Dataset metadata (associated, link, project, production as range, person for those without uuid, publication for all research outputs, publisher).
|
8
|
+
### Fixed
|
9
|
+
- Dataset metadata (description splitting, geographical stripping).
|
10
|
+
|
5
11
|
## 0.9.0 - 2016-05-16
|
6
12
|
### Added
|
7
13
|
- Dataset metadata (production).
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ d.get uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
|
|
41
41
|
|
42
42
|
# Filter metadata into simple data structures
|
43
43
|
d.access
|
44
|
+
d.associated
|
44
45
|
d.available
|
45
46
|
d.description
|
46
47
|
d.doi
|
@@ -48,6 +49,7 @@ d.file
|
|
48
49
|
d.geographical
|
49
50
|
d.keyword
|
50
51
|
d.person
|
52
|
+
d.project
|
51
53
|
d.production
|
52
54
|
d.publication
|
53
55
|
d.temporal
|
@@ -123,8 +125,20 @@ An array of files.
|
|
123
125
|
]
|
124
126
|
```
|
125
127
|
|
128
|
+
### link
|
129
|
+
An array of links.
|
130
|
+
|
131
|
+
```ruby
|
132
|
+
[
|
133
|
+
{
|
134
|
+
"url": "http://www.example.com/~abc1234/xyz/",
|
135
|
+
"description": "An interesting description"
|
136
|
+
},
|
137
|
+
]
|
138
|
+
```
|
139
|
+
|
126
140
|
### person
|
127
|
-
Contains an array of internal persons and an array of external persons.
|
141
|
+
Contains an array of internal persons, an array of external persons and an array of other persons.
|
128
142
|
|
129
143
|
```ruby
|
130
144
|
{
|
@@ -139,20 +153,65 @@ Contains an array of internal persons and an array of external persons.
|
|
139
153
|
},
|
140
154
|
],
|
141
155
|
"external"=>[
|
156
|
+
],
|
157
|
+
"other"=>[
|
158
|
+
"name"=>{
|
159
|
+
"first"=>"Hal",
|
160
|
+
"last"=>"Roach"
|
161
|
+
},
|
162
|
+
"role"=>"Contributor",
|
163
|
+
"uuid"=>""
|
142
164
|
]
|
143
165
|
}
|
144
166
|
```
|
145
167
|
|
168
|
+
### project
|
169
|
+
An array of projects associated with the dataset.
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
[
|
173
|
+
{
|
174
|
+
"title": "An interesting project title",
|
175
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
176
|
+
},
|
177
|
+
]
|
178
|
+
```
|
179
|
+
|
146
180
|
### publication
|
147
|
-
An array of
|
181
|
+
An array of research outputs associated with the dataset.
|
148
182
|
|
149
183
|
```ruby
|
150
184
|
[
|
151
185
|
{
|
152
|
-
"type"
|
153
|
-
"title"
|
154
|
-
"uuid"
|
186
|
+
"type": "Journal article",
|
187
|
+
"title": "An interesting journal article title",
|
188
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
189
|
+
},
|
190
|
+
{
|
191
|
+
"type": "Conference paper",
|
192
|
+
"title": "An interesting conference paper title",
|
193
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
194
|
+
},
|
195
|
+
{
|
196
|
+
"type": "Working paper",
|
197
|
+
"title": "An interesting working paper title",
|
198
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
199
|
+
},
|
200
|
+
{
|
201
|
+
"type": "Paper",
|
202
|
+
"title": "An interesting paper title",
|
203
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
155
204
|
},
|
205
|
+
{
|
206
|
+
"type": "Dataset",
|
207
|
+
"title": "An interesting dataset title",
|
208
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
209
|
+
},
|
210
|
+
{
|
211
|
+
"type": "Chapter",
|
212
|
+
"title": "An interesting chapter title",
|
213
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
214
|
+
}
|
156
215
|
]
|
157
216
|
```
|
158
217
|
|
data/lib/puree/dataset.rb
CHANGED
@@ -8,17 +8,81 @@ module Puree
|
|
8
8
|
super(:dataset)
|
9
9
|
end
|
10
10
|
|
11
|
+
# Link
|
12
|
+
#
|
13
|
+
# @return [Array<Hash>]
|
14
|
+
def link
|
15
|
+
path = '//links/link'
|
16
|
+
xpath_result = xpath_query path
|
17
|
+
data = []
|
18
|
+
xpath_result.each { |i|
|
19
|
+
o = {}
|
20
|
+
o['url'] = i.xpath('url').text
|
21
|
+
o['description'] = i.xpath('description').text
|
22
|
+
data << o
|
23
|
+
}
|
24
|
+
return data.uniq
|
25
|
+
end
|
26
|
+
|
27
|
+
# Publisher
|
28
|
+
#
|
29
|
+
# @return [String]
|
30
|
+
def publisher
|
31
|
+
path = '//publisher/name'
|
32
|
+
xpath_result = xpath_query path
|
33
|
+
xpath_result ? xpath_result.text : ''
|
34
|
+
end
|
35
|
+
|
36
|
+
# Combines project and publication
|
37
|
+
#
|
38
|
+
# @return [Hash]
|
39
|
+
def associated
|
40
|
+
path = '//associatedContent//relatedContent'
|
41
|
+
xpath_result = xpath_query path
|
42
|
+
data_arr = []
|
43
|
+
xpath_result.each { |i|
|
44
|
+
data = {}
|
45
|
+
data['type'] = i.xpath('typeClassification').text
|
46
|
+
data['title'] = i.xpath('title').text
|
47
|
+
data['uuid'] = i.attr('uuid')
|
48
|
+
data_arr << data
|
49
|
+
}
|
50
|
+
data_arr.uniq
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
# Project
|
55
|
+
#
|
56
|
+
# @return [Array<Hash>]
|
57
|
+
def project
|
58
|
+
associated_type('Research').uniq
|
59
|
+
end
|
60
|
+
|
61
|
+
# Publication
|
62
|
+
#
|
63
|
+
# @return [Array<Hash>]
|
64
|
+
def publication
|
65
|
+
data_arr = []
|
66
|
+
associated.each do |i|
|
67
|
+
if i['type'] != 'Research'
|
68
|
+
data_arr << i
|
69
|
+
end
|
70
|
+
end
|
71
|
+
data_arr.uniq
|
72
|
+
end
|
73
|
+
|
74
|
+
|
11
75
|
# Title
|
12
76
|
#
|
13
77
|
# @return [Array<String>]
|
14
78
|
def title
|
15
79
|
data = node 'title'
|
80
|
+
data_arr = []
|
16
81
|
if !data.nil? && !data.empty?
|
17
82
|
data = data['localizedString']["__content__"]
|
18
|
-
data.is_a?(Array) ? data : data
|
19
|
-
else
|
20
|
-
[]
|
83
|
+
data.is_a?(Array) ? data_arr = data : data_arr << data
|
21
84
|
end
|
85
|
+
data_arr.uniq
|
22
86
|
end
|
23
87
|
|
24
88
|
# Keyword
|
@@ -26,12 +90,12 @@ module Puree
|
|
26
90
|
# @return [Array<String>]
|
27
91
|
def keyword
|
28
92
|
data = node 'keywordGroups'
|
93
|
+
data_arr = []
|
29
94
|
if !data.nil? && !data.empty?
|
30
95
|
data = data['keywordGroup']['keyword']['userDefinedKeyword']['freeKeyword']
|
31
|
-
data.is_a?(Array) ? data : data
|
32
|
-
else
|
33
|
-
[]
|
96
|
+
data.is_a?(Array) ? data_arr = data : data_arr << data
|
34
97
|
end
|
98
|
+
data_arr.uniq
|
35
99
|
end
|
36
100
|
|
37
101
|
# Description
|
@@ -39,17 +103,17 @@ module Puree
|
|
39
103
|
# @return [Array<String>]
|
40
104
|
def description
|
41
105
|
data = node 'descriptions'
|
106
|
+
data_arr = []
|
42
107
|
if !data.nil? && !data.empty?
|
43
108
|
data = data['classificationDefinedField']['value']['localizedString']['__content__'].tr("\n", '')
|
44
|
-
data.is_a?(Array) ? data : data
|
45
|
-
else
|
46
|
-
[]
|
109
|
+
data.is_a?(Array) ? data_arr = data : data_arr << data
|
47
110
|
end
|
111
|
+
data_arr.uniq
|
48
112
|
end
|
49
113
|
|
50
|
-
# Person
|
114
|
+
# Person (internal, external, other)
|
51
115
|
#
|
52
|
-
# @return [Hash
|
116
|
+
# @return [Hash]
|
53
117
|
def person
|
54
118
|
data = node('persons')
|
55
119
|
persons = {}
|
@@ -60,6 +124,7 @@ module Puree
|
|
60
124
|
end
|
61
125
|
internal_persons = []
|
62
126
|
external_persons = []
|
127
|
+
other_persons = []
|
63
128
|
case data
|
64
129
|
when Array
|
65
130
|
data.each do |d|
|
@@ -72,6 +137,10 @@ module Puree
|
|
72
137
|
person['uuid'] = d['externalPerson']['uuid']
|
73
138
|
external_persons << person
|
74
139
|
end
|
140
|
+
if !d.key?('person') && !d.key?('externalPerson')
|
141
|
+
person['uuid'] = ''
|
142
|
+
other_persons << person
|
143
|
+
end
|
75
144
|
end
|
76
145
|
when Hash
|
77
146
|
person = generic_person data
|
@@ -83,36 +152,18 @@ module Puree
|
|
83
152
|
person['uuid'] = data['externalPerson']['uuid']
|
84
153
|
external_persons << person
|
85
154
|
end
|
155
|
+
if !data.key?('person') && !data.key?('externalPerson')
|
156
|
+
person['uuid'] = ''
|
157
|
+
other_persons << person
|
158
|
+
end
|
86
159
|
end
|
87
|
-
persons['internal'] = internal_persons
|
88
|
-
persons['external'] = external_persons
|
160
|
+
persons['internal'] = internal_persons.uniq
|
161
|
+
persons['external'] = external_persons.uniq
|
162
|
+
persons['other'] = other_persons.uniq
|
89
163
|
persons
|
90
164
|
end
|
91
165
|
|
92
|
-
|
93
|
-
#
|
94
|
-
# @return [Array<Hash>]
|
95
|
-
def publication
|
96
|
-
data = node('relatedPublications')
|
97
|
-
publications = []
|
98
|
-
if !data.nil? && !data.empty?
|
99
|
-
# convert to array
|
100
|
-
data_arr = []
|
101
|
-
if data['relatedContent'].is_a?(Array)
|
102
|
-
data_arr = data['relatedContent']
|
103
|
-
else
|
104
|
-
data_arr[0] = data['relatedContent']
|
105
|
-
end
|
106
|
-
data_arr.each do |d|
|
107
|
-
o = {}
|
108
|
-
o['type'] = d['typeClassification']
|
109
|
-
o['title'] = d['title']
|
110
|
-
o['uuid'] = d['uuid']
|
111
|
-
publications << o
|
112
|
-
end
|
113
|
-
end
|
114
|
-
publications
|
115
|
-
end
|
166
|
+
|
116
167
|
|
117
168
|
# Date made available
|
118
169
|
#
|
@@ -129,7 +180,7 @@ module Puree
|
|
129
180
|
data = node 'geographicalCoverage'
|
130
181
|
if !data.nil? && !data.empty?
|
131
182
|
data = data['localizedString']["__content__"]
|
132
|
-
data.is_a?(Array) ? data : data.split(',')
|
183
|
+
data.is_a?(Array) ? data.uniq : data.split(',').map(&:strip).uniq
|
133
184
|
else
|
134
185
|
[]
|
135
186
|
end
|
@@ -139,26 +190,15 @@ module Puree
|
|
139
190
|
#
|
140
191
|
# @return [Hash]
|
141
192
|
def production
|
142
|
-
|
143
|
-
Puree::Date.normalise(data)
|
193
|
+
temporal_range 'dateOfDataProduction', 'endDateOfDataProduction'
|
144
194
|
end
|
145
195
|
|
196
|
+
|
146
197
|
# Temporal coverage
|
147
198
|
#
|
148
199
|
# @return [Hash]
|
149
200
|
def temporal
|
150
|
-
|
151
|
-
data['start'] = {}
|
152
|
-
data['end'] = {}
|
153
|
-
start_date = temporal_coverage_start_date
|
154
|
-
if !start_date.nil? && !start_date.empty?
|
155
|
-
data['start'] = start_date
|
156
|
-
end
|
157
|
-
end_date = temporal_coverage_end_date
|
158
|
-
if !end_date.nil? && !end_date.empty?
|
159
|
-
data['end'] = end_date
|
160
|
-
end
|
161
|
-
data
|
201
|
+
temporal_range 'temporalCoverageStartDate', 'temporalCoverageEndDate'
|
162
202
|
end
|
163
203
|
|
164
204
|
# Open access permission
|
@@ -210,7 +250,7 @@ module Puree
|
|
210
250
|
|
211
251
|
end
|
212
252
|
end
|
213
|
-
docs
|
253
|
+
docs.uniq
|
214
254
|
end
|
215
255
|
|
216
256
|
# Digital Object Identifier
|
@@ -233,15 +273,19 @@ module Puree
|
|
233
273
|
def metadata
|
234
274
|
o = {}
|
235
275
|
o['access'] = access
|
276
|
+
o['associated'] = associated
|
236
277
|
o['available'] = available
|
237
278
|
o['description'] = description
|
238
279
|
o['doi'] = doi
|
239
280
|
o['file'] = file
|
240
281
|
o['geographical'] = geographical
|
241
282
|
o['keyword'] = keyword
|
283
|
+
o['link'] = link
|
242
284
|
o['person'] = person
|
285
|
+
o['project'] = project
|
243
286
|
o['production'] = production
|
244
287
|
o['publication'] = publication
|
288
|
+
o['publisher'] = publisher
|
245
289
|
o['temporal'] = temporal
|
246
290
|
o['title'] = title
|
247
291
|
o
|
@@ -251,6 +295,8 @@ module Puree
|
|
251
295
|
|
252
296
|
private
|
253
297
|
|
298
|
+
|
299
|
+
|
254
300
|
# Assembles basic information about a person
|
255
301
|
#
|
256
302
|
# @param generic_data [Hash]
|
@@ -265,20 +311,57 @@ module Puree
|
|
265
311
|
person
|
266
312
|
end
|
267
313
|
|
314
|
+
# Temporal range
|
315
|
+
#
|
316
|
+
# @return [Hash]
|
317
|
+
def temporal_range(start_node, end_node)
|
318
|
+
data = {}
|
319
|
+
data['start'] = {}
|
320
|
+
data['end'] = {}
|
321
|
+
start_date = temporal_start_date start_node
|
322
|
+
if !start_date.nil? && !start_date.empty?
|
323
|
+
data['start'] = start_date
|
324
|
+
end
|
325
|
+
end_date = temporal_end_date end_node
|
326
|
+
if !end_date.nil? && !end_date.empty?
|
327
|
+
data['end'] = end_date
|
328
|
+
end
|
329
|
+
data
|
330
|
+
end
|
331
|
+
|
268
332
|
# Temporal coverage start date
|
269
333
|
#
|
270
334
|
# @return [Hash]
|
271
|
-
def
|
272
|
-
data = node
|
335
|
+
def temporal_start_date(start_node)
|
336
|
+
data = node start_node
|
273
337
|
!data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
|
274
338
|
end
|
275
339
|
|
276
340
|
# Temporal coverage end date
|
277
341
|
#
|
278
342
|
# @return [Hash]
|
279
|
-
def
|
280
|
-
data = node
|
343
|
+
def temporal_end_date(end_node)
|
344
|
+
data = node end_node
|
281
345
|
!data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
|
282
346
|
end
|
347
|
+
|
348
|
+
# Associated type
|
349
|
+
#
|
350
|
+
# @return [Hash]
|
351
|
+
def associated_type(type)
|
352
|
+
associated_arr = associated
|
353
|
+
data_arr = []
|
354
|
+
associated_arr.each do |i|
|
355
|
+
data = {}
|
356
|
+
if i['type'] === type
|
357
|
+
data['title'] = i['title']
|
358
|
+
data['uuid'] = i['uuid']
|
359
|
+
data_arr << data
|
360
|
+
end
|
361
|
+
end
|
362
|
+
data_arr
|
363
|
+
end
|
364
|
+
|
283
365
|
end
|
366
|
+
|
284
367
|
end
|
data/lib/puree/publication.rb
CHANGED
@@ -16,7 +16,7 @@ module Puree
|
|
16
16
|
xpath_result = xpath_query path
|
17
17
|
data_arr = []
|
18
18
|
xpath_result.each { |i| data_arr << i.text }
|
19
|
-
data_arr
|
19
|
+
data_arr.uniq
|
20
20
|
end
|
21
21
|
|
22
22
|
# Digital Object Identifier
|
@@ -44,7 +44,7 @@ module Puree
|
|
44
44
|
doc['url'] = d.xpath('url').text
|
45
45
|
docs << doc
|
46
46
|
end
|
47
|
-
docs
|
47
|
+
docs.uniq
|
48
48
|
end
|
49
49
|
|
50
50
|
# Title
|
@@ -55,7 +55,7 @@ module Puree
|
|
55
55
|
xpath_result = xpath_query path
|
56
56
|
data_arr = []
|
57
57
|
xpath_result.each { |i| data_arr << i.text }
|
58
|
-
data_arr
|
58
|
+
data_arr.uniq
|
59
59
|
end
|
60
60
|
|
61
61
|
# Subtitle
|
@@ -66,7 +66,7 @@ module Puree
|
|
66
66
|
xpath_result = xpath_query path
|
67
67
|
data_arr = []
|
68
68
|
xpath_result.each { |i| data_arr << i.text }
|
69
|
-
data_arr
|
69
|
+
data_arr.uniq
|
70
70
|
end
|
71
71
|
|
72
72
|
# All metadata
|
data/lib/puree/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-16 00:00:00.000000000 Z
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|