puree 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +64 -5
- data/lib/puree/dataset.rb +140 -57
- data/lib/puree/publication.rb +4 -4
- data/lib/puree/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 098c3fc6fe0dc9773261d20e4f2f8e021e9f8255
|
4
|
+
data.tar.gz: 49b91baf4a68f02a337b986c15f6fe35c67b4396
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3670c89f2adb4ff43f1c262ab10dbb140b4ab5438103a267f63524894292a760e41fc4af0839243cb61688608375b1c996a0960fe31e902b483861dddd1c723
|
7
|
+
data.tar.gz: c962958c92997235fc5730e310e15593cdd9d3ad6c7ab9db7c60db7ef61577c0abac86cd3ee359385354753a4ff899a2dd46d397413e0e624fc56f53bfc8d6b1
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 0.10.0 - 2016-05-17
|
6
|
+
### Added
|
7
|
+
- Dataset metadata (associated, link, project, production as range, person for those without uuid, publication for all research outputs, publisher).
|
8
|
+
### Fixed
|
9
|
+
- Dataset metadata (description splitting, geographical stripping).
|
10
|
+
|
5
11
|
## 0.9.0 - 2016-05-16
|
6
12
|
### Added
|
7
13
|
- Dataset metadata (production).
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ d.get uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
|
|
41
41
|
|
42
42
|
# Filter metadata into simple data structures
|
43
43
|
d.access
|
44
|
+
d.associated
|
44
45
|
d.available
|
45
46
|
d.description
|
46
47
|
d.doi
|
@@ -48,6 +49,7 @@ d.file
|
|
48
49
|
d.geographical
|
49
50
|
d.keyword
|
50
51
|
d.person
|
52
|
+
d.project
|
51
53
|
d.production
|
52
54
|
d.publication
|
53
55
|
d.temporal
|
@@ -123,8 +125,20 @@ An array of files.
|
|
123
125
|
]
|
124
126
|
```
|
125
127
|
|
128
|
+
### link
|
129
|
+
An array of links.
|
130
|
+
|
131
|
+
```ruby
|
132
|
+
[
|
133
|
+
{
|
134
|
+
"url": "http://www.example.com/~abc1234/xyz/",
|
135
|
+
"description": "An interesting description"
|
136
|
+
},
|
137
|
+
]
|
138
|
+
```
|
139
|
+
|
126
140
|
### person
|
127
|
-
Contains an array of internal persons and an array of external persons.
|
141
|
+
Contains an array of internal persons, an array of external persons and an array of other persons.
|
128
142
|
|
129
143
|
```ruby
|
130
144
|
{
|
@@ -139,20 +153,65 @@ Contains an array of internal persons and an array of external persons.
|
|
139
153
|
},
|
140
154
|
],
|
141
155
|
"external"=>[
|
156
|
+
],
|
157
|
+
"other"=>[
|
158
|
+
"name"=>{
|
159
|
+
"first"=>"Hal",
|
160
|
+
"last"=>"Roach"
|
161
|
+
},
|
162
|
+
"role"=>"Contributor",
|
163
|
+
"uuid"=>""
|
142
164
|
]
|
143
165
|
}
|
144
166
|
```
|
145
167
|
|
168
|
+
### project
|
169
|
+
An array of projects associated with the dataset.
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
[
|
173
|
+
{
|
174
|
+
"title": "An interesting project title",
|
175
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
176
|
+
},
|
177
|
+
]
|
178
|
+
```
|
179
|
+
|
146
180
|
### publication
|
147
|
-
An array of
|
181
|
+
An array of research outputs associated with the dataset.
|
148
182
|
|
149
183
|
```ruby
|
150
184
|
[
|
151
185
|
{
|
152
|
-
"type"
|
153
|
-
"title"
|
154
|
-
"uuid"
|
186
|
+
"type": "Journal article",
|
187
|
+
"title": "An interesting journal article title",
|
188
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
189
|
+
},
|
190
|
+
{
|
191
|
+
"type": "Conference paper",
|
192
|
+
"title": "An interesting conference paper title",
|
193
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
194
|
+
},
|
195
|
+
{
|
196
|
+
"type": "Working paper",
|
197
|
+
"title": "An interesting working paper title",
|
198
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
199
|
+
},
|
200
|
+
{
|
201
|
+
"type": "Paper",
|
202
|
+
"title": "An interesting paper title",
|
203
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
155
204
|
},
|
205
|
+
{
|
206
|
+
"type": "Dataset",
|
207
|
+
"title": "An interesting dataset title",
|
208
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
209
|
+
},
|
210
|
+
{
|
211
|
+
"type": "Chapter",
|
212
|
+
"title": "An interesting chapter title",
|
213
|
+
"uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
|
214
|
+
}
|
156
215
|
]
|
157
216
|
```
|
158
217
|
|
data/lib/puree/dataset.rb
CHANGED
@@ -8,17 +8,81 @@ module Puree
|
|
8
8
|
super(:dataset)
|
9
9
|
end
|
10
10
|
|
11
|
+
# Link
|
12
|
+
#
|
13
|
+
# @return [Array<Hash>]
|
14
|
+
def link
|
15
|
+
path = '//links/link'
|
16
|
+
xpath_result = xpath_query path
|
17
|
+
data = []
|
18
|
+
xpath_result.each { |i|
|
19
|
+
o = {}
|
20
|
+
o['url'] = i.xpath('url').text
|
21
|
+
o['description'] = i.xpath('description').text
|
22
|
+
data << o
|
23
|
+
}
|
24
|
+
return data.uniq
|
25
|
+
end
|
26
|
+
|
27
|
+
# Publisher
|
28
|
+
#
|
29
|
+
# @return [String]
|
30
|
+
def publisher
|
31
|
+
path = '//publisher/name'
|
32
|
+
xpath_result = xpath_query path
|
33
|
+
xpath_result ? xpath_result.text : ''
|
34
|
+
end
|
35
|
+
|
36
|
+
# Combines project and publication
|
37
|
+
#
|
38
|
+
# @return [Array<Hash>]
|
39
|
+
def associated
|
40
|
+
path = '//associatedContent//relatedContent'
|
41
|
+
xpath_result = xpath_query path
|
42
|
+
data_arr = []
|
43
|
+
xpath_result.each { |i|
|
44
|
+
data = {}
|
45
|
+
data['type'] = i.xpath('typeClassification').text
|
46
|
+
data['title'] = i.xpath('title').text
|
47
|
+
data['uuid'] = i.attr('uuid')
|
48
|
+
data_arr << data
|
49
|
+
}
|
50
|
+
data_arr.uniq
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
# Project
|
55
|
+
#
|
56
|
+
# @return [Array<Hash>]
|
57
|
+
def project
|
58
|
+
associated_type('Research').uniq
|
59
|
+
end
|
60
|
+
|
61
|
+
# Publication
|
62
|
+
#
|
63
|
+
# @return [Array<Hash>]
|
64
|
+
def publication
|
65
|
+
data_arr = []
|
66
|
+
associated.each do |i|
|
67
|
+
if i['type'] != 'Research'
|
68
|
+
data_arr << i
|
69
|
+
end
|
70
|
+
end
|
71
|
+
data_arr.uniq
|
72
|
+
end
|
73
|
+
|
74
|
+
|
11
75
|
# Title
|
12
76
|
#
|
13
77
|
# @return [Array<String>]
|
14
78
|
def title
|
15
79
|
data = node 'title'
|
80
|
+
data_arr = []
|
16
81
|
if !data.nil? && !data.empty?
|
17
82
|
data = data['localizedString']["__content__"]
|
18
|
-
data.is_a?(Array) ? data : data
|
19
|
-
else
|
20
|
-
[]
|
83
|
+
data.is_a?(Array) ? data_arr = data : data_arr << data
|
21
84
|
end
|
85
|
+
data_arr.uniq
|
22
86
|
end
|
23
87
|
|
24
88
|
# Keyword
|
@@ -26,12 +90,12 @@ module Puree
|
|
26
90
|
# @return [Array<String>]
|
27
91
|
def keyword
|
28
92
|
data = node 'keywordGroups'
|
93
|
+
data_arr = []
|
29
94
|
if !data.nil? && !data.empty?
|
30
95
|
data = data['keywordGroup']['keyword']['userDefinedKeyword']['freeKeyword']
|
31
|
-
data.is_a?(Array) ? data : data
|
32
|
-
else
|
33
|
-
[]
|
96
|
+
data.is_a?(Array) ? data_arr = data : data_arr << data
|
34
97
|
end
|
98
|
+
data_arr.uniq
|
35
99
|
end
|
36
100
|
|
37
101
|
# Description
|
@@ -39,17 +103,17 @@ module Puree
|
|
39
103
|
# @return [Array<String>]
|
40
104
|
def description
|
41
105
|
data = node 'descriptions'
|
106
|
+
data_arr = []
|
42
107
|
if !data.nil? && !data.empty?
|
43
108
|
data = data['classificationDefinedField']['value']['localizedString']['__content__'].tr("\n", '')
|
44
|
-
data.is_a?(Array) ? data : data
|
45
|
-
else
|
46
|
-
[]
|
109
|
+
data.is_a?(Array) ? data_arr = data : data_arr << data
|
47
110
|
end
|
111
|
+
data_arr.uniq
|
48
112
|
end
|
49
113
|
|
50
|
-
# Person
|
114
|
+
# Person (internal, external, other)
|
51
115
|
#
|
52
|
-
# @return [Hash
|
116
|
+
# @return [Hash]
|
53
117
|
def person
|
54
118
|
data = node('persons')
|
55
119
|
persons = {}
|
@@ -60,6 +124,7 @@ module Puree
|
|
60
124
|
end
|
61
125
|
internal_persons = []
|
62
126
|
external_persons = []
|
127
|
+
other_persons = []
|
63
128
|
case data
|
64
129
|
when Array
|
65
130
|
data.each do |d|
|
@@ -72,6 +137,10 @@ module Puree
|
|
72
137
|
person['uuid'] = d['externalPerson']['uuid']
|
73
138
|
external_persons << person
|
74
139
|
end
|
140
|
+
if !d.key?('person') && !d.key?('externalPerson')
|
141
|
+
person['uuid'] = ''
|
142
|
+
other_persons << person
|
143
|
+
end
|
75
144
|
end
|
76
145
|
when Hash
|
77
146
|
person = generic_person data
|
@@ -83,36 +152,18 @@ module Puree
|
|
83
152
|
person['uuid'] = data['externalPerson']['uuid']
|
84
153
|
external_persons << person
|
85
154
|
end
|
155
|
+
if !data.key?('person') && !data.key?('externalPerson')
|
156
|
+
person['uuid'] = ''
|
157
|
+
other_persons << person
|
158
|
+
end
|
86
159
|
end
|
87
|
-
persons['internal'] = internal_persons
|
88
|
-
persons['external'] = external_persons
|
160
|
+
persons['internal'] = internal_persons.uniq
|
161
|
+
persons['external'] = external_persons.uniq
|
162
|
+
persons['other'] = other_persons.uniq
|
89
163
|
persons
|
90
164
|
end
|
91
165
|
|
92
|
-
|
93
|
-
#
|
94
|
-
# @return [Array<Hash>]
|
95
|
-
def publication
|
96
|
-
data = node('relatedPublications')
|
97
|
-
publications = []
|
98
|
-
if !data.nil? && !data.empty?
|
99
|
-
# convert to array
|
100
|
-
data_arr = []
|
101
|
-
if data['relatedContent'].is_a?(Array)
|
102
|
-
data_arr = data['relatedContent']
|
103
|
-
else
|
104
|
-
data_arr[0] = data['relatedContent']
|
105
|
-
end
|
106
|
-
data_arr.each do |d|
|
107
|
-
o = {}
|
108
|
-
o['type'] = d['typeClassification']
|
109
|
-
o['title'] = d['title']
|
110
|
-
o['uuid'] = d['uuid']
|
111
|
-
publications << o
|
112
|
-
end
|
113
|
-
end
|
114
|
-
publications
|
115
|
-
end
|
166
|
+
|
116
167
|
|
117
168
|
# Date made available
|
118
169
|
#
|
@@ -129,7 +180,7 @@ module Puree
|
|
129
180
|
data = node 'geographicalCoverage'
|
130
181
|
if !data.nil? && !data.empty?
|
131
182
|
data = data['localizedString']["__content__"]
|
132
|
-
data.is_a?(Array) ? data : data.split(',')
|
183
|
+
data.is_a?(Array) ? data.uniq : data.split(',').map(&:strip).uniq
|
133
184
|
else
|
134
185
|
[]
|
135
186
|
end
|
@@ -139,26 +190,15 @@ module Puree
|
|
139
190
|
#
|
140
191
|
# @return [Hash]
|
141
192
|
def production
|
142
|
-
|
143
|
-
Puree::Date.normalise(data)
|
193
|
+
temporal_range 'dateOfDataProduction', 'endDateOfDataProduction'
|
144
194
|
end
|
145
195
|
|
196
|
+
|
146
197
|
# Temporal coverage
|
147
198
|
#
|
148
199
|
# @return [Hash]
|
149
200
|
def temporal
|
150
|
-
|
151
|
-
data['start'] = {}
|
152
|
-
data['end'] = {}
|
153
|
-
start_date = temporal_coverage_start_date
|
154
|
-
if !start_date.nil? && !start_date.empty?
|
155
|
-
data['start'] = start_date
|
156
|
-
end
|
157
|
-
end_date = temporal_coverage_end_date
|
158
|
-
if !end_date.nil? && !end_date.empty?
|
159
|
-
data['end'] = end_date
|
160
|
-
end
|
161
|
-
data
|
201
|
+
temporal_range 'temporalCoverageStartDate', 'temporalCoverageEndDate'
|
162
202
|
end
|
163
203
|
|
164
204
|
# Open access permission
|
@@ -210,7 +250,7 @@ module Puree
|
|
210
250
|
|
211
251
|
end
|
212
252
|
end
|
213
|
-
docs
|
253
|
+
docs.uniq
|
214
254
|
end
|
215
255
|
|
216
256
|
# Digital Object Identifier
|
@@ -233,15 +273,19 @@ module Puree
|
|
233
273
|
def metadata
|
234
274
|
o = {}
|
235
275
|
o['access'] = access
|
276
|
+
o['associated'] = associated
|
236
277
|
o['available'] = available
|
237
278
|
o['description'] = description
|
238
279
|
o['doi'] = doi
|
239
280
|
o['file'] = file
|
240
281
|
o['geographical'] = geographical
|
241
282
|
o['keyword'] = keyword
|
283
|
+
o['link'] = link
|
242
284
|
o['person'] = person
|
285
|
+
o['project'] = project
|
243
286
|
o['production'] = production
|
244
287
|
o['publication'] = publication
|
288
|
+
o['publisher'] = publisher
|
245
289
|
o['temporal'] = temporal
|
246
290
|
o['title'] = title
|
247
291
|
o
|
@@ -251,6 +295,8 @@ module Puree
|
|
251
295
|
|
252
296
|
private
|
253
297
|
|
298
|
+
|
299
|
+
|
254
300
|
# Assembles basic information about a person
|
255
301
|
#
|
256
302
|
# @param generic_data [Hash]
|
@@ -265,20 +311,57 @@ module Puree
|
|
265
311
|
person
|
266
312
|
end
|
267
313
|
|
314
|
+
# Temporal range
|
315
|
+
#
|
316
|
+
# @return [Hash]
|
317
|
+
def temporal_range(start_node, end_node)
|
318
|
+
data = {}
|
319
|
+
data['start'] = {}
|
320
|
+
data['end'] = {}
|
321
|
+
start_date = temporal_start_date start_node
|
322
|
+
if !start_date.nil? && !start_date.empty?
|
323
|
+
data['start'] = start_date
|
324
|
+
end
|
325
|
+
end_date = temporal_end_date end_node
|
326
|
+
if !end_date.nil? && !end_date.empty?
|
327
|
+
data['end'] = end_date
|
328
|
+
end
|
329
|
+
data
|
330
|
+
end
|
331
|
+
|
268
332
|
# Temporal coverage start date
|
269
333
|
#
|
270
334
|
# @return [Hash]
|
271
|
-
def temporal_coverage_start_date
|
272
|
-
data = node 'temporalCoverageStartDate'
|
335
|
+
def temporal_start_date(start_node)
|
336
|
+
data = node start_node
|
273
337
|
!data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
|
274
338
|
end
|
275
339
|
|
276
340
|
# Temporal coverage end date
|
277
341
|
#
|
278
342
|
# @return [Hash]
|
279
|
-
def temporal_coverage_end_date
|
280
|
-
data = node 'temporalCoverageEndDate'
|
343
|
+
def temporal_end_date(end_node)
|
344
|
+
data = node end_node
|
281
345
|
!data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
|
282
346
|
end
|
347
|
+
|
348
|
+
# Associated type
|
349
|
+
#
|
350
|
+
# @return [Array<Hash>]
|
351
|
+
def associated_type(type)
|
352
|
+
associated_arr = associated
|
353
|
+
data_arr = []
|
354
|
+
associated_arr.each do |i|
|
355
|
+
data = {}
|
356
|
+
if i['type'] === type
|
357
|
+
data['title'] = i['title']
|
358
|
+
data['uuid'] = i['uuid']
|
359
|
+
data_arr << data
|
360
|
+
end
|
361
|
+
end
|
362
|
+
data_arr
|
363
|
+
end
|
364
|
+
|
283
365
|
end
|
366
|
+
|
284
367
|
end
|
data/lib/puree/publication.rb
CHANGED
@@ -16,7 +16,7 @@ module Puree
|
|
16
16
|
xpath_result = xpath_query path
|
17
17
|
data_arr = []
|
18
18
|
xpath_result.each { |i| data_arr << i.text }
|
19
|
-
data_arr
|
19
|
+
data_arr.uniq
|
20
20
|
end
|
21
21
|
|
22
22
|
# Digital Object Identifier
|
@@ -44,7 +44,7 @@ module Puree
|
|
44
44
|
doc['url'] = d.xpath('url').text
|
45
45
|
docs << doc
|
46
46
|
end
|
47
|
-
docs
|
47
|
+
docs.uniq
|
48
48
|
end
|
49
49
|
|
50
50
|
# Title
|
@@ -55,7 +55,7 @@ module Puree
|
|
55
55
|
xpath_result = xpath_query path
|
56
56
|
data_arr = []
|
57
57
|
xpath_result.each { |i| data_arr << i.text }
|
58
|
-
data_arr
|
58
|
+
data_arr.uniq
|
59
59
|
end
|
60
60
|
|
61
61
|
# Subtitle
|
@@ -66,7 +66,7 @@ module Puree
|
|
66
66
|
xpath_result = xpath_query path
|
67
67
|
data_arr = []
|
68
68
|
xpath_result.each { |i| data_arr << i.text }
|
69
|
-
data_arr
|
69
|
+
data_arr.uniq
|
70
70
|
end
|
71
71
|
|
72
72
|
# All metadata
|
data/lib/puree/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-16 00:00:00.000000000 Z
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|