puree 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e16d74fee89f71aae7593434a3bb8041e6ae9451
4
- data.tar.gz: 2180947af2af6b771305ac16712f447721f23f63
3
+ metadata.gz: 098c3fc6fe0dc9773261d20e4f2f8e021e9f8255
4
+ data.tar.gz: 49b91baf4a68f02a337b986c15f6fe35c67b4396
5
5
  SHA512:
6
- metadata.gz: f56565ef90e7d266adf52c7f6a346e7e7d885e25f42064353f8b2840d6c31654d044e98d0a48f8cb677fb94f35ad47cfb26488b7db0b562cdbfe530bc93a3a6f
7
- data.tar.gz: 360d2d041c5bb2c71739a245c4cfea4059c41c274cee2cae1289e3775bb9ab98c55297e59702e32137300e47da696d6cc17b5dd89fc4dcec261e766de46b2e47
6
+ metadata.gz: d3670c89f2adb4ff43f1c262ab10dbb140b4ab5438103a267f63524894292a760e41fc4af0839243cb61688608375b1c996a0960fe31e902b483861dddd1c723
7
+ data.tar.gz: c962958c92997235fc5730e310e15593cdd9d3ad6c7ab9db7c60db7ef61577c0abac86cd3ee359385354753a4ff899a2dd46d397413e0e624fc56f53bfc8d6b1
@@ -2,6 +2,12 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## 0.10.0 - 2016-05-17
6
+ ### Added
7
+ - Dataset metadata (associated, link, project, production as range, person for those without uuid, publication for all research outputs, publisher).
8
+ ### Fixed
9
+ - Dataset metadata (description splitting, geographical stripping).
10
+
5
11
  ## 0.9.0 - 2016-05-16
6
12
  ### Added
7
13
  - Dataset metadata (production).
data/README.md CHANGED
@@ -41,6 +41,7 @@ d.get uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
41
41
 
42
42
  # Filter metadata into simple data structures
43
43
  d.access
44
+ d.associated
44
45
  d.available
45
46
  d.description
46
47
  d.doi
@@ -48,6 +49,7 @@ d.file
48
49
  d.geographical
49
50
  d.keyword
50
51
  d.person
52
+ d.project
51
53
  d.production
52
54
  d.publication
53
55
  d.temporal
@@ -123,8 +125,20 @@ An array of files.
123
125
  ]
124
126
  ```
125
127
 
128
+ ### link
129
+ An array of links.
130
+
131
+ ```ruby
132
+ [
133
+ {
134
+ "url": "http://www.example.com/~abc1234/xyz/",
135
+ "description": "An interesting description"
136
+ },
137
+ ]
138
+ ```
139
+
126
140
  ### person
127
- Contains an array of internal persons and an array of external persons.
141
+ Contains an array of internal persons, an array of external persons and an array of other persons.
128
142
 
129
143
  ```ruby
130
144
  {
@@ -139,20 +153,65 @@ Contains an array of internal persons and an array of external persons.
139
153
  },
140
154
  ],
141
155
  "external"=>[
156
+ ],
157
+ "other"=>[
158
+ "name"=>{
159
+ "first"=>"Hal",
160
+ "last"=>"Roach"
161
+ },
162
+ "role"=>"Contributor",
163
+ "uuid"=>""
142
164
  ]
143
165
  }
144
166
  ```
145
167
 
168
+ ### project
169
+ An array of projects associated with the dataset.
170
+
171
+ ```ruby
172
+ [
173
+ {
174
+ "title": "An interesting project title",
175
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
176
+ },
177
+ ]
178
+ ```
179
+
146
180
  ### publication
147
- An array of related publications.
181
+ An array of research outputs associated with the dataset.
148
182
 
149
183
  ```ruby
150
184
  [
151
185
  {
152
- "type"=>"Journal article",
153
- "title"=>"An interesting title",
154
- "uuid"=>"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
186
+ "type": "Journal article",
187
+ "title": "An interesting journal article title",
188
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
189
+ },
190
+ {
191
+ "type": "Conference paper",
192
+ "title": "An interesting conference paper title",
193
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
194
+ },
195
+ {
196
+ "type": "Working paper",
197
+ "title": "An interesting working paper title",
198
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
199
+ },
200
+ {
201
+ "type": "Paper",
202
+ "title": "An interesting paper title",
203
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
155
204
  },
205
+ {
206
+ "type": "Dataset",
207
+ "title": "An interesting dataset title",
208
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
209
+ },
210
+ {
211
+ "type": "Chapter",
212
+ "title": "An interesting chapter title",
213
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
214
+ }
156
215
  ]
157
216
  ```
158
217
 
@@ -8,17 +8,81 @@ module Puree
8
8
  super(:dataset)
9
9
  end
10
10
 
11
+ # Link
12
+ #
13
+ # @return [Array<Hash>]
14
+ def link
15
+ path = '//links/link'
16
+ xpath_result = xpath_query path
17
+ data = []
18
+ xpath_result.each { |i|
19
+ o = {}
20
+ o['url'] = i.xpath('url').text
21
+ o['description'] = i.xpath('description').text
22
+ data << o
23
+ }
24
+ return data.uniq
25
+ end
26
+
27
+ # Publisher
28
+ #
29
+ # @return [String]
30
+ def publisher
31
+ path = '//publisher/name'
32
+ xpath_result = xpath_query path
33
+ xpath_result ? xpath_result.text : ''
34
+ end
35
+
36
+ # Combines project and publication
37
+ #
38
+ # @return [Array<Hash>]
39
+ def associated
40
+ path = '//associatedContent//relatedContent'
41
+ xpath_result = xpath_query path
42
+ data_arr = []
43
+ xpath_result.each { |i|
44
+ data = {}
45
+ data['type'] = i.xpath('typeClassification').text
46
+ data['title'] = i.xpath('title').text
47
+ data['uuid'] = i.attr('uuid')
48
+ data_arr << data
49
+ }
50
+ data_arr.uniq
51
+ end
52
+
53
+
54
+ # Project
55
+ #
56
+ # @return [Array<Hash>]
57
+ def project
58
+ associated_type('Research').uniq
59
+ end
60
+
61
+ # Publication
62
+ #
63
+ # @return [Array<Hash>]
64
+ def publication
65
+ data_arr = []
66
+ associated.each do |i|
67
+ if i['type'] != 'Research'
68
+ data_arr << i
69
+ end
70
+ end
71
+ data_arr.uniq
72
+ end
73
+
74
+
11
75
  # Title
12
76
  #
13
77
  # @return [Array<String>]
14
78
  def title
15
79
  data = node 'title'
80
+ data_arr = []
16
81
  if !data.nil? && !data.empty?
17
82
  data = data['localizedString']["__content__"]
18
- data.is_a?(Array) ? data : data.split(',')
19
- else
20
- []
83
+ data.is_a?(Array) ? data_arr = data : data_arr << data
21
84
  end
85
+ data_arr.uniq
22
86
  end
23
87
 
24
88
  # Keyword
@@ -26,12 +90,12 @@ module Puree
26
90
  # @return [Array<String>]
27
91
  def keyword
28
92
  data = node 'keywordGroups'
93
+ data_arr = []
29
94
  if !data.nil? && !data.empty?
30
95
  data = data['keywordGroup']['keyword']['userDefinedKeyword']['freeKeyword']
31
- data.is_a?(Array) ? data : data.split(',')
32
- else
33
- []
96
+ data.is_a?(Array) ? data_arr = data : data_arr << data
34
97
  end
98
+ data_arr.uniq
35
99
  end
36
100
 
37
101
  # Description
@@ -39,17 +103,17 @@ module Puree
39
103
  # @return [Array<String>]
40
104
  def description
41
105
  data = node 'descriptions'
106
+ data_arr = []
42
107
  if !data.nil? && !data.empty?
43
108
  data = data['classificationDefinedField']['value']['localizedString']['__content__'].tr("\n", '')
44
- data.is_a?(Array) ? data : data.split(',')
45
- else
46
- []
109
+ data.is_a?(Array) ? data_arr = data : data_arr << data
47
110
  end
111
+ data_arr.uniq
48
112
  end
49
113
 
50
- # Person, internal and external
114
+ # Person (internal, external, other)
51
115
  #
52
- # @return [Hash<Array, Array>]
116
+ # @return [Hash]
53
117
  def person
54
118
  data = node('persons')
55
119
  persons = {}
@@ -60,6 +124,7 @@ module Puree
60
124
  end
61
125
  internal_persons = []
62
126
  external_persons = []
127
+ other_persons = []
63
128
  case data
64
129
  when Array
65
130
  data.each do |d|
@@ -72,6 +137,10 @@ module Puree
72
137
  person['uuid'] = d['externalPerson']['uuid']
73
138
  external_persons << person
74
139
  end
140
+ if !d.key?('person') && !d.key?('externalPerson')
141
+ person['uuid'] = ''
142
+ other_persons << person
143
+ end
75
144
  end
76
145
  when Hash
77
146
  person = generic_person data
@@ -83,36 +152,18 @@ module Puree
83
152
  person['uuid'] = data['externalPerson']['uuid']
84
153
  external_persons << person
85
154
  end
155
+ if !data.key?('person') && !data.key?('externalPerson')
156
+ person['uuid'] = ''
157
+ other_persons << person
158
+ end
86
159
  end
87
- persons['internal'] = internal_persons
88
- persons['external'] = external_persons
160
+ persons['internal'] = internal_persons.uniq
161
+ persons['external'] = external_persons.uniq
162
+ persons['other'] = other_persons.uniq
89
163
  persons
90
164
  end
91
165
 
92
- # Publication
93
- #
94
- # @return [Array<Hash>]
95
- def publication
96
- data = node('relatedPublications')
97
- publications = []
98
- if !data.nil? && !data.empty?
99
- # convert to array
100
- data_arr = []
101
- if data['relatedContent'].is_a?(Array)
102
- data_arr = data['relatedContent']
103
- else
104
- data_arr[0] = data['relatedContent']
105
- end
106
- data_arr.each do |d|
107
- o = {}
108
- o['type'] = d['typeClassification']
109
- o['title'] = d['title']
110
- o['uuid'] = d['uuid']
111
- publications << o
112
- end
113
- end
114
- publications
115
- end
166
+
116
167
 
117
168
  # Date made available
118
169
  #
@@ -129,7 +180,7 @@ module Puree
129
180
  data = node 'geographicalCoverage'
130
181
  if !data.nil? && !data.empty?
131
182
  data = data['localizedString']["__content__"]
132
- data.is_a?(Array) ? data : data.split(',')
183
+ data.is_a?(Array) ? data.uniq : data.split(',').map(&:strip).uniq
133
184
  else
134
185
  []
135
186
  end
@@ -139,26 +190,15 @@ module Puree
139
190
  #
140
191
  # @return [Hash]
141
192
  def production
142
- data = node('dateOfDataProduction')
143
- Puree::Date.normalise(data)
193
+ temporal_range 'dateOfDataProduction', 'endDateOfDataProduction'
144
194
  end
145
195
 
196
+
146
197
  # Temporal coverage
147
198
  #
148
199
  # @return [Hash]
149
200
  def temporal
150
- data = {}
151
- data['start'] = {}
152
- data['end'] = {}
153
- start_date = temporal_coverage_start_date
154
- if !start_date.nil? && !start_date.empty?
155
- data['start'] = start_date
156
- end
157
- end_date = temporal_coverage_end_date
158
- if !end_date.nil? && !end_date.empty?
159
- data['end'] = end_date
160
- end
161
- data
201
+ temporal_range 'temporalCoverageStartDate', 'temporalCoverageEndDate'
162
202
  end
163
203
 
164
204
  # Open access permission
@@ -210,7 +250,7 @@ module Puree
210
250
 
211
251
  end
212
252
  end
213
- docs
253
+ docs.uniq
214
254
  end
215
255
 
216
256
  # Digital Object Identifier
@@ -233,15 +273,19 @@ module Puree
233
273
  def metadata
234
274
  o = {}
235
275
  o['access'] = access
276
+ o['associated'] = associated
236
277
  o['available'] = available
237
278
  o['description'] = description
238
279
  o['doi'] = doi
239
280
  o['file'] = file
240
281
  o['geographical'] = geographical
241
282
  o['keyword'] = keyword
283
+ o['link'] = link
242
284
  o['person'] = person
285
+ o['project'] = project
243
286
  o['production'] = production
244
287
  o['publication'] = publication
288
+ o['publisher'] = publisher
245
289
  o['temporal'] = temporal
246
290
  o['title'] = title
247
291
  o
@@ -251,6 +295,8 @@ module Puree
251
295
 
252
296
  private
253
297
 
298
+
299
+
254
300
  # Assembles basic information about a person
255
301
  #
256
302
  # @param generic_data [Hash]
@@ -265,20 +311,57 @@ module Puree
265
311
  person
266
312
  end
267
313
 
314
+ # Temporal range
315
+ #
316
+ # @return [Hash]
317
+ def temporal_range(start_node, end_node)
318
+ data = {}
319
+ data['start'] = {}
320
+ data['end'] = {}
321
+ start_date = temporal_start_date start_node
322
+ if !start_date.nil? && !start_date.empty?
323
+ data['start'] = start_date
324
+ end
325
+ end_date = temporal_end_date end_node
326
+ if !end_date.nil? && !end_date.empty?
327
+ data['end'] = end_date
328
+ end
329
+ data
330
+ end
331
+
268
332
  # Temporal coverage start date
269
333
  #
270
334
  # @return [Hash]
271
- def temporal_coverage_start_date
272
- data = node('temporalCoverageStartDate')
335
+ def temporal_start_date(start_node)
336
+ data = node start_node
273
337
  !data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
274
338
  end
275
339
 
276
340
  # Temporal coverage end date
277
341
  #
278
342
  # @return [Hash]
279
- def temporal_coverage_end_date
280
- data = node('temporalCoverageEndDate')
343
+ def temporal_end_date(end_node)
344
+ data = node end_node
281
345
  !data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
282
346
  end
347
+
348
+ # Associated content of a given type (title and uuid only)
349
+ #
350
+ # @return [Array<Hash>]
351
+ def associated_type(type)
352
+ associated_arr = associated
353
+ data_arr = []
354
+ associated_arr.each do |i|
355
+ data = {}
356
+ if i['type'] === type
357
+ data['title'] = i['title']
358
+ data['uuid'] = i['uuid']
359
+ data_arr << data
360
+ end
361
+ end
362
+ data_arr
363
+ end
364
+
283
365
  end
366
+
284
367
  end
@@ -16,7 +16,7 @@ module Puree
16
16
  xpath_result = xpath_query path
17
17
  data_arr = []
18
18
  xpath_result.each { |i| data_arr << i.text }
19
- data_arr
19
+ data_arr.uniq
20
20
  end
21
21
 
22
22
  # Digital Object Identifier
@@ -44,7 +44,7 @@ module Puree
44
44
  doc['url'] = d.xpath('url').text
45
45
  docs << doc
46
46
  end
47
- docs
47
+ docs.uniq
48
48
  end
49
49
 
50
50
  # Title
@@ -55,7 +55,7 @@ module Puree
55
55
  xpath_result = xpath_query path
56
56
  data_arr = []
57
57
  xpath_result.each { |i| data_arr << i.text }
58
- data_arr
58
+ data_arr.uniq
59
59
  end
60
60
 
61
61
  # Subtitle
@@ -66,7 +66,7 @@ module Puree
66
66
  xpath_result = xpath_query path
67
67
  data_arr = []
68
68
  xpath_result.each { |i| data_arr << i.text }
69
- data_arr
69
+ data_arr.uniq
70
70
  end
71
71
 
72
72
  # All metadata
@@ -1,3 +1,3 @@
1
1
  module Puree
2
- VERSION = "0.9.0"
2
+ VERSION = "0.10.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: puree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adrian Albin-Clark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-16 00:00:00.000000000 Z
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty