puree 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e16d74fee89f71aae7593434a3bb8041e6ae9451
4
- data.tar.gz: 2180947af2af6b771305ac16712f447721f23f63
3
+ metadata.gz: 098c3fc6fe0dc9773261d20e4f2f8e021e9f8255
4
+ data.tar.gz: 49b91baf4a68f02a337b986c15f6fe35c67b4396
5
5
  SHA512:
6
- metadata.gz: f56565ef90e7d266adf52c7f6a346e7e7d885e25f42064353f8b2840d6c31654d044e98d0a48f8cb677fb94f35ad47cfb26488b7db0b562cdbfe530bc93a3a6f
7
- data.tar.gz: 360d2d041c5bb2c71739a245c4cfea4059c41c274cee2cae1289e3775bb9ab98c55297e59702e32137300e47da696d6cc17b5dd89fc4dcec261e766de46b2e47
6
+ metadata.gz: d3670c89f2adb4ff43f1c262ab10dbb140b4ab5438103a267f63524894292a760e41fc4af0839243cb61688608375b1c996a0960fe31e902b483861dddd1c723
7
+ data.tar.gz: c962958c92997235fc5730e310e15593cdd9d3ad6c7ab9db7c60db7ef61577c0abac86cd3ee359385354753a4ff899a2dd46d397413e0e624fc56f53bfc8d6b1
@@ -2,6 +2,12 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## 0.10.0 - 2016-05-17
6
+ ### Added
7
+ - Dataset metadata (associated, link, project, production as range, person for those without uuid, publication for all research outputs, publisher).
8
+ ### Fixed
9
+ - Dataset metadata (description splitting, geographical stripping).
10
+
5
11
  ## 0.9.0 - 2016-05-16
6
12
  ### Added
7
13
  - Dataset metadata (production).
data/README.md CHANGED
@@ -41,6 +41,7 @@ d.get uuid: 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx',
41
41
 
42
42
  # Filter metadata into simple data structures
43
43
  d.access
44
+ d.associated
44
45
  d.available
45
46
  d.description
46
47
  d.doi
@@ -48,6 +49,7 @@ d.file
48
49
  d.geographical
49
50
  d.keyword
50
51
  d.person
52
+ d.project
51
53
  d.production
52
54
  d.publication
53
55
  d.temporal
@@ -123,8 +125,20 @@ An array of files.
123
125
  ]
124
126
  ```
125
127
 
128
+ ### link
129
+ An array of links.
130
+
131
+ ```ruby
132
+ [
133
+ {
134
+ "url": "http://www.example.com/~abc1234/xyz/",
135
+ "description": "An interesting description"
136
+ },
137
+ ]
138
+ ```
139
+
126
140
  ### person
127
- Contains an array of internal persons and an array of external persons.
141
+ Contains an array of internal persons, an array of external persons and an array of other persons.
128
142
 
129
143
  ```ruby
130
144
  {
@@ -139,20 +153,65 @@ Contains an array of internal persons and an array of external persons.
139
153
  },
140
154
  ],
141
155
  "external"=>[
156
+ ],
157
+ "other"=>[
158
+ "name"=>{
159
+ "first"=>"Hal",
160
+ "last"=>"Roach"
161
+ },
162
+ "role"=>"Contributor",
163
+ "uuid"=>""
142
164
  ]
143
165
  }
144
166
  ```
145
167
 
168
+ ### project
169
+ An array of projects associated with the dataset.
170
+
171
+ ```ruby
172
+ [
173
+ {
174
+ "title": "An interesting project title",
175
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
176
+ },
177
+ ]
178
+ ```
179
+
146
180
  ### publication
147
- An array of related publications.
181
+ An array of research outputs associated with the dataset.
148
182
 
149
183
  ```ruby
150
184
  [
151
185
  {
152
- "type"=>"Journal article",
153
- "title"=>"An interesting title",
154
- "uuid"=>"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
186
+ "type": "Journal article",
187
+ "title": "An interesting journal article title",
188
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
189
+ },
190
+ {
191
+ "type": "Conference paper",
192
+ "title": "An interesting conference paper title",
193
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
194
+ },
195
+ {
196
+ "type": "Working paper",
197
+ "title": "An interesting working paper title",
198
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
199
+ },
200
+ {
201
+ "type": "Paper",
202
+ "title": "An interesting paper title",
203
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
155
204
  },
205
+ {
206
+ "type": "Dataset",
207
+ "title": "An interesting dataset title",
208
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
209
+ },
210
+ {
211
+ "type": "Chapter",
212
+ "title": "An interesting chapter title",
213
+ "uuid": "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
214
+ }
156
215
  ]
157
216
  ```
158
217
 
@@ -8,17 +8,81 @@ module Puree
8
8
  super(:dataset)
9
9
  end
10
10
 
11
+ # Link
12
+ #
13
+ # @return [Array<Hash>]
14
+ def link
15
+ path = '//links/link'
16
+ xpath_result = xpath_query path
17
+ data = []
18
+ xpath_result.each { |i|
19
+ o = {}
20
+ o['url'] = i.xpath('url').text
21
+ o['description'] = i.xpath('description').text
22
+ data << o
23
+ }
24
+ return data.uniq
25
+ end
26
+
27
+ # Publisher
28
+ #
29
+ # @return [String]
30
+ def publisher
31
+ path = '//publisher/name'
32
+ xpath_result = xpath_query path
33
+ xpath_result ? xpath_result.text : ''
34
+ end
35
+
36
+ # Combines project and publication
37
+ #
38
+ # @return [Array<Hash>]
39
+ def associated
40
+ path = '//associatedContent//relatedContent'
41
+ xpath_result = xpath_query path
42
+ data_arr = []
43
+ xpath_result.each { |i|
44
+ data = {}
45
+ data['type'] = i.xpath('typeClassification').text
46
+ data['title'] = i.xpath('title').text
47
+ data['uuid'] = i.attr('uuid')
48
+ data_arr << data
49
+ }
50
+ data_arr.uniq
51
+ end
52
+
53
+
54
+ # Project
55
+ #
56
+ # @return [Array<Hash>]
57
+ def project
58
+ associated_type('Research').uniq
59
+ end
60
+
61
+ # Publication
62
+ #
63
+ # @return [Array<Hash>]
64
+ def publication
65
+ data_arr = []
66
+ associated.each do |i|
67
+ if i['type'] != 'Research'
68
+ data_arr << i
69
+ end
70
+ end
71
+ data_arr.uniq
72
+ end
73
+
74
+
11
75
  # Title
12
76
  #
13
77
  # @return [Array<String>]
14
78
  def title
15
79
  data = node 'title'
80
+ data_arr = []
16
81
  if !data.nil? && !data.empty?
17
82
  data = data['localizedString']["__content__"]
18
- data.is_a?(Array) ? data : data.split(',')
19
- else
20
- []
83
+ data.is_a?(Array) ? data_arr = data : data_arr << data
21
84
  end
85
+ data_arr.uniq
22
86
  end
23
87
 
24
88
  # Keyword
@@ -26,12 +90,12 @@ module Puree
26
90
  # @return [Array<String>]
27
91
  def keyword
28
92
  data = node 'keywordGroups'
93
+ data_arr = []
29
94
  if !data.nil? && !data.empty?
30
95
  data = data['keywordGroup']['keyword']['userDefinedKeyword']['freeKeyword']
31
- data.is_a?(Array) ? data : data.split(',')
32
- else
33
- []
96
+ data.is_a?(Array) ? data_arr = data : data_arr << data
34
97
  end
98
+ data_arr.uniq
35
99
  end
36
100
 
37
101
  # Description
@@ -39,17 +103,17 @@ module Puree
39
103
  # @return [Array<String>]
40
104
  def description
41
105
  data = node 'descriptions'
106
+ data_arr = []
42
107
  if !data.nil? && !data.empty?
43
108
  data = data['classificationDefinedField']['value']['localizedString']['__content__'].tr("\n", '')
44
- data.is_a?(Array) ? data : data.split(',')
45
- else
46
- []
109
+ data.is_a?(Array) ? data_arr = data : data_arr << data
47
110
  end
111
+ data_arr.uniq
48
112
  end
49
113
 
50
- # Person, internal and external
114
+ # Person (internal, external, other)
51
115
  #
52
- # @return [Hash<Array, Array>]
116
+ # @return [Hash]
53
117
  def person
54
118
  data = node('persons')
55
119
  persons = {}
@@ -60,6 +124,7 @@ module Puree
60
124
  end
61
125
  internal_persons = []
62
126
  external_persons = []
127
+ other_persons = []
63
128
  case data
64
129
  when Array
65
130
  data.each do |d|
@@ -72,6 +137,10 @@ module Puree
72
137
  person['uuid'] = d['externalPerson']['uuid']
73
138
  external_persons << person
74
139
  end
140
+ if !d.key?('person') && !d.key?('externalPerson')
141
+ person['uuid'] = ''
142
+ other_persons << person
143
+ end
75
144
  end
76
145
  when Hash
77
146
  person = generic_person data
@@ -83,36 +152,18 @@ module Puree
83
152
  person['uuid'] = data['externalPerson']['uuid']
84
153
  external_persons << person
85
154
  end
155
+ if !data.key?('person') && !data.key?('externalPerson')
156
+ person['uuid'] = ''
157
+ other_persons << person
158
+ end
86
159
  end
87
- persons['internal'] = internal_persons
88
- persons['external'] = external_persons
160
+ persons['internal'] = internal_persons.uniq
161
+ persons['external'] = external_persons.uniq
162
+ persons['other'] = other_persons.uniq
89
163
  persons
90
164
  end
91
165
 
92
- # Publication
93
- #
94
- # @return [Array<Hash>]
95
- def publication
96
- data = node('relatedPublications')
97
- publications = []
98
- if !data.nil? && !data.empty?
99
- # convert to array
100
- data_arr = []
101
- if data['relatedContent'].is_a?(Array)
102
- data_arr = data['relatedContent']
103
- else
104
- data_arr[0] = data['relatedContent']
105
- end
106
- data_arr.each do |d|
107
- o = {}
108
- o['type'] = d['typeClassification']
109
- o['title'] = d['title']
110
- o['uuid'] = d['uuid']
111
- publications << o
112
- end
113
- end
114
- publications
115
- end
166
+
116
167
 
117
168
  # Date made available
118
169
  #
@@ -129,7 +180,7 @@ module Puree
129
180
  data = node 'geographicalCoverage'
130
181
  if !data.nil? && !data.empty?
131
182
  data = data['localizedString']["__content__"]
132
- data.is_a?(Array) ? data : data.split(',')
183
+ data.is_a?(Array) ? data.uniq : data.split(',').map(&:strip).uniq
133
184
  else
134
185
  []
135
186
  end
@@ -139,26 +190,15 @@ module Puree
139
190
  #
140
191
  # @return [Hash]
141
192
  def production
142
- data = node('dateOfDataProduction')
143
- Puree::Date.normalise(data)
193
+ temporal_range 'dateOfDataProduction', 'endDateOfDataProduction'
144
194
  end
145
195
 
196
+
146
197
  # Temporal coverage
147
198
  #
148
199
  # @return [Hash]
149
200
  def temporal
150
- data = {}
151
- data['start'] = {}
152
- data['end'] = {}
153
- start_date = temporal_coverage_start_date
154
- if !start_date.nil? && !start_date.empty?
155
- data['start'] = start_date
156
- end
157
- end_date = temporal_coverage_end_date
158
- if !end_date.nil? && !end_date.empty?
159
- data['end'] = end_date
160
- end
161
- data
201
+ temporal_range 'temporalCoverageStartDate', 'temporalCoverageEndDate'
162
202
  end
163
203
 
164
204
  # Open access permission
@@ -210,7 +250,7 @@ module Puree
210
250
 
211
251
  end
212
252
  end
213
- docs
253
+ docs.uniq
214
254
  end
215
255
 
216
256
  # Digital Object Identifier
@@ -233,15 +273,19 @@ module Puree
233
273
  def metadata
234
274
  o = {}
235
275
  o['access'] = access
276
+ o['associated'] = associated
236
277
  o['available'] = available
237
278
  o['description'] = description
238
279
  o['doi'] = doi
239
280
  o['file'] = file
240
281
  o['geographical'] = geographical
241
282
  o['keyword'] = keyword
283
+ o['link'] = link
242
284
  o['person'] = person
285
+ o['project'] = project
243
286
  o['production'] = production
244
287
  o['publication'] = publication
288
+ o['publisher'] = publisher
245
289
  o['temporal'] = temporal
246
290
  o['title'] = title
247
291
  o
@@ -251,6 +295,8 @@ module Puree
251
295
 
252
296
  private
253
297
 
298
+
299
+
254
300
  # Assembles basic information about a person
255
301
  #
256
302
  # @param generic_data [Hash]
@@ -265,20 +311,57 @@ module Puree
265
311
  person
266
312
  end
267
313
 
314
+ # Temporal range
315
+ #
316
+ # @return [Hash]
317
+ def temporal_range(start_node, end_node)
318
+ data = {}
319
+ data['start'] = {}
320
+ data['end'] = {}
321
+ start_date = temporal_start_date start_node
322
+ if !start_date.nil? && !start_date.empty?
323
+ data['start'] = start_date
324
+ end
325
+ end_date = temporal_end_date end_node
326
+ if !end_date.nil? && !end_date.empty?
327
+ data['end'] = end_date
328
+ end
329
+ data
330
+ end
331
+
268
332
  # Temporal coverage start date
269
333
  #
270
334
  # @return [Hash]
271
- def temporal_coverage_start_date
272
- data = node('temporalCoverageStartDate')
335
+ def temporal_start_date(start_node)
336
+ data = node start_node
273
337
  !data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
274
338
  end
275
339
 
276
340
  # Temporal coverage end date
277
341
  #
278
342
  # @return [Hash]
279
- def temporal_coverage_end_date
280
- data = node('temporalCoverageEndDate')
343
+ def temporal_end_date(end_node)
344
+ data = node end_node
281
345
  !data.nil? && !data.empty? ? Puree::Date.normalise(data) : {}
282
346
  end
347
+
348
+ # Associated type
349
+ #
350
+ # @return [Array<Hash>]
351
+ def associated_type(type)
352
+ associated_arr = associated
353
+ data_arr = []
354
+ associated_arr.each do |i|
355
+ data = {}
356
+ if i['type'] === type
357
+ data['title'] = i['title']
358
+ data['uuid'] = i['uuid']
359
+ data_arr << data
360
+ end
361
+ end
362
+ data_arr
363
+ end
364
+
283
365
  end
366
+
284
367
  end
@@ -16,7 +16,7 @@ module Puree
16
16
  xpath_result = xpath_query path
17
17
  data_arr = []
18
18
  xpath_result.each { |i| data_arr << i.text }
19
- data_arr
19
+ data_arr.uniq
20
20
  end
21
21
 
22
22
  # Digital Object Identifier
@@ -44,7 +44,7 @@ module Puree
44
44
  doc['url'] = d.xpath('url').text
45
45
  docs << doc
46
46
  end
47
- docs
47
+ docs.uniq
48
48
  end
49
49
 
50
50
  # Title
@@ -55,7 +55,7 @@ module Puree
55
55
  xpath_result = xpath_query path
56
56
  data_arr = []
57
57
  xpath_result.each { |i| data_arr << i.text }
58
- data_arr
58
+ data_arr.uniq
59
59
  end
60
60
 
61
61
  # Subtitle
@@ -66,7 +66,7 @@ module Puree
66
66
  xpath_result = xpath_query path
67
67
  data_arr = []
68
68
  xpath_result.each { |i| data_arr << i.text }
69
- data_arr
69
+ data_arr.uniq
70
70
  end
71
71
 
72
72
  # All metadata
@@ -1,3 +1,3 @@
1
1
  module Puree
2
- VERSION = "0.9.0"
2
+ VERSION = "0.10.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: puree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adrian Albin-Clark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-16 00:00:00.000000000 Z
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty