puree 0.20.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -2
  3. data/PITCHME.md +43 -21
  4. data/README.md +72 -18
  5. data/lib/puree.rb +66 -21
  6. data/lib/puree/api/api.rb +9 -0
  7. data/lib/puree/api/authentication.rb +33 -0
  8. data/lib/puree/api/configuration.rb +43 -0
  9. data/lib/puree/api/map.rb +76 -0
  10. data/lib/puree/api/request.rb +116 -0
  11. data/lib/puree/extractor/collection.rb +131 -0
  12. data/lib/puree/extractor/dataset.rb +48 -0
  13. data/lib/puree/extractor/download.rb +71 -0
  14. data/lib/puree/extractor/event.rb +33 -0
  15. data/lib/puree/extractor/extractor.rb +10 -0
  16. data/lib/puree/extractor/journal.rb +29 -0
  17. data/lib/puree/extractor/organisation.rb +34 -0
  18. data/lib/puree/extractor/person.rb +32 -0
  19. data/lib/puree/extractor/project.rb +40 -0
  20. data/lib/puree/extractor/publication.rb +40 -0
  21. data/lib/puree/extractor/publisher.rb +27 -0
  22. data/lib/puree/extractor/resource.rb +69 -0
  23. data/lib/puree/extractor/server.rb +56 -0
  24. data/lib/puree/model/address.rb +50 -0
  25. data/lib/puree/model/copyright_license.rb +26 -0
  26. data/lib/puree/model/dataset.rb +84 -0
  27. data/lib/puree/model/download_header.rb +21 -0
  28. data/lib/puree/model/endeavour_person.rb +34 -0
  29. data/lib/puree/model/event.rb +31 -0
  30. data/lib/puree/model/event_header.rb +26 -0
  31. data/lib/puree/model/file.rb +45 -0
  32. data/lib/puree/model/helper/validation.rb +15 -0
  33. data/lib/puree/model/journal.rb +20 -0
  34. data/lib/puree/model/legal_condition.rb +26 -0
  35. data/lib/puree/model/link.rb +26 -0
  36. data/lib/puree/model/model.rb +7 -0
  37. data/lib/puree/model/organisation.rb +34 -0
  38. data/lib/puree/model/organisation_header.rb +34 -0
  39. data/lib/puree/model/person.rb +28 -0
  40. data/lib/puree/model/person_name.rb +52 -0
  41. data/lib/puree/model/project.rb +49 -0
  42. data/lib/puree/model/publication.rb +53 -0
  43. data/lib/puree/model/publication_status.rb +21 -0
  44. data/lib/puree/model/publisher.rb +13 -0
  45. data/lib/puree/model/related_content_header.rb +34 -0
  46. data/lib/puree/model/resource.rb +42 -0
  47. data/lib/puree/model/server.rb +13 -0
  48. data/lib/puree/model/spatial_point.rb +16 -0
  49. data/lib/puree/model/structure.rb +18 -0
  50. data/lib/puree/model/temporal_range.rb +15 -0
  51. data/lib/puree/util/date.rb +86 -0
  52. data/lib/puree/util/util.rb +8 -0
  53. data/lib/puree/version.rb +1 -1
  54. data/lib/puree/xml_extractor/base.rb +47 -0
  55. data/lib/puree/xml_extractor/collection.rb +40 -0
  56. data/lib/puree/xml_extractor/dataset.rb +305 -0
  57. data/lib/puree/xml_extractor/download.rb +42 -0
  58. data/lib/puree/xml_extractor/event.rb +63 -0
  59. data/lib/puree/xml_extractor/journal.rb +33 -0
  60. data/lib/puree/xml_extractor/organisation.rb +75 -0
  61. data/lib/puree/xml_extractor/person.rb +57 -0
  62. data/lib/puree/xml_extractor/project.rb +135 -0
  63. data/lib/puree/xml_extractor/publication.rb +189 -0
  64. data/lib/puree/xml_extractor/publisher.rb +28 -0
  65. data/lib/puree/xml_extractor/resource.rb +71 -0
  66. data/lib/puree/xml_extractor/server.rb +32 -0
  67. data/lib/puree/xml_extractor/shared.rb +31 -0
  68. data/lib/puree/xml_extractor/xml_extractor.rb +10 -0
  69. data/puree.gemspec +11 -8
  70. data/spec/download_http_spec.rb +31 -0
  71. data/spec/open_api_dataset_http_spec.rb +15 -0
  72. data/spec/resource/collection_all_http_spec.rb +77 -0
  73. data/spec/resource/collection_http_spec.rb +65 -0
  74. data/spec/resource/dataset_http_spec.rb +104 -0
  75. data/spec/resource/event_http_spec.rb +52 -0
  76. data/spec/resource/journal_http_spec.rb +36 -0
  77. data/spec/resource/organisation_http_spec.rb +52 -0
  78. data/spec/resource/person_http_spec.rb +48 -0
  79. data/spec/resource/project_http_spec.rb +76 -0
  80. data/spec/resource/publication_http_spec.rb +78 -0
  81. data/spec/resource/publisher_http_spec.rb +26 -0
  82. data/spec/server_http_spec.rb +26 -0
  83. data/spec/spec_helper.rb +106 -21
  84. metadata +110 -46
  85. data/lib/puree/collection.rb +0 -285
  86. data/lib/puree/configuration.rb +0 -15
  87. data/lib/puree/dataset.rb +0 -483
  88. data/lib/puree/date.rb +0 -63
  89. data/lib/puree/download.rb +0 -189
  90. data/lib/puree/event.rb +0 -133
  91. data/lib/puree/journal.rb +0 -75
  92. data/lib/puree/map.rb +0 -68
  93. data/lib/puree/organisation.rb +0 -177
  94. data/lib/puree/person.rb +0 -136
  95. data/lib/puree/project.rb +0 -231
  96. data/lib/puree/publication.rb +0 -258
  97. data/lib/puree/publisher.rb +0 -64
  98. data/lib/puree/resource.rb +0 -261
  99. data/lib/puree/server.rb +0 -156
  100. data/spec/collection_spec.rb +0 -62
  101. data/spec/dataset_spec.rb +0 -148
  102. data/spec/download_spec.rb +0 -33
  103. data/spec/event_spec.rb +0 -108
  104. data/spec/journal_spec.rb +0 -92
  105. data/spec/organisation_spec.rb +0 -112
  106. data/spec/person_spec.rb +0 -104
  107. data/spec/project_spec.rb +0 -120
  108. data/spec/publication_spec.rb +0 -128
  109. data/spec/publisher_spec.rb +0 -89
  110. data/spec/server_spec.rb +0 -36
@@ -1,15 +0,0 @@
1
- module Puree
2
-
3
- # Configuration options
4
- #
5
- module Configuration
6
-
7
- attr_accessor :base_url, :username, :password, :basic_auth
8
-
9
- def configure
10
- yield self
11
- end
12
-
13
- end
14
-
15
- end
data/lib/puree/dataset.rb DELETED
@@ -1,483 +0,0 @@
1
- module Puree
2
-
3
- # Dataset resource
4
- #
5
- class Dataset < Resource
6
-
7
- # @param base_url [String]
8
- # @param username [String]
9
- # @param password [String]
10
- # @param basic_auth [Boolean]
11
- def initialize(base_url: nil, username: nil, password: nil, basic_auth: nil)
12
- super(api: :dataset,
13
- base_url: base_url,
14
- username: username,
15
- password: password,
16
- basic_auth: basic_auth)
17
- end
18
-
19
- # Open access permission
20
- #
21
- # @return [String]
22
- def access
23
- @metadata['access']
24
- end
25
-
26
- # Combines project and publication
27
- #
28
- # @return [Array<Hash>]
29
- def associated
30
- @metadata['associated']
31
- end
32
-
33
- # Date made available
34
- #
35
- # @return [Hash]
36
- def available
37
- @metadata['available']
38
- end
39
-
40
- # Description
41
- #
42
- # @return [String]
43
- def description
44
- @metadata['description']
45
- end
46
-
47
- # Digital Object Identifier
48
- #
49
- # @return [String]
50
- def doi
51
- @metadata['doi']
52
- end
53
-
54
- # Supporting file
55
- #
56
- # @return [Array<Hash>]
57
- def file
58
- @metadata['file']
59
- end
60
-
61
- # Keyword
62
- #
63
- # @return [Array<String>]
64
- def keyword
65
- @metadata['keyword']
66
- end
67
-
68
- # Link
69
- #
70
- # @return [Array<Hash>]
71
- def link
72
- @metadata['link']
73
- end
74
-
75
- # Organisation
76
- #
77
- # @return [Array<Hash>]
78
- def organisation
79
- @metadata['organisation']
80
- end
81
-
82
- # Owner
83
- #
84
- # @return [Hash]
85
- def owner
86
- @metadata['owner']
87
- end
88
-
89
- # Person (internal, external, other)
90
- #
91
- # @return [Array<Hash>]
92
- def person
93
- @metadata['person']
94
- end
95
-
96
- # Date of data production
97
- #
98
- # @return [Hash]
99
- def production
100
- @metadata['production']
101
- end
102
-
103
- # Project
104
- #
105
- # @return [Array<Hash>]
106
- def project
107
- @metadata['project']
108
- end
109
-
110
- # Publication
111
- #
112
- # @return [Array<Hash>]
113
- def publication
114
- @metadata['publication']
115
- end
116
-
117
- # Publisher
118
- #
119
- # @return [String]
120
- def publisher
121
- @metadata['publisher']
122
- end
123
-
124
- # Spatial coverage (place names)
125
- #
126
- # @return [Array<String>]
127
- def spatial
128
- @metadata['spatial']
129
- end
130
-
131
- # Spatial coverage point
132
- #
133
- # @return [Hash]
134
- def spatial_point
135
- @metadata['spatial_point']
136
- end
137
-
138
- # Temporal coverage
139
- #
140
- # @return [Hash]
141
- def temporal
142
- @metadata['temporal']
143
- end
144
-
145
- # Title
146
- #
147
- # @return [String]
148
- def title
149
- @metadata['title']
150
- end
151
-
152
- # All metadata
153
- #
154
- # @return [Hash]
155
- def metadata
156
- @metadata
157
- end
158
-
159
-
160
- private
161
-
162
- def extract_access
163
- path = '/openAccessPermission/term/localizedString'
164
- xpath_query_for_single_value path
165
- end
166
-
167
- def extract_associated
168
- path = '/associatedContent//relatedContent'
169
- xpath_result = xpath_query path
170
- data_arr = []
171
- xpath_result.each { |i|
172
- data = {}
173
- data['type'] = i.xpath('typeClassification').text.strip
174
- data['title'] = i.xpath('title').text.strip
175
- data['uuid'] = i.attr('uuid').strip
176
- data_arr << data
177
- }
178
- data_arr.uniq
179
- end
180
-
181
- def extract_available
182
- temporal_start_date 'dateMadeAvailable'
183
- end
184
-
185
- def extract_description
186
- path = '/descriptions/classificationDefinedField/value/localizedString'
187
- xpath_query_for_single_value path
188
- end
189
-
190
- def extract_doi
191
- path = '/doi'
192
- xpath_query_for_single_value path
193
- end
194
-
195
- def extract_file
196
- path = '/documents/document'
197
- xpath_result = xpath_query path
198
-
199
- docs = []
200
-
201
- xpath_result.each do |d|
202
- doc = {}
203
- # doc['id'] = f.xpath('id').text.strip
204
- doc['name'] = d.xpath('fileName').text.strip
205
- doc['mime'] = d.xpath('mimeType').text.strip
206
- doc['size'] = d.xpath('size').text.strip
207
- doc['url'] = d.xpath('url').text.strip
208
- doc['title'] = d.xpath('title').text.strip
209
- # doc['createdDate'] = d.xpath('createdDate').text.strip
210
- # doc['visibleOnPortalDate'] = d.xpath('visibleOnPortalDate').text.strip
211
- # doc['limitedVisibility'] = d.xpath('limitedVisibility').text.strip
212
-
213
- license = {}
214
- license_name = d.xpath('documentLicense/term/localizedString').text.strip
215
- license['name'] = license_name
216
- license_url = d.xpath('documentLicense/description/localizedString').text.strip
217
- license['url'] = license_url
218
- doc['license'] = license
219
- docs << doc
220
-
221
- end
222
- docs.uniq
223
- end
224
-
225
- def extract_keyword
226
- path = '/keywordGroups/keywordGroup/keyword/userDefinedKeyword/freeKeyword'
227
- xpath_result = xpath_query path
228
- data_arr = xpath_result.map { |i| i.text.strip }
229
- data_arr.uniq
230
- end
231
-
232
- def extract_link
233
- path = '/links/link'
234
- xpath_result = xpath_query path
235
- data = []
236
- xpath_result.each { |i|
237
- o = {}
238
- o['url'] = i.xpath('url').text.strip
239
- o['description'] = i.xpath('description').text.strip
240
- data << o
241
- }
242
- data.uniq
243
- end
244
-
245
- def extract_organisation
246
- path = '/organisations/organisation'
247
- xpath_result = xpath_query path
248
- data = []
249
- xpath_result.each do |i|
250
- o = {}
251
- o['uuid'] = i.xpath('@uuid').text.strip
252
- o['name'] = i.xpath('name/localizedString').text.strip
253
- o['type'] = i.xpath('typeClassification/term/localizedString').text.strip
254
- data << o
255
- end
256
- data
257
- end
258
-
259
- def extract_owner
260
- path = '/managedBy'
261
- xpath_result = xpath_query path
262
- o = {}
263
- o['uuid'] = xpath_result.xpath('@uuid').text.strip
264
- o['name'] = xpath_result.xpath('name/localizedString').text.strip
265
- o['type'] = xpath_result.xpath('typeClassification/term/localizedString').text.strip
266
- o
267
- end
268
-
269
- def extract_person
270
- data = {}
271
- path = '/persons/dataSetPersonAssociation'
272
- xpath_result = xpath_query path
273
- internal = []
274
- external = []
275
- other = []
276
-
277
- xpath_result.each do |i|
278
- o = {}
279
- name = {}
280
- name['first'] = i.xpath('name/firstName').text.strip
281
- name['last'] = i.xpath('name/lastName').text.strip
282
- o['name'] = name
283
-
284
- roles = {
285
- '/dk/atira/pure/dataset/roles/dataset/contributor' => 'Contributor',
286
- '/dk/atira/pure/dataset/roles/dataset/creator' => 'Creator',
287
- '/dk/atira/pure/dataset/roles/dataset/datacollector' => 'Data Collector',
288
- '/dk/atira/pure/dataset/roles/dataset/datamanager' => 'Data Manager',
289
- '/dk/atira/pure/dataset/roles/dataset/distributor' => 'Distributor',
290
- '/dk/atira/pure/dataset/roles/dataset/editor' => 'Editor',
291
- '/dk/atira/pure/dataset/roles/dataset/funder' => 'Funder',
292
- '/dk/atira/pure/dataset/roles/dataset/owner' => 'Owner',
293
- '/dk/atira/pure/dataset/roles/dataset/other' => 'Other',
294
- '/dk/atira/pure/dataset/roles/dataset/producer' => 'Producer',
295
- '/dk/atira/pure/dataset/roles/dataset/rightsholder' => 'Rights Holder',
296
- '/dk/atira/pure/dataset/roles/dataset/sponsor' => 'Sponsor',
297
- '/dk/atira/pure/dataset/roles/dataset/supervisor' => 'Supervisor'
298
- }
299
- role_uri = i.xpath('personRole/uri').text.strip
300
- o['role'] = roles[role_uri].to_s
301
-
302
- uuid_internal = i.at_xpath('person/@uuid')
303
- uuid_external = i.at_xpath('externalPerson/@uuid')
304
- if uuid_internal
305
- o['uuid'] = uuid_internal.text.strip
306
- internal << o
307
- elsif uuid_external
308
- o['uuid'] = uuid_external.text.strip
309
- external << o
310
- else
311
- other << o
312
- o['uuid'] = ''
313
- end
314
- end
315
- data['internal'] = internal
316
- data['external'] = external
317
- data['other'] = other
318
- data
319
- end
320
-
321
- def extract_production
322
- temporal_range 'dateOfDataProduction', 'endDateOfDataProduction'
323
- end
324
-
325
- def extract_project
326
- associated_type('Research').uniq
327
- end
328
-
329
- def extract_publication
330
- data_arr = []
331
- extract_associated.each do |i|
332
- if i['type'] != 'Research'
333
- data_arr << i
334
- end
335
- end
336
- data_arr.uniq
337
- end
338
-
339
- def extract_publisher
340
- path = '/publisher/name'
341
- xpath_query_for_single_value path
342
- end
343
-
344
- def extract_spatial
345
- # Data from free-form text box
346
- path = '/geographicalCoverage/localizedString'
347
- xpath_result = xpath_query path
348
- data = []
349
- xpath_result.each do |i|
350
- data << i.text.strip
351
- end
352
- data
353
- end
354
-
355
- def extract_spatial_point
356
- path = '/geoLocation/point'
357
- xpath_result = xpath_query path
358
- o = {}
359
- if !xpath_result[0].nil?
360
- arr = xpath_result.text.split(',')
361
- o['latitude'] = arr[0].strip.to_f
362
- o['longitude'] = arr[1].strip.to_f
363
- end
364
- o
365
- end
366
-
367
- def extract_temporal
368
- temporal_range 'temporalCoverageStartDate', 'temporalCoverageEndDate'
369
- end
370
-
371
- def extract_title
372
- path = '/title/localizedString'
373
- xpath_query_for_single_value path
374
- end
375
-
376
- # def state
377
- # # useful?
378
- # data = node 'startedWorkflows'
379
- # !data.empty? ? data['startedWorkflow']['state'] : ''
380
- # end
381
-
382
- def combine_metadata
383
- o = super
384
- o['access'] = extract_access
385
- o['associated'] = extract_associated
386
- o['available'] = extract_available
387
- o['description'] = extract_description
388
- o['doi'] = extract_doi
389
- o['file'] = extract_file
390
- o['keyword'] = extract_keyword
391
- o['link'] = extract_link
392
- o['organisation'] = extract_organisation
393
- o['owner'] = extract_owner
394
- o['person'] = extract_person
395
- o['project'] = extract_project
396
- o['production'] = extract_production
397
- o['publication'] = extract_publication
398
- o['publisher'] = extract_publisher
399
- o['spatial'] = extract_spatial
400
- o['spatial_point'] = extract_spatial_point
401
- o['temporal'] = extract_temporal
402
- o['title'] = extract_title
403
- @metadata = o
404
- end
405
-
406
- # Assembles basic information about a person
407
- #
408
- # @param generic_data [Hash]
409
- # @return [Hash]
410
- def generic_person(generic_data)
411
- person = {}
412
- name = {}
413
- name['first'] = generic_data['name']['firstName']
414
- name['last'] = generic_data['name']['lastName']
415
- person['name'] = name
416
- person['role'] = generic_data['personRole']['term']['localizedString']["__content__"]
417
- person
418
- end
419
-
420
- # Temporal range
421
- #
422
- # @return [Hash]
423
- def temporal_range(start_node, end_node)
424
- data = {}
425
- data['start'] = {}
426
- data['end'] = {}
427
- start_date = temporal_start_date start_node
428
- if !start_date.nil? && !start_date.empty?
429
- data['start'] = start_date
430
- end
431
- end_date = temporal_end_date end_node
432
- if !end_date.nil? && !end_date.empty?
433
- data['end'] = end_date
434
- end
435
- data
436
- end
437
-
438
- # Temporal coverage start date
439
- #
440
- # @return [Hash]
441
- def temporal_start_date(start_node)
442
- path = "/#{start_node}"
443
- xpath_result = xpath_query path
444
- o = {}
445
- o['day'] = xpath_result.xpath('day').text.strip
446
- o['month'] = xpath_result.xpath('month').text.strip
447
- o['year'] = xpath_result.xpath('year').text.strip
448
- Puree::Date.normalise(o)
449
- end
450
-
451
- # Temporal coverage end date
452
- #
453
- # @return [Hash]
454
- def temporal_end_date(end_node)
455
- path = "/#{end_node}"
456
- xpath_result = xpath_query path
457
- o = {}
458
- o['day'] = xpath_result.xpath('day').text.strip
459
- o['month'] = xpath_result.xpath('month').text.strip
460
- o['year'] = xpath_result.xpath('year').text.strip
461
- Puree::Date.normalise(o)
462
- end
463
-
464
- # Associated type
465
- #
466
- # @return [Hash]
467
- def associated_type(type)
468
- associated_arr = extract_associated
469
- data_arr = []
470
- associated_arr.each do |i|
471
- data = {}
472
- if i['type'] === type
473
- data['title'] = i['title']
474
- data['uuid'] = i['uuid']
475
- data_arr << data
476
- end
477
- end
478
- data_arr
479
- end
480
-
481
- end
482
-
483
- end