puree 0.20.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -2
  3. data/PITCHME.md +43 -21
  4. data/README.md +72 -18
  5. data/lib/puree.rb +66 -21
  6. data/lib/puree/api/api.rb +9 -0
  7. data/lib/puree/api/authentication.rb +33 -0
  8. data/lib/puree/api/configuration.rb +43 -0
  9. data/lib/puree/api/map.rb +76 -0
  10. data/lib/puree/api/request.rb +116 -0
  11. data/lib/puree/extractor/collection.rb +131 -0
  12. data/lib/puree/extractor/dataset.rb +48 -0
  13. data/lib/puree/extractor/download.rb +71 -0
  14. data/lib/puree/extractor/event.rb +33 -0
  15. data/lib/puree/extractor/extractor.rb +10 -0
  16. data/lib/puree/extractor/journal.rb +29 -0
  17. data/lib/puree/extractor/organisation.rb +34 -0
  18. data/lib/puree/extractor/person.rb +32 -0
  19. data/lib/puree/extractor/project.rb +40 -0
  20. data/lib/puree/extractor/publication.rb +40 -0
  21. data/lib/puree/extractor/publisher.rb +27 -0
  22. data/lib/puree/extractor/resource.rb +69 -0
  23. data/lib/puree/extractor/server.rb +56 -0
  24. data/lib/puree/model/address.rb +50 -0
  25. data/lib/puree/model/copyright_license.rb +26 -0
  26. data/lib/puree/model/dataset.rb +84 -0
  27. data/lib/puree/model/download_header.rb +21 -0
  28. data/lib/puree/model/endeavour_person.rb +34 -0
  29. data/lib/puree/model/event.rb +31 -0
  30. data/lib/puree/model/event_header.rb +26 -0
  31. data/lib/puree/model/file.rb +45 -0
  32. data/lib/puree/model/helper/validation.rb +15 -0
  33. data/lib/puree/model/journal.rb +20 -0
  34. data/lib/puree/model/legal_condition.rb +26 -0
  35. data/lib/puree/model/link.rb +26 -0
  36. data/lib/puree/model/model.rb +7 -0
  37. data/lib/puree/model/organisation.rb +34 -0
  38. data/lib/puree/model/organisation_header.rb +34 -0
  39. data/lib/puree/model/person.rb +28 -0
  40. data/lib/puree/model/person_name.rb +52 -0
  41. data/lib/puree/model/project.rb +49 -0
  42. data/lib/puree/model/publication.rb +53 -0
  43. data/lib/puree/model/publication_status.rb +21 -0
  44. data/lib/puree/model/publisher.rb +13 -0
  45. data/lib/puree/model/related_content_header.rb +34 -0
  46. data/lib/puree/model/resource.rb +42 -0
  47. data/lib/puree/model/server.rb +13 -0
  48. data/lib/puree/model/spatial_point.rb +16 -0
  49. data/lib/puree/model/structure.rb +18 -0
  50. data/lib/puree/model/temporal_range.rb +15 -0
  51. data/lib/puree/util/date.rb +86 -0
  52. data/lib/puree/util/util.rb +8 -0
  53. data/lib/puree/version.rb +1 -1
  54. data/lib/puree/xml_extractor/base.rb +47 -0
  55. data/lib/puree/xml_extractor/collection.rb +40 -0
  56. data/lib/puree/xml_extractor/dataset.rb +305 -0
  57. data/lib/puree/xml_extractor/download.rb +42 -0
  58. data/lib/puree/xml_extractor/event.rb +63 -0
  59. data/lib/puree/xml_extractor/journal.rb +33 -0
  60. data/lib/puree/xml_extractor/organisation.rb +75 -0
  61. data/lib/puree/xml_extractor/person.rb +57 -0
  62. data/lib/puree/xml_extractor/project.rb +135 -0
  63. data/lib/puree/xml_extractor/publication.rb +189 -0
  64. data/lib/puree/xml_extractor/publisher.rb +28 -0
  65. data/lib/puree/xml_extractor/resource.rb +71 -0
  66. data/lib/puree/xml_extractor/server.rb +32 -0
  67. data/lib/puree/xml_extractor/shared.rb +31 -0
  68. data/lib/puree/xml_extractor/xml_extractor.rb +10 -0
  69. data/puree.gemspec +11 -8
  70. data/spec/download_http_spec.rb +31 -0
  71. data/spec/open_api_dataset_http_spec.rb +15 -0
  72. data/spec/resource/collection_all_http_spec.rb +77 -0
  73. data/spec/resource/collection_http_spec.rb +65 -0
  74. data/spec/resource/dataset_http_spec.rb +104 -0
  75. data/spec/resource/event_http_spec.rb +52 -0
  76. data/spec/resource/journal_http_spec.rb +36 -0
  77. data/spec/resource/organisation_http_spec.rb +52 -0
  78. data/spec/resource/person_http_spec.rb +48 -0
  79. data/spec/resource/project_http_spec.rb +76 -0
  80. data/spec/resource/publication_http_spec.rb +78 -0
  81. data/spec/resource/publisher_http_spec.rb +26 -0
  82. data/spec/server_http_spec.rb +26 -0
  83. data/spec/spec_helper.rb +106 -21
  84. metadata +110 -46
  85. data/lib/puree/collection.rb +0 -285
  86. data/lib/puree/configuration.rb +0 -15
  87. data/lib/puree/dataset.rb +0 -483
  88. data/lib/puree/date.rb +0 -63
  89. data/lib/puree/download.rb +0 -189
  90. data/lib/puree/event.rb +0 -133
  91. data/lib/puree/journal.rb +0 -75
  92. data/lib/puree/map.rb +0 -68
  93. data/lib/puree/organisation.rb +0 -177
  94. data/lib/puree/person.rb +0 -136
  95. data/lib/puree/project.rb +0 -231
  96. data/lib/puree/publication.rb +0 -258
  97. data/lib/puree/publisher.rb +0 -64
  98. data/lib/puree/resource.rb +0 -261
  99. data/lib/puree/server.rb +0 -156
  100. data/spec/collection_spec.rb +0 -62
  101. data/spec/dataset_spec.rb +0 -148
  102. data/spec/download_spec.rb +0 -33
  103. data/spec/event_spec.rb +0 -108
  104. data/spec/journal_spec.rb +0 -92
  105. data/spec/organisation_spec.rb +0 -112
  106. data/spec/person_spec.rb +0 -104
  107. data/spec/project_spec.rb +0 -120
  108. data/spec/publication_spec.rb +0 -128
  109. data/spec/publisher_spec.rb +0 -89
  110. data/spec/server_spec.rb +0 -36
@@ -0,0 +1,116 @@
module Puree

  module API

    # Handles requests to Pure.
    #
    class Request

      # @param url [String] URL to which the service name is appended.
      #   Trailing slashes are stripped before each request.
      def initialize(url:)
        @url = url
        @api_map = Puree::API::Map.new
        @headers = {}
      end

      # Provide credentials if necessary. Stores a Basic Authorization
      # header which is applied to subsequent GET requests.
      #
      # @param username [String]
      # @param password [String]
      # @return [String] The Authorization header value.
      def basic_auth(username:, password:)
        auth = Base64.strict_encode64("#{username}:#{password}")
        @headers['Authorization'] = "Basic #{auth}"
      end

      # Perform a GET request to Pure
      #
      # @param uuid [String] Takes precedence over id when both are given.
      # @param id [String]
      # @param rendering [Symbol, String]
      # @param latest_api [Boolean] Only a literal false selects the
      #   unversioned service; any other value appends '.current'.
      # @param resource_type [String, Symbol]
      # @param limit [Integer]
      # @param offset [Integer] Only sent when limit is greater than zero.
      # @param created_start [String] Sent as createdDate.fromDate.
      # @param created_end [String] Sent as createdDate.toDate.
      # @param modified_start [String] Sent as modifiedDate.fromDate.
      # @param modified_end [String] Sent as modifiedDate.toDate.
      # @param content_type [String]
      # @return [HTTP::Response]
      def get(uuid: nil,
              id: nil,
              rendering: :xml_long,
              latest_api: true,
              resource_type:,
              limit: 20,
              offset: 0,
              created_start: nil,
              created_end: nil,
              modified_start: nil,
              modified_end: nil,
              content_type: nil)
        @latest_api = latest_api
        @resource_type = resource_type.to_sym
        @rendering = rendering
        @uuid = uuid
        @id = id
        @limit = limit
        @offset = offset
        @created_start = created_start
        @created_end = created_end
        @modified_start = modified_start
        @modified_end = modified_end
        @content_type = content_type

        # strip any trailing slash
        @url = @url.sub(%r{/+$}, '')
        @headers['Accept'] = 'application/xml'
        @req = HTTP.headers accept: @headers['Accept']
        @req = @req.auth @headers['Authorization'] if @headers['Authorization']
        @req.get(build_url, params: params)
      end

      private

      # Builds the query string parameters from the state captured by #get.
      #
      # @return [Hash{String => Object}]
      def params
        query = {}
        if @uuid
          query['uuids.uuid'] = @uuid
        elsif @id
          query['pureInternalIds.id'] = @id
        end
        query['rendering'] = @rendering
        if @resource_type != :server
          query['window.size'] = @limit
          query['window.offset'] = @offset if @limit > 0
        end

        # Only nil is omitted here; a blank content type is still sent
        query['contentType'] = @content_type if @content_type

        # Pure does not allow blanks for these.
        # The start of a range is the "from" date, the end is the "to" date.
        query['createdDate.fromDate'] = @created_start if @created_start
        query['createdDate.toDate'] = @created_end if @created_end
        query['modifiedDate.fromDate'] = @modified_start if @modified_start
        query['modifiedDate.toDate'] = @modified_end if @modified_end
        query
      end

      # Builds the endpoint URL from the base URL and the service name
      # looked up for the resource type.
      #
      # @return [String]
      def build_url
        service = @api_map.service_name(@resource_type)
        # Deliberately compares against false: nil still selects '.current'
        service += '.current' unless @latest_api == false
        "#{@url}/#{service}"
      end

    end

  end

end
@@ -0,0 +1,131 @@
module Puree

  module Extractor

    # A collection extractor can retrieve any number of resources of the same type.
    #
    class Collection
      include Puree::API::Authentication

      # @option (see Puree::API::Authentication#configure_api)
      # @param resource [Symbol]
      def initialize(config:, resource:)
        @resource_type = resource
        @api_map = Puree::API::Map.new.get
        # NOTE(review): presumably sets @config and @request, which the
        # methods below rely on - confirm in Puree::API::Authentication.
        configure_api config
      end

      # Gets an array of objects of resource type specified in constructor.
      # One further lookup is made per UUID found in the system response.
      #
      # @param limit [Integer]
      # @param offset [Integer]
      # @param created_start [String]
      # @param created_end [String]
      # @param modified_start [String]
      # @param modified_end [String]
      # @return [Array<Puree::Model::Resource subclass>, nil] Resource metadata
      #   e.g. Puree::Model::Dataset. Nil when the response holds no data.
      # @raise [RuntimeError] If the resource type is not in the API map.
      def get(
        limit: 0,
        offset: 0,
        created_start: nil,
        created_end: nil,
        modified_start: nil,
        modified_end: nil
      )
        @response = @request.get rendering: :system,
                                 limit: limit,
                                 offset: offset,
                                 resource_type: @resource_type,
                                 created_start: created_start,
                                 created_end: created_end,
                                 modified_start: modified_start,
                                 modified_end: modified_end
        set_content @response.body
      end

      alias find get

      # Gets a random resource of type specified in constructor.
      #
      # @return [Puree::Model::Resource subclass, nil] Resource metadata
      #   e.g. Puree::Model::Dataset. Nil when no records are available.
      def random_resource
        total = count
        # An empty collection would give the range 0..-1, which yields no
        # usable offset, so there is nothing to request.
        return nil unless total > 0

        @response = @request.get rendering: :system,
                                 limit: 1,
                                 offset: rand(0..total - 1),
                                 resource_type: @resource_type
        content = set_content @response.body
        content[0] if content
      end

      # Count of records available for a resource type.
      # Performs a request with a limit of zero and reads the count from it.
      #
      # @return [Integer]
      def count
        get_count
      end

      private

      # Array of UUIDs (from system response).
      #
      # @return [Array<String>]
      def uuids
        @extractor.uuids
      end

      def combine_metadata
        collect_resources
      end

      def get_count
        @response = @request.get resource_type: @resource_type,
                                 rendering: :system,
                                 limit: 0
        make_xml_extractor
        @extractor.count
      end

      # Looks up every UUID in the current response with the extractor
      # class matching the resource type.
      #
      # @return [Array] One record per UUID, in response order.
      # @raise [RuntimeError] If the resource type is not in the API map.
      def collect_resources
        # whitelist symbol before it is used to resolve a constant
        unless @api_map[:resource_type].has_key?(@resource_type)
          raise 'Invalid resource class'
        end

        class_name = "Puree::Extractor::#{@resource_type.to_s.capitalize}"
        config = {
          url: @config.url,
          username: @config.username,
          password: @config.password
        }

        uuids.map do |uuid|
          Object.const_get(class_name).new(config).find(uuid: uuid)
        end
      end

      # Set content from XML. In order for metadata extraction to work, the XML must have
      # been retrieved using the .current version of the Pure API endpoints
      #
      # @param xml [String] Only checked for presence; the extractor is
      #   built from the stored response body.
      def set_content(xml)
        return unless xml

        make_xml_extractor
        @extractor.get_data? ? combine_metadata : nil
      end

      def make_xml_extractor
        @extractor = Puree::XMLExtractor::Collection.new xml: @response.body
      end

    end

  end

end
@@ -0,0 +1,48 @@
module Puree

  module Extractor

    # Extractor for the dataset resource type.
    #
    class Dataset < Puree::Extractor::Resource

      # @option (see Puree::Extractor::Resource#initialize)
      def initialize(config)
        super(config)
        setup(:dataset)
      end

      private

      # Copies each dataset attribute from the XML extractor onto the model,
      # after the superclass has had its turn.
      #
      # @return [Object] The populated model.
      def combine_metadata
        super
        attributes = %i[
          access associated available description doi files keywords links
          legal_conditions organisations owner persons_internal
          persons_external persons_other projects production publications
          publisher spatial_places spatial_point temporal title
        ]
        attributes.each do |attribute|
          @model.public_send("#{attribute}=", @extractor.public_send(attribute))
        end
        @model
      end

    end

  end

end
@@ -0,0 +1,71 @@
module Puree

  module Extractor

    # Extractor for download statistics.
    #
    class Download
      include Puree::API::Authentication

      # @option (see Puree::API::Authentication#configure_api)
      def initialize(config)
        @resource_type = :download
        # Workaround to provide access to service_family
        @api_map = Puree::API::Map.new.get
        configure_api config
      end

      # Get download statistics. Only for Datasets.
      #
      # @param limit [Fixnum]
      # @param offset [Fixnum]
      # @param resource [Symbol] The resource being reported
      # @return [Array<Puree::Model::DownloadHeader>]
      def get(limit: 0, offset: 0, resource:)
        raise 'Cannot perform a request without a configuration' if @config.nil?

        @response = @request.get(
          rendering: :system,
          limit: limit,
          offset: offset,
          resource_type: @resource_type,
          content_type: service_family(resource)
        )
        set_content(@response.body)
      end

      alias find get

      private

      def combine_metadata
        @extractor.statistics
      end

      # Set content from XML. In order for metadata extraction to work, the XML must have
      # been retrieved using the .current version of the Pure API endpoints
      #
      # @param xml [String]
      def set_content(xml)
        return nil unless xml

        make_extractor
        return combine_metadata if @extractor.get_data?

        []
      end

      def make_extractor
        @extractor = Puree::XMLExtractor::Download.new xml: @response.body
      end

      # Looks up the family recorded in the API map for a resource type.
      #
      # @param resource_type [Symbol]
      # @raise [RuntimeError] If the resource type is not in the API map.
      def service_family(resource_type)
        known_types = @api_map[:resource_type]
        unless known_types.has_key? resource_type
          raise "#{resource_type} is an unrecognised resource type"
        end

        # Family data is only populated for datasets (required for download)
        known_types[resource_type][:family]
      end

    end

  end

end
@@ -0,0 +1,33 @@
module Puree

  module Extractor

    # Extractor for the event resource type.
    #
    class Event < Puree::Extractor::Resource

      # @option (see Puree::Extractor::Resource#initialize)
      def initialize(config)
        super(config)
        setup(:event)
      end

      private

      # Copies each event attribute from the XML extractor onto the model,
      # after the superclass has had its turn.
      #
      # @return [Object] The populated model.
      def combine_metadata
        super
        %i[city country date description location title type].each do |attribute|
          @model.public_send("#{attribute}=", @extractor.public_send(attribute))
        end
        @model
      end

    end

  end

end
@@ -0,0 +1,10 @@
1
+ module Puree
2
+
3
+ # Namespace for extractors. An Extractor manages HTTP requests to Pure and the
4
+ # subsequent extraction of metadata from an XML response into a Ruby data model.
5
+ # This module is empty here; the extractor classes are defined in their own files.
6
+ module Extractor
7
+
8
+ end
9
+
10
+ end
@@ -0,0 +1,29 @@
module Puree

  module Extractor

    # Extractor for the journal resource type.
    #
    class Journal < Puree::Extractor::Resource

      # @option (see Puree::Extractor::Resource#initialize)
      def initialize(config)
        super(config)
        setup(:journal)
      end

      private

      # Copies each journal attribute from the XML extractor onto the model,
      # after the superclass has had its turn.
      #
      # @return [Object] The populated model.
      def combine_metadata
        super
        %i[issn publisher title].each do |attribute|
          @model.public_send("#{attribute}=", @extractor.public_send(attribute))
        end
        @model
      end

    end

  end

end