puree 0.20.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -2
- data/PITCHME.md +43 -21
- data/README.md +72 -18
- data/lib/puree.rb +66 -21
- data/lib/puree/api/api.rb +9 -0
- data/lib/puree/api/authentication.rb +33 -0
- data/lib/puree/api/configuration.rb +43 -0
- data/lib/puree/api/map.rb +76 -0
- data/lib/puree/api/request.rb +116 -0
- data/lib/puree/extractor/collection.rb +131 -0
- data/lib/puree/extractor/dataset.rb +48 -0
- data/lib/puree/extractor/download.rb +71 -0
- data/lib/puree/extractor/event.rb +33 -0
- data/lib/puree/extractor/extractor.rb +10 -0
- data/lib/puree/extractor/journal.rb +29 -0
- data/lib/puree/extractor/organisation.rb +34 -0
- data/lib/puree/extractor/person.rb +32 -0
- data/lib/puree/extractor/project.rb +40 -0
- data/lib/puree/extractor/publication.rb +40 -0
- data/lib/puree/extractor/publisher.rb +27 -0
- data/lib/puree/extractor/resource.rb +69 -0
- data/lib/puree/extractor/server.rb +56 -0
- data/lib/puree/model/address.rb +50 -0
- data/lib/puree/model/copyright_license.rb +26 -0
- data/lib/puree/model/dataset.rb +84 -0
- data/lib/puree/model/download_header.rb +21 -0
- data/lib/puree/model/endeavour_person.rb +34 -0
- data/lib/puree/model/event.rb +31 -0
- data/lib/puree/model/event_header.rb +26 -0
- data/lib/puree/model/file.rb +45 -0
- data/lib/puree/model/helper/validation.rb +15 -0
- data/lib/puree/model/journal.rb +20 -0
- data/lib/puree/model/legal_condition.rb +26 -0
- data/lib/puree/model/link.rb +26 -0
- data/lib/puree/model/model.rb +7 -0
- data/lib/puree/model/organisation.rb +34 -0
- data/lib/puree/model/organisation_header.rb +34 -0
- data/lib/puree/model/person.rb +28 -0
- data/lib/puree/model/person_name.rb +52 -0
- data/lib/puree/model/project.rb +49 -0
- data/lib/puree/model/publication.rb +53 -0
- data/lib/puree/model/publication_status.rb +21 -0
- data/lib/puree/model/publisher.rb +13 -0
- data/lib/puree/model/related_content_header.rb +34 -0
- data/lib/puree/model/resource.rb +42 -0
- data/lib/puree/model/server.rb +13 -0
- data/lib/puree/model/spatial_point.rb +16 -0
- data/lib/puree/model/structure.rb +18 -0
- data/lib/puree/model/temporal_range.rb +15 -0
- data/lib/puree/util/date.rb +86 -0
- data/lib/puree/util/util.rb +8 -0
- data/lib/puree/version.rb +1 -1
- data/lib/puree/xml_extractor/base.rb +47 -0
- data/lib/puree/xml_extractor/collection.rb +40 -0
- data/lib/puree/xml_extractor/dataset.rb +305 -0
- data/lib/puree/xml_extractor/download.rb +42 -0
- data/lib/puree/xml_extractor/event.rb +63 -0
- data/lib/puree/xml_extractor/journal.rb +33 -0
- data/lib/puree/xml_extractor/organisation.rb +75 -0
- data/lib/puree/xml_extractor/person.rb +57 -0
- data/lib/puree/xml_extractor/project.rb +135 -0
- data/lib/puree/xml_extractor/publication.rb +189 -0
- data/lib/puree/xml_extractor/publisher.rb +28 -0
- data/lib/puree/xml_extractor/resource.rb +71 -0
- data/lib/puree/xml_extractor/server.rb +32 -0
- data/lib/puree/xml_extractor/shared.rb +31 -0
- data/lib/puree/xml_extractor/xml_extractor.rb +10 -0
- data/puree.gemspec +11 -8
- data/spec/download_http_spec.rb +31 -0
- data/spec/open_api_dataset_http_spec.rb +15 -0
- data/spec/resource/collection_all_http_spec.rb +77 -0
- data/spec/resource/collection_http_spec.rb +65 -0
- data/spec/resource/dataset_http_spec.rb +104 -0
- data/spec/resource/event_http_spec.rb +52 -0
- data/spec/resource/journal_http_spec.rb +36 -0
- data/spec/resource/organisation_http_spec.rb +52 -0
- data/spec/resource/person_http_spec.rb +48 -0
- data/spec/resource/project_http_spec.rb +76 -0
- data/spec/resource/publication_http_spec.rb +78 -0
- data/spec/resource/publisher_http_spec.rb +26 -0
- data/spec/server_http_spec.rb +26 -0
- data/spec/spec_helper.rb +106 -21
- metadata +110 -46
- data/lib/puree/collection.rb +0 -285
- data/lib/puree/configuration.rb +0 -15
- data/lib/puree/dataset.rb +0 -483
- data/lib/puree/date.rb +0 -63
- data/lib/puree/download.rb +0 -189
- data/lib/puree/event.rb +0 -133
- data/lib/puree/journal.rb +0 -75
- data/lib/puree/map.rb +0 -68
- data/lib/puree/organisation.rb +0 -177
- data/lib/puree/person.rb +0 -136
- data/lib/puree/project.rb +0 -231
- data/lib/puree/publication.rb +0 -258
- data/lib/puree/publisher.rb +0 -64
- data/lib/puree/resource.rb +0 -261
- data/lib/puree/server.rb +0 -156
- data/spec/collection_spec.rb +0 -62
- data/spec/dataset_spec.rb +0 -148
- data/spec/download_spec.rb +0 -33
- data/spec/event_spec.rb +0 -108
- data/spec/journal_spec.rb +0 -92
- data/spec/organisation_spec.rb +0 -112
- data/spec/person_spec.rb +0 -104
- data/spec/project_spec.rb +0 -120
- data/spec/publication_spec.rb +0 -128
- data/spec/publisher_spec.rb +0 -89
- data/spec/server_spec.rb +0 -36
data/lib/puree/date.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
module Puree

  # Date utilities
  #
  module Date

    # Converts a date with up to three components (year, month, day) to ISO 8601
    # date format. Components that are nil or empty are left out; months and
    # days shorter than two characters are padded with a leading zero.
    #
    # The values held in +data+ are never modified.
    #
    # @param data [Hash] components keyed by 'year', 'month' and 'day'
    # @return [String] e.g. '2016', '2016-05' or '2016-05-07'
    def self.iso(data)
      # to_s turns a missing (nil) component into '' instead of raising.
      year  = data['year'].to_s
      month = data['month'].to_s
      day   = data['day'].to_s

      # Work on a copy so that appending never touches the caller's year string.
      iso_date = year.dup
      # rjust builds a new padded string rather than inserting into the original.
      iso_date << '-' << month.rjust(2, '0') unless month.empty?
      iso_date << '-' << day.rjust(2, '0') unless day.empty?
      iso_date
    end

    # Forces a date to have three components (year, month, day)
    #
    # n.b. a bare `private` does not apply to methods defined with `def self.`,
    # so this method has always been publicly callable and stays that way.
    #
    # @param data [Hash, nil]
    # @return [Hash] 'year', 'month' and 'day' keys (missing ones set to ''),
    #   or an empty hash when data is nil or empty
    def self.normalise(data)
      return {} if data.nil? || data.empty?

      {
        'year'  => data['year'] || '',
        'month' => data['month'] || '',
        'day'   => data['day'] || ''
      }
    end

  end

end
|
data/lib/puree/download.rb
DELETED
@@ -1,189 +0,0 @@
|
|
1
|
-
module Puree

  # Download
  #
  # Requests the download service and turns the uuid/downloads attributes of
  # the returned downloadCount elements into an array of hashes.
  #
  class Download
    attr_reader :response

    # @param base_url [String]
    # @param username [String]
    # @param password [String]
    # @param basic_auth [Boolean]
    def initialize(base_url: nil,
                   username: nil,
                   password: nil,
                   basic_auth: nil)
      @resource_type = :download
      @api_map = Puree::Map.new.get
      # Explicit arguments win; otherwise fall back to the module-level settings.
      @base_url = base_url.nil? ? Puree.base_url : base_url
      @basic_auth = basic_auth.nil? ? Puree.basic_auth : basic_auth
      if @basic_auth === true
        @username = username.nil? ? Puree.username : username
        @password = password.nil? ? Puree.password : password
      end
    end

    # Get
    #
    # Prints the missing settings and exits when the base URL (or, with basic
    # auth enabled, the username/password) is not available.
    #
    # @param limit [Integer]
    # @param offset [Integer]
    # @param resource [Symbol, nil] resource type to filter by; nil for no filter
    # @return [Array<Hash>] one hash per download count ('uuid', 'download');
    #   empty when the request failed or returned no download counts
    def get(limit: 20,
            offset: 0,
            resource: nil)
      missing = missing_credentials
      if !missing.empty?
        missing.each do |m|
          puts "#{self.class.name}" + '#' + "#{__method__} missing #{m}"
        end
        exit
      end

      # Forget the outcome of any earlier request so that a failed or empty
      # request cannot be answered from stale data.
      @metadata = []
      @doc = nil

      # strip any trailing slash
      @base_url = @base_url.sub(/(\/)+$/, '')

      @options = {
        basic_auth: @basic_auth,
        latest_api: true,
        resource_type: @resource_type.to_sym,
        rendering: :system,
        limit: limit,
        offset: offset,
        # resource is optional, so only symbolise it when one was given
        resource: resource.nil? ? nil : resource.to_sym
      }

      headers = { 'Accept' => 'application/xml' }
      # Credentials are only set (and only needed) when basic auth is in use.
      if @options[:basic_auth]
        @auth = Base64::strict_encode64(@username + ':' + @password)
        headers['Authorization'] = 'Basic ' + @auth
      end

      query = {}
      query['rendering'] = @options[:rendering]
      query['window.size'] = @options[:limit] if @options[:limit]
      query['window.offset'] = @options[:offset] if @options[:offset]
      query['contentType'] = service_family if @options[:resource]

      begin
        url = build_url
        req = HTTP.headers accept: headers['Accept']
        if @options[:basic_auth]
          req = req.auth headers['Authorization']
        end
        @response = req.get(url, params: query)
        @doc = Nokogiri::XML @response.body
        @doc.remove_namespaces!
      rescue HTTP::Error => e
        # @doc stays nil, so get_data? reports no data and [] is returned
        puts 'HTTP::Error '+ e.message
      end

      get_data? ? combine_metadata : []
    end

    alias :find :get

    # All metadata
    #
    # @return [Array<Hash>] result of the most recent get (nil before any get)
    def metadata
      @metadata
    end


    private


    # Stores the extracted statistics as the metadata.
    #
    # @return [Array<Hash>]
    def combine_metadata
      @metadata = extract_statistic
    end

    # Is there any data after get?
    #
    # @return [Boolean]
    def get_data?
      # No parsed document means the request failed.
      return false if @doc.nil?
      # n.b. arbitrary element existence check
      path = service_response_name + '/downloadCount'
      # 0 is truthy in Ruby, so the node count must be compared explicitly.
      @doc.xpath(path).size > 0
    end

    # Statistic
    #
    # @return [Array<Hash>] unique hashes with 'uuid' and 'download' keys
    def extract_statistic
      path = service_response_name + '/downloadCount'
      xpath_result = xpath_query path
      data_arr = []
      xpath_result.each { |i|
        data = {}
        data['uuid'] = i.attr('uuid').strip
        data['download'] = i.attr('downloads').strip
        data_arr << data
      }
      data_arr.uniq
    end

    # Looks up the family of the requested resource in the API map; prints a
    # message and exits when the resource type is not in the map.
    def service_family
      resource_type = @options[:resource]
      if @api_map[:resource_type].has_key? resource_type
        @api_map[:resource_type][resource_type][:family]
      else
        puts "#{resource_type} is an unrecognised resource type"
        exit
      end
    end

    # Service entry of the API map for this object's resource type.
    def service_name
      resource_type = @options[:resource_type]
      @api_map[:resource_type][resource_type][:service]
    end

    # Response entry of the API map for this object's resource type.
    def service_response_name
      resource_type = @options[:resource_type]
      @api_map[:resource_type][resource_type][:response]
    end

    # Base URL plus service name; '.current' is appended unless the
    # latest_api option is exactly false.
    #
    # @return [String]
    def build_url
      service = service_name
      if @options[:latest_api] === false
        service_api_mode = service
      else
        service_api_mode = service + '.current'
      end
      @base_url + '/' + service_api_mode
    end

    # Parses the response body, removes namespaces and runs the XPath query.
    #
    # @param path [String]
    def xpath_query(path)
      xml = @response.body
      doc = Nokogiri::XML xml
      doc.remove_namespaces!
      doc.xpath path
    end

    # Names of the settings that are required for a request but are nil.
    #
    # @return [Array<String>]
    def missing_credentials
      missing = []
      missing << 'base_url' if @base_url.nil?
      # Username and password only matter when basic auth will be sent.
      if @basic_auth
        missing << 'username' if @username.nil?
        missing << 'password' if @password.nil?
      end
      missing
    end

  end

end
|
data/lib/puree/event.rb
DELETED
@@ -1,133 +0,0 @@
|
|
1
|
-
module Puree

  # Event resource
  #
  class Event < Resource

    # @param base_url [String]
    # @param username [String]
    # @param password [String]
    # @param basic_auth [Boolean]
    def initialize(base_url: nil, username: nil, password: nil, basic_auth: nil)
      super(api: :event,
            base_url: base_url,
            username: username,
            password: password,
            basic_auth: basic_auth)
    end


    # City
    #
    # @return [String]
    def city
      @metadata['city']
    end

    # Country
    #
    # @return [String]
    def country
      @metadata['country']
    end

    # Date
    #
    # @return [Hash] 'start' and 'end' keys
    def date
      @metadata['date']
    end

    # Description
    #
    # @return [String]
    def description
      @metadata['description']
    end

    # Location
    #
    # @return [String]
    def location
      @metadata['location']
    end

    # Title
    #
    # @return [String]
    def title
      @metadata['title']
    end

    # Type
    #
    # @return [String]
    def type
      @metadata['type']
    end

    # All metadata
    #
    # @return [Hash]
    def metadata
      @metadata
    end



    private

    # n.b. xpath_query and xpath_query_for_single_value are not defined in this
    # class; presumably they are inherited from Resource.

    def extract_city
      path = '/city'
      xpath_query(path).text.strip
    end

    def extract_country
      path = '/country/term/localizedString'
      xpath_query(path).text.strip
    end

    # Start and end of the event's date range.
    #
    # @return [Hash] 'start' and 'end' keys holding the stripped element text
    def extract_date
      data = {}
      path = '/dateRange'
      range = xpath_query path
      data['start'] = range.xpath('startDate').text.strip
      # The end of the range comes from endDate, not a second read of startDate.
      data['end'] = range.xpath('endDate').text.strip
      data
    end

    def extract_description
      path = '/description'
      xpath_query(path).text.strip
    end

    def extract_location
      path = '/location'
      xpath_query(path).text.strip
    end

    def extract_title
      path = '/title/localizedString'
      xpath_query_for_single_value path
    end

    def extract_type
      path = '//typeClassification/term/localizedString'
      xpath_query_for_single_value path
    end

    # Adds the event-specific fields to the metadata produced by the superclass
    # and stores the result.
    #
    # @return [Hash]
    def combine_metadata
      o = super
      o['city'] = extract_city
      o['country'] = extract_country
      o['date'] = extract_date
      o['description'] = extract_description
      o['location'] = extract_location
      o['title'] = extract_title
      o['type'] = extract_type
      @metadata = o
    end

  end

end
|
data/lib/puree/journal.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
module Puree

  # Journal resource
  #
  # Exposes the ISSN, publisher and title held in the metadata hash.
  #
  class Journal < Resource

    # @param base_url [String]
    # @param username [String]
    # @param password [String]
    # @param basic_auth [Boolean]
    def initialize(base_url: nil, username: nil, password: nil, basic_auth: nil)
      super(api: :journal, base_url: base_url, username: username, password: password, basic_auth: basic_auth)
    end

    # ISSN
    #
    # @return [String]
    def issn
      @metadata['issn']
    end

    # Publisher
    #
    # @return [String]
    def publisher
      @metadata['publisher']
    end

    # Title
    #
    # @return [String]
    def title
      @metadata['title']
    end

    # All metadata
    #
    # @return [Hash]
    def metadata
      @metadata
    end


    private

    # n.b. xpath_query_for_single_value is not defined in this class;
    # presumably it is inherited from Resource.

    def extract_issn
      xpath_query_for_single_value '/issns/issn/string'
    end

    def extract_publisher
      xpath_query_for_single_value '/publisher/name'
    end

    def extract_title
      xpath_query_for_single_value '/titles/title/string'
    end

    # Adds the journal-specific fields to the metadata produced by the
    # superclass and stores the result.
    #
    # @return [Hash]
    def combine_metadata
      @metadata = super.tap do |record|
        record['issn'] = extract_issn
        record['publisher'] = extract_publisher
        record['title'] = extract_title
      end
    end

  end

end
|