opener-webservice 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +135 -17
- data/lib/opener/webservice/configuration.rb +90 -0
- data/lib/opener/webservice/error_handler.rb +29 -0
- data/lib/opener/webservice/input_extractor.rb +43 -0
- data/lib/opener/webservice/input_sanitizer.rb +65 -0
- data/lib/opener/webservice/option_parser.rb +175 -0
- data/lib/opener/webservice/server.rb +297 -0
- data/lib/opener/webservice/uploader.rb +50 -0
- data/lib/opener/webservice/version.rb +4 -6
- data/lib/opener/webservice.rb +18 -417
- data/opener-webservice.gemspec +18 -10
- metadata +109 -5
- data/lib/opener/webservice/opt_parser.rb +0 -103
@@ -0,0 +1,297 @@
|
|
1
|
+
module Opener
  module Webservice
    ##
    # The meat of the webservices: the actual Sinatra application. Components
    # should extend this class and configure it (e.g. to specify what component
    # class to use).
    #
    class Server < Sinatra::Base
      ##
      # List of fields that can contain input to process.
      #
      # @return [Array]
      #
      INPUT_FIELDS = %w{input input_url}.freeze

      ##
      # Sets the accepted component parameters. Parameter names are always
      # stored as symbols.
      #
      # @param [Array] params
      #
      def self.accepted_params=(params)
        @accepted_params = params.map(&:to_sym)
      end

      ##
      # Returns the accepted component parameters, defaulting to an empty
      # Array when none have been set.
      #
      # @return [Array]
      #
      def self.accepted_params
        return @accepted_params ||= []
      end

      ##
      # Sets the text processor to use.
      #
      # @param [Class] processor
      #
      def self.text_processor=(processor)
        @text_processor = processor
      end

      ##
      # Returns the text processor to use.
      #
      # @return [Class]
      #
      def self.text_processor
        return @text_processor
      end

      # In production errors are neither re-raised nor dumped; they are
      # handled by the `error` block below instead.
      configure :production do
        set :raise_errors, false
        set :dump_errors, false
      end

      # Reports the error to Rollbar and responds with a generic 500 message
      # so that no internal details are leaked to the client.
      error do
        Rollbar.report_exception(env['sinatra.error'])

        halt(
          500,
          'An error occurred. A team of garden gnomes has been dispatched to ' \
            'look into the problem.'
        )
      end

      # Require authentication for non static files if authentication is
      # enabled.
      #
      # The dots are escaped so that only literal extensions such as ".js"
      # exempt a path from authentication. Unescaped they match any character,
      # which would exempt paths such as "/xjs" as well.
      before %r{^((?!\.css|\.jpg|\.png|\.js|\.ico).)+$} do
        authenticate! if Configuration.authentication?
      end

      ##
      # Shows a form that allows users to submit data directly from their
      # browser.
      #
      get '/' do
        erb :index
      end

      ##
      # Processes the input using a component.
      #
      # Data can be submitted in two ways:
      #
      # 1. As regular POST fields
      # 2. A single JSON object as the POST body
      #
      # When submitting data, you can use the following fields (either as POST
      # fields or as the fields of a JSON object):
      #
      # | Field          | Description                                 |
      # |:---------------|:--------------------------------------------|
      # | input          | The raw input text/KAF to process           |
      # | input_url      | A URL to a document to download and process |
      # | callbacks      | An array of callback URLs                   |
      # | error_callback | A URL to submit errors to                   |
      # | request_id     | A unique ID to associate with the document  |
      # | metadata       | A custom metadata object to store in S3     |
      #
      # In case of a JSON object the input body would look something like the
      # following:
      #
      #     {"input": "Hello world, this is....", "request_id": "123abc"}
      #
      # The request is processed asynchronously when one or more callbacks are
      # given and synchronously otherwise. A 400 is returned when none of the
      # input fields contain a value.
      #
      post '/' do
        raw_options = json_input? ? params_from_json : params
        options     = InputSanitizer.new.prepare_parameters(raw_options)

        has_input = INPUT_FIELDS.any? do |field|
          options[field] && !options[field].empty?
        end

        unless has_input
          halt(400, 'No input specified in the "input" or "input_url" field')
        end

        if options['callbacks'] && !options['callbacks'].empty?
          process_async(options)
        else
          process_sync(options)
        end
      end

      ##
      # Processes a request synchronously, results are sent as the response upon
      # completion. The response content type is set to the type reported by
      # {#analyze}.
      #
      # @param [Hash] options
      # @return [String]
      #
      def process_sync(options)
        output, ctype = analyze(options)

        content_type(ctype)

        return output
      end

      ##
      # Processes a request asynchronously, results are submitted to the next
      # callback URL. A request ID is generated when the client did not supply
      # one.
      #
      # @param [Hash] options
      # @return [String] A JSON document containing the request ID and the
      #  URL built from the last callback URL and the request ID.
      #
      def process_async(options)
        request_id = options['request_id'] || SecureRandom.hex
        final_url  = options['callbacks'].last

        async { analyze_async(options, request_id) }

        content_type :json

        return JSON.dump(
          :request_id => request_id,
          :output_url => "#{final_url}/#{request_id}"
        )
      end

      ##
      # Analyzes the input and returns an Array containing the output and
      # content type. The content type is taken from the processor's
      # `output_type` method when it has one and defaults to `:xml` otherwise.
      #
      # @param [Hash] options
      # @return [Array]
      #
      def analyze(options)
        comp_options = InputSanitizer.new.whitelist_options(
          options,
          self.class.accepted_params
        )

        input     = InputExtractor.new.extract(options)
        processor = self.class.text_processor.new(comp_options)
        output    = processor.run(input)

        if processor.respond_to?(:output_type)
          type = processor.output_type
        else
          type = :xml
        end

        return output, type
      end

      ##
      # Analyzes the input asynchronously and submits the output to the next
      # callback URL.
      #
      # @param [Hash] options
      # @param [String] request_id
      #
      def analyze_async(options, request_id)
        output, _ = analyze(options)

        submit_output(output, request_id, options)

      # Submit the error to the error callback, re-raise so Rollbar can also
      # report it. `Exception` is rescued deliberately: nothing is swallowed
      # since the error is always re-raised.
      rescue Exception => error
        ErrorHandler.new.submit(error, request_id) if options['error_callback']

        raise error
      end

      ##
      # Submits the output to the next callback URL.
      #
      # When an output bucket is configured the output is uploaded and a URL
      # to the uploaded object is passed along as "input_url", otherwise the
      # output itself is passed along as "input".
      #
      # @param [String] output
      # @param [String] request_id
      # @param [Hash] options
      #
      def submit_output(output, request_id, options)
        callbacks = options['callbacks'].dup
        next_url  = callbacks.shift

        # Re-use the old payload so that any extra data (e.g. metadata) is kept
        # in place.
        new_payload = options.merge(
          'callbacks'  => callbacks,
          'request_id' => request_id
        )

        # Make sure we don't re-send this to the next component.
        new_payload.delete('input')

        if Configuration.output_bucket
          uploader = Uploader.new
          object   = uploader.upload(request_id, output, options['metadata'])

          new_payload['input_url'] = object.url_for(:read, :expires => 3600)
        else
          new_payload['input'] = output
        end

        CallbackHandler.new.post(next_url, new_payload)
      end

      ##
      # Returns a Hash containing the parameters from a JSON payload. The keys
      # of this Hash are returned as _strings_ to prevent Symbol DOS attacks.
      #
      # The body is parsed using `JSON.parse` instead of `JSON.load` as the
      # latter is not meant to be used with untrusted input. Requests with a
      # malformed body or a body that is not a JSON object are rejected with a
      # 400 response.
      #
      # @return [Hash]
      #
      def params_from_json
        payload = JSON.parse(request.body.read)

        unless payload.is_a?(Hash)
          halt(400, 'The JSON payload must be an object')
        end

        return payload
      rescue JSON::ParserError
        halt(400, 'The request body does not contain valid JSON')
      end

      ##
      # Returns `true` if the input data is in JSON, false otherwise
      #
      # The media type is used instead of the raw Content-Type header so that
      # values such as "application/json; charset=utf-8" are recognized too.
      #
      # @return [TrueClass|FalseClass]
      #
      def json_input?
        return request.media_type == 'application/json'
      end

      ##
      # Authenticates the current request.
      #
      # The request parameters named by the configured token and secret are
      # forwarded to the authentication endpoint. The request is halted with a
      # 403 unless the endpoint responds successfully.
      #
      def authenticate!
        token  = Configuration.authentication_token
        secret = Configuration.authentication_secret
        creds  = {token => params[token], secret => params[secret]}

        response = HTTPClient.get(Configuration.authentication_endpoint, creds)

        unless response.ok?
          halt(403, "Authentication failed: #{response.body}")
        end
      end

      ##
      # Runs the block in a separate thread. When running a test environment the
      # block is instead yielded normally.
      #
      def async
        if self.class.environment == :test
          yield
        else
          Thread.new { yield }
        end
      end
    end # Server
  end # Webservice
end # Opener
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Opener
  module Webservice
    ##
    # Class for uploading KAF documents to Amazon S3.
    #
    class Uploader
      ##
      # Uploads the given KAF document. The document is stored using the key
      # "<identifier>.xml" with the content type "application/xml".
      #
      # @param [String] identifier
      # @param [String] document
      # @param [Hash] metadata Custom metadata to store alongside the
      #  document. `nil` is treated the same as an empty Hash so callers can
      #  pass along an optional value as-is.
      #
      # @return [AWS::S3::S3Object]
      #
      def upload(identifier, document, metadata = {})
        object = create(
          "#{identifier}.xml",
          document,
          :metadata     => metadata || {},
          :content_type => 'application/xml'
        )

        return object
      end

      ##
      # Creates a new object in the output bucket, all arguments are passed
      # to the bucket's object collection as-is.
      #
      # @param [Array] args
      # @return [AWS::S3::S3Object]
      #
      def create(*args)
        return bucket.objects.create(*args)
      end

      ##
      # Returns the (memoized) S3 client.
      #
      # @return [AWS::S3]
      #
      def s3
        return @s3 ||= AWS::S3.new
      end

      ##
      # Returns the (memoized) bucket configured as the output bucket.
      #
      # @return [AWS::S3::Bucket]
      #
      def bucket
        return @bucket ||= s3.buckets[Configuration.output_bucket]
      end
    end # Uploader
  end # Webservice
end # Opener
|