opener-webservice 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,297 @@
1
+ module Opener
2
+ module Webservice
3
+ ##
4
+ # The meat of the webservices: the actual Sinatra application. Components
5
+ # should extend this class and configure it (e.g. to specify what component
6
+ # class to use).
7
+ #
8
+ class Server < Sinatra::Base
9
##
# Names of the request fields that may carry the input to process. A
# request is only accepted when at least one of them is present and
# non-empty.
#
# Frozen so the shared list cannot be modified at runtime.
#
# @return [Array<String>]
#
INPUT_FIELDS = %w[input input_url].freeze
15
+
16
##
# Configures which component parameters this server accepts. Every name is
# converted to a Symbol before it is stored.
#
# @param [Array] params Parameter names (anything responding to `to_sym`).
#
def self.accepted_params=(params)
  @accepted_params = params.map { |name| name.to_sym }
end
25
+
26
##
# The component parameters accepted by this server. Defaults to (and
# memoizes) an empty Array when nothing has been configured yet.
#
# @return [Array]
#
def self.accepted_params
  @accepted_params ||= []
end
34
+
35
##
# Configures the text processor class. An instance of it is created for
# every request that gets analyzed.
#
# @param [Class] processor
#
def self.text_processor=(processor)
  @text_processor = processor
end
43
+
44
##
# The configured text processor class, or `nil` when none has been set.
#
# @return [Class]
#
def self.text_processor
  @text_processor
end
52
+
53
# In production, switch off both the `raise_errors` and `dump_errors`
# Sinatra settings.
configure :production do
  disable :raise_errors, :dump_errors
end
57
+
58
# Generic error handler: reports the exception Sinatra stored in the Rack
# environment to Rollbar and answers with a 500 and a fixed message.
error do
  Rollbar.report_exception(env['sinatra.error'])

  message = 'An error occurred. A team of garden gnomes has been ' \
    'dispatched to look into the problem.'

  halt(500, message)
end
67
+
68
# Require authentication for every request except static files (paths
# ending in .css, .jpg, .png, .js or .ico) when authentication is enabled.
#
# The extension dots are escaped and the extension must be the very end of
# the path. Previously the dots matched any character and the extension
# could occur anywhere, so a path such as "/data.json" or "/objson" skipped
# authentication. The `m` flag lets `.` match newlines so that a path
# containing a newline cannot slip past the filter either.
before %r{\A(?!.*\.(?:css|jpg|png|js|ico)\z).+\z}m do
  authenticate! if Configuration.authentication?
end
73
+
74
##
# Renders the `index` ERB template: a form that lets users submit data
# directly from their browser.
#
get '/' do
  erb(:index)
end
81
+
82
##
# Processes the submitted input using the configured component.
#
# Data can be submitted in two ways:
#
# 1. As regular POST fields
# 2. As a single JSON object in the POST body
#
# In both cases the following fields can be used:
#
# | Field          | Description                                 |
# |:---------------|:--------------------------------------------|
# | input          | The raw input text/KAF to process           |
# | input_url      | A URL to a document to download and process |
# | callbacks      | An array of callback URLs                   |
# | error_callback | A URL to submit errors to                   |
# | request_id     | A unique ID to associate with the document  |
# | metadata       | A custom metadata object to store in S3     |
#
# A JSON body would look something like the following:
#
#     {"input": "Hello world, this is....", "request_id": "123abc"}
#
# Responds with a 400 when neither "input" nor "input_url" carries a
# non-empty value. Requests with a non-empty "callbacks" field are processed
# asynchronously, all others synchronously.
#
post '/' do
  raw_options = json_input? ? params_from_json : params
  options     = InputSanitizer.new.prepare_parameters(raw_options)

  input_given = INPUT_FIELDS.any? do |field|
    value = options[field]

    value && !value.empty?
  end

  unless input_given
    halt(400, 'No input specified in the "input" or "input_url" field')
  end

  callbacks = options['callbacks']

  if callbacks && !callbacks.empty?
    process_async(options)
  else
    process_sync(options)
  end
end
135
+
136
##
# Handles a request synchronously: analyzes the input, sets the response
# content type to the one reported by the analysis and returns the output
# as the response body.
#
# @param [Hash] options
# @return [String]
#
def process_sync(options)
  body, type = analyze(options)

  content_type(type)

  body
end
150
+
151
##
# Handles a request asynchronously. The analysis is handed to {#async} and
# the client immediately receives a JSON document containing the request ID
# and the URL (last callback URL + request ID) at which the output is
# expected.
#
# A random hex ID is generated when the request did not supply a
# "request_id".
#
# @param [Hash] options
# @return [String] JSON object with the keys `request_id` and `output_url`.
#
def process_async(options)
  id         = options['request_id'] || SecureRandom.hex
  output_url = "#{options['callbacks'].last}/#{id}"

  async { analyze_async(options, id) }

  content_type(:json)

  response = {:request_id => id, :output_url => output_url}

  JSON.dump(response)
end
171
+
172
##
# Runs the configured text processor on the request's input.
#
# The options are reduced to the accepted parameters before they are handed
# to the processor. The content type is taken from the processor's
# `output_type` when it defines one and defaults to `:xml` otherwise.
#
# @param [Hash] options
# @return [Array] The output followed by the content type.
#
def analyze(options)
  whitelisted = InputSanitizer.new.whitelist_options(
    options,
    self.class.accepted_params
  )

  text      = InputExtractor.new.extract(options)
  component = self.class.text_processor.new(whitelisted)
  result    = component.run(text)
  type      = component.respond_to?(:output_type) ? component.output_type : :xml

  [result, type]
end
197
+
198
##
# Analyzes the input and submits the output to the next callback URL.
#
# Any failure is submitted to the error callback (when the request
# specified one) and then re-raised so it is not swallowed here. Rescuing
# `Exception` is deliberate: the error is always re-raised.
#
# NOTE(review): the original comment says the re-raise lets Rollbar report
# the error; confirm that still holds when this runs in the Thread started
# by #async.
#
# @param [Hash] options
# @param [String] request_id
#
def analyze_async(options, request_id)
  result = analyze(options).first

  submit_output(result, request_id, options)
rescue Exception => failure
  if options['error_callback']
    ErrorHandler.new.submit(failure, request_id)
  end

  raise
end
216
+
217
##
# Posts the output to the first callback URL, passing the remaining
# callback URLs along for the next component.
#
# The payload is based on the incoming options so extra data (e.g.
# metadata) is kept. The original "input" is dropped and replaced by either
# a URL to the uploaded output (when an output bucket is configured) or the
# output itself. The given options Hash is not modified.
#
# @param [String] output
# @param [String] request_id
# @param [Hash] options
#
def submit_output(output, request_id, options)
  target  = options['callbacks'].first
  pending = options['callbacks'].drop(1)

  payload = options.merge('callbacks' => pending, 'request_id' => request_id)

  # The old input must not be re-sent to the next component.
  payload.delete('input')

  if Configuration.output_bucket
    stored = Uploader.new.upload(request_id, output, options['metadata'])

    payload['input_url'] = stored.url_for(:read, :expires => 3600)
  else
    payload['input'] = output
  end

  CallbackHandler.new.post(target, payload)
end
249
+
250
##
# Returns the parameters contained in the JSON request body. Keys are
# returned as _strings_ to prevent Symbol DOS attacks.
#
# The body is untrusted, so it is parsed with `JSON.parse` rather than
# `JSON.load`: the latter is documented as meant for trusted input only. A
# blank body yields an empty Hash instead of `nil`.
#
# @return [Hash]
# @raise [JSON::ParserError] When the body is not valid JSON.
#
def params_from_json
  body = request.body.read

  return {} if body.nil? || body.strip.empty?

  JSON.parse(body)
end
259
+
260
##
# Returns `true` if the request body is declared as JSON, `false`
# otherwise.
#
# Only the media type of the Content-Type header is compared, ignoring case
# and any parameters, so "application/json; charset=utf-8" is recognized
# too. A missing Content-Type yields `false`.
#
# @return [TrueClass|FalseClass]
#
def json_input?
  media_type = request.content_type.to_s.split(';', 2).first.to_s

  media_type.strip.downcase == 'application/json'
end
268
+
269
##
# Authenticates the current request.
#
# The request parameters named by the configured token and secret fields
# are sent to the configured authentication endpoint using a GET request.
# The request is halted with a 403 (including the endpoint's response body)
# unless the endpoint reports success.
#
def authenticate!
  fields = [
    Configuration.authentication_token,
    Configuration.authentication_secret
  ]

  credentials = fields.each_with_object({}) do |name, creds|
    creds[name] = params[name]
  end

  reply = HTTPClient.get(Configuration.authentication_endpoint, credentials)

  return if reply.ok?

  halt(403, "Authentication failed: #{reply.body}")
end
283
+
284
##
# Runs the block in a separate thread. In the test environment the block is
# yielded directly instead, so its result is available immediately.
#
def async
  return yield if self.class.environment == :test

  Thread.new { yield }
end
295
+ end # Server
296
+ end # Webservice
297
+ end # Opener
@@ -0,0 +1,50 @@
1
module Opener
  module Webservice
    ##
    # Class for uploading KAF documents to Amazon S3.
    #
    class Uploader
      ##
      # Uploads the given KAF document as "<identifier>.xml" with the
      # content type "application/xml".
      #
      # A `nil` metadata value (which is what callers pass when a request
      # carries no metadata) is treated the same as an empty Hash.
      #
      # @param [String] identifier
      # @param [String] document
      # @param [Hash] metadata Custom metadata to store with the document.
      #
      # @return [AWS::S3::S3Object]
      #
      def upload(identifier, document, metadata = {})
        create(
          "#{identifier}.xml",
          document,
          :metadata     => metadata || {},
          :content_type => 'application/xml'
        )
      end

      ##
      # Creates an object in the output bucket; all arguments are passed on
      # unchanged.
      #
      # @param [Array] args
      # @return [AWS::S3::S3Object]
      #
      def create(*args)
        bucket.objects.create(*args)
      end

      ##
      # The (memoized) S3 client.
      #
      # @return [AWS::S3]
      #
      def s3
        @s3 ||= AWS::S3.new
      end

      ##
      # The (memoized) bucket named by `Configuration.output_bucket`.
      #
      # @return [AWS::S3::Bucket]
      #
      def bucket
        @bucket ||= s3.buckets[Configuration.output_bucket]
      end
    end # Uploader
  end # Webservice
end # Opener
@@ -1,7 +1,5 @@
1
- require 'sinatra/base'
2
-
3
1
module Opener
  module Webservice
    # Current version of opener-webservice. Frozen so the shared String
    # cannot be modified at runtime.
    VERSION = '2.1.0'.freeze
  end # Webservice
end # Opener