opener-webservice 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
module Opener
  module Webservice
    ##
    # The meat of the webservices: the actual Sinatra application. Components
    # should extend this class and configure it (e.g. to specify what component
    # class to use).
    #
    class Server < Sinatra::Base
      ##
      # List of fields that can contain input to process.
      #
      # @return [Array]
      #
      INPUT_FIELDS = %w{input input_url}.freeze

      ##
      # File extensions of static assets. Requests whose path ends in one of
      # these extensions are not authenticated.
      #
      # @return [Array]
      #
      STATIC_EXTENSIONS = %w{.css .jpg .png .js .ico}.freeze

      ##
      # Sets the accepted component parameters. Parameter names are always
      # stored as symbols.
      #
      # @param [Array] params
      #
      def self.accepted_params=(params)
        @accepted_params = params.map(&:to_sym)
      end

      ##
      # Returns the accepted component parameters, defaulting to an empty
      # Array when none have been set.
      #
      # @return [Array]
      #
      def self.accepted_params
        return @accepted_params ||= []
      end

      ##
      # Sets the text processor to use.
      #
      # @param [Class] processor
      #
      def self.text_processor=(processor)
        @text_processor = processor
      end

      ##
      # Returns the text processor to use.
      #
      # @return [Class]
      #
      def self.text_processor
        return @text_processor
      end

      configure :production do
        set :raise_errors, false
        set :dump_errors, false
      end

      # Reports the error to Rollbar and responds with a generic 500 message.
      error do
        Rollbar.report_exception(env['sinatra.error'])

        halt(
          500,
          'An error occurred. A team of garden gnomes has been dispatched to ' \
            'look into the problem.'
        )
      end

      # Require authentication for non static files if authentication is
      # enabled. The path's actual extension is compared literally so that a
      # path that merely contains a fragment such as "js" or "css" is still
      # authenticated.
      before do
        static = STATIC_EXTENSIONS.include?(File.extname(request.path_info))

        authenticate! if !static && Configuration.authentication?
      end

      ##
      # Shows a form that allows users to submit data directly from their
      # browser.
      #
      get '/' do
        erb :index
      end

      ##
      # Processes the input using a component.
      #
      # Data can be submitted in two ways:
      #
      # 1. As regular POST fields
      # 2. A single JSON object as the POST body
      #
      # When submitting data, you can use the following fields (either as POST
      # fields or as the fields of a JSON object):
      #
      # | Field          | Description                                 |
      # |:---------------|:--------------------------------------------|
      # | input          | The raw input text/KAF to process           |
      # | input_url      | A URL to a document to download and process |
      # | callbacks      | An array of callback URLs                   |
      # | error_callback | A URL to submit errors to                   |
      # | request_id     | A unique ID to associate with the document  |
      # | metadata       | A custom metadata object to store in S3     |
      #
      # In case of a JSON object the input body would look something like the
      # following:
      #
      #     {"input": "Hello world, this is....", "request_id": "123abc"}
      #
      # The request is processed asynchronously when a non-empty "callbacks"
      # field is given and synchronously otherwise. A 400 response is returned
      # when neither "input" nor "input_url" contains a value.
      #
      post '/' do
        options = json_input? ? params_from_json : params
        options = InputSanitizer.new.prepare_parameters(options)

        has_input = INPUT_FIELDS.any? do |field|
          options[field] && !options[field].empty?
        end

        unless has_input
          halt(400, 'No input specified in the "input" or "input_url" field')
        end

        if options['callbacks'] && !options['callbacks'].empty?
          process_async(options)
        else
          process_sync(options)
        end
      end

      ##
      # Processes a request synchronously, results are sent as the response upon
      # completion.
      #
      # @param [Hash] options
      # @return [String]
      #
      def process_sync(options)
        output, ctype = analyze(options)

        content_type(ctype)

        return output
      end

      ##
      # Processes a request asynchronously, results are submitted to the next
      # callback URL. The response is a JSON document containing the request ID
      # and an output URL built from the last callback URL and the request ID.
      #
      # @param [Hash] options
      # @return [String] The JSON encoded response body.
      #
      def process_async(options)
        # Generate an ID when the client did not supply one.
        request_id = options['request_id'] || SecureRandom.hex
        final_url  = options['callbacks'].last

        async { analyze_async(options, request_id) }

        content_type :json

        return JSON.dump(
          :request_id => request_id,
          :output_url => "#{final_url}/#{request_id}"
        )
      end

      ##
      # Analyzes the input and returns an Array containing the output and
      # content type. The content type is taken from the processor when it
      # responds to `output_type` and defaults to `:xml` otherwise.
      #
      # @param [Hash] options
      # @return [Array]
      #
      def analyze(options)
        # Only parameters accepted by the component are passed to the processor.
        comp_options = InputSanitizer.new.whitelist_options(
          options,
          self.class.accepted_params
        )

        input     = InputExtractor.new.extract(options)
        processor = self.class.text_processor.new(comp_options)
        output    = processor.run(input)

        if processor.respond_to?(:output_type)
          type = processor.output_type
        else
          type = :xml
        end

        return output, type
      end

      ##
      # Analyzes the input asynchronously and submits the output to the next
      # callback URL.
      #
      # @param [Hash] options
      # @param [String] request_id
      #
      def analyze_async(options, request_id)
        output, _ = analyze(options)

        submit_output(output, request_id, options)

      # Submit the error to the error callback, re-raise so Rollbar can also
      # report it. Every exception class is rescued here; this is only
      # acceptable because the error is always re-raised afterwards.
      rescue Exception => error
        ErrorHandler.new.submit(error, request_id) if options['error_callback']

        raise
      end

      ##
      # Submits the output to the next callback URL.
      #
      # @param [String] output
      # @param [String] request_id
      # @param [Hash] options
      #
      def submit_output(output, request_id, options)
        # Work on a copy so that the caller's callback list is left untouched.
        callbacks = options['callbacks'].dup
        next_url  = callbacks.shift

        # Re-use the old payload so that any extra data (e.g. metadata) is kept
        # in place.
        new_payload = options.merge(
          'callbacks'  => callbacks,
          'request_id' => request_id
        )

        # Make sure we don't re-send this to the next component.
        new_payload.delete('input')

        # With an output bucket configured the document is uploaded and only a
        # URL (valid for 3600 seconds) is passed on, otherwise the output is
        # sent inline.
        if Configuration.output_bucket
          uploader = Uploader.new
          object   = uploader.upload(request_id, output, options['metadata'])

          new_payload['input_url'] = object.url_for(:read, :expires => 3600)
        else
          new_payload['input'] = output
        end

        CallbackHandler.new.post(next_url, new_payload)
      end

      ##
      # Returns a Hash containing the parameters from a JSON payload. The keys
      # of this Hash are returned as _strings_ to prevent Symbol DOS attacks.
      #
      # The body is parsed using `JSON.parse` instead of `JSON.load` as the
      # latter is meant for trusted input only. The request is halted with a
      # 400 response when the body is not valid JSON or when the payload is not
      # a JSON object.
      #
      # @return [Hash]
      #
      def params_from_json
        payload = JSON.parse(request.body.read)

        unless payload.is_a?(Hash)
          halt(400, 'The JSON payload should be a single JSON object')
        end

        return payload
      rescue JSON::ParserError
        halt(400, 'The request body does not contain valid JSON')
      end

      ##
      # Returns `true` if the input data is in JSON, false otherwise. Only the
      # media type is compared so that parameters such as "; charset=utf-8" in
      # the Content-Type header don't prevent JSON from being detected.
      #
      # @return [TrueClass|FalseClass]
      #
      def json_input?
        return request.media_type == 'application/json'
      end

      ##
      # Authenticates the current request by sending the token and secret
      # request parameters to the configured authentication endpoint. The
      # request is halted with a 403 response when the endpoint does not
      # respond successfully.
      #
      def authenticate!
        # These are the *names* of the parameters holding the credentials.
        token  = Configuration.authentication_token
        secret = Configuration.authentication_secret
        creds  = {token => params[token], secret => params[secret]}

        response = HTTPClient.get(Configuration.authentication_endpoint, creds)

        unless response.ok?
          halt(403, "Authentication failed: #{response.body}")
        end
      end

      ##
      # Runs the block in a separate thread. When running a test environment the
      # block is instead yielded normally.
      #
      def async
        if self.class.environment == :test
          yield
        else
          Thread.new { yield }
        end
      end
    end # Server
  end # Webservice
end # Opener
@@ -0,0 +1,50 @@
1
module Opener
  module Webservice
    ##
    # Class for uploading KAF documents to Amazon S3.
    #
    class Uploader
      ##
      # Uploads the given KAF document. The object is created using the name
      # "<identifier>.xml" and the content type "application/xml".
      #
      # @param [String] identifier
      # @param [String] document
      # @param [Hash] metadata Custom metadata to store with the object. A
      #  `nil` value is treated as an empty Hash since callers may explicitly
      #  pass a missing value, which bypasses the default.
      #
      # @return [AWS::S3::S3Object]
      #
      def upload(identifier, document, metadata = {})
        return create(
          "#{identifier}.xml",
          document,
          :metadata     => metadata || {},
          :content_type => 'application/xml'
        )
      end

      ##
      # Creates a new object in the output bucket, all arguments are passed
      # as-is to the bucket's object collection.
      #
      # @param [Array] args
      # @return [AWS::S3::S3Object]
      #
      def create(*args)
        return bucket.objects.create(*args)
      end

      ##
      # Returns the (memoized) S3 client.
      #
      # @return [AWS::S3]
      #
      def s3
        return @s3 ||= AWS::S3.new
      end

      ##
      # Returns the (memoized) bucket configured as the output bucket.
      #
      # @return [AWS::S3::Bucket]
      #
      def bucket
        return @bucket ||= s3.buckets[Configuration.output_bucket]
      end
    end # Uploader
  end # Webservice
end # Opener
@@ -1,7 +1,5 @@
1
- require 'sinatra/base'
2
-
3
1
  module Opener
4
- class Webservice < Sinatra::Base
5
- VERSION = "2.0.0"
6
- end
7
- end
2
+ module Webservice
3
+ VERSION = '2.1.0'
4
+ end # Webservice
5
+ end # Opener