opener-daemons 1.3.0 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,229 +1,120 @@
1
- # encoding: UTF-8
1
+ module Opener
2
+ module Daemons
3
+ ##
4
+ # The Daemon class communicates with an AWS SQS queue and delegates work to
5
+ # the mapper and worker classes.
6
+ #
7
+ # @!attribute [r] component
8
+ # @return [Class]
9
+ #
10
+ class Daemon < Oni::Daemons::SQS
11
+ attr_reader :component
2
12
 
3
- require 'thread'
4
- require 'opener/daemons/sqs'
5
- require 'json'
13
+ set :worker, Worker
14
+ set :mapper, Mapper
6
15
 
7
- Encoding.default_internal = Encoding::UTF_8
8
- Encoding.default_external = Encoding::UTF_8
16
+ # The name of the SQS input queue to use.
17
+ set :queue_name, proc { Daemons.input_queue }
9
18
 
10
- module Opener
11
- module Daemons
12
- class Daemon
13
- attr_reader :batch_size, :buffer_size, :sleep_interval,
14
- :input_queue, :output_queue,
15
- :input_buffer, :output_buffer,
16
- :bucket_name, :bucket_dir, :file_suffix,
17
- :klass,
18
- :logger,
19
- :script_name
20
-
21
- attr_accessor :threads, :thread_counts
22
-
23
- def initialize(klass, options={})
24
-
25
- @threads = {:readers=>[], :workers=>[], :writers=>[], :reporters=>[]}
26
- @thread_counts = {:readers => options.fetch(:readers, 1),
27
- :workers => options.fetch(:workers, 5),
28
- :writers => options.fetch(:writers, 1)}
29
-
30
- @relentless = options.fetch(:relentless, false)
31
- @sleep_interval = options.fetch(:sleep_interval, 5)
32
-
33
- # Initialize queues
34
- @input_queue = Opener::Daemons::SQS.find(options.fetch(:input_queue))
35
- if options[:output_queue]
36
- @output_queue = Opener::Daemons::SQS.find(options[:output_queue])
37
- end
38
-
39
- # Get bucket name and other bucket options, if any.
40
- if @bucket_name = options[:bucket_name]
41
- @bucket_dir = options.fetch(:bucket_dir, nil)
42
- @file_suffix = options.fetch(:file_suffix, nil)
43
- end
44
-
45
- # Initialize Buffers
46
- @input_buffer = Queue.new
47
- @output_buffer = Queue.new
48
-
49
- # Batch and Buffer size for a smooth flow.
50
- @batch_size = options.fetch(:batch_size, 10)
51
- @buffer_size = options[:buffer_size]
52
-
53
- # Working component
54
- @klass = klass
55
-
56
- @script_name = File.basename($0, ".rb")
57
-
58
- @logger = Logger.new(options.fetch(:log, STDOUT))
59
- @logger.level = if options.fetch(:debug, false)
60
- Logger::DEBUG
61
- else
62
- Logger::INFO
63
- end
64
-
65
- logger.debug(options.to_json)
66
- end
19
+ # The amount of threads to use.
20
+ set :threads, proc { Daemons.daemon_threads }
67
21
 
68
- def buffer_new_messages
69
- return if input_buffer.size > buffer_size
70
- return if output_buffer.size > buffer_size
71
- messages = input_queue.receive_messages(batch_size)
22
+ ##
23
+ # @param [Class] component The component to run in the worker.
24
+ #
25
+ def initialize(component)
26
+ @component = component
72
27
 
73
- if messages.nil?
74
- sleep(sleep_interval)
75
- return
76
- end
77
- messages.each do |message|
78
- input_buffer << message
79
- end
28
+ super() # keep parenthesis, parent method doesn't take arguments.
80
29
  end
81
30
 
82
- def start
83
- Thread.abort_on_exception = true
31
+ ##
32
+ # Called before the daemon is started.
33
+ #
34
+ def before_start
35
+ Syslog.open(ENV['APP_NAME'], ::Syslog::LOG_CONS | ::Syslog::LOG_PID)
84
36
 
85
- start_readers
86
- start_workers
87
- start_writers
88
- start_reporters
37
+ Syslog.info(
38
+ 'Starting daemon',
39
+ :queue => option(:queue_name),
40
+ :threads => threads
41
+ )
89
42
 
90
- threads[:readers].each(&:join)
91
- threads[:workers].each(&:join)
92
- threads[:writers].each(&:join)
93
- threads[:reporters].each(&:join)
94
- end
43
+ GC::Profiler.enable
95
44
 
96
- def start_readers
97
- thread_counts[:readers].times do |t|
98
- threads[:readers] << Thread.new do
99
- logger.info "Reader #{t+1} ready for action..."
100
- loop do
101
- buffer_new_messages
102
- end
103
- end
104
- end
105
- end
45
+ Daemons.configure_rollbar
106
46
 
107
- def start_workers
108
- thread_counts[:workers].times do |t|
109
- threads[:workers] << Thread.new do
110
- logger.info "Worker #{t+1} launching..."
111
- identifier = klass.new
112
- loop do
113
- message = input_buffer.pop
114
-
115
- input = get_input(message[:body])
116
- input,* = input if input.kind_of?(Array)
117
-
118
- begin
119
- output, * = identifier.run(input)
120
- if output.empty?
121
- raise "The component returned an empty response."
122
- end
123
- rescue Exception => e
124
- if relentless?
125
- raise
126
- else
127
- logger.error(e)
128
- output = input
129
- end
130
- end
131
- message[:body].delete("input")
132
- output_buffer.push({ :output=>output,
133
- :body => message[:body],
134
- :handle=>message[:receipt_handle]
135
- })
136
- end
137
- end
138
- end
47
+ NewRelic::Agent.manual_start if Daemons.newrelic?
139
48
  end
140
49
 
141
- def start_writers
142
- thread_counts[:writers].times do |t|
143
- threads[:writers] << Thread.new do
144
- logger.info "Pusher #{t+1} ready for action..."
145
- loop do
146
- message = output_buffer.pop
147
- callbacks = extract_callbacks(message[:body]["callbacks[]"])
148
- handler = Opener::CallbackHandler.new
149
-
150
- if bucket_name
151
- filename = [message[:body]["request_id"], script_name, Time.now.to_i].join("-")
152
- s3 = Opener::Daemons::S3.new(bucket_name, message[:output].force_encoding("UTF-8"), filename, bucket_dir, file_suffix)
153
- s3.upload
154
- message[:body][:input_url] = s3.url
155
- else
156
- message[:body][:input] = message[:output].force_encoding("UTF-8")
157
- end
158
-
159
-
160
- unless callbacks.empty?
161
- callback_url = callbacks.shift
162
- message[:body][:'callbacks[]'] = callbacks
163
- payload = {:body => message[:body]}
164
- handler.post(callback_url, payload)
165
- else
166
- payload = {:body => message[:body]}
167
- handler.post(output_queue.queue_url, payload)
168
- end
169
- input_queue.delete_message(message[:handle])
170
-
171
- end
172
- end
50
+ ##
51
+ # Overwrites the original method so that we can inject the component into
52
+ # the mapper.
53
+ #
54
+ # @see [Oni::Daemon#create_mapper]
55
+ #
56
+ def create_mapper
57
+ unless option(:mapper)
58
+ raise ArgumentError, 'No mapper has been set in the `:mapper` option'
173
59
  end
174
- end
175
60
 
176
- def start_reporters
177
- threads[:reporters] << Thread.new do
178
- loop do
179
- log = {:buffers=>{:input=>input_buffer.size}}
180
- log[:buffers][:output] = output_buffer.size if output_buffer
61
+ return option(:mapper).new(component)
62
+ end
181
63
 
182
- logger.debug log.to_json
183
- sleep(2)
184
- end
185
- end
64
+ ##
65
+ # Called when an error occurs.
66
+ #
67
+ # @param [StandardError] error
68
+ #
69
+ def error(error)
70
+ error, params = unwrap_error(error)
186
71
 
187
- threads[:reporters] << Thread.new do
188
- loop do
189
- thread_types = threads.keys - [:reporters]
190
- thread_counts = thread_types.map do |type|
191
- threads[type].select{|thread| thread.status}.count
192
- end
193
- zip = thread_types.zip(thread_counts)
194
- logger.debug "active thread counts: #{zip}"
195
-
196
- sleep(10)
197
- end
198
- end
72
+ report_exception(error, params)
199
73
  end
200
74
 
201
- def buffer_size
202
- @buffer_size ||= (4 * batch_size)
75
+ ##
76
+ # @param [AWS::SQS::ReceivedMessage] message
77
+ # @param [Mixed] output
78
+ # @param [Benchmark::Tms] timings
79
+ #
80
+ def complete(message, output, timings)
81
+ log_msg = "Finished message #{message.id}"
82
+
83
+ Syslog.info(log_msg)
203
84
  end
204
85
 
205
- def relentless?
206
- @relentless
86
+ ##
87
+ # Sends an error to Rollbar.
88
+ #
89
+ # @param [StandardError] error
90
+ # @param [Hash] parameters
91
+ #
92
+ def report_exception(error, parameters = {})
93
+ if Daemons.rollbar?
94
+ Rollbar.error(
95
+ error,
96
+ :active_threads => Thread.list.count,
97
+ :ruby_description => RUBY_DESCRIPTION,
98
+ :parameters => parameters
99
+ )
100
+ else
101
+ raise error
102
+ end
207
103
  end
208
-
104
+
209
105
  ##
210
- # Returns an Array containing the callback URLs, ignoring empty values.
106
+ # Takes either a regular error or a `Oni::WrappedError` and unwraps it,
107
+ # returning the original error and the parameters (if any).
211
108
  #
212
- # @param [Array|String] input
109
+ # @param [StandardError] error
213
110
  # @return [Array]
214
111
  #
215
- def extract_callbacks(input)
216
- return [] if input.nil? || input.empty?
112
+ def unwrap_error(error)
113
+ params = error.respond_to?(:parameters) ? error.parameters : {}
114
+ error = error.original_error if error.respond_to?(:original_error)
217
115
 
218
- callbacks = input.compact.reject(&:empty?)
219
-
220
- return callbacks
221
- end
222
-
223
- def get_input(body)
224
- return body.delete("input") if body["input"]
225
- return HTTPClient.new.get(body.delete("input_url")).body if body["input_url"]
116
+ return error, params
226
117
  end
227
- end
228
- end
229
- end
118
+ end # Daemon
119
+ end # Daemons
120
+ end # Opener
@@ -0,0 +1,75 @@
1
+ module Opener
2
+ module Daemons
3
+ ##
4
+ # Returns `true` if New Relic monitoring should be enabled.
5
+ #
6
+ # @return [TrueClass|FalseClass]
7
+ #
8
+ def self.newrelic?
9
+ return !!ENV['NEWRELIC_TOKEN']
10
+ end
11
+
12
+ ##
13
+ # Returns `true` if Rollbar error tracking should be enabled.
14
+ #
15
+ # @return [TrueClass|FalseClass]
16
+ #
17
+ def self.rollbar?
18
+ return !!ENV['ROLLBAR_TOKEN']
19
+ end
20
+
21
+ ##
22
+ # Returns `true` if Syslog should be enabled.
23
+ #
24
+ # @return [TrueClass|FalseClass]
25
+ #
26
+ def self.syslog?
27
+ return !!ENV['ENABLE_SYSLOG']
28
+ end
29
+
30
+ ##
31
+ # Returns the name of the input queue to use.
32
+ #
33
+ # @return [String]
34
+ #
35
+ def self.input_queue
36
+ return ENV['INPUT_QUEUE']
37
+ end
38
+
39
+ ##
40
+ # The name of the S3 bucket to store output in.
41
+ #
42
+ # @return [String]
43
+ #
44
+ def self.output_bucket
45
+ return ENV['OUTPUT_BUCKET']
46
+ end
47
+
48
+ ##
49
+ # Returns the amount of daemon threads to run.
50
+ #
51
+ # @return [Fixnum]
52
+ #
53
+ def self.daemon_threads
54
+ return ENV['DAEMON_THREADS'].to_i
55
+ end
56
+
57
+ ##
58
+ # Configures Rollbar.
59
+ #
60
+ def self.configure_rollbar
61
+ Rollbar.configure do |config|
62
+ config.access_token = ENV['ROLLBAR_TOKEN']
63
+ config.enabled = rollbar?
64
+ config.environment = environment
65
+ end
66
+ end
67
+
68
+ ##
69
+ # @return [String]
70
+ #
71
+ def self.environment
72
+ return ENV['DAEMON_ENV'] || ENV['RACK_ENV'] || ENV['RAILS_ENV']
73
+ end
74
+ end # Daemons
75
+ end # Opener
@@ -0,0 +1,36 @@
1
+ module Opener
2
+ module Daemons
3
+ ##
4
+ # Downloads and validates text/XML documents used as input.
5
+ #
6
+ # @!attribute [r] http
7
+ # @return [HTTPClient]
8
+ #
9
+ class Downloader
10
+ attr_reader :http
11
+
12
+ def initialize
13
+ @http = HTTPClient.new
14
+ end
15
+
16
+ ##
17
+ # Downloads the document located at `url`.
18
+ #
19
+ # @param [String] url
20
+ # @return [String]
21
+ #
22
+ def download(url)
23
+ resp = http.get(url, :follow_redirect => true)
24
+
25
+ unless resp.ok?
26
+ raise(
27
+ HTTPClient::BadResponseError,
28
+ "Got HTTP #{resp.status}: #{resp.body}"
29
+ )
30
+ end
31
+
32
+ return resp.body
33
+ end
34
+ end # Downloader
35
+ end # Daemons
36
+ end # Opener
@@ -0,0 +1,57 @@
1
+ module Opener
2
+ module Daemons
3
+ ##
4
+ # Maps the input/output between the daemon and the worker in such a format
5
+ # that both ends can work with it easily.
6
+ #
7
+ # @!attribute [r] component
8
+ # @return [Class]
9
+ #
10
+ class Mapper < Oni::Mapper
11
+ attr_reader :component
12
+
13
+ ##
14
+ # The directory containing JSON schema files.
15
+ #
16
+ # @return [String]
17
+ #
18
+ SCHEMA_DIRECTORY = File.expand_path('../../../../schema', __FILE__)
19
+
20
+ ##
21
+ # Path to the schema file.
22
+ #
23
+ # @return [String]
24
+ #
25
+ INPUT_SCHEMA = File.join(SCHEMA_DIRECTORY, 'sqs_input.json')
26
+
27
+ ##
28
+ # @param [Class] component
29
+ #
30
+ def initialize(component)
31
+ @component = component
32
+ end
33
+
34
+ ##
35
+ # @param [AWS::SQS::ReceivedMessage] message
36
+ # @return [Hash]
37
+ #
38
+ def map_input(message)
39
+ decoded = JSON(message.body)
40
+
41
+ validate_input!(decoded)
42
+
43
+ return Configuration.new(component, decoded)
44
+ end
45
+
46
+ ##
47
+ # Validates the given input Hash.
48
+ #
49
+ # @param [Hash] input
50
+ # @raise [JSON::Schema::ValidationError]
51
+ #
52
+ def validate_input!(input)
53
+ JSON::Validator.validate!(INPUT_SCHEMA, input)
54
+ end
55
+ end # Mapper
56
+ end # Daemons
57
+ end # Opener
@@ -0,0 +1,139 @@
1
+ module Opener
2
+ module Daemons
3
+ ##
4
+ # Slop wrapper for parsing daemon options and passing them to the underlying
5
+ # script.
6
+ #
7
+ # @!attribute [r] name
8
+ # The name of the program to display in help messages.
9
+ # @return [String]
10
+ #
11
+ # @!attribute [r] parser
12
+ # @return [Slop]
13
+ #
14
+ class OptionParser
15
+ attr_reader :name, :parser
16
+
17
+ ##
18
+ # @param [String] name
19
+ #
20
+ def initialize(name)
21
+ @name = name
22
+ @parser = configure_slop
23
+ end
24
+
25
+ ##
26
+ # @see [Slop#parse]
27
+ #
28
+ def parse(*args)
29
+ return parser.parse(*args)
30
+ end
31
+
32
+ ##
33
+ # @return [Slop]
34
+ #
35
+ def configure_slop
36
+ # Slop uses instance_eval, which means `self` will point to the `Slop`
37
+ # instance in the block below.
38
+ outer = self
39
+ daemon_name = "#{name}-daemon"
40
+ cli_name = daemon_name.sub('opener-', '')
41
+
42
+ # Using :strict => false ensures that unrecognized options are kept in
43
+ # ARGV.
44
+ return Slop.new(:strict => false, :indent => 2) do
45
+ banner "Usage: #{cli_name} <start|stop|restart> [OPTIONS]"
46
+
47
+ separator <<-EOF.chomp
48
+
49
+ About:
50
+
51
+ Runs the OpeNER component as a daemon. By default the daemon runs in the
52
+ foreground but using the "start" command it can detach itself. Output is
53
+ logged using Syslog, allowing easy distribution and management of log
54
+ output.
55
+
56
+ Commands:
57
+
58
+ * start: starts the daemon in the background
59
+ * stop: stops the daemon
60
+ * restart: restarts the daemon
61
+
62
+ Not providing a specific command will result in the daemon running in the
63
+ foreground.
64
+
65
+ Environment Variables:
66
+
67
+ These daemons make use of Amazon SQS queues and other Amazon services. In
68
+ order to use these services you should make sure the following environment
69
+ variables are set:
70
+
71
+ * AWS_ACCESS_KEY_ID
72
+ * AWS_SECRET_ACCESS_KEY
73
+ * AWS_REGION
74
+
75
+ If you're running this daemon on an EC2 instance then the first two
76
+ environment variables will be set automatically if the instance has an
77
+ associated IAM profile. The AWS_REGION variable must _always_ be set.
78
+
79
+ Optionally you can also set the following extra variables:
80
+
81
+ * NEWRELIC_TOKEN: when set the daemon will send profiling data to New Relic
82
+ using this token. The application name will be "#{daemon_name}".
83
+
84
+ * ROLLBAR_TOKEN: when set the daemon will report errors to Rollbar using
85
+ this token. You can freely use this in combination with NEWRELIC_TOKEN.
86
+
87
+ Component Options:
88
+
89
+ Certain OpeNER components might define their own commandline options. The
90
+ most common one is the "--resource-path" option which can be used to load
91
+ (and optionally download) a set of models and/or lexicons.
92
+
93
+ Options that are not explicitly defined below will be passed straight to
94
+ the component. Refer to the documentation of the individual components to
95
+ see which options are available.
96
+ EOF
97
+
98
+ separator "\nOptions:\n"
99
+
100
+ on :h, :help, 'Shows this help message' do
101
+ abort to_s
102
+ end
103
+
104
+ on :i=,
105
+ :input=,
106
+ "The name of the input queue",
107
+ :as => String,
108
+ :default => outer.name
109
+
110
+ on :b=,
111
+ :bucket=,
112
+ 'The S3 bucket to store output in',
113
+ :as => String,
114
+ :default => outer.name
115
+
116
+ on :P=,
117
+ :pidfile=,
118
+ "Path to the PID file",
119
+ :as => String,
120
+ :default => "/var/run/opener/#{daemon_name}.pid"
121
+
122
+ on :t=,
123
+ :threads=,
124
+ 'The amount of threads to use',
125
+ :as => Integer,
126
+ :default => 10
127
+
128
+ on :w=,
129
+ :wait=,
130
+ 'The amount of seconds to wait for the daemon to start',
131
+ :as => Integer,
132
+ :default => 3
133
+
134
+ on :'disable-syslog', 'Disables Syslog logging (enabled by default)'
135
+ end
136
+ end
137
+ end # OptionParser
138
+ end # Daemons
139
+ end # Opener