tootsie 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +15 -0
- data/Gemfile +2 -0
- data/License +7 -0
- data/README.md +256 -0
- data/Rakefile +1 -0
- data/Tootsie.gemspec +36 -0
- data/bin/tootsie_task_manager +82 -0
- data/config.ru +22 -0
- data/config/development-sample.yml +4 -0
- data/lib/tootsie.rb +21 -0
- data/lib/tootsie/application.rb +48 -0
- data/lib/tootsie/client.rb +12 -0
- data/lib/tootsie/command_runner.rb +58 -0
- data/lib/tootsie/configuration.rb +29 -0
- data/lib/tootsie/daemon.rb +282 -0
- data/lib/tootsie/ffmpeg_adapter.rb +132 -0
- data/lib/tootsie/image_metadata_extractor.rb +64 -0
- data/lib/tootsie/input.rb +55 -0
- data/lib/tootsie/output.rb +67 -0
- data/lib/tootsie/processors/image_processor.rb +181 -0
- data/lib/tootsie/processors/video_processor.rb +85 -0
- data/lib/tootsie/queues/file_system_queue.rb +65 -0
- data/lib/tootsie/queues/sqs_queue.rb +93 -0
- data/lib/tootsie/s3_utilities.rb +24 -0
- data/lib/tootsie/spawner.rb +99 -0
- data/lib/tootsie/task_manager.rb +51 -0
- data/lib/tootsie/tasks/job_task.rb +111 -0
- data/lib/tootsie/tasks/notify_task.rb +27 -0
- data/lib/tootsie/version.rb +3 -0
- data/lib/tootsie/web_service.rb +37 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/test_files/BF 0622 1820.tif +0 -0
- data/spec/tootsie/command_runner_spec.rb +29 -0
- data/spec/tootsie/image_metadata_extracter_spec.rb +39 -0
- data/spec/tootsie/s3_utilities_spec.rb +40 -0
- metadata +337 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Tootsie

  # A simple, naive queue implementation that stores items as JSON files
  # in the file system.
  #
  # Every item is one "<timestamp>.json" file inside the queue directory.
  # Items are popped in the lexicographic order of their file names.
  class FileSystemQueue

    # Creates a queue backed by +directory+, creating the directory (and any
    # missing parents) if necessary.
    def initialize(directory)
      @directory = directory
      FileUtils.mkdir_p(@directory)
    end

    # Returns the number of "*.json" item files currently in the directory.
    def count
      item_file_names.length
    end

    # Adds +item+ (anything responding to +to_json+) to the queue.
    #
    # The JSON is written to a temporary file first and only then moved to
    # its final "<timestamp>.json" name. The temporary file is created inside
    # the queue directory itself so that the move stays on one file system
    # (a plain rename) and #pop never sees a half-written item. Temporary
    # files carry no ".json" suffix, so #count and #pop ignore them.
    def push(item)
      Tempfile.open('tootsie', @directory) do |tempfile|
        tempfile << item.to_json
        tempfile.close
        FileUtils.mv(tempfile.path, File.join(@directory, "#{Time.now.to_f}.json"))
      end
    end

    # Removes and returns the first item in the queue as parsed JSON.
    #
    # Options:
    # * <tt>:wait</tt> - when truthy, polls once per second until an item is
    #   available; otherwise returns +nil+ immediately if the queue is empty.
    #
    # Raises whatever JSON.parse raises if the item file is not valid JSON.
    # The offending file is removed in that case as well, so that a corrupt
    # item cannot block the head of the queue forever.
    def pop(options = {})
      loop do
        lock do
          file_name = item_file_names.sort.first
          if file_name
            contents = File.read(file_name)
            begin
              return JSON.parse(contents)
            ensure
              # Always remove, even if parsing fails
              FileUtils.rm(file_name)
            end
          end
        end
        return nil unless options[:wait]
        sleep(1.0)
      end
    end

    private

      # Paths of all item files currently in the queue directory (unsorted).
      def item_file_names
        Dir.glob(File.join(@directory, "*.json"))
      end

      # Runs the given block while holding the queue's lock.
      #
      # The lock is a directory named "lock" inside the queue directory:
      # creating it fails with EEXIST while somebody else holds it, in which
      # case we retry every 0.2 seconds. The retry is unbounded, so a lock
      # directory left behind by a crashed process must be removed manually.
      def lock
        lock_file_name = File.join(@directory, "lock")
        begin
          FileUtils.mkdir(lock_file_name)
        rescue Errno::EEXIST
          sleep(0.2)
          retry
        end
        begin
          yield
        ensure
          FileUtils.rmdir(lock_file_name)
        end
      end

  end

end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'sqs'
|
3
|
+
require 'timeout'
|
4
|
+
|
5
|
+
module Tootsie

  # Raised by SqsQueue#initialize when the queue still cannot be found
  # five seconds after asking the service to create it.
  #
  # NOTE(review): this derives from Exception rather than StandardError, so a
  # bare `rescue` does not catch it. Left unchanged for compatibility.
  class SqsQueueCouldNotFindQueueError < Exception; end

  # A queue which uses Amazon's Simple Queue Service (SQS).
  class SqsQueue

    # Bounds (in seconds) for the adaptive delay between polls in #pop.
    MIN_BACKOFF = 0.1
    MAX_BACKOFF = 2.0

    # Looks up the queue named +queue_name+ through +sqs_service+. If it does
    # not exist it is created, and we then poll every 0.5 seconds, for at
    # most 5 seconds, until it becomes visible.
    #
    # Raises SqsQueueCouldNotFindQueueError if the queue does not show up
    # in time.
    def initialize(queue_name, sqs_service)
      @logger = Application.get.logger
      @sqs_service = sqs_service
      @queue = @sqs_service.queues.find_first(queue_name)
      unless @queue
        @sqs_service.queues.create(queue_name)
        begin
          Timeout.timeout(5) do
            until @queue
              sleep(0.5)
              @queue = @sqs_service.queues.find_first(queue_name)
            end
          end
        rescue Timeout::Error
          raise SqsQueueCouldNotFindQueueError
        end
      end
      @backoff = 0.5
    end

    # Returns the queue's 'ApproximateNumberOfMessages' attribute as an
    # integer.
    def count
      @queue.attributes['ApproximateNumberOfMessages'].to_i
    end

    # Serializes +item+ with +to_json+ and writes it to the queue.
    #
    # A failed write is retried up to 5 times, 0.5 seconds apart; after that
    # the last exception is logged and re-raised. SystemExit and signals are
    # never retried (see #check_exception).
    def push(item)
      retries_left = 5
      begin
        return @queue.create_message(item.to_json)
      rescue Exception => exception
        check_exception(exception)
        if retries_left > 0
          @logger.warn("Writing queue failed with exception (#{exception.message}), will retry")
          retries_left -= 1
          sleep(0.5)
          retry
        else
          @logger.error("Writing queue failed with exception #{exception.class}: #{exception.message}")
          raise
        end
      end
    end

    # Reads one message from the queue and returns its body parsed as JSON,
    # or +nil+ if nothing was read.
    #
    # Options:
    # * <tt>:wait</tt> - when truthy, keeps polling until a message arrives;
    #   otherwise a single attempt is made.
    #
    # The delay between polls adapts to traffic: it doubles (up to
    # MAX_BACKOFF) every time the queue turns out to be empty and is halved
    # (down to MIN_BACKOFF) every time a message is received.
    #
    # A message is destroyed even when its body is not valid JSON; the
    # parse error then propagates to the caller.
    def pop(options = {})
      item = nil
      loop do
        begin
          # The argument is passed straight to the sqs library; presumably
          # a visibility timeout in seconds - TODO confirm.
          message = @queue.message(5)
        rescue Exception => exception
          check_exception(exception)
          @logger.error("Reading queue failed with exception #{exception.class}: #{exception.message}")
          break unless options[:wait]
          sleep(0.5)
          retry
        end
        if message
          begin
            item = JSON.parse(message.body)
          ensure
            # Always destroy, even if parsing fails
            message.destroy
          end
          @backoff = [@backoff / 2.0, MIN_BACKOFF].max
          break
        else
          # Nothing to read: poll less eagerly next time.
          @backoff = [@backoff * 2.0, MAX_BACKOFF].min
        end
        break unless options[:wait]
        sleep(@backoff)
      end
      item
    end

    private

      # Re-raises exceptions that must never be swallowed or retried:
      # SystemExit, and any SignalException that is not a Timeout::Error.
      def check_exception(exception)
        raise exception if exception.is_a?(SystemExit)
        raise exception if exception.is_a?(SignalException) && !exception.is_a?(Timeout::Error)
      end

  end

end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Tootsie

  # Helpers for working with "s3:" URLs.
  module S3Utilities

    class << self
      # Parses an S3 URL of the form
      #
      #   s3:<bucket>/<key>[?<name>=<value>[&<name>=<value>...]]
      #
      # and returns a hash with indifferent access containing +:bucket+,
      # +:key+ and one entry per query option. Option values are returned as
      # symbols. Query fragments that contain no "=" are ignored.
      #
      # Raises ArgumentError if +url+ is not an S3 URL.
      def parse_uri(url)
        if url =~ /^s3:([^\/]+)\/+(.+?)(?:\?(.*))?$/
          output = {}.with_indifferent_access
          output[:bucket], output[:key], option_string = $1, $2, $3
          unless option_string.blank?
            option_string.split('&').each do |pair|
              name, value = pair.scan(/^(.*?)=(.*)$/)[0]
              # No "=" in this fragment (e.g. "a=b&&c"): nothing to assign.
              next if value.nil?
              output[name] = value.to_sym
            end
          end
          output
        else
          raise ArgumentError, "Not an S3 URL"
        end
      end
    end

  end

end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
require 'set'
|
5
|
+
require 'timeout'
|
6
|
+
|
7
|
+
# Keeps a fixed number of forked child processes running, respawning any
# child that exits, until #terminate is called.
class Spawner

  # Options:
  # * <tt>:num_children</tt> - number of children to keep alive (default 1).
  # * <tt>:logger</tt> - object responding to +info+ and +error+.
  def initialize(options = {})
    @num_children = options[:num_children] || 1
    @pids = Set.new
    @logger = options[:logger]
    @terminating = false
    @parent = true
  end

  # Registers the block that each child process runs after being forked.
  def on_spawn(&block)
    @on_spawn = block
  end

  # Forks children until +num_children+ are running, then waits for children
  # to exit, respawning as needed. Returns once #terminate has been called
  # and every child has been reaped.
  #
  # Each child runs the block registered with #on_spawn or, if none was
  # registered, the block passed to this method, and then exits with
  # status 0.
  #
  # Raises ArgumentError if there is no block to run in the children.
  def run(&block)
    handler = @on_spawn || block
    raise ArgumentError, "No block to run in child processes" unless handler
    loop do
      unless @terminating
        while @pids.length < @num_children
          pid = Process.fork
          if pid
            # In parent process
            @pids << pid
            logger.info("Child PID=#{pid} spawned")
          else
            # In child process
            @parent = false
            handler.call
            exit(0)
          end
        end
      end
      wait_for_children
      break if @terminating && @pids.empty?
    end
  end

  # Blocks until any child process changes state, logs how it ended and
  # forgets its PID. If this process has no children left at all, every
  # tracked PID is forgotten instead.
  def wait_for_children
    begin
      pid = Process.waitpid(-1)
    rescue Errno::ECHILD
      # Nothing left to wait for, so none of the tracked PIDs can be alive
      # as our child any more.
      @pids.clear
      return
    end
    if pid
      status = $?
      if status.exited?
        if status.exitstatus == 0
          logger.info("Child PID=#{pid} exited normally")
        else
          logger.info("Child PID=#{pid} exited unexpectedly with exit code #{status.exitstatus}")
        end
      elsif status.stopped?
        logger.info("Child PID=#{pid} stopped unexpectedly with signal #{status.stopsig}")
      elsif status.signaled?
        logger.info("Child PID=#{pid} died unexpectedly by signal #{status.termsig}")
      else
        logger.info("Child PID=#{pid} died unexpectedly")
      end
      @pids.delete(pid)
    end
  end

  # Stops spawning and shuts down all children. Does nothing when called in
  # a child process.
  #
  # Every child is sent TERM and given up to 5 seconds to exit; children
  # still tracked after that are sent KILL.
  def terminate
    if @parent
      logger.info("Parent terminating, will terminate all child PIDs")
      @terminating = true
      @pids.each do |pid|
        logger.info("Terminating child PID=#{pid}")
        begin
          Process.kill("TERM", pid)
        rescue Errno::ESRCH
          # Ignore
        end
      end
      begin
        Timeout.timeout(5) do
          while @pids.any?
            sleep(0.5)
            wait_for_children
          end
        end
      rescue Timeout::Error
        logger.error("Timed out waiting for children, killing them")
        @pids.each do |pid|
          logger.info("Killing child PID=#{pid}")
          begin
            Process.kill("KILL", pid)
          rescue Errno::ESRCH
            # Ignore
          end
        end
      end
    end
  end

  attr_reader :logger

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'httpclient'
|
2
|
+
require 'uri'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
module Tootsie

  # Puts tasks on a queue and runs the worker loop that takes them off
  # again and executes them.
  class TaskManager

    # +queue+ must respond to +push+ and +pop+.
    def initialize(queue)
      @queue = queue
      @logger = Application.get.logger
    end

    # Pushes +task+ onto the queue as <tt>{:task => type, :data => attributes}</tt>.
    #
    # The type is the task's class name without its namespace and without
    # the trailing "Task", underscored.
    def schedule(task)
      short_name = task.class.name.gsub(/^(?:[^:]+::)*(.*?)Task$/, '\1')
      task_type = short_name.underscore
      payload = task.attributes
      @logger.info("Scheduling task #{task_type.inspect}: #{payload.inspect}")
      @queue.push({:task => task_type, :data => payload})
    end

    # Pops and executes tasks forever.
    #
    # Interrupts, signals and SystemExit propagate to the caller; any other
    # exception is logged with its backtrace and the loop carries on with
    # the next task.
    def run!
      @logger.info "Ready to process tasks"
      loop do
        begin
          popped = @queue.pop(:wait => true)
          run_popped(popped) if popped
        rescue Interrupt, SignalException, SystemExit
          raise
        rescue Exception => exception
          backtrace = exception.backtrace.map { |s| " #{s}\n" }.join
          @logger.error "Task manager exception: #{exception.class}: #{exception}\n#{backtrace}"
        end
      end
      @logger.info "Task manager done"
    end

    private

      # Instantiates the task class named by a popped queue entry and
      # executes it. An entry whose type does not resolve to a constant in
      # Tasks is logged as invalid and dropped.
      def run_popped(popped)
        task = popped.with_indifferent_access
        type, data = task[:task], task[:data]
        @logger.info("Processing task #{type.inspect}: #{data.inspect}")
        task_class =
          begin
            Tasks.const_get("#{type.camelcase}Task")
          rescue NameError
            @logger.error("Invalid task encountered on queue: #{task.inspect}")
            return
          end
        task_class.new(data).execute!
      end

  end

end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Tootsie
  module Tasks

    # A task that runs one processing job: it instantiates the processor
    # matching the job's type, executes it with the job's params and reports
    # progress and outcome to the job's notification URL, if it has one.
    class JobTask

      # Number of retries a job gets unless its attributes say otherwise.
      DEFAULT_MAX_RETRIES = 5

      # Minimum time between two progress notifications.
      PROGRESS_NOTIFICATION_INTERVAL = 10.seconds

      VALID_TYPES = %w(video audio image).freeze

      # Builds a job from a hash (string or symbol keys) with the keys
      # +type+, +retries_left+, +access_key+, +notification_url+ and
      # +params+. +created_at+ is always set to the current time.
      def initialize(attributes = {})
        attributes = attributes.with_indifferent_access
        @type = attributes[:type].to_s
        @retries_left = attributes[:retries_left] || DEFAULT_MAX_RETRIES
        @access_key = attributes[:access_key]
        @created_at = Time.now
        @notification_url = attributes[:notification_url]
        @params = attributes[:params]
        @logger = Application.get.logger
        @use_tasks_for_notifications = false  # TODO: Disabled for now, SQS does not preserve order
      end

      # True if the job's type is one of VALID_TYPES.
      def valid?
        VALID_TYPES.include?(@type)
      end

      # Runs the job.
      #
      # Sends a +started+ notification, runs the processor (forwarding its
      # progress at most once per PROGRESS_NOTIFICATION_INTERVAL) and then
      # sends +completed+ together with the elapsed time and whatever the
      # processor returned.
      #
      # On Interrupt the job is reported as failed ("Cancelled") and the
      # interrupt is re-raised. On any other exception the job is put back
      # on the queue while retries are left (+failed_will_retry+), otherwise
      # reported as +failed+. Signals and SystemExit are re-raised after that
      # handling so that a worker that is being shut down actually stops.
      def execute!
        @logger.info("Begin processing job: #{attributes.inspect}")
        notify!(:event => :started)
        begin
          result = nil
          elapsed_time = Benchmark.realtime do
            next_notify = Time.now + PROGRESS_NOTIFICATION_INTERVAL
            processor = Processors.const_get("#{@type.camelcase}Processor").new(@params)
            result = processor.execute! do |progress_data|
              if Time.now >= next_notify
                notify!(progress_data.merge(:event => :progress))
                next_notify = Time.now + PROGRESS_NOTIFICATION_INTERVAL
              end
            end
          end
          result ||= {}
          notify!({
            :event => :completed,
            :time_taken => elapsed_time
          }.merge(result))
        rescue Interrupt
          @logger.error "Job interrupted"
          notify!(:event => :failed, :reason => 'Cancelled')
          raise
        rescue Exception => exception
          @logger.error "Job failed with exception #{exception.class}: #{exception}\n" <<
            "#{exception.backtrace.map { |line| "#{line}\n" }.join}"
          if @retries_left > 0
            @retries_left -= 1
            @logger.info "Pushing job back on queue to retry it"
            notify!(:event => :failed_will_retry, :reason => exception.message)
            Application.get.task_manager.schedule(self)
          else
            @logger.error "No more retries for job, marking as failed"
            notify!(:event => :failed, :reason => exception.message)
          end
          # Never swallow a termination request.
          raise if exception.is_a?(SignalException) || exception.is_a?(SystemExit)
        else
          @logger.info "Completed job #{attributes.inspect}"
        end
      end

      # Notify the caller of this job with some message.
      #
      # Does nothing unless the job has a notification URL. The message is
      # signed when the job has an access key, serialized to JSON and posted
      # to the URL. A failed delivery is logged and otherwise ignored;
      # signals and SystemExit are not intercepted.
      def notify!(message)
        notification_url = @notification_url
        if notification_url
          message = message.merge(:signature => Client.generate_signature(@access_key)) if @access_key
          message_json = message.stringify_keys.to_json
          if @use_tasks_for_notifications
            Application.get.task_manager.schedule(
              Tasks::NotifyTask.new(:url => notification_url, :message => message_json))
          else
            # TODO: Retry on failure
            @logger.info "Notifying #{notification_url} with message: #{message_json}"
            begin
              HTTPClient.new.post(notification_url, message_json,
                'Content-Type' => 'application/json; charset=utf-8')
            rescue StandardError => exception
              @logger.error "Notification failed with exception, ignoring it: #{exception.message}"
            end
          end
        end
      end

      # The job's state as a plain hash; this is what gets queued when the
      # job is scheduled. Note that +created_at+ is not part of it.
      def attributes
        {
          :type => @type,
          :notification_url => @notification_url,
          :retries_left => @retries_left,
          :access_key => @access_key,
          :params => @params
        }
      end

      attr_accessor :retries_left
      attr_accessor :created_at
      attr_accessor :access_key
      attr_accessor :notification_url
      attr_accessor :params
      attr_accessor :type

    end

  end
end
|