aeden-refinery 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +10 -0
- data/.gitignore +5 -0
- data/Rakefile +17 -1
- data/VERSION +1 -0
- data/config/config.example.yml +18 -0
- data/lib/refinery.rb +74 -0
- data/lib/refinery/config.rb +48 -0
- data/lib/refinery/configurable.rb +15 -0
- data/lib/refinery/daemon.rb +124 -0
- data/lib/refinery/event_publisher.rb +120 -0
- data/lib/refinery/heartbeat.rb +30 -0
- data/lib/refinery/loggable.rb +9 -0
- data/lib/refinery/monitor.rb +116 -0
- data/lib/refinery/publisher.rb +24 -0
- data/lib/refinery/queueable.rb +20 -0
- data/lib/refinery/server.rb +86 -0
- data/lib/refinery/statistics.rb +61 -0
- data/lib/refinery/stats_server.rb +134 -0
- data/lib/refinery/utilities.rb +33 -0
- data/lib/refinery/validations.rb +48 -0
- data/lib/refinery/worker.rb +65 -0
- data/logs/README +1 -0
- data/publishers/error.rb +8 -0
- data/publishers/sample.rb +8 -0
- data/publishers/sleep.rb +7 -0
- data/refinery.gemspec +105 -0
- data/test/config.yml +10 -0
- data/test/test_helper.rb +21 -0
- data/test/unit/config_test.rb +42 -0
- data/test/unit/configurable_test.rb +11 -0
- data/test/unit/daemon_test.rb +37 -0
- data/test/unit/event_publisher_test.rb +11 -0
- data/test/unit/heartbeat_test.rb +22 -0
- data/test/unit/loggable_test.rb +11 -0
- data/test/unit/publisher_test.rb +13 -0
- data/test/unit/queueable_test.rb +24 -0
- data/test/unit/server_test.rb +39 -0
- data/test/unit/statistics_test.rb +41 -0
- data/test/unit/utilities_test.rb +25 -0
- data/test/unit/validations_test.rb +37 -0
- data/test/unit/worker_test.rb +44 -0
- data/workers/error.rb +8 -0
- data/workers/sample.rb +8 -0
- data/workers/sleep.rb +7 -0
- metadata +74 -16
data/.autotest
ADDED
data/.gitignore
ADDED
data/Rakefile
CHANGED
@@ -20,4 +20,20 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
|
|
20
20
|
rdoc.rdoc_files.include('README.rdoc')
|
21
21
|
rdoc.rdoc_files.include('lib/*.rb')
|
22
22
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
23
|
-
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Register the Jeweler gem-packaging tasks. Jeweler is an optional
# development dependency: when it cannot be loaded the LoadError is caught
# and an installation hint is printed instead.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name              = "refinery"
    spec.summary           = "Refinery processes data in a distributed environment."
    spec.description       = "Process data in a distributed fashion."
    spec.authors           = ["Anthony Eden"]
    spec.email             = "anthonyeden@gmail.com"
    spec.homepage          = "http://github.com/aeden/refinery"
    spec.rubyforge_project = 'refinery'
    # Generated documentation is not shipped inside the gem.
    spec.files.exclude 'docs/**/*'
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9.1
|
@@ -0,0 +1,18 @@
|
|
1
|
+
aws:
|
2
|
+
credentials:
|
3
|
+
access_key_id: "access_key_id"
|
4
|
+
secret_access_key: "secret_access_key"
|
5
|
+
processors:
|
6
|
+
sample:
|
7
|
+
queue: 'sample' # can be omitted
|
8
|
+
publishers:
|
9
|
+
delay: 30
|
10
|
+
workers:
|
11
|
+
initial: 3
|
12
|
+
data_store:
|
13
|
+
class: s3
|
14
|
+
error:
|
15
|
+
publishers:
|
16
|
+
delay: 30
|
17
|
+
workers:
|
18
|
+
initial: 1
|
data/lib/refinery.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
require 'socket'
|
5
|
+
require 'benchmark'
|
6
|
+
|
7
|
+
# The Refinery module contains all of the classes for the refinery system.
|
8
|
+
module Refinery

  # Require a mandatory library.
  #
  # * <tt>short_name</tt>: the path handed to +require+
  # * <tt>display_name</tt>: the human-readable name shown if the library
  #   cannot be loaded
  #
  # Returns whatever +require+ returns. If the library is missing, the
  # problem is reported on standard error and the process exits with
  # status 1, so that calling scripts can tell the start-up failed.
  def self.require_library(short_name, display_name)
    require short_name
  rescue LoadError
    $stderr.puts "#{display_name} is required, please install it"
    exit(1)
  end

  # Require all of the mandatory dependencies.
  def self.require_libraries
    require_library('rubygems', 'Rubygems')
    require_library('right_aws', 'RightScale AWS gem')
    require_library('json', 'JSON gem')
    require_library('moneta', 'Moneta gem')
    require_library('moneta/s3', 'Moneta S3 implementation')
  end

  # Require a library that is nice to have but not essential.
  #
  # A missing library is deliberately ignored; +display_name+ is accepted
  # only to mirror the signature of require_library and is not used.
  # Returns nil when the library is not available.
  def self.require_optional_library(short_name, display_name)
    require short_name
  rescue LoadError
    # Best effort only: the library is optional.
    nil
  end

  # Require all of the optional dependencies.
  def self.require_optional_libraries
    require_optional_library('sequel', 'Sequel gem')
    require_optional_library('ramaze', 'Ramaze')
  end

  # Require internal code files.
  #
  # The shared mixins are loaded first because the classes that follow
  # include them at class-definition time.
  def self.require_internals
    require 'refinery/loggable'
    require 'refinery/configurable'
    require 'refinery/queueable'

    require 'refinery/utilities'

    require 'refinery/validations'

    require 'refinery/config'
    require 'refinery/heartbeat'
    require 'refinery/server'
    require 'refinery/daemon'
    require 'refinery/worker'
    require 'refinery/event_publisher'
    require 'refinery/publisher'
    require 'refinery/monitor'
    require 'refinery/statistics'
    require 'refinery/stats_server'
  end

  # Raised if a source file cannot be loaded
  class SourceFileNotFound < RuntimeError
  end
end
|
71
|
+
|
72
|
+
# Bootstrap: load the mandatory gems first, then the optional ones, and
# finally the library's own source files.
Refinery.require_libraries
Refinery.require_optional_libraries
Refinery.require_internals
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# YAML is used by load_file and refresh below.
require 'yaml'

module Refinery #:nodoc:
  # Configuration class.
  #
  # Wraps a hash of settings that can be read and written with string or
  # symbol keys, and can optionally be backed by a YAML file that is
  # re-read when it changes on disk.
  class Config
    # Get a shared configuration.
    #
    # The instance is created on first use with empty AWS credentials and
    # an empty processor list, and the same object is returned afterwards.
    def self.default
      @default ||= new({
        'aws' => {
          'credentials' => {}
        },
        'processors' => {}
      })
    end

    # Initialize the config with the given data
    def initialize(data={})
      @data = data
    end

    # Get the configuration value. The key is converted to a string, so
    # symbols and strings address the same entry.
    def [](key)
      data[key.to_s]
    end

    # Set the configuration value. The key is converted to a string.
    def []=(key, value)
      data[key.to_s] = value
    end

    # Load configuration from a YAML file, replacing the current data.
    #
    # Returns the file's modification time, which is remembered so that
    # refresh can detect later changes. The modification time is sampled
    # before the file is parsed so that a write happening in between is
    # still noticed by the next refresh.
    def load_file(file)
      modified_at = File.mtime(file)
      @data = YAML::load_file(file)
      @file = file
      @last_load = modified_at
    end

    # Refresh the configuration from the YAML file if necessary.
    #
    # Does nothing (and returns nil) when no file has been loaded or when
    # the file has not been modified since it was last read. Otherwise the
    # file is re-read and the freshly loaded data is returned.
    def refresh
      return unless @file

      modified_at = File.mtime(@file)
      return if modified_at == @last_load

      reloaded = YAML::load_file(@file)
      # Remember the new timestamp; without this every later call would
      # reload the file and throw away values set through []=.
      @last_load = modified_at
      @data = reloaded
    end

    private
    # The underlying settings hash, created on demand.
    def data
      @data ||= {}
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Refinery #:nodoc:
  # Mix this module in to give an object a +config+ reader and writer.
  module Configurable
    # Assign a specific configuration to this object.
    attr_writer :config

    # The configuration in use. When none has been assigned, the shared
    # Refinery::Config.default instance is adopted and returned.
    def config
      @config ||= Refinery::Config.default
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
module Refinery #:nodoc:
  # A daemon provides a thread to run workers in.
  #
  # The thread is started by the constructor. It repeatedly drains the
  # waiting queue, hands each message to a freshly instantiated worker and
  # posts a message to the done queue or the error queue depending on the
  # outcome.
  class Daemon
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities

    RUNNING = 'running'
    STOPPED = 'stopped'

    # The daemon's thread
    attr_reader :thread
    # The name of the daemon
    attr_reader :name
    # The queue for incoming messages to process
    attr_reader :waiting_queue
    # The queue for outgoing messages once they've been processed
    attr_reader :done_queue
    # The queue for error messages
    attr_reader :error_queue

    # Stop the daemon. The thread notices the new state the next time it
    # checks running?, i.e. after the current polling pass.
    def stop
      self.state = STOPPED
    end

    # Return the daemon state
    def state
      @state ||= RUNNING
    end

    # Set the daemon state.
    def state=(state)
      @state = state
    end
    protected :state

    # Return true if the daemon state is running.
    def running?
      state == RUNNING
    end

    # Initialize the daemon.
    #
    # * <tt>server</tt>: The server instance
    # * <tt>name</tt>: The processor name
    # * <tt>waiting_queue</tt>: The waiting queue that provides messages to be processed
    # * <tt>error_queue</tt>: The queue where errors are posted.
    # * <tt>done_queue</tt>: The queue for messages that have been processed.
    def initialize(server, name, waiting_queue, error_queue, done_queue)
      Refinery::Server.logger.debug "Starting daemon"

      @server = server
      @name = name
      @waiting_queue = waiting_queue
      @error_queue = error_queue
      @done_queue = done_queue

      @thread = Thread.new do
        logger.debug "Running daemon thread"
        while running?
          begin
            process_waiting_messages
          rescue Exception => e
            # Deliberately broad: nothing raised while polling or loading
            # a worker may terminate the daemon thread.
            logger.error "An error occurred while receiving from the waiting queue: #{e.message}"
          end
          # Pause between polling passes. This also runs after a failure so
          # that a persistent error does not turn into a tight retry loop.
          sleep(1)
        end
        logger.debug "Exiting daemon thread"
      end
    end

    # A hash of worker classes
    #
    # Maps a worker name to the modification time of its source file at the
    # moment it was last loaded by this daemon.
    def workers
      @workers ||= {}
    end

    private
    # Receive messages from the waiting queue until it yields nothing,
    # running a new worker instance for each message.
    def process_waiting_messages
      while (message = waiting_queue.receive)
        worker = load_worker_class(name).new(self)
        process_message(worker, message)
      end
    end

    # Run the worker against a single message.
    #
    # When the worker reports a truthy result a 'done' message is posted
    # and the original message is deleted. A falsy result leaves the
    # message untouched. Any exception raised along the way is reported on
    # the error queue and the original message is deleted.
    def process_message(worker, message)
      result, run_time = worker.run(decode_message(message.body))
      return unless result

      done_message = {
        'host_info' => host_info,
        'original' => message.body,
        'run_time' => run_time
      }
      logger.debug "Sending 'done' message to #{done_queue.name}"
      done_queue.send_message(encode_message(done_message))

      logger.debug "Deleting message from queue"
      message.delete()
    rescue Exception => e
      # Deliberately broad: every kind of worker failure is turned into an
      # error-queue entry instead of killing the daemon thread.
      error_message = {
        'error' => {
          'message' => e.message,
          'class' => e.class.name
        },
        'host_info' => host_info,
        'original' => message.body
      }
      error_queue.send_message(encode_message(error_message))
      message.delete()
    end

    # Load the appropriate worker class
    #
    # The source file <workers_directory>/<name>.rb is (re)loaded whenever
    # its modification time differs from the one recorded in workers, so an
    # edited worker is picked up without a restart. Raises
    # SourceFileNotFound when the file does not exist. Returns the
    # top-level constant named after the camelized worker name.
    def load_worker_class(name)
      source_file = "#{@server.workers_directory}/#{name}.rb"
      if File.exist?(source_file)
        modified_at = File.mtime(source_file)
        if workers[name] != modified_at
          logger.debug "Loading #{source_file}"
          load(source_file)
          workers[name] = modified_at
        end
      else
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      Object.const_get(camelize(name))
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module Refinery #:nodoc:
  # Publish events.
  #
  # For every processor in the configuration a thread is started that
  # repeatedly instantiates the matching publisher class, executes it
  # against the processor's waiting queue and then sleeps for the
  # configured delay.
  class EventPublisher
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    STARTING = 'starting' #:nodoc:
    RUNNING = 'running' #:nodoc:
    STOPPED = 'stopped' #:nodoc:

    # Seconds to wait between publisher runs when the processor settings
    # do not specify publishers/delay.
    DEFAULT_DELAY = 60 #:nodoc:

    # Set the directory where publishers are found.
    attr_writer :publishers_directory

    # Initialize the event publisher
    #
    # Options:
    # * <tt>:verbose</tt>: Set to true to enable info logging
    # * <tt>:debug</tt>: Set to true to enable debug logging
    # * <tt>:config</tt>: Provide a file path to load that config
    # * <tt>:publishers</tt>: The directory where publishers are found
    def initialize(options={})
      logger.level = Logger::INFO if options[:verbose]
      logger.level = Logger::DEBUG if options[:debug]
      config.load_file(options[:config]) if options[:config]
      self.publishers_directory = options[:publishers] if options[:publishers]
    end

    # Get the event publisher state
    def state
      @state ||= STARTING
    end

    # Return true if the event publisher is running
    def running?
      state == RUNNING
    end

    # The directory where publishers are found. Defaults to ./publishers
    def publishers_directory
      @publishers_directory ||= './publishers'
    end

    # A hash of all publisher classes mapped to last modified timestamps.
    def publishers
      @publishers ||= {}
    end

    # Run the specified publisher once and return
    #
    # Raises RuntimeError when the configuration has no processor entry
    # for +key+.
    def run_once(key)
      settings = config['processors'][key]
      raise RuntimeError, "No processor configuration found for #{key}" unless settings
      waiting_queue = waiting_queue_for(key, settings)
      load_publisher_class(key).new(waiting_queue).execute
    end

    # Run the event publisher
    #
    # Starts one thread per configured processor and blocks until all of
    # them have finished or the process is interrupted. The state is set
    # to stopped on the way out so that publisher threads which are still
    # alive leave their loop at the next check.
    def run
      @state = RUNNING
      logger.info "Starting event publisher"
      config['processors'].each do |key, settings|
        run_publisher(key, settings)
      end

      begin
        threads.each { |thread| thread.join }
      rescue Interrupt
        # An interrupt (e.g. Ctrl-C) is the normal way to shut down.
      ensure
        @state = STOPPED
      end

      logger.info "Exiting event publisher"
    end

    private
    # An array of threads, one for each publisher instance
    def threads
      @threads ||= []
    end

    # Look up the waiting queue for the given processor. The queue name
    # defaults to the processor key when the settings do not name one.
    def waiting_queue_for(key, settings)
      queue_name = settings['queue'] || key
      logger.debug "Using queue #{queue_name}_waiting"
      queue("#{queue_name}_waiting")
    end

    # Seconds to sleep between two runs of a publisher. The 'publishers'
    # section of the processor settings is optional.
    def publisher_delay(settings)
      publisher_settings = settings['publishers'] || {}
      publisher_settings['delay'] || DEFAULT_DELAY
    end

    # Run the publisher for the given key
    #
    # The publisher class is looked up again on every pass so that an
    # edited source file is picked up. An exception raised by a publisher
    # is logged and re-raised, which ends that publisher's thread.
    def run_publisher(key, settings)
      logger.info "Creating publisher for #{key}"
      waiting_queue = waiting_queue_for(key, settings)

      threads << Thread.new do
        while running?
          begin
            load_publisher_class(key).new(waiting_queue).execute
          rescue Exception => e
            logger.error e
            raise
          end

          delay = publisher_delay(settings)
          logger.debug "Sleeping #{delay} seconds"
          sleep delay
        end
      end
    end

    # Load the publisher class for the given key.
    #
    # The source file <publishers_directory>/<key>.rb is (re)loaded
    # whenever its modification time differs from the one recorded in
    # publishers. Raises SourceFileNotFound when the file does not exist.
    # Returns the top-level constant named after the camelized key.
    def load_publisher_class(key)
      source_file = "#{publishers_directory}/#{key}.rb"
      if File.exist?(source_file)
        modified_at = File.mtime(source_file)
        if publishers[key] != modified_at
          logger.debug "Loading #{source_file}"
          load(source_file)
          publishers[key] = modified_at
        end
      else
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      Object.const_get(camelize(key))
    end

  end
end
|