aeden-refinery 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/.autotest +10 -0
  2. data/.gitignore +5 -0
  3. data/Rakefile +17 -1
  4. data/VERSION +1 -0
  5. data/config/config.example.yml +18 -0
  6. data/lib/refinery.rb +74 -0
  7. data/lib/refinery/config.rb +48 -0
  8. data/lib/refinery/configurable.rb +15 -0
  9. data/lib/refinery/daemon.rb +124 -0
  10. data/lib/refinery/event_publisher.rb +120 -0
  11. data/lib/refinery/heartbeat.rb +30 -0
  12. data/lib/refinery/loggable.rb +9 -0
  13. data/lib/refinery/monitor.rb +116 -0
  14. data/lib/refinery/publisher.rb +24 -0
  15. data/lib/refinery/queueable.rb +20 -0
  16. data/lib/refinery/server.rb +86 -0
  17. data/lib/refinery/statistics.rb +61 -0
  18. data/lib/refinery/stats_server.rb +134 -0
  19. data/lib/refinery/utilities.rb +33 -0
  20. data/lib/refinery/validations.rb +48 -0
  21. data/lib/refinery/worker.rb +65 -0
  22. data/logs/README +1 -0
  23. data/publishers/error.rb +8 -0
  24. data/publishers/sample.rb +8 -0
  25. data/publishers/sleep.rb +7 -0
  26. data/refinery.gemspec +105 -0
  27. data/test/config.yml +10 -0
  28. data/test/test_helper.rb +21 -0
  29. data/test/unit/config_test.rb +42 -0
  30. data/test/unit/configurable_test.rb +11 -0
  31. data/test/unit/daemon_test.rb +37 -0
  32. data/test/unit/event_publisher_test.rb +11 -0
  33. data/test/unit/heartbeat_test.rb +22 -0
  34. data/test/unit/loggable_test.rb +11 -0
  35. data/test/unit/publisher_test.rb +13 -0
  36. data/test/unit/queueable_test.rb +24 -0
  37. data/test/unit/server_test.rb +39 -0
  38. data/test/unit/statistics_test.rb +41 -0
  39. data/test/unit/utilities_test.rb +25 -0
  40. data/test/unit/validations_test.rb +37 -0
  41. data/test/unit/worker_test.rb +44 -0
  42. data/workers/error.rb +8 -0
  43. data/workers/sample.rb +8 -0
  44. data/workers/sleep.rb +7 -0
  45. metadata +74 -16
data/.autotest ADDED
@@ -0,0 +1,10 @@
1
# Autotest customisation: a change to any path matching /test/ or /lib\/.*/
# maps to every file whose name ends in _test.rb.
module Autotest::CustomTestMatch
  Autotest.add_hook :initialize do |autotest|
    # Both mappings resolve to the same set of files, so they share one block.
    every_unit_test = proc { |_filename, _match| autotest.files_matching(/_test\.rb$/) }

    autotest.add_mapping(/test/, &every_unit_test)
    autotest.add_mapping(/lib\/.*/, &every_unit_test)
  end
end
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ logs/*.log
2
+ config/config.yml
3
+ rdoc/*
4
+ refinery-*.gem
5
+ stats.db
data/Rakefile CHANGED
@@ -20,4 +20,20 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
20
20
  rdoc.rdoc_files.include('README.rdoc')
21
21
  rdoc.rdoc_files.include('lib/*.rb')
22
22
  rdoc.rdoc_files.include('lib/**/*.rb')
23
- end
23
+ end
24
+
25
# Define the gem packaging tasks when Jeweler is installed; otherwise print
# installation instructions and carry on so the remaining tasks stay usable.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    # Identity
    spec.name = "refinery"
    spec.authors = ["Anthony Eden"]
    spec.email = "anthonyeden@gmail.com"
    spec.homepage = "http://github.com/aeden/refinery"
    spec.rubyforge_project = 'refinery'

    # Descriptions
    spec.summary = "Refinery processes data in a distributed environment."
    spec.description = "Process data in a distributed fashion."

    # Exclude everything under docs/ from the gem's file list.
    spec.files.exclude 'docs/**/*'
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.1
data/config/config.example.yml ADDED
@@ -0,0 +1,18 @@
1
+ aws:
2
+ credentials:
3
+ access_key_id: "access_key_id"
4
+ secret_access_key: "secret_access_key"
5
+ processors:
6
+ sample:
7
+ queue: 'sample' # can be omitted
8
+ publishers:
9
+ delay: 30
10
+ workers:
11
+ initial: 3
12
+ data_store:
13
+ class: s3
14
+ error:
15
+ publishers:
16
+ delay: 30
17
+ workers:
18
+ initial: 1
data/lib/refinery.rb ADDED
@@ -0,0 +1,74 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+
3
+ require 'logger'
4
+ require 'socket'
5
+ require 'benchmark'
6
+
7
# The Refinery module contains all of the classes for the refinery system.
#
# The class methods defined here load the external dependencies and the
# internal source files; they are invoked at the bottom of this file.
module Refinery

  # Require the specified library, terminating the process if it is missing.
  #
  # * <tt>short_name</tt>: the path passed to +require+
  # * <tt>display_name</tt>: the name shown if the library cannot be loaded
  #
  # Returns the result of +require+. If the library cannot be loaded a
  # message is printed and the process exits with status 1, so that shells
  # and supervisors can tell the start-up failed.
  def self.require_library(short_name, display_name)
    require short_name
  rescue LoadError
    puts "#{display_name} is required, please install it"
    exit(1)
  end

  # Require all of the mandatory dependencies.
  def self.require_libraries
    require_library('rubygems', 'Rubygems')
    # Refinery::Config reads its settings through YAML, so load it explicitly
    # rather than relying on another library having pulled it in.
    require_library('yaml', 'YAML')
    require_library('right_aws', 'RightScale AWS gem')
    require_library('json', 'JSON gem')
    require_library('moneta', 'Moneta gem')
    require_library('moneta/s3', 'Moneta S3 implementation')
  end

  # Require a library that is nice to have but not essential.
  #
  # * <tt>short_name</tt>: the path passed to +require+
  # * <tt>display_name</tt>: accepted for symmetry with require_library;
  #   currently unused because a failure is deliberately silent
  #
  # Returns the result of +require+, or nil if the library is unavailable.
  def self.require_optional_library(short_name, display_name)
    require short_name
  rescue LoadError
    # Optional dependency: carry on without it.
    nil
  end

  # Require all of the optional dependencies.
  def self.require_optional_libraries
    require_optional_library('sequel', 'Sequel gem')
    require_optional_library('ramaze', 'Ramaze')
  end

  # Require internal code files
  def self.require_internals
    # Mixins are listed before the classes below.
    require 'refinery/loggable'
    require 'refinery/configurable'
    require 'refinery/queueable'

    require 'refinery/utilities'

    require 'refinery/validations'

    require 'refinery/config'
    require 'refinery/heartbeat'
    require 'refinery/server'
    require 'refinery/daemon'
    require 'refinery/worker'
    require 'refinery/event_publisher'
    require 'refinery/publisher'
    require 'refinery/monitor'
    require 'refinery/statistics'
    require 'refinery/stats_server'
  end

  # Raised if a source file cannot be loaded
  class SourceFileNotFound < RuntimeError
  end
end
71
+
72
# Bootstrap: mandatory dependencies first, then the optional ones, and
# finally refinery's own source files.
Refinery.require_libraries
Refinery.require_optional_libraries
Refinery.require_internals
data/lib/refinery/config.rb ADDED
@@ -0,0 +1,48 @@
1
module Refinery #:nodoc:
  # Configuration class.
  #
  # Wraps a hash of settings. Keys are converted with +to_s+ on every read
  # and write, so <tt>config[:aws]</tt> and <tt>config['aws']</tt> refer to
  # the same entry. The settings can be loaded from a YAML file and
  # reloaded when that file changes on disk.
  class Config
    # Get a shared configuration
    #
    # The shared instance starts with empty 'aws'/'credentials' and
    # 'processors' sections and is created on first use.
    def self.default
      @default ||= new({
        'aws' => {
          'credentials' => {}
        },
        'processors' => {}
      })
    end

    # Initialize the config with the given data
    def initialize(data={})
      @data = data
    end

    # Get the configuration value
    def [](key)
      data[key.to_s]
    end

    # Set the configuration value
    def []=(key, value)
      data[key.to_s] = value
    end

    # Load configuration from a YAML file
    #
    # Replaces the current data and remembers the file together with its
    # modification time so that #refresh can detect later changes.
    def load_file(file)
      @file = file
      @data = YAML.load_file(@file)
      @last_load = File.mtime(@file)
    end

    # Refresh the configuration from the YAML file if necessary.
    #
    # Does nothing when no file has been loaded yet or when the file's
    # modification time is unchanged since the last load. Returns the newly
    # loaded data when a reload happened, nil otherwise.
    def refresh
      return unless @file
      return if File.mtime(@file) == @last_load

      # Going through load_file also records the new modification time, so
      # the file is not read again until it changes once more.
      load_file(@file)
      @data
    end

    private
    # The underlying settings hash.
    def data
      @data ||= {}
    end
  end
end
data/lib/refinery/configurable.rb ADDED
@@ -0,0 +1,15 @@
1
module Refinery #:nodoc:
  # Mixin that gives the including object a +config+ reader and writer.
  module Configurable
    # Assign a specific configuration to this object.
    attr_writer :config

    # The configuration in effect for this object. When none has been
    # assigned, the shared default (Refinery::Config.default) is adopted
    # and remembered.
    def config
      @config ||= Refinery::Config.default
    end
  end
end
data/lib/refinery/daemon.rb ADDED
@@ -0,0 +1,124 @@
1
module Refinery #:nodoc:
  # A daemon provides a thread to run workers in.
  #
  # Creating a daemon starts a thread that polls the waiting queue for as
  # long as the daemon is running. Every received message is handed to a
  # freshly built worker; the outcome is reported on the done queue or the
  # error queue.
  class Daemon
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities

    RUNNING = 'running'
    STOPPED = 'stopped'

    # The daemon's thread
    attr_reader :thread
    # The name of the daemon
    attr_reader :name
    # The queue for incoming messages to process
    attr_reader :waiting_queue
    # The queue for outgoing messages once they've been processed
    attr_reader :done_queue
    # The queue for error messages
    attr_reader :error_queue

    # Stop the daemon
    #
    # Only flips the state; the thread notices the next time it evaluates
    # its loop condition.
    def stop
      self.state = STOPPED
    end

    # Return the daemon state
    def state
      @state ||= RUNNING
    end

    # Set the daemon state.
    def state=(state)
      @state = state
    end
    # NOTE(review): this protects the reader only; the writer stays public.
    # Confirm whether `state=` was the intended target.
    protected :state

    # Return true if the daemon state is running.
    def running?
      state == RUNNING
    end

    # Initialize the daemon.
    #
    # * <tt>server</tt>: The server instance
    # * <tt>name</tt>: The processor name
    # * <tt>waiting_queue</tt>: The waiting queue that provides messages to be processed
    # * <tt>error_queue</tt>: The queue where errors are posted.
    # * <tt>done_queue</tt>: The queue for messages that have been processed.
    def initialize(server, name, waiting_queue, error_queue, done_queue)
      Refinery::Server.logger.debug "Starting daemon"

      @server = server
      @name = name
      @waiting_queue = waiting_queue
      @error_queue = error_queue
      @done_queue = done_queue

      @thread = Thread.new(self) do |_daemon|
        logger.debug "Running daemon thread"
        while running?
          begin
            # Drain everything that is currently available.
            while (message = waiting_queue.receive)
              process_message(message)
            end
          rescue Exception => e
            # Deliberately broad so the polling thread survives any failure
            # raised while receiving or while loading the worker source.
            logger.error "An error occurred while receiving from the waiting queue: #{e.message}"
          end
          # Pause after every pass, including failed ones, so that a
          # persistent queue error is not retried in a tight loop.
          sleep(1)
        end
        logger.debug "Exiting daemon thread"
      end
    end

    # A hash of worker classes
    #
    # Maps each worker name to the modification time of its source file at
    # the moment it was last loaded.
    def workers
      @workers ||= {}
    end

    private
    # Run a single message through a new worker instance.
    #
    # When the worker returns a truthy result a 'done' message is sent and
    # the original is deleted. When the worker raises, an error message is
    # sent and the original is deleted. When the worker returns a falsy
    # result the original is left on the queue untouched.
    #
    # A failure to load the worker class is not handled here; it propagates
    # to the caller.
    def process_message(message)
      worker = load_worker_class(name).new(self)
      begin
        result, run_time = worker.run(decode_message(message.body))
        if result
          done_message = {
            'host_info' => host_info,
            'original' => message.body,
            'run_time' => run_time
          }
          logger.debug "Sending 'done' message to #{done_queue.name}"
          done_queue.send_message(encode_message(done_message))

          logger.debug "Deleting message from queue"
          message.delete()
        end
      rescue Exception => e
        error_message = {
          'error' => {
            'message' => e.message,
            'class' => e.class.name
          },
          'host_info' => host_info,
          'original' => message.body
        }
        error_queue.send_message(encode_message(error_message))
        message.delete()
      end
    end

    # Load the appropriate worker class
    #
    # The source file is (re)loaded whenever its modification time differs
    # from the one recorded at the previous load. Raises SourceFileNotFound
    # if the file does not exist.
    def load_worker_class(name)
      source_file = "#{@server.workers_directory}/#{name}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if workers[name] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        workers[name] = modified_at
      end

      Object.const_get(camelize(name))
    end
  end
end
data/lib/refinery/event_publisher.rb ADDED
@@ -0,0 +1,120 @@
1
module Refinery #:nodoc:
  # Publish events.
  #
  # For every entry under 'processors' in the configuration, a thread is
  # started that repeatedly executes the matching publisher class against
  # that processor's waiting queue.
  class EventPublisher
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    STARTING = 'starting' #:nodoc:
    RUNNING = 'running' #:nodoc:
    STOPPED = 'stopped' #:nodoc:

    # Set the directory where publishers are found.
    attr_writer :publishers_directory

    # Initialize the event publisher
    #
    # Options:
    # * <tt>:verbose</tt>: Set to true to log at INFO level
    # * <tt>:debug</tt>: Set to true to enable debug logging
    # * <tt>:config</tt>: Provide a file path to load that config
    # * <tt>:publishers</tt>: The directory where publishers are found
    def initialize(options={})
      logger.level = Logger::INFO if options[:verbose]
      logger.level = Logger::DEBUG if options[:debug]
      config.load_file(options[:config]) if options[:config]
      self.publishers_directory = options[:publishers] if options[:publishers]
    end

    # Get the event publisher state
    def state
      @state ||= STARTING
    end

    # Return true if the event publisher is running
    def running?
      state == RUNNING
    end

    # The directory where publishers are found. Defaults to ./publishers
    def publishers_directory
      @publishers_directory ||= './publishers'
    end

    # A hash of all publisher classes mapped to last modified timestamps.
    def publishers
      @publishers ||= {}
    end

    # Run the specified publisher once and return
    #
    # Raises RuntimeError if there is no processor configuration for +key+.
    def run_once(key)
      settings = config['processors'][key]
      raise RuntimeError, "No processor configuration found for #{key}" unless settings
      waiting_queue = waiting_queue_for(key, settings)
      load_publisher_class(key).new(waiting_queue).execute
    end

    # Run the event publisher
    #
    # Starts one thread per configured processor and blocks until all of
    # them have finished or the process is interrupted.
    def run
      @state = RUNNING
      logger.info "Starting event publisher"
      config['processors'].each do |key, settings|
        run_publisher(key, settings)
      end

      begin
        threads.each { |thread| thread.join }
      rescue Interrupt
        # An interrupt while joining falls through to the exit message.
      end

      logger.info "Exiting event publisher"
    end

    private
    # An array of threads, one for each publisher instance
    def threads
      @threads ||= []
    end

    # Look up the waiting queue for a processor. The queue name defaults
    # to the processor key when no 'queue' setting is present.
    def waiting_queue_for(key, settings)
      queue_name = settings['queue'] || key
      logger.debug "Using queue #{queue_name}_waiting"
      queue("#{queue_name}_waiting")
    end

    # The pause, in seconds, between two runs of a publisher. Falls back
    # to 60 when the processor has no 'publishers' section or that section
    # has no 'delay'.
    def publisher_delay(settings)
      publisher_settings = settings['publishers'] || {}
      publisher_settings['delay'] || 60
    end

    # Run the publisher for the given key
    def run_publisher(key, settings)
      logger.info "Creating publisher for #{key}"
      waiting_queue = waiting_queue_for(key, settings)

      threads << Thread.new(waiting_queue, settings) do |thread_queue, thread_settings|
        while running?
          begin
            load_publisher_class(key).new(thread_queue).execute
          rescue Exception => e
            # Log and re-raise, which ends this publisher thread.
            logger.error e
            raise e
          end

          delay = publisher_delay(thread_settings)
          logger.debug "Sleeping #{delay} seconds"
          sleep delay
        end
      end
    end

    # Load the publisher class for the given key.
    #
    # The source file is (re)loaded whenever its modification time differs
    # from the one recorded at the previous load. Raises SourceFileNotFound
    # if the file does not exist.
    def load_publisher_class(key)
      source_file = "#{publishers_directory}/#{key}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if publishers[key] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        publishers[key] = modified_at
      end

      Object.const_get(camelize(key))
    end

  end
end