aeden-refinery 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/.autotest +10 -0
  2. data/.gitignore +5 -0
  3. data/Rakefile +17 -1
  4. data/VERSION +1 -0
  5. data/config/config.example.yml +18 -0
  6. data/lib/refinery.rb +74 -0
  7. data/lib/refinery/config.rb +48 -0
  8. data/lib/refinery/configurable.rb +15 -0
  9. data/lib/refinery/daemon.rb +124 -0
  10. data/lib/refinery/event_publisher.rb +120 -0
  11. data/lib/refinery/heartbeat.rb +30 -0
  12. data/lib/refinery/loggable.rb +9 -0
  13. data/lib/refinery/monitor.rb +116 -0
  14. data/lib/refinery/publisher.rb +24 -0
  15. data/lib/refinery/queueable.rb +20 -0
  16. data/lib/refinery/server.rb +86 -0
  17. data/lib/refinery/statistics.rb +61 -0
  18. data/lib/refinery/stats_server.rb +134 -0
  19. data/lib/refinery/utilities.rb +33 -0
  20. data/lib/refinery/validations.rb +48 -0
  21. data/lib/refinery/worker.rb +65 -0
  22. data/logs/README +1 -0
  23. data/publishers/error.rb +8 -0
  24. data/publishers/sample.rb +8 -0
  25. data/publishers/sleep.rb +7 -0
  26. data/refinery.gemspec +105 -0
  27. data/test/config.yml +10 -0
  28. data/test/test_helper.rb +21 -0
  29. data/test/unit/config_test.rb +42 -0
  30. data/test/unit/configurable_test.rb +11 -0
  31. data/test/unit/daemon_test.rb +37 -0
  32. data/test/unit/event_publisher_test.rb +11 -0
  33. data/test/unit/heartbeat_test.rb +22 -0
  34. data/test/unit/loggable_test.rb +11 -0
  35. data/test/unit/publisher_test.rb +13 -0
  36. data/test/unit/queueable_test.rb +24 -0
  37. data/test/unit/server_test.rb +39 -0
  38. data/test/unit/statistics_test.rb +41 -0
  39. data/test/unit/utilities_test.rb +25 -0
  40. data/test/unit/validations_test.rb +37 -0
  41. data/test/unit/worker_test.rb +44 -0
  42. data/workers/error.rb +8 -0
  43. data/workers/sample.rb +8 -0
  44. data/workers/sleep.rb +7 -0
  45. metadata +74 -16
data/.autotest ADDED
@@ -0,0 +1,10 @@
1
# Autotest customisation: registers an :initialize hook that adds two
# mappings, one for paths matching /test/ and one for paths under lib/.
# Both mappings resolve to every file whose name ends in _test.rb.
module Autotest::CustomTestMatch
  Autotest.add_hook(:initialize) do |autotest|
    # The same handler serves both mappings; the changed file and the match
    # data handed to the handler are ignored.
    every_test_file = proc { |_file, _match| autotest.files_matching(/_test\.rb$/) }

    autotest.add_mapping(/test/, &every_test_file)
    autotest.add_mapping(/lib\/.*/, &every_test_file)
  end
end
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ logs/*.log
2
+ config/config.yml
3
+ rdoc/*
4
+ refinery-*.gem
5
+ stats.db
data/Rakefile CHANGED
@@ -20,4 +20,20 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
20
20
  rdoc.rdoc_files.include('README.rdoc')
21
21
  rdoc.rdoc_files.include('lib/*.rb')
22
22
  rdoc.rdoc_files.include('lib/**/*.rb')
23
- end
23
+ end
24
+
25
# Define the Jeweler packaging tasks. If a LoadError is raised (typically
# because Jeweler is not installed), print installation instructions and
# continue without those tasks.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |spec|
    spec.name              = "refinery"
    spec.summary           = "Refinery processes data in a distributed environment."
    spec.email             = "anthonyeden@gmail.com"
    spec.homepage          = "http://github.com/aeden/refinery"
    spec.description       = "Process data in a distributed fashion."
    spec.authors           = ["Anthony Eden"]
    spec.files.exclude 'docs/**/*'
    spec.rubyforge_project = 'refinery'
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.1
@@ -0,0 +1,18 @@
1
+ aws:
2
+ credentials:
3
+ access_key_id: "access_key_id"
4
+ secret_access_key: "secret_access_key"
5
+ processors:
6
+ sample:
7
+ queue: 'sample' # can be omitted
8
+ publishers:
9
+ delay: 30
10
+ workers:
11
+ initial: 3
12
+ data_store:
13
+ class: s3
14
+ error:
15
+ publishers:
16
+ delay: 30
17
+ workers:
18
+ initial: 1
data/lib/refinery.rb ADDED
@@ -0,0 +1,74 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+
3
+ require 'logger'
4
+ require 'socket'
5
+ require 'benchmark'
6
+
7
# The Refinery module contains all of the classes for the refinery system.
module Refinery
  # Require a mandatory library.
  #
  # * <tt>short_name</tt>: the path handed to +require+
  # * <tt>display_name</tt>: the name shown if the library cannot be loaded
  #
  # Returns the result of +require+. If the library cannot be loaded the
  # message is written to standard error and SystemExit is raised with a
  # non-zero status, so a missing dependency is reported as a failure
  # rather than as a clean exit.
  def self.require_library(short_name, display_name)
    require short_name
  rescue LoadError
    abort "#{display_name} is required, please install it"
  end

  # Require all of the mandatory dependencies, in order.
  def self.require_libraries
    require_library('rubygems', 'Rubygems')
    require_library('right_aws', 'RightScale AWS gem')
    require_library('json', 'JSON gem')
    require_library('moneta', 'Moneta gem')
    require_library('moneta/s3', 'Moneta S3 implementation')
  end

  # Require a library that the system can run without.
  #
  # Returns the result of +require+, or nil if the library is not available.
  # A LoadError is deliberately ignored; <tt>display_name</tt> is accepted
  # for symmetry with require_library but is not used.
  def self.require_optional_library(short_name, display_name)
    require short_name
  rescue LoadError
    nil
  end

  # Require all of the optional dependencies.
  def self.require_optional_libraries
    require_optional_library('sequel', 'Sequel gem')
    require_optional_library('ramaze', 'Ramaze')
  end

  # Require internal code files. The mixins and utilities are required
  # before the classes that are listed after them.
  def self.require_internals
    require 'refinery/loggable'
    require 'refinery/configurable'
    require 'refinery/queueable'

    require 'refinery/utilities'

    require 'refinery/validations'

    require 'refinery/config'
    require 'refinery/heartbeat'
    require 'refinery/server'
    require 'refinery/daemon'
    require 'refinery/worker'
    require 'refinery/event_publisher'
    require 'refinery/publisher'
    require 'refinery/monitor'
    require 'refinery/statistics'
    require 'refinery/stats_server'
  end

  # Raised if a source file cannot be loaded
  class SourceFileNotFound < RuntimeError
  end
end
71
+
72
# Bootstrap: mandatory dependencies first, then optional ones, then
# Refinery's own source files.
Refinery.require_libraries
Refinery.require_optional_libraries
Refinery.require_internals
@@ -0,0 +1,48 @@
1
require 'yaml'

module Refinery #:nodoc:
  # Configuration class.
  #
  # Wraps a hash of settings whose keys are stored as strings. The data can
  # be supplied directly or loaded from a YAML file, in which case #refresh
  # re-reads the file when its modification time changes.
  class Config
    # Get a shared configuration. The instance is created on first use with
    # empty 'aws' credentials and an empty 'processors' section.
    def self.default
      @default ||= new({
        'aws' => {
          'credentials' => {}
        },
        'processors' => {}
      })
    end

    # Initialize the config with the given data
    def initialize(data={})
      @data = data
    end

    # Get the configuration value. The key is converted with +to_s+, so
    # symbols and strings are interchangeable.
    def [](key)
      data[key.to_s]
    end

    # Set the configuration value. The key is converted with +to_s+.
    def []=(key, value)
      data[key.to_s] = value
    end

    # Load configuration from a YAML file, replacing the current data.
    #
    # The file path and its modification time are remembered so that
    # #refresh can detect later changes. Returns the modification time.
    def load_file(file)
      @file = file
      @data = YAML.load_file(@file)
      @last_load = File.mtime(@file)
    end

    # Refresh the configuration from the YAML file if necessary.
    #
    # Does nothing when no file has been loaded or when the file's
    # modification time is unchanged since the last load. Returns the
    # reloaded data, or nil when nothing was reloaded.
    def refresh
      return unless @file

      modified_at = File.mtime(@file)
      return if modified_at == @last_load

      @data = YAML.load_file(@file)
      # Remember the new timestamp; otherwise every later call would see a
      # mismatch and re-parse the file again.
      @last_load = modified_at
      @data
    end

    private

    # The backing hash; falls back to an empty hash if the data is nil.
    def data
      @data ||= {}
    end
  end
end
@@ -0,0 +1,15 @@
1
module Refinery #:nodoc:
  # Mixin that gives the including object a +config+ reader and writer.
  module Configurable
    # Set the configuration.
    attr_writer :config

    # Get the configuration. When none has been assigned, the shared
    # default configuration is used and remembered.
    def config
      @config ||= Refinery::Config.default
    end
  end
end
@@ -0,0 +1,124 @@
1
module Refinery #:nodoc:
  # A daemon provides a thread to run workers in.
  #
  # The thread is started by the constructor. While the daemon is running it
  # drains the waiting queue, hands each message to a freshly instantiated
  # worker, and posts the outcome to the done queue or the error queue.
  #
  # NOTE(review): +logger+ presumably comes from Refinery::Loggable, and
  # +host_info+, +encode_message+, +decode_message+ and +camelize+ from
  # Refinery::Utilities; those modules are not visible here.
  class Daemon
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities

    RUNNING = 'running'
    STOPPED = 'stopped'

    # The daemon's thread
    attr_reader :thread
    # The name of the daemon
    attr_reader :name
    # The queue for incoming messages to process
    attr_reader :waiting_queue
    # The queue for outgoing messages once they've been processed
    attr_reader :done_queue
    # The queue for error messages
    attr_reader :error_queue

    # Stop the daemon. The thread leaves its loop the next time it checks
    # the state.
    def stop
      self.state = STOPPED
    end

    # Return the daemon state (RUNNING until #stop is called). This reader
    # is protected; use #running? from outside.
    def state
      @state ||= RUNNING
    end

    # Set the daemon state.
    def state=(state)
      @state = state
    end
    protected :state

    # Return true if the daemon state is running.
    def running?
      state == RUNNING
    end

    # Initialize the daemon and start its thread.
    #
    # * <tt>server</tt>: The server instance
    # * <tt>name</tt>: The processor name
    # * <tt>waiting_queue</tt>: The waiting queue that provides messages to be processed
    # * <tt>error_queue</tt>: The queue where errors are posted.
    # * <tt>done_queue</tt>: The queue for messages that have been processed.
    def initialize(server, name, waiting_queue, error_queue, done_queue)
      Refinery::Server.logger.debug "Starting daemon"

      @server = server
      @name = name
      @waiting_queue = waiting_queue
      @error_queue = error_queue
      @done_queue = done_queue

      @thread = Thread.new { run_loop }
    end

    # A hash of worker names mapped to the modification time of the source
    # file that was last loaded for them.
    def workers
      @workers ||= {}
    end

    private

    # Thread body: drain the waiting queue, pause for a second, and repeat
    # until the daemon is stopped.
    def run_loop
      logger.debug "Running daemon thread"
      while running?
        begin
          process_waiting_messages
        rescue Exception => e
          # Kept as broad as before so that any failure, including a worker
          # source file that cannot be loaded, is logged without ending
          # the thread.
          logger.error "An error occurred while receiving from the waiting queue: #{e.message}"
        end
        # Pause on every iteration, including after a failure, so that a
        # persistent error cannot turn this loop into a busy spin.
        sleep(1)
      end
      logger.debug "Exiting daemon thread"
    end

    # Process messages until the waiting queue has nothing more to hand out.
    def process_waiting_messages
      while (message = waiting_queue.receive)
        process_message(message)
      end
    end

    # Run a new worker instance against one message and report the outcome.
    # The worker class is resolved outside the begin/rescue, so a failure
    # to load it propagates to the caller and the message is left on the
    # queue.
    def process_message(message)
      worker = load_worker_class(name).new(self)
      begin
        result, run_time = worker.run(decode_message(message.body))
        # A falsy result leaves the message on the queue untouched.
        report_done(message, run_time) if result
      rescue Exception => e
        report_error(message, e)
      end
    end

    # Post a 'done' message for a processed message, then delete the
    # original from the waiting queue.
    def report_done(message, run_time)
      done_message = {
        'host_info' => host_info,
        'original' => message.body,
        'run_time' => run_time
      }
      logger.debug "Sending 'done' message to #{done_queue.name}"
      done_queue.send_message(encode_message(done_message))

      logger.debug "Deleting message from queue"
      message.delete
    end

    # Post an error message describing the exception, then delete the
    # original from the waiting queue.
    def report_error(message, error)
      error_message = {
        'error' => {
          'message' => error.message,
          'class' => error.class.name
        },
        'host_info' => host_info,
        'original' => message.body
      }
      error_queue.send_message(encode_message(error_message))
      message.delete
    end

    # Load the appropriate worker class.
    #
    # The source file is (re)loaded whenever its modification time differs
    # from the one recorded at the previous load. Raises SourceFileNotFound
    # if the file does not exist in the server's workers directory.
    def load_worker_class(name)
      source_file = "#{@server.workers_directory}/#{name}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if workers[name] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        workers[name] = modified_at
      end

      Object.const_get(camelize(name))
    end
  end
end
@@ -0,0 +1,120 @@
1
module Refinery #:nodoc:
  # Publish events.
  #
  # For every entry under the 'processors' configuration key, a thread is
  # started that repeatedly loads the matching publisher class from the
  # publishers directory, executes it against the processor's waiting
  # queue, and then sleeps for the configured delay.
  #
  # NOTE(review): +logger+, +queue+ and +camelize+ presumably come from
  # Refinery::Loggable, Refinery::Queueable and Refinery::Utilities; those
  # modules are not visible here.
  class EventPublisher
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    STARTING = 'starting' #:nodoc:
    RUNNING = 'running' #:nodoc:
    STOPPED = 'stopped' #:nodoc:

    # Set the directory where publishers are found.
    attr_writer :publishers_directory

    # Initialize the event publisher
    #
    # Options:
    # * <tt>:verbose</tt>: Set to true to log at INFO level
    # * <tt>:debug</tt>: Set to true to enable debug logging
    # * <tt>:config</tt>: Provide a file path to load that config
    # * <tt>:publishers</tt>: The directory where publishers are found
    def initialize(options={})
      logger.level = Logger::INFO if options[:verbose]
      logger.level = Logger::DEBUG if options[:debug]
      config.load_file(options[:config]) if options[:config]
      self.publishers_directory = options[:publishers] if options[:publishers]
    end

    # Get the event publisher state
    def state
      @state ||= STARTING
    end

    # Return true if the event publisher is running
    def running?
      state == RUNNING
    end

    # The directory where publishers are found. Defaults to ./publishers
    def publishers_directory
      @publishers_directory ||= './publishers'
    end

    # A hash of all publisher classes mapped to last modified timestamps.
    def publishers
      @publishers ||= {}
    end

    # Run the specified publisher once and return.
    #
    # Raises RuntimeError if there is no processor configuration for +key+.
    def run_once(key)
      settings = config['processors'][key]
      raise RuntimeError, "No processor configuration found for #{key}" unless settings
      waiting_queue = waiting_queue_for(key, settings)
      load_publisher_class(key).new(waiting_queue).execute
    end

    # Run the event publisher: start one thread per configured processor and
    # block until they have all finished or an Interrupt is received.
    def run
      @state = RUNNING
      logger.info "Starting event publisher"
      config['processors'].each do |key, settings|
        run_publisher(key, settings)
      end

      begin
        threads.each { |thread| thread.join }
      rescue Interrupt
        # An interrupt while waiting simply ends the run.
      end

      logger.info "Exiting event publisher"
    end

    private

    # An array of threads, one for each publisher instance
    def threads
      @threads ||= []
    end

    # Resolve the waiting queue for a processor. The queue name comes from
    # the 'queue' setting and falls back to the processor key.
    def waiting_queue_for(key, settings)
      queue_name = settings['queue'] || key
      logger.debug "Using queue #{queue_name}_waiting"
      queue("#{queue_name}_waiting")
    end

    # Number of seconds to sleep between publisher executions. Defaults to
    # 60 when the processor has no 'publishers' section or no 'delay' in it.
    def publisher_delay(settings)
      (settings['publishers'] || {})['delay'] || 60
    end

    # Run the publisher for the given key in its own thread.
    def run_publisher(key, settings)
      logger.info "Creating publisher for #{key}"
      waiting_queue = waiting_queue_for(key, settings)

      threads << Thread.new do
        while running?
          begin
            load_publisher_class(key).new(waiting_queue).execute
          rescue Exception => e
            # Log and re-raise: a failing publisher ends its own thread.
            logger.error e
            raise
          end

          delay = publisher_delay(settings)
          logger.debug "Sleeping #{delay} seconds"
          sleep delay
        end
      end
    end

    # Load the publisher class for the given key.
    #
    # The source file is (re)loaded whenever its modification time differs
    # from the one recorded at the previous load. Raises SourceFileNotFound
    # if the file does not exist in the publishers directory.
    def load_publisher_class(key)
      source_file = "#{publishers_directory}/#{key}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if publishers[key] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        publishers[key] = modified_at
      end

      Object.const_get(camelize(key))
    end
  end
end