aeden-refinery 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/.autotest +10 -0
  2. data/.gitignore +5 -0
  3. data/Rakefile +17 -1
  4. data/VERSION +1 -0
  5. data/config/config.example.yml +18 -0
  6. data/lib/refinery.rb +74 -0
  7. data/lib/refinery/config.rb +48 -0
  8. data/lib/refinery/configurable.rb +15 -0
  9. data/lib/refinery/daemon.rb +124 -0
  10. data/lib/refinery/event_publisher.rb +120 -0
  11. data/lib/refinery/heartbeat.rb +30 -0
  12. data/lib/refinery/loggable.rb +9 -0
  13. data/lib/refinery/monitor.rb +116 -0
  14. data/lib/refinery/publisher.rb +24 -0
  15. data/lib/refinery/queueable.rb +20 -0
  16. data/lib/refinery/server.rb +86 -0
  17. data/lib/refinery/statistics.rb +61 -0
  18. data/lib/refinery/stats_server.rb +134 -0
  19. data/lib/refinery/utilities.rb +33 -0
  20. data/lib/refinery/validations.rb +48 -0
  21. data/lib/refinery/worker.rb +65 -0
  22. data/logs/README +1 -0
  23. data/publishers/error.rb +8 -0
  24. data/publishers/sample.rb +8 -0
  25. data/publishers/sleep.rb +7 -0
  26. data/refinery.gemspec +105 -0
  27. data/test/config.yml +10 -0
  28. data/test/test_helper.rb +21 -0
  29. data/test/unit/config_test.rb +42 -0
  30. data/test/unit/configurable_test.rb +11 -0
  31. data/test/unit/daemon_test.rb +37 -0
  32. data/test/unit/event_publisher_test.rb +11 -0
  33. data/test/unit/heartbeat_test.rb +22 -0
  34. data/test/unit/loggable_test.rb +11 -0
  35. data/test/unit/publisher_test.rb +13 -0
  36. data/test/unit/queueable_test.rb +24 -0
  37. data/test/unit/server_test.rb +39 -0
  38. data/test/unit/statistics_test.rb +41 -0
  39. data/test/unit/utilities_test.rb +25 -0
  40. data/test/unit/validations_test.rb +37 -0
  41. data/test/unit/worker_test.rb +44 -0
  42. data/workers/error.rb +8 -0
  43. data/workers/sample.rb +8 -0
  44. data/workers/sleep.rb +7 -0
  45. metadata +74 -16
@@ -0,0 +1,33 @@
1
module Refinery #:nodoc:
  # Utilities that can be mixed into a class.
  module Utilities
    # Camelize the given word.
    #
    # Underscores mark word boundaries ("foo_bar" => "FooBar") and slashes
    # become namespace separators ("foo/bar" => "Foo::Bar").
    #
    # word - a String, Symbol or anything else that responds to to_s.
    # first_letter_in_uppercase - when false, the result starts with the
    #   lower-cased first character of +word+ ("foo_bar" => "fooBar").
    #
    # Returns the camelized String.
    def camelize(word, first_letter_in_uppercase = true)
      word = word.to_s
      if first_letter_in_uppercase
        word.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
      else
        # Slice instead of calling String#first, which is not core Ruby.
        # The trailing to_s covers the empty string, where [1..-1] is nil.
        word[0, 1].downcase + camelize(word)[1..-1].to_s
      end
    end

    # Decode the message_body from Base 64 and then parse it from JSON.
    #
    # Returns the parsed object.
    def decode_message(message_body)
      JSON.parse(Base64.decode64(message_body))
    end

    # Convert the given message_data object to JSON and then Base 64
    # encode it.
    #
    # Returns the encoded String.
    def encode_message(message_data)
      Base64.encode64(message_data.to_json)
    end

    # Get a Hash of useful host information that can be sent with
    # messages to the monitoring system.
    #
    # Returns a Hash with the keys 'hostname' and 'pid'.
    def host_info
      {
        'hostname' => Socket.gethostname,
        'pid' => Process.pid
      }
    end
  end
end
@@ -0,0 +1,48 @@
1
module Refinery #:nodoc:
  # Raised when a message fails validation.
  class InvalidMessageError < RuntimeError; end

  # Mixin providing class-level validator declarations and an instance-level
  # +validate+ method that runs them.
  module Validations
    def self.included(host) # :nodoc:
      host.extend(ClassMethods)
    end

    # Class methods that are added to the including class.
    module ClassMethods
      # The ordered list of validators declared on this class. Each
      # validator is a callable that receives the message as its only
      # argument.
      def validators
        @validators ||= []
      end

      # Register the given block as a validator. The block receives the
      # message as its single argument.
      def validate_with(&block)
        validators.push(block)
      end
      alias_method :validate, :validate_with

      # Register one validator per key; each raises
      # Refinery::InvalidMessageError when message[key] is nil or false.
      def validate_key_exists(*args)
        args.each do |key|
          validators.push(lambda { |message|
            next if message[key]

            raise Refinery::InvalidMessageError, "Key does not exist in message: #{key}"
          })
        end
      end
      alias_method :validates_key_exists, :validate_key_exists
      alias_method :validates_presence_of, :validate_key_exists
    end

    # Run every validator registered on the class against the message,
    # in declaration order.
    def validate(message)
      self.class.validators.each { |check| check.call(message) }
    end
    protected :validate
  end
end
@@ -0,0 +1,65 @@
1
module Refinery #:nodoc:
  # Base class for workers. Place subclasses of this in the workers
  # directory.
  #
  # Workers may include validation logic to verify that the message
  # has the correct keys and values before processing.
  class Worker
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities
    include Refinery::Validations
    include Refinery::Queueable

    # Initialize the worker with the given daemon.
    def initialize(daemon)
      @daemon = daemon
    end

    # Run the worker with the given message. The result from the worker's
    # <code>execute</code> method is returned along with the run time.
    #
    # Validation will occur prior to calling execute, so a validation
    # failure propagates to the caller before any work is done.
    #
    # Returns a two-element Array: [result, seconds].
    # Re-raises any exception raised by <code>execute</code> after logging it.
    def run(message)
      result = false

      validate(message)

      logger.debug "Executing worker #{self.class.name}"
      time = Benchmark.realtime do
        begin
          result = execute(message)
        rescue Exception => e
          # Deliberately broad: the exception is only logged here and is
          # always re-raised unchanged.
          logger.error "Error executing worker #{self.class.name}: #{e.message}"
          raise
        end
      end
      logger.debug "Completed worker #{self.class.name} in #{time} seconds"
      return result, time
    end

    # Get the data store for the worker.
    #
    # The data store is provided through the Moneta interface.
    #
    # If the configuration provides a workers/data_store/class option for
    # this processor then that class will be used (the class must be in the
    # Moneta module), otherwise Moneta::S3 will be used.
    #
    # Stores are cached per +options+ value; +options+ must respond to
    # [] with :bucket.
    def data_store(options)
      stores = (@data_store ||= {})
      stores[options] ||= begin
        ds_class = Moneta.const_get(camelize(data_store_class_name))
        ds_class.new(
          :access_key_id => config['aws']['credentials']['access_key_id'],
          :secret_access_key => config['aws']['credentials']['secret_access_key'],
          :bucket => options[:bucket],
          :multi_thread => true
        )
      end
    end

    private
    # Gets the config element starting at the processor.
    def processor_config
      # Read the ivar set in initialize; this class defines no reader for it.
      config['processors'][@daemon.name]
    end

    # Name of the Moneta data store class configured for this processor,
    # falling back to 'S3' when any level of the configuration is missing.
    def data_store_class_name
      processor_config['workers']['data_store']['class'] || 'S3'
    rescue StandardError
      'S3'
    end
  end
end
data/logs/README ADDED
@@ -0,0 +1 @@
1
+ Log files from Refinery will be written here. Do not remove this directory.
@@ -0,0 +1,8 @@
1
+ # An example publisher that posts a message to the queue that should raise an error.
2
class Error < Refinery::Publisher
  # Publish a single error-triggering message, but only while the waiting
  # queue is empty.
  def execute
    return unless waiting_queue.size == 0

    publish({'text' => 'fire an error, please'})
  end
end
@@ -0,0 +1,8 @@
1
+ # A sample publisher that posts a message to the queue.
2
class Sample < Refinery::Publisher
  # Publish a single greeting message, but only while the waiting queue
  # is empty.
  def execute
    return unless waiting_queue.size == 0

    publish({'text' => 'hey there!'})
  end
end
@@ -0,0 +1,7 @@
1
# A publisher that posts a message carrying a random 'seconds' value.
class Sleep < Refinery::Publisher
  # Publish one message with 'seconds' set to rand(5) + 0.5, but only
  # while the waiting queue is empty.
  def execute
    return unless waiting_queue.size == 0

    publish({'seconds' => rand(5) + 0.5})
  end
end
data/refinery.gemspec ADDED
@@ -0,0 +1,105 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for refinery 0.9.1.
Gem::Specification.new do |s|
  s.name = %q{refinery}
  s.version = "0.9.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Anthony Eden"]
  s.date = %q{2009-06-01}
  s.description = %q{Process data in a distributed fashion.}
  s.email = %q{anthonyeden@gmail.com}
  s.executables = ["epub", "monitor", "pubnow", "refinery"]
  s.extra_rdoc_files = %w[
    LICENSE
    README.rdoc
    README.textile
  ]
  s.files = %w[
    .autotest
    .gitignore
    CHANGELOG
    LICENSE
    README.rdoc
    README.textile
    Rakefile
    VERSION
    bin/epub
    bin/monitor
    bin/pubnow
    bin/refinery
    config/config.example.yml
    lib/refinery.rb
    lib/refinery/config.rb
    lib/refinery/configurable.rb
    lib/refinery/daemon.rb
    lib/refinery/event_publisher.rb
    lib/refinery/heartbeat.rb
    lib/refinery/loggable.rb
    lib/refinery/monitor.rb
    lib/refinery/publisher.rb
    lib/refinery/queueable.rb
    lib/refinery/server.rb
    lib/refinery/statistics.rb
    lib/refinery/stats_server.rb
    lib/refinery/utilities.rb
    lib/refinery/validations.rb
    lib/refinery/worker.rb
    logs/README
    publishers/error.rb
    publishers/sample.rb
    publishers/sleep.rb
    refinery.gemspec
    test/config.yml
    test/test_helper.rb
    test/unit/config_test.rb
    test/unit/configurable_test.rb
    test/unit/daemon_test.rb
    test/unit/event_publisher_test.rb
    test/unit/heartbeat_test.rb
    test/unit/loggable_test.rb
    test/unit/publisher_test.rb
    test/unit/queueable_test.rb
    test/unit/server_test.rb
    test/unit/statistics_test.rb
    test/unit/utilities_test.rb
    test/unit/validations_test.rb
    test/unit/worker_test.rb
    workers/error.rb
    workers/sample.rb
    workers/sleep.rb
  ]
  # has_rdoc= and rubyforge_project= are deprecated setters; only call them
  # on RubyGems versions that still provide them.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/aeden/refinery}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{refinery} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Refinery processes data in a distributed environment.}
  s.test_files = %w[
    test/test_helper.rb
    test/unit/config_test.rb
    test/unit/configurable_test.rb
    test/unit/daemon_test.rb
    test/unit/event_publisher_test.rb
    test/unit/heartbeat_test.rb
    test/unit/loggable_test.rb
    test/unit/publisher_test.rb
    test/unit/queueable_test.rb
    test/unit/server_test.rb
    test/unit/statistics_test.rb
    test/unit/utilities_test.rb
    test/unit/validations_test.rb
    test/unit/worker_test.rb
  ]

  # The generated RubyGems-version branch was empty and referenced
  # Gem::RubyGemsVersion, which newer RubyGems no longer defines, so it
  # has been dropped; no dependencies are declared.
  s.specification_version = 2 if s.respond_to? :specification_version
end
data/test/config.yml ADDED
@@ -0,0 +1,10 @@
1
+ aws:
2
+ credentials:
3
+ access_key_id: "aaa"
4
+ secret_access_key: "bbb"
5
+ processors:
6
+ sample:
7
+ publishers:
8
+ delay: 10
9
+ workers:
10
+ initial: 3
@@ -0,0 +1,21 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'shoulda'
4
+ require 'mocha'
5
+ require File.dirname(__FILE__) + '/../lib/refinery'
6
+
7
class Test::Unit::TestCase
  # Stub Refinery::Config.default so that it returns a config built from
  # dummy AWS credentials ('aki' / 'sak') and no processors.
  #
  # 'processors' is a Hash (not an Array) because it is looked up by
  # processor name, and the default configuration exposes it as a Hash.
  def setup_default_config
    settings = {
      'aws' => {
        'credentials' => {
          'access_key_id' => 'aki',
          'secret_access_key' => 'sak'
        }
      },
      'processors' => {}
    }
    Refinery::Config.stubs(:default).returns(Refinery::Config.new(settings))
  end
end
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + '/../test_helper'
2
+
3
# Tests for Refinery::Config: the default configuration and loading /
# refreshing from a YAML file.
class ConfigTest < Test::Unit::TestCase
  context "the config class" do
    should "provide a default configuration" do
      assert_not_nil Refinery::Config.default
    end

    context "default configuration" do
      setup { @config = Refinery::Config.default }

      should "provide an empty aws credentials hash" do
        assert_equal({}, @config['aws']['credentials'])
      end

      should "provide an empty processors hash" do
        assert_equal({}, @config['processors'])
      end
    end

    context "after loading configuration from a YAML file" do
      setup do
        @config_file = File.dirname(__FILE__) + '/../config.yml'
        @config = Refinery::Config.new
        @config.load_file(@config_file)
      end

      should "have aws credentials" do
        credentials = @config['aws']['credentials']
        assert_equal 'aaa', credentials['access_key_id']
        assert_equal 'bbb', credentials['secret_access_key']
      end

      should "reload the file when changed" do
        # Touch the file so refresh has a reason to re-read it.
        %x(touch #{@config_file})
        YAML.expects(:load_file).once
        @config.refresh
      end

      should "not reload the file when not changed" do
        YAML.expects(:load_file).never
        @config.refresh
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require File.dirname(__FILE__) + '/../test_helper'
2
# Minimal host class used to exercise the Configurable mixin.
class ConfigureMe
  include Refinery::Configurable
end

# Verifies that including Refinery::Configurable exposes a config.
class ConfigurableTest < Test::Unit::TestCase
  context "a class with the configurable module" do
    should "provide a config" do
      configurable = ConfigureMe.new
      assert_not_nil configurable.config
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + '/../test_helper'
2
# Tests construction, logging and the running/stopped state of
# Refinery::Daemon using stubbed server and queues.
class DaemonTest < Test::Unit::TestCase
  # Build a daemon named 'sample' from the stubs created in setup.
  def build_daemon
    Refinery::Daemon.new(@server, 'sample', @waiting_queue, @error_queue, @done_queue)
  end

  context "a daemon" do
    setup do
      @server = stub('Server')
      @waiting_queue = stub('Queue(waiting)')
      @waiting_queue.stubs(:receive)
      @error_queue = stub('Queue(error)')
      @done_queue = stub('Queue(done)')
    end

    should "be startable" do
      assert_nothing_raised { build_daemon }
    end

    should "have logging" do
      assert_not_nil build_daemon.logger
    end

    context "that is started" do
      setup { @daemon = build_daemon }

      should "have a state of running" do
        assert @daemon.running?
      end

      context "after calling stop" do
        setup { @daemon.stop }

        should "not be running" do
          assert !@daemon.running?
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require File.dirname(__FILE__) + '/../test_helper'
2
# Exercises Refinery::EventPublisher#run with stubbed publishing settings.
class EventPublisherTest < Test::Unit::TestCase
  context "an event publisher" do
    # NOTE(review): despite its name this test makes no assertion; it only
    # checks that run completes. Confirm the intended error and assert on it.
    should "raise an error if credentials are not set" do
      Refinery::Config.any_instance.stubs(:publishing).returns({'sample' => {'delay' => 10}})
      Refinery::EventPublisher.new.run
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require File.dirname(__FILE__) + '/../test_helper'
2
# Verifies that a Refinery::Heartbeat can be constructed against a stubbed
# server and a stubbed SQS queue provider.
class HeartbeatTest < Test::Unit::TestCase
  context "a heartbeat" do
    setup do
      setup_default_config

      @server = stub('server')
      @server.stubs(:daemons).returns([])

      queue = stub('heartbeat queue')
      queue.stubs(:send_message)

      provider = stub('queue provider')
      provider.expects(:queue).with('heartbeat').returns(queue)

      # The credentials match the values stubbed by setup_default_config.
      RightAws::SqsGen2.stubs(:new).with('aki', 'sak', {:multi_thread => true}).returns(provider)
    end

    should "be initializable" do
      Refinery::Heartbeat.new(@server)
    end
  end
end