hot_potato 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +19 -0
  4. data/Rakefile +25 -0
  5. data/bin/hotpotato +14 -0
  6. data/hot_potato.gemspec +28 -0
  7. data/lib/hot_potato/admin/public/admin.css +30 -0
  8. data/lib/hot_potato/admin/views/index.erb +58 -0
  9. data/lib/hot_potato/admin.rb +67 -0
  10. data/lib/hot_potato/app_task.rb +42 -0
  11. data/lib/hot_potato/app_task_info.rb +80 -0
  12. data/lib/hot_potato/app_task_server.rb +92 -0
  13. data/lib/hot_potato/cache.rb +45 -0
  14. data/lib/hot_potato/core.rb +62 -0
  15. data/lib/hot_potato/dsl.rb +172 -0
  16. data/lib/hot_potato/faucet.rb +41 -0
  17. data/lib/hot_potato/generate.rb +55 -0
  18. data/lib/hot_potato/generate_app_task.rb +41 -0
  19. data/lib/hot_potato/queue_logger.rb +33 -0
  20. data/lib/hot_potato/sink.rb +33 -0
  21. data/lib/hot_potato/supervisor_info.rb +64 -0
  22. data/lib/hot_potato/supervisor_server.rb +225 -0
  23. data/lib/hot_potato/templates/Gemfile +6 -0
  24. data/lib/hot_potato/templates/Rakefile +9 -0
  25. data/lib/hot_potato/templates/admin +4 -0
  26. data/lib/hot_potato/templates/app_task +4 -0
  27. data/lib/hot_potato/templates/boot.rb +21 -0
  28. data/lib/hot_potato/templates/config.yml +11 -0
  29. data/lib/hot_potato/templates/development.rb +0 -0
  30. data/lib/hot_potato/templates/generate +4 -0
  31. data/lib/hot_potato/templates/production.rb +0 -0
  32. data/lib/hot_potato/templates/routes.rb +3 -0
  33. data/lib/hot_potato/templates/supervisor +4 -0
  34. data/lib/hot_potato/templates/template_faucet.rb +8 -0
  35. data/lib/hot_potato/templates/template_sink.rb +7 -0
  36. data/lib/hot_potato/templates/template_worker.rb +8 -0
  37. data/lib/hot_potato/templates/test.rb +0 -0
  38. data/lib/hot_potato/utils.rb +43 -0
  39. data/lib/hot_potato/version.rb +3 -0
  40. data/lib/hot_potato/worker.rb +40 -0
  41. data/lib/hot_potato.rb +20 -0
  42. data/readme.md +219 -0
  43. data/test/helper.rb +7 -0
  44. data/test/version_test.rb +9 -0
  45. metadata +166 -0
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+
3
+ APP_PATH ||= File.expand_path('../..', __FILE__)
4
+ RACK_ENV ||= ENV['RACK_ENV'] || 'development'
5
+
6
+ # Set up gems listed in the Gemfile.
7
+ gemfile = File.expand_path('../../Gemfile', __FILE__)
8
+ begin
9
+ ENV['BUNDLE_GEMFILE'] = gemfile
10
+ require 'bundler'
11
+ Bundler.setup
12
+ Bundler.require(:default)
13
+ require File.expand_path('../routes', __FILE__)
14
+ require File.expand_path("../environments/#{RACK_ENV}", __FILE__)
15
+ Dir["#{File.expand_path('../../app', __FILE__)}/*.rb"].each { |f| require f }
16
+ rescue Bundler::GemNotFound => e
17
+ STDERR.puts e.message
18
+ STDERR.puts "Try running `bundle install`."
19
+ exit!
20
+ end if File.exist?(gemfile)
21
+
@@ -0,0 +1,11 @@
1
+ development:
2
+ redis_hostname: localhost
3
+ redis_port: 6379
4
+
5
+ test:
6
+ redis_hostname: localhost
7
+ redis_port: 6379
8
+
9
+ production:
10
+ redis_hostname: localhost
11
+ redis_port: 6379
File without changes
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../config/boot', __FILE__)
4
+ HotPotato::GenerateAppTask.new
File without changes
@@ -0,0 +1,3 @@
1
+ HotPotato::Route.build do
2
+
3
+ end
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../config/boot', __FILE__)
4
+ HotPotato::SupervisorServer.new
@@ -0,0 +1,8 @@
1
+ class __NAME__ < HotPotato::Faucet
2
+
3
+ def perform
4
+ # TODO: Implement
5
+ # Be sure to call: send_message(message)
6
+ end
7
+
8
+ end
@@ -0,0 +1,7 @@
1
+ class __NAME__ < HotPotato::Sink
2
+
3
+ def perform(message)
4
+ # TODO: Implement
5
+ end
6
+
7
+ end
@@ -0,0 +1,8 @@
1
+ class __NAME__ < HotPotato::Worker
2
+
3
+ def perform(message)
4
+ # TODO: Implement
5
+ # Be sure to call: send_message(message)
6
+ end
7
+
8
+ end
File without changes
@@ -0,0 +1,43 @@
1
+ module Process
2
+
3
+ # Return true if the process is still running, false otherwise.
4
+ def self.alive?(pid)
5
+ begin
6
+ Process.getpgid(pid)
7
+ return true
8
+ rescue Errno::ESRCH
9
+ return false
10
+ end
11
+ end
12
+
13
+ end
14
+
15
+ module Kernel
16
+
17
+ # Taken from Rails ActiveSupport::Inflector
18
+ def classify(name)
19
+ # strip out any leading schema name
20
+ camelize(name.to_s.sub(/.*\./, ''))
21
+ end
22
+
23
+ # Taken from Rails ActiveSupport::Inflector
24
+ def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
25
+ if first_letter_in_uppercase
26
+ lower_case_and_underscored_word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
27
+ else
28
+ lower_case_and_underscored_word.to_s[0].chr.downcase + camelize(lower_case_and_underscored_word)[1..-1]
29
+ end
30
+ end
31
+
32
+ # Taken from Rails ActiveSupport::Inflector
33
+ def underscore(camel_cased_word)
34
+ word = camel_cased_word.to_s.dup
35
+ word.gsub!(/::/, '/')
36
+ word.gsub!(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
37
+ word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
38
+ word.tr!("-", "_")
39
+ word.downcase!
40
+ word
41
+ end
42
+
43
+ end
@@ -0,0 +1,3 @@
1
+ module HotPotato
2
+ VERSION = "0.12.1"
3
+ end
@@ -0,0 +1,40 @@
1
+ module HotPotato
2
+
3
+ # Workers manipulate data from other workers or faucets. Examples include: Calculate Scores, Merge
4
+ # Data, and Filter Data. Each worker is a ruby file in the app directory that extends HotPotato::Worker
5
+ # and implements the perform(message) method. For each message the worker wants to send to the next AppTask,
6
+ # the send_message method should be called.
7
+ #
8
+ # class Influencer < HotPotato::Worker
9
+ #
10
+ # def perform(message)
11
+ # message["influence"] = rand(100)
12
+ # send_message message
13
+ # end
14
+ #
15
+ # end
16
+ class Worker
17
+
18
+ include HotPotato::AppTask
19
+
20
+ def start(queue_in)
21
+ @queue_out = underscore(self.class.name.to_sym)
22
+ if !self.respond_to?('perform')
23
+ log.error "The Worker #{self.class.name} does not implement a perform method."
24
+ exit 1
25
+ end
26
+ start_heartbeat_service
27
+ queue_subscribe(queue_in) do |m|
28
+ count_message_in
29
+ perform m
30
+ end
31
+ end
32
+
33
+ def send_message(m)
34
+ queue_inject @queue_out, m
35
+ count_message_out
36
+ end
37
+
38
+ end
39
+
40
+ end
data/lib/hot_potato.rb ADDED
@@ -0,0 +1,20 @@
1
+ APP_PATH ||= '.'
2
+ RACK_ENV ||= 'development'
3
+
4
+ require 'hot_potato/core'
5
+ require 'hot_potato/cache'
6
+ require 'hot_potato/dsl'
7
+ require 'hot_potato/admin'
8
+ require 'hot_potato/app_task'
9
+ require 'hot_potato/app_task_info'
10
+ require 'hot_potato/app_task_server'
11
+ require 'hot_potato/supervisor_server'
12
+ require 'hot_potato/supervisor_info'
13
+ require 'hot_potato/queue_logger'
14
+ require 'hot_potato/faucet'
15
+ require 'hot_potato/worker'
16
+ require 'hot_potato/sink'
17
+ require 'hot_potato/utils'
18
+ require 'hot_potato/generate'
19
+ require 'hot_potato/generate_app_task'
20
+ require 'hot_potato/version'
data/readme.md ADDED
@@ -0,0 +1,219 @@
1
+ # Hot Potato
2
+
3
+ A Real-time Processing Framework
4
+
5
+ ## Description
6
+
7
+ Hot Potato is an open source real-time processing framework written in Ruby. Originally designed to process the Twitter firehose at 3,000+ tweets per second, it has been extended to support any type of streaming data as input or output to the framework. The framework excels at applications such as social media analysis, log processing, fraud prevention, spam detection, instant messaging, and many others that involve the processing of streaming data.
8
+
9
+ ## What is it?
10
+
11
+ * Written in Ruby (requires 1.9)
12
+ * Handles streaming data
13
+ * Designed for scale (can easily handle the twitter firehose on one server)
14
+ * Simple interface for writing AppTasks
15
+
16
+ # Getting Started
17
+
18
+ Start by downloading the gem (this requires Ruby 1.9):
19
+
20
+ $ gem install hot_potato
21
+
22
+ Next create a project:
23
+
24
+ $ hotpotato sample
25
+ Hot Potato (v0.12.1)
26
+ Generating application sample...
27
+ create sample
28
+ add sample/Gemfile
29
+ add sample/Rakefile
30
+ create sample/app
31
+ create sample/bin
32
+ add sample/bin/admin
33
+ add sample/bin/app_task
34
+ add sample/bin/supervisor
35
+ create sample/config
36
+ add sample/config/boot.rb
37
+ add sample/config/config.yml
38
+ add sample/config/routes.rb
39
+ create sample/docs
40
+ create sample/logs
41
+ create sample/test
42
+ create sample/tmp
43
+
44
+ ## Generating AppTasks
45
+
46
+ To help with creating AppTasks, there is a generator available:
47
+
48
+ $ bin/generate [faucet|worker|sink] name
49
+
50
+ # The Details
51
+
52
+ ## Environments
53
+
54
+ By setting the RACK_ENV environment variable, one can control which environment is loaded.
55
+ By default there are three: development, test, and production.
56
+
57
+ ## AppTasks
58
+
59
+ AppTasks are the controllers in the framework. The Supervisor (see below) is responsible for
60
+ starting AppTasks. There are three types:
61
+
62
+ ### Faucets
63
+
64
+ Faucets inject data into the system. Examples include: Twitter Reader, SMTP, and Tail Log File.
65
+ Each faucet is a ruby file in the app directory that extends HotPotato::Faucet and implements
66
+ the perform method. For each message received, the method should call send_message to send
67
+ it to the next AppTask.
68
+
69
+ class TwitterFaucet < HotPotato::Faucet
70
+
71
+ def perform
72
+ TweetStream::Client.new("user", "secret").sample do |s|
73
+ message = {}
74
+ message["username"] = s.user.screen_name
75
+ message["text"] = s.text
76
+ send_message message
77
+ end
78
+ end
79
+
80
+ end
81
+
82
+ ### Workers
83
+
84
+ Workers manipulate data from other workers or faucets. Examples include: Calculate Scores, Merge
85
+ Data, and Filter Data. Each worker is a ruby file in the app directory that extends HotPotato::Worker
86
+ and implements the perform(message) method. For each message the worker wants to send to the next AppTask,
87
+ the send_message method should be called.
88
+
89
+ class Influencer < HotPotato::Worker
90
+
91
+ def perform(message)
92
+ message["influence"] = rand(100)
93
+ send_message message
94
+ end
95
+
96
+ end
97
+
98
+ ### Sinks
99
+
100
+ Sinks send data out of the system. Examples include: WebSocket, Database (Data Warehouse), and
101
+ File Writer. Each sink is a ruby file in the app directory that extends HotPotato::Sink and implements
102
+ the perform(message) method. There is no send_message for the sink to call since it is a final destination
103
+ for the message.
104
+
105
+ class LogWriter < HotPotato::Sink
106
+
107
+ def perform(message)
108
+ log.debug "#{message["username"]}:#{message["influence"]}"
109
+ end
110
+
111
+ end
112
+
113
+ ## Supervisor
114
+
115
+ The supervisor is a process that runs on each machine that participates in the cluster.
116
+ When it starts it does the following:
117
+
118
+ 0. Read the routes file
119
+ 1. Connect to the Redis server and get the appTask process ID table
120
+ 2. Acquire the global lock
121
+ 3. If a process is needed, fork a new process for AppTask
122
+ 4. Release the global lock
123
+ 5. Rinse and Repeat
124
+
125
+ The supervisor also starts the Heartbeat service and logging service as background threads.
126
+
127
+ The supervisor can be managed from the command line:
128
+
129
+ $ bin/supervisor [run|start|stop|restart]
130
+
131
+ If started without any settings, it will default to run.
132
+
133
+ ## Admin Server
134
+
135
+ The admin server is a Sinatra-based application to display statistical and diagnostic information.
136
+
137
+ The admin server can be managed from the command line:
138
+
139
+ $ bin/admin --help
140
+
141
+ Usage: ./admin [options]
142
+
143
+ Vegas options:
144
+ -K, --kill kill the running process and exit
145
+ -S, --status display the current running PID and URL then quit
146
+ -s, --server SERVER serve using SERVER (thin/mongrel/webrick)
147
+ -o, --host HOST listen on HOST (default: 0.0.0.0)
148
+ -p, --port PORT use PORT (default: 5678)
149
+ -x, --no-proxy ignore env proxy settings (e.g. http_proxy)
150
+ -e, --env ENVIRONMENT use ENVIRONMENT for defaults (default: development)
151
+ -F, --foreground don't daemonize, run in the foreground
152
+ -L, --no-launch don't launch the browser
153
+ -d, --debug raise the log level to :debug (default: :info)
154
+ --app-dir APP_DIR set the app dir where files are stored (default: ~/.vegas/Hot_Potato_Admin_Server)/)
155
+ -P, --pid-file PID_FILE set the path to the pid file (default: app_dir/Hot_Potato_Admin_Server.pid)
156
+ --log-file LOG_FILE set the path to the log file (default: app_dir/Hot_Potato_Admin_Server.log)
157
+ --url-file URL_FILE set the path to the URL file (default: app_dir/Hot_Potato_Admin_Server.url)
158
+
159
+ Common options:
160
+ -h, --help Show this message
161
+ --version Show version
162
+
163
+ The page can be accessed at http://localhost:5678
164
+
165
+ ## Routes
166
+
167
+ The routes file (config/routes.rb) is a Ruby DSL that does the following:
168
+
169
+ * Defines AppTasks (Faucets, Workers, Sinks)
170
+ * Defines processing chain for AppTasks
171
+ * Restrict AppTasks to a host group
172
+ * Limit number of instances
173
+
174
+ Example:
175
+
176
+ HotPotato::Route.build do
177
+
178
+ faucet :twitter_faucet
179
+ worker :influencer, :source => :twitter_faucet
180
+ sink :log_writer, :source => :influencer
181
+
182
+ end
183
+
184
+ Multiple sources can be attached to a worker or sink:
185
+
186
+ worker :influencer, :source => [:twitter_faucet, :other_source]
187
+
188
+ By default, each AppTask runs as a single instance. This can be changed with the :instances option:
189
+
190
+ worker :influencer, :source => :twitter_faucet, :instances => 2
191
+
192
+ AppTasks can be limited to a specific server (or set of servers) by creating a group in the
193
+ config/config.yml file:
194
+
195
+ development:
196
+ redis_hostname: localhost
197
+ redis_port: 6379
198
+ servers:
199
+ - hostname: worker01
200
+ group: incoming
201
+ max_app_tasks: 15
202
+ - hostname: worker02
203
+ group: worker
204
+ max_app_tasks: 15
205
+
206
+ and specifying the group in the routes file:
207
+
208
+ faucet :twitter_faucet, :group => :incoming
209
+
210
+ # Support
211
+
212
+ If you have a question try our group: http://groups.google.com/group/hotpotato-rb
213
+
214
+ If you find a bug, file an issue on GitHub.
215
+
216
+ # Contributing
217
+
218
+ [Fork the project](http://github.com/dshimy/HotPotato) and send pull requests.
219
+
data/test/helper.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH << File.dirname(__FILE__) + '/../lib/'
3
+
4
+ APP_PATH ||= File.expand_path('..', __FILE__)
5
+ RACK_ENV ||= 'test'
6
+
7
+ require 'hot_potato'
@@ -0,0 +1,9 @@
1
+ require 'helper'
2
+
3
+ class VersionTest < Test::Unit::TestCase
4
+
5
+ def test_version
6
+ assert_not_nil HotPotato::VERSION
7
+ end
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,166 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hot_potato
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.12.1
6
+ platform: ruby
7
+ authors:
8
+ - Darian Shimy
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-07-26 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: json
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: redis
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: "0"
36
+ type: :runtime
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: bunny
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: sinatra
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ type: :runtime
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: vegas
62
+ prerelease: false
63
+ requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ type: :runtime
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: snappy
73
+ prerelease: false
74
+ requirement: &id006 !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ type: :runtime
81
+ version_requirements: *id006
82
+ description: Hot Potato is an open source real-time processing framework written in Ruby. Originally designed to process the Twitter firehose at 3,000+ tweets per second, it has been extended to support any type of streaming data as input or output to the framework. The framework excels with applications such as, social media analysis, log processing, fraud prevention, spam detection, instant messaging, and many others that include the processing of streaming data.
83
+ email:
84
+ - dshimy@gmail.com
85
+ executables:
86
+ - hotpotato
87
+ extensions: []
88
+
89
+ extra_rdoc_files: []
90
+
91
+ files:
92
+ - .gitignore
93
+ - Gemfile
94
+ - LICENSE
95
+ - Rakefile
96
+ - bin/hotpotato
97
+ - hot_potato.gemspec
98
+ - lib/hot_potato.rb
99
+ - lib/hot_potato/admin.rb
100
+ - lib/hot_potato/admin/public/admin.css
101
+ - lib/hot_potato/admin/views/index.erb
102
+ - lib/hot_potato/app_task.rb
103
+ - lib/hot_potato/app_task_info.rb
104
+ - lib/hot_potato/app_task_server.rb
105
+ - lib/hot_potato/cache.rb
106
+ - lib/hot_potato/core.rb
107
+ - lib/hot_potato/dsl.rb
108
+ - lib/hot_potato/faucet.rb
109
+ - lib/hot_potato/generate.rb
110
+ - lib/hot_potato/generate_app_task.rb
111
+ - lib/hot_potato/queue_logger.rb
112
+ - lib/hot_potato/sink.rb
113
+ - lib/hot_potato/supervisor_info.rb
114
+ - lib/hot_potato/supervisor_server.rb
115
+ - lib/hot_potato/templates/Gemfile
116
+ - lib/hot_potato/templates/Rakefile
117
+ - lib/hot_potato/templates/admin
118
+ - lib/hot_potato/templates/app_task
119
+ - lib/hot_potato/templates/boot.rb
120
+ - lib/hot_potato/templates/config.yml
121
+ - lib/hot_potato/templates/development.rb
122
+ - lib/hot_potato/templates/generate
123
+ - lib/hot_potato/templates/production.rb
124
+ - lib/hot_potato/templates/routes.rb
125
+ - lib/hot_potato/templates/supervisor
126
+ - lib/hot_potato/templates/template_faucet.rb
127
+ - lib/hot_potato/templates/template_sink.rb
128
+ - lib/hot_potato/templates/template_worker.rb
129
+ - lib/hot_potato/templates/test.rb
130
+ - lib/hot_potato/utils.rb
131
+ - lib/hot_potato/version.rb
132
+ - lib/hot_potato/worker.rb
133
+ - readme.md
134
+ - test/helper.rb
135
+ - test/version_test.rb
136
+ has_rdoc: true
137
+ homepage: http://github.com/dshimy/hotpotato
138
+ licenses: []
139
+
140
+ post_install_message:
141
+ rdoc_options: []
142
+
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ none: false
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: "0"
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ none: false
153
+ requirements:
154
+ - - ">="
155
+ - !ruby/object:Gem::Version
156
+ version: "0"
157
+ requirements: []
158
+
159
+ rubyforge_project: hot_potato
160
+ rubygems_version: 1.6.2
161
+ signing_key:
162
+ specification_version: 3
163
+ summary: A Real-time Processing Framework
164
+ test_files:
165
+ - test/helper.rb
166
+ - test/version_test.rb