edamame 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +90 -0
  3. data/app/edamame_san/config.ru +4 -0
  4. data/app/edamame_san/config.yml +17 -0
  5. data/app/edamame_san/edamame_san.rb +71 -0
  6. data/app/edamame_san/public/favicon.ico +0 -0
  7. data/app/edamame_san/public/images/edamame_logo.icns +0 -0
  8. data/app/edamame_san/public/images/edamame_logo.ico +0 -0
  9. data/app/edamame_san/public/images/edamame_logo.png +0 -0
  10. data/app/edamame_san/public/images/edamame_logo_2.icns +0 -0
  11. data/app/edamame_san/public/javascripts/application.js +8 -0
  12. data/app/edamame_san/public/javascripts/jquery/jquery-ui.js +8694 -0
  13. data/app/edamame_san/public/javascripts/jquery/jquery.js +4376 -0
  14. data/app/edamame_san/public/stylesheets/application.css +32 -0
  15. data/app/edamame_san/public/stylesheets/layout.css +88 -0
  16. data/app/edamame_san/views/layout.haml +13 -0
  17. data/app/edamame_san/views/load.haml +37 -0
  18. data/app/edamame_san/views/root.haml +25 -0
  19. data/bin/edamame-nuke +20 -0
  20. data/bin/edamame-ps +2 -0
  21. data/bin/edamame-stats +13 -0
  22. data/bin/edamame-sync +21 -0
  23. data/bin/edamame_util_opts.rb +10 -0
  24. data/bin/test_run.rb +14 -0
  25. data/lib/edamame.rb +29 -0
  26. data/lib/edamame/broker.rb +38 -0
  27. data/lib/edamame/job.rb +114 -0
  28. data/lib/edamame/monitoring.rb +7 -0
  29. data/lib/edamame/monitoring/README-god.textile +54 -0
  30. data/lib/edamame/monitoring/beanstalkd_god.rb +28 -0
  31. data/lib/edamame/monitoring/god_email.rb +45 -0
  32. data/lib/edamame/monitoring/god_process.rb +205 -0
  33. data/lib/edamame/monitoring/process_groups.rb +32 -0
  34. data/lib/edamame/monitoring/sinatra_god.rb +34 -0
  35. data/lib/edamame/monitoring/tyrant_god.rb +59 -0
  36. data/lib/edamame/persistent_queue.rb +152 -0
  37. data/lib/edamame/queue.rb +6 -0
  38. data/lib/edamame/queue/beanstalk.rb +134 -0
  39. data/lib/edamame/scheduling.rb +79 -0
  40. data/lib/edamame/store.rb +8 -0
  41. data/lib/edamame/store/base.rb +62 -0
  42. data/lib/edamame/store/tyrant_store.rb +49 -0
  43. data/lib/methods.txt +94 -0
  44. data/spec/edamame_spec.rb +7 -0
  45. data/spec/spec_helper.rb +10 -0
  46. data/utils/god/edamame.god +36 -0
  47. data/utils/god/edamame.yaml +61 -0
  48. data/utils/god/god-etc-init-dot-d-example +40 -0
  49. data/utils/god/god.conf +22 -0
  50. data/utils/god/god_site_config.rb +4 -0
  51. data/utils/god/wuclan.god +36 -0
  52. data/utils/simulation/Add Percent Variation.vi +0 -0
  53. data/utils/simulation/Harmonic Average.vi +0 -0
  54. data/utils/simulation/Rescheduling Simulation.aliases +3 -0
  55. data/utils/simulation/Rescheduling Simulation.lvlps +3 -0
  56. data/utils/simulation/Rescheduling Simulation.lvproj +22 -0
  57. data/utils/simulation/Rescheduling.vi +0 -0
  58. data/utils/simulation/Weighted Average.vi +0 -0
  59. metadata +147 -0
@@ -0,0 +1,32 @@
1
+ /* ===========================================================================
2
+ Default styling for the upload bar
3
+ */
4
+
5
+ body { margin: 0px ; }
6
+
7
+ .bar {
8
+ width: 300px;
9
+ }
10
+ #progress {
11
+ border: 1px solid #222;
12
+ display: block;
13
+ float: left;
14
+ margin-right: 0.25em;
15
+ }
16
+ #progressbar {
17
+ width: 0px;
18
+ height: 24px;
19
+ }
20
+
21
+ #progress { background: #eee; }
22
+ #progress #progressbar { background: #bbf; }
23
+ #progress.working { background: #eef; }
24
+ #progress.success #progressbar { background: #cfd; }
25
+ #progress.error { background: #fcc; }
26
+ #progress.error #progressbar { background: #fcd; }
27
+
28
+ iframe.yuploader {
29
+ border: 0px none white;
30
+ margin: 0px;
31
+ padding: 0px;
32
+ }
@@ -0,0 +1,88 @@
1
+
2
+ /*
3
+ Page layout
4
+ */
5
+
6
+ body {
7
+ background-color: #4B7399;
8
+ font-family: Verdana, Helvetica, Arial;
9
+ font-size: 14px;
10
+ padding: 0;
11
+ margin: 0;
12
+ }
13
+
14
+ a img {
15
+ border: none;
16
+ }
17
+
18
+ a {
19
+ color: #0000FF;
20
+ }
21
+
22
+ .clear {
23
+ clear: both;
24
+ height: 0;
25
+ overflow: hidden;
26
+ }
27
+
28
+ #container {
29
+ width: 75%;
30
+ margin: 0 auto;
31
+ background-color: #FFF;
32
+ padding: 20px 40px;
33
+ border: solid 1px black;
34
+ margin-top: 20px;
35
+ }
36
+
37
+ #flash_notice, #flash_error {
38
+ padding: 5px 8px;
39
+ margin: 10px 0;
40
+ }
41
+
42
+ #flash_notice {
43
+ background-color: #CFC;
44
+ border: solid 1px #6C6;
45
+ }
46
+
47
+ #flash_error {
48
+ background-color: #FCC;
49
+ border: solid 1px #C66;
50
+ }
51
+
52
+ .fieldWithErrors {
53
+ display: inline;
54
+ }
55
+
56
+ #errorExplanation {
57
+ width: 400px;
58
+ border: 2px solid #CF0000;
59
+ padding: 0px;
60
+ padding-bottom: 12px;
61
+ margin-bottom: 20px;
62
+ background-color: #f0f0f0;
63
+ }
64
+
65
+ #errorExplanation h2 {
66
+ text-align: left;
67
+ font-weight: bold;
68
+ padding: 5px 5px 5px 15px;
69
+ font-size: 12px;
70
+ margin: 0;
71
+ background-color: #c00;
72
+ color: #fff;
73
+ }
74
+
75
+ #errorExplanation p {
76
+ color: #333;
77
+ margin-bottom: 0;
78
+ padding: 8px;
79
+ }
80
+
81
+ #errorExplanation ul {
82
+ margin: 2px 24px;
83
+ }
84
+
85
+ #errorExplanation ul li {
86
+ font-size: 12px;
87
+ list-style: disc;
88
+ }
@@ -0,0 +1,13 @@
1
+ !!! XML
2
+ !!! Strict
3
+ %html{ "xml:lang" => "en", :lang => "en", :xmlns => "http://www.w3.org/1999/xhtml" }
4
+ %head
5
+ %link{ :href => "/stylesheets/application.css", :rel => "stylesheet", :type => "text/css" }
6
+ %link{ :href => "/favicon.ico", :rel => "shortcut icon", :type => "image/x-icon" }
7
+
8
+ %body
9
+ #container
10
+ =yield
11
+
12
+ -# %script{ :type => "text/javascript", :src => "http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js" }
13
+ -# %script{ :type => "text/javascript", :src => "http://ajax.googleapis.com/ajax/libs/jqueryui/1.7.1/jquery-ui.min.js" }
@@ -0,0 +1,37 @@
1
+ %style{ :type => 'text/css' }
2
+ @import url('/stylesheets/layout.css');
3
+
4
+ %p
5
+
6
+ Jobs in the edamame job store:
7
+
8
+
9
+ %table
10
+ %tr
11
+ %th query_term
12
+ %th priority
13
+ %th prev_items
14
+ %th prev_rate
15
+ %th prev_span_min
16
+ %th prev_span_max
17
+ - @dest_store.each_as(Edamame::Job) do |key, obj|
18
+ %tr
19
+ %td=h key.inspect
20
+ %td=h obj.inspect
21
+ %td=h obj.key
22
+ -# %td=h obj[:query_term]
23
+ -# %td=h obj[:priority]
24
+ -# %td=h obj[:prev_items]
25
+ -# %td=h obj[:prev_rate]
26
+ -# %td=h obj[:prev_span_min]
27
+ -# %td=h obj[:prev_span_max]
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
@@ -0,0 +1,25 @@
1
+ %style{ :type => 'text/css' }
2
+ @import url('/stylesheets/layout.css');
3
+
4
+ %p
5
+
6
+ Jobs in the edamame job store:
7
+
8
+
9
+ %table
10
+ %tr
11
+ %th query_term
12
+ %th priority
13
+ %th prev_items
14
+ %th prev_rate
15
+ %th prev_span_min
16
+ %th prev_span_max
17
+ - @store.each_as(Wuclan::Domains::Twitter::Scrape::TwitterSearchJob) do |key, obj|
18
+ %tr
19
+ -# %td=h key.inspect
20
+ %td=h obj[:query_term]
21
+ %td=h obj[:priority]
22
+ %td=h obj[:prev_items]
23
+ %td=h obj[:prev_rate]
24
+ %td=h obj[:prev_span_min]
25
+ %td=h obj[:prev_span_max]
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'edamame'
5
+ require 'monkeyshines/monitor'
6
+ require 'monkeyshines/utils/trollop'
7
+ require File.dirname(__FILE__)+'/edamame_util_opts'
8
+
9
+ pq = Edamame::PersistentQueue.new( :tube => CONFIG[:tube],
10
+ :queue => { :type => 'BeanstalkQueue', :uris => [CONFIG[:queue]] },
11
+ :store => { :type => 'TyrantStore', :uri => CONFIG[:store] }
12
+ )
13
+
14
+ periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 1000, :time => 30)
15
+ pq.queue.empty_all do |job|
16
+ periodic_log.periodically{ [ job.tube, job.priority, job.delay, job.scheduling, job.obj['key'] ] }
17
+ end
18
+ pq.store.each do |key, val|
19
+ pq.store.db.out key
20
+ end
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env bash
2
+ ps aux | egrep '(beanstalk|ttserver|god|ruby|scrape|shotgun|thin)' | egrep -v ' grep |mdns://|^_lp' | cut -c 1-200 | sort -k11
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'edamame'
5
+ require 'monkeyshines/monitor'
6
+
7
+ pq = Edamame::PersistentQueue.new(
8
+ :tube => ARGV[0],
9
+ :queue => { :type => 'BeanstalkQueue', :uris => ['localhost:11210'] },
10
+ :store => { :type => 'TyrantStore', :uri => ':11212' }
11
+ )
12
+
13
+ p pq.stats
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'edamame'
5
+ require 'monkeyshines/monitor'
6
+ require 'monkeyshines/utils/trollop'
7
+ require 'pathname'
8
+ require File.dirname(__FILE__)+'/edamame_util_opts'
9
+ Log = Monkeyshines.logger
10
+
11
+ pq = Edamame::PersistentQueue.new( :tube => CONFIG[:tube],
12
+ :queue => { :type => 'BeanstalkQueue', :uris => [CONFIG[:queue]] },
13
+ :store => { :type => 'TyrantStore', :uri => CONFIG[:store] }
14
+ )
15
+
16
+ periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 1, :time => 30)
17
+ pq.load do |job|
18
+ obj = job.obj rescue nil
19
+
20
+ periodic_log.periodically{ [ pq.store.size, job.loggable, job.class, obj.inspect ] }
21
+ end
@@ -0,0 +1,10 @@
1
+
2
+ CONFIG = Trollop::options do
3
+ opt :queue, 'host:port for the beanstalkd queue', :type => String, :required => true
4
+ opt :store, 'host:port for the backing store', :type => String, :required => true
5
+ opt :handle, 'label for this scrape', :type => String, :required => true
6
+ end
7
+ CONFIG[:store].gsub!(/^localhost:/, ':') # store must *not* have localhost:
8
+ CONFIG[:queue].gsub!(/^:/, 'localhost:') # queue must have localhost:
9
+ CONFIG[:tube] = (CONFIG[:handle] || 'default').gsub(/[^A-Z0-9a-z\-]+/,'')
10
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'json'
5
+ require 'edamame'
6
+
7
+ broker = Edamame::Broker.new(
8
+ :queue => { :type => 'BeanstalkQueue', :uris => ['localhost:11210'] },
9
+ :store => { :type => 'TyrantStore', :uri => ':11212' }
10
+ )
11
+
12
+ broker.work do |job|
13
+ Log.info [job, job.scheduling, job.stats, job.obj].inspect
14
+ end
@@ -0,0 +1,29 @@
1
+ require 'beanstalk-client'
2
+ require 'wukong/extensions'
3
+ require 'monkeyshines/utils/factory_module'
4
+ require 'monkeyshines/utils/logger'
5
+ require 'edamame/scheduling'
6
+ require 'edamame/job'
7
+ require 'edamame/queue'
8
+ require 'edamame/store'
9
+
10
+ # Edamame combines the Beanstalk priority queue with a Tokyo Tyrant database to
11
+ # produce a persistent distributed priority job queue system.
12
+ #
13
+ # * fast, scalable, lightweight and distributed
14
+ # * persistent and recoverable
15
+ # * scalable up to your memory limits
16
+ # * queryable and enumerable jobs
17
+ # * named jobs
18
+ # * reasonably-good availability.
19
+ #
20
+ # Like beanstalk, it is a job queue, not just a message queue:
21
+ # * priority job scheduling, not just FIFO
22
+ # * Supports multiple queues ('tubes')
23
+ # * reliable scheduling: jobs that time out are re-assigned
24
+ #
25
+ # You should start by looking at [Edamame::PersistentQueue]
26
+ module Edamame
27
+ autoload :PersistentQueue, 'edamame/persistent_queue'
28
+ autoload :Broker, 'edamame/broker'
29
+ end
@@ -0,0 +1,38 @@
1
+ module Edamame
2
+ #
3
+ # Repeatedly poll the queue for jobs and dispatch them to a worker loop
4
+ #
5
+ # Those jobs can be rescheduled (with updated parameters) for later
6
+ # re-processing.
7
+ #
8
+ class Broker < PersistentQueue
9
+ # Enter the work loop
10
+ def work timeout=nil, klass=nil, &block
11
+ loop do
12
+ job = reserve(timeout, klass) or break
13
+ result = block.call(job)
14
+ reschedule job
15
+ end
16
+ end
17
+
18
+ # Inserts the job back into the queue at its specified delay --
19
+ # or, if delay is nil, removes the job from the queue
20
+ #
21
+ # You'll probably want to use Edamame::Scheduling with all this
22
+ def reschedule job
23
+ delay = job.scheduling.delay
24
+ if delay
25
+ release job
26
+ else
27
+ log_action 'deleting', job
28
+ delete job
29
+ end
30
+ end
31
+
32
+ # Log info about an action on a job
33
+ def log_action action, job, *stuff
34
+ Log.info [action, job.loggable, *stuff].flatten.join("\t")
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,114 @@
1
+ module Edamame
2
+ #
3
+ #
4
+ # id, name, body, timeouts, time-left, age, state, delay, pri, ttr
5
+ #
6
+ #
7
+ # * A job, pulled from the queue: it is connected to its beanstalk presence
8
+ # body contains
9
+ # ** obj
10
+ # ** scheduling
11
+ # ** stats
12
+ #
13
+ # * A DB job
14
+ # body contains
15
+ # ** tube, priority, ttr, state
16
+ # ** obj
17
+ # ** scheduling
18
+ # ** stats
19
+ class Job < Struct.new(
20
+ :tube, :priority, :ttr, :state,
21
+ :scheduling, :obj
22
+ )
23
+ # connection back to the job queue's instance of this job
24
+ attr_accessor :qjob
25
+
26
+ DEFAULT_OPTIONS = {
27
+ 'priority' => 65536,
28
+ 'ttr' => 120,
29
+ 'state' => 1,
30
+ 'scheduling' => Edamame::Scheduling::Once.new()
31
+ }
32
+
33
+ # attr_accessor :runs, :failures, :prev_run_at
34
+ def initialize *args
35
+ super *args
36
+ DEFAULT_OPTIONS.each{|key,val| self[key] ||= val }
37
+ [:priority, :ttr, :state].each{|key| self[key] = self[key].to_i }
38
+ case self.scheduling
39
+ when String
40
+ scheduling_hash = YAML.load(self.scheduling) rescue nil
41
+ self.scheduling = Scheduling.from_hash(scheduling_hash) if scheduling_hash
42
+ when Hash
43
+ self.scheduling = Scheduling.from_hash(scheduling)
44
+ else
45
+ # else it should behave like a scheduling
46
+ end
47
+ if self.obj.is_a?(String) then self.obj = YAML.load(self.obj) rescue nil ; end
48
+ end
49
+
50
+ def key
51
+ key = (obj.respond_to?(:key) ? obj.key : (obj[:key]||obj['key']))
52
+ [ tube, key ].join('-')
53
+ end
54
+
55
+ #
56
+ def since_last
57
+ scheduling.last_run - Time.now
58
+ end
59
+
60
+ #
61
+ # Delegation to scheduling strategy.
62
+ #
63
+ def prev_max() self.scheduling.prev_max end
64
+ def prev_max=(val) self.scheduling.prev_max = val end
65
+ def prev_items() self.scheduling.prev_items end
66
+ def prev_items=(val) self.scheduling.prev_items = val end
67
+ def prev_items_rate() self.scheduling.prev_items_rate end
68
+ def prev_items_rate=(val) self.scheduling.prev_items_rate = val end
69
+ def delay() self.scheduling.delay end
70
+ def delay=(val) self.scheduling.delay = val end
71
+ def last_run() self.scheduling.last_run end
72
+
73
+ # Override this for rescheduling
74
+ def update!
75
+ scheduling.total_runs = scheduling.total_runs.to_i + qjob.stats['releases']
76
+ scheduling.last_run = Time.now
77
+ end
78
+
79
+ # Fields suitable for emission as a log line.
80
+ def loggable
81
+ "%-15s\t%7d\t%7.2f\t%-23s" % [tube, priority, delay, key]
82
+ end
83
+
84
+ def to_hash flatten=true
85
+ hsh = super()
86
+ hsh["scheduling"] = scheduling.to_hash
87
+ hsh["obj"] = obj.to_hash
88
+ if flatten
89
+ hsh["scheduling"] = hsh['scheduling'].to_yaml
90
+ hsh["obj"] = hsh['obj'].to_yaml
91
+ end
92
+ hsh
93
+ end
94
+ end
95
+ end
96
+
97
+ Beanstalk::Job.class_eval do
98
+ def key
99
+ body
100
+ end
101
+
102
+ def priority
103
+ pri
104
+ end
105
+
106
+ def tube
107
+ stats['tube']
108
+ end
109
+
110
+ # Fields suitable for emission as a log line.
111
+ def loggable
112
+ "%-15s\t%7d\t%7.2f\t%-23s" % [tube, priority, delay, key]
113
+ end
114
+ end