edamame 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +90 -0
  3. data/app/edamame_san/config.ru +4 -0
  4. data/app/edamame_san/config.yml +17 -0
  5. data/app/edamame_san/edamame_san.rb +71 -0
  6. data/app/edamame_san/public/favicon.ico +0 -0
  7. data/app/edamame_san/public/images/edamame_logo.icns +0 -0
  8. data/app/edamame_san/public/images/edamame_logo.ico +0 -0
  9. data/app/edamame_san/public/images/edamame_logo.png +0 -0
  10. data/app/edamame_san/public/images/edamame_logo_2.icns +0 -0
  11. data/app/edamame_san/public/javascripts/application.js +8 -0
  12. data/app/edamame_san/public/javascripts/jquery/jquery-ui.js +8694 -0
  13. data/app/edamame_san/public/javascripts/jquery/jquery.js +4376 -0
  14. data/app/edamame_san/public/stylesheets/application.css +32 -0
  15. data/app/edamame_san/public/stylesheets/layout.css +88 -0
  16. data/app/edamame_san/views/layout.haml +13 -0
  17. data/app/edamame_san/views/load.haml +37 -0
  18. data/app/edamame_san/views/root.haml +25 -0
  19. data/bin/edamame-nuke +20 -0
  20. data/bin/edamame-ps +2 -0
  21. data/bin/edamame-stats +13 -0
  22. data/bin/edamame-sync +21 -0
  23. data/bin/edamame_util_opts.rb +10 -0
  24. data/bin/test_run.rb +14 -0
  25. data/lib/edamame.rb +29 -0
  26. data/lib/edamame/broker.rb +38 -0
  27. data/lib/edamame/job.rb +114 -0
  28. data/lib/edamame/monitoring.rb +7 -0
  29. data/lib/edamame/monitoring/README-god.textile +54 -0
  30. data/lib/edamame/monitoring/beanstalkd_god.rb +28 -0
  31. data/lib/edamame/monitoring/god_email.rb +45 -0
  32. data/lib/edamame/monitoring/god_process.rb +205 -0
  33. data/lib/edamame/monitoring/process_groups.rb +32 -0
  34. data/lib/edamame/monitoring/sinatra_god.rb +34 -0
  35. data/lib/edamame/monitoring/tyrant_god.rb +59 -0
  36. data/lib/edamame/persistent_queue.rb +152 -0
  37. data/lib/edamame/queue.rb +6 -0
  38. data/lib/edamame/queue/beanstalk.rb +134 -0
  39. data/lib/edamame/scheduling.rb +79 -0
  40. data/lib/edamame/store.rb +8 -0
  41. data/lib/edamame/store/base.rb +62 -0
  42. data/lib/edamame/store/tyrant_store.rb +49 -0
  43. data/lib/methods.txt +94 -0
  44. data/spec/edamame_spec.rb +7 -0
  45. data/spec/spec_helper.rb +10 -0
  46. data/utils/god/edamame.god +36 -0
  47. data/utils/god/edamame.yaml +61 -0
  48. data/utils/god/god-etc-init-dot-d-example +40 -0
  49. data/utils/god/god.conf +22 -0
  50. data/utils/god/god_site_config.rb +4 -0
  51. data/utils/god/wuclan.god +36 -0
  52. data/utils/simulation/Add Percent Variation.vi +0 -0
  53. data/utils/simulation/Harmonic Average.vi +0 -0
  54. data/utils/simulation/Rescheduling Simulation.aliases +3 -0
  55. data/utils/simulation/Rescheduling Simulation.lvlps +3 -0
  56. data/utils/simulation/Rescheduling Simulation.lvproj +22 -0
  57. data/utils/simulation/Rescheduling.vi +0 -0
  58. data/utils/simulation/Weighted Average.vi +0 -0
  59. metadata +147 -0
@@ -0,0 +1,32 @@
1
+ /* ===========================================================================
2
+ Default styling for the upload bar
3
+ */
4
+
5
+ body { margin: 0px ; }
6
+
7
+ .bar {
8
+ width: 300px;
9
+ }
10
+ #progress {
11
+ border: 1px solid #222;
12
+ display: block;
13
+ float: left;
14
+ margin-right: 0.25em;
15
+ }
16
+ #progressbar {
17
+ width: 0px;
18
+ height: 24px;
19
+ }
20
+
21
+ #progress { background: #eee; }
22
+ #progress #progressbar { background: #bbf; }
23
+ #progress.working { background: #eef; }
24
+ #progress.success #progressbar { background: #cfd; }
25
+ #progress.error { background: #fcc; }
26
+ #progress.error #progressbar { background: #fcd; }
27
+
28
+ iframe.yuploader {
29
+ border: 0px none white;
30
+ margin: 0px;
31
+ padding: 0px;
32
+ }
@@ -0,0 +1,88 @@
1
+
2
+ /*
3
+ Page layout
4
+ */
5
+
6
+ body {
7
+ background-color: #4B7399;
8
+ font-family: Verdana, Helvetica, Arial;
9
+ font-size: 14px;
10
+ padding: 0;
11
+ margin: 0;
12
+ }
13
+
14
+ a img {
15
+ border: none;
16
+ }
17
+
18
+ a {
19
+ color: #0000FF;
20
+ }
21
+
22
+ .clear {
23
+ clear: both;
24
+ height: 0;
25
+ overflow: hidden;
26
+ }
27
+
28
+ #container {
29
+ width: 75%;
30
+ margin: 0 auto;
31
+ background-color: #FFF;
32
+ padding: 20px 40px;
33
+ border: solid 1px black;
34
+ margin-top: 20px;
35
+ }
36
+
37
+ #flash_notice, #flash_error {
38
+ padding: 5px 8px;
39
+ margin: 10px 0;
40
+ }
41
+
42
+ #flash_notice {
43
+ background-color: #CFC;
44
+ border: solid 1px #6C6;
45
+ }
46
+
47
+ #flash_error {
48
+ background-color: #FCC;
49
+ border: solid 1px #C66;
50
+ }
51
+
52
+ .fieldWithErrors {
53
+ display: inline;
54
+ }
55
+
56
+ #errorExplanation {
57
+ width: 400px;
58
+ border: 2px solid #CF0000;
59
+ padding: 0px;
60
+ padding-bottom: 12px;
61
+ margin-bottom: 20px;
62
+ background-color: #f0f0f0;
63
+ }
64
+
65
+ #errorExplanation h2 {
66
+ text-align: left;
67
+ font-weight: bold;
68
+ padding: 5px 5px 5px 15px;
69
+ font-size: 12px;
70
+ margin: 0;
71
+ background-color: #c00;
72
+ color: #fff;
73
+ }
74
+
75
+ #errorExplanation p {
76
+ color: #333;
77
+ margin-bottom: 0;
78
+ padding: 8px;
79
+ }
80
+
81
+ #errorExplanation ul {
82
+ margin: 2px 24px;
83
+ }
84
+
85
+ #errorExplanation ul li {
86
+ font-size: 12px;
87
+ list-style: disc;
88
+ }
@@ -0,0 +1,13 @@
1
+ !!! XML
2
+ !!! Strict
3
+ %html{ "xml:lang" => "en", :lang => "en", :xmlns => "http://www.w3.org/1999/xhtml" }
4
+ %head
5
+ %link{ :href => "/stylesheets/application.css", :rel => "stylesheet", :type => "text/css" }
6
+ %link{ :href => "/favicon.ico", :rel => "shortcut icon", :type => "image/x-icon" }
7
+
8
+ %body
9
+ #container
10
+ =yield
11
+
12
+ -# %script{ :type => "text/javascript", :src => "http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js" }
13
+ -# %script{ :type => "text/javascript", :src => "http://ajax.googleapis.com/ajax/libs/jqueryui/1.7.1/jquery-ui.min.js" }
@@ -0,0 +1,37 @@
1
+ %style{ :type => 'text/css' }
2
+ @import url('/stylesheets/layout.css');
3
+
4
+ %p
5
+
6
+ Jobs in the edamame job store:
7
+
8
+
9
+ %table
10
+ %tr
11
+ %th query_term
12
+ %th priority
13
+ %th prev_items
14
+ %th prev_rate
15
+ %th prev_span_min
16
+ %th prev_span_max
17
+ - @dest_store.each_as(Edamame::Job) do |key, obj|
18
+ %tr
19
+ %td=h key.inspect
20
+ %td=h obj.inspect
21
+ %td=h obj.key
22
+ -# %td=h obj[:query_term]
23
+ -# %td=h obj[:priority]
24
+ -# %td=h obj[:prev_items]
25
+ -# %td=h obj[:prev_rate]
26
+ -# %td=h obj[:prev_span_min]
27
+ -# %td=h obj[:prev_span_max]
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
@@ -0,0 +1,25 @@
1
+ %style{ :type => 'text/css' }
2
+ @import url('/stylesheets/layout.css');
3
+
4
+ %p
5
+
6
+ Jobs in the edamame job store:
7
+
8
+
9
+ %table
10
+ %tr
11
+ %th query_term
12
+ %th priority
13
+ %th prev_items
14
+ %th prev_rate
15
+ %th prev_span_min
16
+ %th prev_span_max
17
+ - @store.each_as(Wuclan::Domains::Twitter::Scrape::TwitterSearchJob) do |key, obj|
18
+ %tr
19
+ -# %td=h key.inspect
20
+ %td=h obj[:query_term]
21
+ %td=h obj[:priority]
22
+ %td=h obj[:prev_items]
23
+ %td=h obj[:prev_rate]
24
+ %td=h obj[:prev_span_min]
25
+ %td=h obj[:prev_span_max]
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'edamame'
5
+ require 'monkeyshines/monitor'
6
+ require 'monkeyshines/utils/trollop'
7
+ require File.dirname(__FILE__)+'/edamame_util_opts'
8
+
9
# Open the persistent queue for the tube, queue and store given on the
# command line (see edamame_util_opts for how CONFIG is built).
queue_opts = { :type => 'BeanstalkQueue', :uris => [CONFIG[:queue]] }
store_opts = { :type => 'TyrantStore',    :uri  => CONFIG[:store] }
pq = Edamame::PersistentQueue.new(
  :tube  => CONFIG[:tube],
  :queue => queue_opts,
  :store => store_opts
  )

# Walk every job handed back by queue.empty_all, reporting on them via the
# periodic logger (configured with :iters => 1000, :time => 30).
# NOTE(review): empty_all presumably removes each job from the queue -- confirm.
periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 1000, :time => 30)
pq.queue.empty_all do |job|
  periodic_log.periodically do
    [job.tube, job.priority, job.delay, job.scheduling, job.obj['key']]
  end
end

# Finally call db.out on every key in the backing store.
pq.store.each { |key, _val| pq.store.db.out(key) }
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env bash
2
+ ps aux | egrep '(beanstalk|ttserver|god|ruby|scrape|shotgun|thin)' | egrep -v ' grep |mdns://|^_lp' | cut -c 1-200 | sort -k11
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'edamame'
5
+ require 'monkeyshines/monitor'
6
+
7
# Print the stats of the persistent queue for the tube named by the first
# command-line argument. The queue and store addresses are fixed.
queue_opts = { :type => 'BeanstalkQueue', :uris => ['localhost:11210'] }
store_opts = { :type => 'TyrantStore',    :uri  => ':11212' }
pq = Edamame::PersistentQueue.new(:tube => ARGV[0], :queue => queue_opts, :store => store_opts)

p(pq.stats)
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'edamame'
5
+ require 'monkeyshines/monitor'
6
+ require 'monkeyshines/utils/trollop'
7
+ require 'pathname'
8
+ require File.dirname(__FILE__)+'/edamame_util_opts'
9
Log = Monkeyshines.logger

# Open the persistent queue for the tube, queue and store given on the
# command line (see edamame_util_opts for how CONFIG is built).
queue_opts = { :type => 'BeanstalkQueue', :uris => [CONFIG[:queue]] }
store_opts = { :type => 'TyrantStore',    :uri  => CONFIG[:store] }
pq = Edamame::PersistentQueue.new(
  :tube  => CONFIG[:tube],
  :queue => queue_opts,
  :store => store_opts
  )

# Run pq.load, reporting on each job it yields via the periodic logger.
periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 1, :time => 30)
pq.load do |job|
  # A job whose payload cannot be read is still reported, with a nil payload.
  obj =
    begin
      job.obj
    rescue StandardError
      nil
    end

  periodic_log.periodically do
    [pq.store.size, job.loggable, job.class, obj.inspect]
  end
end
@@ -0,0 +1,10 @@
1
+
2
# Command-line options shared by the edamame utility scripts.
CONFIG = Trollop::options do
  opt(:queue,  'host:port for the beanstalkd queue', :type => String, :required => true)
  opt(:store,  'host:port for the backing store',    :type => String, :required => true)
  opt(:handle, 'label for this scrape',              :type => String, :required => true)
end

# Normalize the addresses in place:
# the store address must *not* start with localhost: ...
CONFIG[:store].gsub!(/^localhost:/, ':')
# ... while the queue address must start with localhost:
CONFIG[:queue].gsub!(/^:/, 'localhost:')

# The tube name is the handle with everything except letters, digits and
# dashes stripped out.
handle = CONFIG[:handle] || 'default'
CONFIG[:tube] = handle.gsub(/[^A-Z0-9a-z\-]+/,'')
10
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'
3
+ require 'rubygems'
4
+ require 'json'
5
+ require 'edamame'
6
+
7
# Start a broker against the fixed local queue and store, and log every job
# it hands over.
queue_opts = { :type => 'BeanstalkQueue', :uris => ['localhost:11210'] }
store_opts = { :type => 'TyrantStore',    :uri  => ':11212' }
broker = Edamame::Broker.new(:queue => queue_opts, :store => store_opts)

broker.work do |job|
  details = [job, job.scheduling, job.stats, job.obj]
  Log.info(details.inspect)
end
@@ -0,0 +1,29 @@
1
+ require 'beanstalk-client'
2
+ require 'wukong/extensions'
3
+ require 'monkeyshines/utils/factory_module'
4
+ require 'monkeyshines/utils/logger'
5
+ require 'edamame/scheduling'
6
+ require 'edamame/job'
7
+ require 'edamame/queue'
8
+ require 'edamame/store'
9
+
10
# Edamame pairs the Beanstalk priority queue with a Tokyo Tyrant database to
# give a persistent, distributed priority job queue system.
#
# * fast, scalable, lightweight and distributed
# * persistent and recoverable
# * scalable up to your memory limits
# * queryable and enumerable jobs
# * named jobs
# * reasonably-good availability.
#
# Like beanstalk, it is a job queue and not merely a message queue:
# * priority job scheduling, not just FIFO
# * supports multiple queues ('tubes')
# * reliable scheduling: jobs that time out are re-assigned
#
# Edamame::PersistentQueue is the place to start reading.
module Edamame
  # Both classes are registered for autoloading rather than required eagerly.
  autoload(:Broker,          'edamame/broker')
  autoload(:PersistentQueue, 'edamame/persistent_queue')
end
@@ -0,0 +1,38 @@
1
module Edamame
  #
  # Repeatedly polls the queue for jobs and hands each one to the caller's
  # block.
  #
  # Handled jobs can be rescheduled (with updated parameters) for later
  # re-processing.
  #
  class Broker < PersistentQueue
    # Enter the work loop: reserve a job, pass it to the block, reschedule it,
    # then go around again. +timeout+ and +klass+ are forwarded to +reserve+.
    # The loop stops as soon as +reserve+ returns nil or false.
    def work(timeout = nil, klass = nil, &block)
      loop do
        job = reserve(timeout, klass)
        break unless job
        block.call(job)
        reschedule(job)
      end
    end

    # Inserts the job back into the queue at its specified delay --
    # or, if delay is nil, logs the deletion and removes the job from the queue.
    #
    # You'll probably want to use Edamame::Scheduling with all this
    def reschedule(job)
      return release(job) if job.scheduling.delay

      log_action('deleting', job)
      delete(job)
    end

    # Log info about an action on a job: the action, the job's loggable fields
    # and any extra items, flattened and tab-separated.
    def log_action(action, job, *stuff)
      fields = [action, job.loggable, *stuff].flatten
      Log.info(fields.join("\t"))
    end
  end
end
@@ -0,0 +1,114 @@
1
module Edamame
  #
  # A job, as a Struct of: tube, priority, ttr, state, scheduling, obj.
  #
  #   id, name, body, timeouts, time-left, age, state, delay, pri, ttr
  #
  # * A job, pulled from the queue: it is connected to its beanstalk presence
  #   (see +qjob+); its body contains
  #   ** obj
  #   ** scheduling
  #   ** stats
  #
  # * A DB job; its body contains
  #   ** tube, priority, ttr, state
  #   ** obj
  #   ** scheduling
  #   ** stats
  #
  class Job < Struct.new(
      :tube, :priority, :ttr, :state,
      :scheduling, :obj
      )
    # connection back to the job queue's instance of this job
    attr_accessor :qjob

    # Values filled in for any field left nil (or false) by the constructor.
    DEFAULT_OPTIONS = {
      'priority'   => 65536,
      'ttr'        => 120,
      'state'      => 1,
      'scheduling' => Edamame::Scheduling::Once.new()
    }

    # Builds a job from positional Struct fields, then normalizes them:
    #
    # * missing fields take their DEFAULT_OPTIONS value;
    # * priority, ttr and state are coerced with +to_i+;
    # * a scheduling given as a YAML String or as a Hash is turned into a
    #   scheduling object via Scheduling.from_hash (a String that does not
    #   parse is left as-is);
    # * an obj given as a String is parsed as YAML (nil if it does not parse).
    #
    # NOTE(review): YAML.load is applied to whatever the caller passes in; if
    # these strings can come from an untrusted source, switch to a safe loader.
    def initialize(*args)
      super(*args)
      DEFAULT_OPTIONS.each do |field, default|
        # The default scheduling is a mutable object (update! and delay= write
        # to it), so every job gets its own copy rather than sharing the one
        # instance held in DEFAULT_OPTIONS.
        self[field] ||= (field == 'scheduling' ? default.dup : default)
      end
      [:priority, :ttr, :state].each { |field| self[field] = self[field].to_i }
      case scheduling
      when String
        scheduling_hash = yaml_or_nil(scheduling)
        self.scheduling = Scheduling.from_hash(scheduling_hash) if scheduling_hash
      when Hash
        self.scheduling = Scheduling.from_hash(scheduling)
      else
        # else it should behave like a scheduling
      end
      self.obj = yaml_or_nil(obj) if obj.is_a?(String)
    end

    # The job's name: its tube and its payload's key, joined by a dash.
    #
    # The payload key is read from obj[:key] / obj['key'] for Hash payloads and
    # from obj.key for objects exposing a +key+ method. Hashes are checked
    # first because Hash#key is a value-to-key lookup that needs an argument,
    # so calling it bare would raise ArgumentError.
    def key
      obj_key =
        if    obj.is_a?(Hash)         then obj[:key] || obj['key']
        elsif obj.respond_to?(:key)   then obj.key
        else                               obj[:key] || obj['key']
        end
      [tube, obj_key].join('-')
    end

    # Difference between the scheduling's last run and the current time.
    #
    # NOTE(review): this is last_run - now, i.e. negative for a run in the
    # past, which looks inverted for a method named since_last. Left unchanged
    # because callers are not visible from here -- confirm and flip if no
    # caller relies on the sign.
    def since_last
      scheduling.last_run - Time.now
    end

    #
    # Delegation to scheduling strategy.
    #
    def prev_max()             self.scheduling.prev_max               end
    def prev_max=(val)         self.scheduling.prev_max = val         end
    def prev_items()           self.scheduling.prev_items             end
    def prev_items=(val)       self.scheduling.prev_items = val       end
    def prev_items_rate()      self.scheduling.prev_items_rate        end
    def prev_items_rate=(val)  self.scheduling.prev_items_rate = val  end
    def delay()                self.scheduling.delay                  end
    def delay=(val)            self.scheduling.delay = val            end
    def last_run()             self.scheduling.last_run               end

    # Override this for rescheduling.
    #
    # Adds the queue job's 'releases' stat to the scheduling's run count and
    # stamps the scheduling's last run with the current time.
    def update!
      scheduling.total_runs = scheduling.total_runs.to_i + qjob.stats['releases']
      scheduling.last_run   = Time.now
    end

    # Fields suitable for emission as a log line: tube, priority, delay, key.
    #
    # A nil delay is printed as 0.00: Broker#reschedule logs a job precisely
    # when its delay is nil, and "%7.2f" cannot format nil.
    def loggable
      "%-15s\t%7d\t%7.2f\t%-23s" % [tube, priority, delay.to_f, key]
    end

    # Hash form of the job, with scheduling and obj converted via their own
    # +to_hash+. When +flatten+ is true (the default) those two entries are
    # further serialized to YAML strings.
    #
    # NOTE(review): relies on Struct#to_hash, which is presumably supplied by
    # one of the required extension libraries -- confirm.
    def to_hash(flatten = true)
      hsh = super()
      hsh["scheduling"] = scheduling.to_hash
      hsh["obj"]        = obj.to_hash
      if flatten
        hsh["scheduling"] = hsh['scheduling'].to_yaml
        hsh["obj"]        = hsh['obj'].to_yaml
      end
      hsh
    end

    private

    # Parse +text+ as YAML, giving nil instead of raising when that fails.
    def yaml_or_nil(text)
      YAML.load(text)
    rescue StandardError
      nil
    end
  end
end
96
+
97
# Extra readers on the queue library's job class, mirroring the names used by
# Edamame::Job (key, priority, tube, loggable).
Beanstalk::Job.class_eval do
  # The job body serves as its key.
  def key()       body           end

  # The job's +pri+ under the name +priority+.
  def priority()  pri            end

  # The tube, as reported in the job's stats.
  def tube()      stats['tube']  end

  # Fields suitable for emission as a log line: tube, priority, delay, key.
  def loggable
    fields = [tube, priority, delay, key]
    "%-15s\t%7d\t%7.2f\t%-23s" % fields
  end
end