edamame 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.textile +20 -0
- data/README.textile +90 -0
- data/app/edamame_san/config.ru +4 -0
- data/app/edamame_san/config.yml +17 -0
- data/app/edamame_san/edamame_san.rb +71 -0
- data/app/edamame_san/public/favicon.ico +0 -0
- data/app/edamame_san/public/images/edamame_logo.icns +0 -0
- data/app/edamame_san/public/images/edamame_logo.ico +0 -0
- data/app/edamame_san/public/images/edamame_logo.png +0 -0
- data/app/edamame_san/public/images/edamame_logo_2.icns +0 -0
- data/app/edamame_san/public/javascripts/application.js +8 -0
- data/app/edamame_san/public/javascripts/jquery/jquery-ui.js +8694 -0
- data/app/edamame_san/public/javascripts/jquery/jquery.js +4376 -0
- data/app/edamame_san/public/stylesheets/application.css +32 -0
- data/app/edamame_san/public/stylesheets/layout.css +88 -0
- data/app/edamame_san/views/layout.haml +13 -0
- data/app/edamame_san/views/load.haml +37 -0
- data/app/edamame_san/views/root.haml +25 -0
- data/bin/edamame-nuke +20 -0
- data/bin/edamame-ps +2 -0
- data/bin/edamame-stats +13 -0
- data/bin/edamame-sync +21 -0
- data/bin/edamame_util_opts.rb +10 -0
- data/bin/test_run.rb +14 -0
- data/lib/edamame.rb +29 -0
- data/lib/edamame/broker.rb +38 -0
- data/lib/edamame/job.rb +114 -0
- data/lib/edamame/monitoring.rb +7 -0
- data/lib/edamame/monitoring/README-god.textile +54 -0
- data/lib/edamame/monitoring/beanstalkd_god.rb +28 -0
- data/lib/edamame/monitoring/god_email.rb +45 -0
- data/lib/edamame/monitoring/god_process.rb +205 -0
- data/lib/edamame/monitoring/process_groups.rb +32 -0
- data/lib/edamame/monitoring/sinatra_god.rb +34 -0
- data/lib/edamame/monitoring/tyrant_god.rb +59 -0
- data/lib/edamame/persistent_queue.rb +152 -0
- data/lib/edamame/queue.rb +6 -0
- data/lib/edamame/queue/beanstalk.rb +134 -0
- data/lib/edamame/scheduling.rb +79 -0
- data/lib/edamame/store.rb +8 -0
- data/lib/edamame/store/base.rb +62 -0
- data/lib/edamame/store/tyrant_store.rb +49 -0
- data/lib/methods.txt +94 -0
- data/spec/edamame_spec.rb +7 -0
- data/spec/spec_helper.rb +10 -0
- data/utils/god/edamame.god +36 -0
- data/utils/god/edamame.yaml +61 -0
- data/utils/god/god-etc-init-dot-d-example +40 -0
- data/utils/god/god.conf +22 -0
- data/utils/god/god_site_config.rb +4 -0
- data/utils/god/wuclan.god +36 -0
- data/utils/simulation/Add Percent Variation.vi +0 -0
- data/utils/simulation/Harmonic Average.vi +0 -0
- data/utils/simulation/Rescheduling Simulation.aliases +3 -0
- data/utils/simulation/Rescheduling Simulation.lvlps +3 -0
- data/utils/simulation/Rescheduling Simulation.lvproj +22 -0
- data/utils/simulation/Rescheduling.vi +0 -0
- data/utils/simulation/Weighted Average.vi +0 -0
- metadata +147 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
/* ===========================================================================
|
2
|
+
Default styling for the upload bar
|
3
|
+
*/
|
4
|
+
|
5
|
+
body { margin: 0px ; }
|
6
|
+
|
7
|
+
.bar {
|
8
|
+
width: 300px;
|
9
|
+
}
|
10
|
+
#progress {
|
11
|
+
border: 1px solid #222;
|
12
|
+
display: block;
|
13
|
+
float: left;
|
14
|
+
margin-right: 0.25em;
|
15
|
+
}
|
16
|
+
#progressbar {
|
17
|
+
width: 0px;
|
18
|
+
height: 24px;
|
19
|
+
}
|
20
|
+
|
21
|
+
#progress { background: #eee; }
|
22
|
+
#progress #progressbar { background: #bbf; }
|
23
|
+
#progress.working { background: #eef; }
|
24
|
+
#progress.success #progressbar { background: #cfd; }
|
25
|
+
#progress.error { background: #fcc; }
|
26
|
+
#progress.error #progressbar { background: #fcd; }
|
27
|
+
|
28
|
+
iframe.yuploader {
|
29
|
+
border: 0px none white;
|
30
|
+
margin: 0px;
|
31
|
+
padding: 0px;
|
32
|
+
}
|
@@ -0,0 +1,88 @@
|
|
1
|
+
|
2
|
+
/*
|
3
|
+
Page layout
|
4
|
+
*/
|
5
|
+
|
6
|
+
body {
|
7
|
+
background-color: #4B7399;
|
8
|
+
font-family: Verdana, Helvetica, Arial;
|
9
|
+
font-size: 14px;
|
10
|
+
padding: 0;
|
11
|
+
margin: 0;
|
12
|
+
}
|
13
|
+
|
14
|
+
a img {
|
15
|
+
border: none;
|
16
|
+
}
|
17
|
+
|
18
|
+
a {
|
19
|
+
color: #0000FF;
|
20
|
+
}
|
21
|
+
|
22
|
+
.clear {
|
23
|
+
clear: both;
|
24
|
+
height: 0;
|
25
|
+
overflow: hidden;
|
26
|
+
}
|
27
|
+
|
28
|
+
#container {
|
29
|
+
width: 75%;
|
30
|
+
margin: 0 auto;
|
31
|
+
background-color: #FFF;
|
32
|
+
padding: 20px 40px;
|
33
|
+
border: solid 1px black;
|
34
|
+
margin-top: 20px;
|
35
|
+
}
|
36
|
+
|
37
|
+
#flash_notice, #flash_error {
|
38
|
+
padding: 5px 8px;
|
39
|
+
margin: 10px 0;
|
40
|
+
}
|
41
|
+
|
42
|
+
#flash_notice {
|
43
|
+
background-color: #CFC;
|
44
|
+
border: solid 1px #6C6;
|
45
|
+
}
|
46
|
+
|
47
|
+
#flash_error {
|
48
|
+
background-color: #FCC;
|
49
|
+
border: solid 1px #C66;
|
50
|
+
}
|
51
|
+
|
52
|
+
.fieldWithErrors {
|
53
|
+
display: inline;
|
54
|
+
}
|
55
|
+
|
56
|
+
#errorExplanation {
|
57
|
+
width: 400px;
|
58
|
+
border: 2px solid #CF0000;
|
59
|
+
padding: 0px;
|
60
|
+
padding-bottom: 12px;
|
61
|
+
margin-bottom: 20px;
|
62
|
+
background-color: #f0f0f0;
|
63
|
+
}
|
64
|
+
|
65
|
+
#errorExplanation h2 {
|
66
|
+
text-align: left;
|
67
|
+
font-weight: bold;
|
68
|
+
padding: 5px 5px 5px 15px;
|
69
|
+
font-size: 12px;
|
70
|
+
margin: 0;
|
71
|
+
background-color: #c00;
|
72
|
+
color: #fff;
|
73
|
+
}
|
74
|
+
|
75
|
+
#errorExplanation p {
|
76
|
+
color: #333;
|
77
|
+
margin-bottom: 0;
|
78
|
+
padding: 8px;
|
79
|
+
}
|
80
|
+
|
81
|
+
#errorExplanation ul {
|
82
|
+
margin: 2px 24px;
|
83
|
+
}
|
84
|
+
|
85
|
+
#errorExplanation ul li {
|
86
|
+
font-size: 12px;
|
87
|
+
list-style: disc;
|
88
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
!!! XML
|
2
|
+
!!! Strict
|
3
|
+
%html{ "xml:lang" => "en", :lang => "en", :xmlns => "http://www.w3.org/1999/xhtml" }
|
4
|
+
%head
|
5
|
+
%link{ :href => "/stylesheets/application.css", :rel => "stylesheet", :type => "text/css" }
|
6
|
+
%link{ :href => "/favicon.ico", :rel => "shortcut icon", :type => "image/x-icon" }
|
7
|
+
|
8
|
+
%body
|
9
|
+
#container
|
10
|
+
=yield
|
11
|
+
|
12
|
+
-# %script{ :type => "text/javascript", :src => "http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js" }
|
13
|
+
-# %script{ :type => "text/javascript", :src => "http://ajax.googleapis.com/ajax/libs/jqueryui/1.7.1/jquery-ui.min.js" }
|
@@ -0,0 +1,37 @@
|
|
1
|
+
%style{ :type => 'text/css' }
|
2
|
+
@import url('/stylesheets/layout.css');
|
3
|
+
|
4
|
+
%p
|
5
|
+
|
6
|
+
Jobs in the edamame job store:
|
7
|
+
|
8
|
+
|
9
|
+
%table
|
10
|
+
%tr
|
11
|
+
%th query_term
|
12
|
+
%th priority
|
13
|
+
%th prev_items
|
14
|
+
%th prev_rate
|
15
|
+
%th prev_span_min
|
16
|
+
%th prev_span_max
|
17
|
+
- @dest_store.each_as(Edamame::Job) do |key, obj|
|
18
|
+
%tr
|
19
|
+
%td=h key.inspect
|
20
|
+
%td=h obj.inspect
|
21
|
+
%td=h obj.key
|
22
|
+
-# %td=h obj[:query_term]
|
23
|
+
-# %td=h obj[:priority]
|
24
|
+
-# %td=h obj[:prev_items]
|
25
|
+
-# %td=h obj[:prev_rate]
|
26
|
+
-# %td=h obj[:prev_span_min]
|
27
|
+
-# %td=h obj[:prev_span_max]
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
%style{ :type => 'text/css' }
|
2
|
+
@import url('/stylesheets/layout.css');
|
3
|
+
|
4
|
+
%p
|
5
|
+
|
6
|
+
Jobs in the edamame job store:
|
7
|
+
|
8
|
+
|
9
|
+
%table
|
10
|
+
%tr
|
11
|
+
%th query_term
|
12
|
+
%th priority
|
13
|
+
%th prev_items
|
14
|
+
%th prev_rate
|
15
|
+
%th prev_span_min
|
16
|
+
%th prev_span_max
|
17
|
+
- @store.each_as(Wuclan::Domains::Twitter::Scrape::TwitterSearchJob) do |key, obj|
|
18
|
+
%tr
|
19
|
+
-# %td=h key.inspect
|
20
|
+
%td=h obj[:query_term]
|
21
|
+
%td=h obj[:priority]
|
22
|
+
%td=h obj[:prev_items]
|
23
|
+
%td=h obj[:prev_rate]
|
24
|
+
%td=h obj[:prev_span_min]
|
25
|
+
%td=h obj[:prev_span_max]
|
data/bin/edamame-nuke
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../../lib'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'edamame'
|
5
|
+
require 'monkeyshines/monitor'
|
6
|
+
require 'monkeyshines/utils/trollop'
|
7
|
+
require File.dirname(__FILE__)+'/edamame_util_opts'
|
8
|
+
|
9
|
+
pq = Edamame::PersistentQueue.new( :tube => CONFIG[:tube],
|
10
|
+
:queue => { :type => 'BeanstalkQueue', :uris => [CONFIG[:queue]] },
|
11
|
+
:store => { :type => 'TyrantStore', :uri => CONFIG[:store] }
|
12
|
+
)
|
13
|
+
|
14
|
+
periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 1000, :time => 30)
|
15
|
+
pq.queue.empty_all do |job|
|
16
|
+
periodic_log.periodically{ [ job.tube, job.priority, job.delay, job.scheduling, job.obj['key'] ] }
|
17
|
+
end
|
18
|
+
pq.store.each do |key, val|
|
19
|
+
pq.store.db.out key
|
20
|
+
end
|
data/bin/edamame-ps
ADDED
data/bin/edamame-stats
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../../lib'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'edamame'
|
5
|
+
require 'monkeyshines/monitor'
|
6
|
+
|
7
|
+
pq = Edamame::PersistentQueue.new(
|
8
|
+
:tube => ARGV[0],
|
9
|
+
:queue => { :type => 'BeanstalkQueue', :uris => ['localhost:11210'] },
|
10
|
+
:store => { :type => 'TyrantStore', :uri => ':11212' }
|
11
|
+
)
|
12
|
+
|
13
|
+
p pq.stats
|
data/bin/edamame-sync
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../../lib'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'edamame'
|
5
|
+
require 'monkeyshines/monitor'
|
6
|
+
require 'monkeyshines/utils/trollop'
|
7
|
+
require 'pathname'
|
8
|
+
require File.dirname(__FILE__)+'/edamame_util_opts'
|
9
|
+
Log = Monkeyshines.logger
|
10
|
+
|
11
|
+
pq = Edamame::PersistentQueue.new( :tube => CONFIG[:tube],
|
12
|
+
:queue => { :type => 'BeanstalkQueue', :uris => [CONFIG[:queue]] },
|
13
|
+
:store => { :type => 'TyrantStore', :uri => CONFIG[:store] }
|
14
|
+
)
|
15
|
+
|
16
|
+
periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 1, :time => 30)
|
17
|
+
pq.load do |job|
|
18
|
+
obj = job.obj rescue nil
|
19
|
+
|
20
|
+
periodic_log.periodically{ [ pq.store.size, job.loggable, job.class, obj.inspect ] }
|
21
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
|
2
|
+
CONFIG = Trollop::options do
|
3
|
+
opt :queue, 'host:port for the beanstalkd queue', :type => String, :required => true
|
4
|
+
opt :store, 'host:port for the backing store', :type => String, :required => true
|
5
|
+
opt :handle, 'label for this scrape', :type => String, :required => true
|
6
|
+
end
|
7
|
+
CONFIG[:store].gsub!(/^localhost:/, ':') # store must *not* have localhost:
|
8
|
+
CONFIG[:queue].gsub!(/^:/, 'localhost:') # queue must have localhost:
|
9
|
+
CONFIG[:tube] = (CONFIG[:handle] || 'default').gsub(/[^A-Z0-9a-z\-]+/,'')
|
10
|
+
|
data/bin/test_run.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../../lib'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'json'
|
5
|
+
require 'edamame'
|
6
|
+
|
7
|
+
broker = Edamame::Broker.new(
|
8
|
+
:queue => { :type => 'BeanstalkQueue', :uris => ['localhost:11210'] },
|
9
|
+
:store => { :type => 'TyrantStore', :uri => ':11212' }
|
10
|
+
)
|
11
|
+
|
12
|
+
broker.work do |job|
|
13
|
+
Log.info [job, job.scheduling, job.stats, job.obj].inspect
|
14
|
+
end
|
data/lib/edamame.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'beanstalk-client'
|
2
|
+
require 'wukong/extensions'
|
3
|
+
require 'monkeyshines/utils/factory_module'
|
4
|
+
require 'monkeyshines/utils/logger'
|
5
|
+
require 'edamame/scheduling'
|
6
|
+
require 'edamame/job'
|
7
|
+
require 'edamame/queue'
|
8
|
+
require 'edamame/store'
|
9
|
+
|
10
|
+
# Edamame combines the Beanstalk priority queue with a Tokyo Tyrant database to
|
11
|
+
# produce a persistent distributed priority job queue system.
|
12
|
+
#
|
13
|
+
# * fast, scalable, lightweight and distributed
|
14
|
+
# * persistent and recoverable
|
15
|
+
# * scalable up to your memory limits
|
16
|
+
# * queryable and enumerable jobs
|
17
|
+
# * named jobs
|
18
|
+
# * reasonably-good availability.
|
19
|
+
#
|
20
|
+
# Like beanstalk, it is a job queue, not just a message queue:
|
21
|
+
# * priority job scheduling, not just FIFO
|
22
|
+
# * Supports multiple queues ('tubes')
|
23
|
+
# * reliable scheduling: jobs that time out are re-assigned
|
24
|
+
#
|
25
|
+
# You should start by looking at [Edamame::PersistentQueue]
|
26
|
+
module Edamame
|
27
|
+
autoload :PersistentQueue, 'edamame/persistent_queue'
|
28
|
+
autoload :Broker, 'edamame/broker'
|
29
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Edamame
|
2
|
+
#
|
3
|
+
# Repeatedly poll the queue for jobs and dispatch them to a worker loop
|
4
|
+
#
|
5
|
+
# Those jobs can be rescheduled (with updated parameters) for later
|
6
|
+
# re-processing.
|
7
|
+
#
|
8
|
+
class Broker < PersistentQueue
|
9
|
+
# Enter the work loop
|
10
|
+
# Reserves jobs one at a time, hands each one to the given block, then
# reschedules it. Stops as soon as +reserve+ returns nil or false.
#
# timeout:: passed straight through to +reserve+
# klass::   passed straight through to +reserve+
# block::   called once per reserved job, with the job as its only argument
def work timeout=nil, klass=nil, &block
  loop do
    job = reserve(timeout, klass)
    break unless job
    # The block's return value is not used; the job itself carries whatever
    # scheduling state +reschedule+ reads.
    block.call(job)
    reschedule job
  end
end
|
17
|
+
|
18
|
+
# Inserts the job back into the queue at its specified delay --
|
19
|
+
# or, if delay is nil, remove the job from the queue
|
20
|
+
#
|
21
|
+
# You'll probably want to use Edamame::Scheduling with all this
|
22
|
+
# Releases the job when its scheduling reports a delay; otherwise logs a
# 'deleting' action and deletes the job.
def reschedule job
  return release(job) if job.scheduling.delay
  log_action 'deleting', job
  delete job
end
|
31
|
+
|
32
|
+
# Log info about an action on a job
|
33
|
+
# Emits one tab-separated Log.info line: the action, the job's loggable
# string, then any extra values (nested arrays are flattened first).
def log_action action, job, *stuff
  fields = [action, job.loggable].concat(stuff)
  Log.info fields.flatten.join("\t")
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
data/lib/edamame/job.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
module Edamame
|
2
|
+
#
|
3
|
+
#
|
4
|
+
# id, name, body, timeouts, time-left, age, state, delay, pri, ttr
|
5
|
+
#
|
6
|
+
#
|
7
|
+
# * A job, pulled from the queue: it is connected to its beanstalk presence
|
8
|
+
# body contains
|
9
|
+
# ** obj
|
10
|
+
# ** scheduling
|
11
|
+
# ** stats
|
12
|
+
#
|
13
|
+
# * A DB job
|
14
|
+
# body contains
|
15
|
+
# ** tube, priority, ttr, state
|
16
|
+
# ** obj
|
17
|
+
# ** scheduling
|
18
|
+
# ** stats
|
19
|
+
class Job < Struct.new(
|
20
|
+
:tube, :priority, :ttr, :state,
|
21
|
+
:scheduling, :obj
|
22
|
+
)
|
23
|
+
# connection back to the job queue's instance of this job
|
24
|
+
attr_accessor :qjob
|
25
|
+
|
26
|
+
DEFAULT_OPTIONS = {
|
27
|
+
'priority' => 65536,
|
28
|
+
'ttr' => 120,
|
29
|
+
'state' => 1,
|
30
|
+
'scheduling' => Edamame::Scheduling::Once.new()
|
31
|
+
}
|
32
|
+
|
33
|
+
# attr_accessor :runs, :failures, :prev_run_at
|
34
|
+
# Builds a job from the positional Struct fields (tube, priority, ttr, state,
# scheduling, obj) and then normalizes them:
# * fields left unset take their value from DEFAULT_OPTIONS
# * priority, ttr and state are coerced with to_i
# * a scheduling given as a YAML String or as a Hash is converted with
#   Scheduling.from_hash (a String that fails to parse is left as-is)
# * an obj given as a String is parsed as YAML
def initialize *args
  super(*args)
  DEFAULT_OPTIONS.each do |key, val|
    next if self[key]
    # DEFAULT_OPTIONS holds a single scheduling instance, and update! writes
    # to a job's scheduling. Give each job its own copy so that one job's
    # run bookkeeping cannot leak into every other defaulted job.
    self[key] = (key == 'scheduling' ? val.dup : val)
  end
  [:priority, :ttr, :state].each{|key| self[key] = self[key].to_i }
  # NOTE(review): YAML.load can instantiate arbitrary objects -- confirm the
  # queue/store only ever holds trusted data.
  case self.scheduling
  when String
    scheduling_hash = YAML.load(self.scheduling) rescue nil
    self.scheduling = Scheduling.from_hash(scheduling_hash) if scheduling_hash
  when Hash
    self.scheduling = Scheduling.from_hash(scheduling)
  else
    # else it should behave like a scheduling
  end
  if self.obj.is_a?(String) then self.obj = YAML.load(self.obj) rescue nil ; end
end
|
49
|
+
|
50
|
+
# Identifier for this job: the tube name and the payload's own key, joined
# with '-'.
#
# The payload key is taken from obj[:key] / obj['key'] for Hash payloads and
# from obj.key for any other payload that exposes a public +key+ method.
def key
  obj_key =
    if obj.is_a?(Hash)
      # Hash#key (Ruby 1.9+) is a reverse lookup that requires an argument,
      # so a Hash answers respond_to?(:key) yet obj.key raises ArgumentError.
      # Hashes must be looked up by their :key / 'key' entry instead.
      obj[:key] || obj['key']
    elsif obj.respond_to?(:key)
      obj.key
    else
      obj[:key] || obj['key']
    end
  [ tube, obj_key ].join('-')
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# Seconds elapsed since the scheduling's last_run (positive for a run in the
# past), or nil when the scheduling has no last_run recorded.
def since_last
  last = scheduling.last_run
  return nil unless last
  # Elapsed time is now minus then; the reverse order yields a negative span.
  Time.now - last
end
|
59
|
+
|
60
|
+
#
|
61
|
+
# Delegation to scheduling strategy.
|
62
|
+
#
|
63
|
+
def prev_max() self.scheduling.prev_max end
|
64
|
+
def prev_max=(val) self.scheduling.prev_max = val end
|
65
|
+
def prev_items() self.scheduling.prev_items end
|
66
|
+
def prev_items=(val) self.scheduling.prev_items = val end
|
67
|
+
def prev_items_rate() self.scheduling.prev_items_rate end
|
68
|
+
def prev_items_rate=(val) self.scheduling.prev_items_rate = val end
|
69
|
+
def delay() self.scheduling.delay end
|
70
|
+
def delay=(val) self.scheduling.delay = val end
|
71
|
+
def last_run() self.scheduling.last_run end
|
72
|
+
|
73
|
+
# Override this for rescheduling
|
74
|
+
def update!
|
75
|
+
scheduling.total_runs = scheduling.total_runs.to_i + qjob.stats['releases']
|
76
|
+
scheduling.last_run = Time.now
|
77
|
+
end
|
78
|
+
|
79
|
+
# Fields suitable for emission as a log line.
|
80
|
+
# Returns a tab-separated string of tube, priority, delay and key, padded to
# fixed column widths.
def loggable
  # delay is nil for a job that will not run again, and that is the case in
  # which the broker logs a deletion; '%7.2f' raises TypeError on nil, so
  # show such a job with a zero delay instead.
  "%-15s\t%7d\t%7.2f\t%-23s" % [tube, priority, (delay || 0.0), key]
end
|
83
|
+
|
84
|
+
# Hash form of the job. Starts from the parent's to_hash (presumably the
# Struct fields keyed by name -- confirm against the extension providing it)
# and replaces the "scheduling" and "obj" entries with their own to_hash.
#
# flatten:: when true (the default) those two entries are further serialized
#           to YAML strings; when false they stay as hashes.
def to_hash flatten=true
  hsh = super()
  sched_hash = scheduling.to_hash
  obj_hash   = obj.to_hash
  hsh["scheduling"] = flatten ? sched_hash.to_yaml : sched_hash
  hsh["obj"]        = flatten ? obj_hash.to_yaml   : obj_hash
  hsh
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
Beanstalk::Job.class_eval do
|
98
|
+
def key
|
99
|
+
body
|
100
|
+
end
|
101
|
+
|
102
|
+
def priority
|
103
|
+
pri
|
104
|
+
end
|
105
|
+
|
106
|
+
def tube
|
107
|
+
stats['tube']
|
108
|
+
end
|
109
|
+
|
110
|
+
# Fields suitable for emission as a log line.
|
111
|
+
def loggable
|
112
|
+
"%-15s\t%7d\t%7.2f\t%-23s" % [tube, priority, delay, key]
|
113
|
+
end
|
114
|
+
end
|