starfish 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/LICENSE +10 -0
  2. data/README +136 -0
  3. data/Rakefile +89 -0
  4. data/bin/starfish +27 -0
  5. data/doc/classes/MapReduce/ActiveRecord/Base/Client.html +177 -0
  6. data/doc/classes/MapReduce/ActiveRecord/Base/Client.src/M000016.html +19 -0
  7. data/doc/classes/MapReduce/ActiveRecord/Base/Client.src/M000017.html +20 -0
  8. data/doc/classes/MapReduce/ActiveRecord/Base/Client.src/M000018.html +18 -0
  9. data/doc/classes/MapReduce/ActiveRecord/Base.html +170 -0
  10. data/doc/classes/MapReduce/ActiveRecord/Base.src/M000015.html +39 -0
  11. data/doc/classes/MapReduce/ActiveRecord.html +111 -0
  12. data/doc/classes/MapReduce/Array.html +105 -0
  13. data/doc/classes/MapReduce/File.html +105 -0
  14. data/doc/classes/MapReduce.html +395 -0
  15. data/doc/classes/MapReduce.src/M000001.html +31 -0
  16. data/doc/classes/MapReduce.src/M000002.html +18 -0
  17. data/doc/classes/MapReduce.src/M000003.html +25 -0
  18. data/doc/classes/MapReduce.src/M000004.html +21 -0
  19. data/doc/classes/MapReduce.src/M000005.html +18 -0
  20. data/doc/classes/MapReduce.src/M000006.html +18 -0
  21. data/doc/classes/MapReduce.src/M000007.html +19 -0
  22. data/doc/classes/MapReduce.src/M000008.html +18 -0
  23. data/doc/classes/MapReduce.src/M000009.html +19 -0
  24. data/doc/classes/MapReduce.src/M000010.html +23 -0
  25. data/doc/classes/MapReduce.src/M000011.html +24 -0
  26. data/doc/classes/MapReduce.src/M000012.html +23 -0
  27. data/doc/classes/MapReduce.src/M000013.html +18 -0
  28. data/doc/classes/MapReduce.src/M000014.html +26 -0
  29. data/doc/classes/MapReduceError.html +111 -0
  30. data/doc/classes/Starfish/RingFinger.html +131 -0
  31. data/doc/classes/Starfish/RingFinger.src/M000031.html +20 -0
  32. data/doc/classes/Starfish.html +357 -0
  33. data/doc/classes/Starfish.src/M000019.html +19 -0
  34. data/doc/classes/Starfish.src/M000020.html +18 -0
  35. data/doc/classes/Starfish.src/M000021.html +45 -0
  36. data/doc/classes/Starfish.src/M000022.html +71 -0
  37. data/doc/classes/Starfish.src/M000023.html +23 -0
  38. data/doc/classes/Starfish.src/M000024.html +36 -0
  39. data/doc/classes/Starfish.src/M000025.html +35 -0
  40. data/doc/classes/Starfish.src/M000026.html +18 -0
  41. data/doc/classes/Starfish.src/M000027.html +22 -0
  42. data/doc/classes/Starfish.src/M000028.html +18 -0
  43. data/doc/classes/Starfish.src/M000029.html +18 -0
  44. data/doc/classes/Starfish.src/M000030.html +18 -0
  45. data/doc/classes/StarfishError.html +111 -0
  46. data/doc/created.rid +1 -0
  47. data/doc/files/README.html +336 -0
  48. data/doc/files/lib/map_reduce/active_record_rb.html +109 -0
  49. data/doc/files/lib/map_reduce/array_rb.html +101 -0
  50. data/doc/files/lib/map_reduce/file_rb.html +101 -0
  51. data/doc/files/lib/map_reduce_rb.html +111 -0
  52. data/doc/files/lib/starfish_rb.html +114 -0
  53. data/doc/fr_class_index.html +36 -0
  54. data/doc/fr_file_index.html +32 -0
  55. data/doc/fr_method_index.html +57 -0
  56. data/doc/index.html +24 -0
  57. data/doc/rdoc-style.css +208 -0
  58. data/examples/foo.rb +20 -0
  59. data/examples/map_reduce/active_record.rb +10 -0
  60. data/examples/map_reduce/file.rb +11 -0
  61. data/lib/map_reduce/active_record.rb +98 -0
  62. data/lib/map_reduce/array.rb +4 -0
  63. data/lib/map_reduce/file.rb +4 -0
  64. data/lib/map_reduce.rb +121 -0
  65. data/lib/starfish.rb +200 -0
  66. metadata +124 -0
@@ -0,0 +1,208 @@
/*
 * Stylesheet for the generated HTML documentation pages.
 * NOTE(review): believed to be data/doc/rdoc-style.css from the file list
 * above (the hunk header carries no file name) -- confirm.
 */

body {
  font-family: Verdana,Arial,Helvetica,sans-serif;
  font-size: 90%;
  margin: 0;
  margin-left: 40px;
  padding: 0;
  background: white;
}

h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
h1 { font-size: 150%; }
h2,h3,h4 { margin-top: 1em; }

a { background: #eef; color: #039; text-decoration: none; }
a:hover { background: #039; color: #eef; }

/* Override the base stylesheet's Anchor inside a table cell */
td > a {
  background: transparent;
  color: #039;
  text-decoration: none;
}

/* and inside a section title */
.section-title > a {
  background: transparent;
  color: #eee;
  text-decoration: none;
}

/* === Structural elements =================================== */

/* The negative left margins below cancel the 40px left margin set on body. */
div#index {
  margin: 0;
  margin-left: -40px;
  padding: 0;
  font-size: 90%;
}


div#index a {
  margin-left: 0.7em;
}

div#index .section-bar {
  margin-left: 0px;
  padding-left: 0.7em;
  background: #ccc;
  font-size: small;
}


div#classHeader, div#fileHeader {
  width: auto;
  color: white;
  padding: 0.5em 1.5em 0.5em 1.5em;
  margin: 0;
  margin-left: -40px;
  border-bottom: 3px solid #006;
}

div#classHeader a, div#fileHeader a {
  background: inherit;
  color: white;
}

div#classHeader td, div#fileHeader td {
  background: inherit;
  color: white;
}


div#fileHeader {
  background: #057;
}

div#classHeader {
  background: #048;
}


.class-name-in-header {
  font-size: 180%;
  font-weight: bold;
}


div#bodyContent {
  padding: 0 1.5em 0 1.5em;
}

div#description {
  padding: 0.5em 1.5em;
  background: #efefef;
  border: 1px dotted #999;
}

/*
 * NOTE(review): only the first selector is scoped to div#description;
 * h2..h6 match document-wide. Left unchanged on purpose: the h1-h4 rule
 * near the top sets color #efefef, so rescoping this rule would change how
 * headings outside the description are rendered.
 */
div#description h1,h2,h3,h4,h5,h6 {
  color: #125;
  background: transparent;
}

div#validator-badges {
  text-align: center;
}
div#validator-badges img { border: 0; }

div#copyright {
  color: #333;
  background: #efefef;
  font: 0.75em sans-serif;
  margin-top: 5em;
  margin-bottom: 0;
  padding: 0.5em 2em;
}


/* === Classes =================================== */

table.header-table {
  color: white;
  font-size: small;
}

.type-note {
  font-size: small;
  color: #DEDEDE;
}

.xxsection-bar {
  background: #eee;
  color: #333;
  padding: 3px;
}

.section-bar {
  color: #333;
  border-bottom: 1px solid #999;
  margin-left: -20px;
}


.section-title {
  background: #79a;
  color: #eee;
  padding: 3px;
  margin-top: 2em;
  margin-left: -30px;
  border: 1px solid #999;
}

.top-aligned-row { vertical-align: top }
.bottom-aligned-row { vertical-align: bottom }

/* --- Context section classes ----------------------- */

.context-row { }
.context-item-name { font-family: monospace; font-weight: bold; color: black; }
.context-item-value { font-size: small; color: #448; }
.context-item-desc { color: #333; padding-left: 2em; }

/* --- Method classes -------------------------- */
.method-detail {
  background: #efefef;
  padding: 0;
  margin-top: 0.5em;
  margin-bottom: 1em;
  border: 1px dotted #ccc;
}
.method-heading {
  color: black;
  background: #ccc;
  border-bottom: 1px solid #666;
  padding: 0.2em 0.5em 0 0.5em;
}
.method-signature { color: black; background: inherit; }
.method-name { font-weight: bold; }
.method-args { font-style: italic; }
.method-description { padding: 0 0.5em 0 0.5em; }

/* --- Source code sections -------------------- */

a.source-toggle { font-size: 90%; }
div.method-source-code {
  background: #262626;
  color: #ffdead;
  margin: 1em;
  padding: 0.5em;
  border: 1px dashed #999;
  overflow: hidden;
}

div.method-source-code pre { color: #ffdead; overflow: hidden; }

/* --- Ruby keyword styles --------------------- */

.standalone-code { background: #221111; color: #ffdead; overflow: hidden; }

.ruby-constant { color: #7fffd4; background: transparent; }
.ruby-keyword { color: #00ffff; background: transparent; }
.ruby-ivar { color: #eedd82; background: transparent; }
.ruby-operator { color: #00ffee; background: transparent; }
.ruby-identifier { color: #ffdead; background: transparent; }
.ruby-node { color: #ffa07a; background: transparent; }
.ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
.ruby-regexp { color: #ffa07a; background: transparent; }
.ruby-value { color: #7fffd4; background: transparent; }
data/examples/foo.rb ADDED
@@ -0,0 +1,20 @@
# Minimal counter object used by the example server below.
class Foo
  # Current value of the counter (starts at 0).
  attr_reader :i

  def initialize
    @i = 0
  end

  # Adds one to the counter, logs the new value and returns it.
  #
  # The log message says "up to <value>", so the increment has to happen
  # before the message is built; previously the stale value was logged.
  #
  # +logger+ is not defined by this class.
  # NOTE(review): presumably it is the singleton method that
  # Starfish#sanitize attaches to the served object -- confirm.
  def inc
    @i += 1
    logger.info "YAY it incremented by 1 up to #{@i}"
    @i
  end
end
13
+
# Server side: the block's value (a fresh Foo) is the object handed back to
# the caller of the block; the block argument itself is not used.
server(:log => "/tmp/foo.log") { |_object| Foo.new }

# Client side: bump the counter on whatever object the block receives.
client { |object| object.inc }
@@ -0,0 +1,10 @@
# Model whose records are selected by the server block below.
class Foo < ActiveRecord::Base
end

# Server side: configure the job with the model class and the conditions
# used to select its rows.
server do |map_reduce|
  map_reduce.type       = Foo
  map_reduce.conditions = ["bar = ?", 1]
end

# Client side: called with one object at a time.
client { |foo| foo.do_some_hard_task }
@@ -0,0 +1,11 @@
# Server side: describe the job.
#
# Each setting is its own statement. The previous version ended the first two
# lines with a comma, which made Ruby parse all three as one nested multiple
# assignment (so +type+ received an Array instead of File).
#
# NOTE(review): the MapReduce class shown in this diff registers only
# ActiveRecord::Base as a type and exposes type/input/limit writers, with no
# +per_client=+ -- this example may not run against this version as written;
# verify before relying on it.
server :log => "/tmp/mylog" do |map_reduce|
  map_reduce.type       = File
  map_reduce.input      = "/usr/local/big_ass_file"
  map_reduce.per_client = 1000
end

# Client side: log every line that matches the pattern.
client do |line|
  logger.info(line) if line =~ /some_regex/
end
@@ -0,0 +1,98 @@
1
+ require 'rubygems'
2
+ require 'active_record'
3
+
class MapReduce
  module ActiveRecord
    # Queue handling for a job whose +type+ is an ActiveRecord model.
    #
    # The instance methods read and update state that this module does not
    # initialise itself (@queue, @offset, @lock, @total and the timing and
    # counter variables); the including object must set them up beforehand.
    module Base
      attr_accessor :queue_size, :locked_queue_wait, :empty_queue_wait, :rescan_when_complete, :vigilant

      # Values #get_id returns in place of a record id.
      WAIT_SIGNALS = [:locked_queue_wait, :empty_queue_wait].freeze

      # Client-side view of the job: yields model instances looked up from the
      # ids handed out by the server object.
      class Client
        include DRbUndumped

        include Enumerable

        # server_object:: object answering +type_to_s+, +limit+, +get_id+,
        #                 +locked_queue_wait+, +empty_queue_wait+ and +logger+.
        def initialize(server_object)
          @server_object = server_object
          @type = resolve_constant(server_object.type_to_s)
        end

        # Yields +limit+ records, waiting whenever the server reports that
        # its queue is locked or empty.
        def each
          @server_object.limit.times do
            yield get_value_from(@server_object.get_id)
          end
        end

        # Forwards to the server object's logger.
        def logger(*args)
          @server_object.logger(*args)
        end

        private

        # Turns a constant name such as "Foo" or "Admin::Foo" into the
        # constant itself without evaluating the string as code (the name
        # comes from the server object).
        #
        # Raises ArgumentError when the name is blank and NameError when a
        # part of it is not defined.
        def resolve_constant(name)
          parts = name.to_s.split("::").reject { |part| part.empty? }
          raise ArgumentError, "server object did not report a type" if parts.empty?
          parts.inject(Object) { |scope, part| scope.const_get(part) }
        end

        # Resolves what #get_id returned into a record.
        #
        # A wait signal makes the client sleep (1s by default for a locked
        # queue, 30s for an empty one) and ask again. This is a loop rather
        # than recursion so that a long wait cannot exhaust the stack.
        def get_value_from(id)
          loop do
            case id
            when :locked_queue_wait
              sleep @server_object.locked_queue_wait || 1
            when :empty_queue_wait
              sleep @server_object.empty_queue_wait || 30
            else
              return @type.find(id)
            end
            id = @server_object.get_id
          end
        end
      end

      # Hands out the next record id, or a wait signal:
      # :locked_queue_wait while the queue is being refilled and
      # :empty_queue_wait when no id is currently available.
      #
      # Only real ids are counted in @num_objects_grabbed; wait signals used
      # to be counted as well because every returned value is truthy.
      def get_id
        if @queue.empty? && @offset == 0
          @total = type.count(input)
          # Without rescanning, a single fetch covering every row is the default.
          @queue_size ||= @total unless @rescan_when_complete
        end

        started_at = Time.now

        next_id = if @lock
          :locked_queue_wait
        else
          queue.shift || :empty_queue_wait
        end

        @time_spent_grabbing_objects += (Time.now - started_at)
        @num_objects_grabbed += 1 unless WAIT_SIGNALS.include?(next_id)

        next_id
      end

      private

      # Returns the id queue, refilling it from the database when empty.
      #
      # Calls +exit+ once every row has been handed out, unless
      # +rescan_when_complete+ or +vigilant+ is set.
      def queue
        if @queue.empty? && @offset == @total && !@rescan_when_complete && !@vigilant
          exit
        end

        if @queue.empty?
          GC.start

          @time_began = Time.now if @time_began == 0
          # While @lock is set, #get_id answers :locked_queue_wait.
          @lock = true
          started_at = Time.now

          records = type.find(:all, :conditions => input, :limit => @queue_size, :offset => @offset)
          @queue = records.map { |record| record.id }

          @time_spent_grabbing_queues += (Time.now - started_at)
          @num_queues_grabbed += 1

          @offset += @queue.size unless @queue.empty?
          # Start over from the first row when a rescan was requested.
          @offset = 0 if @offset == @total && @rescan_when_complete
          @lock = false
        end

        @queue
      end
    end
  end
end
@@ -0,0 +1,4 @@
class MapReduce
  # Placeholder namespace; no behaviour is defined for it here.
  module Array
  end
end
@@ -0,0 +1,4 @@
class MapReduce
  # Placeholder namespace; no behaviour is defined for it here.
  module File
  end
end
data/lib/map_reduce.rb ADDED
@@ -0,0 +1,121 @@
1
+ require 'drb'
2
+
3
+ require 'map_reduce/active_record'
4
+ require 'map_reduce/file'
5
+ require 'map_reduce/array'
6
+
# Error type raised for an invalid MapReduce type or input.
class MapReduceError < StandardError
end
# Description of a distributed job: which +type+ of object is processed,
# which +input+ selects the objects, and how many objects a client takes per
# pass (+limit+). It also accumulates timing statistics.
class MapReduce
  # Supported base types, each mapped to the classes its +input+ may be an
  # instance of.
  @@types = {
    ::ActiveRecord::Base => [::Array, String, NilClass]
  }

  # The writer for +type+ is defined explicitly below.
  attr_reader :type
  attr_accessor :input, :limit
  alias :conditions :input
  alias :conditions= :input=

  def initialize
    @lock = false
    @offset = 0
    @limit = 1

    @time_began = 0

    @num_objects_grabbed = 0
    @time_spent_grabbing_objects = 0.0
    @time_spent_processing_objects = 0.0

    @num_queues_grabbed = 0
    @time_spent_grabbing_queues = 0.0

    @queue = []
  end

  # Adds +time+ (seconds) to the running total spent processing objects.
  def add_time_spent_processing_objects(time)
    @time_spent_processing_objects += time
  end

  # Returns the collected counters as a Hash.
  #
  # :time_spent_grabbing_queues now reports @time_spent_grabbing_queues; it
  # used to repeat the per-object figure.
  def stats
    {
      :time_began => @time_began,
      :num_queues_grabbed => @num_queues_grabbed,
      :time_spent_grabbing_queues => @time_spent_grabbing_queues,
      :num_objects_grabbed => @num_objects_grabbed,
      :time_spent_grabbing_objects => @time_spent_grabbing_objects,
      :time_spent_processing_objects => @time_spent_processing_objects
    }
  end

  # Sets the type and, when it is supported, mixes the matching
  # MapReduce::<base type> module into this class.
  def type=(type)
    @type = type
    self.class.send(:include, handler_module) if valid_type?
  end

  # Name of the supported base type, or nil when the type is not valid.
  def base_type_to_s
    base_type.to_s if valid_type?
  end

  # Name of the configured type, or nil when the type is not valid.
  def type_to_s
    type.to_s if valid_type?
  end

  # Sets type and input in one go.
  #
  # Accepts both <tt>send(:spool=, type, input)</tt> (the only way the old
  # two-parameter signature could be called) and the assignment form
  # <tt>job.spool = type, input</tt>, which passes a single Array.
  def spool=(*args)
    args = args.first if args.size == 1 && args.first.is_a?(::Array)
    self.type, self.input = args
  end

  # True when both type and input are acceptable.
  # Raises MapReduceError for a valid type with unsupported input.
  def valid?
    valid_type? && valid_input?
  end

  # True when the configured type is one of the supported base types or a
  # descendant of one. Anything that is not a class/module is simply invalid
  # instead of blowing up inside the comparison.
  def valid_type?
    return false unless @type.is_a?(Module)
    @@types.keys.any? { |known| @type <= known }
  end

  # Returns false when the type is invalid and true when the input is an
  # instance of one of the classes registered for the base type.
  # Raises MapReduceError otherwise.
  def valid_input?
    return false unless valid_type?

    allowed = Array(@@types[base_type])
    return true if allowed.any? { |input_type| input.is_a?(input_type) }

    raise MapReduceError, "invalid input (#{@input.inspect}) for type: #{base_type}. Try one of the following: #{allowed.join(", ")}"
  end

  # <tt>job.something = a_proc</tt> defines an instance method +something+
  # on this class with the proc as its body; every other unknown message
  # falls through to the default handling.
  def method_missing(name, *args)
    if name.to_s =~ /(.*)=$/ && args[0].is_a?(Proc)
      self.class.send(:define_method, $1, args[0])
    else
      super
    end
  end

  # Raises MapReduceError unless type and input are both valid.
  def raise_if_invalid!
    if not valid_type?
      raise MapReduceError, "invalid type, please make sure you provide one of the following classes or sub-classes thereof: ActiveRecord::Base, File, or Array"
    end
    if not valid_input?
      # Looked up by base type: +type+ itself may be a subclass that is not a
      # key of @@types.
      raise MapReduceError, "invalid input, please make sure you provide one of the following: #{Array(@@types[base_type]).join(", ")}"
    end
  end

  # Lets callers tell this object apart from other served objects.
  def map_reduce?
    true
  end

  # Walks up the superclass chain of the configured type and returns the
  # first class registered in @@types, or nil when there is none.
  def base_type
    return nil unless @type.respond_to?(:superclass)

    candidate = @type
    while candidate.superclass
      return candidate if @@types.include?(candidate)
      candidate = candidate.superclass
    end
    nil
  end

  private

  # Resolves MapReduce::<base type> (for example MapReduce::ActiveRecord::Base)
  # by constant lookup instead of evaluating a string.
  def handler_module
    base_type.to_s.split("::").inject(MapReduce) { |scope, name| scope.const_get(name) }
  end
end
data/lib/starfish.rb ADDED
@@ -0,0 +1,200 @@
require 'rinda/ring'
require 'rinda/tuplespace'
require 'timeout'
require 'logger'
require 'digest/md5'
require 'yaml'

begin
  require 'md5'
rescue LoadError
  # 'md5' only exists on Ruby 1.8; Digest::MD5 (required above) covers it.
end

require 'map_reduce'
9
+
10
+ DRb.start_service
11
+
# Error type reserved for Starfish-specific failures.
class StarfishError < StandardError
end
# Runs either side of a Starfish service. The service is identified by
# +uniq+ (by default the first command line argument); the server publishes
# an object in a Rinda tuple space and clients look it up and work on it.
class Starfish
  VERSION = "1.0.0"

  @@server = false
  @@client = false
  @@options = {
    # ARGV can be empty when the library is loaded without a script argument;
    # fall back to a fixed name instead of failing while the class is defined.
    :log => "/tmp/#{File.basename(ARGV.first || "starfish")}.log"
  }

  include Rinda
  attr_accessor :started, :ring_server
  # The reader named +server+ is the method below that starts the server.
  attr_writer :server

  # set the uniq identifier
  def initialize(uniq=ARGV.first)
    @retry_count = 0
    @uniq = uniq
  end

  # Hex MD5 digest of the identifier; used as the tuple name and in the
  # default pid file name.
  def uniq
    Digest::MD5.hexdigest(@uniq.to_s)
  end

  # Starts the server: calls the registered server block, publishes the
  # resulting object, writes the pid file and then blocks on the DRb thread.
  # Exits when no server block was registered.
  def server
    unless @@server
      $stderr.puts "You must specify a server"
      exit
    end

    map_reduce = MapReduce.new
    object = @@server.call(map_reduce)

    # When the block configured a valid job, the job itself is served
    # instead of the block's return value.
    object = map_reduce if map_reduce.valid?

    sanitize object

    ts = Rinda::TupleSpace.new
    begin
      ts.write([:name, uniq.intern, object, @uniq])
      # NOTE(review): this is a local variable, not the ring_server
      # attribute -- confirm that is intended.
      ring_server = Rinda::RingServer.new(ts)
    rescue Errno::EADDRINUSE
      # The address is already in use: publish through the existing ring.
      ts = RingFinger.primary
      ts.write([:name, uniq.intern, object, @uniq])
    end

    File.open(pid_file, "w") { |f| f << Process.pid }

    $stderr.puts "server started for #{object.inspect}"

    DRb.thread.join
  end

  # Runs the client loop against the published object.
  #
  # A served object that does not answer +map_reduce?+ is passed straight to
  # the client block, over and over. A MapReduce job is processed one record
  # at a time. Timeouts and lost connections are retried up to five times
  # (respawning or restarting the server) before the error is re-raised.
  #
  # Raises MapReduceError when the served job is not valid.
  def client
    unless @@client
      $stderr.puts "You must specify a client"
      exit
    end

    negotiate
    fetch_server_object

    loop do
      begin
        map_reduce = @server_object.map_reduce?
      rescue NoMethodError
        # Plain served object. The block's return value is deliberately
        # ignored: a nil/false result used to fall through to a second,
        # unprotected map_reduce? call and crash with NoMethodError.
        @@client.call(@server_object)
        next
      end

      unless map_reduce && @server_object.valid?
        raise MapReduceError, "invalid map reduce server (possibly missing type or input)"
      end

      run_map_reduce_client
    end

  rescue Timeout::Error => e
    spawn
    @retry_count += 1
    # Re-raise the original exception so its message and backtrace survive.
    raise e if @retry_count > 5
    retry
  rescue DRb::DRbConnError => e
    stop
    negotiate
    @retry_count += 1
    raise e if @retry_count > 5
    retry
  end

  # Prints the served object's statistics as YAML.
  def stats
    negotiate
    fetch_server_object

    puts @server_object.stats.to_yaml
  rescue NoMethodError
    $stderr.puts "The stats method is not defined for your server"
  end

  # Prepares +object+ for serving: its class is marked DRbUndumped and the
  # object gets +logger+ / +_logger+ singleton methods returning the logger
  # selected by the :log option (a path, a logger class, nil/false for
  # /dev/null, or a ready-made logger object).
  def sanitize(object)
    object.class.instance_eval { include DRbUndumped }

    @@log = case @@options[:log]
    when String
      Logger.new(@@options[:log])
    when Class
      @@options[:log].new
    when nil, false
      Logger.new("/dev/null")
    else
      @@options[:log]
    end

    def object.logger
      @logger ||= @@log
    end
    def object._logger
      @logger ||= @@log
    end
  end

  # Locates the primary ring server, spawning a server process and retrying
  # (at most five times) when none is found.
  def negotiate
    begin
      @ring_server = RingFinger.primary
    rescue RuntimeError => m
      # allow multiple un-cached calls to RingFinger.finger
      def RingFinger.finger
        @@finger = self.new
        @@finger.lookup_ring_any
        @@finger
      end

      spawn
      @retry_count += 1
      if @retry_count <= 5
        retry
      else
        raise m
      end
    end
  end

  # Forks a server process for this identifier (at most once per instance).
  #
  # NOTE(review): @uniq is interpolated into a shell command line unescaped;
  # an identifier containing spaces or shell metacharacters will not survive.
  def spawn
    @started ||= fork { system("ruby #{File.dirname(__FILE__)}/../bin/starfish #{@uniq} server > /dev/null") }
  end

  # Sends SIGHUP to the server process recorded in the pid file.
  def stop
    puts "stopping the server"
    pid = IO.read(pid_file).to_i
    # A pid of 0 would signal this process's whole process group, so an empty
    # or garbled pid file must not reach Process.kill.
    Process.kill("SIGHUP", pid) if pid > 0
  rescue Errno::ESRCH
    # The recorded process is already gone; there is nothing left to stop.
  rescue Errno::ENOENT
    puts "Fatal error, please kill all starfish processes manually and try again"
    system("ps auxww|grep starfish")
  end

  private

  # Path of the pid file: the :pid option or a default derived from +uniq+.
  def pid_file
    @@options[:pid] || "/tmp/starfish-#{uniq}.pid"
  end

  # Reads the published object for this identifier from the ring server,
  # giving up after five seconds (Timeout::Error).
  def fetch_server_object
    Timeout::timeout(5) { @server_object = @ring_server.read([:name, uniq.intern, nil, nil])[2] }
  end

  # Processes one batch of a MapReduce job: builds the matching
  # MapReduce::<base type>::Client and feeds every object it yields to the
  # client block, reporting the time spent back to the server object.
  def run_map_reduce_client
    # Constant lookup instead of eval: the name comes from the remote object.
    client_class = "MapReduce::#{@server_object.base_type_to_s}::Client".split("::").inject(Object) do |scope, name|
      scope.const_get(name)
    end
    map_reduce_client = client_class.new(@server_object)

    # Every object gets a +logger+ method that forwards to the server's
    # logger, so the client block can simply call +logger+.
    $server_object = @server_object
    Object.send(:define_method, :logger) do |*args|
      $server_object._logger(*args)
    end

    map_reduce_client.each do |object|
      started_at = Time.now
      @@client.call(object)
      @server_object.add_time_spent_processing_objects(Time.now - started_at)
    end
  end

  class << self
    # Registers the block that builds the served object.
    def server=(server)
      @@server = server
    end

    # Registers the block run by clients.
    def client=(client)
      @@client = client
    end

    # Merges +options+ (for example :log and :pid) into the current options.
    def options=(options={})
      @@options.update(options)
    end
  end
end