roma 0.8.2 → 0.8.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. data/CHANG +326 -0
  2. data/CHANGELOG +132 -0
  3. data/{README.rdoc → FETCH_HEAD} +0 -0
  4. data/{LICENSE.rdoc → LICENSE} +0 -1
  5. data/README +17 -0
  6. data/Rakefile +33 -18
  7. data/ruby/server/bin/chg_redundancy +10 -0
  8. data/ruby/server/bin/key_access +7 -0
  9. data/ruby/server/bin/key_list +7 -0
  10. data/ruby/server/bin/mkconfig +19 -0
  11. data/{bin → ruby/server/bin}/mkrecent +0 -1
  12. data/{bin → ruby/server/bin}/mkroute +0 -1
  13. data/ruby/server/bin/multi_commander +19 -0
  14. data/ruby/server/bin/recoverlost +10 -0
  15. data/ruby/server/bin/recoverlost_alist +10 -0
  16. data/ruby/server/bin/recoverlost_alist_all +10 -0
  17. data/ruby/server/bin/recoverlost_alist_keys +10 -0
  18. data/{bin/recoverlost → ruby/server/bin/roma_watcher} +1 -2
  19. data/ruby/server/bin/romad +36 -0
  20. data/{bin → ruby/server/bin}/sample_watcher +0 -1
  21. data/{bin → ruby/server/bin}/sample_watcher2 +0 -1
  22. data/{bin/simple_bench → ruby/server/bin/sample_watcher3} +1 -2
  23. data/ruby/server/bin/simple_bench +26 -0
  24. data/{bin → ruby/server/bin}/ssroute +0 -1
  25. data/ruby/server/bin/test-scenario +11 -0
  26. data/{bin → ruby/server/bin}/tribunus +0 -1
  27. data/{lib → ruby/server/lib}/roma/async_process.rb +67 -15
  28. data/{lib → ruby/server/lib}/roma/command/bg_command_receiver.rb +1 -1
  29. data/ruby/server/lib/roma/command/command_definition.rb +422 -0
  30. data/ruby/server/lib/roma/command/mh_command_receiver.rb +127 -0
  31. data/ruby/server/lib/roma/command/receiver.rb +64 -0
  32. data/{lib → ruby/server/lib}/roma/command/rt_command_receiver.rb +6 -1
  33. data/ruby/server/lib/roma/command/sys_command_receiver.rb +609 -0
  34. data/{lib → ruby/server/lib}/roma/command/util_command_receiver.rb +15 -5
  35. data/{lib → ruby/server/lib}/roma/command/vn_command_receiver.rb +12 -4
  36. data/{lib → ruby/server/lib}/roma/command_plugin.rb +0 -0
  37. data/ruby/server/lib/roma/config.rb +84 -0
  38. data/{lib → ruby/server/lib}/roma/event/con_pool.rb +12 -1
  39. data/ruby/server/lib/roma/event/handler.rb +256 -0
  40. data/ruby/server/lib/roma/live_patch-20120302-001.rb +107 -0
  41. data/ruby/server/lib/roma/logging/rlogger.rb +163 -0
  42. data/ruby/server/lib/roma/messaging/con_pool.rb +92 -0
  43. data/{lib → ruby/server/lib}/roma/plugin/plugin_alist.rb +118 -240
  44. data/ruby/server/lib/roma/plugin/plugin_debug.rb +31 -0
  45. data/ruby/server/lib/roma/plugin/plugin_map.rb +177 -0
  46. data/ruby/server/lib/roma/plugin/plugin_mapcount.rb +185 -0
  47. data/{lib/roma/command/st_command_receiver.rb → ruby/server/lib/roma/plugin/plugin_storage.rb} +170 -146
  48. data/ruby/server/lib/roma/plugin/plugin_stub.rb +283 -0
  49. data/{lib → ruby/server/lib}/roma/plugin/plugin_test.rb +0 -0
  50. data/{lib → ruby/server/lib}/roma/romad.rb +221 -94
  51. data/{lib → ruby/server/lib}/roma/routing/cb_rttable.rb +4 -6
  52. data/{lib → ruby/server/lib}/roma/routing/merkle_tree.rb +0 -0
  53. data/ruby/server/lib/roma/routing/routing_data.rb +307 -0
  54. data/{lib → ruby/server/lib}/roma/routing/rttable.rb +4 -0
  55. data/{lib → ruby/server/lib}/roma/stats.rb +19 -3
  56. data/{lib → ruby/server/lib}/roma/storage/basic_storage.rb +25 -26
  57. data/{lib → ruby/server/lib}/roma/storage/dbm_storage.rb +1 -23
  58. data/{lib → ruby/server/lib}/roma/storage/dummy_storage.rb +0 -0
  59. data/{lib → ruby/server/lib}/roma/storage/rh_storage.rb +0 -0
  60. data/{lib → ruby/server/lib}/roma/storage/sqlite3_storage.rb +0 -0
  61. data/{lib → ruby/server/lib}/roma/storage/tc_storage.rb +62 -2
  62. data/ruby/server/lib/roma/tools/chg_redundancy.rb +36 -0
  63. data/ruby/server/lib/roma/tools/key_access.rb +105 -0
  64. data/ruby/server/lib/roma/tools/key_list.rb +94 -0
  65. data/ruby/server/lib/roma/tools/mkconfig.rb +535 -0
  66. data/{lib → ruby/server/lib}/roma/tools/mkrecent.rb +0 -0
  67. data/{lib → ruby/server/lib}/roma/tools/mkroute.rb +0 -0
  68. data/ruby/server/lib/roma/tools/multi_commander.rb +45 -0
  69. data/{lib → ruby/server/lib}/roma/tools/recoverlost.rb +0 -0
  70. data/{lib → ruby/server/lib}/roma/tools/recoverlost_alist.rb +0 -0
  71. data/ruby/server/lib/roma/tools/recoverlost_alist_all.rb +8 -0
  72. data/ruby/server/lib/roma/tools/recoverlost_alist_keys.rb +16 -0
  73. data/ruby/server/lib/roma/tools/recoverlost_lib.rb +349 -0
  74. data/ruby/server/lib/roma/tools/roma_watcher.rb +150 -0
  75. data/ruby/server/lib/roma/tools/roma_watcher_config.yml.example +20 -0
  76. data/{lib → ruby/server/lib}/roma/tools/sample_watcher.rb +3 -1
  77. data/{lib → ruby/server/lib}/roma/tools/sample_watcher2.rb +3 -1
  78. data/ruby/server/lib/roma/tools/sample_watcher3.rb +49 -0
  79. data/{lib → ruby/server/lib}/roma/tools/simple_bench.rb +2 -0
  80. data/ruby/server/lib/roma/tools/simple_bench2.rb +78 -0
  81. data/{lib → ruby/server/lib}/roma/tools/ssroute.rb +0 -0
  82. data/ruby/server/lib/roma/tools/test-scenario.rb +327 -0
  83. data/{lib → ruby/server/lib}/roma/tools/tribunus.rb +0 -0
  84. data/ruby/server/lib/roma/version.rb +4 -0
  85. data/{lib → ruby/server/lib}/roma/write_behind.rb +1 -0
  86. data/ruby/server/test/config4mhash.rb +68 -0
  87. data/ruby/server/test/config4storage_error.rb +69 -0
  88. data/{lib/roma/config.rb → ruby/server/test/config4test.rb} +6 -3
  89. data/{test → ruby/server/test}/rcirb.rb +0 -1
  90. data/{test → ruby/server/test}/roma-test-utils.rb +21 -8
  91. data/{test → ruby/server/test}/run-test.rb +3 -2
  92. data/ruby/server/test/storage_error_storage.rb +37 -0
  93. data/ruby/server/test/t_command_definition.rb +326 -0
  94. data/{test → ruby/server/test}/t_cpdata.rb +9 -3
  95. data/{test → ruby/server/test}/t_listplugin.rb +48 -12
  96. data/ruby/server/test/t_mapcountplugin.rb +231 -0
  97. data/ruby/server/test/t_mapplugin.rb +131 -0
  98. data/ruby/server/test/t_mhash.rb +222 -0
  99. data/ruby/server/test/t_rclient.rb +199 -0
  100. data/{test → ruby/server/test}/t_routing_data.rb +56 -0
  101. data/{test → ruby/server/test}/t_storage.rb +107 -111
  102. data/ruby/server/test/t_storage_error.rb +61 -0
  103. data/ruby/server/test/t_writebehind.rb +374 -0
  104. metadata +150 -82
  105. data/bin/recoverlost_alist +0 -8
  106. data/bin/romad +0 -7
  107. data/lib/roma/command/mh_command_receiver.rb +0 -117
  108. data/lib/roma/command/receiver.rb +0 -287
  109. data/lib/roma/event/handler.rb +0 -159
  110. data/lib/roma/plugin/plugin_debug.rb +0 -19
  111. data/lib/roma/tools/recoverlost_lib.rb +0 -217
  112. data/lib/roma/version.rb +0 -4
  113. data/test/t_rclient.rb +0 -318
  114. data/test/t_writebehind.rb +0 -200
File without changes
File without changes
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'roma/logging/rlogger'
4
+ require 'roma/messaging/con_pool'
5
+ require 'roma/routing/routing_data'
6
+ require 'roma/client/sender'
7
+
8
module Roma

  # Sends a text command to a single node, or to every node listed in the
  # routing dump obtained at construction time, and returns the raw
  # responses joined into one string.
  class MultiCommander

    # Adds a receiver to the client sender that keeps collecting response
    # lines until the connection has been silent for 50 ms.
    Roma::Client::Sender.class_eval{
      # con:: an IO-like connection that supports +gets+ and Kernel#select.
      # Returns the received lines (line terminators removed) as an Array.
      def multiplelines_receiver2(con)
        ret = []
        while select [con], nil, nil, 0.05
          line = con.gets
          # +gets+ returns nil once the peer has closed the connection; the
          # socket then stays readable forever, so stop here instead of
          # calling +chomp+ on nil.
          break unless line
          ret << line.chomp
        end
        ret
      end
    }

    # nid:: node-id used to fetch the routing dump.
    def initialize(nid)
      @sender = Roma::Client::Sender.new
      @rd = @sender.send_routedump_command(nid)
    end

    # Sends +cmd+ to +nid+ (the first node of the routing dump when +nid+
    # is omitted) and returns the response lines joined with, and
    # terminated by, "\r\n".
    def send_cmd(cmd, nid = nil)
      nid = @rd.nodes[0] unless nid
      lines = @sender.send_command(nid, cmd, nil, :multiplelines_receiver2)
      "#{lines.join("\r\n")}\r\n"
    end

    # Sends +cmd+ to every node of the routing dump. Each node's response
    # is preceded by a "****** <node-id>" header line.
    def send_cmd_all(cmd)
      @rd.nodes.map{|nid|
        "****** #{nid}\r\n#{send_cmd(cmd, nid)}"
      }.join
    end

  end # class MultiCommander

end # module Roma
File without changes
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # usage:recoverlost_alist_all address port storage-path
4
+ #
5
+ require 'roma/tools/recoverlost_lib'
6
+
7
# Push every vnode found under storage-path back into the cluster using the
# alist push command, then report completion.
recover = Roma::RecoverLost.new('recoverlost_alist_all', 'alist_spushv', ARGV, true)
recover.suite
puts "Recover process has succeed."
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # usage:recoverlost_alist_keys address port storage-path < key-list
4
+ #
5
+ require 'roma/tools/recoverlost_lib'
6
+
7
# Recover only the keys listed on standard input, one key per line.
recover = Roma::RecoverLost.new('recoverlost_alist_keys', 'alist_spushv', ARGV, true)

# Read until EOF and strip the line terminator from each key.
keys = STDIN.each_line.map{|line| line.chomp }

recover.suite_with_keys(keys)

puts "Recover process has succeed."
@@ -0,0 +1,349 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # usage:recoverlost address port storage-path [yyyymmddhhmmss]
5
+ #
6
+ require 'roma/client/sender'
7
+ require 'roma/messaging/con_pool'
8
+ require 'roma/routing/routing_data'
9
+
10
require 'digest/sha1' # Digest::SHA1 is used below to map keys onto vnodes

module Roma
  module Storage
  end
  # Storage back-ends are autoloaded, i.e. only loaded when first referenced.
  Storage::autoload(:TCStorage,'roma/storage/tc_storage')
  Storage::autoload(:DbmStorage,'roma/storage/dbm_storage')
  Storage::autoload(:SQLite3Storage,'roma/storage/sqlite3_storage')

  # Reads records from storage files on local disk and pushes them to the
  # nodes of a running cluster with the configured "pushv" command.
  class RecoverLost

    # pname::     program name shown in the usage message.
    # pushv_cmd:: name of the push command sent to the nodes.
    # argv::      [address, port, storage-path, (yyyymmddhhmmss)].
    # alldata::   when true every vnode of the routing table is pushed and
    #             exactly three arguments are required.
    #
    # Prints a message and exits when the arguments are invalid.
    def initialize(pname, pushv_cmd, argv, alldata = false)
      # NOTE(review): the usage text marks the timestamp as optional, yet
      # fewer than four arguments are rejected here - confirm which is intended.
      if alldata == false && argv.length < 4
        puts "usage:#{pname} address port storage-path [yyyymmddhhmmss]"
        exit
      end

      if alldata && argv.length != 3
        puts "usage:#{pname} address port storage-path"
        exit
      end

      @addr = argv[0]
      @port = argv[1]
      @strgpath = argv[2]
      @ymdhms = argv[3]

      if @port =~ /\D/
        STDERR.puts "port was not numeric."
        exit
      end

      if @ymdhms && (@ymdhms.length != 14 || @ymdhms =~ /\D/)
        STDERR.puts "yyyymmddhhmmss format mismatch."
        exit
      end
      @pushv_cmd = pushv_cmd
      @nodeid = "#{@addr}_#{@port}"
      @stream_copy_wait_param = 0.0001 # pause (seconds) between pushed records
      @alldata = alldata
    end

    # Pushes the lost vnodes (or every vnode when +alldata+ was given) of
    # each hash directory found under the storage path.
    def suite
      @rd = get_routing_data(@nodeid)
      unless @alldata
        @lost_vnodes = get_lost_vnodes(@rd,@ymdhms)
        puts "#{@lost_vnodes.length} vnodes where data was lost."

        exit if @lost_vnodes.length == 0
      else
        @lost_vnodes = @rd.v_idx.keys
      end

      each_hash(@strgpath){|hname,dir|
        puts "#{hname} #{dir}"
        @storage = open_storage(dir,@lost_vnodes)
        start_recover(hname)
        @storage.closedb
      }
    end

    # Pushes only the records stored under the given +keys+.
    def suite_with_keys(keys)
      @rd = get_routing_data(@nodeid)
      @lost_vnodes = @rd.v_idx.keys

      each_hash(@strgpath){|hname,dir|
        puts "#{hname} #{dir}"
        @storage = open_storage(dir,@lost_vnodes)
        start_recover_width_keys(hname, keys)
        # start_recover_width_keys2(hname, keys)
        @storage.closedb
      }
    end

    # Yields (name, path) for every sub-directory of +path+.
    def each_hash(path)
      Dir::glob("#{path}/*").each{|dir|
        next unless File::directory?(dir)
        yield File::basename(dir),dir
      }
    end

    # Fetches the routing dump from node +nid+.
    def get_routing_data(nid)
      sender = Roma::Client::Sender.new
      sender.send_routedump_command(nid)
    end

    # Returns the lost vnodes reported by the routing data, merged with the
    # node's lost-history for +ymdhms+ when a timestamp is given.
    def get_lost_vnodes(rd,ymdhms)
      ret = rd.get_lost_vnodes
      if ymdhms
        ret |= get_history_of_lost(@nodeid,ymdhms)
      end
      ret
    end

    # Sends "history_of_lost" to +nid+ and returns the integers received
    # before the "END" line.
    def get_history_of_lost(nid,ymdhms)
      ret = []
      pool = Roma::Messaging::ConPool.instance
      con = pool.get_connection(nid)
      con.write("history_of_lost #{ymdhms}\r\n")
      # Stop on EOF as well: +gets+ returns nil when the peer closes the
      # connection, which previously crashed on nil.chomp.
      while (buf = con.gets) && buf != "END\r\n"
        ret << buf.chomp.to_i
      end
      if buf
        pool.return_connection(nid, con)
      else
        con.close # a closed connection must not go back into the pool
      end
      ret
    end

    # Opens the storage found in +path+, restricted to +vn_list+.
    # Returns nil when +path+ is not a directory; raises when the directory
    # holds no "0.*" data file or the file extension is not supported.
    def open_storage(path,vn_list)
      unless File::directory?(path)
        STDERR.puts "#{path} dose not found."
        return nil
      end

      # get a file extension
      first_file = Dir::glob("#{path}/0.*")[0]
      raise "storage file (0.*) was not found in #{path}." unless first_file
      ext = File::extname(first_file)[1..-1]
      # count a number of divided files
      divnum = Dir::glob("#{path}/*.#{ext}").length

      st = new_storage(ext)
      raise "unsupported storage type '#{ext}' in #{path}." unless st
      st.divnum = divnum
      st.vn_list = vn_list
      st.storage_path = path
      st.opendb
      st
    end

    # Returns a new storage object for the file extension +ext+, or nil
    # when the extension is unknown.
    def new_storage(ext)
      case(ext)
      when 'tc'
        return ::Roma::Storage::TCStorage.new
      when 'dbm'
        return Roma::Storage::DbmStorage.new
      when 'sql3'
        return Roma::Storage::SQLite3Storage.new
      else
        return nil
      end
    end

    # Pushes every vnode of @lost_vnodes. A vnode without any node in the
    # routing table is pushed to a randomly chosen node, which is then
    # announced to the cluster with "setroute".
    def start_recover(hname)
      @lost_vnodes.each_with_index{|vn, idx|
        nodes = @rd.v_idx[vn]
        unassigned = nodes.nil? || nodes.empty?
        if unassigned
          nids = [@rd.nodes[rand(@rd.nodes.length)]]
          puts "#{idx}/#{@lost_vnodes.length} #{vn} assign to #{nids.inspect}"
        else
          nids = nodes
          puts "#{idx}/#{@lost_vnodes.length} #{vn} was auto assigned at #{nids.inspect}"
        end

        nids.each{|nid|
          if push_a_vnode_stream(hname, vn, nid)!="STORED"
            STDERR.puts "push_a_vnode_stream aborted in #{vn}"
            exit
          end
        }

        next unless unassigned

        # The block parameter of the loop above is not visible here, so the
        # chosen node is taken from +nids+ explicitly.
        new_nid = nids[0]
        cmd = "setroute #{vn} #{@rd.v_clk[vn]} #{new_nid}\r\n"
        exit unless send_cmd(new_nid ,cmd)
        broadcast_cmd(cmd, [new_nid])
      }
    end

    # Streams every record of vnode +vn+ from the opened storage to +nid+.
    # Returns the node's final reply ("STORED" on success), the error line
    # when the node is not ready, or nil on an exception.
    def push_a_vnode_stream(hname, vn, nid)
      push_stream(hname, vn, nid){|con|
        @storage.each_vn_dump(vn){|data|
          con.write(clk_to_zero(data))
          sleep @stream_copy_wait_param
        }
      }
    end

    # Groups +keys+ by the nodes in charge of each key's vnode.
    # Returns {node-id => [key, ...]} containing every node of the routing data.
    def make_node_hash(keys)
      res = {}
      @rd.nodes.each{|nid| res[nid] = [] }
      keys.each{|key|
        # a vnode without an entry in the routing table is skipped
        (@rd.v_idx[vnode_of(key)] || []).each{|nid| res[nid] << key }
      }
      res
    end

    # Variant of start_recover_width_keys that uploads all keys of a node
    # over a single stream.
    def start_recover_width_keys2(hname,keys)
      node_hash = make_node_hash(keys)
      node_hash.each{|nid,ks|
        puts nid
        upload_data2(hname, nid, ks)
      }
    end

    # Uploads the stored records of +keys+ to +nid+ in one stream, printing
    # a progress counter. Keys that are not in the storage are skipped.
    def upload_data2(hname, nid, keys)
      n = keys.length
      m = n / 100
      m = 1 if m < 1
      push_stream(hname, 0, nid){|con|
        keys.each_with_index{|k,i|
          print "#{i}/#{n}\r" if i%m == 0
          data = @storage.get_raw2(k)
          next unless data

          _vn_old, last, _clk, expt, val = data
          con.write(pack_entry(vnode_of(k), last, expt, k, val))
          sleep @stream_copy_wait_param
        }
      }
    end

    # Looks up every key in the opened storage and uploads each record that
    # is found to all nodes in charge of the key's vnode.
    def start_recover_width_keys(hname,keys)
      keys.each{|key|
        data = @storage.get_raw2(key)
        if data
          puts "hit => #{key}"
          vn = vnode_of(key)
          # a vnode without an entry in the routing table is skipped
          (@rd.v_idx[vn] || []).each{|nid|
            print "#{nid}=>"
            res = upload_data(hname, vn, nid, key, data)
            puts res
          }
        end
      }
    end

    # Uploads one record (+data+ as returned by the storage's get_raw2) for
    # key +k+ to +nid+. Returns the node's reply, or nil on an exception.
    def upload_data(hname, vn, nid, k, data)
      push_stream(hname, vn, nid){|con|
        _vn_old, last, _clk, expt, val = data
        con.write(pack_entry(vn, last, expt, k, val))
        sleep @stream_copy_wait_param
      }
    end

    # Re-packs a dumped record with its clock field (third 32-bit word) set
    # to zero.
    def clk_to_zero(data)
      vn, last, _clk, expt, klen = data.unpack('NNNNN')
      k, vlen = data[20..-1].unpack("a#{klen}N")
      if vlen != 0
        v, = data[(20+klen+4)..-1].unpack("a#{vlen}")
        [vn, last, 0, expt, klen, k, vlen, v].pack("NNNNNa#{klen}Na#{vlen}")
      else
        [vn, last, 0, expt, klen, k, 0].pack("NNNNNa#{klen}N")
      end
    end

    # Sends +cmd+ to every node except those in +without_nids+ (a node-id
    # or an array of node-ids). Returns {node-id => reply}, or nil on an
    # exception.
    def broadcast_cmd(cmd,without_nids=nil)
      # Array() also wraps a single node-id, so exclusion is by exact match
      # rather than by String#include? (substring).
      excluded = Array(without_nids)
      res = {}
      @rd.nodes.each{ |nid|
        res[nid] = send_cmd(nid,cmd) unless excluded.include?(nid)
      }
      res
    rescue => e
      STDERR.puts("#{e}\n#{$@}")
      nil
    end

    # Sends +cmd+ to +nid+ and returns the first reply line without its
    # terminator, or nil when the exchange failed.
    def send_cmd(nid, cmd)
      con = Roma::Messaging::ConPool.instance.get_connection(nid)
      con.write(cmd)
      res = con.gets
      Roma::Messaging::ConPool.instance.return_connection(nid, con)
      if res
        res.chomp!
      end
      res
    rescue => e
      STDERR.puts("#{__FILE__}:#{__LINE__}:Send command failed that node-id is #{nid},command is #{cmd}.")
      nil
    end

    private

    # Returns the vnode id of +key+: its SHA1 digest reduced to dgst_bits
    # bits and masked with the routing data's search mask.
    def vnode_of(key)
      d = Digest::SHA1.hexdigest(key).hex % (2**@rd.dgst_bits)
      d & @rd.search_mask
    end

    # Packs one record with a zero clock. Field sizes are byte counts, so
    # keys containing multibyte characters are not truncated.
    def pack_entry(vn, last, expt, key, val)
      klen = key.bytesize
      if val
        vlen = val.bytesize
        [vn, last, 0, expt, klen, key, vlen, val].pack("NNNNNa#{klen}Na#{vlen}")
      else
        [vn, last, 0, expt, klen, key, 0].pack("NNNNNa#{klen}N")
      end
    end

    # Runs one push session with +nid+: sends the push command, waits for
    # "READY", yields the connection so the caller can write records, then
    # writes the end-of-stream marker and returns the final reply.
    # Returns the (chomped) error line when the node is not ready and nil
    # when an exception occurs.
    def push_stream(hname, vn, nid)
      con = Roma::Messaging::ConPool.instance.get_connection(nid)

      con.write("#{@pushv_cmd} #{hname} #{vn}\r\n")

      res = con.gets # READY\r\n or error string
      if res != "READY\r\n"
        con.close
        return res ? res.chomp : nil # res is nil when the peer hung up
      end

      yield con
      con.write("\0"*20) # end of stream

      res = con.gets # STORED\r\n or error string
      Roma::Messaging::ConPool.instance.return_connection(nid,con)
      res.chomp! if res
      res
    rescue =>e
      STDERR.puts "#{e}\n#{$@}"
      nil
    end

  end # class RecoverLost
end # module Roma
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ require 'kconv'
5
+ require 'logger'
6
+ require 'socket'
7
+ require 'timeout'
8
+ require 'yaml'
9
+
10
module Roma
  module Watch
    # Mail subjects and the protocol commands sent to the nodes.
    module Message
      ERROR_NODE_DOWN = 'A node down'
      ERROR_SPLIT_BRAIN = 'Split brain'
      COMMAND_NODELIST = 'nodelist'
      COMMAND_QUIT = 'quit'
    end

    # Sends mail by piping a message into a sendmail-compatible command.
    class Mailer
      MAILER = '/usr/lib/sendmail'

      attr_reader :from
      attr_reader :to
      attr_reader :mailer

      # from::   sender address (also used as Reply-To).
      # to::     recipient address.
      # mailer:: command to run; MAILER is used when nil.
      def initialize(from, to, mailer = nil)
        @from = from
        @to = to
        @mailer = mailer || MAILER
      end

      # Writes the headers and the JIS-converted +msg+ to the mailer command.
      def send_mail(sub, msg)
        # IO.popen with a command string runs it through the shell exactly
        # like Kernel#open("| cmd") did, without relying on the deprecated
        # pipe form of Kernel#open.
        IO.popen("#{@mailer} -f #{@from} -t", 'w') do |f|
          f.puts "From: #{@from}"
          f.puts "To: #{@to}"
          #f.puts "Subject: #{sub.tojis}"
          f.puts "Subject: #{sub}"
          f.puts "Reply-To: #{@from}"
          f.puts
          f.puts msg.tojis
          2.times{ f.puts }
          f.puts "."
        end
      end
    end # Mailer

    # Asks every configured node for its node list and mails an alert when
    # a node does not answer or when the nodes report different lists.
    class Main
      attr_reader :conf
      attr_reader :log
      attr_reader :nodelist_inf
      attr_reader :errors
      attr_reader :mailer

      # config:: Hash read from the YAML configuration. The keys used are
      #          'log' (path, rotate), 'mail' (subject_prefix, from, to,
      #          mailer), 'roma', 'timeout' and 'retry' (count, period).
      def initialize config
        @conf = config
        @log = Logger.new @conf['log']['path'], @conf['log']['rotate']
        @nodelist_inf = {} # "host:port" => node list reported by that node
        @errors = {}       # "host:port" => error message
        @subject_prefix = @conf['mail']['subject_prefix']
        @mailer = Mailer.new @conf['mail']['from'], @conf['mail']['to'], @conf['mail']['mailer']
      end

      # Queries all nodes, then evaluates the collected results.
      def watch
        @log.info "start watching a ROMA"
        watch_nodes
        @log.info "end watching"
        @log.info "start checking a ROMA"
        check_nodes
        @log.info "end checking"
      end

      # Stores the node list of every node that answered in @nodelist_inf.
      def watch_nodes
        @conf['roma'].each { |node|
          nodes = watch_node node
          @nodelist_inf[node] = nodes if nodes
        }
      end

      # Sends "nodelist" to +node+ ("host:port") and returns the reply split
      # on whitespace. A failed attempt is retried until the configured
      # retry count is reached; then the error is recorded in @errors and
      # nil is returned.
      def watch_node node
        @log.debug "start watching a node: #{node}"
        host, port = node.split(':')
        cnt = 0
        begin
          # Timeout.timeout replaces the bare +timeout+ method, which is no
          # longer defined on current Ruby versions.
          Timeout.timeout(@conf['timeout'].to_i) {
            line = nil
            TCPSocket.open(host, port) do |sock|
              sock.puts Message::COMMAND_NODELIST
              reply = sock.gets
              raise IOError, "no response to #{Message::COMMAND_NODELIST}" unless reply
              # chomp, not chomp!: the latter returns nil when the reply
              # carries no line terminator.
              line = reply.chomp
              sock.puts Message::COMMAND_QUIT
            end
            @log.debug "end watching a node: #{node}"
            line.split(' ')
          }
        rescue StandardError, Timeout::Error => e
          # Interrupt and SystemExit are deliberately not rescued, so the
          # watcher can still be stopped while it is retrying.
          emsg = "Catch an error when checking a node #{node}: #{e.to_s}"
          @log.error emsg
          cnt += 1
          if cnt < @conf['retry']['count'].to_i
            @log.info "retry: #{cnt} times"
            sleep @conf['retry']['period'].to_i
            retry
          end
          @errors[node] = emsg
          nil
        end
      end

      # Runs all checks on the collected results.
      def check_nodes
        check_vital
        check_splitbrain
      end

      # Mails one "node down" alert per node recorded in @errors.
      def check_vital
        @log.debug "start checking the vital"
        @errors.each { |node, emsg|
          @mailer.send_mail(@subject_prefix + Message::ERROR_NODE_DOWN, emsg)
        }
        @log.debug "end checking the vital"
      end

      # Mails a "split brain" alert listing every distinct node list when
      # the nodes that answered do not all report the same one.
      def check_splitbrain
        @log.debug "start checking a splitbrain"
        all_ring = @nodelist_inf.values.uniq

        # No answer at all is reported by check_vital; it is not a split
        # brain and would only produce an alert with an empty body.
        if all_ring.size > 1
          emsg = all_ring.map { |ring| "#{ring.join(',')}\r\n" }.join
          @mailer.send_mail(@subject_prefix + Message::ERROR_SPLIT_BRAIN, emsg)
        end
        @log.debug "end checking a splitbrain"
      end
    end
  end # Watch
end # Roma
140
+
141
# Prints the command-line synopsis: this script's file name followed by the
# expected configuration-file argument.
def usage
  script = File.basename(__FILE__)
  puts "#{script} config.yml"
end
144
+
145
# Entry point: exactly one argument (the YAML configuration file) starts a
# watch run; anything else prints the usage line.
case ARGV.length
when 1
  Roma::Watch::Main.new(YAML.load_file(ARGV[0])).watch
else
  usage
end