roma 0.8.2 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. data/CHANG +326 -0
  2. data/CHANGELOG +132 -0
  3. data/{README.rdoc → FETCH_HEAD} +0 -0
  4. data/{LICENSE.rdoc → LICENSE} +0 -1
  5. data/README +17 -0
  6. data/Rakefile +33 -18
  7. data/ruby/server/bin/chg_redundancy +10 -0
  8. data/ruby/server/bin/key_access +7 -0
  9. data/ruby/server/bin/key_list +7 -0
  10. data/ruby/server/bin/mkconfig +19 -0
  11. data/{bin → ruby/server/bin}/mkrecent +0 -1
  12. data/{bin → ruby/server/bin}/mkroute +0 -1
  13. data/ruby/server/bin/multi_commander +19 -0
  14. data/ruby/server/bin/recoverlost +10 -0
  15. data/ruby/server/bin/recoverlost_alist +10 -0
  16. data/ruby/server/bin/recoverlost_alist_all +10 -0
  17. data/ruby/server/bin/recoverlost_alist_keys +10 -0
  18. data/{bin/recoverlost → ruby/server/bin/roma_watcher} +1 -2
  19. data/ruby/server/bin/romad +36 -0
  20. data/{bin → ruby/server/bin}/sample_watcher +0 -1
  21. data/{bin → ruby/server/bin}/sample_watcher2 +0 -1
  22. data/{bin/simple_bench → ruby/server/bin/sample_watcher3} +1 -2
  23. data/ruby/server/bin/simple_bench +26 -0
  24. data/{bin → ruby/server/bin}/ssroute +0 -1
  25. data/ruby/server/bin/test-scenario +11 -0
  26. data/{bin → ruby/server/bin}/tribunus +0 -1
  27. data/{lib → ruby/server/lib}/roma/async_process.rb +67 -15
  28. data/{lib → ruby/server/lib}/roma/command/bg_command_receiver.rb +1 -1
  29. data/ruby/server/lib/roma/command/command_definition.rb +422 -0
  30. data/ruby/server/lib/roma/command/mh_command_receiver.rb +127 -0
  31. data/ruby/server/lib/roma/command/receiver.rb +64 -0
  32. data/{lib → ruby/server/lib}/roma/command/rt_command_receiver.rb +6 -1
  33. data/ruby/server/lib/roma/command/sys_command_receiver.rb +609 -0
  34. data/{lib → ruby/server/lib}/roma/command/util_command_receiver.rb +15 -5
  35. data/{lib → ruby/server/lib}/roma/command/vn_command_receiver.rb +12 -4
  36. data/{lib → ruby/server/lib}/roma/command_plugin.rb +0 -0
  37. data/ruby/server/lib/roma/config.rb +84 -0
  38. data/{lib → ruby/server/lib}/roma/event/con_pool.rb +12 -1
  39. data/ruby/server/lib/roma/event/handler.rb +256 -0
  40. data/ruby/server/lib/roma/live_patch-20120302-001.rb +107 -0
  41. data/ruby/server/lib/roma/logging/rlogger.rb +163 -0
  42. data/ruby/server/lib/roma/messaging/con_pool.rb +92 -0
  43. data/{lib → ruby/server/lib}/roma/plugin/plugin_alist.rb +118 -240
  44. data/ruby/server/lib/roma/plugin/plugin_debug.rb +31 -0
  45. data/ruby/server/lib/roma/plugin/plugin_map.rb +177 -0
  46. data/ruby/server/lib/roma/plugin/plugin_mapcount.rb +185 -0
  47. data/{lib/roma/command/st_command_receiver.rb → ruby/server/lib/roma/plugin/plugin_storage.rb} +170 -146
  48. data/ruby/server/lib/roma/plugin/plugin_stub.rb +283 -0
  49. data/{lib → ruby/server/lib}/roma/plugin/plugin_test.rb +0 -0
  50. data/{lib → ruby/server/lib}/roma/romad.rb +221 -94
  51. data/{lib → ruby/server/lib}/roma/routing/cb_rttable.rb +4 -6
  52. data/{lib → ruby/server/lib}/roma/routing/merkle_tree.rb +0 -0
  53. data/ruby/server/lib/roma/routing/routing_data.rb +307 -0
  54. data/{lib → ruby/server/lib}/roma/routing/rttable.rb +4 -0
  55. data/{lib → ruby/server/lib}/roma/stats.rb +19 -3
  56. data/{lib → ruby/server/lib}/roma/storage/basic_storage.rb +25 -26
  57. data/{lib → ruby/server/lib}/roma/storage/dbm_storage.rb +1 -23
  58. data/{lib → ruby/server/lib}/roma/storage/dummy_storage.rb +0 -0
  59. data/{lib → ruby/server/lib}/roma/storage/rh_storage.rb +0 -0
  60. data/{lib → ruby/server/lib}/roma/storage/sqlite3_storage.rb +0 -0
  61. data/{lib → ruby/server/lib}/roma/storage/tc_storage.rb +62 -2
  62. data/ruby/server/lib/roma/tools/chg_redundancy.rb +36 -0
  63. data/ruby/server/lib/roma/tools/key_access.rb +105 -0
  64. data/ruby/server/lib/roma/tools/key_list.rb +94 -0
  65. data/ruby/server/lib/roma/tools/mkconfig.rb +535 -0
  66. data/{lib → ruby/server/lib}/roma/tools/mkrecent.rb +0 -0
  67. data/{lib → ruby/server/lib}/roma/tools/mkroute.rb +0 -0
  68. data/ruby/server/lib/roma/tools/multi_commander.rb +45 -0
  69. data/{lib → ruby/server/lib}/roma/tools/recoverlost.rb +0 -0
  70. data/{lib → ruby/server/lib}/roma/tools/recoverlost_alist.rb +0 -0
  71. data/ruby/server/lib/roma/tools/recoverlost_alist_all.rb +8 -0
  72. data/ruby/server/lib/roma/tools/recoverlost_alist_keys.rb +16 -0
  73. data/ruby/server/lib/roma/tools/recoverlost_lib.rb +349 -0
  74. data/ruby/server/lib/roma/tools/roma_watcher.rb +150 -0
  75. data/ruby/server/lib/roma/tools/roma_watcher_config.yml.example +20 -0
  76. data/{lib → ruby/server/lib}/roma/tools/sample_watcher.rb +3 -1
  77. data/{lib → ruby/server/lib}/roma/tools/sample_watcher2.rb +3 -1
  78. data/ruby/server/lib/roma/tools/sample_watcher3.rb +49 -0
  79. data/{lib → ruby/server/lib}/roma/tools/simple_bench.rb +2 -0
  80. data/ruby/server/lib/roma/tools/simple_bench2.rb +78 -0
  81. data/{lib → ruby/server/lib}/roma/tools/ssroute.rb +0 -0
  82. data/ruby/server/lib/roma/tools/test-scenario.rb +327 -0
  83. data/{lib → ruby/server/lib}/roma/tools/tribunus.rb +0 -0
  84. data/ruby/server/lib/roma/version.rb +4 -0
  85. data/{lib → ruby/server/lib}/roma/write_behind.rb +1 -0
  86. data/ruby/server/test/config4mhash.rb +68 -0
  87. data/ruby/server/test/config4storage_error.rb +69 -0
  88. data/{lib/roma/config.rb → ruby/server/test/config4test.rb} +6 -3
  89. data/{test → ruby/server/test}/rcirb.rb +0 -1
  90. data/{test → ruby/server/test}/roma-test-utils.rb +21 -8
  91. data/{test → ruby/server/test}/run-test.rb +3 -2
  92. data/ruby/server/test/storage_error_storage.rb +37 -0
  93. data/ruby/server/test/t_command_definition.rb +326 -0
  94. data/{test → ruby/server/test}/t_cpdata.rb +9 -3
  95. data/{test → ruby/server/test}/t_listplugin.rb +48 -12
  96. data/ruby/server/test/t_mapcountplugin.rb +231 -0
  97. data/ruby/server/test/t_mapplugin.rb +131 -0
  98. data/ruby/server/test/t_mhash.rb +222 -0
  99. data/ruby/server/test/t_rclient.rb +199 -0
  100. data/{test → ruby/server/test}/t_routing_data.rb +56 -0
  101. data/{test → ruby/server/test}/t_storage.rb +107 -111
  102. data/ruby/server/test/t_storage_error.rb +61 -0
  103. data/ruby/server/test/t_writebehind.rb +374 -0
  104. metadata +150 -82
  105. data/bin/recoverlost_alist +0 -8
  106. data/bin/romad +0 -7
  107. data/lib/roma/command/mh_command_receiver.rb +0 -117
  108. data/lib/roma/command/receiver.rb +0 -287
  109. data/lib/roma/event/handler.rb +0 -159
  110. data/lib/roma/plugin/plugin_debug.rb +0 -19
  111. data/lib/roma/tools/recoverlost_lib.rb +0 -217
  112. data/lib/roma/version.rb +0 -4
  113. data/test/t_rclient.rb +0 -318
  114. data/test/t_writebehind.rb +0 -200
File without changes
File without changes
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'roma/logging/rlogger'
4
+ require 'roma/messaging/con_pool'
5
+ require 'roma/routing/routing_data'
6
+ require 'roma/client/sender'
7
+
8
module Roma

  # Sends a text command to one node, or to every node listed in the routing
  # dump fetched from the node given to the constructor.
  class MultiCommander

    # Adds a receiver to Roma::Client::Sender that keeps reading response
    # lines until the connection has been idle for 0.05 seconds.
    Roma::Client::Sender.class_eval{
      # con:: readable connection to a node
      # Returns the received lines with their line terminators removed.
      def multiplelines_receiver2(con)
        ret = []
        while select [con], nil, nil, 0.05
          line = con.gets
          # gets returns nil once the peer has closed the connection; stop
          # instead of calling chomp on nil.
          break if line.nil?
          ret << line.chomp
        end
        ret
      end
    }

    # nid:: node that is asked for the routing dump
    def initialize(nid)
      @sender = Roma::Client::Sender.new
      @rd = @sender.send_routedump_command(nid)
    end

    # Sends +cmd+ to +nid+ (the first node of the routing dump when omitted)
    # and returns the response lines joined with CRLF, followed by one CRLF.
    def send_cmd(cmd, nid = nil)
      nid = @rd.nodes[0] unless nid
      res = ''
      res << @sender.send_command(nid, cmd, nil, :multiplelines_receiver2).join("\r\n")
      res << "\r\n"
    end

    # Sends +cmd+ to every node of the routing dump and returns all
    # responses, each one preceded by a "****** node" header line.
    def send_cmd_all(cmd)
      res = ''
      @rd.nodes.each{|nid|
        res << "****** #{nid}\r\n"
        res << send_cmd(cmd, nid)
      }
      res
    end

  end # class MultiCommander

end # module Roma
File without changes
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # usage:recoverlost_alist_all address port storage-path
4
+ #
5
+ require 'roma/tools/recoverlost_lib'
6
+
7
# Run the recovery with alldata = true, then report completion.
recoverer = Roma::RecoverLost.new('recoverlost_alist_all', 'alist_spushv', ARGV, true)
recoverer.suite
puts "Recover process has succeed."
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # usage:recoverlost_alist_keys address port storage-path < key-list
4
+ #
5
+ require 'roma/tools/recoverlost_lib'
6
+
7
r = Roma::RecoverLost.new('recoverlost_alist_keys', 'alist_spushv', ARGV, true)

# The keys to recover are read from standard input, one key per line.
keys = STDIN.each_line.map { |line| line.chomp }

r.suite_with_keys(keys)

puts "Recover process has succeed."
@@ -0,0 +1,349 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # usage:recoverlost address port storage-path [yyyymmddhhmmss]
5
+ #
6
+ require 'roma/client/sender'
7
+ require 'roma/messaging/con_pool'
8
+ require 'roma/routing/routing_data'
9
+
10
+ module Roma
11
+ module Storage
12
+ end
13
+ Storage::autoload(:TCStorage,'roma/storage/tc_storage')
14
+ Storage::autoload(:DbmStorage,'roma/storage/dbm_storage')
15
+ Storage::autoload(:SQLite3Storage,'roma/storage/sqlite3_storage')
16
+
17
+ class RecoverLost
18
+
19
# Validates the command line arguments and stores the recovery settings.
#
# pname::     program name shown in the usage message
# pushv_cmd:: name of the push command that is sent to the nodes
# argv::      address, port, storage-path and an optional yyyymmddhhmmss
# alldata::   when true, exactly three arguments are required
#
# Prints a message and calls exit when the arguments are not acceptable.
def initialize(pname, pushv_cmd, argv, alldata = false)
  if alldata
    if argv.length != 3
      puts "usage:#{pname} address port storage-path"
      exit
    end
  elsif argv.length < 3
    # The timestamp is optional (it is bracketed in the usage text and
    # nil-checked below), so only three arguments are mandatory.
    puts "usage:#{pname} address port storage-path [yyyymmddhhmmss]"
    exit
  end

  @addr, @port, @strgpath, @ymdhms = argv[0, 4]

  if @port =~ /\D/
    STDERR.puts "port was not numeric."
    exit
  end

  if @ymdhms && (@ymdhms.length != 14 || @ymdhms =~ /\D/)
    STDERR.puts "yyyymmddhhmmss format mismatch."
    exit
  end

  @pushv_cmd = pushv_cmd
  @nodeid = "#{@addr}_#{@port}"
  # Pause in seconds after each record written to a node.
  @stream_copy_wait_param = 0.0001
  @alldata = alldata
end
49
+
50
# Runs the recovery: fetches the routing data from @nodeid, decides which
# vnodes to recover and pushes them from every hash directory found under
# @strgpath. With @alldata every vnode of the routing data is recovered;
# otherwise only the lost ones, and the process exits if there are none.
def suite
  @rd = get_routing_data(@nodeid)
  if @alldata
    @lost_vnodes = @rd.v_idx.keys
  else
    @lost_vnodes = get_lost_vnodes(@rd,@ymdhms)
    puts "#{@lost_vnodes.length} vnodes where data was lost."

    exit if @lost_vnodes.length == 0
  end

  each_hash(@strgpath){|hname,dir|
    puts "#{hname} #{dir}"
    @storage = open_storage(dir,@lost_vnodes)
    begin
      start_recover(hname)
    ensure
      # start_recover may raise or call exit; close the storage either way.
      @storage.closedb if @storage
    end
  }
end
68
+
69
# Recovers only the given keys from every hash directory found under
# @strgpath, using the routing data fetched from @nodeid.
#
# keys:: array of key strings
def suite_with_keys(keys)
  @rd = get_routing_data(@nodeid)
  @lost_vnodes = @rd.v_idx.keys

  each_hash(@strgpath){|hname,dir|
    puts "#{hname} #{dir}"
    @storage = open_storage(dir,@lost_vnodes)
    begin
      # start_recover_width_keys2 is an alternative that groups keys by node.
      start_recover_width_keys(hname, keys)
    ensure
      # Close the storage even when the upload raises.
      @storage.closedb if @storage
    end
  }
end
81
+
82
# Yields the name and the path of every directory directly under +path+.
# Entries that are not directories are skipped.
def each_hash(path)
  Dir::glob("#{path}/*").each{|dir|
    next unless File::directory?(dir)
    yield File.basename(dir), dir
  }
end
89
+
90
# Asks node +nid+ for its routing dump and returns it.
def get_routing_data(nid)
  Roma::Client::Sender.new.send_routedump_command(nid)
end
94
+
95
# Returns the vnodes that +rd+ reports as lost. When +ymdhms+ is given,
# the vnodes returned by get_history_of_lost for @nodeid are merged in
# without duplicates.
def get_lost_vnodes(rd,ymdhms)
  lost = rd.get_lost_vnodes
  return lost unless ymdhms

  lost | get_history_of_lost(@nodeid, ymdhms)
end
102
+
103
# Sends "history_of_lost <ymdhms>" to node +nid+ and returns the response
# lines, converted with to_i, that precede the terminating END line.
#
# If the connection ends before END arrives, the values read so far are
# returned and the connection is closed instead of being handed back to
# the pool.
def get_history_of_lost(nid,ymdhms)
  ret = []
  pool = Roma::Messaging::ConPool.instance
  con = pool.get_connection(nid)
  con.write("history_of_lost #{ymdhms}\r\n")
  while (buf = con.gets)
    if buf == "END\r\n"
      pool.return_connection(nid, con)
      return ret
    end
    ret << buf.chomp.to_i
  end
  # gets returned nil: the peer closed the connection before sending END.
  con.close
  ret
end
113
+
114
# Opens the storage kept in directory +path+.
#
# The storage type is taken from the extension of the file named "0.*" and
# the number of divided files from the count of files with that extension.
#
# path::    directory that holds the divided storage files
# vn_list:: vnode list handed to the storage
#
# Returns the opened storage, or nil (after a message on STDERR) when the
# directory is missing, holds no "0.*" file, or has an unsupported type.
def open_storage(path,vn_list)
  unless File::directory?(path)
    STDERR.puts "#{path} dose not found."
    return nil
  end

  first_file = Dir::glob("#{path}/0.*")[0]
  unless first_file
    STDERR.puts "#{path} has no storage file."
    return nil
  end

  # get a file extension
  ext = File::extname(first_file)[1..-1]
  # count a number of divided files
  divnum = Dir::glob("#{path}/*.#{ext}").length

  st = new_storage(ext)
  unless st
    STDERR.puts "#{ext} is an unsupported storage type."
    return nil
  end

  st.divnum = divnum
  st.vn_list = vn_list
  st.storage_path = path
  st.opendb
  st
end
132
+
133
# Returns a new storage object for the file extension +ext+
# ('tc', 'dbm' or 'sql3'), or nil for any other extension.
def new_storage(ext)
  klass = case ext
          when 'tc'   then ::Roma::Storage::TCStorage
          when 'dbm'  then Roma::Storage::DbmStorage
          when 'sql3' then Roma::Storage::SQLite3Storage
          end
  klass && klass.new
end
145
+
146
# Pushes every vnode of @lost_vnodes in hash +hname+ to its nodes.
#
# A vnode that has no node in the routing data is pushed to one randomly
# chosen node; that node is then registered with a setroute command sent
# to itself and broadcast to all other nodes. Calls exit when a push does
# not answer STORED or when the setroute command fails.
def start_recover(hname)
  @lost_vnodes.each_with_index{|vn, idx|
    nodes = @rd.v_idx[vn]
    unassigned = nodes.nil? || nodes.empty?
    if unassigned
      nids = [@rd.nodes[rand(@rd.nodes.length)]]
      puts "#{idx}/#{@lost_vnodes.length} #{vn} assign to #{nids.inspect}"
    else
      nids = nodes
      puts "#{idx}/#{@lost_vnodes.length} #{vn} was auto assigned at #{nids.inspect}"
    end

    nids.each{|nid|
      if push_a_vnode_stream(hname, vn, nid)!="STORED"
        STDERR.puts "push_a_vnode_stream aborted in #{vn}"
        exit
      end
    }

    if unassigned
      # The block variable of the loop above is not visible here, so take
      # the chosen node from the list explicitly.
      nid = nids[0]
      cmd = "setroute #{vn} #{@rd.v_clk[vn]} #{nid}\r\n"
      exit unless send_cmd(nid ,cmd)
      # Exclude exactly this node; a bare String would be matched by substring.
      broadcast_cmd(cmd, [nid])
    end
  }
end
172
+
173
# Streams every record of vnode +vn+ from @storage to node +nid+.
#
# Sends "<@pushv_cmd> <hname> <vn>", waits for READY, writes each record
# with its clock field reset by clk_to_zero, and finishes with 20 NUL
# bytes.
#
# Returns the final response line without its line terminator ("STORED"
# on success). If the node does not answer READY the connection is closed
# and that answer is returned (nil when there was none). Returns nil after
# printing the error when an exception is raised.
def push_a_vnode_stream(hname, vn, nid)
  con = Roma::Messaging::ConPool.instance.get_connection(nid)

  con.write("#{@pushv_cmd} #{hname} #{vn}\r\n")

  res = con.gets # READY\r\n or error string
  if res != "READY\r\n"
    con.close
    return res && res.chomp
  end

  @storage.each_vn_dump(vn){|data|
    con.write(clk_to_zero(data))
    sleep @stream_copy_wait_param
  }
  con.write("\0"*20) # end of stream

  res = con.gets # STORED\r\n or error string
  Roma::Messaging::ConPool.instance.return_connection(nid,con)
  res.chomp! if res
  res
rescue =>e
  STDERR.puts "#{e}\n#{$@}"
  # Do not leave a half-used connection open.
  begin
    con.close if con
  rescue StandardError
    # the connection is already unusable
  end
  nil
end
198
+
199
# Groups +keys+ by the nodes responsible for them.
#
# The vnode of a key is its SHA1 digest, reduced to @rd.dgst_bits bits and
# masked with @rd.search_mask. Returns a hash from node to array of keys;
# every node of @rd.nodes is present, with an empty array when it has no
# key. Keys whose vnode has no entry in @rd.v_idx are left out.
def make_node_hash(keys)
  res = {}
  @rd.nodes.each{|nid| res[nid] = [] }
  modulus = 2**@rd.dgst_bits
  keys.each{|key|
    d = Digest::SHA1.hexdigest(key).hex % modulus
    nids = @rd.v_idx[d & @rd.search_mask] || []
    nids.each{|nid| (res[nid] ||= []) << key }
  }
  res
end
208
+
209
# Uploads +keys+ of hash +hname+ grouped by node: prints each node and
# sends it all of its keys through upload_data2.
def start_recover_width_keys2(hname,keys)
  make_node_hash(keys).each do |nid, node_keys|
    puts nid
    upload_data2(hname, nid, node_keys)
  end
end
216
+
217
# Uploads the stored records of +keys+ to node +nid+ over one connection.
#
# Sends "<@pushv_cmd> <hname> 0", waits for READY, then writes one record
# per key found in @storage. Each record carries the vnode computed from
# the key and a clock of 0. The stream ends with 20 NUL bytes. Progress is
# printed about every 1% of the keys.
#
# Returns the final response line without its line terminator. If the
# node does not answer READY the connection is closed and that answer is
# returned (nil when there was none). Returns nil after printing the error
# when an exception is raised.
def upload_data2(hname, nid, keys)
  con = Roma::Messaging::ConPool.instance.get_connection(nid)

  cmd = "#{@pushv_cmd} #{hname} 0\r\n"
  con.write(cmd)
  res = con.gets # READY\r\n or error string
  if res != "READY\r\n"
    con.close
    return res && res.chomp
  end

  n = keys.length
  m = n / 100
  m = 1 if m < 1
  modulus = 2**@rd.dgst_bits
  keys.each_with_index{|k,i|
    print "#{i}/#{n}\r" if i%m == 0
    data = @storage.get_raw2(k)
    next unless data
    d = Digest::SHA1.hexdigest(k).hex % modulus
    vn = d & @rd.search_mask

    _vn_old, last, _clk, expt, val = data
    # pack's "a" directive counts bytes, so the lengths must be byte
    # lengths; String#length counts characters for multibyte keys.
    klen = k.bytesize
    if val
      vlen = val.bytesize
      wd = [vn, last, 0, expt, klen, k, vlen, val].pack("NNNNNa#{klen}Na#{vlen}")
    else
      wd = [vn, last, 0, expt, klen, k, 0].pack("NNNNNa#{klen}N")
    end

    con.write(wd)
    sleep @stream_copy_wait_param
  }
  con.write("\0"*20) # end of stream

  res = con.gets # STORED\r\n or error string
  Roma::Messaging::ConPool.instance.return_connection(nid,con)
  res.chomp! if res
  res
rescue =>e
  STDERR.puts "#{e}\n#{$@}"
  # Do not leave a half-used connection open.
  begin
    con.close if con
  rescue StandardError
    # the connection is already unusable
  end
  nil
end
259
+
260
# Uploads each key of +keys+ that exists in @storage to every node of the
# key's vnode, printing the key, each node and the node's answer.
# Keys that are not in the storage are skipped silently.
def start_recover_width_keys(hname,keys)
  keys.each do |key|
    data = @storage.get_raw2(key)
    next unless data

    puts "hit => #{key}"
    digest = Digest::SHA1.hexdigest(key).hex % (2**@rd.dgst_bits)
    vn = digest & @rd.search_mask
    @rd.v_idx[vn].each do |nid|
      print "#{nid}=>"
      res = upload_data(hname, vn, nid, key, data)
      puts res
    end
  end
end
276
+
277
# Uploads one record to node +nid+.
#
# hname:: hash name
# vn::    vnode written into the record
# nid::   destination node
# k::     key
# data::  [vnode, last, clock, expiry, value] as read from the storage;
#         the stored vnode and clock are ignored and the clock is sent as 0
#
# Sends "<@pushv_cmd> <hname> <vn>", waits for READY, writes the record
# and 20 NUL bytes. Returns the final response line without its line
# terminator. If the node does not answer READY the connection is closed
# and that answer is returned (nil when there was none). Returns nil after
# printing the error when an exception is raised.
def upload_data(hname, vn, nid, k, data)
  con = Roma::Messaging::ConPool.instance.get_connection(nid)

  cmd = "#{@pushv_cmd} #{hname} #{vn}\r\n"
  con.write(cmd)
  res = con.gets # READY\r\n or error string
  if res != "READY\r\n"
    con.close
    return res && res.chomp
  end

  _vn_old, last, _clk, expt, val = data
  # pack's "a" directive counts bytes, so the lengths must be byte
  # lengths; String#length counts characters for multibyte keys.
  klen = k.bytesize
  if val
    vlen = val.bytesize
    wd = [vn, last, 0, expt, klen, k, vlen, val].pack("NNNNNa#{klen}Na#{vlen}")
  else
    wd = [vn, last, 0, expt, klen, k, 0].pack("NNNNNa#{klen}N")
  end

  con.write(wd)
  sleep @stream_copy_wait_param

  con.write("\0"*20) # end of stream

  res = con.gets # STORED\r\n or error string
  Roma::Messaging::ConPool.instance.return_connection(nid,con)
  res.chomp! if res
  res
rescue =>e
  STDERR.puts "#{e}\n#{$@}"
  # Do not leave a half-used connection open.
  begin
    con.close if con
  rescue StandardError
    # the connection is already unusable
  end
  nil
end
310
+
311
# Returns a copy of the packed record +data+ with its clock field set to 0.
#
# The record is five 32-bit big-endian integers (vnode, last, clock,
# expiry, key length), the key, a 32-bit value length and, when that
# length is not zero, the value.
def clk_to_zero(data)
  vn, last, _clk, expt, klen = data.unpack('NNNNN')
  key, vlen = data[20..-1].unpack("a#{klen}N")
  fields = [vn, last, 0, expt, klen, key]
  return (fields << 0).pack("NNNNNa#{klen}N") if vlen == 0

  value, = data[(24 + klen)..-1].unpack("a#{vlen}")
  (fields << vlen << value).pack("NNNNNa#{klen}Na#{vlen}")
end
321
+
322
# Sends +cmd+ to every node of @rd.nodes except the excluded ones.
#
# cmd::          command text, including its line terminator
# without_nids:: node, array of nodes, or nil for no exclusion
#
# Returns a hash from node to the result of send_cmd, or nil (after
# printing the error) when an exception is raised.
def broadcast_cmd(cmd,without_nids=nil)
  # Array() turns nil into [] and wraps a single node, so the exclusion is
  # by exact match rather than by substring of a String.
  excluded = Array(without_nids)
  res = {}
  @rd.nodes.each{ |nid|
    res[nid] = send_cmd(nid,cmd) unless excluded.include?(nid)
  }
  res
rescue => e
  STDERR.puts("#{e}\n#{$@}")
  nil
end
333
+
334
# Writes +cmd+ to node +nid+ and returns the first response line without
# its line terminator (nil when nothing was received). Returns nil after
# a message on STDERR when an exception is raised.
def send_cmd(nid, cmd)
  pool = Roma::Messaging::ConPool.instance
  con = pool.get_connection(nid)
  con.write(cmd)
  res = con.gets
  pool.return_connection(nid, con)
  res.chomp! if res
  res
rescue
  STDERR.puts("#{__FILE__}:#{__LINE__}:Send command failed that node-id is #{nid},command is #{cmd}.")
  nil
end
347
+
348
+ end # class RecoverLost
349
+ end # module Roma
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ require 'kconv'
5
+ require 'logger'
6
+ require 'socket'
7
+ require 'timeout'
8
+ require 'yaml'
9
+
10
+ module Roma
11
+ module Watch
12
# Mail subjects and protocol commands used by the watcher.
# The strings are frozen so that shared constants cannot be modified.
module Message
  ERROR_NODE_DOWN = 'A node down'.freeze
  ERROR_SPLIT_BRAIN = 'Split brain'.freeze
  COMMAND_NODELIST = 'nodelist'.freeze
  COMMAND_QUIT = 'quit'.freeze
end
18
+
19
# Sends notification mails by piping a message into a sendmail-style
# command.
class Mailer
  # Command used when none is given to the constructor.
  MAILER = '/usr/lib/sendmail'

  attr_reader :from, :to, :mailer

  # from::   sender address, also used for -f and Reply-To
  # to::     recipient address
  # mailer:: mail command; MAILER is used when omitted
  def initialize(from, to, mailer = nil)
    @from = from
    @to = to
    @mailer = mailer || MAILER
  end

  # Pipes a mail with subject +sub+ and body +msg+ into the mail command.
  # The body is converted with tojis; the subject is sent as given.
  def send_mail(sub, msg)
    headers = [
      "From: #{@from}",
      "To: #{@to}",
      "Subject: #{sub}",
      "Reply-To: #{@from}"
    ]
    open("| #{@mailer} -f #{@from} -t", 'w') do |f|
      headers.each { |line| f.puts line }
      f.puts
      f.puts msg.tojis
      2.times{ f.puts }
      f.puts "."
    end
  end
end # Mailer
47
+
48
# Watches the nodes listed in the configuration and mails an alert for
# every node that cannot be queried and for a split ring.
#
# Configuration keys read: 'log' ('path', 'rotate'), 'mail'
# ('subject_prefix', 'from', 'to', 'mailer'), 'roma' (list of "host:port"),
# 'timeout' and 'retry' ('count', 'period').
class Main
  attr_reader :conf, :log, :nodelist_inf, :errors, :mailer

  def initialize config
    @conf = config
    @log = Logger.new @conf['log']['path'], @conf['log']['rotate']
    @nodelist_inf = {} # node => node list reported by that node
    @errors = {}       # node => error message
    @subject_prefix = @conf['mail']['subject_prefix']
    @mailer = Mailer.new @conf['mail']['from'], @conf['mail']['to'], @conf['mail']['mailer']
  end

  # Queries every node, then sends the alert mails.
  def watch
    @log.info "start watching a ROMA"
    watch_nodes
    @log.info "end watching"
    @log.info "start checking a ROMA"
    check_nodes
    @log.info "end checking"
  end

  # Stores the node list of every configured node that answered.
  def watch_nodes
    @conf['roma'].each { |node|
      nodes = watch_node node
      @nodelist_inf[node] = nodes if nodes
    }
  end

  # Asks +node+ ("host:port") for its node list.
  #
  # Returns the list as an array of strings. On failure the attempt is
  # repeated until 'retry'/'count' attempts have been made, sleeping
  # 'retry'/'period' seconds in between; then the error is recorded in
  # @errors and nil is returned.
  def watch_node node
    @log.debug "start watching a node: #{node}"
    host, port = node.split(':')
    attempts = 0
    begin
      # Timeout.timeout replaces the bare Kernel#timeout, which no longer
      # exists on current Ruby.
      Timeout.timeout(@conf['timeout'].to_i) {
        line = nil
        TCPSocket.open(host, port) do |sock|
          sock.puts Message::COMMAND_NODELIST
          response = sock.gets
          raise IOError, "no response from #{node}" if response.nil?
          # chomp, not chomp!: chomp! returns nil when nothing is removed.
          line = response.chomp
          sock.puts Message::COMMAND_QUIT
        end
        @log.debug "end watching a node: #{node}"
        line.split(' ')
      }
    rescue StandardError, Timeout::Error => e
      # Interrupt and SystemExit are deliberately not rescued.
      emsg = "Catch an error when checking a node #{node}: #{e.to_s}"
      @log.error emsg
      attempts += 1
      if attempts < @conf['retry']['count'].to_i
        @log.info "retry: #{attempts} times"
        sleep @conf['retry']['period'].to_i
        retry
      end
      @errors[node] = emsg
      nil
    end
  end

  def check_nodes
    check_vital
    check_splitbrain
  end

  # Mails one "node down" alert per recorded error.
  def check_vital
    @log.debug "start checking the vital"
    @errors.each { |node, emsg|
      @mailer.send_mail(@subject_prefix + Message::ERROR_NODE_DOWN, emsg)
    }
    @log.debug "end checking the vital"
  end

  # Mails a "split brain" alert when the nodes reported more than one
  # distinct node list. Nothing is sent when no node answered at all;
  # that case is already covered by the node-down alerts.
  def check_splitbrain
    @log.debug "start checking a splitbrain"
    all_ring = @nodelist_inf.values.uniq

    if all_ring.size > 1
      emsg = all_ring.map { |ring| "#{ring.join(',')}\r\n" }.join
      @mailer.send_mail(@subject_prefix + Message::ERROR_SPLIT_BRAIN, emsg)
    end
    @log.debug "end checking a splitbrain"
  end
end
138
+ end # Watch
139
+ end # Roma
140
+
141
# Prints the expected command line: this file's name and "config.yml".
def usage
  puts "#{File.basename(__FILE__)} config.yml"
end
144
+
145
# Entry point: with exactly one argument, load it as the YAML configuration
# and run one watch cycle; otherwise print the usage line.
if ARGV.length == 1
  config = YAML.load_file(ARGV[0])
  Roma::Watch::Main.new(config).watch
else
  usage
end