roma 0.8.12 → 0.8.13p1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/CHANGELOG +34 -1
  2. data/Gemfile +17 -0
  3. data/Rakefile +2 -3
  4. data/ruby/server/lib/roma/async_process.rb +158 -20
  5. data/ruby/server/lib/roma/command/bg_command_receiver.rb +3 -4
  6. data/ruby/server/lib/roma/command/rt_command_receiver.rb +133 -5
  7. data/ruby/server/lib/roma/command/sys_command_receiver.rb +353 -0
  8. data/ruby/server/lib/roma/command/vn_command_receiver.rb +9 -3
  9. data/ruby/server/lib/roma/config.rb +6 -0
  10. data/ruby/server/lib/roma/dns_cache.rb +40 -0
  11. data/ruby/server/lib/roma/event/con_pool.rb +3 -1
  12. data/ruby/server/lib/roma/event/handler.rb +18 -3
  13. data/ruby/server/lib/roma/messaging/con_pool.rb +3 -1
  14. data/ruby/server/lib/roma/romad.rb +19 -3
  15. data/ruby/server/lib/roma/routing/cb_rttable.rb +22 -1
  16. data/ruby/server/lib/roma/routing/random_balancer.rb +76 -0
  17. data/ruby/server/lib/roma/routing/routing_data.rb +16 -2
  18. data/ruby/server/lib/roma/routing/rttable.rb +55 -8
  19. data/ruby/server/lib/roma/stats.rb +32 -0
  20. data/ruby/server/lib/roma/storage/sqlite3_storage.rb +9 -3
  21. data/ruby/server/lib/roma/tools/mkconfig.rb +135 -73
  22. data/ruby/server/lib/roma/tools/mkrecent.rb +3 -4
  23. data/ruby/server/lib/roma/tools/mkroute.rb +6 -7
  24. data/ruby/server/lib/roma/tools/multi_commander.rb +3 -4
  25. data/ruby/server/lib/roma/tools/recoverlost.rb +0 -1
  26. data/ruby/server/lib/roma/tools/recoverlost_alist.rb +0 -1
  27. data/ruby/server/lib/roma/tools/recoverlost_lib.rb +10 -11
  28. data/ruby/server/lib/roma/tools/roma_watcher.rb +0 -1
  29. data/ruby/server/lib/roma/tools/sample_watcher.rb +3 -4
  30. data/ruby/server/lib/roma/tools/sample_watcher2.rb +3 -4
  31. data/ruby/server/lib/roma/tools/sample_watcher3.rb +0 -1
  32. data/ruby/server/lib/roma/tools/simple_bench.rb +3 -3
  33. data/ruby/server/lib/roma/tools/simple_bench2.rb +1 -2
  34. data/ruby/server/lib/roma/tools/ssroute.rb +0 -1
  35. data/ruby/server/lib/roma/tools/tribunus.rb +5 -6
  36. data/ruby/server/lib/roma/version.rb +1 -1
  37. data/ruby/server/lib/roma/write_behind.rb +4 -1
  38. data/ruby/server/test/rcirb.rb +0 -1
  39. data/ruby/server/test/t_cpdata.rb +8 -9
  40. data/ruby/server/test/t_rclient.rb +1 -2
  41. data/ruby/server/test/t_routing_data.rb +13 -14
  42. data/ruby/server/test/t_storage.rb +1 -1
  43. data/ruby/server/test/t_writebehind.rb +29 -30
  44. metadata +25 -24
@@ -14,7 +14,11 @@ module Roma
14
14
  attr :trans # transaction
15
15
  attr :leave_proc
16
16
  attr :lost_proc
17
+ attr :recover_proc
17
18
  attr_accessor :lost_action
19
+ attr_accessor :auto_recover
20
+ attr_accessor :auto_recover_status
21
+ attr_accessor :auto_recover_time
18
22
  attr_reader :version_of_nodes
19
23
  attr_reader :min_version
20
24
 
@@ -25,7 +29,11 @@ module Roma
25
29
  @fname=fname
26
30
  @leave_proc=nil
27
31
  @lost_proc=nil
32
+ @recover_proc=nil
28
33
  @lost_action=:no_action
34
+ @auto_recover=false
35
+ @auto_recover_status="waiting"
36
+ @auto_recover_time=1800
29
37
  @enabled_failover=false
30
38
  @lock = Mutex.new
31
39
  @version_of_nodes = Hash.new(0)
@@ -36,6 +44,9 @@ module Roma
36
44
  def get_stat(ap)
37
45
  ret = super(ap)
38
46
  ret['routing.lost_action'] = @lost_action.to_s
47
+ ret['routing.auto_recover'] = @auto_recover.to_s
48
+ ret['routing.auto_recover_status'] = @auto_recover_status.to_s
49
+ ret['routing.auto_recover_time'] = @auto_recover_time
39
50
  ret['routing.version_of_nodes'] = @version_of_nodes.inspect
40
51
  ret['routing.min_version'] = @min_version
41
52
  ret
@@ -60,6 +71,10 @@ module Roma
60
71
  @lost_proc=block
61
72
  end
62
73
 
74
+ def set_recover_proc(&block)
75
+ @recover_proc=block
76
+ end
77
+
63
78
  def open_log
64
79
  log_list=@rd.get_file_list(@fname)
65
80
  if log_list.length==0
@@ -217,6 +232,7 @@ module Roma
217
232
  write_log("leave #{nid}")
218
233
 
219
234
  lost_vnodes=[]
235
+ short_vnodes=[]
220
236
  @lock.synchronize {
221
237
  @rd.v_idx.each_pair{ |vn, nids|
222
238
  buf = nids.clone
@@ -225,7 +241,9 @@ module Roma
225
241
  if buf.length == 0
226
242
  lost_vnodes << vn
227
243
  @log.error("Vnode data is lost.(Vnode=#{vn})")
228
- end
244
+ elsif buf.length < @rd.rn
245
+ short_vnodes << vn
246
+ end
229
247
  end
230
248
  }
231
249
  }
@@ -236,6 +254,9 @@ module Roma
236
254
  set_route_and_inc_clk_inside_sync( vn, next_alive_vnode(vn) )
237
255
  }
238
256
  end
257
+ elsif short_vnodes.length > 0
258
+ @log.error("Short vnodes exist.")
259
+ @recover_proc.call('start_auto_recover_process') if @recover_proc
239
260
  end
240
261
  @fail_cnt.delete(nid)
241
262
  end
@@ -0,0 +1,76 @@
1
+ module Roma
2
+ module Routing
3
+ module RandomBalancer
4
+
5
+ # Reassigns one randomly selected virtual node held by +from+ to +to+.
6
+ # +idx+:: Position in the node list: 0 is the primary, 1 or greater is a secondary.
7
+ def randomly_change_nid!(idx, from, to, repethost = false)
8
+ vns = []
9
+ v_idx.each_pair do |vn, nids|
10
+ cnt = 0
11
+ nids.each_with_index do |nid, i|
12
+ if idx == i
13
+ cnt += 1 if nid == from
14
+ else
15
+ if repethost == true
16
+ cnt += 1 if nid != to
17
+ else
18
+ cnt += 1 if nid.split('_')[0] != to.split('_')[0]
19
+ end
20
+ end
21
+ end
22
+ vns << vn if cnt == nids.length
23
+ end
24
+ return nil if vns.length == 0
25
+ vn = vns[rand(vns.length)]
26
+ #puts "#{vn} #{v_idx[vn]}"
27
+ v_idx[vn][idx] = to
28
+ #puts "#{vn} #{v_idx[vn]}"
29
+ vn
30
+ end
31
+
32
+ # Returns the min/max values of the histogram and their corresponding node IDs.
33
+ # +idx+:: Position in the node list: 0 is the primary, 1 or greater is a secondary.
34
+ def get_min_max_histgram(idx)
35
+ h = get_histgram
36
+ min_nid = max_nid = nil
37
+ min = v_idx.length
38
+ max = 0
39
+ h.each do |nid, v|
40
+ if v[idx] < min
41
+ min = v[idx]
42
+ min_nid = nid
43
+ end
44
+ if v[idx] > max
45
+ max = v[idx]
46
+ max_nid = nid
47
+ end
48
+ end
49
+ [min, min_nid, max, max_nid]
50
+ end
51
+
52
+ # Returns a replacement list for balanced routing.
53
+ def get_balanced_vn_replacement_list(repethost = false)
54
+ rd = clone
55
+ ret = []
56
+ @rn.times do |idx| # primary, secondary1, ...
57
+ loop do # until balanced
58
+ min, min_nid, max, max_nid = rd.get_min_max_histgram(idx)
59
+ break if max - min < 2 || min_nid == max_nid
60
+ vn = rd.randomly_change_nid!(idx, max_nid, min_nid, repethost)
61
+ return nil unless vn # error
62
+ ret << {:vn=>vn, :idx=>idx, :from=>max_nid, :to=>min_nid}
63
+ end
64
+ end
65
+ ret
66
+ end
67
+
68
+ def balance!(vn_replacement_list, repethost = false)
69
+ vn_replacement_list.each do |rep|
70
+ v_idx[rep[:vn]][rep[:idx]] = rep[:to]
71
+ end
72
+ end
73
+
74
+ end # module RandomBalancer
75
+ end
76
+ end
@@ -1,9 +1,13 @@
1
1
  require 'yaml'
2
+ require 'roma/routing/random_balancer'
2
3
 
3
4
  module Roma
4
5
  module Routing
5
6
 
6
7
  class RoutingData
8
+
9
+ include Routing::RandomBalancer
10
+
7
11
  attr_accessor :dgst_bits
8
12
  attr_accessor :div_bits
9
13
  attr_accessor :rn
@@ -88,6 +92,11 @@ module Roma
88
92
  rd
89
93
  end
90
94
 
95
+ # for deep copy
96
+ def clone
97
+ Marshal.load(Marshal.dump(self))
98
+ end
99
+
91
100
  # 2 bytes('RT'):magic code
92
101
  # unsigned short:format version
93
102
  # unsigned char:dgst_bits
@@ -210,11 +219,16 @@ module Roma
210
219
 
211
220
  rnlm=RandomNodeListMaker.new(nodes,repethost)
212
221
 
213
- (2**div_bits).times{|i|
222
+ (2**div_bits).times do |i|
214
223
  vn=i<<(dgst_bits-div_bits)
215
224
  ret.v_clk[vn]=0
216
225
  ret.v_idx[vn]=rnlm.list(rn)
217
- }
226
+ end
227
+
228
+ # vnode balancing process
229
+ rlist = ret.get_balanced_vn_replacement_list(repethost)
230
+ ret.balance!(rlist, repethost) if rlist
231
+
218
232
  ret
219
233
  end
220
234
 
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  require 'roma/logging/rlogger'
3
2
  require 'roma/routing/routing_data'
4
3
  require 'roma/routing/merkle_tree'
@@ -19,6 +18,7 @@ module Roma
19
18
  attr_reader :div_bits
20
19
  attr_accessor :fail_cnt_threshold
21
20
  attr_accessor :fail_cnt_gap
21
+ attr_accessor :sub_nid
22
22
 
23
23
  def initialize(rd)
24
24
  @log = Roma::Logging::RLogger.instance
@@ -31,6 +31,7 @@ module Roma
31
31
  @fail_cnt_threshold = 5
32
32
  @fail_cnt_gap = 0
33
33
  @fail_time = Time.now
34
+ @sub_nid = {}
34
35
  init_mtree
35
36
  end
36
37
 
@@ -46,7 +47,7 @@ module Roma
46
47
  elsif nids.include?(ap)
47
48
  sn += 1
48
49
  end
49
- short += 1 if nids.length < @rd.rn
50
+ short += 1 if nids.length < @rd.rn
50
51
  }
51
52
 
52
53
  ret = {}
@@ -62,6 +63,7 @@ module Roma
62
63
  ret['routing.lost_vnodes'] = lost
63
64
  ret['routing.fail_cnt_threshold'] = @fail_cnt_threshold
64
65
  ret['routing.fail_cnt_gap'] = @fail_cnt_gap
66
+ ret['routing.sub_nid'] = @sub_nid.inspect
65
67
  ret
66
68
  end
67
69
 
@@ -80,12 +82,12 @@ module Roma
80
82
  @rd.v_idx.keys
81
83
  end
82
84
 
83
- # ハッシュ値からvnode idを返す
85
+ # get vnode id from hash value
84
86
  def get_vnode_id(d)
85
87
  d & @search_mask
86
88
  end
87
89
 
88
- # vnode があるノードIDの配列を返す
90
+ # get the array of node IDs that hold the vnode
89
91
  # +vn+: vnode id
90
92
  def search_nodes(vn)
91
93
  @rd.v_idx[vn].clone
@@ -93,11 +95,11 @@ module Roma
93
95
  nil
94
96
  end
95
97
 
96
- # 離脱ノードを検索リストから削除する
97
- # +nid+: 離脱ノード
98
+ # delete dropping node from list
99
+ # +nid+: dropping node
98
100
  def leave(nid)
99
101
  @rd.nodes.delete(nid)
100
- # リストから nid を消す
102
+ # delete nid from list
101
103
  @rd.v_idx.each_pair{ |vn, nids|
102
104
  nids.delete_if{ |nid2| nid2 == nid}
103
105
  if nids.length == 0
@@ -141,11 +143,56 @@ module Roma
141
143
  @fail_cnt.delete(nid)
142
144
  end
143
145
 
144
- # v_idx から nodes を再構築する
146
+ # Reconstruct nodes from v_idx
145
147
  def create_nodes_from_v_idx
146
148
  @rd.create_nodes_from_v_idx
147
149
  end
148
150
 
151
+ # Returns a new RoutingData object whose host names are replaced according to the sub_nid attribute, or nil if no netmask matches.
152
+ def sub_nid_rd(addr)
153
+ sub_nid.each do |mask, sub|
154
+ if check_netmask?(addr, mask)
155
+ return get_replaced_rd(sub[:regexp], sub[:replace])
156
+ end
157
+ end
158
+ nil
159
+ end
160
+
161
+ private
162
+
163
+ def get_replaced_rd(regxp, replace)
164
+ rd = Marshal.load(dump)
165
+
166
+ rd.nodes.map! do |nid|
167
+ nid.sub(regxp, replace)
168
+ end
169
+
170
+ rd.v_idx.each_value do |nids|
171
+ nids.map! do |nid|
172
+ nid.sub(regxp, replace)
173
+ end
174
+ end
175
+ rd
176
+ end
177
+
178
+ def check_netmask?(addr, mask)
179
+ if addr =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)/
180
+ iaddr = ($1.to_i << 24) + ($2.to_i << 16) + ($3.to_i << 8) + $4.to_i
181
+ else
182
+ @log.error("#{__method__}:Illigal format addr #{addr}")
183
+ return false
184
+ end
185
+
186
+ if mask =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)\/(\d+)/
187
+ imask_addr = ($1.to_i << 24) + ($2.to_i << 16) + ($3.to_i << 8) + $4.to_i
188
+ imask = (2 ** $5.to_i - 1) << (32 - $5.to_i)
189
+ else
190
+ @log.error("#{__method__}:Illigal format mask #{mask}")
191
+ return false
192
+ end
193
+ (iaddr & imask) == (imask_addr & imask)
194
+ end
195
+
149
196
  end # class RoutingTable
150
197
 
151
198
  end # module Routing
@@ -56,6 +56,21 @@ module Roma
56
56
  # for write behind
57
57
  attr_accessor :wb_command_map
58
58
 
59
+ # for latency average check
60
+ attr_accessor :latency_log
61
+ attr_accessor :latency_check_cmd
62
+ attr_accessor :latency_check_time_count
63
+ attr_accessor :latency_data
64
+ #attr_accessor :latency_denominator
65
+
66
+ # for vnode copy parameter
67
+ attr_accessor :spushv_klength_warn
68
+ attr_accessor :spushv_vlength_warn
69
+ attr_accessor :spushv_read_timeout
70
+ attr_accessor :reqpushv_timeout_count
71
+
72
+ attr_accessor :routing_trans_timeout
73
+
59
74
  def initialize
60
75
  @config_path = nil
61
76
  @run_recover = false
@@ -81,6 +96,15 @@ module Roma
81
96
  @size_of_zredundant = 0
82
97
  @hilatency_warn_time = 5
83
98
  @wb_command_map = {}
99
+ @latency_log = false
100
+ @latency_check_cmd =["get", "set", "delete"]
101
+ @latency_check_time_count = nil
102
+ @latency_data = Hash.new { |hash,key| hash[key] = {} } #double hash
103
+ @spushv_klength_warn = 1024 # 1kB
104
+ @spushv_vlength_warn = 1024 * 1024 # 1MB
105
+ @spushv_read_timeout = 100
106
+ @reqpushv_timeout_count = 300 # 0.1 * 300 sec
107
+ @routing_trans_timeout = 3600 * 3 # 3hr
84
108
  end
85
109
 
86
110
  def ap_str
@@ -118,6 +142,14 @@ module Roma
118
142
  ret['stats.redundant_count'] = @redundant_count
119
143
  ret['stats.hilatency_warn_time'] = @hilatency_warn_time
120
144
  ret['stats.wb_command_map'] = @wb_command_map.inspect
145
+ ret['stats.latency_log'] = @latency_log
146
+ ret['stats.latency_check_cmd'] = @latency_check_cmd
147
+ ret['stats.latency_check_time_count'] = @latency_check_time_count
148
+ ret['stats.spushv_klength_warn'] = @spushv_klength_warn
149
+ ret['stats.spushv_vlength_warn'] = @spushv_vlength_warn
150
+ ret['stats.spushv_read_timeout'] = @spushv_read_timeout
151
+ ret['stats.reqpushv_timeout_count'] = @reqpushv_timeout_count
152
+ ret['stats.routing_trans_timeout'] = @routing_trans_timeout
121
153
  ret
122
154
  end
123
155
 
@@ -6,7 +6,9 @@ module Roma
6
6
 
7
7
  module SQLite3_Ext
8
8
  def put(k,v)
9
- k = k.encode("ascii-8bit") if RUBY_VERSION >= "1.9.3"
9
+ if RUBY_VERSION >= "1.9.1"
10
+ k = k.encode("ascii-8bit") if k.encoding != Encoding::ASCII_8BIT
11
+ end
10
12
  if self.execute("select count(*) from t_roma where key=?",k)[0][0].to_i==0
11
13
  self.execute("insert into t_roma values (?,?)",k,SQLite3::Blob.new(v))
12
14
  else
@@ -15,14 +17,18 @@ module Roma
15
17
  end
16
18
 
17
19
  def get(k)
18
- k = k.encode("ascii-8bit") if RUBY_VERSION >= "1.9.3"
20
+ if RUBY_VERSION >= "1.9.1"
21
+ k = k.encode("ascii-8bit") if k.encoding != Encoding::ASCII_8BIT
22
+ end
19
23
  r = self.execute("select * from t_roma where key=?",k)
20
24
  return nil if r.length==0
21
25
  r[0][1]
22
26
  end
23
27
 
24
28
  def out(k)
25
- k = k.encode("ascii-8bit") if RUBY_VERSION >= "1.9.3"
29
+ if RUBY_VERSION >= "1.9.1"
30
+ k = k.encode("ascii-8bit") if k.encoding != Encoding::ASCII_8BIT
31
+ end
26
32
  return nil if get(k) == nil
27
33
  self.execute("delete from t_roma where key=?",k)
28
34
  end