rbbt-util 5.13.36 → 5.13.37

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 48414e48036dde6708ce828f47fa10e1169cf26c
- data.tar.gz: 818bb5d1cf90da015d716050c9283821435eecbf
+ metadata.gz: 26df367cce98f5b14b985bd311d4b6594dcb62a9
+ data.tar.gz: 1931a495ac7eb8eb17dcfa0f154be8c9d61e8b7f
  SHA512:
- metadata.gz: a35a5de8c4870b10e86353030787fb261b7086eac2553e79b86609352d88d69d09f65246cbd07e43f65d1a9bf863c0f00e070c9f380b4f2ae1d541b6f89f61a9
- data.tar.gz: e649af70a02eb33d3a3255766bf495197baa48d0274fa3a6b8e723a43daa4f8dc497e7aa781291ad83d75b6a50fa4f2913a1530bb9924147ecd670c4ce672fc5
+ metadata.gz: e71fee09c427fdf3faf326b75897dc6d070ec605bc94b3785d8cacf88f2bd3fa1fdd8d017e5e06d5e48cff48960cb8e7debb337b3a4eeb3361d2fd3735b2b1f3
+ data.tar.gz: 14a1b01824edd6bfa2a6ead19828454ea57b1c3846eb1f332a0b15faee9bf5019fdda715732ef57b1326ce3870d8c2f8d8c6c9f2faf6e70218ceea76f373436d
@@ -1,6 +1,6 @@
  class FixWidthTable
 
- attr_accessor :filename, :file, :value_size, :record_size, :range, :size
+ attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
  def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
  @filename = filename
 
@@ -8,13 +8,14 @@ class FixWidthTable
  Log.debug "FixWidthTable create: #{ filename }"
  @value_size = value_size
  @range = range
- @record_size = @value_size + (@range ? 12 : 4)
+ @record_size = @value_size + (@range ? 16 : 8)
 
  if %w(memory stringio).include? filename.to_s.downcase
  @filename = :memory
  @file = StringIO.new
  else
  FileUtils.rm @filename if File.exists? @filename
+ FileUtils.mkdir_p File.dirname(@filename) unless File.exists? @filename
  @file = File.open(@filename, 'wb')
  end
 
@@ -24,17 +25,26 @@ class FixWidthTable
  else
  Log.debug "FixWidthTable up-to-date: #{ filename }"
  if in_memory
- @file = StringIO.new(Open.read(@filename, :mode => 'rb'), 'r')
+ @file = StringIO.new(Open.read(@filename, :mode => 'r:ASCII-8BIT'), 'r')
  else
- @file = File.open(@filename, 'r')
+ @file = File.open(@filename, 'r:ASCII-8BIT')
  end
  @value_size = @file.read(4).unpack("L").first
  @range = @file.read(1).unpack("C").first == 1
  @record_size = @value_size + (@range ? 12 : 4)
  @size = (File.size(@filename) - 5) / (@record_size)
  end
+
+ @mask = "a#{value_size}"
+ end
+
+ def persistence_path
+ @filename
  end
 
+ def persistence_path=(value)
+ @filename=value
+ end
 
  CONNECTIONS = {} unless defined? CONNECTIONS
  def self.get(filename, value_size = nil, range = nil, update = false)
@@ -50,28 +60,18 @@ class FixWidthTable
  def format(pos, value)
  padding = value_size - value.length
  if range
- (pos + [value + ("\0" * padding)]).pack("llla#{value_size}")
+ (pos + [padding, value + ("\0" * padding)]).pack("llll#{mask}")
  else
- [pos, value + ("\0" * padding)].pack("la#{value_size}")
- end
- end
-
- def unformat(format)
- if range
- pos_start, pos_end, pos_overlap, value = format.unpack("llla#{value_size}")
- [[pos_start, pos_end, pos_overlap], value.strip]
- else
- pos, value = format.unpack("la#{value_size}")
- [pos, value.strip]
+ [pos, padding, value + ("\0" * padding)].pack("ll#{mask}")
  end
  end
 
  def add(pos, value)
  format = format(pos, value)
  @file.write format
+
  @size += 1
  end
- alias << add
 
  def last_pos
  pos(size - 1)
@@ -98,13 +98,15 @@ class FixWidthTable
  def value(index)
  return nil if index < 0 or index >= size
  @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
- @file.read(value_size).unpack("a#{value_size}").first.strip
+ padding = @file.read(4).unpack("l").first+1
+ txt = @file.read(value_size)
+ txt.unpack(mask).first[0..-padding]
  end
 
- def read
+ def read(force = false)
  return if @filename == :memory
  @file.close unless @file.closed?
- @file = File.open(@filename, 'r')
+ @file = File.open(filename, 'r:ASCII-8BIT')
  end
 
  def close
@@ -125,17 +127,20 @@ class FixWidthTable
  end
  end
 
+ def add_range_point(pos, value)
+ @latest ||= []
+ while @latest.any? and @latest[0] < pos[0]
+ @latest.shift
+ end
+ overlap = @latest.length
+ add pos + [overlap], value
+ @latest << pos[1]
+ end
+
  def add_range(data)
- latest = []
+ @latest = []
  data.sort_by{|value, pos| pos[0] }.each do |value, pos|
- while latest.any? and latest[0] < pos[0]
- latest.shift
- end
-
- overlap = latest.length
-
- add pos + [overlap], value
- latest << pos[1]
+ add_range_point(pos, value)
  end
  end
 
@@ -169,12 +174,14 @@ class FixWidthTable
  end
 
  def get_range(pos)
- if Range === pos
+ case pos
+ when Range
  r_start = pos.begin
  r_end = pos.end
+ when Array
+ r_start, r_end = pos
  else
- r_start = pos.to_i
- r_end = pos.to_i
+ r_start, r_end = pos, pos
  end
 
  idx = closest(r_start)
@@ -243,6 +250,7 @@ class FixWidthTable
  end
  end
 
+
  def values_at(*list)
  list.collect{|pos|
  self[pos]
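The width change above (record_size is now value_size + 16 for range entries and value_size + 8 for point entries, instead of +12/+4) comes from storing an explicit 4-byte padding count next to each fixed-width value, which value(index) now reads back to trim the stored string. A rough illustration of the new point-record layout in plain Ruby (not gem code; the numbers are made up):

  value_size = 8
  mask = "a#{value_size}"
  value = "chr1"
  padding = value_size - value.length
  record = [10, padding, value + ("\0" * padding)].pack("ll#{mask}")   # pos, padding, padded value
  pos, pad = record.unpack("ll")
  record[8, value_size].unpack(mask).first[0..-(pad + 1)]              # => "chr1"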
data/lib/rbbt/monitor.rb CHANGED
@@ -3,7 +3,8 @@ require 'rbbt'
  module Rbbt
 
  LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
- Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all
+ Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
+ Rbbt.tmp.produce_locks.find_all
 
  SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
 
@@ -0,0 +1,79 @@
+ class PackedIndex
+ attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
+
+ ELEMS = {
+ "I" => ["q", 8],
+ "i" => ["l", 4],
+ "f" => ["f", 4],
+ "F" => ["D", 8],
+ }
+
+ def self.process_mask(mask)
+ str = ""
+ size = 0
+ mask.each do |e|
+ if ELEMS.include? e
+ str << ELEMS[e][0]
+ size += ELEMS[e][1]
+ elsif e =~ /^(\d+)s$/
+ num = $1.to_i
+ str << "a" << num.to_s
+ size += num
+ else
+ e, num = e.split(":")
+ str << e
+ size = (num.nil? ? size + 1 : size + num.to_i)
+ end
+ end
+ [str, size]
+ end
+
+ def initialize(file, write = false, pattern = nil)
+ @file = file
+ if write
+ @stream = Open.open(file, :mode => 'wb')
+ @mask, @item_size = PackedIndex.process_mask pattern
+ header = [@mask.length, @item_size].pack("ll")
+ @stream.write(header)
+ @stream.write(mask)
+ @offset = @mask.length + 8
+ else
+ @stream = Open.open(file, :mode => 'rb')
+ header = @stream.read(8)
+ mask_length, @item_size = header.unpack("ll")
+ @mask = @stream.read(mask_length)
+ @offset = @mask.length + 8
+ end
+ @nil_string = "[NIL]" + "-" * (@item_size - 5)
+ end
+
+ def read
+ close
+ @stream = Open.open(file, :mode => 'rb')
+ end
+
+ def <<(payload)
+ if payload.nil?
+ @stream.write nil_string
+ else
+ @stream.write payload.pack(mask)
+ end
+ end
+
+ def [](position)
+ @stream.seek(position * item_size + offset)
+ encoded = @stream.read(item_size)
+ return nil if encoded == nil_string
+ encoded.unpack mask
+ end
+
+ def values_at(*positions)
+ positions.collect{|p|
+ self[p]
+ }
+ end
+
+ def close
+ stream.close unless stream.closed?
+ end
+ end
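The new PackedIndex (lib/rbbt/packed_index.rb in the file list below) stores fixed-size records packed according to a type mask and addressed by position. A condensed usage sketch, following the test_packed_index.rb test added later in this diff (the file path here is illustrative):

  require 'rbbt/packed_index'

  # "i" => 32-bit integer, "23s" => 23-byte string, "f" => float (see ELEMS above)
  pi = PackedIndex.new "/tmp/example.pki", true, %w(i i 23s f)
  100.times { |i| pi << [i, i + 2, i.to_s * 5, rand] }
  pi << nil                        # nil entries are written as a sentinel string
  pi.close

  pi = PackedIndex.new "/tmp/example.pki", false
  pi[0]                            # => [0, 2, "00000...", 0.1234...] (string padded to 23 bytes)
  pi[100]                          # => nil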
data/lib/rbbt/persist.rb CHANGED
@@ -204,7 +204,7 @@ module Persist
  ConcurrentStream.setup(out, :threads => saver_thread, :filename => path)
  out.callback = callback
  out.abort_callback = abort_callback
- out.lockfile = stream.lockfile
+ out.lockfile = stream.lockfile if stream.respond_to? :lockfile
  out
  end
 
@@ -1,5 +1,7 @@
  require 'rbbt/persist/tsv/adapter'
 
+ require 'rbbt/persist/tsv/fix_width_table'
+
  begin
  require 'rbbt/persist/tsv/tokyocabinet'
  rescue Exception
@@ -46,7 +48,7 @@ module Persist
  end || source.object_id.to_s
  end
 
- def self.open_database(path, write, serializer = nil, type = "HDB")
+ def self.open_database(path, write, serializer = nil, type = "HDB", options = {})
  case type
  when "LevelDB"
  Persist.open_leveldb(path, write, serializer)
@@ -56,6 +58,13 @@ module Persist
  Persist.open_lmdb(path, write, serializer)
  when 'kch', 'kct'
  Persist.open_kyotocabinet(path, write, serializer, type)
+ when 'fwt'
+ value_size, range, update, in_memory, pos_function = Misc.process_options options.dup, :value_size, :range, :update, :in_memory, :pos_function
+ if pos_function
+ Persist.open_fwt(path, value_size, range, serializer, update, in_memory, &pos_function)
+ else
+ Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
+ end
  else
  Persist.open_tokyocabinet(path, write, serializer, type)
  end
@@ -86,9 +95,9 @@ module Persist
  if is_persisted?(path) and not persist_options[:update]
  Log.debug "TSV persistence up-to-date: #{ path }"
  if persist_options[:shard_function]
- return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
+ return open_sharder(path, false, nil, persist_options[:engine], persist_options, &persist_options[:shard_function])
  else
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB, persist_options)
  end
  end
 
@@ -98,9 +107,9 @@ module Persist
  Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
 
  if persist_options[:shard_function]
- return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
+ return open_sharder(path, false, nil, persist_options[:engine], persist_options, &persist_options[:shard_function])
  else
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB, persist_options)
  end
  end
 
@@ -111,9 +120,9 @@ module Persist
  tmp_path = path + '.persist'
 
  data = if persist_options[:shard_function]
- open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
+ open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], persist_options, &persist_options[:shard_function])
  else
- open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
+ open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB, persist_options)
  end
 
  if TSV === data and data.serializer.nil?
@@ -127,6 +136,7 @@ module Persist
  FileUtils.mv data.persistence_path, path if File.exists? data.persistence_path and not File.exists? path
  tsv = CONNECTIONS[path] = CONNECTIONS.delete tmp_path
  tsv.persistence_path = path
+
  tsv.fix_io if tsv.respond_to? :fix_io
 
  data
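With these changes open_database and open_sharder forward the whole persist_options hash to the engine, which is what lets the new 'fwt' branch pull out :value_size, :range, :update, :in_memory and :pos_function. A hypothetical call selecting the new engine directly (the path and option values are illustrative):

  pos_function = Proc.new { |key| key.split(":").last.to_i }

  Persist.open_database("/tmp/positions.fwt", true, :float, 'fwt',
                        :value_size => 64, :range => false,
                        :pos_function => pos_function)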
@@ -0,0 +1,110 @@
+ require 'rbbt/fix_width_table'
+
+ module Persist
+
+ module FWTAdapter
+ include Persist::TSVAdapter
+
+ attr_accessor :pos_function
+
+ def self.open(path, value_size, range = false, update = false, in_memory = false, &pos_function)
+ db = CONNECTIONS[path] ||= FixWidthTable.new(path, value_size, range, update, in_memory)
+ db.extend Persist::FWTAdapter
+ db.persistence_path = path
+ db.pos_function = pos_function
+ db
+ end
+
+ def persistence_path=(value)
+ @persistence_path = value
+ @filename = value
+ end
+
+ def metadata_file
+ @metadata_file ||= self.persistence_path + '.metadata'
+ end
+
+ def metadata
+ return {} unless File.exists? metadata_file
+ Open.open(metadata_file, :mode => "rb") do |f|
+ Marshal.load(f)
+ end
+ end
+
+ def set_metadata(k,v)
+ metadata = self.metadata
+ metadata[k] = v
+ Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
+ end
+
+ def [](key)
+ if TSV::ENTRY_KEYS.include? key
+ metadata[key]
+ else
+ key = pos_function.call(key) if pos_function
+ res = super(key)
+ res.extend MultipleResult
+ res
+ end
+ end
+
+ def []=(key, value)
+ if TSV::ENTRY_KEYS.include? key
+ set_metadata(key, value)
+ else
+ if range
+ add_range_point key, value
+ else
+ add key, value
+ end
+ end
+ end
+
+ def add(key, value)
+ key = pos_function.call(key) if pos_function and not (range and Array === key)
+ super(key, value)
+ end
+
+ def add_range_point(key, value)
+ key = pos_function.call(key) if pos_function
+ super(key, value)
+ end
+
+ def <<(key, value)
+ self.send(:[]=, *i)
+ end
+
+ def include?(i)
+ return true if Fixnum === i and i < pos(@size)
+ return true if metadata.include? i
+ false
+ end
+
+ def size
+ @size #+ metadata.keys.length
+ end
+
+ def each
+ @size.times do |i|
+ yield i, value(i)
+ end
+ end
+
+ def keys
+ []
+ end
+ end
+
+ def self.open_fwt(path, value_size, range = false, serializer = nil, update = false, in_memory = false, &pos_function)
+ FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
+
+ database = Persist::FWTAdapter.open(path, value_size, range, update, in_memory, &pos_function)
+
+ unless serializer == :clean
+ TSV.setup database
+ database.serializer = serializer || database.serializer
+ end
+
+ database
+ end
+ end
@@ -1,21 +1,21 @@
  module Persist
  module SharderAdapter
- def self.open(path, write, type=nil, &block)
+ def self.open(path, write, type=nil, options = {}, &block)
 
- database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
+ database = CONNECTIONS[path] ||= Sharder.new(path, write, type, options, &block)
 
  database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
 
  database
  end
-
  end
 
  class Sharder
- attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
+ attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type, :options
 
- def initialize(persistence_path, write = false, db_type=nil, &block)
+ def initialize(persistence_path, write = false, db_type=nil, options = {}, &block)
  @shard_function = block
+ @options = options
  @persistence_path = Path.setup(persistence_path)
  @mutex = Mutex.new
  @writable = write
@@ -32,6 +32,7 @@ module Persist
 
  def persistence_path=(path)
  @persistence_path = path
+ databases.values.each{|db| db.persistence_path = File.join(path, File.basename(db.persistence_path))}
  end
 
  def databases
@@ -39,21 +40,32 @@ module Persist
  hash = {}
  @persistence_path.glob('shard-*').each do |f|
  shard = File.basename(f).match(/shard-(.*)/)[1]
- hash[shard] = Persist.open_database(f, false, :clean, db_type)
+ if shard == 'metadata'
+ hash[shard] = Persist.open_database(f, false, :clean, "HDB", @options)
+ else
+ hash[shard] = Persist.open_database(f, false, :clean, db_type, @options)
+ end
  end
  hash
  end
  end
 
  def database(key)
- shard = key =~ /__tsv_/ ? "0" : shard_function.call(key)
+ shard = key =~ /__tsv_/ ? "metadata" : shard_function.call(key)
  if databases.include? shard
  databases[shard]
  else
- database ||= begin
+ if shard == 'metadata'
+ database ||= begin
+ path = File.join(persistence_path, 'shard-' << shard.to_s)
+ (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, "HDB", @options) : nil
+ end
+ else
+ database ||= begin
  path = File.join(persistence_path, 'shard-' << shard.to_s)
- (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
+ (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type, @options) : nil
  end
+ end
  if database
  databases[shard] = database
  else
@@ -84,6 +96,7 @@ module Persist
  end
 
  def read(force = false)
+ raise "SIOT"
  return if not write? and not closed and not force
  self.close
  databases.each{|d| d.read }
@@ -204,23 +217,30 @@ module Persist
  databases.values.each{|database| database.write }
  end
 
- def read
- databases.values.each{|database| database.read }
+ def read(force = false)
+ databases.values.each{|database| database.read(force) }
  end
 
  def close
  databases.values.each{|database| database.close }
  end
+
+ def size
+ databases.inject(0){|acc,i|
+ shard, db = i;
+ acc += db.size
+ }
+ end
  end
 
- def self.open_sharder(path, write, serializer = nil, tokyocabinet_class = TokyoCabinet::HDB, &shard_function)
+ def self.open_sharder(path, write, serializer = nil, type = TokyoCabinet::HDB, options, &shard_function)
  write = true unless File.exists? path
 
  FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
 
- database = Persist::SharderAdapter.open(path, write, tokyocabinet_class, &shard_function)
+ database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
 
- unless serializer == :clean
+ unless serializer == :clean #or type.to_s == 'fwt'
  TSV.setup database
  database.serializer = serializer if serializer
  end
data/lib/rbbt/resource.rb CHANGED
@@ -6,6 +6,15 @@ require 'set'
 
 
  module Resource
+
+ class << self
+ attr_accessor :lock_dir
+
+ def lock_dir
+ @lock_dir ||= Rbbt.tmp.produce_locks.find
+ end
+ end
+
  def self.remote_servers
  @remote_servers = Rbbt.etc.file_servers.exists? ? Rbbt.etc.file_servers.yaml : {}
  end
@@ -65,7 +74,8 @@ module Resource
  begin
  @server_missing_resource_cache ||= Set.new
  raise "Resource Not Found" if @server_missing_resource_cache.include? url
- Misc.lock final_path do
+ lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
+ Misc.lock lock_filename do
  Net::HTTP.get_response URI(url) do |response|
  case response
  when Net::HTTPSuccess, Net::HTTPOK
@@ -115,15 +125,16 @@ module Resource
  final_path = path.respond_to?(:find) ? (force ? path.find(:user) : path.find) : path
  if not File.exists? final_path or force
  Log.medium "Producing: #{ final_path }"
- Misc.lock final_path + '.produce' do
+ lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
+ Misc.lock lock_filename do
  if not File.exists? final_path or force
  (remote_server and get_from_server(path, final_path)) or
  begin
  case type
  when :string
- Open.write(final_path, content)
+ Misc.sensiblewrite(final_path, content)
  when :url
- Open.write(final_path, Open.open(content))
+ Misc.sensiblewrite(final_path, Open.open(content))
  when :proc
  data = case content.arity
  when 0
@@ -131,7 +142,7 @@ module Resource
  when 1
  content.call final_path
  end
- Open.write(final_path, data) unless data.nil?
+ Misc.sensiblewrite(final_path, data) unless data.nil?
  when :rake
  run_rake(path, content, rake_dir)
  when :install
@@ -38,20 +38,22 @@ module Rake
  raise TaskNotFound if Rake::Task[task].nil?
 
  t = nil
- #pid = Process.fork{
- begin
- Misc.in_dir(dir) do
- Rake::Task[task].invoke
-
- Rake::Task.clear
- Rake::FileTask.clear_files
+ pid = Process.fork{
+ Misc.pre_fork
+ begin
+ Misc.in_dir(dir) do
+ Rake::Task[task].invoke
+
+ Rake::Task.clear
+ Rake::FileTask.clear_files
+ end
+ rescue
+ Log.error "Error in rake: #{$!.message}"
+ raise $!
  end
- rescue
- Log.error "Error in rake: #{$!.message}"
- raise $!
- end
- #}
- #Process.wait(pid)
+ }
+ Process.waitpid(pid)
+ raise "Rake failed" unless $?.success?
 
  end
  end
@@ -94,6 +94,7 @@ module TSV
  begin
  super
  rescue Exception
+ Log.exception $!
  @writable = false
  self
  end
@@ -197,12 +198,7 @@ module TSV
 
  #{{{ GETTERS AND SETTERS
 
-
- def [](key, clean = false)
- value = super(key)
- return value if clean or value.nil?
- @serializer_module ||= self.serializer_module
-
+ def prepare_value(key, value)
  value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
 
  return value if @unnamed or fields.nil?
@@ -218,6 +214,20 @@ module TSV
  value
  end
 
+ def [](key, clean = false)
+ value = super(key)
+ return value if clean or value.nil?
+ @serializer_module ||= self.serializer_module
+
+ if MultipleResult === value
+ res = value.collect{|v| prepare_value key, v }
+ res.extend MultipleResult
+ res
+ else
+ prepare_value key, value
+ end
+ end
+
  def []=(key, value, clean = false)
  return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
  super(key, @serializer_module.dump(value))
@@ -634,6 +644,7 @@ Example:
  if merge
  self.through do |key,values|
  field_values = values.delete_at field_pos
+ next if field_values.nil?
  zipped = values.zip_fields
  field_values.zip(zipped).each do |field_value,rest|
  k = [key,field_value]*":"
@@ -648,6 +659,7 @@ Example:
  else
  self.through do |key,values|
  field_values = values.delete_at field_pos
+ next if field_values.nil?
  zipped = Misc.zip_fields(values)
  field_values.zip(zipped).each do |field_value,rest|
  k = [key,field_value]*":"
@@ -1,7 +1,5 @@
  module TSV
 
- module MultipleResult; end
-
  def self.obj_stream(obj)
  case obj
  when nil
@@ -510,13 +508,15 @@ module TSV
 
  bar = Misc.process_options options, :bar
  bar ||= Misc.process_options options, :progress
- max = guess_max(obj)
  options[:bar] = case bar
  when String
+ max = guess_max(obj)
  Log::ProgressBar.new_bar(max, {:desc => bar})
  when TrueClass
+ max = guess_max(obj)
  Log::ProgressBar.new_bar(max, nil)
  when Fixnum
+ max = guess_max(obj)
  Log::ProgressBar.new_bar(bar)
  when Hash
  max = Misc.process_options(bar, :max) || max
@@ -554,6 +554,10 @@ module TSV
  self
  end
 
+ def identify_field(field)
+ TSV.identify_field(key_field, fields, field)
+ end
+
  def self.traverse(stream, options = {}, &block)
  parser = Parser.new(stream, options)
  parser.traverse(options, &block)
@@ -1,4 +1,5 @@
  module TSV
+
  class CleanSerializer
  def self.dump(o); o end
  def self.load(o); o end
@@ -26,7 +27,7 @@ module TSV
 
  class StringSerializer
  def self.dump(str); str.to_s; end
- def self.load(str); str; end
+ def self.load(str); str.dup; end
  end
 
  class StringArraySerializer
@@ -61,7 +62,6 @@ module TSV
  end
  end
 
-
  class TSVSerializer
  def self.dump(tsv)
  tsv.to_s
@@ -69,7 +69,6 @@ module TSV
  out = Misc.open_pipe do |sin|
  num_streams = streams.length
 
-
  streams = streams.collect do |stream|
  sorted = Misc.sort_stream(stream)
  stream.annotate sorted if stream.respond_to? :annotate
@@ -150,6 +149,7 @@ module TSV
 
  sin.puts [min, str*sep] * sep
  end
+
  streams.each do |stream|
  stream.join if stream.respond_to? :join
  end
@@ -3,12 +3,13 @@ require 'rbbt/util/semaphore'
  class RbbtProcessQueue
  class RbbtProcessSocket
 
- Serializer = Marshal
-
  attr_accessor :sread, :swrite, :write_sem, :read_sem
- def initialize
+ def initialize(serializer = nil)
  @sread, @swrite = Misc.pipe
 
+ @serializer = serializer || Marshal
+
+
  key = "/" << rand(100000000).to_s;
  @write_sem = key + '.in'
  @read_sem = key + '.out'
@@ -30,16 +31,15 @@ class RbbtProcessQueue
  case obj
  when String
  payload = obj
- size_head = [payload.bytesize,"S"].pack 'La'
+ size_head = [payload.bytesize,"C"].pack 'La'
  str = size_head << payload
  else
- payload = Serializer.dump(obj)
- size_head = [payload.bytesize,"M"].pack 'La'
+ payload = @serializer.dump(obj)
+ size_head = [payload.bytesize,"S"].pack 'La'
  str = size_head << payload
  end
 
  write_length = str.length
- #IO.select(nil, [stream])
  wrote = stream.write(str)
  while wrote < write_length
  wrote += stream.write(str[wrote..-1])
@@ -54,9 +54,9 @@ class RbbtProcessQueue
  begin
  payload = Misc.read_stream stream, size
  case type
- when "M"
- Serializer.load(payload)
  when "S"
+ @serializer.load(payload)
+ when "C"
  payload
  end
  rescue TryAgain
@@ -7,15 +7,6 @@ class RbbtProcessQueue
 
  @pid = Process.fork do
  begin
-
- #Persist::CONNECTIONS.values.each do |db| db.close if db.write? end
- #ObjectSpace.each_object(Mutex) do |m|
- # begin
- # m.unlock
- # rescue ThreadError
- # end if m.locked?
- #end
-
  Misc.pre_fork
 
  @cleanup.call if @cleanup
@@ -20,6 +20,8 @@ require 'rbbt/util/misc/system'
  require 'rbbt/util/misc/objects'
  require 'rbbt/util/misc/manipulation'
 
+ module MultipleResult; end
+
  module Misc
  end
 
@@ -133,7 +133,6 @@ module ConcurrentStream
 
  abort_threads
  abort_pids
- iii [:abort, lockfile]
  lockfile.unlock if lockfile and lockfile.locked?
  end
  Log.medium "Aborted stream #{Misc.fingerprint self} -- #{@abort_callback} [#{@aborted}]"
@@ -35,5 +35,45 @@ class TestSharder < Test::Unit::TestCase
 
  end
  end
+
+ def test_shard_fwt
+ TmpFile.with_file do |dir|
+ shard_function = Proc.new do |key|
+ key[0..(key.index(":")-1)]
+ end
+
+ pos_function = Proc.new do |key|
+ key.split(":").last.to_i
+ end
+
+ size = 10
+ sharder = Persist.persist_tsv(nil, "ShardTest", {}, :update => true, :range => false, :value_size => 64, :engine => 'fwt', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :float) do |db|
+ size.times do |v|
+ v = v + 1
+ chr = "chr" << (v % 5).to_s
+ key = chr + ":" << v.to_s
+ db << [key, v*2]
+ end
+ end
+ sharder.read
+
+ assert_equal dir, sharder.persistence_path
+ assert_equal size, sharder.size
+
+ assert_equal [4.0], sharder["chr2:2"]
+
+ count = 0
+ sharder.through do |k,v|
+ count += 1
+ end
+ assert_equal count, size
+
+ sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
+
+ assert_equal [4.0], sharder["chr2:2"]
+
+ assert_equal size, sharder.size
+ end
+ end
  end
 
@@ -0,0 +1,25 @@
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
+ require 'rbbt/packed_index'
+
+ class TestPackedIndex < Test::Unit::TestCase
+ def test_index
+
+ TmpFile.with_file do |tmpfile|
+ pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
+ 100.times do |i|
+ pi << [i, i+2, i.to_s * 10, rand, rand, rand, rand, rand]
+ end
+ pi << nil
+ pi << nil
+ pi.close
+ pi = PackedIndex.new tmpfile, false
+ 100.times do |i|
+ assert_equal i, pi[i][0]
+ assert_equal i+2, pi[i][1]
+ end
+ assert_equal nil, pi[100]
+ assert_equal nil, pi[101]
+ end
+ end
+ end
+
@@ -6,7 +6,7 @@ class StopException < StandardError; end
 
  class TestTSVParallelThrough < Test::Unit::TestCase
 
- def _test_traverse_tsv
+ def test_traverse_tsv
  require 'rbbt/sources/organism'
 
  head = 100
@@ -25,7 +25,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.keys.compact.sort.length
  end
 
- def _test_traverse_tsv_cpus
+ def test_traverse_tsv_cpus
  require 'rbbt/sources/organism'
 
  head = 100
@@ -47,7 +47,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert res.values.compact.flatten.uniq.length > 0
  end
 
- def _test_traverse_stream
+ def test_traverse_stream
  require 'rbbt/sources/organism'
 
  head = 1000
@@ -61,7 +61,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.keys.compact.sort.length
  end
 
- def _test_traverse_stream_cpus
+ def test_traverse_stream_cpus
  require 'rbbt/sources/organism'
 
  head = 1000
@@ -75,7 +75,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.keys.compact.sort.length
  end
 
- def _test_traverse_stream_keys
+ def test_traverse_stream_keys
  require 'rbbt/sources/organism'
 
  head = 1000
@@ -99,7 +99,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal res.sort, Organism.identifiers("Hsa").tsv(:head => head).keys.sort
  end
 
- def _test_traverse_array
+ def test_traverse_array
  require 'rbbt/sources/organism'
 
  array = []
@@ -121,7 +121,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal array, res
  end
 
- def _test_traverse_array_threads
+ def test_traverse_array_threads
  require 'rbbt/sources/organism'
 
  array = []
@@ -142,7 +142,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal array.sort, res.sort
  end
 
- def _test_traverse_array_cpus
+ def test_traverse_array_cpus
  require 'rbbt/sources/organism'
 
  array = []
@@ -157,7 +157,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal array.sort, res.sort
  end
 
- def _test_traverse_benchmark
+ def test_traverse_benchmark
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -179,7 +179,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  end
  end
 
- def _test_traverse_into_dumper
+ def test_traverse_into_dumper
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -197,7 +197,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.size
  end
 
- def _test_traverse_into_dumper_threads
+ def test_traverse_into_dumper_threads
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -217,7 +217,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.size
  end
 
- def _test_traverse_into_dumper_cpus
+ def test_traverse_into_dumper_cpus
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -238,7 +238,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
 
  #{{{ TRAVERSE DUMPER
 
- def _test_traverse_dumper
+ def test_traverse_dumper
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -259,7 +259,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.size
  end
 
- def _test_traverse_dumper_threads
+ def test_traverse_dumper_threads
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -283,7 +283,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.size
  end
 
- def _test_traverse_dumper_cpus
+ def test_traverse_dumper_cpus
  require 'rbbt/sources/organism'
 
  head = 10_000
@@ -305,7 +305,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal head, res.size
  end
 
- def _test_traverse_dumper_exception
+ def test_traverse_dumper_exception
  require 'rbbt/sources/organism'
 
  head = 2_000
@@ -346,7 +346,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  end
  end
 
- def _test_traverse_into_stream
+ def test_traverse_into_stream
  size = 100
  array = (1..size).to_a.collect{|n| n.to_s}
  stream = TSV.traverse array, :into => :stream do |e|
@@ -355,7 +355,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal size, stream.read.split("\n").length
  end
 
- def _test_traverse_progress
+ def test_traverse_progress
  size = 1000
  array = (1..size).to_a.collect{|n| n.to_s}
  stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
@@ -389,12 +389,12 @@ class TestTSVParallelThrough < Test::Unit::TestCase
  assert_equal size, stream.read.split("\n").length
  end
 
- def _test_store_multiple
+ def test_store_multiple
  size = 1000
  array = (1..size).to_a.collect{|n| n.to_s}
  stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
  sleep 0.01
- [e,e+".alt"].extend TSV::MultipleResult
+ [e,e+".alt"].extend MultipleResult
  end
  assert_equal size*2, stream.read.split("\n").length
  end
@@ -11,9 +11,8 @@ class TestConcurrency < Test::Unit::TestCase
  obj3 = "some string"
  obj4 = TSV.setup({1 => 1})
 
- socket = RbbtProcessQueue::RbbtProcessSocket.new
+ socket = RbbtProcessQueue::RbbtProcessSocket.new Marshal
  10.times do
-
  socket.push(obj1)
  socket.push(obj2)
  socket.push(obj3)
@@ -24,7 +23,6 @@ class TestConcurrency < Test::Unit::TestCase
  assert_equal obj3, socket.pop
  assert_equal obj4, socket.pop
 
-
  end
 
  socket.swrite.close
@@ -36,4 +34,37 @@ class TestConcurrency < Test::Unit::TestCase
  end
  end
 
+ if false and __FILE__ == $0
+ socket = RbbtProcessQueue::RbbtProcessSocket.new
+
+ obj = "Some string" * 1000
+ Misc.benchmark(1000) do
+ socket.push(obj)
+ socket.pop
+ end
+
+ obj = ["Some string"] * 1000
+ Misc.benchmark(1000) do
+ socket.push(obj)
+ socket.pop
+ end
+ socket.clean
+
+
+ socket = RbbtProcessQueue::RbbtProcessSocket.new Marshal
+
+ obj = "Some string" * 1000
+ Misc.benchmark(1000) do
+ socket.push(obj)
+ socket.pop
+ end
+ socket.clean
 
+ socket = RbbtProcessQueue::RbbtProcessSocket.new TSV::StringArraySerializer
+ obj = ["Some string"] * 1000
+ Misc.benchmark(1000) do
+ socket.push(obj)
+ socket.pop
+ end
+ socket.clean
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rbbt-util
  version: !ruby/object:Gem::Version
- version: 5.13.36
+ version: 5.13.37
  platform: ruby
  authors:
  - Miguel Vazquez
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-06-09 00:00:00.000000000 Z
+ date: 2014-06-12 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -131,10 +131,12 @@ files:
  - lib/rbbt/fix_width_table.rb
  - lib/rbbt/knowledge_base.rb
  - lib/rbbt/monitor.rb
+ - lib/rbbt/packed_index.rb
  - lib/rbbt/persist.rb
  - lib/rbbt/persist/tsv.rb
  - lib/rbbt/persist/tsv/adapter.rb
  - lib/rbbt/persist/tsv/cdb.rb
+ - lib/rbbt/persist/tsv/fix_width_table.rb
  - lib/rbbt/persist/tsv/kyotocabinet.rb
  - lib/rbbt/persist/tsv/leveldb.rb
  - lib/rbbt/persist/tsv/lmdb.rb
@@ -294,6 +296,7 @@ files:
  - test/rbbt/test_fix_width_table.rb
  - test/rbbt/test_knowledge_base.rb
  - test/rbbt/test_monitor.rb
+ - test/rbbt/test_packed_index.rb
  - test/rbbt/test_persist.rb
  - test/rbbt/test_resource.rb
  - test/rbbt/test_tsv.rb
@@ -390,6 +393,7 @@ test_files:
  - test/rbbt/util/test_misc.rb
  - test/rbbt/util/test_tmpfile.rb
  - test/rbbt/util/R/test_eval.rb
+ - test/rbbt/test_packed_index.rb
  - test/rbbt/test_association.rb
  - test/rbbt/test_resource.rb
  - test/rbbt/test_entity.rb