rbbt-util 5.13.36 → 5.13.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 48414e48036dde6708ce828f47fa10e1169cf26c
4
- data.tar.gz: 818bb5d1cf90da015d716050c9283821435eecbf
3
+ metadata.gz: 26df367cce98f5b14b985bd311d4b6594dcb62a9
4
+ data.tar.gz: 1931a495ac7eb8eb17dcfa0f154be8c9d61e8b7f
5
5
  SHA512:
6
- metadata.gz: a35a5de8c4870b10e86353030787fb261b7086eac2553e79b86609352d88d69d09f65246cbd07e43f65d1a9bf863c0f00e070c9f380b4f2ae1d541b6f89f61a9
7
- data.tar.gz: e649af70a02eb33d3a3255766bf495197baa48d0274fa3a6b8e723a43daa4f8dc497e7aa781291ad83d75b6a50fa4f2913a1530bb9924147ecd670c4ce672fc5
6
+ metadata.gz: e71fee09c427fdf3faf326b75897dc6d070ec605bc94b3785d8cacf88f2bd3fa1fdd8d017e5e06d5e48cff48960cb8e7debb337b3a4eeb3361d2fd3735b2b1f3
7
+ data.tar.gz: 14a1b01824edd6bfa2a6ead19828454ea57b1c3846eb1f332a0b15faee9bf5019fdda715732ef57b1326ce3870d8c2f8d8c6c9f2faf6e70218ceea76f373436d
@@ -1,6 +1,6 @@
1
1
  class FixWidthTable
2
2
 
3
- attr_accessor :filename, :file, :value_size, :record_size, :range, :size
3
+ attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
4
4
  def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
5
5
  @filename = filename
6
6
 
@@ -8,13 +8,14 @@ class FixWidthTable
8
8
  Log.debug "FixWidthTable create: #{ filename }"
9
9
  @value_size = value_size
10
10
  @range = range
11
- @record_size = @value_size + (@range ? 12 : 4)
11
+ @record_size = @value_size + (@range ? 16 : 8)
12
12
 
13
13
  if %w(memory stringio).include? filename.to_s.downcase
14
14
  @filename = :memory
15
15
  @file = StringIO.new
16
16
  else
17
17
  FileUtils.rm @filename if File.exists? @filename
18
+ FileUtils.mkdir_p File.dirname(@filename) unless File.exists? @filename
18
19
  @file = File.open(@filename, 'wb')
19
20
  end
20
21
 
@@ -24,17 +25,26 @@ class FixWidthTable
24
25
  else
25
26
  Log.debug "FixWidthTable up-to-date: #{ filename }"
26
27
  if in_memory
27
- @file = StringIO.new(Open.read(@filename, :mode => 'rb'), 'r')
28
+ @file = StringIO.new(Open.read(@filename, :mode => 'r:ASCII-8BIT'), 'r')
28
29
  else
29
- @file = File.open(@filename, 'r')
30
+ @file = File.open(@filename, 'r:ASCII-8BIT')
30
31
  end
31
32
  @value_size = @file.read(4).unpack("L").first
32
33
  @range = @file.read(1).unpack("C").first == 1
33
34
  @record_size = @value_size + (@range ? 12 : 4)
34
35
  @size = (File.size(@filename) - 5) / (@record_size)
35
36
  end
37
+
38
+ @mask = "a#{value_size}"
39
+ end
40
+
41
+ def persistence_path
42
+ @filename
36
43
  end
37
44
 
45
+ def persistence_path=(value)
46
+ @filename=value
47
+ end
38
48
 
39
49
  CONNECTIONS = {} unless defined? CONNECTIONS
40
50
  def self.get(filename, value_size = nil, range = nil, update = false)
@@ -50,28 +60,18 @@ class FixWidthTable
50
60
  def format(pos, value)
51
61
  padding = value_size - value.length
52
62
  if range
53
- (pos + [value + ("\0" * padding)]).pack("llla#{value_size}")
63
+ (pos + [padding, value + ("\0" * padding)]).pack("llll#{mask}")
54
64
  else
55
- [pos, value + ("\0" * padding)].pack("la#{value_size}")
56
- end
57
- end
58
-
59
- def unformat(format)
60
- if range
61
- pos_start, pos_end, pos_overlap, value = format.unpack("llla#{value_size}")
62
- [[pos_start, pos_end, pos_overlap], value.strip]
63
- else
64
- pos, value = format.unpack("la#{value_size}")
65
- [pos, value.strip]
65
+ [pos, padding, value + ("\0" * padding)].pack("ll#{mask}")
66
66
  end
67
67
  end
68
68
 
69
69
  def add(pos, value)
70
70
  format = format(pos, value)
71
71
  @file.write format
72
+
72
73
  @size += 1
73
74
  end
74
- alias << add
75
75
 
76
76
  def last_pos
77
77
  pos(size - 1)
@@ -98,13 +98,15 @@ class FixWidthTable
98
98
  def value(index)
99
99
  return nil if index < 0 or index >= size
100
100
  @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
101
- @file.read(value_size).unpack("a#{value_size}").first.strip
101
+ padding = @file.read(4).unpack("l").first+1
102
+ txt = @file.read(value_size)
103
+ txt.unpack(mask).first[0..-padding]
102
104
  end
103
105
 
104
- def read
106
+ def read(force = false)
105
107
  return if @filename == :memory
106
108
  @file.close unless @file.closed?
107
- @file = File.open(@filename, 'r')
109
+ @file = File.open(filename, 'r:ASCII-8BIT')
108
110
  end
109
111
 
110
112
  def close
@@ -125,17 +127,20 @@ class FixWidthTable
125
127
  end
126
128
  end
127
129
 
130
+ def add_range_point(pos, value)
131
+ @latest ||= []
132
+ while @latest.any? and @latest[0] < pos[0]
133
+ @latest.shift
134
+ end
135
+ overlap = @latest.length
136
+ add pos + [overlap], value
137
+ @latest << pos[1]
138
+ end
139
+
128
140
  def add_range(data)
129
- latest = []
141
+ @latest = []
130
142
  data.sort_by{|value, pos| pos[0] }.each do |value, pos|
131
- while latest.any? and latest[0] < pos[0]
132
- latest.shift
133
- end
134
-
135
- overlap = latest.length
136
-
137
- add pos + [overlap], value
138
- latest << pos[1]
143
+ add_range_point(pos, value)
139
144
  end
140
145
  end
141
146
 
@@ -169,12 +174,14 @@ class FixWidthTable
169
174
  end
170
175
 
171
176
  def get_range(pos)
172
- if Range === pos
177
+ case pos
178
+ when Range
173
179
  r_start = pos.begin
174
180
  r_end = pos.end
181
+ when Array
182
+ r_start, r_end = pos
175
183
  else
176
- r_start = pos.to_i
177
- r_end = pos.to_i
184
+ r_start, r_end = pos, pos
178
185
  end
179
186
 
180
187
  idx = closest(r_start)
@@ -243,6 +250,7 @@ class FixWidthTable
243
250
  end
244
251
  end
245
252
 
253
+
246
254
  def values_at(*list)
247
255
  list.collect{|pos|
248
256
  self[pos]
data/lib/rbbt/monitor.rb CHANGED
@@ -3,7 +3,8 @@ require 'rbbt'
3
3
  module Rbbt
4
4
 
5
5
  LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
6
- Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all
6
+ Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
7
+ Rbbt.tmp.produce_locks.find_all
7
8
 
8
9
  SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
9
10
 
@@ -0,0 +1,79 @@
1
+ class PackedIndex
2
+ attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
3
+
4
+ ELEMS = {
5
+ "I" => ["q", 8],
6
+ "i" => ["l", 4],
7
+ "f" => ["f", 4],
8
+ "F" => ["D", 8],
9
+ }
10
+
11
+ def self.process_mask(mask)
12
+ str = ""
13
+ size = 0
14
+ mask.each do |e|
15
+ if ELEMS.include? e
16
+ str << ELEMS[e][0]
17
+ size += ELEMS[e][1]
18
+ elsif e =~ /^(\d+)s$/
19
+ num = $1.to_i
20
+ str << "a" << num.to_s
21
+ size += num
22
+ else
23
+ e, num = e.split(":")
24
+ str << e
25
+ size = (num.nil? ? size + 1 : size + num.to_i)
26
+ end
27
+ end
28
+ [str, size]
29
+ end
30
+
31
+ def initialize(file, write = false, pattern = nil)
32
+ @file = file
33
+ if write
34
+ @stream = Open.open(file, :mode => 'wb')
35
+ @mask, @item_size = PackedIndex.process_mask pattern
36
+ header = [@mask.length, @item_size].pack("ll")
37
+ @stream.write(header)
38
+ @stream.write(mask)
39
+ @offset = @mask.length + 8
40
+ else
41
+ @stream = Open.open(file, :mode => 'rb')
42
+ header = @stream.read(8)
43
+ mask_length, @item_size = header.unpack("ll")
44
+ @mask = @stream.read(mask_length)
45
+ @offset = @mask.length + 8
46
+ end
47
+ @nil_string = "[NIL]" + "-" * (@item_size - 5)
48
+ end
49
+
50
+ def read
51
+ close
52
+ @stream = Open.open(file, :mode => 'rb')
53
+ end
54
+
55
+ def <<(payload)
56
+ if payload.nil?
57
+ @stream.write nil_string
58
+ else
59
+ @stream.write payload.pack(mask)
60
+ end
61
+ end
62
+
63
+ def [](position)
64
+ @stream.seek(position * item_size + offset)
65
+ encoded = @stream.read(item_size)
66
+ return nil if encoded == nil_string
67
+ encoded.unpack mask
68
+ end
69
+
70
+ def values_at(*positions)
71
+ positions.collect{|p|
72
+ self[p]
73
+ }
74
+ end
75
+
76
+ def close
77
+ stream.close unless stream.closed?
78
+ end
79
+ end
data/lib/rbbt/persist.rb CHANGED
@@ -204,7 +204,7 @@ module Persist
204
204
  ConcurrentStream.setup(out, :threads => saver_thread, :filename => path)
205
205
  out.callback = callback
206
206
  out.abort_callback = abort_callback
207
- out.lockfile = stream.lockfile
207
+ out.lockfile = stream.lockfile if stream.respond_to? :lockfile
208
208
  out
209
209
  end
210
210
 
@@ -1,5 +1,7 @@
1
1
  require 'rbbt/persist/tsv/adapter'
2
2
 
3
+ require 'rbbt/persist/tsv/fix_width_table'
4
+
3
5
  begin
4
6
  require 'rbbt/persist/tsv/tokyocabinet'
5
7
  rescue Exception
@@ -46,7 +48,7 @@ module Persist
46
48
  end || source.object_id.to_s
47
49
  end
48
50
 
49
- def self.open_database(path, write, serializer = nil, type = "HDB")
51
+ def self.open_database(path, write, serializer = nil, type = "HDB", options = {})
50
52
  case type
51
53
  when "LevelDB"
52
54
  Persist.open_leveldb(path, write, serializer)
@@ -56,6 +58,13 @@ module Persist
56
58
  Persist.open_lmdb(path, write, serializer)
57
59
  when 'kch', 'kct'
58
60
  Persist.open_kyotocabinet(path, write, serializer, type)
61
+ when 'fwt'
62
+ value_size, range, update, in_memory, pos_function = Misc.process_options options.dup, :value_size, :range, :update, :in_memory, :pos_function
63
+ if pos_function
64
+ Persist.open_fwt(path, value_size, range, serializer, update, in_memory, &pos_function)
65
+ else
66
+ Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
67
+ end
59
68
  else
60
69
  Persist.open_tokyocabinet(path, write, serializer, type)
61
70
  end
@@ -86,9 +95,9 @@ module Persist
86
95
  if is_persisted?(path) and not persist_options[:update]
87
96
  Log.debug "TSV persistence up-to-date: #{ path }"
88
97
  if persist_options[:shard_function]
89
- return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
98
+ return open_sharder(path, false, nil, persist_options[:engine], persist_options, &persist_options[:shard_function])
90
99
  else
91
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
100
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB, persist_options)
92
101
  end
93
102
  end
94
103
 
@@ -98,9 +107,9 @@ module Persist
98
107
  Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
99
108
 
100
109
  if persist_options[:shard_function]
101
- return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
110
+ return open_sharder(path, false, nil, persist_options[:engine], persist_options, &persist_options[:shard_function])
102
111
  else
103
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
112
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB, persist_options)
104
113
  end
105
114
  end
106
115
 
@@ -111,9 +120,9 @@ module Persist
111
120
  tmp_path = path + '.persist'
112
121
 
113
122
  data = if persist_options[:shard_function]
114
- open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
123
+ open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], persist_options, &persist_options[:shard_function])
115
124
  else
116
- open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
125
+ open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB, persist_options)
117
126
  end
118
127
 
119
128
  if TSV === data and data.serializer.nil?
@@ -127,6 +136,7 @@ module Persist
127
136
  FileUtils.mv data.persistence_path, path if File.exists? data.persistence_path and not File.exists? path
128
137
  tsv = CONNECTIONS[path] = CONNECTIONS.delete tmp_path
129
138
  tsv.persistence_path = path
139
+
130
140
  tsv.fix_io if tsv.respond_to? :fix_io
131
141
 
132
142
  data
@@ -0,0 +1,110 @@
1
+ require 'rbbt/fix_width_table'
2
+
3
+ module Persist
4
+
5
+ module FWTAdapter
6
+ include Persist::TSVAdapter
7
+
8
+ attr_accessor :pos_function
9
+
10
+ def self.open(path, value_size, range = false, update = false, in_memory = false, &pos_function)
11
+ db = CONNECTIONS[path] ||= FixWidthTable.new(path, value_size, range, update, in_memory)
12
+ db.extend Persist::FWTAdapter
13
+ db.persistence_path = path
14
+ db.pos_function = pos_function
15
+ db
16
+ end
17
+
18
+ def persistence_path=(value)
19
+ @persistence_path = value
20
+ @filename = value
21
+ end
22
+
23
+ def metadata_file
24
+ @metadata_file ||= self.persistence_path + '.metadata'
25
+ end
26
+
27
+ def metadata
28
+ return {} unless File.exists? metadata_file
29
+ Open.open(metadata_file, :mode => "rb") do |f|
30
+ Marshal.load(f)
31
+ end
32
+ end
33
+
34
+ def set_metadata(k,v)
35
+ metadata = self.metadata
36
+ metadata[k] = v
37
+ Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
38
+ end
39
+
40
+ def [](key)
41
+ if TSV::ENTRY_KEYS.include? key
42
+ metadata[key]
43
+ else
44
+ key = pos_function.call(key) if pos_function
45
+ res = super(key)
46
+ res.extend MultipleResult
47
+ res
48
+ end
49
+ end
50
+
51
+ def []=(key, value)
52
+ if TSV::ENTRY_KEYS.include? key
53
+ set_metadata(key, value)
54
+ else
55
+ if range
56
+ add_range_point key, value
57
+ else
58
+ add key, value
59
+ end
60
+ end
61
+ end
62
+
63
+ def add(key, value)
64
+ key = pos_function.call(key) if pos_function and not (range and Array === key)
65
+ super(key, value)
66
+ end
67
+
68
+ def add_range_point(key, value)
69
+ key = pos_function.call(key) if pos_function
70
+ super(key, value)
71
+ end
72
+
73
+ def <<(key, value)
74
+ self.send(:[]=, *i)
75
+ end
76
+
77
+ def include?(i)
78
+ return true if Fixnum === i and i < pos(@size)
79
+ return true if metadata.include? i
80
+ false
81
+ end
82
+
83
+ def size
84
+ @size #+ metadata.keys.length
85
+ end
86
+
87
+ def each
88
+ @size.times do |i|
89
+ yield i, value(i)
90
+ end
91
+ end
92
+
93
+ def keys
94
+ []
95
+ end
96
+ end
97
+
98
+ def self.open_fwt(path, value_size, range = false, serializer = nil, update = false, in_memory = false, &pos_function)
99
+ FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
100
+
101
+ database = Persist::FWTAdapter.open(path, value_size, range, update, in_memory, &pos_function)
102
+
103
+ unless serializer == :clean
104
+ TSV.setup database
105
+ database.serializer = serializer || database.serializer
106
+ end
107
+
108
+ database
109
+ end
110
+ end
@@ -1,21 +1,21 @@
1
1
  module Persist
2
2
  module SharderAdapter
3
- def self.open(path, write, type=nil, &block)
3
+ def self.open(path, write, type=nil, options = {}, &block)
4
4
 
5
- database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
5
+ database = CONNECTIONS[path] ||= Sharder.new(path, write, type, options, &block)
6
6
 
7
7
  database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
8
8
 
9
9
  database
10
10
  end
11
-
12
11
  end
13
12
 
14
13
  class Sharder
15
- attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
14
+ attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type, :options
16
15
 
17
- def initialize(persistence_path, write = false, db_type=nil, &block)
16
+ def initialize(persistence_path, write = false, db_type=nil, options = {}, &block)
18
17
  @shard_function = block
18
+ @options = options
19
19
  @persistence_path = Path.setup(persistence_path)
20
20
  @mutex = Mutex.new
21
21
  @writable = write
@@ -32,6 +32,7 @@ module Persist
32
32
 
33
33
  def persistence_path=(path)
34
34
  @persistence_path = path
35
+ databases.values.each{|db| db.persistence_path = File.join(path, File.basename(db.persistence_path))}
35
36
  end
36
37
 
37
38
  def databases
@@ -39,21 +40,32 @@ module Persist
39
40
  hash = {}
40
41
  @persistence_path.glob('shard-*').each do |f|
41
42
  shard = File.basename(f).match(/shard-(.*)/)[1]
42
- hash[shard] = Persist.open_database(f, false, :clean, db_type)
43
+ if shard == 'metadata'
44
+ hash[shard] = Persist.open_database(f, false, :clean, "HDB", @options)
45
+ else
46
+ hash[shard] = Persist.open_database(f, false, :clean, db_type, @options)
47
+ end
43
48
  end
44
49
  hash
45
50
  end
46
51
  end
47
52
 
48
53
  def database(key)
49
- shard = key =~ /__tsv_/ ? "0" : shard_function.call(key)
54
+ shard = key =~ /__tsv_/ ? "metadata" : shard_function.call(key)
50
55
  if databases.include? shard
51
56
  databases[shard]
52
57
  else
53
- database ||= begin
58
+ if shard == 'metadata'
59
+ database ||= begin
60
+ path = File.join(persistence_path, 'shard-' << shard.to_s)
61
+ (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, "HDB", @options) : nil
62
+ end
63
+ else
64
+ database ||= begin
54
65
  path = File.join(persistence_path, 'shard-' << shard.to_s)
55
- (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
66
+ (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type, @options) : nil
56
67
  end
68
+ end
57
69
  if database
58
70
  databases[shard] = database
59
71
  else
@@ -84,6 +96,7 @@ module Persist
84
96
  end
85
97
 
86
98
  def read(force = false)
99
+ raise "SIOT"
87
100
  return if not write? and not closed and not force
88
101
  self.close
89
102
  databases.each{|d| d.read }
@@ -204,23 +217,30 @@ module Persist
204
217
  databases.values.each{|database| database.write }
205
218
  end
206
219
 
207
- def read
208
- databases.values.each{|database| database.read }
220
+ def read(force = false)
221
+ databases.values.each{|database| database.read(force) }
209
222
  end
210
223
 
211
224
  def close
212
225
  databases.values.each{|database| database.close }
213
226
  end
227
+
228
+ def size
229
+ databases.inject(0){|acc,i|
230
+ shard, db = i;
231
+ acc += db.size
232
+ }
233
+ end
214
234
  end
215
235
 
216
- def self.open_sharder(path, write, serializer = nil, tokyocabinet_class = TokyoCabinet::HDB, &shard_function)
236
+ def self.open_sharder(path, write, serializer = nil, type = TokyoCabinet::HDB, options, &shard_function)
217
237
  write = true unless File.exists? path
218
238
 
219
239
  FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
220
240
 
221
- database = Persist::SharderAdapter.open(path, write, tokyocabinet_class, &shard_function)
241
+ database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
222
242
 
223
- unless serializer == :clean
243
+ unless serializer == :clean #or type.to_s == 'fwt'
224
244
  TSV.setup database
225
245
  database.serializer = serializer if serializer
226
246
  end
data/lib/rbbt/resource.rb CHANGED
@@ -6,6 +6,15 @@ require 'set'
6
6
 
7
7
 
8
8
  module Resource
9
+
10
+ class << self
11
+ attr_accessor :lock_dir
12
+
13
+ def lock_dir
14
+ @lock_dir ||= Rbbt.tmp.produce_locks.find
15
+ end
16
+ end
17
+
9
18
  def self.remote_servers
10
19
  @remote_servers = Rbbt.etc.file_servers.exists? ? Rbbt.etc.file_servers.yaml : {}
11
20
  end
@@ -65,7 +74,8 @@ module Resource
65
74
  begin
66
75
  @server_missing_resource_cache ||= Set.new
67
76
  raise "Resource Not Found" if @server_missing_resource_cache.include? url
68
- Misc.lock final_path do
77
+ lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
78
+ Misc.lock lock_filename do
69
79
  Net::HTTP.get_response URI(url) do |response|
70
80
  case response
71
81
  when Net::HTTPSuccess, Net::HTTPOK
@@ -115,15 +125,16 @@ module Resource
115
125
  final_path = path.respond_to?(:find) ? (force ? path.find(:user) : path.find) : path
116
126
  if not File.exists? final_path or force
117
127
  Log.medium "Producing: #{ final_path }"
118
- Misc.lock final_path + '.produce' do
128
+ lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
129
+ Misc.lock lock_filename do
119
130
  if not File.exists? final_path or force
120
131
  (remote_server and get_from_server(path, final_path)) or
121
132
  begin
122
133
  case type
123
134
  when :string
124
- Open.write(final_path, content)
135
+ Misc.sensiblewrite(final_path, content)
125
136
  when :url
126
- Open.write(final_path, Open.open(content))
137
+ Misc.sensiblewrite(final_path, Open.open(content))
127
138
  when :proc
128
139
  data = case content.arity
129
140
  when 0
@@ -131,7 +142,7 @@ module Resource
131
142
  when 1
132
143
  content.call final_path
133
144
  end
134
- Open.write(final_path, data) unless data.nil?
145
+ Misc.sensiblewrite(final_path, data) unless data.nil?
135
146
  when :rake
136
147
  run_rake(path, content, rake_dir)
137
148
  when :install
@@ -38,20 +38,22 @@ module Rake
38
38
  raise TaskNotFound if Rake::Task[task].nil?
39
39
 
40
40
  t = nil
41
- #pid = Process.fork{
42
- begin
43
- Misc.in_dir(dir) do
44
- Rake::Task[task].invoke
45
-
46
- Rake::Task.clear
47
- Rake::FileTask.clear_files
41
+ pid = Process.fork{
42
+ Misc.pre_fork
43
+ begin
44
+ Misc.in_dir(dir) do
45
+ Rake::Task[task].invoke
46
+
47
+ Rake::Task.clear
48
+ Rake::FileTask.clear_files
49
+ end
50
+ rescue
51
+ Log.error "Error in rake: #{$!.message}"
52
+ raise $!
48
53
  end
49
- rescue
50
- Log.error "Error in rake: #{$!.message}"
51
- raise $!
52
- end
53
- #}
54
- #Process.wait(pid)
54
+ }
55
+ Process.waitpid(pid)
56
+ raise "Rake failed" unless $?.success?
55
57
 
56
58
  end
57
59
  end
@@ -94,6 +94,7 @@ module TSV
94
94
  begin
95
95
  super
96
96
  rescue Exception
97
+ Log.exception $!
97
98
  @writable = false
98
99
  self
99
100
  end
@@ -197,12 +198,7 @@ module TSV
197
198
 
198
199
  #{{{ GETTERS AND SETTERS
199
200
 
200
-
201
- def [](key, clean = false)
202
- value = super(key)
203
- return value if clean or value.nil?
204
- @serializer_module ||= self.serializer_module
205
-
201
+ def prepare_value(key, value)
206
202
  value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
207
203
 
208
204
  return value if @unnamed or fields.nil?
@@ -218,6 +214,20 @@ module TSV
218
214
  value
219
215
  end
220
216
 
217
+ def [](key, clean = false)
218
+ value = super(key)
219
+ return value if clean or value.nil?
220
+ @serializer_module ||= self.serializer_module
221
+
222
+ if MultipleResult === value
223
+ res = value.collect{|v| prepare_value key, v }
224
+ res.extend MultipleResult
225
+ res
226
+ else
227
+ prepare_value key, value
228
+ end
229
+ end
230
+
221
231
  def []=(key, value, clean = false)
222
232
  return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
223
233
  super(key, @serializer_module.dump(value))
@@ -634,6 +644,7 @@ Example:
634
644
  if merge
635
645
  self.through do |key,values|
636
646
  field_values = values.delete_at field_pos
647
+ next if field_values.nil?
637
648
  zipped = values.zip_fields
638
649
  field_values.zip(zipped).each do |field_value,rest|
639
650
  k = [key,field_value]*":"
@@ -648,6 +659,7 @@ Example:
648
659
  else
649
660
  self.through do |key,values|
650
661
  field_values = values.delete_at field_pos
662
+ next if field_values.nil?
651
663
  zipped = Misc.zip_fields(values)
652
664
  field_values.zip(zipped).each do |field_value,rest|
653
665
  k = [key,field_value]*":"
@@ -1,7 +1,5 @@
1
1
  module TSV
2
2
 
3
- module MultipleResult; end
4
-
5
3
  def self.obj_stream(obj)
6
4
  case obj
7
5
  when nil
@@ -510,13 +508,15 @@ module TSV
510
508
 
511
509
  bar = Misc.process_options options, :bar
512
510
  bar ||= Misc.process_options options, :progress
513
- max = guess_max(obj)
514
511
  options[:bar] = case bar
515
512
  when String
513
+ max = guess_max(obj)
516
514
  Log::ProgressBar.new_bar(max, {:desc => bar})
517
515
  when TrueClass
516
+ max = guess_max(obj)
518
517
  Log::ProgressBar.new_bar(max, nil)
519
518
  when Fixnum
519
+ max = guess_max(obj)
520
520
  Log::ProgressBar.new_bar(bar)
521
521
  when Hash
522
522
  max = Misc.process_options(bar, :max) || max
@@ -554,6 +554,10 @@ module TSV
554
554
  self
555
555
  end
556
556
 
557
+ def identify_field(field)
558
+ TSV.identify_field(key_field, fields, field)
559
+ end
560
+
557
561
  def self.traverse(stream, options = {}, &block)
558
562
  parser = Parser.new(stream, options)
559
563
  parser.traverse(options, &block)
@@ -1,4 +1,5 @@
1
1
  module TSV
2
+
2
3
  class CleanSerializer
3
4
  def self.dump(o); o end
4
5
  def self.load(o); o end
@@ -26,7 +27,7 @@ module TSV
26
27
 
27
28
  class StringSerializer
28
29
  def self.dump(str); str.to_s; end
29
- def self.load(str); str; end
30
+ def self.load(str); str.dup; end
30
31
  end
31
32
 
32
33
  class StringArraySerializer
@@ -61,7 +62,6 @@ module TSV
61
62
  end
62
63
  end
63
64
 
64
-
65
65
  class TSVSerializer
66
66
  def self.dump(tsv)
67
67
  tsv.to_s
@@ -69,7 +69,6 @@ module TSV
69
69
  out = Misc.open_pipe do |sin|
70
70
  num_streams = streams.length
71
71
 
72
-
73
72
  streams = streams.collect do |stream|
74
73
  sorted = Misc.sort_stream(stream)
75
74
  stream.annotate sorted if stream.respond_to? :annotate
@@ -150,6 +149,7 @@ module TSV
150
149
 
151
150
  sin.puts [min, str*sep] * sep
152
151
  end
152
+
153
153
  streams.each do |stream|
154
154
  stream.join if stream.respond_to? :join
155
155
  end
@@ -3,12 +3,13 @@ require 'rbbt/util/semaphore'
3
3
  class RbbtProcessQueue
4
4
  class RbbtProcessSocket
5
5
 
6
- Serializer = Marshal
7
-
8
6
  attr_accessor :sread, :swrite, :write_sem, :read_sem
9
- def initialize
7
+ def initialize(serializer = nil)
10
8
  @sread, @swrite = Misc.pipe
11
9
 
10
+ @serializer = serializer || Marshal
11
+
12
+
12
13
  key = "/" << rand(100000000).to_s;
13
14
  @write_sem = key + '.in'
14
15
  @read_sem = key + '.out'
@@ -30,16 +31,15 @@ class RbbtProcessQueue
30
31
  case obj
31
32
  when String
32
33
  payload = obj
33
- size_head = [payload.bytesize,"S"].pack 'La'
34
+ size_head = [payload.bytesize,"C"].pack 'La'
34
35
  str = size_head << payload
35
36
  else
36
- payload = Serializer.dump(obj)
37
- size_head = [payload.bytesize,"M"].pack 'La'
37
+ payload = @serializer.dump(obj)
38
+ size_head = [payload.bytesize,"S"].pack 'La'
38
39
  str = size_head << payload
39
40
  end
40
41
 
41
42
  write_length = str.length
42
- #IO.select(nil, [stream])
43
43
  wrote = stream.write(str)
44
44
  while wrote < write_length
45
45
  wrote += stream.write(str[wrote..-1])
@@ -54,9 +54,9 @@ class RbbtProcessQueue
54
54
  begin
55
55
  payload = Misc.read_stream stream, size
56
56
  case type
57
- when "M"
58
- Serializer.load(payload)
59
57
  when "S"
58
+ @serializer.load(payload)
59
+ when "C"
60
60
  payload
61
61
  end
62
62
  rescue TryAgain
@@ -7,15 +7,6 @@ class RbbtProcessQueue
7
7
 
8
8
  @pid = Process.fork do
9
9
  begin
10
-
11
- #Persist::CONNECTIONS.values.each do |db| db.close if db.write? end
12
- #ObjectSpace.each_object(Mutex) do |m|
13
- # begin
14
- # m.unlock
15
- # rescue ThreadError
16
- # end if m.locked?
17
- #end
18
-
19
10
  Misc.pre_fork
20
11
 
21
12
  @cleanup.call if @cleanup
@@ -20,6 +20,8 @@ require 'rbbt/util/misc/system'
20
20
  require 'rbbt/util/misc/objects'
21
21
  require 'rbbt/util/misc/manipulation'
22
22
 
23
+ module MultipleResult; end
24
+
23
25
  module Misc
24
26
  end
25
27
 
@@ -133,7 +133,6 @@ module ConcurrentStream
133
133
 
134
134
  abort_threads
135
135
  abort_pids
136
- iii [:abort, lockfile]
137
136
  lockfile.unlock if lockfile and lockfile.locked?
138
137
  end
139
138
  Log.medium "Aborted stream #{Misc.fingerprint self} -- #{@abort_callback} [#{@aborted}]"
@@ -35,5 +35,45 @@ class TestSharder < Test::Unit::TestCase
35
35
 
36
36
  end
37
37
  end
38
+
39
+ def test_shard_fwt
40
+ TmpFile.with_file do |dir|
41
+ shard_function = Proc.new do |key|
42
+ key[0..(key.index(":")-1)]
43
+ end
44
+
45
+ pos_function = Proc.new do |key|
46
+ key.split(":").last.to_i
47
+ end
48
+
49
+ size = 10
50
+ sharder = Persist.persist_tsv(nil, "ShardTest", {}, :update => true, :range => false, :value_size => 64, :engine => 'fwt', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :float) do |db|
51
+ size.times do |v|
52
+ v = v + 1
53
+ chr = "chr" << (v % 5).to_s
54
+ key = chr + ":" << v.to_s
55
+ db << [key, v*2]
56
+ end
57
+ end
58
+ sharder.read
59
+
60
+ assert_equal dir, sharder.persistence_path
61
+ assert_equal size, sharder.size
62
+
63
+ assert_equal [4.0], sharder["chr2:2"]
64
+
65
+ count = 0
66
+ sharder.through do |k,v|
67
+ count += 1
68
+ end
69
+ assert_equal count, size
70
+
71
+ sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
72
+
73
+ assert_equal [4.0], sharder["chr2:2"]
74
+
75
+ assert_equal size, sharder.size
76
+ end
77
+ end
38
78
  end
39
79
 
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/packed_index'
3
+
4
+ class TestPackedIndex < Test::Unit::TestCase
5
+ def test_index
6
+
7
+ TmpFile.with_file do |tmpfile|
8
+ pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
9
+ 100.times do |i|
10
+ pi << [i, i+2, i.to_s * 10, rand, rand, rand, rand, rand]
11
+ end
12
+ pi << nil
13
+ pi << nil
14
+ pi.close
15
+ pi = PackedIndex.new tmpfile, false
16
+ 100.times do |i|
17
+ assert_equal i, pi[i][0]
18
+ assert_equal i+2, pi[i][1]
19
+ end
20
+ assert_equal nil, pi[100]
21
+ assert_equal nil, pi[101]
22
+ end
23
+ end
24
+ end
25
+
@@ -6,7 +6,7 @@ class StopException < StandardError; end
6
6
 
7
7
  class TestTSVParallelThrough < Test::Unit::TestCase
8
8
 
9
- def _test_traverse_tsv
9
+ def test_traverse_tsv
10
10
  require 'rbbt/sources/organism'
11
11
 
12
12
  head = 100
@@ -25,7 +25,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
25
25
  assert_equal head, res.keys.compact.sort.length
26
26
  end
27
27
 
28
- def _test_traverse_tsv_cpus
28
+ def test_traverse_tsv_cpus
29
29
  require 'rbbt/sources/organism'
30
30
 
31
31
  head = 100
@@ -47,7 +47,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
47
47
  assert res.values.compact.flatten.uniq.length > 0
48
48
  end
49
49
 
50
- def _test_traverse_stream
50
+ def test_traverse_stream
51
51
  require 'rbbt/sources/organism'
52
52
 
53
53
  head = 1000
@@ -61,7 +61,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
61
61
  assert_equal head, res.keys.compact.sort.length
62
62
  end
63
63
 
64
- def _test_traverse_stream_cpus
64
+ def test_traverse_stream_cpus
65
65
  require 'rbbt/sources/organism'
66
66
 
67
67
  head = 1000
@@ -75,7 +75,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
75
75
  assert_equal head, res.keys.compact.sort.length
76
76
  end
77
77
 
78
- def _test_traverse_stream_keys
78
+ def test_traverse_stream_keys
79
79
  require 'rbbt/sources/organism'
80
80
 
81
81
  head = 1000
@@ -99,7 +99,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
99
99
  assert_equal res.sort, Organism.identifiers("Hsa").tsv(:head => head).keys.sort
100
100
  end
101
101
 
102
- def _test_traverse_array
102
+ def test_traverse_array
103
103
  require 'rbbt/sources/organism'
104
104
 
105
105
  array = []
@@ -121,7 +121,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
121
121
  assert_equal array, res
122
122
  end
123
123
 
124
- def _test_traverse_array_threads
124
+ def test_traverse_array_threads
125
125
  require 'rbbt/sources/organism'
126
126
 
127
127
  array = []
@@ -142,7 +142,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
142
142
  assert_equal array.sort, res.sort
143
143
  end
144
144
 
145
- def _test_traverse_array_cpus
145
+ def test_traverse_array_cpus
146
146
  require 'rbbt/sources/organism'
147
147
 
148
148
  array = []
@@ -157,7 +157,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
157
157
  assert_equal array.sort, res.sort
158
158
  end
159
159
 
160
- def _test_traverse_benchmark
160
+ def test_traverse_benchmark
161
161
  require 'rbbt/sources/organism'
162
162
 
163
163
  head = 2_000
@@ -179,7 +179,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
179
179
  end
180
180
  end
181
181
 
182
- def _test_traverse_into_dumper
182
+ def test_traverse_into_dumper
183
183
  require 'rbbt/sources/organism'
184
184
 
185
185
  head = 2_000
@@ -197,7 +197,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
197
197
  assert_equal head, res.size
198
198
  end
199
199
 
200
- def _test_traverse_into_dumper_threads
200
+ def test_traverse_into_dumper_threads
201
201
  require 'rbbt/sources/organism'
202
202
 
203
203
  head = 2_000
@@ -217,7 +217,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
217
217
  assert_equal head, res.size
218
218
  end
219
219
 
220
- def _test_traverse_into_dumper_cpus
220
+ def test_traverse_into_dumper_cpus
221
221
  require 'rbbt/sources/organism'
222
222
 
223
223
  head = 2_000
@@ -238,7 +238,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
238
238
 
239
239
  #{{{ TRAVERSE DUMPER
240
240
 
241
- def _test_traverse_dumper
241
+ def test_traverse_dumper
242
242
  require 'rbbt/sources/organism'
243
243
 
244
244
  head = 2_000
@@ -259,7 +259,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
259
259
  assert_equal head, res.size
260
260
  end
261
261
 
262
- def _test_traverse_dumper_threads
262
+ def test_traverse_dumper_threads
263
263
  require 'rbbt/sources/organism'
264
264
 
265
265
  head = 2_000
@@ -283,7 +283,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
283
283
  assert_equal head, res.size
284
284
  end
285
285
 
286
- def _test_traverse_dumper_cpus
286
+ def test_traverse_dumper_cpus
287
287
  require 'rbbt/sources/organism'
288
288
 
289
289
  head = 10_000
@@ -305,7 +305,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
305
305
  assert_equal head, res.size
306
306
  end
307
307
 
308
- def _test_traverse_dumper_exception
308
+ def test_traverse_dumper_exception
309
309
  require 'rbbt/sources/organism'
310
310
 
311
311
  head = 2_000
@@ -346,7 +346,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
346
346
  end
347
347
  end
348
348
 
349
- def _test_traverse_into_stream
349
+ def test_traverse_into_stream
350
350
  size = 100
351
351
  array = (1..size).to_a.collect{|n| n.to_s}
352
352
  stream = TSV.traverse array, :into => :stream do |e|
@@ -355,7 +355,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
355
355
  assert_equal size, stream.read.split("\n").length
356
356
  end
357
357
 
358
- def _test_traverse_progress
358
+ def test_traverse_progress
359
359
  size = 1000
360
360
  array = (1..size).to_a.collect{|n| n.to_s}
361
361
  stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
@@ -389,12 +389,12 @@ class TestTSVParallelThrough < Test::Unit::TestCase
389
389
  assert_equal size, stream.read.split("\n").length
390
390
  end
391
391
 
392
- def _test_store_multiple
392
+ def test_store_multiple
393
393
  size = 1000
394
394
  array = (1..size).to_a.collect{|n| n.to_s}
395
395
  stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
396
396
  sleep 0.01
397
- [e,e+".alt"].extend TSV::MultipleResult
397
+ [e,e+".alt"].extend MultipleResult
398
398
  end
399
399
  assert_equal size*2, stream.read.split("\n").length
400
400
  end
@@ -11,9 +11,8 @@ class TestConcurrency < Test::Unit::TestCase
11
11
  obj3 = "some string"
12
12
  obj4 = TSV.setup({1 => 1})
13
13
 
14
- socket = RbbtProcessQueue::RbbtProcessSocket.new
14
+ socket = RbbtProcessQueue::RbbtProcessSocket.new Marshal
15
15
  10.times do
16
-
17
16
  socket.push(obj1)
18
17
  socket.push(obj2)
19
18
  socket.push(obj3)
@@ -24,7 +23,6 @@ class TestConcurrency < Test::Unit::TestCase
24
23
  assert_equal obj3, socket.pop
25
24
  assert_equal obj4, socket.pop
26
25
 
27
-
28
26
  end
29
27
 
30
28
  socket.swrite.close
@@ -36,4 +34,37 @@ class TestConcurrency < Test::Unit::TestCase
36
34
  end
37
35
  end
38
36
 
37
+ if false and __FILE__ == $0
38
+ socket = RbbtProcessQueue::RbbtProcessSocket.new
39
+
40
+ obj = "Some string" * 1000
41
+ Misc.benchmark(1000) do
42
+ socket.push(obj)
43
+ socket.pop
44
+ end
45
+
46
+ obj = ["Some string"] * 1000
47
+ Misc.benchmark(1000) do
48
+ socket.push(obj)
49
+ socket.pop
50
+ end
51
+ socket.clean
52
+
53
+
54
+ socket = RbbtProcessQueue::RbbtProcessSocket.new Marshal
55
+
56
+ obj = "Some string" * 1000
57
+ Misc.benchmark(1000) do
58
+ socket.push(obj)
59
+ socket.pop
60
+ end
61
+ socket.clean
39
62
 
63
+ socket = RbbtProcessQueue::RbbtProcessSocket.new TSV::StringArraySerializer
64
+ obj = ["Some string"] * 1000
65
+ Misc.benchmark(1000) do
66
+ socket.push(obj)
67
+ socket.pop
68
+ end
69
+ socket.clean
70
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.13.36
4
+ version: 5.13.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-09 00:00:00.000000000 Z
11
+ date: 2014-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -131,10 +131,12 @@ files:
131
131
  - lib/rbbt/fix_width_table.rb
132
132
  - lib/rbbt/knowledge_base.rb
133
133
  - lib/rbbt/monitor.rb
134
+ - lib/rbbt/packed_index.rb
134
135
  - lib/rbbt/persist.rb
135
136
  - lib/rbbt/persist/tsv.rb
136
137
  - lib/rbbt/persist/tsv/adapter.rb
137
138
  - lib/rbbt/persist/tsv/cdb.rb
139
+ - lib/rbbt/persist/tsv/fix_width_table.rb
138
140
  - lib/rbbt/persist/tsv/kyotocabinet.rb
139
141
  - lib/rbbt/persist/tsv/leveldb.rb
140
142
  - lib/rbbt/persist/tsv/lmdb.rb
@@ -294,6 +296,7 @@ files:
294
296
  - test/rbbt/test_fix_width_table.rb
295
297
  - test/rbbt/test_knowledge_base.rb
296
298
  - test/rbbt/test_monitor.rb
299
+ - test/rbbt/test_packed_index.rb
297
300
  - test/rbbt/test_persist.rb
298
301
  - test/rbbt/test_resource.rb
299
302
  - test/rbbt/test_tsv.rb
@@ -390,6 +393,7 @@ test_files:
390
393
  - test/rbbt/util/test_misc.rb
391
394
  - test/rbbt/util/test_tmpfile.rb
392
395
  - test/rbbt/util/R/test_eval.rb
396
+ - test/rbbt/test_packed_index.rb
393
397
  - test/rbbt/test_association.rb
394
398
  - test/rbbt/test_resource.rb
395
399
  - test/rbbt/test_entity.rb