rbbt-util 1.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -80,7 +80,7 @@ module Open
80
80
  # Cache
81
81
 
82
82
  def self.in_cache(url, options = {})
83
- digest = Digest::MD5.hexdigest(url)
83
+ digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
84
84
 
85
85
  filename = File.join(REMOTE_CACHEDIR, digest)
86
86
  if File.exists? filename
@@ -91,7 +91,7 @@ module Open
91
91
  end
92
92
 
93
93
  def self.add_cache(url, data, options = {})
94
- digest = Digest::MD5.hexdigest(url)
94
+ digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
95
95
  Misc.sensiblewrite(File.join(REMOTE_CACHEDIR, digest), data)
96
96
  end
97
97
 
@@ -163,19 +163,28 @@ module Open
163
163
  when (not remote?(url))
164
164
  file_open(url, options[:grep])
165
165
  when options[:nocache]
166
+ # What about grep?
166
167
  wget(url, wget_options)
167
- when in_cache(url)
168
- file_open(in_cache(url), options[:grep])
168
+ when in_cache(url, wget_options)
169
+ file_open(in_cache(url, wget_options), options[:grep])
169
170
  else
170
171
  io = wget(url, wget_options)
171
- add_cache(url, io)
172
+ add_cache(url, io, wget_options)
172
173
  io.close
173
- file_open(in_cache(url), options[:grep])
174
+ file_open(in_cache(url, wget_options), options[:grep])
174
175
  end
175
- io = unzip(io) if (zip? url and not options[:noz]) or options[:zip]
176
- io = gunzip(io) if (gzip? url and not options[:noz]) or options[:gzip]
176
+ io = unzip(io) if (zip?(url) and not options[:noz]) or options[:zip]
177
+ io = gunzip(io) if (gzip?(url) and not options[:noz]) or options[:gzip]
177
178
 
178
- io
179
+ if block_given?
180
+ yield io
181
+ else
182
+ io
183
+ end
184
+ end
185
+
186
+ def self.can_open?(file)
187
+ String === file and (File.exists?(file) or remote?(file))
179
188
  end
180
189
 
181
190
  def self.read(file, options = {}, &block)
@@ -0,0 +1,152 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/util/tsv'
3
+
4
+ module Path
5
+ attr_accessor :pkg_module, :datadir
6
+
7
+ def self.find_files_back_to(path, target, subdir)
8
+ return [] if path.nil?
9
+ raise "Path #{ path } not in directory #{ subdir }" if not Misc.in_directory? path, subdir
10
+
11
+ pkg_module = path.pkg_module
12
+
13
+ files = []
14
+ while path != subdir
15
+ path = File.dirname(path)
16
+ path.extend Path
17
+ path.pkg_module = pkg_module
18
+ path.datadir = path.datadir
19
+ if path[target].exists?
20
+ files << path[target]
21
+ end
22
+ end
23
+
24
+ files
25
+ end
26
+
27
+ def self.path(string, datadir = nil, pkg_module = nil)
28
+ string.extend Path
29
+ string.datadir = datadir
30
+ string.pkg_module = case
31
+ when pkg_module.nil?
32
+ nil
33
+ when String === pkg_module
34
+ Misc.string2const pkg_module
35
+ else
36
+ pkg_module
37
+ end
38
+ string
39
+ end
40
+
41
+ def method_missing(name, *args, &block)
42
+ new = File.join(self.dup, name.to_s)
43
+ new.extend Path
44
+ new.pkg_module = pkg_module
45
+ new.datadir = datadir
46
+ new
47
+ end
48
+
49
+ def [](name)
50
+ new = File.join(self.dup, name.to_s)
51
+ new.extend Path
52
+ new.pkg_module = pkg_module
53
+ new.datadir = datadir
54
+ new
55
+ end
56
+
57
+ def namespace
58
+ return nil if self.nil? or self.empty? or (not datadir.nil? and (self == datadir or File.dirname(self) == datadir))
59
+ if File.directory? self
60
+ File.basename(self)
61
+ else
62
+ File.basename(File.dirname(self))
63
+ end
64
+ end
65
+
66
+ def identifier_files
67
+ if datadir.nil?
68
+ path = File.join(File.dirname(self), 'identifiers')
69
+ path.extend Path
70
+ path.pkg_module = pkg_module
71
+ if path.exists?
72
+ [path]
73
+ else
74
+ []
75
+ end
76
+ else
77
+ identifier_files = Path.find_files_back_to(self, 'identifiers', datadir)
78
+ return identifier_files.collect{|f| Path.path(f, datadir, pkg_module)}
79
+ end
80
+ end
81
+
82
+ def tsv(key = nil, options = {})
83
+ if options.empty? and Hash === key
84
+ options, key = key, nil
85
+ end
86
+
87
+ produce
88
+ TSV.new self, key, options.merge(:datadir => datadir)
89
+ end
90
+
91
+ def index(options = {})
92
+ produce
93
+ TSV.index self, options
94
+ end
95
+
96
+ def open(options = {})
97
+ produce
98
+ Open.open(self, options)
99
+ end
100
+
101
+ def read(options = {})
102
+ produce
103
+ Open.read(self, options)
104
+ end
105
+
106
+ def fields(sep = nil, header_hash = nil)
107
+ produce
108
+ TSV.parse_header(self.open, sep, header_hash)[1].collect{|f| f.extend TSV::Field; f.namespace = namespace ;f}
109
+ end
110
+
111
+ def all_fields(sep = nil, header_hash = nil)
112
+ produce
113
+ key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}
114
+ end
115
+
116
+ def fields_in_namespace(sep = nil, header_hash = nil)
117
+ produce
118
+ TSV.parse_header(self.open, sep, header_hash)[1].collect{|f| f.extend TSV::Field; f.namespace = namespace ;f}.select{|f| f.namespace == namespace}
119
+ end
120
+
121
+ def all_namespace_fields(sep = nil, header_hash = nil)
122
+ produce
123
+ key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}.select{|f| f.namespace == namespace}
124
+ end
125
+
126
+
127
+ def filename
128
+ self.to_s
129
+ end
130
+
131
+ def exists?
132
+ begin
133
+ produce
134
+ true
135
+ rescue
136
+ false
137
+ end
138
+ end
139
+
140
+ def produce
141
+ return if File.exists? self
142
+
143
+ Log.debug("Trying to produce '#{ self }'")
144
+ file, producer = pkg_module.reclaim self
145
+
146
+ raise "File #{self} has not been claimed, cannot produce" if file.nil? or producer.nil?
147
+
148
+ pkg_module.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
149
+ end
150
+ end
151
+
152
+
@@ -1,3 +1,4 @@
1
+ require 'rbbt/util/tsv'
1
2
  require 'rbbt/util/misc'
2
3
  require 'rbbt/util/open'
3
4
  require 'yaml'
@@ -19,91 +20,297 @@ module Persistence
19
20
  end
20
21
 
21
22
  def self.get_persistence_file(file, prefix, options = {})
22
- File.join(CACHEDIR, prefix.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
23
+ name = prefix.to_s << ":" << file.to_s << ":"
24
+ File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
23
25
  end
24
26
 
25
- def self.persist(file, prefix = "", persistence_type = :string, options = {})
26
- options = Misc.add_defaults options, :persistence => true
27
+ def self.get_filename(file)
28
+ case
29
+ when (String === file and File.exists? file)
30
+ File.expand_path file
31
+ when File === file
32
+ File.expand_path file.path
33
+ when Object::TSV === file
34
+ file.filename
35
+ when String === file
36
+ file
37
+ else
38
+ file.class.to_s
39
+ end
40
+ end
41
+
42
+ def self.persist_string(file, prefix = "", options = {})
43
+ options =
44
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
45
+ persistence_update, persistence_file, filename =
46
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
47
+
48
+ filename ||= get_filename(file)
49
+ persistence_file ||= get_persistence_file(filename, prefix, options)
50
+
51
+ if persistence_update or not File.exists? persistence_file
52
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
53
+
54
+ res = yield file, options, filename, persistence_file
55
+ Open.write(persistence_file, res.to_s)
56
+ res
57
+ else
58
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
59
+
60
+ Open.read(persistence_file)
61
+ end
62
+ end
63
+
64
+ def self.persist_marshal(file, prefix = "", options = {})
65
+ options =
66
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
67
+ persistence_update, persistence_file, filename =
68
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
69
+
70
+ filename ||= get_filename(file)
71
+ persistence_file ||= get_persistence_file(filename, prefix, options)
72
+
73
+ if persistence_update or not File.exists? persistence_file
74
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
75
+ res = yield file, options
76
+ Open.write(persistence_file, Marshal.dump(res))
77
+ res
78
+ else
79
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
80
+ Marshal.load(Open.open(persistence_file))
81
+ end
82
+ end
83
+
84
+ def self.persist_yaml(file, prefix = "", options = {})
85
+ options =
86
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
87
+ persistence_update, persistence_file, filename =
88
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
89
+
90
+ filename ||= get_filename(file)
91
+ persistence_file ||= get_persistence_file(filename, prefix, options)
92
+
93
+ if persistence_update or not File.exists? persistence_file
94
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
95
+ res = yield file, options, filename, persistence_file
96
+ Open.write(persistence_file, YAML.dump(res))
97
+ res
98
+ else
99
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
100
+ YAML.load(Open.open(persistence_file))
101
+ end
102
+ end
103
+
104
+ def self.persist_tsv_string(file, prefix = "", options = {})
105
+ options =
106
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
107
+ persistence_update, persistence_file, filename =
108
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
109
+
110
+ filename ||= get_filename(file)
111
+ persistence_file ||= get_persistence_file(filename, prefix, options)
112
+
113
+ if persistence_update or not File.exists? persistence_file
114
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
115
+ res = yield file, options, filename, persistence_file
116
+ Open.write(persistence_file, res.to_s)
117
+ res
118
+ else
119
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
120
+ tsv = Object::TSV.new persistence_file
121
+ tsv.filename = filename
122
+ tsv
123
+ end
124
+ end
125
+
126
+ def self.tsv_serializer(data)
127
+ case
128
+ when (not Object::TSV === data)
129
+ :marshal
130
+ when data.type == :double
131
+ :double
132
+ when data.type == :single
133
+ :single
134
+ else
135
+ :list
136
+ end
137
+ end
138
+
139
+ def self.persist_tsv(file, prefix = "", options = {})
140
+ options =
141
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
142
+ persistence_update, persistence_file, filename =
143
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
27
144
 
28
- persistence, persistence_file =
29
- Misc.process_options options, :persistence, :persistence_file
30
-
31
- filename = Misc.process_options options, :filename
32
- filename ||= case
33
- when (String === file and File.exists? file)
34
- File.expand_path file
35
- when File === file
36
- File.expand_path file.path
37
- when TSV === file
38
- file.filename
39
- else
40
- Digest::MD5.hexdigest(file.inspect)
41
- end
42
-
43
- if persistence
44
- persistence_file ||= get_persistence_file(filename, prefix, options)
45
-
46
- #{{{ CREATE
47
- if ! File.exists? persistence_file
48
- Log.low "Creating Persistence #{ persistence_file } for #{ filename }"
49
- res = yield file, options, filename, persistence_file
50
- if Array === res and res.length == 2 and (Hash === res[1] or res[1].nil?)
51
- data, extra = res
52
- else
53
- data, extra = [res, nil]
145
+ filename ||= get_filename(file)
146
+ persistence_file ||= get_persistence_file(filename, prefix, options)
147
+
148
+ if persistence_update or not File.exists? persistence_file
149
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
150
+
151
+ res = yield file, options, filename, persistence_file
152
+ serializer = tsv_serializer res
153
+
154
+ if File.exists? persistence_file
155
+ Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
156
+ FileUtils.rm persistence_file
157
+ end
158
+
159
+ per = Persistence::TSV.get persistence_file, true, serializer
160
+
161
+ per.write
162
+ per.merge! res
163
+
164
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
165
+ if res.respond_to?(key.to_sym) and per.respond_to?("#{key}=".to_sym)
166
+ per.send "#{key}=".to_sym, res.send(key.to_sym)
54
167
  end
55
- ddd data.filename
56
-
57
- case persistence_type.to_sym
58
- when :tsv
59
- if Hash === data or Object::TSV === data
60
- Log.debug "Creating #{Persistence::TSV} for #{ persistence_file }"
61
- per = Persistence::TSV.get persistence_file
62
- per.write
63
- data.each{|k,v| per[k.to_s] = v}
64
- %w(case_insensitive fields key_field type filename). each do |key|
65
- if data.respond_to? key
66
- per.send "#{key}=".to_sym, data.send(key.to_sym)
67
- else
68
- per.send "#{key}=".to_sym, extra[key.to_sym]
69
- end
70
- end
71
- per.read
72
-
73
- data = per
74
- end
75
- when :string
76
- Open.write(persistence_file, data.to_s)
77
- when :marshal
78
- Open.write(persistence_file, Marshal.dump(data))
79
- when :yaml
80
- Open.write(persistence_file, YAML.dump(data))
168
+ end
169
+
170
+ per.read
171
+
172
+ tsv = Object::TSV.new per
173
+
174
+ tsv
175
+ else
176
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
177
+
178
+ per = Persistence::TSV.get persistence_file, true, serializer
179
+ tsv = Object::TSV.new per
180
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
181
+ if tsv.respond_to?(key.to_sym) and per.respond_to?(key.to_sym)
182
+ tsv.send "#{key}=".to_sym, per.send(key.to_sym)
81
183
  end
82
-
83
- return [data, extra]
184
+ end
84
185
 
85
- #{{{ LOAD
86
- else
87
- Log.low "Opening Persistence #{ persistence_file } for #{ filename }"
88
- case persistence_type.to_sym
89
- when :tsv
90
- data = Persistence::TSV.get persistence_file
91
-
92
- extra = {}
93
- %W(case_insensitive fields key_field type filename).each{|key| extra[key.to_sym] = data.send key.to_sym}
94
-
95
- return [data, extra]
96
- when :string
97
- return [Open.read(persistence_type), nil]
98
- when :marshal
99
- return [File.open(persistence_file){|f| Marshal.load(f)}, nil]
100
- when :yaml
101
- return [File.open(persistence_file){|f| YAML.load(f)}, nil]
186
+ tsv
187
+ end
188
+ end
189
+
190
+ def self.persist_tsv_extra(file, prefix = "", options = {})
191
+ options =
192
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
193
+ persistence_update, persistence_file, filename =
194
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
195
+
196
+ filename ||= get_filename(file)
197
+ persistence_file ||= get_persistence_file(filename, prefix, options)
198
+
199
+ if persistence_update or not File.exists? persistence_file
200
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
201
+ res, extra = yield file, options, filename, persistence_file
202
+ serializer = tsv_serializer res
203
+
204
+ per = Persistence::TSV.get persistence_file, true, serializer
205
+
206
+ per.write
207
+ per.merge! res
208
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
209
+ if extra.include?(key.to_sym) and per.respond_to?(key.to_sym)
210
+ per.send "#{key}=".to_sym, extra[key.to_sym]
102
211
  end
212
+ end
213
+ per.read
103
214
 
215
+ [ per, extra ]
216
+ else
217
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
218
+ per = Persistence::TSV.get persistence_file, true, serializer
219
+
220
+ extra = {}
221
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
222
+ if per.respond_to?(key.to_sym)
223
+ extra[key] = per.send(key.to_sym)
224
+ end
104
225
  end
226
+
227
+ [ per, extra ]
228
+ end
229
+ end
230
+
231
+ def self.persist_fwt(file, prefix = "", options = {})
232
+ options =
233
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
234
+ persistence_update, persistence_file, filename =
235
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
236
+
237
+ filename ||= get_filename(file)
238
+ persistence_file ||= get_persistence_file(filename, prefix, options)
239
+
240
+ if persistence_update or not File.exists? persistence_file
241
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
242
+
243
+ range = options[:range]
244
+
245
+ res = yield file, options, filename, persistence_file
246
+
247
+ if File.exists? persistence_file
248
+ Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
249
+ FileUtils.rm persistence_file
250
+ end
251
+
252
+
253
+ max_length = res.collect{|k,v| k.length}.max
254
+
255
+ if range
256
+ begin
257
+ fwt = FixWidthTable.new persistence_file, max_length, true
258
+ fwt.add_range res
259
+ rescue
260
+ FileUtils.rm persistence_file
261
+ raise $!
262
+ end
263
+ else
264
+ begin
265
+ fwt = FixWidthTable.new persistence_file, max_length, false
266
+ fwt.add_point res
267
+ rescue
268
+ FileUtils.rm persistence_file
269
+ raise $!
270
+ end
271
+ end
272
+
273
+ fwt.read
274
+
275
+ fwt
105
276
  else
106
- yield file, options
277
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
278
+
279
+ fwt = FixWidthTable.new persistence_file, nil, nil
280
+
281
+ fwt
282
+ end
283
+ end
284
+
285
+ def self.persist(file, prefix = "", persistence_type = :string, options = {}, &block)
286
+ options = Misc.add_defaults options, :persistence => true
287
+ persistence =
288
+ Misc.process_options options, :persistence
289
+
290
+ filename = get_filename(file)
291
+
292
+ if not persistence
293
+ Log.low "Non Persistent Loading for #{filename}. Prefix: #{prefix}"
294
+ yield file, options, filename
295
+ else
296
+ Log.low "Persistent Loading for #{filename}. Prefix: #{prefix}. Type #{persistence_type.to_s}"
297
+
298
+ case persistence_type.to_sym
299
+ when :string
300
+ persist_string(file, prefix, options, &block)
301
+ when :marshal
302
+ persist_marshal(file, prefix, options, &block)
303
+ when :yaml
304
+ persist_yaml(file, prefix, options, &block)
305
+ when :tsv
306
+ persist_tsv(file, prefix, options, &block)
307
+ when :tsv_string
308
+ persist_tsv_string(file, prefix, options, &block)
309
+ when :tsv_extra
310
+ persist_tsv_extra(file, prefix, options, &block)
311
+ when :fwt
312
+ persist_fwt(file, prefix, options, &block)
313
+ end
107
314
  end
108
315
  end
109
316
  end